nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
migrations/0003_allfields_key.py
CHANGED
@@ -17,45 +17,11 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from typing import Optional
|
21
20
|
|
22
|
-
from nucliadb_protos.resources_pb2 import AllFieldIDs, FieldID
|
23
|
-
|
24
|
-
from nucliadb.common import datamanagers
|
25
21
|
from nucliadb.migrator.context import ExecutionContext
|
26
|
-
from nucliadb.migrator.migrator import logger
|
27
22
|
|
28
23
|
|
29
24
|
async def migrate(context: ExecutionContext) -> None: ...
|
30
25
|
|
31
26
|
|
32
|
-
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
33
|
-
async for resource_id in datamanagers.resources.iterate_resource_ids(kbid=kbid):
|
34
|
-
async with context.kv_driver.transaction() as txn:
|
35
|
-
resource = await datamanagers.resources.get_resource(
|
36
|
-
txn, kbid=kbid, rid=resource_id
|
37
|
-
)
|
38
|
-
if resource is None:
|
39
|
-
logger.warning(
|
40
|
-
f"kb={kbid} rid={resource_id}: resource not found. Skipping..."
|
41
|
-
)
|
42
|
-
continue
|
43
|
-
|
44
|
-
all_fields: Optional[AllFieldIDs] = await resource.get_all_field_ids()
|
45
|
-
if all_fields is not None:
|
46
|
-
logger.warning(
|
47
|
-
f"kb={kbid} rid={resource_id}: already has all fields key. Skipping..."
|
48
|
-
)
|
49
|
-
continue
|
50
|
-
|
51
|
-
# Migrate resource
|
52
|
-
logger.warning(f"kb={kbid} rid={resource_id}: migrating...")
|
53
|
-
all_fields = AllFieldIDs()
|
54
|
-
async for (
|
55
|
-
field_type,
|
56
|
-
field_id,
|
57
|
-
) in resource._deprecated_scan_fields_ids():
|
58
|
-
fid = FieldID(field_type=field_type, field=field_id)
|
59
|
-
all_fields.fields.append(fid)
|
60
|
-
await resource.set_all_field_ids(all_fields)
|
61
|
-
await txn.commit()
|
27
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None: ...
|
@@ -26,7 +26,6 @@ rollover will do the upgrade automatically.
|
|
26
26
|
|
27
27
|
"""
|
28
28
|
|
29
|
-
from nucliadb.common.cluster.rollover import rollover_kb_shards
|
30
29
|
from nucliadb.migrator.context import ExecutionContext
|
31
30
|
|
32
31
|
|
@@ -34,4 +33,7 @@ async def migrate(context: ExecutionContext) -> None: ...
|
|
34
33
|
|
35
34
|
|
36
35
|
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
37
|
-
|
36
|
+
"""
|
37
|
+
We only need 1 rollover migration defined at a time; otherwise, we will
|
38
|
+
possibly run many for a kb when we only ever need to run one
|
39
|
+
"""
|
@@ -27,23 +27,23 @@ index data loss. Rollover affected KBs
|
|
27
27
|
|
28
28
|
import logging
|
29
29
|
|
30
|
-
from nucliadb.common.cluster.rollover import rollover_kb_shards
|
31
30
|
from nucliadb.migrator.context import ExecutionContext
|
32
31
|
|
33
32
|
logger = logging.getLogger(__name__)
|
34
33
|
|
35
|
-
AFFECTED_KBS = [
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
]
|
34
|
+
# AFFECTED_KBS = [
|
35
|
+
# "1efc5a33-bc5a-490c-8b47-b190beee212d",
|
36
|
+
# "f11d6eb9-da5e-4519-ac3d-e304bfa5c354",
|
37
|
+
# "096d9070-f7be-40c8-a24c-19c89072e3ff",
|
38
|
+
# "848f01bc-341a-4346-b473-6b11b76b26eb",
|
39
|
+
# ]
|
41
40
|
|
42
41
|
|
43
42
|
async def migrate(context: ExecutionContext) -> None: ...
|
44
43
|
|
45
44
|
|
46
45
|
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
47
|
-
|
48
|
-
|
49
|
-
|
46
|
+
"""
|
47
|
+
We only need 1 rollover migration defined at a time; otherwise, we will
|
48
|
+
possibly run many for a kb when we only ever need to run one
|
49
|
+
"""
|
@@ -26,7 +26,6 @@ Tikv doesn't really like scanning a lot of keys, so we need to materialize the l
|
|
26
26
|
|
27
27
|
import logging
|
28
28
|
|
29
|
-
from nucliadb.common import datamanagers
|
30
29
|
from nucliadb.migrator.context import ExecutionContext
|
31
30
|
|
32
31
|
logger = logging.getLogger(__name__)
|
@@ -35,18 +34,4 @@ logger = logging.getLogger(__name__)
|
|
35
34
|
async def migrate(context: ExecutionContext) -> None: ...
|
36
35
|
|
37
36
|
|
38
|
-
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
39
|
-
async with context.kv_driver.transaction() as txn:
|
40
|
-
labelset_list = await datamanagers.labels._get_labelset_ids(txn, kbid=kbid)
|
41
|
-
if labelset_list is not None:
|
42
|
-
logger.info("No need for labelset list migration", extra={"kbid": kbid})
|
43
|
-
return
|
44
|
-
|
45
|
-
labelset_list = await datamanagers.labels._deprecated_scan_labelset_ids(
|
46
|
-
txn, kbid=kbid
|
47
|
-
)
|
48
|
-
await datamanagers.labels._set_labelset_ids(
|
49
|
-
txn, kbid=kbid, labelsets=labelset_list
|
50
|
-
)
|
51
|
-
logger.info("Labelset list migrated", extra={"kbid": kbid})
|
52
|
-
await txn.commit()
|
37
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None: ...
|
@@ -23,25 +23,20 @@ Allow targeted rollover of KBs
|
|
23
23
|
"""
|
24
24
|
|
25
25
|
import logging
|
26
|
-
import os
|
27
26
|
|
28
|
-
from nucliadb.common.cluster.rollover import rollover_kb_shards
|
29
27
|
from nucliadb.migrator.context import ExecutionContext
|
30
28
|
|
31
29
|
logger = logging.getLogger(__name__)
|
32
30
|
|
33
31
|
|
34
|
-
AFFECTED_KBS = [
|
35
|
-
kbid.strip()
|
36
|
-
for kbid in os.environ.get("ROLLOVER_KBS", "").split(",")
|
37
|
-
if kbid.strip()
|
38
|
-
]
|
32
|
+
# AFFECTED_KBS = [kbid.strip() for kbid in os.environ.get("ROLLOVER_KBS", "").split(",") if kbid.strip()]
|
39
33
|
|
40
34
|
|
41
35
|
async def migrate(context: ExecutionContext) -> None: ...
|
42
36
|
|
43
37
|
|
44
38
|
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
45
|
-
|
46
|
-
|
47
|
-
|
39
|
+
"""
|
40
|
+
We only need 1 rollover migration defined at a time; otherwise, we will
|
41
|
+
possibly run many for a kb when we only ever need to run one
|
42
|
+
"""
|
@@ -23,9 +23,7 @@ Allow targeted rollover of KBs
|
|
23
23
|
"""
|
24
24
|
|
25
25
|
import logging
|
26
|
-
import os
|
27
26
|
|
28
|
-
from nucliadb.common.cluster.rollover import rollover_kb_shards
|
29
27
|
from nucliadb.migrator.context import ExecutionContext
|
30
28
|
|
31
29
|
logger = logging.getLogger(__name__)
|
@@ -35,6 +33,7 @@ async def migrate(context: ExecutionContext) -> None: ...
|
|
35
33
|
|
36
34
|
|
37
35
|
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
38
|
-
|
39
|
-
|
40
|
-
|
36
|
+
"""
|
37
|
+
We only need 1 rollover migration defined at a time; otherwise, we will
|
38
|
+
possibly run many for a kb when we only ever need to run one
|
39
|
+
"""
|
@@ -24,25 +24,20 @@ Targeted rollover for a specific KB
|
|
24
24
|
"""
|
25
25
|
|
26
26
|
import logging
|
27
|
-
import os
|
28
27
|
|
29
|
-
from nucliadb.common.cluster.rollover import rollover_kb_shards
|
30
28
|
from nucliadb.migrator.context import ExecutionContext
|
31
29
|
|
32
30
|
logger = logging.getLogger(__name__)
|
33
31
|
|
34
32
|
|
35
|
-
AFFECTED_KBS = [
|
36
|
-
kbid.strip()
|
37
|
-
for kbid in os.environ.get("ROLLOVER_KBS", "").split(",")
|
38
|
-
if kbid.strip()
|
39
|
-
]
|
33
|
+
# AFFECTED_KBS = [kbid.strip() for kbid in os.environ.get("ROLLOVER_KBS", "").split(",") if kbid.strip()]
|
40
34
|
|
41
35
|
|
42
36
|
async def migrate(context: ExecutionContext) -> None: ...
|
43
37
|
|
44
38
|
|
45
39
|
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
46
|
-
|
47
|
-
|
48
|
-
|
40
|
+
"""
|
41
|
+
We only need 1 rollover migration defined at a time; otherwise, we will
|
42
|
+
possibly run many for a kb when we only ever need to run one
|
43
|
+
"""
|
@@ -25,10 +25,6 @@ Targeted rollover for a specific KBs which still don't have the latest version o
|
|
25
25
|
|
26
26
|
import logging
|
27
27
|
|
28
|
-
from nucliadb_protos.noderesources_pb2 import ShardCreated
|
29
|
-
|
30
|
-
from nucliadb.common import datamanagers
|
31
|
-
from nucliadb.common.cluster.rollover import rollover_kb_shards
|
32
28
|
from nucliadb.migrator.context import ExecutionContext
|
33
29
|
|
34
30
|
logger = logging.getLogger(__name__)
|
@@ -41,29 +37,30 @@ async def migrate(context: ExecutionContext) -> None: ...
|
|
41
37
|
|
42
38
|
|
43
39
|
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
40
|
+
"""
|
41
|
+
We only need 1 rollover migration defined at a time; otherwise, we will
|
42
|
+
possibly run many for a kb when we only ever need to run one
|
43
|
+
"""
|
44
|
+
# try:
|
45
|
+
# if await has_old_paragraphs_index(context, kbid):
|
46
|
+
# logger.info("Rolling over affected KB", extra={"kbid": kbid})
|
47
|
+
# await rollover_kb_index(context, kbid)
|
48
|
+
# else:
|
49
|
+
# logger.info(
|
50
|
+
# "KB already has the latest version of the paragraphs index, skipping rollover",
|
51
|
+
# extra={"kbid": kbid},
|
52
|
+
# )
|
53
|
+
# except ShardsObjectNotFound:
|
54
|
+
# logger.warning("KB not found, skipping rollover", extra={"kbid": kbid})
|
55
55
|
|
56
56
|
|
57
|
-
async def has_old_paragraphs_index(context: ExecutionContext, kbid: str) -> bool:
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
):
|
68
|
-
return True
|
69
|
-
return False
|
57
|
+
# async def has_old_paragraphs_index(context: ExecutionContext, kbid: str) -> bool:
|
58
|
+
# async with context.kv_driver.transaction(read_only=True) as txn:
|
59
|
+
# shards_object = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=False)
|
60
|
+
# if not shards_object:
|
61
|
+
# raise ShardsObjectNotFound()
|
62
|
+
# for shard in shards_object.shards:
|
63
|
+
# for replica in shard.replicas:
|
64
|
+
# if replica.shard.paragraph_service != ShardCreated.ParagraphService.PARAGRAPH_V2:
|
65
|
+
# return True
|
66
|
+
# return False
|
@@ -42,7 +42,7 @@ async def migrate(context: ExecutionContext) -> None: ...
|
|
42
42
|
|
43
43
|
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
44
44
|
async with context.kv_driver.transaction() as txn:
|
45
|
-
shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
|
45
|
+
shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid, for_update=True)
|
46
46
|
if shards is None:
|
47
47
|
logger.error("KB without shards", extra={"kbid": kbid})
|
48
48
|
return
|
@@ -52,9 +52,7 @@ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
|
52
52
|
shards.shards[shards.actual].read_only = False
|
53
53
|
|
54
54
|
# just ensure we're writing it correctly
|
55
|
-
assert [shard_object.read_only for shard_object in shards.shards].count(
|
56
|
-
False
|
57
|
-
) == 1
|
55
|
+
assert [shard_object.read_only for shard_object in shards.shards].count(False) == 1
|
58
56
|
|
59
57
|
await datamanagers.cluster.update_kb_shards(txn, kbid=kbid, shards=shards)
|
60
58
|
await txn.commit()
|
@@ -18,17 +18,17 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
"""Migration #
|
21
|
+
"""Migration #18
|
22
22
|
|
23
23
|
Due to a bug on backend services, some kbslugs were not properly deleted and got
|
24
24
|
orphaned. Let's delete them!
|
25
25
|
|
26
26
|
"""
|
27
|
+
|
27
28
|
import logging
|
28
29
|
|
29
30
|
from nucliadb.common import datamanagers
|
30
31
|
from nucliadb.common.datamanagers.kb import KB_SLUGS_BASE
|
31
|
-
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
32
32
|
from nucliadb.migrator.context import ExecutionContext
|
33
33
|
|
34
34
|
logger = logging.getLogger(__name__)
|
@@ -36,14 +36,12 @@ logger = logging.getLogger(__name__)
|
|
36
36
|
|
37
37
|
async def migrate(context: ExecutionContext) -> None:
|
38
38
|
async with context.kv_driver.transaction() as txn:
|
39
|
-
async for key in txn.keys(KB_SLUGS_BASE
|
39
|
+
async for key in txn.keys(KB_SLUGS_BASE):
|
40
40
|
slug = key.replace(KB_SLUGS_BASE, "")
|
41
|
-
value = await txn.get(key)
|
41
|
+
value = await txn.get(key, for_update=False)
|
42
42
|
if value is None:
|
43
43
|
# KB with slug but without uuid? Seems wrong, let's remove it too
|
44
|
-
logger.info(
|
45
|
-
"Removing /kbslugs with empty value", extra={"maindb_key": key}
|
46
|
-
)
|
44
|
+
logger.info("Removing /kbslugs with empty value", extra={"maindb_key": key})
|
47
45
|
await txn.delete(key)
|
48
46
|
continue
|
49
47
|
|
@@ -25,10 +25,6 @@ Targeted rollover for a specific KBs which still don't have the latest version o
|
|
25
25
|
|
26
26
|
import logging
|
27
27
|
|
28
|
-
from nucliadb_protos.noderesources_pb2 import ShardCreated
|
29
|
-
|
30
|
-
from nucliadb.common import datamanagers
|
31
|
-
from nucliadb.common.cluster.rollover import rollover_kb_shards
|
32
28
|
from nucliadb.migrator.context import ExecutionContext
|
33
29
|
|
34
30
|
logger = logging.getLogger(__name__)
|
@@ -41,29 +37,30 @@ async def migrate(context: ExecutionContext) -> None: ...
|
|
41
37
|
|
42
38
|
|
43
39
|
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
40
|
+
"""
|
41
|
+
We only need 1 rollover migration defined at a time; otherwise, we will
|
42
|
+
possibly run many for a kb when we only ever need to run one
|
43
|
+
"""
|
44
|
+
# try:
|
45
|
+
# if await has_old_paragraphs_index(context, kbid):
|
46
|
+
# logger.info("Rolling over affected KB", extra={"kbid": kbid})
|
47
|
+
# await rollover_kb_index(context, kbid)
|
48
|
+
# else:
|
49
|
+
# logger.info(
|
50
|
+
# "KB already has the latest version of the paragraphs index, skipping rollover",
|
51
|
+
# extra={"kbid": kbid},
|
52
|
+
# )
|
53
|
+
# except ShardsObjectNotFound:
|
54
|
+
# logger.warning("KB not found, skipping rollover", extra={"kbid": kbid})
|
55
55
|
|
56
56
|
|
57
|
-
async def has_old_paragraphs_index(context: ExecutionContext, kbid: str) -> bool:
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
):
|
68
|
-
return True
|
69
|
-
return False
|
57
|
+
# async def has_old_paragraphs_index(context: ExecutionContext, kbid: str) -> bool:
|
58
|
+
# async with context.kv_driver.transaction(read_only=True) as txn:
|
59
|
+
# shards_object = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
|
60
|
+
# if not shards_object:
|
61
|
+
# raise ShardsObjectNotFound()
|
62
|
+
# for shard in shards_object.shards:
|
63
|
+
# for replica in shard.replicas:
|
64
|
+
# if replica.shard.paragraph_service != ShardCreated.ParagraphService.PARAGRAPH_V3:
|
65
|
+
# return True
|
66
|
+
# return False
|
@@ -29,7 +29,7 @@ create new shards in the remaining nodes.
|
|
29
29
|
import logging
|
30
30
|
|
31
31
|
from nucliadb.common import datamanagers
|
32
|
-
from nucliadb.common.cluster.rollover import
|
32
|
+
from nucliadb.common.cluster.rollover import rollover_kb_index
|
33
33
|
from nucliadb.common.cluster.settings import settings as cluster_settings
|
34
34
|
from nucliadb.migrator.context import ExecutionContext
|
35
35
|
|
@@ -56,11 +56,11 @@ async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
|
56
56
|
return
|
57
57
|
|
58
58
|
logger.info("Rolling over affected KB", extra={"kbid": kbid})
|
59
|
-
await
|
59
|
+
await rollover_kb_index(context, kbid, drain_nodes=drain_node_ids)
|
60
60
|
|
61
61
|
|
62
62
|
async def kb_has_shards_on_drain_nodes(kbid: str, drain_node_ids: list[str]) -> bool:
|
63
|
-
async with datamanagers.
|
63
|
+
async with datamanagers.with_ro_transaction() as txn:
|
64
64
|
shards = await datamanagers.cluster.get_kb_shards(txn, kbid=kbid)
|
65
65
|
if not shards:
|
66
66
|
logger.warning("Shards object not found", extra={"kbid": kbid})
|
@@ -17,31 +17,28 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from unittest import mock
|
21
20
|
|
22
|
-
|
21
|
+
"""Migration #21
|
23
22
|
|
24
|
-
|
25
|
-
|
23
|
+
With the new vectorsets implementation, we need to store some information on
|
24
|
+
maindb. As the key "/kbs/{kbid}/vectorsets" was already used at some point, this
|
25
|
+
migration ensures the key is overwritten and set to the new value
|
26
26
|
|
27
|
-
|
27
|
+
"""
|
28
28
|
|
29
|
+
import logging
|
29
30
|
|
30
|
-
|
31
|
-
|
32
|
-
with (
|
33
|
-
mock.patch("uvicorn.Server.run"),
|
34
|
-
mock.patch("nucliadb.standalone.run.parser", return_value=Settings()),
|
35
|
-
mock.patch(f"{STANDALONE_RUN}.get_latest_nucliadb", return_value="1.0.0"),
|
36
|
-
mock.patch("uvicorn.Server.startup"),
|
37
|
-
mock.patch(f"{STANDALONE_RUN}.run_migrations"),
|
38
|
-
):
|
39
|
-
yield
|
31
|
+
from nucliadb.common import datamanagers
|
32
|
+
from nucliadb.migrator.context import ExecutionContext
|
40
33
|
|
34
|
+
logger = logging.getLogger(__name__)
|
41
35
|
|
42
|
-
def test_run():
|
43
|
-
run()
|
44
36
|
|
37
|
+
async def migrate(context: ExecutionContext) -> None: ...
|
45
38
|
|
46
|
-
|
47
|
-
|
39
|
+
|
40
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
41
|
+
async with context.kv_driver.transaction() as txn:
|
42
|
+
logger.info(f"Overwriting vectorsets key", extra={"kbid": kbid})
|
43
|
+
await datamanagers.vectorsets.initialize(txn, kbid=kbid)
|
44
|
+
await txn.commit()
|
@@ -17,22 +17,27 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from unittest.mock import Mock
|
21
20
|
|
22
|
-
|
21
|
+
"""Migration #22
|
23
22
|
|
24
|
-
|
25
|
-
|
23
|
+
There was a bug while ingesting/indexing that caused paragraphs not to be properly
|
24
|
+
removed in some cases. This rollover migration ensures data is consistently
|
25
|
+
indexed.
|
26
26
|
|
27
|
+
"""
|
27
28
|
|
28
|
-
|
29
|
-
return Mount(path=path, app=Mock())
|
29
|
+
import logging
|
30
30
|
|
31
|
+
from nucliadb.migrator.context import ExecutionContext
|
31
32
|
|
32
|
-
|
33
|
-
assert is_versioned_route(get_route(path="/api/v1/search"))
|
34
|
-
assert not is_versioned_route(get_route(path="/metrics"))
|
33
|
+
logger = logging.getLogger(__name__)
|
35
34
|
|
36
35
|
|
37
|
-
def
|
38
|
-
|
36
|
+
async def migrate(context: ExecutionContext) -> None: ...
|
37
|
+
|
38
|
+
|
39
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
40
|
+
"""
|
41
|
+
We only need 1 rollover migration defined at a time; otherwise, we will
|
42
|
+
possibly run many for a kb when we only ever need to run one
|
43
|
+
"""
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #23
|
22
|
+
|
23
|
+
Backfill the data into the PG catalog
|
24
|
+
|
25
|
+
"""
|
26
|
+
|
27
|
+
import logging
|
28
|
+
from typing import cast
|
29
|
+
|
30
|
+
from nucliadb.common import datamanagers
|
31
|
+
from nucliadb.common.maindb.pg import PGDriver, PGTransaction
|
32
|
+
from nucliadb.ingest.orm.processor.pgcatalog import pgcatalog_update
|
33
|
+
from nucliadb.migrator.context import ExecutionContext
|
34
|
+
|
35
|
+
logger = logging.getLogger(__name__)
|
36
|
+
|
37
|
+
|
38
|
+
async def migrate(context: ExecutionContext) -> None: ...
|
39
|
+
|
40
|
+
|
41
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
42
|
+
if not isinstance(context.kv_driver, PGDriver):
|
43
|
+
return
|
44
|
+
|
45
|
+
BATCH_SIZE = 100
|
46
|
+
async with context.kv_driver.transaction() as txn:
|
47
|
+
txn = cast(PGTransaction, txn)
|
48
|
+
continue_sql = ""
|
49
|
+
while True:
|
50
|
+
async with txn.connection.cursor() as cur:
|
51
|
+
# Get list of resources except those already in the catalog
|
52
|
+
await cur.execute(
|
53
|
+
f"""
|
54
|
+
SELECT SPLIT_PART(key, '/', 5)::UUID FROM resources
|
55
|
+
LEFT JOIN catalog ON kbid = %s AND SPLIT_PART(key, '/', 5)::UUID = rid
|
56
|
+
WHERE key SIMILAR TO %s
|
57
|
+
AND rid IS NULL
|
58
|
+
{continue_sql}
|
59
|
+
ORDER BY key
|
60
|
+
LIMIT %s
|
61
|
+
""",
|
62
|
+
(kbid, f"/kbs/{kbid}/r/[a-f0-9]*", BATCH_SIZE),
|
63
|
+
)
|
64
|
+
resources_to_index = [r[0] for r in await cur.fetchall()]
|
65
|
+
if len(resources_to_index) == 0:
|
66
|
+
return
|
67
|
+
|
68
|
+
# Index each resource
|
69
|
+
for rid in resources_to_index:
|
70
|
+
rid = str(rid).replace("-", "")
|
71
|
+
resource = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=rid)
|
72
|
+
if resource is None:
|
73
|
+
logger.warning(f"Could not load resource {rid} for kbid {kbid}")
|
74
|
+
continue
|
75
|
+
|
76
|
+
await resource.compute_global_tags(resource.indexer)
|
77
|
+
await pgcatalog_update(txn, kbid, resource)
|
78
|
+
|
79
|
+
await txn.commit()
|
80
|
+
continue_sql = f"AND key > '/kbs/{kbid}/r/{rid}'"
|