nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -18,6 +18,7 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import enum
|
21
|
+
from typing import Optional
|
21
22
|
|
22
23
|
from pydantic import Field
|
23
24
|
from pydantic_settings import BaseSettings
|
@@ -85,6 +86,11 @@ class Settings(BaseSettings):
|
|
85
86
|
cluster_discovery_kubernetes_selector: str = "appType=node"
|
86
87
|
cluster_discovery_manual_addresses: list[str] = []
|
87
88
|
|
89
|
+
nidx_api_address: Optional[str] = Field(default=None, description="NIDX gRPC API address")
|
90
|
+
nidx_searcher_address: Optional[str] = Field(
|
91
|
+
default=None, description="NIDX gRPC searcher API address"
|
92
|
+
)
|
93
|
+
|
88
94
|
|
89
95
|
settings = Settings()
|
90
96
|
|
@@ -30,6 +30,7 @@ from nucliadb_protos.nodereader_pb2 import (
|
|
30
30
|
DocumentItem,
|
31
31
|
EdgeList,
|
32
32
|
GetShardRequest,
|
33
|
+
IdCollection,
|
33
34
|
ParagraphItem,
|
34
35
|
ParagraphSearchRequest,
|
35
36
|
ParagraphSearchResponse,
|
@@ -46,15 +47,13 @@ from nucliadb_protos.noderesources_pb2 import (
|
|
46
47
|
EmptyResponse,
|
47
48
|
Resource,
|
48
49
|
ResourceID,
|
49
|
-
)
|
50
|
-
from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
|
51
|
-
from nucliadb_protos.noderesources_pb2 import (
|
52
50
|
ShardCreated,
|
53
51
|
ShardId,
|
54
52
|
ShardIds,
|
55
53
|
VectorSetID,
|
56
54
|
VectorSetList,
|
57
55
|
)
|
56
|
+
from nucliadb_protos.noderesources_pb2 import Shard as NodeResourcesShard
|
58
57
|
from nucliadb_protos.nodewriter_pb2 import NewShardRequest, OpStatus
|
59
58
|
|
60
59
|
from ..settings import settings
|
@@ -68,8 +67,7 @@ except ImportError: # pragma: no cover
|
|
68
67
|
IndexNodeException = Exception
|
69
68
|
|
70
69
|
try:
|
71
|
-
from nucliadb_node_binding import NodeReader
|
72
|
-
from nucliadb_node_binding import NodeWriter # type: ignore
|
70
|
+
from nucliadb_node_binding import NodeReader, NodeWriter
|
73
71
|
except ImportError: # pragma: no cover
|
74
72
|
NodeReader = None
|
75
73
|
NodeWriter = None
|
@@ -80,15 +78,11 @@ class StandaloneReaderWrapper:
|
|
80
78
|
|
81
79
|
def __init__(self):
|
82
80
|
if NodeReader is None:
|
83
|
-
raise ImportError(
|
84
|
-
"NucliaDB index node bindings are not installed (reader not found)"
|
85
|
-
)
|
81
|
+
raise ImportError("NucliaDB index node bindings are not installed (reader not found)")
|
86
82
|
self.reader = NodeReader()
|
87
83
|
self.executor = ThreadPoolExecutor(settings.local_reader_threads)
|
88
84
|
|
89
|
-
async def Search(
|
90
|
-
self, request: SearchRequest, retry: bool = False
|
91
|
-
) -> SearchResponse:
|
85
|
+
async def Search(self, request: SearchRequest, retry: bool = False) -> SearchResponse:
|
92
86
|
try:
|
93
87
|
loop = asyncio.get_running_loop()
|
94
88
|
result = await loop.run_in_executor(
|
@@ -112,30 +106,6 @@ class StandaloneReaderWrapper:
|
|
112
106
|
else:
|
113
107
|
raise
|
114
108
|
|
115
|
-
async def ParagraphSearch(
|
116
|
-
self, request: ParagraphSearchRequest
|
117
|
-
) -> ParagraphSearchResponse:
|
118
|
-
loop = asyncio.get_running_loop()
|
119
|
-
result = await loop.run_in_executor(
|
120
|
-
self.executor, self.reader.paragraph_search, request.SerializeToString()
|
121
|
-
)
|
122
|
-
pb_bytes = bytes(result)
|
123
|
-
pb = ParagraphSearchResponse()
|
124
|
-
pb.ParseFromString(pb_bytes)
|
125
|
-
return pb
|
126
|
-
|
127
|
-
async def RelationSearch(
|
128
|
-
self, request: RelationSearchRequest
|
129
|
-
) -> RelationSearchResponse:
|
130
|
-
loop = asyncio.get_running_loop()
|
131
|
-
result = await loop.run_in_executor(
|
132
|
-
self.executor, self.reader.relation_search, request.SerializeToString()
|
133
|
-
)
|
134
|
-
pb_bytes = bytes(result)
|
135
|
-
pb = RelationSearchResponse()
|
136
|
-
pb.ParseFromString(pb_bytes)
|
137
|
-
return pb
|
138
|
-
|
139
109
|
async def GetShard(self, request: GetShardRequest) -> NodeResourcesShard:
|
140
110
|
loop = asyncio.get_running_loop()
|
141
111
|
result = await loop.run_in_executor(
|
@@ -200,9 +170,7 @@ class StandaloneReaderWrapper:
|
|
200
170
|
raise exception
|
201
171
|
await loop.run_in_executor(self.executor, t1.join)
|
202
172
|
|
203
|
-
async def Paragraphs(
|
204
|
-
self, stream_request: StreamRequest
|
205
|
-
) -> AsyncIterator[ParagraphItem]:
|
173
|
+
async def Paragraphs(self, stream_request: StreamRequest) -> AsyncIterator[ParagraphItem]:
|
206
174
|
loop = asyncio.get_running_loop()
|
207
175
|
q: asyncio.Queue[ParagraphItem] = asyncio.Queue(1)
|
208
176
|
exception = None
|
@@ -248,30 +216,15 @@ class StandaloneReaderWrapper:
|
|
248
216
|
edge_list.ParseFromString(pb_bytes)
|
249
217
|
return edge_list
|
250
218
|
|
251
|
-
|
252
|
-
async def Search(self, request: SearchRequest, retry: bool = False) -> SearchResponse:
|
253
|
-
try:
|
219
|
+
async def VectorIds(self, request: VectorSetID) -> IdCollection:
|
254
220
|
loop = asyncio.get_running_loop()
|
255
221
|
result = await loop.run_in_executor(
|
256
|
-
self.executor, self.reader.
|
222
|
+
self.executor, self.reader.vector_ids, request.SerializeToString()
|
257
223
|
)
|
258
224
|
pb_bytes = bytes(result)
|
259
|
-
|
260
|
-
|
261
|
-
return
|
262
|
-
except IndexNodeException as exc:
|
263
|
-
if "IO error" not in str(exc):
|
264
|
-
# ignore any other error
|
265
|
-
raise
|
266
|
-
|
267
|
-
# try some mitigations...
|
268
|
-
logger.error(f"IndexNodeException in Search: {request}", exc_info=True)
|
269
|
-
if not retry:
|
270
|
-
# reinit?
|
271
|
-
self.reader = NodeReader()
|
272
|
-
return await self.Search(request, retry=True)
|
273
|
-
else:
|
274
|
-
raise
|
225
|
+
ids = IdCollection()
|
226
|
+
ids.ParseFromString(pb_bytes)
|
227
|
+
return ids
|
275
228
|
|
276
229
|
|
277
230
|
class StandaloneWriterWrapper:
|
@@ -280,9 +233,7 @@ class StandaloneWriterWrapper:
|
|
280
233
|
def __init__(self):
|
281
234
|
os.makedirs(settings.data_path, exist_ok=True)
|
282
235
|
if NodeWriter is None:
|
283
|
-
raise ImportError(
|
284
|
-
"NucliaDB index node bindings are not installed (writer not found)"
|
285
|
-
)
|
236
|
+
raise ImportError("NucliaDB index node bindings are not installed (writer not found)")
|
286
237
|
self.writer = NodeWriter()
|
287
238
|
self.executor = ThreadPoolExecutor(settings.local_writer_threads)
|
288
239
|
|
@@ -369,9 +320,7 @@ class StandaloneWriterWrapper:
|
|
369
320
|
|
370
321
|
async def GC(self, request: ShardId) -> EmptyResponse:
|
371
322
|
loop = asyncio.get_running_loop()
|
372
|
-
resp = await loop.run_in_executor(
|
373
|
-
self.executor, self.writer.gc, request.SerializeToString()
|
374
|
-
)
|
323
|
+
resp = await loop.run_in_executor(self.executor, self.writer.gc, request.SerializeToString())
|
375
324
|
pb_bytes = bytes(resp)
|
376
325
|
op_status = EmptyResponse()
|
377
326
|
op_status.ParseFromString(pb_bytes)
|
@@ -20,10 +20,7 @@
|
|
20
20
|
from typing import Any, Optional
|
21
21
|
|
22
22
|
from nucliadb.common.cluster.base import AbstractIndexNode
|
23
|
-
from nucliadb.common.cluster.grpc_node_dummy import
|
24
|
-
DummyReaderStub,
|
25
|
-
DummyWriterStub,
|
26
|
-
)
|
23
|
+
from nucliadb.common.cluster.grpc_node_dummy import DummyReaderStub, DummyWriterStub
|
27
24
|
from nucliadb.common.cluster.settings import settings as cluster_settings
|
28
25
|
from nucliadb.common.cluster.standalone import grpc_node_binding
|
29
26
|
from nucliadb_protos import standalone_pb2, standalone_pb2_grpc
|
@@ -79,7 +76,7 @@ class ProxyCallerWrapper:
|
|
79
76
|
else:
|
80
77
|
grpc_address = address
|
81
78
|
self._channel = get_traced_grpc_channel(grpc_address, "standalone_proxy")
|
82
|
-
self._stub = standalone_pb2_grpc.StandaloneClusterServiceStub(self._channel)
|
79
|
+
self._stub = standalone_pb2_grpc.StandaloneClusterServiceStub(self._channel)
|
83
80
|
|
84
81
|
def __getattr__(self, name):
|
85
82
|
async def call(request):
|
@@ -95,9 +92,7 @@ class ProxyCallerWrapper:
|
|
95
92
|
else:
|
96
93
|
raise NotImplementedError(f"Unknown type {self._type}")
|
97
94
|
except KeyError:
|
98
|
-
raise NotImplementedError(
|
99
|
-
f"Unknown method for type {self._type}: {name}"
|
100
|
-
)
|
95
|
+
raise NotImplementedError(f"Unknown method for type {self._type}: {name}")
|
101
96
|
return_value = return_type()
|
102
97
|
return_value.ParseFromString(resp.payload)
|
103
98
|
return return_value
|
@@ -116,9 +111,7 @@ class ProxyStandaloneIndexNode(StandaloneIndexNode):
|
|
116
111
|
available_disk: int,
|
117
112
|
dummy: bool = False,
|
118
113
|
):
|
119
|
-
super().__init__(
|
120
|
-
id, address, shard_count, available_disk=available_disk, dummy=dummy
|
121
|
-
)
|
114
|
+
super().__init__(id, address, shard_count, available_disk=available_disk, dummy=dummy)
|
122
115
|
if dummy:
|
123
116
|
return
|
124
117
|
|
@@ -32,9 +32,7 @@ from nucliadb_protos import standalone_pb2, standalone_pb2_grpc
|
|
32
32
|
from nucliadb_utils.grpc import get_traced_grpc_server
|
33
33
|
|
34
34
|
|
35
|
-
class StandaloneClusterServiceServicer(
|
36
|
-
standalone_pb2_grpc.StandaloneClusterServiceServicer
|
37
|
-
):
|
35
|
+
class StandaloneClusterServiceServicer(standalone_pb2_grpc.StandaloneClusterServiceServicer):
|
38
36
|
@backoff.on_exception(backoff.expo, (AioRpcError,), max_time=60)
|
39
37
|
async def NodeAction( # type: ignore
|
40
38
|
self, request: standalone_pb2.NodeActionRequest, context
|
@@ -61,9 +59,7 @@ class StandaloneClusterServiceServicer(
|
|
61
59
|
self, request: standalone_pb2.NodeInfoRequest, context
|
62
60
|
) -> standalone_pb2.NodeInfoResponse:
|
63
61
|
index_node = get_self()
|
64
|
-
index_node.shard_count = len(
|
65
|
-
os.listdir(os.path.join(cluster_settings.data_path, "shards"))
|
66
|
-
)
|
62
|
+
index_node.shard_count = len(os.listdir(os.path.join(cluster_settings.data_path, "shards")))
|
67
63
|
total_disk, _, available_disk = shutil.disk_usage(cluster_settings.data_path)
|
68
64
|
return standalone_pb2.NodeInfoResponse(
|
69
65
|
id=index_node.id,
|
@@ -56,9 +56,7 @@ def get_self() -> StandaloneIndexNode:
|
|
56
56
|
make another grpc request since this node can service it directly.
|
57
57
|
"""
|
58
58
|
if not is_index_node():
|
59
|
-
raise Exception(
|
60
|
-
"This node is not an Index Node. You should not reach this code path."
|
61
|
-
)
|
59
|
+
raise Exception("This node is not an Index Node. You should not reach this code path.")
|
62
60
|
global _SELF_INDEX_NODE
|
63
61
|
node_id = get_standalone_node_id()
|
64
62
|
if _SELF_INDEX_NODE is None or node_id != _SELF_INDEX_NODE.id:
|
@@ -68,9 +66,7 @@ def get_self() -> StandaloneIndexNode:
|
|
68
66
|
host = f"{hn}.{ns}"
|
69
67
|
else:
|
70
68
|
host = gethostname()
|
71
|
-
_SELF_INDEX_NODE = StandaloneIndexNode(
|
72
|
-
id=node_id, address=host, shard_count=0, available_disk=0
|
73
|
-
)
|
69
|
+
_SELF_INDEX_NODE = StandaloneIndexNode(id=node_id, address=host, shard_count=0, available_disk=0)
|
74
70
|
try:
|
75
71
|
_, _, available_disk = shutil.disk_usage(cluster_settings.data_path)
|
76
72
|
_SELF_INDEX_NODE.available_disk = available_disk
|
nucliadb/common/cluster/utils.py
CHANGED
@@ -37,7 +37,8 @@ from nucliadb.common.cluster.standalone.service import (
|
|
37
37
|
start_grpc as start_standalone_grpc,
|
38
38
|
)
|
39
39
|
from nucliadb.common.cluster.standalone.utils import is_index_node
|
40
|
-
from
|
40
|
+
from nucliadb.ingest.orm.resource import Resource
|
41
|
+
from nucliadb_protos import nodereader_pb2, writer_pb2
|
41
42
|
from nucliadb_utils import const
|
42
43
|
from nucliadb_utils.settings import is_onprem_nucliadb
|
43
44
|
from nucliadb_utils.utilities import Utility, clean_utility, get_utility, set_utility
|
@@ -119,38 +120,44 @@ async def wait_for_node(app_context: ApplicationContext, node_id: str) -> None:
|
|
119
120
|
await asyncio.sleep(sleep)
|
120
121
|
|
121
122
|
|
122
|
-
|
123
|
-
|
124
|
-
)
|
123
|
+
async def get_resource(kbid: str, resource_id: str) -> Optional[Resource]:
|
124
|
+
async with datamanagers.with_ro_transaction() as txn:
|
125
|
+
return await datamanagers.resources.get_resource(txn, kbid=kbid, rid=resource_id)
|
126
|
+
|
127
|
+
|
128
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=8)
|
129
|
+
async def get_resource_index_message(kbid: str, resource_id: str) -> Optional[nodereader_pb2.Resource]:
|
130
|
+
async with datamanagers.with_ro_transaction() as txn:
|
131
|
+
resource = await datamanagers.resources.get_resource(txn, kbid=kbid, rid=resource_id)
|
132
|
+
if resource is None:
|
133
|
+
logger.warning(
|
134
|
+
"Resource not found while indexing, skipping",
|
135
|
+
extra={"kbid": kbid, "resource_id": resource_id},
|
136
|
+
)
|
137
|
+
return None
|
138
|
+
resource_index_message = (await resource.generate_index_message(reindex=False)).brain
|
139
|
+
return resource_index_message
|
140
|
+
|
141
|
+
|
142
|
+
@backoff.on_exception(backoff.expo, (Exception,), jitter=backoff.random_jitter, max_tries=8)
|
125
143
|
async def index_resource_to_shard(
|
126
144
|
app_context: ApplicationContext,
|
127
145
|
kbid: str,
|
128
146
|
resource_id: str,
|
129
147
|
shard: writer_pb2.ShardObject,
|
130
|
-
|
148
|
+
resource_index_message: Optional[nodereader_pb2.Resource] = None,
|
149
|
+
) -> None:
|
131
150
|
logger.info("Indexing resource", extra={"kbid": kbid, "resource_id": resource_id})
|
132
|
-
|
133
151
|
sm = app_context.shard_manager
|
134
152
|
partitioning = app_context.partitioning
|
135
153
|
|
136
|
-
async with datamanagers.with_transaction() as txn:
|
137
|
-
resource_index_message = (
|
138
|
-
await datamanagers.resources.get_resource_index_message(
|
139
|
-
txn, kbid=kbid, rid=resource_id
|
140
|
-
)
|
141
|
-
)
|
142
|
-
|
143
154
|
if resource_index_message is None:
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
return None
|
155
|
+
resource_index_message = await get_resource_index_message(kbid, resource_id)
|
156
|
+
if resource_index_message is None:
|
157
|
+
return
|
158
|
+
|
149
159
|
partition = partitioning.generate_partition(kbid, resource_id)
|
150
|
-
await sm.add_resource(
|
151
|
-
shard, resource_index_message, txid=-1, partition=str(partition), kb=kbid
|
152
|
-
)
|
153
|
-
return resource_index_message
|
160
|
+
await sm.add_resource(shard, resource_index_message, txid=-1, partition=str(partition), kb=kbid)
|
154
161
|
|
155
162
|
|
156
163
|
async def delete_resource_from_shard(
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
AVG_PARAGRAPH_SIZE_BYTES = 10_000
|
@@ -24,6 +24,7 @@ from nucliadb.common.cluster.settings import in_standalone_mode
|
|
24
24
|
from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
|
25
25
|
from nucliadb.common.maindb.driver import Driver
|
26
26
|
from nucliadb.common.maindb.utils import setup_driver, teardown_driver
|
27
|
+
from nucliadb.common.nidx import start_nidx_utility, stop_nidx_utility
|
27
28
|
from nucliadb_utils.indexing import IndexingUtility
|
28
29
|
from nucliadb_utils.nats import NatsConnectionManager
|
29
30
|
from nucliadb_utils.partition import PartitionUtility
|
@@ -78,11 +79,13 @@ class ApplicationContext:
|
|
78
79
|
)
|
79
80
|
self.indexing = await start_indexing_utility()
|
80
81
|
self.transaction = await start_transaction_utility(self.service_name)
|
82
|
+
self.nidx = await start_nidx_utility()
|
81
83
|
|
82
84
|
async def finalize(self) -> None:
|
83
85
|
if not self._initialized:
|
84
86
|
return
|
85
87
|
|
88
|
+
await stop_nidx_utility()
|
86
89
|
await stop_transaction_utility()
|
87
90
|
if not in_standalone_mode():
|
88
91
|
await stop_indexing_utility()
|
@@ -18,25 +18,28 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
+
from contextlib import asynccontextmanager
|
22
|
+
|
21
23
|
from fastapi import FastAPI
|
22
24
|
from starlette.routing import Mount
|
23
25
|
|
24
26
|
from nucliadb.common.context import ApplicationContext
|
25
27
|
|
26
28
|
|
27
|
-
|
29
|
+
@asynccontextmanager
|
30
|
+
async def inject_app_context(app: FastAPI):
|
28
31
|
context = ApplicationContext()
|
29
32
|
|
30
33
|
app.state.context = context
|
31
|
-
app.add_event_handler("startup", context.initialize)
|
32
|
-
app.add_event_handler("shutdown", context.finalize)
|
33
34
|
|
34
35
|
# Need to add app context in all sub-applications
|
35
36
|
for route in app.router.routes:
|
36
37
|
if isinstance(route, Mount) and isinstance(route.app, FastAPI):
|
37
38
|
route.app.state.context = context
|
38
|
-
|
39
|
-
|
39
|
+
|
40
|
+
await context.initialize()
|
41
|
+
yield context
|
42
|
+
await context.finalize()
|
40
43
|
|
41
44
|
|
42
45
|
def get_app_context(application: FastAPI) -> ApplicationContext:
|
@@ -18,5 +18,11 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
from
|
22
|
-
|
21
|
+
from dataclasses import dataclass
|
22
|
+
|
23
|
+
|
24
|
+
@dataclass
|
25
|
+
class IndexCounts:
|
26
|
+
fields: int
|
27
|
+
paragraphs: int
|
28
|
+
sentences: int
|
@@ -33,25 +33,31 @@ from . import (
|
|
33
33
|
cluster,
|
34
34
|
entities,
|
35
35
|
exceptions,
|
36
|
+
fields,
|
36
37
|
kb,
|
37
38
|
labels,
|
38
39
|
processing,
|
39
40
|
resources,
|
40
41
|
rollover,
|
41
42
|
synonyms,
|
43
|
+
vectorsets,
|
42
44
|
)
|
43
|
-
from .utils import with_transaction
|
45
|
+
from .utils import with_ro_transaction, with_rw_transaction, with_transaction
|
44
46
|
|
45
47
|
__all__ = (
|
46
48
|
"atomic",
|
47
49
|
"cluster",
|
48
50
|
"entities",
|
49
51
|
"exceptions",
|
52
|
+
"fields",
|
50
53
|
"kb",
|
51
54
|
"labels",
|
52
55
|
"processing",
|
53
56
|
"resources",
|
54
57
|
"rollover",
|
55
58
|
"synonyms",
|
59
|
+
"vectorsets",
|
56
60
|
"with_transaction",
|
61
|
+
"with_rw_transaction",
|
62
|
+
"with_ro_transaction",
|
57
63
|
)
|
@@ -39,8 +39,10 @@ import sys
|
|
39
39
|
from functools import wraps
|
40
40
|
|
41
41
|
from . import kb as kb_dm
|
42
|
+
from . import labels as labels_dm
|
42
43
|
from . import resources as resources_dm
|
43
|
-
from .
|
44
|
+
from . import synonyms as synonyms_dm
|
45
|
+
from .utils import with_ro_transaction, with_transaction
|
44
46
|
|
45
47
|
# XXX: we are using the not exported _ParamSpec to support 3.9. Whenever we
|
46
48
|
# upgrade to >= 3.10 we'll be able to use ParamSpecKwargs and improve the
|
@@ -49,7 +51,7 @@ from .utils import with_transaction
|
|
49
51
|
|
50
52
|
__python_version = (sys.version_info.major, sys.version_info.minor)
|
51
53
|
if __python_version == (3, 9):
|
52
|
-
from
|
54
|
+
from typing_extensions import ParamSpec
|
53
55
|
else:
|
54
56
|
from typing import ParamSpec # type: ignore
|
55
57
|
|
@@ -59,7 +61,7 @@ P = ParamSpec("P")
|
|
59
61
|
def ro_txn_wrap(fun: P) -> P: # type: ignore
|
60
62
|
@wraps(fun)
|
61
63
|
async def wrapper(**kwargs: P.kwargs):
|
62
|
-
async with
|
64
|
+
async with with_ro_transaction() as txn:
|
63
65
|
return await fun(txn, **kwargs)
|
64
66
|
|
65
67
|
return wrapper
|
@@ -69,16 +71,32 @@ def rw_txn_wrap(fun: P) -> P: # type: ignore
|
|
69
71
|
@wraps(fun)
|
70
72
|
async def wrapper(**kwargs: P.kwargs):
|
71
73
|
async with with_transaction() as txn:
|
72
|
-
|
74
|
+
result = await fun(txn, **kwargs)
|
75
|
+
await txn.commit()
|
76
|
+
return result
|
73
77
|
|
74
78
|
return wrapper
|
75
79
|
|
76
80
|
|
77
81
|
class kb:
|
78
82
|
exists_kb = ro_txn_wrap(kb_dm.exists_kb)
|
83
|
+
get_config = ro_txn_wrap(kb_dm.get_config)
|
84
|
+
get_external_index_provider_metadata = ro_txn_wrap(kb_dm.get_external_index_provider_metadata)
|
79
85
|
|
80
86
|
|
81
87
|
class resources:
|
82
88
|
get_resource_uuid_from_slug = ro_txn_wrap(resources_dm.get_resource_uuid_from_slug)
|
83
89
|
resource_exists = ro_txn_wrap(resources_dm.resource_exists)
|
84
90
|
slug_exists = ro_txn_wrap(resources_dm.slug_exists)
|
91
|
+
|
92
|
+
|
93
|
+
class labelset:
|
94
|
+
get = ro_txn_wrap(labels_dm.get_labelset)
|
95
|
+
set = rw_txn_wrap(labels_dm.set_labelset)
|
96
|
+
delete = rw_txn_wrap(labels_dm.delete_labelset)
|
97
|
+
get_all = ro_txn_wrap(labels_dm.get_labels)
|
98
|
+
|
99
|
+
|
100
|
+
class synonyms:
|
101
|
+
get = ro_txn_wrap(synonyms_dm.get)
|
102
|
+
set = rw_txn_wrap(synonyms_dm.set)
|
@@ -31,13 +31,13 @@ logger = logging.getLogger(__name__)
|
|
31
31
|
KB_SHARDS = "/kbs/{kbid}/shards"
|
32
32
|
|
33
33
|
|
34
|
-
async def get_kb_shards(
|
34
|
+
async def get_kb_shards(
|
35
|
+
txn: Transaction, *, kbid: str, for_update: bool = False
|
36
|
+
) -> Optional[writer_pb2.Shards]:
|
35
37
|
key = KB_SHARDS.format(kbid=kbid)
|
36
|
-
return await get_kv_pb(txn, key, writer_pb2.Shards)
|
38
|
+
return await get_kv_pb(txn, key, writer_pb2.Shards, for_update=for_update)
|
37
39
|
|
38
40
|
|
39
|
-
async def update_kb_shards(
|
40
|
-
txn: Transaction, *, kbid: str, shards: writer_pb2.Shards
|
41
|
-
) -> None:
|
41
|
+
async def update_kb_shards(txn: Transaction, *, kbid: str, shards: writer_pb2.Shards) -> None:
|
42
42
|
key = KB_SHARDS.format(kbid=kbid)
|
43
43
|
await txn.set(key, shards.SerializeToString())
|
@@ -85,11 +85,9 @@ async def set_entities_group(
|
|
85
85
|
await txn.set(key, entities.SerializeToString())
|
86
86
|
|
87
87
|
|
88
|
-
async def iterate_entities_groups(
|
89
|
-
txn: Transaction, *, kbid: str
|
90
|
-
) -> AsyncGenerator[str, None]:
|
88
|
+
async def iterate_entities_groups(txn: Transaction, *, kbid: str) -> AsyncGenerator[str, None]:
|
91
89
|
entities_key = KB_ENTITIES.format(kbid=kbid)
|
92
|
-
async for key in txn.keys(entities_key
|
90
|
+
async for key in txn.keys(entities_key):
|
93
91
|
group = key.split("/")[-1]
|
94
92
|
yield group
|
95
93
|
|
@@ -106,9 +104,7 @@ async def get_entities_group(
|
|
106
104
|
return eg
|
107
105
|
|
108
106
|
|
109
|
-
async def get_deleted_groups(
|
110
|
-
txn: Transaction, *, kbid: str
|
111
|
-
) -> kb_pb2.DeletedEntitiesGroups:
|
107
|
+
async def get_deleted_groups(txn: Transaction, *, kbid: str) -> kb_pb2.DeletedEntitiesGroups:
|
112
108
|
deleted_groups_key = KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid)
|
113
109
|
payload = await txn.get(deleted_groups_key)
|
114
110
|
deg = kb_pb2.DeletedEntitiesGroups()
|
@@ -122,18 +118,14 @@ async def mark_group_as_deleted(txn: Transaction, *, kbid: str, group: str) -> N
|
|
122
118
|
deg = await get_deleted_groups(txn, kbid=kbid)
|
123
119
|
if group not in deg.entities_groups:
|
124
120
|
deg.entities_groups.append(group)
|
125
|
-
await txn.set(
|
126
|
-
KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString()
|
127
|
-
)
|
121
|
+
await txn.set(KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString())
|
128
122
|
|
129
123
|
|
130
124
|
async def unmark_group_as_deleted(txn: Transaction, *, kbid: str, group: str) -> None:
|
131
125
|
deg = await get_deleted_groups(txn, kbid=kbid)
|
132
126
|
if group in deg.entities_groups:
|
133
127
|
deg.entities_groups.remove(group)
|
134
|
-
await txn.set(
|
135
|
-
KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString()
|
136
|
-
)
|
128
|
+
await txn.set(KB_DELETED_ENTITIES_GROUPS.format(kbid=kbid), deg.SerializeToString())
|
137
129
|
|
138
130
|
|
139
131
|
async def get_entities_meta_cache(txn: Transaction, *, kbid: str) -> EntitiesMetaCache:
|
@@ -143,7 +135,5 @@ async def get_entities_meta_cache(txn: Transaction, *, kbid: str) -> EntitiesMet
|
|
143
135
|
return pickle.loads(value)
|
144
136
|
|
145
137
|
|
146
|
-
async def set_entities_meta_cache(
|
147
|
-
txn: Transaction, kbid: str, cache: EntitiesMetaCache
|
148
|
-
) -> None:
|
138
|
+
async def set_entities_meta_cache(txn: Transaction, kbid: str, cache: EntitiesMetaCache) -> None:
|
149
139
|
await txn.set(KB_ENTITIES_CACHE.format(kbid=kbid), pickle.dumps(cache, protocol=5))
|