nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/standalone/auth.py
CHANGED
@@ -81,9 +81,7 @@ class AuthHeaderAuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
81
81
|
def __init__(self, settings: Settings) -> None:
|
82
82
|
self.settings = settings
|
83
83
|
|
84
|
-
async def authenticate(
|
85
|
-
self, request: HTTPConnection
|
86
|
-
) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
84
|
+
async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
87
85
|
token_resp = await authenticate_auth_token(self.settings, request)
|
88
86
|
if token_resp is not None:
|
89
87
|
return token_resp
|
@@ -94,9 +92,7 @@ class AuthHeaderAuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
94
92
|
user = request.headers[self.settings.auth_policy_user_header]
|
95
93
|
nuclia_user: BaseUser = NucliaUser(username=user)
|
96
94
|
|
97
|
-
auth_creds = AuthCredentials(
|
98
|
-
get_mapped_roles(settings=self.settings, data={"user": user})
|
99
|
-
)
|
95
|
+
auth_creds = AuthCredentials(get_mapped_roles(settings=self.settings, data={"user": user}))
|
100
96
|
|
101
97
|
return auth_creds, nuclia_user
|
102
98
|
|
@@ -113,9 +109,7 @@ class OAuth2AuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
113
109
|
def __init__(self, settings: Settings) -> None:
|
114
110
|
self.settings = settings
|
115
111
|
|
116
|
-
async def authenticate(
|
117
|
-
self, request: HTTPConnection
|
118
|
-
) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
112
|
+
async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
119
113
|
token_resp = await authenticate_auth_token(self.settings, request)
|
120
114
|
if token_resp is not None:
|
121
115
|
return token_resp
|
@@ -133,9 +127,7 @@ class OAuth2AuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
133
127
|
try:
|
134
128
|
token_data = orjson.loads(base64.b64decode(token_split[1] + "==="))
|
135
129
|
except Exception:
|
136
|
-
logger.warning(
|
137
|
-
f"Could not parse jwt bearer token value: {token}", exc_info=True
|
138
|
-
)
|
130
|
+
logger.warning(f"Could not parse jwt bearer token value: {token}", exc_info=True)
|
139
131
|
return None
|
140
132
|
|
141
133
|
if "sub" not in token_data:
|
@@ -168,9 +160,7 @@ class BasicAuthAuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
168
160
|
def __init__(self, settings: Settings) -> None:
|
169
161
|
self.settings = settings
|
170
162
|
|
171
|
-
async def authenticate(
|
172
|
-
self, request: HTTPConnection
|
173
|
-
) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
163
|
+
async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
174
164
|
token_resp = await authenticate_auth_token(self.settings, request)
|
175
165
|
if token_resp is not None:
|
176
166
|
return token_resp
|
@@ -186,9 +176,7 @@ class BasicAuthAuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
186
176
|
user = token.split(":")[0]
|
187
177
|
|
188
178
|
nuclia_user: BaseUser = NucliaUser(username=user)
|
189
|
-
auth_creds = AuthCredentials(
|
190
|
-
get_mapped_roles(settings=self.settings, data={"user": user})
|
191
|
-
)
|
179
|
+
auth_creds = AuthCredentials(get_mapped_roles(settings=self.settings, data={"user": user}))
|
192
180
|
|
193
181
|
return auth_creds, nuclia_user
|
194
182
|
|
@@ -201,9 +189,7 @@ class UpstreamNaiveAuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
201
189
|
user_header=settings.auth_policy_user_header,
|
202
190
|
)
|
203
191
|
|
204
|
-
async def authenticate(
|
205
|
-
self, request: HTTPConnection
|
206
|
-
) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
192
|
+
async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
207
193
|
token_resp = await authenticate_auth_token(self.settings, request)
|
208
194
|
if token_resp is not None:
|
209
195
|
return token_resp
|
nucliadb/standalone/config.py
CHANGED
@@ -37,19 +37,16 @@ def config_standalone_driver(nucliadb_args: Settings):
|
|
37
37
|
)
|
38
38
|
|
39
39
|
# update global settings with arg values
|
40
|
-
for fieldname in DriverSettings.
|
40
|
+
for fieldname in DriverSettings.model_fields.keys():
|
41
41
|
setattr(ingest_settings, fieldname, getattr(nucliadb_args, fieldname))
|
42
|
-
for fieldname in StorageSettings.
|
42
|
+
for fieldname in StorageSettings.model_fields.keys():
|
43
43
|
setattr(storage_settings, fieldname, getattr(nucliadb_args, fieldname))
|
44
44
|
|
45
45
|
if ingest_settings.driver == DriverConfig.NOT_SET:
|
46
46
|
# no driver specified, for standalone, we force defaulting to local here
|
47
|
-
ingest_settings.driver = DriverConfig.
|
47
|
+
ingest_settings.driver = DriverConfig.PG
|
48
48
|
|
49
|
-
if
|
50
|
-
ingest_settings.driver == DriverConfig.LOCAL
|
51
|
-
and ingest_settings.driver_local_url is None
|
52
|
-
):
|
49
|
+
if ingest_settings.driver == DriverConfig.LOCAL and ingest_settings.driver_local_url is None:
|
53
50
|
# also provide default path for local driver when none provided
|
54
51
|
ingest_settings.driver_local_url = "./data/main"
|
55
52
|
|
@@ -57,11 +54,11 @@ def config_standalone_driver(nucliadb_args: Settings):
|
|
57
54
|
# no driver specified, for standalone, we try to automate some settings here
|
58
55
|
storage_settings.file_backend = FileBackendConfig.LOCAL
|
59
56
|
|
60
|
-
if
|
61
|
-
storage_settings.
|
62
|
-
|
63
|
-
|
64
|
-
|
57
|
+
if storage_settings.file_backend == FileBackendConfig.LOCAL:
|
58
|
+
if storage_settings.local_files is None:
|
59
|
+
storage_settings.local_files = "./data/blob"
|
60
|
+
if storage_settings.local_indexing_bucket is None:
|
61
|
+
storage_settings.local_indexing_bucket = "indexer"
|
65
62
|
|
66
63
|
if ingest_settings.driver_local_url is not None and not os.path.isdir(
|
67
64
|
ingest_settings.driver_local_url
|
@@ -34,7 +34,7 @@ from pydantic import BaseModel
|
|
34
34
|
|
35
35
|
from nucliadb.common.cluster import manager as cluster_manager
|
36
36
|
from nucliadb.standalone.settings import Settings
|
37
|
-
from nucliadb_telemetry.settings import LogSettings
|
37
|
+
from nucliadb_telemetry.settings import LogOutputType, LogSettings
|
38
38
|
|
39
39
|
MB = 1024 * 1024
|
40
40
|
CHUNK_SIZE = 2 * MB
|
@@ -70,7 +70,7 @@ class NodeInfo(BaseModel):
|
|
70
70
|
id: str
|
71
71
|
address: str
|
72
72
|
shard_count: int
|
73
|
-
primary_id: Optional[str]
|
73
|
+
primary_id: Optional[str] = None
|
74
74
|
|
75
75
|
|
76
76
|
class ClusterInfo(BaseModel):
|
@@ -86,7 +86,7 @@ async def stream_tar(app: FastAPI) -> AsyncGenerator[bytes, None]:
|
|
86
86
|
await add_cluster_info(temp_dir, tar)
|
87
87
|
settings: Settings = app.settings.copy() # type: ignore
|
88
88
|
await add_settings(temp_dir, tar, settings)
|
89
|
-
if settings.log_output_type ==
|
89
|
+
if settings.log_output_type == LogOutputType.FILE:
|
90
90
|
await add_logs(tar)
|
91
91
|
|
92
92
|
async for chunk in stream_out_tar(tar_file):
|
@@ -164,7 +164,7 @@ def _add_cluster_info_to_tar(temp_dir: str, tar: tarfile.TarFile):
|
|
164
164
|
)
|
165
165
|
cluster_info_file = os.path.join(temp_dir, "cluster_info.txt")
|
166
166
|
with open(cluster_info_file, "w") as f:
|
167
|
-
f.write(cluster_info.
|
167
|
+
f.write(cluster_info.model_dump_json(indent=4))
|
168
168
|
tar.add(cluster_info_file, arcname="cluster_info.txt")
|
169
169
|
|
170
170
|
|
@@ -177,7 +177,7 @@ def _add_settings_to_tar(temp_dir: str, tar: tarfile.TarFile, settings: Settings
|
|
177
177
|
remove_sensitive_settings(settings)
|
178
178
|
settings_file = os.path.join(temp_dir, "settings.json")
|
179
179
|
with open(settings_file, "w") as f:
|
180
|
-
f.write(settings.
|
180
|
+
f.write(settings.model_dump_json(indent=4))
|
181
181
|
tar.add(settings_file, arcname="settings.json")
|
182
182
|
|
183
183
|
|
nucliadb/standalone/lifecycle.py
CHANGED
@@ -18,48 +18,49 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import asyncio
|
21
|
+
from contextlib import asynccontextmanager
|
22
|
+
|
23
|
+
from fastapi import FastAPI
|
21
24
|
|
22
25
|
from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
|
26
|
+
from nucliadb.common.context.fastapi import inject_app_context
|
23
27
|
from nucliadb.ingest.app import initialize_grpc as initialize_ingest_grpc
|
24
28
|
from nucliadb.ingest.app import initialize_pull_workers
|
25
29
|
from nucliadb.ingest.settings import settings as ingest_settings
|
26
|
-
from nucliadb.reader.lifecycle import
|
27
|
-
from nucliadb.
|
28
|
-
from nucliadb.
|
29
|
-
from nucliadb.
|
30
|
-
from nucliadb.train.lifecycle import finalize as finalize_train
|
31
|
-
from nucliadb.train.lifecycle import initialize as initialize_train
|
32
|
-
from nucliadb.writer.lifecycle import finalize as finalize_writer
|
33
|
-
from nucliadb.writer.lifecycle import initialize as initialize_writer
|
30
|
+
from nucliadb.reader.lifecycle import lifespan as reader_lifespan
|
31
|
+
from nucliadb.search.lifecycle import lifespan as search_lifespan
|
32
|
+
from nucliadb.train.lifecycle import lifespan as train_lifespan
|
33
|
+
from nucliadb.writer.lifecycle import lifespan as writer_lifespan
|
34
34
|
from nucliadb_utils.utilities import finalize_utilities
|
35
35
|
|
36
36
|
SYNC_FINALIZERS = []
|
37
37
|
|
38
38
|
|
39
|
-
|
39
|
+
@asynccontextmanager
|
40
|
+
async def lifespan(app: FastAPI):
|
40
41
|
if ingest_settings.disable_pull_worker:
|
41
42
|
finalizers = await initialize_ingest_grpc()
|
42
43
|
else:
|
43
44
|
finalizers = await initialize_pull_workers()
|
44
45
|
SYNC_FINALIZERS.extend(finalizers)
|
45
|
-
await initialize_writer()
|
46
|
-
await initialize_reader()
|
47
|
-
await initialize_search()
|
48
|
-
await initialize_train()
|
49
|
-
await setup_cluster()
|
50
46
|
|
47
|
+
async with (
|
48
|
+
writer_lifespan(app),
|
49
|
+
reader_lifespan(app),
|
50
|
+
search_lifespan(app),
|
51
|
+
train_lifespan(app),
|
52
|
+
inject_app_context(app),
|
53
|
+
):
|
54
|
+
await setup_cluster()
|
55
|
+
|
56
|
+
yield
|
51
57
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
SYNC_FINALIZERS.clear()
|
58
|
+
for finalizer in SYNC_FINALIZERS:
|
59
|
+
if asyncio.iscoroutinefunction(finalizer):
|
60
|
+
await finalizer()
|
61
|
+
else:
|
62
|
+
finalizer()
|
63
|
+
SYNC_FINALIZERS.clear()
|
59
64
|
|
60
|
-
await finalize_writer()
|
61
|
-
await finalize_reader()
|
62
|
-
await finalize_search()
|
63
|
-
await finalize_train()
|
64
65
|
await finalize_utilities()
|
65
66
|
await teardown_cluster()
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
import asyncio
|
21
|
+
import sys
|
22
|
+
|
23
|
+
from nucliadb.common import locking
|
24
|
+
from nucliadb.common.cluster.standalone.utils import is_worker_node
|
25
|
+
from nucliadb.migrator.command import run as run_migrator
|
26
|
+
|
27
|
+
|
28
|
+
def run_migrations():
|
29
|
+
"""
|
30
|
+
Run migrations for the standalone mode.
|
31
|
+
"""
|
32
|
+
loop = asyncio.new_event_loop()
|
33
|
+
loop.run_until_complete(safe_run_migrations())
|
34
|
+
loop.close()
|
35
|
+
|
36
|
+
|
37
|
+
async def safe_run_migrations():
|
38
|
+
"""
|
39
|
+
Run migrations for the standalone mode, only if the node is a worker node.
|
40
|
+
The worker node will keep blocked until the migrations are run -- it relies
|
41
|
+
on the migrator's internal distributed lock.
|
42
|
+
"""
|
43
|
+
if not is_worker_node():
|
44
|
+
return
|
45
|
+
|
46
|
+
sys.stdout.write(
|
47
|
+
"""-------------------------------------------------
|
48
|
+
| Running Migrations for NucliaDB Standalone
|
49
|
+
-------------------------------------------------
|
50
|
+
"""
|
51
|
+
)
|
52
|
+
while True:
|
53
|
+
try:
|
54
|
+
await run_migrator(forever=False)
|
55
|
+
break
|
56
|
+
except locking.ResourceLocked:
|
57
|
+
sys.stdout.write("Another worker is already running migrations. Waiting...\n")
|
58
|
+
continue
|
nucliadb/standalone/purge.py
CHANGED
@@ -20,21 +20,22 @@
|
|
20
20
|
# Standalone purge command
|
21
21
|
import asyncio
|
22
22
|
|
23
|
-
import
|
23
|
+
import argdantic
|
24
24
|
|
25
25
|
from nucliadb.standalone.config import config_nucliadb
|
26
26
|
from nucliadb.standalone.settings import Settings
|
27
27
|
|
28
|
+
parser: argdantic.ArgParser = argdantic.ArgParser()
|
29
|
+
|
30
|
+
|
31
|
+
@parser.command(singleton=True, name="NucliaDB", help="NucliaDB Starting script")
|
32
|
+
def setting(settings: Settings) -> Settings:
|
33
|
+
return settings
|
34
|
+
|
28
35
|
|
29
36
|
def purge():
|
30
37
|
from nucliadb.purge import main
|
31
38
|
|
32
|
-
|
33
|
-
model=Settings,
|
34
|
-
prog="NucliaDB",
|
35
|
-
description="NucliaDB Starting script",
|
36
|
-
)
|
37
|
-
nucliadb_args = parser.parse_typed_args()
|
38
|
-
|
39
|
+
nucliadb_args = parser()
|
39
40
|
config_nucliadb(nucliadb_args)
|
40
41
|
asyncio.run(main())
|
File without changes
|
nucliadb/standalone/run.py
CHANGED
@@ -23,7 +23,7 @@ import os
|
|
23
23
|
import sys
|
24
24
|
from typing import Optional
|
25
25
|
|
26
|
-
import
|
26
|
+
import argdantic
|
27
27
|
import uvicorn # type: ignore
|
28
28
|
from fastapi import FastAPI
|
29
29
|
|
@@ -31,6 +31,7 @@ from nucliadb.common.cluster.settings import settings as cluster_settings
|
|
31
31
|
from nucliadb.ingest.settings import settings as ingest_settings
|
32
32
|
from nucliadb.standalone import versions
|
33
33
|
from nucliadb.standalone.config import config_nucliadb
|
34
|
+
from nucliadb.standalone.migrations import run_migrations
|
34
35
|
from nucliadb.standalone.settings import Settings
|
35
36
|
from nucliadb_telemetry import errors
|
36
37
|
from nucliadb_telemetry.fastapi import instrument_app
|
@@ -41,14 +42,17 @@ from nucliadb_utils.settings import nuclia_settings, storage_settings
|
|
41
42
|
logger = logging.getLogger(__name__)
|
42
43
|
|
43
44
|
|
45
|
+
parser: argdantic.ArgParser = argdantic.ArgParser()
|
46
|
+
|
47
|
+
|
48
|
+
@parser.command(singleton=True, name="NucliaDB", help="NucliaDB Starting script")
|
49
|
+
def setting_parser(settings: Settings) -> Settings:
|
50
|
+
return settings
|
51
|
+
|
52
|
+
|
44
53
|
def setup() -> Settings:
|
45
54
|
errors.setup_error_handling(versions.get_installed_version("nucliadb"))
|
46
|
-
|
47
|
-
model=Settings,
|
48
|
-
prog="NucliaDB",
|
49
|
-
description="NucliaDB Starting script",
|
50
|
-
)
|
51
|
-
nucliadb_args = parser.parse_typed_args()
|
55
|
+
nucliadb_args = parser()
|
52
56
|
|
53
57
|
log_settings = LogSettings(
|
54
58
|
# change default settings for standalone
|
@@ -80,6 +84,7 @@ def get_server(settings: Settings) -> tuple[FastAPI, uvicorn.Server]:
|
|
80
84
|
|
81
85
|
def run():
|
82
86
|
settings = setup()
|
87
|
+
run_migrations()
|
83
88
|
app, server = get_server(settings)
|
84
89
|
instrument_app(app, excluded_urls=["/"], metrics=True)
|
85
90
|
|
@@ -92,29 +97,31 @@ def run():
|
|
92
97
|
settings_to_output = {
|
93
98
|
"API": f"http://{settings.http_host}:{settings.http_port}/api",
|
94
99
|
"Admin UI": f"http://{settings.http_host}:{settings.http_port}/admin",
|
95
|
-
"Key-value backend": ingest_settings.driver,
|
96
|
-
"
|
97
|
-
"Cluster discovery mode": cluster_settings.cluster_discovery_mode,
|
100
|
+
"Key-value backend": ingest_settings.driver.value,
|
101
|
+
"Blob storage backend": storage_settings.file_backend.value,
|
102
|
+
"Cluster discovery mode": cluster_settings.cluster_discovery_mode.value,
|
98
103
|
"Node replicas": cluster_settings.node_replicas,
|
99
104
|
"Index data path": os.path.realpath(cluster_settings.data_path),
|
100
105
|
"Node port": cluster_settings.standalone_node_port,
|
101
|
-
"Auth policy": settings.auth_policy,
|
102
|
-
"
|
103
|
-
|
106
|
+
"Auth policy": settings.auth_policy.value,
|
107
|
+
"Node role": cluster_settings.standalone_node_role.value,
|
108
|
+
}
|
109
|
+
log_settings = {
|
110
|
+
"Log output type": settings.log_output_type.value,
|
111
|
+
"Log format type": settings.log_format_type.value,
|
112
|
+
"Log level": settings.log_level.value,
|
104
113
|
}
|
105
114
|
if settings.log_output_type == LogOutputType.FILE:
|
106
115
|
log_folder = os.path.realpath(os.path.dirname(LogSettings().access_log))
|
107
|
-
|
116
|
+
log_settings["Log folder path"] = log_folder
|
117
|
+
settings_to_output.update(log_settings)
|
108
118
|
|
109
119
|
if nuclia_settings.nuclia_service_account:
|
110
120
|
settings_to_output["NUA API key"] = "Configured ✔"
|
111
121
|
settings_to_output["NUA API zone"] = nuclia_settings.nuclia_zone
|
112
122
|
|
113
123
|
settings_to_output_fmted = "\n".join(
|
114
|
-
[
|
115
|
-
f"|| - {k}:{' ' * (27 - len(k))}{v}"
|
116
|
-
for k, v in settings_to_output.items()
|
117
|
-
]
|
124
|
+
[f"|| - {k}:{' ' * (27 - len(k))}{v}" for k, v in settings_to_output.items()]
|
118
125
|
)
|
119
126
|
|
120
127
|
installed_version = versions.installed_nucliadb()
|
nucliadb/standalone/settings.py
CHANGED
@@ -27,42 +27,40 @@ from nucliadb.ingest.settings import DriverSettings
|
|
27
27
|
from nucliadb_models.resource import NucliaDBRoles
|
28
28
|
from nucliadb_telemetry.settings import LogFormatType, LogLevel, LogOutputType
|
29
29
|
from nucliadb_utils.settings import StorageSettings
|
30
|
+
from nucliadb_utils.storages.settings import Settings as ExtendedStorageSettings
|
30
31
|
|
31
32
|
|
32
|
-
class StandaloneDiscoveryMode(
|
33
|
+
class StandaloneDiscoveryMode(Enum):
|
33
34
|
DEFAULT = "default"
|
34
35
|
MANUAL = "manual"
|
35
36
|
KUBERNETES = "kubernetes"
|
36
37
|
SINGLE_NODE = "single_node"
|
37
38
|
|
38
39
|
|
39
|
-
class AuthPolicy(
|
40
|
+
class AuthPolicy(Enum):
|
40
41
|
UPSTREAM_NAIVE = "upstream_naive"
|
41
42
|
UPSTREAM_AUTH_HEADER = "upstream_auth_header"
|
42
43
|
UPSTREAM_OAUTH2 = "upstream_oauth2"
|
43
44
|
UPSTREAM_BASICAUTH = "upstream_basicauth"
|
44
45
|
|
45
46
|
|
46
|
-
class Settings(DriverSettings, StorageSettings):
|
47
|
+
class Settings(DriverSettings, StorageSettings, ExtendedStorageSettings):
|
47
48
|
# be consistent here with DATA_PATH env var
|
48
|
-
data_path: str = pydantic.Field(
|
49
|
-
"./data/node", description="Path to node index files"
|
50
|
-
)
|
49
|
+
data_path: str = pydantic.Field("./data/node", description="Path to node index files")
|
51
50
|
|
52
51
|
# all settings here are mapped in to other env var settings used
|
53
52
|
# in the app. These are helper settings to make things easier to
|
54
53
|
# use with standalone app vs cluster app.
|
55
54
|
nua_api_key: Optional[str] = pydantic.Field(
|
56
|
-
|
55
|
+
default=None,
|
56
|
+
description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/rag/advanced/understanding/intro#get-a-nua-key", # noqa
|
57
57
|
)
|
58
|
-
zone: Optional[str] = pydantic.Field(description="Nuclia Understanding API Zone ID")
|
58
|
+
zone: Optional[str] = pydantic.Field(default=None, description="Nuclia Understanding API Zone ID")
|
59
59
|
http_host: str = pydantic.Field(default="0.0.0.0", description="HTTP Port")
|
60
60
|
http_port: int = pydantic.Field(default=8080, description="HTTP Port")
|
61
61
|
ingest_grpc_port: int = pydantic.Field(default=8030, description="Ingest GRPC Port")
|
62
62
|
train_grpc_port: int = pydantic.Field(default=8031, description="Train GRPC Port")
|
63
|
-
standalone_node_port: int = pydantic.Field(
|
64
|
-
default=10009, description="Node GRPC Port"
|
65
|
-
)
|
63
|
+
standalone_node_port: int = pydantic.Field(default=10009, description="Node GRPC Port")
|
66
64
|
|
67
65
|
auth_policy: AuthPolicy = pydantic.Field(
|
68
66
|
default=AuthPolicy.UPSTREAM_NAIVE,
|
@@ -90,9 +88,7 @@ class Settings(DriverSettings, StorageSettings):
|
|
90
88
|
description="Default role to assign to user that is authenticated \
|
91
89
|
upstream. Not used with `upstream_naive` auth policy.",
|
92
90
|
)
|
93
|
-
auth_policy_role_mapping: Optional[
|
94
|
-
dict[str, dict[str, list[NucliaDBRoles]]]
|
95
|
-
] = pydantic.Field(
|
91
|
+
auth_policy_role_mapping: Optional[dict[str, dict[str, list[NucliaDBRoles]]]] = pydantic.Field(
|
96
92
|
default=None,
|
97
93
|
description="""
|
98
94
|
Role mapping for `upstream_auth_header`, `upstream_oauth2` and `upstream_basicauth` auth policies.
|
nucliadb/standalone/versions.py
CHANGED
@@ -18,10 +18,10 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import enum
|
21
|
+
import importlib.metadata
|
21
22
|
import logging
|
22
23
|
from typing import Optional
|
23
24
|
|
24
|
-
import pkg_resources
|
25
25
|
from cachetools import TTLCache
|
26
26
|
|
27
27
|
from nucliadb.common.http_clients.pypi import PyPi
|
@@ -65,14 +65,24 @@ def is_newer_release(installed: str, latest: str) -> bool:
|
|
65
65
|
>>> is_newer_release("1.2.3", "1.2.3.post1")
|
66
66
|
False
|
67
67
|
"""
|
68
|
-
|
69
|
-
|
68
|
+
|
69
|
+
def parse_version(version: str) -> tuple[int, int, int]:
|
70
|
+
parts = version.split(".")
|
71
|
+
if len(parts) > 3:
|
72
|
+
raise ValueError(f"Invalid version string: {version}")
|
73
|
+
major = int(parts[0]) if len(parts) >= 1 else 0
|
74
|
+
minor = int(parts[1]) if len(parts) >= 2 else 0
|
75
|
+
patch = int(parts[2]) if len(parts) == 3 else 0
|
76
|
+
return (major, minor, patch)
|
77
|
+
|
78
|
+
parsed_installed = parse_version(_release(installed))
|
79
|
+
parsed_latest = parse_version(_release(latest))
|
70
80
|
return parsed_latest > parsed_installed
|
71
81
|
|
72
82
|
|
73
83
|
def _release(version: str) -> str:
|
74
84
|
"""
|
75
|
-
Strips the .postX part of the version so that
|
85
|
+
Strips the .postX part of the version so that we can compare major.minor.patch only.
|
76
86
|
|
77
87
|
>>> _release("1.2.3")
|
78
88
|
'1.2.3'
|
@@ -83,7 +93,7 @@ def _release(version: str) -> str:
|
|
83
93
|
|
84
94
|
|
85
95
|
def get_installed_version(package_name: str) -> str:
|
86
|
-
return
|
96
|
+
return importlib.metadata.distribution(package_name).version
|
87
97
|
|
88
98
|
|
89
99
|
async def get_latest_version(package: str) -> Optional[str]:
|
nucliadb/tasks/consumer.py
CHANGED
@@ -60,7 +60,9 @@ class NatsTaskConsumer:
|
|
60
60
|
async def initialize(self, context: ApplicationContext):
|
61
61
|
self.context = context
|
62
62
|
await create_nats_stream_if_not_exists(
|
63
|
-
self.context,
|
63
|
+
self.context,
|
64
|
+
self.stream.name, # type: ignore
|
65
|
+
subjects=[self.stream.subject], # type: ignore
|
64
66
|
)
|
65
67
|
await self._setup_nats_subscription()
|
66
68
|
self.initialized = True
|
@@ -128,11 +130,9 @@ class NatsTaskConsumer:
|
|
128
130
|
f"Message received: subject:{subject}, seqid: {seqid}, reply: {reply}",
|
129
131
|
extra={"consumer_name": self.name},
|
130
132
|
)
|
131
|
-
async with MessageProgressUpdater(
|
132
|
-
msg, nats_consumer_settings.nats_ack_wait * 0.66
|
133
|
-
):
|
133
|
+
async with MessageProgressUpdater(msg, nats_consumer_settings.nats_ack_wait * 0.66):
|
134
134
|
try:
|
135
|
-
task_msg = self.msg_type.
|
135
|
+
task_msg = self.msg_type.model_validate_json(msg.data)
|
136
136
|
except pydantic.ValidationError as e:
|
137
137
|
errors.capture_exception(e)
|
138
138
|
logger.error(
|
@@ -144,9 +144,7 @@ class NatsTaskConsumer:
|
|
144
144
|
await msg.ack()
|
145
145
|
return
|
146
146
|
|
147
|
-
logger.info(
|
148
|
-
f"Starting task consumption", extra={"consumer_name": self.name}
|
149
|
-
)
|
147
|
+
logger.info(f"Starting task consumption", extra={"consumer_name": self.name})
|
150
148
|
try:
|
151
149
|
await self.callback(self.context, task_msg) # type: ignore
|
152
150
|
except asyncio.CancelledError:
|
@@ -200,9 +198,7 @@ def create_consumer(
|
|
200
198
|
return consumer
|
201
199
|
|
202
200
|
|
203
|
-
async def start_consumer(
|
204
|
-
task_name: str, context: ApplicationContext
|
205
|
-
) -> NatsTaskConsumer:
|
201
|
+
async def start_consumer(task_name: str, context: ApplicationContext) -> NatsTaskConsumer:
|
206
202
|
"""
|
207
203
|
Returns an initialized consumer for the given task name, ready to consume messages from the task stream.
|
208
204
|
"""
|
@@ -214,7 +210,7 @@ async def start_consumer(
|
|
214
210
|
name=f"{task_name}_consumer",
|
215
211
|
stream=task.stream,
|
216
212
|
callback=task.callback, # type: ignore
|
217
|
-
msg_type=task.msg_type,
|
213
|
+
msg_type=task.msg_type,
|
218
214
|
max_concurrent_messages=task.max_concurrent_messages,
|
219
215
|
)
|
220
216
|
await consumer.initialize(context)
|
nucliadb/tasks/producer.py
CHANGED
@@ -44,11 +44,13 @@ class NatsTaskProducer:
|
|
44
44
|
async def initialize(self, context: ApplicationContext):
|
45
45
|
self.context = context
|
46
46
|
await create_nats_stream_if_not_exists(
|
47
|
-
self.context,
|
47
|
+
self.context,
|
48
|
+
self.stream.name, # type: ignore
|
49
|
+
subjects=[self.stream.subject], # type: ignore
|
48
50
|
)
|
49
51
|
self.initialized = True
|
50
52
|
|
51
|
-
async def __call__(self, msg: MsgType) -> int:
|
53
|
+
async def __call__(self, msg: MsgType) -> int:
|
52
54
|
"""
|
53
55
|
Publish message to the producer's nats stream.
|
54
56
|
Returns the sequence number of the published message.
|
@@ -57,7 +59,8 @@ class NatsTaskProducer:
|
|
57
59
|
raise RuntimeError("NatsTaskProducer not initialized")
|
58
60
|
try:
|
59
61
|
pub_ack = await self.context.nats_manager.js.publish( # type: ignore
|
60
|
-
self.stream.subject,
|
62
|
+
self.stream.subject, # type: ignore
|
63
|
+
msg.model_dump_json().encode("utf-8"), # type: ignore
|
61
64
|
)
|
62
65
|
logger.info(
|
63
66
|
"Message sent to Nats",
|
@@ -93,8 +96,6 @@ async def get_producer(task_name: str, context: ApplicationContext) -> NatsTaskP
|
|
93
96
|
task = get_registered_task(task_name)
|
94
97
|
except KeyError:
|
95
98
|
raise ValueError(f"Task {task_name} not registered")
|
96
|
-
producer = create_producer(
|
97
|
-
name=f"{task_name}_producer", stream=task.stream, msg_type=task.msg_type
|
98
|
-
)
|
99
|
+
producer = create_producer(name=f"{task_name}_producer", stream=task.stream, msg_type=task.msg_type)
|
99
100
|
await producer.initialize(context)
|
100
101
|
return producer
|