nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
nucliadb/standalone/auth.py
CHANGED
@@ -81,9 +81,7 @@ class AuthHeaderAuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
81
81
|
def __init__(self, settings: Settings) -> None:
|
82
82
|
self.settings = settings
|
83
83
|
|
84
|
-
async def authenticate(
|
85
|
-
self, request: HTTPConnection
|
86
|
-
) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
84
|
+
async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
87
85
|
token_resp = await authenticate_auth_token(self.settings, request)
|
88
86
|
if token_resp is not None:
|
89
87
|
return token_resp
|
@@ -94,9 +92,7 @@ class AuthHeaderAuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
94
92
|
user = request.headers[self.settings.auth_policy_user_header]
|
95
93
|
nuclia_user: BaseUser = NucliaUser(username=user)
|
96
94
|
|
97
|
-
auth_creds = AuthCredentials(
|
98
|
-
get_mapped_roles(settings=self.settings, data={"user": user})
|
99
|
-
)
|
95
|
+
auth_creds = AuthCredentials(get_mapped_roles(settings=self.settings, data={"user": user}))
|
100
96
|
|
101
97
|
return auth_creds, nuclia_user
|
102
98
|
|
@@ -113,9 +109,7 @@ class OAuth2AuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
113
109
|
def __init__(self, settings: Settings) -> None:
|
114
110
|
self.settings = settings
|
115
111
|
|
116
|
-
async def authenticate(
|
117
|
-
self, request: HTTPConnection
|
118
|
-
) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
112
|
+
async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
119
113
|
token_resp = await authenticate_auth_token(self.settings, request)
|
120
114
|
if token_resp is not None:
|
121
115
|
return token_resp
|
@@ -133,9 +127,7 @@ class OAuth2AuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
133
127
|
try:
|
134
128
|
token_data = orjson.loads(base64.b64decode(token_split[1] + "==="))
|
135
129
|
except Exception:
|
136
|
-
logger.warning(
|
137
|
-
f"Could not parse jwt bearer token value: {token}", exc_info=True
|
138
|
-
)
|
130
|
+
logger.warning(f"Could not parse jwt bearer token value: {token}", exc_info=True)
|
139
131
|
return None
|
140
132
|
|
141
133
|
if "sub" not in token_data:
|
@@ -168,9 +160,7 @@ class BasicAuthAuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
168
160
|
def __init__(self, settings: Settings) -> None:
|
169
161
|
self.settings = settings
|
170
162
|
|
171
|
-
async def authenticate(
|
172
|
-
self, request: HTTPConnection
|
173
|
-
) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
163
|
+
async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
174
164
|
token_resp = await authenticate_auth_token(self.settings, request)
|
175
165
|
if token_resp is not None:
|
176
166
|
return token_resp
|
@@ -186,9 +176,7 @@ class BasicAuthAuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
186
176
|
user = token.split(":")[0]
|
187
177
|
|
188
178
|
nuclia_user: BaseUser = NucliaUser(username=user)
|
189
|
-
auth_creds = AuthCredentials(
|
190
|
-
get_mapped_roles(settings=self.settings, data={"user": user})
|
191
|
-
)
|
179
|
+
auth_creds = AuthCredentials(get_mapped_roles(settings=self.settings, data={"user": user}))
|
192
180
|
|
193
181
|
return auth_creds, nuclia_user
|
194
182
|
|
@@ -201,9 +189,7 @@ class UpstreamNaiveAuthenticationBackend(NucliaCloudAuthenticationBackend):
|
|
201
189
|
user_header=settings.auth_policy_user_header,
|
202
190
|
)
|
203
191
|
|
204
|
-
async def authenticate(
|
205
|
-
self, request: HTTPConnection
|
206
|
-
) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
192
|
+
async def authenticate(self, request: HTTPConnection) -> Optional[tuple[AuthCredentials, BaseUser]]:
|
207
193
|
token_resp = await authenticate_auth_token(self.settings, request)
|
208
194
|
if token_resp is not None:
|
209
195
|
return token_resp
|
nucliadb/standalone/config.py
CHANGED
@@ -44,12 +44,9 @@ def config_standalone_driver(nucliadb_args: Settings):
|
|
44
44
|
|
45
45
|
if ingest_settings.driver == DriverConfig.NOT_SET:
|
46
46
|
# no driver specified, for standalone, we force defaulting to local here
|
47
|
-
ingest_settings.driver = DriverConfig.
|
47
|
+
ingest_settings.driver = DriverConfig.PG
|
48
48
|
|
49
|
-
if
|
50
|
-
ingest_settings.driver == DriverConfig.LOCAL
|
51
|
-
and ingest_settings.driver_local_url is None
|
52
|
-
):
|
49
|
+
if ingest_settings.driver == DriverConfig.LOCAL and ingest_settings.driver_local_url is None:
|
53
50
|
# also provide default path for local driver when none provided
|
54
51
|
ingest_settings.driver_local_url = "./data/main"
|
55
52
|
|
@@ -57,11 +54,11 @@ def config_standalone_driver(nucliadb_args: Settings):
|
|
57
54
|
# no driver specified, for standalone, we try to automate some settings here
|
58
55
|
storage_settings.file_backend = FileBackendConfig.LOCAL
|
59
56
|
|
60
|
-
if
|
61
|
-
storage_settings.
|
62
|
-
|
63
|
-
|
64
|
-
|
57
|
+
if storage_settings.file_backend == FileBackendConfig.LOCAL:
|
58
|
+
if storage_settings.local_files is None:
|
59
|
+
storage_settings.local_files = "./data/blob"
|
60
|
+
if storage_settings.local_indexing_bucket is None:
|
61
|
+
storage_settings.local_indexing_bucket = "indexer"
|
65
62
|
|
66
63
|
if ingest_settings.driver_local_url is not None and not os.path.isdir(
|
67
64
|
ingest_settings.driver_local_url
|
nucliadb/standalone/lifecycle.py
CHANGED
@@ -18,48 +18,49 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import asyncio
|
21
|
+
from contextlib import asynccontextmanager
|
22
|
+
|
23
|
+
from fastapi import FastAPI
|
21
24
|
|
22
25
|
from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
|
26
|
+
from nucliadb.common.context.fastapi import inject_app_context
|
23
27
|
from nucliadb.ingest.app import initialize_grpc as initialize_ingest_grpc
|
24
28
|
from nucliadb.ingest.app import initialize_pull_workers
|
25
29
|
from nucliadb.ingest.settings import settings as ingest_settings
|
26
|
-
from nucliadb.reader.lifecycle import
|
27
|
-
from nucliadb.
|
28
|
-
from nucliadb.
|
29
|
-
from nucliadb.
|
30
|
-
from nucliadb.train.lifecycle import finalize as finalize_train
|
31
|
-
from nucliadb.train.lifecycle import initialize as initialize_train
|
32
|
-
from nucliadb.writer.lifecycle import finalize as finalize_writer
|
33
|
-
from nucliadb.writer.lifecycle import initialize as initialize_writer
|
30
|
+
from nucliadb.reader.lifecycle import lifespan as reader_lifespan
|
31
|
+
from nucliadb.search.lifecycle import lifespan as search_lifespan
|
32
|
+
from nucliadb.train.lifecycle import lifespan as train_lifespan
|
33
|
+
from nucliadb.writer.lifecycle import lifespan as writer_lifespan
|
34
34
|
from nucliadb_utils.utilities import finalize_utilities
|
35
35
|
|
36
36
|
SYNC_FINALIZERS = []
|
37
37
|
|
38
38
|
|
39
|
-
|
39
|
+
@asynccontextmanager
|
40
|
+
async def lifespan(app: FastAPI):
|
40
41
|
if ingest_settings.disable_pull_worker:
|
41
42
|
finalizers = await initialize_ingest_grpc()
|
42
43
|
else:
|
43
44
|
finalizers = await initialize_pull_workers()
|
44
45
|
SYNC_FINALIZERS.extend(finalizers)
|
45
|
-
await initialize_writer()
|
46
|
-
await initialize_reader()
|
47
|
-
await initialize_search()
|
48
|
-
await initialize_train()
|
49
|
-
await setup_cluster()
|
50
46
|
|
47
|
+
async with (
|
48
|
+
writer_lifespan(app),
|
49
|
+
reader_lifespan(app),
|
50
|
+
search_lifespan(app),
|
51
|
+
train_lifespan(app),
|
52
|
+
inject_app_context(app),
|
53
|
+
):
|
54
|
+
await setup_cluster()
|
55
|
+
|
56
|
+
yield
|
51
57
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
SYNC_FINALIZERS.clear()
|
58
|
+
for finalizer in SYNC_FINALIZERS:
|
59
|
+
if asyncio.iscoroutinefunction(finalizer):
|
60
|
+
await finalizer()
|
61
|
+
else:
|
62
|
+
finalizer()
|
63
|
+
SYNC_FINALIZERS.clear()
|
59
64
|
|
60
|
-
await finalize_writer()
|
61
|
-
await finalize_reader()
|
62
|
-
await finalize_search()
|
63
|
-
await finalize_train()
|
64
65
|
await finalize_utilities()
|
65
66
|
await teardown_cluster()
|
@@ -54,7 +54,5 @@ async def safe_run_migrations():
|
|
54
54
|
await run_migrator(forever=False)
|
55
55
|
break
|
56
56
|
except locking.ResourceLocked:
|
57
|
-
sys.stdout.write(
|
58
|
-
"Another worker is already running migrations. Waiting...\n"
|
59
|
-
)
|
57
|
+
sys.stdout.write("Another worker is already running migrations. Waiting...\n")
|
60
58
|
continue
|
nucliadb/standalone/purge.py
CHANGED
@@ -25,7 +25,7 @@ import argdantic
|
|
25
25
|
from nucliadb.standalone.config import config_nucliadb
|
26
26
|
from nucliadb.standalone.settings import Settings
|
27
27
|
|
28
|
-
parser = argdantic.ArgParser()
|
28
|
+
parser: argdantic.ArgParser = argdantic.ArgParser()
|
29
29
|
|
30
30
|
|
31
31
|
@parser.command(singleton=True, name="NucliaDB", help="NucliaDB Starting script")
|
File without changes
|
nucliadb/standalone/run.py
CHANGED
@@ -42,7 +42,7 @@ from nucliadb_utils.settings import nuclia_settings, storage_settings
|
|
42
42
|
logger = logging.getLogger(__name__)
|
43
43
|
|
44
44
|
|
45
|
-
parser = argdantic.ArgParser()
|
45
|
+
parser: argdantic.ArgParser = argdantic.ArgParser()
|
46
46
|
|
47
47
|
|
48
48
|
@parser.command(singleton=True, name="NucliaDB", help="NucliaDB Starting script")
|
@@ -98,7 +98,7 @@ def run():
|
|
98
98
|
"API": f"http://{settings.http_host}:{settings.http_port}/api",
|
99
99
|
"Admin UI": f"http://{settings.http_host}:{settings.http_port}/admin",
|
100
100
|
"Key-value backend": ingest_settings.driver.value,
|
101
|
-
"
|
101
|
+
"Blob storage backend": storage_settings.file_backend.value,
|
102
102
|
"Cluster discovery mode": cluster_settings.cluster_discovery_mode.value,
|
103
103
|
"Node replicas": cluster_settings.node_replicas,
|
104
104
|
"Index data path": os.path.realpath(cluster_settings.data_path),
|
@@ -121,10 +121,7 @@ def run():
|
|
121
121
|
settings_to_output["NUA API zone"] = nuclia_settings.nuclia_zone
|
122
122
|
|
123
123
|
settings_to_output_fmted = "\n".join(
|
124
|
-
[
|
125
|
-
f"|| - {k}:{' ' * (27 - len(k))}{v}"
|
126
|
-
for k, v in settings_to_output.items()
|
127
|
-
]
|
124
|
+
[f"|| - {k}:{' ' * (27 - len(k))}{v}" for k, v in settings_to_output.items()]
|
128
125
|
)
|
129
126
|
|
130
127
|
installed_version = versions.installed_nucliadb()
|
nucliadb/standalone/settings.py
CHANGED
@@ -27,6 +27,7 @@ from nucliadb.ingest.settings import DriverSettings
|
|
27
27
|
from nucliadb_models.resource import NucliaDBRoles
|
28
28
|
from nucliadb_telemetry.settings import LogFormatType, LogLevel, LogOutputType
|
29
29
|
from nucliadb_utils.settings import StorageSettings
|
30
|
+
from nucliadb_utils.storages.settings import Settings as ExtendedStorageSettings
|
30
31
|
|
31
32
|
|
32
33
|
class StandaloneDiscoveryMode(Enum):
|
@@ -43,29 +44,23 @@ class AuthPolicy(Enum):
|
|
43
44
|
UPSTREAM_BASICAUTH = "upstream_basicauth"
|
44
45
|
|
45
46
|
|
46
|
-
class Settings(DriverSettings, StorageSettings):
|
47
|
+
class Settings(DriverSettings, StorageSettings, ExtendedStorageSettings):
|
47
48
|
# be consistent here with DATA_PATH env var
|
48
|
-
data_path: str = pydantic.Field(
|
49
|
-
"./data/node", description="Path to node index files"
|
50
|
-
)
|
49
|
+
data_path: str = pydantic.Field("./data/node", description="Path to node index files")
|
51
50
|
|
52
51
|
# all settings here are mapped in to other env var settings used
|
53
52
|
# in the app. These are helper settings to make things easier to
|
54
53
|
# use with standalone app vs cluster app.
|
55
54
|
nua_api_key: Optional[str] = pydantic.Field(
|
56
55
|
default=None,
|
57
|
-
description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/
|
58
|
-
)
|
59
|
-
zone: Optional[str] = pydantic.Field(
|
60
|
-
default=None, description="Nuclia Understanding API Zone ID"
|
56
|
+
description="Nuclia Understanding API Key. Read how to generate a NUA Key here: https://docs.nuclia.dev/docs/rag/advanced/understanding/intro#get-a-nua-key", # noqa
|
61
57
|
)
|
58
|
+
zone: Optional[str] = pydantic.Field(default=None, description="Nuclia Understanding API Zone ID")
|
62
59
|
http_host: str = pydantic.Field(default="0.0.0.0", description="HTTP Port")
|
63
60
|
http_port: int = pydantic.Field(default=8080, description="HTTP Port")
|
64
61
|
ingest_grpc_port: int = pydantic.Field(default=8030, description="Ingest GRPC Port")
|
65
62
|
train_grpc_port: int = pydantic.Field(default=8031, description="Train GRPC Port")
|
66
|
-
standalone_node_port: int = pydantic.Field(
|
67
|
-
default=10009, description="Node GRPC Port"
|
68
|
-
)
|
63
|
+
standalone_node_port: int = pydantic.Field(default=10009, description="Node GRPC Port")
|
69
64
|
|
70
65
|
auth_policy: AuthPolicy = pydantic.Field(
|
71
66
|
default=AuthPolicy.UPSTREAM_NAIVE,
|
@@ -93,10 +88,9 @@ class Settings(DriverSettings, StorageSettings):
|
|
93
88
|
description="Default role to assign to user that is authenticated \
|
94
89
|
upstream. Not used with `upstream_naive` auth policy.",
|
95
90
|
)
|
96
|
-
auth_policy_role_mapping: Optional[dict[str, dict[str, list[NucliaDBRoles]]]] = (
|
97
|
-
|
98
|
-
|
99
|
-
description="""
|
91
|
+
auth_policy_role_mapping: Optional[dict[str, dict[str, list[NucliaDBRoles]]]] = pydantic.Field(
|
92
|
+
default=None,
|
93
|
+
description="""
|
100
94
|
Role mapping for `upstream_auth_header`, `upstream_oauth2` and `upstream_basicauth` auth policies.
|
101
95
|
Allows mapping different properties from the auth request to a role.
|
102
96
|
Available roles are: `READER`, `WRITER`, `MANAGER`.
|
@@ -106,7 +100,6 @@ Examples:
|
|
106
100
|
- `{"group": {"managers": "MANAGER"}}` will map the users that have a `group` claim of
|
107
101
|
`managers` on the jwt provided by upstream to the role `MANAGER` on `upstream_oauth2` policies.
|
108
102
|
""",
|
109
|
-
)
|
110
103
|
)
|
111
104
|
|
112
105
|
jwk_key: Optional[str] = pydantic.Field(
|
nucliadb/standalone/versions.py
CHANGED
@@ -18,10 +18,10 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import enum
|
21
|
+
import importlib.metadata
|
21
22
|
import logging
|
22
23
|
from typing import Optional
|
23
24
|
|
24
|
-
import pkg_resources
|
25
25
|
from cachetools import TTLCache
|
26
26
|
|
27
27
|
from nucliadb.common.http_clients.pypi import PyPi
|
@@ -65,14 +65,24 @@ def is_newer_release(installed: str, latest: str) -> bool:
|
|
65
65
|
>>> is_newer_release("1.2.3", "1.2.3.post1")
|
66
66
|
False
|
67
67
|
"""
|
68
|
-
|
69
|
-
|
68
|
+
|
69
|
+
def parse_version(version: str) -> tuple[int, int, int]:
|
70
|
+
parts = version.split(".")
|
71
|
+
if len(parts) > 3:
|
72
|
+
raise ValueError(f"Invalid version string: {version}")
|
73
|
+
major = int(parts[0]) if len(parts) >= 1 else 0
|
74
|
+
minor = int(parts[1]) if len(parts) >= 2 else 0
|
75
|
+
patch = int(parts[2]) if len(parts) == 3 else 0
|
76
|
+
return (major, minor, patch)
|
77
|
+
|
78
|
+
parsed_installed = parse_version(_release(installed))
|
79
|
+
parsed_latest = parse_version(_release(latest))
|
70
80
|
return parsed_latest > parsed_installed
|
71
81
|
|
72
82
|
|
73
83
|
def _release(version: str) -> str:
|
74
84
|
"""
|
75
|
-
Strips the .postX part of the version so that
|
85
|
+
Strips the .postX part of the version so that we can compare major.minor.patch only.
|
76
86
|
|
77
87
|
>>> _release("1.2.3")
|
78
88
|
'1.2.3'
|
@@ -83,7 +93,7 @@ def _release(version: str) -> str:
|
|
83
93
|
|
84
94
|
|
85
95
|
def get_installed_version(package_name: str) -> str:
|
86
|
-
return
|
96
|
+
return importlib.metadata.distribution(package_name).version
|
87
97
|
|
88
98
|
|
89
99
|
async def get_latest_version(package: str) -> Optional[str]:
|
nucliadb/tasks/consumer.py
CHANGED
@@ -60,7 +60,9 @@ class NatsTaskConsumer:
|
|
60
60
|
async def initialize(self, context: ApplicationContext):
|
61
61
|
self.context = context
|
62
62
|
await create_nats_stream_if_not_exists(
|
63
|
-
self.context,
|
63
|
+
self.context,
|
64
|
+
self.stream.name, # type: ignore
|
65
|
+
subjects=[self.stream.subject], # type: ignore
|
64
66
|
)
|
65
67
|
await self._setup_nats_subscription()
|
66
68
|
self.initialized = True
|
@@ -128,11 +130,9 @@ class NatsTaskConsumer:
|
|
128
130
|
f"Message received: subject:{subject}, seqid: {seqid}, reply: {reply}",
|
129
131
|
extra={"consumer_name": self.name},
|
130
132
|
)
|
131
|
-
async with MessageProgressUpdater(
|
132
|
-
msg, nats_consumer_settings.nats_ack_wait * 0.66
|
133
|
-
):
|
133
|
+
async with MessageProgressUpdater(msg, nats_consumer_settings.nats_ack_wait * 0.66):
|
134
134
|
try:
|
135
|
-
task_msg = self.msg_type.
|
135
|
+
task_msg = self.msg_type.model_validate_json(msg.data)
|
136
136
|
except pydantic.ValidationError as e:
|
137
137
|
errors.capture_exception(e)
|
138
138
|
logger.error(
|
@@ -144,9 +144,7 @@ class NatsTaskConsumer:
|
|
144
144
|
await msg.ack()
|
145
145
|
return
|
146
146
|
|
147
|
-
logger.info(
|
148
|
-
f"Starting task consumption", extra={"consumer_name": self.name}
|
149
|
-
)
|
147
|
+
logger.info(f"Starting task consumption", extra={"consumer_name": self.name})
|
150
148
|
try:
|
151
149
|
await self.callback(self.context, task_msg) # type: ignore
|
152
150
|
except asyncio.CancelledError:
|
@@ -200,9 +198,7 @@ def create_consumer(
|
|
200
198
|
return consumer
|
201
199
|
|
202
200
|
|
203
|
-
async def start_consumer(
|
204
|
-
task_name: str, context: ApplicationContext
|
205
|
-
) -> NatsTaskConsumer:
|
201
|
+
async def start_consumer(task_name: str, context: ApplicationContext) -> NatsTaskConsumer:
|
206
202
|
"""
|
207
203
|
Returns an initialized consumer for the given task name, ready to consume messages from the task stream.
|
208
204
|
"""
|
@@ -214,7 +210,7 @@ async def start_consumer(
|
|
214
210
|
name=f"{task_name}_consumer",
|
215
211
|
stream=task.stream,
|
216
212
|
callback=task.callback, # type: ignore
|
217
|
-
msg_type=task.msg_type,
|
213
|
+
msg_type=task.msg_type,
|
218
214
|
max_concurrent_messages=task.max_concurrent_messages,
|
219
215
|
)
|
220
216
|
await consumer.initialize(context)
|
nucliadb/tasks/producer.py
CHANGED
@@ -44,11 +44,13 @@ class NatsTaskProducer:
|
|
44
44
|
async def initialize(self, context: ApplicationContext):
|
45
45
|
self.context = context
|
46
46
|
await create_nats_stream_if_not_exists(
|
47
|
-
self.context,
|
47
|
+
self.context,
|
48
|
+
self.stream.name, # type: ignore
|
49
|
+
subjects=[self.stream.subject], # type: ignore
|
48
50
|
)
|
49
51
|
self.initialized = True
|
50
52
|
|
51
|
-
async def __call__(self, msg: MsgType) -> int:
|
53
|
+
async def __call__(self, msg: MsgType) -> int:
|
52
54
|
"""
|
53
55
|
Publish message to the producer's nats stream.
|
54
56
|
Returns the sequence number of the published message.
|
@@ -57,7 +59,8 @@ class NatsTaskProducer:
|
|
57
59
|
raise RuntimeError("NatsTaskProducer not initialized")
|
58
60
|
try:
|
59
61
|
pub_ack = await self.context.nats_manager.js.publish( # type: ignore
|
60
|
-
self.stream.subject,
|
62
|
+
self.stream.subject, # type: ignore
|
63
|
+
msg.model_dump_json().encode("utf-8"), # type: ignore
|
61
64
|
)
|
62
65
|
logger.info(
|
63
66
|
"Message sent to Nats",
|
@@ -93,8 +96,6 @@ async def get_producer(task_name: str, context: ApplicationContext) -> NatsTaskP
|
|
93
96
|
task = get_registered_task(task_name)
|
94
97
|
except KeyError:
|
95
98
|
raise ValueError(f"Task {task_name} not registered")
|
96
|
-
producer = create_producer(
|
97
|
-
name=f"{task_name}_producer", stream=task.stream, msg_type=task.msg_type
|
98
|
-
)
|
99
|
+
producer = create_producer(name=f"{task_name}_producer", stream=task.stream, msg_type=task.msg_type)
|
99
100
|
await producer.initialize(context)
|
100
101
|
return producer
|
nucliadb/tests/config.py
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
# This is a test fixture which is useful outside nucliadb tests. In particular
|
22
|
+
# it is used for the testbed. Keeping it under src so it can be imported outside
|
23
|
+
def reset_config():
|
24
|
+
from nucliadb.common.cluster import settings as cluster_settings
|
25
|
+
from nucliadb.ingest import settings as ingest_settings
|
26
|
+
from nucliadb.train import settings as train_settings
|
27
|
+
from nucliadb.writer import settings as writer_settings
|
28
|
+
from nucliadb_utils import settings as utils_settings
|
29
|
+
from nucliadb_utils.cache import settings as cache_settings
|
30
|
+
|
31
|
+
all_settings = [
|
32
|
+
cluster_settings.settings,
|
33
|
+
ingest_settings.settings,
|
34
|
+
train_settings.settings,
|
35
|
+
writer_settings.settings,
|
36
|
+
cache_settings.settings,
|
37
|
+
utils_settings.audit_settings,
|
38
|
+
utils_settings.http_settings,
|
39
|
+
utils_settings.indexing_settings,
|
40
|
+
utils_settings.nuclia_settings,
|
41
|
+
utils_settings.nucliadb_settings,
|
42
|
+
utils_settings.storage_settings,
|
43
|
+
utils_settings.transaction_settings,
|
44
|
+
]
|
45
|
+
for settings in all_settings:
|
46
|
+
defaults = type(settings)()
|
47
|
+
for attr, _value in settings:
|
48
|
+
default_value = getattr(defaults, attr)
|
49
|
+
setattr(settings, attr, default_value)
|
50
|
+
|
51
|
+
from nucliadb.common.cluster import manager
|
52
|
+
|
53
|
+
manager.INDEX_NODES.clear()
|
nucliadb/train/__init__.py
CHANGED
@@ -28,9 +28,7 @@ logger = logging.getLogger(SERVICE_NAME)
|
|
28
28
|
class EndpointFilter(logging.Filter):
|
29
29
|
def filter(self, record: logging.LogRecord) -> bool:
|
30
30
|
return (
|
31
|
-
record.args is not None
|
32
|
-
and len(record.args) >= 3
|
33
|
-
and record.args[2] not in ("/", "/metrics") # type: ignore
|
31
|
+
record.args is not None and len(record.args) >= 3 and record.args[2] not in ("/", "/metrics") # type: ignore
|
34
32
|
)
|
35
33
|
|
36
34
|
|
nucliadb/train/api/utils.py
CHANGED
@@ -21,9 +21,8 @@
|
|
21
21
|
|
22
22
|
from typing import Optional
|
23
23
|
|
24
|
-
from nucliadb_protos.dataset_pb2 import TrainSet
|
25
|
-
|
26
24
|
from nucliadb.train.utils import get_shard_manager
|
25
|
+
from nucliadb_protos.dataset_pb2 import TrainSet
|
27
26
|
|
28
27
|
|
29
28
|
async def get_kb_partitions(kbid: str, prefix: Optional[str] = None):
|
nucliadb/train/api/v1/shards.py
CHANGED
@@ -21,7 +21,7 @@
|
|
21
21
|
|
22
22
|
from fastapi import HTTPException, Request
|
23
23
|
from fastapi.responses import StreamingResponse
|
24
|
-
from fastapi_versioning import version
|
24
|
+
from fastapi_versioning import version
|
25
25
|
|
26
26
|
from nucliadb.train.api.utils import get_kb_partitions, get_train
|
27
27
|
from nucliadb.train.api.v1.router import KB_PREFIX, api
|
@@ -21,7 +21,7 @@
|
|
21
21
|
from typing import Optional
|
22
22
|
|
23
23
|
from fastapi import Request
|
24
|
-
from fastapi_versioning import version
|
24
|
+
from fastapi_versioning import version
|
25
25
|
|
26
26
|
from nucliadb.train.api.utils import get_kb_partitions
|
27
27
|
from nucliadb.train.api.v1.router import KB_PREFIX, api
|
@@ -52,9 +52,7 @@ async def get_partitions_all(request: Request, kbid: str) -> TrainSetPartitions:
|
|
52
52
|
)
|
53
53
|
@requires_one([NucliaDBRoles.READER])
|
54
54
|
@version(1)
|
55
|
-
async def get_partitions_prefix(
|
56
|
-
request: Request, kbid: str, prefix: str
|
57
|
-
) -> TrainSetPartitions:
|
55
|
+
async def get_partitions_prefix(request: Request, kbid: str, prefix: str) -> TrainSetPartitions:
|
58
56
|
return await get_partitions(kbid, prefix=prefix)
|
59
57
|
|
60
58
|
|
nucliadb/train/app.py
CHANGED
@@ -17,65 +17,44 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
import
|
20
|
+
import importlib.metadata
|
21
|
+
|
21
22
|
from fastapi import FastAPI
|
22
23
|
from starlette.middleware import Middleware
|
23
24
|
from starlette.middleware.authentication import AuthenticationMiddleware
|
24
|
-
from starlette.middleware.cors import CORSMiddleware
|
25
25
|
from starlette.requests import ClientDisconnect, Request
|
26
26
|
from starlette.responses import HTMLResponse
|
27
27
|
|
28
|
-
from nucliadb.middleware.transaction import ReadOnlyTransactionMiddleware
|
29
28
|
from nucliadb.train import API_PREFIX
|
30
29
|
from nucliadb.train.api.v1.router import api
|
31
|
-
from nucliadb.train.lifecycle import
|
30
|
+
from nucliadb.train.lifecycle import lifespan
|
32
31
|
from nucliadb_telemetry import errors
|
33
32
|
from nucliadb_telemetry.fastapi.utils import (
|
34
33
|
client_disconnect_handler,
|
35
34
|
global_exception_handler,
|
36
35
|
)
|
37
|
-
from nucliadb_utils import
|
36
|
+
from nucliadb_utils.audit.stream import AuditMiddleware
|
38
37
|
from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
|
39
38
|
from nucliadb_utils.fastapi.openapi import extend_openapi
|
40
39
|
from nucliadb_utils.fastapi.versioning import VersionedFastAPI
|
41
|
-
from nucliadb_utils.settings import
|
42
|
-
from nucliadb_utils.utilities import
|
40
|
+
from nucliadb_utils.settings import running_settings
|
41
|
+
from nucliadb_utils.utilities import get_audit
|
43
42
|
|
44
43
|
middleware = []
|
45
|
-
|
46
|
-
if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
|
47
|
-
middleware.append(
|
48
|
-
Middleware(
|
49
|
-
CORSMiddleware,
|
50
|
-
allow_origins=http_settings.cors_origins,
|
51
|
-
allow_methods=["*"],
|
52
|
-
# Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
|
53
|
-
# Browsers already showing deprecation notices, so it needs to be specified explicitly
|
54
|
-
allow_headers=["*", "Authorization"],
|
55
|
-
)
|
56
|
-
)
|
57
|
-
|
58
44
|
middleware.extend(
|
59
45
|
[
|
60
|
-
Middleware(
|
61
|
-
|
62
|
-
),
|
63
|
-
Middleware(ReadOnlyTransactionMiddleware),
|
46
|
+
Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend()),
|
47
|
+
Middleware(AuditMiddleware, audit_utility_getter=get_audit),
|
64
48
|
]
|
65
49
|
)
|
66
50
|
|
67
|
-
errors.setup_error_handling(
|
68
|
-
|
69
|
-
|
70
|
-
on_startup = [initialize]
|
71
|
-
on_shutdown = [finalize]
|
51
|
+
errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
|
72
52
|
|
73
53
|
|
74
54
|
fastapi_settings = dict(
|
75
55
|
debug=running_settings.debug,
|
76
56
|
middleware=middleware,
|
77
|
-
|
78
|
-
on_shutdown=on_shutdown,
|
57
|
+
lifespan=lifespan,
|
79
58
|
exception_handlers={
|
80
59
|
Exception: global_exception_handler,
|
81
60
|
ClientDisconnect: client_disconnect_handler,
|