nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/writer/api/v1/upload.py
CHANGED
@@ -18,7 +18,6 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import base64
|
21
|
-
import mimetypes
|
22
21
|
import pickle
|
23
22
|
import uuid
|
24
23
|
from datetime import datetime
|
@@ -30,9 +29,7 @@ from fastapi import HTTPException
|
|
30
29
|
from fastapi.params import Header
|
31
30
|
from fastapi.requests import Request
|
32
31
|
from fastapi.responses import Response
|
33
|
-
from fastapi_versioning import version
|
34
|
-
from nucliadb_protos.resources_pb2 import FieldFile, Metadata
|
35
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
32
|
+
from fastapi_versioning import version
|
36
33
|
from starlette.requests import Request as StarletteRequest
|
37
34
|
|
38
35
|
from nucliadb.common import datamanagers
|
@@ -40,13 +37,15 @@ from nucliadb.ingest.orm.utils import set_title
|
|
40
37
|
from nucliadb.ingest.processing import PushPayload, Source
|
41
38
|
from nucliadb.models.responses import HTTPClientError
|
42
39
|
from nucliadb.writer import SERVICE_NAME
|
40
|
+
from nucliadb.writer.api.v1 import transaction
|
43
41
|
from nucliadb.writer.api.v1.resource import (
|
44
42
|
get_rid_from_slug_or_raise_error,
|
45
43
|
validate_rid_exists_or_raise_error,
|
46
44
|
)
|
45
|
+
from nucliadb.writer.api.v1.slug import ensure_slug_uniqueness, noop_context_manager
|
47
46
|
from nucliadb.writer.back_pressure import maybe_back_pressure
|
48
47
|
from nucliadb.writer.resource.audit import parse_audit
|
49
|
-
from nucliadb.writer.resource.basic import
|
48
|
+
from nucliadb.writer.resource.basic import parse_basic_creation
|
50
49
|
from nucliadb.writer.resource.field import parse_fields
|
51
50
|
from nucliadb.writer.resource.origin import parse_extra, parse_origin
|
52
51
|
from nucliadb.writer.tus import TUSUPLOAD, UPLOAD, get_dm, get_storage_manager
|
@@ -58,20 +57,21 @@ from nucliadb.writer.tus.exceptions import (
|
|
58
57
|
InvalidTUSMetadata,
|
59
58
|
ResumableURINotAvailable,
|
60
59
|
)
|
61
|
-
from nucliadb.writer.tus.storage import FileStorageManager
|
60
|
+
from nucliadb.writer.tus.storage import FileStorageManager
|
62
61
|
from nucliadb.writer.tus.utils import parse_tus_metadata
|
63
62
|
from nucliadb.writer.utilities import get_processing
|
63
|
+
from nucliadb_models import content_types
|
64
64
|
from nucliadb_models.resource import NucliaDBRoles
|
65
65
|
from nucliadb_models.utils import FieldIdString
|
66
66
|
from nucliadb_models.writer import CreateResourcePayload, ResourceFileUploaded
|
67
|
+
from nucliadb_protos.resources_pb2 import CloudFile, FieldFile, Metadata
|
68
|
+
from nucliadb_protos.writer_pb2 import BrokerMessage
|
67
69
|
from nucliadb_utils.authentication import requires_one
|
68
70
|
from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError
|
69
71
|
from nucliadb_utils.storages.storage import KB_RESOURCE_FIELD
|
70
|
-
from nucliadb_utils.transaction import TransactionCommitTimeoutError
|
71
72
|
from nucliadb_utils.utilities import (
|
72
73
|
get_partitioning,
|
73
74
|
get_storage,
|
74
|
-
get_transaction_utility,
|
75
75
|
)
|
76
76
|
|
77
77
|
from .router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
|
@@ -196,6 +196,13 @@ async def _tus_post(
|
|
196
196
|
if path_rid is not None:
|
197
197
|
await validate_rid_exists_or_raise_error(kbid, path_rid)
|
198
198
|
|
199
|
+
kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
|
200
|
+
if item and item.hidden and not (kb_config and kb_config.hidden_resources_enabled):
|
201
|
+
raise HTTPException(
|
202
|
+
status_code=422,
|
203
|
+
detail="Cannot hide a resource: the KB does not have hidden resources enabled",
|
204
|
+
)
|
205
|
+
|
199
206
|
await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
|
200
207
|
|
201
208
|
dm = get_dm()
|
@@ -221,15 +228,11 @@ async def _tus_post(
|
|
221
228
|
try:
|
222
229
|
metadata = parse_tus_metadata(request.headers["upload-metadata"])
|
223
230
|
except InvalidTUSMetadata as exc:
|
224
|
-
raise HTTPBadRequest(
|
225
|
-
detail=f"Upload-Metadata header contains errors: {str(exc)}"
|
226
|
-
)
|
231
|
+
raise HTTPBadRequest(detail=f"Upload-Metadata header contains errors: {str(exc)}")
|
227
232
|
else:
|
228
233
|
metadata = {}
|
229
234
|
|
230
|
-
path, rid, field = await validate_field_upload(
|
231
|
-
kbid, path_rid, field_id, metadata.get("md5")
|
232
|
-
)
|
235
|
+
path, rid, field = await validate_field_upload(kbid, path_rid, field_id, metadata.get("md5"))
|
233
236
|
|
234
237
|
if implies_resource_creation:
|
235
238
|
# When uploading a file to a new kb resource, we want to allow multiple
|
@@ -255,8 +258,15 @@ async def _tus_post(
|
|
255
258
|
request_content_type = None
|
256
259
|
if item is None:
|
257
260
|
request_content_type = request.headers.get("content-type")
|
258
|
-
if
|
259
|
-
request_content_type =
|
261
|
+
if request_content_type is None:
|
262
|
+
request_content_type = content_types.guess(metadata["filename"]) or "application/octet-stream"
|
263
|
+
|
264
|
+
if request_content_type is not None and not content_types.valid(request_content_type):
|
265
|
+
raise HTTPException(
|
266
|
+
status_code=415,
|
267
|
+
detail=f"Unsupported content type: {request_content_type}",
|
268
|
+
)
|
269
|
+
|
260
270
|
metadata.setdefault("content_type", request_content_type)
|
261
271
|
|
262
272
|
metadata["implies_resource_creation"] = implies_resource_creation
|
@@ -286,9 +296,7 @@ async def _tus_post(
|
|
286
296
|
await dm.save()
|
287
297
|
|
288
298
|
# Find the URL for upload, with the same parameter as this call
|
289
|
-
location = api.url_path_for(
|
290
|
-
"Upload information", upload_id=upload_id, **request.path_params
|
291
|
-
)
|
299
|
+
location = api.url_path_for("Upload information", upload_id=upload_id, **request.path_params)
|
292
300
|
return Response(
|
293
301
|
status_code=201,
|
294
302
|
headers={
|
@@ -465,7 +473,7 @@ async def _tus_patch(
|
|
465
473
|
field: Optional[str] = None,
|
466
474
|
) -> Response:
|
467
475
|
"""
|
468
|
-
Upload all bytes in the requests and append them in the
|
476
|
+
Upload all bytes in the requests and append them in the specified offset
|
469
477
|
"""
|
470
478
|
if rid is not None:
|
471
479
|
await validate_rid_exists_or_raise_error(kbid, rid)
|
@@ -494,8 +502,7 @@ async def _tus_patch(
|
|
494
502
|
|
495
503
|
if offset != dm.offset:
|
496
504
|
raise HTTPConflict(
|
497
|
-
detail=f"Current upload offset({offset}) does not match "
|
498
|
-
f"object offset {dm.offset}"
|
505
|
+
detail=f"Current upload offset({offset}) does not match " f"object offset {dm.offset}"
|
499
506
|
)
|
500
507
|
|
501
508
|
storage_manager = get_storage_manager()
|
@@ -507,9 +514,7 @@ async def _tus_patch(
|
|
507
514
|
|
508
515
|
if to_upload and read_bytes != to_upload: # pragma: no cover
|
509
516
|
# check length matches if provided
|
510
|
-
raise HTTPPreconditionFailed(
|
511
|
-
detail="Upload size does not match what was provided"
|
512
|
-
)
|
517
|
+
raise HTTPPreconditionFailed(detail="Upload size does not match what was provided")
|
513
518
|
await dm.update(offset=offset + read_bytes)
|
514
519
|
|
515
520
|
headers = {
|
@@ -521,7 +526,6 @@ async def _tus_patch(
|
|
521
526
|
}
|
522
527
|
|
523
528
|
upload_finished = dm.get("size") is not None and dm.offset >= dm.get("size")
|
524
|
-
|
525
529
|
if upload_finished:
|
526
530
|
rid = dm.get("rid", rid)
|
527
531
|
if rid is None:
|
@@ -540,13 +544,19 @@ async def _tus_patch(
|
|
540
544
|
if isinstance(item_payload, str):
|
541
545
|
item_payload = item_payload.encode()
|
542
546
|
creation_payload = pickle.loads(base64.b64decode(item_payload))
|
547
|
+
|
548
|
+
content_type = dm.get("metadata", {}).get("content_type")
|
549
|
+
if content_type is not None and not content_types.valid(content_type):
|
550
|
+
return HTTPClientError(
|
551
|
+
status_code=415,
|
552
|
+
detail=f"Unsupported content type: {content_type}",
|
553
|
+
)
|
554
|
+
|
543
555
|
try:
|
544
556
|
seqid = await store_file_on_nuclia_db(
|
545
557
|
size=dm.get("size"),
|
546
|
-
content_type=
|
547
|
-
override_resource_title=dm.get("metadata", {}).get(
|
548
|
-
"implies_resource_creation", False
|
549
|
-
),
|
558
|
+
content_type=content_type,
|
559
|
+
override_resource_title=dm.get("metadata", {}).get("implies_resource_creation", False),
|
550
560
|
filename=dm.get("metadata", {}).get("filename"),
|
551
561
|
password=dm.get("metadata", {}).get("password"),
|
552
562
|
language=dm.get("metadata", {}).get("language"),
|
@@ -565,20 +575,17 @@ async def _tus_patch(
|
|
565
575
|
|
566
576
|
headers["NDB-Seq"] = f"{seqid}"
|
567
577
|
else:
|
568
|
-
|
578
|
+
validate_intermediate_tus_chunk(read_bytes, storage_manager)
|
569
579
|
await dm.save()
|
570
580
|
|
571
581
|
return Response(headers=headers)
|
572
582
|
|
573
583
|
|
574
|
-
def
|
575
|
-
|
576
|
-
storage_manager.
|
577
|
-
|
578
|
-
|
579
|
-
raise HTTPPreconditionFailed(
|
580
|
-
detail=f"Intermediate chunks cannot be smaller than {storage_manager.min_upload_size} bytes"
|
581
|
-
)
|
584
|
+
def validate_intermediate_tus_chunk(read_bytes: int, storage_manager: FileStorageManager):
|
585
|
+
try:
|
586
|
+
storage_manager.validate_intermediate_chunk(read_bytes)
|
587
|
+
except ValueError as err:
|
588
|
+
raise HTTPPreconditionFailed(detail=str(err))
|
582
589
|
|
583
590
|
|
584
591
|
@api.post(
|
@@ -688,9 +695,7 @@ async def _upload(
|
|
688
695
|
await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
|
689
696
|
|
690
697
|
md5_user = x_md5[0] if x_md5 is not None and len(x_md5) > 0 else None
|
691
|
-
path, rid, valid_field = await validate_field_upload(
|
692
|
-
kbid, path_rid, field, md5_user
|
693
|
-
)
|
698
|
+
path, rid, valid_field = await validate_field_upload(kbid, path_rid, field, md5_user)
|
694
699
|
dm = get_dm()
|
695
700
|
storage_manager = get_storage_manager()
|
696
701
|
|
@@ -719,8 +724,14 @@ async def _upload(
|
|
719
724
|
# - content-type set by the user in the upload request header takes precedence.
|
720
725
|
# - if not set, we will try to guess it from the filename and default to a generic binary content type otherwise
|
721
726
|
content_type = request.headers.get("content-type")
|
722
|
-
if
|
723
|
-
content_type =
|
727
|
+
if content_type is None:
|
728
|
+
content_type = content_types.guess(filename) or "application/octet-stream"
|
729
|
+
|
730
|
+
if not content_types.valid(content_type):
|
731
|
+
raise HTTPException(
|
732
|
+
status_code=415,
|
733
|
+
detail=f"Unsupported content type: {content_type}",
|
734
|
+
)
|
724
735
|
|
725
736
|
metadata = {"content_type": content_type, "filename": filename}
|
726
737
|
|
@@ -791,13 +802,9 @@ async def validate_field_upload(
|
|
791
802
|
if rid is None:
|
792
803
|
# we are going to create a new resource and a field
|
793
804
|
if md5 is not None:
|
794
|
-
exists = await datamanagers.atomic.resources.resource_exists(
|
795
|
-
kbid=kbid, rid=md5
|
796
|
-
)
|
805
|
+
exists = await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=md5)
|
797
806
|
if exists:
|
798
|
-
raise HTTPConflict(
|
799
|
-
"A resource with the same uploaded file already exists"
|
800
|
-
)
|
807
|
+
raise HTTPConflict("A resource with the same uploaded file already exists")
|
801
808
|
rid = md5
|
802
809
|
else:
|
803
810
|
rid = uuid.uuid4().hex
|
@@ -823,7 +830,7 @@ async def store_file_on_nuclia_db(
|
|
823
830
|
path: str,
|
824
831
|
request: Request,
|
825
832
|
bucket: str,
|
826
|
-
source: Source,
|
833
|
+
source: CloudFile.Source.ValueType,
|
827
834
|
rid: str,
|
828
835
|
field: str,
|
829
836
|
content_type: str = "application/octet-stream",
|
@@ -835,9 +842,7 @@ async def store_file_on_nuclia_db(
|
|
835
842
|
item: Optional[CreateResourcePayload] = None,
|
836
843
|
) -> Optional[int]:
|
837
844
|
# File is on NucliaDB Storage at path
|
838
|
-
|
839
845
|
partitioning = get_partitioning()
|
840
|
-
transaction = get_transaction_utility()
|
841
846
|
processing = get_processing()
|
842
847
|
storage = await get_storage(service_name=SERVICE_NAME)
|
843
848
|
|
@@ -859,14 +864,17 @@ async def store_file_on_nuclia_db(
|
|
859
864
|
|
860
865
|
parse_audit(writer.audit, request)
|
861
866
|
|
867
|
+
unique_slug_context_manager = noop_context_manager()
|
862
868
|
if item is not None:
|
863
869
|
if item.slug:
|
870
|
+
unique_slug_context_manager = ensure_slug_uniqueness(kbid, item.slug)
|
864
871
|
writer.slug = item.slug
|
865
872
|
toprocess.slug = item.slug
|
866
873
|
|
867
874
|
toprocess.processing_options = item.processing_options
|
868
875
|
|
869
|
-
|
876
|
+
kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
|
877
|
+
parse_basic_creation(writer, item, toprocess, kb_config)
|
870
878
|
if item.origin is not None:
|
871
879
|
parse_origin(writer.origin, item.origin)
|
872
880
|
if item.extra is not None:
|
@@ -882,62 +890,61 @@ async def store_file_on_nuclia_db(
|
|
882
890
|
uuid=rid,
|
883
891
|
x_skip_store=False,
|
884
892
|
)
|
885
|
-
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
file_field.file.
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
await transaction.commit(writer, partition, wait=True)
|
924
|
-
except TransactionCommitTimeoutError:
|
925
|
-
raise HTTPException(
|
926
|
-
status_code=501,
|
927
|
-
detail="Inconsistent write. This resource will not be processed and may not be stored.",
|
893
|
+
else:
|
894
|
+
# Use defaults for everything, but don't forget hidden which depends on KB config
|
895
|
+
kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
|
896
|
+
if kb_config and kb_config.hidden_resources_hide_on_creation:
|
897
|
+
writer.basic.hidden = True
|
898
|
+
|
899
|
+
async with unique_slug_context_manager:
|
900
|
+
if override_resource_title and filename is not None:
|
901
|
+
set_title(writer, toprocess, filename)
|
902
|
+
|
903
|
+
writer.basic.icon = content_type
|
904
|
+
writer.basic.created.FromDatetime(datetime.now())
|
905
|
+
|
906
|
+
# Update resource with file
|
907
|
+
file_field = FieldFile()
|
908
|
+
file_field.added.FromDatetime(datetime.now())
|
909
|
+
file_field.file.bucket_name = bucket
|
910
|
+
file_field.file.content_type = content_type
|
911
|
+
if filename is not None:
|
912
|
+
file_field.file.filename = filename
|
913
|
+
file_field.file.uri = path
|
914
|
+
file_field.file.source = source
|
915
|
+
|
916
|
+
if md5:
|
917
|
+
file_field.file.md5 = md5
|
918
|
+
if size:
|
919
|
+
file_field.file.size = size
|
920
|
+
if language:
|
921
|
+
file_field.language = language
|
922
|
+
if password:
|
923
|
+
file_field.password = password
|
924
|
+
|
925
|
+
writer.files[field].CopyFrom(file_field)
|
926
|
+
# Do not store passwords on maindb
|
927
|
+
writer.files[field].ClearField("password")
|
928
|
+
|
929
|
+
toprocess.filefield[field] = await processing.convert_internal_filefield_to_str(
|
930
|
+
file_field, storage=storage
|
928
931
|
)
|
929
932
|
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
detail=
|
938
|
-
|
933
|
+
writer.source = BrokerMessage.MessageSource.WRITER
|
934
|
+
writer.basic.metadata.status = Metadata.Status.PENDING
|
935
|
+
writer.basic.metadata.useful = True
|
936
|
+
await transaction.commit(writer, partition)
|
937
|
+
try:
|
938
|
+
processing_info = await processing.send_to_process(toprocess, partition)
|
939
|
+
except LimitsExceededError as exc:
|
940
|
+
raise HTTPException(status_code=exc.status_code, detail=exc.detail)
|
941
|
+
except SendToProcessError:
|
942
|
+
raise HTTPException(
|
943
|
+
status_code=500,
|
944
|
+
detail="Error while sending to process. Try calling /reprocess",
|
945
|
+
)
|
939
946
|
|
940
|
-
|
947
|
+
return processing_info.seqid
|
941
948
|
|
942
949
|
|
943
950
|
def maybe_b64decode(some_string: str) -> str:
|
@@ -946,9 +953,3 @@ def maybe_b64decode(some_string: str) -> str:
|
|
946
953
|
except ValueError:
|
947
954
|
# not b64encoded
|
948
955
|
return some_string
|
949
|
-
|
950
|
-
|
951
|
-
def guess_content_type(filename: str) -> str:
|
952
|
-
default = "application/octet-stream"
|
953
|
-
guessed, _ = mimetypes.guess_type(filename)
|
954
|
-
return guessed or default
|
nucliadb/writer/app.py
CHANGED
@@ -18,61 +18,38 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
import
|
21
|
+
import importlib.metadata
|
22
22
|
|
23
|
-
import pkg_resources
|
24
23
|
from fastapi import FastAPI
|
25
24
|
from starlette.middleware import Middleware
|
26
25
|
from starlette.middleware.authentication import AuthenticationMiddleware
|
27
|
-
from starlette.middleware.cors import CORSMiddleware
|
28
26
|
from starlette.requests import ClientDisconnect
|
29
27
|
from starlette.responses import HTMLResponse
|
30
28
|
|
31
|
-
from nucliadb.common.context.fastapi import get_app_context, set_app_context
|
32
29
|
from nucliadb.writer import API_PREFIX
|
33
30
|
from nucliadb.writer.api.v1.router import api as api_v1
|
34
|
-
from nucliadb.writer.lifecycle import
|
31
|
+
from nucliadb.writer.lifecycle import lifespan
|
35
32
|
from nucliadb_telemetry import errors
|
36
33
|
from nucliadb_telemetry.fastapi.utils import (
|
37
34
|
client_disconnect_handler,
|
38
35
|
global_exception_handler,
|
39
36
|
)
|
40
|
-
from nucliadb_utils import const
|
41
37
|
from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
|
42
38
|
from nucliadb_utils.fastapi.openapi import extend_openapi
|
43
39
|
from nucliadb_utils.fastapi.versioning import VersionedFastAPI
|
44
|
-
from nucliadb_utils.settings import
|
45
|
-
from nucliadb_utils.utilities import has_feature
|
40
|
+
from nucliadb_utils.settings import running_settings
|
46
41
|
|
47
42
|
middleware = []
|
48
43
|
|
49
|
-
|
50
|
-
middleware.append(
|
51
|
-
Middleware(
|
52
|
-
CORSMiddleware,
|
53
|
-
allow_origins=http_settings.cors_origins,
|
54
|
-
allow_methods=["*"],
|
55
|
-
# Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
|
56
|
-
# Browsers already showing deprecation notices, so it needs to be specified explicitly
|
57
|
-
allow_headers=["*", "Authorization"],
|
58
|
-
)
|
59
|
-
)
|
60
|
-
|
61
|
-
middleware.extend(
|
62
|
-
[Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())]
|
63
|
-
)
|
64
|
-
|
44
|
+
middleware.extend([Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())])
|
65
45
|
|
66
|
-
errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
|
67
46
|
|
68
|
-
|
69
|
-
on_shutdown = [finalize]
|
47
|
+
errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
|
70
48
|
|
71
49
|
fastapi_settings = dict(
|
72
50
|
debug=running_settings.debug,
|
73
51
|
middleware=middleware,
|
74
|
-
|
75
|
-
on_shutdown=on_shutdown,
|
52
|
+
lifespan=lifespan,
|
76
53
|
exception_handlers={
|
77
54
|
Exception: global_exception_handler,
|
78
55
|
ClientDisconnect: client_disconnect_handler,
|
@@ -102,18 +79,4 @@ def create_application() -> FastAPI:
|
|
102
79
|
# Use raw starlette routes to avoid unnecessary overhead
|
103
80
|
application.add_route("/", homepage)
|
104
81
|
|
105
|
-
set_app_context(application)
|
106
|
-
maybe_configure_back_pressure(application)
|
107
82
|
return application
|
108
|
-
|
109
|
-
|
110
|
-
def maybe_configure_back_pressure(application: FastAPI):
|
111
|
-
from nucliadb.writer.back_pressure import start_materializer, stop_materializer
|
112
|
-
from nucliadb.writer.settings import back_pressure_settings
|
113
|
-
from nucliadb_utils.settings import is_onprem_nucliadb
|
114
|
-
|
115
|
-
if back_pressure_settings.enabled and not is_onprem_nucliadb():
|
116
|
-
context = get_app_context(application)
|
117
|
-
start_materializer_with_context = functools.partial(start_materializer, context)
|
118
|
-
application.add_event_handler("startup", start_materializer_with_context)
|
119
|
-
application.add_event_handler("shutdown", stop_materializer)
|
nucliadb/writer/back_pressure.py
CHANGED
@@ -28,7 +28,6 @@ from typing import Optional
|
|
28
28
|
from async_lru import alru_cache
|
29
29
|
from cachetools import TTLCache
|
30
30
|
from fastapi import HTTPException, Request
|
31
|
-
from nucliadb_protos.writer_pb2 import ShardObject
|
32
31
|
|
33
32
|
from nucliadb.common import datamanagers
|
34
33
|
from nucliadb.common.cluster.manager import get_index_nodes
|
@@ -37,6 +36,7 @@ from nucliadb.common.context.fastapi import get_app_context
|
|
37
36
|
from nucliadb.common.http_clients.processing import ProcessingHTTPClient
|
38
37
|
from nucliadb.writer import logger
|
39
38
|
from nucliadb.writer.settings import back_pressure_settings as settings
|
39
|
+
from nucliadb_protos.writer_pb2 import ShardObject
|
40
40
|
from nucliadb_telemetry import metrics
|
41
41
|
from nucliadb_utils import const
|
42
42
|
from nucliadb_utils.nats import NatsConnectionManager
|
@@ -112,9 +112,7 @@ def cached_back_pressure(kbid: str, resource_uuid: Optional[str] = None):
|
|
112
112
|
if data is not None:
|
113
113
|
try_after = data.try_after
|
114
114
|
back_pressure_type = data.type
|
115
|
-
RATE_LIMITED_REQUESTS_COUNTER.inc(
|
116
|
-
{"type": back_pressure_type, "cached": "true"}
|
117
|
-
)
|
115
|
+
RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "true"})
|
118
116
|
logger.info(
|
119
117
|
"Back pressure applied from cache",
|
120
118
|
extra={
|
@@ -137,9 +135,7 @@ def cached_back_pressure(kbid: str, resource_uuid: Optional[str] = None):
|
|
137
135
|
except BackPressureException as exc:
|
138
136
|
try_after = exc.data.try_after
|
139
137
|
back_pressure_type = exc.data.type
|
140
|
-
RATE_LIMITED_REQUESTS_COUNTER.inc(
|
141
|
-
{"type": back_pressure_type, "cached": "false"}
|
142
|
-
)
|
138
|
+
RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "false"})
|
143
139
|
_cache.set(cache_key, exc.data)
|
144
140
|
raise HTTPException(
|
145
141
|
status_code=429,
|
@@ -248,14 +244,10 @@ class Materializer:
|
|
248
244
|
for node in get_index_nodes():
|
249
245
|
try:
|
250
246
|
with back_pressure_observer({"type": "get_indexing_pending"}):
|
251
|
-
self.indexing_pending[node.id] = (
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
consumer=const.Streams.INDEX.group.format(
|
256
|
-
node=node.id
|
257
|
-
),
|
258
|
-
)
|
247
|
+
self.indexing_pending[node.id] = await get_nats_consumer_pending_messages(
|
248
|
+
self.nats_manager,
|
249
|
+
stream=const.Streams.INDEX.name,
|
250
|
+
consumer=const.Streams.INDEX.group.format(node=node.id),
|
259
251
|
)
|
260
252
|
except Exception:
|
261
253
|
logger.exception(
|
@@ -336,9 +328,7 @@ def get_materializer() -> Materializer:
|
|
336
328
|
return MATERIALIZER
|
337
329
|
|
338
330
|
|
339
|
-
async def maybe_back_pressure(
|
340
|
-
request: Request, kbid: str, resource_uuid: Optional[str] = None
|
341
|
-
) -> None:
|
331
|
+
async def maybe_back_pressure(request: Request, kbid: str, resource_uuid: Optional[str] = None) -> None:
|
342
332
|
"""
|
343
333
|
This function does system checks to see if we need to put back pressure on writes.
|
344
334
|
In that case, a HTTP 429 will be raised with the estimated time to try again.
|
@@ -348,9 +338,7 @@ async def maybe_back_pressure(
|
|
348
338
|
await back_pressure_checks(request, kbid, resource_uuid)
|
349
339
|
|
350
340
|
|
351
|
-
async def back_pressure_checks(
|
352
|
-
request: Request, kbid: str, resource_uuid: Optional[str] = None
|
353
|
-
):
|
341
|
+
async def back_pressure_checks(request: Request, kbid: str, resource_uuid: Optional[str] = None):
|
354
342
|
"""
|
355
343
|
Will raise a 429 if back pressure is needed:
|
356
344
|
- If the processing engine is behind.
|
@@ -361,9 +349,7 @@ async def back_pressure_checks(
|
|
361
349
|
materializer = get_materializer()
|
362
350
|
with cached_back_pressure(kbid, resource_uuid):
|
363
351
|
check_ingest_behind(materializer.get_ingest_pending())
|
364
|
-
await check_indexing_behind(
|
365
|
-
context, kbid, resource_uuid, materializer.get_indexing_pending()
|
366
|
-
)
|
352
|
+
await check_indexing_behind(context, kbid, resource_uuid, materializer.get_indexing_pending())
|
367
353
|
await check_processing_behind(materializer, kbid)
|
368
354
|
|
369
355
|
|
@@ -418,9 +404,7 @@ async def check_indexing_behind(
|
|
418
404
|
|
419
405
|
# Get nodes that are involved in the indexing of the request
|
420
406
|
if resource_uuid is not None:
|
421
|
-
nodes_to_check = await get_nodes_for_resource_shard(
|
422
|
-
context, kbid, resource_uuid
|
423
|
-
)
|
407
|
+
nodes_to_check = await get_nodes_for_resource_shard(context, kbid, resource_uuid)
|
424
408
|
else:
|
425
409
|
nodes_to_check = await get_nodes_for_kb_active_shards(context, kbid)
|
426
410
|
|
@@ -488,9 +472,7 @@ def estimate_try_after(rate: float, pending: int, max_wait: int) -> datetime:
|
|
488
472
|
|
489
473
|
|
490
474
|
@alru_cache(maxsize=1024, ttl=60 * 15)
|
491
|
-
async def get_nodes_for_kb_active_shards(
|
492
|
-
context: ApplicationContext, kbid: str
|
493
|
-
) -> list[str]:
|
475
|
+
async def get_nodes_for_kb_active_shards(context: ApplicationContext, kbid: str) -> list[str]:
|
494
476
|
with back_pressure_observer({"type": "get_kb_active_shard"}):
|
495
477
|
active_shard = await get_kb_active_shard(context, kbid)
|
496
478
|
if active_shard is None:
|
@@ -521,20 +503,16 @@ async def get_nats_consumer_pending_messages(
|
|
521
503
|
return consumer_info.num_pending
|
522
504
|
|
523
505
|
|
524
|
-
async def get_kb_active_shard(
|
525
|
-
context
|
526
|
-
) -> Optional[ShardObject]:
|
527
|
-
async with context.kv_driver.transaction() as txn:
|
506
|
+
async def get_kb_active_shard(context: ApplicationContext, kbid: str) -> Optional[ShardObject]:
|
507
|
+
async with context.kv_driver.transaction(read_only=True) as txn:
|
528
508
|
return await context.shard_manager.get_current_active_shard(txn, kbid)
|
529
509
|
|
530
510
|
|
531
511
|
async def get_resource_shard(
|
532
512
|
context: ApplicationContext, kbid: str, resource_uuid: str
|
533
513
|
) -> Optional[ShardObject]:
|
534
|
-
async with datamanagers.
|
535
|
-
shard_id = await datamanagers.resources.get_resource_shard_id(
|
536
|
-
txn, kbid=kbid, rid=resource_uuid
|
537
|
-
)
|
514
|
+
async with datamanagers.with_ro_transaction() as txn:
|
515
|
+
shard_id = await datamanagers.resources.get_resource_shard_id(txn, kbid=kbid, rid=resource_uuid)
|
538
516
|
if shard_id is None:
|
539
517
|
# Resource does not exist
|
540
518
|
logger.debug(
|