nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/writer/app.py
CHANGED
@@ -18,74 +18,38 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
|
21
|
-
import
|
21
|
+
import importlib.metadata
|
22
22
|
|
23
|
-
import pkg_resources
|
24
23
|
from fastapi import FastAPI
|
25
|
-
from fastapi.responses import JSONResponse
|
26
24
|
from starlette.middleware import Middleware
|
27
25
|
from starlette.middleware.authentication import AuthenticationMiddleware
|
28
|
-
from starlette.
|
29
|
-
from starlette.requests import ClientDisconnect, Request
|
26
|
+
from starlette.requests import ClientDisconnect
|
30
27
|
from starlette.responses import HTMLResponse
|
31
28
|
|
32
|
-
from nucliadb.common.context.fastapi import get_app_context, set_app_context
|
33
29
|
from nucliadb.writer import API_PREFIX
|
34
30
|
from nucliadb.writer.api.v1.router import api as api_v1
|
35
|
-
from nucliadb.writer.lifecycle import
|
31
|
+
from nucliadb.writer.lifecycle import lifespan
|
36
32
|
from nucliadb_telemetry import errors
|
37
|
-
from
|
33
|
+
from nucliadb_telemetry.fastapi.utils import (
|
34
|
+
client_disconnect_handler,
|
35
|
+
global_exception_handler,
|
36
|
+
)
|
38
37
|
from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
|
39
38
|
from nucliadb_utils.fastapi.openapi import extend_openapi
|
40
39
|
from nucliadb_utils.fastapi.versioning import VersionedFastAPI
|
41
|
-
from nucliadb_utils.settings import
|
42
|
-
from nucliadb_utils.utilities import has_feature
|
40
|
+
from nucliadb_utils.settings import running_settings
|
43
41
|
|
44
42
|
middleware = []
|
45
43
|
|
46
|
-
|
47
|
-
middleware.append(
|
48
|
-
Middleware(
|
49
|
-
CORSMiddleware,
|
50
|
-
allow_origins=http_settings.cors_origins,
|
51
|
-
allow_methods=["*"],
|
52
|
-
# Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
|
53
|
-
# Browsers already showing deprecation notices, so it needs to be specified explicitly
|
54
|
-
allow_headers=["*", "Authorization"],
|
55
|
-
)
|
56
|
-
)
|
57
|
-
|
58
|
-
middleware.extend(
|
59
|
-
[Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())]
|
60
|
-
)
|
61
|
-
|
62
|
-
|
63
|
-
errors.setup_error_handling(pkg_resources.get_distribution("nucliadb").version)
|
44
|
+
middleware.extend([Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend())])
|
64
45
|
|
65
|
-
on_startup = [initialize]
|
66
|
-
on_shutdown = [finalize]
|
67
|
-
|
68
|
-
|
69
|
-
async def global_exception_handler(request: Request, exc: Exception):
|
70
|
-
errors.capture_exception(exc)
|
71
|
-
return JSONResponse(
|
72
|
-
status_code=500,
|
73
|
-
content={"detail": "Something went wrong, please contact your administrator"},
|
74
|
-
)
|
75
|
-
|
76
|
-
|
77
|
-
async def client_disconnect_handler(request: Request, exc: ClientDisconnect):
|
78
|
-
return JSONResponse(
|
79
|
-
status_code=200,
|
80
|
-
content={"detail": "Client disconnected while an operation was in course"},
|
81
|
-
)
|
82
46
|
|
47
|
+
errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
|
83
48
|
|
84
49
|
fastapi_settings = dict(
|
85
50
|
debug=running_settings.debug,
|
86
51
|
middleware=middleware,
|
87
|
-
|
88
|
-
on_shutdown=on_shutdown,
|
52
|
+
lifespan=lifespan,
|
89
53
|
exception_handlers={
|
90
54
|
Exception: global_exception_handler,
|
91
55
|
ClientDisconnect: client_disconnect_handler,
|
@@ -115,18 +79,4 @@ def create_application() -> FastAPI:
|
|
115
79
|
# Use raw starlette routes to avoid unnecessary overhead
|
116
80
|
application.add_route("/", homepage)
|
117
81
|
|
118
|
-
set_app_context(application)
|
119
|
-
maybe_configure_back_pressure(application)
|
120
82
|
return application
|
121
|
-
|
122
|
-
|
123
|
-
def maybe_configure_back_pressure(application: FastAPI):
|
124
|
-
from nucliadb.writer.back_pressure import start_materializer, stop_materializer
|
125
|
-
from nucliadb.writer.settings import back_pressure_settings
|
126
|
-
from nucliadb_utils.settings import is_onprem_nucliadb
|
127
|
-
|
128
|
-
if back_pressure_settings.enabled and not is_onprem_nucliadb():
|
129
|
-
context = get_app_context(application)
|
130
|
-
start_materializer_with_context = functools.partial(start_materializer, context)
|
131
|
-
application.add_event_handler("startup", start_materializer_with_context)
|
132
|
-
application.add_event_handler("shutdown", stop_materializer)
|
nucliadb/writer/back_pressure.py
CHANGED
@@ -28,7 +28,6 @@ from typing import Optional
|
|
28
28
|
from async_lru import alru_cache
|
29
29
|
from cachetools import TTLCache
|
30
30
|
from fastapi import HTTPException, Request
|
31
|
-
from nucliadb_protos.writer_pb2 import ShardObject
|
32
31
|
|
33
32
|
from nucliadb.common import datamanagers
|
34
33
|
from nucliadb.common.cluster.manager import get_index_nodes
|
@@ -37,11 +36,11 @@ from nucliadb.common.context.fastapi import get_app_context
|
|
37
36
|
from nucliadb.common.http_clients.processing import ProcessingHTTPClient
|
38
37
|
from nucliadb.writer import logger
|
39
38
|
from nucliadb.writer.settings import back_pressure_settings as settings
|
39
|
+
from nucliadb_protos.writer_pb2 import ShardObject
|
40
40
|
from nucliadb_telemetry import metrics
|
41
41
|
from nucliadb_utils import const
|
42
42
|
from nucliadb_utils.nats import NatsConnectionManager
|
43
43
|
from nucliadb_utils.settings import is_onprem_nucliadb
|
44
|
-
from nucliadb_utils.utilities import has_feature
|
45
44
|
|
46
45
|
__all__ = ["maybe_back_pressure"]
|
47
46
|
|
@@ -49,7 +48,7 @@ __all__ = ["maybe_back_pressure"]
|
|
49
48
|
back_pressure_observer = metrics.Observer("nucliadb_back_pressure", labels={"type": ""})
|
50
49
|
|
51
50
|
|
52
|
-
|
51
|
+
RATE_LIMITED_REQUESTS_COUNTER = metrics.Counter(
|
53
52
|
"nucliadb_rate_limited_requests", labels={"type": "", "cached": ""}
|
54
53
|
)
|
55
54
|
|
@@ -113,8 +112,15 @@ def cached_back_pressure(kbid: str, resource_uuid: Optional[str] = None):
|
|
113
112
|
if data is not None:
|
114
113
|
try_after = data.try_after
|
115
114
|
back_pressure_type = data.type
|
116
|
-
|
117
|
-
|
115
|
+
RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "true"})
|
116
|
+
logger.info(
|
117
|
+
"Back pressure applied from cache",
|
118
|
+
extra={
|
119
|
+
"type": back_pressure_type,
|
120
|
+
"try_after": try_after,
|
121
|
+
"kbid": kbid,
|
122
|
+
"resource_uuid": resource_uuid,
|
123
|
+
},
|
118
124
|
)
|
119
125
|
raise HTTPException(
|
120
126
|
status_code=429,
|
@@ -129,9 +135,7 @@ def cached_back_pressure(kbid: str, resource_uuid: Optional[str] = None):
|
|
129
135
|
except BackPressureException as exc:
|
130
136
|
try_after = exc.data.try_after
|
131
137
|
back_pressure_type = exc.data.type
|
132
|
-
|
133
|
-
{"type": back_pressure_type, "cached": "false"}
|
134
|
-
)
|
138
|
+
RATE_LIMITED_REQUESTS_COUNTER.inc({"type": back_pressure_type, "cached": "false"})
|
135
139
|
_cache.set(cache_key, exc.data)
|
136
140
|
raise HTTPException(
|
137
141
|
status_code=429,
|
@@ -216,6 +220,11 @@ class Materializer:
|
|
216
220
|
)
|
217
221
|
return 0
|
218
222
|
|
223
|
+
if pending > 0:
|
224
|
+
logger.info(
|
225
|
+
f"Processing returned {pending} pending messages for KB",
|
226
|
+
extra={"kbid": kbid},
|
227
|
+
)
|
219
228
|
self.processing_pending_cache[kbid] = pending
|
220
229
|
return pending
|
221
230
|
|
@@ -235,9 +244,7 @@ class Materializer:
|
|
235
244
|
for node in get_index_nodes():
|
236
245
|
try:
|
237
246
|
with back_pressure_observer({"type": "get_indexing_pending"}):
|
238
|
-
self.indexing_pending[
|
239
|
-
node.id
|
240
|
-
] = await get_nats_consumer_pending_messages(
|
247
|
+
self.indexing_pending[node.id] = await get_nats_consumer_pending_messages(
|
241
248
|
self.nats_manager,
|
242
249
|
stream=const.Streams.INDEX.name,
|
243
250
|
consumer=const.Streams.INDEX.group.format(node=node.id),
|
@@ -321,22 +328,17 @@ def get_materializer() -> Materializer:
|
|
321
328
|
return MATERIALIZER
|
322
329
|
|
323
330
|
|
324
|
-
async def maybe_back_pressure(
|
325
|
-
request: Request, kbid: str, resource_uuid: Optional[str] = None
|
326
|
-
) -> None:
|
331
|
+
async def maybe_back_pressure(request: Request, kbid: str, resource_uuid: Optional[str] = None) -> None:
|
327
332
|
"""
|
328
333
|
This function does system checks to see if we need to put back pressure on writes.
|
329
334
|
In that case, a HTTP 429 will be raised with the estimated time to try again.
|
330
335
|
"""
|
331
|
-
if
|
332
|
-
|
333
|
-
|
334
|
-
await back_pressure_checks(request, kbid, resource_uuid)
|
336
|
+
if not is_back_pressure_enabled() or is_onprem_nucliadb():
|
337
|
+
return
|
338
|
+
await back_pressure_checks(request, kbid, resource_uuid)
|
335
339
|
|
336
340
|
|
337
|
-
async def back_pressure_checks(
|
338
|
-
request: Request, kbid: str, resource_uuid: Optional[str] = None
|
339
|
-
):
|
341
|
+
async def back_pressure_checks(request: Request, kbid: str, resource_uuid: Optional[str] = None):
|
340
342
|
"""
|
341
343
|
Will raise a 429 if back pressure is needed:
|
342
344
|
- If the processing engine is behind.
|
@@ -347,9 +349,7 @@ async def back_pressure_checks(
|
|
347
349
|
materializer = get_materializer()
|
348
350
|
with cached_back_pressure(kbid, resource_uuid):
|
349
351
|
check_ingest_behind(materializer.get_ingest_pending())
|
350
|
-
await check_indexing_behind(
|
351
|
-
context, kbid, resource_uuid, materializer.get_indexing_pending()
|
352
|
-
)
|
352
|
+
await check_indexing_behind(context, kbid, resource_uuid, materializer.get_indexing_pending())
|
353
353
|
await check_processing_behind(materializer, kbid)
|
354
354
|
|
355
355
|
|
@@ -366,9 +366,19 @@ async def check_processing_behind(materializer: Materializer, kbid: str):
|
|
366
366
|
kb_pending = await materializer.get_processing_pending(kbid)
|
367
367
|
if kb_pending > max_pending:
|
368
368
|
try_after = estimate_try_after(
|
369
|
-
rate=settings.processing_rate,
|
369
|
+
rate=settings.processing_rate,
|
370
|
+
pending=kb_pending,
|
371
|
+
max_wait=settings.max_wait_time,
|
370
372
|
)
|
371
373
|
data = BackPressureData(type="processing", try_after=try_after)
|
374
|
+
logger.info(
|
375
|
+
"Processing back pressure applied",
|
376
|
+
extra={
|
377
|
+
"kbid": kbid,
|
378
|
+
"try_after": try_after,
|
379
|
+
"pending": kb_pending,
|
380
|
+
},
|
381
|
+
)
|
372
382
|
raise BackPressureException(data)
|
373
383
|
|
374
384
|
|
@@ -394,9 +404,7 @@ async def check_indexing_behind(
|
|
394
404
|
|
395
405
|
# Get nodes that are involved in the indexing of the request
|
396
406
|
if resource_uuid is not None:
|
397
|
-
nodes_to_check = await get_nodes_for_resource_shard(
|
398
|
-
context, kbid, resource_uuid
|
399
|
-
)
|
407
|
+
nodes_to_check = await get_nodes_for_resource_shard(context, kbid, resource_uuid)
|
400
408
|
else:
|
401
409
|
nodes_to_check = await get_nodes_for_kb_active_shards(context, kbid)
|
402
410
|
|
@@ -418,9 +426,20 @@ async def check_indexing_behind(
|
|
418
426
|
|
419
427
|
if highest_pending > max_pending:
|
420
428
|
try_after = estimate_try_after(
|
421
|
-
rate=settings.indexing_rate,
|
429
|
+
rate=settings.indexing_rate,
|
430
|
+
pending=highest_pending,
|
431
|
+
max_wait=settings.max_wait_time,
|
422
432
|
)
|
423
433
|
data = BackPressureData(type="indexing", try_after=try_after)
|
434
|
+
logger.info(
|
435
|
+
"Indexing back pressure applied",
|
436
|
+
extra={
|
437
|
+
"kbid": kbid,
|
438
|
+
"resource_uuid": resource_uuid,
|
439
|
+
"try_after": try_after,
|
440
|
+
"pending": highest_pending,
|
441
|
+
},
|
442
|
+
)
|
424
443
|
raise BackPressureException(data)
|
425
444
|
|
426
445
|
|
@@ -432,24 +451,28 @@ def check_ingest_behind(ingest_pending: int):
|
|
432
451
|
|
433
452
|
if ingest_pending > max_pending:
|
434
453
|
try_after = estimate_try_after(
|
435
|
-
rate=settings.ingest_rate,
|
454
|
+
rate=settings.ingest_rate,
|
455
|
+
pending=ingest_pending,
|
456
|
+
max_wait=settings.max_wait_time,
|
436
457
|
)
|
437
458
|
data = BackPressureData(type="ingest", try_after=try_after)
|
459
|
+
logger.info(
|
460
|
+
"Ingest back pressure applied",
|
461
|
+
extra={"try_after": try_after, "pending": ingest_pending},
|
462
|
+
)
|
438
463
|
raise BackPressureException(data)
|
439
464
|
|
440
465
|
|
441
|
-
def estimate_try_after(rate: float, pending: int) -> datetime:
|
466
|
+
def estimate_try_after(rate: float, pending: int, max_wait: int) -> datetime:
|
442
467
|
"""
|
443
468
|
This function estimates the time to try again based on the rate and the number of pending messages.
|
444
469
|
"""
|
445
|
-
delta_seconds = pending / rate
|
470
|
+
delta_seconds = min(pending / rate, max_wait)
|
446
471
|
return datetime.utcnow() + timedelta(seconds=delta_seconds)
|
447
472
|
|
448
473
|
|
449
474
|
@alru_cache(maxsize=1024, ttl=60 * 15)
|
450
|
-
async def get_nodes_for_kb_active_shards(
|
451
|
-
context: ApplicationContext, kbid: str
|
452
|
-
) -> list[str]:
|
475
|
+
async def get_nodes_for_kb_active_shards(context: ApplicationContext, kbid: str) -> list[str]:
|
453
476
|
with back_pressure_observer({"type": "get_kb_active_shard"}):
|
454
477
|
active_shard = await get_kb_active_shard(context, kbid)
|
455
478
|
if active_shard is None:
|
@@ -480,20 +503,16 @@ async def get_nats_consumer_pending_messages(
|
|
480
503
|
return consumer_info.num_pending
|
481
504
|
|
482
505
|
|
483
|
-
async def get_kb_active_shard(
|
484
|
-
context
|
485
|
-
) -> Optional[ShardObject]:
|
486
|
-
async with context.kv_driver.transaction() as txn:
|
506
|
+
async def get_kb_active_shard(context: ApplicationContext, kbid: str) -> Optional[ShardObject]:
|
507
|
+
async with context.kv_driver.transaction(read_only=True) as txn:
|
487
508
|
return await context.shard_manager.get_current_active_shard(txn, kbid)
|
488
509
|
|
489
510
|
|
490
511
|
async def get_resource_shard(
|
491
512
|
context: ApplicationContext, kbid: str, resource_uuid: str
|
492
513
|
) -> Optional[ShardObject]:
|
493
|
-
async with datamanagers.
|
494
|
-
shard_id = await datamanagers.resources.get_resource_shard_id(
|
495
|
-
txn, kbid=kbid, rid=resource_uuid
|
496
|
-
)
|
514
|
+
async with datamanagers.with_ro_transaction() as txn:
|
515
|
+
shard_id = await datamanagers.resources.get_resource_shard_id(txn, kbid=kbid, rid=resource_uuid)
|
497
516
|
if shard_id is None:
|
498
517
|
# Resource does not exist
|
499
518
|
logger.debug(
|
nucliadb/writer/exceptions.py
CHANGED
nucliadb/writer/lifecycle.py
CHANGED
@@ -17,13 +17,20 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from
|
20
|
+
from contextlib import asynccontextmanager
|
21
|
+
|
22
|
+
from fastapi import FastAPI
|
23
|
+
|
24
|
+
from nucliadb.common.context.fastapi import inject_app_context
|
25
|
+
from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
|
21
26
|
from nucliadb.ingest.utils import start_ingest, stop_ingest
|
22
27
|
from nucliadb.writer import SERVICE_NAME
|
28
|
+
from nucliadb.writer.back_pressure import start_materializer, stop_materializer
|
29
|
+
from nucliadb.writer.settings import back_pressure_settings
|
23
30
|
from nucliadb.writer.tus import finalize as storage_finalize
|
24
31
|
from nucliadb.writer.tus import initialize as storage_initialize
|
25
|
-
from nucliadb.writer.utilities import get_processing
|
26
32
|
from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry
|
33
|
+
from nucliadb_utils.settings import is_onprem_nucliadb
|
27
34
|
from nucliadb_utils.utilities import (
|
28
35
|
finalize_utilities,
|
29
36
|
start_partitioning_utility,
|
@@ -32,29 +39,28 @@ from nucliadb_utils.utilities import (
|
|
32
39
|
)
|
33
40
|
|
34
41
|
|
35
|
-
|
36
|
-
|
42
|
+
@asynccontextmanager
|
43
|
+
async def lifespan(app: FastAPI):
|
44
|
+
back_pressure_enabled = back_pressure_settings.enabled and not is_onprem_nucliadb()
|
37
45
|
|
46
|
+
await setup_telemetry(SERVICE_NAME)
|
38
47
|
await start_ingest(SERVICE_NAME)
|
39
|
-
|
40
48
|
await start_processing_engine()
|
41
|
-
|
42
49
|
start_partitioning_utility()
|
43
|
-
|
44
50
|
await start_transaction_utility(SERVICE_NAME)
|
45
51
|
await storage_initialize()
|
46
52
|
|
53
|
+
# Inject application context into the fastapi app's state
|
54
|
+
async with inject_app_context(app) as context:
|
55
|
+
if back_pressure_enabled:
|
56
|
+
await start_materializer(context)
|
57
|
+
yield
|
47
58
|
|
48
|
-
|
59
|
+
if back_pressure_enabled:
|
60
|
+
await stop_materializer()
|
49
61
|
await stop_transaction_utility()
|
50
|
-
|
51
62
|
await stop_ingest()
|
52
|
-
|
53
|
-
if processing is not None:
|
54
|
-
await processing.finalize()
|
55
|
-
|
63
|
+
await stop_processing_engine()
|
56
64
|
await storage_finalize()
|
57
|
-
|
58
65
|
await clean_telemetry(SERVICE_NAME)
|
59
|
-
|
60
66
|
await finalize_utilities()
|
nucliadb/writer/py.typed
ADDED
File without changes
|
@@ -19,9 +19,10 @@
|
|
19
19
|
#
|
20
20
|
from datetime import datetime
|
21
21
|
|
22
|
-
from nucliadb_protos.writer_pb2 import Audit
|
23
22
|
from starlette.requests import Request
|
24
23
|
|
24
|
+
from nucliadb_protos.writer_pb2 import Audit
|
25
|
+
|
25
26
|
|
26
27
|
def parse_audit(audit: Audit, request: Request):
|
27
28
|
audit.user = request.headers.get("X-NUCLIADB-USER", "")
|
@@ -18,8 +18,31 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
from datetime import datetime
|
21
|
+
from typing import Optional
|
21
22
|
|
22
23
|
from fastapi import HTTPException
|
24
|
+
|
25
|
+
from nucliadb.common.models_utils import to_proto
|
26
|
+
from nucliadb.common.models_utils.from_proto import (
|
27
|
+
RelationNodeTypeMap,
|
28
|
+
RelationTypeMap,
|
29
|
+
)
|
30
|
+
from nucliadb.ingest.orm.utils import set_title
|
31
|
+
from nucliadb.ingest.processing import PushPayload
|
32
|
+
from nucliadb_models.content_types import GENERIC_MIME_TYPE
|
33
|
+
from nucliadb_models.file import FileField
|
34
|
+
from nucliadb_models.link import LinkField
|
35
|
+
from nucliadb_models.metadata import (
|
36
|
+
ParagraphAnnotation,
|
37
|
+
QuestionAnswerAnnotation,
|
38
|
+
)
|
39
|
+
from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE, PushTextFormat, Text
|
40
|
+
from nucliadb_models.writer import (
|
41
|
+
ComingResourcePayload,
|
42
|
+
CreateResourcePayload,
|
43
|
+
UpdateResourcePayload,
|
44
|
+
)
|
45
|
+
from nucliadb_protos.knowledgebox_pb2 import KnowledgeBoxConfig
|
23
46
|
from nucliadb_protos.resources_pb2 import (
|
24
47
|
Answers,
|
25
48
|
Basic,
|
@@ -30,38 +53,19 @@ from nucliadb_protos.resources_pb2 import (
|
|
30
53
|
Metadata,
|
31
54
|
PageSelections,
|
32
55
|
Paragraph,
|
56
|
+
TokenSplit,
|
57
|
+
UserFieldMetadata,
|
58
|
+
VisualSelection,
|
33
59
|
)
|
34
60
|
from nucliadb_protos.resources_pb2 import ParagraphAnnotation as PBParagraphAnnotation
|
35
61
|
from nucliadb_protos.resources_pb2 import (
|
36
62
|
QuestionAnswerAnnotation as PBQuestionAnswerAnnotation,
|
37
63
|
)
|
38
|
-
from nucliadb_protos.resources_pb2 import TokenSplit, UserFieldMetadata, VisualSelection
|
39
64
|
from nucliadb_protos.utils_pb2 import Relation, RelationNode
|
40
65
|
from nucliadb_protos.writer_pb2 import BrokerMessage
|
41
66
|
|
42
|
-
from nucliadb.ingest.orm.utils import set_title
|
43
|
-
from nucliadb.ingest.processing import ProcessingInfo, PushPayload
|
44
|
-
from nucliadb_models.common import FIELD_TYPES_MAP_REVERSE
|
45
|
-
from nucliadb_models.file import FileField
|
46
|
-
from nucliadb_models.link import LinkField
|
47
|
-
from nucliadb_models.metadata import (
|
48
|
-
ParagraphAnnotation,
|
49
|
-
QuestionAnswerAnnotation,
|
50
|
-
RelationNodeTypeMap,
|
51
|
-
RelationTypeMap,
|
52
|
-
)
|
53
|
-
from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE, PushTextFormat, Text
|
54
|
-
from nucliadb_models.writer import (
|
55
|
-
GENERIC_MIME_TYPE,
|
56
|
-
ComingResourcePayload,
|
57
|
-
CreateResourcePayload,
|
58
|
-
UpdateResourcePayload,
|
59
|
-
)
|
60
|
-
|
61
67
|
|
62
|
-
def parse_basic_modify(
|
63
|
-
bm: BrokerMessage, item: ComingResourcePayload, toprocess: PushPayload
|
64
|
-
):
|
68
|
+
def parse_basic_modify(bm: BrokerMessage, item: ComingResourcePayload, toprocess: PushPayload):
|
65
69
|
bm.basic.modified.FromDatetime(datetime.now())
|
66
70
|
if item.title:
|
67
71
|
set_title(bm, toprocess, item.title)
|
@@ -83,14 +87,9 @@ def parse_basic_modify(
|
|
83
87
|
bm.basic.metadata.useful = True
|
84
88
|
bm.basic.metadata.status = Metadata.Status.PENDING
|
85
89
|
|
86
|
-
toprocess.genericfield["summary"] = Text(
|
87
|
-
body=item.summary, format=PushTextFormat.PLAIN
|
88
|
-
)
|
90
|
+
toprocess.genericfield["summary"] = Text(body=item.summary, format=PushTextFormat.PLAIN)
|
89
91
|
if item.thumbnail:
|
90
92
|
bm.basic.thumbnail = item.thumbnail
|
91
|
-
if item.layout:
|
92
|
-
bm.basic.layout = item.layout
|
93
|
-
|
94
93
|
if item.metadata is not None:
|
95
94
|
bm.basic.metadata.metadata.update(item.metadata.metadata)
|
96
95
|
if item.metadata.language:
|
@@ -147,9 +146,8 @@ def parse_basic_modify(
|
|
147
146
|
userfieldmetadata.question_answers.append(qa_annotation_pb)
|
148
147
|
|
149
148
|
userfieldmetadata.field.field = fieldmetadata.field.field
|
150
|
-
|
151
|
-
|
152
|
-
]
|
149
|
+
|
150
|
+
userfieldmetadata.field.field_type = to_proto.field_type(fieldmetadata.field.field_type)
|
153
151
|
|
154
152
|
bm.basic.fieldmetadata.append(userfieldmetadata)
|
155
153
|
|
@@ -167,9 +165,7 @@ def parse_basic_modify(
|
|
167
165
|
]
|
168
166
|
)
|
169
167
|
|
170
|
-
relation_node_resource = RelationNode(
|
171
|
-
value=bm.uuid, ntype=RelationNode.NodeType.RESOURCE
|
172
|
-
)
|
168
|
+
relation_node_resource = RelationNode(value=bm.uuid, ntype=RelationNode.NodeType.RESOURCE)
|
173
169
|
relations = []
|
174
170
|
for relation in item.usermetadata.relations:
|
175
171
|
if relation.from_ is None:
|
@@ -205,8 +201,16 @@ def parse_basic_modify(
|
|
205
201
|
unique_groups = list(set(item.security.access_groups))
|
206
202
|
bm.security.access_groups.extend(unique_groups)
|
207
203
|
|
204
|
+
if item.hidden is not None:
|
205
|
+
bm.basic.hidden = item.hidden
|
208
206
|
|
209
|
-
|
207
|
+
|
208
|
+
def parse_basic_creation(
|
209
|
+
bm: BrokerMessage,
|
210
|
+
item: CreateResourcePayload,
|
211
|
+
toprocess: PushPayload,
|
212
|
+
kb_config: Optional[KnowledgeBoxConfig],
|
213
|
+
):
|
210
214
|
bm.basic.created.FromDatetime(datetime.now())
|
211
215
|
|
212
216
|
if item.title is None:
|
@@ -215,6 +219,10 @@ def parse_basic(bm: BrokerMessage, item: CreateResourcePayload, toprocess: PushP
|
|
215
219
|
|
216
220
|
parse_basic_modify(bm, item, toprocess)
|
217
221
|
|
222
|
+
if item.hidden is None:
|
223
|
+
if kb_config and kb_config.hidden_resources_hide_on_creation:
|
224
|
+
bm.basic.hidden = True
|
225
|
+
|
218
226
|
|
219
227
|
def set_status(basic: Basic, item: CreateResourcePayload):
|
220
228
|
basic.metadata.status = Metadata.Status.PENDING
|
@@ -224,34 +232,14 @@ def set_status_modify(basic: Basic, item: UpdateResourcePayload):
|
|
224
232
|
basic.metadata.status = Metadata.Status.PENDING
|
225
233
|
|
226
234
|
|
227
|
-
def set_processing_info(bm: BrokerMessage, processing_info: ProcessingInfo):
|
228
|
-
"""
|
229
|
-
Processing V2 does not have this awkward processing info data field and storage
|
230
|
-
but keeping for b/w compatibility.
|
231
|
-
|
232
|
-
Once V1 is removed, this code can be removed because status checking will be done
|
233
|
-
in a separate API that is not part of NucliaDB.
|
234
|
-
"""
|
235
|
-
if processing_info.seqid is not None:
|
236
|
-
bm.basic.last_seqid = processing_info.seqid
|
237
|
-
if processing_info.account_seq is not None:
|
238
|
-
bm.basic.last_account_seq = processing_info.account_seq
|
239
|
-
if processing_info.queue is not None:
|
240
|
-
bm.basic.queue = bm.basic.QueueType.Value(processing_info.queue.name)
|
241
|
-
|
242
|
-
|
243
235
|
def validate_classifications(paragraph: ParagraphAnnotation):
|
244
236
|
classifications = paragraph.classifications
|
245
237
|
if len(classifications) == 0:
|
246
|
-
raise HTTPException(
|
247
|
-
status_code=422, detail="ensure classifications has at least 1 items"
|
248
|
-
)
|
238
|
+
raise HTTPException(status_code=422, detail="ensure classifications has at least 1 items")
|
249
239
|
|
250
|
-
unique_classifications = {tuple(cf.
|
240
|
+
unique_classifications = {tuple(cf.model_dump().values()) for cf in classifications}
|
251
241
|
if len(unique_classifications) != len(classifications):
|
252
|
-
raise HTTPException(
|
253
|
-
status_code=422, detail="Paragraph classifications need to be unique"
|
254
|
-
)
|
242
|
+
raise HTTPException(status_code=422, detail="Paragraph classifications need to be unique")
|
255
243
|
|
256
244
|
|
257
245
|
def compute_title(item: CreateResourcePayload, rid: str) -> str:
|
@@ -289,9 +277,7 @@ def build_question_answer_annotation_pb(
|
|
289
277
|
pb.cancelled_by_user = qa_annotation.cancelled_by_user
|
290
278
|
pb.question_answer.question.text = qa_annotation.question_answer.question.text
|
291
279
|
if qa_annotation.question_answer.question.language is not None:
|
292
|
-
pb.question_answer.question.language =
|
293
|
-
qa_annotation.question_answer.question.language
|
294
|
-
)
|
280
|
+
pb.question_answer.question.language = qa_annotation.question_answer.question.language
|
295
281
|
pb.question_answer.question.ids_paragraphs.extend(
|
296
282
|
qa_annotation.question_answer.question.ids_paragraphs
|
297
283
|
)
|