nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -25,17 +25,6 @@ from uuid import uuid4
|
|
25
25
|
|
26
26
|
from fastapi import HTTPException, Query, Response
|
27
27
|
from fastapi_versioning import version
|
28
|
-
from grpc import StatusCode as GrpcStatusCode
|
29
|
-
from grpc.aio import AioRpcError
|
30
|
-
from nucliadb_protos.resources_pb2 import Metadata
|
31
|
-
from nucliadb_protos.writer_pb2 import (
|
32
|
-
BrokerMessage,
|
33
|
-
IndexResource,
|
34
|
-
ResourceFieldExistsResponse,
|
35
|
-
ResourceFieldId,
|
36
|
-
ResourceIdRequest,
|
37
|
-
ResourceIdResponse,
|
38
|
-
)
|
39
28
|
from starlette.requests import Request
|
40
29
|
|
41
30
|
from nucliadb.common import datamanagers
|
@@ -46,7 +35,8 @@ from nucliadb.common.maindb.utils import get_driver
|
|
46
35
|
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
47
36
|
from nucliadb.ingest.processing import ProcessingInfo, PushPayload, Source
|
48
37
|
from nucliadb.writer import SERVICE_NAME, logger
|
49
|
-
from nucliadb.writer.api.constants import SKIP_STORE_DEFAULT,
|
38
|
+
from nucliadb.writer.api.constants import SKIP_STORE_DEFAULT, X_NUCLIADB_USER
|
39
|
+
from nucliadb.writer.api.v1 import transaction
|
50
40
|
from nucliadb.writer.api.v1.router import (
|
51
41
|
KB_PREFIX,
|
52
42
|
RESOURCE_PREFIX,
|
@@ -54,24 +44,17 @@ from nucliadb.writer.api.v1.router import (
|
|
54
44
|
RSLUG_PREFIX,
|
55
45
|
api,
|
56
46
|
)
|
47
|
+
from nucliadb.writer.api.v1.slug import ensure_slug_uniqueness, noop_context_manager
|
57
48
|
from nucliadb.writer.back_pressure import maybe_back_pressure
|
58
|
-
from nucliadb.writer.exceptions import IngestNotAvailable
|
59
49
|
from nucliadb.writer.resource.audit import parse_audit
|
60
50
|
from nucliadb.writer.resource.basic import (
|
61
|
-
|
51
|
+
parse_basic_creation,
|
62
52
|
parse_basic_modify,
|
63
|
-
set_processing_info,
|
64
53
|
set_status,
|
65
54
|
set_status_modify,
|
66
55
|
)
|
67
56
|
from nucliadb.writer.resource.field import extract_fields, parse_fields
|
68
57
|
from nucliadb.writer.resource.origin import parse_extra, parse_origin
|
69
|
-
from nucliadb.writer.resource.slug import resource_slug_exists
|
70
|
-
from nucliadb.writer.resource.vectors import (
|
71
|
-
create_vectorset,
|
72
|
-
get_vectorsets,
|
73
|
-
parse_vectors,
|
74
|
-
)
|
75
58
|
from nucliadb.writer.utilities import get_processing
|
76
59
|
from nucliadb_models.resource import NucliaDBRoles
|
77
60
|
from nucliadb_models.writer import (
|
@@ -80,6 +63,8 @@ from nucliadb_models.writer import (
|
|
80
63
|
ResourceUpdated,
|
81
64
|
UpdateResourcePayload,
|
82
65
|
)
|
66
|
+
from nucliadb_protos.resources_pb2 import Metadata
|
67
|
+
from nucliadb_protos.writer_pb2 import BrokerMessage, IndexResource
|
83
68
|
from nucliadb_telemetry.errors import capture_exception
|
84
69
|
from nucliadb_utils.authentication import requires
|
85
70
|
from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError
|
@@ -87,7 +72,6 @@ from nucliadb_utils.utilities import (
|
|
87
72
|
get_ingest,
|
88
73
|
get_partitioning,
|
89
74
|
get_storage,
|
90
|
-
get_transaction_utility,
|
91
75
|
)
|
92
76
|
|
93
77
|
|
@@ -107,11 +91,17 @@ async def create_resource(
|
|
107
91
|
item: CreateResourcePayload,
|
108
92
|
kbid: str,
|
109
93
|
x_skip_store: bool = SKIP_STORE_DEFAULT,
|
110
|
-
|
94
|
+
x_nucliadb_user: str = X_NUCLIADB_USER,
|
111
95
|
):
|
96
|
+
kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
|
97
|
+
if item.hidden and not (kb_config and kb_config.hidden_resources_enabled):
|
98
|
+
raise HTTPException(
|
99
|
+
status_code=422,
|
100
|
+
detail="Cannot hide a resource: the KB does not have hidden resources enabled",
|
101
|
+
)
|
102
|
+
|
112
103
|
await maybe_back_pressure(request, kbid)
|
113
104
|
|
114
|
-
transaction = get_transaction_utility()
|
115
105
|
partitioning = get_partitioning()
|
116
106
|
|
117
107
|
# Create resource message
|
@@ -123,7 +113,7 @@ async def create_resource(
|
|
123
113
|
uuid=uuid,
|
124
114
|
kbid=kbid,
|
125
115
|
partition=partition,
|
126
|
-
userid=
|
116
|
+
userid=x_nucliadb_user,
|
127
117
|
processing_options=item.processing_options,
|
128
118
|
)
|
129
119
|
|
@@ -134,75 +124,52 @@ async def create_resource(
|
|
134
124
|
toprocess.source = Source.HTTP
|
135
125
|
toprocess.title = item.title
|
136
126
|
|
127
|
+
unique_slug_context_manager = noop_context_manager()
|
137
128
|
if item.slug:
|
138
|
-
|
139
|
-
raise HTTPException(
|
140
|
-
status_code=409, detail=f"Resource slug {item.slug} already exists"
|
141
|
-
)
|
129
|
+
unique_slug_context_manager = ensure_slug_uniqueness(kbid, item.slug)
|
142
130
|
writer.slug = item.slug
|
143
131
|
toprocess.slug = item.slug
|
144
132
|
|
145
|
-
|
146
|
-
|
133
|
+
async with unique_slug_context_manager:
|
134
|
+
parse_audit(writer.audit, request)
|
135
|
+
parse_basic_creation(writer, item, toprocess, kb_config)
|
136
|
+
|
137
|
+
if item.origin is not None:
|
138
|
+
parse_origin(writer.origin, item.origin)
|
139
|
+
if item.extra is not None:
|
140
|
+
parse_extra(writer.extra, item.extra)
|
141
|
+
|
142
|
+
await parse_fields(
|
143
|
+
writer=writer,
|
144
|
+
item=item,
|
145
|
+
toprocess=toprocess,
|
146
|
+
kbid=kbid,
|
147
|
+
uuid=uuid,
|
148
|
+
x_skip_store=x_skip_store,
|
149
|
+
)
|
147
150
|
|
148
|
-
|
149
|
-
parse_origin(writer.origin, item.origin)
|
150
|
-
if item.extra is not None:
|
151
|
-
parse_extra(writer.extra, item.extra)
|
151
|
+
set_status(writer.basic, item)
|
152
152
|
|
153
|
-
|
154
|
-
writer=writer,
|
155
|
-
item=item,
|
156
|
-
toprocess=toprocess,
|
157
|
-
kbid=kbid,
|
158
|
-
uuid=uuid,
|
159
|
-
x_skip_store=x_skip_store,
|
160
|
-
)
|
153
|
+
writer.source = BrokerMessage.MessageSource.WRITER
|
161
154
|
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
155
|
+
if item.wait_for_commit:
|
156
|
+
t0 = time()
|
157
|
+
await transaction.commit(writer, partition, wait=item.wait_for_commit)
|
158
|
+
|
159
|
+
if item.wait_for_commit:
|
160
|
+
txn_time = time() - t0
|
166
161
|
else:
|
167
|
-
|
168
|
-
if vector.vectors is not None:
|
169
|
-
for vectorset, uservector in vector.vectors.items():
|
170
|
-
if len(uservector) == 0:
|
171
|
-
raise HTTPException(
|
172
|
-
status_code=412,
|
173
|
-
detail=str("Vectorset without vector not allowed"),
|
174
|
-
)
|
175
|
-
first_vector = list(uservector.values())[0]
|
176
|
-
await create_vectorset(
|
177
|
-
kbid, vectorset, len(first_vector.vector)
|
178
|
-
)
|
179
|
-
vectorsets = await get_vectorsets(kbid)
|
180
|
-
if vectorsets is None or len(vectorsets.vectorsets) == 0:
|
181
|
-
raise HTTPException(
|
182
|
-
status_code=412,
|
183
|
-
detail=str("Vectorset was not able to be created"),
|
184
|
-
)
|
185
|
-
parse_vectors(writer, item.uservectors, vectorsets)
|
186
|
-
|
187
|
-
set_status(writer.basic, item)
|
188
|
-
|
189
|
-
seqid = await maybe_send_to_process(writer, toprocess, partition)
|
162
|
+
txn_time = None
|
190
163
|
|
191
|
-
|
192
|
-
if x_synchronous:
|
193
|
-
t0 = time()
|
194
|
-
await transaction.commit(writer, partition, wait=x_synchronous)
|
164
|
+
seqid = await maybe_send_to_process(toprocess, partition)
|
195
165
|
|
196
|
-
|
197
|
-
return ResourceCreated(seqid=seqid, uuid=uuid, elapsed=time() - t0)
|
198
|
-
else:
|
199
|
-
return ResourceCreated(seqid=seqid, uuid=uuid)
|
166
|
+
return ResourceCreated(seqid=seqid, uuid=uuid, elapsed=txn_time)
|
200
167
|
|
201
168
|
|
202
169
|
@api.patch(
|
203
170
|
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}",
|
204
171
|
status_code=200,
|
205
|
-
|
172
|
+
summary="Modify Resource (by slug)",
|
206
173
|
response_model=ResourceUpdated,
|
207
174
|
tags=["Resources"],
|
208
175
|
)
|
@@ -214,16 +181,15 @@ async def modify_resource_rslug_prefix(
|
|
214
181
|
rslug: str,
|
215
182
|
item: UpdateResourcePayload,
|
216
183
|
x_skip_store: bool = SKIP_STORE_DEFAULT,
|
217
|
-
x_synchronous: bool = SYNC_CALL,
|
218
184
|
x_nucliadb_user: str = X_NUCLIADB_USER,
|
219
185
|
):
|
186
|
+
rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
|
220
187
|
return await modify_resource_endpoint(
|
221
188
|
request,
|
222
189
|
item,
|
223
190
|
kbid,
|
224
|
-
|
191
|
+
rid,
|
225
192
|
x_skip_store=x_skip_store,
|
226
|
-
x_synchronous=x_synchronous,
|
227
193
|
x_nucliadb_user=x_nucliadb_user,
|
228
194
|
)
|
229
195
|
|
@@ -231,7 +197,7 @@ async def modify_resource_rslug_prefix(
|
|
231
197
|
@api.patch(
|
232
198
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}",
|
233
199
|
status_code=200,
|
234
|
-
|
200
|
+
summary="Modify Resource (by id)",
|
235
201
|
response_model=ResourceUpdated,
|
236
202
|
tags=["Resources"],
|
237
203
|
)
|
@@ -243,16 +209,14 @@ async def modify_resource_rid_prefix(
|
|
243
209
|
rid: str,
|
244
210
|
item: UpdateResourcePayload,
|
245
211
|
x_skip_store: bool = SKIP_STORE_DEFAULT,
|
246
|
-
x_synchronous: bool = SYNC_CALL,
|
247
212
|
x_nucliadb_user: str = X_NUCLIADB_USER,
|
248
213
|
):
|
249
214
|
return await modify_resource_endpoint(
|
250
215
|
request,
|
251
216
|
item,
|
252
217
|
kbid,
|
253
|
-
|
218
|
+
rid,
|
254
219
|
x_skip_store=x_skip_store,
|
255
|
-
x_synchronous=x_synchronous,
|
256
220
|
x_nucliadb_user=x_nucliadb_user,
|
257
221
|
)
|
258
222
|
|
@@ -261,15 +225,13 @@ async def modify_resource_endpoint(
|
|
261
225
|
request: Request,
|
262
226
|
item: UpdateResourcePayload,
|
263
227
|
kbid: str,
|
228
|
+
rid: str,
|
264
229
|
x_skip_store: bool,
|
265
|
-
x_synchronous: bool,
|
266
230
|
x_nucliadb_user: str,
|
267
|
-
path_rid: Optional[str] = None,
|
268
|
-
path_rslug: Optional[str] = None,
|
269
231
|
):
|
270
|
-
|
232
|
+
await validate_rid_exists_or_raise_error(kbid, rid)
|
271
233
|
|
272
|
-
await maybe_back_pressure(request, kbid, resource_uuid=
|
234
|
+
await maybe_back_pressure(request, kbid, resource_uuid=rid)
|
273
235
|
|
274
236
|
if item.slug is None:
|
275
237
|
return await modify_resource(
|
@@ -277,22 +239,18 @@ async def modify_resource_endpoint(
|
|
277
239
|
item,
|
278
240
|
kbid,
|
279
241
|
x_skip_store=x_skip_store,
|
280
|
-
x_synchronous=x_synchronous,
|
281
242
|
x_nucliadb_user=x_nucliadb_user,
|
282
|
-
rid=
|
243
|
+
rid=rid,
|
283
244
|
)
|
284
245
|
|
285
|
-
async with safe_update_resource_slug(
|
286
|
-
request, kbid, rid=resource_uuid, new_slug=item.slug
|
287
|
-
):
|
246
|
+
async with safe_update_resource_slug(request, kbid, rid=rid, new_slug=item.slug):
|
288
247
|
return await modify_resource(
|
289
248
|
request,
|
290
249
|
item,
|
291
250
|
kbid,
|
292
251
|
x_skip_store=x_skip_store,
|
293
|
-
x_synchronous=x_synchronous,
|
294
252
|
x_nucliadb_user=x_nucliadb_user,
|
295
|
-
rid=
|
253
|
+
rid=rid,
|
296
254
|
)
|
297
255
|
|
298
256
|
|
@@ -301,12 +259,17 @@ async def modify_resource(
|
|
301
259
|
item: UpdateResourcePayload,
|
302
260
|
kbid: str,
|
303
261
|
x_skip_store: bool,
|
304
|
-
x_synchronous: bool,
|
305
262
|
x_nucliadb_user: str,
|
306
263
|
*,
|
307
264
|
rid: str,
|
308
265
|
):
|
309
|
-
|
266
|
+
kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
|
267
|
+
if item.hidden and not (kb_config and kb_config.hidden_resources_enabled):
|
268
|
+
raise HTTPException(
|
269
|
+
status_code=422,
|
270
|
+
detail="Cannot hide a resource: the KB does not have hidden resources enabled",
|
271
|
+
)
|
272
|
+
|
310
273
|
partitioning = get_partitioning()
|
311
274
|
|
312
275
|
partition = partitioning.generate_partition(kbid, rid)
|
@@ -341,23 +304,16 @@ async def modify_resource(
|
|
341
304
|
uuid=rid,
|
342
305
|
x_skip_store=x_skip_store,
|
343
306
|
)
|
344
|
-
if item.uservectors:
|
345
|
-
vectorsets = await get_vectorsets(kbid)
|
346
|
-
if vectorsets:
|
347
|
-
parse_vectors(writer, item.uservectors, vectorsets)
|
348
|
-
else:
|
349
|
-
raise HTTPException(status_code=412, detail=str("No vectorsets found"))
|
350
|
-
|
351
307
|
set_status_modify(writer.basic, item)
|
352
308
|
|
353
309
|
toprocess.title = writer.basic.title
|
354
|
-
seqid = await maybe_send_to_process(writer, toprocess, partition)
|
355
310
|
|
356
311
|
writer.source = BrokerMessage.MessageSource.WRITER
|
357
312
|
|
358
313
|
maybe_mark_reindex(writer, item)
|
359
314
|
|
360
|
-
await transaction.commit(writer, partition
|
315
|
+
await transaction.commit(writer, partition)
|
316
|
+
seqid = await maybe_send_to_process(toprocess, partition)
|
361
317
|
|
362
318
|
return ResourceUpdated(seqid=seqid)
|
363
319
|
|
@@ -397,9 +353,7 @@ async def update_resource_slug(
|
|
397
353
|
new_slug: str,
|
398
354
|
):
|
399
355
|
async with driver.transaction() as txn:
|
400
|
-
old_slug = await datamanagers.resources.modify_slug(
|
401
|
-
txn, kbid=kbid, rid=rid, new_slug=new_slug
|
402
|
-
)
|
356
|
+
old_slug = await datamanagers.resources.modify_slug(txn, kbid=kbid, rid=rid, new_slug=new_slug)
|
403
357
|
await txn.commit()
|
404
358
|
return old_slug
|
405
359
|
|
@@ -407,7 +361,7 @@ async def update_resource_slug(
|
|
407
361
|
@api.post(
|
408
362
|
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/reprocess",
|
409
363
|
status_code=202,
|
410
|
-
|
364
|
+
summary="Reprocess resource (by slug)",
|
411
365
|
response_model=ResourceUpdated,
|
412
366
|
tags=["Resources"],
|
413
367
|
)
|
@@ -419,15 +373,14 @@ async def reprocess_resource_rslug_prefix(
|
|
419
373
|
rslug: str,
|
420
374
|
x_nucliadb_user: str = X_NUCLIADB_USER,
|
421
375
|
):
|
422
|
-
|
423
|
-
|
424
|
-
)
|
376
|
+
rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
|
377
|
+
return await _reprocess_resource(request, kbid, rid, x_nucliadb_user=x_nucliadb_user)
|
425
378
|
|
426
379
|
|
427
380
|
@api.post(
|
428
381
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/reprocess",
|
429
382
|
status_code=202,
|
430
|
-
|
383
|
+
summary="Reprocess resource (by id)",
|
431
384
|
response_model=ResourceUpdated,
|
432
385
|
tags=["Resources"],
|
433
386
|
)
|
@@ -439,25 +392,20 @@ async def reprocess_resource_rid_prefix(
|
|
439
392
|
rid: str,
|
440
393
|
x_nucliadb_user: str = X_NUCLIADB_USER,
|
441
394
|
):
|
442
|
-
return await _reprocess_resource(
|
443
|
-
request, kbid, rid=rid, x_nucliadb_user=x_nucliadb_user
|
444
|
-
)
|
395
|
+
return await _reprocess_resource(request, kbid, rid, x_nucliadb_user=x_nucliadb_user)
|
445
396
|
|
446
397
|
|
447
398
|
async def _reprocess_resource(
|
448
399
|
request: Request,
|
449
400
|
kbid: str,
|
401
|
+
rid: str,
|
450
402
|
x_nucliadb_user: str,
|
451
|
-
rid: Optional[str] = None,
|
452
|
-
rslug: Optional[str] = None,
|
453
403
|
):
|
454
|
-
|
455
|
-
partitioning = get_partitioning()
|
456
|
-
|
457
|
-
rid = await get_rid_from_params_or_raise_error(kbid, rid, rslug)
|
458
|
-
|
404
|
+
await validate_rid_exists_or_raise_error(kbid, rid)
|
459
405
|
await maybe_back_pressure(request, kbid, resource_uuid=rid)
|
460
406
|
|
407
|
+
partitioning = get_partitioning()
|
408
|
+
|
461
409
|
partition = partitioning.generate_partition(kbid, rid)
|
462
410
|
|
463
411
|
toprocess = PushPayload(
|
@@ -483,16 +431,14 @@ async def _reprocess_resource(
|
|
483
431
|
|
484
432
|
await extract_fields(resource=resource, toprocess=toprocess)
|
485
433
|
|
486
|
-
processing_info = await send_to_process(toprocess, partition)
|
487
|
-
|
488
434
|
writer = BrokerMessage()
|
489
435
|
writer.kbid = kbid
|
490
436
|
writer.uuid = rid
|
491
437
|
writer.source = BrokerMessage.MessageSource.WRITER
|
492
438
|
writer.basic.metadata.useful = True
|
493
439
|
writer.basic.metadata.status = Metadata.Status.PENDING
|
494
|
-
set_processing_info(writer, processing_info)
|
495
440
|
await transaction.commit(writer, partition, wait=False)
|
441
|
+
processing_info = await send_to_process(toprocess, partition)
|
496
442
|
|
497
443
|
return ResourceUpdated(seqid=processing_info.seqid)
|
498
444
|
|
@@ -500,7 +446,7 @@ async def _reprocess_resource(
|
|
500
446
|
@api.delete(
|
501
447
|
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}",
|
502
448
|
status_code=204,
|
503
|
-
|
449
|
+
summary="Delete Resource (by slug)",
|
504
450
|
tags=["Resources"],
|
505
451
|
)
|
506
452
|
@requires(NucliaDBRoles.WRITER)
|
@@ -509,17 +455,15 @@ async def delete_resource_rslug_prefix(
|
|
509
455
|
request: Request,
|
510
456
|
kbid: str,
|
511
457
|
rslug: str,
|
512
|
-
x_synchronous: bool = SYNC_CALL,
|
513
458
|
):
|
514
|
-
|
515
|
-
|
516
|
-
)
|
459
|
+
rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
|
460
|
+
return await _delete_resource(request, kbid, rid)
|
517
461
|
|
518
462
|
|
519
463
|
@api.delete(
|
520
464
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}",
|
521
465
|
status_code=204,
|
522
|
-
|
466
|
+
summary="Delete Resource (by id)",
|
523
467
|
tags=["Resources"],
|
524
468
|
)
|
525
469
|
@requires(NucliaDBRoles.WRITER)
|
@@ -528,22 +472,18 @@ async def delete_resource_rid_prefix(
|
|
528
472
|
request: Request,
|
529
473
|
kbid: str,
|
530
474
|
rid: str,
|
531
|
-
x_synchronous: bool = SYNC_CALL,
|
532
475
|
):
|
533
|
-
return await _delete_resource(request, kbid, rid
|
476
|
+
return await _delete_resource(request, kbid, rid)
|
534
477
|
|
535
478
|
|
536
479
|
async def _delete_resource(
|
537
480
|
request: Request,
|
538
481
|
kbid: str,
|
539
|
-
|
540
|
-
rid: Optional[str] = None,
|
541
|
-
rslug: Optional[str] = None,
|
482
|
+
rid: str,
|
542
483
|
):
|
543
|
-
|
544
|
-
partitioning = get_partitioning()
|
484
|
+
await validate_rid_exists_or_raise_error(kbid, rid)
|
545
485
|
|
546
|
-
|
486
|
+
partitioning = get_partitioning()
|
547
487
|
|
548
488
|
partition = partitioning.generate_partition(kbid, rid)
|
549
489
|
writer = BrokerMessage()
|
@@ -553,10 +493,7 @@ async def _delete_resource(
|
|
553
493
|
writer.type = BrokerMessage.MessageType.DELETE
|
554
494
|
|
555
495
|
parse_audit(writer.audit, request)
|
556
|
-
|
557
|
-
# Create processing message
|
558
|
-
await transaction.commit(writer, partition, wait=x_synchronous)
|
559
|
-
|
496
|
+
await transaction.commit(writer, partition)
|
560
497
|
processing = get_processing()
|
561
498
|
asyncio.create_task(processing.delete_from_processing(kbid=kbid, resource_id=rid))
|
562
499
|
|
@@ -566,7 +503,7 @@ async def _delete_resource(
|
|
566
503
|
@api.post(
|
567
504
|
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/reindex",
|
568
505
|
status_code=204,
|
569
|
-
|
506
|
+
summary="Reindex Resource (by slug)",
|
570
507
|
tags=["Resources"],
|
571
508
|
)
|
572
509
|
@requires(NucliaDBRoles.WRITER)
|
@@ -577,15 +514,14 @@ async def reindex_resource_rslug_prefix(
|
|
577
514
|
rslug: str,
|
578
515
|
reindex_vectors: bool = Query(False),
|
579
516
|
):
|
580
|
-
|
581
|
-
|
582
|
-
)
|
517
|
+
rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
|
518
|
+
return await _reindex_resource(request, kbid, rid, reindex_vectors=reindex_vectors)
|
583
519
|
|
584
520
|
|
585
521
|
@api.post(
|
586
522
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/reindex",
|
587
523
|
status_code=204,
|
588
|
-
|
524
|
+
summary="Reindex Resource (by id)",
|
589
525
|
tags=["Resources"],
|
590
526
|
)
|
591
527
|
@requires(NucliaDBRoles.WRITER)
|
@@ -596,20 +532,16 @@ async def reindex_resource_rid_prefix(
|
|
596
532
|
rid: str,
|
597
533
|
reindex_vectors: bool = Query(False),
|
598
534
|
):
|
599
|
-
return await _reindex_resource(
|
600
|
-
request, kbid, rid=rid, reindex_vectors=reindex_vectors
|
601
|
-
)
|
535
|
+
return await _reindex_resource(request, kbid, rid, reindex_vectors=reindex_vectors)
|
602
536
|
|
603
537
|
|
604
538
|
async def _reindex_resource(
|
605
539
|
request: Request,
|
606
540
|
kbid: str,
|
541
|
+
rid: str,
|
607
542
|
reindex_vectors: bool,
|
608
|
-
rid: Optional[str] = None,
|
609
|
-
rslug: Optional[str] = None,
|
610
543
|
):
|
611
|
-
|
612
|
-
|
544
|
+
await validate_rid_exists_or_raise_error(kbid, rid)
|
613
545
|
await maybe_back_pressure(request, kbid, resource_uuid=rid)
|
614
546
|
|
615
547
|
ingest = get_ingest()
|
@@ -622,55 +554,18 @@ async def _reindex_resource(
|
|
622
554
|
return Response(status_code=200)
|
623
555
|
|
624
556
|
|
625
|
-
async def
|
626
|
-
|
627
|
-
pbrequest = ResourceIdRequest()
|
628
|
-
pbrequest.kbid = kbid
|
629
|
-
pbrequest.slug = slug
|
630
|
-
try:
|
631
|
-
response: ResourceIdResponse = await ingest.GetResourceId(pbrequest) # type: ignore
|
632
|
-
except AioRpcError as exc:
|
633
|
-
if exc.code() is GrpcStatusCode.UNAVAILABLE:
|
634
|
-
raise IngestNotAvailable()
|
635
|
-
else:
|
636
|
-
raise exc
|
637
|
-
return response.uuid
|
638
|
-
|
639
|
-
|
640
|
-
async def get_rid_from_params_or_raise_error(
|
641
|
-
kbid: str,
|
642
|
-
rid: Optional[str] = None,
|
643
|
-
slug: Optional[str] = None,
|
644
|
-
) -> str:
|
645
|
-
if rid is not None:
|
646
|
-
ingest = get_ingest()
|
647
|
-
pbrequest = ResourceFieldId()
|
648
|
-
pbrequest.kbid = kbid
|
649
|
-
pbrequest.rid = rid
|
650
|
-
|
651
|
-
try:
|
652
|
-
response: ResourceFieldExistsResponse = await ingest.ResourceFieldExists(pbrequest) # type: ignore
|
653
|
-
except AioRpcError as exc:
|
654
|
-
if exc.code() is GrpcStatusCode.UNAVAILABLE:
|
655
|
-
raise IngestNotAvailable()
|
656
|
-
else:
|
657
|
-
raise exc
|
658
|
-
|
659
|
-
if response.found:
|
660
|
-
return rid
|
661
|
-
else:
|
662
|
-
raise HTTPException(status_code=404, detail="Resource does not exist")
|
663
|
-
|
664
|
-
if slug is None:
|
665
|
-
raise ValueError("Either rid or slug must be set")
|
666
|
-
|
667
|
-
rid = await get_resource_uuid_from_slug(kbid, slug)
|
557
|
+
async def get_rid_from_slug_or_raise_error(kbid: str, rslug: str) -> str:
|
558
|
+
rid = await datamanagers.atomic.resources.get_resource_uuid_from_slug(kbid=kbid, slug=rslug)
|
668
559
|
if not rid:
|
669
560
|
raise HTTPException(status_code=404, detail="Resource does not exist")
|
670
|
-
|
671
561
|
return rid
|
672
562
|
|
673
563
|
|
564
|
+
async def validate_rid_exists_or_raise_error(kbid: str, rid: str):
|
565
|
+
if not (await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=rid)):
|
566
|
+
raise HTTPException(status_code=404, detail="Resource does not exist")
|
567
|
+
|
568
|
+
|
674
569
|
def maybe_mark_reindex(message: BrokerMessage, item: UpdateResourcePayload):
|
675
570
|
if needs_resource_reindex(item):
|
676
571
|
message.reindex = True
|
@@ -681,24 +576,25 @@ def needs_resource_reindex(item: UpdateResourcePayload) -> bool:
|
|
681
576
|
# a resource and that means this message should force reindexing everything.
|
682
577
|
# XXX This is not ideal. Long term, we should handle it differently
|
683
578
|
# so this is not required
|
684
|
-
return
|
685
|
-
item.
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
579
|
+
return (
|
580
|
+
item.usermetadata is not None
|
581
|
+
or item.hidden is not None
|
582
|
+
or (
|
583
|
+
item.origin is not None
|
584
|
+
and (
|
585
|
+
item.origin.created is not None
|
586
|
+
or item.origin.modified is not None
|
587
|
+
or item.origin.metadata is not None
|
588
|
+
)
|
690
589
|
)
|
691
590
|
)
|
692
591
|
|
693
592
|
|
694
|
-
async def maybe_send_to_process(
|
695
|
-
writer: BrokerMessage, toprocess: PushPayload, partition
|
696
|
-
) -> Optional[int]:
|
593
|
+
async def maybe_send_to_process(toprocess: PushPayload, partition) -> Optional[int]:
|
697
594
|
if not needs_reprocess(toprocess):
|
698
595
|
return None
|
699
596
|
|
700
597
|
processing_info = await send_to_process(toprocess, partition)
|
701
|
-
set_processing_info(writer, processing_info)
|
702
598
|
return processing_info.seqid
|
703
599
|
|
704
600
|
|
@@ -710,7 +606,10 @@ async def send_to_process(toprocess: PushPayload, partition) -> ProcessingInfo:
|
|
710
606
|
except LimitsExceededError as exc:
|
711
607
|
raise HTTPException(status_code=exc.status_code, detail=exc.detail)
|
712
608
|
except SendToProcessError:
|
713
|
-
raise HTTPException(
|
609
|
+
raise HTTPException(
|
610
|
+
status_code=500,
|
611
|
+
detail="Error while sending to process. Try calling /reprocess",
|
612
|
+
)
|
714
613
|
|
715
614
|
|
716
615
|
def needs_reprocess(processing_payload: PushPayload) -> bool:
|
@@ -724,7 +623,6 @@ def needs_reprocess(processing_payload: PushPayload) -> bool:
|
|
724
623
|
"filefield",
|
725
624
|
"linkfield",
|
726
625
|
"textfield",
|
727
|
-
"layoutfield",
|
728
626
|
"conversationfield",
|
729
627
|
):
|
730
628
|
if len(getattr(processing_payload, field)) > 0:
|