nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/writer/api/v1/upload.py
CHANGED
@@ -18,7 +18,6 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
import base64
|
21
|
-
import mimetypes
|
22
21
|
import pickle
|
23
22
|
import uuid
|
24
23
|
from datetime import datetime
|
@@ -30,30 +29,23 @@ from fastapi import HTTPException
|
|
30
29
|
from fastapi.params import Header
|
31
30
|
from fastapi.requests import Request
|
32
31
|
from fastapi.responses import Response
|
33
|
-
from fastapi_versioning import version
|
34
|
-
from grpc import StatusCode as GrpcStatusCode
|
35
|
-
from grpc.aio import AioRpcError
|
36
|
-
from nucliadb_protos.resources_pb2 import FieldFile
|
37
|
-
from nucliadb_protos.writer_pb2 import (
|
38
|
-
BrokerMessage,
|
39
|
-
ResourceFieldExistsResponse,
|
40
|
-
ResourceFieldId,
|
41
|
-
)
|
32
|
+
from fastapi_versioning import version
|
42
33
|
from starlette.requests import Request as StarletteRequest
|
43
34
|
|
35
|
+
from nucliadb.common import datamanagers
|
44
36
|
from nucliadb.ingest.orm.utils import set_title
|
45
37
|
from nucliadb.ingest.processing import PushPayload, Source
|
46
38
|
from nucliadb.models.responses import HTTPClientError
|
47
39
|
from nucliadb.writer import SERVICE_NAME
|
48
|
-
from nucliadb.writer.api.v1
|
49
|
-
from nucliadb.writer.
|
50
|
-
|
51
|
-
|
52
|
-
IngestNotAvailable,
|
53
|
-
ResourceNotFound,
|
40
|
+
from nucliadb.writer.api.v1 import transaction
|
41
|
+
from nucliadb.writer.api.v1.resource import (
|
42
|
+
get_rid_from_slug_or_raise_error,
|
43
|
+
validate_rid_exists_or_raise_error,
|
54
44
|
)
|
45
|
+
from nucliadb.writer.api.v1.slug import ensure_slug_uniqueness, noop_context_manager
|
46
|
+
from nucliadb.writer.back_pressure import maybe_back_pressure
|
55
47
|
from nucliadb.writer.resource.audit import parse_audit
|
56
|
-
from nucliadb.writer.resource.basic import
|
48
|
+
from nucliadb.writer.resource.basic import parse_basic_creation
|
57
49
|
from nucliadb.writer.resource.field import parse_fields
|
58
50
|
from nucliadb.writer.resource.origin import parse_extra, parse_origin
|
59
51
|
from nucliadb.writer.tus import TUSUPLOAD, UPLOAD, get_dm, get_storage_manager
|
@@ -62,23 +54,24 @@ from nucliadb.writer.tus.exceptions import (
|
|
62
54
|
HTTPConflict,
|
63
55
|
HTTPNotFound,
|
64
56
|
HTTPPreconditionFailed,
|
65
|
-
HTTPServiceUnavailable,
|
66
57
|
InvalidTUSMetadata,
|
67
58
|
ResumableURINotAvailable,
|
68
59
|
)
|
69
|
-
from nucliadb.writer.tus.storage import FileStorageManager
|
60
|
+
from nucliadb.writer.tus.storage import FileStorageManager
|
70
61
|
from nucliadb.writer.tus.utils import parse_tus_metadata
|
71
62
|
from nucliadb.writer.utilities import get_processing
|
63
|
+
from nucliadb_models import content_types
|
72
64
|
from nucliadb_models.resource import NucliaDBRoles
|
65
|
+
from nucliadb_models.utils import FieldIdString
|
73
66
|
from nucliadb_models.writer import CreateResourcePayload, ResourceFileUploaded
|
67
|
+
from nucliadb_protos.resources_pb2 import CloudFile, FieldFile, Metadata
|
68
|
+
from nucliadb_protos.writer_pb2 import BrokerMessage
|
74
69
|
from nucliadb_utils.authentication import requires_one
|
75
70
|
from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError
|
76
71
|
from nucliadb_utils.storages.storage import KB_RESOURCE_FIELD
|
77
72
|
from nucliadb_utils.utilities import (
|
78
|
-
get_ingest,
|
79
73
|
get_partitioning,
|
80
74
|
get_storage,
|
81
|
-
get_transaction_utility,
|
82
75
|
)
|
83
76
|
|
84
77
|
from .router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
|
@@ -105,14 +98,14 @@ TUS_HEADERS = {
|
|
105
98
|
@api.options(
|
106
99
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/file/{{field}}/{TUSUPLOAD}",
|
107
100
|
tags=["Resource field TUS uploads"],
|
108
|
-
|
101
|
+
summary="TUS Server information",
|
109
102
|
openapi_extra={"x-operation-order": 4},
|
110
103
|
include_in_schema=False,
|
111
104
|
)
|
112
105
|
@api.options(
|
113
106
|
f"/{KB_PREFIX}/{{kbid}}/{TUSUPLOAD}",
|
114
107
|
tags=["Knowledge Box TUS uploads"],
|
115
|
-
|
108
|
+
summary="TUS Server information",
|
116
109
|
openapi_extra={"x-operation-order": 4},
|
117
110
|
)
|
118
111
|
@version(1)
|
@@ -138,7 +131,7 @@ def _tus_options() -> Response:
|
|
138
131
|
@api.post(
|
139
132
|
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/file/{{field}}/{TUSUPLOAD}",
|
140
133
|
tags=["Resource field TUS uploads"],
|
141
|
-
|
134
|
+
summary="Create new upload on a Resource (by slug)",
|
142
135
|
openapi_extra={"x-operation-order": 1},
|
143
136
|
)
|
144
137
|
@requires_one([NucliaDBRoles.WRITER])
|
@@ -147,16 +140,17 @@ async def tus_post_rslug_prefix(
|
|
147
140
|
request: Request,
|
148
141
|
kbid: str,
|
149
142
|
rslug: str,
|
150
|
-
field:
|
143
|
+
field: FieldIdString,
|
151
144
|
item: Optional[CreateResourcePayload] = None,
|
152
145
|
) -> Response:
|
153
|
-
|
146
|
+
rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
|
147
|
+
return await _tus_post(request, kbid, item, path_rid=rid, field_id=field)
|
154
148
|
|
155
149
|
|
156
150
|
@api.post(
|
157
151
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{path_rid}}/file/{{field}}/{TUSUPLOAD}",
|
158
152
|
tags=["Resource field TUS uploads"],
|
159
|
-
|
153
|
+
summary="Create new upload on a Resource (by id)",
|
160
154
|
openapi_extra={"x-operation-order": 1},
|
161
155
|
)
|
162
156
|
@requires_one([NucliaDBRoles.WRITER])
|
@@ -165,16 +159,16 @@ async def tus_post_rid_prefix(
|
|
165
159
|
request: Request,
|
166
160
|
kbid: str,
|
167
161
|
path_rid: str,
|
168
|
-
field:
|
162
|
+
field: FieldIdString,
|
169
163
|
item: Optional[CreateResourcePayload] = None,
|
170
164
|
) -> Response:
|
171
|
-
return await _tus_post(request, kbid, item
|
165
|
+
return await _tus_post(request, kbid, item, path_rid=path_rid, field_id=field)
|
172
166
|
|
173
167
|
|
174
168
|
@api.post(
|
175
169
|
f"/{KB_PREFIX}/{{kbid}}/{TUSUPLOAD}",
|
176
170
|
tags=["Knowledge Box TUS uploads"],
|
177
|
-
|
171
|
+
summary="Create new upload on a Knowledge Box",
|
178
172
|
openapi_extra={"x-operation-order": 1},
|
179
173
|
)
|
180
174
|
@requires_one([NucliaDBRoles.WRITER])
|
@@ -184,7 +178,7 @@ async def tus_post(
|
|
184
178
|
kbid: str,
|
185
179
|
item: Optional[CreateResourcePayload] = None,
|
186
180
|
) -> Response:
|
187
|
-
return await _tus_post(request, kbid, item
|
181
|
+
return await _tus_post(request, kbid, item)
|
188
182
|
|
189
183
|
|
190
184
|
# called by one of the three POST handlers above - they are defined distinctly to produce clean API docs
|
@@ -193,23 +187,29 @@ async def _tus_post(
|
|
193
187
|
kbid: str,
|
194
188
|
item: Optional[CreateResourcePayload] = None,
|
195
189
|
path_rid: Optional[str] = None,
|
196
|
-
|
197
|
-
field: Optional[str] = None,
|
190
|
+
field_id: Optional[str] = None,
|
198
191
|
) -> Response:
|
199
192
|
"""
|
200
193
|
An empty POST request is used to create a new upload resource.
|
201
194
|
The Upload-Length header indicates the size of the entire upload in bytes.
|
202
195
|
"""
|
196
|
+
if path_rid is not None:
|
197
|
+
await validate_rid_exists_or_raise_error(kbid, path_rid)
|
198
|
+
|
199
|
+
kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
|
200
|
+
if item and item.hidden and not (kb_config and kb_config.hidden_resources_enabled):
|
201
|
+
raise HTTPException(
|
202
|
+
status_code=422,
|
203
|
+
detail="Cannot hide a resource: the KB does not have hidden resources enabled",
|
204
|
+
)
|
205
|
+
|
206
|
+
await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
|
207
|
+
|
203
208
|
dm = get_dm()
|
204
209
|
storage_manager = get_storage_manager()
|
205
210
|
|
206
|
-
if rslug is not None:
|
207
|
-
path_rid = await get_rid_from_params_or_raise_error(kbid, slug=rslug)
|
208
|
-
|
209
211
|
implies_resource_creation = path_rid is None
|
210
212
|
|
211
|
-
await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
|
212
|
-
|
213
213
|
deferred_length = False
|
214
214
|
if request.headers.get("upload-defer-length") == "1":
|
215
215
|
deferred_length = True
|
@@ -228,22 +228,11 @@ async def _tus_post(
|
|
228
228
|
try:
|
229
229
|
metadata = parse_tus_metadata(request.headers["upload-metadata"])
|
230
230
|
except InvalidTUSMetadata as exc:
|
231
|
-
raise HTTPBadRequest(
|
232
|
-
detail=f"Upload-Metadata header contains errors: {str(exc)}"
|
233
|
-
)
|
231
|
+
raise HTTPBadRequest(detail=f"Upload-Metadata header contains errors: {str(exc)}")
|
234
232
|
else:
|
235
233
|
metadata = {}
|
236
234
|
|
237
|
-
|
238
|
-
path, rid, field = await start_upload_field(
|
239
|
-
kbid, path_rid, field, metadata.get("md5")
|
240
|
-
)
|
241
|
-
except ResourceNotFound:
|
242
|
-
raise HTTPNotFound("Resource is not found or not yet available")
|
243
|
-
except ConflictError:
|
244
|
-
raise HTTPConflict("A resource with the same uploaded file already exists")
|
245
|
-
except IngestNotAvailable:
|
246
|
-
raise HTTPServiceUnavailable("Upload not available right now, try again")
|
235
|
+
path, rid, field = await validate_field_upload(kbid, path_rid, field_id, metadata.get("md5"))
|
247
236
|
|
248
237
|
if implies_resource_creation:
|
249
238
|
# When uploading a file to a new kb resource, we want to allow multiple
|
@@ -269,8 +258,15 @@ async def _tus_post(
|
|
269
258
|
request_content_type = None
|
270
259
|
if item is None:
|
271
260
|
request_content_type = request.headers.get("content-type")
|
272
|
-
if
|
273
|
-
request_content_type =
|
261
|
+
if request_content_type is None:
|
262
|
+
request_content_type = content_types.guess(metadata["filename"]) or "application/octet-stream"
|
263
|
+
|
264
|
+
if request_content_type is not None and not content_types.valid(request_content_type):
|
265
|
+
raise HTTPException(
|
266
|
+
status_code=415,
|
267
|
+
detail=f"Unsupported content type: {request_content_type}",
|
268
|
+
)
|
269
|
+
|
274
270
|
metadata.setdefault("content_type", request_content_type)
|
275
271
|
|
276
272
|
metadata["implies_resource_creation"] = implies_resource_creation
|
@@ -300,9 +296,7 @@ async def _tus_post(
|
|
300
296
|
await dm.save()
|
301
297
|
|
302
298
|
# Find the URL for upload, with the same parameter as this call
|
303
|
-
location = api.url_path_for(
|
304
|
-
"Upload information", upload_id=upload_id, **request.path_params
|
305
|
-
)
|
299
|
+
location = api.url_path_for("Upload information", upload_id=upload_id, **request.path_params)
|
306
300
|
return Response(
|
307
301
|
status_code=201,
|
308
302
|
headers={
|
@@ -319,6 +313,7 @@ async def _tus_post(
|
|
319
313
|
status_code=200,
|
320
314
|
openapi_extra={"x-operation-order": 3},
|
321
315
|
name="Upload information",
|
316
|
+
summary="Upload information",
|
322
317
|
)
|
323
318
|
@requires_one([NucliaDBRoles.WRITER])
|
324
319
|
@version(1)
|
@@ -326,7 +321,7 @@ async def tus_head_rslug_prefix(
|
|
326
321
|
request: Request,
|
327
322
|
kbid: str,
|
328
323
|
rslug: str,
|
329
|
-
field:
|
324
|
+
field: FieldIdString,
|
330
325
|
upload_id: str,
|
331
326
|
) -> Response:
|
332
327
|
return await _tus_head(upload_id)
|
@@ -338,6 +333,7 @@ async def tus_head_rslug_prefix(
|
|
338
333
|
status_code=200,
|
339
334
|
openapi_extra={"x-operation-order": 3},
|
340
335
|
name="Upload information",
|
336
|
+
summary="Upload information",
|
341
337
|
)
|
342
338
|
@requires_one([NucliaDBRoles.WRITER])
|
343
339
|
@version(1)
|
@@ -345,7 +341,7 @@ async def tus_head_rid_prefix(
|
|
345
341
|
request: Request,
|
346
342
|
kbid: str,
|
347
343
|
path_rid: str,
|
348
|
-
field:
|
344
|
+
field: FieldIdString,
|
349
345
|
upload_id: str,
|
350
346
|
) -> Response:
|
351
347
|
return await _tus_head(upload_id)
|
@@ -357,6 +353,7 @@ async def tus_head_rid_prefix(
|
|
357
353
|
status_code=200,
|
358
354
|
openapi_extra={"x-operation-order": 3},
|
359
355
|
name="Upload information",
|
356
|
+
summary="Upload information",
|
360
357
|
)
|
361
358
|
@requires_one([NucliaDBRoles.WRITER])
|
362
359
|
@version(1)
|
@@ -393,7 +390,7 @@ async def _tus_head(
|
|
393
390
|
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}",
|
394
391
|
tags=["Resource field TUS uploads"],
|
395
392
|
status_code=200,
|
396
|
-
|
393
|
+
summary="Upload data on a Resource (by slug)",
|
397
394
|
openapi_extra={"x-operation-order": 2},
|
398
395
|
)
|
399
396
|
@requires_one([NucliaDBRoles.WRITER])
|
@@ -402,20 +399,18 @@ async def tus_patch_rslug_prefix(
|
|
402
399
|
request: Request,
|
403
400
|
kbid: str,
|
404
401
|
rslug: str,
|
405
|
-
field:
|
402
|
+
field: FieldIdString,
|
406
403
|
upload_id: str,
|
407
|
-
x_synchronous: bool = Header(False), # type: ignore
|
408
404
|
) -> Response:
|
409
|
-
|
410
|
-
|
411
|
-
)
|
405
|
+
rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
|
406
|
+
return await tus_patch(request, kbid, upload_id, rid=rid, field=field)
|
412
407
|
|
413
408
|
|
414
409
|
@api.patch(
|
415
410
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}",
|
416
411
|
tags=["Resource field TUS uploads"],
|
417
412
|
status_code=200,
|
418
|
-
|
413
|
+
summary="Upload data on a Resource (by id)",
|
419
414
|
openapi_extra={"x-operation-order": 2},
|
420
415
|
)
|
421
416
|
@requires_one([NucliaDBRoles.WRITER])
|
@@ -424,20 +419,17 @@ async def tus_patch_rid_prefix(
|
|
424
419
|
request: Request,
|
425
420
|
kbid: str,
|
426
421
|
rid: str,
|
427
|
-
field:
|
422
|
+
field: FieldIdString,
|
428
423
|
upload_id: str,
|
429
|
-
x_synchronous: bool = Header(False), # type: ignore
|
430
424
|
) -> Response:
|
431
|
-
return await tus_patch(
|
432
|
-
request, kbid, upload_id, rid=rid, field=field, x_synchronous=x_synchronous
|
433
|
-
)
|
425
|
+
return await tus_patch(request, kbid, upload_id, rid=rid, field=field)
|
434
426
|
|
435
427
|
|
436
428
|
@api.patch(
|
437
429
|
f"/{KB_PREFIX}/{{kbid}}/{TUSUPLOAD}/{{upload_id}}",
|
438
430
|
tags=["Knowledge Box TUS uploads"],
|
439
431
|
status_code=200,
|
440
|
-
|
432
|
+
summary="Upload data on a Knowledge Box",
|
441
433
|
openapi_extra={"x-operation-order": 2},
|
442
434
|
)
|
443
435
|
@requires_one([NucliaDBRoles.WRITER])
|
@@ -446,9 +438,8 @@ async def patch(
|
|
446
438
|
request: Request,
|
447
439
|
kbid: str,
|
448
440
|
upload_id: str,
|
449
|
-
x_synchronous: bool = Header(False), # type: ignore
|
450
441
|
) -> Response:
|
451
|
-
return await tus_patch(request, kbid, upload_id
|
442
|
+
return await tus_patch(request, kbid, upload_id)
|
452
443
|
|
453
444
|
|
454
445
|
async def tus_patch(
|
@@ -456,9 +447,7 @@ async def tus_patch(
|
|
456
447
|
kbid: str,
|
457
448
|
upload_id: str,
|
458
449
|
rid: Optional[str] = None,
|
459
|
-
rslug: Optional[str] = None,
|
460
450
|
field: Optional[str] = None,
|
461
|
-
x_synchronous: bool = False,
|
462
451
|
):
|
463
452
|
try:
|
464
453
|
return await _tus_patch(
|
@@ -466,9 +455,7 @@ async def tus_patch(
|
|
466
455
|
kbid,
|
467
456
|
upload_id,
|
468
457
|
rid=rid,
|
469
|
-
rslug=rslug,
|
470
458
|
field=field,
|
471
|
-
x_synchronous=x_synchronous,
|
472
459
|
)
|
473
460
|
except ResumableURINotAvailable:
|
474
461
|
return HTTPClientError(
|
@@ -483,15 +470,13 @@ async def _tus_patch(
|
|
483
470
|
kbid: str,
|
484
471
|
upload_id: str,
|
485
472
|
rid: Optional[str] = None,
|
486
|
-
rslug: Optional[str] = None,
|
487
473
|
field: Optional[str] = None,
|
488
|
-
x_synchronous: bool = False,
|
489
474
|
) -> Response:
|
490
475
|
"""
|
491
|
-
Upload all bytes in the requests and append them in the
|
476
|
+
Upload all bytes in the requests and append them in the specified offset
|
492
477
|
"""
|
493
|
-
if
|
494
|
-
|
478
|
+
if rid is not None:
|
479
|
+
await validate_rid_exists_or_raise_error(kbid, rid)
|
495
480
|
|
496
481
|
dm = get_dm()
|
497
482
|
await dm.load(upload_id)
|
@@ -517,8 +502,7 @@ async def _tus_patch(
|
|
517
502
|
|
518
503
|
if offset != dm.offset:
|
519
504
|
raise HTTPConflict(
|
520
|
-
detail=f"Current upload offset({offset}) does not match "
|
521
|
-
f"object offset {dm.offset}"
|
505
|
+
detail=f"Current upload offset({offset}) does not match " f"object offset {dm.offset}"
|
522
506
|
)
|
523
507
|
|
524
508
|
storage_manager = get_storage_manager()
|
@@ -530,9 +514,7 @@ async def _tus_patch(
|
|
530
514
|
|
531
515
|
if to_upload and read_bytes != to_upload: # pragma: no cover
|
532
516
|
# check length matches if provided
|
533
|
-
raise HTTPPreconditionFailed(
|
534
|
-
detail="Upload size does not match what was provided"
|
535
|
-
)
|
517
|
+
raise HTTPPreconditionFailed(detail="Upload size does not match what was provided")
|
536
518
|
await dm.update(offset=offset + read_bytes)
|
537
519
|
|
538
520
|
headers = {
|
@@ -544,7 +526,6 @@ async def _tus_patch(
|
|
544
526
|
}
|
545
527
|
|
546
528
|
upload_finished = dm.get("size") is not None and dm.offset >= dm.get("size")
|
547
|
-
|
548
529
|
if upload_finished:
|
549
530
|
rid = dm.get("rid", rid)
|
550
531
|
if rid is None:
|
@@ -563,13 +544,19 @@ async def _tus_patch(
|
|
563
544
|
if isinstance(item_payload, str):
|
564
545
|
item_payload = item_payload.encode()
|
565
546
|
creation_payload = pickle.loads(base64.b64decode(item_payload))
|
547
|
+
|
548
|
+
content_type = dm.get("metadata", {}).get("content_type")
|
549
|
+
if content_type is not None and not content_types.valid(content_type):
|
550
|
+
return HTTPClientError(
|
551
|
+
status_code=415,
|
552
|
+
detail=f"Unsupported content type: {content_type}",
|
553
|
+
)
|
554
|
+
|
566
555
|
try:
|
567
556
|
seqid = await store_file_on_nuclia_db(
|
568
557
|
size=dm.get("size"),
|
569
|
-
content_type=
|
570
|
-
override_resource_title=dm.get("metadata", {}).get(
|
571
|
-
"implies_resource_creation", False
|
572
|
-
),
|
558
|
+
content_type=content_type,
|
559
|
+
override_resource_title=dm.get("metadata", {}).get("implies_resource_creation", False),
|
573
560
|
filename=dm.get("metadata", {}).get("filename"),
|
574
561
|
password=dm.get("metadata", {}).get("password"),
|
575
562
|
language=dm.get("metadata", {}).get("language"),
|
@@ -582,34 +569,30 @@ async def _tus_patch(
|
|
582
569
|
request=request,
|
583
570
|
bucket=storage_manager.storage.get_bucket_name(kbid),
|
584
571
|
item=creation_payload,
|
585
|
-
wait_on_commit=x_synchronous,
|
586
572
|
)
|
587
573
|
except LimitsExceededError as exc:
|
588
574
|
raise HTTPException(status_code=exc.status_code, detail=exc.detail)
|
589
575
|
|
590
576
|
headers["NDB-Seq"] = f"{seqid}"
|
591
577
|
else:
|
592
|
-
|
578
|
+
validate_intermediate_tus_chunk(read_bytes, storage_manager)
|
593
579
|
await dm.save()
|
594
580
|
|
595
581
|
return Response(headers=headers)
|
596
582
|
|
597
583
|
|
598
|
-
def
|
599
|
-
|
600
|
-
storage_manager.
|
601
|
-
|
602
|
-
|
603
|
-
raise HTTPPreconditionFailed(
|
604
|
-
detail=f"Intermediate chunks cannot be smaller than {storage_manager.min_upload_size} bytes"
|
605
|
-
)
|
584
|
+
def validate_intermediate_tus_chunk(read_bytes: int, storage_manager: FileStorageManager):
|
585
|
+
try:
|
586
|
+
storage_manager.validate_intermediate_chunk(read_bytes)
|
587
|
+
except ValueError as err:
|
588
|
+
raise HTTPPreconditionFailed(detail=str(err))
|
606
589
|
|
607
590
|
|
608
591
|
@api.post(
|
609
592
|
f"/{KB_PREFIX}/{{kbid}}/{RSLUG_PREFIX}/{{rslug}}/file/{{field}}/{UPLOAD}",
|
610
593
|
status_code=201,
|
611
594
|
tags=["Resource fields"],
|
612
|
-
|
595
|
+
summary="Upload binary file on a Resource (by slug)",
|
613
596
|
description="Upload a file as a field on an existing resource, if the field exists will return a conflict (419)",
|
614
597
|
)
|
615
598
|
@requires_one([NucliaDBRoles.WRITER])
|
@@ -618,23 +601,22 @@ async def upload_rslug_prefix(
|
|
618
601
|
request: StarletteRequest,
|
619
602
|
kbid: str,
|
620
603
|
rslug: str,
|
621
|
-
field:
|
604
|
+
field: FieldIdString,
|
622
605
|
x_filename: Optional[list[str]] = Header(None), # type: ignore
|
623
606
|
x_password: Optional[list[str]] = Header(None), # type: ignore
|
624
607
|
x_language: Optional[list[str]] = Header(None), # type: ignore
|
625
608
|
x_md5: Optional[list[str]] = Header(None), # type: ignore
|
626
|
-
x_synchronous: bool = Header(False), # type: ignore
|
627
609
|
) -> ResourceFileUploaded:
|
610
|
+
rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
|
628
611
|
return await _upload(
|
629
612
|
request,
|
630
613
|
kbid,
|
631
|
-
|
614
|
+
path_rid=rid,
|
632
615
|
field=field,
|
633
616
|
x_filename=x_filename,
|
634
617
|
x_password=x_password,
|
635
618
|
x_language=x_language,
|
636
619
|
x_md5=x_md5,
|
637
|
-
x_synchronous=x_synchronous,
|
638
620
|
)
|
639
621
|
|
640
622
|
|
@@ -642,7 +624,7 @@ async def upload_rslug_prefix(
|
|
642
624
|
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{path_rid}}/file/{{field}}/{UPLOAD}",
|
643
625
|
status_code=201,
|
644
626
|
tags=["Resource fields"],
|
645
|
-
|
627
|
+
summary="Upload binary file on a Resource (by id)",
|
646
628
|
description="Upload a file as a field on an existing resource, if the field exists will return a conflict (419)",
|
647
629
|
)
|
648
630
|
@requires_one([NucliaDBRoles.WRITER])
|
@@ -651,12 +633,11 @@ async def upload_rid_prefix(
|
|
651
633
|
request: StarletteRequest,
|
652
634
|
kbid: str,
|
653
635
|
path_rid: str,
|
654
|
-
field:
|
636
|
+
field: FieldIdString,
|
655
637
|
x_filename: Optional[list[str]] = Header(None), # type: ignore
|
656
638
|
x_password: Optional[list[str]] = Header(None), # type: ignore
|
657
639
|
x_language: Optional[list[str]] = Header(None), # type: ignore
|
658
640
|
x_md5: Optional[list[str]] = Header(None), # type: ignore
|
659
|
-
x_synchronous: bool = Header(False), # type: ignore
|
660
641
|
) -> ResourceFileUploaded:
|
661
642
|
return await _upload(
|
662
643
|
request,
|
@@ -667,7 +648,6 @@ async def upload_rid_prefix(
|
|
667
648
|
x_password=x_password,
|
668
649
|
x_language=x_language,
|
669
650
|
x_md5=x_md5,
|
670
|
-
x_synchronous=x_synchronous,
|
671
651
|
)
|
672
652
|
|
673
653
|
|
@@ -675,7 +655,7 @@ async def upload_rid_prefix(
|
|
675
655
|
f"/{KB_PREFIX}/{{kbid}}/{UPLOAD}",
|
676
656
|
status_code=201,
|
677
657
|
tags=["Knowledge Boxes"],
|
678
|
-
|
658
|
+
summary="Upload binary file on a Knowledge Box",
|
679
659
|
description="Upload a file onto a Knowledge Box, field id will be file and rid will be autogenerated. ",
|
680
660
|
)
|
681
661
|
@requires_one([NucliaDBRoles.WRITER])
|
@@ -687,7 +667,6 @@ async def upload(
|
|
687
667
|
x_password: Optional[list[str]] = Header(None), # type: ignore
|
688
668
|
x_language: Optional[list[str]] = Header(None), # type: ignore
|
689
669
|
x_md5: Optional[list[str]] = Header(None), # type: ignore
|
690
|
-
x_synchronous: bool = Header(False), # type: ignore
|
691
670
|
) -> ResourceFileUploaded:
|
692
671
|
return await _upload(
|
693
672
|
request,
|
@@ -696,7 +675,6 @@ async def upload(
|
|
696
675
|
x_password=x_password,
|
697
676
|
x_language=x_language,
|
698
677
|
x_md5=x_md5,
|
699
|
-
x_synchronous=x_synchronous,
|
700
678
|
)
|
701
679
|
|
702
680
|
|
@@ -705,30 +683,19 @@ async def _upload(
|
|
705
683
|
request: StarletteRequest,
|
706
684
|
kbid: str,
|
707
685
|
path_rid: Optional[str] = None,
|
708
|
-
rslug: Optional[str] = None,
|
709
686
|
field: Optional[str] = None,
|
710
687
|
x_filename: Optional[list[str]] = Header(None), # type: ignore
|
711
688
|
x_password: Optional[list[str]] = Header(None), # type: ignore
|
712
689
|
x_language: Optional[list[str]] = Header(None), # type: ignore
|
713
690
|
x_md5: Optional[list[str]] = Header(None), # type: ignore
|
714
|
-
x_synchronous: bool = Header(False), # type: ignore
|
715
691
|
) -> ResourceFileUploaded:
|
716
|
-
if
|
717
|
-
|
692
|
+
if path_rid is not None:
|
693
|
+
await validate_rid_exists_or_raise_error(kbid, path_rid)
|
718
694
|
|
719
695
|
await maybe_back_pressure(request, kbid, resource_uuid=path_rid)
|
720
696
|
|
721
697
|
md5_user = x_md5[0] if x_md5 is not None and len(x_md5) > 0 else None
|
722
|
-
|
723
|
-
path, rid, valid_field = await start_upload_field(
|
724
|
-
kbid, path_rid, field, md5_user
|
725
|
-
)
|
726
|
-
except ResourceNotFound:
|
727
|
-
raise HTTPNotFound("Resource is not found or not yet available")
|
728
|
-
except ConflictError:
|
729
|
-
raise HTTPConflict("A resource with the same uploaded file already exists")
|
730
|
-
except IngestNotAvailable:
|
731
|
-
raise HTTPServiceUnavailable("Upload not available right now, try again")
|
698
|
+
path, rid, valid_field = await validate_field_upload(kbid, path_rid, field, md5_user)
|
732
699
|
dm = get_dm()
|
733
700
|
storage_manager = get_storage_manager()
|
734
701
|
|
@@ -757,8 +724,14 @@ async def _upload(
|
|
757
724
|
# - content-type set by the user in the upload request header takes precedence.
|
758
725
|
# - if not set, we will try to guess it from the filename and default to a generic binary content type otherwise
|
759
726
|
content_type = request.headers.get("content-type")
|
760
|
-
if
|
761
|
-
content_type =
|
727
|
+
if content_type is None:
|
728
|
+
content_type = content_types.guess(filename) or "application/octet-stream"
|
729
|
+
|
730
|
+
if not content_types.valid(content_type):
|
731
|
+
raise HTTPException(
|
732
|
+
status_code=415,
|
733
|
+
detail=f"Unsupported content type: {content_type}",
|
734
|
+
)
|
762
735
|
|
763
736
|
metadata = {"content_type": content_type, "filename": filename}
|
764
737
|
|
@@ -808,7 +781,6 @@ async def _upload(
|
|
808
781
|
path=path,
|
809
782
|
request=request,
|
810
783
|
bucket=storage_manager.storage.get_bucket_name(kbid),
|
811
|
-
wait_on_commit=x_synchronous,
|
812
784
|
)
|
813
785
|
except LimitsExceededError as exc:
|
814
786
|
raise HTTPException(status_code=exc.status_code, detail=exc.detail)
|
@@ -816,43 +788,37 @@ async def _upload(
|
|
816
788
|
return ResourceFileUploaded(seqid=seqid, uuid=rid, field_id=valid_field)
|
817
789
|
|
818
790
|
|
819
|
-
async def
|
791
|
+
async def validate_field_upload(
|
820
792
|
kbid: str,
|
821
793
|
rid: Optional[str] = None,
|
822
794
|
field: Optional[str] = None,
|
823
795
|
md5: Optional[str] = None,
|
824
796
|
):
|
825
|
-
|
826
|
-
pbrequest = ResourceFieldId()
|
827
|
-
pbrequest.kbid = kbid
|
828
|
-
if rid is not None:
|
829
|
-
pbrequest.rid = rid
|
797
|
+
"""Validate field upload and return blob storage path, rid and field id.
|
830
798
|
|
831
|
-
|
832
|
-
|
799
|
+
This function assumes KB exists
|
800
|
+
"""
|
833
801
|
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
802
|
+
if rid is None:
|
803
|
+
# we are going to create a new resource and a field
|
804
|
+
if md5 is not None:
|
805
|
+
exists = await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=md5)
|
806
|
+
if exists:
|
807
|
+
raise HTTPConflict("A resource with the same uploaded file already exists")
|
808
|
+
rid = md5
|
839
809
|
else:
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
if
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
if field is None and md5 is None:
|
853
|
-
field = uuid.uuid4().hex
|
854
|
-
elif field is None:
|
855
|
-
field = md5
|
810
|
+
rid = uuid.uuid4().hex
|
811
|
+
else:
|
812
|
+
# we're adding a field to a resource
|
813
|
+
exists = await datamanagers.atomic.resources.resource_exists(kbid=kbid, rid=rid)
|
814
|
+
if not exists:
|
815
|
+
raise HTTPNotFound("Resource is not found or not yet available")
|
816
|
+
|
817
|
+
if field is None:
|
818
|
+
if md5 is None:
|
819
|
+
field = uuid.uuid4().hex
|
820
|
+
else:
|
821
|
+
field = md5
|
856
822
|
|
857
823
|
path = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, field=field)
|
858
824
|
return path, rid, field
|
@@ -864,7 +830,7 @@ async def store_file_on_nuclia_db(
|
|
864
830
|
path: str,
|
865
831
|
request: Request,
|
866
832
|
bucket: str,
|
867
|
-
source: Source,
|
833
|
+
source: CloudFile.Source.ValueType,
|
868
834
|
rid: str,
|
869
835
|
field: str,
|
870
836
|
content_type: str = "application/octet-stream",
|
@@ -874,12 +840,9 @@ async def store_file_on_nuclia_db(
|
|
874
840
|
language: Optional[str] = None,
|
875
841
|
md5: Optional[str] = None,
|
876
842
|
item: Optional[CreateResourcePayload] = None,
|
877
|
-
wait_on_commit: bool = False,
|
878
843
|
) -> Optional[int]:
|
879
844
|
# File is on NucliaDB Storage at path
|
880
|
-
|
881
845
|
partitioning = get_partitioning()
|
882
|
-
transaction = get_transaction_utility()
|
883
846
|
processing = get_processing()
|
884
847
|
storage = await get_storage(service_name=SERVICE_NAME)
|
885
848
|
|
@@ -901,14 +864,17 @@ async def store_file_on_nuclia_db(
|
|
901
864
|
|
902
865
|
parse_audit(writer.audit, request)
|
903
866
|
|
867
|
+
unique_slug_context_manager = noop_context_manager()
|
904
868
|
if item is not None:
|
905
869
|
if item.slug:
|
870
|
+
unique_slug_context_manager = ensure_slug_uniqueness(kbid, item.slug)
|
906
871
|
writer.slug = item.slug
|
907
872
|
toprocess.slug = item.slug
|
908
873
|
|
909
874
|
toprocess.processing_options = item.processing_options
|
910
875
|
|
911
|
-
|
876
|
+
kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
|
877
|
+
parse_basic_creation(writer, item, toprocess, kb_config)
|
912
878
|
if item.origin is not None:
|
913
879
|
parse_origin(writer.origin, item.origin)
|
914
880
|
if item.extra is not None:
|
@@ -924,52 +890,61 @@ async def store_file_on_nuclia_db(
|
|
924
890
|
uuid=rid,
|
925
891
|
x_skip_store=False,
|
926
892
|
)
|
893
|
+
else:
|
894
|
+
# Use defaults for everything, but don't forget hidden which depends on KB config
|
895
|
+
kb_config = await datamanagers.atomic.kb.get_config(kbid=kbid)
|
896
|
+
if kb_config and kb_config.hidden_resources_hide_on_creation:
|
897
|
+
writer.basic.hidden = True
|
898
|
+
|
899
|
+
async with unique_slug_context_manager:
|
900
|
+
if override_resource_title and filename is not None:
|
901
|
+
set_title(writer, toprocess, filename)
|
902
|
+
|
903
|
+
writer.basic.icon = content_type
|
904
|
+
writer.basic.created.FromDatetime(datetime.now())
|
905
|
+
|
906
|
+
# Update resource with file
|
907
|
+
file_field = FieldFile()
|
908
|
+
file_field.added.FromDatetime(datetime.now())
|
909
|
+
file_field.file.bucket_name = bucket
|
910
|
+
file_field.file.content_type = content_type
|
911
|
+
if filename is not None:
|
912
|
+
file_field.file.filename = filename
|
913
|
+
file_field.file.uri = path
|
914
|
+
file_field.file.source = source
|
915
|
+
|
916
|
+
if md5:
|
917
|
+
file_field.file.md5 = md5
|
918
|
+
if size:
|
919
|
+
file_field.file.size = size
|
920
|
+
if language:
|
921
|
+
file_field.language = language
|
922
|
+
if password:
|
923
|
+
file_field.password = password
|
924
|
+
|
925
|
+
writer.files[field].CopyFrom(file_field)
|
926
|
+
# Do not store passwords on maindb
|
927
|
+
writer.files[field].ClearField("password")
|
928
|
+
|
929
|
+
toprocess.filefield[field] = await processing.convert_internal_filefield_to_str(
|
930
|
+
file_field, storage=storage
|
931
|
+
)
|
927
932
|
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
file_field.file.uri = path
|
942
|
-
file_field.file.source = source
|
943
|
-
|
944
|
-
if md5:
|
945
|
-
file_field.file.md5 = md5
|
946
|
-
if size:
|
947
|
-
file_field.file.size = size
|
948
|
-
if language:
|
949
|
-
file_field.language = language
|
950
|
-
if password:
|
951
|
-
file_field.password = password
|
952
|
-
|
953
|
-
writer.files[field].CopyFrom(file_field)
|
954
|
-
# Do not store passwords on maindb
|
955
|
-
writer.files[field].ClearField("password")
|
956
|
-
|
957
|
-
toprocess.filefield[field] = await processing.convert_internal_filefield_to_str(
|
958
|
-
file_field, storage=storage
|
959
|
-
)
|
960
|
-
|
961
|
-
try:
|
962
|
-
processing_info = await processing.send_to_process(toprocess, partition)
|
963
|
-
except LimitsExceededError as exc:
|
964
|
-
raise HTTPException(status_code=exc.status_code, detail=exc.detail)
|
965
|
-
except SendToProcessError:
|
966
|
-
raise HTTPException(status_code=500, detail="Error while sending to process")
|
967
|
-
|
968
|
-
writer.source = BrokerMessage.MessageSource.WRITER
|
969
|
-
set_processing_info(writer, processing_info)
|
970
|
-
await transaction.commit(writer, partition, wait=wait_on_commit)
|
933
|
+
writer.source = BrokerMessage.MessageSource.WRITER
|
934
|
+
writer.basic.metadata.status = Metadata.Status.PENDING
|
935
|
+
writer.basic.metadata.useful = True
|
936
|
+
await transaction.commit(writer, partition)
|
937
|
+
try:
|
938
|
+
processing_info = await processing.send_to_process(toprocess, partition)
|
939
|
+
except LimitsExceededError as exc:
|
940
|
+
raise HTTPException(status_code=exc.status_code, detail=exc.detail)
|
941
|
+
except SendToProcessError:
|
942
|
+
raise HTTPException(
|
943
|
+
status_code=500,
|
944
|
+
detail="Error while sending to process. Try calling /reprocess",
|
945
|
+
)
|
971
946
|
|
972
|
-
|
947
|
+
return processing_info.seqid
|
973
948
|
|
974
949
|
|
975
950
|
def maybe_b64decode(some_string: str) -> str:
|
@@ -978,9 +953,3 @@ def maybe_b64decode(some_string: str) -> str:
|
|
978
953
|
except ValueError:
|
979
954
|
# not b64encoded
|
980
955
|
return some_string
|
981
|
-
|
982
|
-
|
983
|
-
def guess_content_type(filename: str) -> str:
|
984
|
-
default = "application/octet-stream"
|
985
|
-
guessed, _ = mimetypes.guess_type(filename)
|
986
|
-
return guessed or default
|