nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -1,740 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import asyncio
|
21
|
-
import base64
|
22
|
-
import io
|
23
|
-
import os
|
24
|
-
from typing import Callable
|
25
|
-
|
26
|
-
import pytest
|
27
|
-
from httpx import AsyncClient
|
28
|
-
from nucliadb_protos.resources_pb2 import FieldID, FieldType
|
29
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
30
|
-
|
31
|
-
from nucliadb.common import datamanagers
|
32
|
-
from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX
|
33
|
-
from nucliadb.writer.api.v1.upload import maybe_b64decode
|
34
|
-
from nucliadb.writer.tus import TUSUPLOAD, UPLOAD, get_storage_manager
|
35
|
-
from nucliadb_models.resource import NucliaDBRoles
|
36
|
-
from nucliadb_utils import const
|
37
|
-
from nucliadb_utils.utilities import get_storage, get_transaction_utility
|
38
|
-
|
39
|
-
ASSETS_PATH = os.path.dirname(__file__) + "/assets"
|
40
|
-
|
41
|
-
|
42
|
-
@pytest.mark.asyncio
|
43
|
-
async def test_knowledgebox_file_tus_options(
|
44
|
-
writer_api: Callable[[list[NucliaDBRoles]], AsyncClient], knowledgebox_writer: str
|
45
|
-
):
|
46
|
-
client: AsyncClient
|
47
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
48
|
-
resp = await client.options(
|
49
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/resource/xxx/file/xxx/{TUSUPLOAD}/xxx"
|
50
|
-
)
|
51
|
-
assert resp.status_code == 204
|
52
|
-
assert resp.headers["tus-resumable"] == "1.0.0"
|
53
|
-
assert resp.headers["tus-version"] == "1.0.0"
|
54
|
-
assert resp.headers["tus-extension"] == "creation-defer-length"
|
55
|
-
|
56
|
-
resp = await client.options(
|
57
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/resource/xxx/file/xxx/{TUSUPLOAD}"
|
58
|
-
)
|
59
|
-
assert resp.status_code == 204
|
60
|
-
assert resp.headers["tus-resumable"] == "1.0.0"
|
61
|
-
assert resp.headers["tus-version"] == "1.0.0"
|
62
|
-
assert resp.headers["tus-extension"] == "creation-defer-length"
|
63
|
-
|
64
|
-
resp = await client.options(f"/{KB_PREFIX}/{knowledgebox_writer}/{TUSUPLOAD}")
|
65
|
-
assert resp.status_code == 204
|
66
|
-
assert resp.headers["tus-resumable"] == "1.0.0"
|
67
|
-
assert resp.headers["tus-version"] == "1.0.0"
|
68
|
-
assert resp.headers["tus-extension"] == "creation-defer-length"
|
69
|
-
|
70
|
-
resp = await client.options(
|
71
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/{TUSUPLOAD}/xxx"
|
72
|
-
)
|
73
|
-
assert resp.status_code == 204
|
74
|
-
assert resp.headers["tus-resumable"] == "1.0.0"
|
75
|
-
assert resp.headers["tus-version"] == "1.0.0"
|
76
|
-
assert resp.headers["tus-extension"] == "creation-defer-length"
|
77
|
-
|
78
|
-
|
79
|
-
@pytest.mark.asyncio
|
80
|
-
async def test_knowledgebox_file_tus_upload_root(writer_api, knowledgebox_writer):
|
81
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
82
|
-
language = base64.b64encode(b"ca").decode()
|
83
|
-
filename = base64.b64encode(b"image.jpg").decode()
|
84
|
-
md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
|
85
|
-
resp = await client.post(
|
86
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/{TUSUPLOAD}",
|
87
|
-
headers={
|
88
|
-
"tus-resumable": "1.0.0",
|
89
|
-
"upload-metadata": f"filename {filename},language {language},md5 {md5}",
|
90
|
-
"content-type": "image/jpg",
|
91
|
-
"upload-defer-length": "1",
|
92
|
-
},
|
93
|
-
)
|
94
|
-
assert resp.status_code == 201
|
95
|
-
url = resp.headers["location"]
|
96
|
-
|
97
|
-
offset = 0
|
98
|
-
|
99
|
-
# We upload a file that spans across more than one chunk
|
100
|
-
min_chunk_size = get_storage_manager().min_upload_size
|
101
|
-
raw_bytes = b"x" * min_chunk_size + b"y" * 500
|
102
|
-
io_bytes = io.BytesIO(raw_bytes)
|
103
|
-
data = io_bytes.read(min_chunk_size)
|
104
|
-
while data != b"":
|
105
|
-
resp = await client.head(url)
|
106
|
-
assert resp.headers["Upload-Length"] == f"0"
|
107
|
-
assert resp.headers["Upload-Offset"] == f"{offset}"
|
108
|
-
|
109
|
-
headers = {
|
110
|
-
"upload-offset": f"{offset}",
|
111
|
-
"content-length": f"{len(data)}",
|
112
|
-
}
|
113
|
-
is_last_chunk = len(data) < min_chunk_size
|
114
|
-
if is_last_chunk:
|
115
|
-
headers["upload-length"] = f"{offset + len(data)}"
|
116
|
-
|
117
|
-
resp = await client.patch(
|
118
|
-
url,
|
119
|
-
content=data,
|
120
|
-
headers=headers,
|
121
|
-
)
|
122
|
-
offset += len(data)
|
123
|
-
data = io_bytes.read(min_chunk_size)
|
124
|
-
|
125
|
-
assert resp.headers["Tus-Upload-Finished"] == "1"
|
126
|
-
|
127
|
-
transaction = get_transaction_utility()
|
128
|
-
|
129
|
-
sub = await transaction.js.pull_subscribe(
|
130
|
-
const.Streams.INGEST.subject.format(partition="1"), "auto"
|
131
|
-
)
|
132
|
-
msgs = await sub.fetch(1)
|
133
|
-
|
134
|
-
writer = BrokerMessage()
|
135
|
-
writer.ParseFromString(msgs[0].data)
|
136
|
-
await msgs[0].ack()
|
137
|
-
|
138
|
-
path = resp.headers["ndb-field"]
|
139
|
-
field = path.split("/")[-1]
|
140
|
-
rid = path.split("/")[-3]
|
141
|
-
assert writer.uuid == rid
|
142
|
-
assert writer.basic.icon == "image/jpg"
|
143
|
-
assert writer.basic.title == "image.jpg"
|
144
|
-
assert writer.files[field].language == "ca"
|
145
|
-
assert writer.files[field].file.size == len(raw_bytes)
|
146
|
-
assert writer.files[field].file.filename == "image.jpg"
|
147
|
-
assert writer.files[field].file.md5 == "7af0916dba8b70e29d99e72941923529"
|
148
|
-
|
149
|
-
storage = await get_storage()
|
150
|
-
data = await storage.downloadbytes(
|
151
|
-
bucket=writer.files[field].file.bucket_name,
|
152
|
-
key=writer.files[field].file.uri,
|
153
|
-
)
|
154
|
-
assert len(data.read()) == len(raw_bytes)
|
155
|
-
await asyncio.sleep(1)
|
156
|
-
|
157
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
158
|
-
resp = await client.post(
|
159
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/{TUSUPLOAD}",
|
160
|
-
headers={
|
161
|
-
"tus-resumable": "1.0.0",
|
162
|
-
"upload-metadata": f"filename {filename},language {language},md5 {md5}",
|
163
|
-
"content-type": "image/jpg",
|
164
|
-
"upload-defer-length": "1",
|
165
|
-
},
|
166
|
-
)
|
167
|
-
assert resp.status_code == 409
|
168
|
-
|
169
|
-
|
170
|
-
@pytest.mark.asyncio
|
171
|
-
async def test_knowledgebox_file_upload_root(
|
172
|
-
writer_api: Callable[[list[NucliaDBRoles]], AsyncClient],
|
173
|
-
knowledgebox_writer: str,
|
174
|
-
):
|
175
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
176
|
-
with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
|
177
|
-
resp = await client.post(
|
178
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/{UPLOAD}",
|
179
|
-
content=f.read(),
|
180
|
-
headers={
|
181
|
-
"content-type": "image/jpg",
|
182
|
-
"X-MD5": "7af0916dba8b70e29d99e72941923529",
|
183
|
-
},
|
184
|
-
)
|
185
|
-
assert resp.status_code == 201
|
186
|
-
|
187
|
-
transaction = get_transaction_utility()
|
188
|
-
|
189
|
-
assert transaction.js is not None
|
190
|
-
sub = await transaction.js.pull_subscribe(
|
191
|
-
const.Streams.INGEST.subject.format(partition="1"), "auto"
|
192
|
-
)
|
193
|
-
msgs = await sub.fetch(1)
|
194
|
-
writer = BrokerMessage()
|
195
|
-
writer.ParseFromString(msgs[0].data)
|
196
|
-
await msgs[0].ack()
|
197
|
-
|
198
|
-
body = resp.json()
|
199
|
-
field = body["field_id"]
|
200
|
-
rid = body["uuid"]
|
201
|
-
assert writer.uuid == rid
|
202
|
-
assert writer.basic.icon == "image/jpg"
|
203
|
-
assert writer.files[field].file.size == 30472
|
204
|
-
|
205
|
-
storage = await get_storage()
|
206
|
-
data = await storage.downloadbytes(
|
207
|
-
bucket=writer.files[field].file.bucket_name,
|
208
|
-
key=writer.files[field].file.uri,
|
209
|
-
)
|
210
|
-
assert len(data.read()) == 30472
|
211
|
-
await asyncio.sleep(1)
|
212
|
-
|
213
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
214
|
-
with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
|
215
|
-
resp = await client.post(
|
216
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/{UPLOAD}",
|
217
|
-
content=f.read(),
|
218
|
-
headers={
|
219
|
-
"content-type": "image/jpg",
|
220
|
-
"X-MD5": "7af0916dba8b70e29d99e72941923529",
|
221
|
-
},
|
222
|
-
)
|
223
|
-
assert resp.status_code == 409
|
224
|
-
|
225
|
-
|
226
|
-
@pytest.mark.asyncio
|
227
|
-
async def test_knowledgebox_file_upload_root_headers(
|
228
|
-
writer_api: Callable[[list[NucliaDBRoles]], AsyncClient],
|
229
|
-
knowledgebox_writer: str,
|
230
|
-
):
|
231
|
-
async with writer_api([NucliaDBRoles.WRITER]) as client:
|
232
|
-
filename = base64.b64encode(b"image.jpg").decode()
|
233
|
-
with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
|
234
|
-
resp = await client.post(
|
235
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/{UPLOAD}",
|
236
|
-
content=f.read(),
|
237
|
-
headers={
|
238
|
-
"X-FILENAME": filename,
|
239
|
-
"X-LANGUAGE": "ca",
|
240
|
-
"X-MD5": "7af0916dba8b70e29d99e72941923529",
|
241
|
-
"content-type": "image/jpg",
|
242
|
-
},
|
243
|
-
)
|
244
|
-
assert resp.status_code == 201
|
245
|
-
|
246
|
-
transaction = get_transaction_utility()
|
247
|
-
|
248
|
-
assert transaction.js is not None
|
249
|
-
sub = await transaction.js.pull_subscribe(
|
250
|
-
const.Streams.INGEST.subject.format(partition="1"), "auto"
|
251
|
-
)
|
252
|
-
msgs = await sub.fetch(1)
|
253
|
-
writer = BrokerMessage()
|
254
|
-
writer.ParseFromString(msgs[0].data)
|
255
|
-
await msgs[0].ack()
|
256
|
-
|
257
|
-
body = resp.json()
|
258
|
-
field = body["field_id"]
|
259
|
-
rid = body["uuid"]
|
260
|
-
assert writer.uuid == rid
|
261
|
-
assert writer.basic.icon == "image/jpg"
|
262
|
-
assert writer.basic.title == "image.jpg"
|
263
|
-
assert writer.files[field].language == "ca"
|
264
|
-
assert writer.files[field].file.size == 30472
|
265
|
-
|
266
|
-
storage = await get_storage()
|
267
|
-
data = await storage.downloadbytes(
|
268
|
-
bucket=writer.files[field].file.bucket_name,
|
269
|
-
key=writer.files[field].file.uri,
|
270
|
-
)
|
271
|
-
assert len(data.read()) == 30472
|
272
|
-
|
273
|
-
|
274
|
-
@pytest.mark.asyncio
|
275
|
-
async def test_knowledgebox_file_tus_upload_field(
|
276
|
-
writer_api, knowledgebox_writer, resource
|
277
|
-
):
|
278
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
279
|
-
language = base64.b64encode(b"ca").decode()
|
280
|
-
filename = base64.b64encode(b"image.jpg").decode()
|
281
|
-
md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
|
282
|
-
|
283
|
-
resp = await client.post(
|
284
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/resource/invalidresource/file/field1/{TUSUPLOAD}",
|
285
|
-
headers={
|
286
|
-
"tus-resumable": "1.0.0",
|
287
|
-
"upload-metadata": f"filename {filename},language {language},md5 {md5}",
|
288
|
-
"content-type": "image/jpg",
|
289
|
-
"upload-defer-length": "1",
|
290
|
-
},
|
291
|
-
)
|
292
|
-
assert resp.status_code == 404
|
293
|
-
await asyncio.sleep(1)
|
294
|
-
|
295
|
-
resp = await client.post(
|
296
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/resource/{resource}/file/field1/{TUSUPLOAD}",
|
297
|
-
headers={
|
298
|
-
"tus-resumable": "1.0.0",
|
299
|
-
"upload-metadata": f"filename {filename},language {language},md5 {md5}",
|
300
|
-
"content-type": "image/jpg",
|
301
|
-
"upload-defer-length": "1",
|
302
|
-
},
|
303
|
-
)
|
304
|
-
assert resp.status_code == 201
|
305
|
-
url = resp.headers["location"]
|
306
|
-
|
307
|
-
offset = 0
|
308
|
-
# We upload a file that spans across more than one chunk
|
309
|
-
min_chunk_size = get_storage_manager().min_upload_size
|
310
|
-
raw_bytes = b"x" * min_chunk_size + b"y" * 500
|
311
|
-
io_bytes = io.BytesIO(raw_bytes)
|
312
|
-
data = io_bytes.read(min_chunk_size)
|
313
|
-
while data != b"":
|
314
|
-
resp = await client.head(url)
|
315
|
-
|
316
|
-
assert resp.headers["Upload-Length"] == f"0"
|
317
|
-
assert resp.headers["Upload-Offset"] == f"{offset}"
|
318
|
-
|
319
|
-
headers = {
|
320
|
-
"upload-offset": f"{offset}",
|
321
|
-
"content-length": f"{len(data)}",
|
322
|
-
}
|
323
|
-
is_last_chunk = len(data) < min_chunk_size
|
324
|
-
if is_last_chunk:
|
325
|
-
headers["upload-length"] = f"{offset + len(data)}"
|
326
|
-
|
327
|
-
resp = await client.patch(
|
328
|
-
url,
|
329
|
-
content=data,
|
330
|
-
headers=headers,
|
331
|
-
)
|
332
|
-
assert resp.status_code == 200
|
333
|
-
offset += len(data)
|
334
|
-
data = io_bytes.read(min_chunk_size)
|
335
|
-
|
336
|
-
assert resp.headers["Tus-Upload-Finished"] == "1"
|
337
|
-
|
338
|
-
transaction = get_transaction_utility()
|
339
|
-
|
340
|
-
sub = await transaction.js.pull_subscribe(
|
341
|
-
const.Streams.INGEST.subject.format(partition="1"), "auto"
|
342
|
-
)
|
343
|
-
msgs = await sub.fetch(2)
|
344
|
-
|
345
|
-
writer = BrokerMessage()
|
346
|
-
writer.ParseFromString(msgs[1].data)
|
347
|
-
await msgs[1].ack()
|
348
|
-
|
349
|
-
path = resp.headers["ndb-field"]
|
350
|
-
field = path.split("/")[-1]
|
351
|
-
rid = path.split("/")[-3]
|
352
|
-
assert writer.uuid == rid
|
353
|
-
assert writer.basic.icon == "image/jpg"
|
354
|
-
assert writer.basic.title == ""
|
355
|
-
assert writer.files[field].language == "ca"
|
356
|
-
assert writer.files[field].file.size == len(raw_bytes)
|
357
|
-
assert writer.files[field].file.filename == "image.jpg"
|
358
|
-
assert writer.files[field].file.md5 == "7af0916dba8b70e29d99e72941923529"
|
359
|
-
|
360
|
-
storage = await get_storage()
|
361
|
-
data = await storage.downloadbytes(
|
362
|
-
bucket=writer.files[field].file.bucket_name,
|
363
|
-
key=writer.files[field].file.uri,
|
364
|
-
)
|
365
|
-
assert len(data.read()) == len(raw_bytes)
|
366
|
-
|
367
|
-
|
368
|
-
@pytest.mark.asyncio
|
369
|
-
async def test_knowledgebox_file_upload_field_headers(
|
370
|
-
writer_api, knowledgebox_writer, resource
|
371
|
-
):
|
372
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
373
|
-
filename = "image.jpg"
|
374
|
-
encoded_filename = base64.b64encode(filename.encode()).decode()
|
375
|
-
with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
|
376
|
-
resp = await client.post(
|
377
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/resource/{resource}/file/field1/{UPLOAD}",
|
378
|
-
content=f.read(),
|
379
|
-
headers={
|
380
|
-
"X-FILENAME": encoded_filename,
|
381
|
-
"X-LANGUAGE": "ca",
|
382
|
-
"X-MD5": "7af0916dba8b70e29d99e72941923529",
|
383
|
-
"content-type": "image/jpg",
|
384
|
-
},
|
385
|
-
)
|
386
|
-
assert resp.status_code == 201
|
387
|
-
|
388
|
-
transaction = get_transaction_utility()
|
389
|
-
|
390
|
-
sub = await transaction.js.pull_subscribe(
|
391
|
-
const.Streams.INGEST.subject.format(partition="1"), "auto"
|
392
|
-
)
|
393
|
-
msgs = await sub.fetch(2)
|
394
|
-
writer = BrokerMessage()
|
395
|
-
writer.ParseFromString(msgs[1].data)
|
396
|
-
await msgs[1].ack()
|
397
|
-
|
398
|
-
body = resp.json()
|
399
|
-
field = body["field_id"]
|
400
|
-
rid = body["uuid"]
|
401
|
-
assert writer.uuid == rid
|
402
|
-
assert writer.basic.icon == "image/jpg"
|
403
|
-
assert writer.basic.title == ""
|
404
|
-
assert writer.files[field].language == "ca"
|
405
|
-
assert writer.files[field].file.size == 30472
|
406
|
-
assert writer.files[field].file.filename == filename
|
407
|
-
|
408
|
-
storage = await get_storage()
|
409
|
-
data = await storage.downloadbytes(
|
410
|
-
bucket=writer.files[field].file.bucket_name,
|
411
|
-
key=writer.files[field].file.uri,
|
412
|
-
)
|
413
|
-
assert len(data.read()) == 30472
|
414
|
-
|
415
|
-
|
416
|
-
@pytest.mark.asyncio
|
417
|
-
async def test_knowledgebox_file_upload_field_sync(
|
418
|
-
writer_api, knowledgebox_writer, resource
|
419
|
-
):
|
420
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
421
|
-
filename = "image.jpg"
|
422
|
-
with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
|
423
|
-
resp = await client.post(
|
424
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/resource/{resource}/file/field1/{UPLOAD}",
|
425
|
-
content=f.read(),
|
426
|
-
headers={
|
427
|
-
"X-FILENAME": filename,
|
428
|
-
"X-LANGUAGE": "ca",
|
429
|
-
"X-MD5": "7af0916dba8b70e29d99e72941923529",
|
430
|
-
"content-type": "image/jpg",
|
431
|
-
},
|
432
|
-
)
|
433
|
-
assert resp.status_code == 201
|
434
|
-
|
435
|
-
async with datamanagers.with_transaction(read_only=True) as txn:
|
436
|
-
assert (
|
437
|
-
await datamanagers.resources.has_field(
|
438
|
-
txn,
|
439
|
-
kbid=knowledgebox_writer,
|
440
|
-
rid=resource,
|
441
|
-
field_id=FieldID(field="field1", field_type=FieldType.FILE),
|
442
|
-
)
|
443
|
-
) is True
|
444
|
-
|
445
|
-
|
446
|
-
@pytest.mark.asyncio
|
447
|
-
async def test_file_tus_upload_field_by_slug(writer_api, knowledgebox_writer, resource):
|
448
|
-
kb = knowledgebox_writer
|
449
|
-
rslug = "resource1"
|
450
|
-
|
451
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
452
|
-
language = base64.b64encode(b"ca").decode()
|
453
|
-
filename = base64.b64encode(b"image.jpg").decode()
|
454
|
-
md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
|
455
|
-
headers = {
|
456
|
-
"tus-resumable": "1.0.0",
|
457
|
-
"upload-metadata": f"filename {filename},language {language},md5 {md5}",
|
458
|
-
"content-type": "image/jpg",
|
459
|
-
"upload-defer-length": "1",
|
460
|
-
}
|
461
|
-
|
462
|
-
resp = await client.post(
|
463
|
-
f"/{KB_PREFIX}/{kb}/slug/idonotexist/file/field1/{TUSUPLOAD}",
|
464
|
-
headers=headers,
|
465
|
-
)
|
466
|
-
assert resp.status_code == 404
|
467
|
-
|
468
|
-
resp = await client.post(
|
469
|
-
f"/{KB_PREFIX}/{kb}/slug/{rslug}/file/field1/{TUSUPLOAD}",
|
470
|
-
headers=headers,
|
471
|
-
)
|
472
|
-
assert resp.status_code == 201
|
473
|
-
url = resp.headers["location"]
|
474
|
-
|
475
|
-
# Check that we are using the slug for the whole file upload
|
476
|
-
assert f"{RSLUG_PREFIX}/{rslug}" in url
|
477
|
-
|
478
|
-
offset = 0
|
479
|
-
min_chunk_size = get_storage_manager().min_upload_size
|
480
|
-
raw_bytes = b"x" * min_chunk_size + b"y" * 500
|
481
|
-
io_bytes = io.BytesIO(raw_bytes)
|
482
|
-
data = io_bytes.read(min_chunk_size)
|
483
|
-
while data != b"":
|
484
|
-
resp = await client.head(url)
|
485
|
-
|
486
|
-
assert resp.headers["Upload-Length"] == f"0"
|
487
|
-
assert resp.headers["Upload-Offset"] == f"{offset}"
|
488
|
-
|
489
|
-
headers = {
|
490
|
-
"upload-offset": f"{offset}",
|
491
|
-
"content-length": f"{len(data)}",
|
492
|
-
}
|
493
|
-
is_last_chunk = len(data) < min_chunk_size
|
494
|
-
if is_last_chunk:
|
495
|
-
headers["upload-length"] = f"{offset + len(data)}"
|
496
|
-
|
497
|
-
resp = await client.patch(
|
498
|
-
url,
|
499
|
-
content=data,
|
500
|
-
headers=headers,
|
501
|
-
)
|
502
|
-
assert resp.status_code == 200
|
503
|
-
offset += len(data)
|
504
|
-
data = io_bytes.read(min_chunk_size)
|
505
|
-
|
506
|
-
assert resp.headers["Tus-Upload-Finished"] == "1"
|
507
|
-
|
508
|
-
transaction = get_transaction_utility()
|
509
|
-
|
510
|
-
sub = await transaction.js.pull_subscribe(
|
511
|
-
const.Streams.INGEST.subject.format(partition="1"), "auto"
|
512
|
-
)
|
513
|
-
msgs = await sub.fetch(2)
|
514
|
-
|
515
|
-
writer = BrokerMessage()
|
516
|
-
writer.ParseFromString(msgs[1].data)
|
517
|
-
await msgs[1].ack()
|
518
|
-
|
519
|
-
path = resp.headers["ndb-field"]
|
520
|
-
field = path.split("/")[-1]
|
521
|
-
rid = path.split("/")[-3]
|
522
|
-
assert writer.uuid == rid
|
523
|
-
assert writer.basic.icon == "image/jpg"
|
524
|
-
assert writer.basic.title == ""
|
525
|
-
assert writer.files[field].language == "ca"
|
526
|
-
assert writer.files[field].file.size == len(raw_bytes)
|
527
|
-
assert writer.files[field].file.filename == "image.jpg"
|
528
|
-
assert writer.files[field].file.md5 == "7af0916dba8b70e29d99e72941923529"
|
529
|
-
|
530
|
-
storage = await get_storage()
|
531
|
-
data = await storage.downloadbytes(
|
532
|
-
bucket=writer.files[field].file.bucket_name,
|
533
|
-
key=writer.files[field].file.uri,
|
534
|
-
)
|
535
|
-
assert len(data.read()) == len(raw_bytes)
|
536
|
-
|
537
|
-
|
538
|
-
@pytest.mark.asyncio
|
539
|
-
async def test_file_tus_upload_urls_field_by_resource_id(
|
540
|
-
writer_api, knowledgebox_writer, resource
|
541
|
-
):
|
542
|
-
kb = knowledgebox_writer
|
543
|
-
|
544
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
545
|
-
language = base64.b64encode(b"ca").decode()
|
546
|
-
filename = base64.b64encode(b"image.jpg").decode()
|
547
|
-
md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
|
548
|
-
headers = {
|
549
|
-
"tus-resumable": "1.0.0",
|
550
|
-
"upload-metadata": f"filename {filename},language {language},md5 {md5}",
|
551
|
-
"content-type": "image/jpg",
|
552
|
-
"upload-defer-length": "1",
|
553
|
-
}
|
554
|
-
|
555
|
-
resp = await client.post(
|
556
|
-
f"/{KB_PREFIX}/{kb}/resource/idonotexist/file/field1/{TUSUPLOAD}",
|
557
|
-
headers=headers,
|
558
|
-
)
|
559
|
-
assert resp.status_code == 404
|
560
|
-
|
561
|
-
resp = await client.post(
|
562
|
-
f"/{KB_PREFIX}/{kb}/resource/{resource}/file/field1/{TUSUPLOAD}",
|
563
|
-
headers=headers,
|
564
|
-
)
|
565
|
-
assert resp.status_code == 201
|
566
|
-
url = resp.headers["location"]
|
567
|
-
|
568
|
-
# Check that we are using the resource for the whole file upload
|
569
|
-
assert f"{RESOURCE_PREFIX}/{resource}" in url
|
570
|
-
|
571
|
-
# Make sure the returned URL works
|
572
|
-
resp = await client.head(url)
|
573
|
-
assert resp.status_code == 200
|
574
|
-
|
575
|
-
assert resp.headers["Upload-Length"] == "0"
|
576
|
-
assert resp.headers["Upload-Offset"] == "0"
|
577
|
-
|
578
|
-
|
579
|
-
@pytest.mark.asyncio
|
580
|
-
async def test_multiple_tus_file_upload_tries(
|
581
|
-
writer_api, knowledgebox_writer, resource
|
582
|
-
):
|
583
|
-
kb = knowledgebox_writer
|
584
|
-
rslug = "resource1"
|
585
|
-
|
586
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
587
|
-
headers = {
|
588
|
-
"tus-resumable": "1.0.0",
|
589
|
-
"content-type": "image/jpg",
|
590
|
-
"upload-defer-length": "1",
|
591
|
-
}
|
592
|
-
|
593
|
-
resp = await client.post(
|
594
|
-
f"/{KB_PREFIX}/{kb}/slug/{rslug}/file/field1/{TUSUPLOAD}",
|
595
|
-
headers=headers,
|
596
|
-
)
|
597
|
-
assert resp.status_code == 201
|
598
|
-
url = resp.headers["location"]
|
599
|
-
|
600
|
-
# Check that we are using the slug for the whole file upload
|
601
|
-
assert f"{RSLUG_PREFIX}/{rslug}" in url
|
602
|
-
resp = await client.patch(
|
603
|
-
url,
|
604
|
-
content=b"x" * 10000,
|
605
|
-
headers={
|
606
|
-
"upload-offset": "0",
|
607
|
-
"content-length": "10000",
|
608
|
-
"upload-length": "10000",
|
609
|
-
},
|
610
|
-
)
|
611
|
-
assert resp.status_code == 200
|
612
|
-
|
613
|
-
assert resp.headers["Tus-Upload-Finished"] == "1"
|
614
|
-
|
615
|
-
# next one should work as well
|
616
|
-
resp = await client.post(
|
617
|
-
f"/{KB_PREFIX}/{kb}/slug/{rslug}/file/field1/{TUSUPLOAD}",
|
618
|
-
headers=headers,
|
619
|
-
)
|
620
|
-
assert resp.status_code == 201
|
621
|
-
url = resp.headers["location"]
|
622
|
-
|
623
|
-
# Check that we are using the slug for the whole file upload
|
624
|
-
assert f"{RSLUG_PREFIX}/{rslug}" in url
|
625
|
-
resp = await client.patch(
|
626
|
-
url,
|
627
|
-
content=b"x" * 10000,
|
628
|
-
headers={
|
629
|
-
"upload-offset": "0",
|
630
|
-
"content-length": "10000",
|
631
|
-
"upload-length": "10000",
|
632
|
-
},
|
633
|
-
)
|
634
|
-
assert resp.status_code == 200
|
635
|
-
|
636
|
-
assert resp.headers["Tus-Upload-Finished"] == "1"
|
637
|
-
|
638
|
-
|
639
|
-
@pytest.mark.asyncio
|
640
|
-
async def test_file_upload_by_slug(writer_api, knowledgebox_writer):
|
641
|
-
kb = knowledgebox_writer
|
642
|
-
rslug = "myslug"
|
643
|
-
|
644
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
645
|
-
resp = await client.post(
|
646
|
-
f"/{KB_PREFIX}/{kb}/resources",
|
647
|
-
json={
|
648
|
-
"slug": rslug,
|
649
|
-
},
|
650
|
-
)
|
651
|
-
assert str(resp.status_code).startswith("2")
|
652
|
-
|
653
|
-
filename = "image.jpg"
|
654
|
-
with open(f"{ASSETS_PATH}/image001.jpg", "rb") as f:
|
655
|
-
resp = await client.post(
|
656
|
-
f"/{KB_PREFIX}/{kb}/{RSLUG_PREFIX}/{rslug}/file/file1/{UPLOAD}",
|
657
|
-
content=f.read(),
|
658
|
-
headers={
|
659
|
-
"X-FILENAME": filename,
|
660
|
-
"content-type": "image/jpg",
|
661
|
-
"X-MD5": "7af0916dba8b70e29d99e72941923529",
|
662
|
-
},
|
663
|
-
)
|
664
|
-
assert resp.status_code == 201
|
665
|
-
|
666
|
-
transaction = get_transaction_utility()
|
667
|
-
|
668
|
-
sub = await transaction.js.pull_subscribe(
|
669
|
-
const.Streams.INGEST.subject.format(partition="1"), "auto"
|
670
|
-
)
|
671
|
-
msgs = await sub.fetch(2)
|
672
|
-
|
673
|
-
writer = BrokerMessage()
|
674
|
-
writer.ParseFromString(msgs[-1].data)
|
675
|
-
await msgs[-1].ack()
|
676
|
-
|
677
|
-
body = resp.json()
|
678
|
-
field = body["field_id"]
|
679
|
-
rid = body["uuid"]
|
680
|
-
|
681
|
-
assert writer.uuid == rid
|
682
|
-
assert writer.basic.icon == "image/jpg"
|
683
|
-
assert writer.files[field].file.size == 30472
|
684
|
-
assert writer.files[field].file.filename == filename
|
685
|
-
|
686
|
-
storage = await get_storage()
|
687
|
-
data = await storage.downloadbytes(
|
688
|
-
bucket=writer.files[field].file.bucket_name,
|
689
|
-
key=writer.files[field].file.uri,
|
690
|
-
)
|
691
|
-
assert len(data.read()) == 30472
|
692
|
-
|
693
|
-
|
694
|
-
def test_maybe_b64decode():
|
695
|
-
something = "something"
|
696
|
-
something_encoded = base64.b64encode(something.encode())
|
697
|
-
assert maybe_b64decode(something_encoded) == something
|
698
|
-
assert maybe_b64decode(something) == something
|
699
|
-
|
700
|
-
|
701
|
-
@pytest.mark.asyncio
|
702
|
-
async def test_tus_validates_intermediate_chunks_length(
|
703
|
-
writer_api, knowledgebox_writer
|
704
|
-
):
|
705
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
706
|
-
language = base64.b64encode(b"ca").decode()
|
707
|
-
filename = base64.b64encode(b"image.jpg").decode()
|
708
|
-
md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
|
709
|
-
resp = await client.post(
|
710
|
-
f"/{KB_PREFIX}/{knowledgebox_writer}/{TUSUPLOAD}",
|
711
|
-
headers={
|
712
|
-
"tus-resumable": "1.0.0",
|
713
|
-
"upload-metadata": f"filename {filename},language {language},md5 {md5}",
|
714
|
-
"content-type": "image/jpg",
|
715
|
-
"upload-defer-length": "1",
|
716
|
-
},
|
717
|
-
)
|
718
|
-
assert resp.status_code == 201
|
719
|
-
url = resp.headers["location"]
|
720
|
-
# We upload a chunk that is smaller than the minimum chunk size
|
721
|
-
min_chunk_size = get_storage_manager().min_upload_size
|
722
|
-
raw_bytes = b"x" * min_chunk_size + b"y" * 500
|
723
|
-
io_bytes = io.BytesIO(raw_bytes)
|
724
|
-
chunk = io_bytes.read(min_chunk_size - 10)
|
725
|
-
|
726
|
-
resp = await client.head(url)
|
727
|
-
|
728
|
-
headers = {
|
729
|
-
"upload-offset": f"0",
|
730
|
-
"content-length": f"{len(chunk)}",
|
731
|
-
}
|
732
|
-
resp = await client.patch(
|
733
|
-
url,
|
734
|
-
content=chunk,
|
735
|
-
headers=headers,
|
736
|
-
)
|
737
|
-
assert resp.status_code == 412
|
738
|
-
assert resp.json()["detail"].startswith(
|
739
|
-
"Intermediate chunks cannot be smaller than"
|
740
|
-
)
|