nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,63 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from fastapi import Request
|
21
|
-
from fastapi_versioning import version
|
22
|
-
|
23
|
-
from nucliadb.learning_proxy import learning_collector_proxy
|
24
|
-
from nucliadb.reader.api.v1.router import KB_PREFIX, api
|
25
|
-
from nucliadb_models.resource import NucliaDBRoles
|
26
|
-
from nucliadb_utils.authentication import requires
|
27
|
-
|
28
|
-
|
29
|
-
@api.get(
|
30
|
-
path=f"/{KB_PREFIX}/{{kbid}}/feedback/{{month}}",
|
31
|
-
status_code=200,
|
32
|
-
summary="Download feedback of a Knowledge Box",
|
33
|
-
description="Download the feedback of a particular month in a Knowledge Box", # noqa
|
34
|
-
response_model=None,
|
35
|
-
tags=["Knowledge Boxes"],
|
36
|
-
)
|
37
|
-
@requires(NucliaDBRoles.READER)
|
38
|
-
@version(1)
|
39
|
-
async def feedback_download(
|
40
|
-
request: Request,
|
41
|
-
kbid: str,
|
42
|
-
month: str,
|
43
|
-
):
|
44
|
-
return await learning_collector_proxy(
|
45
|
-
request, "GET", f"/collect/feedback/{kbid}/{month}"
|
46
|
-
)
|
47
|
-
|
48
|
-
|
49
|
-
@api.get(
|
50
|
-
path=f"/{KB_PREFIX}/{{kbid}}/feedback",
|
51
|
-
status_code=200,
|
52
|
-
summary="Feedback avalaible months",
|
53
|
-
description="List of months within the last year with feedback data",
|
54
|
-
response_model=None,
|
55
|
-
tags=["Knowledge Boxes"],
|
56
|
-
)
|
57
|
-
@requires(NucliaDBRoles.READER)
|
58
|
-
@version(1)
|
59
|
-
async def feedback_list_months(
|
60
|
-
request: Request,
|
61
|
-
kbid: str,
|
62
|
-
):
|
63
|
-
return await learning_collector_proxy(request, "GET", f"/collect/feedback/{kbid}")
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
@@ -1,31 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
pytest_plugins = [
|
21
|
-
"pytest_docker_fixtures",
|
22
|
-
"nucliadb.tests.fixtures",
|
23
|
-
"nucliadb.tests.tikv",
|
24
|
-
"nucliadb.ingest.tests.fixtures", # should be refactored out
|
25
|
-
"nucliadb.reader.tests.fixtures",
|
26
|
-
"nucliadb_utils.tests.nats",
|
27
|
-
"nucliadb_utils.tests.conftest",
|
28
|
-
"nucliadb_utils.tests.gcs",
|
29
|
-
"nucliadb_utils.tests.s3",
|
30
|
-
"nucliadb_utils.tests.asyncbenchmark",
|
31
|
-
]
|
@@ -1,136 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import uuid
|
21
|
-
from datetime import datetime
|
22
|
-
from enum import Enum
|
23
|
-
from typing import Optional
|
24
|
-
|
25
|
-
import pytest
|
26
|
-
from httpx import AsyncClient
|
27
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
28
|
-
|
29
|
-
from nucliadb.ingest.orm.resource import KB_RESOURCE_SLUG_BASE
|
30
|
-
from nucliadb.reader import API_PREFIX
|
31
|
-
from nucliadb_utils.utilities import clear_global_cache
|
32
|
-
|
33
|
-
|
34
|
-
@pytest.fixture(scope="function")
|
35
|
-
def test_settings_reader(cache, gcs, fake_node, maindb_driver): # type: ignore
|
36
|
-
from nucliadb_utils.settings import (
|
37
|
-
FileBackendConfig,
|
38
|
-
running_settings,
|
39
|
-
storage_settings,
|
40
|
-
)
|
41
|
-
|
42
|
-
running_settings.debug = False
|
43
|
-
print(f"Driver ready at {maindb_driver.url}")
|
44
|
-
|
45
|
-
storage_settings.gcs_endpoint_url = gcs
|
46
|
-
storage_settings.file_backend = FileBackendConfig.GCS
|
47
|
-
storage_settings.gcs_bucket = "test"
|
48
|
-
|
49
|
-
yield
|
50
|
-
|
51
|
-
|
52
|
-
@pytest.fixture(scope="function")
|
53
|
-
async def reader_api(test_settings_reader: None, local_files): # type: ignore
|
54
|
-
from nucliadb.reader.app import create_application
|
55
|
-
|
56
|
-
application = create_application()
|
57
|
-
|
58
|
-
def make_client_fixture(
|
59
|
-
roles: Optional[list[Enum]] = None,
|
60
|
-
user: str = "",
|
61
|
-
version: str = "1",
|
62
|
-
) -> AsyncClient:
|
63
|
-
roles = roles or []
|
64
|
-
client_base_url = "http://test"
|
65
|
-
client_base_url = f"{client_base_url}/{API_PREFIX}/v{version}"
|
66
|
-
|
67
|
-
client = AsyncClient(app=application, base_url=client_base_url) # type: ignore
|
68
|
-
client.headers["X-NUCLIADB-ROLES"] = ";".join([role.value for role in roles])
|
69
|
-
client.headers["X-NUCLIADB-USER"] = user
|
70
|
-
|
71
|
-
return client
|
72
|
-
|
73
|
-
await application.router.startup()
|
74
|
-
yield make_client_fixture
|
75
|
-
await application.router.shutdown()
|
76
|
-
clear_global_cache()
|
77
|
-
|
78
|
-
|
79
|
-
def broker_simple_resource(knowledgebox: str, number: int) -> BrokerMessage:
|
80
|
-
rid = str(uuid.uuid4())
|
81
|
-
message1: BrokerMessage = BrokerMessage(
|
82
|
-
kbid=knowledgebox,
|
83
|
-
uuid=rid,
|
84
|
-
slug=str(number),
|
85
|
-
type=BrokerMessage.AUTOCOMMIT,
|
86
|
-
)
|
87
|
-
|
88
|
-
message1.basic.icon = "text/plain"
|
89
|
-
message1.basic.title = str(number)
|
90
|
-
message1.basic.summary = "Summary of document"
|
91
|
-
message1.basic.thumbnail = "doc"
|
92
|
-
message1.basic.layout = "default"
|
93
|
-
message1.basic.metadata.useful = True
|
94
|
-
message1.basic.metadata.language = "es"
|
95
|
-
message1.basic.created.FromDatetime(datetime.utcnow())
|
96
|
-
message1.basic.modified.FromDatetime(datetime.utcnow())
|
97
|
-
message1.source = BrokerMessage.MessageSource.WRITER
|
98
|
-
|
99
|
-
return message1
|
100
|
-
|
101
|
-
|
102
|
-
@pytest.fixture(scope="function")
|
103
|
-
async def test_resources(processor, knowledgebox_ingest, test_settings_reader):
|
104
|
-
"""
|
105
|
-
Create a set of resources with only basic information to test pagination
|
106
|
-
"""
|
107
|
-
resources = []
|
108
|
-
amount = 10
|
109
|
-
for i in range(1, 10 + 1):
|
110
|
-
message = broker_simple_resource(knowledgebox_ingest, i)
|
111
|
-
await processor.process(message=message, seqid=i)
|
112
|
-
resources.append(message.uuid)
|
113
|
-
# Give processed data some time to reach the node
|
114
|
-
|
115
|
-
from time import time
|
116
|
-
|
117
|
-
from nucliadb.common.maindb.utils import get_driver
|
118
|
-
|
119
|
-
driver = get_driver()
|
120
|
-
|
121
|
-
t0 = time()
|
122
|
-
|
123
|
-
while time() - t0 < 30: # wait max 30 seconds for it
|
124
|
-
txn = await driver.begin()
|
125
|
-
count = 0
|
126
|
-
async for key in txn.keys(
|
127
|
-
match=KB_RESOURCE_SLUG_BASE.format(kbid=knowledgebox_ingest), count=-1
|
128
|
-
):
|
129
|
-
count += 1
|
130
|
-
|
131
|
-
await txn.abort()
|
132
|
-
if count == amount:
|
133
|
-
break
|
134
|
-
print(f"got {count}, retrying")
|
135
|
-
|
136
|
-
yield knowledgebox_ingest, resources
|
@@ -1,75 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from typing import Callable, Optional
|
21
|
-
|
22
|
-
import pytest
|
23
|
-
from httpx import AsyncClient
|
24
|
-
|
25
|
-
from nucliadb.reader.api import DEFAULT_RESOURCE_LIST_PAGE_SIZE
|
26
|
-
from nucliadb.reader.api.v1.router import KB_PREFIX
|
27
|
-
from nucliadb_models.resource import NucliaDBRoles
|
28
|
-
|
29
|
-
# All this scenarios are meant to match a total of 10 resources
|
30
|
-
# coming from test_pagination_resources. Tests uses redis so order
|
31
|
-
# is not guaranteed
|
32
|
-
PAGINATION_TEST_SCENARIOS = [
|
33
|
-
(None, None, 10, True), # Get first (also last) page with default values
|
34
|
-
(0, 20, 10, True), # Get first (also last)page explicitly
|
35
|
-
(1, 20, 0, True), # Get invalid page
|
36
|
-
(0, 5, 5, False), # Get first non-last page
|
37
|
-
(1, 5, 5, True), # Get last full page
|
38
|
-
(1, 6, 4, True), # Get last partial page
|
39
|
-
]
|
40
|
-
|
41
|
-
|
42
|
-
@pytest.mark.asyncio
|
43
|
-
@pytest.mark.parametrize(
|
44
|
-
"page, size, expected_resources_count, expected_is_last_page",
|
45
|
-
PAGINATION_TEST_SCENARIOS,
|
46
|
-
)
|
47
|
-
async def test_list_resources(
|
48
|
-
reader_api: Callable[..., AsyncClient],
|
49
|
-
test_resources: tuple[str, list[str]],
|
50
|
-
page: Optional[int],
|
51
|
-
size: Optional[int],
|
52
|
-
expected_resources_count: int,
|
53
|
-
expected_is_last_page: bool,
|
54
|
-
) -> None:
|
55
|
-
kbid = test_resources[0]
|
56
|
-
|
57
|
-
query_params = {}
|
58
|
-
if page is not None:
|
59
|
-
query_params["page"] = page
|
60
|
-
|
61
|
-
if size is not None:
|
62
|
-
query_params["size"] = size
|
63
|
-
|
64
|
-
async with reader_api(roles=[NucliaDBRoles.READER]) as client:
|
65
|
-
resp = await client.get(f"/{KB_PREFIX}/{kbid}/resources", params=query_params)
|
66
|
-
assert resp.status_code == 200
|
67
|
-
resources = resp.json()["resources"]
|
68
|
-
pagination = resp.json()["pagination"]
|
69
|
-
|
70
|
-
assert len(resources) == expected_resources_count
|
71
|
-
assert pagination["last"] == expected_is_last_page
|
72
|
-
assert pagination["page"] == query_params.get("page", 0)
|
73
|
-
assert pagination["size"] == query_params.get(
|
74
|
-
"size", DEFAULT_RESOURCE_LIST_PAGE_SIZE
|
75
|
-
)
|
@@ -1,273 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import os
|
21
|
-
from typing import Callable
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
from httpx import AsyncClient
|
25
|
-
from nucliadb_protos.resources_pb2 import FieldType
|
26
|
-
|
27
|
-
import nucliadb.ingest.tests.fixtures
|
28
|
-
from nucliadb.ingest.orm.resource import Resource
|
29
|
-
from nucliadb.ingest.tests.fixtures import TEST_CLOUDFILE, THUMBNAIL
|
30
|
-
from nucliadb.reader.api.v1.download import parse_media_range, safe_http_header_encode
|
31
|
-
from nucliadb.reader.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX
|
32
|
-
from nucliadb_models.resource import NucliaDBRoles
|
33
|
-
|
34
|
-
BASE = ("field_id", "field_type")
|
35
|
-
VALUE = ("value",)
|
36
|
-
EXTRACTED = ("extracted",)
|
37
|
-
|
38
|
-
|
39
|
-
@pytest.mark.asyncio
|
40
|
-
async def test_resource_download_extracted_file(
|
41
|
-
reader_api: Callable[..., AsyncClient], test_resource: Resource
|
42
|
-
) -> None:
|
43
|
-
rsc = test_resource
|
44
|
-
kbid = rsc.kb.kbid
|
45
|
-
rid = rsc.uuid
|
46
|
-
field_type = "text"
|
47
|
-
field_id = "text1"
|
48
|
-
download_type = "extracted"
|
49
|
-
download_field = "thumbnail"
|
50
|
-
|
51
|
-
async with reader_api(roles=[NucliaDBRoles.READER]) as client:
|
52
|
-
resp = await client.get(
|
53
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/{field_type}/{field_id}/download/{download_type}/{download_field}", # noqa
|
54
|
-
)
|
55
|
-
assert resp.status_code == 200
|
56
|
-
filename = f"{os.path.dirname(nucliadb.ingest.tests.fixtures.__file__)}{THUMBNAIL.bucket_name}/{THUMBNAIL.uri}"
|
57
|
-
|
58
|
-
open(filename, "rb").read() == resp.content
|
59
|
-
|
60
|
-
|
61
|
-
@pytest.mark.asyncio
|
62
|
-
async def test_resource_download_field_file(
|
63
|
-
reader_api: Callable[..., AsyncClient], test_resource: Resource
|
64
|
-
) -> None:
|
65
|
-
rsc = test_resource
|
66
|
-
kbid = rsc.kb.kbid
|
67
|
-
rid = rsc.uuid
|
68
|
-
field_id = "file1"
|
69
|
-
|
70
|
-
async with reader_api(roles=[NucliaDBRoles.READER]) as client:
|
71
|
-
resp = await client.get(
|
72
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}?show=values",
|
73
|
-
)
|
74
|
-
assert (
|
75
|
-
resp.json()["data"]["files"]["file1"]["value"]["file"]["filename"]
|
76
|
-
== "text.pb"
|
77
|
-
)
|
78
|
-
|
79
|
-
# Check that invalid range is handled
|
80
|
-
resp = await client.get(
|
81
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/download/field",
|
82
|
-
headers={"range": "bytes=invalid-range"},
|
83
|
-
)
|
84
|
-
assert resp.status_code == 416
|
85
|
-
assert resp.json()["detail"]["reason"] == "rangeNotParsable"
|
86
|
-
|
87
|
-
# Check that multipart ranges not implemented is handled
|
88
|
-
resp = await client.get(
|
89
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/download/field",
|
90
|
-
headers={"range": "bytes=0-50, 100-150"},
|
91
|
-
)
|
92
|
-
assert resp.status_code == 416
|
93
|
-
assert resp.json()["detail"]["reason"] == "rangeNotSupported"
|
94
|
-
|
95
|
-
resp = await client.get(
|
96
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/download/field",
|
97
|
-
headers={"range": "bytes=0-"},
|
98
|
-
)
|
99
|
-
assert resp.status_code == 206
|
100
|
-
assert resp.headers["Content-Disposition"]
|
101
|
-
|
102
|
-
filename = f"{os.path.dirname(nucliadb.ingest.tests.fixtures.__file__)}/{TEST_CLOUDFILE.bucket_name}/{TEST_CLOUDFILE.uri}" # noqa
|
103
|
-
|
104
|
-
open(filename, "rb").read() == resp.content
|
105
|
-
|
106
|
-
resp = await client.get(
|
107
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}?show=values",
|
108
|
-
)
|
109
|
-
assert resp.status_code == 200
|
110
|
-
|
111
|
-
assert (
|
112
|
-
resp.json()["data"]["texts"]["text1"]["value"]["md5"]
|
113
|
-
== "74a3187271f1d526b1f6271bfb7df52e"
|
114
|
-
)
|
115
|
-
assert (
|
116
|
-
resp.json()["data"]["files"]["file1"]["value"]["file"]["md5"]
|
117
|
-
== "01cca3f53edb934a445a3112c6caa652"
|
118
|
-
)
|
119
|
-
|
120
|
-
|
121
|
-
@pytest.mark.asyncio
|
122
|
-
async def test_resource_download_field_layout(
|
123
|
-
reader_api: Callable[..., AsyncClient], test_resource: Resource
|
124
|
-
) -> None:
|
125
|
-
rsc = test_resource
|
126
|
-
kbid = rsc.kb.kbid
|
127
|
-
rid = rsc.uuid
|
128
|
-
field_id = "layout1"
|
129
|
-
download_field = "field1"
|
130
|
-
|
131
|
-
async with reader_api(roles=[NucliaDBRoles.READER]) as client:
|
132
|
-
resp = await client.get(
|
133
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/layout/{field_id}/download/field/{download_field}",
|
134
|
-
)
|
135
|
-
assert resp.status_code == 200
|
136
|
-
filename = f"{os.path.dirname(nucliadb.ingest.tests.fixtures.__file__)}/{TEST_CLOUDFILE.bucket_name}/{TEST_CLOUDFILE.uri}" # noqa
|
137
|
-
|
138
|
-
open(filename, "rb").read() == resp.content
|
139
|
-
|
140
|
-
|
141
|
-
@pytest.mark.asyncio
|
142
|
-
async def test_resource_download_field_conversation(
|
143
|
-
reader_api: Callable[..., AsyncClient], test_resource: Resource
|
144
|
-
) -> None:
|
145
|
-
rsc = test_resource
|
146
|
-
kbid = rsc.kb.kbid
|
147
|
-
rid = rsc.uuid
|
148
|
-
field_id = "conv1"
|
149
|
-
|
150
|
-
msg_id, file_id = await _get_message_with_file(test_resource)
|
151
|
-
|
152
|
-
async with reader_api(roles=[NucliaDBRoles.READER]) as client:
|
153
|
-
resp = await client.get(
|
154
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/conversation/{field_id}/download/field/{msg_id}/{file_id}",
|
155
|
-
)
|
156
|
-
assert resp.status_code == 200
|
157
|
-
filename = f"{os.path.dirname(nucliadb.ingest.tests.fixtures.__file__)}/{THUMBNAIL.bucket_name}/{THUMBNAIL.uri}" # noqa
|
158
|
-
assert open(filename, "rb").read() == resp.content
|
159
|
-
|
160
|
-
|
161
|
-
@pytest.mark.parametrize(
|
162
|
-
"endpoint_part,endpoint_params",
|
163
|
-
[
|
164
|
-
[
|
165
|
-
"{field_type}/{field_id}/download/extracted/{download_field}",
|
166
|
-
{"field_type": "text", "field_id": "text1", "download_field": "thumbnail"},
|
167
|
-
], # noqa
|
168
|
-
["file/{field_id}/download/field", {"field_id": "file1"}],
|
169
|
-
[
|
170
|
-
"layout/{field_id}/download/field/{download_field}",
|
171
|
-
{"field_id": "layout1", "download_field": "field1"},
|
172
|
-
],
|
173
|
-
[
|
174
|
-
"conversation/{field_id}/download/field/{message_id}/{file_num}",
|
175
|
-
{"field_id": "conv1"},
|
176
|
-
],
|
177
|
-
],
|
178
|
-
)
|
179
|
-
@pytest.mark.asyncio
|
180
|
-
async def test_download_fields_by_resource_slug(
|
181
|
-
reader_api, test_resource, endpoint_part, endpoint_params
|
182
|
-
):
|
183
|
-
rsc = test_resource
|
184
|
-
kbid = rsc.kb.kbid
|
185
|
-
slug = rsc.basic.slug
|
186
|
-
if endpoint_part.startswith("conversation"):
|
187
|
-
# For conversations, we need to get a message id and a file number
|
188
|
-
msg_id, file_num = await _get_message_with_file(test_resource)
|
189
|
-
endpoint_params["message_id"] = msg_id
|
190
|
-
endpoint_params["file_num"] = file_num
|
191
|
-
|
192
|
-
async with reader_api(roles=[NucliaDBRoles.READER]) as client:
|
193
|
-
resource_path = f"/{KB_PREFIX}/{kbid}/{RSLUG_PREFIX}/{slug}"
|
194
|
-
endpoint = endpoint_part.format(**endpoint_params)
|
195
|
-
resp = await client.get(
|
196
|
-
f"{resource_path}/{endpoint}",
|
197
|
-
)
|
198
|
-
assert resp.status_code == 200
|
199
|
-
|
200
|
-
# Check that 404 is returned when a slug does not exist
|
201
|
-
unexisting_resource_path = f"/{KB_PREFIX}/{kbid}/{RSLUG_PREFIX}/idonotexist"
|
202
|
-
resp = await client.get(
|
203
|
-
f"{unexisting_resource_path}/{endpoint}",
|
204
|
-
)
|
205
|
-
assert resp.status_code == 404
|
206
|
-
assert resp.json()["detail"] == "Resource does not exist"
|
207
|
-
|
208
|
-
|
209
|
-
async def _get_message_with_file(test_resource):
|
210
|
-
conversation_field = await test_resource.get_field("conv1", FieldType.CONVERSATION)
|
211
|
-
conversations = await conversation_field.get_value(page=1)
|
212
|
-
message_with_files = conversations.messages[33]
|
213
|
-
msg_id, file_num = message_with_files.content.attachments[1].uri.split("/")[-2:]
|
214
|
-
return msg_id, file_num
|
215
|
-
|
216
|
-
|
217
|
-
@pytest.mark.parametrize(
|
218
|
-
"range_request,filesize,start,end,range_size,exception",
|
219
|
-
[
|
220
|
-
# No end range specified
|
221
|
-
("bytes=0-", 10, 0, 9, 10, None),
|
222
|
-
# End range == file size
|
223
|
-
(f"bytes=0-10", 10, 0, 9, 10, None),
|
224
|
-
# End range < file size
|
225
|
-
(f"bytes=0-5", 10, 0, 5, 6, None),
|
226
|
-
# End range > file size
|
227
|
-
(f"bytes=0-11", 10, 0, 9, 10, None),
|
228
|
-
# Starting at a middle point until the end
|
229
|
-
(f"bytes=2-", 10, 2, 9, 8, None),
|
230
|
-
# A slice of bytes in the middle of the file
|
231
|
-
(f"bytes=2-8", 10, 2, 8, 7, None),
|
232
|
-
# Invalid range
|
233
|
-
("bytes=something", 10, None, None, None, ValueError),
|
234
|
-
# Multi-part ranges not supported yet
|
235
|
-
("bytes=0-50, 100-150", 10, None, None, None, NotImplementedError),
|
236
|
-
],
|
237
|
-
)
|
238
|
-
def test_parse_media_range(range_request, filesize, start, end, range_size, exception):
|
239
|
-
if not exception:
|
240
|
-
result = parse_media_range(range_request, filesize)
|
241
|
-
assert result == (start, end, range_size)
|
242
|
-
else:
|
243
|
-
with pytest.raises(exception):
|
244
|
-
parse_media_range(range_request, filesize)
|
245
|
-
|
246
|
-
|
247
|
-
@pytest.mark.asyncio
|
248
|
-
async def test_resource_download_field_file_content_disposition(
|
249
|
-
reader_api: Callable[..., AsyncClient], test_resource: Resource
|
250
|
-
) -> None:
|
251
|
-
rsc = test_resource
|
252
|
-
kbid = rsc.kb.kbid
|
253
|
-
rid = rsc.uuid
|
254
|
-
field_id = "file1"
|
255
|
-
download_url = (
|
256
|
-
f"/{KB_PREFIX}/{kbid}/{RESOURCE_PREFIX}/{rid}/file/{field_id}/download/field"
|
257
|
-
)
|
258
|
-
async with reader_api(roles=[NucliaDBRoles.READER]) as client:
|
259
|
-
# Defaults to attachment
|
260
|
-
resp = await client.get(download_url)
|
261
|
-
assert resp.status_code == 200
|
262
|
-
assert resp.headers["Content-Disposition"].startswith("attachment; filename=")
|
263
|
-
|
264
|
-
resp = await client.get(f"{download_url}?inline=true")
|
265
|
-
assert resp.status_code == 200
|
266
|
-
assert resp.headers["Content-Disposition"] == "inline"
|
267
|
-
|
268
|
-
|
269
|
-
@pytest.mark.parametrize("text", ["ÇŞĞIİÖÜ"])
|
270
|
-
def test_safe_http_header_encode(text):
|
271
|
-
safe_text = safe_http_header_encode(text)
|
272
|
-
# This is how startette encodes the headers
|
273
|
-
safe_text.lower().encode("latin-1")
|