nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,157 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
import asyncio
|
21
|
-
import random
|
22
|
-
from unittest.mock import AsyncMock, MagicMock, patch
|
23
|
-
|
24
|
-
import pytest
|
25
|
-
from nucliadb_protos.utils_pb2 import ExtractedText
|
26
|
-
|
27
|
-
from nucliadb.search.search import paragraphs
|
28
|
-
|
29
|
-
|
30
|
-
@pytest.fixture()
|
31
|
-
def extracted_text():
|
32
|
-
yield ExtractedText(
|
33
|
-
text=b"Hello World!",
|
34
|
-
split_text={"1": b"Hello", "2": b"World!"},
|
35
|
-
)
|
36
|
-
|
37
|
-
|
38
|
-
@pytest.fixture()
|
39
|
-
def storage_field(extracted_text):
|
40
|
-
mock = MagicMock()
|
41
|
-
|
42
|
-
data = extracted_text.SerializeToString()
|
43
|
-
|
44
|
-
async def _read_range(start, end):
|
45
|
-
yield data[start:end]
|
46
|
-
|
47
|
-
mock.read_range = _read_range
|
48
|
-
yield mock
|
49
|
-
|
50
|
-
|
51
|
-
@pytest.fixture()
|
52
|
-
def field(storage_field, extracted_text):
|
53
|
-
mock = MagicMock()
|
54
|
-
mock.get_storage_field.return_value = storage_field
|
55
|
-
mock.get_extracted_text = AsyncMock(return_value=extracted_text)
|
56
|
-
yield mock
|
57
|
-
|
58
|
-
|
59
|
-
async def test_get_paragraph_from_full_text(field, extracted_text: ExtractedText):
|
60
|
-
assert (
|
61
|
-
await paragraphs.get_paragraph_from_full_text(
|
62
|
-
field=field, start=0, end=12, split=None
|
63
|
-
)
|
64
|
-
== extracted_text.text
|
65
|
-
)
|
66
|
-
|
67
|
-
|
68
|
-
async def test_get_paragraph_from_full_text_with_split(
|
69
|
-
field, extracted_text: ExtractedText
|
70
|
-
):
|
71
|
-
assert (
|
72
|
-
await paragraphs.get_paragraph_from_full_text(
|
73
|
-
field=field, start=0, end=6, split="1"
|
74
|
-
)
|
75
|
-
== extracted_text.split_text["1"]
|
76
|
-
)
|
77
|
-
|
78
|
-
|
79
|
-
class TestGetParagraphText:
|
80
|
-
@pytest.fixture()
|
81
|
-
def orm_resource(self, field):
|
82
|
-
mock = AsyncMock()
|
83
|
-
mock.get_field.return_value = field
|
84
|
-
with patch(
|
85
|
-
"nucliadb.search.search.paragraphs.get_resource_from_cache",
|
86
|
-
return_value=mock,
|
87
|
-
):
|
88
|
-
yield mock
|
89
|
-
|
90
|
-
async def test_get_paragraph_text(self, orm_resource):
|
91
|
-
assert (
|
92
|
-
await paragraphs.get_paragraph_text(
|
93
|
-
kbid="kbid",
|
94
|
-
rid="rid",
|
95
|
-
field="/t/text",
|
96
|
-
start=0,
|
97
|
-
end=12,
|
98
|
-
split=None,
|
99
|
-
highlight=True,
|
100
|
-
ematches=None,
|
101
|
-
matches=None,
|
102
|
-
)
|
103
|
-
== "Hello World!"
|
104
|
-
)
|
105
|
-
|
106
|
-
orm_resource.get_field.assert_called_once_with("text", 4, load=False)
|
107
|
-
|
108
|
-
|
109
|
-
async def fake_get_extracted_text_from_gcloud(*args, **kwargs):
|
110
|
-
await asyncio.sleep(random.uniform(0, 1))
|
111
|
-
return ExtractedText(text=b"Hello World!")
|
112
|
-
|
113
|
-
|
114
|
-
async def test_get_field_extracted_text_is_cached(field):
|
115
|
-
field.kbid = "kbid"
|
116
|
-
field.uuid = "rid"
|
117
|
-
field.id = "fid"
|
118
|
-
# Simulate a slow response from GCloud
|
119
|
-
field.get_extracted_text = AsyncMock(
|
120
|
-
side_effect=fake_get_extracted_text_from_gcloud
|
121
|
-
)
|
122
|
-
|
123
|
-
# Run 10 times in parallel to check that the cache is working
|
124
|
-
etcache = paragraphs.ExtractedTextCache()
|
125
|
-
futures = [
|
126
|
-
paragraphs.get_field_extracted_text(field, cache=etcache) for _ in range(10)
|
127
|
-
]
|
128
|
-
await asyncio.gather(*futures)
|
129
|
-
|
130
|
-
field.get_extracted_text.assert_awaited_once()
|
131
|
-
|
132
|
-
|
133
|
-
async def test_get_field_extracted_text_is_not_cached_when_none(field):
|
134
|
-
field.get_extracted_text = AsyncMock(return_value=None)
|
135
|
-
|
136
|
-
await paragraphs.get_field_extracted_text(field)
|
137
|
-
await paragraphs.get_field_extracted_text(field)
|
138
|
-
|
139
|
-
assert field.get_extracted_text.await_count == 2
|
140
|
-
|
141
|
-
|
142
|
-
def test_extracted_text_cache():
|
143
|
-
etcache = paragraphs.ExtractedTextCache()
|
144
|
-
assert etcache.get_value("foo") is None
|
145
|
-
|
146
|
-
assert isinstance(etcache.get_lock("foo"), asyncio.Lock)
|
147
|
-
assert len(etcache.locks) == 1
|
148
|
-
|
149
|
-
etcache.set_value("foo", "bar")
|
150
|
-
assert len(etcache.values) == 1
|
151
|
-
|
152
|
-
assert etcache.get_value("foo") == "bar"
|
153
|
-
|
154
|
-
etcache.clear()
|
155
|
-
|
156
|
-
assert len(etcache.values) == 0
|
157
|
-
assert len(etcache.locks) == 0
|
@@ -1,106 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
|
21
|
-
from unittest.mock import AsyncMock, Mock, patch
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
from fastapi.datastructures import QueryParams
|
25
|
-
from fastapi.responses import JSONResponse, StreamingResponse
|
26
|
-
|
27
|
-
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
28
|
-
from nucliadb.search.search.predict_proxy import PredictProxiedEndpoints, predict_proxy
|
29
|
-
|
30
|
-
MODULE = "nucliadb.search.search.predict_proxy"
|
31
|
-
|
32
|
-
|
33
|
-
@pytest.fixture(scope="function")
|
34
|
-
def exists_kb():
|
35
|
-
with patch(f"{MODULE}.exists_kb", return_value=True) as mock:
|
36
|
-
yield mock
|
37
|
-
|
38
|
-
|
39
|
-
@pytest.fixture(scope="function")
|
40
|
-
def predict_response():
|
41
|
-
async def iter_any():
|
42
|
-
for i in range(3):
|
43
|
-
yield i.to_bytes(i, "big")
|
44
|
-
|
45
|
-
resp = Mock()
|
46
|
-
resp.status = 200
|
47
|
-
resp.headers = {}
|
48
|
-
resp.content = Mock(iter_any=iter_any)
|
49
|
-
resp.json = AsyncMock(return_value={"answer": "foo"})
|
50
|
-
yield resp
|
51
|
-
|
52
|
-
|
53
|
-
@pytest.fixture(scope="function")
|
54
|
-
def predict(predict_response):
|
55
|
-
predict_engine = Mock()
|
56
|
-
predict_engine.get_predict_headers = Mock(return_value={})
|
57
|
-
predict_engine.make_request = AsyncMock(return_value=predict_response)
|
58
|
-
with patch(f"{MODULE}.get_predict", return_value=predict_engine):
|
59
|
-
yield predict_engine
|
60
|
-
|
61
|
-
|
62
|
-
async def test_raises_error_on_non_existing_kb(exists_kb):
|
63
|
-
exists_kb.return_value = False
|
64
|
-
with pytest.raises(KnowledgeBoxNotFound):
|
65
|
-
await predict_proxy(
|
66
|
-
"foo",
|
67
|
-
PredictProxiedEndpoints.CHAT,
|
68
|
-
"GET",
|
69
|
-
QueryParams(),
|
70
|
-
)
|
71
|
-
|
72
|
-
|
73
|
-
async def test_stream_response(exists_kb, predict, predict_response):
|
74
|
-
predict_response.headers["Transfer-Encoding"] = "chunked"
|
75
|
-
predict_response.headers["NUCLIA-LEARNING-ID"] = "foo"
|
76
|
-
|
77
|
-
resp = await predict_proxy(
|
78
|
-
"foo",
|
79
|
-
PredictProxiedEndpoints.CHAT,
|
80
|
-
"GET",
|
81
|
-
QueryParams(),
|
82
|
-
)
|
83
|
-
|
84
|
-
assert isinstance(resp, StreamingResponse)
|
85
|
-
assert resp.status_code == 200
|
86
|
-
assert resp.headers["NUCLIA-LEARNING-ID"] == "foo"
|
87
|
-
assert resp.headers["Access-Control-Expose-Headers"] == "NUCLIA-LEARNING-ID"
|
88
|
-
body = [chunk async for chunk in resp.body_iterator]
|
89
|
-
assert list(map(lambda x: x.to_bytes(x, "big"), range(3))) == body
|
90
|
-
|
91
|
-
|
92
|
-
async def test_json_response(exists_kb, predict, predict_response):
|
93
|
-
predict_response.headers["NUCLIA-LEARNING-ID"] = "foo"
|
94
|
-
|
95
|
-
resp = await predict_proxy(
|
96
|
-
"foo",
|
97
|
-
PredictProxiedEndpoints.CHAT,
|
98
|
-
"GET",
|
99
|
-
QueryParams(),
|
100
|
-
)
|
101
|
-
|
102
|
-
assert isinstance(resp, JSONResponse)
|
103
|
-
assert resp.status_code == 200
|
104
|
-
assert resp.headers["NUCLIA-LEARNING-ID"] == "foo"
|
105
|
-
assert resp.headers["Access-Control-Expose-Headers"] == "NUCLIA-LEARNING-ID"
|
106
|
-
assert resp.body == b'{"answer":"foo"}'
|
@@ -1,153 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
import json
|
21
|
-
import unittest
|
22
|
-
from unittest.mock import AsyncMock, Mock, patch
|
23
|
-
|
24
|
-
import pytest
|
25
|
-
from nucliadb_protos.knowledgebox_pb2 import Synonyms
|
26
|
-
from nucliadb_protos.nodereader_pb2 import SearchRequest
|
27
|
-
from nucliadb_protos.utils_pb2 import RelationNode
|
28
|
-
|
29
|
-
from nucliadb.search.search.exceptions import InvalidQueryError
|
30
|
-
from nucliadb.search.search.query import (
|
31
|
-
QueryParser,
|
32
|
-
check_supported_filters,
|
33
|
-
parse_entities_to_filters,
|
34
|
-
)
|
35
|
-
from nucliadb_models.search import MinScore
|
36
|
-
|
37
|
-
QUERY_MODULE = "nucliadb.search.search.query"
|
38
|
-
|
39
|
-
|
40
|
-
def test_parse_entities_to_filters():
|
41
|
-
detected_entities = [
|
42
|
-
RelationNode(value="John", ntype=RelationNode.NodeType.ENTITY, subtype="person")
|
43
|
-
]
|
44
|
-
|
45
|
-
request = SearchRequest()
|
46
|
-
request.filter.field_labels.append("/e/person/Austin")
|
47
|
-
request.filter.expression = json.dumps({"and": [{"literal": "/e/person/Austin"}]})
|
48
|
-
assert parse_entities_to_filters(request, detected_entities) == ["/e/person/John"]
|
49
|
-
assert request.filter.field_labels == ["/e/person/Austin", "/e/person/John"]
|
50
|
-
assert json.loads(request.filter.expression) == {
|
51
|
-
"and": [
|
52
|
-
{"literal": "/e/person/John"},
|
53
|
-
{"and": [{"literal": "/e/person/Austin"}]},
|
54
|
-
]
|
55
|
-
}
|
56
|
-
|
57
|
-
assert parse_entities_to_filters(request, detected_entities) == []
|
58
|
-
assert request.filter.field_labels == ["/e/person/Austin", "/e/person/John"]
|
59
|
-
|
60
|
-
|
61
|
-
@pytest.fixture()
|
62
|
-
def read_only_txn():
|
63
|
-
txn = unittest.mock.AsyncMock()
|
64
|
-
with unittest.mock.patch(
|
65
|
-
f"{QUERY_MODULE}.get_read_only_transaction", return_value=txn
|
66
|
-
):
|
67
|
-
yield txn
|
68
|
-
|
69
|
-
|
70
|
-
@pytest.fixture()
|
71
|
-
def kbdm(read_only_txn):
|
72
|
-
kbdm = unittest.mock.AsyncMock()
|
73
|
-
with unittest.mock.patch(f"{QUERY_MODULE}.datamanagers.kb", kbdm):
|
74
|
-
yield kbdm
|
75
|
-
|
76
|
-
|
77
|
-
class TestApplySynonymsToRequest:
|
78
|
-
@pytest.fixture
|
79
|
-
def get_synonyms(self):
|
80
|
-
get_kb_synonyms = AsyncMock()
|
81
|
-
synonyms = Synonyms()
|
82
|
-
synonyms.terms["planet"].synonyms.extend(["earth", "globe"])
|
83
|
-
get_kb_synonyms.return_value = synonyms
|
84
|
-
yield get_kb_synonyms
|
85
|
-
|
86
|
-
@pytest.fixture
|
87
|
-
def query_parser(self, get_synonyms):
|
88
|
-
qp = QueryParser(
|
89
|
-
kbid="kbid",
|
90
|
-
features=[],
|
91
|
-
query="query",
|
92
|
-
filters=[],
|
93
|
-
faceted=[],
|
94
|
-
page_number=0,
|
95
|
-
page_size=10,
|
96
|
-
min_score=MinScore(semantic=0.5),
|
97
|
-
with_synonyms=True,
|
98
|
-
)
|
99
|
-
with patch("nucliadb.search.search.query.get_kb_synonyms", get_synonyms):
|
100
|
-
yield qp
|
101
|
-
|
102
|
-
@pytest.mark.asyncio
|
103
|
-
async def test_not_applies_if_empty_body(
|
104
|
-
self, query_parser: QueryParser, get_synonyms
|
105
|
-
):
|
106
|
-
query_parser.query = ""
|
107
|
-
search_request = Mock()
|
108
|
-
await query_parser.parse_synonyms(search_request)
|
109
|
-
|
110
|
-
get_synonyms.assert_not_awaited()
|
111
|
-
search_request.ClearField.assert_not_called()
|
112
|
-
|
113
|
-
@pytest.mark.asyncio
|
114
|
-
async def test_not_applies_if_synonyms_object_not_found(
|
115
|
-
self, query_parser: QueryParser, get_synonyms
|
116
|
-
):
|
117
|
-
query_parser.query = "planet"
|
118
|
-
get_synonyms.return_value = None
|
119
|
-
request = Mock()
|
120
|
-
|
121
|
-
await query_parser.parse_synonyms(Mock())
|
122
|
-
|
123
|
-
request.ClearField.assert_not_called()
|
124
|
-
get_synonyms.assert_awaited_once_with("kbid")
|
125
|
-
|
126
|
-
@pytest.mark.asyncio
|
127
|
-
async def test_not_applies_if_synonyms_not_found_for_query(
|
128
|
-
self, query_parser: QueryParser, get_synonyms
|
129
|
-
):
|
130
|
-
query_parser.query = "foobar"
|
131
|
-
request = Mock()
|
132
|
-
|
133
|
-
await query_parser.parse_synonyms(request)
|
134
|
-
|
135
|
-
request.ClearField.assert_not_called()
|
136
|
-
|
137
|
-
query_parser.query = "planet"
|
138
|
-
await query_parser.parse_synonyms(request)
|
139
|
-
|
140
|
-
request.ClearField.assert_called_once_with("body")
|
141
|
-
assert request.advanced_query == "planet OR earth OR globe"
|
142
|
-
|
143
|
-
|
144
|
-
def test_check_supported_filters():
|
145
|
-
check_supported_filters({"literal": "a"}, ["a"])
|
146
|
-
check_supported_filters({"or": [{"literal": "a"}, {"literal": "b"}]}, [])
|
147
|
-
with pytest.raises(InvalidQueryError):
|
148
|
-
check_supported_filters({"or": [{"literal": "a"}, {"literal": "b"}]}, ["b"])
|
149
|
-
with pytest.raises(InvalidQueryError):
|
150
|
-
check_supported_filters(
|
151
|
-
{"and": [{"literal": "a"}, {"and": [{"literal": "c"}, {"literal": "b"}]}]},
|
152
|
-
["b"],
|
153
|
-
)
|
@@ -1,79 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
import json
|
21
|
-
from unittest.mock import patch
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
|
25
|
-
from nucliadb.common.cluster.index_node import IndexNode
|
26
|
-
from nucliadb.search import app
|
27
|
-
|
28
|
-
pytestmark = pytest.mark.asyncio
|
29
|
-
|
30
|
-
|
31
|
-
async def test_alive():
|
32
|
-
with patch.object(app.manager, "get_index_nodes", return_value=[{"id": "node1"}]):
|
33
|
-
resp = await app.alive(None)
|
34
|
-
assert resp.status_code == 200
|
35
|
-
|
36
|
-
|
37
|
-
async def test_not_alive():
|
38
|
-
with patch.object(app.manager, "get_index_nodes", return_value=[]):
|
39
|
-
resp = await app.alive(None)
|
40
|
-
assert resp.status_code == 503
|
41
|
-
|
42
|
-
|
43
|
-
async def test_ready():
|
44
|
-
with patch.object(app.manager, "get_index_nodes", return_value=[{"id": "node1"}]):
|
45
|
-
resp = await app.ready(None)
|
46
|
-
assert resp.status_code == 200
|
47
|
-
|
48
|
-
|
49
|
-
async def test_not_ready():
|
50
|
-
with patch.object(app.manager, "get_index_nodes", return_value=[]):
|
51
|
-
resp = await app.ready(None)
|
52
|
-
assert resp.status_code == 503
|
53
|
-
|
54
|
-
|
55
|
-
async def test_node_members():
|
56
|
-
nodes = [
|
57
|
-
IndexNode(
|
58
|
-
id="node1", address="node1", shard_count=0, available_disk=100, dummy=True
|
59
|
-
),
|
60
|
-
IndexNode(
|
61
|
-
id="node2",
|
62
|
-
address="node2",
|
63
|
-
shard_count=0,
|
64
|
-
available_disk=50,
|
65
|
-
dummy=True,
|
66
|
-
primary_id="node1",
|
67
|
-
),
|
68
|
-
]
|
69
|
-
with patch.object(app.manager, "get_index_nodes", return_value=nodes):
|
70
|
-
resp = await app.node_members(None)
|
71
|
-
assert resp.status_code == 200
|
72
|
-
members = json.loads(resp.body)
|
73
|
-
sorted(members, key=lambda x: x["id"])
|
74
|
-
assert members[0]["id"] == "node1"
|
75
|
-
assert members[0]["primary_id"] is None
|
76
|
-
assert members[0]["available_disk"] == 100
|
77
|
-
assert members[1]["id"] == "node2"
|
78
|
-
assert members[1]["primary_id"] == "node1"
|
79
|
-
assert members[1]["available_disk"] == 50
|
@@ -1,112 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
|
21
|
-
import random
|
22
|
-
|
23
|
-
from nucliadb_protos.nodereader_pb2 import DocumentScored, ParagraphResult
|
24
|
-
|
25
|
-
from nucliadb.search.search.find_merge import Orderer, merge_paragraphs_vectors
|
26
|
-
from nucliadb_models.search import SCORE_TYPE
|
27
|
-
|
28
|
-
|
29
|
-
def test_orderer():
|
30
|
-
orderer = Orderer()
|
31
|
-
|
32
|
-
items = {}
|
33
|
-
for i in range(30):
|
34
|
-
key = str(i)
|
35
|
-
score = random.random() * 25
|
36
|
-
items[key] = score
|
37
|
-
|
38
|
-
boosted = {4, 10, 28}
|
39
|
-
|
40
|
-
boosted_items = []
|
41
|
-
regular_items = []
|
42
|
-
|
43
|
-
for i, (key, score) in enumerate(items.items()):
|
44
|
-
if i in boosted:
|
45
|
-
boosted_items.append(key)
|
46
|
-
orderer.add_boosted(key)
|
47
|
-
else:
|
48
|
-
regular_items.append(key)
|
49
|
-
orderer.add(key)
|
50
|
-
|
51
|
-
sorted_items = list(orderer.sorted_by_insertion())
|
52
|
-
assert sorted_items == boosted_items + regular_items
|
53
|
-
|
54
|
-
|
55
|
-
def test_orderer_handles_duplicate_insertions():
|
56
|
-
orderer = Orderer()
|
57
|
-
orderer.add_boosted("a")
|
58
|
-
orderer.add_boosted("b")
|
59
|
-
orderer.add_boosted("a")
|
60
|
-
orderer.add_boosted("c")
|
61
|
-
orderer.add("a")
|
62
|
-
assert list(orderer.sorted_by_insertion()) == ["a", "b", "c"]
|
63
|
-
|
64
|
-
|
65
|
-
def test_merge_paragraphs_vectors():
|
66
|
-
paragraphs = []
|
67
|
-
for i in range(5):
|
68
|
-
pr = ParagraphResult()
|
69
|
-
pr.uuid = "foo"
|
70
|
-
pr.score.bm25 = i
|
71
|
-
pr.score.booster = 0
|
72
|
-
pr.paragraph = f"id/text/paragraph/{i}/0-10"
|
73
|
-
pr.start = 0
|
74
|
-
pr.end = 10
|
75
|
-
pr.field = "/a/title"
|
76
|
-
paragraphs.append(pr)
|
77
|
-
|
78
|
-
vectors = []
|
79
|
-
for i in range(5):
|
80
|
-
score = max(5 / float(i + 1), 1)
|
81
|
-
vr = DocumentScored()
|
82
|
-
vr.doc_id.id = f"id/vector/paragraph/{i}/0-2"
|
83
|
-
vr.score = score
|
84
|
-
vr.metadata.position.start = 0
|
85
|
-
vr.metadata.position.start = 2
|
86
|
-
vectors.append(vr)
|
87
|
-
|
88
|
-
paragraphs, next_page = merge_paragraphs_vectors(
|
89
|
-
[paragraphs], [vectors], 20, 0, min_score=1
|
90
|
-
)
|
91
|
-
assert not next_page
|
92
|
-
assert len(paragraphs) == 10
|
93
|
-
|
94
|
-
vector_scores = set()
|
95
|
-
for index, score_type in [
|
96
|
-
(0, SCORE_TYPE.BM25),
|
97
|
-
(1, SCORE_TYPE.VECTOR),
|
98
|
-
(2, SCORE_TYPE.BM25),
|
99
|
-
(3, SCORE_TYPE.BM25),
|
100
|
-
(4, SCORE_TYPE.VECTOR),
|
101
|
-
(5, SCORE_TYPE.BM25),
|
102
|
-
(6, SCORE_TYPE.BM25),
|
103
|
-
(7, SCORE_TYPE.VECTOR),
|
104
|
-
(8, SCORE_TYPE.VECTOR),
|
105
|
-
(9, SCORE_TYPE.VECTOR),
|
106
|
-
]:
|
107
|
-
assert paragraphs[index].paragraph.score_type == score_type
|
108
|
-
if score_type == SCORE_TYPE.VECTOR:
|
109
|
-
vector_scores.add(paragraphs[index].paragraph.score)
|
110
|
-
|
111
|
-
# Check that the vector scores are different
|
112
|
-
assert len(vector_scores) == 5
|
@@ -1,34 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from unittest.mock import patch
|
21
|
-
|
22
|
-
from nucliadb.search.search.merge import ResourceSearchResults, merge_paragraphs_results
|
23
|
-
|
24
|
-
|
25
|
-
async def test_str_model():
|
26
|
-
# make sure __str__ works as advertised
|
27
|
-
res = await merge_paragraphs_results([], 1, 1, "kbid", [], [], [], False, 1)
|
28
|
-
assert str(res) == res.json()
|
29
|
-
|
30
|
-
|
31
|
-
async def test_str_model_fallback():
|
32
|
-
with patch.object(ResourceSearchResults, "json", side_effect=Exception("ERROR")):
|
33
|
-
res = await merge_paragraphs_results([], 1, 1, "kbid", [], [], [], False, 1)
|
34
|
-
assert "sentences=None" in str(res)
|