nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -1,45 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import asyncio
|
21
|
-
from unittest.mock import AsyncMock, Mock
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
from nucliadb_protos.nodereader_pb2 import SearchRequest
|
25
|
-
|
26
|
-
from nucliadb.search.search.shards import node_observer, query_shard
|
27
|
-
|
28
|
-
|
29
|
-
async def test_node_observer_records_timeout_errors():
|
30
|
-
node = Mock(id="node-1")
|
31
|
-
# When waiting for a task to finish with asyncio, if it times out asyncio will
|
32
|
-
# cancell the task throwing a CancelledError on that task
|
33
|
-
node.reader.Search = AsyncMock(side_effect=asyncio.CancelledError)
|
34
|
-
query = SearchRequest(body="foo")
|
35
|
-
|
36
|
-
node_observer.counter.clear()
|
37
|
-
|
38
|
-
with pytest.raises(asyncio.CancelledError):
|
39
|
-
await query_shard(node, "shard", query)
|
40
|
-
|
41
|
-
sample = node_observer.counter.collect()[0].samples[0]
|
42
|
-
assert sample.name == "node_client_count_total"
|
43
|
-
assert sample.labels["type"] == "search"
|
44
|
-
assert sample.labels["node_id"] == "node-1"
|
45
|
-
assert sample.labels["status"] == "timeout"
|
@@ -1,82 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import pytest
|
21
|
-
|
22
|
-
from nucliadb.search.search.utils import (
|
23
|
-
has_user_vectors,
|
24
|
-
is_empty_query,
|
25
|
-
is_exact_match_only_query,
|
26
|
-
should_disable_vector_search,
|
27
|
-
)
|
28
|
-
from nucliadb_models.search import SearchRequest
|
29
|
-
|
30
|
-
|
31
|
-
@pytest.mark.parametrize(
|
32
|
-
"item,empty",
|
33
|
-
[
|
34
|
-
(SearchRequest(query=""), True),
|
35
|
-
(SearchRequest(query="foo"), False),
|
36
|
-
],
|
37
|
-
)
|
38
|
-
def test_is_empty_query(item, empty):
|
39
|
-
assert is_empty_query(item) is empty
|
40
|
-
|
41
|
-
|
42
|
-
@pytest.mark.parametrize(
|
43
|
-
"query,exact_match",
|
44
|
-
[
|
45
|
-
("some", False),
|
46
|
-
("some query terms", False),
|
47
|
-
('"something"', True),
|
48
|
-
(' "something"', True),
|
49
|
-
('"something" ', True),
|
50
|
-
('"something exact"', True),
|
51
|
-
('"something exact" and something else', False),
|
52
|
-
],
|
53
|
-
)
|
54
|
-
def test_is_exact_match_only_query(query, exact_match):
|
55
|
-
item = SearchRequest(query=query)
|
56
|
-
assert is_exact_match_only_query(item) is exact_match
|
57
|
-
|
58
|
-
|
59
|
-
@pytest.mark.parametrize(
|
60
|
-
"item,has_vectors",
|
61
|
-
[
|
62
|
-
(SearchRequest(query=""), False),
|
63
|
-
(SearchRequest(vector=[]), False),
|
64
|
-
(SearchRequest(vector=[1.0]), True),
|
65
|
-
],
|
66
|
-
)
|
67
|
-
def test_has_user_vectors(item, has_vectors):
|
68
|
-
assert has_user_vectors(item) is has_vectors
|
69
|
-
|
70
|
-
|
71
|
-
@pytest.mark.parametrize(
|
72
|
-
"item,disable_vectors",
|
73
|
-
[
|
74
|
-
(SearchRequest(query=""), True),
|
75
|
-
(SearchRequest(query='"exact match"'), True),
|
76
|
-
(SearchRequest(query="foo"), False),
|
77
|
-
(SearchRequest(query="", vector=[1.0, 2.0]), False),
|
78
|
-
(SearchRequest(query='"exact match"', vector=[1.0, 2.0]), False),
|
79
|
-
],
|
80
|
-
)
|
81
|
-
def test_should_disable_vectors(item, disable_vectors):
|
82
|
-
assert should_disable_vector_search(item) is disable_vectors
|
@@ -1,270 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
from unittest import mock
|
20
|
-
from unittest.mock import AsyncMock, patch
|
21
|
-
|
22
|
-
import pytest
|
23
|
-
|
24
|
-
from nucliadb.ingest.orm.resource import KB_REVERSE
|
25
|
-
from nucliadb.search.search.chat import prompt as chat_prompt
|
26
|
-
from nucliadb_models.search import (
|
27
|
-
SCORE_TYPE,
|
28
|
-
FindField,
|
29
|
-
FindParagraph,
|
30
|
-
FindResource,
|
31
|
-
KnowledgeboxFindResults,
|
32
|
-
MinScore,
|
33
|
-
)
|
34
|
-
from nucliadb_protos import resources_pb2
|
35
|
-
|
36
|
-
|
37
|
-
@pytest.fixture()
|
38
|
-
def messages():
|
39
|
-
msgs = [
|
40
|
-
resources_pb2.Message(
|
41
|
-
ident="1", content=resources_pb2.MessageContent(text="Message 1")
|
42
|
-
),
|
43
|
-
resources_pb2.Message(
|
44
|
-
ident="2", content=resources_pb2.MessageContent(text="Message 2")
|
45
|
-
),
|
46
|
-
resources_pb2.Message(
|
47
|
-
ident="3",
|
48
|
-
who="1",
|
49
|
-
content=resources_pb2.MessageContent(text="Message 3"),
|
50
|
-
type=resources_pb2.Message.MessageType.QUESTION,
|
51
|
-
),
|
52
|
-
resources_pb2.Message(
|
53
|
-
ident="4",
|
54
|
-
content=resources_pb2.MessageContent(text="Message 4"),
|
55
|
-
type=resources_pb2.Message.MessageType.ANSWER,
|
56
|
-
to=["1"],
|
57
|
-
),
|
58
|
-
resources_pb2.Message(
|
59
|
-
ident="5", content=resources_pb2.MessageContent(text="Message 5")
|
60
|
-
),
|
61
|
-
]
|
62
|
-
yield msgs
|
63
|
-
|
64
|
-
|
65
|
-
@pytest.fixture()
|
66
|
-
def field_obj(messages):
|
67
|
-
mock = AsyncMock()
|
68
|
-
mock.get_metadata.return_value = resources_pb2.FieldConversation(pages=1, total=5)
|
69
|
-
mock.db_get_value.return_value = resources_pb2.Conversation(messages=messages)
|
70
|
-
|
71
|
-
yield mock
|
72
|
-
|
73
|
-
|
74
|
-
@pytest.fixture()
|
75
|
-
def kb(field_obj):
|
76
|
-
mock = AsyncMock()
|
77
|
-
mock.get.return_value.get_field.return_value = field_obj
|
78
|
-
yield mock
|
79
|
-
|
80
|
-
|
81
|
-
@pytest.mark.asyncio
|
82
|
-
async def test_get_next_conversation_messages(field_obj, messages):
|
83
|
-
assert (
|
84
|
-
len(
|
85
|
-
await chat_prompt.get_next_conversation_messages(
|
86
|
-
field_obj=field_obj, page=1, start_idx=0, num_messages=5
|
87
|
-
)
|
88
|
-
)
|
89
|
-
== 5
|
90
|
-
)
|
91
|
-
assert (
|
92
|
-
len(
|
93
|
-
await chat_prompt.get_next_conversation_messages(
|
94
|
-
field_obj=field_obj, page=1, start_idx=0, num_messages=1
|
95
|
-
)
|
96
|
-
)
|
97
|
-
== 1
|
98
|
-
)
|
99
|
-
|
100
|
-
assert await chat_prompt.get_next_conversation_messages(
|
101
|
-
field_obj=field_obj,
|
102
|
-
page=1,
|
103
|
-
start_idx=0,
|
104
|
-
num_messages=1,
|
105
|
-
message_type=resources_pb2.Message.MessageType.ANSWER,
|
106
|
-
msg_to="1",
|
107
|
-
) == [messages[3]]
|
108
|
-
|
109
|
-
|
110
|
-
@pytest.mark.asyncio
|
111
|
-
async def test_find_conversation_message(field_obj, messages):
|
112
|
-
assert await chat_prompt.find_conversation_message(
|
113
|
-
field_obj=field_obj, mident="3"
|
114
|
-
) == (messages[2], 1, 2)
|
115
|
-
|
116
|
-
|
117
|
-
@pytest.mark.asyncio
|
118
|
-
async def test_get_expanded_conversation_messages(kb, messages):
|
119
|
-
assert await chat_prompt.get_expanded_conversation_messages(
|
120
|
-
kb=kb, rid="rid", field_id="field_id", mident="3"
|
121
|
-
) == [messages[3]]
|
122
|
-
|
123
|
-
|
124
|
-
@pytest.mark.asyncio
|
125
|
-
async def test_get_expanded_conversation_messages_question(kb, messages):
|
126
|
-
assert (
|
127
|
-
await chat_prompt.get_expanded_conversation_messages(
|
128
|
-
kb=kb, rid="rid", field_id="field_id", mident="1"
|
129
|
-
)
|
130
|
-
== messages[1:]
|
131
|
-
)
|
132
|
-
|
133
|
-
kb.get.assert_called_with("rid")
|
134
|
-
kb.get.return_value.get_field.assert_called_with(
|
135
|
-
"field_id", KB_REVERSE["c"], load=True
|
136
|
-
)
|
137
|
-
|
138
|
-
|
139
|
-
@pytest.mark.asyncio
|
140
|
-
async def test_get_expanded_conversation_messages_missing(kb, messages):
|
141
|
-
assert (
|
142
|
-
await chat_prompt.get_expanded_conversation_messages(
|
143
|
-
kb=kb, rid="rid", field_id="field_id", mident="missing"
|
144
|
-
)
|
145
|
-
== []
|
146
|
-
)
|
147
|
-
|
148
|
-
|
149
|
-
def _create_find_result(
|
150
|
-
_id: str, result_text: str, score_type: SCORE_TYPE = SCORE_TYPE.BM25, order=1
|
151
|
-
):
|
152
|
-
return FindResource(
|
153
|
-
id=_id.split("/")[0],
|
154
|
-
fields={
|
155
|
-
"c/conv": FindField(
|
156
|
-
paragraphs={
|
157
|
-
_id: FindParagraph(
|
158
|
-
id=_id,
|
159
|
-
score=1.0,
|
160
|
-
score_type=score_type,
|
161
|
-
order=order,
|
162
|
-
text=result_text,
|
163
|
-
)
|
164
|
-
}
|
165
|
-
)
|
166
|
-
},
|
167
|
-
)
|
168
|
-
|
169
|
-
|
170
|
-
@pytest.mark.asyncio
|
171
|
-
async def test_default_prompt_context(kb):
|
172
|
-
result_text = " ".join(["text"] * 10)
|
173
|
-
with (
|
174
|
-
patch("nucliadb.search.search.chat.prompt.get_read_only_transaction"),
|
175
|
-
patch("nucliadb.search.search.chat.prompt.get_storage"),
|
176
|
-
patch("nucliadb.search.search.chat.prompt.KnowledgeBoxORM", return_value=kb),
|
177
|
-
):
|
178
|
-
context = chat_prompt.CappedPromptContext(max_size=int(1e6))
|
179
|
-
find_results = KnowledgeboxFindResults(
|
180
|
-
facets={},
|
181
|
-
resources={
|
182
|
-
"bmid": _create_find_result(
|
183
|
-
"bmid/c/conv/ident", result_text, SCORE_TYPE.BM25, order=1
|
184
|
-
),
|
185
|
-
"vecid": _create_find_result(
|
186
|
-
"vecid/c/conv/ident", result_text, SCORE_TYPE.VECTOR, order=2
|
187
|
-
),
|
188
|
-
"both_id": _create_find_result(
|
189
|
-
"both_id/c/conv/ident", result_text, SCORE_TYPE.BOTH, order=0
|
190
|
-
),
|
191
|
-
},
|
192
|
-
)
|
193
|
-
ordered_paragraphs = chat_prompt.get_ordered_paragraphs(find_results)
|
194
|
-
|
195
|
-
await chat_prompt.default_prompt_context(
|
196
|
-
context,
|
197
|
-
"kbid",
|
198
|
-
ordered_paragraphs,
|
199
|
-
)
|
200
|
-
prompt_result = context.output
|
201
|
-
# Check that the results are sorted by increasing order and that the extra
|
202
|
-
# context is added at the beginning, indicating that it has the most priority
|
203
|
-
paragraph_ids = [pid for pid in prompt_result.keys()]
|
204
|
-
assert paragraph_ids == [
|
205
|
-
"both_id/c/conv/ident",
|
206
|
-
"bmid/c/conv/ident",
|
207
|
-
"vecid/c/conv/ident",
|
208
|
-
]
|
209
|
-
|
210
|
-
|
211
|
-
@pytest.fixture(scope="function")
|
212
|
-
def find_results():
|
213
|
-
return KnowledgeboxFindResults(
|
214
|
-
facets={},
|
215
|
-
resources={
|
216
|
-
"resource1": _create_find_result(
|
217
|
-
"resource1/a/title", "Resource 1", SCORE_TYPE.BOTH, order=1
|
218
|
-
),
|
219
|
-
"resource2": _create_find_result(
|
220
|
-
"resource2/a/title", "Resource 2", SCORE_TYPE.VECTOR, order=2
|
221
|
-
),
|
222
|
-
},
|
223
|
-
min_score=MinScore(semantic=-1),
|
224
|
-
)
|
225
|
-
|
226
|
-
|
227
|
-
@pytest.mark.asyncio
|
228
|
-
async def test_prompt_context_builder_prepends_user_context(
|
229
|
-
find_results: KnowledgeboxFindResults,
|
230
|
-
):
|
231
|
-
builder = chat_prompt.PromptContextBuilder(
|
232
|
-
kbid="kbid", find_results=find_results, user_context=["Carrots are orange"]
|
233
|
-
)
|
234
|
-
|
235
|
-
async def _mock_build_context(context, *args, **kwargs):
|
236
|
-
context["resource1/a/title"] = "Resource 1"
|
237
|
-
context["resource2/a/title"] = "Resource 2"
|
238
|
-
|
239
|
-
with mock.patch.object(builder, "_build_context", new=_mock_build_context):
|
240
|
-
context, context_order, image_context = await builder.build()
|
241
|
-
assert len(context) == 3
|
242
|
-
assert len(context_order) == 3
|
243
|
-
assert len(image_context) == 0
|
244
|
-
assert context["USER_CONTEXT_0"] == "Carrots are orange"
|
245
|
-
assert context["resource1/a/title"] == "Resource 1"
|
246
|
-
assert context["resource2/a/title"] == "Resource 2"
|
247
|
-
assert context_order["USER_CONTEXT_0"] == 0
|
248
|
-
assert context_order["resource1/a/title"] == 1
|
249
|
-
assert context_order["resource2/a/title"] == 2
|
250
|
-
|
251
|
-
|
252
|
-
def test_capped_prompt_context():
|
253
|
-
context = chat_prompt.CappedPromptContext(max_size=2)
|
254
|
-
|
255
|
-
# Check that output is trimmed
|
256
|
-
context["key1"] = "123"
|
257
|
-
|
258
|
-
assert context.output == {"key1": "12"}
|
259
|
-
assert context.size == 2
|
260
|
-
|
261
|
-
# Update existing value
|
262
|
-
context["key1"] = "foobar"
|
263
|
-
assert context.output == {"key1": "fo"}
|
264
|
-
assert context.size == 2
|
265
|
-
|
266
|
-
# Check without limits
|
267
|
-
context = chat_prompt.CappedPromptContext(max_size=None)
|
268
|
-
context["key1"] = "foo" * int(1e6)
|
269
|
-
|
270
|
-
assert context.output == {"key1": "foo" * int(1e6)}
|
@@ -1,108 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
import time
|
21
|
-
|
22
|
-
import pytest
|
23
|
-
from pytest_benchmark.fixture import BenchmarkFixture # type: ignore
|
24
|
-
|
25
|
-
from nucliadb.search.search.paragraphs import highlight_paragraph as highlight
|
26
|
-
|
27
|
-
|
28
|
-
@pytest.mark.benchmark(
|
29
|
-
group="highlight",
|
30
|
-
min_time=0.1,
|
31
|
-
max_time=0.5,
|
32
|
-
min_rounds=5,
|
33
|
-
timer=time.time,
|
34
|
-
disable_gc=True,
|
35
|
-
warmup=False,
|
36
|
-
)
|
37
|
-
def test_highligh_error(benchmark: BenchmarkFixture):
|
38
|
-
text = "bu kimlik belgelerinin geçerlilik sürelerinin standartlara aykırı olmadığını, fotoğraftaki yakın alan iletişim çipindeki bilgilerin tutarlı ve geçerli olmadığını ve İçişleri Bakanlığı'nın ortasında kimlik değişimine erişebilenleri onaylar. sistem" # noqa
|
39
|
-
ematch = ["kimlik", "sistem"]
|
40
|
-
res = benchmark(highlight, text, [], ematch)
|
41
|
-
assert res.count("mark") == 6
|
42
|
-
assert (
|
43
|
-
res
|
44
|
-
== "bu <mark>kimlik</mark> belgelerinin geçerlilik sürelerinin standartlara aykırı olmadığını, fotoğraftaki yakın alan iletişim çipindeki bilgilerin tutarlı ve geçerli olmadığını ve İçişleri Bakanlığı'nın ortasında <mark>kimlik</mark> değişimine erişebilenleri onaylar. <mark>sistem</mark>" # noqa
|
45
|
-
)
|
46
|
-
|
47
|
-
|
48
|
-
def test_highlight():
|
49
|
-
res = highlight(
|
50
|
-
"Query whatever you want my to make it work my query with this",
|
51
|
-
["this", "is", "my", "query"],
|
52
|
-
)
|
53
|
-
assert (
|
54
|
-
res
|
55
|
-
== "<mark>Query</mark> whatever you want <mark>my</mark> to make it work <mark>my</mark> <mark>query</mark> with <mark>this</mark>" # noqa
|
56
|
-
)
|
57
|
-
|
58
|
-
res = highlight(
|
59
|
-
"Query whatever you want to make it work my query with this",
|
60
|
-
["this", "is"],
|
61
|
-
["my query"],
|
62
|
-
)
|
63
|
-
|
64
|
-
assert (
|
65
|
-
res
|
66
|
-
== "Query whatever you want to make it work <mark>my query</mark> with <mark>this</mark>"
|
67
|
-
)
|
68
|
-
|
69
|
-
res = highlight(
|
70
|
-
"Query whatever you redis want to make it work my query with this",
|
71
|
-
["this", "is"],
|
72
|
-
["my query"],
|
73
|
-
)
|
74
|
-
|
75
|
-
assert (
|
76
|
-
res
|
77
|
-
== "Query whatever you redis want to make it work <mark>my query</mark> with <mark>this</mark>"
|
78
|
-
)
|
79
|
-
|
80
|
-
res = highlight(
|
81
|
-
"Plone offers superior security controls, often without cost, of course!",
|
82
|
-
["use", "cases", "of", "plone"],
|
83
|
-
)
|
84
|
-
|
85
|
-
assert (
|
86
|
-
res
|
87
|
-
== "<mark>Plone</mark> offers superior security controls, often without cost, <mark>of</mark> course!"
|
88
|
-
)
|
89
|
-
|
90
|
-
res = highlight(
|
91
|
-
"In contrast, traditional companies often make it impossible",
|
92
|
-
["of", "market"],
|
93
|
-
["of", "market"],
|
94
|
-
)
|
95
|
-
assert res == "In contrast, traditional companies often make it impossible"
|
96
|
-
|
97
|
-
# sc-3067: Unbalanced parenthesis or brackets in query should not make highlight fail
|
98
|
-
res = highlight(
|
99
|
-
"Some sentence here",
|
100
|
-
[
|
101
|
-
"Some).",
|
102
|
-
],
|
103
|
-
[
|
104
|
-
"sent)ence",
|
105
|
-
"(here",
|
106
|
-
],
|
107
|
-
)
|
108
|
-
assert res == "Some sentence here"
|
@@ -1,125 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
from unittest import mock
|
20
|
-
|
21
|
-
import jsonschema # type: ignore
|
22
|
-
import pytest
|
23
|
-
|
24
|
-
from nucliadb.search.search.filters import (
|
25
|
-
INDEX_NODE_FILTERS_SCHEMA,
|
26
|
-
convert_filter_to_node_schema,
|
27
|
-
convert_to_node_filters,
|
28
|
-
iter_filter_labels_expression,
|
29
|
-
translate_label_filters,
|
30
|
-
)
|
31
|
-
from nucliadb_models.search import Filter
|
32
|
-
|
33
|
-
|
34
|
-
@pytest.fixture(scope="function")
|
35
|
-
def is_paragraph_labelset_kind_mock():
|
36
|
-
with mock.patch(
|
37
|
-
"nucliadb.search.search.filters.is_paragraph_labelset_kind"
|
38
|
-
) as mocked:
|
39
|
-
yield mocked
|
40
|
-
|
41
|
-
|
42
|
-
@pytest.mark.parametrize(
|
43
|
-
"original,converted",
|
44
|
-
[
|
45
|
-
("foo", {"literal": "foo"}),
|
46
|
-
(Filter(all=["foo"]), {"literal": "foo"}),
|
47
|
-
(Filter(all=["foo", "bar"]), {"and": [{"literal": "foo"}, {"literal": "bar"}]}),
|
48
|
-
(Filter(any=["foo"]), {"literal": "foo"}),
|
49
|
-
(Filter(any=["foo", "bar"]), {"or": [{"literal": "foo"}, {"literal": "bar"}]}),
|
50
|
-
(Filter(none=["foo"]), {"not": {"literal": "foo"}}),
|
51
|
-
(
|
52
|
-
Filter(none=["foo", "bar"]),
|
53
|
-
{"not": {"or": [{"literal": "foo"}, {"literal": "bar"}]}},
|
54
|
-
),
|
55
|
-
(Filter(not_all=["foo"]), {"not": {"literal": "foo"}}),
|
56
|
-
(
|
57
|
-
Filter(not_all=["foo", "bar"]),
|
58
|
-
{"not": {"and": [{"literal": "foo"}, {"literal": "bar"}]}},
|
59
|
-
),
|
60
|
-
],
|
61
|
-
)
|
62
|
-
def test_convert_filter_to_node_schema(original, converted):
|
63
|
-
assert convert_filter_to_node_schema(original) == converted
|
64
|
-
jsonschema.validate(converted, INDEX_NODE_FILTERS_SCHEMA)
|
65
|
-
|
66
|
-
|
67
|
-
def test_convert_to_node_filters():
|
68
|
-
assert convert_to_node_filters([]) == {}
|
69
|
-
assert convert_to_node_filters(["foo"]) == {"literal": "foo"}
|
70
|
-
assert convert_to_node_filters(["foo", "bar"]) == {
|
71
|
-
"and": [{"literal": "foo"}, {"literal": "bar"}]
|
72
|
-
}
|
73
|
-
assert convert_to_node_filters([Filter(all=["foo"])]) == {"literal": "foo"}
|
74
|
-
assert convert_to_node_filters([Filter(all=["foo"]), Filter(any=["bar"])]) == {
|
75
|
-
"and": [{"literal": "foo"}, {"literal": "bar"}]
|
76
|
-
}
|
77
|
-
|
78
|
-
|
79
|
-
def test_translate_label_filters():
|
80
|
-
literal = {"literal": "/classification.labels/foo/bar"}
|
81
|
-
translated = {"literal": "/l/foo/bar"}
|
82
|
-
|
83
|
-
assert translate_label_filters(literal) == translated
|
84
|
-
assert translate_label_filters({"not": literal}) == {"not": translated}
|
85
|
-
assert translate_label_filters({"and": [literal, literal]}) == {
|
86
|
-
"and": [translated, translated]
|
87
|
-
}
|
88
|
-
assert translate_label_filters({"or": [literal, literal]}) == {
|
89
|
-
"or": [translated, translated]
|
90
|
-
}
|
91
|
-
assert translate_label_filters(
|
92
|
-
{"and": [{"or": [literal, literal]}, {"not": literal}]}
|
93
|
-
) == {
|
94
|
-
"and": [
|
95
|
-
{"or": [translated, translated]},
|
96
|
-
{"not": translated},
|
97
|
-
]
|
98
|
-
}
|
99
|
-
|
100
|
-
|
101
|
-
def test_iter_filter_labels_expression():
|
102
|
-
literal = {"literal": "foo"}
|
103
|
-
assert list(iter_filter_labels_expression(literal)) == ["foo"]
|
104
|
-
assert list(iter_filter_labels_expression({"and": [literal, literal]})) == [
|
105
|
-
"foo",
|
106
|
-
"foo",
|
107
|
-
]
|
108
|
-
assert list(iter_filter_labels_expression({"or": [literal, literal]})) == [
|
109
|
-
"foo",
|
110
|
-
"foo",
|
111
|
-
]
|
112
|
-
assert list(
|
113
|
-
iter_filter_labels_expression({"not": {"and": [literal, literal]}})
|
114
|
-
) == ["foo", "foo"]
|
115
|
-
|
116
|
-
|
117
|
-
def test_filters_model():
|
118
|
-
f = Filter(all=["foo", "bar"], any=None)
|
119
|
-
assert f.all == ["foo", "bar"]
|
120
|
-
assert f.any is None
|
121
|
-
assert f.none is None
|
122
|
-
assert f.not_all is None
|
123
|
-
|
124
|
-
with pytest.raises(ValueError):
|
125
|
-
Filter(all=["foo"], any=["bar"])
|