nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/search/api/v1/chat.py
DELETED
@@ -1,263 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import base64
|
21
|
-
import json
|
22
|
-
from typing import Any, Optional, Union
|
23
|
-
|
24
|
-
import pydantic
|
25
|
-
from fastapi import Body, Header, Request, Response
|
26
|
-
from fastapi.openapi.models import Example
|
27
|
-
from fastapi_versioning import version
|
28
|
-
from starlette.responses import StreamingResponse
|
29
|
-
|
30
|
-
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
31
|
-
from nucliadb.models.responses import HTTPClientError
|
32
|
-
from nucliadb.search import logger, predict
|
33
|
-
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
34
|
-
from nucliadb.search.predict import AnswerStatusCode
|
35
|
-
from nucliadb.search.search.chat.query import (
|
36
|
-
START_OF_CITATIONS,
|
37
|
-
chat,
|
38
|
-
get_relations_results,
|
39
|
-
)
|
40
|
-
from nucliadb.search.search.exceptions import (
|
41
|
-
IncompleteFindResultsError,
|
42
|
-
InvalidQueryError,
|
43
|
-
)
|
44
|
-
from nucliadb_models.resource import NucliaDBRoles
|
45
|
-
from nucliadb_models.search import (
|
46
|
-
ChatOptions,
|
47
|
-
ChatRequest,
|
48
|
-
KnowledgeboxFindResults,
|
49
|
-
NucliaDBClientType,
|
50
|
-
PromptContext,
|
51
|
-
PromptContextOrder,
|
52
|
-
Relations,
|
53
|
-
parse_max_tokens,
|
54
|
-
)
|
55
|
-
from nucliadb_telemetry.errors import capture_exception
|
56
|
-
from nucliadb_utils.authentication import requires
|
57
|
-
from nucliadb_utils.exceptions import LimitsExceededError
|
58
|
-
|
59
|
-
END_OF_STREAM = "_END_"
|
60
|
-
|
61
|
-
|
62
|
-
class SyncChatResponse(pydantic.BaseModel):
|
63
|
-
answer: str
|
64
|
-
relations: Optional[Relations] = None
|
65
|
-
results: KnowledgeboxFindResults
|
66
|
-
status: AnswerStatusCode
|
67
|
-
citations: dict[str, Any] = {}
|
68
|
-
prompt_context: Optional[PromptContext] = None
|
69
|
-
prompt_context_order: Optional[PromptContextOrder] = None
|
70
|
-
|
71
|
-
|
72
|
-
CHAT_EXAMPLES = {
|
73
|
-
"search_and_chat": Example(
|
74
|
-
summary="Ask who won the league final",
|
75
|
-
description="You can ask a question to your knowledge box", # noqa
|
76
|
-
value={
|
77
|
-
"query": "Who won the league final?",
|
78
|
-
},
|
79
|
-
),
|
80
|
-
"search_and_chat_with_custom_prompt": Example(
|
81
|
-
summary="Ask for the gold price evolution in 2023 in a very conscise way",
|
82
|
-
description="You can ask a question and specify a custom prompt to tweak the tone of the response", # noqa
|
83
|
-
value={
|
84
|
-
"query": "How has the price of gold evolved during 2023?",
|
85
|
-
"prompt": "Given this context: {context}. Answer this {question} in a concise way using the provided context", # noqa
|
86
|
-
},
|
87
|
-
),
|
88
|
-
}
|
89
|
-
|
90
|
-
|
91
|
-
@api.post(
|
92
|
-
f"/{KB_PREFIX}/{{kbid}}/chat",
|
93
|
-
status_code=200,
|
94
|
-
summary="Chat on a Knowledge Box",
|
95
|
-
description="Chat on a Knowledge Box",
|
96
|
-
tags=["Search"],
|
97
|
-
response_model=None,
|
98
|
-
)
|
99
|
-
@requires(NucliaDBRoles.READER)
|
100
|
-
@version(1)
|
101
|
-
async def chat_knowledgebox_endpoint(
|
102
|
-
request: Request,
|
103
|
-
kbid: str,
|
104
|
-
item: ChatRequest = Body(openapi_examples=CHAT_EXAMPLES),
|
105
|
-
x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
|
106
|
-
x_nucliadb_user: str = Header(""),
|
107
|
-
x_forwarded_for: str = Header(""),
|
108
|
-
x_synchronous: bool = Header(
|
109
|
-
False,
|
110
|
-
description="When set to true, outputs response as JSON in a non-streaming way. "
|
111
|
-
"This is slower and requires waiting for entire answer to be ready.",
|
112
|
-
),
|
113
|
-
) -> Union[StreamingResponse, HTTPClientError, Response]:
|
114
|
-
try:
|
115
|
-
return await create_chat_response(
|
116
|
-
kbid, item, x_nucliadb_user, x_ndb_client, x_forwarded_for, x_synchronous
|
117
|
-
)
|
118
|
-
except KnowledgeBoxNotFound:
|
119
|
-
return HTTPClientError(
|
120
|
-
status_code=404,
|
121
|
-
detail=f"Knowledge Box '{kbid}' not found.",
|
122
|
-
)
|
123
|
-
except LimitsExceededError as exc:
|
124
|
-
return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
|
125
|
-
except predict.ProxiedPredictAPIError as err:
|
126
|
-
return HTTPClientError(
|
127
|
-
status_code=err.status,
|
128
|
-
detail=err.detail,
|
129
|
-
)
|
130
|
-
except IncompleteFindResultsError:
|
131
|
-
return HTTPClientError(
|
132
|
-
status_code=529,
|
133
|
-
detail="Temporary error on information retrieval. Please try again.",
|
134
|
-
)
|
135
|
-
except predict.RephraseMissingContextError:
|
136
|
-
return HTTPClientError(
|
137
|
-
status_code=412,
|
138
|
-
detail="Unable to rephrase the query with the provided context.",
|
139
|
-
)
|
140
|
-
except predict.RephraseError as err:
|
141
|
-
return HTTPClientError(
|
142
|
-
status_code=529,
|
143
|
-
detail=f"Temporary error while rephrasing the query. Please try again later. Error: {err}",
|
144
|
-
)
|
145
|
-
except InvalidQueryError as exc:
|
146
|
-
return HTTPClientError(status_code=412, detail=str(exc))
|
147
|
-
|
148
|
-
|
149
|
-
async def create_chat_response(
|
150
|
-
kbid: str,
|
151
|
-
chat_request: ChatRequest,
|
152
|
-
user_id: str,
|
153
|
-
client_type: NucliaDBClientType,
|
154
|
-
origin: str,
|
155
|
-
x_synchronous: bool,
|
156
|
-
resource: Optional[str] = None,
|
157
|
-
) -> Response:
|
158
|
-
chat_request.max_tokens = parse_max_tokens(chat_request.max_tokens)
|
159
|
-
chat_result = await chat(
|
160
|
-
kbid,
|
161
|
-
chat_request,
|
162
|
-
user_id,
|
163
|
-
client_type,
|
164
|
-
origin,
|
165
|
-
resource=resource,
|
166
|
-
)
|
167
|
-
if x_synchronous:
|
168
|
-
streamed_answer = b""
|
169
|
-
async for chunk in chat_result.answer_stream:
|
170
|
-
streamed_answer += chunk
|
171
|
-
|
172
|
-
answer, citations = parse_streamed_answer(
|
173
|
-
streamed_answer, chat_request.citations
|
174
|
-
)
|
175
|
-
|
176
|
-
relations_results = None
|
177
|
-
if ChatOptions.RELATIONS in chat_request.features:
|
178
|
-
# XXX should use query parser here
|
179
|
-
relations_results = await get_relations_results(
|
180
|
-
kbid=kbid, text_answer=answer, target_shard_replicas=chat_request.shards
|
181
|
-
)
|
182
|
-
|
183
|
-
sync_chat_resp = SyncChatResponse(
|
184
|
-
answer=answer,
|
185
|
-
relations=relations_results,
|
186
|
-
results=chat_result.find_results,
|
187
|
-
status=chat_result.status_code.value,
|
188
|
-
citations=citations,
|
189
|
-
)
|
190
|
-
if chat_request.debug:
|
191
|
-
sync_chat_resp.prompt_context = chat_result.prompt_context
|
192
|
-
sync_chat_resp.prompt_context_order = chat_result.prompt_context_order
|
193
|
-
return Response(
|
194
|
-
content=sync_chat_resp.json(exclude_unset=True),
|
195
|
-
headers={
|
196
|
-
"NUCLIA-LEARNING-ID": chat_result.nuclia_learning_id or "unknown",
|
197
|
-
"Access-Control-Expose-Headers": "NUCLIA-LEARNING-ID",
|
198
|
-
"Content-Type": "application/json",
|
199
|
-
},
|
200
|
-
)
|
201
|
-
else:
|
202
|
-
|
203
|
-
async def _streaming_response():
|
204
|
-
bytes_results = base64.b64encode(chat_result.find_results.json().encode())
|
205
|
-
yield len(bytes_results).to_bytes(length=4, byteorder="big", signed=False)
|
206
|
-
yield bytes_results
|
207
|
-
|
208
|
-
streamed_answer = b""
|
209
|
-
async for chunk in chat_result.answer_stream:
|
210
|
-
streamed_answer += chunk
|
211
|
-
yield chunk
|
212
|
-
|
213
|
-
answer, _ = parse_streamed_answer(streamed_answer, chat_request.citations)
|
214
|
-
|
215
|
-
yield END_OF_STREAM.encode()
|
216
|
-
if ChatOptions.RELATIONS in chat_request.features:
|
217
|
-
# XXX should use query parser here
|
218
|
-
relations_results = await get_relations_results(
|
219
|
-
kbid=kbid,
|
220
|
-
text_answer=answer,
|
221
|
-
target_shard_replicas=chat_request.shards,
|
222
|
-
)
|
223
|
-
yield base64.b64encode(relations_results.json().encode())
|
224
|
-
|
225
|
-
return StreamingResponse(
|
226
|
-
_streaming_response(),
|
227
|
-
media_type="application/octet-stream",
|
228
|
-
headers={
|
229
|
-
"NUCLIA-LEARNING-ID": chat_result.nuclia_learning_id or "unknown",
|
230
|
-
"Access-Control-Expose-Headers": "NUCLIA-LEARNING-ID",
|
231
|
-
},
|
232
|
-
)
|
233
|
-
|
234
|
-
|
235
|
-
def parse_streamed_answer(
|
236
|
-
streamed_bytes: bytes, requested_citations: bool
|
237
|
-
) -> tuple[str, dict[str, Any]]:
|
238
|
-
try:
|
239
|
-
text_answer, tail = streamed_bytes.split(START_OF_CITATIONS, 1)
|
240
|
-
except ValueError:
|
241
|
-
if requested_citations:
|
242
|
-
logger.warning(
|
243
|
-
"Citations were requested but not found in the answer. "
|
244
|
-
"Returning the answer without citations."
|
245
|
-
)
|
246
|
-
return streamed_bytes.decode("utf-8"), {}
|
247
|
-
if not requested_citations:
|
248
|
-
logger.warning(
|
249
|
-
"Citations were not requested but found in the answer. "
|
250
|
-
"Returning the answer without citations."
|
251
|
-
)
|
252
|
-
return text_answer.decode("utf-8"), {}
|
253
|
-
try:
|
254
|
-
citations_length = int.from_bytes(tail[:4], byteorder="big", signed=False)
|
255
|
-
citations_bytes = tail[4 : 4 + citations_length]
|
256
|
-
citations = json.loads(base64.b64decode(citations_bytes).decode())
|
257
|
-
return text_answer.decode("utf-8"), citations
|
258
|
-
except Exception as exc:
|
259
|
-
capture_exception(exc)
|
260
|
-
logger.exception(
|
261
|
-
"Error parsing citations. Returning the answer without citations."
|
262
|
-
)
|
263
|
-
return text_answer.decode("utf-8"), {}
|
@@ -1,174 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from typing import Optional, Union
|
21
|
-
|
22
|
-
from fastapi import Header, Request, Response
|
23
|
-
from fastapi_versioning import version
|
24
|
-
from starlette.responses import StreamingResponse
|
25
|
-
|
26
|
-
from nucliadb.common import datamanagers
|
27
|
-
from nucliadb.models.responses import HTTPClientError
|
28
|
-
from nucliadb.search import predict
|
29
|
-
from nucliadb.search.api.v1.router import KB_PREFIX, RESOURCE_SLUG_PREFIX, api
|
30
|
-
from nucliadb.search.search.exceptions import (
|
31
|
-
IncompleteFindResultsError,
|
32
|
-
InvalidQueryError,
|
33
|
-
)
|
34
|
-
from nucliadb_models.resource import NucliaDBRoles
|
35
|
-
from nucliadb_models.search import ChatRequest, NucliaDBClientType
|
36
|
-
from nucliadb_utils.authentication import requires
|
37
|
-
from nucliadb_utils.exceptions import LimitsExceededError
|
38
|
-
|
39
|
-
from ..chat import create_chat_response
|
40
|
-
|
41
|
-
|
42
|
-
@api.post(
    f"/{KB_PREFIX}/{{kbid}}/resource/{{rid}}/chat",
    status_code=200,
    summary="Chat with a resource (by id)",
    description="Chat with a resource",
    tags=["Search"],
    response_model=None,
)
@requires(NucliaDBRoles.READER)
@version(1)
async def resource_chat_endpoint_by_uuid(
    request: Request,
    kbid: str,
    rid: str,
    item: ChatRequest,
    x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
    x_nucliadb_user: str = Header(""),
    x_forwarded_for: str = Header(""),
    x_synchronous: bool = Header(
        False,
        description="When set to true, outputs response as JSON in a non-streaming way. "
        "This is slower and requires waiting for entire answer to be ready.",
    ),
) -> Union[StreamingResponse, HTTPClientError, Response]:
    # Route variant addressing the resource by its id: forward everything to
    # the shared handler, passing the path's ``rid`` as ``resource_id``.
    response = await resource_chat_endpoint(
        request=request,
        kbid=kbid,
        item=item,
        x_ndb_client=x_ndb_client,
        x_nucliadb_user=x_nucliadb_user,
        x_forwarded_for=x_forwarded_for,
        x_synchronous=x_synchronous,
        resource_id=rid,
    )
    return response
|
76
|
-
|
77
|
-
|
78
|
-
@api.post(
    f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_SLUG_PREFIX}/{{slug}}/chat",
    status_code=200,
    summary="Chat with a resource (by slug)",
    description="Chat with a resource",
    tags=["Search"],
    response_model=None,
)
@requires(NucliaDBRoles.READER)
@version(1)
async def resource_chat_endpoint_by_slug(
    request: Request,
    kbid: str,
    slug: str,
    item: ChatRequest,
    x_ndb_client: NucliaDBClientType = Header(NucliaDBClientType.API),
    x_nucliadb_user: str = Header(""),
    x_forwarded_for: str = Header(""),
    x_synchronous: bool = Header(
        False,
        description="When set to true, outputs response as JSON in a non-streaming way. "
        "This is slower and requires waiting for entire answer to be ready.",
    ),
) -> Union[StreamingResponse, HTTPClientError, Response]:
    # Route variant addressing the resource by its slug: forward everything to
    # the shared handler, which resolves the slug to a resource id.
    response = await resource_chat_endpoint(
        request=request,
        kbid=kbid,
        item=item,
        x_ndb_client=x_ndb_client,
        x_nucliadb_user=x_nucliadb_user,
        x_forwarded_for=x_forwarded_for,
        x_synchronous=x_synchronous,
        resource_slug=slug,
    )
    return response
|
112
|
-
|
113
|
-
|
114
|
-
async def resource_chat_endpoint(
    request: Request,
    kbid: str,
    item: ChatRequest,
    x_ndb_client: NucliaDBClientType,
    x_nucliadb_user: str,
    x_forwarded_for: str,
    x_synchronous: bool,
    resource_id: Optional[str] = None,
    resource_slug: Optional[str] = None,
) -> Union[StreamingResponse, HTTPClientError, Response]:
    """Shared handler behind the by-id and by-slug resource chat routes.

    When ``resource_id`` is not given, ``resource_slug`` is resolved to an id
    first; an unknown slug yields a 404 ``HTTPClientError``. The chat itself
    is delegated to ``create_chat_response`` and the known failure types are
    translated into ``HTTPClientError`` responses.

    Raises:
        ValueError: if neither ``resource_id`` nor ``resource_slug`` is given.
    """
    if resource_id is None:
        if resource_slug is None:
            raise ValueError("Either resource_id or resource_slug must be provided")

        resource_id = await get_resource_uuid_by_slug(kbid, resource_slug)
        if resource_id is None:
            return HTTPClientError(status_code=404, detail="Resource not found")

    try:
        chat_response = await create_chat_response(
            kbid,
            item,
            x_nucliadb_user,
            x_ndb_client,
            x_forwarded_for,
            x_synchronous,
            resource=resource_id,
        )
    except LimitsExceededError as exc:
        return HTTPClientError(status_code=exc.status_code, detail=exc.detail)
    except predict.ProxiedPredictAPIError as err:
        # Relay the status and detail carried by the proxied error.
        return HTTPClientError(
            status_code=err.status,
            detail=err.detail,
        )
    except IncompleteFindResultsError:
        return HTTPClientError(
            status_code=529,
            detail="Temporary error on information retrieval. Please try again.",
        )
    except predict.RephraseMissingContextError:
        return HTTPClientError(
            status_code=412,
            detail="Unable to rephrase the query with the provided context.",
        )
    except predict.RephraseError as err:
        return HTTPClientError(
            status_code=529,
            detail=f"Temporary error while rephrasing the query. Please try again later. Error: {err}",
        )
    except InvalidQueryError as exc:
        return HTTPClientError(status_code=412, detail=str(exc))
    else:
        return chat_response
|
168
|
-
|
169
|
-
|
170
|
-
async def get_resource_uuid_by_slug(kbid: str, slug: str) -> Optional[str]:
    """Look up a resource's uuid from its slug within a knowledge box.

    Opens a transaction through ``datamanagers.with_transaction`` for the
    lookup and returns whatever ``get_resource_uuid_from_slug`` yields.
    """
    async with datamanagers.with_transaction() as txn:
        resource_uuid = await datamanagers.resources.get_resource_uuid_from_slug(
            txn, kbid=kbid, slug=slug
        )
    return resource_uuid
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
@@ -1,33 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
pytest_plugins = [
|
21
|
-
"pytest_docker_fixtures",
|
22
|
-
"nucliadb.tests.fixtures",
|
23
|
-
"nucliadb.tests.tikv",
|
24
|
-
"nucliadb.ingest.tests.fixtures", # should be refactored out
|
25
|
-
"nucliadb.search.tests.node",
|
26
|
-
"nucliadb.search.tests.fixtures",
|
27
|
-
"nucliadb_utils.tests.conftest",
|
28
|
-
"nucliadb_utils.tests.gcs",
|
29
|
-
"nucliadb_utils.tests.s3",
|
30
|
-
"nucliadb_utils.tests.nats",
|
31
|
-
"nucliadb_utils.tests.asyncbenchmark",
|
32
|
-
"nucliadb_utils.tests.indexing",
|
33
|
-
]
|
@@ -1,199 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
import asyncio
|
21
|
-
from enum import Enum
|
22
|
-
from typing import Optional
|
23
|
-
|
24
|
-
import pytest
|
25
|
-
from httpx import AsyncClient
|
26
|
-
from nucliadb_protos.nodereader_pb2 import GetShardRequest
|
27
|
-
from nucliadb_protos.noderesources_pb2 import Shard
|
28
|
-
from redis import asyncio as aioredis
|
29
|
-
|
30
|
-
from nucliadb.common.cluster.manager import KBShardManager, get_index_node
|
31
|
-
from nucliadb.common.maindb.utils import get_driver
|
32
|
-
from nucliadb.ingest.cache import clear_ingest_cache
|
33
|
-
from nucliadb.ingest.tests.fixtures import broker_resource
|
34
|
-
from nucliadb.search import API_PREFIX
|
35
|
-
from nucliadb_utils.tests import free_port
|
36
|
-
from nucliadb_utils.utilities import clear_global_cache
|
37
|
-
|
38
|
-
|
39
|
-
@pytest.fixture(scope="function")
def test_settings_search(storage, natsd, node, maindb_driver):  # type: ignore
    """Point the global settings objects at the test infrastructure.

    Mutates the ingest, cache, nuclia, nucliadb and running settings in
    place; nothing is returned and nothing is restored afterwards.
    """
    from nucliadb.ingest.settings import settings as ingest_settings
    from nucliadb_utils.cache.settings import settings as cache_settings
    from nucliadb_utils.settings import nuclia_settings, nucliadb_settings, running_settings

    # Infrastructure endpoints
    cache_settings.cache_pubsub_nats_url = [natsd]
    running_settings.debug = False

    # Use the dummy implementations of the external Nuclia services
    nuclia_settings.dummy_processing = True
    nuclia_settings.dummy_predict = True
    nuclia_settings.dummy_learning_services = True

    # Ingest: single partition, no pull worker, gRPC on a free local port
    ingest_settings.disable_pull_worker = True
    ingest_settings.nuclia_partitions = 1
    ingest_settings.grpc_port = free_port()

    # Must come after grpc_port is set, since the address embeds it.
    nucliadb_settings.nucliadb_ingest = f"localhost:{ingest_settings.grpc_port}"
|
64
|
-
|
65
|
-
|
66
|
-
@pytest.fixture(scope="function")
async def search_api(test_settings_search, transaction_utility, redis):  # type: ignore
    """Start the search application and yield an HTTP client factory.

    Setup flushes redis, runs the application startup and waits until at
    least two index nodes are registered in ``manager.INDEX_NODES``.
    Teardown shuts the application down, flushes and closes redis, and
    clears the ingest cache, the global cache and the index node registry.

    The ``pytest.mark.asyncio`` mark previously applied here was dropped:
    marks have no effect on fixtures and pytest deprecates applying them.

    Raises:
        Exception: if fewer than two index nodes show up in time.
    """
    from nucliadb.common.cluster import manager
    from nucliadb.search.app import application

    driver = aioredis.from_url(f"redis://{redis[0]}:{redis[1]}")
    await driver.flushall()

    await application.router.startup()

    # Make sure is clean
    await asyncio.sleep(1)
    count = 0
    while len(manager.INDEX_NODES) < 2:
        print("awaiting cluster nodes - search fixtures.py")
        await asyncio.sleep(1)
        if count == 40:
            raise Exception("No cluster")
        count += 1

    def make_client_fixture(
        roles: Optional[list[Enum]] = None,
        user: str = "",
        version: str = "1",
        root: bool = False,
        extra_headers: Optional[dict[str, str]] = None,
    ) -> AsyncClient:
        """Build an ``AsyncClient`` bound to the search application.

        Unless ``root`` is true the base URL is prefixed with the API prefix
        and version. Roles and user are sent through the ``X-NUCLIADB-ROLES``
        and ``X-NUCLIADB-USER`` headers; ``extra_headers`` are added on top.
        """
        roles = roles or []
        client_base_url = "http://test"

        if root is False:
            client_base_url = f"{client_base_url}/{API_PREFIX}/v{version}"

        client = AsyncClient(app=application, base_url=client_base_url)  # type: ignore
        client.headers["X-NUCLIADB-ROLES"] = ";".join([role.value for role in roles])
        client.headers["X-NUCLIADB-USER"] = user

        # An empty or missing mapping simply adds nothing.
        for header, value in (extra_headers or {}).items():
            client.headers[header] = value

        return client

    yield make_client_fixture
    await application.router.shutdown()
    # Make sure nodes can sync
    await asyncio.sleep(1)
    await driver.flushall()
    await driver.close(close_connection_pool=True)
    clear_ingest_cache()
    clear_global_cache()
    manager.INDEX_NODES.clear()
|
122
|
-
|
123
|
-
|
124
|
-
@pytest.fixture(scope="function")
async def test_search_resource(indexing_utility_registered, processor, knowledgebox_ingest):
    """Ingest one broker resource and yield the knowledge box id.

    The resource is created with rid ``foobar`` and slug ``foobar-slug``.
    The fixture yields only once the shard reports at least 3 fields.
    """
    broker_message = broker_resource(knowledgebox_ingest, rid="foobar", slug="foobar-slug")
    kbid = await inject_message(processor, knowledgebox_ingest, broker_message)

    # Wait for the number of fields this fixture expects from the resource.
    expected_fields = 3
    await wait_for_shard(knowledgebox_ingest, expected_fields)
    yield kbid
|
138
|
-
|
139
|
-
|
140
|
-
@pytest.fixture(scope="function")
async def multiple_search_resource(indexing_utility_registered, processor, knowledgebox_ingest):
    """Ingest 100 broker resources and return the knowledge box id.

    Messages are processed with sequence ids 1..100. The fixture then waits
    until the shard reports at least 100 * 3 fields.
    """
    total_resources = 100
    fields_each = 3

    for seqid in range(1, total_resources + 1):
        broker_message = broker_resource(knowledgebox_ingest)
        await processor.process(message=broker_message, seqid=seqid)

    expected_fields = total_resources * fields_each
    await wait_for_shard(knowledgebox_ingest, expected_fields)
    return knowledgebox_ingest
|
157
|
-
|
158
|
-
|
159
|
-
async def inject_message(processor, knowledgebox_ingest, message, count: int = 1) -> str:
    """Process ``message`` and wait for the shard to report ``count`` fields.

    ``count`` is used both as the sequence id handed to the processor and as
    the minimum field count awaited on the shard. Returns
    ``knowledgebox_ingest`` unchanged.
    """
    await processor.process(message=message, seqid=count)
    await wait_for_shard(knowledgebox_ingest, count)
    return knowledgebox_ingest
|
165
|
-
|
166
|
-
|
167
|
-
async def wait_for_shard(knowledgebox_ingest: str, count: int) -> str:
    """Block until every replica of the active shard reports ``count`` fields.

    Looks up the knowledge box's current active shard, then polls each
    replica's index node (up to 30 rounds, one second apart) until all of
    them report ``fields >= count``.

    Returns:
        ``knowledgebox_ingest`` unchanged.

    Raises:
        Exception: if the knowledge box has no active shard.
        AssertionError: if some replica never reaches ``count`` fields.
    """
    # Make sure is indexed
    driver = get_driver()
    txn = await driver.begin()
    try:
        shard_manager = KBShardManager()
        shard = await shard_manager.get_current_active_shard(txn, knowledgebox_ingest)
    finally:
        # Always release the transaction, including when the lookup raises
        # or no shard is found (it used to be leaked in those cases).
        await txn.abort()

    if shard is None:
        raise Exception("Could not find shard")

    checks: dict[str, bool] = {replica.shard.id: False for replica in shard.replicas}

    for _ in range(30):
        for replica in shard.replicas:
            node_obj = get_index_node(replica.node)
            if node_obj is not None:
                req = GetShardRequest()
                req.shard_id.id = replica.shard.id
                count_shard: Shard = await node_obj.reader.GetShard(req)  # type: ignore
                # Re-evaluated every round, so a replica can flip back to False.
                checks[replica.shard.id] = count_shard.fields >= count

        if all(checks.values()):
            break
        await asyncio.sleep(1)

    assert all(checks.values())
    return knowledgebox_ingest
|