nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -18,7 +18,6 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
from datetime import datetime
|
21
|
-
from time import time
|
22
21
|
from typing import Optional, Union
|
23
22
|
|
24
23
|
from fastapi import Header, Request, Response
|
@@ -28,9 +27,11 @@ from nucliadb.models.responses import HTTPClientError
|
|
28
27
|
from nucliadb.search.api.v1.router import KB_PREFIX, api
|
29
28
|
from nucliadb.search.api.v1.utils import fastapi_query
|
30
29
|
from nucliadb.search.requesters.utils import Method, node_query
|
30
|
+
from nucliadb.search.search import cache
|
31
31
|
from nucliadb.search.search.exceptions import InvalidQueryError
|
32
32
|
from nucliadb.search.search.merge import merge_suggest_results
|
33
33
|
from nucliadb.search.search.query import suggest_query_to_pb
|
34
|
+
from nucliadb.search.search.utils import filter_hidden_resources
|
34
35
|
from nucliadb_models.common import FieldTypeName
|
35
36
|
from nucliadb_models.resource import NucliaDBRoles
|
36
37
|
from nucliadb_models.search import (
|
@@ -40,8 +41,8 @@ from nucliadb_models.search import (
|
|
40
41
|
SearchParamDefaults,
|
41
42
|
SuggestOptions,
|
42
43
|
)
|
44
|
+
from nucliadb_models.utils import DateTime
|
43
45
|
from nucliadb_utils.authentication import requires
|
44
|
-
from nucliadb_utils.utilities import get_audit
|
45
46
|
|
46
47
|
|
47
48
|
@api.get(
|
@@ -63,21 +64,15 @@ async def suggest_knowledgebox(
|
|
63
64
|
fields: list[str] = fastapi_query(SearchParamDefaults.fields),
|
64
65
|
filters: list[str] = fastapi_query(SearchParamDefaults.filters),
|
65
66
|
faceted: list[str] = fastapi_query(SearchParamDefaults.faceted),
|
66
|
-
range_creation_start: Optional[
|
67
|
-
|
68
|
-
|
69
|
-
range_creation_end: Optional[datetime] = fastapi_query(
|
70
|
-
SearchParamDefaults.range_creation_end
|
71
|
-
),
|
72
|
-
range_modification_start: Optional[datetime] = fastapi_query(
|
67
|
+
range_creation_start: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_start),
|
68
|
+
range_creation_end: Optional[DateTime] = fastapi_query(SearchParamDefaults.range_creation_end),
|
69
|
+
range_modification_start: Optional[DateTime] = fastapi_query(
|
73
70
|
SearchParamDefaults.range_modification_start
|
74
71
|
),
|
75
|
-
range_modification_end: Optional[
|
72
|
+
range_modification_end: Optional[DateTime] = fastapi_query(
|
76
73
|
SearchParamDefaults.range_modification_end
|
77
74
|
),
|
78
|
-
features: list[SuggestOptions] = fastapi_query(
|
79
|
-
SearchParamDefaults.suggest_features
|
80
|
-
),
|
75
|
+
features: list[SuggestOptions] = fastapi_query(SearchParamDefaults.suggest_features),
|
81
76
|
show: list[ResourceProperties] = fastapi_query(SearchParamDefaults.show),
|
82
77
|
field_type_filter: list[FieldTypeName] = fastapi_query(
|
83
78
|
SearchParamDefaults.field_type_filter, alias="field_type"
|
@@ -87,6 +82,7 @@ async def suggest_knowledgebox(
|
|
87
82
|
x_forwarded_for: str = Header(""),
|
88
83
|
debug: bool = fastapi_query(SearchParamDefaults.debug),
|
89
84
|
highlight: bool = fastapi_query(SearchParamDefaults.highlight),
|
85
|
+
show_hidden: bool = fastapi_query(SearchParamDefaults.show_hidden),
|
90
86
|
) -> Union[KnowledgeboxSuggestResults, HTTPClientError]:
|
91
87
|
try:
|
92
88
|
return await suggest(
|
@@ -108,6 +104,7 @@ async def suggest_knowledgebox(
|
|
108
104
|
x_forwarded_for,
|
109
105
|
debug,
|
110
106
|
highlight,
|
107
|
+
show_hidden,
|
111
108
|
)
|
112
109
|
except InvalidQueryError as exc:
|
113
110
|
return HTTPClientError(status_code=412, detail=str(exc))
|
@@ -132,49 +129,37 @@ async def suggest(
|
|
132
129
|
x_forwarded_for: str,
|
133
130
|
debug: bool,
|
134
131
|
highlight: bool,
|
132
|
+
show_hidden: bool,
|
135
133
|
) -> KnowledgeboxSuggestResults:
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
)
|
152
|
-
results, incomplete_results, queried_nodes = await node_query(
|
153
|
-
kbid, Method.SUGGEST, pb_query
|
154
|
-
)
|
155
|
-
|
156
|
-
# We need to merge
|
157
|
-
search_results = await merge_suggest_results(
|
158
|
-
results,
|
159
|
-
kbid=kbid,
|
160
|
-
show=show,
|
161
|
-
field_type_filter=field_type_filter,
|
162
|
-
highlight=highlight,
|
163
|
-
)
|
134
|
+
with cache.request_caches():
|
135
|
+
hidden = await filter_hidden_resources(kbid, show_hidden)
|
136
|
+
pb_query = suggest_query_to_pb(
|
137
|
+
features,
|
138
|
+
query,
|
139
|
+
fields,
|
140
|
+
filters,
|
141
|
+
faceted,
|
142
|
+
range_creation_start,
|
143
|
+
range_creation_end,
|
144
|
+
range_modification_start,
|
145
|
+
range_modification_end,
|
146
|
+
hidden,
|
147
|
+
)
|
148
|
+
results, incomplete_results, queried_nodes = await node_query(kbid, Method.SUGGEST, pb_query)
|
164
149
|
|
165
|
-
|
150
|
+
# We need to merge
|
151
|
+
search_results = await merge_suggest_results(
|
152
|
+
results,
|
153
|
+
kbid=kbid,
|
154
|
+
show=show,
|
155
|
+
field_type_filter=field_type_filter,
|
156
|
+
highlight=highlight,
|
157
|
+
)
|
166
158
|
|
167
|
-
|
168
|
-
if debug and queried_shards:
|
169
|
-
search_results.shards = queried_shards
|
159
|
+
response.status_code = 206 if incomplete_results else 200
|
170
160
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
x_nucliadb_user,
|
175
|
-
x_ndb_client.to_proto(),
|
176
|
-
x_forwarded_for,
|
177
|
-
time() - start_time,
|
178
|
-
)
|
161
|
+
queried_shards = [shard_id for _, shard_id in queried_nodes]
|
162
|
+
if debug and queried_shards:
|
163
|
+
search_results.shards = queried_shards
|
179
164
|
|
180
|
-
|
165
|
+
return search_results
|
nucliadb/search/app.py
CHANGED
@@ -17,72 +17,52 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
import
|
20
|
+
import importlib.metadata
|
21
|
+
|
21
22
|
from fastapi import FastAPI
|
22
23
|
from fastapi.responses import JSONResponse
|
23
24
|
from starlette.middleware import Middleware
|
24
25
|
from starlette.middleware.authentication import AuthenticationMiddleware
|
25
|
-
from starlette.middleware.cors import CORSMiddleware
|
26
26
|
from starlette.requests import ClientDisconnect, Request
|
27
27
|
from starlette.responses import HTMLResponse
|
28
28
|
|
29
29
|
from nucliadb.common.cluster import manager
|
30
|
+
from nucliadb.ingest.settings import DriverConfig
|
30
31
|
from nucliadb.middleware import ProcessTimeHeaderMiddleware
|
31
|
-
from nucliadb.middleware.transaction import ReadOnlyTransactionMiddleware
|
32
32
|
from nucliadb.search import API_PREFIX
|
33
33
|
from nucliadb.search.api.v1.router import api as api_v1
|
34
|
-
from nucliadb.search.lifecycle import
|
34
|
+
from nucliadb.search.lifecycle import lifespan
|
35
35
|
from nucliadb.search.settings import settings
|
36
36
|
from nucliadb_telemetry import errors
|
37
37
|
from nucliadb_telemetry.fastapi.utils import (
|
38
38
|
client_disconnect_handler,
|
39
39
|
global_exception_handler,
|
40
40
|
)
|
41
|
-
from nucliadb_utils import
|
41
|
+
from nucliadb_utils.audit.stream import AuditMiddleware
|
42
42
|
from nucliadb_utils.authentication import NucliaCloudAuthenticationBackend
|
43
43
|
from nucliadb_utils.fastapi.openapi import extend_openapi
|
44
44
|
from nucliadb_utils.fastapi.versioning import VersionedFastAPI
|
45
|
-
from nucliadb_utils.settings import
|
46
|
-
from nucliadb_utils.utilities import
|
45
|
+
from nucliadb_utils.settings import running_settings
|
46
|
+
from nucliadb_utils.utilities import get_audit
|
47
47
|
|
48
48
|
middleware = []
|
49
|
-
|
50
|
-
if has_feature(const.Features.CORS_MIDDLEWARE, default=False):
|
51
|
-
middleware.append(
|
52
|
-
Middleware(
|
53
|
-
CORSMiddleware,
|
54
|
-
allow_origins=http_settings.cors_origins,
|
55
|
-
allow_methods=["*"],
|
56
|
-
# Authorization will be exluded from * in the future, (CORS non-wildcard request-header).
|
57
|
-
# Browsers already showing deprecation notices, so it needs to be specified explicitly
|
58
|
-
allow_headers=["*", "Authorization"],
|
59
|
-
)
|
60
|
-
)
|
61
|
-
|
62
49
|
middleware.extend(
|
63
50
|
[
|
64
|
-
Middleware(
|
65
|
-
|
66
|
-
),
|
67
|
-
Middleware(ReadOnlyTransactionMiddleware),
|
51
|
+
Middleware(AuthenticationMiddleware, backend=NucliaCloudAuthenticationBackend()),
|
52
|
+
Middleware(AuditMiddleware, audit_utility_getter=get_audit),
|
68
53
|
]
|
69
54
|
)
|
70
55
|
|
71
56
|
if running_settings.debug:
|
72
57
|
middleware.append(Middleware(ProcessTimeHeaderMiddleware))
|
73
58
|
|
74
|
-
errors.setup_error_handling(
|
75
|
-
|
76
|
-
|
77
|
-
on_startup = [initialize]
|
78
|
-
on_shutdown = [finalize]
|
59
|
+
errors.setup_error_handling(importlib.metadata.distribution("nucliadb").version)
|
79
60
|
|
80
61
|
|
81
62
|
fastapi_settings = dict(
|
82
63
|
debug=running_settings.debug,
|
83
64
|
middleware=middleware,
|
84
|
-
|
85
|
-
on_shutdown=on_shutdown,
|
65
|
+
lifespan=lifespan,
|
86
66
|
exception_handlers={
|
87
67
|
Exception: global_exception_handler,
|
88
68
|
ClientDisconnect: client_disconnect_handler,
|
@@ -127,7 +107,7 @@ async def node_members(request: Request) -> JSONResponse:
|
|
127
107
|
|
128
108
|
|
129
109
|
async def alive(request: Request) -> JSONResponse:
|
130
|
-
if len(manager.get_index_nodes()) == 0 and settings.driver !=
|
110
|
+
if len(manager.get_index_nodes()) == 0 and settings.driver != DriverConfig.LOCAL:
|
131
111
|
return JSONResponse({"status": "error"}, status_code=503)
|
132
112
|
else:
|
133
113
|
return JSONResponse({"status": "ok"})
|
nucliadb/search/lifecycle.py
CHANGED
@@ -17,8 +17,13 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
+
from contextlib import asynccontextmanager
|
21
|
+
|
22
|
+
from fastapi import FastAPI
|
23
|
+
|
20
24
|
from nucliadb.common.cluster.utils import setup_cluster, teardown_cluster
|
21
|
-
from nucliadb.common.maindb.utils import setup_driver
|
25
|
+
from nucliadb.common.maindb.utils import setup_driver
|
26
|
+
from nucliadb.common.nidx import start_nidx_utility
|
22
27
|
from nucliadb.ingest.utils import start_ingest, stop_ingest
|
23
28
|
from nucliadb.search import SERVICE_NAME
|
24
29
|
from nucliadb.search.predict import start_predict_engine
|
@@ -33,7 +38,8 @@ from nucliadb_utils.utilities import (
|
|
33
38
|
)
|
34
39
|
|
35
40
|
|
36
|
-
|
41
|
+
@asynccontextmanager
|
42
|
+
async def lifespan(app: FastAPI):
|
37
43
|
await setup_telemetry(SERVICE_NAME)
|
38
44
|
|
39
45
|
await start_ingest(SERVICE_NAME)
|
@@ -41,11 +47,12 @@ async def initialize() -> None:
|
|
41
47
|
|
42
48
|
await setup_driver()
|
43
49
|
await setup_cluster()
|
50
|
+
await start_nidx_utility()
|
44
51
|
|
45
52
|
await start_audit_utility(SERVICE_NAME)
|
46
53
|
|
54
|
+
yield
|
47
55
|
|
48
|
-
async def finalize() -> None:
|
49
56
|
await stop_ingest()
|
50
57
|
if get_utility(Utility.PARTITION):
|
51
58
|
clean_utility(Utility.PARTITION)
|