nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/train/uploader.py
CHANGED
@@ -20,6 +20,14 @@
|
|
20
20
|
from typing import Optional
|
21
21
|
|
22
22
|
import aiohttp
|
23
|
+
|
24
|
+
from nucliadb.common import datamanagers
|
25
|
+
from nucliadb.common.maindb.utils import setup_driver
|
26
|
+
from nucliadb.ingest.orm.entities import EntitiesManager
|
27
|
+
from nucliadb.ingest.orm.processor import Processor
|
28
|
+
from nucliadb.train import SERVICE_NAME
|
29
|
+
from nucliadb.train.models import RequestData
|
30
|
+
from nucliadb.train.settings import settings
|
23
31
|
from nucliadb_protos.knowledgebox_pb2 import Labels
|
24
32
|
from nucliadb_protos.train_pb2 import (
|
25
33
|
EnabledMetadata,
|
@@ -34,13 +42,6 @@ from nucliadb_protos.writer_pb2 import (
|
|
34
42
|
GetLabelsRequest,
|
35
43
|
GetLabelsResponse,
|
36
44
|
)
|
37
|
-
|
38
|
-
from nucliadb.common.maindb.utils import setup_driver
|
39
|
-
from nucliadb.ingest.orm.entities import EntitiesManager
|
40
|
-
from nucliadb.ingest.orm.processor import Processor
|
41
|
-
from nucliadb.train import SERVICE_NAME
|
42
|
-
from nucliadb.train.models import RequestData
|
43
|
-
from nucliadb.train.settings import settings
|
44
45
|
from nucliadb_utils.utilities import get_pubsub, get_storage
|
45
46
|
|
46
47
|
|
@@ -51,8 +52,7 @@ class UploadServicer:
|
|
51
52
|
pubsub = await get_pubsub()
|
52
53
|
self.proc = Processor(driver=driver, storage=storage, pubsub=pubsub)
|
53
54
|
|
54
|
-
async def finalize(self):
|
55
|
-
...
|
55
|
+
async def finalize(self): ...
|
56
56
|
|
57
57
|
async def GetSentences(self, request: GetSentencesRequest, context=None):
|
58
58
|
async for sentence in self.proc.kb_sentences(request):
|
@@ -75,9 +75,8 @@ class UploadServicer:
|
|
75
75
|
) -> GetEntitiesResponse:
|
76
76
|
kbid = request.kb.uuid
|
77
77
|
response = GetEntitiesResponse()
|
78
|
-
async with self.proc.driver.transaction() as txn:
|
78
|
+
async with self.proc.driver.transaction(read_only=True) as txn:
|
79
79
|
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
80
|
-
|
81
80
|
if kbobj is None:
|
82
81
|
response.status = GetEntitiesResponse.Status.NOTFOUND
|
83
82
|
return response
|
@@ -91,20 +90,16 @@ class UploadServicer:
|
|
91
90
|
async def GetOntology( # type: ignore
|
92
91
|
self, request: GetLabelsRequest, context=None
|
93
92
|
) -> GetLabelsResponse:
|
94
|
-
|
95
|
-
kbobj = await self.proc.get_kb_obj(txn, request.kb)
|
96
|
-
labels: Optional[Labels] = None
|
97
|
-
if kbobj is not None:
|
98
|
-
labels = await kbobj.get_labels()
|
99
|
-
|
93
|
+
kbid = request.kb.uuid
|
100
94
|
response = GetLabelsResponse()
|
101
|
-
|
95
|
+
kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
|
96
|
+
if not kb_exists:
|
102
97
|
response.status = GetLabelsResponse.Status.NOTFOUND
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
98
|
+
return response
|
99
|
+
response.kb.uuid = kbid
|
100
|
+
labels: Optional[Labels] = await datamanagers.atomic.labelset.get_all(kbid=kbid)
|
101
|
+
if labels is not None:
|
102
|
+
response.labels.CopyFrom(labels)
|
108
103
|
return response
|
109
104
|
|
110
105
|
|
@@ -124,9 +119,9 @@ async def start_upload(request: str, kb: str):
|
|
124
119
|
}
|
125
120
|
) as sess:
|
126
121
|
req = await sess.get(f"{url}/request")
|
127
|
-
request_data = RequestData.
|
122
|
+
request_data = RequestData.model_validate_json(await req.read())
|
128
123
|
|
129
|
-
metadata = EnabledMetadata(**request_data.metadata.
|
124
|
+
metadata = EnabledMetadata(**request_data.metadata.model_dump())
|
130
125
|
|
131
126
|
if request_data.sentences:
|
132
127
|
pbsr = GetSentencesRequest()
|
nucliadb/train/utils.py
CHANGED
@@ -23,7 +23,7 @@ from grpc import aio
|
|
23
23
|
from grpc_health.v1 import health, health_pb2_grpc
|
24
24
|
|
25
25
|
from nucliadb.common.maindb.utils import setup_driver, teardown_driver
|
26
|
-
from nucliadb.train.nodes import TrainShardManager
|
26
|
+
from nucliadb.train.nodes import TrainShardManager
|
27
27
|
from nucliadb.train.settings import settings
|
28
28
|
from nucliadb_protos import train_pb2_grpc
|
29
29
|
from nucliadb_telemetry.utils import setup_telemetry
|
nucliadb/writer/__init__.py
CHANGED
@@ -29,9 +29,7 @@ logger = logging.getLogger(SERVICE_NAME)
|
|
29
29
|
class EndpointFilter(logging.Filter):
|
30
30
|
def filter(self, record: logging.LogRecord) -> bool:
|
31
31
|
return (
|
32
|
-
record.args is not None
|
33
|
-
and len(record.args) >= 3
|
34
|
-
and record.args[2] not in ("/", "/metrics") # type: ignore
|
32
|
+
record.args is not None and len(record.args) >= 3 and record.args[2] not in ("/", "/metrics") # type: ignore
|
35
33
|
)
|
36
34
|
|
37
35
|
|
nucliadb/writer/api/constants.py
CHANGED
@@ -23,7 +23,6 @@ from fastapi.params import Header
|
|
23
23
|
|
24
24
|
if TYPE_CHECKING: # pragma: no cover
|
25
25
|
SKIP_STORE_DEFAULT = False
|
26
|
-
SYNC_CALL = False
|
27
26
|
X_NUCLIADB_USER = ""
|
28
27
|
X_FILE_PASSWORD = None
|
29
28
|
else:
|
@@ -31,10 +30,6 @@ else:
|
|
31
30
|
False,
|
32
31
|
description="If set to true, file fields will not be saved in the blob storage. They will only be sent to process.", # noqa
|
33
32
|
)
|
34
|
-
SYNC_CALL = Header(
|
35
|
-
False,
|
36
|
-
description="If set to true, the request will return when the changes to be commited to the database.",
|
37
|
-
)
|
38
33
|
X_NUCLIADB_USER = Header("")
|
39
34
|
X_FILE_PASSWORD = Header(
|
40
35
|
None,
|
@@ -17,18 +17,21 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from
|
20
|
+
from functools import wraps
|
21
21
|
|
22
|
-
from
|
22
|
+
from fastapi import HTTPException
|
23
23
|
|
24
|
+
from nucliadb_utils.settings import is_onprem_nucliadb
|
24
25
|
|
25
|
-
class Keywordset(Field):
|
26
|
-
pbklass = FieldKeywordset
|
27
|
-
value: FieldKeywordset
|
28
|
-
type: str = "k"
|
29
26
|
|
30
|
-
|
31
|
-
|
27
|
+
def only_for_onprem(fun):
|
28
|
+
@wraps(fun)
|
29
|
+
async def endpoint_wrapper(*args, **kwargs):
|
30
|
+
if not is_onprem_nucliadb():
|
31
|
+
raise HTTPException(
|
32
|
+
status_code=403,
|
33
|
+
detail="This endpoint is only available for onprem NucliaDB",
|
34
|
+
)
|
35
|
+
return await fun(*args, **kwargs)
|
32
36
|
|
33
|
-
|
34
|
-
return await self.db_get_value()
|
37
|
+
return endpoint_wrapper
|
@@ -17,6 +17,8 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
+
from datetime import datetime
|
21
|
+
from typing import AsyncGenerator
|
20
22
|
from uuid import uuid4
|
21
23
|
|
22
24
|
from fastapi_versioning import version
|
@@ -28,23 +30,32 @@ from nucliadb.common.context import ApplicationContext
|
|
28
30
|
from nucliadb.common.context.fastapi import get_app_context
|
29
31
|
from nucliadb.export_import import importer
|
30
32
|
from nucliadb.export_import.datamanager import ExportImportDataManager
|
33
|
+
from nucliadb.export_import.exceptions import (
|
34
|
+
IncompatibleExport,
|
35
|
+
)
|
31
36
|
from nucliadb.export_import.models import (
|
32
37
|
ExportMetadata,
|
33
38
|
ImportMetadata,
|
34
39
|
NatsTaskMessage,
|
35
40
|
)
|
36
41
|
from nucliadb.export_import.tasks import get_exports_producer, get_imports_producer
|
37
|
-
from nucliadb.export_import.utils import
|
42
|
+
from nucliadb.export_import.utils import ExportStreamReader, stream_compatible_with_kb
|
38
43
|
from nucliadb.models.responses import HTTPClientError
|
39
44
|
from nucliadb.writer import logger
|
40
|
-
from nucliadb.writer.api.
|
45
|
+
from nucliadb.writer.api.utils import only_for_onprem
|
46
|
+
from nucliadb.writer.api.v1.knowledgebox import create_kb
|
47
|
+
from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX, api
|
41
48
|
from nucliadb.writer.back_pressure import maybe_back_pressure
|
42
49
|
from nucliadb_models.export_import import (
|
43
50
|
CreateExportResponse,
|
44
51
|
CreateImportResponse,
|
52
|
+
NewImportedKbResponse,
|
45
53
|
Status,
|
46
54
|
)
|
47
|
-
from nucliadb_models.resource import
|
55
|
+
from nucliadb_models.resource import (
|
56
|
+
KnowledgeBoxConfig,
|
57
|
+
NucliaDBRoles,
|
58
|
+
)
|
48
59
|
from nucliadb_telemetry import errors
|
49
60
|
from nucliadb_utils.authentication import requires_one
|
50
61
|
|
@@ -52,7 +63,7 @@ from nucliadb_utils.authentication import requires_one
|
|
52
63
|
@api.post(
|
53
64
|
f"/{KB_PREFIX}/{{kbid}}/export",
|
54
65
|
status_code=200,
|
55
|
-
|
66
|
+
summary="Start an export of a Knowledge Box",
|
56
67
|
tags=["Knowledge Boxes"],
|
57
68
|
response_model=CreateExportResponse,
|
58
69
|
)
|
@@ -60,9 +71,8 @@ from nucliadb_utils.authentication import requires_one
|
|
60
71
|
@version(1)
|
61
72
|
async def start_kb_export_endpoint(request: Request, kbid: str):
|
62
73
|
context = get_app_context(request.app)
|
63
|
-
|
64
|
-
|
65
|
-
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
74
|
+
if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
|
75
|
+
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
66
76
|
|
67
77
|
export_id = uuid4().hex
|
68
78
|
if in_standalone_mode():
|
@@ -74,10 +84,60 @@ async def start_kb_export_endpoint(request: Request, kbid: str):
|
|
74
84
|
return CreateExportResponse(export_id=export_id)
|
75
85
|
|
76
86
|
|
87
|
+
@only_for_onprem
|
88
|
+
@api.post(
|
89
|
+
f"/{KBS_PREFIX}/import",
|
90
|
+
summary="Create a KB from an export and import its content",
|
91
|
+
tags=["Knowledge Boxes"],
|
92
|
+
response_model=NewImportedKbResponse,
|
93
|
+
openapi_extra={"x-hidden-operation": True},
|
94
|
+
)
|
95
|
+
@requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
|
96
|
+
@version(1)
|
97
|
+
async def kb_create_and_import_endpoint(request: Request):
|
98
|
+
context = get_app_context(request.app)
|
99
|
+
|
100
|
+
# Read stream and parse learning configuration
|
101
|
+
stream = request.stream()
|
102
|
+
stream_reader = ExportStreamReader(stream)
|
103
|
+
learning_config, leftover_bytes = await stream_reader.maybe_read_learning_config()
|
104
|
+
if learning_config is None:
|
105
|
+
return HTTPClientError(
|
106
|
+
status_code=400,
|
107
|
+
detail="Trying to import an export missing learning config. Try using import on an existing KB or use a newer export",
|
108
|
+
)
|
109
|
+
|
110
|
+
# Create a KB with the import learning config
|
111
|
+
|
112
|
+
now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
113
|
+
import_kb_config = KnowledgeBoxConfig(
|
114
|
+
title=f"Imported KB - {now}",
|
115
|
+
learning_configuration=learning_config.dict(),
|
116
|
+
)
|
117
|
+
kbid, slug = await create_kb(import_kb_config)
|
118
|
+
|
119
|
+
# Import contents to the new KB
|
120
|
+
|
121
|
+
async def stream_with_leftovers(leftovers: bytes, stream: AsyncGenerator[bytes, None]):
|
122
|
+
if len(leftovers) > 0:
|
123
|
+
yield leftovers
|
124
|
+
async for chunk in stream:
|
125
|
+
yield chunk
|
126
|
+
|
127
|
+
await importer.import_kb(
|
128
|
+
context=context, kbid=kbid, stream=stream_with_leftovers(leftover_bytes, stream)
|
129
|
+
)
|
130
|
+
|
131
|
+
return NewImportedKbResponse(
|
132
|
+
kbid=kbid,
|
133
|
+
slug=slug,
|
134
|
+
)
|
135
|
+
|
136
|
+
|
77
137
|
@api.post(
|
78
138
|
f"/{KB_PREFIX}/{{kbid}}/import",
|
79
139
|
status_code=200,
|
80
|
-
|
140
|
+
summary="Start an import to a Knowledge Box",
|
81
141
|
tags=["Knowledge Boxes"],
|
82
142
|
response_model=CreateImportResponse,
|
83
143
|
)
|
@@ -85,40 +145,45 @@ async def start_kb_export_endpoint(request: Request, kbid: str):
|
|
85
145
|
@version(1)
|
86
146
|
async def start_kb_import_endpoint(request: Request, kbid: str):
|
87
147
|
context = get_app_context(request.app)
|
88
|
-
|
89
|
-
|
90
|
-
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
148
|
+
if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
|
149
|
+
return HTTPClientError(status_code=404, detail="Knowledge Box not found")
|
91
150
|
|
92
151
|
await maybe_back_pressure(request, kbid)
|
93
152
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
153
|
+
stream = stream_compatible_with_kb(kbid, request.stream())
|
154
|
+
try:
|
155
|
+
import_id = uuid4().hex
|
156
|
+
if in_standalone_mode():
|
157
|
+
# In standalone mode, we import directly from the request content stream.
|
158
|
+
# Note that we return an import_id simply to keep the API consistent with hosted nucliadb.
|
159
|
+
await importer.import_kb(
|
160
|
+
context=context,
|
161
|
+
kbid=kbid,
|
162
|
+
stream=stream,
|
163
|
+
)
|
164
|
+
return CreateImportResponse(import_id=import_id)
|
165
|
+
else:
|
166
|
+
import_size = await upload_import_to_blob_storage(
|
167
|
+
context=context,
|
168
|
+
stream=stream,
|
169
|
+
kbid=kbid,
|
170
|
+
import_id=import_id,
|
171
|
+
)
|
172
|
+
await start_import_task(context, kbid, import_id, import_size)
|
173
|
+
return CreateImportResponse(import_id=import_id)
|
174
|
+
except IncompatibleExport as exc:
|
175
|
+
return HTTPClientError(status_code=400, detail=str(exc))
|
114
176
|
|
115
177
|
|
116
178
|
async def upload_import_to_blob_storage(
|
117
|
-
context: ApplicationContext,
|
179
|
+
context: ApplicationContext,
|
180
|
+
stream: AsyncGenerator[bytes, None],
|
181
|
+
kbid: str,
|
182
|
+
import_id: str,
|
118
183
|
) -> int:
|
119
184
|
dm = ExportImportDataManager(context.kv_driver, context.blob_storage)
|
120
185
|
return await dm.upload_import(
|
121
|
-
import_bytes=
|
186
|
+
import_bytes=stream,
|
122
187
|
kbid=kbid,
|
123
188
|
import_id=import_id,
|
124
189
|
)
|
@@ -133,37 +198,25 @@ async def start_export_task(context: ApplicationContext, kbid: str, export_id: s
|
|
133
198
|
producer = await get_exports_producer(context)
|
134
199
|
msg = NatsTaskMessage(kbid=kbid, id=export_id)
|
135
200
|
seqid = await producer(msg) # type: ignore
|
136
|
-
logger.info(
|
137
|
-
f"Export task produced. seqid={seqid} kbid={kbid} export_id={export_id}"
|
138
|
-
)
|
201
|
+
logger.info(f"Export task produced. seqid={seqid} kbid={kbid} export_id={export_id}")
|
139
202
|
except Exception as e:
|
140
203
|
errors.capture_exception(e)
|
141
204
|
await dm.delete_metadata("export", metadata)
|
142
205
|
raise
|
143
206
|
|
144
207
|
|
145
|
-
async def start_import_task(
|
146
|
-
context: ApplicationContext, kbid: str, import_id: str, import_size: int
|
147
|
-
):
|
208
|
+
async def start_import_task(context: ApplicationContext, kbid: str, import_id: str, import_size: int):
|
148
209
|
dm = ExportImportDataManager(context.kv_driver, context.blob_storage)
|
149
210
|
metadata = ImportMetadata(kbid=kbid, id=import_id)
|
150
211
|
metadata.task.status = Status.SCHEDULED
|
151
|
-
metadata.total = import_size
|
212
|
+
metadata.total = import_size or 0
|
152
213
|
await dm.set_metadata("import", metadata)
|
153
214
|
try:
|
154
215
|
producer = await get_imports_producer(context)
|
155
216
|
msg = NatsTaskMessage(kbid=kbid, id=import_id)
|
156
217
|
seqid = await producer(msg) # type: ignore
|
157
|
-
logger.info(
|
158
|
-
f"Import task produced. seqid={seqid} kbid={kbid} import_id={import_id}"
|
159
|
-
)
|
218
|
+
logger.info(f"Import task produced. seqid={seqid} kbid={kbid} import_id={import_id}")
|
160
219
|
except Exception as e:
|
161
220
|
errors.capture_exception(e)
|
162
221
|
await dm.delete_metadata("import", metadata)
|
163
222
|
raise
|
164
|
-
|
165
|
-
|
166
|
-
class FastAPIExportStream(IteratorExportStream):
|
167
|
-
def __init__(self, request: Request):
|
168
|
-
iterator = request.stream().__aiter__()
|
169
|
-
super().__init__(iterator)
|