nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -19,21 +19,11 @@
|
|
19
19
|
#
|
20
20
|
from fastapi import HTTPException, Response
|
21
21
|
from fastapi_versioning import version
|
22
|
-
from nucliadb_protos.knowledgebox_pb2 import Label as LabelPB
|
23
|
-
from nucliadb_protos.knowledgebox_pb2 import LabelSet as LabelSetPB
|
24
|
-
from nucliadb_protos.writer_pb2 import (
|
25
|
-
DelEntitiesRequest,
|
26
|
-
DelLabelsRequest,
|
27
|
-
NewEntitiesGroupRequest,
|
28
|
-
NewEntitiesGroupResponse,
|
29
|
-
OpStatusWriter,
|
30
|
-
SetLabelsRequest,
|
31
|
-
UpdateEntitiesGroupRequest,
|
32
|
-
UpdateEntitiesGroupResponse,
|
33
|
-
)
|
34
22
|
from starlette.requests import Request
|
35
23
|
|
36
24
|
from nucliadb.common import datamanagers
|
25
|
+
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
26
|
+
from nucliadb.common.models_utils import to_proto
|
37
27
|
from nucliadb.models.responses import (
|
38
28
|
HTTPConflict,
|
39
29
|
HTTPInternalServerError,
|
@@ -47,6 +37,17 @@ from nucliadb_models.entities import (
|
|
47
37
|
from nucliadb_models.labels import LabelSet
|
48
38
|
from nucliadb_models.resource import NucliaDBRoles
|
49
39
|
from nucliadb_models.synonyms import KnowledgeBoxSynonyms
|
40
|
+
from nucliadb_protos import writer_pb2
|
41
|
+
from nucliadb_protos.knowledgebox_pb2 import Label as LabelPB
|
42
|
+
from nucliadb_protos.knowledgebox_pb2 import LabelSet as LabelSetPB
|
43
|
+
from nucliadb_protos.writer_pb2 import (
|
44
|
+
DelEntitiesRequest,
|
45
|
+
NewEntitiesGroupRequest,
|
46
|
+
NewEntitiesGroupResponse,
|
47
|
+
OpStatusWriter,
|
48
|
+
UpdateEntitiesGroupRequest,
|
49
|
+
UpdateEntitiesGroupResponse,
|
50
|
+
)
|
50
51
|
from nucliadb_utils.authentication import requires
|
51
52
|
from nucliadb_utils.utilities import get_ingest
|
52
53
|
|
@@ -60,9 +61,7 @@ from nucliadb_utils.utilities import get_ingest
|
|
60
61
|
)
|
61
62
|
@requires(NucliaDBRoles.WRITER)
|
62
63
|
@version(1)
|
63
|
-
async def create_entities_group(
|
64
|
-
request: Request, kbid: str, item: CreateEntitiesGroupPayload
|
65
|
-
):
|
64
|
+
async def create_entities_group(request: Request, kbid: str, item: CreateEntitiesGroupPayload):
|
66
65
|
ingest = get_ingest()
|
67
66
|
|
68
67
|
pbrequest: NewEntitiesGroupRequest = NewEntitiesGroupRequest()
|
@@ -91,9 +90,7 @@ async def create_entities_group(
|
|
91
90
|
detail=f"Entities group {item.group} already exists in this Knowledge box",
|
92
91
|
)
|
93
92
|
elif status.status == NewEntitiesGroupResponse.Status.ERROR:
|
94
|
-
return HTTPInternalServerError(
|
95
|
-
detail="Error on settings entities on a Knowledge box"
|
96
|
-
)
|
93
|
+
return HTTPInternalServerError(detail="Error on settings entities on a Knowledge box")
|
97
94
|
|
98
95
|
|
99
96
|
@api.patch(
|
@@ -138,9 +135,7 @@ async def update_entities_group(
|
|
138
135
|
elif status.status == UpdateEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND:
|
139
136
|
return HTTPNotFound(detail="Entities group does not exist")
|
140
137
|
elif status.status == UpdateEntitiesGroupResponse.Status.ERROR:
|
141
|
-
return HTTPInternalServerError(
|
142
|
-
detail="Error on settings entities on a Knowledge box"
|
143
|
-
)
|
138
|
+
return HTTPInternalServerError(detail="Error on settings entities on a Knowledge box")
|
144
139
|
|
145
140
|
|
146
141
|
@api.delete(
|
@@ -164,9 +159,7 @@ async def delete_entities(request: Request, kbid: str, group: str):
|
|
164
159
|
elif status.status == OpStatusWriter.Status.NOTFOUND:
|
165
160
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
166
161
|
elif status.status == OpStatusWriter.Status.ERROR:
|
167
|
-
raise HTTPException(
|
168
|
-
status_code=500, detail="Error on deleting entities from a Knowledge box"
|
169
|
-
)
|
162
|
+
raise HTTPException(status_code=500, detail="Error on deleting entities from a Knowledge box")
|
170
163
|
|
171
164
|
return Response(status_code=204)
|
172
165
|
|
@@ -180,21 +173,25 @@ async def delete_entities(request: Request, kbid: str, group: str):
|
|
180
173
|
)
|
181
174
|
@requires(NucliaDBRoles.WRITER)
|
182
175
|
@version(1)
|
183
|
-
async def
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
if item.title:
|
189
|
-
pbrequest.labelset.title = item.title
|
176
|
+
async def set_labelset_endpoint(request: Request, kbid: str, labelset: str, item: LabelSet):
|
177
|
+
try:
|
178
|
+
await set_labelset(kbid, labelset, item)
|
179
|
+
except KnowledgeBoxNotFound:
|
180
|
+
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
190
181
|
|
191
|
-
if item.color:
|
192
|
-
pbrequest.labelset.color = item.color
|
193
182
|
|
194
|
-
|
183
|
+
async def set_labelset(kbid: str, labelset_id: str, item: LabelSet):
|
184
|
+
kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
|
185
|
+
if not kb_exists:
|
186
|
+
raise KnowledgeBoxNotFound()
|
187
|
+
labelset = writer_pb2.LabelSet()
|
188
|
+
if item.title is not None:
|
189
|
+
labelset.title = item.title
|
190
|
+
if item.color is not None:
|
191
|
+
labelset.color = item.color
|
192
|
+
labelset.multiple = item.multiple
|
195
193
|
for kind in item.kind:
|
196
|
-
|
197
|
-
|
194
|
+
labelset.kind.append(LabelSetPB.LabelSetKind.Value(kind))
|
198
195
|
for label_input in item.labels:
|
199
196
|
lbl = LabelPB()
|
200
197
|
if label_input.uri:
|
@@ -205,16 +202,8 @@ async def set_labels(request: Request, kbid: str, labelset: str, item: LabelSet)
|
|
205
202
|
lbl.related = label_input.related
|
206
203
|
if label_input.title:
|
207
204
|
lbl.title = label_input.title
|
208
|
-
|
209
|
-
|
210
|
-
if status.status == OpStatusWriter.Status.OK:
|
211
|
-
return None
|
212
|
-
elif status.status == OpStatusWriter.Status.NOTFOUND:
|
213
|
-
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
214
|
-
elif status.status == OpStatusWriter.Status.ERROR:
|
215
|
-
raise HTTPException(
|
216
|
-
status_code=500, detail="Error on settings labels on a Knowledge box"
|
217
|
-
)
|
205
|
+
labelset.labels.append(lbl)
|
206
|
+
await datamanagers.atomic.labelset.set(kbid=kbid, labelset_id=labelset_id, labelset=labelset)
|
218
207
|
|
219
208
|
|
220
209
|
@api.delete(
|
@@ -226,21 +215,18 @@ async def set_labels(request: Request, kbid: str, labelset: str, item: LabelSet)
|
|
226
215
|
)
|
227
216
|
@requires(NucliaDBRoles.WRITER)
|
228
217
|
@version(1)
|
229
|
-
async def
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
pbrequest.id = labelset
|
234
|
-
status: OpStatusWriter = await ingest.DelLabels(pbrequest) # type: ignore
|
235
|
-
if status.status == OpStatusWriter.Status.OK:
|
236
|
-
return None
|
237
|
-
elif status.status == OpStatusWriter.Status.NOTFOUND:
|
218
|
+
async def delete_labelset_endpoint(request: Request, kbid: str, labelset: str):
|
219
|
+
try:
|
220
|
+
await delete_labelset(kbid, labelset)
|
221
|
+
except KnowledgeBoxNotFound:
|
238
222
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
223
|
+
|
224
|
+
|
225
|
+
async def delete_labelset(kbid: str, labelset_id: str):
|
226
|
+
kb_exists = await datamanagers.atomic.kb.exists_kb(kbid=kbid)
|
227
|
+
if not kb_exists:
|
228
|
+
raise KnowledgeBoxNotFound()
|
229
|
+
await datamanagers.atomic.labelset.delete(kbid=kbid, labelset_id=labelset_id)
|
244
230
|
|
245
231
|
|
246
232
|
@api.put(
|
@@ -253,15 +239,10 @@ async def delete_labels(request: Request, kbid: str, labelset: str):
|
|
253
239
|
@requires(NucliaDBRoles.WRITER)
|
254
240
|
@version(1)
|
255
241
|
async def set_custom_synonyms(request: Request, kbid: str, item: KnowledgeBoxSynonyms):
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
261
|
-
|
262
|
-
await datamanagers.synonyms.set(txn, kbid=kbid, synonyms=synonyms)
|
263
|
-
await txn.commit()
|
264
|
-
|
242
|
+
if not await datamanagers.atomic.kb.exists_kb(kbid=kbid):
|
243
|
+
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
244
|
+
synonyms = to_proto.kb_synonyms(item)
|
245
|
+
await datamanagers.atomic.synonyms.set(kbid=kbid, synonyms=synonyms)
|
265
246
|
return Response(status_code=204)
|
266
247
|
|
267
248
|
|
@@ -276,7 +257,7 @@ async def set_custom_synonyms(request: Request, kbid: str, item: KnowledgeBoxSyn
|
|
276
257
|
@version(1)
|
277
258
|
async def delete_custom_synonyms(request: Request, kbid: str):
|
278
259
|
async with datamanagers.with_transaction() as txn:
|
279
|
-
if not datamanagers.kb.exists_kb(txn, kbid=kbid):
|
260
|
+
if not await datamanagers.kb.exists_kb(txn, kbid=kbid):
|
280
261
|
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
281
262
|
|
282
263
|
await datamanagers.synonyms.delete(txn, kbid=kbid)
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
import contextlib
|
21
|
+
|
22
|
+
from fastapi import HTTPException
|
23
|
+
|
24
|
+
from nucliadb.common import datamanagers, locking
|
25
|
+
|
26
|
+
|
27
|
+
@contextlib.asynccontextmanager
|
28
|
+
async def noop_context_manager():
|
29
|
+
"""
|
30
|
+
This is used for requests where slug is not set by the user and we don't need to
|
31
|
+
care about uniqueness
|
32
|
+
"""
|
33
|
+
yield
|
34
|
+
|
35
|
+
|
36
|
+
@contextlib.asynccontextmanager
|
37
|
+
async def ensure_slug_uniqueness(kbid: str, slug: str):
|
38
|
+
"""
|
39
|
+
Make sure slug is unique by:
|
40
|
+
- First check if the slug is already taken by another existing resource
|
41
|
+
- Otherwise, use the transaction lock to prevent from multiple concurrent
|
42
|
+
create resource requests having the same slug.
|
43
|
+
"""
|
44
|
+
try:
|
45
|
+
async with locking.distributed_lock(
|
46
|
+
key=locking.RESOURCE_CREATION_SLUG_LOCK.format(kbid=kbid, resource_slug=slug),
|
47
|
+
# We don't want to wait here. If the lock exists, just raise exception
|
48
|
+
lock_timeout=0.0,
|
49
|
+
# Matches aprox the max amount of time that the ingest can take
|
50
|
+
# to ingest a broker message from the writer.
|
51
|
+
expire_timeout=60.0,
|
52
|
+
# We don't want to refresh it here
|
53
|
+
refresh_timeout=120.0,
|
54
|
+
):
|
55
|
+
if await datamanagers.atomic.resources.slug_exists(kbid=kbid, slug=slug):
|
56
|
+
raise HTTPException(status_code=409, detail=f"Resource slug {slug} already exists")
|
57
|
+
yield
|
58
|
+
except locking.ResourceLocked:
|
59
|
+
raise HTTPException(
|
60
|
+
status_code=409, detail=f"Another resource with the same {slug} is already being ingested"
|
61
|
+
)
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
import backoff
|
22
|
+
from fastapi import HTTPException
|
23
|
+
|
24
|
+
from nucliadb_protos.writer_pb2 import BrokerMessage
|
25
|
+
from nucliadb_utils import const
|
26
|
+
from nucliadb_utils.transaction import (
|
27
|
+
MaxTransactionSizeExceededError,
|
28
|
+
StreamingServerError,
|
29
|
+
TransactionCommitTimeoutError,
|
30
|
+
)
|
31
|
+
from nucliadb_utils.utilities import get_transaction_utility
|
32
|
+
|
33
|
+
|
34
|
+
async def commit(writer: BrokerMessage, partition: int, wait: bool = True) -> None:
|
35
|
+
try:
|
36
|
+
await transaction_commit(writer, partition, wait)
|
37
|
+
except TransactionCommitTimeoutError:
|
38
|
+
raise HTTPException(
|
39
|
+
status_code=501,
|
40
|
+
detail="Inconsistent write. This resource will not be processed and may not be stored.",
|
41
|
+
)
|
42
|
+
except MaxTransactionSizeExceededError:
|
43
|
+
raise HTTPException(
|
44
|
+
status_code=413,
|
45
|
+
detail="Transaction size exceeded. The resource is too large to be stored. Consider using file fields or split into multiple requests.",
|
46
|
+
)
|
47
|
+
except StreamingServerError:
|
48
|
+
raise HTTPException(
|
49
|
+
status_code=504,
|
50
|
+
detail="Timeout waiting for the streaming server to respond. Please back off and retry.",
|
51
|
+
)
|
52
|
+
|
53
|
+
|
54
|
+
@backoff.on_exception(
|
55
|
+
backoff.expo,
|
56
|
+
(StreamingServerError,),
|
57
|
+
jitter=backoff.random_jitter,
|
58
|
+
max_tries=3,
|
59
|
+
)
|
60
|
+
async def transaction_commit(writer: BrokerMessage, partition: int, wait: bool = True):
|
61
|
+
transaction = get_transaction_utility()
|
62
|
+
await transaction.commit(
|
63
|
+
writer,
|
64
|
+
partition,
|
65
|
+
wait=wait,
|
66
|
+
target_subject=const.Streams.INGEST.subject.format(partition=partition),
|
67
|
+
)
|