nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -0,0 +1,181 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
from typing import cast
|
22
|
+
|
23
|
+
from nucliadb.ingest.fields.base import Field
|
24
|
+
from nucliadb.ingest.fields.conversation import Conversation
|
25
|
+
from nucliadb.ingest.fields.file import File
|
26
|
+
from nucliadb.ingest.fields.link import Link
|
27
|
+
from nucliadb.ingest.orm.resource import Resource
|
28
|
+
from nucliadb_protos.resources_pb2 import (
|
29
|
+
ExtractedTextWrapper,
|
30
|
+
ExtractedVectorsWrapper,
|
31
|
+
FieldComputedMetadataWrapper,
|
32
|
+
FieldType,
|
33
|
+
LargeComputedMetadataWrapper,
|
34
|
+
)
|
35
|
+
from nucliadb_protos.writer_pb2 import BrokerMessage
|
36
|
+
|
37
|
+
|
38
|
+
async def generate_broker_message(resource: Resource) -> BrokerMessage:
|
39
|
+
"""Generate a full broker message from a resource. This means downloading
|
40
|
+
all the pointers minus the ones to external files that are not PB. Iterate
|
41
|
+
all resource fields and create a BrokerMessage
|
42
|
+
"""
|
43
|
+
builder = _BrokerMessageBuilder()
|
44
|
+
bm = await builder.build_from(resource)
|
45
|
+
return bm
|
46
|
+
|
47
|
+
|
48
|
+
class _BrokerMessageBuilder:
|
49
|
+
def __init__(self):
|
50
|
+
self.bm = BrokerMessage()
|
51
|
+
|
52
|
+
async def build_from(self, resource: Resource):
|
53
|
+
# clear the state and generate a new broker message
|
54
|
+
self.bm.Clear()
|
55
|
+
|
56
|
+
self.bm.kbid = resource.kb.kbid
|
57
|
+
self.bm.uuid = resource.uuid
|
58
|
+
basic = await resource.get_basic()
|
59
|
+
if basic is not None:
|
60
|
+
self.bm.basic.CopyFrom(basic)
|
61
|
+
|
62
|
+
self.bm.slug = self.bm.basic.slug
|
63
|
+
origin = await resource.get_origin()
|
64
|
+
if origin is not None:
|
65
|
+
self.bm.origin.CopyFrom(origin)
|
66
|
+
relations = await resource.get_relations()
|
67
|
+
if relations is not None:
|
68
|
+
for relation in relations.relations:
|
69
|
+
self.bm.relations.append(relation)
|
70
|
+
|
71
|
+
fields = await resource.get_fields(force=True)
|
72
|
+
for (type_id, field_id), field in fields.items():
|
73
|
+
# Value
|
74
|
+
await self.generate_field(type_id, field_id, field)
|
75
|
+
|
76
|
+
# Extracted text
|
77
|
+
await self.generate_extracted_text(type_id, field_id, field)
|
78
|
+
|
79
|
+
# Field Computed Metadata
|
80
|
+
await self.generate_field_computed_metadata(type_id, field_id, field)
|
81
|
+
|
82
|
+
if type_id == FieldType.FILE and isinstance(field, File):
|
83
|
+
field_extracted_data = await field.get_file_extracted_data()
|
84
|
+
if field_extracted_data is not None:
|
85
|
+
self.bm.file_extracted_data.append(field_extracted_data)
|
86
|
+
|
87
|
+
elif type_id == FieldType.LINK and isinstance(field, Link):
|
88
|
+
link_extracted_data = await field.get_link_extracted_data()
|
89
|
+
if link_extracted_data is not None:
|
90
|
+
self.bm.link_extracted_data.append(link_extracted_data)
|
91
|
+
|
92
|
+
# Field vectors
|
93
|
+
await self.generate_field_vectors(type_id, field_id, field)
|
94
|
+
|
95
|
+
# Large metadata
|
96
|
+
await self.generate_field_large_computed_metadata(type_id, field_id, field)
|
97
|
+
|
98
|
+
return self.bm
|
99
|
+
|
100
|
+
async def generate_field(
|
101
|
+
self,
|
102
|
+
type_id: FieldType.ValueType,
|
103
|
+
field_id: str,
|
104
|
+
field: Field,
|
105
|
+
):
|
106
|
+
# Used for exporting a field
|
107
|
+
if type_id == FieldType.TEXT:
|
108
|
+
value = await field.get_value()
|
109
|
+
self.bm.texts[field_id].CopyFrom(value)
|
110
|
+
elif type_id == FieldType.LINK:
|
111
|
+
value = await field.get_value()
|
112
|
+
self.bm.links[field_id].CopyFrom(value)
|
113
|
+
elif type_id == FieldType.FILE:
|
114
|
+
value = await field.get_value()
|
115
|
+
self.bm.files[field_id].CopyFrom(value)
|
116
|
+
elif type_id == FieldType.CONVERSATION:
|
117
|
+
field = cast(Conversation, field)
|
118
|
+
value = await field.get_full_conversation()
|
119
|
+
self.bm.conversations[field_id].CopyFrom(value)
|
120
|
+
|
121
|
+
async def generate_extracted_text(
|
122
|
+
self,
|
123
|
+
type_id: FieldType.ValueType,
|
124
|
+
field_id: str,
|
125
|
+
field: Field,
|
126
|
+
):
|
127
|
+
etw = ExtractedTextWrapper()
|
128
|
+
etw.field.field = field_id
|
129
|
+
etw.field.field_type = type_id
|
130
|
+
extracted_text = await field.get_extracted_text()
|
131
|
+
if extracted_text is not None:
|
132
|
+
etw.body.CopyFrom(extracted_text)
|
133
|
+
self.bm.extracted_text.append(etw)
|
134
|
+
|
135
|
+
async def generate_field_computed_metadata(
|
136
|
+
self,
|
137
|
+
type_id: FieldType.ValueType,
|
138
|
+
field_id: str,
|
139
|
+
field: Field,
|
140
|
+
):
|
141
|
+
fcmw = FieldComputedMetadataWrapper()
|
142
|
+
fcmw.field.field = field_id
|
143
|
+
fcmw.field.field_type = type_id
|
144
|
+
|
145
|
+
field_metadata = await field.get_field_metadata()
|
146
|
+
if field_metadata is not None:
|
147
|
+
fcmw.metadata.CopyFrom(field_metadata)
|
148
|
+
fcmw.field.field = field_id
|
149
|
+
fcmw.field.field_type = type_id
|
150
|
+
self.bm.field_metadata.append(fcmw)
|
151
|
+
# Make sure cloud files are removed for exporting
|
152
|
+
|
153
|
+
async def generate_field_vectors(
|
154
|
+
self,
|
155
|
+
type_id: FieldType.ValueType,
|
156
|
+
field_id: str,
|
157
|
+
field: Field,
|
158
|
+
):
|
159
|
+
vo = await field.get_vectors()
|
160
|
+
if vo is None:
|
161
|
+
return
|
162
|
+
evw = ExtractedVectorsWrapper()
|
163
|
+
evw.field.field = field_id
|
164
|
+
evw.field.field_type = type_id
|
165
|
+
evw.vectors.CopyFrom(vo)
|
166
|
+
self.bm.field_vectors.append(evw)
|
167
|
+
|
168
|
+
async def generate_field_large_computed_metadata(
|
169
|
+
self,
|
170
|
+
type_id: FieldType.ValueType,
|
171
|
+
field_id: str,
|
172
|
+
field: Field,
|
173
|
+
):
|
174
|
+
lcm = await field.get_large_field_metadata()
|
175
|
+
if lcm is None:
|
176
|
+
return
|
177
|
+
lcmw = LargeComputedMetadataWrapper()
|
178
|
+
lcmw.field.field = field_id
|
179
|
+
lcmw.field.field_type = type_id
|
180
|
+
lcmw.real.CopyFrom(lcm)
|
181
|
+
self.bm.field_large_metadata.append(lcmw)
|
nucliadb/ingest/orm/entities.py
CHANGED
@@ -21,24 +21,6 @@
|
|
21
21
|
import asyncio
|
22
22
|
from typing import AsyncGenerator, Optional
|
23
23
|
|
24
|
-
from nucliadb_protos.knowledgebox_pb2 import (
|
25
|
-
DeletedEntitiesGroups,
|
26
|
-
EntitiesGroup,
|
27
|
-
EntitiesGroupSummary,
|
28
|
-
Entity,
|
29
|
-
)
|
30
|
-
from nucliadb_protos.nodereader_pb2 import (
|
31
|
-
Faceted,
|
32
|
-
RelationNodeFilter,
|
33
|
-
RelationPrefixSearchRequest,
|
34
|
-
RelationSearchRequest,
|
35
|
-
RelationSearchResponse,
|
36
|
-
SearchRequest,
|
37
|
-
SearchResponse,
|
38
|
-
)
|
39
|
-
from nucliadb_protos.utils_pb2 import RelationNode
|
40
|
-
from nucliadb_protos.writer_pb2 import GetEntitiesResponse
|
41
|
-
|
42
24
|
from nucliadb.common import datamanagers
|
43
25
|
from nucliadb.common.cluster.base import AbstractIndexNode
|
44
26
|
from nucliadb.common.cluster.exceptions import (
|
@@ -55,7 +37,25 @@ from nucliadb.common.datamanagers.entities import (
|
|
55
37
|
from nucliadb.common.maindb.driver import Transaction
|
56
38
|
from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
|
57
39
|
from nucliadb.ingest.settings import settings
|
40
|
+
from nucliadb_protos.knowledgebox_pb2 import (
|
41
|
+
DeletedEntitiesGroups,
|
42
|
+
EntitiesGroup,
|
43
|
+
EntitiesGroupSummary,
|
44
|
+
Entity,
|
45
|
+
)
|
46
|
+
from nucliadb_protos.nodereader_pb2 import (
|
47
|
+
Faceted,
|
48
|
+
RelationNodeFilter,
|
49
|
+
RelationPrefixSearchRequest,
|
50
|
+
RelationSearchResponse,
|
51
|
+
SearchRequest,
|
52
|
+
SearchResponse,
|
53
|
+
)
|
54
|
+
from nucliadb_protos.utils_pb2 import RelationNode
|
55
|
+
from nucliadb_protos.writer_pb2 import GetEntitiesResponse
|
58
56
|
from nucliadb_telemetry import errors
|
57
|
+
from nucliadb_utils import const
|
58
|
+
from nucliadb_utils.utilities import has_feature
|
59
59
|
|
60
60
|
from .exceptions import EntityManagementException
|
61
61
|
|
@@ -199,37 +199,33 @@ class EntitiesManager:
|
|
199
199
|
elif stored is not None and indexed is not None:
|
200
200
|
entities_group = self.merge_entities_groups(indexed, stored)
|
201
201
|
else:
|
202
|
-
entities_group = stored or indexed
|
202
|
+
entities_group = stored or indexed
|
203
203
|
return entities_group
|
204
204
|
|
205
205
|
async def get_stored_entities_group(self, group: str) -> Optional[EntitiesGroup]:
|
206
|
-
return await datamanagers.entities.get_entities_group(
|
207
|
-
self.txn, kbid=self.kbid, group=group
|
208
|
-
)
|
206
|
+
return await datamanagers.entities.get_entities_group(self.txn, kbid=self.kbid, group=group)
|
209
207
|
|
210
208
|
async def get_indexed_entities_group(self, group: str) -> Optional[EntitiesGroup]:
|
211
209
|
shard_manager = get_shard_manager()
|
212
210
|
|
213
|
-
async def do_entities_search(
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
shard_id=shard_id,
|
218
|
-
prefix=RelationPrefixSearchRequest(
|
211
|
+
async def do_entities_search(node: AbstractIndexNode, shard_id: str) -> RelationSearchResponse:
|
212
|
+
request = SearchRequest(
|
213
|
+
shard=shard_id,
|
214
|
+
relation_prefix=RelationPrefixSearchRequest(
|
219
215
|
prefix="",
|
220
216
|
node_filters=[
|
221
|
-
RelationNodeFilter(
|
222
|
-
node_type=RelationNode.NodeType.ENTITY, node_subtype=group
|
223
|
-
)
|
217
|
+
RelationNodeFilter(node_type=RelationNode.NodeType.ENTITY, node_subtype=group)
|
224
218
|
],
|
225
219
|
),
|
226
220
|
)
|
227
|
-
|
221
|
+
response = await node.reader.Search(request) # type: ignore
|
222
|
+
return response.relation
|
228
223
|
|
229
224
|
results = await shard_manager.apply_for_all_shards(
|
230
225
|
self.kbid,
|
231
226
|
do_entities_search,
|
232
227
|
settings.relation_search_timeout,
|
228
|
+
use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": self.kbid}),
|
233
229
|
use_read_replica_nodes=self.use_read_replica_nodes,
|
234
230
|
)
|
235
231
|
for result in results:
|
@@ -239,9 +235,7 @@ class EntitiesManager:
|
|
239
235
|
|
240
236
|
entities = {}
|
241
237
|
for result in results:
|
242
|
-
entities.update(
|
243
|
-
{node.value: Entity(value=node.value) for node in result.prefix.nodes}
|
244
|
-
)
|
238
|
+
entities.update({node.value: Entity(value=node.value) for node in result.prefix.nodes})
|
245
239
|
|
246
240
|
if not entities:
|
247
241
|
return None
|
@@ -292,7 +286,7 @@ class EntitiesManager:
|
|
292
286
|
|
293
287
|
# stored groups
|
294
288
|
entities_key = KB_ENTITIES.format(kbid=self.kbid)
|
295
|
-
async for key in self.txn.keys(entities_key
|
289
|
+
async for key in self.txn.keys(entities_key):
|
296
290
|
group = key.split("/")[-1]
|
297
291
|
if exclude_deleted and group in deleted_groups:
|
298
292
|
continue
|
@@ -312,9 +306,7 @@ class EntitiesManager:
|
|
312
306
|
) -> set[str]:
|
313
307
|
shard_manager = get_shard_manager()
|
314
308
|
|
315
|
-
async def query_indexed_entities_group_names(
|
316
|
-
node: AbstractIndexNode, shard_id: str
|
317
|
-
) -> set[str]:
|
309
|
+
async def query_indexed_entities_group_names(node: AbstractIndexNode, shard_id: str) -> set[str]:
|
318
310
|
request = SearchRequest(
|
319
311
|
shard=shard_id,
|
320
312
|
result_per_page=0,
|
@@ -335,6 +327,7 @@ class EntitiesManager:
|
|
335
327
|
self.kbid,
|
336
328
|
query_indexed_entities_group_names,
|
337
329
|
settings.relation_types_timeout,
|
330
|
+
use_nidx=has_feature(const.Features.NIDX_READS, context={"kbid": self.kbid}),
|
338
331
|
use_read_replica_nodes=self.use_read_replica_nodes,
|
339
332
|
)
|
340
333
|
for result in results:
|
@@ -347,9 +340,7 @@ class EntitiesManager:
|
|
347
340
|
return set.union(*results)
|
348
341
|
|
349
342
|
async def store_entities_group(self, group: str, eg: EntitiesGroup):
|
350
|
-
meta_cache = await datamanagers.entities.get_entities_meta_cache(
|
351
|
-
self.txn, kbid=self.kbid
|
352
|
-
)
|
343
|
+
meta_cache = await datamanagers.entities.get_entities_meta_cache(self.txn, kbid=self.kbid)
|
353
344
|
duplicates = {}
|
354
345
|
deleted = []
|
355
346
|
duplicate_count = 0
|
@@ -373,9 +364,7 @@ class EntitiesManager:
|
|
373
364
|
|
374
365
|
meta_cache.set_duplicates(group, duplicates)
|
375
366
|
meta_cache.set_deleted(group, deleted)
|
376
|
-
await datamanagers.entities.set_entities_meta_cache(
|
377
|
-
self.txn, kbid=self.kbid, cache=meta_cache
|
378
|
-
)
|
367
|
+
await datamanagers.entities.set_entities_meta_cache(self.txn, kbid=self.kbid, cache=meta_cache)
|
379
368
|
|
380
369
|
await datamanagers.entities.set_entities_group(
|
381
370
|
self.txn, kbid=self.kbid, group_id=group, entities=eg
|
@@ -392,14 +381,10 @@ class EntitiesManager:
|
|
392
381
|
await self.txn.delete(entities_key)
|
393
382
|
|
394
383
|
async def mark_entities_group_as_deleted(self, group: str):
|
395
|
-
await datamanagers.entities.mark_group_as_deleted(
|
396
|
-
self.txn, kbid=self.kbid, group=group
|
397
|
-
)
|
384
|
+
await datamanagers.entities.mark_group_as_deleted(self.txn, kbid=self.kbid, group=group)
|
398
385
|
|
399
386
|
async def unmark_entities_group_as_deleted(self, group: str):
|
400
|
-
await datamanagers.entities.unmark_group_as_deleted(
|
401
|
-
self.txn, kbid=self.kbid, group=group
|
402
|
-
)
|
387
|
+
await datamanagers.entities.unmark_group_as_deleted(self.txn, kbid=self.kbid, group=group)
|
403
388
|
|
404
389
|
@staticmethod
|
405
390
|
def merge_entities_groups(indexed: EntitiesGroup, stored: EntitiesGroup):
|
@@ -23,6 +23,10 @@ class NotFound(Exception):
|
|
23
23
|
pass
|
24
24
|
|
25
25
|
|
26
|
+
class KnowledgeBoxCreationError(Exception):
|
27
|
+
pass
|
28
|
+
|
29
|
+
|
26
30
|
class KnowledgeBoxConflict(Exception):
|
27
31
|
pass
|
28
32
|
|
@@ -48,3 +52,11 @@ class ResourceNotIndexable(Exception):
|
|
48
52
|
|
49
53
|
class EntityManagementException(Exception):
|
50
54
|
pass
|
55
|
+
|
56
|
+
|
57
|
+
class VectorSetConflict(Exception):
|
58
|
+
pass
|
59
|
+
|
60
|
+
|
61
|
+
class InvalidBrokerMessage(ValueError):
|
62
|
+
pass
|