nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
nucliadb/writer/lifecycle.py
CHANGED
@@ -17,13 +17,20 @@
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
|
-
from
|
20
|
+
from contextlib import asynccontextmanager
|
21
|
+
|
22
|
+
from fastapi import FastAPI
|
23
|
+
|
24
|
+
from nucliadb.common.context.fastapi import inject_app_context
|
25
|
+
from nucliadb.ingest.processing import start_processing_engine, stop_processing_engine
|
21
26
|
from nucliadb.ingest.utils import start_ingest, stop_ingest
|
22
27
|
from nucliadb.writer import SERVICE_NAME
|
28
|
+
from nucliadb.writer.back_pressure import start_materializer, stop_materializer
|
29
|
+
from nucliadb.writer.settings import back_pressure_settings
|
23
30
|
from nucliadb.writer.tus import finalize as storage_finalize
|
24
31
|
from nucliadb.writer.tus import initialize as storage_initialize
|
25
|
-
from nucliadb.writer.utilities import get_processing
|
26
32
|
from nucliadb_telemetry.utils import clean_telemetry, setup_telemetry
|
33
|
+
from nucliadb_utils.settings import is_onprem_nucliadb
|
27
34
|
from nucliadb_utils.utilities import (
|
28
35
|
finalize_utilities,
|
29
36
|
start_partitioning_utility,
|
@@ -32,29 +39,28 @@ from nucliadb_utils.utilities import (
|
|
32
39
|
)
|
33
40
|
|
34
41
|
|
35
|
-
|
36
|
-
|
42
|
+
@asynccontextmanager
|
43
|
+
async def lifespan(app: FastAPI):
|
44
|
+
back_pressure_enabled = back_pressure_settings.enabled and not is_onprem_nucliadb()
|
37
45
|
|
46
|
+
await setup_telemetry(SERVICE_NAME)
|
38
47
|
await start_ingest(SERVICE_NAME)
|
39
|
-
|
40
48
|
await start_processing_engine()
|
41
|
-
|
42
49
|
start_partitioning_utility()
|
43
|
-
|
44
50
|
await start_transaction_utility(SERVICE_NAME)
|
45
51
|
await storage_initialize()
|
46
52
|
|
53
|
+
# Inject application context into the fastapi app's state
|
54
|
+
async with inject_app_context(app) as context:
|
55
|
+
if back_pressure_enabled:
|
56
|
+
await start_materializer(context)
|
57
|
+
yield
|
47
58
|
|
48
|
-
|
59
|
+
if back_pressure_enabled:
|
60
|
+
await stop_materializer()
|
49
61
|
await stop_transaction_utility()
|
50
|
-
|
51
62
|
await stop_ingest()
|
52
|
-
|
53
|
-
if processing is not None:
|
54
|
-
await processing.finalize()
|
55
|
-
|
63
|
+
await stop_processing_engine()
|
56
64
|
await storage_finalize()
|
57
|
-
|
58
65
|
await clean_telemetry(SERVICE_NAME)
|
59
|
-
|
60
66
|
await finalize_utilities()
|
nucliadb/writer/py.typed
ADDED
File without changes
|
@@ -19,9 +19,10 @@
|
|
19
19
|
#
|
20
20
|
from datetime import datetime
|
21
21
|
|
22
|
-
from nucliadb_protos.writer_pb2 import Audit
|
23
22
|
from starlette.requests import Request
|
24
23
|
|
24
|
+
from nucliadb_protos.writer_pb2 import Audit
|
25
|
+
|
25
26
|
|
26
27
|
def parse_audit(audit: Audit, request: Request):
|
27
28
|
audit.user = request.headers.get("X-NUCLIADB-USER", "")
|
@@ -18,8 +18,31 @@
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
19
|
#
|
20
20
|
from datetime import datetime
|
21
|
+
from typing import Optional
|
21
22
|
|
22
23
|
from fastapi import HTTPException
|
24
|
+
|
25
|
+
from nucliadb.common.models_utils import to_proto
|
26
|
+
from nucliadb.common.models_utils.from_proto import (
|
27
|
+
RelationNodeTypeMap,
|
28
|
+
RelationTypeMap,
|
29
|
+
)
|
30
|
+
from nucliadb.ingest.orm.utils import set_title
|
31
|
+
from nucliadb.ingest.processing import PushPayload
|
32
|
+
from nucliadb_models.content_types import GENERIC_MIME_TYPE
|
33
|
+
from nucliadb_models.file import FileField
|
34
|
+
from nucliadb_models.link import LinkField
|
35
|
+
from nucliadb_models.metadata import (
|
36
|
+
ParagraphAnnotation,
|
37
|
+
QuestionAnswerAnnotation,
|
38
|
+
)
|
39
|
+
from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE, PushTextFormat, Text
|
40
|
+
from nucliadb_models.writer import (
|
41
|
+
ComingResourcePayload,
|
42
|
+
CreateResourcePayload,
|
43
|
+
UpdateResourcePayload,
|
44
|
+
)
|
45
|
+
from nucliadb_protos.knowledgebox_pb2 import KnowledgeBoxConfig
|
23
46
|
from nucliadb_protos.resources_pb2 import (
|
24
47
|
Answers,
|
25
48
|
Basic,
|
@@ -30,38 +53,19 @@ from nucliadb_protos.resources_pb2 import (
|
|
30
53
|
Metadata,
|
31
54
|
PageSelections,
|
32
55
|
Paragraph,
|
56
|
+
TokenSplit,
|
57
|
+
UserFieldMetadata,
|
58
|
+
VisualSelection,
|
33
59
|
)
|
34
60
|
from nucliadb_protos.resources_pb2 import ParagraphAnnotation as PBParagraphAnnotation
|
35
61
|
from nucliadb_protos.resources_pb2 import (
|
36
62
|
QuestionAnswerAnnotation as PBQuestionAnswerAnnotation,
|
37
63
|
)
|
38
|
-
from nucliadb_protos.resources_pb2 import TokenSplit, UserFieldMetadata, VisualSelection
|
39
64
|
from nucliadb_protos.utils_pb2 import Relation, RelationNode
|
40
65
|
from nucliadb_protos.writer_pb2 import BrokerMessage
|
41
66
|
|
42
|
-
from nucliadb.ingest.orm.utils import set_title
|
43
|
-
from nucliadb.ingest.processing import PushPayload
|
44
|
-
from nucliadb_models.common import FIELD_TYPES_MAP_REVERSE
|
45
|
-
from nucliadb_models.file import FileField
|
46
|
-
from nucliadb_models.link import LinkField
|
47
|
-
from nucliadb_models.metadata import (
|
48
|
-
ParagraphAnnotation,
|
49
|
-
QuestionAnswerAnnotation,
|
50
|
-
RelationNodeTypeMap,
|
51
|
-
RelationTypeMap,
|
52
|
-
)
|
53
|
-
from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE, PushTextFormat, Text
|
54
|
-
from nucliadb_models.writer import (
|
55
|
-
GENERIC_MIME_TYPE,
|
56
|
-
ComingResourcePayload,
|
57
|
-
CreateResourcePayload,
|
58
|
-
UpdateResourcePayload,
|
59
|
-
)
|
60
|
-
|
61
67
|
|
62
|
-
def parse_basic_modify(
|
63
|
-
bm: BrokerMessage, item: ComingResourcePayload, toprocess: PushPayload
|
64
|
-
):
|
68
|
+
def parse_basic_modify(bm: BrokerMessage, item: ComingResourcePayload, toprocess: PushPayload):
|
65
69
|
bm.basic.modified.FromDatetime(datetime.now())
|
66
70
|
if item.title:
|
67
71
|
set_title(bm, toprocess, item.title)
|
@@ -83,14 +87,9 @@ def parse_basic_modify(
|
|
83
87
|
bm.basic.metadata.useful = True
|
84
88
|
bm.basic.metadata.status = Metadata.Status.PENDING
|
85
89
|
|
86
|
-
toprocess.genericfield["summary"] = Text(
|
87
|
-
body=item.summary, format=PushTextFormat.PLAIN
|
88
|
-
)
|
90
|
+
toprocess.genericfield["summary"] = Text(body=item.summary, format=PushTextFormat.PLAIN)
|
89
91
|
if item.thumbnail:
|
90
92
|
bm.basic.thumbnail = item.thumbnail
|
91
|
-
if item.layout:
|
92
|
-
bm.basic.layout = item.layout
|
93
|
-
|
94
93
|
if item.metadata is not None:
|
95
94
|
bm.basic.metadata.metadata.update(item.metadata.metadata)
|
96
95
|
if item.metadata.language:
|
@@ -147,9 +146,8 @@ def parse_basic_modify(
|
|
147
146
|
userfieldmetadata.question_answers.append(qa_annotation_pb)
|
148
147
|
|
149
148
|
userfieldmetadata.field.field = fieldmetadata.field.field
|
150
|
-
|
151
|
-
|
152
|
-
]
|
149
|
+
|
150
|
+
userfieldmetadata.field.field_type = to_proto.field_type(fieldmetadata.field.field_type)
|
153
151
|
|
154
152
|
bm.basic.fieldmetadata.append(userfieldmetadata)
|
155
153
|
|
@@ -167,9 +165,7 @@ def parse_basic_modify(
|
|
167
165
|
]
|
168
166
|
)
|
169
167
|
|
170
|
-
relation_node_resource = RelationNode(
|
171
|
-
value=bm.uuid, ntype=RelationNode.NodeType.RESOURCE
|
172
|
-
)
|
168
|
+
relation_node_resource = RelationNode(value=bm.uuid, ntype=RelationNode.NodeType.RESOURCE)
|
173
169
|
relations = []
|
174
170
|
for relation in item.usermetadata.relations:
|
175
171
|
if relation.from_ is None:
|
@@ -205,8 +201,16 @@ def parse_basic_modify(
|
|
205
201
|
unique_groups = list(set(item.security.access_groups))
|
206
202
|
bm.security.access_groups.extend(unique_groups)
|
207
203
|
|
204
|
+
if item.hidden is not None:
|
205
|
+
bm.basic.hidden = item.hidden
|
206
|
+
|
208
207
|
|
209
|
-
def
|
208
|
+
def parse_basic_creation(
|
209
|
+
bm: BrokerMessage,
|
210
|
+
item: CreateResourcePayload,
|
211
|
+
toprocess: PushPayload,
|
212
|
+
kb_config: Optional[KnowledgeBoxConfig],
|
213
|
+
):
|
210
214
|
bm.basic.created.FromDatetime(datetime.now())
|
211
215
|
|
212
216
|
if item.title is None:
|
@@ -215,6 +219,10 @@ def parse_basic(bm: BrokerMessage, item: CreateResourcePayload, toprocess: PushP
|
|
215
219
|
|
216
220
|
parse_basic_modify(bm, item, toprocess)
|
217
221
|
|
222
|
+
if item.hidden is None:
|
223
|
+
if kb_config and kb_config.hidden_resources_hide_on_creation:
|
224
|
+
bm.basic.hidden = True
|
225
|
+
|
218
226
|
|
219
227
|
def set_status(basic: Basic, item: CreateResourcePayload):
|
220
228
|
basic.metadata.status = Metadata.Status.PENDING
|
@@ -227,15 +235,11 @@ def set_status_modify(basic: Basic, item: UpdateResourcePayload):
|
|
227
235
|
def validate_classifications(paragraph: ParagraphAnnotation):
|
228
236
|
classifications = paragraph.classifications
|
229
237
|
if len(classifications) == 0:
|
230
|
-
raise HTTPException(
|
231
|
-
status_code=422, detail="ensure classifications has at least 1 items"
|
232
|
-
)
|
238
|
+
raise HTTPException(status_code=422, detail="ensure classifications has at least 1 items")
|
233
239
|
|
234
|
-
unique_classifications = {tuple(cf.
|
240
|
+
unique_classifications = {tuple(cf.model_dump().values()) for cf in classifications}
|
235
241
|
if len(unique_classifications) != len(classifications):
|
236
|
-
raise HTTPException(
|
237
|
-
status_code=422, detail="Paragraph classifications need to be unique"
|
238
|
-
)
|
242
|
+
raise HTTPException(status_code=422, detail="Paragraph classifications need to be unique")
|
239
243
|
|
240
244
|
|
241
245
|
def compute_title(item: CreateResourcePayload, rid: str) -> str:
|
@@ -273,9 +277,7 @@ def build_question_answer_annotation_pb(
|
|
273
277
|
pb.cancelled_by_user = qa_annotation.cancelled_by_user
|
274
278
|
pb.question_answer.question.text = qa_annotation.question_answer.question.text
|
275
279
|
if qa_annotation.question_answer.question.language is not None:
|
276
|
-
pb.question_answer.question.language =
|
277
|
-
qa_annotation.question_answer.question.language
|
278
|
-
)
|
280
|
+
pb.question_answer.question.language = qa_annotation.question_answer.question.language
|
279
281
|
pb.question_answer.question.ids_paragraphs.extend(
|
280
282
|
qa_annotation.question_answer.question.ids_paragraphs
|
281
283
|
)
|
@@ -21,23 +21,23 @@ from datetime import datetime
|
|
21
21
|
from typing import Optional, Union
|
22
22
|
|
23
23
|
from google.protobuf.json_format import MessageToDict
|
24
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
25
24
|
|
26
25
|
import nucliadb_models as models
|
26
|
+
from nucliadb.common.models_utils import from_proto, to_proto
|
27
27
|
from nucliadb.ingest.fields.conversation import Conversation
|
28
28
|
from nucliadb.ingest.orm.resource import Resource as ORMResource
|
29
29
|
from nucliadb.ingest.processing import PushPayload
|
30
30
|
from nucliadb.writer import SERVICE_NAME
|
31
|
-
from nucliadb.writer.layouts import serialize_blocks
|
32
31
|
from nucliadb.writer.utilities import get_processing
|
33
|
-
from nucliadb_models.common import
|
32
|
+
from nucliadb_models.common import FieldTypeName
|
33
|
+
from nucliadb_models.content_types import GENERIC_MIME_TYPE
|
34
34
|
from nucliadb_models.conversation import PushConversation
|
35
35
|
from nucliadb_models.writer import (
|
36
|
-
GENERIC_MIME_TYPE,
|
37
36
|
CreateResourcePayload,
|
38
37
|
UpdateResourcePayload,
|
39
38
|
)
|
40
39
|
from nucliadb_protos import resources_pb2
|
40
|
+
from nucliadb_protos.writer_pb2 import BrokerMessage
|
41
41
|
from nucliadb_utils.storages.storage import StorageField
|
42
42
|
from nucliadb_utils.utilities import get_storage
|
43
43
|
|
@@ -80,12 +80,11 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
|
|
80
80
|
storage = await get_storage(service_name=SERVICE_NAME)
|
81
81
|
await resource.get_fields()
|
82
82
|
for (field_type, field_id), field in resource.fields.items():
|
83
|
-
field_type_name =
|
83
|
+
field_type_name = from_proto.field_type_name(field_type)
|
84
84
|
|
85
85
|
if field_type_name not in {
|
86
86
|
FieldTypeName.TEXT,
|
87
87
|
FieldTypeName.FILE,
|
88
|
-
FieldTypeName.LAYOUT,
|
89
88
|
FieldTypeName.CONVERSATION,
|
90
89
|
FieldTypeName.LINK,
|
91
90
|
}:
|
@@ -114,28 +113,7 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
|
|
114
113
|
parsed_text["format"] = models.PushTextFormat[parsed_text["format"]]
|
115
114
|
toprocess.textfield[field_id] = models.Text(**parsed_text)
|
116
115
|
|
117
|
-
if field_type_name is FieldTypeName.
|
118
|
-
parsed_layout = MessageToDict(
|
119
|
-
field_pb,
|
120
|
-
preserving_proto_field_name=True,
|
121
|
-
including_default_value_fields=True,
|
122
|
-
)
|
123
|
-
parsed_layout["format"] = resources_pb2.FieldLayout.Format.Value(
|
124
|
-
parsed_layout["format"]
|
125
|
-
)
|
126
|
-
|
127
|
-
for blockid, block in parsed_layout["body"]["blocks"].items():
|
128
|
-
cf = field_pb.body.blocks[blockid].file
|
129
|
-
block["file"] = await processing.convert_internal_cf_to_str(cf, storage)
|
130
|
-
|
131
|
-
parsed_layout["blocks"] = parsed_layout.get("body", {}).get("blocks", {})
|
132
|
-
del parsed_layout["body"]
|
133
|
-
|
134
|
-
toprocess.layoutfield[field_id] = models.LayoutDiff(**parsed_layout)
|
135
|
-
|
136
|
-
if field_type_name is FieldTypeName.CONVERSATION and isinstance(
|
137
|
-
field, Conversation
|
138
|
-
):
|
116
|
+
if field_type_name is FieldTypeName.CONVERSATION and isinstance(field, Conversation):
|
139
117
|
metadata = await field.get_metadata()
|
140
118
|
if metadata.pages == 0:
|
141
119
|
continue
|
@@ -156,14 +134,13 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
|
|
156
134
|
await processing.convert_internal_cf_to_str(cf, storage)
|
157
135
|
for cf in message.content.attachments
|
158
136
|
]
|
159
|
-
parsed_message["content"]
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
full_conversation.messages.append(
|
165
|
-
models.PushMessage(**parsed_message)
|
137
|
+
if "attachments_fields" in parsed_message["content"]:
|
138
|
+
# Not defined on the push payload
|
139
|
+
del parsed_message["content"]["attachments_fields"]
|
140
|
+
parsed_message["content"]["format"] = resources_pb2.MessageContent.Format.Value(
|
141
|
+
parsed_message["content"]["format"]
|
166
142
|
)
|
143
|
+
full_conversation.messages.append(models.PushMessage(**parsed_message))
|
167
144
|
toprocess.conversationfield[field_id] = full_conversation
|
168
145
|
|
169
146
|
|
@@ -176,9 +153,7 @@ async def parse_fields(
|
|
176
153
|
x_skip_store: bool,
|
177
154
|
):
|
178
155
|
for key, file_field in item.files.items():
|
179
|
-
await parse_file_field(
|
180
|
-
key, file_field, writer, toprocess, kbid, uuid, skip_store=x_skip_store
|
181
|
-
)
|
156
|
+
await parse_file_field(key, file_field, writer, toprocess, kbid, uuid, skip_store=x_skip_store)
|
182
157
|
|
183
158
|
for key, link_field in item.links.items():
|
184
159
|
parse_link_field(key, link_field, writer, toprocess)
|
@@ -186,19 +161,8 @@ async def parse_fields(
|
|
186
161
|
for key, text_field in item.texts.items():
|
187
162
|
parse_text_field(key, text_field, writer, toprocess)
|
188
163
|
|
189
|
-
for key, layout_field in item.layouts.items():
|
190
|
-
await parse_layout_field(key, layout_field, writer, toprocess, kbid, uuid)
|
191
|
-
|
192
164
|
for key, conversation_field in item.conversations.items():
|
193
|
-
await parse_conversation_field(
|
194
|
-
key, conversation_field, writer, toprocess, kbid, uuid
|
195
|
-
)
|
196
|
-
|
197
|
-
for key, datetime_field in item.datetimes.items():
|
198
|
-
parse_datetime_field(key, datetime_field, writer, toprocess)
|
199
|
-
|
200
|
-
for key, keywordset_field in item.keywordsets.items():
|
201
|
-
parse_keywordset_field(key, keywordset_field, writer, toprocess)
|
165
|
+
await parse_conversation_field(key, conversation_field, writer, toprocess, kbid, uuid)
|
202
166
|
|
203
167
|
|
204
168
|
def parse_text_field(
|
@@ -208,9 +172,7 @@ def parse_text_field(
|
|
208
172
|
toprocess: PushPayload,
|
209
173
|
) -> None:
|
210
174
|
writer.texts[key].body = text_field.body
|
211
|
-
writer.texts[key].format = resources_pb2.FieldText.Format.Value(
|
212
|
-
text_field.format.value
|
213
|
-
)
|
175
|
+
writer.texts[key].format = resources_pb2.FieldText.Format.Value(text_field.format.value)
|
214
176
|
etw = resources_pb2.ExtractedTextWrapper()
|
215
177
|
etw.field.field = key
|
216
178
|
etw.field.field_type = resources_pb2.FieldType.TEXT
|
@@ -338,78 +300,6 @@ def parse_link_field(
|
|
338
300
|
)
|
339
301
|
|
340
302
|
|
341
|
-
def parse_keywordset_field(
|
342
|
-
key: str,
|
343
|
-
keywordset_field: models.FieldKeywordset,
|
344
|
-
writer: BrokerMessage,
|
345
|
-
toprocess: PushPayload,
|
346
|
-
) -> None:
|
347
|
-
if keywordset_field.keywords is None:
|
348
|
-
return
|
349
|
-
|
350
|
-
for keyword in keywordset_field.keywords:
|
351
|
-
fieldpb = resources_pb2.Keyword()
|
352
|
-
fieldpb.value = keyword.value
|
353
|
-
writer.keywordsets[key].keywords.append(fieldpb)
|
354
|
-
|
355
|
-
|
356
|
-
def parse_datetime_field(
|
357
|
-
key: str,
|
358
|
-
datetime_field: models.FieldDatetime,
|
359
|
-
writer: BrokerMessage,
|
360
|
-
toprocess: PushPayload,
|
361
|
-
) -> None:
|
362
|
-
if datetime_field.value is None:
|
363
|
-
return
|
364
|
-
|
365
|
-
writer.datetimes[key].value.FromDatetime(datetime_field.value)
|
366
|
-
|
367
|
-
|
368
|
-
async def parse_layout_field(
|
369
|
-
key: str,
|
370
|
-
layout_field: models.InputLayoutField,
|
371
|
-
writer: BrokerMessage,
|
372
|
-
toprocess: PushPayload,
|
373
|
-
kbid: str,
|
374
|
-
uuid: str,
|
375
|
-
) -> None:
|
376
|
-
storage = await get_storage(service_name=SERVICE_NAME)
|
377
|
-
processing = get_processing()
|
378
|
-
|
379
|
-
lc: resources_pb2.FieldLayout = await serialize_blocks(
|
380
|
-
layout_field, kbid, uuid, key, storage
|
381
|
-
)
|
382
|
-
writer.layouts[key].CopyFrom(lc)
|
383
|
-
|
384
|
-
toprocess_blocks = {}
|
385
|
-
for blockid, block in layout_field.body.blocks.items():
|
386
|
-
sf_conv_field: StorageField = storage.layout_field(
|
387
|
-
kbid, uuid, field=key, ident=block.ident
|
388
|
-
)
|
389
|
-
cf_conv_field = await storage.upload_b64file_to_cloudfile(
|
390
|
-
sf_conv_field,
|
391
|
-
block.file.payload.encode(),
|
392
|
-
block.file.filename,
|
393
|
-
block.file.content_type,
|
394
|
-
block.file.md5,
|
395
|
-
)
|
396
|
-
|
397
|
-
toprocess_blocks[blockid] = models.PushLayoutBlock(
|
398
|
-
x=block.x,
|
399
|
-
y=block.y,
|
400
|
-
cols=block.cols,
|
401
|
-
rows=block.rows,
|
402
|
-
type=block.type,
|
403
|
-
ident=block.ident,
|
404
|
-
payload=block.payload,
|
405
|
-
file=await processing.convert_internal_cf_to_str(cf_conv_field, storage),
|
406
|
-
)
|
407
|
-
|
408
|
-
toprocess.layoutfield[key] = models.LayoutDiff(
|
409
|
-
format=lc.format, blocks=toprocess_blocks # type: ignore
|
410
|
-
)
|
411
|
-
|
412
|
-
|
413
303
|
async def parse_conversation_field(
|
414
304
|
key: str,
|
415
305
|
conversation_field: models.InputConversationField,
|
@@ -441,8 +331,16 @@ async def parse_conversation_field(
|
|
441
331
|
)
|
442
332
|
|
443
333
|
cm.content.text = message.content.text
|
444
|
-
cm.content.format = resources_pb2.MessageContent.Format.Value(
|
445
|
-
|
334
|
+
cm.content.format = resources_pb2.MessageContent.Format.Value(message.content.format.value)
|
335
|
+
cm.content.attachments_fields.extend(
|
336
|
+
[
|
337
|
+
resources_pb2.FieldRef(
|
338
|
+
field_type=to_proto.field_type_name(attachment.field_type),
|
339
|
+
field_id=attachment.field_id,
|
340
|
+
split=attachment.split if attachment.split is not None else "",
|
341
|
+
)
|
342
|
+
for attachment in message.content.attachments_fields
|
343
|
+
]
|
446
344
|
)
|
447
345
|
|
448
346
|
for count, file in enumerate(message.content.attachments):
|
@@ -16,10 +16,9 @@
|
|
16
16
|
#
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
from nucliadb_protos.resources_pb2 import Origin
|
20
|
-
|
21
19
|
from nucliadb_models import Extra, InputOrigin
|
22
20
|
from nucliadb_protos import resources_pb2
|
21
|
+
from nucliadb_protos.resources_pb2 import Origin
|
23
22
|
|
24
23
|
|
25
24
|
def parse_origin(origin: Origin, origin_payload: InputOrigin):
|
nucliadb/writer/settings.py
CHANGED
@@ -48,16 +48,20 @@ class BackPressureSettings(BaseSettings):
|
|
48
48
|
description="Estimation of the processing rate in messages per second. This is used to calculate the try again in time", # noqa
|
49
49
|
)
|
50
50
|
max_indexing_pending: int = Field(
|
51
|
-
default=
|
51
|
+
default=200,
|
52
52
|
description="Max number of messages pending to index in a node queue before rate limiting writes. Set to 0 to disable indexing back pressure checks", # noqa
|
53
|
+
alias="back_pressure_max_indexing_pending",
|
53
54
|
)
|
54
55
|
max_ingest_pending: int = Field(
|
55
|
-
default
|
56
|
+
# Disabled by default
|
57
|
+
default=0,
|
56
58
|
description="Max number of messages pending to be ingested by processed consumers before rate limiting writes. Set to 0 to disable ingest back pressure checks", # noqa
|
59
|
+
alias="back_pressure_max_ingest_pending",
|
57
60
|
)
|
58
61
|
max_processing_pending: int = Field(
|
59
62
|
default=1000,
|
60
63
|
description="Max number of messages pending to process per Knowledge Box before rate limiting writes. Set to 0 to disable processing back pressure checks", # noqa
|
64
|
+
alias="back_pressure_max_processing_pending",
|
61
65
|
)
|
62
66
|
indexing_check_interval: int = Field(
|
63
67
|
default=30,
|
nucliadb/writer/tus/__init__.py
CHANGED
@@ -23,10 +23,6 @@ from typing import Optional
|
|
23
23
|
from nucliadb.writer.settings import settings as writer_settings
|
24
24
|
from nucliadb.writer.tus.dm import FileDataManager, RedisFileDataManagerFactory
|
25
25
|
from nucliadb.writer.tus.exceptions import ManagerNotAvailable
|
26
|
-
from nucliadb.writer.tus.gcs import GCloudBlobStore, GCloudFileStorageManager
|
27
|
-
from nucliadb.writer.tus.local import LocalBlobStore, LocalFileStorageManager
|
28
|
-
from nucliadb.writer.tus.pg import PGBlobStore, PGFileStorageManager
|
29
|
-
from nucliadb.writer.tus.s3 import S3BlobStore, S3FileStorageManager
|
30
26
|
from nucliadb.writer.tus.storage import BlobStore, FileStorageManager
|
31
27
|
from nucliadb_utils.exceptions import ConfigurationError
|
32
28
|
from nucliadb_utils.settings import FileBackendConfig, storage_settings
|
@@ -48,6 +44,8 @@ REDIS_FILE_DATA_MANAGER_FACTORY: Optional[RedisFileDataManagerFactory] = None
|
|
48
44
|
async def initialize():
|
49
45
|
global DRIVER
|
50
46
|
if storage_settings.file_backend == FileBackendConfig.GCS:
|
47
|
+
from nucliadb.writer.tus.gcs import GCloudBlobStore, GCloudFileStorageManager
|
48
|
+
|
51
49
|
storage_backend = GCloudBlobStore()
|
52
50
|
|
53
51
|
await storage_backend.initialize(
|
@@ -64,6 +62,8 @@ async def initialize():
|
|
64
62
|
DRIVER = TusStorageDriver(backend=storage_backend, manager=storage_manager)
|
65
63
|
|
66
64
|
elif storage_settings.file_backend == FileBackendConfig.S3:
|
65
|
+
from nucliadb.writer.tus.s3 import S3BlobStore, S3FileStorageManager
|
66
|
+
|
67
67
|
storage_backend = S3BlobStore()
|
68
68
|
|
69
69
|
await storage_backend.initialize(
|
@@ -83,6 +83,8 @@ async def initialize():
|
|
83
83
|
DRIVER = TusStorageDriver(backend=storage_backend, manager=storage_manager)
|
84
84
|
|
85
85
|
elif storage_settings.file_backend == FileBackendConfig.LOCAL:
|
86
|
+
from nucliadb.writer.tus.local import LocalBlobStore, LocalFileStorageManager
|
87
|
+
|
86
88
|
storage_backend = LocalBlobStore(storage_settings.local_files)
|
87
89
|
|
88
90
|
await storage_backend.initialize()
|
@@ -91,12 +93,18 @@ async def initialize():
|
|
91
93
|
|
92
94
|
DRIVER = TusStorageDriver(backend=storage_backend, manager=storage_manager)
|
93
95
|
|
94
|
-
elif storage_settings.file_backend == FileBackendConfig.
|
95
|
-
|
96
|
+
elif storage_settings.file_backend == FileBackendConfig.AZURE:
|
97
|
+
from nucliadb.writer.tus.azure import AzureBlobStore, AzureFileStorageManager
|
96
98
|
|
97
|
-
|
99
|
+
if storage_settings.azure_account_url is None:
|
100
|
+
raise ConfigurationError("AZURE_ACCOUNT_URL env variable not configured")
|
98
101
|
|
99
|
-
|
102
|
+
storage_backend = AzureBlobStore()
|
103
|
+
await storage_backend.initialize(
|
104
|
+
storage_settings.azure_account_url,
|
105
|
+
connection_string=storage_settings.azure_connection_string,
|
106
|
+
)
|
107
|
+
storage_manager = AzureFileStorageManager(storage_backend)
|
100
108
|
|
101
109
|
DRIVER = TusStorageDriver(backend=storage_backend, manager=storage_manager)
|
102
110
|
|
@@ -117,7 +125,7 @@ async def finalize():
|
|
117
125
|
REDIS_FILE_DATA_MANAGER_FACTORY = None
|
118
126
|
|
119
127
|
|
120
|
-
def get_dm() -> FileDataManager:
|
128
|
+
def get_dm() -> FileDataManager:
|
121
129
|
if writer_settings.dm_enabled:
|
122
130
|
global REDIS_FILE_DATA_MANAGER_FACTORY
|
123
131
|
if REDIS_FILE_DATA_MANAGER_FACTORY is None:
|
@@ -136,9 +144,3 @@ def get_storage_manager() -> FileStorageManager:
|
|
136
144
|
if DRIVER is None:
|
137
145
|
raise ManagerNotAvailable()
|
138
146
|
return DRIVER.manager
|
139
|
-
|
140
|
-
|
141
|
-
def clear_storage():
|
142
|
-
global DRIVER
|
143
|
-
|
144
|
-
DRIVER = None
|