nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2798__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -403
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +70 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +143 -117
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +37 -128
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2798.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2798.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2798.dist-info}/zip-safe +0 -0
@@ -1,18 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
@@ -1,78 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
|
21
|
-
import asyncio
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
|
25
|
-
from nucliadb.ingest.consumer import auditing
|
26
|
-
from nucliadb_protos import audit_pb2, writer_pb2
|
27
|
-
from nucliadb_utils import const
|
28
|
-
from nucliadb_utils.audit.stream import StreamAuditStorage
|
29
|
-
|
30
|
-
pytestmark = pytest.mark.asyncio
|
31
|
-
|
32
|
-
|
33
|
-
async def test_audit_counters(
|
34
|
-
stream_audit: StreamAuditStorage,
|
35
|
-
pubsub,
|
36
|
-
nats_manager,
|
37
|
-
fake_node,
|
38
|
-
knowledgebox_ingest,
|
39
|
-
):
|
40
|
-
from nucliadb_utils.settings import audit_settings
|
41
|
-
|
42
|
-
partition = stream_audit.get_partition(knowledgebox_ingest)
|
43
|
-
subject = audit_settings.audit_jetstream_target.format( # type: ignore
|
44
|
-
partition=partition, type="*"
|
45
|
-
)
|
46
|
-
await nats_manager.js.add_stream(
|
47
|
-
name=audit_settings.audit_stream, subjects=[subject]
|
48
|
-
)
|
49
|
-
psub = await nats_manager.js.pull_subscribe(subject, "psub")
|
50
|
-
|
51
|
-
iah = auditing.IndexAuditHandler(
|
52
|
-
audit=stream_audit,
|
53
|
-
pubsub=pubsub,
|
54
|
-
check_delay=0.05,
|
55
|
-
)
|
56
|
-
await iah.initialize()
|
57
|
-
|
58
|
-
await pubsub.publish(
|
59
|
-
const.PubSubChannels.RESOURCE_NOTIFY.format(kbid=knowledgebox_ingest),
|
60
|
-
writer_pb2.Notification(
|
61
|
-
kbid=knowledgebox_ingest,
|
62
|
-
action=writer_pb2.Notification.Action.INDEXED,
|
63
|
-
).SerializeToString(),
|
64
|
-
)
|
65
|
-
|
66
|
-
await asyncio.sleep(0.1)
|
67
|
-
|
68
|
-
await iah.finalize()
|
69
|
-
|
70
|
-
# should have produced audit message, get the message
|
71
|
-
msg = await psub.fetch(1)
|
72
|
-
auditreq = audit_pb2.AuditRequest()
|
73
|
-
auditreq.ParseFromString(msg[0].data)
|
74
|
-
|
75
|
-
assert auditreq.kbid == knowledgebox_ingest
|
76
|
-
assert auditreq.type == audit_pb2.AuditRequest.AuditType.INDEXED
|
77
|
-
assert auditreq.kb_counter.fields == 2
|
78
|
-
assert auditreq.kb_counter.paragraphs == 2
|
@@ -1,126 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
|
21
|
-
import asyncio
|
22
|
-
from unittest.mock import AsyncMock, MagicMock, patch
|
23
|
-
|
24
|
-
import pytest
|
25
|
-
|
26
|
-
from nucliadb.common import datamanagers
|
27
|
-
from nucliadb.ingest.consumer import materializer
|
28
|
-
from nucliadb.ingest.tests.fixtures import create_resource
|
29
|
-
from nucliadb_protos import writer_pb2
|
30
|
-
from nucliadb_utils import const
|
31
|
-
from nucliadb_utils.audit.stream import StreamAuditStorage
|
32
|
-
from nucliadb_utils.nuclia_usage.protos.kb_usage_pb2 import KbUsage, Service
|
33
|
-
from nucliadb_utils.utilities import Utility, clean_utility, set_utility
|
34
|
-
|
35
|
-
pytestmark = pytest.mark.asyncio
|
36
|
-
|
37
|
-
|
38
|
-
@pytest.fixture()
|
39
|
-
def nats():
|
40
|
-
mock = AsyncMock()
|
41
|
-
mock.jetstream = MagicMock(return_value=AsyncMock())
|
42
|
-
yield mock
|
43
|
-
|
44
|
-
|
45
|
-
@pytest.fixture()
|
46
|
-
async def audit_storage(nats):
|
47
|
-
with patch("nucliadb_utils.audit.stream.nats.connect", return_value=nats):
|
48
|
-
aud = StreamAuditStorage(
|
49
|
-
nats_servers=["nats://localhost:4222"],
|
50
|
-
nats_target="test",
|
51
|
-
partitions=1,
|
52
|
-
seed=1,
|
53
|
-
nats_creds="nats_creds",
|
54
|
-
)
|
55
|
-
await aud.initialize()
|
56
|
-
set_utility(Utility.AUDIT, aud)
|
57
|
-
yield aud
|
58
|
-
clean_utility(Utility.AUDIT)
|
59
|
-
await aud.finalize()
|
60
|
-
|
61
|
-
|
62
|
-
async def test_materialize_kb_data(
|
63
|
-
maindb_driver,
|
64
|
-
pubsub,
|
65
|
-
storage,
|
66
|
-
fake_node,
|
67
|
-
knowledgebox_ingest,
|
68
|
-
audit_storage,
|
69
|
-
):
|
70
|
-
count = 10
|
71
|
-
for _ in range(count):
|
72
|
-
await create_resource(
|
73
|
-
storage=storage,
|
74
|
-
driver=maindb_driver,
|
75
|
-
knowledgebox_ingest=knowledgebox_ingest,
|
76
|
-
)
|
77
|
-
|
78
|
-
mz = materializer.MaterializerHandler(
|
79
|
-
driver=maindb_driver,
|
80
|
-
storage=storage,
|
81
|
-
pubsub=pubsub,
|
82
|
-
check_delay=0.05,
|
83
|
-
)
|
84
|
-
await mz.initialize()
|
85
|
-
|
86
|
-
async with datamanagers.with_transaction() as txn:
|
87
|
-
assert (
|
88
|
-
await datamanagers.resources.get_number_of_resources(
|
89
|
-
txn, kbid=knowledgebox_ingest
|
90
|
-
)
|
91
|
-
== -1
|
92
|
-
)
|
93
|
-
assert (
|
94
|
-
await datamanagers.resources.calculate_number_of_resources(
|
95
|
-
txn, kbid=knowledgebox_ingest
|
96
|
-
)
|
97
|
-
== count
|
98
|
-
)
|
99
|
-
|
100
|
-
await pubsub.publish(
|
101
|
-
const.PubSubChannels.RESOURCE_NOTIFY.format(kbid=knowledgebox_ingest),
|
102
|
-
writer_pb2.Notification(
|
103
|
-
kbid=knowledgebox_ingest,
|
104
|
-
action=writer_pb2.Notification.Action.COMMIT,
|
105
|
-
).SerializeToString(),
|
106
|
-
)
|
107
|
-
|
108
|
-
await asyncio.sleep(0.2)
|
109
|
-
|
110
|
-
async with datamanagers.with_transaction() as txn:
|
111
|
-
assert (
|
112
|
-
await datamanagers.resources.get_number_of_resources(
|
113
|
-
txn, kbid=knowledgebox_ingest
|
114
|
-
)
|
115
|
-
== count
|
116
|
-
)
|
117
|
-
|
118
|
-
await mz.finalize()
|
119
|
-
|
120
|
-
assert audit_storage.js.publish.call_count == 1
|
121
|
-
assert audit_storage.js.publish.call_args[0][0] == "kb-usage.nuclia_db"
|
122
|
-
pb = KbUsage()
|
123
|
-
pb.ParseFromString(audit_storage.js.publish.call_args[0][1])
|
124
|
-
assert pb.storage.resources == count
|
125
|
-
assert pb.service == Service.NUCLIA_DB
|
126
|
-
assert pb.kb_id == knowledgebox_ingest
|
@@ -1,144 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import asyncio
|
21
|
-
import base64
|
22
|
-
import time
|
23
|
-
import uuid
|
24
|
-
from dataclasses import dataclass
|
25
|
-
from unittest.mock import patch
|
26
|
-
|
27
|
-
import pytest
|
28
|
-
from fastapi import FastAPI
|
29
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
30
|
-
from uvicorn.config import Config # type: ignore
|
31
|
-
from uvicorn.server import Server # type: ignore
|
32
|
-
|
33
|
-
from nucliadb.ingest.consumer.pull import PullWorker
|
34
|
-
from nucliadb_utils import const
|
35
|
-
from nucliadb_utils.fastapi.run import start_server
|
36
|
-
from nucliadb_utils.nats import NatsConnectionManager
|
37
|
-
from nucliadb_utils.tests import free_port
|
38
|
-
|
39
|
-
pytestmark = pytest.mark.asyncio
|
40
|
-
|
41
|
-
|
42
|
-
def create_broker_message(kbid: str) -> BrokerMessage:
|
43
|
-
bm = BrokerMessage()
|
44
|
-
bm.uuid = uuid.uuid4().hex
|
45
|
-
bm.kbid = kbid
|
46
|
-
bm.texts["text1"].body = "My text1"
|
47
|
-
bm.basic.title = "My Title"
|
48
|
-
bm.source == BrokerMessage.MessageSource.PROCESSOR
|
49
|
-
|
50
|
-
return bm
|
51
|
-
|
52
|
-
|
53
|
-
@dataclass
|
54
|
-
class PullProcessorAPI:
|
55
|
-
url: str
|
56
|
-
messages: list[BrokerMessage]
|
57
|
-
|
58
|
-
|
59
|
-
@pytest.fixture()
async def pull_processor_api():
    """Run a fake processing "pull" HTTP endpoint for the duration of a test.

    A FastAPI app with a single GET route is started on a free port, and the
    ``nuclia_processing_cluster_url`` setting is patched to point at it.

    Yields:
        A ``PullProcessorAPI`` holding the server URL and the list of
        messages the endpoint will hand out.
    """
    pending: list[BrokerMessage] = []  # type: ignore
    app = FastAPI()

    @app.get("/api/v1/internal/processing/pull")
    async def pull():
        if not pending:
            return {"status": "empty"}

        # list.pop() takes from the end, so the newest message is served first.
        popped = pending.pop()
        encoded = base64.b64encode(popped.SerializeToString()).decode()
        # msgid is the number of messages still queued after this pop.
        return {"status": "ok", "payload": encoded, "msgid": str(len(pending))}

    port = free_port()
    config = Config(app, host="0.0.0.0", port=port, http="auto")
    server = Server(config=config)
    await start_server(server, config)

    url = f"http://127.0.0.1:{port}"
    setting_path = (
        "nucliadb.common.http_clients.processing.nuclia_settings.nuclia_processing_cluster_url"
    )
    with patch(setting_path, url):
        yield PullProcessorAPI(url=url, messages=pending)

    await server.shutdown()
|
89
|
-
|
90
|
-
|
91
|
-
@pytest.fixture()
async def pull_worker(maindb_driver, pull_processor_api: PullProcessorAPI):
    """Yield a ``PullWorker`` for partition "1" with its loop running.

    ``worker.loop()`` is scheduled as a background task before the worker is
    yielded, and that task is cancelled on teardown. Depending on
    ``pull_processor_api`` ensures the fake endpoint is set up first.
    """
    worker_options = {
        "driver": maindb_driver,
        "partition": "1",
        "storage": None,
        "pull_time_error_backoff": 5,
        "pull_time_empty_backoff": 0.1,
    }
    worker = PullWorker(**worker_options)  # type: ignore

    loop_task = asyncio.create_task(worker.loop())
    yield worker
    loop_task.cancel()
|
104
|
-
|
105
|
-
|
106
|
-
async def wait_for_messages(messages: list[BrokerMessage], max_time: int = 10) -> None:
    """Poll until ``messages`` is empty or ``max_time`` seconds have passed.

    Args:
        messages: List that is expected to be drained by someone else.
        max_time: Upper bound, in seconds, on how long to keep polling.

    Returns:
        ``None`` in both cases: the list was found empty, or the time ran
        out. The caller cannot tell the two apart from the return value.
    """
    poll_interval = 0.1
    began = time.monotonic()
    while time.monotonic() - began < max_time:
        if messages:
            await asyncio.sleep(poll_interval)
            continue

        # extra sleep to make sure it's flushed to consumer
        await asyncio.sleep(poll_interval)
        return
|
116
|
-
|
117
|
-
|
118
|
-
async def test_pull_full_integration(
    ingest_consumers,
    ingest_processed_consumer,
    pull_worker: PullWorker,
    pull_processor_api: PullProcessorAPI,
    knowledgebox_ingest: str,
    nats_manager: NatsConnectionManager,
):
    """Check that a message served by the fake pull API reaches a consumer.

    Both consumers must start with nothing delivered. One message is then
    queued on the fake API, and once it has been pulled the consumer looked
    up below must report ``delivered.stream_seq == 1``.
    """
    ingest_stream = const.Streams.INGEST
    processed_stream = const.Streams.INGEST_PROCESSED

    # make sure stream is empty
    ingest_info = await nats_manager.js.consumer_info(
        ingest_stream.name, ingest_stream.group.format(partition="1")
    )
    processed_info = await nats_manager.js.consumer_info(
        processed_stream.name, processed_stream.group
    )
    assert ingest_info.delivered.stream_seq == 0
    assert processed_info.delivered.stream_seq == 0

    # queue one message on the fake API and wait for the worker to pull it
    queued = create_broker_message(knowledgebox_ingest)
    pull_processor_api.messages.append(queued)
    await wait_for_messages(pull_processor_api.messages)

    # NOTE(review): this lookup pairs the INGEST stream name with the
    # INGEST_PROCESSED consumer group; presumably both share one stream name.
    # Confirm against nucliadb_utils.const.
    after_pull = await nats_manager.js.consumer_info(
        ingest_stream.name, processed_stream.group
    )

    assert after_pull.delivered.stream_seq == 1
|
@@ -1,81 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import uuid
|
21
|
-
|
22
|
-
import pytest
|
23
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
24
|
-
|
25
|
-
from nucliadb_utils import const
|
26
|
-
from nucliadb_utils.nats import NatsConnectionManager
|
27
|
-
from nucliadb_utils.transaction import TransactionUtility
|
28
|
-
|
29
|
-
pytestmark = pytest.mark.asyncio
|
30
|
-
|
31
|
-
|
32
|
-
def create_broker_message(kbid: str) -> BrokerMessage:
    """Return a small broker message addressed to the given knowledge box.

    Args:
        kbid: Knowledge box id to set on the message.

    Returns:
        A ``BrokerMessage`` with a random hex uuid, the text field
        ``"text1"`` and a title filled in.
    """
    message = BrokerMessage(uuid=uuid.uuid4().hex, kbid=kbid)
    message.texts["text1"].body = "My text1"
    message.basic.title = "My Title"
    return message
|
40
|
-
|
41
|
-
|
42
|
-
async def test_separated_ingest_consumer(
    ingest_consumers,
    ingest_processed_consumer,
    knowledgebox_ingest,
    transaction_utility: TransactionUtility,
    nats_manager: NatsConnectionManager,
    pubsub,
):
    """Check that the ingest and processed consumers advance independently.

    A commit without a target subject must only move the partition "1"
    ingest consumer. A commit targeted at the INGEST_PROCESSED subject must
    only move the processed consumer.
    """
    bm_normal = create_broker_message(knowledgebox_ingest)
    bm_processed = create_broker_message(knowledgebox_ingest)
    # Assignment, not comparison: the previous ``==`` was an expression
    # statement with no effect, leaving ``source`` at its default.
    bm_processed.source = BrokerMessage.MessageSource.PROCESSOR
    # NOTE(review): bm_processed is still never committed; the second commit
    # below sends bm_normal. Confirm whether bm_processed was meant to be
    # sent there before switching it.

    await transaction_utility.commit(bm_normal, partition=1, wait=True)

    consumer_info1 = await nats_manager.js.consumer_info(
        const.Streams.INGEST.name, const.Streams.INGEST.group.format(partition="1")
    )
    consumer_info2 = await nats_manager.js.consumer_info(
        const.Streams.INGEST_PROCESSED.name, const.Streams.INGEST_PROCESSED.group
    )

    # Only the regular ingest consumer has been delivered anything so far.
    assert consumer_info1.delivered.stream_seq == 1
    assert consumer_info2.delivered.stream_seq == 0

    await transaction_utility.commit(
        bm_normal,
        partition=1,
        wait=True,
        target_subject=const.Streams.INGEST_PROCESSED.subject,
    )

    consumer_info1 = await nats_manager.js.consumer_info(
        const.Streams.INGEST.name, const.Streams.INGEST.group.format(partition="1")
    )
    consumer_info2 = await nats_manager.js.consumer_info(
        const.Streams.INGEST_PROCESSED.name, const.Streams.INGEST_PROCESSED.group
    )

    # The ingest consumer did not move; the processed consumer now reports
    # stream sequence 2 (presumably both subjects live on the same stream,
    # so this is the second message overall).
    assert consumer_info1.delivered.stream_seq == 1
    assert consumer_info2.delivered.stream_seq == 2
|
@@ -1,68 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
|
21
|
-
import asyncio
|
22
|
-
|
23
|
-
import pytest
|
24
|
-
|
25
|
-
from nucliadb.ingest.consumer import shard_creator
|
26
|
-
from nucliadb_protos import writer_pb2
|
27
|
-
from nucliadb_utils import const
|
28
|
-
|
29
|
-
pytestmark = pytest.mark.asyncio
|
30
|
-
|
31
|
-
|
32
|
-
async def test_shard_auto_create(
    maindb_driver,
    pubsub,
    storage,
    fake_node,
    knowledgebox_ingest,
):
    """Check that an INDEXED notification makes the handler add a shard.

    ``max_shard_paragraphs`` is lowered to 1 for the test. After an INDEXED
    notification is published for the knowledge box, it must have exactly
    one shard more than before.
    """
    from nucliadb.common.cluster.settings import settings

    # ``settings`` is a module-level object: remember the previous value and
    # restore it afterwards so the override does not leak into other tests.
    previous_max_paragraphs = settings.max_shard_paragraphs
    settings.max_shard_paragraphs = 1
    try:
        sc = shard_creator.ShardCreatorHandler(
            driver=maindb_driver,
            storage=storage,
            pubsub=pubsub,
            check_delay=0.05,
        )
        await sc.initialize()
        try:
            original_kb_shards = await sc.shard_manager.get_shards_by_kbid_inner(
                knowledgebox_ingest
            )

            await pubsub.publish(
                const.PubSubChannels.RESOURCE_NOTIFY.format(kbid=knowledgebox_ingest),
                writer_pb2.Notification(
                    kbid=knowledgebox_ingest,
                    action=writer_pb2.Notification.Action.INDEXED,
                ).SerializeToString(),
            )

            # Give the handler time to react; this is several times the
            # check_delay configured above.
            await asyncio.sleep(0.2)
        finally:
            # Always finalize the handler, even if the steps above failed.
            await sc.finalize()

        kb_shards = await sc.shard_manager.get_shards_by_kbid_inner(knowledgebox_ingest)
        assert len(kb_shards.shards) == len(original_kb_shards.shards) + 1
    finally:
        settings.max_shard_paragraphs = previous_max_paragraphs
|