nucliadb 4.0.0.post542__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0003_allfields_key.py +1 -35
- migrations/0009_upgrade_relations_and_texts_to_v2.py +4 -2
- migrations/0010_fix_corrupt_indexes.py +10 -10
- migrations/0011_materialize_labelset_ids.py +1 -16
- migrations/0012_rollover_shards.py +5 -10
- migrations/0014_rollover_shards.py +4 -5
- migrations/0015_targeted_rollover.py +5 -10
- migrations/0016_upgrade_to_paragraphs_v2.py +25 -28
- migrations/0017_multiple_writable_shards.py +2 -4
- migrations/0018_purge_orphan_kbslugs.py +5 -7
- migrations/0019_upgrade_to_paragraphs_v3.py +25 -28
- migrations/0020_drain_nodes_from_cluster.py +3 -3
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +16 -19
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +30 -16
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +3 -11
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +174 -59
- nucliadb/common/cluster/rebalance.py +27 -29
- nucliadb/common/cluster/rollover.py +353 -194
- nucliadb/common/cluster/settings.py +6 -0
- nucliadb/common/cluster/standalone/grpc_node_binding.py +13 -64
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +2 -6
- nucliadb/common/cluster/utils.py +29 -22
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +3 -0
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +7 -1
- nucliadb/common/datamanagers/atomic.py +22 -4
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +83 -37
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +41 -103
- nucliadb/common/datamanagers/rollover.py +76 -15
- nucliadb/common/datamanagers/synonyms.py +1 -1
- nucliadb/common/datamanagers/utils.py +15 -6
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/orm/test_orm_utils.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +29 -7
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +3 -0
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +11 -42
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exporter.py +5 -11
- nucliadb/export_import/importer.py +5 -7
- nucliadb/export_import/models.py +3 -3
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +25 -37
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +21 -19
- nucliadb/ingest/consumer/consumer.py +82 -47
- nucliadb/ingest/consumer/materializer.py +5 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +19 -17
- nucliadb/ingest/consumer/shard_creator.py +2 -4
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +137 -105
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -16
- nucliadb/ingest/fields/link.py +5 -10
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +200 -213
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +322 -197
- nucliadb/ingest/orm/processor/__init__.py +2 -700
- nucliadb/ingest/orm/processor/auditing.py +4 -23
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +249 -402
- nucliadb/ingest/orm/utils.py +4 -4
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +64 -73
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -167
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +185 -412
- nucliadb/ingest/settings.py +10 -20
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +242 -55
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +47 -30
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +1 -12
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +21 -88
- nucliadb/reader/api/v1/export_import.py +1 -1
- nucliadb/reader/api/v1/knowledgebox.py +10 -10
- nucliadb/reader/api/v1/learning_config.py +2 -6
- nucliadb/reader/api/v1/resource.py +62 -88
- nucliadb/reader/api/v1/services.py +64 -83
- nucliadb/reader/app.py +12 -29
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -28
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +1 -2
- nucliadb/search/api/v1/ask.py +17 -10
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +16 -24
- nucliadb/search/api/v1/find.py +36 -36
- nucliadb/search/api/v1/knowledgebox.py +89 -60
- nucliadb/search/api/v1/resource/ask.py +2 -8
- nucliadb/search/api/v1/resource/search.py +49 -70
- nucliadb/search/api/v1/search.py +44 -210
- nucliadb/search/api/v1/suggest.py +39 -54
- nucliadb/search/app.py +12 -32
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +136 -187
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +25 -58
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +571 -123
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -14
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +817 -266
- nucliadb/search/search/chat/query.py +213 -309
- nucliadb/{tests/migrations/__init__.py → search/search/cut.py} +8 -8
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -53
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +187 -223
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +305 -150
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +3 -32
- nucliadb/search/search/summarize.py +7 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +8 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +7 -10
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +1 -3
- nucliadb/standalone/purge.py +1 -1
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +3 -6
- nucliadb/standalone/settings.py +9 -16
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +1 -1
- nucliadb/train/api/v1/trainset.py +2 -4
- nucliadb/train/app.py +10 -31
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +48 -39
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +19 -23
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +67 -14
- nucliadb/writer/api/v1/field.py +16 -269
- nucliadb/writer/api/v1/knowledgebox.py +218 -68
- nucliadb/writer/api/v1/resource.py +68 -88
- nucliadb/writer/api/v1/services.py +51 -70
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +114 -113
- nucliadb/writer/app.py +6 -43
- nucliadb/writer/back_pressure.py +16 -38
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -46
- nucliadb/writer/resource/field.py +25 -127
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +6 -2
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +49 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -433
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -764
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -78
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -126
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -684
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_brain_vectors.py +0 -74
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -331
- nucliadb/ingest/tests/unit/test_cache.py +0 -31
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -353
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -263
- nucliadb/search/api/v1/resource/chat.py +0 -174
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -466
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_ask.py +0 -120
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -99
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -211
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -270
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -153
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -525
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_migrations.py +0 -63
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -735
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -202
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -107
- nucliadb/tests/migrations/test_migration_0017.py +0 -76
- nucliadb/tests/migrations/test_migration_0018.py +0 -95
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -172
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -114
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -61
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -408
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -173
- nucliadb/tests/unit/common/cluster/test_rebalance.py +0 -38
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -282
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -92
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -301
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -92
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -58
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -86
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -136
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -171
- nucliadb/tests/utils/broker_messages/fields.py +0 -197
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -221
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -101
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -191
- nucliadb/writer/tests/test_fields.py +0 -475
- nucliadb/writer/tests/test_files.py +0 -740
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -133
- nucliadb/writer/tests/test_resources.py +0 -476
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-4.0.0.post542.dist-info/METADATA +0 -135
- nucliadb-4.0.0.post542.dist-info/RECORD +0 -462
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-4.0.0.post542.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,137 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import pytest
|
21
|
-
|
22
|
-
from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX
|
23
|
-
from nucliadb_models.entities import CreateEntitiesGroupPayload, Entity
|
24
|
-
from nucliadb_models.labels import Label, LabelSet
|
25
|
-
from nucliadb_models.resource import NucliaDBRoles
|
26
|
-
from nucliadb_protos import knowledgebox_pb2, writer_pb2
|
27
|
-
from nucliadb_utils.utilities import get_ingest
|
28
|
-
|
29
|
-
|
30
|
-
@pytest.mark.asyncio
|
31
|
-
async def test_service_lifecycle_entities(writer_api, entities_manager_mock):
|
32
|
-
async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
|
33
|
-
resp = await client.post(
|
34
|
-
f"/{KBS_PREFIX}",
|
35
|
-
json={
|
36
|
-
"slug": "kbid1",
|
37
|
-
"title": "My Knowledge Box",
|
38
|
-
},
|
39
|
-
)
|
40
|
-
assert resp.status_code == 201
|
41
|
-
data = resp.json()
|
42
|
-
assert data["slug"] == "kbid1"
|
43
|
-
kbid = data["uuid"]
|
44
|
-
|
45
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
46
|
-
eg = CreateEntitiesGroupPayload(
|
47
|
-
group="0",
|
48
|
-
title="My group",
|
49
|
-
color="#0000000",
|
50
|
-
entities={
|
51
|
-
"ent1": Entity(value="asd", merged=False),
|
52
|
-
"ent2": Entity(value="asd", merged=False),
|
53
|
-
"ent3": Entity(value="asd", merged=False),
|
54
|
-
},
|
55
|
-
)
|
56
|
-
|
57
|
-
resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
|
58
|
-
assert resp.status_code == 200
|
59
|
-
|
60
|
-
ingest = get_ingest()
|
61
|
-
result = await ingest.GetEntities(
|
62
|
-
writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
|
63
|
-
)
|
64
|
-
assert set(result.groups.keys()) == {"0"}
|
65
|
-
assert result.groups["0"].title == eg.title
|
66
|
-
assert result.groups["0"].color == eg.color
|
67
|
-
assert set(result.groups["0"].entities.keys()) == {"ent1", "ent2", "ent3"}
|
68
|
-
assert result.groups["0"].entities["ent1"].value == "asd"
|
69
|
-
|
70
|
-
eg.group = "1"
|
71
|
-
resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
|
72
|
-
assert resp.status_code == 200
|
73
|
-
result = await ingest.GetEntities(
|
74
|
-
writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
|
75
|
-
)
|
76
|
-
assert set(result.groups.keys()) == {"0", "1"}
|
77
|
-
|
78
|
-
|
79
|
-
@pytest.mark.asyncio
|
80
|
-
async def test_entities_custom_field_for_user_defined_groups(
|
81
|
-
writer_api, entities_manager_mock
|
82
|
-
):
|
83
|
-
"""
|
84
|
-
Test description:
|
85
|
-
|
86
|
-
- Create an entity group and check that the default value for the `custom`
|
87
|
-
field is True
|
88
|
-
"""
|
89
|
-
async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
|
90
|
-
resp = await client.post(
|
91
|
-
f"/{KBS_PREFIX}",
|
92
|
-
json={
|
93
|
-
"slug": "kbid1",
|
94
|
-
"title": "My Knowledge Box",
|
95
|
-
},
|
96
|
-
)
|
97
|
-
assert resp.status_code == 201
|
98
|
-
data = resp.json()
|
99
|
-
kbid = data["uuid"]
|
100
|
-
|
101
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
102
|
-
eg = CreateEntitiesGroupPayload(group="0")
|
103
|
-
resp = await client.post(f"/{KB_PREFIX}/{kbid}/entitiesgroups", json=eg.dict())
|
104
|
-
assert resp.status_code == 200
|
105
|
-
|
106
|
-
ingest = get_ingest()
|
107
|
-
result = await ingest.GetEntities(
|
108
|
-
writer_pb2.GetEntitiesRequest(kb=knowledgebox_pb2.KnowledgeBoxID(uuid=kbid))
|
109
|
-
)
|
110
|
-
assert result.groups["0"].custom is True
|
111
|
-
|
112
|
-
|
113
|
-
@pytest.mark.asyncio
|
114
|
-
async def test_service_lifecycle_labels(writer_api):
|
115
|
-
async with writer_api(roles=[NucliaDBRoles.MANAGER]) as client:
|
116
|
-
resp = await client.post(
|
117
|
-
f"/{KBS_PREFIX}",
|
118
|
-
json={
|
119
|
-
"slug": "kbid1",
|
120
|
-
"title": "My Knowledge Box",
|
121
|
-
},
|
122
|
-
)
|
123
|
-
assert resp.status_code == 201
|
124
|
-
data = resp.json()
|
125
|
-
assert data["slug"] == "kbid1"
|
126
|
-
kbid = data["uuid"]
|
127
|
-
|
128
|
-
async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
|
129
|
-
ls = LabelSet(
|
130
|
-
title="My labelset", color="#0000000", multiple=False, kind=["RESOURCES"]
|
131
|
-
)
|
132
|
-
ls.labels.append(Label(title="asd"))
|
133
|
-
ls.labels.append(Label(title="asd"))
|
134
|
-
resp = await client.post(f"/{KB_PREFIX}/{kbid}/labelset/ls1", json=ls.dict())
|
135
|
-
assert resp.status_code == 200
|
136
|
-
resp = await client.post(f"/{KB_PREFIX}/{kbid}/labelset/ls2", json=ls.dict())
|
137
|
-
assert resp.status_code == 200
|
@@ -1,203 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import tempfile
|
21
|
-
import uuid
|
22
|
-
|
23
|
-
import asyncpg
|
24
|
-
import pytest
|
25
|
-
|
26
|
-
from nucliadb.writer.settings import settings
|
27
|
-
from nucliadb.writer.tus import get_dm
|
28
|
-
from nucliadb.writer.tus.exceptions import CloudFileNotFound
|
29
|
-
from nucliadb.writer.tus.gcs import GCloudBlobStore, GCloudFileStorageManager
|
30
|
-
from nucliadb.writer.tus.local import LocalBlobStore, LocalFileStorageManager
|
31
|
-
from nucliadb.writer.tus.pg import PGBlobStore, PGFileStorageManager
|
32
|
-
from nucliadb.writer.tus.s3 import S3BlobStore, S3FileStorageManager
|
33
|
-
from nucliadb.writer.tus.storage import BlobStore, FileStorageManager
|
34
|
-
from nucliadb_utils.storages.pg import PostgresStorage
|
35
|
-
from nucliadb_utils.storages.storage import KB_RESOURCE_FIELD
|
36
|
-
|
37
|
-
|
38
|
-
@pytest.fixture(scope="function")
|
39
|
-
async def s3_storage_tus(s3):
|
40
|
-
storage = S3BlobStore()
|
41
|
-
await storage.initialize(
|
42
|
-
client_id="",
|
43
|
-
client_secret="",
|
44
|
-
max_pool_connections=2,
|
45
|
-
endpoint_url=s3,
|
46
|
-
verify_ssl=False,
|
47
|
-
ssl=False,
|
48
|
-
region_name=None,
|
49
|
-
bucket="test_{kbid}",
|
50
|
-
bucket_tags={"testTag": "test"},
|
51
|
-
)
|
52
|
-
yield storage
|
53
|
-
await storage.finalize()
|
54
|
-
|
55
|
-
|
56
|
-
@pytest.fixture(scope="function")
|
57
|
-
async def gcs_storage_tus(gcs):
|
58
|
-
storage = GCloudBlobStore()
|
59
|
-
await storage.initialize(
|
60
|
-
json_credentials=None,
|
61
|
-
bucket="test_{kbid}",
|
62
|
-
location="location",
|
63
|
-
project="project",
|
64
|
-
bucket_labels={},
|
65
|
-
object_base_url=gcs,
|
66
|
-
)
|
67
|
-
yield storage
|
68
|
-
await storage.finalize()
|
69
|
-
|
70
|
-
|
71
|
-
@pytest.fixture(scope="function")
|
72
|
-
async def local_storage_tus():
|
73
|
-
folder = tempfile.TemporaryDirectory()
|
74
|
-
storage = LocalBlobStore(local_testing_files=folder.name)
|
75
|
-
await storage.initialize()
|
76
|
-
yield storage
|
77
|
-
await storage.finalize()
|
78
|
-
folder.cleanup()
|
79
|
-
|
80
|
-
|
81
|
-
@pytest.fixture(scope="function")
|
82
|
-
async def pg_storage_tus(pg):
|
83
|
-
dsn = f"postgresql://postgres:postgres@{pg[0]}:{pg[1]}/postgres"
|
84
|
-
conn = await asyncpg.connect(dsn)
|
85
|
-
await conn.execute(
|
86
|
-
"""
|
87
|
-
DROP table IF EXISTS kb_files;
|
88
|
-
DROP table IF EXISTS kb_files_fileparts;
|
89
|
-
"""
|
90
|
-
)
|
91
|
-
await conn.close()
|
92
|
-
fstorage = PostgresStorage(dsn) # set everything up
|
93
|
-
await fstorage.initialize()
|
94
|
-
await fstorage.finalize()
|
95
|
-
|
96
|
-
storage = PGBlobStore(dsn)
|
97
|
-
await storage.initialize()
|
98
|
-
yield storage
|
99
|
-
await storage.finalize()
|
100
|
-
|
101
|
-
|
102
|
-
async def clean_dm():
|
103
|
-
from nucliadb.writer.tus import REDIS_FILE_DATA_MANAGER_FACTORY
|
104
|
-
|
105
|
-
if REDIS_FILE_DATA_MANAGER_FACTORY is not None:
|
106
|
-
await REDIS_FILE_DATA_MANAGER_FACTORY.finalize()
|
107
|
-
REDIS_FILE_DATA_MANAGER_FACTORY = None
|
108
|
-
|
109
|
-
|
110
|
-
@pytest.fixture(scope="function")
|
111
|
-
async def redis_dm(redis):
|
112
|
-
prev = settings.dm_enabled
|
113
|
-
|
114
|
-
settings.dm_enabled = True
|
115
|
-
settings.dm_redis_host = redis[0]
|
116
|
-
settings.dm_redis_port = redis[1]
|
117
|
-
|
118
|
-
dm = get_dm()
|
119
|
-
|
120
|
-
yield dm
|
121
|
-
|
122
|
-
await clean_dm()
|
123
|
-
|
124
|
-
settings.dm_enabled = prev
|
125
|
-
|
126
|
-
|
127
|
-
@pytest.mark.asyncio
|
128
|
-
async def test_pg_driver(redis_dm, pg_storage_tus: PGBlobStore):
|
129
|
-
await storage_test(pg_storage_tus, PGFileStorageManager(pg_storage_tus))
|
130
|
-
|
131
|
-
|
132
|
-
@pytest.mark.asyncio
|
133
|
-
async def test_s3_driver(redis_dm, s3_storage_tus: S3BlobStore):
|
134
|
-
await storage_test(s3_storage_tus, S3FileStorageManager(s3_storage_tus))
|
135
|
-
|
136
|
-
|
137
|
-
@pytest.mark.asyncio
|
138
|
-
async def test_gcs_driver(redis_dm, gcs_storage_tus: GCloudBlobStore):
|
139
|
-
await storage_test(gcs_storage_tus, GCloudFileStorageManager(gcs_storage_tus))
|
140
|
-
|
141
|
-
|
142
|
-
@pytest.mark.asyncio
async def test_local_driver(local_storage_tus: LocalBlobStore):
    """Run the shared storage scenario against the local blob store.

    ``settings.dm_enabled`` is switched off for the duration of the scenario
    and put back to whatever it was before, even when the scenario fails.
    """
    previous = settings.dm_enabled
    settings.dm_enabled = False
    try:
        await storage_test(
            local_storage_tus, LocalFileStorageManager(local_storage_tus)
        )
    finally:
        settings.dm_enabled = previous
|
147
|
-
|
148
|
-
|
149
|
-
async def storage_test(storage: BlobStore, file_storage_manager: FileStorageManager):
    """Exercise one upload life cycle against a blob store / manager pair.

    Steps: resolve the bucket name, create the bucket (skipped for
    ``PGBlobStore``), start an upload, append one chunk, finish, read the
    payload back from offset 1, delete the upload, and check that a further
    read raises ``CloudFileNotFound``.
    """
    example = b"mytestinfo"
    field = "myfield"
    rid = "myrid"
    kbid = "mykb_tus_test"

    metadata: dict[str, str] = {}
    bucket_name = storage.get_bucket_name(kbid)
    assert bucket_name in [
        "test_mykb_tus_test",
        "test-mykb-tus-test",
        "ndb_mykb_tus_test",
        "mykb_tus_test",
    ]

    if not isinstance(storage, PGBlobStore):
        # this is silly, but we don't need this for pg
        assert await storage.check_exists(bucket_name) is False

        exists = await storage.create_bucket(bucket_name)
        assert exists is False

    upload_id = uuid.uuid4().hex
    dm = get_dm()
    await dm.load(upload_id)
    await dm.start({})
    await dm.update(
        upload_file_id=upload_id,
        rid=rid,
        field=field,
        metadata=metadata,
        deferred_length=True,
        offset=0,
        item=None,
    )

    path = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, field=field)
    await file_storage_manager.start(dm, path=path, kbid=kbid)

    async def generate():
        yield example

    size = await file_storage_manager.append(dm, generate(), 0)
    await dm.update(offset=size)
    assert size == len(example)
    await file_storage_manager.finish(dm)

    # Collect the chunks before asserting: asserting inside the loop would
    # pass vacuously if the backend yielded nothing at all.
    chunks = [
        data
        async for data in file_storage_manager.read_range(path, kbid, 1, size)
    ]
    assert chunks, "read_range yielded no data for an existing upload"
    assert b"".join(chunks) == example[1:]

    await file_storage_manager.delete_upload(path, kbid)

    with pytest.raises(CloudFileNotFound):
        async for data in file_storage_manager.read_range(path, kbid, 1, size):
            assert data == example[1:]
|
nucliadb/writer/tests/utils.py
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
import hashlib
|
21
|
-
from base64 import b64encode
|
22
|
-
from os.path import dirname
|
23
|
-
|
24
|
-
|
25
|
-
def load_file_as_FileB64_payload(f: str, content_type: str) -> dict:
    """Build a base64 file payload dict from a file next to this module.

    Args:
        f: Path of the file, relative to this module's directory.
        content_type: Value stored under the ``content_type`` key.

    Returns:
        A dict with ``filename`` (last ``/``-separated component of ``f``),
        ``content_type``, ``payload`` (base64 text of the file content) and
        ``md5`` (hex digest computed over the base64-encoded bytes, not over
        the raw file content).
    """
    file_location = f"{dirname(__file__)}/{f}"
    filename = f.split("/")[-1]

    # Use a context manager so the file handle is closed deterministically.
    with open(file_location, "rb") as fh:
        data = b64encode(fh.read())

    return {
        "filename": filename,
        "content_type": content_type,
        "payload": data.decode("utf-8"),
        "md5": hashlib.md5(data).hexdigest(),
    }
|
nucliadb/writer/tus/pg.py
DELETED
@@ -1,125 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from __future__ import annotations
|
21
|
-
|
22
|
-
from typing import AsyncIterator
|
23
|
-
|
24
|
-
import asyncpg
|
25
|
-
from nucliadb_protos.resources_pb2 import CloudFile
|
26
|
-
|
27
|
-
from nucliadb.writer.tus.dm import FileDataManager
|
28
|
-
from nucliadb.writer.tus.exceptions import CloudFileNotFound
|
29
|
-
from nucliadb.writer.tus.storage import BlobStore, FileStorageManager
|
30
|
-
from nucliadb_utils.storages import CHUNK_SIZE
|
31
|
-
from nucliadb_utils.storages.pg import PostgresFileDataLayer
|
32
|
-
|
33
|
-
|
34
|
-
class PGFileStorageManager(FileStorageManager):
    """File storage manager that stores upload data via ``PostgresFileDataLayer``.

    Every data-layer call is keyed by the bucket name that
    ``self.storage.get_bucket_name(kbid)`` returns, so creating, reading and
    deleting a file always address the same row.
    """

    _handler = None
    storage: PGBlobStore
    chunk_size = min_upload_size = CHUNK_SIZE

    async def start(self, dm: FileDataManager, path: str, kbid: str):
        """Create the file record for ``path`` and record it on ``dm``.

        Any existing file stored under the same path is deleted in the same
        transaction before the new record is created.
        """
        bucket = self.storage.get_bucket_name(kbid)

        async with self.storage.pool.acquire() as conn:
            async with conn.transaction():
                dl = PostgresFileDataLayer(conn)
                if path is not None:
                    await dl.delete_file(bucket, path)

                await dl.create_file(
                    kb_id=bucket,
                    file_id=path,
                    filename=dm.filename,
                    size=dm.size,
                    content_type=dm.content_type,
                )

        await dm.update(upload_file_id=path, path=path, bucket=bucket)

    async def iter_data(self, uri, kbid: str, headers=None):
        """Yield the ``data`` entry of every stored chunk of ``uri``.

        ``headers`` is accepted but not used by this implementation.
        """
        bucket = self.storage.get_bucket_name(kbid)

        async with self.storage.pool.acquire() as conn:
            dl = PostgresFileDataLayer(conn)
            async for chunk in dl.iterate_chunks(bucket, uri):
                yield chunk["data"]

    async def read_range(
        self, uri: str, kbid: str, start: int, end: int
    ) -> AsyncIterator[bytes]:
        """Iterate through ranges of data.

        Raises:
            CloudFileNotFound: if the data layer has no file info for ``uri``.
        """
        bucket = self.storage.get_bucket_name(kbid)

        async with self.storage.pool.acquire() as conn:
            dl = PostgresFileDataLayer(conn)
            file_info = await dl.get_file_info(bucket, uri)
            if file_info is None:
                raise CloudFileNotFound()
            async for data in dl.iterate_range(
                kb_id=bucket, file_id=uri, start=start, end=end
            ):
                yield data

    async def append(self, dm: FileDataManager, iterable, offset) -> int:
        """Append every chunk from ``iterable`` and return the bytes written.

        ``offset`` is accepted for interface compatibility; this
        implementation does not use it.
        """
        bucket = dm.get("bucket")
        path = dm.get("path")
        count = 0
        async with self.storage.pool.acquire() as conn:
            dl = PostgresFileDataLayer(conn)
            async for chunk in iterable:
                await dl.append_chunk(kb_id=bucket, file_id=path, data=chunk)
                count += len(chunk)
        return count

    async def finish(self, dm: FileDataManager):
        """Finish the upload on ``dm`` and return the path it was stored at."""
        # Read the path first, before dm.finish() is called.
        path = dm.get("path")
        await dm.finish()
        return path

    async def delete_upload(self, uri: str, kbid: str):
        """Delete the stored file ``uri`` inside a transaction."""
        bucket = self.storage.get_bucket_name(kbid)

        async with self.storage.pool.acquire() as conn:
            async with conn.transaction():
                dl = PostgresFileDataLayer(conn)
                await dl.delete_file(bucket, uri)
|
107
|
-
|
108
|
-
|
109
|
-
class PGBlobStore(BlobStore):
    """Blob store backed by an ``asyncpg`` connection pool.

    Bucket names are the knowledge box ids themselves, and every bucket is
    reported as existing.
    """

    def __init__(self, dsn: str):
        self.dsn = dsn
        self.source = CloudFile.POSTGRES
        # Defined up front so finalize() is safe before initialize().
        self.pool = None
        self.initialized = False

    async def initialize(self):
        """Create the connection pool for ``self.dsn``."""
        self.pool = await asyncpg.create_pool(self.dsn)
        self.initialized = True

    async def finalize(self):
        """Close the connection pool, if one was created."""
        if self.pool is not None:
            await self.pool.close()
            self.pool = None
        self.initialized = False

    async def check_exists(self, bucket_name: str) -> bool:
        """Always return True; no lookup is performed."""
        return True

    def get_bucket_name(self, kbid: str) -> str:
        """Return ``kbid`` unchanged as the bucket name."""
        return kbid
|
@@ -1,135 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.1
|
2
|
-
Name: nucliadb
|
3
|
-
Version: 4.0.0.post542
|
4
|
-
Home-page: https://docs.nuclia.dev/docs/guides/nucliadb/intro
|
5
|
-
Author: NucliaDB Community
|
6
|
-
Author-email: nucliadb@nuclia.com
|
7
|
-
License: BSD
|
8
|
-
Project-URL: Nuclia, https://nuclia.com
|
9
|
-
Project-URL: Github, https://github.com/nuclia/nucliadb
|
10
|
-
Project-URL: Discord, https://discord.gg/8EvQwmsbzf
|
11
|
-
Project-URL: API Reference, https://docs.nuclia.dev/docs/api
|
12
|
-
Keywords: search,semantic,AI
|
13
|
-
Classifier: Development Status :: 4 - Beta
|
14
|
-
Classifier: Intended Audience :: Developers
|
15
|
-
Classifier: Intended Audience :: Information Technology
|
16
|
-
Classifier: License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)
|
17
|
-
Classifier: Programming Language :: Python
|
18
|
-
Classifier: Programming Language :: Python :: 3.9
|
19
|
-
Classifier: Programming Language :: Python :: 3.10
|
20
|
-
Classifier: Programming Language :: Python :: 3.11
|
21
|
-
Classifier: Programming Language :: Python :: 3 :: Only
|
22
|
-
Requires-Python: >=3.9, <4
|
23
|
-
Description-Content-Type: text/markdown
|
24
|
-
Requires-Dist: nucliadb-telemetry[all] >=4.0.0.post542
|
25
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages] >=4.0.0.post542
|
26
|
-
Requires-Dist: nucliadb-protos >=4.0.0.post542
|
27
|
-
Requires-Dist: nucliadb-models >=4.0.0.post542
|
28
|
-
Requires-Dist: nucliadb-admin-assets >=1.0.0.post1224
|
29
|
-
Requires-Dist: nucliadb-node-binding >=2.26.0
|
30
|
-
Requires-Dist: uvicorn <0.19.0
|
31
|
-
Requires-Dist: argdantic
|
32
|
-
Requires-Dist: aiohttp >=3.9.4
|
33
|
-
Requires-Dist: lru-dict >=1.1.7
|
34
|
-
Requires-Dist: backoff
|
35
|
-
Requires-Dist: aiofiles >=0.8.0
|
36
|
-
Requires-Dist: psutil >=5.9.7
|
37
|
-
Requires-Dist: types-psutil >=5.9.5.17
|
38
|
-
Requires-Dist: types-aiofiles >=0.8.3
|
39
|
-
Requires-Dist: protobuf >=4.22.3
|
40
|
-
Requires-Dist: types-protobuf <5,>=4.24
|
41
|
-
Requires-Dist: grpcio <1.63.0,>=1.44.0
|
42
|
-
Requires-Dist: grpcio-health-checking <1.63.0,>=1.44.0
|
43
|
-
Requires-Dist: grpcio-channelz <1.63.0,>=1.44.0
|
44
|
-
Requires-Dist: grpcio-status <1.63.0,>=1.44.0
|
45
|
-
Requires-Dist: grpcio-tools <1.63.0,>=1.44.0
|
46
|
-
Requires-Dist: grpcio-testing <1.63.0,>=1.44.0
|
47
|
-
Requires-Dist: grpcio-reflection <1.63.0,>=1.44.0
|
48
|
-
Requires-Dist: orjson >=3.6.7
|
49
|
-
Requires-Dist: types-setuptools
|
50
|
-
Requires-Dist: pydantic >=2.7
|
51
|
-
Requires-Dist: pydantic-settings >=2.2
|
52
|
-
Requires-Dist: aiobotocore >=2.9.0
|
53
|
-
Requires-Dist: botocore >=1.34.0
|
54
|
-
Requires-Dist: google-cloud-storage
|
55
|
-
Requires-Dist: gcloud
|
56
|
-
Requires-Dist: oauth2client
|
57
|
-
Requires-Dist: jwcrypto >=1.5.6
|
58
|
-
Requires-Dist: fastapi-versioning >=0.10.0
|
59
|
-
Requires-Dist: fastapi >=0.95.2
|
60
|
-
Requires-Dist: sentry-sdk >=1.5.12
|
61
|
-
Requires-Dist: pyjwt >=2.4.0
|
62
|
-
Requires-Dist: mmh3 >=3.0.0
|
63
|
-
Requires-Dist: httpx >=0.23.0
|
64
|
-
Requires-Dist: types-pkg-resources >=0.1.3
|
65
|
-
Requires-Dist: grpc-stubs >=1.44.0
|
66
|
-
Requires-Dist: aiodns >=3.0.0
|
67
|
-
Requires-Dist: types-orjson
|
68
|
-
Requires-Dist: asyncpg >=0.27.0
|
69
|
-
Requires-Dist: tikv-client ==0.0.3
|
70
|
-
Requires-Dist: multidict >=6.0.4
|
71
|
-
Requires-Dist: deprecated >=1.2.12
|
72
|
-
Requires-Dist: asgiref >=3.3.2
|
73
|
-
Requires-Dist: jmespath >=1.0.0
|
74
|
-
Requires-Dist: idna >=3.3
|
75
|
-
Requires-Dist: sniffio >=1.2.0
|
76
|
-
Requires-Dist: async-lru >=2.0.4
|
77
|
-
Requires-Dist: async-timeout >=4.0.3
|
78
|
-
Requires-Dist: cachetools >=5.3.2
|
79
|
-
Requires-Dist: types-cachetools >=5.3.0.5
|
80
|
-
Requires-Dist: kubernetes-asyncio
|
81
|
-
Provides-Extra: redis
|
82
|
-
Requires-Dist: redis >=4.3.4 ; extra == 'redis'
|
83
|
-
|
84
|
-
# nucliadb
|
85
|
-
|
86
|
-
This module contains most of the Python components for NucliaDB:
|
87
|
-
|
88
|
-
- ingest
|
89
|
-
- reader
|
90
|
-
- writer
|
91
|
-
- search
|
92
|
-
- train
|
93
|
-
|
94
|
-
# NucliaDB Migrations
|
95
|
-
|
96
|
-
This module is used to manage NucliaDB Migrations.
|
97
|
-
|
98
|
-
All migrations will be provided in the `migrations` folder and have a filename
|
99
|
-
that follows the structure: `[sequence]_[migration name].py`.
|
100
|
-
Where `sequence` is the order the migration should be run in with zero padding.
|
101
|
-
Example: `0001_migrate_data.py`.
|
102
|
-
|
103
|
-
Each migration should have the following:
|
104
|
-
|
105
|
-
```python
|
106
|
-
from nucliadb.migrator.context import ExecutionContext
|
107
|
-
|
108
|
-
|
109
|
-
async def migrate(context: ExecutionContext) -> None:
|
110
|
-
"""
|
111
|
-
Non-kb type of migration. Migrate global data.
|
112
|
-
"""
|
113
|
-
|
114
|
-
|
115
|
-
async def migrate_kb(context: ExecutionContext, kbid: str) -> None:
|
116
|
-
"""
|
117
|
-
Migrate kb.
|
118
|
-
|
119
|
-
Must have both types of migrations.
|
120
|
-
"""
|
121
|
-
```
|
122
|
-
|
123
|
-
|
124
|
-
## How migrations are managed
|
125
|
-
|
126
|
-
- All migrations utilize a distributed lock to prevent simultaneously running jobs
|
127
|
-
- Global migration state:
|
128
|
-
- current version
|
129
|
-
- target version
|
130
|
-
- KBs to migrate
|
131
|
-
- KB Migration State:
|
132
|
-
- current version
|
133
|
-
|
134
|
-
- Migrations are currently run with a deployment and will be continuously retried on failure.
|
135
|
-
- Running migrations in a deployment is to make sure a migration does not prevent code deployment.
|