nucliadb 2.46.1.post382__py3-none-any.whl → 6.2.1.post2777__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0002_rollover_shards.py +1 -2
- migrations/0003_allfields_key.py +2 -37
- migrations/0004_rollover_shards.py +1 -2
- migrations/0005_rollover_shards.py +1 -2
- migrations/0006_rollover_shards.py +2 -4
- migrations/0008_cleanup_leftover_rollover_metadata.py +1 -2
- migrations/0009_upgrade_relations_and_texts_to_v2.py +5 -4
- migrations/0010_fix_corrupt_indexes.py +11 -12
- migrations/0011_materialize_labelset_ids.py +2 -18
- migrations/0012_rollover_shards.py +6 -12
- migrations/0013_rollover_shards.py +2 -4
- migrations/0014_rollover_shards.py +5 -7
- migrations/0015_targeted_rollover.py +6 -12
- migrations/0016_upgrade_to_paragraphs_v2.py +27 -32
- migrations/0017_multiple_writable_shards.py +3 -6
- migrations/0018_purge_orphan_kbslugs.py +59 -0
- migrations/0019_upgrade_to_paragraphs_v3.py +66 -0
- migrations/0020_drain_nodes_from_cluster.py +83 -0
- nucliadb/standalone/tests/unit/test_run.py → migrations/0021_overwrite_vectorsets_key.py +17 -18
- nucliadb/tests/unit/test_openapi.py → migrations/0022_fix_paragraph_deletion_bug.py +16 -11
- migrations/0023_backfill_pg_catalog.py +80 -0
- migrations/0025_assign_models_to_kbs_v2.py +113 -0
- migrations/0026_fix_high_cardinality_content_types.py +61 -0
- migrations/0027_rollover_texts3.py +73 -0
- nucliadb/ingest/fields/date.py → migrations/pg/0001_bootstrap.py +10 -12
- migrations/pg/0002_catalog.py +42 -0
- nucliadb/ingest/tests/unit/test_settings.py → migrations/pg/0003_catalog_kbid_index.py +5 -3
- nucliadb/common/cluster/base.py +41 -24
- nucliadb/common/cluster/discovery/base.py +6 -14
- nucliadb/common/cluster/discovery/k8s.py +9 -19
- nucliadb/common/cluster/discovery/manual.py +1 -3
- nucliadb/common/cluster/discovery/single.py +1 -2
- nucliadb/common/cluster/discovery/utils.py +1 -3
- nucliadb/common/cluster/grpc_node_dummy.py +11 -16
- nucliadb/common/cluster/index_node.py +10 -19
- nucliadb/common/cluster/manager.py +223 -102
- nucliadb/common/cluster/rebalance.py +42 -37
- nucliadb/common/cluster/rollover.py +377 -204
- nucliadb/common/cluster/settings.py +16 -9
- nucliadb/common/cluster/standalone/grpc_node_binding.py +24 -76
- nucliadb/common/cluster/standalone/index_node.py +4 -11
- nucliadb/common/cluster/standalone/service.py +2 -6
- nucliadb/common/cluster/standalone/utils.py +9 -6
- nucliadb/common/cluster/utils.py +43 -29
- nucliadb/common/constants.py +20 -0
- nucliadb/common/context/__init__.py +6 -4
- nucliadb/common/context/fastapi.py +8 -5
- nucliadb/{tests/knowledgeboxes/__init__.py → common/counters.py} +8 -2
- nucliadb/common/datamanagers/__init__.py +24 -5
- nucliadb/common/datamanagers/atomic.py +102 -0
- nucliadb/common/datamanagers/cluster.py +5 -5
- nucliadb/common/datamanagers/entities.py +6 -16
- nucliadb/common/datamanagers/fields.py +84 -0
- nucliadb/common/datamanagers/kb.py +101 -24
- nucliadb/common/datamanagers/labels.py +26 -56
- nucliadb/common/datamanagers/processing.py +2 -6
- nucliadb/common/datamanagers/resources.py +214 -117
- nucliadb/common/datamanagers/rollover.py +77 -16
- nucliadb/{ingest/orm → common/datamanagers}/synonyms.py +16 -28
- nucliadb/common/datamanagers/utils.py +19 -11
- nucliadb/common/datamanagers/vectorsets.py +110 -0
- nucliadb/common/external_index_providers/base.py +257 -0
- nucliadb/{ingest/tests/unit/test_cache.py → common/external_index_providers/exceptions.py} +9 -8
- nucliadb/common/external_index_providers/manager.py +101 -0
- nucliadb/common/external_index_providers/pinecone.py +933 -0
- nucliadb/common/external_index_providers/settings.py +52 -0
- nucliadb/common/http_clients/auth.py +3 -6
- nucliadb/common/http_clients/processing.py +6 -11
- nucliadb/common/http_clients/utils.py +1 -3
- nucliadb/common/ids.py +240 -0
- nucliadb/common/locking.py +43 -13
- nucliadb/common/maindb/driver.py +11 -35
- nucliadb/common/maindb/exceptions.py +6 -6
- nucliadb/common/maindb/local.py +22 -9
- nucliadb/common/maindb/pg.py +206 -111
- nucliadb/common/maindb/utils.py +13 -44
- nucliadb/common/models_utils/from_proto.py +479 -0
- nucliadb/common/models_utils/to_proto.py +60 -0
- nucliadb/common/nidx.py +260 -0
- nucliadb/export_import/datamanager.py +25 -19
- nucliadb/export_import/exceptions.py +8 -0
- nucliadb/export_import/exporter.py +20 -7
- nucliadb/export_import/importer.py +6 -11
- nucliadb/export_import/models.py +5 -5
- nucliadb/export_import/tasks.py +4 -4
- nucliadb/export_import/utils.py +94 -54
- nucliadb/health.py +1 -3
- nucliadb/ingest/app.py +15 -11
- nucliadb/ingest/consumer/auditing.py +30 -147
- nucliadb/ingest/consumer/consumer.py +96 -52
- nucliadb/ingest/consumer/materializer.py +10 -12
- nucliadb/ingest/consumer/pull.py +12 -27
- nucliadb/ingest/consumer/service.py +20 -19
- nucliadb/ingest/consumer/shard_creator.py +7 -14
- nucliadb/ingest/consumer/utils.py +1 -3
- nucliadb/ingest/fields/base.py +139 -188
- nucliadb/ingest/fields/conversation.py +18 -5
- nucliadb/ingest/fields/exceptions.py +1 -4
- nucliadb/ingest/fields/file.py +7 -25
- nucliadb/ingest/fields/link.py +11 -16
- nucliadb/ingest/fields/text.py +9 -4
- nucliadb/ingest/orm/brain.py +255 -262
- nucliadb/ingest/orm/broker_message.py +181 -0
- nucliadb/ingest/orm/entities.py +36 -51
- nucliadb/ingest/orm/exceptions.py +12 -0
- nucliadb/ingest/orm/knowledgebox.py +334 -278
- nucliadb/ingest/orm/processor/__init__.py +2 -697
- nucliadb/ingest/orm/processor/auditing.py +117 -0
- nucliadb/ingest/orm/processor/data_augmentation.py +164 -0
- nucliadb/ingest/orm/processor/pgcatalog.py +84 -0
- nucliadb/ingest/orm/processor/processor.py +752 -0
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -1
- nucliadb/ingest/orm/resource.py +280 -520
- nucliadb/ingest/orm/utils.py +25 -31
- nucliadb/ingest/partitions.py +3 -9
- nucliadb/ingest/processing.py +76 -81
- nucliadb/ingest/py.typed +0 -0
- nucliadb/ingest/serialize.py +37 -173
- nucliadb/ingest/service/__init__.py +1 -3
- nucliadb/ingest/service/writer.py +186 -577
- nucliadb/ingest/settings.py +13 -22
- nucliadb/ingest/utils.py +3 -6
- nucliadb/learning_proxy.py +264 -51
- nucliadb/metrics_exporter.py +30 -19
- nucliadb/middleware/__init__.py +1 -3
- nucliadb/migrator/command.py +1 -3
- nucliadb/migrator/datamanager.py +13 -13
- nucliadb/migrator/migrator.py +57 -37
- nucliadb/migrator/settings.py +2 -1
- nucliadb/migrator/utils.py +18 -10
- nucliadb/purge/__init__.py +139 -33
- nucliadb/purge/orphan_shards.py +7 -13
- nucliadb/reader/__init__.py +1 -3
- nucliadb/reader/api/models.py +3 -14
- nucliadb/reader/api/v1/__init__.py +0 -1
- nucliadb/reader/api/v1/download.py +27 -94
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +13 -13
- nucliadb/reader/api/v1/learning_config.py +8 -12
- nucliadb/reader/api/v1/resource.py +67 -93
- nucliadb/reader/api/v1/services.py +70 -125
- nucliadb/reader/app.py +16 -46
- nucliadb/reader/lifecycle.py +18 -4
- nucliadb/reader/py.typed +0 -0
- nucliadb/reader/reader/notifications.py +10 -31
- nucliadb/search/__init__.py +1 -3
- nucliadb/search/api/v1/__init__.py +2 -2
- nucliadb/search/api/v1/ask.py +112 -0
- nucliadb/search/api/v1/catalog.py +184 -0
- nucliadb/search/api/v1/feedback.py +17 -25
- nucliadb/search/api/v1/find.py +41 -41
- nucliadb/search/api/v1/knowledgebox.py +90 -62
- nucliadb/search/api/v1/predict_proxy.py +2 -2
- nucliadb/search/api/v1/resource/ask.py +66 -117
- nucliadb/search/api/v1/resource/search.py +51 -72
- nucliadb/search/api/v1/router.py +1 -0
- nucliadb/search/api/v1/search.py +50 -197
- nucliadb/search/api/v1/suggest.py +40 -54
- nucliadb/search/api/v1/summarize.py +9 -5
- nucliadb/search/api/v1/utils.py +2 -1
- nucliadb/search/app.py +16 -48
- nucliadb/search/lifecycle.py +10 -3
- nucliadb/search/predict.py +176 -188
- nucliadb/search/py.typed +0 -0
- nucliadb/search/requesters/utils.py +41 -63
- nucliadb/search/search/cache.py +149 -20
- nucliadb/search/search/chat/ask.py +918 -0
- nucliadb/search/{tests/unit/test_run.py → search/chat/exceptions.py} +14 -13
- nucliadb/search/search/chat/images.py +41 -17
- nucliadb/search/search/chat/prompt.py +851 -282
- nucliadb/search/search/chat/query.py +274 -267
- nucliadb/{writer/resource/slug.py → search/search/cut.py} +8 -6
- nucliadb/search/search/fetch.py +43 -36
- nucliadb/search/search/filters.py +9 -15
- nucliadb/search/search/find.py +214 -54
- nucliadb/search/search/find_merge.py +408 -391
- nucliadb/search/search/hydrator.py +191 -0
- nucliadb/search/search/merge.py +198 -234
- nucliadb/search/search/metrics.py +73 -2
- nucliadb/search/search/paragraphs.py +64 -106
- nucliadb/search/search/pgcatalog.py +233 -0
- nucliadb/search/search/predict_proxy.py +1 -1
- nucliadb/search/search/query.py +386 -257
- nucliadb/search/search/query_parser/exceptions.py +22 -0
- nucliadb/search/search/query_parser/models.py +101 -0
- nucliadb/search/search/query_parser/parser.py +183 -0
- nucliadb/search/search/rank_fusion.py +204 -0
- nucliadb/search/search/rerankers.py +270 -0
- nucliadb/search/search/shards.py +4 -38
- nucliadb/search/search/summarize.py +14 -18
- nucliadb/search/search/utils.py +27 -4
- nucliadb/search/settings.py +15 -1
- nucliadb/standalone/api_router.py +4 -10
- nucliadb/standalone/app.py +17 -14
- nucliadb/standalone/auth.py +7 -21
- nucliadb/standalone/config.py +9 -12
- nucliadb/standalone/introspect.py +5 -5
- nucliadb/standalone/lifecycle.py +26 -25
- nucliadb/standalone/migrations.py +58 -0
- nucliadb/standalone/purge.py +9 -8
- nucliadb/standalone/py.typed +0 -0
- nucliadb/standalone/run.py +25 -18
- nucliadb/standalone/settings.py +10 -14
- nucliadb/standalone/versions.py +15 -5
- nucliadb/tasks/consumer.py +8 -12
- nucliadb/tasks/producer.py +7 -6
- nucliadb/tests/config.py +53 -0
- nucliadb/train/__init__.py +1 -3
- nucliadb/train/api/utils.py +1 -2
- nucliadb/train/api/v1/shards.py +2 -2
- nucliadb/train/api/v1/trainset.py +4 -6
- nucliadb/train/app.py +14 -47
- nucliadb/train/generator.py +10 -19
- nucliadb/train/generators/field_classifier.py +7 -19
- nucliadb/train/generators/field_streaming.py +156 -0
- nucliadb/train/generators/image_classifier.py +12 -18
- nucliadb/train/generators/paragraph_classifier.py +5 -9
- nucliadb/train/generators/paragraph_streaming.py +6 -9
- nucliadb/train/generators/question_answer_streaming.py +19 -20
- nucliadb/train/generators/sentence_classifier.py +9 -15
- nucliadb/train/generators/token_classifier.py +45 -36
- nucliadb/train/generators/utils.py +14 -18
- nucliadb/train/lifecycle.py +7 -3
- nucliadb/train/nodes.py +23 -32
- nucliadb/train/py.typed +0 -0
- nucliadb/train/servicer.py +13 -21
- nucliadb/train/settings.py +2 -6
- nucliadb/train/types.py +13 -10
- nucliadb/train/upload.py +3 -6
- nucliadb/train/uploader.py +20 -25
- nucliadb/train/utils.py +1 -1
- nucliadb/writer/__init__.py +1 -3
- nucliadb/writer/api/constants.py +0 -5
- nucliadb/{ingest/fields/keywordset.py → writer/api/utils.py} +13 -10
- nucliadb/writer/api/v1/export_import.py +102 -49
- nucliadb/writer/api/v1/field.py +196 -620
- nucliadb/writer/api/v1/knowledgebox.py +221 -71
- nucliadb/writer/api/v1/learning_config.py +2 -2
- nucliadb/writer/api/v1/resource.py +114 -216
- nucliadb/writer/api/v1/services.py +64 -132
- nucliadb/writer/api/v1/slug.py +61 -0
- nucliadb/writer/api/v1/transaction.py +67 -0
- nucliadb/writer/api/v1/upload.py +184 -215
- nucliadb/writer/app.py +11 -61
- nucliadb/writer/back_pressure.py +62 -43
- nucliadb/writer/exceptions.py +0 -4
- nucliadb/writer/lifecycle.py +21 -15
- nucliadb/writer/py.typed +0 -0
- nucliadb/writer/resource/audit.py +2 -1
- nucliadb/writer/resource/basic.py +48 -62
- nucliadb/writer/resource/field.py +45 -135
- nucliadb/writer/resource/origin.py +1 -2
- nucliadb/writer/settings.py +14 -5
- nucliadb/writer/tus/__init__.py +17 -15
- nucliadb/writer/tus/azure.py +111 -0
- nucliadb/writer/tus/dm.py +17 -5
- nucliadb/writer/tus/exceptions.py +1 -3
- nucliadb/writer/tus/gcs.py +56 -84
- nucliadb/writer/tus/local.py +21 -37
- nucliadb/writer/tus/s3.py +28 -68
- nucliadb/writer/tus/storage.py +5 -56
- nucliadb/writer/vectorsets.py +125 -0
- nucliadb-6.2.1.post2777.dist-info/METADATA +148 -0
- nucliadb-6.2.1.post2777.dist-info/RECORD +343 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/WHEEL +1 -1
- nucliadb/common/maindb/redis.py +0 -194
- nucliadb/common/maindb/tikv.py +0 -412
- nucliadb/ingest/fields/layout.py +0 -58
- nucliadb/ingest/tests/conftest.py +0 -30
- nucliadb/ingest/tests/fixtures.py +0 -771
- nucliadb/ingest/tests/integration/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/integration/consumer/test_auditing.py +0 -80
- nucliadb/ingest/tests/integration/consumer/test_materializer.py +0 -89
- nucliadb/ingest/tests/integration/consumer/test_pull.py +0 -144
- nucliadb/ingest/tests/integration/consumer/test_service.py +0 -81
- nucliadb/ingest/tests/integration/consumer/test_shard_creator.py +0 -68
- nucliadb/ingest/tests/integration/ingest/test_ingest.py +0 -691
- nucliadb/ingest/tests/integration/ingest/test_processing_engine.py +0 -95
- nucliadb/ingest/tests/integration/ingest/test_relations.py +0 -272
- nucliadb/ingest/tests/unit/consumer/__init__.py +0 -18
- nucliadb/ingest/tests/unit/consumer/test_auditing.py +0 -140
- nucliadb/ingest/tests/unit/consumer/test_consumer.py +0 -69
- nucliadb/ingest/tests/unit/consumer/test_pull.py +0 -60
- nucliadb/ingest/tests/unit/consumer/test_shard_creator.py +0 -139
- nucliadb/ingest/tests/unit/consumer/test_utils.py +0 -67
- nucliadb/ingest/tests/unit/orm/__init__.py +0 -19
- nucliadb/ingest/tests/unit/orm/test_brain.py +0 -247
- nucliadb/ingest/tests/unit/orm/test_processor.py +0 -131
- nucliadb/ingest/tests/unit/orm/test_resource.py +0 -275
- nucliadb/ingest/tests/unit/test_partitions.py +0 -40
- nucliadb/ingest/tests/unit/test_processing.py +0 -171
- nucliadb/middleware/transaction.py +0 -117
- nucliadb/reader/api/v1/learning_collector.py +0 -63
- nucliadb/reader/tests/__init__.py +0 -19
- nucliadb/reader/tests/conftest.py +0 -31
- nucliadb/reader/tests/fixtures.py +0 -136
- nucliadb/reader/tests/test_list_resources.py +0 -75
- nucliadb/reader/tests/test_reader_file_download.py +0 -273
- nucliadb/reader/tests/test_reader_resource.py +0 -379
- nucliadb/reader/tests/test_reader_resource_field.py +0 -219
- nucliadb/search/api/v1/chat.py +0 -258
- nucliadb/search/api/v1/resource/chat.py +0 -94
- nucliadb/search/tests/__init__.py +0 -19
- nucliadb/search/tests/conftest.py +0 -33
- nucliadb/search/tests/fixtures.py +0 -199
- nucliadb/search/tests/node.py +0 -465
- nucliadb/search/tests/unit/__init__.py +0 -18
- nucliadb/search/tests/unit/api/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/__init__.py +0 -19
- nucliadb/search/tests/unit/api/v1/resource/test_ask.py +0 -67
- nucliadb/search/tests/unit/api/v1/resource/test_chat.py +0 -97
- nucliadb/search/tests/unit/api/v1/test_chat.py +0 -96
- nucliadb/search/tests/unit/api/v1/test_predict_proxy.py +0 -98
- nucliadb/search/tests/unit/api/v1/test_summarize.py +0 -93
- nucliadb/search/tests/unit/search/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/__init__.py +0 -18
- nucliadb/search/tests/unit/search/requesters/test_utils.py +0 -210
- nucliadb/search/tests/unit/search/search/__init__.py +0 -19
- nucliadb/search/tests/unit/search/search/test_shards.py +0 -45
- nucliadb/search/tests/unit/search/search/test_utils.py +0 -82
- nucliadb/search/tests/unit/search/test_chat_prompt.py +0 -266
- nucliadb/search/tests/unit/search/test_fetch.py +0 -108
- nucliadb/search/tests/unit/search/test_filters.py +0 -125
- nucliadb/search/tests/unit/search/test_paragraphs.py +0 -157
- nucliadb/search/tests/unit/search/test_predict_proxy.py +0 -106
- nucliadb/search/tests/unit/search/test_query.py +0 -201
- nucliadb/search/tests/unit/test_app.py +0 -79
- nucliadb/search/tests/unit/test_find_merge.py +0 -112
- nucliadb/search/tests/unit/test_merge.py +0 -34
- nucliadb/search/tests/unit/test_predict.py +0 -584
- nucliadb/standalone/tests/__init__.py +0 -19
- nucliadb/standalone/tests/conftest.py +0 -33
- nucliadb/standalone/tests/fixtures.py +0 -38
- nucliadb/standalone/tests/unit/__init__.py +0 -18
- nucliadb/standalone/tests/unit/test_api_router.py +0 -61
- nucliadb/standalone/tests/unit/test_auth.py +0 -169
- nucliadb/standalone/tests/unit/test_introspect.py +0 -35
- nucliadb/standalone/tests/unit/test_versions.py +0 -68
- nucliadb/tests/benchmarks/__init__.py +0 -19
- nucliadb/tests/benchmarks/test_search.py +0 -99
- nucliadb/tests/conftest.py +0 -32
- nucliadb/tests/fixtures.py +0 -736
- nucliadb/tests/knowledgeboxes/philosophy_books.py +0 -203
- nucliadb/tests/knowledgeboxes/ten_dummy_resources.py +0 -109
- nucliadb/tests/migrations/__init__.py +0 -19
- nucliadb/tests/migrations/test_migration_0017.py +0 -80
- nucliadb/tests/tikv.py +0 -240
- nucliadb/tests/unit/__init__.py +0 -19
- nucliadb/tests/unit/common/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/__init__.py +0 -19
- nucliadb/tests/unit/common/cluster/discovery/test_k8s.py +0 -170
- nucliadb/tests/unit/common/cluster/standalone/__init__.py +0 -18
- nucliadb/tests/unit/common/cluster/standalone/test_service.py +0 -113
- nucliadb/tests/unit/common/cluster/standalone/test_utils.py +0 -59
- nucliadb/tests/unit/common/cluster/test_cluster.py +0 -399
- nucliadb/tests/unit/common/cluster/test_kb_shard_manager.py +0 -178
- nucliadb/tests/unit/common/cluster/test_rollover.py +0 -279
- nucliadb/tests/unit/common/maindb/__init__.py +0 -18
- nucliadb/tests/unit/common/maindb/test_driver.py +0 -127
- nucliadb/tests/unit/common/maindb/test_tikv.py +0 -53
- nucliadb/tests/unit/common/maindb/test_utils.py +0 -81
- nucliadb/tests/unit/common/test_context.py +0 -36
- nucliadb/tests/unit/export_import/__init__.py +0 -19
- nucliadb/tests/unit/export_import/test_datamanager.py +0 -37
- nucliadb/tests/unit/export_import/test_utils.py +0 -294
- nucliadb/tests/unit/migrator/__init__.py +0 -19
- nucliadb/tests/unit/migrator/test_migrator.py +0 -87
- nucliadb/tests/unit/tasks/__init__.py +0 -19
- nucliadb/tests/unit/tasks/conftest.py +0 -42
- nucliadb/tests/unit/tasks/test_consumer.py +0 -93
- nucliadb/tests/unit/tasks/test_producer.py +0 -95
- nucliadb/tests/unit/tasks/test_tasks.py +0 -60
- nucliadb/tests/unit/test_field_ids.py +0 -49
- nucliadb/tests/unit/test_health.py +0 -84
- nucliadb/tests/unit/test_kb_slugs.py +0 -54
- nucliadb/tests/unit/test_learning_proxy.py +0 -252
- nucliadb/tests/unit/test_metrics_exporter.py +0 -77
- nucliadb/tests/unit/test_purge.py +0 -138
- nucliadb/tests/utils/__init__.py +0 -74
- nucliadb/tests/utils/aiohttp_session.py +0 -44
- nucliadb/tests/utils/broker_messages/__init__.py +0 -167
- nucliadb/tests/utils/broker_messages/fields.py +0 -181
- nucliadb/tests/utils/broker_messages/helpers.py +0 -33
- nucliadb/tests/utils/entities.py +0 -78
- nucliadb/train/api/v1/check.py +0 -60
- nucliadb/train/tests/__init__.py +0 -19
- nucliadb/train/tests/conftest.py +0 -29
- nucliadb/train/tests/fixtures.py +0 -342
- nucliadb/train/tests/test_field_classification.py +0 -122
- nucliadb/train/tests/test_get_entities.py +0 -80
- nucliadb/train/tests/test_get_info.py +0 -51
- nucliadb/train/tests/test_get_ontology.py +0 -34
- nucliadb/train/tests/test_get_ontology_count.py +0 -63
- nucliadb/train/tests/test_image_classification.py +0 -222
- nucliadb/train/tests/test_list_fields.py +0 -39
- nucliadb/train/tests/test_list_paragraphs.py +0 -73
- nucliadb/train/tests/test_list_resources.py +0 -39
- nucliadb/train/tests/test_list_sentences.py +0 -71
- nucliadb/train/tests/test_paragraph_classification.py +0 -123
- nucliadb/train/tests/test_paragraph_streaming.py +0 -118
- nucliadb/train/tests/test_question_answer_streaming.py +0 -239
- nucliadb/train/tests/test_sentence_classification.py +0 -143
- nucliadb/train/tests/test_token_classification.py +0 -136
- nucliadb/train/tests/utils.py +0 -108
- nucliadb/writer/layouts/__init__.py +0 -51
- nucliadb/writer/layouts/v1.py +0 -59
- nucliadb/writer/resource/vectors.py +0 -120
- nucliadb/writer/tests/__init__.py +0 -19
- nucliadb/writer/tests/conftest.py +0 -31
- nucliadb/writer/tests/fixtures.py +0 -192
- nucliadb/writer/tests/test_fields.py +0 -486
- nucliadb/writer/tests/test_files.py +0 -743
- nucliadb/writer/tests/test_knowledgebox.py +0 -49
- nucliadb/writer/tests/test_reprocess_file_field.py +0 -139
- nucliadb/writer/tests/test_resources.py +0 -546
- nucliadb/writer/tests/test_service.py +0 -137
- nucliadb/writer/tests/test_tus.py +0 -203
- nucliadb/writer/tests/utils.py +0 -35
- nucliadb/writer/tus/pg.py +0 -125
- nucliadb-2.46.1.post382.dist-info/METADATA +0 -134
- nucliadb-2.46.1.post382.dist-info/RECORD +0 -451
- {nucliadb/ingest/tests → migrations/pg}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration → common/external_index_providers}/__init__.py +0 -0
- /nucliadb/{ingest/tests/integration/ingest → common/models_utils}/__init__.py +0 -0
- /nucliadb/{ingest/tests/unit → search/search/query_parser}/__init__.py +0 -0
- /nucliadb/{ingest/tests → tests}/vectors.py +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/entry_points.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/top_level.txt +0 -0
- {nucliadb-2.46.1.post382.dist-info → nucliadb-6.2.1.post2777.dist-info}/zip-safe +0 -0
@@ -1,136 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
import asyncio
|
21
|
-
|
22
|
-
import aiohttp
|
23
|
-
import pytest
|
24
|
-
from nucliadb_protos.dataset_pb2 import TaskType, TokenClassificationBatch, TrainSet
|
25
|
-
from nucliadb_protos.resources_pb2 import Position
|
26
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
27
|
-
from nucliadb_protos.writer_pb2_grpc import WriterStub
|
28
|
-
|
29
|
-
from nucliadb.tests.utils import inject_message
|
30
|
-
from nucliadb.tests.utils.broker_messages import BrokerMessageBuilder, FieldBuilder
|
31
|
-
from nucliadb.train import API_PREFIX
|
32
|
-
from nucliadb.train.api.v1.router import KB_PREFIX
|
33
|
-
from nucliadb.train.tests.utils import get_batches_from_train_response_stream
|
34
|
-
from nucliadb_protos import resources_pb2 as rpb
|
35
|
-
|
36
|
-
|
37
|
-
@pytest.mark.asyncio
|
38
|
-
@pytest.mark.parametrize("knowledgebox", ["STABLE", "EXPERIMENTAL"], indirect=True)
|
39
|
-
async def test_generator_token_classification(
|
40
|
-
train_rest_api: aiohttp.ClientSession,
|
41
|
-
knowledgebox_with_entities: str,
|
42
|
-
nucliadb_grpc: WriterStub,
|
43
|
-
):
|
44
|
-
kbid = knowledgebox_with_entities
|
45
|
-
|
46
|
-
await inject_resource_with_token_classification(kbid, nucliadb_grpc)
|
47
|
-
|
48
|
-
async with train_rest_api.get(
|
49
|
-
f"/{API_PREFIX}/v1/{KB_PREFIX}/{kbid}/trainset"
|
50
|
-
) as partitions:
|
51
|
-
assert partitions.status == 200
|
52
|
-
data = await partitions.json()
|
53
|
-
assert len(data["partitions"]) == 1
|
54
|
-
partition_id = data["partitions"][0]
|
55
|
-
|
56
|
-
trainset = TrainSet()
|
57
|
-
trainset.type = TaskType.TOKEN_CLASSIFICATION
|
58
|
-
trainset.batch_size = 2
|
59
|
-
trainset.filter.labels.append("PERSON")
|
60
|
-
trainset.filter.labels.append("ORG")
|
61
|
-
async with train_rest_api.post(
|
62
|
-
f"/{API_PREFIX}/v1/{KB_PREFIX}/{kbid}/trainset/{partition_id}",
|
63
|
-
data=trainset.SerializeToString(),
|
64
|
-
) as response:
|
65
|
-
assert response.status == 200
|
66
|
-
batches: list[TokenClassificationBatch] = []
|
67
|
-
async for batch in get_batches_from_train_response_stream(
|
68
|
-
response, TokenClassificationBatch
|
69
|
-
):
|
70
|
-
batches.append(batch)
|
71
|
-
|
72
|
-
for batch in batches:
|
73
|
-
if batch.data[0].token == "Eudald":
|
74
|
-
assert batch.data[0].label == "B-PERSON"
|
75
|
-
assert batch.data[1].label == "I-PERSON"
|
76
|
-
assert batch.data[2].label == "O"
|
77
|
-
if batch.data[0].token == "This":
|
78
|
-
assert batch.data[4].label == "B-PERSON"
|
79
|
-
assert batch.data[5].label == "I-PERSON"
|
80
|
-
if batch.data[0].token == "Where":
|
81
|
-
assert batch.data[3].label == "B-ORG"
|
82
|
-
assert batch.data[4].label == "I-ORG"
|
83
|
-
assert batch.data[5].label == "I-ORG"
|
84
|
-
if batch.data[0].token == "Summary":
|
85
|
-
assert batch.data[2].label == "B-ORG"
|
86
|
-
assert batch.data[4].label == "B-ORG"
|
87
|
-
if batch.data[0].token == "My":
|
88
|
-
assert batch.data[3].label == "B-PERSON"
|
89
|
-
assert batch.data[12].label == "B-ORG"
|
90
|
-
|
91
|
-
|
92
|
-
async def inject_resource_with_token_classification(knowledgebox, writer):
|
93
|
-
bm = broker_resource(knowledgebox)
|
94
|
-
await inject_message(writer, bm)
|
95
|
-
await asyncio.sleep(0.1)
|
96
|
-
return bm.uuid
|
97
|
-
|
98
|
-
|
99
|
-
def broker_resource(knowledgebox: str) -> BrokerMessage:
|
100
|
-
bmb = BrokerMessageBuilder(kbid=knowledgebox)
|
101
|
-
|
102
|
-
bmb.with_title("This is a bird, its a plane, no, its el Super Fran")
|
103
|
-
title_field = bmb.field_builder("title", rpb.FieldType.GENERIC)
|
104
|
-
title_field.with_extracted_entity(
|
105
|
-
"PERSON", "el Super Fran", positions=[Position(start=37, end=50)]
|
106
|
-
)
|
107
|
-
|
108
|
-
bmb.with_summary("Summary of Nuclia using Debian")
|
109
|
-
summary_field = bmb.field_builder("summary", rpb.FieldType.GENERIC)
|
110
|
-
summary_field.with_extracted_entity(
|
111
|
-
"ORG", "Nuclia", positions=[Position(start=11, end=17)]
|
112
|
-
)
|
113
|
-
summary_field.with_extracted_entity(
|
114
|
-
"ORG", "Debian", positions=[Position(start=24, end=30)]
|
115
|
-
)
|
116
|
-
|
117
|
-
file_field = FieldBuilder("file", rpb.FieldType.FILE)
|
118
|
-
file_field.with_extracted_text(
|
119
|
-
"My own text Ramon. This is great to be at Nuclia. \n Where is the Generalitat de Catalunya? Eudald Camprubi, do you want to go shooping? This is a test Carmen Iniesta!" # noqa
|
120
|
-
)
|
121
|
-
file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=0, end=49))
|
122
|
-
file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=50, end=90))
|
123
|
-
file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=91, end=135))
|
124
|
-
file_field.with_extracted_paragraph_metadata(rpb.Paragraph(start=136, end=166))
|
125
|
-
|
126
|
-
file_field.with_user_entity("PERSON", "Ramon", start=12, end=17)
|
127
|
-
file_field.with_user_entity("ORG", "Nuclia", start=42, end=48)
|
128
|
-
file_field.with_user_entity("ORG", "Generalitat de Catalunya", start=65, end=89)
|
129
|
-
file_field.with_user_entity("PERSON", "Eudald", start=91, end=106)
|
130
|
-
file_field.with_user_entity("PERSON", "Carmen Iniesta", start=151, end=165)
|
131
|
-
|
132
|
-
bmb.add_field_builder(file_field)
|
133
|
-
|
134
|
-
bm = bmb.build()
|
135
|
-
|
136
|
-
return bm
|
nucliadb/train/tests/utils.py
DELETED
@@ -1,108 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
|
20
|
-
from typing import AsyncGenerator, overload
|
21
|
-
|
22
|
-
import aiohttp
|
23
|
-
from nucliadb_protos.dataset_pb2 import (
|
24
|
-
FieldClassificationBatch,
|
25
|
-
ImageClassificationBatch,
|
26
|
-
ParagraphClassificationBatch,
|
27
|
-
ParagraphStreamingBatch,
|
28
|
-
QuestionAnswerStreamingBatch,
|
29
|
-
SentenceClassificationBatch,
|
30
|
-
TokenClassificationBatch,
|
31
|
-
)
|
32
|
-
|
33
|
-
from nucliadb.train.types import TrainBatch, TrainBatchType
|
34
|
-
|
35
|
-
# NOTE: we use def instead of async def to make mypy happy. Otherwise, it
|
36
|
-
# considers the overloaded functions as corountines returning async iterators
|
37
|
-
# instead of async iterators themselves and complains about it
|
38
|
-
|
39
|
-
|
40
|
-
@overload
def get_batches_from_train_response_stream(
    response: aiohttp.ClientResponse,
    pb_klass: type[FieldClassificationBatch],
) -> AsyncGenerator[FieldClassificationBatch, None]:
    ...


@overload
def get_batches_from_train_response_stream(
    response: aiohttp.ClientResponse,
    pb_klass: type[ImageClassificationBatch],
) -> AsyncGenerator[ImageClassificationBatch, None]:
    ...


@overload
def get_batches_from_train_response_stream(
    response: aiohttp.ClientResponse,
    pb_klass: type[ParagraphClassificationBatch],
) -> AsyncGenerator[ParagraphClassificationBatch, None]:
    ...


@overload
def get_batches_from_train_response_stream(
    response: aiohttp.ClientResponse,
    pb_klass: type[ParagraphStreamingBatch],
) -> AsyncGenerator[ParagraphStreamingBatch, None]:
    ...


@overload
def get_batches_from_train_response_stream(
    response: aiohttp.ClientResponse,
    pb_klass: type[QuestionAnswerStreamingBatch],
) -> AsyncGenerator[QuestionAnswerStreamingBatch, None]:
    ...


@overload
def get_batches_from_train_response_stream(
    response: aiohttp.ClientResponse,
    pb_klass: type[SentenceClassificationBatch],
) -> AsyncGenerator[SentenceClassificationBatch, None]:
    ...


@overload
def get_batches_from_train_response_stream(
    response: aiohttp.ClientResponse,
    pb_klass: type[TokenClassificationBatch],
) -> AsyncGenerator[TokenClassificationBatch, None]:
    ...


async def get_batches_from_train_response_stream(
    response: aiohttp.ClientResponse,
    pb_klass: TrainBatchType,
) -> AsyncGenerator[TrainBatch, None]:
    """Yield the protobuf batches contained in a length-prefixed response body.

    The body is read as a sequence of frames. Each frame is a 4-byte
    big-endian unsigned size followed by that many bytes of payload, which is
    parsed into a fresh ``pb_klass`` instance.

    Args:
        response: HTTP response whose ``content`` stream carries the frames.
        pb_klass: Batch class to instantiate and fill for every frame.

    Yields:
        One parsed batch per frame, in stream order.

    Raises:
        asyncio.IncompleteReadError: The stream ended in the middle of a
            header or a payload (truncated body).
    """
    # Imported locally so the block does not depend on the file's import list.
    import asyncio

    header_size = 4
    while True:
        # ``read(n)`` may legitimately return fewer than ``n`` bytes when the
        # data arrives in several chunks; ``readexactly`` keeps waiting until
        # the whole frame piece is available.
        try:
            header = await response.content.readexactly(header_size)
        except asyncio.IncompleteReadError as exc:
            if not exc.partial:
                # Clean end of stream: EOF landed exactly on a frame boundary.
                break
            raise
        payload_size = int.from_bytes(header, byteorder="big", signed=False)
        payload = await response.content.readexactly(payload_size)
        batch = pb_klass()
        batch.ParseFromString(payload)
        yield batch
|
@@ -1,51 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from typing import Any, Callable, Coroutine
|
21
|
-
|
22
|
-
from nucliadb_protos.resources_pb2 import FieldLayout
|
23
|
-
|
24
|
-
import nucliadb_models as models
|
25
|
-
from nucliadb_utils.storages.storage import Storage
|
26
|
-
|
27
|
-
# Registry of layout serializers. Each value is a coroutine function taking
# (layout_field, kbid, uuid, field, storage) and producing a FieldLayout.
# The dict is created empty here and is filled in by other modules (the
# nucliadb.writer.layouts.v1 import below is done for that side effect).
VERSION: dict[
    int,
    Callable[
        [models.InputLayoutField, str, str, str, Storage],
        Coroutine[Any, Any, FieldLayout],
    ],
] = {}
|
34
|
-
|
35
|
-
import nucliadb.writer.layouts.v1 # noqa isort:skip
|
36
|
-
|
37
|
-
|
38
|
-
async def serialize_blocks(
    layout_field: models.InputLayoutField,
    kbid: str,
    uuid: str,
    field: str,
    storage: Storage,
) -> FieldLayout:
    """Serialize a layout field with the serializer registered for its format.

    Looks up ``layout_field.format`` in the ``VERSION`` registry and awaits the
    matching serializer with the same arguments this function received.

    Raises:
        KeyError: ``VERSION`` has no serializer for ``layout_field.format``.
    """
    serializer = VERSION.get(layout_field.format)
    if serializer is None:
        raise KeyError("Invalid version")
    return await serializer(layout_field, kbid, uuid, field, storage)
|
nucliadb/writer/layouts/v1.py
DELETED
@@ -1,59 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
from nucliadb_protos.resources_pb2 import Block as PBBlock
|
21
|
-
from nucliadb_protos.resources_pb2 import FieldLayout
|
22
|
-
|
23
|
-
import nucliadb_models as models
|
24
|
-
from nucliadb.writer.layouts import VERSION
|
25
|
-
from nucliadb_utils.storages.storage import Storage
|
26
|
-
|
27
|
-
|
28
|
-
async def serialize_block(
    layout_field: models.InputLayoutField,
    kbid: str,
    uuid: str,
    field: str,
    storage: Storage,
) -> FieldLayout:
    """Convert an input layout field into a ``FieldLayout`` protobuf.

    Every block in ``layout_field.body.blocks`` is copied into the protobuf
    under the same key, and the block's attached file is uploaded through
    ``storage`` to the location returned by ``storage.layout_field`` for
    (kbid, uuid, field, key). The layout format is set once all blocks have
    been processed.
    """
    layout_pb = FieldLayout()

    for key, block in layout_field.body.blocks.items():
        block_pb = PBBlock()
        block_pb.x, block_pb.y = block.x, block.y
        block_pb.cols, block_pb.rows = block.cols, block.rows
        block_pb.type = PBBlock.TypeBlock.Value(block.type)
        # Blocks without their own identifier are identified by their key.
        block_pb.ident = block.ident or key
        block_pb.payload = block.payload

        attachment = block.file
        destination = storage.layout_field(kbid, uuid, field, key)
        # NOTE(review): the value returned by the upload is not stored on the
        # protobuf block -- confirm this is intentional.
        await storage.upload_b64file_to_cloudfile(
            destination,
            attachment.payload.encode(),
            attachment.filename,
            attachment.content_type,
            attachment.md5,
        )
        layout_pb.body.blocks[key].CopyFrom(block_pb)

    layout_pb.format = FieldLayout.Format.Value(layout_field.format.value)
    return layout_pb
|
57
|
-
|
58
|
-
|
59
|
-
# Register serialize_block as the serializer for the NUCLIAv1 layout format.
VERSION[models.LayoutFormat.NUCLIAv1] = serialize_block
|
@@ -1,120 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
from typing import Optional
|
20
|
-
|
21
|
-
from fastapi import HTTPException
|
22
|
-
from nucliadb_protos.resources_pb2 import UserVectorsWrapper
|
23
|
-
from nucliadb_protos.writer_pb2 import (
|
24
|
-
BrokerMessage,
|
25
|
-
GetVectorSetsRequest,
|
26
|
-
GetVectorSetsResponse,
|
27
|
-
OpStatusWriter,
|
28
|
-
SetVectorSetRequest,
|
29
|
-
VectorSets,
|
30
|
-
)
|
31
|
-
|
32
|
-
from nucliadb_models.common import FIELD_TYPES_MAP_REVERSE
|
33
|
-
from nucliadb_models.vectors import UserVectorsWrapper as UserVectorsWrapperPy
|
34
|
-
from nucliadb_models.vectors import VectorSimilarity
|
35
|
-
from nucliadb_utils.utilities import get_ingest
|
36
|
-
|
37
|
-
|
38
|
-
async def create_vectorset(
    kbid: str,
    vectorset: str,
    dimension: Optional[int] = None,
    similarity: Optional[VectorSimilarity] = None,
):
    """Ask the ingest service to create or update a vectorset on a Knowledge Box.

    Args:
        kbid: UUID of the Knowledge Box.
        vectorset: Identifier of the vectorset to set.
        dimension: Vector dimension to store on the vectorset, when given.
        similarity: Similarity function to store on the vectorset, when given.

    Returns:
        ``None`` when the ingest service reports success.

    Raises:
        HTTPException: 404 when the Knowledge Box does not exist, 500 when the
            ingest service reports an error.
    """
    ingest = get_ingest()
    pbrequest: SetVectorSetRequest = SetVectorSetRequest(id=vectorset)
    pbrequest.kb.uuid = kbid

    if dimension is not None:
        pbrequest.vectorset.dimension = dimension
    # Same "was it provided?" test as for ``dimension``, so a supplied value is
    # never dropped because of its truthiness.
    if similarity is not None:
        pbrequest.vectorset.similarity = similarity.to_pb()

    status: OpStatusWriter = await ingest.SetVectorSet(pbrequest)  # type: ignore
    if status.status == OpStatusWriter.Status.OK:
        return None
    elif status.status == OpStatusWriter.Status.NOTFOUND:
        raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
    elif status.status == OpStatusWriter.Status.ERROR:
        # The previous message talked about "labels", which this endpoint
        # never touches.
        raise HTTPException(
            status_code=500, detail="Error on setting vectorset on a Knowledge box"
        )
    # Any other status is treated like success, as before.
    return None
|
62
|
-
|
63
|
-
|
64
|
-
async def get_vectorsets(kbid: str) -> Optional[VectorSets]:
    """Fetch the vectorsets of a Knowledge Box from the ingest service.

    Returns:
        The ``vectorsets`` field of the response when its status is OK,
        otherwise ``None``.
    """
    ingest = get_ingest()
    request: GetVectorSetsRequest = GetVectorSetsRequest()
    request.kb.uuid = kbid

    response: GetVectorSetsResponse = await ingest.GetVectorSets(request)  # type: ignore
    if response.status != GetVectorSetsResponse.Status.OK:
        return None
    return response.vectorsets
|
74
|
-
|
75
|
-
|
76
|
-
def parse_vectors(
    writer: BrokerMessage, vectors: UserVectorsWrapperPy, vectorsets: VectorSets
):
    """Validate user vectors and append them to ``writer.user_vectors``.

    ``vectors`` is iterated; for each item a ``UserVectorsWrapper`` protobuf
    is built with the item's field, its vectors to add and its vectors to
    delete, and then appended to the broker message.

    Raises:
        HTTPException: 412 when a vectorset is not present in ``vectorsets``,
            when a vector's length differs from the vectorset dimension, or
            when a vector consists only of zeros.
    """
    for item in vectors:
        wrapper = UserVectorsWrapper()
        wrapper.field.field_type = FIELD_TYPES_MAP_REVERSE[  # type: ignore
            item.field.field_type.value
        ]
        wrapper.field.field = item.field.field

        if item.vectors is not None:
            for vectorset_id, entries in item.vectors.items():
                if vectorset_id not in vectorsets.vectorsets:
                    raise HTTPException(
                        status_code=412,
                        detail="Invalid vectorset",
                    )
                expected_dim = vectorsets.vectorsets[vectorset_id].dimension

                for key, entry in entries.items():
                    target = wrapper.vectors.vectors[vectorset_id].vectors[key]

                    actual_dim = len(entry.vector)
                    if actual_dim != expected_dim:
                        raise HTTPException(
                            status_code=412,
                            detail=f"Invalid dimension should be {expected_dim} was {actual_dim}",
                        )
                    # A vector made only of zeros is rejected.
                    if entry.vector.count(0) == expected_dim:
                        raise HTTPException(
                            status_code=412,
                            detail="Invalid vector should not be 0",
                        )
                    target.vector.extend(entry.vector)

                    if entry.positions is not None:
                        target.start = entry.positions[0]
                        target.end = entry.positions[1]

        if item.vectors_to_delete is not None:
            for vectorset_id, doomed in item.vectors_to_delete.items():
                wrapper.vectors_to_delete[vectorset_id].vectors.extend(doomed.vectors)

        writer.user_vectors.append(wrapper)
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
@@ -1,31 +0,0 @@
|
|
1
|
-
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
-
#
|
3
|
-
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
-
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
-
#
|
6
|
-
# AGPL:
|
7
|
-
# This program is free software: you can redistribute it and/or modify
|
8
|
-
# it under the terms of the GNU Affero General Public License as
|
9
|
-
# published by the Free Software Foundation, either version 3 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# This program is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
-
# GNU Affero General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU Affero General Public License
|
18
|
-
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
-
#
|
20
|
-
# Plugin modules that pytest loads for this test package (pytest reads the
# module-level ``pytest_plugins`` name). Judging by their names they presumably
# provide the shared fixtures for these tests -- verify against the modules.
pytest_plugins = [
    "pytest_mock",
    "pytest_docker_fixtures",
    "nucliadb_utils.tests.nats",
    "nucliadb.tests.fixtures",
    "nucliadb.tests.tikv",
    "nucliadb.ingest.tests.fixtures",  # should be refactored out
    "nucliadb.writer.tests.fixtures",
    "nucliadb_utils.tests.conftest",
    "nucliadb_utils.tests.gcs",
    "nucliadb_utils.tests.s3",
]
|