nucliadb 6.7.2.post4874__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0023_backfill_pg_catalog.py +8 -4
- migrations/0028_extracted_vectors_reference.py +1 -1
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +8 -4
- migrations/0039_backfill_converation_splits_metadata.py +106 -0
- migrations/0040_migrate_search_configurations.py +79 -0
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/__init__.py +79 -0
- nucliadb/common/catalog/dummy.py +36 -0
- nucliadb/common/catalog/interface.py +85 -0
- nucliadb/{search/search/pgcatalog.py → common/catalog/pg.py} +330 -232
- nucliadb/common/catalog/utils.py +56 -0
- nucliadb/common/cluster/manager.py +8 -23
- nucliadb/common/cluster/rebalance.py +484 -112
- nucliadb/common/cluster/rollover.py +36 -9
- nucliadb/common/cluster/settings.py +4 -9
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +9 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +5 -34
- nucliadb/common/external_index_providers/settings.py +1 -27
- nucliadb/common/filter_expression.py +129 -41
- nucliadb/common/http_clients/exceptions.py +8 -0
- nucliadb/common/http_clients/processing.py +16 -23
- nucliadb/common/http_clients/utils.py +3 -0
- nucliadb/common/ids.py +82 -58
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +22 -5
- nucliadb/common/vector_index_config.py +1 -1
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +10 -8
- nucliadb/ingest/consumer/service.py +5 -30
- nucliadb/ingest/consumer/shard_creator.py +16 -5
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +37 -49
- nucliadb/ingest/fields/conversation.py +55 -9
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +89 -57
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +128 -113
- nucliadb/ingest/orm/knowledgebox.py +91 -59
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +98 -153
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +82 -71
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/partitions.py +12 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +15 -114
- nucliadb/ingest/settings.py +36 -15
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +23 -26
- nucliadb/metrics_exporter.py +20 -6
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +4 -11
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/knowledgebox.py +37 -9
- nucliadb/reader/api/v1/learning_config.py +33 -14
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +3 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +15 -19
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +328 -0
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +28 -8
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +33 -19
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -42
- nucliadb/search/search/chat/ask.py +131 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +453 -32
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +49 -0
- nucliadb/search/search/hydrator/fields.py +217 -0
- nucliadb/search/search/hydrator/images.py +130 -0
- nucliadb/search/search/hydrator/paragraphs.py +323 -0
- nucliadb/search/search/hydrator/resources.py +60 -0
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +24 -7
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +44 -18
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -48
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +5 -6
- nucliadb/search/search/query_parser/parsers/catalog.py +7 -11
- nucliadb/search/search/query_parser/parsers/common.py +21 -13
- nucliadb/search/search/query_parser/parsers/find.py +6 -29
- nucliadb/search/search/query_parser/parsers/graph.py +18 -28
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -56
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +6 -7
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +5 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +4 -10
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +15 -14
- nucliadb/writer/api/v1/knowledgebox.py +18 -56
- nucliadb/writer/api/v1/learning_config.py +5 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +43 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +5 -7
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +15 -22
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +10 -11
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb/common/external_index_providers/pinecone.py +0 -894
- nucliadb/ingest/orm/processor/pgcatalog.py +0 -129
- nucliadb/search/search/hydrator.py +0 -197
- nucliadb-6.7.2.post4874.dist-info/RECORD +0 -383
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.7.2.post4874.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/train/generators/image_classifier.py
CHANGED

@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from
+from collections.abc import AsyncGenerator
 
 from nucliadb.train.generators.utils import batchify
 from nucliadb_models.filters import FilterExpression
@@ -33,7 +33,7 @@ def image_classification_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression:
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[ImageClassificationBatch, None]:
     generator = generate_image_classification_payloads(kbid, trainset, shard_replica_id)
     batch_generator = batchify(generator, trainset.batch_size, ImageClassificationBatch)
nucliadb/train/generators/paragraph_classifier.py
CHANGED

@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from
+from collections.abc import AsyncGenerator
 
 from fastapi import HTTPException
 from nidx_protos.nodereader_pb2 import StreamRequest
@@ -38,7 +38,7 @@ def paragraph_classification_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression:
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[ParagraphClassificationBatch, None]:
     if len(trainset.filter.labels) != 1:
         raise HTTPException(
nucliadb/train/generators/paragraph_streaming.py
CHANGED

@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from
+from collections.abc import AsyncGenerator
 
 from nidx_protos.nodereader_pb2 import StreamRequest
 
@@ -38,7 +38,7 @@ def paragraph_streaming_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression:
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[ParagraphStreamingBatch, None]:
     generator = generate_paragraph_streaming_payloads(kbid, trainset, shard_replica_id)
     batch_generator = batchify(generator, trainset.batch_size, ParagraphStreamingBatch)
nucliadb/train/generators/question_answer_streaming.py
CHANGED

@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from
+from collections.abc import AsyncGenerator
 
 from nidx_protos.nodereader_pb2 import StreamRequest
 
@@ -47,7 +47,7 @@ def question_answer_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression:
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[QuestionAnswerStreamingBatch, None]:
     generator = generate_question_answer_streaming_payloads(kbid, trainset, shard_replica_id)
     batch_generator = batchify(generator, trainset.batch_size, QuestionAnswerStreamingBatch)
nucliadb/train/generators/sentence_classifier.py
CHANGED

@@ -18,7 +18,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from
+from collections.abc import AsyncGenerator
 
 from fastapi import HTTPException
 from nidx_protos.nodereader_pb2 import StreamRequest
@@ -40,7 +40,7 @@ def sentence_classification_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression:
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[SentenceClassificationBatch, None]:
     if len(trainset.filter.labels) == 0:
         raise HTTPException(
@@ -116,10 +116,7 @@ async def get_sentences(kbid: str, result: str) -> list[str]:
         if split is not None:
             text = extracted_text.split_text[split]
             for paragraph in field_metadata.split_metadata[split].paragraphs:
-
-                    key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
-                else:
-                    key = paragraph.key
+                key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
                 if key == result:
                     for sentence in paragraph.sentences:
                         splitted_text = text[sentence.start : sentence.end]
@@ -127,10 +124,7 @@ async def get_sentences(kbid: str, result: str) -> list[str]:
         else:
             text = extracted_text.text
            for paragraph in field_metadata.metadata.paragraphs:
-
-                    key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
-                else:
-                    key = paragraph.key
+                key = f"{rid}/{field_type}/{field}/{paragraph.start}-{paragraph.end}"
                 if key == result:
                     for sentence in paragraph.sentences:
                         splitted_text = text[sentence.start : sentence.end]
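The two get_sentences hunks above drop a conditional branch and always derive the paragraph key from the field path and character offsets. A short sketch of that key format, with illustrative values rather than real identifiers:

    rid, field_type, field = "a1b2c3", "t", "text1"
    start, end = 0, 120
    key = f"{rid}/{field_type}/{field}/{start}-{end}"
    assert key == "a1b2c3/t/text1/0-120"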
nucliadb/train/generators/token_classifier.py
CHANGED

@@ -19,7 +19,8 @@
 #
 
 from collections import OrderedDict
-from
+from collections.abc import AsyncGenerator
+from typing import cast
 
 from nidx_protos.nodereader_pb2 import StreamFilter, StreamRequest
 
@@ -43,7 +44,7 @@ def token_classification_batch_generator(
     kbid: str,
     trainset: TrainSet,
     shard_replica_id: str,
-    filter_expression:
+    filter_expression: FilterExpression | None,
 ) -> AsyncGenerator[TokenClassificationBatch, None]:
     generator = generate_token_classification_payloads(kbid, trainset, shard_replica_id)
     batch_generator = batchify(generator, trainset.batch_size, TokenClassificationBatch)
nucliadb/train/generators/utils.py
CHANGED

@@ -18,7 +18,8 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
 
-from
+from collections.abc import AsyncGenerator, AsyncIterator
+from typing import Any
 
 from nucliadb.common.cache import get_resource_cache
 from nucliadb.common.ids import FIELD_TYPE_STR_TO_PB
@@ -30,16 +31,16 @@ from nucliadb.train.types import T
 from nucliadb_utils.utilities import get_storage
 
 
-async def get_resource_from_cache_or_db(kbid: str, uuid: str) ->
+async def get_resource_from_cache_or_db(kbid: str, uuid: str) -> ResourceORM | None:
     resource_cache = get_resource_cache()
     if resource_cache is None:
-        return await _get_resource_from_db(kbid, uuid)
         logger.warning("Resource cache is not set")
+        return await _get_resource_from_db(kbid, uuid)
 
     return await resource_cache.get(kbid, uuid)
 
 
-async def _get_resource_from_db(kbid: str, uuid: str) ->
+async def _get_resource_from_db(kbid: str, uuid: str) -> ResourceORM | None:
     storage = await get_storage(service_name=SERVICE_NAME)
     async with get_driver().ro_transaction() as transaction:
         kb = KnowledgeBoxORM(transaction, storage, kbid)
@@ -81,7 +82,7 @@ async def get_paragraph(kbid: str, paragraph_id: str) -> str:
 
 
 async def batchify(
-    producer: AsyncIterator[Any], size: int, batch_klass:
+    producer: AsyncIterator[Any], size: int, batch_klass: type[T]
 ) -> AsyncGenerator[T, None]:
     # NOTE: we are supposing all protobuffers have a data field
     batch = []
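Across the generator modules above, the recurring import change follows PEP 585: since Python 3.9, typing.AsyncGenerator, typing.AsyncIterator, and typing.Callable are deprecated aliases of their collections.abc counterparts. A minimal sketch of the new style (the function name is illustrative, not from this diff):

    from collections.abc import AsyncGenerator

    async def countdown(n: int) -> AsyncGenerator[int, None]:
        # yields n, n - 1, ..., 1
        while n > 0:
            yield n
            n -= 1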
nucliadb/train/nodes.py
CHANGED

@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from
+from collections.abc import AsyncIterator
 
 from nucliadb.common import datamanagers
 from nucliadb.common.cluster import manager
@@ -62,7 +62,7 @@ class TrainShardManager(manager.KBShardManager):
 
         return shard_object.nidx_shard_id
 
-    async def get_kb_obj(self, txn: Transaction, kbid: str) ->
+    async def get_kb_obj(self, txn: Transaction, kbid: str) -> KnowledgeBox | None:
         if kbid is None:
             return None
 
@@ -72,7 +72,7 @@ class TrainShardManager(manager.KBShardManager):
         kbobj = KnowledgeBox(txn, self.storage, kbid)
         return kbobj
 
-    async def get_kb_entities_manager(self, txn: Transaction, kbid: str) ->
+    async def get_kb_entities_manager(self, txn: Transaction, kbid: str) -> EntitiesManager | None:
         kbobj = await self.get_kb_obj(txn, kbid)
         if kbobj is None:
             return None
nucliadb/train/resource.py
CHANGED

@@ -19,7 +19,7 @@
 #
 from __future__ import annotations
 
-from
+from collections.abc import AsyncIterator, MutableMapping
 
 from nucliadb.common import datamanagers
 from nucliadb.ingest.orm.resource import Resource
@@ -69,9 +69,7 @@ async def iterate_sentences(
     # return any
     vectorset_id = None
     async with datamanagers.with_ro_transaction() as txn:
-        async for vectorset_id, vs in datamanagers.vectorsets.iter(
-            txn=txn, kbid=resource.kb.kbid
-        ):
+        async for vectorset_id, vs in datamanagers.vectorsets.iter(txn=txn, kbid=resource.kbid):
             break
     assert vectorset_id is not None, "All KBs must have at least a vectorset"
     vo = await field.get_vectors(vectorset_id, vs.storage_key_kind)
@@ -81,7 +79,7 @@ async def iterate_sentences(
         if fm is None:
             continue
 
-        field_metadatas: list[tuple[
+        field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
         for subfield_metadata, splitted_metadata in fm.split_metadata.items():
             field_metadatas.append((subfield_metadata, splitted_metadata))
 
@@ -188,7 +186,7 @@ async def iterate_paragraphs(
         if fm is None:
             continue
 
-        field_metadatas: list[tuple[
+        field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
         for subfield_metadata, splitted_metadata in fm.split_metadata.items():
             field_metadatas.append((subfield_metadata, splitted_metadata))
 
@@ -264,7 +262,7 @@ async def iterate_fields(
         if fm is None:
             continue
 
-        field_metadatas: list[tuple[
+        field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
         for subfield_metadata, splitted_metadata in fm.split_metadata.items():
             field_metadatas.append((subfield_metadata, splitted_metadata))
 
@@ -319,7 +317,7 @@ async def generate_train_resource(
         if fm is None:
            continue
 
-        field_metadatas: list[tuple[
+        field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
         for subfield_metadata, splitted_metadata in fm.split_metadata.items():
             field_metadatas.append((subfield_metadata, splitted_metadata))
 
nucliadb/train/settings.py
CHANGED

@@ -17,17 +17,16 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from typing import Optional
 
 from nucliadb.ingest.settings import DriverSettings
 
 
 class Settings(DriverSettings):
     grpc_port: int = 8031
-    train_grpc_address:
+    train_grpc_address: str | None = None
 
-    nuclia_learning_url:
-    nuclia_learning_apikey:
+    nuclia_learning_url: str | None = "https://nuclia.cloud/api/v1/learning/"
+    nuclia_learning_apikey: str | None = None
 
     internal_counter_api: str = "http://search.nuclia.svc.cluster.local:8030/api/v1/kb/{kbid}/counters"
 
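The settings above now spell optional values as `str | None` (PEP 604) instead of Optional[str]. A minimal sketch of the pattern, assuming a pydantic-settings style base class comparable to the DriverSettings used here:

    from pydantic_settings import BaseSettings

    class TrainSettings(BaseSettings):
        grpc_port: int = 8031
        # None until provided, e.g. via a TRAIN_GRPC_ADDRESS environment variable
        train_grpc_address: str | None = None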
nucliadb/train/types.py
CHANGED

@@ -17,20 +17,20 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from typing import TypeVar
+from typing import TypeVar
 
 from nucliadb_protos import dataset_pb2 as dpb
 
-TrainBatch =
-    dpb.FieldClassificationBatch
-    dpb.ImageClassificationBatch
-    dpb.ParagraphClassificationBatch
-    dpb.ParagraphStreamingBatch
-    dpb.QuestionAnswerStreamingBatch
-    dpb.SentenceClassificationBatch
-    dpb.TokenClassificationBatch
-    dpb.FieldStreamingBatch
-
+TrainBatch = (
+    dpb.FieldClassificationBatch
+    | dpb.ImageClassificationBatch
+    | dpb.ParagraphClassificationBatch
+    | dpb.ParagraphStreamingBatch
+    | dpb.QuestionAnswerStreamingBatch
+    | dpb.SentenceClassificationBatch
+    | dpb.TokenClassificationBatch
+    | dpb.FieldStreamingBatch
+)
 
 T = TypeVar(
     "T",
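TrainBatch becomes a parenthesized PEP 604 union; note that evaluating `A | B` between classes at import time requires Python 3.10 or newer. A sketch with stand-in classes rather than the real protobuf batch types:

    from typing import TypeVar

    class FieldBatch: ...
    class ParagraphBatch: ...

    # produces a types.UnionType instance at runtime (Python 3.10+)
    TrainBatch = FieldBatch | ParagraphBatch
    # constrained TypeVar over the same alternatives, as train/types.py does
    T = TypeVar("T", FieldBatch, ParagraphBatch)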
nucliadb/train/upload.py
CHANGED

@@ -20,8 +20,9 @@
 import argparse
 import asyncio
 import importlib.metadata
+import inspect
 from asyncio import tasks
-from
+from collections.abc import Callable
 
 from nucliadb.train.uploader import start_upload
 from nucliadb_telemetry import errors
@@ -89,7 +90,7 @@ def run() -> None:
     finally:
         try:
             for finalizer in finalizers:
-                if
+                if inspect.iscoroutinefunction(finalizer):
                     loop.run_until_complete(finalizer())
                 else:
                     finalizer()
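The run() change above dispatches each finalizer by kind: coroutine functions are awaited on the loop, plain callables are invoked directly. A self-contained sketch of that pattern (finalizer names are illustrative):

    import asyncio
    import inspect

    def close_files() -> None:
        print("sync cleanup")

    async def drain_queue() -> None:
        print("async cleanup")

    loop = asyncio.new_event_loop()
    try:
        for finalizer in (close_files, drain_queue):
            if inspect.iscoroutinefunction(finalizer):
                loop.run_until_complete(finalizer())
            else:
                finalizer()
    finally:
        loop.close()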
nucliadb/train/uploader.py
CHANGED

@@ -17,7 +17,6 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from typing import Optional
 
 import aiohttp
 
@@ -97,7 +96,7 @@ class UploadServicer:
             response.status = GetLabelsResponse.Status.NOTFOUND
             return response
         response.kb.uuid = kbid
-        labels:
+        labels: Labels | None = await datamanagers.atomic.labelset.get_all(kbid=kbid)
         if labels is not None:
             response.labels.CopyFrom(labels)
         return response
nucliadb/train/utils.py
CHANGED

@@ -17,7 +17,6 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from typing import Optional
 
 from grpc import aio
 from grpc_health.v1 import health, health_pb2_grpc
@@ -37,7 +36,7 @@ from nucliadb_utils.utilities import (
 )
 
 
-async def start_train_grpc(service_name:
+async def start_train_grpc(service_name: str | None = None):
     actual_service = get_utility(Utility.TRAIN)
     if actual_service is not None:
         return
nucliadb/writer/api/v1/export_import.py
CHANGED

@@ -17,8 +17,8 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
+from collections.abc import AsyncGenerator
 from datetime import datetime
-from typing import AsyncGenerator
 from uuid import uuid4
 
 from fastapi_versioning import version
@@ -66,6 +66,7 @@ from nucliadb_utils.authentication import requires_one
     summary="Start an export of a Knowledge Box",
     tags=["Knowledge Boxes"],
     response_model=CreateExportResponse,
+    include_in_schema=False,
 )
 @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
 @version(1)
@@ -91,6 +92,7 @@ async def start_kb_export_endpoint(request: Request, kbid: str):
     tags=["Knowledge Boxes"],
     response_model=NewImportedKbResponse,
     openapi_extra={"x-hidden-operation": True},
+    include_in_schema=False,
 )
 @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
 @version(1)
@@ -140,6 +142,7 @@ async def kb_create_and_import_endpoint(request: Request):
     summary="Start an import to a Knowledge Box",
     tags=["Knowledge Boxes"],
     response_model=CreateImportResponse,
+    include_in_schema=False,
 )
 @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
 @version(1)
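All three export/import routes gain include_in_schema=False, which keeps a route callable while omitting it from the generated OpenAPI document. A minimal FastAPI sketch with a hypothetical route:

    from fastapi import FastAPI

    app = FastAPI()

    @app.post("/export", include_in_schema=False)
    async def start_export() -> dict[str, str]:
        # still served at /export, but absent from /openapi.json and /docs
        return {"status": "started"}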
nucliadb/writer/api/v1/field.py
CHANGED

@@ -17,8 +17,9 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
+from collections.abc import Callable
 from inspect import iscoroutinefunction
-from typing import TYPE_CHECKING, Annotated
+from typing import TYPE_CHECKING, Annotated
 
 import pydantic
 from fastapi import HTTPException, Query, Response
@@ -72,12 +73,7 @@ if TYPE_CHECKING: # pragma: no cover
 else:
     FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP: dict[models.FieldTypeName, int]
 
-FieldModelType =
-    models.TextField,
-    models.LinkField,
-    models.InputConversationField,
-    models.FileField,
-]
+FieldModelType = models.TextField | models.LinkField | models.InputConversationField | models.FileField
 
 FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP = {
     models.FieldTypeName.FILE: resources_pb2.FieldType.FILE,
@@ -249,9 +245,10 @@ async def parse_conversation_field_adapter(
     writer: BrokerMessage,
     toprocess: PushPayload,
     resource_classifications: ResourceClassifications,
+    replace_field: bool = False,
 ):
     return await parse_conversation_field(
-        field_id, field_payload, writer, toprocess, kbid, rid, resource_classifications
+        field_id, field_payload, writer, toprocess, kbid, rid, resource_classifications, replace_field
     )
 
 
@@ -277,7 +274,7 @@ async def parse_file_field_adapter(
 )
 
 
-FIELD_PARSERS_MAP: dict[
+FIELD_PARSERS_MAP: dict[type, Callable] = {
     models.TextField: parse_text_field_adapter,
     models.LinkField: parse_link_field_adapter,
     models.InputConversationField: parse_conversation_field_adapter,
@@ -380,7 +377,9 @@ async def add_resource_field_conversation_rslug_prefix(
     field_id: FieldIdString,
     field_payload: models.InputConversationField,
 ) -> ResourceFieldAdded:
-    return await add_field_to_resource_by_slug(
+    return await add_field_to_resource_by_slug(
+        request, kbid, rslug, field_id, field_payload, replace_field=True
+    )
 
 
 @api.put(
@@ -399,7 +398,7 @@ async def add_resource_field_conversation_rid_prefix(
     field_id: FieldIdString,
     field_payload: models.InputConversationField,
 ) -> ResourceFieldAdded:
-    return await add_field_to_resource(request, kbid, rid, field_id, field_payload)
+    return await add_field_to_resource(request, kbid, rid, field_id, field_payload, replace_field=True)
 
 
 @api.put(
@@ -466,7 +465,9 @@ async def append_messages_to_conversation_field_rslug_prefix(
         field = models.InputConversationField(messages=messages)
     except pydantic.ValidationError as e:
         raise HTTPException(status_code=422, detail=str(e))
-    return await add_field_to_resource_by_slug(
+    return await add_field_to_resource_by_slug(
+        request, kbid, rslug, field_id, field, replace_field=False
+    )
 
 
 @api.put(
@@ -489,7 +490,7 @@ async def append_messages_to_conversation_field_rid_prefix(
         field = models.InputConversationField(messages=messages)
     except pydantic.ValidationError as e:
         raise HTTPException(status_code=422, detail=str(e))
-    return await add_field_to_resource(request, kbid, rid, field_id, field)
+    return await add_field_to_resource(request, kbid, rid, field_id, field, replace_field=False)
 
 
 @api.delete(
@@ -545,7 +546,7 @@ async def reprocess_file_field(
     rid: str,
     field_id: FieldIdString,
     x_nucliadb_user: Annotated[str, X_NUCLIADB_USER] = "",
-    x_file_password: Annotated[
+    x_file_password: Annotated[str | None, X_FILE_PASSWORD] = None,
     reset_title: bool = Query(
         default=False,
         description="Reset the title of the resource so that the file or link computed titles are set after processing.",
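FIELD_PARSERS_MAP now carries an explicit dict[type, Callable] annotation, a dispatch-table pattern that selects a parser by payload class. A sketch with stand-in classes rather than the real models:

    from collections.abc import Callable

    class TextField: ...
    class LinkField: ...

    def parse_text(field: TextField) -> str:
        return "parsed text"

    def parse_link(field: LinkField) -> str:
        return "parsed link"

    # dispatch table in the style of FIELD_PARSERS_MAP above
    FIELD_PARSERS: dict[type, Callable] = {
        TextField: parse_text,
        LinkField: parse_link,
    }

    payload = LinkField()
    print(FIELD_PARSERS[type(payload)](payload))  # -> parsed link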
nucliadb/writer/api/v1/knowledgebox.py
CHANGED

@@ -17,10 +17,9 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-import asyncio
 from functools import partial
 
-from fastapi import HTTPException
+from fastapi import BackgroundTasks, HTTPException
 from fastapi_versioning import version
 from starlette.requests import Request
 
@@ -36,10 +35,6 @@ from nucliadb.writer import logger
 from nucliadb.writer.api.utils import only_for_onprem
 from nucliadb.writer.api.v1.router import KB_PREFIX, KBS_PREFIX, api
 from nucliadb.writer.utilities import get_processing
-from nucliadb_models.external_index_providers import (
-    ExternalIndexProviderType,
-    PineconeServerlessCloud,
-)
 from nucliadb_models.resource import (
     KnowledgeBoxConfig,
     KnowledgeBoxObj,
@@ -118,20 +113,6 @@ async def create_kb(item: KnowledgeBoxConfig) -> tuple[str, str]:
     external_index_provider = knowledgebox_pb2.CreateExternalIndexProviderMetadata(
         type=knowledgebox_pb2.ExternalIndexProviderType.UNSET,
     )
-    if (
-        item.external_index_provider
-        and item.external_index_provider.type == ExternalIndexProviderType.PINECONE
-    ):
-        pinecone_api_key = item.external_index_provider.api_key
-        serverless_pb = to_pinecone_serverless_cloud_pb(item.external_index_provider.serverless_cloud)
-        external_index_provider = knowledgebox_pb2.CreateExternalIndexProviderMetadata(
-            type=knowledgebox_pb2.ExternalIndexProviderType.PINECONE,
-            pinecone_config=knowledgebox_pb2.CreatePineconeConfig(
-                api_key=pinecone_api_key,
-                serverless_cloud=serverless_pb,
-            ),
-        )
-
     try:
         (kbid, slug) = await KnowledgeBox.create(
             driver,
@@ -165,8 +146,6 @@ async def create_kb(item: KnowledgeBoxConfig) -> tuple[str, str]:
 @requires(NucliaDBRoles.MANAGER)
 @version(1)
 async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig) -> KnowledgeBoxObjID:
-    driver = get_driver()
-    config = None
     if (
         item.slug
         or item.title
@@ -174,29 +153,24 @@ async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig) -> Kn
         or item.hidden_resources_enabled
         or item.hidden_resources_hide_on_creation
     ):
-
-
-            title=item.title or "",
-            description=item.description or "",
-            hidden_resources_enabled=item.hidden_resources_enabled,
-            hidden_resources_hide_on_creation=item.hidden_resources_hide_on_creation,
-        )
-        try:
-            async with driver.rw_transaction() as txn:
+        try:
+            driver = get_driver()
             await KnowledgeBox.update(
-
-
+                driver,
+                kbid=kbid,
                 slug=item.slug,
-
+                title=item.title,
+                description=item.description,
+                hidden_resources_enabled=item.hidden_resources_enabled,
+                hidden_resources_hide_on_creation=item.hidden_resources_hide_on_creation,
             )
-
-
-
-
-
-
-
-        return KnowledgeBoxObjID(uuid=kbid)
+        except datamanagers.exceptions.KnowledgeBoxNotFound:
+            raise HTTPException(status_code=404, detail="Knowledge box does not exist")
+        except Exception as exc:
+            logger.exception("Could not update KB", exc_info=exc, extra={"kbid": kbid})
+            raise HTTPException(status_code=500, detail="Error updating knowledge box")
+
+    return KnowledgeBoxObjID(uuid=kbid)
 
 
 @only_for_onprem
@@ -209,7 +183,7 @@ async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig) -> Kn
 )
 @requires(NucliaDBRoles.MANAGER)
 @version(1)
-async def delete_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
+async def delete_kb(request: Request, kbid: str, background: BackgroundTasks) -> KnowledgeBoxObj:
     driver = get_driver()
     try:
         await KnowledgeBox.delete(driver, kbid=kbid)
@@ -233,18 +207,6 @@ async def delete_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
     # be nice and notify processing this KB is being deleted so we waste
     # resources
     processing = get_processing()
-
+    background.add_task(processing.delete_from_processing, kbid=kbid)
 
     return KnowledgeBoxObj(uuid=kbid)
-
-
-def to_pinecone_serverless_cloud_pb(
-    serverless: PineconeServerlessCloud,
-) -> knowledgebox_pb2.PineconeServerlessCloud.ValueType:
-    return {
-        PineconeServerlessCloud.AWS_EU_WEST_1: knowledgebox_pb2.PineconeServerlessCloud.AWS_EU_WEST_1,
-        PineconeServerlessCloud.AWS_US_EAST_1: knowledgebox_pb2.PineconeServerlessCloud.AWS_US_EAST_1,
-        PineconeServerlessCloud.AWS_US_WEST_2: knowledgebox_pb2.PineconeServerlessCloud.AWS_US_WEST_2,
-        PineconeServerlessCloud.AZURE_EASTUS2: knowledgebox_pb2.PineconeServerlessCloud.AZURE_EASTUS2,
-        PineconeServerlessCloud.GCP_US_CENTRAL1: knowledgebox_pb2.PineconeServerlessCloud.GCP_US_CENTRAL1,
-    }[serverless]
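delete_kb now takes a BackgroundTasks dependency and schedules the processing notification to run after the response is sent, rather than calling it inline. A minimal sketch with a hypothetical notifier function:

    from fastapi import BackgroundTasks, FastAPI

    app = FastAPI()

    def notify_processing(kbid: str) -> None:
        ...  # hypothetical fire-and-forget notification

    @app.delete("/kb/{kbid}")
    async def delete_kb(kbid: str, background: BackgroundTasks) -> dict[str, str]:
        # executed by Starlette after the response has been sent
        background.add_task(notify_processing, kbid=kbid)
        return {"uuid": kbid}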
nucliadb/writer/api/v1/learning_config.py
CHANGED

@@ -17,7 +17,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 #
-from fastapi import Request
+from fastapi import Header, Request
 from fastapi_versioning import version
 from nuclia_models.config.proto import ExtractConfig, SplitConfiguration
 
@@ -55,10 +55,11 @@ async def set_configuration(
 @requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
 @version(1)
 async def patch_configuration(
-    request: Request,
-    kbid: str,
+    request: Request, kbid: str, x_nucliadb_account: str = Header(default="", include_in_schema=False)
 ):
-    return await learning_config_proxy(
+    return await learning_config_proxy(
+        request, "PATCH", f"/config/{kbid}", headers={"account-id": x_nucliadb_account}
+    )
 
 
 @api.post(