nucliadb 6.9.1.post5192__py3-none-any.whl → 6.10.0.post5705__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0023_backfill_pg_catalog.py +2 -2
- migrations/0029_backfill_field_status.py +3 -4
- migrations/0032_remove_old_relations.py +2 -3
- migrations/0038_backfill_catalog_field_labels.py +2 -2
- migrations/0039_backfill_converation_splits_metadata.py +2 -2
- migrations/0041_reindex_conversations.py +137 -0
- migrations/pg/0010_shards_index.py +34 -0
- nucliadb/search/api/v1/resource/utils.py → migrations/pg/0011_catalog_statistics.py +5 -6
- migrations/pg/0012_catalog_statistics_undo.py +26 -0
- nucliadb/backups/create.py +2 -15
- nucliadb/backups/restore.py +4 -15
- nucliadb/backups/tasks.py +4 -1
- nucliadb/common/back_pressure/cache.py +2 -3
- nucliadb/common/back_pressure/materializer.py +7 -13
- nucliadb/common/back_pressure/settings.py +6 -6
- nucliadb/common/back_pressure/utils.py +1 -0
- nucliadb/common/cache.py +9 -9
- nucliadb/common/catalog/interface.py +12 -12
- nucliadb/common/catalog/pg.py +41 -29
- nucliadb/common/catalog/utils.py +3 -3
- nucliadb/common/cluster/manager.py +5 -4
- nucliadb/common/cluster/rebalance.py +483 -114
- nucliadb/common/cluster/rollover.py +25 -9
- nucliadb/common/cluster/settings.py +3 -8
- nucliadb/common/cluster/utils.py +34 -8
- nucliadb/common/context/__init__.py +7 -8
- nucliadb/common/context/fastapi.py +1 -2
- nucliadb/common/datamanagers/__init__.py +2 -4
- nucliadb/common/datamanagers/atomic.py +4 -2
- nucliadb/common/datamanagers/cluster.py +1 -2
- nucliadb/common/datamanagers/fields.py +3 -4
- nucliadb/common/datamanagers/kb.py +6 -6
- nucliadb/common/datamanagers/labels.py +2 -3
- nucliadb/common/datamanagers/resources.py +10 -33
- nucliadb/common/datamanagers/rollover.py +5 -7
- nucliadb/common/datamanagers/search_configurations.py +1 -2
- nucliadb/common/datamanagers/synonyms.py +1 -2
- nucliadb/common/datamanagers/utils.py +4 -4
- nucliadb/common/datamanagers/vectorsets.py +4 -4
- nucliadb/common/external_index_providers/base.py +32 -5
- nucliadb/common/external_index_providers/manager.py +4 -5
- nucliadb/common/filter_expression.py +128 -40
- nucliadb/common/http_clients/processing.py +12 -23
- nucliadb/common/ids.py +6 -4
- nucliadb/common/locking.py +1 -2
- nucliadb/common/maindb/driver.py +9 -8
- nucliadb/common/maindb/local.py +5 -5
- nucliadb/common/maindb/pg.py +9 -8
- nucliadb/common/nidx.py +3 -4
- nucliadb/export_import/datamanager.py +4 -3
- nucliadb/export_import/exporter.py +11 -19
- nucliadb/export_import/importer.py +13 -6
- nucliadb/export_import/tasks.py +2 -0
- nucliadb/export_import/utils.py +6 -18
- nucliadb/health.py +2 -2
- nucliadb/ingest/app.py +8 -8
- nucliadb/ingest/consumer/consumer.py +8 -10
- nucliadb/ingest/consumer/pull.py +3 -8
- nucliadb/ingest/consumer/service.py +3 -3
- nucliadb/ingest/consumer/utils.py +1 -1
- nucliadb/ingest/fields/base.py +28 -49
- nucliadb/ingest/fields/conversation.py +12 -12
- nucliadb/ingest/fields/exceptions.py +1 -2
- nucliadb/ingest/fields/file.py +22 -8
- nucliadb/ingest/fields/link.py +7 -7
- nucliadb/ingest/fields/text.py +2 -3
- nucliadb/ingest/orm/brain_v2.py +78 -64
- nucliadb/ingest/orm/broker_message.py +2 -4
- nucliadb/ingest/orm/entities.py +10 -209
- nucliadb/ingest/orm/index_message.py +4 -4
- nucliadb/ingest/orm/knowledgebox.py +18 -27
- nucliadb/ingest/orm/processor/auditing.py +1 -3
- nucliadb/ingest/orm/processor/data_augmentation.py +1 -2
- nucliadb/ingest/orm/processor/processor.py +27 -27
- nucliadb/ingest/orm/processor/sequence_manager.py +1 -2
- nucliadb/ingest/orm/resource.py +72 -70
- nucliadb/ingest/orm/utils.py +1 -1
- nucliadb/ingest/processing.py +17 -17
- nucliadb/ingest/serialize.py +202 -145
- nucliadb/ingest/service/writer.py +3 -109
- nucliadb/ingest/settings.py +3 -4
- nucliadb/ingest/utils.py +1 -2
- nucliadb/learning_proxy.py +11 -11
- nucliadb/metrics_exporter.py +5 -4
- nucliadb/middleware/__init__.py +82 -1
- nucliadb/migrator/datamanager.py +3 -4
- nucliadb/migrator/migrator.py +1 -2
- nucliadb/migrator/models.py +1 -2
- nucliadb/migrator/settings.py +1 -2
- nucliadb/models/internal/augment.py +614 -0
- nucliadb/models/internal/processing.py +19 -19
- nucliadb/openapi.py +2 -2
- nucliadb/purge/__init__.py +3 -8
- nucliadb/purge/orphan_shards.py +1 -2
- nucliadb/reader/__init__.py +5 -0
- nucliadb/reader/api/models.py +6 -13
- nucliadb/reader/api/v1/download.py +59 -38
- nucliadb/reader/api/v1/export_import.py +4 -4
- nucliadb/reader/api/v1/learning_config.py +24 -4
- nucliadb/reader/api/v1/resource.py +61 -9
- nucliadb/reader/api/v1/services.py +18 -14
- nucliadb/reader/app.py +3 -1
- nucliadb/reader/reader/notifications.py +1 -2
- nucliadb/search/api/v1/__init__.py +2 -0
- nucliadb/search/api/v1/ask.py +3 -4
- nucliadb/search/api/v1/augment.py +585 -0
- nucliadb/search/api/v1/catalog.py +11 -15
- nucliadb/search/api/v1/find.py +16 -22
- nucliadb/search/api/v1/hydrate.py +25 -25
- nucliadb/search/api/v1/knowledgebox.py +1 -2
- nucliadb/search/api/v1/predict_proxy.py +1 -2
- nucliadb/search/api/v1/resource/ask.py +7 -7
- nucliadb/search/api/v1/resource/ingestion_agents.py +5 -6
- nucliadb/search/api/v1/resource/search.py +9 -11
- nucliadb/search/api/v1/retrieve.py +130 -0
- nucliadb/search/api/v1/search.py +28 -32
- nucliadb/search/api/v1/suggest.py +11 -14
- nucliadb/search/api/v1/summarize.py +1 -2
- nucliadb/search/api/v1/utils.py +2 -2
- nucliadb/search/app.py +3 -2
- nucliadb/search/augmentor/__init__.py +21 -0
- nucliadb/search/augmentor/augmentor.py +232 -0
- nucliadb/search/augmentor/fields.py +704 -0
- nucliadb/search/augmentor/metrics.py +24 -0
- nucliadb/search/augmentor/paragraphs.py +334 -0
- nucliadb/search/augmentor/resources.py +238 -0
- nucliadb/search/augmentor/utils.py +33 -0
- nucliadb/search/lifecycle.py +3 -1
- nucliadb/search/predict.py +24 -17
- nucliadb/search/predict_models.py +8 -9
- nucliadb/search/requesters/utils.py +11 -10
- nucliadb/search/search/cache.py +19 -23
- nucliadb/search/search/chat/ask.py +88 -59
- nucliadb/search/search/chat/exceptions.py +3 -5
- nucliadb/search/search/chat/fetcher.py +201 -0
- nucliadb/search/search/chat/images.py +6 -4
- nucliadb/search/search/chat/old_prompt.py +1375 -0
- nucliadb/search/search/chat/parser.py +510 -0
- nucliadb/search/search/chat/prompt.py +563 -615
- nucliadb/search/search/chat/query.py +449 -36
- nucliadb/search/search/chat/rpc.py +85 -0
- nucliadb/search/search/fetch.py +3 -4
- nucliadb/search/search/filters.py +8 -11
- nucliadb/search/search/find.py +33 -31
- nucliadb/search/search/find_merge.py +124 -331
- nucliadb/search/search/graph_strategy.py +14 -12
- nucliadb/search/search/hydrator/__init__.py +3 -152
- nucliadb/search/search/hydrator/fields.py +92 -50
- nucliadb/search/search/hydrator/images.py +7 -7
- nucliadb/search/search/hydrator/paragraphs.py +42 -26
- nucliadb/search/search/hydrator/resources.py +20 -16
- nucliadb/search/search/ingestion_agents.py +5 -5
- nucliadb/search/search/merge.py +90 -94
- nucliadb/search/search/metrics.py +10 -9
- nucliadb/search/search/paragraphs.py +7 -9
- nucliadb/search/search/predict_proxy.py +13 -9
- nucliadb/search/search/query.py +14 -86
- nucliadb/search/search/query_parser/fetcher.py +51 -82
- nucliadb/search/search/query_parser/models.py +19 -20
- nucliadb/search/search/query_parser/old_filters.py +20 -19
- nucliadb/search/search/query_parser/parsers/ask.py +4 -5
- nucliadb/search/search/query_parser/parsers/catalog.py +5 -6
- nucliadb/search/search/query_parser/parsers/common.py +5 -6
- nucliadb/search/search/query_parser/parsers/find.py +6 -26
- nucliadb/search/search/query_parser/parsers/graph.py +13 -23
- nucliadb/search/search/query_parser/parsers/retrieve.py +207 -0
- nucliadb/search/search/query_parser/parsers/search.py +15 -53
- nucliadb/search/search/query_parser/parsers/unit_retrieval.py +8 -29
- nucliadb/search/search/rank_fusion.py +18 -13
- nucliadb/search/search/rerankers.py +5 -6
- nucliadb/search/search/retrieval.py +300 -0
- nucliadb/search/search/summarize.py +5 -6
- nucliadb/search/search/utils.py +3 -4
- nucliadb/search/settings.py +1 -2
- nucliadb/standalone/api_router.py +1 -1
- nucliadb/standalone/app.py +4 -3
- nucliadb/standalone/auth.py +5 -6
- nucliadb/standalone/lifecycle.py +2 -2
- nucliadb/standalone/run.py +2 -4
- nucliadb/standalone/settings.py +5 -6
- nucliadb/standalone/versions.py +3 -4
- nucliadb/tasks/consumer.py +13 -8
- nucliadb/tasks/models.py +2 -1
- nucliadb/tasks/producer.py +3 -3
- nucliadb/tasks/retries.py +8 -7
- nucliadb/train/api/utils.py +1 -3
- nucliadb/train/api/v1/shards.py +1 -2
- nucliadb/train/api/v1/trainset.py +1 -2
- nucliadb/train/app.py +1 -1
- nucliadb/train/generator.py +4 -4
- nucliadb/train/generators/field_classifier.py +2 -2
- nucliadb/train/generators/field_streaming.py +6 -6
- nucliadb/train/generators/image_classifier.py +2 -2
- nucliadb/train/generators/paragraph_classifier.py +2 -2
- nucliadb/train/generators/paragraph_streaming.py +2 -2
- nucliadb/train/generators/question_answer_streaming.py +2 -2
- nucliadb/train/generators/sentence_classifier.py +2 -2
- nucliadb/train/generators/token_classifier.py +3 -2
- nucliadb/train/generators/utils.py +6 -5
- nucliadb/train/nodes.py +3 -3
- nucliadb/train/resource.py +6 -8
- nucliadb/train/settings.py +3 -4
- nucliadb/train/types.py +11 -11
- nucliadb/train/upload.py +3 -2
- nucliadb/train/uploader.py +1 -2
- nucliadb/train/utils.py +1 -2
- nucliadb/writer/api/v1/export_import.py +4 -1
- nucliadb/writer/api/v1/field.py +7 -11
- nucliadb/writer/api/v1/knowledgebox.py +3 -4
- nucliadb/writer/api/v1/resource.py +9 -20
- nucliadb/writer/api/v1/services.py +10 -132
- nucliadb/writer/api/v1/upload.py +73 -72
- nucliadb/writer/app.py +8 -2
- nucliadb/writer/resource/basic.py +12 -15
- nucliadb/writer/resource/field.py +7 -5
- nucliadb/writer/resource/origin.py +7 -0
- nucliadb/writer/settings.py +2 -3
- nucliadb/writer/tus/__init__.py +2 -3
- nucliadb/writer/tus/azure.py +1 -3
- nucliadb/writer/tus/dm.py +3 -3
- nucliadb/writer/tus/exceptions.py +3 -4
- nucliadb/writer/tus/gcs.py +5 -6
- nucliadb/writer/tus/s3.py +2 -3
- nucliadb/writer/tus/storage.py +3 -3
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/METADATA +9 -10
- nucliadb-6.10.0.post5705.dist-info/RECORD +410 -0
- nucliadb/common/datamanagers/entities.py +0 -139
- nucliadb-6.9.1.post5192.dist-info/RECORD +0 -392
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/WHEEL +0 -0
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.9.1.post5192.dist-info → nucliadb-6.10.0.post5705.dist-info}/top_level.txt +0 -0
nucliadb/train/nodes.py
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from
|
|
20
|
+
from collections.abc import AsyncIterator
|
|
21
21
|
|
|
22
22
|
from nucliadb.common import datamanagers
|
|
23
23
|
from nucliadb.common.cluster import manager
|
|
@@ -62,7 +62,7 @@ class TrainShardManager(manager.KBShardManager):
|
|
|
62
62
|
|
|
63
63
|
return shard_object.nidx_shard_id
|
|
64
64
|
|
|
65
|
-
async def get_kb_obj(self, txn: Transaction, kbid: str) ->
|
|
65
|
+
async def get_kb_obj(self, txn: Transaction, kbid: str) -> KnowledgeBox | None:
|
|
66
66
|
if kbid is None:
|
|
67
67
|
return None
|
|
68
68
|
|
|
@@ -72,7 +72,7 @@ class TrainShardManager(manager.KBShardManager):
|
|
|
72
72
|
kbobj = KnowledgeBox(txn, self.storage, kbid)
|
|
73
73
|
return kbobj
|
|
74
74
|
|
|
75
|
-
async def get_kb_entities_manager(self, txn: Transaction, kbid: str) ->
|
|
75
|
+
async def get_kb_entities_manager(self, txn: Transaction, kbid: str) -> EntitiesManager | None:
|
|
76
76
|
kbobj = await self.get_kb_obj(txn, kbid)
|
|
77
77
|
if kbobj is None:
|
|
78
78
|
return None
|
nucliadb/train/resource.py
CHANGED
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
#
|
|
20
20
|
from __future__ import annotations
|
|
21
21
|
|
|
22
|
-
from
|
|
22
|
+
from collections.abc import AsyncIterator, MutableMapping
|
|
23
23
|
|
|
24
24
|
from nucliadb.common import datamanagers
|
|
25
25
|
from nucliadb.ingest.orm.resource import Resource
|
|
@@ -69,9 +69,7 @@ async def iterate_sentences(
|
|
|
69
69
|
# return any
|
|
70
70
|
vectorset_id = None
|
|
71
71
|
async with datamanagers.with_ro_transaction() as txn:
|
|
72
|
-
async for vectorset_id, vs in datamanagers.vectorsets.iter(
|
|
73
|
-
txn=txn, kbid=resource.kb.kbid
|
|
74
|
-
):
|
|
72
|
+
async for vectorset_id, vs in datamanagers.vectorsets.iter(txn=txn, kbid=resource.kbid):
|
|
75
73
|
break
|
|
76
74
|
assert vectorset_id is not None, "All KBs must have at least a vectorset"
|
|
77
75
|
vo = await field.get_vectors(vectorset_id, vs.storage_key_kind)
|
|
@@ -81,7 +79,7 @@ async def iterate_sentences(
|
|
|
81
79
|
if fm is None:
|
|
82
80
|
continue
|
|
83
81
|
|
|
84
|
-
field_metadatas: list[tuple[
|
|
82
|
+
field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
|
|
85
83
|
for subfield_metadata, splitted_metadata in fm.split_metadata.items():
|
|
86
84
|
field_metadatas.append((subfield_metadata, splitted_metadata))
|
|
87
85
|
|
|
@@ -188,7 +186,7 @@ async def iterate_paragraphs(
|
|
|
188
186
|
if fm is None:
|
|
189
187
|
continue
|
|
190
188
|
|
|
191
|
-
field_metadatas: list[tuple[
|
|
189
|
+
field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
|
|
192
190
|
for subfield_metadata, splitted_metadata in fm.split_metadata.items():
|
|
193
191
|
field_metadatas.append((subfield_metadata, splitted_metadata))
|
|
194
192
|
|
|
@@ -264,7 +262,7 @@ async def iterate_fields(
|
|
|
264
262
|
if fm is None:
|
|
265
263
|
continue
|
|
266
264
|
|
|
267
|
-
field_metadatas: list[tuple[
|
|
265
|
+
field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
|
|
268
266
|
for subfield_metadata, splitted_metadata in fm.split_metadata.items():
|
|
269
267
|
field_metadatas.append((subfield_metadata, splitted_metadata))
|
|
270
268
|
|
|
@@ -319,7 +317,7 @@ async def generate_train_resource(
|
|
|
319
317
|
if fm is None:
|
|
320
318
|
continue
|
|
321
319
|
|
|
322
|
-
field_metadatas: list[tuple[
|
|
320
|
+
field_metadatas: list[tuple[str | None, FieldMetadata]] = [(None, fm.metadata)]
|
|
323
321
|
for subfield_metadata, splitted_metadata in fm.split_metadata.items():
|
|
324
322
|
field_metadatas.append((subfield_metadata, splitted_metadata))
|
|
325
323
|
|
nucliadb/train/settings.py
CHANGED
|
@@ -17,17 +17,16 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from typing import Optional
|
|
21
20
|
|
|
22
21
|
from nucliadb.ingest.settings import DriverSettings
|
|
23
22
|
|
|
24
23
|
|
|
25
24
|
class Settings(DriverSettings):
|
|
26
25
|
grpc_port: int = 8031
|
|
27
|
-
train_grpc_address:
|
|
26
|
+
train_grpc_address: str | None = None
|
|
28
27
|
|
|
29
|
-
nuclia_learning_url:
|
|
30
|
-
nuclia_learning_apikey:
|
|
28
|
+
nuclia_learning_url: str | None = "https://nuclia.cloud/api/v1/learning/"
|
|
29
|
+
nuclia_learning_apikey: str | None = None
|
|
31
30
|
|
|
32
31
|
internal_counter_api: str = "http://search.nuclia.svc.cluster.local:8030/api/v1/kb/{kbid}/counters"
|
|
33
32
|
|
nucliadb/train/types.py
CHANGED
|
@@ -17,20 +17,20 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from typing import TypeVar
|
|
20
|
+
from typing import TypeVar
|
|
21
21
|
|
|
22
22
|
from nucliadb_protos import dataset_pb2 as dpb
|
|
23
23
|
|
|
24
|
-
TrainBatch =
|
|
25
|
-
dpb.FieldClassificationBatch
|
|
26
|
-
dpb.ImageClassificationBatch
|
|
27
|
-
dpb.ParagraphClassificationBatch
|
|
28
|
-
dpb.ParagraphStreamingBatch
|
|
29
|
-
dpb.QuestionAnswerStreamingBatch
|
|
30
|
-
dpb.SentenceClassificationBatch
|
|
31
|
-
dpb.TokenClassificationBatch
|
|
32
|
-
dpb.FieldStreamingBatch
|
|
33
|
-
|
|
24
|
+
TrainBatch = (
|
|
25
|
+
dpb.FieldClassificationBatch
|
|
26
|
+
| dpb.ImageClassificationBatch
|
|
27
|
+
| dpb.ParagraphClassificationBatch
|
|
28
|
+
| dpb.ParagraphStreamingBatch
|
|
29
|
+
| dpb.QuestionAnswerStreamingBatch
|
|
30
|
+
| dpb.SentenceClassificationBatch
|
|
31
|
+
| dpb.TokenClassificationBatch
|
|
32
|
+
| dpb.FieldStreamingBatch
|
|
33
|
+
)
|
|
34
34
|
|
|
35
35
|
T = TypeVar(
|
|
36
36
|
"T",
|
nucliadb/train/upload.py
CHANGED
|
@@ -20,8 +20,9 @@
|
|
|
20
20
|
import argparse
|
|
21
21
|
import asyncio
|
|
22
22
|
import importlib.metadata
|
|
23
|
+
import inspect
|
|
23
24
|
from asyncio import tasks
|
|
24
|
-
from
|
|
25
|
+
from collections.abc import Callable
|
|
25
26
|
|
|
26
27
|
from nucliadb.train.uploader import start_upload
|
|
27
28
|
from nucliadb_telemetry import errors
|
|
@@ -89,7 +90,7 @@ def run() -> None:
|
|
|
89
90
|
finally:
|
|
90
91
|
try:
|
|
91
92
|
for finalizer in finalizers:
|
|
92
|
-
if
|
|
93
|
+
if inspect.iscoroutinefunction(finalizer):
|
|
93
94
|
loop.run_until_complete(finalizer())
|
|
94
95
|
else:
|
|
95
96
|
finalizer()
|
nucliadb/train/uploader.py
CHANGED
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from typing import Optional
|
|
21
20
|
|
|
22
21
|
import aiohttp
|
|
23
22
|
|
|
@@ -97,7 +96,7 @@ class UploadServicer:
|
|
|
97
96
|
response.status = GetLabelsResponse.Status.NOTFOUND
|
|
98
97
|
return response
|
|
99
98
|
response.kb.uuid = kbid
|
|
100
|
-
labels:
|
|
99
|
+
labels: Labels | None = await datamanagers.atomic.labelset.get_all(kbid=kbid)
|
|
101
100
|
if labels is not None:
|
|
102
101
|
response.labels.CopyFrom(labels)
|
|
103
102
|
return response
|
nucliadb/train/utils.py
CHANGED
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from typing import Optional
|
|
21
20
|
|
|
22
21
|
from grpc import aio
|
|
23
22
|
from grpc_health.v1 import health, health_pb2_grpc
|
|
@@ -37,7 +36,7 @@ from nucliadb_utils.utilities import (
|
|
|
37
36
|
)
|
|
38
37
|
|
|
39
38
|
|
|
40
|
-
async def start_train_grpc(service_name:
|
|
39
|
+
async def start_train_grpc(service_name: str | None = None):
|
|
41
40
|
actual_service = get_utility(Utility.TRAIN)
|
|
42
41
|
if actual_service is not None:
|
|
43
42
|
return
|
|
@@ -17,8 +17,8 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
+
from collections.abc import AsyncGenerator
|
|
20
21
|
from datetime import datetime
|
|
21
|
-
from typing import AsyncGenerator
|
|
22
22
|
from uuid import uuid4
|
|
23
23
|
|
|
24
24
|
from fastapi_versioning import version
|
|
@@ -66,6 +66,7 @@ from nucliadb_utils.authentication import requires_one
|
|
|
66
66
|
summary="Start an export of a Knowledge Box",
|
|
67
67
|
tags=["Knowledge Boxes"],
|
|
68
68
|
response_model=CreateExportResponse,
|
|
69
|
+
include_in_schema=False,
|
|
69
70
|
)
|
|
70
71
|
@requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
|
|
71
72
|
@version(1)
|
|
@@ -91,6 +92,7 @@ async def start_kb_export_endpoint(request: Request, kbid: str):
|
|
|
91
92
|
tags=["Knowledge Boxes"],
|
|
92
93
|
response_model=NewImportedKbResponse,
|
|
93
94
|
openapi_extra={"x-hidden-operation": True},
|
|
95
|
+
include_in_schema=False,
|
|
94
96
|
)
|
|
95
97
|
@requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
|
|
96
98
|
@version(1)
|
|
@@ -140,6 +142,7 @@ async def kb_create_and_import_endpoint(request: Request):
|
|
|
140
142
|
summary="Start an import to a Knowledge Box",
|
|
141
143
|
tags=["Knowledge Boxes"],
|
|
142
144
|
response_model=CreateImportResponse,
|
|
145
|
+
include_in_schema=False,
|
|
143
146
|
)
|
|
144
147
|
@requires_one([NucliaDBRoles.MANAGER, NucliaDBRoles.WRITER])
|
|
145
148
|
@version(1)
|
nucliadb/writer/api/v1/field.py
CHANGED
|
@@ -17,8 +17,9 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
+
from collections.abc import Callable
|
|
20
21
|
from inspect import iscoroutinefunction
|
|
21
|
-
from typing import TYPE_CHECKING, Annotated
|
|
22
|
+
from typing import TYPE_CHECKING, Annotated
|
|
22
23
|
|
|
23
24
|
import pydantic
|
|
24
25
|
from fastapi import HTTPException, Query, Response
|
|
@@ -72,12 +73,7 @@ if TYPE_CHECKING: # pragma: no cover
|
|
|
72
73
|
else:
|
|
73
74
|
FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP: dict[models.FieldTypeName, int]
|
|
74
75
|
|
|
75
|
-
FieldModelType =
|
|
76
|
-
models.TextField,
|
|
77
|
-
models.LinkField,
|
|
78
|
-
models.InputConversationField,
|
|
79
|
-
models.FileField,
|
|
80
|
-
]
|
|
76
|
+
FieldModelType = models.TextField | models.LinkField | models.InputConversationField | models.FileField
|
|
81
77
|
|
|
82
78
|
FIELD_TYPE_NAME_TO_FIELD_TYPE_MAP = {
|
|
83
79
|
models.FieldTypeName.FILE: resources_pb2.FieldType.FILE,
|
|
@@ -278,7 +274,7 @@ async def parse_file_field_adapter(
|
|
|
278
274
|
)
|
|
279
275
|
|
|
280
276
|
|
|
281
|
-
FIELD_PARSERS_MAP: dict[
|
|
277
|
+
FIELD_PARSERS_MAP: dict[type, Callable] = {
|
|
282
278
|
models.TextField: parse_text_field_adapter,
|
|
283
279
|
models.LinkField: parse_link_field_adapter,
|
|
284
280
|
models.InputConversationField: parse_conversation_field_adapter,
|
|
@@ -463,7 +459,7 @@ async def append_messages_to_conversation_field_rslug_prefix(
|
|
|
463
459
|
kbid: str,
|
|
464
460
|
rslug: str,
|
|
465
461
|
field_id: FieldIdString,
|
|
466
|
-
messages:
|
|
462
|
+
messages: list[models.InputMessage],
|
|
467
463
|
) -> ResourceFieldAdded:
|
|
468
464
|
try:
|
|
469
465
|
field = models.InputConversationField(messages=messages)
|
|
@@ -488,7 +484,7 @@ async def append_messages_to_conversation_field_rid_prefix(
|
|
|
488
484
|
kbid: str,
|
|
489
485
|
rid: str,
|
|
490
486
|
field_id: FieldIdString,
|
|
491
|
-
messages:
|
|
487
|
+
messages: list[models.InputMessage],
|
|
492
488
|
) -> ResourceFieldAdded:
|
|
493
489
|
try:
|
|
494
490
|
field = models.InputConversationField(messages=messages)
|
|
@@ -550,7 +546,7 @@ async def reprocess_file_field(
|
|
|
550
546
|
rid: str,
|
|
551
547
|
field_id: FieldIdString,
|
|
552
548
|
x_nucliadb_user: Annotated[str, X_NUCLIADB_USER] = "",
|
|
553
|
-
x_file_password: Annotated[
|
|
549
|
+
x_file_password: Annotated[str | None, X_FILE_PASSWORD] = None,
|
|
554
550
|
reset_title: bool = Query(
|
|
555
551
|
default=False,
|
|
556
552
|
description="Reset the title of the resource so that the file or link computed titles are set after processing.",
|
|
@@ -17,10 +17,9 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
import asyncio
|
|
21
20
|
from functools import partial
|
|
22
21
|
|
|
23
|
-
from fastapi import HTTPException
|
|
22
|
+
from fastapi import BackgroundTasks, HTTPException
|
|
24
23
|
from fastapi_versioning import version
|
|
25
24
|
from starlette.requests import Request
|
|
26
25
|
|
|
@@ -184,7 +183,7 @@ async def update_kb(request: Request, kbid: str, item: KnowledgeBoxConfig) -> Kn
|
|
|
184
183
|
)
|
|
185
184
|
@requires(NucliaDBRoles.MANAGER)
|
|
186
185
|
@version(1)
|
|
187
|
-
async def delete_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
|
|
186
|
+
async def delete_kb(request: Request, kbid: str, background: BackgroundTasks) -> KnowledgeBoxObj:
|
|
188
187
|
driver = get_driver()
|
|
189
188
|
try:
|
|
190
189
|
await KnowledgeBox.delete(driver, kbid=kbid)
|
|
@@ -208,6 +207,6 @@ async def delete_kb(request: Request, kbid: str) -> KnowledgeBoxObj:
|
|
|
208
207
|
# be nice and notify processing this KB is being deleted so we waste
|
|
209
208
|
# resources
|
|
210
209
|
processing = get_processing()
|
|
211
|
-
|
|
210
|
+
background.add_task(processing.delete_from_processing, kbid=kbid)
|
|
212
211
|
|
|
213
212
|
return KnowledgeBoxObj(uuid=kbid)
|
|
@@ -17,13 +17,12 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
import asyncio
|
|
21
20
|
import contextlib
|
|
22
21
|
from time import time
|
|
23
|
-
from typing import Annotated
|
|
22
|
+
from typing import Annotated
|
|
24
23
|
from uuid import uuid4
|
|
25
24
|
|
|
26
|
-
from fastapi import HTTPException, Query, Response
|
|
25
|
+
from fastapi import BackgroundTasks, HTTPException, Query, Response
|
|
27
26
|
from fastapi_versioning import version
|
|
28
27
|
from starlette.requests import Request
|
|
29
28
|
|
|
@@ -498,12 +497,10 @@ async def _reprocess_resource(
|
|
|
498
497
|
@requires(NucliaDBRoles.WRITER)
|
|
499
498
|
@version(1)
|
|
500
499
|
async def delete_resource_rslug_prefix(
|
|
501
|
-
request: Request,
|
|
502
|
-
kbid: str,
|
|
503
|
-
rslug: str,
|
|
500
|
+
request: Request, kbid: str, rslug: str, background: BackgroundTasks
|
|
504
501
|
):
|
|
505
502
|
rid = await get_rid_from_slug_or_raise_error(kbid, rslug)
|
|
506
|
-
return await _delete_resource(request, kbid, rid)
|
|
503
|
+
return await _delete_resource(request, kbid, rid, background)
|
|
507
504
|
|
|
508
505
|
|
|
509
506
|
@api.delete(
|
|
@@ -514,19 +511,11 @@ async def delete_resource_rslug_prefix(
|
|
|
514
511
|
)
|
|
515
512
|
@requires(NucliaDBRoles.WRITER)
|
|
516
513
|
@version(1)
|
|
517
|
-
async def delete_resource_rid_prefix(
|
|
518
|
-
request
|
|
519
|
-
kbid: str,
|
|
520
|
-
rid: str,
|
|
521
|
-
):
|
|
522
|
-
return await _delete_resource(request, kbid, rid)
|
|
514
|
+
async def delete_resource_rid_prefix(request: Request, kbid: str, rid: str, background: BackgroundTasks):
|
|
515
|
+
return await _delete_resource(request, kbid, rid, background)
|
|
523
516
|
|
|
524
517
|
|
|
525
|
-
async def _delete_resource(
|
|
526
|
-
request: Request,
|
|
527
|
-
kbid: str,
|
|
528
|
-
rid: str,
|
|
529
|
-
):
|
|
518
|
+
async def _delete_resource(request: Request, kbid: str, rid: str, background: BackgroundTasks):
|
|
530
519
|
await validate_rid_exists_or_raise_error(kbid, rid)
|
|
531
520
|
|
|
532
521
|
partitioning = get_partitioning()
|
|
@@ -541,7 +530,7 @@ async def _delete_resource(
|
|
|
541
530
|
parse_audit(writer.audit, request)
|
|
542
531
|
await transaction.commit(writer, partition)
|
|
543
532
|
processing = get_processing()
|
|
544
|
-
|
|
533
|
+
background.add_task(processing.delete_from_processing, kbid=kbid, resource_id=rid)
|
|
545
534
|
|
|
546
535
|
return Response(status_code=204)
|
|
547
536
|
|
|
@@ -637,7 +626,7 @@ def needs_resource_reindex(item: UpdateResourcePayload) -> bool:
|
|
|
637
626
|
)
|
|
638
627
|
|
|
639
628
|
|
|
640
|
-
async def maybe_send_to_process(toprocess: PushPayload, partition) ->
|
|
629
|
+
async def maybe_send_to_process(toprocess: PushPayload, partition) -> int | None:
|
|
641
630
|
if not needs_reprocess(toprocess):
|
|
642
631
|
return None
|
|
643
632
|
|
|
@@ -17,152 +17,22 @@
|
|
|
17
17
|
# You should have received a copy of the GNU Affero General Public License
|
|
18
18
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
19
19
|
#
|
|
20
|
-
from fastapi import HTTPException, Response
|
|
20
|
+
from fastapi import Body, HTTPException, Path, Response
|
|
21
21
|
from fastapi_versioning import version
|
|
22
22
|
from starlette.requests import Request
|
|
23
23
|
|
|
24
24
|
from nucliadb.common import datamanagers
|
|
25
25
|
from nucliadb.common.datamanagers.exceptions import KnowledgeBoxNotFound
|
|
26
26
|
from nucliadb.common.models_utils import to_proto
|
|
27
|
-
from nucliadb.models.responses import (
|
|
28
|
-
HTTPConflict,
|
|
29
|
-
HTTPInternalServerError,
|
|
30
|
-
HTTPNotFound,
|
|
31
|
-
)
|
|
32
27
|
from nucliadb.writer.api.v1.router import KB_PREFIX, api
|
|
33
28
|
from nucliadb_models.configuration import SearchConfiguration
|
|
34
|
-
from nucliadb_models.entities import (
|
|
35
|
-
CreateEntitiesGroupPayload,
|
|
36
|
-
UpdateEntitiesGroupPayload,
|
|
37
|
-
)
|
|
38
29
|
from nucliadb_models.labels import LabelSet
|
|
39
30
|
from nucliadb_models.resource import NucliaDBRoles
|
|
40
31
|
from nucliadb_models.synonyms import KnowledgeBoxSynonyms
|
|
41
32
|
from nucliadb_protos import writer_pb2
|
|
42
33
|
from nucliadb_protos.knowledgebox_pb2 import Label as LabelPB
|
|
43
34
|
from nucliadb_protos.knowledgebox_pb2 import LabelSet as LabelSetPB
|
|
44
|
-
from nucliadb_protos.writer_pb2 import (
|
|
45
|
-
DelEntitiesRequest,
|
|
46
|
-
NewEntitiesGroupRequest,
|
|
47
|
-
NewEntitiesGroupResponse,
|
|
48
|
-
OpStatusWriter,
|
|
49
|
-
UpdateEntitiesGroupRequest,
|
|
50
|
-
UpdateEntitiesGroupResponse,
|
|
51
|
-
)
|
|
52
35
|
from nucliadb_utils.authentication import requires
|
|
53
|
-
from nucliadb_utils.utilities import get_ingest
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
@api.post(
|
|
57
|
-
f"/{KB_PREFIX}/{{kbid}}/entitiesgroups",
|
|
58
|
-
status_code=200,
|
|
59
|
-
summary="Create Knowledge Box Entities Group",
|
|
60
|
-
tags=["Knowledge Box Services"],
|
|
61
|
-
openapi_extra={"x-operation_order": 1},
|
|
62
|
-
)
|
|
63
|
-
@requires(NucliaDBRoles.WRITER)
|
|
64
|
-
@version(1)
|
|
65
|
-
async def create_entities_group(request: Request, kbid: str, item: CreateEntitiesGroupPayload):
|
|
66
|
-
ingest = get_ingest()
|
|
67
|
-
|
|
68
|
-
pbrequest: NewEntitiesGroupRequest = NewEntitiesGroupRequest()
|
|
69
|
-
pbrequest.kb.uuid = kbid
|
|
70
|
-
pbrequest.group = item.group
|
|
71
|
-
pbrequest.entities.custom = True
|
|
72
|
-
if item.title:
|
|
73
|
-
pbrequest.entities.title = item.title
|
|
74
|
-
if item.color:
|
|
75
|
-
pbrequest.entities.color = item.color
|
|
76
|
-
|
|
77
|
-
for key, entity in item.entities.items():
|
|
78
|
-
entitypb = pbrequest.entities.entities[key]
|
|
79
|
-
entitypb.value = entity.value
|
|
80
|
-
entitypb.merged = entity.merged
|
|
81
|
-
entitypb.deleted = False
|
|
82
|
-
entitypb.represents.extend(entity.represents)
|
|
83
|
-
|
|
84
|
-
status: NewEntitiesGroupResponse = await ingest.NewEntitiesGroup(pbrequest) # type: ignore
|
|
85
|
-
if status.status == NewEntitiesGroupResponse.Status.OK:
|
|
86
|
-
return
|
|
87
|
-
elif status.status == NewEntitiesGroupResponse.Status.KB_NOT_FOUND:
|
|
88
|
-
return HTTPNotFound(detail="Knowledge Box does not exist")
|
|
89
|
-
elif status.status == NewEntitiesGroupResponse.Status.ALREADY_EXISTS:
|
|
90
|
-
return HTTPConflict(
|
|
91
|
-
detail=f"Entities group {item.group} already exists in this Knowledge box",
|
|
92
|
-
)
|
|
93
|
-
elif status.status == NewEntitiesGroupResponse.Status.ERROR:
|
|
94
|
-
return HTTPInternalServerError(detail="Error on settings entities on a Knowledge box")
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
@api.patch(
|
|
98
|
-
f"/{KB_PREFIX}/{{kbid}}/entitiesgroup/{{group}}",
|
|
99
|
-
status_code=200,
|
|
100
|
-
summary="Update Knowledge Box Entities Group",
|
|
101
|
-
tags=["Knowledge Box Services"],
|
|
102
|
-
openapi_extra={"x-operation_order": 2},
|
|
103
|
-
)
|
|
104
|
-
@requires(NucliaDBRoles.WRITER)
|
|
105
|
-
@version(1)
|
|
106
|
-
async def update_entities_group(
|
|
107
|
-
request: Request, kbid: str, group: str, item: UpdateEntitiesGroupPayload
|
|
108
|
-
):
|
|
109
|
-
ingest = get_ingest()
|
|
110
|
-
|
|
111
|
-
pbrequest: UpdateEntitiesGroupRequest = UpdateEntitiesGroupRequest()
|
|
112
|
-
pbrequest.kb.uuid = kbid
|
|
113
|
-
pbrequest.group = group
|
|
114
|
-
pbrequest.title = item.title or ""
|
|
115
|
-
pbrequest.color = item.color or ""
|
|
116
|
-
|
|
117
|
-
for name, entity in item.add.items():
|
|
118
|
-
entitypb = pbrequest.add[name]
|
|
119
|
-
entitypb.value = entity.value
|
|
120
|
-
entitypb.merged = entity.merged
|
|
121
|
-
entitypb.represents.extend(entity.represents)
|
|
122
|
-
|
|
123
|
-
for name, entity in item.update.items():
|
|
124
|
-
entitypb = pbrequest.update[name]
|
|
125
|
-
entitypb.value = entity.value
|
|
126
|
-
entitypb.merged = entity.merged
|
|
127
|
-
entitypb.represents.extend(entity.represents)
|
|
128
|
-
|
|
129
|
-
pbrequest.delete.extend(item.delete)
|
|
130
|
-
|
|
131
|
-
status: UpdateEntitiesGroupResponse = await ingest.UpdateEntitiesGroup(pbrequest) # type: ignore
|
|
132
|
-
if status.status == UpdateEntitiesGroupResponse.Status.OK:
|
|
133
|
-
return
|
|
134
|
-
elif status.status == UpdateEntitiesGroupResponse.Status.KB_NOT_FOUND:
|
|
135
|
-
return HTTPNotFound(detail="Knowledge Box does not exist")
|
|
136
|
-
elif status.status == UpdateEntitiesGroupResponse.Status.ENTITIES_GROUP_NOT_FOUND:
|
|
137
|
-
return HTTPNotFound(detail="Entities group does not exist")
|
|
138
|
-
elif status.status == UpdateEntitiesGroupResponse.Status.ERROR:
|
|
139
|
-
return HTTPInternalServerError(detail="Error on settings entities on a Knowledge box")
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
@api.delete(
|
|
143
|
-
f"/{KB_PREFIX}/{{kbid}}/entitiesgroup/{{group}}",
|
|
144
|
-
status_code=200,
|
|
145
|
-
summary="Delete Knowledge Box Entities",
|
|
146
|
-
tags=["Knowledge Box Services"],
|
|
147
|
-
openapi_extra={"x-operation_order": 3},
|
|
148
|
-
)
|
|
149
|
-
@requires(NucliaDBRoles.WRITER)
|
|
150
|
-
@version(1)
|
|
151
|
-
async def delete_entities(request: Request, kbid: str, group: str):
|
|
152
|
-
ingest = get_ingest()
|
|
153
|
-
pbrequest: DelEntitiesRequest = DelEntitiesRequest()
|
|
154
|
-
pbrequest.kb.uuid = kbid
|
|
155
|
-
pbrequest.group = group
|
|
156
|
-
|
|
157
|
-
status: OpStatusWriter = await ingest.DelEntities(pbrequest) # type: ignore
|
|
158
|
-
if status.status == OpStatusWriter.Status.OK:
|
|
159
|
-
return None
|
|
160
|
-
elif status.status == OpStatusWriter.Status.NOTFOUND:
|
|
161
|
-
raise HTTPException(status_code=404, detail="Knowledge Box does not exist")
|
|
162
|
-
elif status.status == OpStatusWriter.Status.ERROR:
|
|
163
|
-
raise HTTPException(status_code=500, detail="Error on deleting entities from a Knowledge box")
|
|
164
|
-
|
|
165
|
-
return Response(status_code=204)
|
|
166
36
|
|
|
167
37
|
|
|
168
38
|
@api.post(
|
|
@@ -174,7 +44,15 @@ async def delete_entities(request: Request, kbid: str, group: str):
|
|
|
174
44
|
)
|
|
175
45
|
@requires(NucliaDBRoles.WRITER)
|
|
176
46
|
@version(1)
|
|
177
|
-
async def set_labelset_endpoint(
|
|
47
|
+
async def set_labelset_endpoint(
|
|
48
|
+
request: Request,
|
|
49
|
+
kbid: str,
|
|
50
|
+
labelset: str = Path(
|
|
51
|
+
title="The ID of the labelset to create or update. This is a unique identifier that should be used at search time.",
|
|
52
|
+
examples=["categories", "movie-genres", "document-types"],
|
|
53
|
+
),
|
|
54
|
+
item: LabelSet = Body(...),
|
|
55
|
+
):
|
|
178
56
|
if item.title is None:
|
|
179
57
|
item.title = labelset
|
|
180
58
|
|