nucliadb 6.2.1.post2842__py3-none-any.whl → 6.2.1.post2855__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- migrations/0029_backfill_field_status.py +149 -0
- nucliadb/common/datamanagers/fields.py +46 -7
- nucliadb/ingest/fields/base.py +20 -1
- nucliadb/ingest/orm/resource.py +85 -0
- nucliadb/search/search/graph_strategy.py +3 -1
- nucliadb/writer/api/v1/field.py +7 -1
- nucliadb/writer/api/v1/resource.py +10 -3
- nucliadb/writer/api/v1/upload.py +3 -3
- nucliadb/writer/resource/field.py +26 -1
- {nucliadb-6.2.1.post2842.dist-info → nucliadb-6.2.1.post2855.dist-info}/METADATA +5 -5
- {nucliadb-6.2.1.post2842.dist-info → nucliadb-6.2.1.post2855.dist-info}/RECORD +15 -14
- {nucliadb-6.2.1.post2842.dist-info → nucliadb-6.2.1.post2855.dist-info}/WHEEL +0 -0
- {nucliadb-6.2.1.post2842.dist-info → nucliadb-6.2.1.post2855.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.2.1.post2842.dist-info → nucliadb-6.2.1.post2855.dist-info}/top_level.txt +0 -0
- {nucliadb-6.2.1.post2842.dist-info → nucliadb-6.2.1.post2855.dist-info}/zip-safe +0 -0
@@ -0,0 +1,149 @@
|
|
1
|
+
# Copyright (C) 2021 Bosutech XXI S.L.
|
2
|
+
#
|
3
|
+
# nucliadb is offered under the AGPL v3.0 and as commercial software.
|
4
|
+
# For commercial licensing, contact us at info@nuclia.com.
|
5
|
+
#
|
6
|
+
# AGPL:
|
7
|
+
# This program is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU Affero General Public License as
|
9
|
+
# published by the Free Software Foundation, either version 3 of the
|
10
|
+
# License, or (at your option) any later version.
|
11
|
+
#
|
12
|
+
# This program is distributed in the hope that it will be useful,
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
+
# GNU Affero General Public License for more details.
|
16
|
+
#
|
17
|
+
# You should have received a copy of the GNU Affero General Public License
|
18
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
+
#
|
20
|
+
|
21
|
+
"""Migration #29
|
22
|
+
|
23
|
+
Backfill field status (from error)
|
24
|
+
"""
|
25
|
+
|
26
|
+
import logging
|
27
|
+
from typing import Optional
|
28
|
+
|
29
|
+
from nucliadb.migrator.context import ExecutionContext
|
30
|
+
from nucliadb_protos import resources_pb2, writer_pb2
|
31
|
+
|
32
|
+
logger = logging.getLogger(__name__)
|
33
|
+
|
34
|
+
|
35
|
+
async def migrate(context: ExecutionContext) -> None:
|
36
|
+
start: Optional[str] = ""
|
37
|
+
while True:
|
38
|
+
if start is None:
|
39
|
+
break
|
40
|
+
start = await do_batch(context, start)
|
41
|
+
|
42
|
+
|
43
|
+
async def migrate_kb(context: ExecutionContext, kbid: str) -> None: ...
|
44
|
+
|
45
|
+
|
46
|
+
async def do_batch(context: ExecutionContext, start: str) -> Optional[str]:
|
47
|
+
logger.info(f"Running batch from {start}")
|
48
|
+
async with context.kv_driver.transaction(read_only=False) as txn:
|
49
|
+
async with txn.connection.cursor() as cur: # type: ignore
|
50
|
+
# Retrieve a batch of fields
|
51
|
+
await cur.execute(
|
52
|
+
"""
|
53
|
+
SELECT key FROM resources
|
54
|
+
WHERE key ~ '^/kbs/[^/]*/r/[^/]*/f/[^/]*/[^/]*$'
|
55
|
+
AND key > %s
|
56
|
+
ORDER BY key
|
57
|
+
LIMIT 500""",
|
58
|
+
(start,),
|
59
|
+
)
|
60
|
+
records = await cur.fetchall()
|
61
|
+
if len(records) == 0:
|
62
|
+
return None
|
63
|
+
|
64
|
+
field_keys = [r[0] for r in records]
|
65
|
+
|
66
|
+
# Retrieve resources basic (to check status)
|
67
|
+
resource_keys = set(["/".join(f.split("/")[:5]) for f in field_keys])
|
68
|
+
await cur.execute(
|
69
|
+
"""
|
70
|
+
SELECT key, value FROM resources
|
71
|
+
WHERE key = ANY (%s)
|
72
|
+
ORDER BY key
|
73
|
+
""",
|
74
|
+
(list(resource_keys),),
|
75
|
+
)
|
76
|
+
records = await cur.fetchall()
|
77
|
+
resources_basic = {}
|
78
|
+
for k, v in records:
|
79
|
+
row_basic = resources_pb2.Basic()
|
80
|
+
row_basic.ParseFromString(v)
|
81
|
+
resources_basic[k] = row_basic
|
82
|
+
|
83
|
+
# Retrieve field errors
|
84
|
+
await cur.execute(
|
85
|
+
"""
|
86
|
+
SELECT key, value FROM resources
|
87
|
+
WHERE key ~ '^/kbs/[^/]*/r/[^/]*/f/[^/]*/[^/]*/error$'
|
88
|
+
AND key > %s AND key <= %s
|
89
|
+
ORDER BY key
|
90
|
+
""",
|
91
|
+
(start, field_keys[-1] + "/error"),
|
92
|
+
)
|
93
|
+
records = await cur.fetchall()
|
94
|
+
errors = {}
|
95
|
+
for k, v in records:
|
96
|
+
row_error = writer_pb2.Error()
|
97
|
+
row_error.ParseFromString(v)
|
98
|
+
errors[k] = row_error
|
99
|
+
|
100
|
+
# Retrieve existing status keys
|
101
|
+
await cur.execute(
|
102
|
+
"""
|
103
|
+
SELECT key FROM resources
|
104
|
+
WHERE key ~ '^/kbs/[^/]*/r/[^/]*/f/[^/]*/[^/]*/status$'
|
105
|
+
AND key > %s AND key <= %s
|
106
|
+
ORDER BY key
|
107
|
+
""",
|
108
|
+
(start, field_keys[-1] + "/status"),
|
109
|
+
)
|
110
|
+
records = await cur.fetchall()
|
111
|
+
has_status = [r[0] for r in records]
|
112
|
+
|
113
|
+
set_batch = []
|
114
|
+
for field_key in field_keys:
|
115
|
+
if field_key + "/status" in has_status:
|
116
|
+
# Already has status, skip
|
117
|
+
continue
|
118
|
+
|
119
|
+
resource_key = "/".join(field_key.split("/")[:5])
|
120
|
+
basic = resources_basic.get(resource_key, None)
|
121
|
+
if basic is None:
|
122
|
+
logger.warn(f"{field_key} resource has no basic, skipped")
|
123
|
+
continue
|
124
|
+
|
125
|
+
status = writer_pb2.FieldStatus()
|
126
|
+
status.status = writer_pb2.FieldStatus.Status.PROCESSED
|
127
|
+
error = errors.get(field_key + "/error", None)
|
128
|
+
# We only copy errors if they come from data augmentation or if the resource is in error
|
129
|
+
# This way we ensure we do not set an error for resources that were previously not in error
|
130
|
+
# There is no way to do this 100% accurate since the /error key is only cleared on field deletion
|
131
|
+
if error:
|
132
|
+
if (
|
133
|
+
error.code == writer_pb2.Error.ErrorCode.DATAAUGMENTATION
|
134
|
+
or basic.metadata.status == resources_pb2.Metadata.Status.ERROR
|
135
|
+
):
|
136
|
+
field_error = writer_pb2.FieldError(
|
137
|
+
source_error=error,
|
138
|
+
)
|
139
|
+
status.errors.append(field_error)
|
140
|
+
status.status = writer_pb2.FieldStatus.Status.ERROR
|
141
|
+
set_batch.append((field_key + "/status", status.SerializeToString()))
|
142
|
+
|
143
|
+
# Write everything to the database in batch
|
144
|
+
async with cur.copy("COPY resources (key, value) FROM STDIN") as copy:
|
145
|
+
for row in set_batch:
|
146
|
+
await copy.write_row(row)
|
147
|
+
await txn.commit()
|
148
|
+
|
149
|
+
return field_keys[-1]
|
@@ -23,11 +23,13 @@ from typing import Optional
|
|
23
23
|
from google.protobuf.message import Message
|
24
24
|
|
25
25
|
from nucliadb.common.datamanagers.utils import get_kv_pb
|
26
|
+
from nucliadb.common.ids import FIELD_TYPE_PB_TO_STR
|
26
27
|
from nucliadb.common.maindb.driver import Transaction
|
27
28
|
from nucliadb_protos import writer_pb2
|
28
29
|
|
29
30
|
KB_RESOURCE_FIELD = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}"
|
30
31
|
KB_RESOURCE_FIELD_ERROR = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}/error"
|
32
|
+
KB_RESOURCE_FIELD_STATUS = "/kbs/{kbid}/r/{uuid}/f/{type}/{field}/status"
|
31
33
|
|
32
34
|
|
33
35
|
async def get_raw(
|
@@ -52,13 +54,7 @@ async def set(
|
|
52
54
|
|
53
55
|
async def delete(txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str):
|
54
56
|
base_key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
55
|
-
|
56
|
-
keys_to_delete = []
|
57
|
-
async for key in txn.keys(base_key):
|
58
|
-
keys_to_delete.append(key)
|
59
|
-
|
60
|
-
for key in keys_to_delete:
|
61
|
-
await txn.delete(key)
|
57
|
+
await txn.delete_by_prefix(base_key)
|
62
58
|
|
63
59
|
|
64
60
|
# Error
|
@@ -82,3 +78,46 @@ async def set_error(
|
|
82
78
|
):
|
83
79
|
key = KB_RESOURCE_FIELD_ERROR.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
84
80
|
await txn.set(key, error.SerializeToString())
|
81
|
+
|
82
|
+
|
83
|
+
# Status, replaces error
|
84
|
+
|
85
|
+
|
86
|
+
async def get_status(
|
87
|
+
txn: Transaction, *, kbid: str, rid: str, field_type: str, field_id: str
|
88
|
+
) -> Optional[writer_pb2.FieldStatus]:
|
89
|
+
key = KB_RESOURCE_FIELD_STATUS.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
90
|
+
return await get_kv_pb(txn, key, writer_pb2.FieldStatus)
|
91
|
+
|
92
|
+
|
93
|
+
async def get_statuses(
|
94
|
+
txn: Transaction, *, kbid: str, rid: str, fields: list[writer_pb2.FieldID]
|
95
|
+
) -> list[writer_pb2.FieldStatus]:
|
96
|
+
keys = [
|
97
|
+
KB_RESOURCE_FIELD_STATUS.format(
|
98
|
+
kbid=kbid, uuid=rid, type=FIELD_TYPE_PB_TO_STR[fid.field_type], field=fid.field
|
99
|
+
)
|
100
|
+
for fid in fields
|
101
|
+
]
|
102
|
+
serialized = await txn.batch_get(keys, for_update=False)
|
103
|
+
statuses = []
|
104
|
+
for serialized_status in serialized:
|
105
|
+
pb = writer_pb2.FieldStatus()
|
106
|
+
if serialized_status is not None:
|
107
|
+
pb.ParseFromString(serialized_status)
|
108
|
+
statuses.append(pb)
|
109
|
+
|
110
|
+
return statuses
|
111
|
+
|
112
|
+
|
113
|
+
async def set_status(
|
114
|
+
txn: Transaction,
|
115
|
+
*,
|
116
|
+
kbid: str,
|
117
|
+
rid: str,
|
118
|
+
field_type: str,
|
119
|
+
field_id: str,
|
120
|
+
status: writer_pb2.FieldStatus,
|
121
|
+
):
|
122
|
+
key = KB_RESOURCE_FIELD_STATUS.format(kbid=kbid, uuid=rid, type=field_type, field=field_id)
|
123
|
+
await txn.set(key, status.SerializeToString())
|
nucliadb/ingest/fields/base.py
CHANGED
@@ -41,7 +41,7 @@ from nucliadb_protos.resources_pb2 import (
|
|
41
41
|
QuestionAnswers,
|
42
42
|
)
|
43
43
|
from nucliadb_protos.utils_pb2 import ExtractedText, VectorObject
|
44
|
-
from nucliadb_protos.writer_pb2 import Error
|
44
|
+
from nucliadb_protos.writer_pb2 import Error, FieldStatus
|
45
45
|
from nucliadb_utils.storages.storage import Storage, StorageField
|
46
46
|
|
47
47
|
SUBFIELDFIELDS = ("c",)
|
@@ -215,6 +215,25 @@ class Field(Generic[PbType]):
|
|
215
215
|
error=error,
|
216
216
|
)
|
217
217
|
|
218
|
+
async def get_status(self) -> Optional[FieldStatus]:
|
219
|
+
return await datamanagers.fields.get_status(
|
220
|
+
self.resource.txn,
|
221
|
+
kbid=self.kbid,
|
222
|
+
rid=self.uuid,
|
223
|
+
field_type=self.type,
|
224
|
+
field_id=self.id,
|
225
|
+
)
|
226
|
+
|
227
|
+
async def set_status(self, status: FieldStatus) -> None:
|
228
|
+
await datamanagers.fields.set_status(
|
229
|
+
self.resource.txn,
|
230
|
+
kbid=self.kbid,
|
231
|
+
rid=self.uuid,
|
232
|
+
field_type=self.type,
|
233
|
+
field_id=self.id,
|
234
|
+
status=status,
|
235
|
+
)
|
236
|
+
|
218
237
|
async def get_question_answers(self, force=False) -> Optional[FieldQuestionAnswers]:
|
219
238
|
if self.question_answers is None or force:
|
220
239
|
sf = self.get_storage_field(FieldTypes.QUESTION_ANSWERS)
|
nucliadb/ingest/orm/resource.py
CHANGED
@@ -21,6 +21,7 @@ from __future__ import annotations
|
|
21
21
|
|
22
22
|
import asyncio
|
23
23
|
import logging
|
24
|
+
from collections import defaultdict
|
24
25
|
from concurrent.futures import ThreadPoolExecutor
|
25
26
|
from functools import partial
|
26
27
|
from typing import TYPE_CHECKING, Any, AsyncIterator, MutableMapping, Optional, Type
|
@@ -534,6 +535,86 @@ class Resource:
|
|
534
535
|
errors=message.errors, # type: ignore
|
535
536
|
)
|
536
537
|
|
538
|
+
@processor_observer.wrap({"type": "apply_fields_status"})
|
539
|
+
async def apply_fields_status(self, message: BrokerMessage, updated_fields: list[FieldID]):
|
540
|
+
# Dictionary of all errors per field (we may have several due to DA tasks)
|
541
|
+
errors_by_field: dict[tuple[FieldType.ValueType, str], list[writer_pb2.Error]] = defaultdict(
|
542
|
+
list
|
543
|
+
)
|
544
|
+
|
545
|
+
# Make sure if a file is updated without errors, it ends up in errors_by_field
|
546
|
+
for field_id in updated_fields:
|
547
|
+
errors_by_field[(field_id.field_type, field_id.field)] = []
|
548
|
+
for fs in message.field_statuses:
|
549
|
+
errors_by_field[(fs.id.field_type, fs.id.field)] = []
|
550
|
+
|
551
|
+
for error in message.errors:
|
552
|
+
errors_by_field[(error.field_type, error.field)].append(error)
|
553
|
+
|
554
|
+
# If this message comes from the processor (not a DA worker), we clear all previous errors
|
555
|
+
# TODO: When generated_by is populated with DA tasks by processor, remove only related errors
|
556
|
+
from_processor = any((x.WhichOneof("generator") == "processor" for x in message.generated_by))
|
557
|
+
|
558
|
+
for (field_type, field), errors in errors_by_field.items():
|
559
|
+
field_obj = await self.get_field(field, field_type, load=False)
|
560
|
+
if from_processor:
|
561
|
+
status = writer_pb2.FieldStatus()
|
562
|
+
else:
|
563
|
+
status = await field_obj.get_status() or writer_pb2.FieldStatus()
|
564
|
+
|
565
|
+
for error in errors:
|
566
|
+
field_error = writer_pb2.FieldError(
|
567
|
+
source_error=error,
|
568
|
+
)
|
569
|
+
field_error.created.GetCurrentTime()
|
570
|
+
status.errors.append(field_error)
|
571
|
+
|
572
|
+
# We infer the status for processor messages
|
573
|
+
if message.source == BrokerMessage.MessageSource.PROCESSOR:
|
574
|
+
if len(errors) > 0:
|
575
|
+
status.status = writer_pb2.FieldStatus.Status.ERROR
|
576
|
+
else:
|
577
|
+
status.status = writer_pb2.FieldStatus.Status.PROCESSED
|
578
|
+
else:
|
579
|
+
field_status = next(
|
580
|
+
(
|
581
|
+
fs.status
|
582
|
+
for fs in message.field_statuses
|
583
|
+
if fs.id.field_type == field_type and fs.id.field == field
|
584
|
+
),
|
585
|
+
None,
|
586
|
+
)
|
587
|
+
if field_status:
|
588
|
+
status.status = field_status
|
589
|
+
|
590
|
+
await field_obj.set_status(status)
|
591
|
+
|
592
|
+
async def update_status(self):
|
593
|
+
field_ids = await self.get_all_field_ids(for_update=False)
|
594
|
+
field_statuses = await datamanagers.fields.get_statuses(
|
595
|
+
self.txn, kbid=self.kb.kbid, rid=self.uuid, fields=field_ids.fields
|
596
|
+
)
|
597
|
+
# If any field is processing -> PENDING
|
598
|
+
if any((f.status == writer_pb2.FieldStatus.Status.PENDING for f in field_statuses)):
|
599
|
+
self.basic.metadata.status = PBMetadata.Status.PENDING
|
600
|
+
# If we have any non-DA error -> ERROR
|
601
|
+
elif any(
|
602
|
+
(
|
603
|
+
f.status == writer_pb2.FieldStatus.Status.ERROR
|
604
|
+
and any(
|
605
|
+
(
|
606
|
+
e.source_error.code != writer_pb2.Error.ErrorCode.DATAAUGMENTATION
|
607
|
+
for e in f.errors
|
608
|
+
)
|
609
|
+
)
|
610
|
+
for f in field_statuses
|
611
|
+
)
|
612
|
+
):
|
613
|
+
self.basic.metadata.status = PBMetadata.Status.ERROR
|
614
|
+
# Otherwise (everything processed or we only have DA errors) -> PROCESSED
|
615
|
+
else:
|
616
|
+
self.basic.metadata.status = PBMetadata.Status.PROCESSED
|
617
|
+
|
537
618
|
@processor_observer.wrap({"type": "apply_extracted"})
|
538
619
|
async def apply_extracted(self, message: BrokerMessage):
|
539
620
|
errors = False
|
@@ -563,6 +644,10 @@ class Resource:
|
|
563
644
|
for extracted_text in message.extracted_text:
|
564
645
|
await self._apply_extracted_text(extracted_text)
|
565
646
|
|
647
|
+
# TODO: Update field and resource status depending on processing results
|
648
|
+
await self.apply_fields_status(message, self._modified_extracted_text)
|
649
|
+
# await self.update_status()
|
650
|
+
|
566
651
|
extracted_languages = []
|
567
652
|
|
568
653
|
for link_extracted_data in message.link_extracted_data:
|
@@ -311,11 +311,12 @@ async def get_graph_results(
|
|
311
311
|
) -> tuple[KnowledgeboxFindResults, QueryParser]:
|
312
312
|
relations = Relations(entities={})
|
313
313
|
explored_entities: set[str] = set()
|
314
|
+
scores: dict[str, list[float]] = {}
|
314
315
|
predict = get_predict()
|
315
316
|
|
316
317
|
for hop in range(graph_strategy.hops):
|
317
318
|
entities_to_explore: Iterable[RelationNode] = []
|
318
|
-
|
319
|
+
|
319
320
|
if hop == 0:
|
320
321
|
# Get the entities from the query
|
321
322
|
with metrics.time("graph_strat_query_entities"):
|
@@ -415,6 +416,7 @@ async def get_graph_results(
|
|
415
416
|
capture_exception(e)
|
416
417
|
logger.exception("Error in ranking relations for graph strategy")
|
417
418
|
relations = Relations(entities={})
|
419
|
+
scores = {}
|
418
420
|
break
|
419
421
|
|
420
422
|
# Get the text blocks of the paragraphs that contain the top relations
|
nucliadb/writer/api/v1/field.py
CHANGED
@@ -55,7 +55,7 @@ from nucliadb_models.utils import FieldIdString
|
|
55
55
|
from nucliadb_models.writer import ResourceFieldAdded, ResourceUpdated
|
56
56
|
from nucliadb_protos import resources_pb2
|
57
57
|
from nucliadb_protos.resources_pb2 import FieldID, Metadata
|
58
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
58
|
+
from nucliadb_protos.writer_pb2 import BrokerMessage, FieldIDStatus, FieldStatus
|
59
59
|
from nucliadb_utils.authentication import requires
|
60
60
|
from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError
|
61
61
|
from nucliadb_utils.utilities import (
|
@@ -553,6 +553,12 @@ async def reprocess_file_field(
|
|
553
553
|
writer.source = BrokerMessage.MessageSource.WRITER
|
554
554
|
writer.basic.metadata.useful = True
|
555
555
|
writer.basic.metadata.status = Metadata.Status.PENDING
|
556
|
+
writer.field_statuses.append(
|
557
|
+
FieldIDStatus(
|
558
|
+
id=FieldID(field_type=resources_pb2.FieldType.FILE, field=field_id),
|
559
|
+
status=FieldStatus.Status.PENDING,
|
560
|
+
)
|
561
|
+
)
|
556
562
|
await transaction.commit(writer, partition, wait=False)
|
557
563
|
# Send current resource to reprocess.
|
558
564
|
try:
|
@@ -63,8 +63,8 @@ from nucliadb_models.writer import (
|
|
63
63
|
ResourceUpdated,
|
64
64
|
UpdateResourcePayload,
|
65
65
|
)
|
66
|
-
from nucliadb_protos.resources_pb2 import Metadata
|
67
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage, IndexResource
|
66
|
+
from nucliadb_protos.resources_pb2 import FieldID, Metadata
|
67
|
+
from nucliadb_protos.writer_pb2 import BrokerMessage, FieldIDStatus, FieldStatus, IndexResource
|
68
68
|
from nucliadb_telemetry.errors import capture_exception
|
69
69
|
from nucliadb_utils.authentication import requires
|
70
70
|
from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError
|
@@ -422,6 +422,7 @@ async def _reprocess_resource(
|
|
422
422
|
storage = await get_storage(service_name=SERVICE_NAME)
|
423
423
|
driver = get_driver()
|
424
424
|
|
425
|
+
writer = BrokerMessage()
|
425
426
|
async with driver.transaction() as txn:
|
426
427
|
kb = KnowledgeBox(txn, storage, kbid)
|
427
428
|
|
@@ -430,8 +431,14 @@ async def _reprocess_resource(
|
|
430
431
|
raise HTTPException(status_code=404, detail="Resource does not exist")
|
431
432
|
|
432
433
|
await extract_fields(resource=resource, toprocess=toprocess)
|
434
|
+
for field_type, field_id in resource.fields.keys():
|
435
|
+
writer.field_statuses.append(
|
436
|
+
FieldIDStatus(
|
437
|
+
id=FieldID(field_type=field_type, field=field_id),
|
438
|
+
status=FieldStatus.Status.PENDING,
|
439
|
+
)
|
440
|
+
)
|
433
441
|
|
434
|
-
writer = BrokerMessage()
|
435
442
|
writer.kbid = kbid
|
436
443
|
writer.uuid = rid
|
437
444
|
writer.source = BrokerMessage.MessageSource.WRITER
|
nucliadb/writer/api/v1/upload.py
CHANGED
@@ -74,7 +74,7 @@ from nucliadb_utils.utilities import (
|
|
74
74
|
get_storage,
|
75
75
|
)
|
76
76
|
|
77
|
-
from .router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX, api
|
77
|
+
from .router import KB_PREFIX, RESOURCE_PREFIX, RESOURCES_PREFIX, RSLUG_PREFIX, api
|
78
78
|
|
79
79
|
TUS_HEADERS = {
|
80
80
|
"Tus-Resumable": "1.0.0",
|
@@ -544,8 +544,8 @@ async def _tus_patch(
|
|
544
544
|
raise AttributeError()
|
545
545
|
path = await storage_manager.finish(dm)
|
546
546
|
headers["Tus-Upload-Finished"] = "1"
|
547
|
-
headers["NDB-Resource"] = f"/{KB_PREFIX}/{kbid}/
|
548
|
-
headers["NDB-Field"] = f"/{KB_PREFIX}/{kbid}/
|
547
|
+
headers["NDB-Resource"] = f"/{KB_PREFIX}/{kbid}/{RESOURCES_PREFIX}/{rid}"
|
548
|
+
headers["NDB-Field"] = f"/{KB_PREFIX}/{kbid}/{RESOURCES_PREFIX}/{rid}/field/{field}"
|
549
549
|
|
550
550
|
item_payload = dm.get("item")
|
551
551
|
creation_payload = None
|
@@ -37,7 +37,7 @@ from nucliadb_models.writer import (
|
|
37
37
|
UpdateResourcePayload,
|
38
38
|
)
|
39
39
|
from nucliadb_protos import resources_pb2
|
40
|
-
from nucliadb_protos.writer_pb2 import BrokerMessage
|
40
|
+
from nucliadb_protos.writer_pb2 import BrokerMessage, FieldIDStatus, FieldStatus
|
41
41
|
from nucliadb_utils.storages.storage import StorageField
|
42
42
|
from nucliadb_utils.utilities import get_storage
|
43
43
|
|
@@ -186,6 +186,12 @@ def parse_text_field(
|
|
186
186
|
format=getattr(models.PushTextFormat, text_field.format.value),
|
187
187
|
extract_strategy=text_field.extract_strategy,
|
188
188
|
)
|
189
|
+
writer.field_statuses.append(
|
190
|
+
FieldIDStatus(
|
191
|
+
id=resources_pb2.FieldID(field_type=resources_pb2.FieldType.TEXT, field=key),
|
192
|
+
status=FieldStatus.Status.PENDING,
|
193
|
+
)
|
194
|
+
)
|
189
195
|
|
190
196
|
|
191
197
|
async def parse_file_field(
|
@@ -204,6 +210,13 @@ async def parse_file_field(
|
|
204
210
|
key, file_field, writer, toprocess, kbid, uuid, skip_store=skip_store
|
205
211
|
)
|
206
212
|
|
213
|
+
writer.field_statuses.append(
|
214
|
+
FieldIDStatus(
|
215
|
+
id=resources_pb2.FieldID(field_type=resources_pb2.FieldType.FILE, field=key),
|
216
|
+
status=FieldStatus.Status.PENDING,
|
217
|
+
)
|
218
|
+
)
|
219
|
+
|
207
220
|
|
208
221
|
async def parse_internal_file_field(
|
209
222
|
key: str,
|
@@ -310,6 +323,12 @@ def parse_link_field(
|
|
310
323
|
xpath=link_field.xpath,
|
311
324
|
extract_strategy=link_field.extract_strategy,
|
312
325
|
)
|
326
|
+
writer.field_statuses.append(
|
327
|
+
FieldIDStatus(
|
328
|
+
id=resources_pb2.FieldID(field_type=resources_pb2.FieldType.LINK, field=key),
|
329
|
+
status=FieldStatus.Status.PENDING,
|
330
|
+
)
|
331
|
+
)
|
313
332
|
|
314
333
|
|
315
334
|
async def parse_conversation_field(
|
@@ -385,3 +404,9 @@ async def parse_conversation_field(
|
|
385
404
|
|
386
405
|
toprocess.conversationfield[key] = convs
|
387
406
|
writer.conversations[key].CopyFrom(field_value)
|
407
|
+
writer.field_statuses.append(
|
408
|
+
FieldIDStatus(
|
409
|
+
id=resources_pb2.FieldID(field_type=resources_pb2.FieldType.CONVERSATION, field=key),
|
410
|
+
status=FieldStatus.Status.PENDING,
|
411
|
+
)
|
412
|
+
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.2.1.post2842
|
3
|
+
Version: 6.2.1.post2855
|
4
4
|
Home-page: https://docs.nuclia.dev/docs/management/nucliadb/intro
|
5
5
|
Author: NucliaDB Community
|
6
6
|
Author-email: nucliadb@nuclia.com
|
@@ -22,10 +22,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
22
22
|
Classifier: Programming Language :: Python :: 3 :: Only
|
23
23
|
Requires-Python: >=3.9, <4
|
24
24
|
Description-Content-Type: text/markdown
|
25
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2842
|
26
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2842
|
27
|
-
Requires-Dist: nucliadb-protos>=6.2.1.post2842
|
28
|
-
Requires-Dist: nucliadb-models>=6.2.1.post2842
|
25
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.2.1.post2855
|
26
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.2.1.post2855
|
27
|
+
Requires-Dist: nucliadb-protos>=6.2.1.post2855
|
28
|
+
Requires-Dist: nucliadb-models>=6.2.1.post2855
|
29
29
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
30
30
|
Requires-Dist: nucliadb-node-binding>=2.26.0
|
31
31
|
Requires-Dist: nuclia-models>=0.24.2
|
@@ -24,6 +24,7 @@ migrations/0025_assign_models_to_kbs_v2.py,sha256=QC6nDF2Wyc6zQMqNoKzvz-3507UpDy
|
|
24
24
|
migrations/0026_fix_high_cardinality_content_types.py,sha256=BsbBkvZDzjRHQfoouZNNtHA1xMxTKm8wOVnp_WAS9j4,2322
|
25
25
|
migrations/0027_rollover_texts3.py,sha256=UQDaMOayVuqDisf82NDrPStoEVveHvdjkSmzbIcU9o4,2730
|
26
26
|
migrations/0028_extracted_vectors_reference.py,sha256=49DHCIlBpjofU8cYVHTdWv0EBIlnPTWV2WCezf0rJUo,2392
|
27
|
+
migrations/0029_backfill_field_status.py,sha256=QWF69n1da9lpRnbEpgbqPjSQ-Wfn6rMC7Enz6bBYGt4,5663
|
27
28
|
migrations/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
28
29
|
migrations/pg/0001_bootstrap.py,sha256=Fsqkeof50m7fKiJN05kmNEMwiKDlOrAgcAS5sLLkutA,1256
|
29
30
|
migrations/pg/0002_catalog.py,sha256=Rsleecu351Ty19kYZgOpqX5G3MEAY8nMxCJrAeuS2Mw,1690
|
@@ -70,7 +71,7 @@ nucliadb/common/datamanagers/atomic.py,sha256=DU7RihO8WaGNuh_GTEpQ-8hkoinY5GSpNS
|
|
70
71
|
nucliadb/common/datamanagers/cluster.py,sha256=psTwAWSLj83vhFnC1iJJ6holrolAI4nKos9PuEWspYY,1500
|
71
72
|
nucliadb/common/datamanagers/entities.py,sha256=hqw4YcEOumGK_1vgNNfxP-WafHvWN5jf61n4U01WJtc,5311
|
72
73
|
nucliadb/common/datamanagers/exceptions.py,sha256=Atz_PP_GGq4jgJaWcAkcRbHBoBaGcC9yJvFteylKtTE,883
|
73
|
-
nucliadb/common/datamanagers/fields.py,sha256=
|
74
|
+
nucliadb/common/datamanagers/fields.py,sha256=QqWVFqLp58Ib9fadXkSBuaYU-Mo6VJHZWg-taUqreNM,3915
|
74
75
|
nucliadb/common/datamanagers/kb.py,sha256=P7EhF4tApIUG2jw_HH1oMufTKG9__kuOLKnrCNGbDM4,6156
|
75
76
|
nucliadb/common/datamanagers/labels.py,sha256=Zm0GQpSPoGXEEysUY7VsDIcyKSIIQsMVphj23IyM9_c,4502
|
76
77
|
nucliadb/common/datamanagers/processing.py,sha256=ByxdZzdbAfJGqC6__mY-zryjk040TyQfcUq3rxujeoY,1587
|
@@ -127,7 +128,7 @@ nucliadb/ingest/consumer/service.py,sha256=EZM1sABW_7bj6j2UgKUHUuK-EGIEYnLdtPAn8
|
|
127
128
|
nucliadb/ingest/consumer/shard_creator.py,sha256=19wf-Bu_9hb_muCDVblamWuvLr09e5dMu9Id5I4-rGw,4324
|
128
129
|
nucliadb/ingest/consumer/utils.py,sha256=jpX8D4lKzuPCpArQLZeX_Zczq3pfen_zAf8sPJfOEZU,2642
|
129
130
|
nucliadb/ingest/fields/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
130
|
-
nucliadb/ingest/fields/base.py,sha256=
|
131
|
+
nucliadb/ingest/fields/base.py,sha256=GuyZaumtaaAzoy86-mewBMhNX9DaSDBIK4pCqrxZlDc,19836
|
131
132
|
nucliadb/ingest/fields/conversation.py,sha256=OcQOHvi72Pm0OyNGwxLo9gONo8f1NhwASq0_gS-E64A,7021
|
132
133
|
nucliadb/ingest/fields/exceptions.py,sha256=LBZ-lw11f42Pk-ck-NSN9mSJ2kOw-NeRwb-UE31ILTQ,1171
|
133
134
|
nucliadb/ingest/fields/file.py,sha256=1v4jLg3balUua2VmSV8hHkAwPFShTUCOzufZvIUQcQw,4740
|
@@ -141,7 +142,7 @@ nucliadb/ingest/orm/entities.py,sha256=2PslT1FZ6yCvJtjR0UpKTSzxJrtS-C_gZx4ZTWHun
|
|
141
142
|
nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
|
142
143
|
nucliadb/ingest/orm/knowledgebox.py,sha256=UpWJrVaVfCtk8R4qfSR6h6vzwOKXa8Teuwkna5QSljE,24508
|
143
144
|
nucliadb/ingest/orm/metrics.py,sha256=OkwMSPKLZcKba0ZTwtTiIxwBgaLMX5ydhGieKvi2y7E,1096
|
144
|
-
nucliadb/ingest/orm/resource.py,sha256=
|
145
|
+
nucliadb/ingest/orm/resource.py,sha256=jvaKLsTlHtmIWUjjWCu8XBF7qQl5hoUihAa8sHDpLV8,59540
|
145
146
|
nucliadb/ingest/orm/utils.py,sha256=vCe_9UxHu26JDFGLwQ0wH-XyzJIpQCTK-Ow9dtZR5Vg,2716
|
146
147
|
nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
|
147
148
|
nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
|
@@ -219,7 +220,7 @@ nucliadb/search/search/fetch.py,sha256=XJHIFnZmXM_8Kb37lb4lg1GYG7cZ1plT-qAIb_Qzi
|
|
219
220
|
nucliadb/search/search/filters.py,sha256=1MkHlJjAQqoRCj7e5cEzK2HvBxGLE17I_omsjiklbtw,6476
|
220
221
|
nucliadb/search/search/find.py,sha256=yQbttt85wQFc4NEaj2RNGgozP7IQx_bjAOhHke3fXY0,9890
|
221
222
|
nucliadb/search/search/find_merge.py,sha256=_R_YpHAZv5BHh3XABQ8MRd1Ci0seclGYf26yJHJ7H0I,17178
|
222
|
-
nucliadb/search/search/graph_strategy.py,sha256=
|
223
|
+
nucliadb/search/search/graph_strategy.py,sha256=6d-KjGDbOnaXQzEwyBpA-iQM0rkveVTiK3A3m2UJq8Q,33538
|
223
224
|
nucliadb/search/search/hydrator.py,sha256=-R37gCrGxkyaiHQalnTWHNG_FCx11Zucd7qA1vQCxuw,6985
|
224
225
|
nucliadb/search/search/merge.py,sha256=g0PxUejWtYIYWG-VroArMCgwB6AOp3lZMkzoHAPYBKE,22183
|
225
226
|
nucliadb/search/search/metrics.py,sha256=81X-tahGW4n2CLvUzCPdNxNClmZqUWZjcVOGCUHoiUM,2872
|
@@ -314,19 +315,19 @@ nucliadb/writer/api/constants.py,sha256=qWEDjFUycrEZnSJyLnNK4PQNodU2oVmkO4NycaEZ
|
|
314
315
|
nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,1313
|
315
316
|
nucliadb/writer/api/v1/__init__.py,sha256=FVn7N9VJ6bsEoy4TRnkclr4Umd5hECiwPXVqRnJ8BME,1095
|
316
317
|
nucliadb/writer/api/v1/export_import.py,sha256=6_gn0-emCjmK6bCUX5kgMvG0qkZr4HlfGmBXhhngsxo,8243
|
317
|
-
nucliadb/writer/api/v1/field.py,sha256=
|
318
|
+
nucliadb/writer/api/v1/field.py,sha256=OsWOYA0WQ6onE5Rkl20QIEdtrSi7Jgnu62fUt90Ziy8,17503
|
318
319
|
nucliadb/writer/api/v1/knowledgebox.py,sha256=Mr1vJSWOtiraDdtoTqQ1V2rSirMdojL4wN0Q3cOiX4k,10929
|
319
320
|
nucliadb/writer/api/v1/learning_config.py,sha256=GaYaagjBrVG9ZxrWQyVQfqGMQV3tAJjqJ5CStaKhktU,2058
|
320
|
-
nucliadb/writer/api/v1/resource.py,sha256=
|
321
|
+
nucliadb/writer/api/v1/resource.py,sha256=A8fAHlN5XFsg6XFYKhfWJS8czgNH6yXr-PsnUqz2WUE,18757
|
321
322
|
nucliadb/writer/api/v1/router.py,sha256=RjuoWLpZer6Kl2BW_wznpNo6XL3BOpdTGqXZCn3QrrQ,1034
|
322
323
|
nucliadb/writer/api/v1/services.py,sha256=U8OGxhA1tdt-wxw2uDAjFpwFXFEXSDTfBe1iV5nfmx8,9897
|
323
324
|
nucliadb/writer/api/v1/slug.py,sha256=xlVBDBpRi9bNulpBHZwhyftVvulfE0zFm1XZIWl-AKY,2389
|
324
325
|
nucliadb/writer/api/v1/transaction.py,sha256=d2Vbgnkk_-FLGSTt3vfldwiJIUf0XoyD0wP1jQNz_DY,2430
|
325
|
-
nucliadb/writer/api/v1/upload.py,sha256=
|
326
|
+
nucliadb/writer/api/v1/upload.py,sha256=yfVbIDHZEmFUDLi0Fenv9i-oj1m0JZPyTK7UPlq8Wws,32905
|
326
327
|
nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
327
328
|
nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
|
328
329
|
nucliadb/writer/resource/basic.py,sha256=l9zD-Qiq4eUkHezMf0w1Ksx2izKYLYuNoMIlXcNxxpM,11163
|
329
|
-
nucliadb/writer/resource/field.py,sha256=
|
330
|
+
nucliadb/writer/resource/field.py,sha256=HsOERELyAsb9e0dx2IkSQ9lk0SThALFRcDKCVBw8ifU,15478
|
330
331
|
nucliadb/writer/resource/origin.py,sha256=pvhUDdU0mlWPUcpoQi4LDUJaRtfjzVVrA8XcGVI_N8k,2021
|
331
332
|
nucliadb/writer/tus/__init__.py,sha256=huWpKnDnjsrKlBBJk30ta5vamlA-4x0TbPs_2Up8hyM,5443
|
332
333
|
nucliadb/writer/tus/azure.py,sha256=XhWAlWTM0vmXcXtuEPYjjeEhuZjiZXZu8q9WsJ7omFE,4107
|
@@ -337,9 +338,9 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
337
338
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
338
339
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
339
340
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
340
|
-
nucliadb-6.2.1.
|
341
|
-
nucliadb-6.2.1.
|
342
|
-
nucliadb-6.2.1.
|
343
|
-
nucliadb-6.2.1.
|
344
|
-
nucliadb-6.2.1.
|
345
|
-
nucliadb-6.2.1.
|
341
|
+
nucliadb-6.2.1.post2855.dist-info/METADATA,sha256=F8qPYGVhRM6117pUwuEC5Sk8k8EAjk5jcJ79wjawFxw,4689
|
342
|
+
nucliadb-6.2.1.post2855.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
343
|
+
nucliadb-6.2.1.post2855.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
344
|
+
nucliadb-6.2.1.post2855.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
345
|
+
nucliadb-6.2.1.post2855.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
346
|
+
nucliadb-6.2.1.post2855.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|