nucliadb 6.3.5.post3995__py3-none-any.whl → 6.3.5.post3997__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb/ingest/orm/brain_v2.py +16 -5
- nucliadb/ingest/orm/index_message.py +11 -5
- nucliadb/ingest/orm/metrics.py +12 -1
- nucliadb/ingest/orm/processor/processor.py +4 -4
- {nucliadb-6.3.5.post3995.dist-info → nucliadb-6.3.5.post3997.dist-info}/METADATA +6 -6
- {nucliadb-6.3.5.post3995.dist-info → nucliadb-6.3.5.post3997.dist-info}/RECORD +9 -9
- {nucliadb-6.3.5.post3995.dist-info → nucliadb-6.3.5.post3997.dist-info}/WHEEL +0 -0
- {nucliadb-6.3.5.post3995.dist-info → nucliadb-6.3.5.post3997.dist-info}/entry_points.txt +0 -0
- {nucliadb-6.3.5.post3995.dist-info → nucliadb-6.3.5.post3997.dist-info}/top_level.txt +0 -0
nucliadb/ingest/orm/brain_v2.py
CHANGED
@@ -24,6 +24,7 @@ from typing import Optional
|
|
24
24
|
|
25
25
|
from nucliadb.common import ids
|
26
26
|
from nucliadb.ingest import logger
|
27
|
+
from nucliadb.ingest.orm.metrics import brain_observer as observer
|
27
28
|
from nucliadb.ingest.orm.utils import compute_paragraph_key
|
28
29
|
from nucliadb_models.labels import BASE_LABELS, LABEL_HIDDEN, flatten_resource_labels
|
29
30
|
from nucliadb_models.metadata import ResourceProcessingStatus
|
@@ -74,7 +75,8 @@ class ResourceBrainV2:
|
|
74
75
|
self.brain: PBBrainResource = PBBrainResource(resource=ResourceID(uuid=rid))
|
75
76
|
self.labels: dict[str, set[str]] = deepcopy(BASE_LABELS)
|
76
77
|
|
77
|
-
|
78
|
+
@observer.wrap({"type": "generate_resource_metadata"})
|
79
|
+
def generate_resource_metadata(
|
78
80
|
self,
|
79
81
|
basic: Basic,
|
80
82
|
user_relations: Relations,
|
@@ -89,7 +91,8 @@ class ResourceBrainV2:
|
|
89
91
|
if security is not None:
|
90
92
|
self._set_resource_security(security)
|
91
93
|
|
92
|
-
|
94
|
+
@observer.wrap({"type": "generate_texts"})
|
95
|
+
def generate_texts(
|
93
96
|
self,
|
94
97
|
field_key: str,
|
95
98
|
extracted_text: ExtractedText,
|
@@ -112,6 +115,7 @@ class ResourceBrainV2:
|
|
112
115
|
basic_user_metadata,
|
113
116
|
)
|
114
117
|
|
118
|
+
@observer.wrap({"type": "apply_field_text"})
|
115
119
|
def apply_field_text(
|
116
120
|
self,
|
117
121
|
field_key: str,
|
@@ -131,6 +135,7 @@ class ResourceBrainV2:
|
|
131
135
|
full_field_id = ids.FieldId(rid=self.rid, type=ftype, key=fkey).full()
|
132
136
|
self.brain.texts_to_delete.append(full_field_id)
|
133
137
|
|
138
|
+
@observer.wrap({"type": "apply_field_labels"})
|
134
139
|
def apply_field_labels(
|
135
140
|
self,
|
136
141
|
field_key: str,
|
@@ -200,7 +205,8 @@ class ResourceBrainV2:
|
|
200
205
|
|
201
206
|
self.brain.texts[field_key].labels.extend(flatten_resource_labels(labels))
|
202
207
|
|
203
|
-
|
208
|
+
@observer.wrap({"type": "generate_paragraphs"})
|
209
|
+
def generate_paragraphs(
|
204
210
|
self,
|
205
211
|
field_key: str,
|
206
212
|
field_computed_metadata: FieldComputedMetadata,
|
@@ -228,6 +234,7 @@ class ResourceBrainV2:
|
|
228
234
|
skip_paragraphs=skip_index,
|
229
235
|
)
|
230
236
|
|
237
|
+
@observer.wrap({"type": "apply_field_paragraphs"})
|
231
238
|
def apply_field_paragraphs(
|
232
239
|
self,
|
233
240
|
field_key: str,
|
@@ -371,7 +378,8 @@ class ResourceBrainV2:
|
|
371
378
|
pc.valid.setdefault(paragraph_key, []).append(classif_label)
|
372
379
|
return pc
|
373
380
|
|
374
|
-
|
381
|
+
@observer.wrap({"type": "generate_relations"})
|
382
|
+
def generate_relations(
|
375
383
|
self,
|
376
384
|
field_key: str,
|
377
385
|
field_computed_metadata: Optional[FieldComputedMetadata],
|
@@ -477,7 +485,8 @@ class ResourceBrainV2:
|
|
477
485
|
self.brain.sentences_to_delete.append(full_field_id)
|
478
486
|
self.brain.relation_fields_to_delete.append(field_key)
|
479
487
|
|
480
|
-
|
488
|
+
@observer.wrap({"type": "generate_vectors"})
|
489
|
+
def generate_vectors(
|
481
490
|
self,
|
482
491
|
field_id: str,
|
483
492
|
vo: utils_pb2.VectorObject,
|
@@ -547,6 +556,7 @@ class ResourceBrainV2:
|
|
547
556
|
full_field_id = ids.FieldId(rid=self.rid, type=fid.type, key=fid.key).full()
|
548
557
|
self.brain.vector_prefixes_to_delete[vectorset].items.append(full_field_id)
|
549
558
|
|
559
|
+
@observer.wrap({"type": "apply_field_vector"})
|
550
560
|
def _apply_field_vector(
|
551
561
|
self,
|
552
562
|
field_id: str,
|
@@ -764,6 +774,7 @@ class ParagraphPages:
|
|
764
774
|
self.positions = positions
|
765
775
|
self._materialized = self._materialize_page_numbers(positions)
|
766
776
|
|
777
|
+
@observer.wrap({"type": "materialize_page_numbers"})
|
767
778
|
def _materialize_page_numbers(self, positions: FilePagePositions) -> list[int]:
|
768
779
|
page_numbers_by_index = []
|
769
780
|
for page_number, (page_start, page_end) in positions.items():
|
@@ -26,6 +26,7 @@ from nucliadb.common import datamanagers
|
|
26
26
|
from nucliadb.ingest.fields.exceptions import FieldAuthorNotFound
|
27
27
|
from nucliadb.ingest.fields.file import File
|
28
28
|
from nucliadb.ingest.orm.brain_v2 import ResourceBrainV2 as ResourceBrain
|
29
|
+
from nucliadb.ingest.orm.metrics import index_message_observer as observer
|
29
30
|
from nucliadb.ingest.orm.resource import Resource, get_file_page_positions
|
30
31
|
from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig
|
31
32
|
from nucliadb_protos.noderesources_pb2 import Resource as IndexMessage
|
@@ -40,6 +41,7 @@ class IndexMessageBuilder:
|
|
40
41
|
self.resource = resource
|
41
42
|
self.brain = ResourceBrain(resource.uuid)
|
42
43
|
|
44
|
+
@observer.wrap({"type": "resource_data"})
|
43
45
|
async def _apply_resource_index_data(self, brain: ResourceBrain) -> None:
|
44
46
|
# Set the metadata at the resource level
|
45
47
|
basic = await self.resource.get_basic()
|
@@ -48,7 +50,7 @@ class IndexMessageBuilder:
|
|
48
50
|
origin = await self.resource.get_origin()
|
49
51
|
security = await self.resource.get_security()
|
50
52
|
await asyncio.to_thread(
|
51
|
-
brain.
|
53
|
+
brain.generate_resource_metadata,
|
52
54
|
basic,
|
53
55
|
user_relations,
|
54
56
|
origin,
|
@@ -56,6 +58,7 @@ class IndexMessageBuilder:
|
|
56
58
|
security,
|
57
59
|
)
|
58
60
|
|
61
|
+
@observer.wrap({"type": "field_data"})
|
59
62
|
async def _apply_field_index_data(
|
60
63
|
self,
|
61
64
|
brain: ResourceBrain,
|
@@ -87,7 +90,7 @@ class IndexMessageBuilder:
|
|
87
90
|
except FieldAuthorNotFound:
|
88
91
|
field_author = None
|
89
92
|
await asyncio.to_thread(
|
90
|
-
brain.
|
93
|
+
brain.generate_texts,
|
91
94
|
self.resource.generate_field_id(fieldid),
|
92
95
|
extracted_text,
|
93
96
|
field_computed_metadata,
|
@@ -108,7 +111,7 @@ class IndexMessageBuilder:
|
|
108
111
|
await get_file_page_positions(field) if isinstance(field, File) else None
|
109
112
|
)
|
110
113
|
await asyncio.to_thread(
|
111
|
-
brain.
|
114
|
+
brain.generate_paragraphs,
|
112
115
|
self.resource.generate_field_id(fieldid),
|
113
116
|
field_computed_metadata,
|
114
117
|
extracted_text,
|
@@ -127,7 +130,7 @@ class IndexMessageBuilder:
|
|
127
130
|
if vo is not None:
|
128
131
|
dimension = vectorset_config.vectorset_index_config.vector_dimension
|
129
132
|
await asyncio.to_thread(
|
130
|
-
brain.
|
133
|
+
brain.generate_vectors,
|
131
134
|
self.resource.generate_field_id(fieldid),
|
132
135
|
vo,
|
133
136
|
vectorset=vectorset_config.vectorset_id,
|
@@ -136,7 +139,7 @@ class IndexMessageBuilder:
|
|
136
139
|
)
|
137
140
|
if relations:
|
138
141
|
await asyncio.to_thread(
|
139
|
-
brain.
|
142
|
+
brain.generate_relations,
|
140
143
|
self.resource.generate_field_id(fieldid),
|
141
144
|
field_computed_metadata,
|
142
145
|
basic.usermetadata,
|
@@ -151,6 +154,7 @@ class IndexMessageBuilder:
|
|
151
154
|
for field_id in field_ids:
|
152
155
|
brain.delete_field(self.resource.generate_field_id(field_id))
|
153
156
|
|
157
|
+
@observer.wrap({"type": "writer_bm"})
|
154
158
|
async def for_writer_bm(
|
155
159
|
self,
|
156
160
|
messages: list[BrokerMessage],
|
@@ -192,6 +196,7 @@ class IndexMessageBuilder:
|
|
192
196
|
)
|
193
197
|
return self.brain.brain
|
194
198
|
|
199
|
+
@observer.wrap({"type": "processor_bm"})
|
195
200
|
async def for_processor_bm(
|
196
201
|
self,
|
197
202
|
messages: list[BrokerMessage],
|
@@ -223,6 +228,7 @@ class IndexMessageBuilder:
|
|
223
228
|
)
|
224
229
|
return self.brain.brain
|
225
230
|
|
231
|
+
@observer.wrap({"type": "full"})
|
226
232
|
async def full(self, reindex: bool) -> IndexMessage:
|
227
233
|
await self._apply_resource_index_data(self.brain)
|
228
234
|
basic = await self.get_basic()
|
nucliadb/ingest/orm/metrics.py
CHANGED
@@ -22,6 +22,17 @@ from nucliadb_telemetry import metrics
|
|
22
22
|
|
23
23
|
processor_observer = metrics.Observer(
|
24
24
|
"nucliadb_ingest_processor",
|
25
|
-
labels={"type": ""
|
25
|
+
labels={"type": ""},
|
26
26
|
error_mappings={"kb_conflict": KnowledgeBoxConflict},
|
27
27
|
)
|
28
|
+
|
29
|
+
|
30
|
+
index_message_observer = metrics.Observer(
|
31
|
+
"index_message_builder",
|
32
|
+
labels={"type": ""},
|
33
|
+
)
|
34
|
+
|
35
|
+
brain_observer = metrics.Observer(
|
36
|
+
"brain",
|
37
|
+
labels={"type": ""},
|
38
|
+
)
|
@@ -462,6 +462,7 @@ class Processor:
|
|
462
462
|
source=source,
|
463
463
|
)
|
464
464
|
|
465
|
+
@processor_observer.wrap({"type": "generate_index_message_v2"})
|
465
466
|
async def generate_index_message_v2(
|
466
467
|
self,
|
467
468
|
resource: Resource,
|
@@ -471,14 +472,13 @@ class Processor:
|
|
471
472
|
builder = IndexMessageBuilder(resource)
|
472
473
|
message_source = messages_source(messages)
|
473
474
|
if message_source == nodewriter_pb2.IndexMessageSource.WRITER:
|
474
|
-
|
475
|
-
return await builder.for_writer_bm(messages, resource_created)
|
475
|
+
return await builder.for_writer_bm(messages, resource_created)
|
476
476
|
elif message_source == nodewriter_pb2.IndexMessageSource.PROCESSOR:
|
477
|
-
|
478
|
-
return await builder.for_processor_bm(messages)
|
477
|
+
return await builder.for_processor_bm(messages)
|
479
478
|
else: # pragma: no cover
|
480
479
|
raise InvalidBrokerMessage(f"Unknown broker message source: {message_source}")
|
481
480
|
|
481
|
+
@processor_observer.wrap({"type": "generate_index_message_v1"})
|
482
482
|
async def generate_index_message_v1(
|
483
483
|
self,
|
484
484
|
resource: Resource,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: nucliadb
|
3
|
-
Version: 6.3.5.
|
3
|
+
Version: 6.3.5.post3997
|
4
4
|
Summary: NucliaDB
|
5
5
|
Author-email: Nuclia <nucliadb@nuclia.com>
|
6
6
|
License: AGPL
|
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
|
|
20
20
|
Classifier: Programming Language :: Python :: 3 :: Only
|
21
21
|
Requires-Python: <4,>=3.9
|
22
22
|
Description-Content-Type: text/markdown
|
23
|
-
Requires-Dist: nucliadb-telemetry[all]>=6.3.5.
|
24
|
-
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.5.
|
25
|
-
Requires-Dist: nucliadb-protos>=6.3.5.
|
26
|
-
Requires-Dist: nucliadb-models>=6.3.5.
|
27
|
-
Requires-Dist: nidx-protos>=6.3.5.
|
23
|
+
Requires-Dist: nucliadb-telemetry[all]>=6.3.5.post3997
|
24
|
+
Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.5.post3997
|
25
|
+
Requires-Dist: nucliadb-protos>=6.3.5.post3997
|
26
|
+
Requires-Dist: nucliadb-models>=6.3.5.post3997
|
27
|
+
Requires-Dist: nidx-protos>=6.3.5.post3997
|
28
28
|
Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
|
29
29
|
Requires-Dist: nuclia-models>=0.24.2
|
30
30
|
Requires-Dist: uvicorn[standard]
|
@@ -142,20 +142,20 @@ nucliadb/ingest/fields/link.py,sha256=kN_gjRUEEj5cy8K_BwPijYg3TiWhedc24apXYlTbRJ
|
|
142
142
|
nucliadb/ingest/fields/text.py,sha256=tFvSQJAe0W7ePpp2_WDfLiE2yglR1OTU0Zht9acvOFw,1594
|
143
143
|
nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
144
144
|
nucliadb/ingest/orm/brain.py,sha256=8nXdxgI3zYn6DGnCq5ciq3PA7ouhcTW5dSgHaxAO6xg,29074
|
145
|
-
nucliadb/ingest/orm/brain_v2.py,sha256=
|
145
|
+
nucliadb/ingest/orm/brain_v2.py,sha256=XEOfvjpnvSKNrAOtbO4vt9n_PWVbzOhB-seHs76uY0M,33588
|
146
146
|
nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
|
147
147
|
nucliadb/ingest/orm/entities.py,sha256=a-aYuKBUQhxDKFtXOzTAkLlY_t2JiTfaptw2vt3AQDQ,14915
|
148
148
|
nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
|
149
|
-
nucliadb/ingest/orm/index_message.py,sha256=
|
149
|
+
nucliadb/ingest/orm/index_message.py,sha256=fFNYRZTH45fm6IZ9tHNwa4KNgV8KxzwS5uuklRe65ww,16044
|
150
150
|
nucliadb/ingest/orm/knowledgebox.py,sha256=Bfb4-MIQWlaJrQAUDbgs_iIsXCYjS7s5YiiGl_Jb4jo,23887
|
151
|
-
nucliadb/ingest/orm/metrics.py,sha256=
|
151
|
+
nucliadb/ingest/orm/metrics.py,sha256=OiuggTh-n3kZHA2G73NEUdIlh8c3yFrbusI88DK-Mko,1273
|
152
152
|
nucliadb/ingest/orm/resource.py,sha256=GjxcEPuu8bM06Uea7_yJk0UFvOfiZNP9i_G4V-4D8_U,46845
|
153
153
|
nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,2693
|
154
154
|
nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
|
155
155
|
nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
|
156
156
|
nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
|
157
157
|
nucliadb/ingest/orm/processor/pgcatalog.py,sha256=H-OCRz0RuTUb80LZBxDowLA9V7ECv1DWiXlnzKW5XGI,3103
|
158
|
-
nucliadb/ingest/orm/processor/processor.py,sha256=
|
158
|
+
nucliadb/ingest/orm/processor/processor.py,sha256=q2iBJJ_5SV_bxA3t5MrbV70iQhir94aFbjZjnYJzEAQ,33141
|
159
159
|
nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
|
160
160
|
nucliadb/ingest/service/__init__.py,sha256=MME_G_ERxzJR6JW_hfE2qcfXpmpH1kdG-S0a-M0qRm8,2043
|
161
161
|
nucliadb/ingest/service/exceptions.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
|
@@ -360,8 +360,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
|
|
360
360
|
nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
|
361
361
|
nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
|
362
362
|
nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
|
363
|
-
nucliadb-6.3.5.
|
364
|
-
nucliadb-6.3.5.
|
365
|
-
nucliadb-6.3.5.
|
366
|
-
nucliadb-6.3.5.
|
367
|
-
nucliadb-6.3.5.
|
363
|
+
nucliadb-6.3.5.post3997.dist-info/METADATA,sha256=K-G5B3YkJzhIf9IiEVxZT9t41hvtsJymVAn5KZheMVY,4301
|
364
|
+
nucliadb-6.3.5.post3997.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
365
|
+
nucliadb-6.3.5.post3997.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
|
366
|
+
nucliadb-6.3.5.post3997.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
|
367
|
+
nucliadb-6.3.5.post3997.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|