nucliadb 6.3.5.post3995__py3-none-any.whl → 6.3.5.post3997__py3-none-any.whl

This diff shows the changes in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -24,6 +24,7 @@ from typing import Optional
24
24
 
25
25
  from nucliadb.common import ids
26
26
  from nucliadb.ingest import logger
27
+ from nucliadb.ingest.orm.metrics import brain_observer as observer
27
28
  from nucliadb.ingest.orm.utils import compute_paragraph_key
28
29
  from nucliadb_models.labels import BASE_LABELS, LABEL_HIDDEN, flatten_resource_labels
29
30
  from nucliadb_models.metadata import ResourceProcessingStatus
@@ -74,7 +75,8 @@ class ResourceBrainV2:
74
75
  self.brain: PBBrainResource = PBBrainResource(resource=ResourceID(uuid=rid))
75
76
  self.labels: dict[str, set[str]] = deepcopy(BASE_LABELS)
76
77
 
77
- def generate_resource_indexing_metadata(
78
+ @observer.wrap({"type": "generate_resource_metadata"})
79
+ def generate_resource_metadata(
78
80
  self,
79
81
  basic: Basic,
80
82
  user_relations: Relations,
@@ -89,7 +91,8 @@ class ResourceBrainV2:
89
91
  if security is not None:
90
92
  self._set_resource_security(security)
91
93
 
92
- def generate_texts_index_message(
94
+ @observer.wrap({"type": "generate_texts"})
95
+ def generate_texts(
93
96
  self,
94
97
  field_key: str,
95
98
  extracted_text: ExtractedText,
@@ -112,6 +115,7 @@ class ResourceBrainV2:
112
115
  basic_user_metadata,
113
116
  )
114
117
 
118
+ @observer.wrap({"type": "apply_field_text"})
115
119
  def apply_field_text(
116
120
  self,
117
121
  field_key: str,
@@ -131,6 +135,7 @@ class ResourceBrainV2:
131
135
  full_field_id = ids.FieldId(rid=self.rid, type=ftype, key=fkey).full()
132
136
  self.brain.texts_to_delete.append(full_field_id)
133
137
 
138
+ @observer.wrap({"type": "apply_field_labels"})
134
139
  def apply_field_labels(
135
140
  self,
136
141
  field_key: str,
@@ -200,7 +205,8 @@ class ResourceBrainV2:
200
205
 
201
206
  self.brain.texts[field_key].labels.extend(flatten_resource_labels(labels))
202
207
 
203
- def generate_paragraphs_index_message(
208
+ @observer.wrap({"type": "generate_paragraphs"})
209
+ def generate_paragraphs(
204
210
  self,
205
211
  field_key: str,
206
212
  field_computed_metadata: FieldComputedMetadata,
@@ -228,6 +234,7 @@ class ResourceBrainV2:
228
234
  skip_paragraphs=skip_index,
229
235
  )
230
236
 
237
+ @observer.wrap({"type": "apply_field_paragraphs"})
231
238
  def apply_field_paragraphs(
232
239
  self,
233
240
  field_key: str,
@@ -371,7 +378,8 @@ class ResourceBrainV2:
371
378
  pc.valid.setdefault(paragraph_key, []).append(classif_label)
372
379
  return pc
373
380
 
374
- def generate_relations_index_message(
381
+ @observer.wrap({"type": "generate_relations"})
382
+ def generate_relations(
375
383
  self,
376
384
  field_key: str,
377
385
  field_computed_metadata: Optional[FieldComputedMetadata],
@@ -477,7 +485,8 @@ class ResourceBrainV2:
477
485
  self.brain.sentences_to_delete.append(full_field_id)
478
486
  self.brain.relation_fields_to_delete.append(field_key)
479
487
 
480
- def generate_vectors_index_message(
488
+ @observer.wrap({"type": "generate_vectors"})
489
+ def generate_vectors(
481
490
  self,
482
491
  field_id: str,
483
492
  vo: utils_pb2.VectorObject,
@@ -547,6 +556,7 @@ class ResourceBrainV2:
547
556
  full_field_id = ids.FieldId(rid=self.rid, type=fid.type, key=fid.key).full()
548
557
  self.brain.vector_prefixes_to_delete[vectorset].items.append(full_field_id)
549
558
 
559
+ @observer.wrap({"type": "apply_field_vector"})
550
560
  def _apply_field_vector(
551
561
  self,
552
562
  field_id: str,
@@ -764,6 +774,7 @@ class ParagraphPages:
764
774
  self.positions = positions
765
775
  self._materialized = self._materialize_page_numbers(positions)
766
776
 
777
+ @observer.wrap({"type": "materialize_page_numbers"})
767
778
  def _materialize_page_numbers(self, positions: FilePagePositions) -> list[int]:
768
779
  page_numbers_by_index = []
769
780
  for page_number, (page_start, page_end) in positions.items():
@@ -26,6 +26,7 @@ from nucliadb.common import datamanagers
26
26
  from nucliadb.ingest.fields.exceptions import FieldAuthorNotFound
27
27
  from nucliadb.ingest.fields.file import File
28
28
  from nucliadb.ingest.orm.brain_v2 import ResourceBrainV2 as ResourceBrain
29
+ from nucliadb.ingest.orm.metrics import index_message_observer as observer
29
30
  from nucliadb.ingest.orm.resource import Resource, get_file_page_positions
30
31
  from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig
31
32
  from nucliadb_protos.noderesources_pb2 import Resource as IndexMessage
@@ -40,6 +41,7 @@ class IndexMessageBuilder:
40
41
  self.resource = resource
41
42
  self.brain = ResourceBrain(resource.uuid)
42
43
 
44
+ @observer.wrap({"type": "resource_data"})
43
45
  async def _apply_resource_index_data(self, brain: ResourceBrain) -> None:
44
46
  # Set the metadata at the resource level
45
47
  basic = await self.resource.get_basic()
@@ -48,7 +50,7 @@ class IndexMessageBuilder:
48
50
  origin = await self.resource.get_origin()
49
51
  security = await self.resource.get_security()
50
52
  await asyncio.to_thread(
51
- brain.generate_resource_indexing_metadata,
53
+ brain.generate_resource_metadata,
52
54
  basic,
53
55
  user_relations,
54
56
  origin,
@@ -56,6 +58,7 @@ class IndexMessageBuilder:
56
58
  security,
57
59
  )
58
60
 
61
+ @observer.wrap({"type": "field_data"})
59
62
  async def _apply_field_index_data(
60
63
  self,
61
64
  brain: ResourceBrain,
@@ -87,7 +90,7 @@ class IndexMessageBuilder:
87
90
  except FieldAuthorNotFound:
88
91
  field_author = None
89
92
  await asyncio.to_thread(
90
- brain.generate_texts_index_message,
93
+ brain.generate_texts,
91
94
  self.resource.generate_field_id(fieldid),
92
95
  extracted_text,
93
96
  field_computed_metadata,
@@ -108,7 +111,7 @@ class IndexMessageBuilder:
108
111
  await get_file_page_positions(field) if isinstance(field, File) else None
109
112
  )
110
113
  await asyncio.to_thread(
111
- brain.generate_paragraphs_index_message,
114
+ brain.generate_paragraphs,
112
115
  self.resource.generate_field_id(fieldid),
113
116
  field_computed_metadata,
114
117
  extracted_text,
@@ -127,7 +130,7 @@ class IndexMessageBuilder:
127
130
  if vo is not None:
128
131
  dimension = vectorset_config.vectorset_index_config.vector_dimension
129
132
  await asyncio.to_thread(
130
- brain.generate_vectors_index_message,
133
+ brain.generate_vectors,
131
134
  self.resource.generate_field_id(fieldid),
132
135
  vo,
133
136
  vectorset=vectorset_config.vectorset_id,
@@ -136,7 +139,7 @@ class IndexMessageBuilder:
136
139
  )
137
140
  if relations:
138
141
  await asyncio.to_thread(
139
- brain.generate_relations_index_message,
142
+ brain.generate_relations,
140
143
  self.resource.generate_field_id(fieldid),
141
144
  field_computed_metadata,
142
145
  basic.usermetadata,
@@ -151,6 +154,7 @@ class IndexMessageBuilder:
151
154
  for field_id in field_ids:
152
155
  brain.delete_field(self.resource.generate_field_id(field_id))
153
156
 
157
+ @observer.wrap({"type": "writer_bm"})
154
158
  async def for_writer_bm(
155
159
  self,
156
160
  messages: list[BrokerMessage],
@@ -192,6 +196,7 @@ class IndexMessageBuilder:
192
196
  )
193
197
  return self.brain.brain
194
198
 
199
+ @observer.wrap({"type": "processor_bm"})
195
200
  async def for_processor_bm(
196
201
  self,
197
202
  messages: list[BrokerMessage],
@@ -223,6 +228,7 @@ class IndexMessageBuilder:
223
228
  )
224
229
  return self.brain.brain
225
230
 
231
+ @observer.wrap({"type": "full"})
226
232
  async def full(self, reindex: bool) -> IndexMessage:
227
233
  await self._apply_resource_index_data(self.brain)
228
234
  basic = await self.get_basic()
@@ -22,6 +22,17 @@ from nucliadb_telemetry import metrics
22
22
 
23
23
  processor_observer = metrics.Observer(
24
24
  "nucliadb_ingest_processor",
25
- labels={"type": "", "source": ""},
25
+ labels={"type": ""},
26
26
  error_mappings={"kb_conflict": KnowledgeBoxConflict},
27
27
  )
28
+
29
+
30
+ index_message_observer = metrics.Observer(
31
+ "index_message_builder",
32
+ labels={"type": ""},
33
+ )
34
+
35
+ brain_observer = metrics.Observer(
36
+ "brain",
37
+ labels={"type": ""},
38
+ )
@@ -462,6 +462,7 @@ class Processor:
462
462
  source=source,
463
463
  )
464
464
 
465
+ @processor_observer.wrap({"type": "generate_index_message_v2"})
465
466
  async def generate_index_message_v2(
466
467
  self,
467
468
  resource: Resource,
@@ -471,14 +472,13 @@ class Processor:
471
472
  builder = IndexMessageBuilder(resource)
472
473
  message_source = messages_source(messages)
473
474
  if message_source == nodewriter_pb2.IndexMessageSource.WRITER:
474
- with processor_observer({"type": "generate_index_message", "source": "writer"}):
475
- return await builder.for_writer_bm(messages, resource_created)
475
+ return await builder.for_writer_bm(messages, resource_created)
476
476
  elif message_source == nodewriter_pb2.IndexMessageSource.PROCESSOR:
477
- with processor_observer({"type": "generate_index_message", "source": "processor"}):
478
- return await builder.for_processor_bm(messages)
477
+ return await builder.for_processor_bm(messages)
479
478
  else: # pragma: no cover
480
479
  raise InvalidBrokerMessage(f"Unknown broker message source: {message_source}")
481
480
 
481
+ @processor_observer.wrap({"type": "generate_index_message_v1"})
482
482
  async def generate_index_message_v1(
483
483
  self,
484
484
  resource: Resource,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.3.5.post3995
3
+ Version: 6.3.5.post3997
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.5.post3995
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.5.post3995
25
- Requires-Dist: nucliadb-protos>=6.3.5.post3995
26
- Requires-Dist: nucliadb-models>=6.3.5.post3995
27
- Requires-Dist: nidx-protos>=6.3.5.post3995
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.5.post3997
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.5.post3997
25
+ Requires-Dist: nucliadb-protos>=6.3.5.post3997
26
+ Requires-Dist: nucliadb-models>=6.3.5.post3997
27
+ Requires-Dist: nidx-protos>=6.3.5.post3997
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn[standard]
@@ -142,20 +142,20 @@ nucliadb/ingest/fields/link.py,sha256=kN_gjRUEEj5cy8K_BwPijYg3TiWhedc24apXYlTbRJ
142
142
  nucliadb/ingest/fields/text.py,sha256=tFvSQJAe0W7ePpp2_WDfLiE2yglR1OTU0Zht9acvOFw,1594
143
143
  nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
144
144
  nucliadb/ingest/orm/brain.py,sha256=8nXdxgI3zYn6DGnCq5ciq3PA7ouhcTW5dSgHaxAO6xg,29074
145
- nucliadb/ingest/orm/brain_v2.py,sha256=3ejtH58X9Hkhvg2m7wrp2eEyIQybKSMHzoJBDtkL0b8,33065
145
+ nucliadb/ingest/orm/brain_v2.py,sha256=XEOfvjpnvSKNrAOtbO4vt9n_PWVbzOhB-seHs76uY0M,33588
146
146
  nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
147
147
  nucliadb/ingest/orm/entities.py,sha256=a-aYuKBUQhxDKFtXOzTAkLlY_t2JiTfaptw2vt3AQDQ,14915
148
148
  nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
149
- nucliadb/ingest/orm/index_message.py,sha256=3Okq-POmGnxpgzysU1bvgj5skRLSDBUpP6dnTYqNA18,15821
149
+ nucliadb/ingest/orm/index_message.py,sha256=fFNYRZTH45fm6IZ9tHNwa4KNgV8KxzwS5uuklRe65ww,16044
150
150
  nucliadb/ingest/orm/knowledgebox.py,sha256=Bfb4-MIQWlaJrQAUDbgs_iIsXCYjS7s5YiiGl_Jb4jo,23887
151
- nucliadb/ingest/orm/metrics.py,sha256=z-xVOJWeWXZFPIzRg_NB8nlbkdJFs_myEwLROdTNp24,1110
151
+ nucliadb/ingest/orm/metrics.py,sha256=OiuggTh-n3kZHA2G73NEUdIlh8c3yFrbusI88DK-Mko,1273
152
152
  nucliadb/ingest/orm/resource.py,sha256=GjxcEPuu8bM06Uea7_yJk0UFvOfiZNP9i_G4V-4D8_U,46845
153
153
  nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,2693
154
154
  nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
155
155
  nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
156
156
  nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
157
157
  nucliadb/ingest/orm/processor/pgcatalog.py,sha256=H-OCRz0RuTUb80LZBxDowLA9V7ECv1DWiXlnzKW5XGI,3103
158
- nucliadb/ingest/orm/processor/processor.py,sha256=flw2U1OB6il9mP7h6fCY_dZX-Jo6XemWq1dAwtH7pMs,33202
158
+ nucliadb/ingest/orm/processor/processor.py,sha256=q2iBJJ_5SV_bxA3t5MrbV70iQhir94aFbjZjnYJzEAQ,33141
159
159
  nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
160
160
  nucliadb/ingest/service/__init__.py,sha256=MME_G_ERxzJR6JW_hfE2qcfXpmpH1kdG-S0a-M0qRm8,2043
161
161
  nucliadb/ingest/service/exceptions.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -360,8 +360,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
360
360
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
361
361
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
362
362
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
363
- nucliadb-6.3.5.post3995.dist-info/METADATA,sha256=WMPXEMAoMggoPDt4NkDQBnl5014KHnBKxU7AEAueHsk,4301
364
- nucliadb-6.3.5.post3995.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
365
- nucliadb-6.3.5.post3995.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
366
- nucliadb-6.3.5.post3995.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
367
- nucliadb-6.3.5.post3995.dist-info/RECORD,,
363
+ nucliadb-6.3.5.post3997.dist-info/METADATA,sha256=K-G5B3YkJzhIf9IiEVxZT9t41hvtsJymVAn5KZheMVY,4301
364
+ nucliadb-6.3.5.post3997.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
365
+ nucliadb-6.3.5.post3997.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
366
+ nucliadb-6.3.5.post3997.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
367
+ nucliadb-6.3.5.post3997.dist-info/RECORD,,