nucliadb 6.3.5.post3990__py3-none-any.whl → 6.3.5.post3996__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,7 @@ from typing import Optional
24
24
 
25
25
  from nucliadb.common import ids
26
26
  from nucliadb.ingest import logger
27
+ from nucliadb.ingest.orm.metrics import brain_observer as observer
27
28
  from nucliadb.ingest.orm.utils import compute_paragraph_key
28
29
  from nucliadb_models.labels import BASE_LABELS, LABEL_HIDDEN, flatten_resource_labels
29
30
  from nucliadb_models.metadata import ResourceProcessingStatus
@@ -74,7 +75,8 @@ class ResourceBrainV2:
74
75
  self.brain: PBBrainResource = PBBrainResource(resource=ResourceID(uuid=rid))
75
76
  self.labels: dict[str, set[str]] = deepcopy(BASE_LABELS)
76
77
 
77
- def generate_resource_indexing_metadata(
78
+ @observer.wrap({"type": "generate_resource_metadata"})
79
+ def generate_resource_metadata(
78
80
  self,
79
81
  basic: Basic,
80
82
  user_relations: Relations,
@@ -89,7 +91,8 @@ class ResourceBrainV2:
89
91
  if security is not None:
90
92
  self._set_resource_security(security)
91
93
 
92
- def generate_texts_index_message(
94
+ @observer.wrap({"type": "generate_texts"})
95
+ def generate_texts(
93
96
  self,
94
97
  field_key: str,
95
98
  extracted_text: ExtractedText,
@@ -112,6 +115,7 @@ class ResourceBrainV2:
112
115
  basic_user_metadata,
113
116
  )
114
117
 
118
+ @observer.wrap({"type": "apply_field_text"})
115
119
  def apply_field_text(
116
120
  self,
117
121
  field_key: str,
@@ -131,6 +135,7 @@ class ResourceBrainV2:
131
135
  full_field_id = ids.FieldId(rid=self.rid, type=ftype, key=fkey).full()
132
136
  self.brain.texts_to_delete.append(full_field_id)
133
137
 
138
+ @observer.wrap({"type": "apply_field_labels"})
134
139
  def apply_field_labels(
135
140
  self,
136
141
  field_key: str,
@@ -200,7 +205,8 @@ class ResourceBrainV2:
200
205
 
201
206
  self.brain.texts[field_key].labels.extend(flatten_resource_labels(labels))
202
207
 
203
- def generate_paragraphs_index_message(
208
+ @observer.wrap({"type": "generate_paragraphs"})
209
+ def generate_paragraphs(
204
210
  self,
205
211
  field_key: str,
206
212
  field_computed_metadata: FieldComputedMetadata,
@@ -228,6 +234,7 @@ class ResourceBrainV2:
228
234
  skip_paragraphs=skip_index,
229
235
  )
230
236
 
237
+ @observer.wrap({"type": "apply_field_paragraphs"})
231
238
  def apply_field_paragraphs(
232
239
  self,
233
240
  field_key: str,
@@ -371,7 +378,8 @@ class ResourceBrainV2:
371
378
  pc.valid.setdefault(paragraph_key, []).append(classif_label)
372
379
  return pc
373
380
 
374
- def generate_relations_index_message(
381
+ @observer.wrap({"type": "generate_relations"})
382
+ def generate_relations(
375
383
  self,
376
384
  field_key: str,
377
385
  field_computed_metadata: Optional[FieldComputedMetadata],
@@ -477,7 +485,8 @@ class ResourceBrainV2:
477
485
  self.brain.sentences_to_delete.append(full_field_id)
478
486
  self.brain.relation_fields_to_delete.append(field_key)
479
487
 
480
- def generate_vectors_index_message(
488
+ @observer.wrap({"type": "generate_vectors"})
489
+ def generate_vectors(
481
490
  self,
482
491
  field_id: str,
483
492
  vo: utils_pb2.VectorObject,
@@ -547,6 +556,7 @@ class ResourceBrainV2:
547
556
  full_field_id = ids.FieldId(rid=self.rid, type=fid.type, key=fid.key).full()
548
557
  self.brain.vector_prefixes_to_delete[vectorset].items.append(full_field_id)
549
558
 
559
+ @observer.wrap({"type": "apply_field_vector"})
550
560
  def _apply_field_vector(
551
561
  self,
552
562
  field_id: str,
@@ -764,6 +774,7 @@ class ParagraphPages:
764
774
  self.positions = positions
765
775
  self._materialized = self._materialize_page_numbers(positions)
766
776
 
777
+ @observer.wrap({"type": "materialize_page_numbers"})
767
778
  def _materialize_page_numbers(self, positions: FilePagePositions) -> list[int]:
768
779
  page_numbers_by_index = []
769
780
  for page_number, (page_start, page_end) in positions.items():
@@ -26,6 +26,7 @@ from nucliadb.common import datamanagers
26
26
  from nucliadb.ingest.fields.exceptions import FieldAuthorNotFound
27
27
  from nucliadb.ingest.fields.file import File
28
28
  from nucliadb.ingest.orm.brain_v2 import ResourceBrainV2 as ResourceBrain
29
+ from nucliadb.ingest.orm.metrics import index_message_observer as observer
29
30
  from nucliadb.ingest.orm.resource import Resource, get_file_page_positions
30
31
  from nucliadb_protos.knowledgebox_pb2 import VectorSetConfig
31
32
  from nucliadb_protos.noderesources_pb2 import Resource as IndexMessage
@@ -40,6 +41,7 @@ class IndexMessageBuilder:
40
41
  self.resource = resource
41
42
  self.brain = ResourceBrain(resource.uuid)
42
43
 
44
+ @observer.wrap({"type": "resource_data"})
43
45
  async def _apply_resource_index_data(self, brain: ResourceBrain) -> None:
44
46
  # Set the metadata at the resource level
45
47
  basic = await self.resource.get_basic()
@@ -48,7 +50,7 @@ class IndexMessageBuilder:
48
50
  origin = await self.resource.get_origin()
49
51
  security = await self.resource.get_security()
50
52
  await asyncio.to_thread(
51
- brain.generate_resource_indexing_metadata,
53
+ brain.generate_resource_metadata,
52
54
  basic,
53
55
  user_relations,
54
56
  origin,
@@ -56,6 +58,7 @@ class IndexMessageBuilder:
56
58
  security,
57
59
  )
58
60
 
61
+ @observer.wrap({"type": "field_data"})
59
62
  async def _apply_field_index_data(
60
63
  self,
61
64
  brain: ResourceBrain,
@@ -87,7 +90,7 @@ class IndexMessageBuilder:
87
90
  except FieldAuthorNotFound:
88
91
  field_author = None
89
92
  await asyncio.to_thread(
90
- brain.generate_texts_index_message,
93
+ brain.generate_texts,
91
94
  self.resource.generate_field_id(fieldid),
92
95
  extracted_text,
93
96
  field_computed_metadata,
@@ -108,7 +111,7 @@ class IndexMessageBuilder:
108
111
  await get_file_page_positions(field) if isinstance(field, File) else None
109
112
  )
110
113
  await asyncio.to_thread(
111
- brain.generate_paragraphs_index_message,
114
+ brain.generate_paragraphs,
112
115
  self.resource.generate_field_id(fieldid),
113
116
  field_computed_metadata,
114
117
  extracted_text,
@@ -127,7 +130,7 @@ class IndexMessageBuilder:
127
130
  if vo is not None:
128
131
  dimension = vectorset_config.vectorset_index_config.vector_dimension
129
132
  await asyncio.to_thread(
130
- brain.generate_vectors_index_message,
133
+ brain.generate_vectors,
131
134
  self.resource.generate_field_id(fieldid),
132
135
  vo,
133
136
  vectorset=vectorset_config.vectorset_id,
@@ -136,7 +139,7 @@ class IndexMessageBuilder:
136
139
  )
137
140
  if relations:
138
141
  await asyncio.to_thread(
139
- brain.generate_relations_index_message,
142
+ brain.generate_relations,
140
143
  self.resource.generate_field_id(fieldid),
141
144
  field_computed_metadata,
142
145
  basic.usermetadata,
@@ -151,6 +154,7 @@ class IndexMessageBuilder:
151
154
  for field_id in field_ids:
152
155
  brain.delete_field(self.resource.generate_field_id(field_id))
153
156
 
157
+ @observer.wrap({"type": "writer_bm"})
154
158
  async def for_writer_bm(
155
159
  self,
156
160
  messages: list[BrokerMessage],
@@ -192,6 +196,7 @@ class IndexMessageBuilder:
192
196
  )
193
197
  return self.brain.brain
194
198
 
199
+ @observer.wrap({"type": "processor_bm"})
195
200
  async def for_processor_bm(
196
201
  self,
197
202
  messages: list[BrokerMessage],
@@ -223,6 +228,7 @@ class IndexMessageBuilder:
223
228
  )
224
229
  return self.brain.brain
225
230
 
231
+ @observer.wrap({"type": "full"})
226
232
  async def full(self, reindex: bool) -> IndexMessage:
227
233
  await self._apply_resource_index_data(self.brain)
228
234
  basic = await self.get_basic()
@@ -22,6 +22,17 @@ from nucliadb_telemetry import metrics
22
22
 
23
23
  processor_observer = metrics.Observer(
24
24
  "nucliadb_ingest_processor",
25
- labels={"type": "", "source": ""},
25
+ labels={"type": ""},
26
26
  error_mappings={"kb_conflict": KnowledgeBoxConflict},
27
27
  )
28
+
29
+
30
+ index_message_observer = metrics.Observer(
31
+ "index_message_builder",
32
+ labels={"type": ""},
33
+ )
34
+
35
+ brain_observer = metrics.Observer(
36
+ "brain",
37
+ labels={"type": ""},
38
+ )
@@ -23,8 +23,8 @@ from dataclasses import dataclass, field
23
23
  from typing import Optional
24
24
 
25
25
  from nucliadb.ingest.orm.resource import Resource
26
- from nucliadb.ingest.processing import ProcessingEngine, PushPayload, Source
27
- from nucliadb_models.text import PushTextFormat, Text
26
+ from nucliadb.ingest.processing import ProcessingEngine
27
+ from nucliadb.models.internal.processing import PushPayload, PushTextFormat, Source, Text
28
28
  from nucliadb_protos import resources_pb2, writer_pb2
29
29
  from nucliadb_protos.resources_pb2 import FieldType
30
30
  from nucliadb_utils.utilities import Utility, get_partitioning, get_utility
@@ -462,6 +462,7 @@ class Processor:
462
462
  source=source,
463
463
  )
464
464
 
465
+ @processor_observer.wrap({"type": "generate_index_message_v2"})
465
466
  async def generate_index_message_v2(
466
467
  self,
467
468
  resource: Resource,
@@ -471,14 +472,13 @@ class Processor:
471
472
  builder = IndexMessageBuilder(resource)
472
473
  message_source = messages_source(messages)
473
474
  if message_source == nodewriter_pb2.IndexMessageSource.WRITER:
474
- with processor_observer({"type": "generate_index_message", "source": "writer"}):
475
- return await builder.for_writer_bm(messages, resource_created)
475
+ return await builder.for_writer_bm(messages, resource_created)
476
476
  elif message_source == nodewriter_pb2.IndexMessageSource.PROCESSOR:
477
- with processor_observer({"type": "generate_index_message", "source": "processor"}):
478
- return await builder.for_processor_bm(messages)
477
+ return await builder.for_processor_bm(messages)
479
478
  else: # pragma: no cover
480
479
  raise InvalidBrokerMessage(f"Unknown broker message source: {message_source}")
481
480
 
481
+ @processor_observer.wrap({"type": "generate_index_message_v1"})
482
482
  async def generate_index_message_v1(
483
483
  self,
484
484
  resource: Resource,
@@ -20,8 +20,7 @@
20
20
  import urllib.parse
21
21
  from typing import Sequence
22
22
 
23
- from nucliadb.ingest.processing import PushPayload
24
- from nucliadb_models.text import PushTextFormat, Text
23
+ from nucliadb.models.internal.processing import PushPayload, PushTextFormat, Text
25
24
  from nucliadb_protos.resources_pb2 import (
26
25
  ExtractedTextWrapper,
27
26
  FieldComputedMetadataWrapper,
@@ -25,15 +25,14 @@ import uuid
25
25
  from collections import defaultdict
26
26
  from contextlib import AsyncExitStack
27
27
  from enum import Enum
28
- from typing import TYPE_CHECKING, Any, Optional, TypeVar
28
+ from typing import Any, Optional, TypeVar
29
29
 
30
30
  import aiohttp
31
31
  import backoff
32
32
  import jwt
33
- from pydantic import BaseModel, Field
34
33
 
35
34
  import nucliadb_models as models
36
- from nucliadb_models.labels import ClassificationLabel
35
+ from nucliadb.models.internal.processing import ClassificationLabel, ProcessingInfo, PushPayload
37
36
  from nucliadb_models.resource import QueueType
38
37
  from nucliadb_protos.resources_pb2 import CloudFile
39
38
  from nucliadb_protos.resources_pb2 import FieldFile as FieldFilePB
@@ -52,10 +51,6 @@ logger = logging.getLogger(__name__)
52
51
 
53
52
  _T = TypeVar("_T")
54
53
 
55
- if TYPE_CHECKING: # pragma: no cover
56
- SourceValue = CloudFile.Source.V
57
- else:
58
- SourceValue = int
59
54
 
60
55
  RETRIABLE_EXCEPTIONS = (aiohttp.client_exceptions.ClientConnectorError,)
61
56
  MAX_TRIES = 4
@@ -71,53 +66,6 @@ processing_observer = metrics.Observer(
71
66
  )
72
67
 
73
68
 
74
- class Source(SourceValue, Enum): # type: ignore
75
- HTTP = 0
76
- INGEST = 1
77
-
78
-
79
- class ProcessingInfo(BaseModel):
80
- seqid: Optional[int] = None
81
- account_seq: Optional[int] = None
82
- queue: Optional[QueueType] = None
83
-
84
-
85
- class PushPayload(BaseModel):
86
- # There are multiple options of payload
87
- uuid: str
88
- slug: Optional[str] = None
89
- kbid: str
90
- source: Optional[Source] = None
91
- userid: str
92
-
93
- title: Optional[str] = None
94
-
95
- genericfield: dict[str, models.Text] = {}
96
-
97
- # New File
98
- filefield: dict[str, str] = Field(
99
- default={},
100
- description="Map of each file field to the jwt token computed in ProcessingEngine methods",
101
- )
102
-
103
- # New Link
104
- linkfield: dict[str, models.LinkUpload] = {}
105
-
106
- # Diff on Text Field
107
- textfield: dict[str, models.Text] = {}
108
-
109
- # New conversations to process
110
- conversationfield: dict[str, models.PushConversation] = {}
111
-
112
- # Only internal
113
- partition: int
114
-
115
- # List of available processing options (with default values)
116
- processing_options: Optional[models.PushProcessingOptions] = Field(
117
- default_factory=models.PushProcessingOptions
118
- )
119
-
120
-
121
69
  async def start_processing_engine():
122
70
  processing_engine = get_utility(Utility.PROCESSING)
123
71
  if processing_engine is not None:
@@ -0,0 +1,19 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
@@ -0,0 +1,160 @@
1
+ # Copyright (C) 2021 Bosutech XXI S.L.
2
+ #
3
+ # nucliadb is offered under the AGPL v3.0 and as commercial software.
4
+ # For commercial licensing, contact us at info@nuclia.com.
5
+ #
6
+ # AGPL:
7
+ # This program is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU Affero General Public License as
9
+ # published by the Free Software Foundation, either version 3 of the
10
+ # License, or (at your option) any later version.
11
+ #
12
+ # This program is distributed in the hope that it will be useful,
13
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ # GNU Affero General Public License for more details.
16
+ #
17
+ # You should have received a copy of the GNU Affero General Public License
18
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
19
+ #
20
+
21
+
22
+ # Processing classes (those sent to push endpoints)
23
+
24
+
25
+ from datetime import datetime
26
+ from enum import Enum
27
+ from typing import TYPE_CHECKING, Optional
28
+
29
+ from pydantic import BaseModel, Field
30
+
31
+ from nucliadb_models.processing import PushProcessingOptions
32
+ from nucliadb_models.resource import QueueType
33
+ from nucliadb_protos.resources_pb2 import CloudFile
34
+
35
+ if TYPE_CHECKING: # pragma: no cover
36
+ SourceValue = CloudFile.Source.V
37
+ else:
38
+ SourceValue = int
39
+
40
+
41
+ class ClassificationLabel(BaseModel):
42
+ """
43
+ NOTE: This model is used to send the labels of each field in the processing requests.
44
+ It is not meant to be used by API users.
45
+ """
46
+
47
+ labelset: str
48
+ label: str
49
+
50
+ def __hash__(self):
51
+ return hash((self.labelset, self.label))
52
+
53
+
54
+ class PushTextFormat(int, Enum):
55
+ PLAIN = 0
56
+ HTML = 1
57
+ MARKDOWN = 2
58
+ RST = 3
59
+ JSON = 4
60
+ KEEP_MARKDOWN = 5
61
+ JSONL = 6
62
+ PLAIN_BLANKLINE_SPLIT = 7
63
+
64
+
65
+ class Text(BaseModel):
66
+ body: str
67
+ format: PushTextFormat
68
+ extract_strategy: Optional[str] = None
69
+ classification_labels: list[ClassificationLabel] = []
70
+
71
+
72
+ class LinkUpload(BaseModel):
73
+ link: str
74
+ headers: dict[str, str] = {}
75
+ cookies: dict[str, str] = {}
76
+ localstorage: dict[str, str] = {}
77
+ css_selector: Optional[str] = Field(
78
+ None,
79
+ title="Css selector",
80
+ description="Css selector to parse the link",
81
+ )
82
+ xpath: Optional[str] = Field(
83
+ None,
84
+ title="Xpath",
85
+ description="Xpath to parse the link",
86
+ )
87
+ extract_strategy: Optional[str] = None
88
+ classification_labels: list[ClassificationLabel] = []
89
+
90
+
91
+ class PushMessageFormat(int, Enum):
92
+ PLAIN = 0
93
+ HTML = 1
94
+ MARKDOWN = 2
95
+ RST = 3
96
+ JSON = 4
97
+
98
+
99
+ class PushMessageContent(BaseModel):
100
+ text: Optional[str] = None
101
+ format: PushMessageFormat
102
+ attachments: list[str] = []
103
+
104
+
105
+ class PushMessage(BaseModel):
106
+ timestamp: Optional[datetime] = None
107
+ who: Optional[str] = None
108
+ to: list[str] = []
109
+ content: PushMessageContent
110
+ ident: str
111
+
112
+
113
+ class PushConversation(BaseModel):
114
+ messages: list[PushMessage] = []
115
+ extract_strategy: Optional[str] = None
116
+ classification_labels: list[ClassificationLabel] = []
117
+
118
+
119
+ class Source(SourceValue, Enum): # type: ignore
120
+ HTTP = 0
121
+ INGEST = 1
122
+
123
+
124
+ class ProcessingInfo(BaseModel):
125
+ seqid: Optional[int] = None
126
+ account_seq: Optional[int] = None
127
+ queue: Optional[QueueType] = None
128
+
129
+
130
+ class PushPayload(BaseModel):
131
+ uuid: str
132
+ slug: Optional[str] = None
133
+ kbid: str
134
+ source: Optional[Source] = None
135
+ userid: str
136
+
137
+ title: Optional[str] = None
138
+
139
+ genericfield: dict[str, Text] = {}
140
+
141
+ # New File
142
+ filefield: dict[str, str] = Field(
143
+ default={},
144
+ description="Map of each file field to the jwt token computed in ProcessingEngine methods",
145
+ )
146
+
147
+ # New Link
148
+ linkfield: dict[str, LinkUpload] = {}
149
+
150
+ # Diff on Text Field
151
+ textfield: dict[str, Text] = {}
152
+
153
+ # New conversations to process
154
+ conversationfield: dict[str, PushConversation] = {}
155
+
156
+ # Only internal
157
+ partition: int
158
+
159
+ # List of available processing options (with default values)
160
+ processing_options: Optional[PushProcessingOptions] = Field(default_factory=PushProcessingOptions)
@@ -27,7 +27,7 @@ from starlette.requests import Request
27
27
  import nucliadb_models as models
28
28
  from nucliadb.common.maindb.utils import get_driver
29
29
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
30
- from nucliadb.ingest.processing import PushPayload, Source
30
+ from nucliadb.models.internal.processing import PushPayload, Source
31
31
  from nucliadb.writer import SERVICE_NAME
32
32
  from nucliadb.writer.api.constants import (
33
33
  X_FILE_PASSWORD,
@@ -33,7 +33,7 @@ from nucliadb.common.maindb.driver import Driver
33
33
  from nucliadb.common.maindb.exceptions import ConflictError, NotFoundError
34
34
  from nucliadb.common.maindb.utils import get_driver
35
35
  from nucliadb.ingest.orm.knowledgebox import KnowledgeBox
36
- from nucliadb.ingest.processing import ProcessingInfo, PushPayload, Source
36
+ from nucliadb.models.internal.processing import ProcessingInfo, PushPayload, Source
37
37
  from nucliadb.writer import SERVICE_NAME, logger
38
38
  from nucliadb.writer.api.constants import X_NUCLIADB_USER, X_SKIP_STORE
39
39
  from nucliadb.writer.api.v1 import transaction
@@ -33,7 +33,7 @@ from starlette.requests import Request as StarletteRequest
33
33
 
34
34
  from nucliadb.common import datamanagers
35
35
  from nucliadb.ingest.orm.utils import set_title
36
- from nucliadb.ingest.processing import PushPayload, Source
36
+ from nucliadb.models.internal.processing import PushPayload, Source
37
37
  from nucliadb.models.responses import HTTPClientError
38
38
  from nucliadb.writer import SERVICE_NAME
39
39
  from nucliadb.writer.api.constants import X_EXTRACT_STRATEGY, X_FILENAME, X_LANGUAGE, X_MD5, X_PASSWORD
@@ -28,16 +28,15 @@ from nucliadb.common.models_utils.from_proto import (
28
28
  RelationTypeMap,
29
29
  )
30
30
  from nucliadb.ingest.orm.utils import set_title
31
- from nucliadb.ingest.processing import PushPayload
31
+ from nucliadb.models.internal.processing import ClassificationLabel, PushPayload, PushTextFormat, Text
32
32
  from nucliadb_models.content_types import GENERIC_MIME_TYPE
33
33
  from nucliadb_models.file import FileField
34
- from nucliadb_models.labels import ClassificationLabel
35
34
  from nucliadb_models.link import LinkField
36
35
  from nucliadb_models.metadata import (
37
36
  ParagraphAnnotation,
38
37
  QuestionAnswerAnnotation,
39
38
  )
40
- from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE, PushTextFormat, Text
39
+ from nucliadb_models.text import TEXT_FORMAT_TO_MIMETYPE
41
40
  from nucliadb_models.writer import (
42
41
  ComingResourcePayload,
43
42
  CreateResourcePayload,
@@ -29,13 +29,12 @@ from nucliadb.common.maindb.driver import Transaction
29
29
  from nucliadb.common.models_utils import from_proto, to_proto
30
30
  from nucliadb.ingest.fields.conversation import Conversation
31
31
  from nucliadb.ingest.orm.resource import Resource as ORMResource
32
- from nucliadb.ingest.processing import PushPayload
32
+ from nucliadb.models.internal import processing as processing_models
33
+ from nucliadb.models.internal.processing import ClassificationLabel, PushConversation, PushPayload
33
34
  from nucliadb.writer import SERVICE_NAME
34
35
  from nucliadb.writer.utilities import get_processing
35
36
  from nucliadb_models.common import FieldTypeName
36
37
  from nucliadb_models.content_types import GENERIC_MIME_TYPE
37
- from nucliadb_models.conversation import PushConversation
38
- from nucliadb_models.labels import ClassificationLabel
39
38
  from nucliadb_models.writer import (
40
39
  CreateResourcePayload,
41
40
  UpdateResourcePayload,
@@ -134,7 +133,7 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
134
133
  including_default_value_fields=True,
135
134
  )
136
135
  parsed_link["link"] = parsed_link.pop("uri", None)
137
- toprocess.linkfield[field_id] = models.LinkUpload(**parsed_link)
136
+ toprocess.linkfield[field_id] = processing_models.LinkUpload(**parsed_link)
138
137
  toprocess.linkfield[field_id].classification_labels = classif_labels
139
138
 
140
139
  if field_type_name is FieldTypeName.TEXT:
@@ -143,8 +142,8 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
143
142
  preserving_proto_field_name=True,
144
143
  including_default_value_fields=True,
145
144
  )
146
- parsed_text["format"] = models.PushTextFormat[parsed_text["format"]]
147
- toprocess.textfield[field_id] = models.Text(**parsed_text)
145
+ parsed_text["format"] = processing_models.PushTextFormat[parsed_text["format"]]
146
+ toprocess.textfield[field_id] = processing_models.Text(**parsed_text)
148
147
  toprocess.textfield[field_id].classification_labels = classif_labels
149
148
 
150
149
  if field_type_name is FieldTypeName.CONVERSATION and isinstance(field, Conversation):
@@ -174,7 +173,7 @@ async def extract_fields(resource: ORMResource, toprocess: PushPayload):
174
173
  parsed_message["content"]["format"] = resources_pb2.MessageContent.Format.Value(
175
174
  parsed_message["content"]["format"]
176
175
  )
177
- full_conversation.messages.append(models.PushMessage(**parsed_message))
176
+ full_conversation.messages.append(processing_models.PushMessage(**parsed_message))
178
177
  toprocess.conversationfield[field_id] = full_conversation
179
178
  toprocess.conversationfield[field_id].classification_labels = classif_labels
180
179
 
@@ -247,9 +246,9 @@ def parse_text_field(
247
246
  etw.field.field_type = resources_pb2.FieldType.TEXT
248
247
  etw.body.text = text_field.body
249
248
  writer.extracted_text.append(etw)
250
- toprocess.textfield[key] = models.Text(
249
+ toprocess.textfield[key] = processing_models.Text(
251
250
  body=text_field.body,
252
- format=getattr(models.PushTextFormat, text_field.format.value),
251
+ format=getattr(processing_models.PushTextFormat, text_field.format.value),
253
252
  extract_strategy=text_field.extract_strategy,
254
253
  classification_labels=classif_labels,
255
254
  )
@@ -393,7 +392,7 @@ def parse_link_field(
393
392
  if link_field.extract_strategy is not None:
394
393
  writer.links[key].extract_strategy = link_field.extract_strategy
395
394
 
396
- toprocess.linkfield[key] = models.LinkUpload(
395
+ toprocess.linkfield[key] = processing_models.LinkUpload(
397
396
  link=link_field.uri,
398
397
  headers=link_field.headers or {},
399
398
  cookies=link_field.cookies or {},
@@ -424,7 +423,7 @@ async def parse_conversation_field(
424
423
  storage = await get_storage(service_name=SERVICE_NAME)
425
424
  processing = get_processing()
426
425
  field_value = resources_pb2.Conversation()
427
- convs = models.PushConversation()
426
+ convs = processing_models.PushConversation()
428
427
  for message in conversation_field.messages:
429
428
  cm = resources_pb2.Message()
430
429
  if message.timestamp:
@@ -437,9 +436,9 @@ async def parse_conversation_field(
437
436
  if message.type_ is not None:
438
437
  cm.type = resources_pb2.Message.MessageType.Value(message.type_.value)
439
438
 
440
- processing_message_content = models.PushMessageContent(
439
+ processing_message_content = processing_models.PushMessageContent(
441
440
  text=message.content.text,
442
- format=getattr(models.PushMessageFormat, message.content.format.value),
441
+ format=getattr(processing_models.PushMessageFormat, message.content.format.value),
443
442
  )
444
443
 
445
444
  cm.content.text = message.content.text
@@ -472,7 +471,7 @@ async def parse_conversation_field(
472
471
  await processing.convert_internal_cf_to_str(cf_conv_field, storage)
473
472
  )
474
473
 
475
- processing_message = models.PushMessage(
474
+ processing_message = processing_models.PushMessage(
476
475
  timestamp=message.timestamp,
477
476
  content=processing_message_content,
478
477
  ident=message.ident,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb
3
- Version: 6.3.5.post3990
3
+ Version: 6.3.5.post3996
4
4
  Summary: NucliaDB
5
5
  Author-email: Nuclia <nucliadb@nuclia.com>
6
6
  License: AGPL
@@ -20,11 +20,11 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3 :: Only
21
21
  Requires-Python: <4,>=3.9
22
22
  Description-Content-Type: text/markdown
23
- Requires-Dist: nucliadb-telemetry[all]>=6.3.5.post3990
24
- Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.5.post3990
25
- Requires-Dist: nucliadb-protos>=6.3.5.post3990
26
- Requires-Dist: nucliadb-models>=6.3.5.post3990
27
- Requires-Dist: nidx-protos>=6.3.5.post3990
23
+ Requires-Dist: nucliadb-telemetry[all]>=6.3.5.post3996
24
+ Requires-Dist: nucliadb-utils[cache,fastapi,storages]>=6.3.5.post3996
25
+ Requires-Dist: nucliadb-protos>=6.3.5.post3996
26
+ Requires-Dist: nucliadb-models>=6.3.5.post3996
27
+ Requires-Dist: nidx-protos>=6.3.5.post3996
28
28
  Requires-Dist: nucliadb-admin-assets>=1.0.0.post1224
29
29
  Requires-Dist: nuclia-models>=0.24.2
30
30
  Requires-Dist: uvicorn[standard]
@@ -118,7 +118,7 @@ nucliadb/export_import/utils.py,sha256=8XOVMYXXw8b4ikojG7RjQ4tKN3Xu7nfu2yCUOqD50
118
118
  nucliadb/ingest/__init__.py,sha256=fsw3C38VP50km3R-nHL775LNGPpJ4JxqXJ2Ib1f5SqE,1011
119
119
  nucliadb/ingest/app.py,sha256=TaVgh5B2riFVmcsrbPb7a5YCzmnybjx-NK0BXgTwGAY,7535
120
120
  nucliadb/ingest/partitions.py,sha256=2NIhMYbNT0TNBL6bX1UMSi7vxFGICstCKEqsB0TXHOE,2410
121
- nucliadb/ingest/processing.py,sha256=7NNoVxbSwsRdbo5goqVSrUc_QXZRVfOT_jZPzrmbxJQ,22207
121
+ nucliadb/ingest/processing.py,sha256=QmkHq-BU4vub7JRWe9VHvQ2DcAmT6-CzgFXuZxXhcBU,20953
122
122
  nucliadb/ingest/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
123
123
  nucliadb/ingest/serialize.py,sha256=-TIjibJTbMqAowzRvyrG3R209vKqBZqXpdrQL9Dq4lo,16135
124
124
  nucliadb/ingest/settings.py,sha256=0B-wQNa8FLqtNcQgRzh-fuIuGptM816XHcbH1NQKfmE,3050
@@ -142,20 +142,20 @@ nucliadb/ingest/fields/link.py,sha256=kN_gjRUEEj5cy8K_BwPijYg3TiWhedc24apXYlTbRJ
142
142
  nucliadb/ingest/fields/text.py,sha256=tFvSQJAe0W7ePpp2_WDfLiE2yglR1OTU0Zht9acvOFw,1594
143
143
  nucliadb/ingest/orm/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
144
144
  nucliadb/ingest/orm/brain.py,sha256=8nXdxgI3zYn6DGnCq5ciq3PA7ouhcTW5dSgHaxAO6xg,29074
145
- nucliadb/ingest/orm/brain_v2.py,sha256=3ejtH58X9Hkhvg2m7wrp2eEyIQybKSMHzoJBDtkL0b8,33065
145
+ nucliadb/ingest/orm/brain_v2.py,sha256=XEOfvjpnvSKNrAOtbO4vt9n_PWVbzOhB-seHs76uY0M,33588
146
146
  nucliadb/ingest/orm/broker_message.py,sha256=XWaiZgDOz94NPOPT-hqbRr5ZkpVimUw6PjUJNftfoVw,7514
147
147
  nucliadb/ingest/orm/entities.py,sha256=a-aYuKBUQhxDKFtXOzTAkLlY_t2JiTfaptw2vt3AQDQ,14915
148
148
  nucliadb/ingest/orm/exceptions.py,sha256=k4Esv4NtL4TrGTcsQpwrSfDhPQpiYcRbB1SpYmBX5MY,1432
149
- nucliadb/ingest/orm/index_message.py,sha256=3Okq-POmGnxpgzysU1bvgj5skRLSDBUpP6dnTYqNA18,15821
149
+ nucliadb/ingest/orm/index_message.py,sha256=fFNYRZTH45fm6IZ9tHNwa4KNgV8KxzwS5uuklRe65ww,16044
150
150
  nucliadb/ingest/orm/knowledgebox.py,sha256=Bfb4-MIQWlaJrQAUDbgs_iIsXCYjS7s5YiiGl_Jb4jo,23887
151
- nucliadb/ingest/orm/metrics.py,sha256=z-xVOJWeWXZFPIzRg_NB8nlbkdJFs_myEwLROdTNp24,1110
151
+ nucliadb/ingest/orm/metrics.py,sha256=OiuggTh-n3kZHA2G73NEUdIlh8c3yFrbusI88DK-Mko,1273
152
152
  nucliadb/ingest/orm/resource.py,sha256=GjxcEPuu8bM06Uea7_yJk0UFvOfiZNP9i_G4V-4D8_U,46845
153
- nucliadb/ingest/orm/utils.py,sha256=vCe_9UxHu26JDFGLwQ0wH-XyzJIpQCTK-Ow9dtZR5Vg,2716
153
+ nucliadb/ingest/orm/utils.py,sha256=fCQRuyecgqhaY7mcBG93oaXMkzkKb9BFjOcy4-ZiSNw,2693
154
154
  nucliadb/ingest/orm/processor/__init__.py,sha256=Aqd9wCNTvggkMkCY3WvoI8spdr94Jnqk-0iq9XpLs18,922
155
155
  nucliadb/ingest/orm/processor/auditing.py,sha256=TeYhXGJRyQ7ROytbb2u8R0fIh_FYi3HgTu3S1ribY3U,4623
156
- nucliadb/ingest/orm/processor/data_augmentation.py,sha256=HpSU9olDHcTfECDYCsmm4yA-Hu0mBrd_zTtx50XDGFE,5164
156
+ nucliadb/ingest/orm/processor/data_augmentation.py,sha256=v-pj4GbBWSuO8dQyahs5UDr5ghsyfhCZDS0ftKd6ZYc,5179
157
157
  nucliadb/ingest/orm/processor/pgcatalog.py,sha256=H-OCRz0RuTUb80LZBxDowLA9V7ECv1DWiXlnzKW5XGI,3103
158
- nucliadb/ingest/orm/processor/processor.py,sha256=flw2U1OB6il9mP7h6fCY_dZX-Jo6XemWq1dAwtH7pMs,33202
158
+ nucliadb/ingest/orm/processor/processor.py,sha256=q2iBJJ_5SV_bxA3t5MrbV70iQhir94aFbjZjnYJzEAQ,33141
159
159
  nucliadb/ingest/orm/processor/sequence_manager.py,sha256=uqEphtI1Ir_yk9jRl2gPf7BlzzXWovbARY5MNZSBI_8,1704
160
160
  nucliadb/ingest/service/__init__.py,sha256=MME_G_ERxzJR6JW_hfE2qcfXpmpH1kdG-S0a-M0qRm8,2043
161
161
  nucliadb/ingest/service/exceptions.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
@@ -172,6 +172,8 @@ nucliadb/migrator/settings.py,sha256=3eK6PIwqqtoCea9V7-RMjVfZC_0BC5DqPPlfo9XMyNE
172
172
  nucliadb/migrator/utils.py,sha256=NgUreUvON8_nWEzTxELBMWlfV7E6-6qi-g0DMEbVEz4,2885
173
173
  nucliadb/models/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
174
174
  nucliadb/models/responses.py,sha256=qnuOoc7TrVSUnpikfTwHLKez47_DE4mSFzpxrwtqijA,1599
175
+ nucliadb/models/internal/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
176
+ nucliadb/models/internal/processing.py,sha256=bzPr-hXliY81zMUgG-PDyDiFKP7Xbs71s2d0SIAu4Do,4090
175
177
  nucliadb/purge/__init__.py,sha256=UXbto56EWYLwZj6uEc-flQVe3gDDNFtM6EV-aIkryPU,12353
176
178
  nucliadb/purge/orphan_shards.py,sha256=WSQmVQJQ-rAoQAmypOUJBpDP-9VJchWao3ZLTzHdJ1U,7764
177
179
  nucliadb/reader/__init__.py,sha256=C5Efic7WlGm2U2C5WOyquMFbIj2Pojwe_8mwzVYnOzE,1304
@@ -334,20 +336,20 @@ nucliadb/writer/api/constants.py,sha256=qWEDjFUycrEZnSJyLnNK4PQNodU2oVmkO4NycaEZ
334
336
  nucliadb/writer/api/utils.py,sha256=wIQHlU8RQiIGVLI72suvyVIKlCU44Unh0Ae0IiN6Qwo,1313
335
337
  nucliadb/writer/api/v1/__init__.py,sha256=akI9A_jloNLb0dU4T5zjfdyvmSAiDeIdjAlzNx74FlU,1128
336
338
  nucliadb/writer/api/v1/export_import.py,sha256=elf-EQY5DD3mhw8kWb9tQpDcbrF9sY6VFYqxQOjuVP0,8201
337
- nucliadb/writer/api/v1/field.py,sha256=FySCMpcruSAKGeepeAlOihjwxyUPcDO73Uilq5VDWRk,18514
339
+ nucliadb/writer/api/v1/field.py,sha256=KOOBqBJzwsNczn_isxl-YFBL-bmduz3rzSDWMbAJefc,18523
338
340
  nucliadb/writer/api/v1/knowledgebox.py,sha256=PHEYDFa-sN5JrI8-EiVVg5FDOsRuCLT43kyAB4xt-xA,9530
339
341
  nucliadb/writer/api/v1/learning_config.py,sha256=CKBjqcbewkfPwGUPLDWzZSpro6XkmCaVppe5Qtpu5Go,3117
340
- nucliadb/writer/api/v1/resource.py,sha256=r5YTAZuHzaiBz1dLstWxvG74_LJGl2mgjpEsDmRB2qY,19777
342
+ nucliadb/writer/api/v1/resource.py,sha256=jxphiyeXJq342BR1R8pRQ81L0i3Tczf_Yarqx_DqvWs,19786
341
343
  nucliadb/writer/api/v1/router.py,sha256=RjuoWLpZer6Kl2BW_wznpNo6XL3BOpdTGqXZCn3QrrQ,1034
342
344
  nucliadb/writer/api/v1/services.py,sha256=3AUjk-SmvqJx76v7y89DZx6oyasojPliGYeniRQjpcU,13337
343
345
  nucliadb/writer/api/v1/slug.py,sha256=xlVBDBpRi9bNulpBHZwhyftVvulfE0zFm1XZIWl-AKY,2389
344
346
  nucliadb/writer/api/v1/transaction.py,sha256=d2Vbgnkk_-FLGSTt3vfldwiJIUf0XoyD0wP1jQNz_DY,2430
345
- nucliadb/writer/api/v1/upload.py,sha256=hLMHXSaqEOE-vjKjhIupgdx8klJc3mVQp_oMwx5N-7o,33800
347
+ nucliadb/writer/api/v1/upload.py,sha256=fwWXA5BuLPuGKhOcuyf0CdutWJITjJ6fAvDzV_X9VsU,33809
346
348
  nucliadb/writer/api/v1/vectorsets.py,sha256=F3iMViL5G95_Tns4aO2SOA0DwAzxK2_P8MXxtd_XLRE,6973
347
349
  nucliadb/writer/resource/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
348
350
  nucliadb/writer/resource/audit.py,sha256=FvxMZPzrNHtd31HgpZEvxzwAkbxJTZRhPLqRYYJi3tA,1426
349
- nucliadb/writer/resource/basic.py,sha256=P2VXXXLKs43_Cd7Uvrcd-JTeuOJuUGu1Jpx8eujGi7Q,10451
350
- nucliadb/writer/resource/field.py,sha256=e5QGkR5ZDT1VUQgMXK7v6GGXJ2eek6jxGA0nPqjq_g4,20241
351
+ nucliadb/writer/resource/basic.py,sha256=fjxZEsC_ftuRrpPDOQqSDfZR6JlVNSFPMckVGmjQ4lY,10426
352
+ nucliadb/writer/resource/field.py,sha256=wL71wScbLVhu5LHv_aCUBFae6LhKtFr1aXKEubMSgcI,20366
351
353
  nucliadb/writer/resource/origin.py,sha256=pvhUDdU0mlWPUcpoQi4LDUJaRtfjzVVrA8XcGVI_N8k,2021
352
354
  nucliadb/writer/tus/__init__.py,sha256=huWpKnDnjsrKlBBJk30ta5vamlA-4x0TbPs_2Up8hyM,5443
353
355
  nucliadb/writer/tus/azure.py,sha256=XhWAlWTM0vmXcXtuEPYjjeEhuZjiZXZu8q9WsJ7omFE,4107
@@ -358,8 +360,8 @@ nucliadb/writer/tus/local.py,sha256=7jYa_w9b-N90jWgN2sQKkNcomqn6JMVBOVeDOVYJHto,
358
360
  nucliadb/writer/tus/s3.py,sha256=vF0NkFTXiXhXq3bCVXXVV-ED38ECVoUeeYViP8uMqcU,8357
359
361
  nucliadb/writer/tus/storage.py,sha256=ToqwjoYnjI4oIcwzkhha_MPxi-k4Jk3Lt55zRwaC1SM,2903
360
362
  nucliadb/writer/tus/utils.py,sha256=MSdVbRsRSZVdkaum69_0wku7X3p5wlZf4nr6E0GMKbw,2556
361
- nucliadb-6.3.5.post3990.dist-info/METADATA,sha256=nT9CHADQl_oE04SzbtUN2-ah8Rd2IHj9JnfasePEqy4,4301
362
- nucliadb-6.3.5.post3990.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
363
- nucliadb-6.3.5.post3990.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
364
- nucliadb-6.3.5.post3990.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
365
- nucliadb-6.3.5.post3990.dist-info/RECORD,,
363
+ nucliadb-6.3.5.post3996.dist-info/METADATA,sha256=4JGtXA-f0Q61lp2HVgY0ujHmW4VAQ0CP6MhdLtDlcNc,4301
364
+ nucliadb-6.3.5.post3996.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
365
+ nucliadb-6.3.5.post3996.dist-info/entry_points.txt,sha256=XqGfgFDuY3zXQc8ewXM2TRVjTModIq851zOsgrmaXx4,1268
366
+ nucliadb-6.3.5.post3996.dist-info/top_level.txt,sha256=hwYhTVnX7jkQ9gJCkVrbqEG1M4lT2F_iPQND1fCzF80,20
367
+ nucliadb-6.3.5.post3996.dist-info/RECORD,,