nucliadb-models 6.8.1.post4983__py3-none-any.whl → 6.10.0.post5694__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb-models might be problematic. Click here for more details.

Files changed (34) hide show
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +359 -0
  3. nucliadb_models/common.py +66 -57
  4. nucliadb_models/configuration.py +9 -9
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +30 -29
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +5 -20
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +78 -74
  12. nucliadb_models/graph/requests.py +38 -47
  13. nucliadb_models/hydration.py +423 -0
  14. nucliadb_models/internal/predict.py +7 -9
  15. nucliadb_models/internal/shards.py +2 -3
  16. nucliadb_models/labels.py +18 -11
  17. nucliadb_models/link.py +18 -19
  18. nucliadb_models/metadata.py +80 -53
  19. nucliadb_models/notifications.py +3 -3
  20. nucliadb_models/processing.py +1 -2
  21. nucliadb_models/resource.py +85 -102
  22. nucliadb_models/retrieval.py +147 -0
  23. nucliadb_models/search.py +360 -306
  24. nucliadb_models/security.py +2 -3
  25. nucliadb_models/text.py +7 -8
  26. nucliadb_models/trainset.py +1 -2
  27. nucliadb_models/utils.py +2 -3
  28. nucliadb_models/vectors.py +2 -5
  29. nucliadb_models/writer.py +56 -57
  30. {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/METADATA +2 -3
  31. nucliadb_models-6.10.0.post5694.dist-info/RECORD +41 -0
  32. nucliadb_models-6.8.1.post4983.dist-info/RECORD +0 -38
  33. {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/WHEEL +0 -0
  34. {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  from enum import Enum
16
- from typing import Optional
17
16
 
18
17
  from pydantic import BaseModel, Field
19
18
 
@@ -41,11 +40,12 @@ class AgentsFilter(BaseModel):
41
40
 
42
41
 
43
42
  class ResourceAgentsRequest(BaseModel):
44
- filters: Optional[list[AgentsFilter]] = Field(
43
+ filters: list[AgentsFilter] | None = Field(
44
+ title="Resource Agent Filters",
45
45
  default=None,
46
46
  description="Filters to apply to the agents. If None, all curently configured agents are applied.",
47
47
  )
48
- agent_ids: Optional[list[str]] = Field(
48
+ agent_ids: list[str] | None = Field(
49
49
  default=None,
50
50
  title="An optional list of Data Augmentation Agent IDs to run. If None, all configured agents that match the filters are run.",
51
51
  )
@@ -57,7 +57,7 @@ class NewTextField(BaseModel):
57
57
 
58
58
 
59
59
  class AppliedDataAugmentation(BaseModel):
60
- qas: Optional[QuestionAnswers] = Field(
60
+ qas: QuestionAnswers | None = Field(
61
61
  default=None,
62
62
  description="Question and answers generated by the Question Answers agent",
63
63
  )
@@ -0,0 +1,359 @@
1
+ # Copyright 2025 Bosutech XXI S.L.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+
16
+ from enum import Enum
17
+ from typing import Annotated
18
+
19
+ from pydantic import BaseModel, Field, StringConstraints, model_validator
20
+ from typing_extensions import Self
21
+
22
+ from nucliadb_models import filters
23
+ from nucliadb_models.common import FieldTypeName
24
+ from nucliadb_models.resource import ExtractedDataTypeName, Resource
25
+ from nucliadb_models.search import ResourceProperties
26
+
27
# Resource ids are uuids, accepted either as 32 hex chars or in the
# canonical dashed 8-4-4-4-12 form
ResourceIdPattern = r"^([0-9a-f]{32}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$"
ResourceId = Annotated[
    str,
    StringConstraints(pattern=ResourceIdPattern, min_length=32, max_length=36),
]

# Field ids: resource-uuid/field-type-abbreviation/field-id[/split-id]
FieldIdPattern = r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+(/[^/]{1,128})?$"
FieldId = Annotated[
    str,
    StringConstraints(
        pattern=FieldIdPattern,
        min_length=32 + 1 + 1 + 1 + 1 + 0 + 0,
        # max field id of 250
        # NOTE(review): the pattern caps the optional split segment at 128
        # chars while max_length budgets 218 for it — confirm which is intended
        max_length=32 + 1 + 1 + 1 + 250 + 1 + 218,
    ),
]

# Paragraph ids extend field ids with a start-end byte range
ParagraphIdPattern = r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+(/[^/]{1,128})?/[0-9]+-[0-9]+$"
ParagraphId = Annotated[
    str,
    StringConstraints(
        # resource-uuid/field-type/field-id/[split-id/]paragraph-id
        pattern=ParagraphIdPattern,
        min_length=32 + 1 + 1 + 1 + 1 + 0 + 0 + 1 + 3,
        # max field id of 250 and 10 digit paragraphs. More than enough
        max_length=32 + 1 + 1 + 1 + 250 + 1 + 128 + 1 + 21,
    ),
]
55
+
56
+
57
+ # Request
58
+
59
+
60
class ResourceProp(str, Enum):
    """Superset of the former `show` and `extracted` serialization options."""

    # `show` props
    BASIC = "basic"
    ORIGIN = "origin"
    EXTRA = "extra"
    RELATIONS = "relations"
    VALUES = "values"
    ERRORS = "errors"
    SECURITY = "security"
    # `extracted` props
    EXTRACTED_TEXT = "extracted_text"
    EXTRACTED_METADATA = "extracted_metadata"
    EXTRACTED_SHORTENED_METADATA = "extracted_shortened_metadata"
    EXTRACTED_LARGE_METADATA = "extracted_large_metadata"
    EXTRACTED_VECTOR = "extracted_vectors"
    EXTRACTED_LINK = "extracted_link"
    EXTRACTED_FILE = "extracted_file"
    EXTRACTED_QA = "extracted_question_answers"
    # new granular props
    TITLE = "title"
    SUMMARY = "summary"
    CLASSIFICATION_LABELS = "classification_labels"

    @classmethod
    def from_show_and_extracted(
        cls, show: list[ResourceProperties], extracted: list[ExtractedDataTypeName]
    ) -> list["ResourceProp"]:
        """Translate the legacy `show`/`extracted` request options into props.

        `show` options map one-to-one (except `extracted`, which has no direct
        counterpart); the `extracted` list is only honored when `show`
        contains the `extracted` option, mirroring the legacy API behavior.
        """
        legacy_show = {
            ResourceProperties.BASIC: cls.BASIC,
            ResourceProperties.ORIGIN: cls.ORIGIN,
            ResourceProperties.EXTRA: cls.EXTRA,
            ResourceProperties.RELATIONS: cls.RELATIONS,
            ResourceProperties.VALUES: cls.VALUES,
            ResourceProperties.ERRORS: cls.ERRORS,
            ResourceProperties.SECURITY: cls.SECURITY,
        }
        legacy_extracted = {
            ExtractedDataTypeName.TEXT: cls.EXTRACTED_TEXT,
            ExtractedDataTypeName.METADATA: cls.EXTRACTED_METADATA,
            ExtractedDataTypeName.SHORTENED_METADATA: cls.EXTRACTED_SHORTENED_METADATA,
            ExtractedDataTypeName.LARGE_METADATA: cls.EXTRACTED_LARGE_METADATA,
            ExtractedDataTypeName.VECTOR: cls.EXTRACTED_VECTOR,
            ExtractedDataTypeName.LINK: cls.EXTRACTED_LINK,
            ExtractedDataTypeName.FILE: cls.EXTRACTED_FILE,
            ExtractedDataTypeName.QA: cls.EXTRACTED_QA,
        }

        # show=extracted is not in the mapping and is skipped here
        selected = [legacy_show[option] for option in show if option in legacy_show]

        if ResourceProperties.EXTRACTED in show:
            selected.extend(legacy_extracted[option] for option in extracted)

        return selected
123
+
124
+
125
class AugmentResourceFields(BaseModel):
    # Include the text of each selected field
    text: bool = False
    # Include the classification labels of each selected field
    classification_labels: bool = False

    # Filter expressions selecting which fields of the resource to augment
    filters: list[filters.Field | filters.Generated]
130
+
131
+
132
class AugmentResources(BaseModel):
    # Resource ids (uuids) to augment
    given: list[ResourceId]

    # TODO(decoupled-ask): replace this select for bool fields
    select: list[ResourceProp] = Field(default_factory=list)

    field_type_filter: list[FieldTypeName] | None = Field(
        default=None,
        deprecated="Only use this for legacy resource serialization",
        title="Field type filter",
        description=(
            "Define which field types are serialized on resources of search results. "
            "If omitted and legacy serialization is used, all field types will be serialized"
        ),
    )

    # Granular per-field augmentation options; mutually exclusive with the
    # legacy `field_type_filter` (enforced by the validator below)
    fields: AugmentResourceFields | None = None

    @model_validator(mode="after")
    def bwc_resource_serialization(self) -> Self:
        # Legacy serialization and the new granular field selection cannot be
        # combined in a single request
        if self.field_type_filter is not None and self.fields is not None:
            raise ValueError("`field_type_filter` and `fields` are incompatible together")

        return self
156
+
157
+
158
class AugmentFields(BaseModel):
    given: list[FieldId]

    text: bool = False
    classification_labels: bool = False
    entities: bool = False  # also known as ners

    # For file fields, augment the path to the thumbnail image
    file_thumbnail: bool = False

    # Augment every message of the conversation. Mutually exclusive with
    # `max_conversation_messages` being defined
    full_conversation: bool = False

    # When `full_conversation` is disabled, this option controls the max
    # amount of messages to be augmented. This number will be a best-effort
    # window centered around the selected message. In addition, the 1st
    # message of the conversation will always be included.
    #
    # This option is combinable with attachments.
    max_conversation_messages: int | None = None

    # Given a message, if it's a question, try to find an answer. Otherwise,
    # return a window of messages following the requested one.
    #
    # This was previously done without explicit user consent, now it's an option.
    conversation_answer_or_messages_after: bool = False

    # Both attachment options will only add attachments for the full or the 1st
    # + window, not answer nor messages after

    # include conversation text attachments
    conversation_text_attachments: bool = False
    # include conversation image attachments
    conversation_image_attachments: bool = False

    @model_validator(mode="after")
    def validate_cross_options(self):
        """Reject option combinations that contradict each other."""
        wants_full = self.full_conversation
        has_window = self.max_conversation_messages is not None

        if wants_full and has_window:
            raise ValueError(
                "`full_conversation` and `max_conversation_messages` are not compatible together"
            )

        wants_attachments = self.conversation_text_attachments or self.conversation_image_attachments
        if wants_attachments and not wants_full and not has_window:
            raise ValueError(
                "Attachments are only compatible with `full_conversation` and `max_conversation_messages`"
            )
        return self
209
+
210
+
211
# TODO(decoupled-ask): remove unused metadata
class ParagraphMetadata(BaseModel):
    # Classification labels at field and paragraph granularity
    field_labels: list[str]
    paragraph_labels: list[str]

    # Nature of the paragraph, used to pick image augmentation strategies
    is_an_image: bool
    is_a_table: bool

    # for extracted from visual content (ocr, inception, tables)
    source_file: str | None

    # for documents (pdf, docx...) only
    page: int | None
    in_page_with_visual: bool | None
225
+
226
+
227
class AugmentParagraph(BaseModel):
    # Full paragraph id: resource-uuid/field-type/field-id/[split-id/]start-end
    id: ParagraphId
    # Optional paragraph metadata the caller already has; drives augmentation
    metadata: ParagraphMetadata | None = None
230
+
231
+
232
class AugmentParagraphs(BaseModel):
    given: list[AugmentParagraph]

    text: bool = True

    # How many neighbouring paragraphs to include around each given one
    neighbours_before: int = 0
    neighbours_after: int = 0

    # paragraph extracted from an image, return an image
    source_image: bool = False

    # paragraph extracted from a table, return table image
    table_image: bool = False

    # return page_preview instead of table image if table image enabled
    table_prefers_page_preview: bool = False

    # paragraph from a page, return page preview image
    page_preview_image: bool = False

    @model_validator(mode="after")
    def table_options_work_together(self) -> Self:
        """`table_prefers_page_preview` is a modifier of `table_image`."""
        if self.table_prefers_page_preview and not self.table_image:
            raise ValueError("`table_prefers_page_preview` can only be enabled with `table_image`")
        return self
257
+
258
+
259
class AugmentRequest(BaseModel):
    # Each section is independent; leave it as None to skip that kind of
    # augmentation entirely
    resources: AugmentResources | None = None
    fields: AugmentFields | None = None
    paragraphs: AugmentParagraphs | None = None
263
+
264
+
265
+ # Response
266
+
267
+
268
class AugmentedParagraph(BaseModel):
    # Paragraph text, when requested
    text: str | None = None

    # Ids of the neighbouring paragraphs included around this one
    neighbours_before: list[ParagraphId] | None = None
    neighbours_after: list[ParagraphId] | None = None

    # Requested images, when available
    # NOTE(review): presumably these are download API paths like
    # AugmentedFileField.thumbnail_image — confirm
    source_image: str | None = None
    table_image: str | None = None
    page_preview_image: str | None = None
277
+
278
+
279
class AugmentedField(BaseModel):
    # Field text, when requested
    text: str | None = None

    # presumably labelset id -> labels for this field — TODO confirm
    classification_labels: dict[str, list[str]] | None = None

    # former ners
    entities: dict[str, list[str]] | None = None
286
+
287
+
288
class AugmentedFileField(BaseModel):
    # Field text, when requested
    text: str | None = None

    # presumably labelset id -> labels for this field — TODO confirm
    classification_labels: dict[str, list[str]] | None = None

    # former ners
    entities: dict[str, list[str]] | None = None

    # TODO(decoupled-ask): implement image strategy
    page_preview_image: str | None = None

    # Path for the download API to retrieve the file thumbnail image
    thumbnail_image: str | None = None
301
+
302
+
303
class AugmentedConversationMessage(BaseModel):
    # Message identifier within the conversation field
    ident: str
    # Message text, when requested
    text: str | None = None
    # Ids of the fields attached to this message
    attachments: list[FieldId] | None = None
307
+
308
+
309
class AugmentedConversationField(BaseModel):
    classification_labels: dict[str, list[str]] | None = None
    # former ners
    entities: dict[str, list[str]] | None = None

    messages: list[AugmentedConversationMessage] | None = None

    @property
    def text(self) -> str | None:
        """Syntactic sugar to access aggregate text from all messages"""
        if self.messages is None:
            return None

        aggregated = "".join(message.text or "" for message in self.messages)
        # An all-empty aggregation is reported as no text at all
        return aggregated or None

    @property
    def attachments(self) -> list[FieldId] | None:
        """Syntactic sugar to access the aggregate of attachments from all messages."""
        if self.messages is None:
            return None

        # Keep every non-None attachment list (even empty ones), so that a
        # message with an empty list still yields [] rather than None
        per_message = [
            message.attachments for message in self.messages if message.attachments is not None
        ]
        if not per_message:
            return None

        return [attachment for group in per_message for attachment in group]
346
+
347
+
348
class AugmentedResource(Resource):
    # presumably labelset id -> labels for this resource — TODO confirm
    classification_labels: dict[str, list[str]] | None = None

    def updated_from(self, origin: Resource):
        """Copy every field value of `origin` onto this instance in place.

        Augmentation-only fields (e.g. `classification_labels`) are untouched,
        as only the fields declared on `origin`'s class are copied.
        """
        # Access `model_fields` through the class: instance access is
        # deprecated since pydantic 2.11 and scheduled for removal
        for key in type(origin).model_fields.keys():
            setattr(self, key, getattr(origin, key))
354
+
355
+
356
class AugmentResponse(BaseModel):
    # Maps are keyed by the same ids given in the AugmentRequest
    resources: dict[ResourceId, AugmentedResource]
    fields: dict[FieldId, AugmentedField | AugmentedFileField | AugmentedConversationField]
    paragraphs: dict[ParagraphId, AugmentedParagraph]
nucliadb_models/common.py CHANGED
@@ -16,7 +16,7 @@ import base64
16
16
  import hashlib
17
17
  import re
18
18
  from enum import Enum
19
- from typing import Any, Dict, List, Optional
19
+ from typing import Any
20
20
 
21
21
  from pydantic import (
22
22
  BaseModel,
@@ -38,7 +38,7 @@ FIELD_TYPE_CHAR_MAP = {
38
38
  }
39
39
 
40
40
  STORAGE_FILE_MATCH = re.compile(
41
- r"/?kbs/(?P<kbid>[^/]+)/r/(?P<rid>[^/]+)/(?P<download_type>[fe])/(?P<field_type>\w)/(?P<field_id>[^/]+)/?(?P<key>.*)?" # noqa
41
+ r"/?kbs/(?P<kbid>[^/]+)/r/(?P<rid>[^/]+)/(?P<download_type>[fe])/(?P<field_type>\w)/(?P<field_id>[^/]+)/?(?P<key>.*)?"
42
42
  )
43
43
  DOWNLOAD_TYPE_MAP = {"f": "field", "e": "extracted"}
44
44
  DOWNLOAD_URI = "/kb/{kbid}/resource/{rid}/{field_type}/{field_id}/download/{download_type}/{key}"
@@ -50,9 +50,9 @@ class ParamDefault(BaseModel):
50
50
  default: Any = None
51
51
  title: str
52
52
  description: str
53
- le: Optional[float] = None
54
- gt: Optional[float] = None
55
- max_items: Optional[int] = None
53
+ le: float | None = None
54
+ gt: float | None = None
55
+ max_items: int | None = None
56
56
  deprecated: bool = False
57
57
 
58
58
  def to_pydantic_field(self, default=_NOT_SET, **kw) -> Field: # type: ignore
@@ -86,13 +86,13 @@ class FieldID(BaseModel):
86
86
 
87
87
 
88
88
  class File(BaseModel):
89
- filename: Optional[str] = None
89
+ filename: str | None = None
90
90
  content_type: str = "application/octet-stream"
91
- payload: Optional[str] = Field(default=None, description="Base64 encoded file content")
92
- md5: Optional[str] = None
91
+ payload: str | None = Field(default=None, description="Base64 encoded file content")
92
+ md5: str | None = None
93
93
  # These are to be used for external files
94
- uri: Optional[str] = None
95
- extra_headers: Dict[str, str] = {}
94
+ uri: str | None = None
95
+ extra_headers: dict[str, str] = {}
96
96
 
97
97
  @model_validator(mode="after")
98
98
  def _check_internal_file_fields(self) -> Self:
@@ -108,7 +108,7 @@ class File(BaseModel):
108
108
  if self.md5 is None:
109
109
  # In case md5 is not supplied, compute it
110
110
  try:
111
- result = hashlib.md5(base64.b64decode(self.payload))
111
+ result = hashlib.md5(base64.b64decode(self.payload), usedforsecurity=False)
112
112
  self.md5 = result.hexdigest()
113
113
  except Exception:
114
114
  raise ValueError("MD5 could not be computed")
@@ -134,10 +134,10 @@ class FileB64(BaseModel):
134
134
 
135
135
 
136
136
  class CloudFile(BaseModel):
137
- uri: Optional[str] = None
138
- size: Optional[int] = None
139
- content_type: Optional[str] = None
140
- bucket_name: Optional[str] = None
137
+ uri: str | None = None
138
+ size: int | None = None
139
+ content_type: str | None = None
140
+ bucket_name: str | None = None
141
141
 
142
142
  class Source(Enum):
143
143
  FLAPS = "FLAPS"
@@ -146,23 +146,23 @@ class CloudFile(BaseModel):
146
146
  LOCAL = "LOCAL"
147
147
  EXTERNAL = "EXTERNAL"
148
148
 
149
- source: Optional[Source]
150
- filename: Optional[str]
151
- resumable_uri: Optional[str]
152
- offset: Optional[int]
153
- upload_uri: Optional[str]
154
- parts: Optional[List[str]]
155
- old_uri: Optional[str]
156
- old_bucket: Optional[str]
157
- md5: Optional[str]
149
+ source: Source | None
150
+ filename: str | None
151
+ resumable_uri: str | None
152
+ offset: int | None
153
+ upload_uri: str | None
154
+ parts: list[str] | None
155
+ old_uri: str | None
156
+ old_bucket: str | None
157
+ md5: str | None
158
158
 
159
159
 
160
160
  class CloudLink(BaseModel):
161
- uri: Optional[str] = None
162
- size: Optional[int] = None
163
- content_type: Optional[str] = None
164
- filename: Optional[str] = None
165
- md5: Optional[str] = None
161
+ uri: str | None = None
162
+ size: int | None = None
163
+ content_type: str | None = None
164
+ filename: str | None = None
165
+ md5: str | None = None
166
166
 
167
167
  @staticmethod
168
168
  def format_reader_download_uri(uri: str) -> str:
@@ -203,16 +203,25 @@ class FieldTypeName(str, Enum):
203
203
  "a": FieldTypeName.GENERIC,
204
204
  }[abbr]
205
205
 
206
+ def abbreviation(self) -> str:
207
+ return {
208
+ FieldTypeName.TEXT: "t",
209
+ FieldTypeName.FILE: "f",
210
+ FieldTypeName.LINK: "u",
211
+ FieldTypeName.CONVERSATION: "c",
212
+ FieldTypeName.GENERIC: "a",
213
+ }[self]
214
+
206
215
 
207
216
  class FieldRef(BaseModel):
208
217
  field_type: FieldTypeName
209
218
  field_id: str
210
- split: Optional[str] = None
219
+ split: str | None = None
211
220
 
212
221
 
213
222
  class Classification(BaseModel):
214
- labelset: str
215
- label: str
223
+ labelset: str = Field(title="The ID of the labelset")
224
+ label: str = Field(title="The label assigned from the labelset")
216
225
 
217
226
 
218
227
  class UserClassification(Classification):
@@ -220,19 +229,19 @@ class UserClassification(Classification):
220
229
 
221
230
 
222
231
  class Sentence(BaseModel):
223
- start: Optional[int] = None
224
- end: Optional[int] = None
225
- key: Optional[str] = None
232
+ start: int | None = None
233
+ end: int | None = None
234
+ key: str | None = None
226
235
 
227
236
 
228
237
  class PageInformation(BaseModel):
229
- page: Optional[int] = None
230
- page_with_visual: Optional[bool] = None
238
+ page: int | None = Field(default=None, title="Page Information Page")
239
+ page_with_visual: bool | None = None
231
240
 
232
241
 
233
242
  class Representation(BaseModel):
234
- is_a_table: Optional[bool] = None
235
- reference_file: Optional[str] = None
243
+ is_a_table: bool | None = None
244
+ reference_file: str | None = None
236
245
 
237
246
 
238
247
  class ParagraphRelations(BaseModel):
@@ -242,10 +251,10 @@ class ParagraphRelations(BaseModel):
242
251
 
243
252
 
244
253
  class Paragraph(BaseModel):
245
- start: Optional[int] = None
246
- end: Optional[int] = None
247
- start_seconds: Optional[List[int]] = None
248
- end_seconds: Optional[List[int]] = None
254
+ start: int | None = None
255
+ end: int | None = None
256
+ start_seconds: list[int] | None = None
257
+ end_seconds: list[int] | None = None
249
258
 
250
259
  class TypeParagraph(str, Enum):
251
260
  TEXT = "TEXT"
@@ -256,35 +265,35 @@ class Paragraph(BaseModel):
256
265
  TITLE = "TITLE"
257
266
  TABLE = "TABLE"
258
267
 
259
- kind: Optional[TypeParagraph] = None
260
- classifications: Optional[List[Classification]] = None
261
- sentences: Optional[List[Sentence]] = None
262
- key: Optional[str] = None
263
- page: Optional[PageInformation] = None
264
- representation: Optional[Representation] = None
265
- relations: Optional[ParagraphRelations] = None
268
+ kind: TypeParagraph | None = None
269
+ classifications: list[Classification] | None = None
270
+ sentences: list[Sentence] | None = None
271
+ key: str | None = None
272
+ page: PageInformation | None = None
273
+ representation: Representation | None = None
274
+ relations: ParagraphRelations | None = None
266
275
 
267
276
 
268
277
  class Shards(BaseModel):
269
- shards: Optional[List[str]] = None
278
+ shards: list[str] | None = None
270
279
 
271
280
 
272
281
  class Question(BaseModel):
273
282
  text: str
274
- language: Optional[str] = None
275
- ids_paragraphs: List[str]
283
+ language: str | None = None
284
+ ids_paragraphs: list[str]
276
285
 
277
286
 
278
287
  class Answer(BaseModel):
279
288
  text: str
280
- language: Optional[str] = None
281
- ids_paragraphs: List[str]
289
+ language: str | None = None
290
+ ids_paragraphs: list[str]
282
291
 
283
292
 
284
293
  class QuestionAnswer(BaseModel):
285
294
  question: Question
286
- answers: List[Answer]
295
+ answers: list[Answer]
287
296
 
288
297
 
289
298
  class QuestionAnswers(BaseModel):
290
- question_answer: List[QuestionAnswer]
299
+ question_answer: list[QuestionAnswer]
@@ -14,7 +14,7 @@
14
14
  #
15
15
 
16
16
  import warnings
17
- from typing import Annotated, Any, Literal, Optional, Union
17
+ from typing import Annotated, Any, Literal
18
18
 
19
19
  from pydantic import BaseModel, Field, create_model
20
20
 
@@ -28,11 +28,11 @@ class KBConfiguration(BaseModel):
28
28
  super().__init__(**data)
29
29
 
30
30
  # Do not touch this model synced on Processing side
31
- semantic_model: Optional[str] = None
32
- generative_model: Optional[str] = None
33
- ner_model: Optional[str] = None
34
- anonymization_model: Optional[str] = None
35
- visual_labeling: Optional[str] = None
31
+ semantic_model: str | None = None
32
+ generative_model: str | None = None
33
+ ner_model: str | None = None
34
+ anonymization_model: str | None = None
35
+ visual_labeling: str | None = None
36
36
 
37
37
 
38
38
  #
@@ -44,7 +44,7 @@ def _model_fields(model: type[BaseModel], skip: list[str]) -> dict[str, Any]:
44
44
  }
45
45
 
46
46
 
47
- # FindConfig is a FindConfig without `search_configuration`
47
+ # FindConfig is a FindRequest without `search_configuration`
48
48
  FindConfig = create_model("FindConfig", **_model_fields(FindRequest, skip=["search_configuration"]))
49
49
 
50
50
 
@@ -57,7 +57,7 @@ class FindSearchConfiguration(BaseModel):
57
57
  AskConfig = create_model(
58
58
  "AskConfig",
59
59
  **_model_fields(AskRequest, skip=["query", "search_configuration"]),
60
- query=(Optional[str], None),
60
+ query=(str | None, None),
61
61
  )
62
62
 
63
63
 
@@ -67,7 +67,7 @@ class AskSearchConfiguration(BaseModel):
67
67
 
68
68
 
69
69
  SearchConfiguration = Annotated[
70
- Union[FindSearchConfiguration, AskSearchConfiguration], Field(discriminator="kind")
70
+ FindSearchConfiguration | AskSearchConfiguration, Field(discriminator="kind")
71
71
  ]
72
72
 
73
73
  # We need this to avoid issues with pydantic and generic types defined in another module