nucliadb-models 6.9.5.post5452__py3-none-any.whl → 6.10.0.post5694__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb-models might be problematic. Click here for more details.

Files changed (34)
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +297 -23
  3. nucliadb_models/common.py +57 -57
  4. nucliadb_models/configuration.py +8 -8
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +25 -26
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +1 -2
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +78 -74
  12. nucliadb_models/graph/requests.py +38 -47
  13. nucliadb_models/hydration.py +48 -50
  14. nucliadb_models/internal/predict.py +7 -9
  15. nucliadb_models/internal/shards.py +2 -3
  16. nucliadb_models/labels.py +18 -11
  17. nucliadb_models/link.py +18 -19
  18. nucliadb_models/metadata.py +65 -53
  19. nucliadb_models/notifications.py +3 -3
  20. nucliadb_models/processing.py +1 -2
  21. nucliadb_models/resource.py +85 -102
  22. nucliadb_models/retrieval.py +147 -0
  23. nucliadb_models/search.py +266 -276
  24. nucliadb_models/security.py +2 -3
  25. nucliadb_models/text.py +7 -8
  26. nucliadb_models/trainset.py +1 -2
  27. nucliadb_models/utils.py +2 -3
  28. nucliadb_models/vectors.py +2 -5
  29. nucliadb_models/writer.py +56 -57
  30. {nucliadb_models-6.9.5.post5452.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/METADATA +1 -1
  31. nucliadb_models-6.10.0.post5694.dist-info/RECORD +41 -0
  32. nucliadb_models-6.9.5.post5452.dist-info/RECORD +0 -40
  33. {nucliadb_models-6.9.5.post5452.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/WHEEL +0 -0
  34. {nucliadb_models-6.9.5.post5452.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/top_level.txt +0 -0
nucliadb_models/agents/ingestion.py CHANGED
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  from enum import Enum
16
- from typing import Optional
17
16
 
18
17
  from pydantic import BaseModel, Field
19
18
 
@@ -41,11 +40,12 @@ class AgentsFilter(BaseModel):
41
40
 
42
41
 
43
42
  class ResourceAgentsRequest(BaseModel):
44
- filters: Optional[list[AgentsFilter]] = Field(
43
+ filters: list[AgentsFilter] | None = Field(
44
+ title="Resource Agent Filters",
45
45
  default=None,
46
46
  description="Filters to apply to the agents. If None, all curently configured agents are applied.",
47
47
  )
48
- agent_ids: Optional[list[str]] = Field(
48
+ agent_ids: list[str] | None = Field(
49
49
  default=None,
50
50
  title="An optional list of Data Augmentation Agent IDs to run. If None, all configured agents that match the filters are run.",
51
51
  )
@@ -57,7 +57,7 @@ class NewTextField(BaseModel):
57
57
 
58
58
 
59
59
  class AppliedDataAugmentation(BaseModel):
60
- qas: Optional[QuestionAnswers] = Field(
60
+ qas: QuestionAnswers | None = Field(
61
61
  default=None,
62
62
  description="Question and answers generated by the Question Answers agent",
63
63
  )
nucliadb_models/augment.py CHANGED
@@ -13,45 +13,220 @@
13
13
  # limitations under the License.
14
14
  #
15
15
 
16
- from pydantic import BaseModel
16
+ from enum import Enum
17
+ from typing import Annotated
17
18
 
19
+ from pydantic import BaseModel, Field, StringConstraints, model_validator
20
+ from typing_extensions import Self
21
+
22
+ from nucliadb_models import filters
18
23
  from nucliadb_models.common import FieldTypeName
19
24
  from nucliadb_models.resource import ExtractedDataTypeName, Resource
20
- from nucliadb_models.search import Image, ResourceProperties, SearchParamDefaults
25
+ from nucliadb_models.search import ResourceProperties
21
26
 
22
- ParagraphId = str
27
+ ResourceIdPattern = r"^([0-9a-f]{32}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$"
28
+ ResourceId = Annotated[
29
+ str,
30
+ StringConstraints(pattern=ResourceIdPattern, min_length=32, max_length=36),
31
+ ]
23
32
 
33
+ FieldIdPattern = r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+(/[^/]{1,128})?$"
34
+ FieldId = Annotated[
35
+ str,
36
+ StringConstraints(
37
+ pattern=FieldIdPattern,
38
+ min_length=32 + 1 + 1 + 1 + 1 + 0 + 0,
39
+ # max field id of 250
40
+ max_length=32 + 1 + 1 + 1 + 250 + 1 + 218,
41
+ ),
42
+ ]
24
43
 
25
- class AugmentedParagraph(BaseModel):
26
- text: str | None = None
44
+ ParagraphIdPattern = r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+(/[^/]{1,128})?/[0-9]+-[0-9]+$"
45
+ ParagraphId = Annotated[
46
+ str,
47
+ StringConstraints(
48
+ # resource-uuid/field-type/field-id/[split-id/]paragraph-id
49
+ pattern=ParagraphIdPattern,
50
+ min_length=32 + 1 + 1 + 1 + 1 + 0 + 0 + 1 + 3,
51
+ # max field id of 250 and 10 digit paragraphs. More than enough
52
+ max_length=32 + 1 + 1 + 1 + 250 + 1 + 128 + 1 + 21,
53
+ ),
54
+ ]
27
55
 
28
- neighbours_before: dict[ParagraphId, str] | None = None
29
- neighbours_after: dict[ParagraphId, str] | None = None
30
56
 
31
- image: Image | None = None
57
+ # Request
32
58
 
33
59
 
34
- class AugmentedField(BaseModel):
35
- page_preview_image: Image | None = None
60
+ class ResourceProp(str, Enum):
61
+ """Superset of former `show` and `extracted` serializations options."""
36
62
 
63
+ # `show` props
64
+ BASIC = "basic"
65
+ ORIGIN = "origin"
66
+ EXTRA = "extra"
67
+ RELATIONS = "relations"
68
+ VALUES = "values"
69
+ ERRORS = "errors"
70
+ SECURITY = "security"
71
+ # `extracted` props
72
+ EXTRACTED_TEXT = "extracted_text"
73
+ EXTRACTED_METADATA = "extracted_metadata"
74
+ EXTRACTED_SHORTENED_METADATA = "extracted_shortened_metadata"
75
+ EXTRACTED_LARGE_METADATA = "extracted_large_metadata"
76
+ EXTRACTED_VECTOR = "extracted_vectors"
77
+ EXTRACTED_LINK = "extracted_link"
78
+ EXTRACTED_FILE = "extracted_file"
79
+ EXTRACTED_QA = "extracted_question_answers"
80
+ # new granular props
81
+ TITLE = "title"
82
+ SUMMARY = "summary"
83
+ CLASSIFICATION_LABELS = "classification_labels"
37
84
 
38
- class AugmentedResource(Resource):
39
- def updated_from(self, origin: Resource):
40
- for key in origin.model_fields.keys():
41
- self.__setattr__(key, getattr(origin, key))
85
+ @classmethod
86
+ def from_show_and_extracted(
87
+ cls, show: list[ResourceProperties], extracted: list[ExtractedDataTypeName]
88
+ ) -> list["ResourceProp"]:
89
+ _show_to_prop = {
90
+ ResourceProperties.BASIC: cls.BASIC,
91
+ ResourceProperties.ORIGIN: cls.ORIGIN,
92
+ ResourceProperties.EXTRA: cls.EXTRA,
93
+ ResourceProperties.RELATIONS: cls.RELATIONS,
94
+ ResourceProperties.VALUES: cls.VALUES,
95
+ ResourceProperties.ERRORS: cls.ERRORS,
96
+ ResourceProperties.SECURITY: cls.SECURITY,
97
+ }
98
+ _extracted_to_prop = {
99
+ ExtractedDataTypeName.TEXT: cls.EXTRACTED_TEXT,
100
+ ExtractedDataTypeName.METADATA: cls.EXTRACTED_METADATA,
101
+ ExtractedDataTypeName.SHORTENED_METADATA: cls.EXTRACTED_SHORTENED_METADATA,
102
+ ExtractedDataTypeName.LARGE_METADATA: cls.EXTRACTED_LARGE_METADATA,
103
+ ExtractedDataTypeName.VECTOR: cls.EXTRACTED_VECTOR,
104
+ ExtractedDataTypeName.LINK: cls.EXTRACTED_LINK,
105
+ ExtractedDataTypeName.FILE: cls.EXTRACTED_FILE,
106
+ ExtractedDataTypeName.QA: cls.EXTRACTED_QA,
107
+ }
108
+
109
+ props = []
110
+ for s in show:
111
+ show_prop = _show_to_prop.get(s)
112
+ # show=extracted is not in the dict
113
+ if show_prop is None:
114
+ continue
115
+ props.append(show_prop)
116
+
117
+ if ResourceProperties.EXTRACTED in show:
118
+ for e in extracted:
119
+ extracted_prop = _extracted_to_prop[e]
120
+ props.append(extracted_prop)
121
+
122
+ return props
123
+
124
+
125
+ class AugmentResourceFields(BaseModel):
126
+ text: bool = False
127
+ classification_labels: bool = False
128
+
129
+ filters: list[filters.Field | filters.Generated]
42
130
 
43
131
 
44
132
  class AugmentResources(BaseModel):
45
- given: list[str]
133
+ given: list[ResourceId]
134
+
135
+ # TODO(decoupled-ask): replace this select for bool fields
136
+ select: list[ResourceProp] = Field(default_factory=list)
137
+
138
+ field_type_filter: list[FieldTypeName] | None = Field(
139
+ default=None,
140
+ deprecated="Only use this for legacy resource serialization",
141
+ title="Field type filter",
142
+ description=(
143
+ "Define which field types are serialized on resources of search results. "
144
+ "If omitted and legacy serialization is used, all field types will be serialized"
145
+ ),
146
+ )
147
+
148
+ fields: AugmentResourceFields | None = None
149
+
150
+ @model_validator(mode="after")
151
+ def bwc_resource_serialization(self) -> Self:
152
+ if self.field_type_filter is not None and self.fields is not None:
153
+ raise ValueError("`field_type_filter` and `fields` are incompatible together")
154
+
155
+ return self
46
156
 
47
- show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
48
- extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
49
- field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
50
- # TODO: field name filter, da field prefix filter
157
+
158
+ class AugmentFields(BaseModel):
159
+ given: list[FieldId]
160
+
161
+ text: bool = False
162
+ classification_labels: bool = False
163
+ entities: bool = False # also known as ners
164
+
165
+ # For file fields, augment the path to the thumbnail image
166
+ file_thumbnail: bool = False
167
+
168
+ # When enabled, augment all the messages from the conversation. This is
169
+ # incompatible with max_conversation_messages defined
170
+ full_conversation: bool = False
171
+
172
+ # When `full` disbled, this option controls the max amount of messages to be
173
+ # augmented. This number will be a best-effort window centered around the
174
+ # selected message. In addition, the 1st message of the conversation will
175
+ # always be included.
176
+ #
177
+ # This option is combinable with attachments.
178
+ max_conversation_messages: int | None = None
179
+
180
+ # Given a message, if it's a question, try to find an answer. Otherwise,
181
+ # return a window of messages following the requested one.
182
+ #
183
+ # This was previously done without explicit user consent, now it's an option.
184
+ conversation_answer_or_messages_after: bool = False
185
+
186
+ # Both attachment options will only add attachments for the full or the 1st
187
+ # + window, not answer nor messages after
188
+
189
+ # include conversation text attachments
190
+ conversation_text_attachments: bool = False
191
+ # include conversation image attachments
192
+ conversation_image_attachments: bool = False
193
+
194
+ @model_validator(mode="after")
195
+ def validate_cross_options(self):
196
+ if self.full_conversation and self.max_conversation_messages is not None:
197
+ raise ValueError(
198
+ "`full_conversation` and `max_conversation_messages` are not compatible together"
199
+ )
200
+ if (
201
+ (self.conversation_text_attachments or self.conversation_image_attachments)
202
+ and self.full_conversation is False
203
+ and self.max_conversation_messages is None
204
+ ):
205
+ raise ValueError(
206
+ "Attachments are only compatible with `full_conversation` and `max_conversation_messages`"
207
+ )
208
+ return self
209
+
210
+
211
+ # TODO(decoupled-ask): remove unused metadata
212
+ class ParagraphMetadata(BaseModel):
213
+ field_labels: list[str]
214
+ paragraph_labels: list[str]
215
+
216
+ is_an_image: bool
217
+ is_a_table: bool
218
+
219
+ # for extracted from visual content (ocr, inception, tables)
220
+ source_file: str | None
221
+
222
+ # for documents (pdf, docx...) only
223
+ page: int | None
224
+ in_page_with_visual: bool | None
51
225
 
52
226
 
53
227
  class AugmentParagraph(BaseModel):
54
228
  id: ParagraphId
229
+ metadata: ParagraphMetadata | None = None
55
230
 
56
231
 
57
232
  class AugmentParagraphs(BaseModel):
@@ -74,12 +249,111 @@ class AugmentParagraphs(BaseModel):
74
249
  # paragraph from a page, return page preview image
75
250
  page_preview_image: bool = False
76
251
 
252
+ @model_validator(mode="after")
253
+ def table_options_work_together(self) -> Self:
254
+ if not self.table_image and self.table_prefers_page_preview:
255
+ raise ValueError("`table_prefers_page_preview` can only be enabled with `table_image`")
256
+ return self
257
+
77
258
 
78
259
  class AugmentRequest(BaseModel):
79
- resources: AugmentResources
80
- paragraphs: AugmentParagraphs
260
+ resources: AugmentResources | None = None
261
+ fields: AugmentFields | None = None
262
+ paragraphs: AugmentParagraphs | None = None
263
+
264
+
265
+ # Response
266
+
267
+
268
+ class AugmentedParagraph(BaseModel):
269
+ text: str | None = None
270
+
271
+ neighbours_before: list[ParagraphId] | None = None
272
+ neighbours_after: list[ParagraphId] | None = None
273
+
274
+ source_image: str | None = None
275
+ table_image: str | None = None
276
+ page_preview_image: str | None = None
277
+
278
+
279
+ class AugmentedField(BaseModel):
280
+ text: str | None = None
281
+
282
+ classification_labels: dict[str, list[str]] | None = None
283
+
284
+ # former ners
285
+ entities: dict[str, list[str]] | None = None
286
+
287
+
288
+ class AugmentedFileField(BaseModel):
289
+ text: str | None = None
290
+
291
+ classification_labels: dict[str, list[str]] | None = None
292
+
293
+ # former ners
294
+ entities: dict[str, list[str]] | None = None
295
+
296
+ # TODO(decoupled-ask): implement image strategy
297
+ page_preview_image: str | None = None
298
+
299
+ # Path for the download API to retrieve the file thumbnail image
300
+ thumbnail_image: str | None = None
301
+
302
+
303
+ class AugmentedConversationMessage(BaseModel):
304
+ ident: str
305
+ text: str | None = None
306
+ attachments: list[FieldId] | None = None
307
+
308
+
309
+ class AugmentedConversationField(BaseModel):
310
+ classification_labels: dict[str, list[str]] | None = None
311
+ # former ners
312
+ entities: dict[str, list[str]] | None = None
313
+
314
+ messages: list[AugmentedConversationMessage] | None = None
315
+
316
+ @property
317
+ def text(self) -> str | None:
318
+ """Syntactic sugar to access aggregate text from all messages"""
319
+ if self.messages is None:
320
+ return None
321
+
322
+ text = ""
323
+ for message in self.messages:
324
+ text += message.text or ""
325
+
326
+ return text or None
327
+
328
+ @property
329
+ def attachments(self) -> list[FieldId] | None:
330
+ """Syntactic sugar to access the aggregate of attachments from all messages."""
331
+ if self.messages is None:
332
+ return None
333
+
334
+ has_attachments = False
335
+ attachments = []
336
+ for message in self.messages:
337
+ if message.attachments is None:
338
+ continue
339
+ has_attachments = True
340
+ attachments.extend(message.attachments)
341
+
342
+ if has_attachments:
343
+ return attachments
344
+ else:
345
+ return None
346
+
347
+
348
+ class AugmentedResource(Resource):
349
+ classification_labels: dict[str, list[str]] | None = None
350
+
351
+ def updated_from(self, origin: Resource):
352
+ for key in origin.model_fields.keys():
353
+ self.__setattr__(key, getattr(origin, key))
81
354
 
82
355
 
83
356
  class AugmentResponse(BaseModel):
84
- resources: dict[str, AugmentedResource]
85
- paragraphs: dict[str, AugmentedParagraph]
357
+ resources: dict[ResourceId, AugmentedResource]
358
+ fields: dict[FieldId, AugmentedField | AugmentedFileField | AugmentedConversationField]
359
+ paragraphs: dict[ParagraphId, AugmentedParagraph]
nucliadb_models/common.py CHANGED
@@ -16,7 +16,7 @@ import base64
16
16
  import hashlib
17
17
  import re
18
18
  from enum import Enum
19
- from typing import Any, Dict, List, Optional
19
+ from typing import Any
20
20
 
21
21
  from pydantic import (
22
22
  BaseModel,
@@ -38,7 +38,7 @@ FIELD_TYPE_CHAR_MAP = {
38
38
  }
39
39
 
40
40
  STORAGE_FILE_MATCH = re.compile(
41
- r"/?kbs/(?P<kbid>[^/]+)/r/(?P<rid>[^/]+)/(?P<download_type>[fe])/(?P<field_type>\w)/(?P<field_id>[^/]+)/?(?P<key>.*)?" # noqa
41
+ r"/?kbs/(?P<kbid>[^/]+)/r/(?P<rid>[^/]+)/(?P<download_type>[fe])/(?P<field_type>\w)/(?P<field_id>[^/]+)/?(?P<key>.*)?"
42
42
  )
43
43
  DOWNLOAD_TYPE_MAP = {"f": "field", "e": "extracted"}
44
44
  DOWNLOAD_URI = "/kb/{kbid}/resource/{rid}/{field_type}/{field_id}/download/{download_type}/{key}"
@@ -50,9 +50,9 @@ class ParamDefault(BaseModel):
50
50
  default: Any = None
51
51
  title: str
52
52
  description: str
53
- le: Optional[float] = None
54
- gt: Optional[float] = None
55
- max_items: Optional[int] = None
53
+ le: float | None = None
54
+ gt: float | None = None
55
+ max_items: int | None = None
56
56
  deprecated: bool = False
57
57
 
58
58
  def to_pydantic_field(self, default=_NOT_SET, **kw) -> Field: # type: ignore
@@ -86,13 +86,13 @@ class FieldID(BaseModel):
86
86
 
87
87
 
88
88
  class File(BaseModel):
89
- filename: Optional[str] = None
89
+ filename: str | None = None
90
90
  content_type: str = "application/octet-stream"
91
- payload: Optional[str] = Field(default=None, description="Base64 encoded file content")
92
- md5: Optional[str] = None
91
+ payload: str | None = Field(default=None, description="Base64 encoded file content")
92
+ md5: str | None = None
93
93
  # These are to be used for external files
94
- uri: Optional[str] = None
95
- extra_headers: Dict[str, str] = {}
94
+ uri: str | None = None
95
+ extra_headers: dict[str, str] = {}
96
96
 
97
97
  @model_validator(mode="after")
98
98
  def _check_internal_file_fields(self) -> Self:
@@ -108,7 +108,7 @@ class File(BaseModel):
108
108
  if self.md5 is None:
109
109
  # In case md5 is not supplied, compute it
110
110
  try:
111
- result = hashlib.md5(base64.b64decode(self.payload))
111
+ result = hashlib.md5(base64.b64decode(self.payload), usedforsecurity=False)
112
112
  self.md5 = result.hexdigest()
113
113
  except Exception:
114
114
  raise ValueError("MD5 could not be computed")
@@ -134,10 +134,10 @@ class FileB64(BaseModel):
134
134
 
135
135
 
136
136
  class CloudFile(BaseModel):
137
- uri: Optional[str] = None
138
- size: Optional[int] = None
139
- content_type: Optional[str] = None
140
- bucket_name: Optional[str] = None
137
+ uri: str | None = None
138
+ size: int | None = None
139
+ content_type: str | None = None
140
+ bucket_name: str | None = None
141
141
 
142
142
  class Source(Enum):
143
143
  FLAPS = "FLAPS"
@@ -146,23 +146,23 @@ class CloudFile(BaseModel):
146
146
  LOCAL = "LOCAL"
147
147
  EXTERNAL = "EXTERNAL"
148
148
 
149
- source: Optional[Source]
150
- filename: Optional[str]
151
- resumable_uri: Optional[str]
152
- offset: Optional[int]
153
- upload_uri: Optional[str]
154
- parts: Optional[List[str]]
155
- old_uri: Optional[str]
156
- old_bucket: Optional[str]
157
- md5: Optional[str]
149
+ source: Source | None
150
+ filename: str | None
151
+ resumable_uri: str | None
152
+ offset: int | None
153
+ upload_uri: str | None
154
+ parts: list[str] | None
155
+ old_uri: str | None
156
+ old_bucket: str | None
157
+ md5: str | None
158
158
 
159
159
 
160
160
  class CloudLink(BaseModel):
161
- uri: Optional[str] = None
162
- size: Optional[int] = None
163
- content_type: Optional[str] = None
164
- filename: Optional[str] = None
165
- md5: Optional[str] = None
161
+ uri: str | None = None
162
+ size: int | None = None
163
+ content_type: str | None = None
164
+ filename: str | None = None
165
+ md5: str | None = None
166
166
 
167
167
  @staticmethod
168
168
  def format_reader_download_uri(uri: str) -> str:
@@ -216,12 +216,12 @@ class FieldTypeName(str, Enum):
216
216
  class FieldRef(BaseModel):
217
217
  field_type: FieldTypeName
218
218
  field_id: str
219
- split: Optional[str] = None
219
+ split: str | None = None
220
220
 
221
221
 
222
222
  class Classification(BaseModel):
223
- labelset: str
224
- label: str
223
+ labelset: str = Field(title="The ID of the labelset")
224
+ label: str = Field(title="The label assigned from the labelset")
225
225
 
226
226
 
227
227
  class UserClassification(Classification):
@@ -229,19 +229,19 @@ class UserClassification(Classification):
229
229
 
230
230
 
231
231
  class Sentence(BaseModel):
232
- start: Optional[int] = None
233
- end: Optional[int] = None
234
- key: Optional[str] = None
232
+ start: int | None = None
233
+ end: int | None = None
234
+ key: str | None = None
235
235
 
236
236
 
237
237
  class PageInformation(BaseModel):
238
- page: Optional[int] = None
239
- page_with_visual: Optional[bool] = None
238
+ page: int | None = Field(default=None, title="Page Information Page")
239
+ page_with_visual: bool | None = None
240
240
 
241
241
 
242
242
  class Representation(BaseModel):
243
- is_a_table: Optional[bool] = None
244
- reference_file: Optional[str] = None
243
+ is_a_table: bool | None = None
244
+ reference_file: str | None = None
245
245
 
246
246
 
247
247
  class ParagraphRelations(BaseModel):
@@ -251,10 +251,10 @@ class ParagraphRelations(BaseModel):
251
251
 
252
252
 
253
253
  class Paragraph(BaseModel):
254
- start: Optional[int] = None
255
- end: Optional[int] = None
256
- start_seconds: Optional[List[int]] = None
257
- end_seconds: Optional[List[int]] = None
254
+ start: int | None = None
255
+ end: int | None = None
256
+ start_seconds: list[int] | None = None
257
+ end_seconds: list[int] | None = None
258
258
 
259
259
  class TypeParagraph(str, Enum):
260
260
  TEXT = "TEXT"
@@ -265,35 +265,35 @@ class Paragraph(BaseModel):
265
265
  TITLE = "TITLE"
266
266
  TABLE = "TABLE"
267
267
 
268
- kind: Optional[TypeParagraph] = None
269
- classifications: Optional[List[Classification]] = None
270
- sentences: Optional[List[Sentence]] = None
271
- key: Optional[str] = None
272
- page: Optional[PageInformation] = None
273
- representation: Optional[Representation] = None
274
- relations: Optional[ParagraphRelations] = None
268
+ kind: TypeParagraph | None = None
269
+ classifications: list[Classification] | None = None
270
+ sentences: list[Sentence] | None = None
271
+ key: str | None = None
272
+ page: PageInformation | None = None
273
+ representation: Representation | None = None
274
+ relations: ParagraphRelations | None = None
275
275
 
276
276
 
277
277
  class Shards(BaseModel):
278
- shards: Optional[List[str]] = None
278
+ shards: list[str] | None = None
279
279
 
280
280
 
281
281
  class Question(BaseModel):
282
282
  text: str
283
- language: Optional[str] = None
284
- ids_paragraphs: List[str]
283
+ language: str | None = None
284
+ ids_paragraphs: list[str]
285
285
 
286
286
 
287
287
  class Answer(BaseModel):
288
288
  text: str
289
- language: Optional[str] = None
290
- ids_paragraphs: List[str]
289
+ language: str | None = None
290
+ ids_paragraphs: list[str]
291
291
 
292
292
 
293
293
  class QuestionAnswer(BaseModel):
294
294
  question: Question
295
- answers: List[Answer]
295
+ answers: list[Answer]
296
296
 
297
297
 
298
298
  class QuestionAnswers(BaseModel):
299
- question_answer: List[QuestionAnswer]
299
+ question_answer: list[QuestionAnswer]
nucliadb_models/configuration.py CHANGED
@@ -14,7 +14,7 @@
14
14
  #
15
15
 
16
16
  import warnings
17
- from typing import Annotated, Any, Literal, Optional, Union
17
+ from typing import Annotated, Any, Literal
18
18
 
19
19
  from pydantic import BaseModel, Field, create_model
20
20
 
@@ -28,11 +28,11 @@ class KBConfiguration(BaseModel):
28
28
  super().__init__(**data)
29
29
 
30
30
  # Do not touch this model synced on Processing side
31
- semantic_model: Optional[str] = None
32
- generative_model: Optional[str] = None
33
- ner_model: Optional[str] = None
34
- anonymization_model: Optional[str] = None
35
- visual_labeling: Optional[str] = None
31
+ semantic_model: str | None = None
32
+ generative_model: str | None = None
33
+ ner_model: str | None = None
34
+ anonymization_model: str | None = None
35
+ visual_labeling: str | None = None
36
36
 
37
37
 
38
38
  #
@@ -57,7 +57,7 @@ class FindSearchConfiguration(BaseModel):
57
57
  AskConfig = create_model(
58
58
  "AskConfig",
59
59
  **_model_fields(AskRequest, skip=["query", "search_configuration"]),
60
- query=(Optional[str], None),
60
+ query=(str | None, None),
61
61
  )
62
62
 
63
63
 
@@ -67,7 +67,7 @@ class AskSearchConfiguration(BaseModel):
67
67
 
68
68
 
69
69
  SearchConfiguration = Annotated[
70
- Union[FindSearchConfiguration, AskSearchConfiguration], Field(discriminator="kind")
70
+ FindSearchConfiguration | AskSearchConfiguration, Field(discriminator="kind")
71
71
  ]
72
72
 
73
73
  # We need this to avoid issues with pydantic and generic types defined in another module