nucliadb-models 6.9.7.post5583__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +100 -84
  3. nucliadb_models/common.py +56 -56
  4. nucliadb_models/configuration.py +8 -8
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +25 -26
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +1 -2
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +78 -74
  12. nucliadb_models/graph/requests.py +40 -48
  13. nucliadb_models/graph/responses.py +13 -1
  14. nucliadb_models/hydration.py +48 -50
  15. nucliadb_models/internal/predict.py +7 -9
  16. nucliadb_models/internal/shards.py +2 -3
  17. nucliadb_models/labels.py +18 -11
  18. nucliadb_models/link.py +18 -19
  19. nucliadb_models/metadata.py +66 -54
  20. nucliadb_models/notifications.py +3 -3
  21. nucliadb_models/processing.py +1 -2
  22. nucliadb_models/resource.py +85 -93
  23. nucliadb_models/retrieval.py +147 -0
  24. nucliadb_models/search.py +263 -275
  25. nucliadb_models/security.py +2 -3
  26. nucliadb_models/text.py +7 -8
  27. nucliadb_models/trainset.py +1 -2
  28. nucliadb_models/utils.py +2 -3
  29. nucliadb_models/vectors.py +2 -5
  30. nucliadb_models/writer.py +56 -57
  31. {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/METADATA +1 -1
  32. nucliadb_models-6.11.1.post5822.dist-info/RECORD +41 -0
  33. {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/WHEEL +1 -1
  34. nucliadb_models-6.9.7.post5583.dist-info/RECORD +0 -40
  35. {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/top_level.txt +0 -0
nucliadb_models/agents/ingestion.py CHANGED
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  from enum import Enum
16
- from typing import Optional
17
16
 
18
17
  from pydantic import BaseModel, Field
19
18
 
@@ -41,11 +40,12 @@ class AgentsFilter(BaseModel):
41
40
 
42
41
 
43
42
  class ResourceAgentsRequest(BaseModel):
44
- filters: Optional[list[AgentsFilter]] = Field(
43
+ filters: list[AgentsFilter] | None = Field(
44
+ title="Resource Agent Filters",
45
45
  default=None,
46
46
  description="Filters to apply to the agents. If None, all curently configured agents are applied.",
47
47
  )
48
- agent_ids: Optional[list[str]] = Field(
48
+ agent_ids: list[str] | None = Field(
49
49
  default=None,
50
50
  title="An optional list of Data Augmentation Agent IDs to run. If None, all configured agents that match the filters are run.",
51
51
  )
@@ -57,7 +57,7 @@ class NewTextField(BaseModel):
57
57
 
58
58
 
59
59
  class AppliedDataAugmentation(BaseModel):
60
- qas: Optional[QuestionAnswers] = Field(
60
+ qas: QuestionAnswers | None = Field(
61
61
  default=None,
62
62
  description="Question and answers generated by the Question Answers agent",
63
63
  )
nucliadb_models/augment.py CHANGED
@@ -13,16 +13,15 @@
13
13
  # limitations under the License.
14
14
  #
15
15
 
16
- from enum import Enum
17
16
  from typing import Annotated
18
17
 
19
18
  from pydantic import BaseModel, Field, StringConstraints, model_validator
20
- from typing_extensions import Self
19
+ from typing_extensions import Self, assert_never
21
20
 
22
21
  from nucliadb_models import filters
23
22
  from nucliadb_models.common import FieldTypeName
24
23
  from nucliadb_models.resource import ExtractedDataTypeName, Resource
25
- from nucliadb_models.search import Image, ResourceProperties
24
+ from nucliadb_models.search import ResourceProperties, TextPosition
26
25
 
27
26
  ResourceIdPattern = r"^([0-9a-f]{32}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$"
28
27
  ResourceId = Annotated[
@@ -57,71 +56,6 @@ ParagraphId = Annotated[
57
56
  # Request
58
57
 
59
58
 
60
- class ResourceProp(str, Enum):
61
- """Superset of former `show` and `extracted` serializations options."""
62
-
63
- # `show` props
64
- BASIC = "basic"
65
- ORIGIN = "origin"
66
- EXTRA = "extra"
67
- RELATIONS = "relations"
68
- VALUES = "values"
69
- ERRORS = "errors"
70
- SECURITY = "security"
71
- # `extracted` props
72
- EXTRACTED_TEXT = "extracted_text"
73
- EXTRACTED_METADATA = "extracted_metadata"
74
- EXTRACTED_SHORTENED_METADATA = "extracted_shortened_metadata"
75
- EXTRACTED_LARGE_METADATA = "extracted_large_metadata"
76
- EXTRACTED_VECTOR = "extracted_vectors"
77
- EXTRACTED_LINK = "extracted_link"
78
- EXTRACTED_FILE = "extracted_file"
79
- EXTRACTED_QA = "extracted_question_answers"
80
- # new granular props
81
- TITLE = "title"
82
- SUMMARY = "summary"
83
- CLASSIFICATION_LABELS = "classification_labels"
84
-
85
- @classmethod
86
- def from_show_and_extracted(
87
- cls, show: list[ResourceProperties], extracted: list[ExtractedDataTypeName]
88
- ) -> list["ResourceProp"]:
89
- _show_to_prop = {
90
- ResourceProperties.BASIC: cls.BASIC,
91
- ResourceProperties.ORIGIN: cls.ORIGIN,
92
- ResourceProperties.EXTRA: cls.EXTRA,
93
- ResourceProperties.RELATIONS: cls.RELATIONS,
94
- ResourceProperties.VALUES: cls.VALUES,
95
- ResourceProperties.ERRORS: cls.ERRORS,
96
- ResourceProperties.SECURITY: cls.SECURITY,
97
- }
98
- _extracted_to_prop = {
99
- ExtractedDataTypeName.TEXT: cls.EXTRACTED_TEXT,
100
- ExtractedDataTypeName.METADATA: cls.EXTRACTED_METADATA,
101
- ExtractedDataTypeName.SHORTENED_METADATA: cls.EXTRACTED_SHORTENED_METADATA,
102
- ExtractedDataTypeName.LARGE_METADATA: cls.EXTRACTED_LARGE_METADATA,
103
- ExtractedDataTypeName.VECTOR: cls.EXTRACTED_VECTOR,
104
- ExtractedDataTypeName.LINK: cls.EXTRACTED_LINK,
105
- ExtractedDataTypeName.FILE: cls.EXTRACTED_FILE,
106
- ExtractedDataTypeName.QA: cls.EXTRACTED_QA,
107
- }
108
-
109
- props = []
110
- for s in show:
111
- show_prop = _show_to_prop.get(s)
112
- # show=extracted is not in the dict
113
- if show_prop is None:
114
- continue
115
- props.append(show_prop)
116
-
117
- if ResourceProperties.EXTRACTED in show:
118
- for e in extracted:
119
- extracted_prop = _extracted_to_prop[e]
120
- props.append(extracted_prop)
121
-
122
- return props
123
-
124
-
125
59
  class AugmentResourceFields(BaseModel):
126
60
  text: bool = False
127
61
  classification_labels: bool = False
@@ -132,8 +66,29 @@ class AugmentResourceFields(BaseModel):
132
66
  class AugmentResources(BaseModel):
133
67
  given: list[ResourceId]
134
68
 
135
- # TODO(decoupled-ask): replace this select for bool fields
136
- select: list[ResourceProp] = Field(default_factory=list)
69
+ # `show` props
70
+ basic: bool = False
71
+ origin: bool = False
72
+ extra: bool = False
73
+ relations: bool = False
74
+ values: bool = False
75
+ errors: bool = False
76
+ security: bool = False
77
+
78
+ # `extracted` props
79
+ extracted_text: bool = False
80
+ extracted_metadata: bool = False
81
+ extracted_shortened_metadata: bool = False
82
+ extracted_large_metadata: bool = False
83
+ extracted_vector: bool = False
84
+ extracted_link: bool = False
85
+ extracted_file: bool = False
86
+ extracted_qa: bool = False
87
+
88
+ # new granular props
89
+ title: bool = False
90
+ summary: bool = False
91
+ classification_labels: bool = False
137
92
 
138
93
  field_type_filter: list[FieldTypeName] | None = Field(
139
94
  default=None,
@@ -154,6 +109,51 @@ class AugmentResources(BaseModel):
154
109
 
155
110
  return self
156
111
 
112
+ def apply_show_and_extracted(
113
+ self, show: list[ResourceProperties], extracted: list[ExtractedDataTypeName]
114
+ ):
115
+ show_extracted = False
116
+ for s in show:
117
+ if s == ResourceProperties.BASIC:
118
+ self.basic = True
119
+ elif s == ResourceProperties.ORIGIN:
120
+ self.origin = True
121
+ elif s == ResourceProperties.EXTRA:
122
+ self.extra = True
123
+ elif s == ResourceProperties.RELATIONS:
124
+ self.relations = True
125
+ elif s == ResourceProperties.VALUES:
126
+ self.values = True
127
+ elif s == ResourceProperties.ERRORS:
128
+ self.errors = True
129
+ elif s == ResourceProperties.SECURITY:
130
+ self.security = True
131
+ elif s == ResourceProperties.EXTRACTED:
132
+ show_extracted = True
133
+ else: # pragma: no cover
134
+ assert_never(s)
135
+
136
+ if show_extracted:
137
+ for e in extracted:
138
+ if e == ExtractedDataTypeName.TEXT:
139
+ self.extracted_text = True
140
+ elif e == ExtractedDataTypeName.METADATA:
141
+ self.extracted_metadata = True
142
+ elif e == ExtractedDataTypeName.SHORTENED_METADATA:
143
+ self.extracted_shortened_metadata = True
144
+ elif e == ExtractedDataTypeName.LARGE_METADATA:
145
+ self.extracted_large_metadata = True
146
+ elif e == ExtractedDataTypeName.VECTOR:
147
+ self.extracted_vector = True
148
+ elif e == ExtractedDataTypeName.LINK:
149
+ self.extracted_link = True
150
+ elif e == ExtractedDataTypeName.FILE:
151
+ self.extracted_file = True
152
+ elif e == ExtractedDataTypeName.QA:
153
+ self.extracted_qa = True
154
+ else: # pragma: no cover
155
+ assert_never(s)
156
+
157
157
 
158
158
  class AugmentFields(BaseModel):
159
159
  given: list[FieldId]
@@ -162,6 +162,9 @@ class AugmentFields(BaseModel):
162
162
  classification_labels: bool = False
163
163
  entities: bool = False # also known as ners
164
164
 
165
+ # For file fields, augment the path to the thumbnail image
166
+ file_thumbnail: bool = False
167
+
165
168
  # When enabled, augment all the messages from the conversation. This is
166
169
  # incompatible with max_conversation_messages defined
167
170
  full_conversation: bool = False
@@ -205,11 +208,7 @@ class AugmentFields(BaseModel):
205
208
  return self
206
209
 
207
210
 
208
- # TODO(decoupled-ask): remove unused metadata
209
211
  class ParagraphMetadata(BaseModel):
210
- field_labels: list[str]
211
- paragraph_labels: list[str]
212
-
213
212
  is_an_image: bool
214
213
  is_a_table: bool
215
214
 
@@ -234,27 +233,29 @@ class AugmentParagraphs(BaseModel):
234
233
  neighbours_before: int = 0
235
234
  neighbours_after: int = 0
236
235
 
237
- # TODO(decoupled-ask): implement image strategy
238
236
  # paragraph extracted from an image, return an image
239
237
  source_image: bool = False
240
238
 
241
- # TODO(decoupled-ask): implement image strategy
242
239
  # paragraph extracted from a table, return table image
243
240
  table_image: bool = False
244
241
 
245
- # TODO(decoupled-ask): implement image strategy
246
242
  # return page_preview instead of table image if table image enabled
247
243
  table_prefers_page_preview: bool = False
248
244
 
249
- # TODO(decoupled-ask): implement image strategy
250
245
  # paragraph from a page, return page preview image
251
246
  page_preview_image: bool = False
252
247
 
248
+ @model_validator(mode="after")
249
+ def table_options_work_together(self) -> Self:
250
+ if not self.table_image and self.table_prefers_page_preview:
251
+ raise ValueError("`table_prefers_page_preview` can only be enabled with `table_image`")
252
+ return self
253
+
253
254
 
254
255
  class AugmentRequest(BaseModel):
255
- resources: AugmentResources | None = None
256
- fields: AugmentFields | None = None
257
- paragraphs: AugmentParagraphs | None = None
256
+ resources: list[AugmentResources] | None = Field(default=None, min_length=1)
257
+ fields: list[AugmentFields] | None = Field(default=None, min_length=1)
258
+ paragraphs: list[AugmentParagraphs] | None = Field(default=None, min_length=1)
258
259
 
259
260
 
260
261
  # Response
@@ -262,11 +263,14 @@ class AugmentRequest(BaseModel):
262
263
 
263
264
  class AugmentedParagraph(BaseModel):
264
265
  text: str | None = None
266
+ position: TextPosition | None = None
265
267
 
266
268
  neighbours_before: list[ParagraphId] | None = None
267
269
  neighbours_after: list[ParagraphId] | None = None
268
270
 
269
- image: Image | None = None
271
+ source_image: str | None = None
272
+ table_image: str | None = None
273
+ page_preview_image: str | None = None
270
274
 
271
275
 
272
276
  class AugmentedField(BaseModel):
@@ -277,7 +281,19 @@ class AugmentedField(BaseModel):
277
281
  # former ners
278
282
  entities: dict[str, list[str]] | None = None
279
283
 
280
- page_preview_image: Image | None = None
284
+
285
+ class AugmentedFileField(BaseModel):
286
+ text: str | None = None
287
+
288
+ classification_labels: dict[str, list[str]] | None = None
289
+
290
+ # former ners
291
+ entities: dict[str, list[str]] | None = None
292
+
293
+ page_preview_image: str | None = None
294
+
295
+ # Path for the download API to retrieve the file thumbnail image
296
+ thumbnail_image: str | None = None
281
297
 
282
298
 
283
299
  class AugmentedConversationMessage(BaseModel):
@@ -335,5 +351,5 @@ class AugmentedResource(Resource):
335
351
 
336
352
  class AugmentResponse(BaseModel):
337
353
  resources: dict[ResourceId, AugmentedResource]
338
- fields: dict[FieldId, AugmentedField | AugmentedConversationField]
354
+ fields: dict[FieldId, AugmentedField | AugmentedFileField | AugmentedConversationField]
339
355
  paragraphs: dict[ParagraphId, AugmentedParagraph]
nucliadb_models/common.py CHANGED
@@ -16,7 +16,7 @@ import base64
16
16
  import hashlib
17
17
  import re
18
18
  from enum import Enum
19
- from typing import Any, Dict, List, Optional
19
+ from typing import Any
20
20
 
21
21
  from pydantic import (
22
22
  BaseModel,
@@ -38,7 +38,7 @@ FIELD_TYPE_CHAR_MAP = {
38
38
  }
39
39
 
40
40
  STORAGE_FILE_MATCH = re.compile(
41
- r"/?kbs/(?P<kbid>[^/]+)/r/(?P<rid>[^/]+)/(?P<download_type>[fe])/(?P<field_type>\w)/(?P<field_id>[^/]+)/?(?P<key>.*)?" # noqa
41
+ r"/?kbs/(?P<kbid>[^/]+)/r/(?P<rid>[^/]+)/(?P<download_type>[fe])/(?P<field_type>\w)/(?P<field_id>[^/]+)/?(?P<key>.*)?"
42
42
  )
43
43
  DOWNLOAD_TYPE_MAP = {"f": "field", "e": "extracted"}
44
44
  DOWNLOAD_URI = "/kb/{kbid}/resource/{rid}/{field_type}/{field_id}/download/{download_type}/{key}"
@@ -50,9 +50,9 @@ class ParamDefault(BaseModel):
50
50
  default: Any = None
51
51
  title: str
52
52
  description: str
53
- le: Optional[float] = None
54
- gt: Optional[float] = None
55
- max_items: Optional[int] = None
53
+ le: float | None = None
54
+ gt: float | None = None
55
+ max_items: int | None = None
56
56
  deprecated: bool = False
57
57
 
58
58
  def to_pydantic_field(self, default=_NOT_SET, **kw) -> Field: # type: ignore
@@ -86,13 +86,13 @@ class FieldID(BaseModel):
86
86
 
87
87
 
88
88
  class File(BaseModel):
89
- filename: Optional[str] = None
89
+ filename: str | None = None
90
90
  content_type: str = "application/octet-stream"
91
- payload: Optional[str] = Field(default=None, description="Base64 encoded file content")
92
- md5: Optional[str] = None
91
+ payload: str | None = Field(default=None, description="Base64 encoded file content")
92
+ md5: str | None = None
93
93
  # These are to be used for external files
94
- uri: Optional[str] = None
95
- extra_headers: Dict[str, str] = {}
94
+ uri: str | None = None
95
+ extra_headers: dict[str, str] = {}
96
96
 
97
97
  @model_validator(mode="after")
98
98
  def _check_internal_file_fields(self) -> Self:
@@ -134,10 +134,10 @@ class FileB64(BaseModel):
134
134
 
135
135
 
136
136
  class CloudFile(BaseModel):
137
- uri: Optional[str] = None
138
- size: Optional[int] = None
139
- content_type: Optional[str] = None
140
- bucket_name: Optional[str] = None
137
+ uri: str | None = None
138
+ size: int | None = None
139
+ content_type: str | None = None
140
+ bucket_name: str | None = None
141
141
 
142
142
  class Source(Enum):
143
143
  FLAPS = "FLAPS"
@@ -146,23 +146,23 @@ class CloudFile(BaseModel):
146
146
  LOCAL = "LOCAL"
147
147
  EXTERNAL = "EXTERNAL"
148
148
 
149
- source: Optional[Source]
150
- filename: Optional[str]
151
- resumable_uri: Optional[str]
152
- offset: Optional[int]
153
- upload_uri: Optional[str]
154
- parts: Optional[List[str]]
155
- old_uri: Optional[str]
156
- old_bucket: Optional[str]
157
- md5: Optional[str]
149
+ source: Source | None
150
+ filename: str | None
151
+ resumable_uri: str | None
152
+ offset: int | None
153
+ upload_uri: str | None
154
+ parts: list[str] | None
155
+ old_uri: str | None
156
+ old_bucket: str | None
157
+ md5: str | None
158
158
 
159
159
 
160
160
  class CloudLink(BaseModel):
161
- uri: Optional[str] = None
162
- size: Optional[int] = None
163
- content_type: Optional[str] = None
164
- filename: Optional[str] = None
165
- md5: Optional[str] = None
161
+ uri: str | None = None
162
+ size: int | None = None
163
+ content_type: str | None = None
164
+ filename: str | None = None
165
+ md5: str | None = None
166
166
 
167
167
  @staticmethod
168
168
  def format_reader_download_uri(uri: str) -> str:
@@ -216,12 +216,12 @@ class FieldTypeName(str, Enum):
216
216
  class FieldRef(BaseModel):
217
217
  field_type: FieldTypeName
218
218
  field_id: str
219
- split: Optional[str] = None
219
+ split: str | None = None
220
220
 
221
221
 
222
222
  class Classification(BaseModel):
223
- labelset: str
224
- label: str
223
+ labelset: str = Field(title="The ID of the labelset")
224
+ label: str = Field(title="The label assigned from the labelset")
225
225
 
226
226
 
227
227
  class UserClassification(Classification):
@@ -229,19 +229,19 @@ class UserClassification(Classification):
229
229
 
230
230
 
231
231
  class Sentence(BaseModel):
232
- start: Optional[int] = None
233
- end: Optional[int] = None
234
- key: Optional[str] = None
232
+ start: int | None = None
233
+ end: int | None = None
234
+ key: str | None = None
235
235
 
236
236
 
237
237
  class PageInformation(BaseModel):
238
- page: Optional[int] = None
239
- page_with_visual: Optional[bool] = None
238
+ page: int | None = Field(default=None, title="Page Information Page")
239
+ page_with_visual: bool | None = None
240
240
 
241
241
 
242
242
  class Representation(BaseModel):
243
- is_a_table: Optional[bool] = None
244
- reference_file: Optional[str] = None
243
+ is_a_table: bool | None = None
244
+ reference_file: str | None = None
245
245
 
246
246
 
247
247
  class ParagraphRelations(BaseModel):
@@ -251,10 +251,10 @@ class ParagraphRelations(BaseModel):
251
251
 
252
252
 
253
253
  class Paragraph(BaseModel):
254
- start: Optional[int] = None
255
- end: Optional[int] = None
256
- start_seconds: Optional[List[int]] = None
257
- end_seconds: Optional[List[int]] = None
254
+ start: int | None = None
255
+ end: int | None = None
256
+ start_seconds: list[int] | None = None
257
+ end_seconds: list[int] | None = None
258
258
 
259
259
  class TypeParagraph(str, Enum):
260
260
  TEXT = "TEXT"
@@ -265,35 +265,35 @@ class Paragraph(BaseModel):
265
265
  TITLE = "TITLE"
266
266
  TABLE = "TABLE"
267
267
 
268
- kind: Optional[TypeParagraph] = None
269
- classifications: Optional[List[Classification]] = None
270
- sentences: Optional[List[Sentence]] = None
271
- key: Optional[str] = None
272
- page: Optional[PageInformation] = None
273
- representation: Optional[Representation] = None
274
- relations: Optional[ParagraphRelations] = None
268
+ kind: TypeParagraph | None = None
269
+ classifications: list[Classification] | None = None
270
+ sentences: list[Sentence] | None = None
271
+ key: str | None = None
272
+ page: PageInformation | None = None
273
+ representation: Representation | None = None
274
+ relations: ParagraphRelations | None = None
275
275
 
276
276
 
277
277
  class Shards(BaseModel):
278
- shards: Optional[List[str]] = None
278
+ shards: list[str] | None = None
279
279
 
280
280
 
281
281
  class Question(BaseModel):
282
282
  text: str
283
- language: Optional[str] = None
284
- ids_paragraphs: List[str]
283
+ language: str | None = None
284
+ ids_paragraphs: list[str]
285
285
 
286
286
 
287
287
  class Answer(BaseModel):
288
288
  text: str
289
- language: Optional[str] = None
290
- ids_paragraphs: List[str]
289
+ language: str | None = None
290
+ ids_paragraphs: list[str]
291
291
 
292
292
 
293
293
  class QuestionAnswer(BaseModel):
294
294
  question: Question
295
- answers: List[Answer]
295
+ answers: list[Answer]
296
296
 
297
297
 
298
298
  class QuestionAnswers(BaseModel):
299
- question_answer: List[QuestionAnswer]
299
+ question_answer: list[QuestionAnswer]
nucliadb_models/configuration.py CHANGED
@@ -14,7 +14,7 @@
14
14
  #
15
15
 
16
16
  import warnings
17
- from typing import Annotated, Any, Literal, Optional, Union
17
+ from typing import Annotated, Any, Literal
18
18
 
19
19
  from pydantic import BaseModel, Field, create_model
20
20
 
@@ -28,11 +28,11 @@ class KBConfiguration(BaseModel):
28
28
  super().__init__(**data)
29
29
 
30
30
  # Do not touch this model synced on Processing side
31
- semantic_model: Optional[str] = None
32
- generative_model: Optional[str] = None
33
- ner_model: Optional[str] = None
34
- anonymization_model: Optional[str] = None
35
- visual_labeling: Optional[str] = None
31
+ semantic_model: str | None = None
32
+ generative_model: str | None = None
33
+ ner_model: str | None = None
34
+ anonymization_model: str | None = None
35
+ visual_labeling: str | None = None
36
36
 
37
37
 
38
38
  #
@@ -57,7 +57,7 @@ class FindSearchConfiguration(BaseModel):
57
57
  AskConfig = create_model(
58
58
  "AskConfig",
59
59
  **_model_fields(AskRequest, skip=["query", "search_configuration"]),
60
- query=(Optional[str], None),
60
+ query=(str | None, None),
61
61
  )
62
62
 
63
63
 
@@ -67,7 +67,7 @@ class AskSearchConfiguration(BaseModel):
67
67
 
68
68
 
69
69
  SearchConfiguration = Annotated[
70
- Union[FindSearchConfiguration, AskSearchConfiguration], Field(discriminator="kind")
70
+ FindSearchConfiguration | AskSearchConfiguration, Field(discriminator="kind")
71
71
  ]
72
72
 
73
73
  # We need this to avoid issues with pydantic and generic types defined in another module
nucliadb_models/content_types.py CHANGED
@@ -14,7 +14,6 @@
14
14
 
15
15
 
16
16
  import mimetypes
17
- from typing import Optional
18
17
 
19
18
  GENERIC_MIME_TYPE = "application/generic"
20
19
 
@@ -26,7 +25,9 @@ NUCLIA_CUSTOM_CONTENT_TYPES = {
26
25
 
27
26
  EXTRA_VALID_CONTENT_TYPES = {
28
27
  "application/font-woff",
28
+ "application/javascript",
29
29
  "application/mp4",
30
+ "application/rtf",
30
31
  "application/toml",
31
32
  "application/vnd.jgraph.mxfile",
32
33
  "application/vnd.ms-excel.sheet.macroenabled.12",
@@ -38,6 +39,7 @@ EXTRA_VALID_CONTENT_TYPES = {
38
39
  "application/x-git",
39
40
  "application/x-gzip",
40
41
  "application/x-iwork-pages-sffpages",
42
+ "application/x-javascript",
41
43
  "application/x-mach-binary",
42
44
  "application/x-mobipocket-ebook",
43
45
  "application/x-ms-shortcut",
@@ -46,10 +48,15 @@ EXTRA_VALID_CONTENT_TYPES = {
46
48
  "application/x-openscad",
47
49
  "application/x-sql",
48
50
  "application/x-zip-compressed",
51
+ "application/x-zip",
49
52
  "application/zstd",
53
+ "audio/m4a",
50
54
  "audio/vnd.dlna.adts",
51
55
  "audio/wav",
52
56
  "audio/x-m4a",
57
+ "image/svg+xml",
58
+ "image/tif",
59
+ "image/x-ico",
53
60
  "model/stl",
54
61
  "multipart/form-data",
55
62
  "text/jsx",
@@ -58,26 +65,21 @@ EXTRA_VALID_CONTENT_TYPES = {
58
65
  "text/rtf",
59
66
  "text/x-c++",
60
67
  "text/x-java-source",
68
+ "text/x-javascript",
61
69
  "text/x-log",
62
70
  "text/x-python-script",
63
71
  "text/x-ruby-script",
64
72
  "text/yaml",
65
- "video/x-m4v",
66
- "video/YouTube",
67
- "image/tif",
73
+ "video/mkv",
68
74
  "video/qt",
69
75
  "video/webp",
70
- "application/rtf",
71
- "application/x-zip",
72
- "video/mkv",
73
- "image/x-ico",
74
- "audio/m4a",
75
- "image/svg+xml",
76
+ "video/x-m4v",
76
77
  "video/x-msvideo",
78
+ "video/YouTube",
77
79
  } | NUCLIA_CUSTOM_CONTENT_TYPES
78
80
 
79
81
 
80
- def guess(filename: str) -> Optional[str]:
82
+ def guess(filename: str) -> str | None:
81
83
  """
82
84
  Guess the content type of a file based on its filename.
83
85
  Returns None if the content type could not be guessed.