nucliadb-models 6.9.6.post5453__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +294 -24
  3. nucliadb_models/common.py +57 -57
  4. nucliadb_models/configuration.py +8 -8
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +25 -26
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +1 -2
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +79 -75
  12. nucliadb_models/graph/requests.py +40 -48
  13. nucliadb_models/graph/responses.py +13 -1
  14. nucliadb_models/hydration.py +48 -50
  15. nucliadb_models/internal/predict.py +7 -9
  16. nucliadb_models/internal/shards.py +2 -3
  17. nucliadb_models/labels.py +18 -11
  18. nucliadb_models/link.py +18 -19
  19. nucliadb_models/metadata.py +66 -54
  20. nucliadb_models/notifications.py +3 -3
  21. nucliadb_models/processing.py +1 -2
  22. nucliadb_models/resource.py +85 -102
  23. nucliadb_models/retrieval.py +147 -0
  24. nucliadb_models/search.py +300 -276
  25. nucliadb_models/security.py +2 -3
  26. nucliadb_models/text.py +7 -8
  27. nucliadb_models/trainset.py +1 -2
  28. nucliadb_models/utils.py +2 -3
  29. nucliadb_models/vectors.py +2 -5
  30. nucliadb_models/writer.py +56 -57
  31. {nucliadb_models-6.9.6.post5453.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/METADATA +1 -1
  32. nucliadb_models-6.11.1.post5822.dist-info/RECORD +41 -0
  33. {nucliadb_models-6.9.6.post5453.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/WHEEL +1 -1
  34. nucliadb_models-6.9.6.post5453.dist-info/RECORD +0 -40
  35. {nucliadb_models-6.9.6.post5453.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/top_level.txt +0 -0
nucliadb_models/search.py CHANGED
@@ -14,19 +14,19 @@
14
14
  #
15
15
  import json
16
16
  from enum import Enum
17
- from typing import Any, Literal, Optional, Union
17
+ from typing import Annotated, Any, Literal
18
+ from uuid import UUID
18
19
 
19
20
  from pydantic import BaseModel, Field, field_validator, model_validator
20
21
  from pydantic.aliases import AliasChoices
21
22
  from pydantic.json_schema import SkipJsonSchema
22
- from typing_extensions import Annotated, Self
23
+ from typing_extensions import Self
23
24
 
24
25
  from nucliadb_models import RelationMetadata
25
26
  from nucliadb_models.common import FieldTypeName, ParamDefault
26
27
  from nucliadb_models.graph.requests import GraphPathQuery
27
28
 
28
29
  # Bw/c import to avoid breaking users
29
- # noqa isort: skip
30
30
  from nucliadb_models.metadata import RelationNodeType, RelationType, ResourceProcessingStatus
31
31
  from nucliadb_models.resource import ExtractedDataTypeName, Resource
32
32
  from nucliadb_models.security import RequestSecurity
@@ -152,12 +152,12 @@ FacetsResult = dict[str, Any]
152
152
 
153
153
 
154
154
  class TextPosition(BaseModel):
155
- page_number: Optional[int] = None
155
+ page_number: int | None = None
156
156
  index: int
157
157
  start: int
158
158
  end: int
159
- start_seconds: Optional[list[int]] = None
160
- end_seconds: Optional[list[int]] = None
159
+ start_seconds: list[int] | None = None
160
+ end_seconds: list[int] | None = None
161
161
 
162
162
 
163
163
  class Sentence(BaseModel):
@@ -166,8 +166,8 @@ class Sentence(BaseModel):
166
166
  text: str
167
167
  field_type: str
168
168
  field: str
169
- index: Optional[str] = None
170
- position: Optional[TextPosition] = None
169
+ index: str | None = None
170
+ position: TextPosition | None = None
171
171
 
172
172
 
173
173
  class Sentences(BaseModel):
@@ -177,7 +177,7 @@ class Sentences(BaseModel):
177
177
  page_size: int = 20
178
178
  min_score: float = Field(
179
179
  title="Minimum score",
180
- description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.", # noqa: E501
180
+ description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
181
181
  )
182
182
 
183
183
 
@@ -188,45 +188,45 @@ class Paragraph(BaseModel):
188
188
  field: str
189
189
  text: str
190
190
  labels: list[str] = []
191
- start_seconds: Optional[list[int]] = None
192
- end_seconds: Optional[list[int]] = None
193
- position: Optional[TextPosition] = None
191
+ start_seconds: list[int] | None = None
192
+ end_seconds: list[int] | None = None
193
+ position: TextPosition | None = None
194
194
  fuzzy_result: bool = False
195
195
 
196
196
 
197
197
  class Paragraphs(BaseModel):
198
198
  results: list[Paragraph] = []
199
- facets: Optional[FacetsResult] = None
200
- query: Optional[str] = None
199
+ facets: FacetsResult | None = None
200
+ query: str | None = Field(default=None, title="Paragraphs Query")
201
201
  total: int = 0
202
202
  page_number: int = 0
203
203
  page_size: int = 20
204
204
  next_page: bool = False
205
205
  min_score: float = Field(
206
206
  title="Minimum score",
207
- description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.", # noqa: E501
207
+ description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
208
208
  )
209
209
 
210
210
 
211
211
  class ResourceResult(BaseModel):
212
- score: Union[float, int]
212
+ score: float | int
213
213
  rid: str
214
214
  field_type: str
215
215
  field: str
216
- labels: Optional[list[str]] = None
216
+ labels: list[str] | None = None
217
217
 
218
218
 
219
219
  class Resources(BaseModel):
220
220
  results: list[ResourceResult]
221
- facets: Optional[FacetsResult] = None
222
- query: Optional[str] = None
221
+ facets: FacetsResult | None = None
222
+ query: str | None = Field(default=None, title="Resources Query")
223
223
  total: int = 0
224
224
  page_number: int = 0
225
225
  page_size: int = 20
226
226
  next_page: bool = False
227
227
  min_score: float = Field(
228
228
  title="Minimum score",
229
- description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.", # noqa: E501
229
+ description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
230
230
  )
231
231
 
232
232
 
@@ -246,7 +246,7 @@ class DirectionalRelation(BaseModel):
246
246
  relation: RelationType
247
247
  relation_label: str
248
248
  direction: RelationDirection
249
- metadata: Optional[RelationMetadata] = None
249
+ metadata: RelationMetadata | None = None
250
250
  resource_id: str
251
251
 
252
252
 
@@ -280,23 +280,23 @@ class RelatedEntities(BaseModel):
280
280
  class ResourceSearchResults(JsonBaseModel):
281
281
  """Search on resource results"""
282
282
 
283
- sentences: Optional[Sentences] = None
284
- paragraphs: Optional[Paragraphs] = None
285
- relations: Optional[Relations] = None
286
- nodes: Optional[list[dict[str, str]]] = None
287
- shards: Optional[list[str]] = None
283
+ sentences: Sentences | None = None
284
+ paragraphs: Paragraphs | None = None
285
+ relations: Relations | None = None
286
+ nodes: list[dict[str, str]] | None = None
287
+ shards: list[str] | None = None
288
288
 
289
289
 
290
290
  class KnowledgeboxSearchResults(JsonBaseModel):
291
291
  """Search on knowledgebox results"""
292
292
 
293
293
  resources: dict[str, Resource] = {}
294
- sentences: Optional[Sentences] = None
295
- paragraphs: Optional[Paragraphs] = None
296
- fulltext: Optional[Resources] = None
297
- relations: Optional[Relations] = None
298
- nodes: Optional[list[dict[str, str]]] = None
299
- shards: Optional[list[str]] = None
294
+ sentences: Sentences | None = None
295
+ paragraphs: Paragraphs | None = None
296
+ fulltext: Resources | None = None
297
+ relations: Relations | None = None
298
+ nodes: list[dict[str, str]] | None = None
299
+ shards: list[str] | None = None
300
300
 
301
301
  # TODO: remove on a future major release
302
302
  autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
@@ -306,16 +306,16 @@ class CatalogResponse(BaseModel):
306
306
  """Catalog results"""
307
307
 
308
308
  resources: dict[str, Resource] = {}
309
- fulltext: Optional[Resources] = None
310
- shards: Optional[list[str]] = None
309
+ fulltext: Resources | None = None
310
+ shards: list[str] | None = None
311
311
 
312
312
 
313
313
  class KnowledgeboxSuggestResults(JsonBaseModel):
314
314
  """Suggest on resource results"""
315
315
 
316
- paragraphs: Optional[Paragraphs] = None
317
- entities: Optional[RelatedEntities] = None
318
- shards: Optional[list[str]] = None
316
+ paragraphs: Paragraphs | None = None
317
+ entities: RelatedEntities | None = None
318
+ shards: list[str] | None = None
319
319
 
320
320
 
321
321
  class KnowledgeboxCounters(BaseModel):
@@ -323,7 +323,7 @@ class KnowledgeboxCounters(BaseModel):
323
323
  paragraphs: int
324
324
  fields: int
325
325
  sentences: int
326
- shards: Optional[list[str]] = None
326
+ shards: list[str] | None = None
327
327
  index_size: float = Field(default=0.0, title="Index size (bytes)")
328
328
 
329
329
 
@@ -378,13 +378,13 @@ class ReciprocalRankFusion(_BaseRankFusion):
378
378
  k: float = Field(
379
379
  default=60.0,
380
380
  title="RRF k parameter",
381
- description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets", # noqa: E501
381
+ description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
382
382
  )
383
- window: Optional[int] = Field(
383
+ window: int | None = Field(
384
384
  default=None,
385
385
  le=MAX_RANK_FUSION_WINDOW,
386
386
  title="RRF window",
387
- description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time", # noqa: E501
387
+ description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
388
388
  )
389
389
  boosting: ReciprocalRankFusionWeights = Field(
390
390
  default_factory=ReciprocalRankFusionWeights,
@@ -395,12 +395,12 @@ Define different weights for each retriever. This allows to assign different pri
395
395
  The default is 1 for each retriever, which means no extra boost for any of them. Weights below 0 can be used for negative boosting.
396
396
 
397
397
  This kind of boosting can be useful in multilingual search, for example, where keyword search may not give good results and can degrade the final search experience
398
- """, # noqa: E501
398
+ """,
399
399
  )
400
400
 
401
401
 
402
402
  RankFusion = Annotated[
403
- Union[ReciprocalRankFusion],
403
+ ReciprocalRankFusion,
404
404
  Field(discriminator="name"),
405
405
  ]
406
406
 
@@ -435,15 +435,15 @@ class _BaseReranker(BaseModel):
435
435
 
436
436
  class PredictReranker(_BaseReranker):
437
437
  name: Literal[RerankerName.PREDICT_RERANKER] = RerankerName.PREDICT_RERANKER
438
- window: Optional[int] = Field(
438
+ window: int | None = Field(
439
439
  default=None,
440
440
  le=200,
441
441
  title="Reranker window",
442
- description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k", # noqa: E501
442
+ description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
443
443
  )
444
444
 
445
445
 
446
- Reranker = Annotated[Union[PredictReranker], Field(discriminator="name")]
446
+ Reranker = Annotated[PredictReranker, Field(discriminator="name")]
447
447
 
448
448
 
449
449
  class KnowledgeBoxCount(BaseModel):
@@ -472,18 +472,18 @@ class SearchParamDefaults:
472
472
  )
473
473
  filters = ParamDefault(
474
474
  default=[],
475
- title="Filters",
476
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
475
+ title="Search Filters",
476
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
477
477
  )
478
478
  resource_filters = ParamDefault(
479
479
  default=[],
480
480
  title="Resources filter",
481
- description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.", # noqa: E501
481
+ description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
482
482
  )
483
483
  faceted = ParamDefault(
484
484
  default=[],
485
485
  title="Faceted",
486
- description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
486
+ description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
487
487
  max_items=50,
488
488
  )
489
489
  chat_query = ParamDefault(
@@ -520,12 +520,12 @@ class SearchParamDefaults:
520
520
  highlight = ParamDefault(
521
521
  default=False,
522
522
  title="Highlight",
523
- description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags", # noqa: E501
523
+ description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
524
524
  )
525
525
  with_duplicates = ParamDefault(
526
526
  default=False,
527
527
  title="With duplicate paragraphs",
528
- description="Whether to return duplicate paragraphs on the same document", # noqa: E501
528
+ description="Whether to return duplicate paragraphs on the same document",
529
529
  )
530
530
  with_status = ParamDefault(
531
531
  default=None,
@@ -535,7 +535,7 @@ class SearchParamDefaults:
535
535
  with_synonyms = ParamDefault(
536
536
  default=False,
537
537
  title="With custom synonyms",
538
- description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.", # noqa: E501
538
+ description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
539
539
  )
540
540
  sort_order = ParamDefault(
541
541
  default=SortOrder.DESC,
@@ -565,12 +565,12 @@ class SearchParamDefaults:
565
565
  reranker = ParamDefault(
566
566
  default=RerankerName.PREDICT_RERANKER,
567
567
  title="Reranker",
568
- description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval", # noqa: E501
568
+ description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
569
569
  )
570
570
  debug = ParamDefault(
571
571
  default=False,
572
572
  title="Debug mode",
573
- description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.", # noqa: E501
573
+ description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
574
574
  )
575
575
  show = ParamDefault(
576
576
  default=[ResourceProperties.BASIC],
@@ -591,27 +591,27 @@ class SearchParamDefaults:
591
591
  range_creation_start = ParamDefault(
592
592
  default=None,
593
593
  title="Resource creation range start",
594
- description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
594
+ description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
595
595
  )
596
596
  range_creation_end = ParamDefault(
597
597
  default=None,
598
598
  title="Resource creation range end",
599
- description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
599
+ description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
600
600
  )
601
601
  range_modification_start = ParamDefault(
602
602
  default=None,
603
603
  title="Resource modification range start",
604
- description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
604
+ description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
605
605
  )
606
606
  range_modification_end = ParamDefault(
607
607
  default=None,
608
608
  title="Resource modification range end",
609
- description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
609
+ description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
610
610
  )
611
611
  vector = ParamDefault(
612
612
  default=None,
613
613
  title="Search Vector",
614
- description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.", # noqa: E501
614
+ description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
615
615
  )
616
616
  vectorset = ParamDefault(
617
617
  default=None,
@@ -627,12 +627,12 @@ class SearchParamDefaults:
627
627
  chat_history = ParamDefault(
628
628
  default=None,
629
629
  title="Chat history",
630
- description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.", # noqa: E501
630
+ description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
631
631
  )
632
632
  chat_features = ParamDefault(
633
633
  default=[ChatOptions.SEMANTIC, ChatOptions.KEYWORD],
634
634
  title="Chat features",
635
- description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead", # noqa: E501
635
+ description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
636
636
  )
637
637
  suggest_features = ParamDefault(
638
638
  default=[
@@ -645,17 +645,17 @@ class SearchParamDefaults:
645
645
  security = ParamDefault(
646
646
  default=None,
647
647
  title="Security",
648
- description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.", # noqa: E501
648
+ description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
649
649
  )
650
650
  security_groups = ParamDefault(
651
651
  default=[],
652
652
  title="Security groups",
653
- description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.", # noqa: E501
653
+ description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
654
654
  )
655
655
  rephrase = ParamDefault(
656
656
  default=False,
657
657
  title="Rephrase query consuming LLMs",
658
- description="Rephrase query consuming LLMs - it will make the query slower", # noqa: E501
658
+ description="Rephrase query consuming LLMs - it will make the query slower",
659
659
  )
660
660
  prefer_markdown = ParamDefault(
661
661
  default=False,
@@ -695,10 +695,10 @@ class SearchParamDefaults:
695
695
 
696
696
 
697
697
  class Filter(BaseModel):
698
- all: Optional[list[str]] = Field(default=None, min_length=1)
699
- any: Optional[list[str]] = Field(default=None, min_length=1)
700
- none: Optional[list[str]] = Field(default=None, min_length=1)
701
- not_all: Optional[list[str]] = Field(default=None, min_length=1)
698
+ all: list[str] | None = Field(default=None, min_length=1)
699
+ any: list[str] | None = Field(default=None, min_length=1)
700
+ none: list[str] | None = Field(default=None, min_length=1)
701
+ not_all: list[str] | None = Field(default=None, min_length=1)
702
702
 
703
703
  @model_validator(mode="after")
704
704
  def validate_filter(self) -> Self:
@@ -740,19 +740,19 @@ class CatalogQuery(BaseModel):
740
740
 
741
741
 
742
742
  class CatalogRequest(BaseModel):
743
- query: Union[str, CatalogQuery] = ParamDefault(
743
+ query: str | CatalogQuery = ParamDefault(
744
744
  default="",
745
- title="Query",
745
+ title="Catalog Request Query",
746
746
  description="The query to search for",
747
747
  ).to_pydantic_field()
748
- filter_expression: Optional[CatalogFilterExpression] = (
748
+ filter_expression: CatalogFilterExpression | None = (
749
749
  SearchParamDefaults.catalog_filter_expression.to_pydantic_field()
750
750
  )
751
751
  faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
752
- sort: Optional[SortOptions] = SearchParamDefaults.sort.to_pydantic_field()
752
+ sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
753
753
  page_number: int = SearchParamDefaults.catalog_page_number.to_pydantic_field()
754
754
  page_size: int = SearchParamDefaults.catalog_page_size.to_pydantic_field()
755
- hidden: Optional[bool] = SearchParamDefaults.hidden.to_pydantic_field()
755
+ hidden: bool | None = SearchParamDefaults.hidden.to_pydantic_field()
756
756
  show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field(
757
757
  default=[ResourceProperties.BASIC, ResourceProperties.ERRORS]
758
758
  )
@@ -760,32 +760,30 @@ class CatalogRequest(BaseModel):
760
760
  debug: SkipJsonSchema[bool] = SearchParamDefaults.debug.to_pydantic_field()
761
761
 
762
762
  # Deprecated filter parameters
763
- filters: Union[list[str], list[Filter]] = Field(
763
+ filters: list[str] | list[Filter] = Field(
764
764
  default=[],
765
- title="Filters",
766
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
765
+ title="Catalog Filters",
766
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
767
767
  deprecated="Use filter_expression instead",
768
768
  )
769
- with_status: Optional[ResourceProcessingStatus] = Field(
769
+ with_status: ResourceProcessingStatus | None = Field(
770
770
  default=None,
771
771
  title="With processing status",
772
772
  description="Filter results by resource processing status",
773
773
  deprecated="Use filter_expression instead",
774
774
  )
775
- range_creation_start: Optional[DateTime] = (
776
- SearchParamDefaults.range_creation_start.to_pydantic_field(
777
- deprecated="Use filter_expression instead",
778
- )
775
+ range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field(
776
+ deprecated="Use filter_expression instead",
779
777
  )
780
- range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field(
778
+ range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field(
781
779
  deprecated="Use filter_expression instead",
782
780
  )
783
- range_modification_start: Optional[DateTime] = (
781
+ range_modification_start: DateTime | None = (
784
782
  SearchParamDefaults.range_modification_start.to_pydantic_field(
785
783
  deprecated="Use filter_expression instead",
786
784
  )
787
785
  )
788
- range_modification_end: Optional[DateTime] = (
786
+ range_modification_end: DateTime | None = (
789
787
  SearchParamDefaults.range_modification_end.to_pydantic_field(
790
788
  deprecated="Use filter_expression instead",
791
789
  )
@@ -798,15 +796,15 @@ class CatalogRequest(BaseModel):
798
796
 
799
797
 
800
798
  class MinScore(BaseModel):
801
- semantic: Optional[float] = Field(
799
+ semantic: float | None = Field(
802
800
  default=None,
803
801
  title="Minimum semantic score",
804
- description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
802
+ description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
805
803
  )
806
804
  bm25: float = Field(
807
805
  default=0,
808
806
  title="Minimum bm25 score",
809
- description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
807
+ description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
810
808
  ge=0,
811
809
  )
812
810
 
@@ -820,7 +818,7 @@ AUDIT_METADATA_MAX_BYTES = 1024 * 10 # 10KB
820
818
 
821
819
 
822
820
  class AuditMetadataBase(BaseModel):
823
- audit_metadata: Optional[dict[str, str]] = Field(
821
+ audit_metadata: dict[str, str] | None = Field(
824
822
  default=None,
825
823
  title="Audit metadata",
826
824
  description=(
@@ -844,29 +842,27 @@ class AuditMetadataBase(BaseModel):
844
842
 
845
843
  class BaseSearchRequest(AuditMetadataBase):
846
844
  query: str = SearchParamDefaults.query.to_pydantic_field()
847
- filter_expression: Optional[FilterExpression] = (
845
+ filter_expression: FilterExpression | None = (
848
846
  SearchParamDefaults.filter_expression.to_pydantic_field()
849
847
  )
850
848
  fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
851
- filters: Union[list[str], list[Filter]] = Field(
849
+ filters: list[str] | list[Filter] = Field(
852
850
  default=[],
853
- title="Filters",
854
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
851
+ title="Search Filters",
852
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
855
853
  )
856
854
  top_k: int = SearchParamDefaults.top_k.to_pydantic_field()
857
- min_score: Optional[Union[float, MinScore]] = Field(
855
+ min_score: float | MinScore | None = Field(
858
856
  default=None,
859
857
  title="Minimum score",
860
- description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.", # noqa: E501
861
- )
862
- range_creation_start: Optional[DateTime] = (
863
- SearchParamDefaults.range_creation_start.to_pydantic_field()
858
+ description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
864
859
  )
865
- range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
866
- range_modification_start: Optional[DateTime] = (
860
+ range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
861
+ range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
862
+ range_modification_start: DateTime | None = (
867
863
  SearchParamDefaults.range_modification_start.to_pydantic_field()
868
864
  )
869
- range_modification_end: Optional[DateTime] = (
865
+ range_modification_end: DateTime | None = (
870
866
  SearchParamDefaults.range_modification_end.to_pydantic_field()
871
867
  )
872
868
  debug: bool = SearchParamDefaults.debug.to_pydantic_field()
@@ -874,15 +870,15 @@ class BaseSearchRequest(AuditMetadataBase):
874
870
  show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
875
871
  field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
876
872
  extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
877
- vector: Optional[list[float]] = SearchParamDefaults.vector.to_pydantic_field()
878
- vectorset: Optional[str] = SearchParamDefaults.vectorset.to_pydantic_field()
873
+ vector: list[float] | None = SearchParamDefaults.vector.to_pydantic_field()
874
+ vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
879
875
  with_duplicates: bool = SearchParamDefaults.with_duplicates.to_pydantic_field()
880
876
  with_synonyms: bool = SearchParamDefaults.with_synonyms.to_pydantic_field()
881
877
  # autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
882
878
  # avoid breaking changes in the python sdks. Please remove on a future major release.
883
879
  autofilter: SkipJsonSchema[bool] = False
884
880
  resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
885
- security: Optional[RequestSecurity] = SearchParamDefaults.security.to_pydantic_field()
881
+ security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
886
882
  show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
887
883
 
888
884
  rephrase: bool = Field(
@@ -892,7 +888,7 @@ class BaseSearchRequest(AuditMetadataBase):
892
888
  ),
893
889
  )
894
890
 
895
- rephrase_prompt: Optional[str] = Field(
891
+ rephrase_prompt: str | None = Field(
896
892
  default=None,
897
893
  title="Rephrase",
898
894
  description=(
@@ -911,7 +907,7 @@ Please return ONLY the question without any explanation. Just the rephrased ques
911
907
  Please return ONLY the question without any explanation.""",
912
908
  ],
913
909
  )
914
- query_image: Optional[Image] = Field(
910
+ query_image: Image | None = Field(
915
911
  default=None,
916
912
  title="Query image",
917
913
  description="Image that will be used together with the query text for retrieval.",
@@ -931,6 +927,13 @@ Please return ONLY the question without any explanation. Just the rephrased ques
931
927
  values["top_k"] = SearchParamDefaults.top_k.default
932
928
  return values
933
929
 
930
+ @field_validator("resource_filters", mode="after")
931
+ def validate_resource_filters(cls, values: list[str]) -> list[str]:
932
+ if values is not None:
933
+ for v in values:
934
+ _validate_resource_filter(v)
935
+ return values
936
+
934
937
 
935
938
  class SearchRequest(BaseSearchRequest):
936
939
  features: list[SearchOptions] = SearchParamDefaults.search_features.to_pydantic_field(
@@ -941,7 +944,7 @@ class SearchRequest(BaseSearchRequest):
941
944
  ]
942
945
  )
943
946
  faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
944
- sort: Optional[SortOptions] = SearchParamDefaults.sort.to_pydantic_field()
947
+ sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
945
948
  offset: int = SearchParamDefaults.offset.to_pydantic_field()
946
949
 
947
950
  @field_validator("faceted")
@@ -962,7 +965,7 @@ class SearchRequest(BaseSearchRequest):
962
965
 
963
966
  @field_validator("sort", mode="after")
964
967
  @classmethod
965
- def sorting_by_title_not_supported(cls, value: Optional[SortOptions]) -> Optional[SortOptions]:
968
+ def sorting_by_title_not_supported(cls, value: SortOptions | None) -> SortOptions | None:
966
969
  if value and value.field == SortField.TITLE:
967
970
  raise ValueError("sorting by title not supported in /search")
968
971
 
@@ -988,19 +991,19 @@ class UserPrompt(BaseModel):
988
991
 
989
992
 
990
993
  class MaxTokens(BaseModel):
991
- context: Optional[int] = Field(
994
+ context: int | None = Field(
992
995
  default=None,
993
996
  title="Maximum context tokens",
994
997
  description="Use to limit the amount of tokens used in the LLM context",
995
998
  )
996
- answer: Optional[int] = Field(
999
+ answer: int | None = Field(
997
1000
  default=None,
998
1001
  title="Maximum answer tokens",
999
1002
  description="Use to limit the amount of tokens used in the LLM answer",
1000
1003
  )
1001
1004
 
1002
1005
 
1003
- def parse_max_tokens(max_tokens: Optional[Union[int, MaxTokens]]) -> Optional[MaxTokens]:
1006
+ def parse_max_tokens(max_tokens: int | MaxTokens | None) -> MaxTokens | None:
1004
1007
  if isinstance(max_tokens, int):
1005
1008
  # If the max_tokens is an integer, it is interpreted as the max_tokens value for the generated answer.
1006
1009
  # The max tokens for the context is set to None to use the default value for the model (comes in the
@@ -1046,7 +1049,7 @@ class ChatModel(BaseModel):
1046
1049
  question: str = Field(description="Question to ask the generative model")
1047
1050
  user_id: str
1048
1051
  retrieval: bool = True
1049
- system: Optional[str] = Field(
1052
+ system: str | None = Field(
1050
1053
  default=None,
1051
1054
  title="System prompt",
1052
1055
  description="Optional system prompt input by the user",
@@ -1055,9 +1058,9 @@ class ChatModel(BaseModel):
1055
1058
  default={},
1056
1059
  description="The information retrieval context for the current query",
1057
1060
  )
1058
- query_context_order: Optional[dict[str, int]] = Field(
1061
+ query_context_order: dict[str, int] | None = Field(
1059
1062
  default=None,
1060
- description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model", # noqa: E501
1063
+ description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
1061
1064
  )
1062
1065
  chat_history: list[ChatContextMessage] = Field(
1063
1066
  default=[], description="The chat conversation history"
@@ -1066,29 +1069,29 @@ class ChatModel(BaseModel):
1066
1069
  default=True,
1067
1070
  description="Truncate the chat context in case it doesn't fit the generative input",
1068
1071
  )
1069
- user_prompt: Optional[UserPrompt] = Field(
1072
+ user_prompt: UserPrompt | None = Field(
1070
1073
  default=None, description="Optional custom prompt input by the user"
1071
1074
  )
1072
- citations: Union[bool, None, CitationsType] = Field(
1075
+ citations: bool | None | CitationsType = Field(
1073
1076
  default=None,
1074
1077
  description="Whether to include citations in the response. "
1075
1078
  "If set to None or False, no citations will be computed. "
1076
1079
  "If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
1077
1080
  "If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
1078
1081
  )
1079
- citation_threshold: Optional[float] = Field(
1082
+ citation_threshold: float | None = Field(
1080
1083
  default=None,
1081
1084
  description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
1082
1085
  ge=0.0,
1083
1086
  le=1.0,
1084
1087
  )
1085
- generative_model: Optional[str] = Field(
1088
+ generative_model: str | None = Field(
1086
1089
  default=None,
1087
1090
  title="Generative model",
1088
- description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1091
+ description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
1089
1092
  )
1090
1093
 
1091
- max_tokens: Optional[int] = Field(default=None, description="Maximum characters to generate")
1094
+ max_tokens: int | None = Field(default=None, description="Maximum characters to generate")
1092
1095
 
1093
1096
  query_context_images: dict[str, Image] = Field(
1094
1097
  default={},
@@ -1099,7 +1102,7 @@ class ChatModel(BaseModel):
1099
1102
  default=False,
1100
1103
  description="If set to true, the response will be in markdown format",
1101
1104
  )
1102
- json_schema: Optional[dict[str, Any]] = Field(
1105
+ json_schema: dict[str, Any] | None = Field(
1103
1106
  default=None,
1104
1107
  description="The JSON schema to use for the generative model answers",
1105
1108
  )
@@ -1107,17 +1110,18 @@ class ChatModel(BaseModel):
1107
1110
  default=False,
1108
1111
  description="Whether to reorder the query context based on a reranker",
1109
1112
  )
1110
- top_k: Optional[int] = Field(default=None, description="Number of best elements to get from")
1113
+ top_k: int | None = Field(default=None, description="Number of best elements to get from")
1111
1114
 
1112
1115
  format_prompt: bool = Field(
1113
1116
  default=True,
1114
- description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively", # noqa: E501
1117
+ description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
1115
1118
  )
1116
- seed: Optional[int] = Field(
1119
+ seed: int | None = Field(
1117
1120
  default=None,
1118
1121
  description="Seed use for the generative model for a deterministic output.",
1119
1122
  )
1120
- reasoning: Union[Reasoning, bool] = Field(
1123
+ reasoning: Reasoning | bool = Field(
1124
+ title="Reasoning options",
1121
1125
  default=False,
1122
1126
  description=(
1123
1127
  "Reasoning options for the generative model. "
@@ -1131,26 +1135,25 @@ class RephraseModel(BaseModel):
1131
1135
  chat_history: list[ChatContextMessage] = []
1132
1136
  user_id: str
1133
1137
  user_context: list[str] = []
1134
- generative_model: Optional[str] = Field(
1138
+ generative_model: str | None = Field(
1135
1139
  default=None,
1136
1140
  title="Generative model",
1137
- description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1141
+ description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
1138
1142
  )
1139
- chat_history_relevance_threshold: Optional[
1143
+ chat_history_relevance_threshold: (
1140
1144
  Annotated[
1141
1145
  float,
1142
1146
  Field(
1143
1147
  ge=0.0,
1144
1148
  le=1.0,
1145
- description=(
1146
- "Threshold to determine if the past chat history is relevant to rephrase the user's question. "
1147
- "0 - Always treat previous messages as relevant (always rephrase)."
1148
- "1 Always treat previous messages as irrelevant (never rephrase)."
1149
- "Values in between adjust the sensitivity."
1150
- ),
1149
+ description="Threshold to determine if the past chat history is relevant to rephrase the user's question. "
1150
+ "0 - Always treat previous messages as relevant (always rephrase)."
1151
+ "1 - Always treat previous messages as irrelevant (never rephrase)."
1152
+ "Values in between adjust the sensitivity.",
1151
1153
  ),
1152
1154
  ]
1153
- ] = None
1155
+ | None
1156
+ ) = None
1154
1157
 
1155
1158
 
1156
1159
  class RagStrategyName:
@@ -1234,13 +1237,13 @@ class FullResourceApplyTo(BaseModel):
1234
1237
  exclude: list[str] = Field(
1235
1238
  default_factory=list,
1236
1239
  title="Labels to exclude from full resource expansion",
1237
- description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens", # noqa: E501
1240
+ description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
1238
1241
  )
1239
1242
 
1240
1243
 
1241
1244
  class FullResourceStrategy(RagStrategy):
1242
1245
  name: Literal["full_resource"] = "full_resource"
1243
- count: Optional[int] = Field(
1246
+ count: int | None = Field(
1244
1247
  default=None,
1245
1248
  title="Count",
1246
1249
  description="Maximum number of full documents to retrieve. If not specified, all matching documents are retrieved.",
@@ -1251,7 +1254,7 @@ class FullResourceStrategy(RagStrategy):
1251
1254
  title="Include remaining text blocks",
1252
1255
  description="Whether to include the remaining text blocks after the maximum number of resources has been reached.",
1253
1256
  )
1254
- apply_to: Optional[FullResourceApplyTo] = Field(
1257
+ apply_to: FullResourceApplyTo | None = Field(
1255
1258
  default=None,
1256
1259
  title="Apply to certain resources only",
1257
1260
  description="Define which resources to exclude from serialization",
@@ -1357,7 +1360,7 @@ class PreQuery(BaseModel):
1357
1360
  ),
1358
1361
  ge=0,
1359
1362
  )
1360
- id: Optional[str] = Field(
1363
+ id: str | None = Field(
1361
1364
  default=None,
1362
1365
  title="Prequery id",
1363
1366
  min_length=1,
@@ -1491,7 +1494,7 @@ class TableImageStrategy(ImageRagStrategy):
1491
1494
 
1492
1495
  class PageImageStrategy(ImageRagStrategy):
1493
1496
  name: Literal["page_image"] = "page_image"
1494
- count: Optional[int] = Field(
1497
+ count: int | None = Field(
1495
1498
  default=None,
1496
1499
  title="Count",
1497
1500
  description="Maximum number of page images to retrieve. By default, at most 5 images are retrieved.",
@@ -1503,20 +1506,18 @@ class ParagraphImageStrategy(ImageRagStrategy):
1503
1506
 
1504
1507
 
1505
1508
  RagStrategies = Annotated[
1506
- Union[
1507
- FieldExtensionStrategy,
1508
- FullResourceStrategy,
1509
- HierarchyResourceStrategy,
1510
- NeighbouringParagraphsStrategy,
1511
- MetadataExtensionStrategy,
1512
- ConversationalStrategy,
1513
- PreQueriesStrategy,
1514
- GraphStrategy,
1515
- ],
1509
+ FieldExtensionStrategy
1510
+ | FullResourceStrategy
1511
+ | HierarchyResourceStrategy
1512
+ | NeighbouringParagraphsStrategy
1513
+ | MetadataExtensionStrategy
1514
+ | ConversationalStrategy
1515
+ | PreQueriesStrategy
1516
+ | GraphStrategy,
1516
1517
  Field(discriminator="name"),
1517
1518
  ]
1518
1519
  RagImagesStrategies = Annotated[
1519
- Union[PageImageStrategy, ParagraphImageStrategy, TableImageStrategy],
1520
+ PageImageStrategy | ParagraphImageStrategy | TableImageStrategy,
1520
1521
  Field(discriminator="name"),
1521
1522
  ]
1522
1523
  PromptContext = dict[str, str]
@@ -1525,10 +1526,10 @@ PromptContextImages = dict[str, Image]
1525
1526
 
1526
1527
 
1527
1528
  class CustomPrompt(BaseModel):
1528
- system: Optional[str] = Field(
1529
+ system: str | None = Field(
1529
1530
  default=None,
1530
1531
  title="System prompt",
1531
- description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.", # noqa: E501
1532
+ description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
1532
1533
  min_length=1,
1533
1534
  examples=[
1534
1535
  "You are a medical assistant, use medical terminology",
@@ -1537,10 +1538,10 @@ class CustomPrompt(BaseModel):
1537
1538
  "You are a financial expert, use correct terms",
1538
1539
  ],
1539
1540
  )
1540
- user: Optional[str] = Field(
1541
+ user: str | None = Field(
1541
1542
  default=None,
1542
1543
  title="User prompt",
1543
- description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.", # noqa: E501
1544
+ description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
1544
1545
  min_length=1,
1545
1546
  examples=[
1546
1547
  "Taking into account our previous conversation, and this context: {context} answer this {question}",
@@ -1549,7 +1550,7 @@ class CustomPrompt(BaseModel):
1549
1550
  "Given this context: {context}. Answer this {question} using the provided context. Please, answer always in French",
1550
1551
  ],
1551
1552
  )
1552
- rephrase: Optional[str] = Field(
1553
+ rephrase: str | None = Field(
1553
1554
  default=None,
1554
1555
  title="Rephrase",
1555
1556
  description=(
@@ -1579,23 +1580,23 @@ class AskRequest(AuditMetadataBase):
1579
1580
  le=200,
1580
1581
  description="The top most relevant results to fetch at the retrieval step. The maximum number of results allowed is 200.",
1581
1582
  )
1582
- filter_expression: Optional[FilterExpression] = (
1583
+ filter_expression: FilterExpression | None = (
1583
1584
  SearchParamDefaults.filter_expression.to_pydantic_field()
1584
1585
  )
1585
1586
  fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
1586
- filters: Union[list[str], list[Filter]] = Field(
1587
+ filters: list[str] | list[Filter] = Field(
1587
1588
  default=[],
1588
- title="Filters",
1589
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
1589
+ title="Search Filters",
1590
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
1590
1591
  )
1591
- keyword_filters: Union[list[str], list[Filter]] = Field(
1592
+ keyword_filters: list[str] | list[Filter] = Field(
1592
1593
  default=[],
1593
1594
  title="Keyword filters",
1594
1595
  description=(
1595
1596
  "List of keyword filter expressions to apply to the retrieval step. "
1596
1597
  "The text block search will only be performed on the documents that contain the specified keywords. "
1597
1598
  "The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
1598
- "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters" # noqa: E501
1599
+ "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
1599
1600
  ),
1600
1601
  examples=[
1601
1602
  ["NLP", "BERT"],
@@ -1603,43 +1604,39 @@ class AskRequest(AuditMetadataBase):
1603
1604
  ["Friedrich Nietzsche", "Immanuel Kant"],
1604
1605
  ],
1605
1606
  )
1606
- vectorset: Optional[str] = SearchParamDefaults.vectorset.to_pydantic_field()
1607
- min_score: Optional[Union[float, MinScore]] = Field(
1607
+ vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
1608
+ min_score: float | MinScore | None = Field(
1608
1609
  default=None,
1609
1610
  title="Minimum score",
1610
- description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.", # noqa: E501
1611
+ description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
1611
1612
  )
1612
1613
  features: list[ChatOptions] = SearchParamDefaults.chat_features.to_pydantic_field()
1613
- range_creation_start: Optional[DateTime] = (
1614
- SearchParamDefaults.range_creation_start.to_pydantic_field()
1615
- )
1616
- range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
1617
- range_modification_start: Optional[DateTime] = (
1614
+ range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
1615
+ range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
1616
+ range_modification_start: DateTime | None = (
1618
1617
  SearchParamDefaults.range_modification_start.to_pydantic_field()
1619
1618
  )
1620
- range_modification_end: Optional[DateTime] = (
1619
+ range_modification_end: DateTime | None = (
1621
1620
  SearchParamDefaults.range_modification_end.to_pydantic_field()
1622
1621
  )
1623
1622
  show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
1624
1623
  field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
1625
1624
  extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
1626
- context: Optional[list[ChatContextMessage]] = SearchParamDefaults.chat_context.to_pydantic_field()
1627
- chat_history: Optional[list[ChatContextMessage]] = (
1628
- SearchParamDefaults.chat_history.to_pydantic_field()
1629
- )
1630
- extra_context: Optional[list[str]] = Field(
1625
+ context: list[ChatContextMessage] | None = SearchParamDefaults.chat_context.to_pydantic_field()
1626
+ chat_history: list[ChatContextMessage] | None = SearchParamDefaults.chat_history.to_pydantic_field()
1627
+ extra_context: list[str] | None = Field(
1631
1628
  default=None,
1632
1629
  title="Extra query context",
1633
1630
  description="""Additional context that is added to the retrieval context sent to the LLM.
1634
1631
  It allows extending the chat feature with content that may not be in the Knowledge Box.""",
1635
1632
  )
1636
- extra_context_images: Optional[list[Image]] = Field(
1633
+ extra_context_images: list[Image] | None = Field(
1637
1634
  default=None,
1638
1635
  title="Extra query context images",
1639
1636
  description="""Additional images added to the retrieval context sent to the LLM."
1640
1637
  It allows extending the chat feature with content that may not be in the Knowledge Box.""",
1641
1638
  )
1642
- query_image: Optional[Image] = Field(
1639
+ query_image: Image | None = Field(
1643
1640
  default=None,
1644
1641
  title="Query image",
1645
1642
  description="Image that will be used together with the query text for retrieval and then sent to the LLM as part of the context. "
@@ -1652,27 +1649,27 @@ class AskRequest(AuditMetadataBase):
1652
1649
 
1653
1650
  highlight: bool = SearchParamDefaults.highlight.to_pydantic_field()
1654
1651
  resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
1655
- prompt: Optional[Union[str, CustomPrompt]] = Field(
1652
+ prompt: str | CustomPrompt | None = Field(
1656
1653
  default=None,
1657
1654
  title="Prompts",
1658
- description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.", # noqa: E501
1655
+ description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
1659
1656
  )
1660
- rank_fusion: Union[RankFusionName, RankFusion] = SearchParamDefaults.rank_fusion.to_pydantic_field()
1661
- reranker: Union[RerankerName, Reranker] = SearchParamDefaults.reranker.to_pydantic_field()
1662
- citations: Union[bool, None, CitationsType] = Field(
1657
+ rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
1658
+ reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
1659
+ citations: bool | None | CitationsType = Field(
1663
1660
  default=None,
1664
1661
  description="Whether to include citations in the response. "
1665
1662
  "If set to None or False, no citations will be computed. "
1666
1663
  "If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
1667
1664
  "If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
1668
1665
  )
1669
- citation_threshold: Optional[float] = Field(
1666
+ citation_threshold: float | None = Field(
1670
1667
  default=None,
1671
1668
  description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
1672
1669
  ge=0.0,
1673
1670
  le=1.0,
1674
1671
  )
1675
- security: Optional[RequestSecurity] = SearchParamDefaults.security.to_pydantic_field()
1672
+ security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
1676
1673
  show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
1677
1674
  rag_strategies: list[RagStrategies] = Field(
1678
1675
  default=[],
@@ -1737,21 +1734,21 @@ If empty, the default strategy is used, which simply adds the text of the matchi
1737
1734
  )
1738
1735
  debug: bool = SearchParamDefaults.debug.to_pydantic_field()
1739
1736
 
1740
- generative_model: Optional[str] = Field(
1737
+ generative_model: str | None = Field(
1741
1738
  default=None,
1742
1739
  title="Generative model",
1743
- description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1740
+ description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
1744
1741
  )
1745
- generative_model_seed: Optional[int] = Field(
1742
+ generative_model_seed: int | None = Field(
1746
1743
  default=None,
1747
1744
  title="Seed for the generative model",
1748
1745
  description="The seed to use for the generative model for deterministic generation. Only supported by some models.",
1749
1746
  )
1750
1747
 
1751
- max_tokens: Optional[Union[int, MaxTokens]] = Field(
1748
+ max_tokens: int | MaxTokens | None = Field(
1752
1749
  default=None,
1753
1750
  title="Maximum LLM tokens to use for the request",
1754
- description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.", # noqa: E501
1751
+ description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
1755
1752
  )
1756
1753
 
1757
1754
  rephrase: bool = Field(
@@ -1760,7 +1757,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
1760
1757
  "Rephrase the query for a more efficient retrieval. This will consume LLM tokens and make the request slower."
1761
1758
  ),
1762
1759
  )
1763
- chat_history_relevance_threshold: Optional[float] = Field(
1760
+ chat_history_relevance_threshold: float | None = Field(
1764
1761
  default=None,
1765
1762
  ge=0.0,
1766
1763
  le=1.0,
@@ -1778,7 +1775,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
1778
1775
  description="If set to true, the response will be in markdown format",
1779
1776
  )
1780
1777
 
1781
- answer_json_schema: Optional[dict[str, Any]] = Field(
1778
+ answer_json_schema: dict[str, Any] | None = Field(
1782
1779
  default=None,
1783
1780
  title="Answer JSON schema",
1784
1781
  description="""Desired JSON schema for the LLM answer.
@@ -1794,13 +1791,14 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
1794
1791
  description="Whether to generate an answer using the generative model. If set to false, the response will only contain the retrieval results.",
1795
1792
  )
1796
1793
 
1797
- search_configuration: Optional[str] = Field(
1794
+ search_configuration: str | None = Field(
1798
1795
  default=None,
1799
1796
  description="Load ask parameters from this configuration. Parameters in the request override parameters from the configuration.",
1800
1797
  )
1801
1798
 
1802
- reasoning: Union[Reasoning, bool] = Field(
1799
+ reasoning: Reasoning | bool = Field(
1803
1800
  default=False,
1801
+ title="Reasoning options",
1804
1802
  description=(
1805
1803
  "Reasoning options for the generative model. "
1806
1804
  "Set to True to enable default reasoning, False to disable, or provide a Reasoning object for custom options."
@@ -1860,6 +1858,13 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
1860
1858
  self.context = None
1861
1859
  return self
1862
1860
 
1861
+ @field_validator("resource_filters", mode="after")
1862
+ def validate_resource_filters(cls, values: list[str]) -> list[str]:
1863
+ if values is not None:
1864
+ for v in values:
1865
+ _validate_resource_filter(v)
1866
+ return values
1867
+
1863
1868
 
1864
1869
  # Alias (for backwards compatiblity with testbed)
1865
1870
  class ChatRequest(AskRequest):
@@ -1881,8 +1886,8 @@ class SummarizeModel(BaseModel):
1881
1886
  """
1882
1887
 
1883
1888
  resources: dict[str, SummarizeResourceModel] = {}
1884
- generative_model: Optional[str] = None
1885
- user_prompt: Optional[str] = None
1889
+ generative_model: str | None = None
1890
+ user_prompt: str | None = None
1886
1891
  summary_kind: SummaryKind = SummaryKind.SIMPLE
1887
1892
 
1888
1893
 
@@ -1891,13 +1896,13 @@ class SummarizeRequest(BaseModel):
1891
1896
  Model for the request payload of the summarize endpoint
1892
1897
  """
1893
1898
 
1894
- generative_model: Optional[str] = Field(
1899
+ generative_model: str | None = Field(
1895
1900
  default=None,
1896
1901
  title="Generative model",
1897
- description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1902
+ description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
1898
1903
  )
1899
1904
 
1900
- user_prompt: Optional[str] = Field(
1905
+ user_prompt: str | None = Field(
1901
1906
  default=None,
1902
1907
  title="User prompt",
1903
1908
  description="Optional custom prompt input by the user",
@@ -1908,7 +1913,7 @@ class SummarizeRequest(BaseModel):
1908
1913
  min_length=1,
1909
1914
  max_length=100,
1910
1915
  title="Resources",
1911
- description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.", # noqa: E501
1916
+ description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
1912
1917
  )
1913
1918
 
1914
1919
  summary_kind: SummaryKind = Field(
@@ -1934,20 +1939,20 @@ class SummarizedResponse(BaseModel):
1934
1939
  title="Summary",
1935
1940
  description="Global summary of all resources combined.",
1936
1941
  )
1937
- consumption: Optional[Consumption] = None
1942
+ consumption: Consumption | None = None
1938
1943
 
1939
1944
 
1940
1945
  class KnowledgeGraphEntity(BaseModel):
1941
1946
  name: str
1942
- type: Optional[RelationNodeType] = None
1943
- subtype: Optional[str] = None
1947
+ type: RelationNodeType | None = None
1948
+ subtype: str | None = None
1944
1949
 
1945
1950
 
1946
1951
  class FindRequest(BaseSearchRequest):
1947
- query_entities: SkipJsonSchema[Optional[list[KnowledgeGraphEntity]]] = Field(
1952
+ query_entities: SkipJsonSchema[list[KnowledgeGraphEntity] | None] = Field(
1948
1953
  default=None, title="Query entities", description="Entities to use in a knowledge graph search"
1949
1954
  )
1950
- graph_query: Optional[GraphPathQuery] = Field(
1955
+ graph_query: GraphPathQuery | None = Field(
1951
1956
  default=None,
1952
1957
  title="Graph query",
1953
1958
  description="Query for the knowledge graph. Paths (node-relation-node) extracted from a paragraph_id will be used to extend the results",
@@ -1958,17 +1963,17 @@ class FindRequest(BaseSearchRequest):
1958
1963
  FindOptions.SEMANTIC,
1959
1964
  ]
1960
1965
  )
1961
- rank_fusion: Union[RankFusionName, RankFusion] = SearchParamDefaults.rank_fusion.to_pydantic_field()
1962
- reranker: Union[RerankerName, Reranker] = SearchParamDefaults.reranker.to_pydantic_field()
1966
+ rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
1967
+ reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
1963
1968
 
1964
- keyword_filters: Union[list[str], list[Filter]] = Field(
1969
+ keyword_filters: list[str] | list[Filter] = Field(
1965
1970
  default=[],
1966
1971
  title="Keyword filters",
1967
1972
  description=(
1968
1973
  "List of keyword filter expressions to apply to the retrieval step. "
1969
1974
  "The text block search will only be performed on the documents that contain the specified keywords. "
1970
1975
  "The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
1971
- "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters" # noqa: E501
1976
+ "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
1972
1977
  ),
1973
1978
  examples=[
1974
1979
  ["NLP", "BERT"],
@@ -1977,11 +1982,11 @@ class FindRequest(BaseSearchRequest):
1977
1982
  ],
1978
1983
  )
1979
1984
 
1980
- search_configuration: Optional[str] = Field(
1985
+ search_configuration: str | None = Field(
1981
1986
  default=None,
1982
1987
  description="Load find parameters from this configuration. Parameters in the request override parameters from the configuration.",
1983
1988
  )
1984
- generative_model: Optional[str] = Field(
1989
+ generative_model: str | None = Field(
1985
1990
  default=None,
1986
1991
  title="Generative model",
1987
1992
  description="The generative model used to rephrase the query. If not provided, the model configured for the Knowledge Box is used.",
@@ -2015,9 +2020,9 @@ class SCORE_TYPE(str, Enum):
2015
2020
 
2016
2021
 
2017
2022
  class FindTextPosition(BaseModel):
2018
- page_number: Optional[int] = None
2019
- start_seconds: Optional[list[int]] = None
2020
- end_seconds: Optional[list[int]] = None
2023
+ page_number: int | None = None
2024
+ start_seconds: list[int] | None = None
2025
+ end_seconds: list[int] | None = None
2021
2026
  index: int
2022
2027
  start: int
2023
2028
  end: int
@@ -2029,15 +2034,15 @@ class FindParagraph(BaseModel):
2029
2034
  order: int = Field(default=0, ge=0)
2030
2035
  text: str
2031
2036
  id: str
2032
- labels: Optional[list[str]] = []
2033
- position: Optional[TextPosition] = None
2037
+ labels: list[str] | None = []
2038
+ position: TextPosition | None = None
2034
2039
  fuzzy_result: bool = False
2035
2040
  page_with_visual: bool = Field(
2036
2041
  default=False,
2037
2042
  title="Page where this paragraph belongs is a visual page",
2038
2043
  description="This flag informs if the page may have information that has not been extracted",
2039
2044
  )
2040
- reference: Optional[str] = Field(
2045
+ reference: str | None = Field(
2041
2046
  default=None,
2042
2047
  title="Reference to the image that represents this text",
2043
2048
  description="Reference to the extracted image that represents this paragraph",
@@ -2047,7 +2052,7 @@ class FindParagraph(BaseModel):
2047
2052
  title="Is a table",
2048
2053
  description="The referenced image of the paragraph is a table",
2049
2054
  )
2050
- relevant_relations: Optional[Relations] = Field(
2055
+ relevant_relations: Relations | None = Field(
2051
2056
  default=None,
2052
2057
  title="Relevant relations",
2053
2058
  description="Relevant relations from which the paragraph was found, will only be filled if using the Graph RAG Strategy",
@@ -2062,17 +2067,19 @@ class FindResource(Resource):
2062
2067
  fields: dict[str, FindField]
2063
2068
 
2064
2069
  def updated_from(self, origin: Resource):
2070
+ find_resource_model_fields = self.model_fields.keys()
2065
2071
  for key in origin.model_fields.keys():
2066
- self.__setattr__(key, getattr(origin, key))
2072
+ if key in find_resource_model_fields:
2073
+ self.__setattr__(key, getattr(origin, key))
2067
2074
 
2068
2075
 
2069
2076
  class KnowledgeboxFindResults(JsonBaseModel):
2070
2077
  """Find on knowledgebox results"""
2071
2078
 
2072
2079
  resources: dict[str, FindResource]
2073
- relations: Optional[Relations] = None
2074
- query: Optional[str] = None
2075
- rephrased_query: Optional[str] = None
2080
+ relations: Relations | None = None
2081
+ query: str | None = Field(default=None, title="Find Results Query")
2082
+ rephrased_query: str | None = None
2076
2083
  total: int = 0
2077
2084
  page_number: int = Field(
2078
2085
  default=0,
@@ -2086,18 +2093,18 @@ class KnowledgeboxFindResults(JsonBaseModel):
2086
2093
  default=False,
2087
2094
  description="Pagination will be deprecated, please, refer to `top_k` in the request",
2088
2095
  )
2089
- nodes: Optional[list[dict[str, str]]] = Field(
2096
+ nodes: list[dict[str, str]] | None = Field(
2090
2097
  default=None,
2091
2098
  title="Nodes",
2092
2099
  description="List of nodes queried in the search",
2093
2100
  )
2094
- shards: Optional[list[str]] = Field(
2101
+ shards: list[str] | None = Field(
2095
2102
  default=None,
2096
2103
  title="Shards",
2097
2104
  description="The list of shard replica ids used for the search.",
2098
2105
  )
2099
2106
  autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
2100
- min_score: Optional[Union[float, MinScore]] = Field(
2107
+ min_score: float | MinScore | None = Field(
2101
2108
  default=MinScore(),
2102
2109
  title="Minimum result score",
2103
2110
  description="The minimum scores that have been used for the search operation.",
@@ -2105,9 +2112,9 @@ class KnowledgeboxFindResults(JsonBaseModel):
2105
2112
  best_matches: list[str] = Field(
2106
2113
  default=[],
2107
2114
  title="Best matches",
2108
- description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).", # noqa: E501
2115
+ description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
2109
2116
  )
2110
- metrics: Optional[dict[str, Any]] = Field(
2117
+ metrics: dict[str, Any] | None = Field(
2111
2118
  default=None,
2112
2119
  title="Metrics",
2113
2120
  description=(
@@ -2125,15 +2132,15 @@ class FeedbackTasks(str, Enum):
2125
2132
  class FeedbackRequest(BaseModel):
2126
2133
  ident: str = Field(
2127
2134
  title="Request identifier",
2128
- description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.", # noqa: E501
2135
+ description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
2129
2136
  )
2130
2137
  good: bool = Field(title="Good", description="Whether the result was good or not")
2131
2138
  task: FeedbackTasks = Field(
2132
2139
  title="Task",
2133
2140
  description="The task the feedback is for. For now, only `CHAT` task is available",
2134
2141
  )
2135
- feedback: Optional[str] = Field(None, title="Feedback", description="Feedback text")
2136
- text_block_id: Optional[str] = Field(None, title="Text block", description="Text block id")
2142
+ feedback: str | None = Field(None, title="Feedback", description="Feedback text")
2143
+ text_block_id: str | None = Field(None, title="Text block", description="Text block id")
2137
2144
 
2138
2145
 
2139
2146
  def validate_facets(facets):
@@ -2184,13 +2191,11 @@ class AugmentedTextBlock(BaseModel):
2184
2191
  text: str = Field(
2185
2192
  description="The text of the augmented text block. It may include additional metadata to enrich the context"
2186
2193
  )
2187
- position: Optional[TextPosition] = Field(
2194
+ position: TextPosition | None = Field(
2188
2195
  default=None,
2189
2196
  description="Metadata about the position of the text block in the original document.",
2190
2197
  )
2191
- parent: Optional[str] = Field(
2192
- default=None, description="The parent text block that was augmented for."
2193
- )
2198
+ parent: str | None = Field(default=None, description="The parent text block that was augmented for.")
2194
2199
  augmentation_type: TextBlockAugmentationType = Field(description="Type of augmentation.")
2195
2200
 
2196
2201
 
@@ -2214,12 +2219,12 @@ class AskTokens(BaseModel):
2214
2219
  title="Output tokens",
2215
2220
  description="Number of LLM tokens used for the answer",
2216
2221
  )
2217
- input_nuclia: Optional[float] = Field(
2222
+ input_nuclia: float | None = Field(
2218
2223
  title="Input Nuclia tokens",
2219
2224
  description="Number of Nuclia LLM tokens used for the context in the query",
2220
2225
  default=None,
2221
2226
  )
2222
- output_nuclia: Optional[float] = Field(
2227
+ output_nuclia: float | None = Field(
2223
2228
  title="Output Nuclia tokens",
2224
2229
  description="Number of Nuclia LLM tokens used for the answer",
2225
2230
  default=None,
@@ -2227,12 +2232,12 @@ class AskTokens(BaseModel):
2227
2232
 
2228
2233
 
2229
2234
  class AskTimings(BaseModel):
2230
- generative_first_chunk: Optional[float] = Field(
2235
+ generative_first_chunk: float | None = Field(
2231
2236
  default=None,
2232
2237
  title="Generative first chunk",
2233
2238
  description="Time the LLM took to generate the first chunk of the answer",
2234
2239
  )
2235
- generative_total: Optional[float] = Field(
2240
+ generative_total: float | None = Field(
2236
2241
  default=None,
2237
2242
  title="Generative total",
2238
2243
  description="Total time the LLM took to generate the answer",
@@ -2240,12 +2245,12 @@ class AskTimings(BaseModel):
2240
2245
 
2241
2246
 
2242
2247
  class SyncAskMetadata(BaseModel):
2243
- tokens: Optional[AskTokens] = Field(
2248
+ tokens: AskTokens | None = Field(
2244
2249
  default=None,
2245
2250
  title="Tokens",
2246
2251
  description="Number of tokens used in the LLM context and answer",
2247
2252
  )
2248
- timings: Optional[AskTimings] = Field(
2253
+ timings: AskTimings | None = Field(
2249
2254
  default=None,
2250
2255
  title="Timings",
2251
2256
  description="Timings of the generative model",
@@ -2264,19 +2269,19 @@ class SyncAskResponse(BaseModel):
2264
2269
  title="Answer",
2265
2270
  description="The generative answer to the query",
2266
2271
  )
2267
- reasoning: Optional[str] = Field(
2272
+ reasoning: str | None = Field(
2268
2273
  default=None,
2269
- title="Reasoning",
2270
- description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.", # noqa: E501
2274
+ title="Reasoning steps",
2275
+ description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
2271
2276
  )
2272
- answer_json: Optional[dict[str, Any]] = Field(
2277
+ answer_json: dict[str, Any] | None = Field(
2273
2278
  default=None,
2274
2279
  title="Answer JSON",
2275
- description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.", # noqa: E501
2280
+ description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
2276
2281
  )
2277
2282
  status: str = Field(
2278
2283
  title="Status",
2279
- description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'", # noqa: E501
2284
+ description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
2280
2285
  )
2281
2286
  retrieval_results: KnowledgeboxFindResults = Field(
2282
2287
  title="Retrieval results",
@@ -2287,7 +2292,7 @@ class SyncAskResponse(BaseModel):
2287
2292
  title="Retrieval best matches",
2288
2293
  description="Sorted list of best matching text blocks in the retrieval step. This includes the main query and prequeries results, if any.",
2289
2294
  )
2290
- prequeries: Optional[dict[str, KnowledgeboxFindResults]] = Field(
2295
+ prequeries: dict[str, KnowledgeboxFindResults] | None = Field(
2291
2296
  default=None,
2292
2297
  title="Prequeries",
2293
2298
  description="The retrieval results of the prequeries",
@@ -2295,9 +2300,9 @@ class SyncAskResponse(BaseModel):
2295
2300
  learning_id: str = Field(
2296
2301
  default="",
2297
2302
  title="Learning id",
2298
- description="The id of the learning request. This id can be used to provide feedback on the learning process.", # noqa: E501
2303
+ description="The id of the learning request. This id can be used to provide feedback on the learning process.",
2299
2304
  )
2300
- relations: Optional[Relations] = Field(
2305
+ relations: Relations | None = Field(
2301
2306
  default=None,
2302
2307
  title="Relations",
2303
2308
  description="The detected relations of the answer",
@@ -2312,29 +2317,29 @@ class SyncAskResponse(BaseModel):
2312
2317
  title="Citation footnote to context",
2313
2318
  description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)""",
2314
2319
  )
2315
- augmented_context: Optional[AugmentedContext] = Field(
2320
+ augmented_context: AugmentedContext | None = Field(
2316
2321
  default=None,
2317
2322
  description=(
2318
2323
  "Augmented text blocks that were sent to the LLM as part of the RAG strategies "
2319
2324
  "applied on the retrieval results in the request."
2320
2325
  ),
2321
2326
  )
2322
- prompt_context: Optional[list[str]] = Field(
2327
+ prompt_context: list[str] | None = Field(
2323
2328
  default=None,
2324
2329
  title="Prompt context",
2325
2330
  description="The prompt context used to generate the answer. Returned only if the debug flag is set to true",
2326
2331
  )
2327
- predict_request: Optional[dict[str, Any]] = Field(
2332
+ predict_request: dict[str, Any] | None = Field(
2328
2333
  default=None,
2329
2334
  title="Predict request",
2330
2335
  description="The internal predict request used to generate the answer. Returned only if the debug flag is set to true",
2331
2336
  )
2332
- metadata: Optional[SyncAskMetadata] = Field(
2337
+ metadata: SyncAskMetadata | None = Field(
2333
2338
  default=None,
2334
2339
  title="Metadata",
2335
- description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.", # noqa: E501
2340
+ description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
2336
2341
  )
2337
- consumption: Optional[Consumption] = Field(
2342
+ consumption: Consumption | None = Field(
2338
2343
  default=None,
2339
2344
  title="Consumption",
2340
2345
  description=(
@@ -2342,12 +2347,12 @@ class SyncAskResponse(BaseModel):
2342
2347
  " 'X-show-consumption' header is set to true in the request."
2343
2348
  ),
2344
2349
  )
2345
- error_details: Optional[str] = Field(
2350
+ error_details: str | None = Field(
2346
2351
  default=None,
2347
2352
  title="Error details",
2348
2353
  description="Error details message in case there was an error",
2349
2354
  )
2350
- debug: Optional[dict[str, Any]] = Field(
2355
+ debug: dict[str, Any] | None = Field(
2351
2356
  default=None,
2352
2357
  title="Debug information",
2353
2358
  description=(
@@ -2437,7 +2442,7 @@ class StatusAskResponseItem(BaseModel):
2437
2442
  type: Literal["status"] = "status"
2438
2443
  code: str
2439
2444
  status: str
2440
- details: Optional[str] = None
2445
+ details: str | None = None
2441
2446
 
2442
2447
 
2443
2448
  class ErrorAskResponseItem(BaseModel):
@@ -2456,22 +2461,22 @@ class DebugAskResponseItem(BaseModel):
2456
2461
  metrics: dict[str, Any]
2457
2462
 
2458
2463
 
2459
- AskResponseItemType = Union[
2460
- AnswerAskResponseItem,
2461
- ReasoningAskResponseItem,
2462
- JSONAskResponseItem,
2463
- MetadataAskResponseItem,
2464
- AugmentedContextResponseItem,
2465
- CitationsAskResponseItem,
2466
- FootnoteCitationsAskResponseItem,
2467
- StatusAskResponseItem,
2468
- ErrorAskResponseItem,
2469
- RetrievalAskResponseItem,
2470
- RelationsAskResponseItem,
2471
- DebugAskResponseItem,
2472
- PrequeriesAskResponseItem,
2473
- ConsumptionResponseItem,
2474
- ]
2464
+ AskResponseItemType = (
2465
+ AnswerAskResponseItem
2466
+ | ReasoningAskResponseItem
2467
+ | JSONAskResponseItem
2468
+ | MetadataAskResponseItem
2469
+ | AugmentedContextResponseItem
2470
+ | CitationsAskResponseItem
2471
+ | FootnoteCitationsAskResponseItem
2472
+ | StatusAskResponseItem
2473
+ | ErrorAskResponseItem
2474
+ | RetrievalAskResponseItem
2475
+ | RelationsAskResponseItem
2476
+ | DebugAskResponseItem
2477
+ | PrequeriesAskResponseItem
2478
+ | ConsumptionResponseItem
2479
+ )
2475
2480
 
2476
2481
 
2477
2482
  class AskResponseItem(BaseModel):
@@ -2491,7 +2496,7 @@ def parse_custom_prompt(item: AskRequest) -> CustomPrompt:
2491
2496
  return prompt
2492
2497
 
2493
2498
 
2494
- def parse_rephrase_prompt(item: AskRequest) -> Optional[str]:
2499
+ def parse_rephrase_prompt(item: AskRequest) -> str | None:
2495
2500
  prompt = parse_custom_prompt(item)
2496
2501
  return prompt.rephrase
2497
2502
 
@@ -2502,7 +2507,7 @@ FindRequest.model_rebuild()
2502
2507
 
2503
2508
  class CatalogFacetsPrefix(BaseModel):
2504
2509
  prefix: str = Field(pattern="^((/[^/]+)*)$")
2505
- depth: Optional[int] = Field(
2510
+ depth: int | None = Field(
2506
2511
  default=None,
2507
2512
  ge=0,
2508
2513
  description="Only include facets up to this depth from the prefix, leave empty to include all depths",
@@ -2524,3 +2529,22 @@ class CatalogFacetsRequest(BaseModel):
2524
2529
 
2525
2530
  class CatalogFacetsResponse(BaseModel):
2526
2531
  facets: dict[str, int]
2532
+
2533
+
2534
+ def _validate_resource_filter(v: str):
2535
+ parts = v.split("/")
2536
+
2537
+ rid = parts[0]
2538
+ try:
2539
+ UUID(rid)
2540
+ except ValueError:
2541
+ raise ValueError(f"resource id filter '{rid}' should be a valid UUID")
2542
+
2543
+ if len(parts) > 1:
2544
+ field_type = parts[1]
2545
+ try:
2546
+ FieldTypeName.from_abbreviation(field_type)
2547
+ except KeyError: # pragma: no cover
2548
+ raise ValueError(
2549
+ f"resource filter {v} has an invalid field type: {field_type}",
2550
+ )