nucliadb-models 6.9.3.post5346__py3-none-any.whl → 6.10.0.post5788__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb-models might be problematic. Click here for more details.

Files changed (35)
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +355 -0
  3. nucliadb_models/common.py +57 -57
  4. nucliadb_models/configuration.py +8 -8
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +25 -26
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +1 -2
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +79 -75
  12. nucliadb_models/graph/requests.py +40 -48
  13. nucliadb_models/graph/responses.py +13 -1
  14. nucliadb_models/hydration.py +50 -52
  15. nucliadb_models/internal/predict.py +7 -9
  16. nucliadb_models/internal/shards.py +2 -3
  17. nucliadb_models/labels.py +18 -11
  18. nucliadb_models/link.py +18 -19
  19. nucliadb_models/metadata.py +66 -54
  20. nucliadb_models/notifications.py +3 -3
  21. nucliadb_models/processing.py +1 -2
  22. nucliadb_models/resource.py +85 -102
  23. nucliadb_models/retrieval.py +147 -0
  24. nucliadb_models/search.py +331 -283
  25. nucliadb_models/security.py +2 -3
  26. nucliadb_models/text.py +7 -8
  27. nucliadb_models/trainset.py +1 -2
  28. nucliadb_models/utils.py +2 -3
  29. nucliadb_models/vectors.py +2 -5
  30. nucliadb_models/writer.py +56 -57
  31. {nucliadb_models-6.9.3.post5346.dist-info → nucliadb_models-6.10.0.post5788.dist-info}/METADATA +1 -1
  32. nucliadb_models-6.10.0.post5788.dist-info/RECORD +41 -0
  33. nucliadb_models-6.9.3.post5346.dist-info/RECORD +0 -39
  34. {nucliadb_models-6.9.3.post5346.dist-info → nucliadb_models-6.10.0.post5788.dist-info}/WHEEL +0 -0
  35. {nucliadb_models-6.9.3.post5346.dist-info → nucliadb_models-6.10.0.post5788.dist-info}/top_level.txt +0 -0
nucliadb_models/search.py CHANGED
@@ -14,19 +14,19 @@
14
14
  #
15
15
  import json
16
16
  from enum import Enum
17
- from typing import Any, Literal, Optional, Union
17
+ from typing import Annotated, Any, Literal
18
+ from uuid import UUID
18
19
 
19
20
  from pydantic import BaseModel, Field, field_validator, model_validator
20
21
  from pydantic.aliases import AliasChoices
21
22
  from pydantic.json_schema import SkipJsonSchema
22
- from typing_extensions import Annotated, Self
23
+ from typing_extensions import Self
23
24
 
24
25
  from nucliadb_models import RelationMetadata
25
26
  from nucliadb_models.common import FieldTypeName, ParamDefault
26
27
  from nucliadb_models.graph.requests import GraphPathQuery
27
28
 
28
29
  # Bw/c import to avoid breaking users
29
- # noqa isort: skip
30
30
  from nucliadb_models.metadata import RelationNodeType, RelationType, ResourceProcessingStatus
31
31
  from nucliadb_models.resource import ExtractedDataTypeName, Resource
32
32
  from nucliadb_models.security import RequestSecurity
@@ -152,12 +152,12 @@ FacetsResult = dict[str, Any]
152
152
 
153
153
 
154
154
  class TextPosition(BaseModel):
155
- page_number: Optional[int] = None
155
+ page_number: int | None = None
156
156
  index: int
157
157
  start: int
158
158
  end: int
159
- start_seconds: Optional[list[int]] = None
160
- end_seconds: Optional[list[int]] = None
159
+ start_seconds: list[int] | None = None
160
+ end_seconds: list[int] | None = None
161
161
 
162
162
 
163
163
  class Sentence(BaseModel):
@@ -166,8 +166,8 @@ class Sentence(BaseModel):
166
166
  text: str
167
167
  field_type: str
168
168
  field: str
169
- index: Optional[str] = None
170
- position: Optional[TextPosition] = None
169
+ index: str | None = None
170
+ position: TextPosition | None = None
171
171
 
172
172
 
173
173
  class Sentences(BaseModel):
@@ -177,7 +177,7 @@ class Sentences(BaseModel):
177
177
  page_size: int = 20
178
178
  min_score: float = Field(
179
179
  title="Minimum score",
180
- description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.", # noqa: E501
180
+ description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
181
181
  )
182
182
 
183
183
 
@@ -188,45 +188,45 @@ class Paragraph(BaseModel):
188
188
  field: str
189
189
  text: str
190
190
  labels: list[str] = []
191
- start_seconds: Optional[list[int]] = None
192
- end_seconds: Optional[list[int]] = None
193
- position: Optional[TextPosition] = None
191
+ start_seconds: list[int] | None = None
192
+ end_seconds: list[int] | None = None
193
+ position: TextPosition | None = None
194
194
  fuzzy_result: bool = False
195
195
 
196
196
 
197
197
  class Paragraphs(BaseModel):
198
198
  results: list[Paragraph] = []
199
- facets: Optional[FacetsResult] = None
200
- query: Optional[str] = None
199
+ facets: FacetsResult | None = None
200
+ query: str | None = Field(default=None, title="Paragraphs Query")
201
201
  total: int = 0
202
202
  page_number: int = 0
203
203
  page_size: int = 20
204
204
  next_page: bool = False
205
205
  min_score: float = Field(
206
206
  title="Minimum score",
207
- description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.", # noqa: E501
207
+ description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
208
208
  )
209
209
 
210
210
 
211
211
  class ResourceResult(BaseModel):
212
- score: Union[float, int]
212
+ score: float | int
213
213
  rid: str
214
214
  field_type: str
215
215
  field: str
216
- labels: Optional[list[str]] = None
216
+ labels: list[str] | None = None
217
217
 
218
218
 
219
219
  class Resources(BaseModel):
220
220
  results: list[ResourceResult]
221
- facets: Optional[FacetsResult] = None
222
- query: Optional[str] = None
221
+ facets: FacetsResult | None = None
222
+ query: str | None = Field(default=None, title="Resources Query")
223
223
  total: int = 0
224
224
  page_number: int = 0
225
225
  page_size: int = 20
226
226
  next_page: bool = False
227
227
  min_score: float = Field(
228
228
  title="Minimum score",
229
- description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.", # noqa: E501
229
+ description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
230
230
  )
231
231
 
232
232
 
@@ -246,7 +246,7 @@ class DirectionalRelation(BaseModel):
246
246
  relation: RelationType
247
247
  relation_label: str
248
248
  direction: RelationDirection
249
- metadata: Optional[RelationMetadata] = None
249
+ metadata: RelationMetadata | None = None
250
250
  resource_id: str
251
251
 
252
252
 
@@ -280,23 +280,23 @@ class RelatedEntities(BaseModel):
280
280
  class ResourceSearchResults(JsonBaseModel):
281
281
  """Search on resource results"""
282
282
 
283
- sentences: Optional[Sentences] = None
284
- paragraphs: Optional[Paragraphs] = None
285
- relations: Optional[Relations] = None
286
- nodes: Optional[list[dict[str, str]]] = None
287
- shards: Optional[list[str]] = None
283
+ sentences: Sentences | None = None
284
+ paragraphs: Paragraphs | None = None
285
+ relations: Relations | None = None
286
+ nodes: list[dict[str, str]] | None = None
287
+ shards: list[str] | None = None
288
288
 
289
289
 
290
290
  class KnowledgeboxSearchResults(JsonBaseModel):
291
291
  """Search on knowledgebox results"""
292
292
 
293
293
  resources: dict[str, Resource] = {}
294
- sentences: Optional[Sentences] = None
295
- paragraphs: Optional[Paragraphs] = None
296
- fulltext: Optional[Resources] = None
297
- relations: Optional[Relations] = None
298
- nodes: Optional[list[dict[str, str]]] = None
299
- shards: Optional[list[str]] = None
294
+ sentences: Sentences | None = None
295
+ paragraphs: Paragraphs | None = None
296
+ fulltext: Resources | None = None
297
+ relations: Relations | None = None
298
+ nodes: list[dict[str, str]] | None = None
299
+ shards: list[str] | None = None
300
300
 
301
301
  # TODO: remove on a future major release
302
302
  autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
@@ -306,16 +306,16 @@ class CatalogResponse(BaseModel):
306
306
  """Catalog results"""
307
307
 
308
308
  resources: dict[str, Resource] = {}
309
- fulltext: Optional[Resources] = None
310
- shards: Optional[list[str]] = None
309
+ fulltext: Resources | None = None
310
+ shards: list[str] | None = None
311
311
 
312
312
 
313
313
  class KnowledgeboxSuggestResults(JsonBaseModel):
314
314
  """Suggest on resource results"""
315
315
 
316
- paragraphs: Optional[Paragraphs] = None
317
- entities: Optional[RelatedEntities] = None
318
- shards: Optional[list[str]] = None
316
+ paragraphs: Paragraphs | None = None
317
+ entities: RelatedEntities | None = None
318
+ shards: list[str] | None = None
319
319
 
320
320
 
321
321
  class KnowledgeboxCounters(BaseModel):
@@ -323,7 +323,7 @@ class KnowledgeboxCounters(BaseModel):
323
323
  paragraphs: int
324
324
  fields: int
325
325
  sentences: int
326
- shards: Optional[list[str]] = None
326
+ shards: list[str] | None = None
327
327
  index_size: float = Field(default=0.0, title="Index size (bytes)")
328
328
 
329
329
 
@@ -347,10 +347,12 @@ SortOrderMap = {
347
347
 
348
348
  class SortOptions(BaseModel):
349
349
  field: SortField
350
- limit: Optional[int] = Field(None, gt=0)
351
350
  order: SortOrder = SortOrder.DESC
352
351
 
353
352
 
353
+ MAX_RANK_FUSION_WINDOW = 500
354
+
355
+
354
356
  class RankFusionName(str, Enum):
355
357
  RECIPROCAL_RANK_FUSION = "rrf"
356
358
 
@@ -376,13 +378,13 @@ class ReciprocalRankFusion(_BaseRankFusion):
376
378
  k: float = Field(
377
379
  default=60.0,
378
380
  title="RRF k parameter",
379
- description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets", # noqa: E501
381
+ description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
380
382
  )
381
- window: Optional[int] = Field(
383
+ window: int | None = Field(
382
384
  default=None,
383
- le=500,
385
+ le=MAX_RANK_FUSION_WINDOW,
384
386
  title="RRF window",
385
- description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time", # noqa: E501
387
+ description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
386
388
  )
387
389
  boosting: ReciprocalRankFusionWeights = Field(
388
390
  default_factory=ReciprocalRankFusionWeights,
@@ -393,12 +395,12 @@ Define different weights for each retriever. This allows to assign different pri
393
395
  The default is 1 for each retriever, which means no extra boost for any of them. Weights below 0 can be used for negative boosting.
394
396
 
395
397
  This kind of boosting can be useful in multilingual search, for example, where keyword search may not give good results and can degrade the final search experience
396
- """, # noqa: E501
398
+ """,
397
399
  )
398
400
 
399
401
 
400
402
  RankFusion = Annotated[
401
- Union[ReciprocalRankFusion],
403
+ ReciprocalRankFusion,
402
404
  Field(discriminator="name"),
403
405
  ]
404
406
 
@@ -433,15 +435,15 @@ class _BaseReranker(BaseModel):
433
435
 
434
436
  class PredictReranker(_BaseReranker):
435
437
  name: Literal[RerankerName.PREDICT_RERANKER] = RerankerName.PREDICT_RERANKER
436
- window: Optional[int] = Field(
438
+ window: int | None = Field(
437
439
  default=None,
438
440
  le=200,
439
441
  title="Reranker window",
440
- description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k", # noqa: E501
442
+ description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
441
443
  )
442
444
 
443
445
 
444
- Reranker = Annotated[Union[PredictReranker], Field(discriminator="name")]
446
+ Reranker = Annotated[PredictReranker, Field(discriminator="name")]
445
447
 
446
448
 
447
449
  class KnowledgeBoxCount(BaseModel):
@@ -470,18 +472,18 @@ class SearchParamDefaults:
470
472
  )
471
473
  filters = ParamDefault(
472
474
  default=[],
473
- title="Filters",
474
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
475
+ title="Search Filters",
476
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
475
477
  )
476
478
  resource_filters = ParamDefault(
477
479
  default=[],
478
480
  title="Resources filter",
479
- description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.", # noqa: E501
481
+ description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
480
482
  )
481
483
  faceted = ParamDefault(
482
484
  default=[],
483
485
  title="Faceted",
484
- description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
486
+ description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
485
487
  max_items=50,
486
488
  )
487
489
  chat_query = ParamDefault(
@@ -503,19 +505,27 @@ class SearchParamDefaults:
503
505
  )
504
506
  top_k = ParamDefault(
505
507
  default=20,
508
+ gt=-1,
506
509
  le=200,
507
510
  title="Top k",
508
511
  description="The number of results search should return. The maximum number of results allowed is 200.",
509
512
  )
513
+ offset = ParamDefault(
514
+ default=0,
515
+ gt=-1,
516
+ le=1000,
517
+ title="Results offset",
518
+ description="The number of results to skip, starting from the beginning in sort order. Used for pagination. It can only be used with the keyword and fulltext indexes.",
519
+ )
510
520
  highlight = ParamDefault(
511
521
  default=False,
512
522
  title="Highlight",
513
- description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags", # noqa: E501
523
+ description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
514
524
  )
515
525
  with_duplicates = ParamDefault(
516
526
  default=False,
517
527
  title="With duplicate paragraphs",
518
- description="Whether to return duplicate paragraphs on the same document", # noqa: E501
528
+ description="Whether to return duplicate paragraphs on the same document",
519
529
  )
520
530
  with_status = ParamDefault(
521
531
  default=None,
@@ -525,19 +535,13 @@ class SearchParamDefaults:
525
535
  with_synonyms = ParamDefault(
526
536
  default=False,
527
537
  title="With custom synonyms",
528
- description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.", # noqa: E501
538
+ description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
529
539
  )
530
540
  sort_order = ParamDefault(
531
541
  default=SortOrder.DESC,
532
542
  title="Sort order",
533
543
  description="Order to sort results with",
534
544
  )
535
- sort_limit = ParamDefault(
536
- default=None,
537
- title="Sort limit",
538
- description="",
539
- gt=0,
540
- )
541
545
  sort_field = ParamDefault(
542
546
  default=None,
543
547
  title="Sort field",
@@ -561,12 +565,12 @@ class SearchParamDefaults:
561
565
  reranker = ParamDefault(
562
566
  default=RerankerName.PREDICT_RERANKER,
563
567
  title="Reranker",
564
- description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval", # noqa: E501
568
+ description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
565
569
  )
566
570
  debug = ParamDefault(
567
571
  default=False,
568
572
  title="Debug mode",
569
- description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.", # noqa: E501
573
+ description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
570
574
  )
571
575
  show = ParamDefault(
572
576
  default=[ResourceProperties.BASIC],
@@ -587,27 +591,27 @@ class SearchParamDefaults:
587
591
  range_creation_start = ParamDefault(
588
592
  default=None,
589
593
  title="Resource creation range start",
590
- description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
594
+ description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
591
595
  )
592
596
  range_creation_end = ParamDefault(
593
597
  default=None,
594
598
  title="Resource creation range end",
595
- description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
599
+ description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
596
600
  )
597
601
  range_modification_start = ParamDefault(
598
602
  default=None,
599
603
  title="Resource modification range start",
600
- description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
604
+ description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
601
605
  )
602
606
  range_modification_end = ParamDefault(
603
607
  default=None,
604
608
  title="Resource modification range end",
605
- description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
609
+ description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
606
610
  )
607
611
  vector = ParamDefault(
608
612
  default=None,
609
613
  title="Search Vector",
610
- description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.", # noqa: E501
614
+ description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
611
615
  )
612
616
  vectorset = ParamDefault(
613
617
  default=None,
@@ -623,12 +627,12 @@ class SearchParamDefaults:
623
627
  chat_history = ParamDefault(
624
628
  default=None,
625
629
  title="Chat history",
626
- description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.", # noqa: E501
630
+ description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
627
631
  )
628
632
  chat_features = ParamDefault(
629
633
  default=[ChatOptions.SEMANTIC, ChatOptions.KEYWORD],
630
634
  title="Chat features",
631
- description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead", # noqa: E501
635
+ description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
632
636
  )
633
637
  suggest_features = ParamDefault(
634
638
  default=[
@@ -641,17 +645,17 @@ class SearchParamDefaults:
641
645
  security = ParamDefault(
642
646
  default=None,
643
647
  title="Security",
644
- description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.", # noqa: E501
648
+ description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
645
649
  )
646
650
  security_groups = ParamDefault(
647
651
  default=[],
648
652
  title="Security groups",
649
- description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.", # noqa: E501
653
+ description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
650
654
  )
651
655
  rephrase = ParamDefault(
652
656
  default=False,
653
657
  title="Rephrase query consuming LLMs",
654
- description="Rephrase query consuming LLMs - it will make the query slower", # noqa: E501
658
+ description="Rephrase query consuming LLMs - it will make the query slower",
655
659
  )
656
660
  prefer_markdown = ParamDefault(
657
661
  default=False,
@@ -691,10 +695,10 @@ class SearchParamDefaults:
691
695
 
692
696
 
693
697
  class Filter(BaseModel):
694
- all: Optional[list[str]] = Field(default=None, min_length=1)
695
- any: Optional[list[str]] = Field(default=None, min_length=1)
696
- none: Optional[list[str]] = Field(default=None, min_length=1)
697
- not_all: Optional[list[str]] = Field(default=None, min_length=1)
698
+ all: list[str] | None = Field(default=None, min_length=1)
699
+ any: list[str] | None = Field(default=None, min_length=1)
700
+ none: list[str] | None = Field(default=None, min_length=1)
701
+ not_all: list[str] | None = Field(default=None, min_length=1)
698
702
 
699
703
  @model_validator(mode="after")
700
704
  def validate_filter(self) -> Self:
@@ -736,19 +740,19 @@ class CatalogQuery(BaseModel):
736
740
 
737
741
 
738
742
  class CatalogRequest(BaseModel):
739
- query: Union[str, CatalogQuery] = ParamDefault(
743
+ query: str | CatalogQuery = ParamDefault(
740
744
  default="",
741
- title="Query",
745
+ title="Catalog Request Query",
742
746
  description="The query to search for",
743
747
  ).to_pydantic_field()
744
- filter_expression: Optional[CatalogFilterExpression] = (
748
+ filter_expression: CatalogFilterExpression | None = (
745
749
  SearchParamDefaults.catalog_filter_expression.to_pydantic_field()
746
750
  )
747
751
  faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
748
- sort: Optional[SortOptions] = SearchParamDefaults.sort.to_pydantic_field()
752
+ sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
749
753
  page_number: int = SearchParamDefaults.catalog_page_number.to_pydantic_field()
750
754
  page_size: int = SearchParamDefaults.catalog_page_size.to_pydantic_field()
751
- hidden: Optional[bool] = SearchParamDefaults.hidden.to_pydantic_field()
755
+ hidden: bool | None = SearchParamDefaults.hidden.to_pydantic_field()
752
756
  show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field(
753
757
  default=[ResourceProperties.BASIC, ResourceProperties.ERRORS]
754
758
  )
@@ -756,32 +760,30 @@ class CatalogRequest(BaseModel):
756
760
  debug: SkipJsonSchema[bool] = SearchParamDefaults.debug.to_pydantic_field()
757
761
 
758
762
  # Deprecated filter parameters
759
- filters: Union[list[str], list[Filter]] = Field(
763
+ filters: list[str] | list[Filter] = Field(
760
764
  default=[],
761
- title="Filters",
762
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
765
+ title="Catalog Filters",
766
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
763
767
  deprecated="Use filter_expression instead",
764
768
  )
765
- with_status: Optional[ResourceProcessingStatus] = Field(
769
+ with_status: ResourceProcessingStatus | None = Field(
766
770
  default=None,
767
771
  title="With processing status",
768
772
  description="Filter results by resource processing status",
769
773
  deprecated="Use filter_expression instead",
770
774
  )
771
- range_creation_start: Optional[DateTime] = (
772
- SearchParamDefaults.range_creation_start.to_pydantic_field(
773
- deprecated="Use filter_expression instead",
774
- )
775
+ range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field(
776
+ deprecated="Use filter_expression instead",
775
777
  )
776
- range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field(
778
+ range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field(
777
779
  deprecated="Use filter_expression instead",
778
780
  )
779
- range_modification_start: Optional[DateTime] = (
781
+ range_modification_start: DateTime | None = (
780
782
  SearchParamDefaults.range_modification_start.to_pydantic_field(
781
783
  deprecated="Use filter_expression instead",
782
784
  )
783
785
  )
784
- range_modification_end: Optional[DateTime] = (
786
+ range_modification_end: DateTime | None = (
785
787
  SearchParamDefaults.range_modification_end.to_pydantic_field(
786
788
  deprecated="Use filter_expression instead",
787
789
  )
@@ -794,15 +796,15 @@ class CatalogRequest(BaseModel):
794
796
 
795
797
 
796
798
  class MinScore(BaseModel):
797
- semantic: Optional[float] = Field(
799
+ semantic: float | None = Field(
798
800
  default=None,
799
801
  title="Minimum semantic score",
800
- description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
802
+ description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
801
803
  )
802
804
  bm25: float = Field(
803
805
  default=0,
804
806
  title="Minimum bm25 score",
805
- description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
807
+ description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
806
808
  ge=0,
807
809
  )
808
810
 
@@ -816,7 +818,7 @@ AUDIT_METADATA_MAX_BYTES = 1024 * 10 # 10KB
816
818
 
817
819
 
818
820
  class AuditMetadataBase(BaseModel):
819
- audit_metadata: Optional[dict[str, str]] = Field(
821
+ audit_metadata: dict[str, str] | None = Field(
820
822
  default=None,
821
823
  title="Audit metadata",
822
824
  description=(
@@ -840,29 +842,27 @@ class AuditMetadataBase(BaseModel):
840
842
 
841
843
  class BaseSearchRequest(AuditMetadataBase):
842
844
  query: str = SearchParamDefaults.query.to_pydantic_field()
843
- filter_expression: Optional[FilterExpression] = (
845
+ filter_expression: FilterExpression | None = (
844
846
  SearchParamDefaults.filter_expression.to_pydantic_field()
845
847
  )
846
848
  fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
847
- filters: Union[list[str], list[Filter]] = Field(
849
+ filters: list[str] | list[Filter] = Field(
848
850
  default=[],
849
- title="Filters",
850
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
851
+ title="Search Filters",
852
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
851
853
  )
852
854
  top_k: int = SearchParamDefaults.top_k.to_pydantic_field()
853
- min_score: Optional[Union[float, MinScore]] = Field(
855
+ min_score: float | MinScore | None = Field(
854
856
  default=None,
855
857
  title="Minimum score",
856
- description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.", # noqa: E501
857
- )
858
- range_creation_start: Optional[DateTime] = (
859
- SearchParamDefaults.range_creation_start.to_pydantic_field()
858
+ description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
860
859
  )
861
- range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
862
- range_modification_start: Optional[DateTime] = (
860
+ range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
861
+ range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
862
+ range_modification_start: DateTime | None = (
863
863
  SearchParamDefaults.range_modification_start.to_pydantic_field()
864
864
  )
865
- range_modification_end: Optional[DateTime] = (
865
+ range_modification_end: DateTime | None = (
866
866
  SearchParamDefaults.range_modification_end.to_pydantic_field()
867
867
  )
868
868
  debug: bool = SearchParamDefaults.debug.to_pydantic_field()
@@ -870,15 +870,15 @@ class BaseSearchRequest(AuditMetadataBase):
870
870
  show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
871
871
  field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
872
872
  extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
873
- vector: Optional[list[float]] = SearchParamDefaults.vector.to_pydantic_field()
874
- vectorset: Optional[str] = SearchParamDefaults.vectorset.to_pydantic_field()
873
+ vector: list[float] | None = SearchParamDefaults.vector.to_pydantic_field()
874
+ vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
875
875
  with_duplicates: bool = SearchParamDefaults.with_duplicates.to_pydantic_field()
876
876
  with_synonyms: bool = SearchParamDefaults.with_synonyms.to_pydantic_field()
877
877
  # autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
878
878
  # avoid breaking changes in the python sdks. Please remove on a future major release.
879
879
  autofilter: SkipJsonSchema[bool] = False
880
880
  resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
881
- security: Optional[RequestSecurity] = SearchParamDefaults.security.to_pydantic_field()
881
+ security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
882
882
  show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
883
883
 
884
884
  rephrase: bool = Field(
@@ -888,7 +888,7 @@ class BaseSearchRequest(AuditMetadataBase):
888
888
  ),
889
889
  )
890
890
 
891
- rephrase_prompt: Optional[str] = Field(
891
+ rephrase_prompt: str | None = Field(
892
892
  default=None,
893
893
  title="Rephrase",
894
894
  description=(
@@ -907,7 +907,7 @@ Please return ONLY the question without any explanation. Just the rephrased ques
907
907
  Please return ONLY the question without any explanation.""",
908
908
  ],
909
909
  )
910
- query_image: Optional[Image] = Field(
910
+ query_image: Image | None = Field(
911
911
  default=None,
912
912
  title="Query image",
913
913
  description="Image that will be used together with the query text for retrieval.",
@@ -927,6 +927,13 @@ Please return ONLY the question without any explanation. Just the rephrased ques
927
927
  values["top_k"] = SearchParamDefaults.top_k.default
928
928
  return values
929
929
 
930
+ @field_validator("resource_filters", mode="after")
931
+ def validate_resource_filters(cls, values: list[str]) -> list[str]:
932
+ if values is not None:
933
+ for v in values:
934
+ _validate_resource_filter(v)
935
+ return values
936
+
930
937
 
931
938
  class SearchRequest(BaseSearchRequest):
932
939
  features: list[SearchOptions] = SearchParamDefaults.search_features.to_pydantic_field(
@@ -937,13 +944,33 @@ class SearchRequest(BaseSearchRequest):
937
944
  ]
938
945
  )
939
946
  faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
940
- sort: Optional[SortOptions] = SearchParamDefaults.sort.to_pydantic_field()
947
+ sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
948
+ offset: int = SearchParamDefaults.offset.to_pydantic_field()
941
949
 
942
950
  @field_validator("faceted")
943
951
  @classmethod
944
952
  def nested_facets_not_supported(cls, facets):
945
953
  return validate_facets(facets)
946
954
 
955
+ @model_validator(mode="after")
956
+ def offset_sort_only_on_keyword_indexes(self):
957
+ has_non_keyword_indexes = set(self.features) & {SearchOptions.SEMANTIC, SearchOptions.RELATIONS}
958
+ if has_non_keyword_indexes:
959
+ if self.offset > 0:
960
+ raise ValueError("offset cannot be used with the semantic or relations index")
961
+ if self.sort and self.sort.field != SortField.SCORE:
962
+ raise ValueError("sort by date cannot be used with the semantic or relations index")
963
+
964
+ return self
965
+
966
+ @field_validator("sort", mode="after")
967
+ @classmethod
968
+ def sorting_by_title_not_supported(cls, value: SortOptions | None) -> SortOptions | None:
969
+ if value and value.field == SortField.TITLE:
970
+ raise ValueError("sorting by title not supported in /search")
971
+
972
+ return value
973
+
947
974
 
948
975
  class Author(str, Enum):
949
976
  NUCLIA = "NUCLIA"
@@ -964,19 +991,19 @@ class UserPrompt(BaseModel):
964
991
 
965
992
 
966
993
  class MaxTokens(BaseModel):
967
- context: Optional[int] = Field(
994
+ context: int | None = Field(
968
995
  default=None,
969
996
  title="Maximum context tokens",
970
997
  description="Use to limit the amount of tokens used in the LLM context",
971
998
  )
972
- answer: Optional[int] = Field(
999
+ answer: int | None = Field(
973
1000
  default=None,
974
1001
  title="Maximum answer tokens",
975
1002
  description="Use to limit the amount of tokens used in the LLM answer",
976
1003
  )
977
1004
 
978
1005
 
979
- def parse_max_tokens(max_tokens: Optional[Union[int, MaxTokens]]) -> Optional[MaxTokens]:
1006
+ def parse_max_tokens(max_tokens: int | MaxTokens | None) -> MaxTokens | None:
980
1007
  if isinstance(max_tokens, int):
981
1008
  # If the max_tokens is an integer, it is interpreted as the max_tokens value for the generated answer.
982
1009
  # The max tokens for the context is set to None to use the default value for the model (comes in the
@@ -1022,7 +1049,7 @@ class ChatModel(BaseModel):
1022
1049
  question: str = Field(description="Question to ask the generative model")
1023
1050
  user_id: str
1024
1051
  retrieval: bool = True
1025
- system: Optional[str] = Field(
1052
+ system: str | None = Field(
1026
1053
  default=None,
1027
1054
  title="System prompt",
1028
1055
  description="Optional system prompt input by the user",
@@ -1031,9 +1058,9 @@ class ChatModel(BaseModel):
1031
1058
  default={},
1032
1059
  description="The information retrieval context for the current query",
1033
1060
  )
1034
- query_context_order: Optional[dict[str, int]] = Field(
1061
+ query_context_order: dict[str, int] | None = Field(
1035
1062
  default=None,
1036
- description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model", # noqa: E501
1063
+ description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
1037
1064
  )
1038
1065
  chat_history: list[ChatContextMessage] = Field(
1039
1066
  default=[], description="The chat conversation history"
@@ -1042,29 +1069,29 @@ class ChatModel(BaseModel):
1042
1069
  default=True,
1043
1070
  description="Truncate the chat context in case it doesn't fit the generative input",
1044
1071
  )
1045
- user_prompt: Optional[UserPrompt] = Field(
1072
+ user_prompt: UserPrompt | None = Field(
1046
1073
  default=None, description="Optional custom prompt input by the user"
1047
1074
  )
1048
- citations: Union[bool, None, CitationsType] = Field(
1075
+ citations: bool | None | CitationsType = Field(
1049
1076
  default=None,
1050
1077
  description="Whether to include citations in the response. "
1051
1078
  "If set to None or False, no citations will be computed. "
1052
1079
  "If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
1053
1080
  "If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
1054
1081
  )
1055
- citation_threshold: Optional[float] = Field(
1082
+ citation_threshold: float | None = Field(
1056
1083
  default=None,
1057
1084
  description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
1058
1085
  ge=0.0,
1059
1086
  le=1.0,
1060
1087
  )
1061
- generative_model: Optional[str] = Field(
1088
+ generative_model: str | None = Field(
1062
1089
  default=None,
1063
1090
  title="Generative model",
1064
- description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1091
+ description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
1065
1092
  )
1066
1093
 
1067
- max_tokens: Optional[int] = Field(default=None, description="Maximum characters to generate")
1094
+ max_tokens: int | None = Field(default=None, description="Maximum characters to generate")
1068
1095
 
1069
1096
  query_context_images: dict[str, Image] = Field(
1070
1097
  default={},
@@ -1075,7 +1102,7 @@ class ChatModel(BaseModel):
1075
1102
  default=False,
1076
1103
  description="If set to true, the response will be in markdown format",
1077
1104
  )
1078
- json_schema: Optional[dict[str, Any]] = Field(
1105
+ json_schema: dict[str, Any] | None = Field(
1079
1106
  default=None,
1080
1107
  description="The JSON schema to use for the generative model answers",
1081
1108
  )
@@ -1083,17 +1110,18 @@ class ChatModel(BaseModel):
1083
1110
  default=False,
1084
1111
  description="Whether to reorder the query context based on a reranker",
1085
1112
  )
1086
- top_k: Optional[int] = Field(default=None, description="Number of best elements to get from")
1113
+ top_k: int | None = Field(default=None, description="Number of best elements to get from")
1087
1114
 
1088
1115
  format_prompt: bool = Field(
1089
1116
  default=True,
1090
- description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively", # noqa: E501
1117
+ description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
1091
1118
  )
1092
- seed: Optional[int] = Field(
1119
+ seed: int | None = Field(
1093
1120
  default=None,
1094
1121
  description="Seed use for the generative model for a deterministic output.",
1095
1122
  )
1096
- reasoning: Union[Reasoning, bool] = Field(
1123
+ reasoning: Reasoning | bool = Field(
1124
+ title="Reasoning options",
1097
1125
  default=False,
1098
1126
  description=(
1099
1127
  "Reasoning options for the generative model. "
@@ -1107,26 +1135,25 @@ class RephraseModel(BaseModel):
1107
1135
  chat_history: list[ChatContextMessage] = []
1108
1136
  user_id: str
1109
1137
  user_context: list[str] = []
1110
- generative_model: Optional[str] = Field(
1138
+ generative_model: str | None = Field(
1111
1139
  default=None,
1112
1140
  title="Generative model",
1113
- description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1141
+ description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
1114
1142
  )
1115
- chat_history_relevance_threshold: Optional[
1143
+ chat_history_relevance_threshold: (
1116
1144
  Annotated[
1117
1145
  float,
1118
1146
  Field(
1119
1147
  ge=0.0,
1120
1148
  le=1.0,
1121
- description=(
1122
- "Threshold to determine if the past chat history is relevant to rephrase the user's question. "
1123
- "0 - Always treat previous messages as relevant (always rephrase)."
1124
- "1 Always treat previous messages as irrelevant (never rephrase)."
1125
- "Values in between adjust the sensitivity."
1126
- ),
1149
+ description="Threshold to determine if the past chat history is relevant to rephrase the user's question. "
1150
+ "0 - Always treat previous messages as relevant (always rephrase)."
1151
+ "1 - Always treat previous messages as irrelevant (never rephrase)."
1152
+ "Values in between adjust the sensitivity.",
1127
1153
  ),
1128
1154
  ]
1129
- ] = None
1155
+ | None
1156
+ ) = None
1130
1157
 
1131
1158
 
1132
1159
  class RagStrategyName:
@@ -1210,13 +1237,13 @@ class FullResourceApplyTo(BaseModel):
1210
1237
  exclude: list[str] = Field(
1211
1238
  default_factory=list,
1212
1239
  title="Labels to exclude from full resource expansion",
1213
- description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens", # noqa: E501
1240
+ description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
1214
1241
  )
1215
1242
 
1216
1243
 
1217
1244
  class FullResourceStrategy(RagStrategy):
1218
1245
  name: Literal["full_resource"] = "full_resource"
1219
- count: Optional[int] = Field(
1246
+ count: int | None = Field(
1220
1247
  default=None,
1221
1248
  title="Count",
1222
1249
  description="Maximum number of full documents to retrieve. If not specified, all matching documents are retrieved.",
@@ -1227,7 +1254,7 @@ class FullResourceStrategy(RagStrategy):
1227
1254
  title="Include remaining text blocks",
1228
1255
  description="Whether to include the remaining text blocks after the maximum number of resources has been reached.",
1229
1256
  )
1230
- apply_to: Optional[FullResourceApplyTo] = Field(
1257
+ apply_to: FullResourceApplyTo | None = Field(
1231
1258
  default=None,
1232
1259
  title="Apply to certain resources only",
1233
1260
  description="Define which resources to exclude from serialization",
@@ -1333,7 +1360,7 @@ class PreQuery(BaseModel):
1333
1360
  ),
1334
1361
  ge=0,
1335
1362
  )
1336
- id: Optional[str] = Field(
1363
+ id: str | None = Field(
1337
1364
  default=None,
1338
1365
  title="Prequery id",
1339
1366
  min_length=1,
@@ -1467,7 +1494,7 @@ class TableImageStrategy(ImageRagStrategy):
1467
1494
 
1468
1495
  class PageImageStrategy(ImageRagStrategy):
1469
1496
  name: Literal["page_image"] = "page_image"
1470
- count: Optional[int] = Field(
1497
+ count: int | None = Field(
1471
1498
  default=None,
1472
1499
  title="Count",
1473
1500
  description="Maximum number of page images to retrieve. By default, at most 5 images are retrieved.",
@@ -1479,20 +1506,18 @@ class ParagraphImageStrategy(ImageRagStrategy):
1479
1506
 
1480
1507
 
1481
1508
  RagStrategies = Annotated[
1482
- Union[
1483
- FieldExtensionStrategy,
1484
- FullResourceStrategy,
1485
- HierarchyResourceStrategy,
1486
- NeighbouringParagraphsStrategy,
1487
- MetadataExtensionStrategy,
1488
- ConversationalStrategy,
1489
- PreQueriesStrategy,
1490
- GraphStrategy,
1491
- ],
1509
+ FieldExtensionStrategy
1510
+ | FullResourceStrategy
1511
+ | HierarchyResourceStrategy
1512
+ | NeighbouringParagraphsStrategy
1513
+ | MetadataExtensionStrategy
1514
+ | ConversationalStrategy
1515
+ | PreQueriesStrategy
1516
+ | GraphStrategy,
1492
1517
  Field(discriminator="name"),
1493
1518
  ]
1494
1519
  RagImagesStrategies = Annotated[
1495
- Union[PageImageStrategy, ParagraphImageStrategy, TableImageStrategy],
1520
+ PageImageStrategy | ParagraphImageStrategy | TableImageStrategy,
1496
1521
  Field(discriminator="name"),
1497
1522
  ]
1498
1523
  PromptContext = dict[str, str]
@@ -1501,10 +1526,10 @@ PromptContextImages = dict[str, Image]
1501
1526
 
1502
1527
 
1503
1528
  class CustomPrompt(BaseModel):
1504
- system: Optional[str] = Field(
1529
+ system: str | None = Field(
1505
1530
  default=None,
1506
1531
  title="System prompt",
1507
- description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.", # noqa: E501
1532
+ description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
1508
1533
  min_length=1,
1509
1534
  examples=[
1510
1535
  "You are a medical assistant, use medical terminology",
@@ -1513,10 +1538,10 @@ class CustomPrompt(BaseModel):
1513
1538
  "You are a financial expert, use correct terms",
1514
1539
  ],
1515
1540
  )
1516
- user: Optional[str] = Field(
1541
+ user: str | None = Field(
1517
1542
  default=None,
1518
1543
  title="User prompt",
1519
- description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.", # noqa: E501
1544
+ description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
1520
1545
  min_length=1,
1521
1546
  examples=[
1522
1547
  "Taking into account our previous conversation, and this context: {context} answer this {question}",
@@ -1525,7 +1550,7 @@ class CustomPrompt(BaseModel):
1525
1550
  "Given this context: {context}. Answer this {question} using the provided context. Please, answer always in French",
1526
1551
  ],
1527
1552
  )
1528
- rephrase: Optional[str] = Field(
1553
+ rephrase: str | None = Field(
1529
1554
  default=None,
1530
1555
  title="Rephrase",
1531
1556
  description=(
@@ -1555,23 +1580,23 @@ class AskRequest(AuditMetadataBase):
1555
1580
  le=200,
1556
1581
  description="The top most relevant results to fetch at the retrieval step. The maximum number of results allowed is 200.",
1557
1582
  )
1558
- filter_expression: Optional[FilterExpression] = (
1583
+ filter_expression: FilterExpression | None = (
1559
1584
  SearchParamDefaults.filter_expression.to_pydantic_field()
1560
1585
  )
1561
1586
  fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
1562
- filters: Union[list[str], list[Filter]] = Field(
1587
+ filters: list[str] | list[Filter] = Field(
1563
1588
  default=[],
1564
- title="Filters",
1565
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
1589
+ title="Search Filters",
1590
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
1566
1591
  )
1567
- keyword_filters: Union[list[str], list[Filter]] = Field(
1592
+ keyword_filters: list[str] | list[Filter] = Field(
1568
1593
  default=[],
1569
1594
  title="Keyword filters",
1570
1595
  description=(
1571
1596
  "List of keyword filter expressions to apply to the retrieval step. "
1572
1597
  "The text block search will only be performed on the documents that contain the specified keywords. "
1573
1598
  "The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
1574
- "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters" # noqa: E501
1599
+ "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
1575
1600
  ),
1576
1601
  examples=[
1577
1602
  ["NLP", "BERT"],
@@ -1579,43 +1604,39 @@ class AskRequest(AuditMetadataBase):
1579
1604
  ["Friedrich Nietzsche", "Immanuel Kant"],
1580
1605
  ],
1581
1606
  )
1582
- vectorset: Optional[str] = SearchParamDefaults.vectorset.to_pydantic_field()
1583
- min_score: Optional[Union[float, MinScore]] = Field(
1607
+ vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
1608
+ min_score: float | MinScore | None = Field(
1584
1609
  default=None,
1585
1610
  title="Minimum score",
1586
- description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.", # noqa: E501
1611
+ description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
1587
1612
  )
1588
1613
  features: list[ChatOptions] = SearchParamDefaults.chat_features.to_pydantic_field()
1589
- range_creation_start: Optional[DateTime] = (
1590
- SearchParamDefaults.range_creation_start.to_pydantic_field()
1591
- )
1592
- range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
1593
- range_modification_start: Optional[DateTime] = (
1614
+ range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
1615
+ range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
1616
+ range_modification_start: DateTime | None = (
1594
1617
  SearchParamDefaults.range_modification_start.to_pydantic_field()
1595
1618
  )
1596
- range_modification_end: Optional[DateTime] = (
1619
+ range_modification_end: DateTime | None = (
1597
1620
  SearchParamDefaults.range_modification_end.to_pydantic_field()
1598
1621
  )
1599
1622
  show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
1600
1623
  field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
1601
1624
  extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
1602
- context: Optional[list[ChatContextMessage]] = SearchParamDefaults.chat_context.to_pydantic_field()
1603
- chat_history: Optional[list[ChatContextMessage]] = (
1604
- SearchParamDefaults.chat_history.to_pydantic_field()
1605
- )
1606
- extra_context: Optional[list[str]] = Field(
1625
+ context: list[ChatContextMessage] | None = SearchParamDefaults.chat_context.to_pydantic_field()
1626
+ chat_history: list[ChatContextMessage] | None = SearchParamDefaults.chat_history.to_pydantic_field()
1627
+ extra_context: list[str] | None = Field(
1607
1628
  default=None,
1608
1629
  title="Extra query context",
1609
1630
  description="""Additional context that is added to the retrieval context sent to the LLM.
1610
1631
  It allows extending the chat feature with content that may not be in the Knowledge Box.""",
1611
1632
  )
1612
- extra_context_images: Optional[list[Image]] = Field(
1633
+ extra_context_images: list[Image] | None = Field(
1613
1634
  default=None,
1614
1635
  title="Extra query context images",
1615
1636
  description="""Additional images added to the retrieval context sent to the LLM."
1616
1637
  It allows extending the chat feature with content that may not be in the Knowledge Box.""",
1617
1638
  )
1618
- query_image: Optional[Image] = Field(
1639
+ query_image: Image | None = Field(
1619
1640
  default=None,
1620
1641
  title="Query image",
1621
1642
  description="Image that will be used together with the query text for retrieval and then sent to the LLM as part of the context. "
@@ -1628,27 +1649,27 @@ class AskRequest(AuditMetadataBase):
1628
1649
 
1629
1650
  highlight: bool = SearchParamDefaults.highlight.to_pydantic_field()
1630
1651
  resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
1631
- prompt: Optional[Union[str, CustomPrompt]] = Field(
1652
+ prompt: str | CustomPrompt | None = Field(
1632
1653
  default=None,
1633
1654
  title="Prompts",
1634
- description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.", # noqa: E501
1655
+ description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
1635
1656
  )
1636
- rank_fusion: Union[RankFusionName, RankFusion] = SearchParamDefaults.rank_fusion.to_pydantic_field()
1637
- reranker: Union[RerankerName, Reranker] = SearchParamDefaults.reranker.to_pydantic_field()
1638
- citations: Union[bool, None, CitationsType] = Field(
1657
+ rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
1658
+ reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
1659
+ citations: bool | None | CitationsType = Field(
1639
1660
  default=None,
1640
1661
  description="Whether to include citations in the response. "
1641
1662
  "If set to None or False, no citations will be computed. "
1642
1663
  "If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
1643
1664
  "If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
1644
1665
  )
1645
- citation_threshold: Optional[float] = Field(
1666
+ citation_threshold: float | None = Field(
1646
1667
  default=None,
1647
1668
  description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
1648
1669
  ge=0.0,
1649
1670
  le=1.0,
1650
1671
  )
1651
- security: Optional[RequestSecurity] = SearchParamDefaults.security.to_pydantic_field()
1672
+ security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
1652
1673
  show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
1653
1674
  rag_strategies: list[RagStrategies] = Field(
1654
1675
  default=[],
@@ -1713,21 +1734,21 @@ If empty, the default strategy is used, which simply adds the text of the matchi
1713
1734
  )
1714
1735
  debug: bool = SearchParamDefaults.debug.to_pydantic_field()
1715
1736
 
1716
- generative_model: Optional[str] = Field(
1737
+ generative_model: str | None = Field(
1717
1738
  default=None,
1718
1739
  title="Generative model",
1719
- description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1740
+ description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
1720
1741
  )
1721
- generative_model_seed: Optional[int] = Field(
1742
+ generative_model_seed: int | None = Field(
1722
1743
  default=None,
1723
1744
  title="Seed for the generative model",
1724
1745
  description="The seed to use for the generative model for deterministic generation. Only supported by some models.",
1725
1746
  )
1726
1747
 
1727
- max_tokens: Optional[Union[int, MaxTokens]] = Field(
1748
+ max_tokens: int | MaxTokens | None = Field(
1728
1749
  default=None,
1729
1750
  title="Maximum LLM tokens to use for the request",
1730
- description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.", # noqa: E501
1751
+ description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
1731
1752
  )
1732
1753
 
1733
1754
  rephrase: bool = Field(
@@ -1736,7 +1757,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
1736
1757
  "Rephrase the query for a more efficient retrieval. This will consume LLM tokens and make the request slower."
1737
1758
  ),
1738
1759
  )
1739
- chat_history_relevance_threshold: Optional[float] = Field(
1760
+ chat_history_relevance_threshold: float | None = Field(
1740
1761
  default=None,
1741
1762
  ge=0.0,
1742
1763
  le=1.0,
@@ -1754,7 +1775,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
1754
1775
  description="If set to true, the response will be in markdown format",
1755
1776
  )
1756
1777
 
1757
- answer_json_schema: Optional[dict[str, Any]] = Field(
1778
+ answer_json_schema: dict[str, Any] | None = Field(
1758
1779
  default=None,
1759
1780
  title="Answer JSON schema",
1760
1781
  description="""Desired JSON schema for the LLM answer.
@@ -1770,13 +1791,14 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
1770
1791
  description="Whether to generate an answer using the generative model. If set to false, the response will only contain the retrieval results.",
1771
1792
  )
1772
1793
 
1773
- search_configuration: Optional[str] = Field(
1794
+ search_configuration: str | None = Field(
1774
1795
  default=None,
1775
1796
  description="Load ask parameters from this configuration. Parameters in the request override parameters from the configuration.",
1776
1797
  )
1777
1798
 
1778
- reasoning: Union[Reasoning, bool] = Field(
1799
+ reasoning: Reasoning | bool = Field(
1779
1800
  default=False,
1801
+ title="Reasoning options",
1780
1802
  description=(
1781
1803
  "Reasoning options for the generative model. "
1782
1804
  "Set to True to enable default reasoning, False to disable, or provide a Reasoning object for custom options."
@@ -1836,6 +1858,13 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
1836
1858
  self.context = None
1837
1859
  return self
1838
1860
 
1861
+ @field_validator("resource_filters", mode="after")
1862
+ def validate_resource_filters(cls, values: list[str]) -> list[str]:
1863
+ if values is not None:
1864
+ for v in values:
1865
+ _validate_resource_filter(v)
1866
+ return values
1867
+
1839
1868
 
1840
1869
  # Alias (for backwards compatiblity with testbed)
1841
1870
  class ChatRequest(AskRequest):
@@ -1857,8 +1886,8 @@ class SummarizeModel(BaseModel):
1857
1886
  """
1858
1887
 
1859
1888
  resources: dict[str, SummarizeResourceModel] = {}
1860
- generative_model: Optional[str] = None
1861
- user_prompt: Optional[str] = None
1889
+ generative_model: str | None = None
1890
+ user_prompt: str | None = None
1862
1891
  summary_kind: SummaryKind = SummaryKind.SIMPLE
1863
1892
 
1864
1893
 
@@ -1867,13 +1896,13 @@ class SummarizeRequest(BaseModel):
1867
1896
  Model for the request payload of the summarize endpoint
1868
1897
  """
1869
1898
 
1870
- generative_model: Optional[str] = Field(
1899
+ generative_model: str | None = Field(
1871
1900
  default=None,
1872
1901
  title="Generative model",
1873
- description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1902
+ description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
1874
1903
  )
1875
1904
 
1876
- user_prompt: Optional[str] = Field(
1905
+ user_prompt: str | None = Field(
1877
1906
  default=None,
1878
1907
  title="User prompt",
1879
1908
  description="Optional custom prompt input by the user",
@@ -1884,7 +1913,7 @@ class SummarizeRequest(BaseModel):
1884
1913
  min_length=1,
1885
1914
  max_length=100,
1886
1915
  title="Resources",
1887
- description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.", # noqa: E501
1916
+ description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
1888
1917
  )
1889
1918
 
1890
1919
  summary_kind: SummaryKind = Field(
@@ -1910,20 +1939,20 @@ class SummarizedResponse(BaseModel):
1910
1939
  title="Summary",
1911
1940
  description="Global summary of all resources combined.",
1912
1941
  )
1913
- consumption: Optional[Consumption] = None
1942
+ consumption: Consumption | None = None
1914
1943
 
1915
1944
 
1916
1945
  class KnowledgeGraphEntity(BaseModel):
1917
1946
  name: str
1918
- type: Optional[RelationNodeType] = None
1919
- subtype: Optional[str] = None
1947
+ type: RelationNodeType | None = None
1948
+ subtype: str | None = None
1920
1949
 
1921
1950
 
1922
1951
  class FindRequest(BaseSearchRequest):
1923
- query_entities: SkipJsonSchema[Optional[list[KnowledgeGraphEntity]]] = Field(
1952
+ query_entities: SkipJsonSchema[list[KnowledgeGraphEntity] | None] = Field(
1924
1953
  default=None, title="Query entities", description="Entities to use in a knowledge graph search"
1925
1954
  )
1926
- graph_query: Optional[GraphPathQuery] = Field(
1955
+ graph_query: GraphPathQuery | None = Field(
1927
1956
  default=None,
1928
1957
  title="Graph query",
1929
1958
  description="Query for the knowledge graph. Paths (node-relation-node) extracted from a paragraph_id will be used to extend the results",
@@ -1934,17 +1963,17 @@ class FindRequest(BaseSearchRequest):
1934
1963
  FindOptions.SEMANTIC,
1935
1964
  ]
1936
1965
  )
1937
- rank_fusion: Union[RankFusionName, RankFusion] = SearchParamDefaults.rank_fusion.to_pydantic_field()
1938
- reranker: Union[RerankerName, Reranker] = SearchParamDefaults.reranker.to_pydantic_field()
1966
+ rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
1967
+ reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
1939
1968
 
1940
- keyword_filters: Union[list[str], list[Filter]] = Field(
1969
+ keyword_filters: list[str] | list[Filter] = Field(
1941
1970
  default=[],
1942
1971
  title="Keyword filters",
1943
1972
  description=(
1944
1973
  "List of keyword filter expressions to apply to the retrieval step. "
1945
1974
  "The text block search will only be performed on the documents that contain the specified keywords. "
1946
1975
  "The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
1947
- "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters" # noqa: E501
1976
+ "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
1948
1977
  ),
1949
1978
  examples=[
1950
1979
  ["NLP", "BERT"],
@@ -1953,11 +1982,11 @@ class FindRequest(BaseSearchRequest):
1953
1982
  ],
1954
1983
  )
1955
1984
 
1956
- search_configuration: Optional[str] = Field(
1985
+ search_configuration: str | None = Field(
1957
1986
  default=None,
1958
1987
  description="Load find parameters from this configuration. Parameters in the request override parameters from the configuration.",
1959
1988
  )
1960
- generative_model: Optional[str] = Field(
1989
+ generative_model: str | None = Field(
1961
1990
  default=None,
1962
1991
  title="Generative model",
1963
1992
  description="The generative model used to rephrase the query. If not provided, the model configured for the Knowledge Box is used.",
@@ -1991,9 +2020,9 @@ class SCORE_TYPE(str, Enum):
1991
2020
 
1992
2021
 
1993
2022
  class FindTextPosition(BaseModel):
1994
- page_number: Optional[int] = None
1995
- start_seconds: Optional[list[int]] = None
1996
- end_seconds: Optional[list[int]] = None
2023
+ page_number: int | None = None
2024
+ start_seconds: list[int] | None = None
2025
+ end_seconds: list[int] | None = None
1997
2026
  index: int
1998
2027
  start: int
1999
2028
  end: int
@@ -2005,15 +2034,15 @@ class FindParagraph(BaseModel):
2005
2034
  order: int = Field(default=0, ge=0)
2006
2035
  text: str
2007
2036
  id: str
2008
- labels: Optional[list[str]] = []
2009
- position: Optional[TextPosition] = None
2037
+ labels: list[str] | None = []
2038
+ position: TextPosition | None = None
2010
2039
  fuzzy_result: bool = False
2011
2040
  page_with_visual: bool = Field(
2012
2041
  default=False,
2013
2042
  title="Page where this paragraph belongs is a visual page",
2014
2043
  description="This flag informs if the page may have information that has not been extracted",
2015
2044
  )
2016
- reference: Optional[str] = Field(
2045
+ reference: str | None = Field(
2017
2046
  default=None,
2018
2047
  title="Reference to the image that represents this text",
2019
2048
  description="Reference to the extracted image that represents this paragraph",
@@ -2023,7 +2052,7 @@ class FindParagraph(BaseModel):
2023
2052
  title="Is a table",
2024
2053
  description="The referenced image of the paragraph is a table",
2025
2054
  )
2026
- relevant_relations: Optional[Relations] = Field(
2055
+ relevant_relations: Relations | None = Field(
2027
2056
  default=None,
2028
2057
  title="Relevant relations",
2029
2058
  description="Relevant relations from which the paragraph was found, will only be filled if using the Graph RAG Strategy",
@@ -2038,17 +2067,19 @@ class FindResource(Resource):
2038
2067
  fields: dict[str, FindField]
2039
2068
 
2040
2069
  def updated_from(self, origin: Resource):
2070
+ find_resource_model_fields = self.model_fields.keys()
2041
2071
  for key in origin.model_fields.keys():
2042
- self.__setattr__(key, getattr(origin, key))
2072
+ if key in find_resource_model_fields:
2073
+ self.__setattr__(key, getattr(origin, key))
2043
2074
 
2044
2075
 
2045
2076
  class KnowledgeboxFindResults(JsonBaseModel):
2046
2077
  """Find on knowledgebox results"""
2047
2078
 
2048
2079
  resources: dict[str, FindResource]
2049
- relations: Optional[Relations] = None
2050
- query: Optional[str] = None
2051
- rephrased_query: Optional[str] = None
2080
+ relations: Relations | None = None
2081
+ query: str | None = Field(default=None, title="Find Results Query")
2082
+ rephrased_query: str | None = None
2052
2083
  total: int = 0
2053
2084
  page_number: int = Field(
2054
2085
  default=0,
@@ -2062,18 +2093,18 @@ class KnowledgeboxFindResults(JsonBaseModel):
2062
2093
  default=False,
2063
2094
  description="Pagination will be deprecated, please, refer to `top_k` in the request",
2064
2095
  )
2065
- nodes: Optional[list[dict[str, str]]] = Field(
2096
+ nodes: list[dict[str, str]] | None = Field(
2066
2097
  default=None,
2067
2098
  title="Nodes",
2068
2099
  description="List of nodes queried in the search",
2069
2100
  )
2070
- shards: Optional[list[str]] = Field(
2101
+ shards: list[str] | None = Field(
2071
2102
  default=None,
2072
2103
  title="Shards",
2073
2104
  description="The list of shard replica ids used for the search.",
2074
2105
  )
2075
2106
  autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
2076
- min_score: Optional[Union[float, MinScore]] = Field(
2107
+ min_score: float | MinScore | None = Field(
2077
2108
  default=MinScore(),
2078
2109
  title="Minimum result score",
2079
2110
  description="The minimum scores that have been used for the search operation.",
@@ -2081,9 +2112,9 @@ class KnowledgeboxFindResults(JsonBaseModel):
2081
2112
  best_matches: list[str] = Field(
2082
2113
  default=[],
2083
2114
  title="Best matches",
2084
- description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).", # noqa: E501
2115
+ description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
2085
2116
  )
2086
- metrics: Optional[dict[str, Any]] = Field(
2117
+ metrics: dict[str, Any] | None = Field(
2087
2118
  default=None,
2088
2119
  title="Metrics",
2089
2120
  description=(
@@ -2101,15 +2132,15 @@ class FeedbackTasks(str, Enum):
2101
2132
  class FeedbackRequest(BaseModel):
2102
2133
  ident: str = Field(
2103
2134
  title="Request identifier",
2104
- description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.", # noqa: E501
2135
+ description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
2105
2136
  )
2106
2137
  good: bool = Field(title="Good", description="Whether the result was good or not")
2107
2138
  task: FeedbackTasks = Field(
2108
2139
  title="Task",
2109
2140
  description="The task the feedback is for. For now, only `CHAT` task is available",
2110
2141
  )
2111
- feedback: Optional[str] = Field(None, title="Feedback", description="Feedback text")
2112
- text_block_id: Optional[str] = Field(None, title="Text block", description="Text block id")
2142
+ feedback: str | None = Field(None, title="Feedback", description="Feedback text")
2143
+ text_block_id: str | None = Field(None, title="Text block", description="Text block id")
2113
2144
 
2114
2145
 
2115
2146
  def validate_facets(facets):
@@ -2160,13 +2191,11 @@ class AugmentedTextBlock(BaseModel):
2160
2191
  text: str = Field(
2161
2192
  description="The text of the augmented text block. It may include additional metadata to enrich the context"
2162
2193
  )
2163
- position: Optional[TextPosition] = Field(
2194
+ position: TextPosition | None = Field(
2164
2195
  default=None,
2165
2196
  description="Metadata about the position of the text block in the original document.",
2166
2197
  )
2167
- parent: Optional[str] = Field(
2168
- default=None, description="The parent text block that was augmented for."
2169
- )
2198
+ parent: str | None = Field(default=None, description="The parent text block that was augmented for.")
2170
2199
  augmentation_type: TextBlockAugmentationType = Field(description="Type of augmentation.")
2171
2200
 
2172
2201
 
@@ -2190,12 +2219,12 @@ class AskTokens(BaseModel):
2190
2219
  title="Output tokens",
2191
2220
  description="Number of LLM tokens used for the answer",
2192
2221
  )
2193
- input_nuclia: Optional[float] = Field(
2222
+ input_nuclia: float | None = Field(
2194
2223
  title="Input Nuclia tokens",
2195
2224
  description="Number of Nuclia LLM tokens used for the context in the query",
2196
2225
  default=None,
2197
2226
  )
2198
- output_nuclia: Optional[float] = Field(
2227
+ output_nuclia: float | None = Field(
2199
2228
  title="Output Nuclia tokens",
2200
2229
  description="Number of Nuclia LLM tokens used for the answer",
2201
2230
  default=None,
@@ -2203,12 +2232,12 @@ class AskTokens(BaseModel):
2203
2232
 
2204
2233
 
2205
2234
  class AskTimings(BaseModel):
2206
- generative_first_chunk: Optional[float] = Field(
2235
+ generative_first_chunk: float | None = Field(
2207
2236
  default=None,
2208
2237
  title="Generative first chunk",
2209
2238
  description="Time the LLM took to generate the first chunk of the answer",
2210
2239
  )
2211
- generative_total: Optional[float] = Field(
2240
+ generative_total: float | None = Field(
2212
2241
  default=None,
2213
2242
  title="Generative total",
2214
2243
  description="Total time the LLM took to generate the answer",
@@ -2216,12 +2245,12 @@ class AskTimings(BaseModel):
2216
2245
 
2217
2246
 
2218
2247
  class SyncAskMetadata(BaseModel):
2219
- tokens: Optional[AskTokens] = Field(
2248
+ tokens: AskTokens | None = Field(
2220
2249
  default=None,
2221
2250
  title="Tokens",
2222
2251
  description="Number of tokens used in the LLM context and answer",
2223
2252
  )
2224
- timings: Optional[AskTimings] = Field(
2253
+ timings: AskTimings | None = Field(
2225
2254
  default=None,
2226
2255
  title="Timings",
2227
2256
  description="Timings of the generative model",
@@ -2240,19 +2269,19 @@ class SyncAskResponse(BaseModel):
2240
2269
  title="Answer",
2241
2270
  description="The generative answer to the query",
2242
2271
  )
2243
- reasoning: Optional[str] = Field(
2272
+ reasoning: str | None = Field(
2244
2273
  default=None,
2245
- title="Reasoning",
2246
- description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.", # noqa: E501
2274
+ title="Reasoning steps",
2275
+ description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
2247
2276
  )
2248
- answer_json: Optional[dict[str, Any]] = Field(
2277
+ answer_json: dict[str, Any] | None = Field(
2249
2278
  default=None,
2250
2279
  title="Answer JSON",
2251
- description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.", # noqa: E501
2280
+ description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
2252
2281
  )
2253
2282
  status: str = Field(
2254
2283
  title="Status",
2255
- description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'", # noqa: E501
2284
+ description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
2256
2285
  )
2257
2286
  retrieval_results: KnowledgeboxFindResults = Field(
2258
2287
  title="Retrieval results",
@@ -2263,7 +2292,7 @@ class SyncAskResponse(BaseModel):
2263
2292
  title="Retrieval best matches",
2264
2293
  description="Sorted list of best matching text blocks in the retrieval step. This includes the main query and prequeries results, if any.",
2265
2294
  )
2266
- prequeries: Optional[dict[str, KnowledgeboxFindResults]] = Field(
2295
+ prequeries: dict[str, KnowledgeboxFindResults] | None = Field(
2267
2296
  default=None,
2268
2297
  title="Prequeries",
2269
2298
  description="The retrieval results of the prequeries",
@@ -2271,9 +2300,9 @@ class SyncAskResponse(BaseModel):
2271
2300
  learning_id: str = Field(
2272
2301
  default="",
2273
2302
  title="Learning id",
2274
- description="The id of the learning request. This id can be used to provide feedback on the learning process.", # noqa: E501
2303
+ description="The id of the learning request. This id can be used to provide feedback on the learning process.",
2275
2304
  )
2276
- relations: Optional[Relations] = Field(
2305
+ relations: Relations | None = Field(
2277
2306
  default=None,
2278
2307
  title="Relations",
2279
2308
  description="The detected relations of the answer",
@@ -2288,29 +2317,29 @@ class SyncAskResponse(BaseModel):
2288
2317
  title="Citation footnote to context",
2289
2318
  description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)""",
2290
2319
  )
2291
- augmented_context: Optional[AugmentedContext] = Field(
2320
+ augmented_context: AugmentedContext | None = Field(
2292
2321
  default=None,
2293
2322
  description=(
2294
2323
  "Augmented text blocks that were sent to the LLM as part of the RAG strategies "
2295
2324
  "applied on the retrieval results in the request."
2296
2325
  ),
2297
2326
  )
2298
- prompt_context: Optional[list[str]] = Field(
2327
+ prompt_context: list[str] | None = Field(
2299
2328
  default=None,
2300
2329
  title="Prompt context",
2301
2330
  description="The prompt context used to generate the answer. Returned only if the debug flag is set to true",
2302
2331
  )
2303
- predict_request: Optional[dict[str, Any]] = Field(
2332
+ predict_request: dict[str, Any] | None = Field(
2304
2333
  default=None,
2305
2334
  title="Predict request",
2306
2335
  description="The internal predict request used to generate the answer. Returned only if the debug flag is set to true",
2307
2336
  )
2308
- metadata: Optional[SyncAskMetadata] = Field(
2337
+ metadata: SyncAskMetadata | None = Field(
2309
2338
  default=None,
2310
2339
  title="Metadata",
2311
- description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.", # noqa: E501
2340
+ description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
2312
2341
  )
2313
- consumption: Optional[Consumption] = Field(
2342
+ consumption: Consumption | None = Field(
2314
2343
  default=None,
2315
2344
  title="Consumption",
2316
2345
  description=(
@@ -2318,12 +2347,12 @@ class SyncAskResponse(BaseModel):
2318
2347
  " 'X-show-consumption' header is set to true in the request."
2319
2348
  ),
2320
2349
  )
2321
- error_details: Optional[str] = Field(
2350
+ error_details: str | None = Field(
2322
2351
  default=None,
2323
2352
  title="Error details",
2324
2353
  description="Error details message in case there was an error",
2325
2354
  )
2326
- debug: Optional[dict[str, Any]] = Field(
2355
+ debug: dict[str, Any] | None = Field(
2327
2356
  default=None,
2328
2357
  title="Debug information",
2329
2358
  description=(
@@ -2413,7 +2442,7 @@ class StatusAskResponseItem(BaseModel):
2413
2442
  type: Literal["status"] = "status"
2414
2443
  code: str
2415
2444
  status: str
2416
- details: Optional[str] = None
2445
+ details: str | None = None
2417
2446
 
2418
2447
 
2419
2448
  class ErrorAskResponseItem(BaseModel):
@@ -2432,22 +2461,22 @@ class DebugAskResponseItem(BaseModel):
2432
2461
  metrics: dict[str, Any]
2433
2462
 
2434
2463
 
2435
- AskResponseItemType = Union[
2436
- AnswerAskResponseItem,
2437
- ReasoningAskResponseItem,
2438
- JSONAskResponseItem,
2439
- MetadataAskResponseItem,
2440
- AugmentedContextResponseItem,
2441
- CitationsAskResponseItem,
2442
- FootnoteCitationsAskResponseItem,
2443
- StatusAskResponseItem,
2444
- ErrorAskResponseItem,
2445
- RetrievalAskResponseItem,
2446
- RelationsAskResponseItem,
2447
- DebugAskResponseItem,
2448
- PrequeriesAskResponseItem,
2449
- ConsumptionResponseItem,
2450
- ]
2464
+ AskResponseItemType = (
2465
+ AnswerAskResponseItem
2466
+ | ReasoningAskResponseItem
2467
+ | JSONAskResponseItem
2468
+ | MetadataAskResponseItem
2469
+ | AugmentedContextResponseItem
2470
+ | CitationsAskResponseItem
2471
+ | FootnoteCitationsAskResponseItem
2472
+ | StatusAskResponseItem
2473
+ | ErrorAskResponseItem
2474
+ | RetrievalAskResponseItem
2475
+ | RelationsAskResponseItem
2476
+ | DebugAskResponseItem
2477
+ | PrequeriesAskResponseItem
2478
+ | ConsumptionResponseItem
2479
+ )
2451
2480
 
2452
2481
 
2453
2482
  class AskResponseItem(BaseModel):
@@ -2467,7 +2496,7 @@ def parse_custom_prompt(item: AskRequest) -> CustomPrompt:
2467
2496
  return prompt
2468
2497
 
2469
2498
 
2470
- def parse_rephrase_prompt(item: AskRequest) -> Optional[str]:
2499
+ def parse_rephrase_prompt(item: AskRequest) -> str | None:
2471
2500
  prompt = parse_custom_prompt(item)
2472
2501
  return prompt.rephrase
2473
2502
 
@@ -2478,7 +2507,7 @@ FindRequest.model_rebuild()
2478
2507
 
2479
2508
  class CatalogFacetsPrefix(BaseModel):
2480
2509
  prefix: str = Field(pattern="^((/[^/]+)*)$")
2481
- depth: Optional[int] = Field(
2510
+ depth: int | None = Field(
2482
2511
  default=None,
2483
2512
  ge=0,
2484
2513
  description="Only include facets up to this depth from the prefix, leave empty to include all depths",
@@ -2500,3 +2529,22 @@ class CatalogFacetsRequest(BaseModel):
2500
2529
 
2501
2530
  class CatalogFacetsResponse(BaseModel):
2502
2531
  facets: dict[str, int]
2532
+
2533
+
2534
+ def _validate_resource_filter(v: str):
2535
+ parts = v.split("/")
2536
+
2537
+ rid = parts[0]
2538
+ try:
2539
+ UUID(rid)
2540
+ except ValueError:
2541
+ raise ValueError(f"resource id filter '{rid}' should be a valid UUID")
2542
+
2543
+ if len(parts) > 1:
2544
+ field_type = parts[1]
2545
+ try:
2546
+ FieldTypeName.from_abbreviation(field_type)
2547
+ except KeyError: # pragma: no cover
2548
+ raise ValueError(
2549
+ f"resource filter {v} has an invalid field type: {field_type}",
2550
+ )