nucliadb-models 6.8.1.post4983__py3-none-any.whl → 6.10.0.post5694__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb-models might be problematic. See the package registry's advisory page for more details.

Files changed (34)
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +359 -0
  3. nucliadb_models/common.py +66 -57
  4. nucliadb_models/configuration.py +9 -9
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +30 -29
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +5 -20
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +78 -74
  12. nucliadb_models/graph/requests.py +38 -47
  13. nucliadb_models/hydration.py +423 -0
  14. nucliadb_models/internal/predict.py +7 -9
  15. nucliadb_models/internal/shards.py +2 -3
  16. nucliadb_models/labels.py +18 -11
  17. nucliadb_models/link.py +18 -19
  18. nucliadb_models/metadata.py +80 -53
  19. nucliadb_models/notifications.py +3 -3
  20. nucliadb_models/processing.py +1 -2
  21. nucliadb_models/resource.py +85 -102
  22. nucliadb_models/retrieval.py +147 -0
  23. nucliadb_models/search.py +360 -306
  24. nucliadb_models/security.py +2 -3
  25. nucliadb_models/text.py +7 -8
  26. nucliadb_models/trainset.py +1 -2
  27. nucliadb_models/utils.py +2 -3
  28. nucliadb_models/vectors.py +2 -5
  29. nucliadb_models/writer.py +56 -57
  30. {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/METADATA +2 -3
  31. nucliadb_models-6.10.0.post5694.dist-info/RECORD +41 -0
  32. nucliadb_models-6.8.1.post4983.dist-info/RECORD +0 -38
  33. {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/WHEEL +0 -0
  34. {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/top_level.txt +0 -0
nucliadb_models/search.py CHANGED
@@ -14,19 +14,18 @@
14
14
  #
15
15
  import json
16
16
  from enum import Enum
17
- from typing import Any, Literal, Optional, Union
17
+ from typing import Annotated, Any, Literal
18
18
 
19
19
  from pydantic import BaseModel, Field, field_validator, model_validator
20
20
  from pydantic.aliases import AliasChoices
21
21
  from pydantic.json_schema import SkipJsonSchema
22
- from typing_extensions import Annotated, Self
22
+ from typing_extensions import Self
23
23
 
24
24
  from nucliadb_models import RelationMetadata
25
25
  from nucliadb_models.common import FieldTypeName, ParamDefault
26
26
  from nucliadb_models.graph.requests import GraphPathQuery
27
27
 
28
28
  # Bw/c import to avoid breaking users
29
- # noqa isort: skip
30
29
  from nucliadb_models.metadata import RelationNodeType, RelationType, ResourceProcessingStatus
31
30
  from nucliadb_models.resource import ExtractedDataTypeName, Resource
32
31
  from nucliadb_models.security import RequestSecurity
@@ -79,8 +78,9 @@ ANSWER_JSON_SCHEMA_EXAMPLE = {
79
78
  class ModelParamDefaults:
80
79
  applied_autofilters = ParamDefault(
81
80
  default=[],
82
- title="Autofilters",
83
- description="List of filters automatically applied to the search query",
81
+ title="Applied autofilters",
82
+ description="[deprecated] list of filters automatically applied to the search query",
83
+ deprecated=True,
84
84
  )
85
85
 
86
86
 
@@ -151,12 +151,12 @@ FacetsResult = dict[str, Any]
151
151
 
152
152
 
153
153
  class TextPosition(BaseModel):
154
- page_number: Optional[int] = None
154
+ page_number: int | None = None
155
155
  index: int
156
156
  start: int
157
157
  end: int
158
- start_seconds: Optional[list[int]] = None
159
- end_seconds: Optional[list[int]] = None
158
+ start_seconds: list[int] | None = None
159
+ end_seconds: list[int] | None = None
160
160
 
161
161
 
162
162
  class Sentence(BaseModel):
@@ -165,8 +165,8 @@ class Sentence(BaseModel):
165
165
  text: str
166
166
  field_type: str
167
167
  field: str
168
- index: Optional[str] = None
169
- position: Optional[TextPosition] = None
168
+ index: str | None = None
169
+ position: TextPosition | None = None
170
170
 
171
171
 
172
172
  class Sentences(BaseModel):
@@ -176,7 +176,7 @@ class Sentences(BaseModel):
176
176
  page_size: int = 20
177
177
  min_score: float = Field(
178
178
  title="Minimum score",
179
- description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.", # noqa: E501
179
+ description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
180
180
  )
181
181
 
182
182
 
@@ -187,45 +187,45 @@ class Paragraph(BaseModel):
187
187
  field: str
188
188
  text: str
189
189
  labels: list[str] = []
190
- start_seconds: Optional[list[int]] = None
191
- end_seconds: Optional[list[int]] = None
192
- position: Optional[TextPosition] = None
190
+ start_seconds: list[int] | None = None
191
+ end_seconds: list[int] | None = None
192
+ position: TextPosition | None = None
193
193
  fuzzy_result: bool = False
194
194
 
195
195
 
196
196
  class Paragraphs(BaseModel):
197
197
  results: list[Paragraph] = []
198
- facets: Optional[FacetsResult] = None
199
- query: Optional[str] = None
198
+ facets: FacetsResult | None = None
199
+ query: str | None = Field(default=None, title="Paragraphs Query")
200
200
  total: int = 0
201
201
  page_number: int = 0
202
202
  page_size: int = 20
203
203
  next_page: bool = False
204
204
  min_score: float = Field(
205
205
  title="Minimum score",
206
- description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.", # noqa: E501
206
+ description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
207
207
  )
208
208
 
209
209
 
210
210
  class ResourceResult(BaseModel):
211
- score: Union[float, int]
211
+ score: float | int
212
212
  rid: str
213
213
  field_type: str
214
214
  field: str
215
- labels: Optional[list[str]] = None
215
+ labels: list[str] | None = None
216
216
 
217
217
 
218
218
  class Resources(BaseModel):
219
219
  results: list[ResourceResult]
220
- facets: Optional[FacetsResult] = None
221
- query: Optional[str] = None
220
+ facets: FacetsResult | None = None
221
+ query: str | None = Field(default=None, title="Resources Query")
222
222
  total: int = 0
223
223
  page_number: int = 0
224
224
  page_size: int = 20
225
225
  next_page: bool = False
226
226
  min_score: float = Field(
227
227
  title="Minimum score",
228
- description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.", # noqa: E501
228
+ description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
229
229
  )
230
230
 
231
231
 
@@ -245,7 +245,7 @@ class DirectionalRelation(BaseModel):
245
245
  relation: RelationType
246
246
  relation_label: str
247
247
  direction: RelationDirection
248
- metadata: Optional[RelationMetadata] = None
248
+ metadata: RelationMetadata | None = None
249
249
  resource_id: str
250
250
 
251
251
 
@@ -279,23 +279,25 @@ class RelatedEntities(BaseModel):
279
279
  class ResourceSearchResults(JsonBaseModel):
280
280
  """Search on resource results"""
281
281
 
282
- sentences: Optional[Sentences] = None
283
- paragraphs: Optional[Paragraphs] = None
284
- relations: Optional[Relations] = None
285
- nodes: Optional[list[dict[str, str]]] = None
286
- shards: Optional[list[str]] = None
282
+ sentences: Sentences | None = None
283
+ paragraphs: Paragraphs | None = None
284
+ relations: Relations | None = None
285
+ nodes: list[dict[str, str]] | None = None
286
+ shards: list[str] | None = None
287
287
 
288
288
 
289
289
  class KnowledgeboxSearchResults(JsonBaseModel):
290
290
  """Search on knowledgebox results"""
291
291
 
292
292
  resources: dict[str, Resource] = {}
293
- sentences: Optional[Sentences] = None
294
- paragraphs: Optional[Paragraphs] = None
295
- fulltext: Optional[Resources] = None
296
- relations: Optional[Relations] = None
297
- nodes: Optional[list[dict[str, str]]] = None
298
- shards: Optional[list[str]] = None
293
+ sentences: Sentences | None = None
294
+ paragraphs: Paragraphs | None = None
295
+ fulltext: Resources | None = None
296
+ relations: Relations | None = None
297
+ nodes: list[dict[str, str]] | None = None
298
+ shards: list[str] | None = None
299
+
300
+ # TODO: remove on a future major release
299
301
  autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
300
302
 
301
303
 
@@ -303,16 +305,16 @@ class CatalogResponse(BaseModel):
303
305
  """Catalog results"""
304
306
 
305
307
  resources: dict[str, Resource] = {}
306
- fulltext: Optional[Resources] = None
307
- shards: Optional[list[str]] = None
308
+ fulltext: Resources | None = None
309
+ shards: list[str] | None = None
308
310
 
309
311
 
310
312
  class KnowledgeboxSuggestResults(JsonBaseModel):
311
313
  """Suggest on resource results"""
312
314
 
313
- paragraphs: Optional[Paragraphs] = None
314
- entities: Optional[RelatedEntities] = None
315
- shards: Optional[list[str]] = None
315
+ paragraphs: Paragraphs | None = None
316
+ entities: RelatedEntities | None = None
317
+ shards: list[str] | None = None
316
318
 
317
319
 
318
320
  class KnowledgeboxCounters(BaseModel):
@@ -320,7 +322,7 @@ class KnowledgeboxCounters(BaseModel):
320
322
  paragraphs: int
321
323
  fields: int
322
324
  sentences: int
323
- shards: Optional[list[str]] = None
325
+ shards: list[str] | None = None
324
326
  index_size: float = Field(default=0.0, title="Index size (bytes)")
325
327
 
326
328
 
@@ -344,10 +346,12 @@ SortOrderMap = {
344
346
 
345
347
  class SortOptions(BaseModel):
346
348
  field: SortField
347
- limit: Optional[int] = Field(None, gt=0)
348
349
  order: SortOrder = SortOrder.DESC
349
350
 
350
351
 
352
+ MAX_RANK_FUSION_WINDOW = 500
353
+
354
+
351
355
  class RankFusionName(str, Enum):
352
356
  RECIPROCAL_RANK_FUSION = "rrf"
353
357
 
@@ -373,13 +377,13 @@ class ReciprocalRankFusion(_BaseRankFusion):
373
377
  k: float = Field(
374
378
  default=60.0,
375
379
  title="RRF k parameter",
376
- description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets", # noqa: E501
380
+ description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
377
381
  )
378
- window: Optional[int] = Field(
382
+ window: int | None = Field(
379
383
  default=None,
380
- le=500,
384
+ le=MAX_RANK_FUSION_WINDOW,
381
385
  title="RRF window",
382
- description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time", # noqa: E501
386
+ description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
383
387
  )
384
388
  boosting: ReciprocalRankFusionWeights = Field(
385
389
  default_factory=ReciprocalRankFusionWeights,
@@ -390,12 +394,12 @@ Define different weights for each retriever. This allows to assign different pri
390
394
  The default is 1 for each retriever, which means no extra boost for any of them. Weights below 0 can be used for negative boosting.
391
395
 
392
396
  This kind of boosting can be useful in multilingual search, for example, where keyword search may not give good results and can degrade the final search experience
393
- """, # noqa: E501
397
+ """,
394
398
  )
395
399
 
396
400
 
397
401
  RankFusion = Annotated[
398
- Union[ReciprocalRankFusion],
402
+ ReciprocalRankFusion,
399
403
  Field(discriminator="name"),
400
404
  ]
401
405
 
@@ -430,15 +434,15 @@ class _BaseReranker(BaseModel):
430
434
 
431
435
  class PredictReranker(_BaseReranker):
432
436
  name: Literal[RerankerName.PREDICT_RERANKER] = RerankerName.PREDICT_RERANKER
433
- window: Optional[int] = Field(
437
+ window: int | None = Field(
434
438
  default=None,
435
439
  le=200,
436
440
  title="Reranker window",
437
- description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k", # noqa: E501
441
+ description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
438
442
  )
439
443
 
440
444
 
441
- Reranker = Annotated[Union[PredictReranker], Field(discriminator="name")]
445
+ Reranker = Annotated[PredictReranker, Field(discriminator="name")]
442
446
 
443
447
 
444
448
  class KnowledgeBoxCount(BaseModel):
@@ -467,25 +471,20 @@ class SearchParamDefaults:
467
471
  )
468
472
  filters = ParamDefault(
469
473
  default=[],
470
- title="Filters",
471
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
474
+ title="Search Filters",
475
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
472
476
  )
473
477
  resource_filters = ParamDefault(
474
478
  default=[],
475
479
  title="Resources filter",
476
- description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.", # noqa: E501
480
+ description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
477
481
  )
478
482
  faceted = ParamDefault(
479
483
  default=[],
480
484
  title="Faceted",
481
- description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
485
+ description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
482
486
  max_items=50,
483
487
  )
484
- autofilter = ParamDefault(
485
- default=False,
486
- title="Automatic search filtering",
487
- description="If set to true, the search will automatically add filters to the query. For example, it will filter results containing the entities detected in the query", # noqa: E501
488
- )
489
488
  chat_query = ParamDefault(
490
489
  default=...,
491
490
  title="Query",
@@ -505,19 +504,27 @@ class SearchParamDefaults:
505
504
  )
506
505
  top_k = ParamDefault(
507
506
  default=20,
507
+ gt=-1,
508
508
  le=200,
509
509
  title="Top k",
510
510
  description="The number of results search should return. The maximum number of results allowed is 200.",
511
511
  )
512
+ offset = ParamDefault(
513
+ default=0,
514
+ gt=-1,
515
+ le=1000,
516
+ title="Results offset",
517
+ description="The number of results to skip, starting from the beginning in sort order. Used for pagination. It can only be used with the keyword and fulltext indexes.",
518
+ )
512
519
  highlight = ParamDefault(
513
520
  default=False,
514
521
  title="Highlight",
515
- description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags", # noqa: E501
522
+ description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
516
523
  )
517
524
  with_duplicates = ParamDefault(
518
525
  default=False,
519
526
  title="With duplicate paragraphs",
520
- description="Whether to return duplicate paragraphs on the same document", # noqa: E501
527
+ description="Whether to return duplicate paragraphs on the same document",
521
528
  )
522
529
  with_status = ParamDefault(
523
530
  default=None,
@@ -527,19 +534,13 @@ class SearchParamDefaults:
527
534
  with_synonyms = ParamDefault(
528
535
  default=False,
529
536
  title="With custom synonyms",
530
- description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.", # noqa: E501
537
+ description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
531
538
  )
532
539
  sort_order = ParamDefault(
533
540
  default=SortOrder.DESC,
534
541
  title="Sort order",
535
542
  description="Order to sort results with",
536
543
  )
537
- sort_limit = ParamDefault(
538
- default=None,
539
- title="Sort limit",
540
- description="",
541
- gt=0,
542
- )
543
544
  sort_field = ParamDefault(
544
545
  default=None,
545
546
  title="Sort field",
@@ -563,12 +564,12 @@ class SearchParamDefaults:
563
564
  reranker = ParamDefault(
564
565
  default=RerankerName.PREDICT_RERANKER,
565
566
  title="Reranker",
566
- description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval", # noqa: E501
567
+ description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
567
568
  )
568
569
  debug = ParamDefault(
569
570
  default=False,
570
571
  title="Debug mode",
571
- description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.", # noqa: E501
572
+ description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
572
573
  )
573
574
  show = ParamDefault(
574
575
  default=[ResourceProperties.BASIC],
@@ -589,27 +590,27 @@ class SearchParamDefaults:
589
590
  range_creation_start = ParamDefault(
590
591
  default=None,
591
592
  title="Resource creation range start",
592
- description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
593
+ description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
593
594
  )
594
595
  range_creation_end = ParamDefault(
595
596
  default=None,
596
597
  title="Resource creation range end",
597
- description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
598
+ description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
598
599
  )
599
600
  range_modification_start = ParamDefault(
600
601
  default=None,
601
602
  title="Resource modification range start",
602
- description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
603
+ description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
603
604
  )
604
605
  range_modification_end = ParamDefault(
605
606
  default=None,
606
607
  title="Resource modification range end",
607
- description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.", # noqa: E501
608
+ description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
608
609
  )
609
610
  vector = ParamDefault(
610
611
  default=None,
611
612
  title="Search Vector",
612
- description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.", # noqa: E501
613
+ description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
613
614
  )
614
615
  vectorset = ParamDefault(
615
616
  default=None,
@@ -625,12 +626,12 @@ class SearchParamDefaults:
625
626
  chat_history = ParamDefault(
626
627
  default=None,
627
628
  title="Chat history",
628
- description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.", # noqa: E501
629
+ description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
629
630
  )
630
631
  chat_features = ParamDefault(
631
632
  default=[ChatOptions.SEMANTIC, ChatOptions.KEYWORD],
632
633
  title="Chat features",
633
- description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead", # noqa: E501
634
+ description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
634
635
  )
635
636
  suggest_features = ParamDefault(
636
637
  default=[
@@ -643,17 +644,17 @@ class SearchParamDefaults:
643
644
  security = ParamDefault(
644
645
  default=None,
645
646
  title="Security",
646
- description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.", # noqa: E501
647
+ description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
647
648
  )
648
649
  security_groups = ParamDefault(
649
650
  default=[],
650
651
  title="Security groups",
651
- description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.", # noqa: E501
652
+ description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
652
653
  )
653
654
  rephrase = ParamDefault(
654
655
  default=False,
655
656
  title="Rephrase query consuming LLMs",
656
- description="Rephrase query consuming LLMs - it will make the query slower", # noqa: E501
657
+ description="Rephrase query consuming LLMs - it will make the query slower",
657
658
  )
658
659
  prefer_markdown = ParamDefault(
659
660
  default=False,
@@ -693,10 +694,10 @@ class SearchParamDefaults:
693
694
 
694
695
 
695
696
  class Filter(BaseModel):
696
- all: Optional[list[str]] = Field(default=None, min_length=1)
697
- any: Optional[list[str]] = Field(default=None, min_length=1)
698
- none: Optional[list[str]] = Field(default=None, min_length=1)
699
- not_all: Optional[list[str]] = Field(default=None, min_length=1)
697
+ all: list[str] | None = Field(default=None, min_length=1)
698
+ any: list[str] | None = Field(default=None, min_length=1)
699
+ none: list[str] | None = Field(default=None, min_length=1)
700
+ not_all: list[str] | None = Field(default=None, min_length=1)
700
701
 
701
702
  @model_validator(mode="after")
702
703
  def validate_filter(self) -> Self:
@@ -738,19 +739,19 @@ class CatalogQuery(BaseModel):
738
739
 
739
740
 
740
741
  class CatalogRequest(BaseModel):
741
- query: Union[str, CatalogQuery] = ParamDefault(
742
+ query: str | CatalogQuery = ParamDefault(
742
743
  default="",
743
- title="Query",
744
+ title="Catalog Request Query",
744
745
  description="The query to search for",
745
746
  ).to_pydantic_field()
746
- filter_expression: Optional[CatalogFilterExpression] = (
747
+ filter_expression: CatalogFilterExpression | None = (
747
748
  SearchParamDefaults.catalog_filter_expression.to_pydantic_field()
748
749
  )
749
750
  faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
750
- sort: Optional[SortOptions] = SearchParamDefaults.sort.to_pydantic_field()
751
+ sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
751
752
  page_number: int = SearchParamDefaults.catalog_page_number.to_pydantic_field()
752
753
  page_size: int = SearchParamDefaults.catalog_page_size.to_pydantic_field()
753
- hidden: Optional[bool] = SearchParamDefaults.hidden.to_pydantic_field()
754
+ hidden: bool | None = SearchParamDefaults.hidden.to_pydantic_field()
754
755
  show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field(
755
756
  default=[ResourceProperties.BASIC, ResourceProperties.ERRORS]
756
757
  )
@@ -758,32 +759,30 @@ class CatalogRequest(BaseModel):
758
759
  debug: SkipJsonSchema[bool] = SearchParamDefaults.debug.to_pydantic_field()
759
760
 
760
761
  # Deprecated filter parameters
761
- filters: Union[list[str], list[Filter]] = Field(
762
+ filters: list[str] | list[Filter] = Field(
762
763
  default=[],
763
- title="Filters",
764
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
764
+ title="Catalog Filters",
765
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
765
766
  deprecated="Use filter_expression instead",
766
767
  )
767
- with_status: Optional[ResourceProcessingStatus] = Field(
768
+ with_status: ResourceProcessingStatus | None = Field(
768
769
  default=None,
769
770
  title="With processing status",
770
771
  description="Filter results by resource processing status",
771
772
  deprecated="Use filter_expression instead",
772
773
  )
773
- range_creation_start: Optional[DateTime] = (
774
- SearchParamDefaults.range_creation_start.to_pydantic_field(
775
- deprecated="Use filter_expression instead",
776
- )
774
+ range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field(
775
+ deprecated="Use filter_expression instead",
777
776
  )
778
- range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field(
777
+ range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field(
779
778
  deprecated="Use filter_expression instead",
780
779
  )
781
- range_modification_start: Optional[DateTime] = (
780
+ range_modification_start: DateTime | None = (
782
781
  SearchParamDefaults.range_modification_start.to_pydantic_field(
783
782
  deprecated="Use filter_expression instead",
784
783
  )
785
784
  )
786
- range_modification_end: Optional[DateTime] = (
785
+ range_modification_end: DateTime | None = (
787
786
  SearchParamDefaults.range_modification_end.to_pydantic_field(
788
787
  deprecated="Use filter_expression instead",
789
788
  )
@@ -796,15 +795,15 @@ class CatalogRequest(BaseModel):
796
795
 
797
796
 
798
797
  class MinScore(BaseModel):
799
- semantic: Optional[float] = Field(
798
+ semantic: float | None = Field(
800
799
  default=None,
801
800
  title="Minimum semantic score",
802
- description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
801
+ description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
803
802
  )
804
803
  bm25: float = Field(
805
804
  default=0,
806
805
  title="Minimum bm25 score",
807
- description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score", # noqa: E501
806
+ description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
808
807
  ge=0,
809
808
  )
810
809
 
@@ -818,7 +817,7 @@ AUDIT_METADATA_MAX_BYTES = 1024 * 10 # 10KB
818
817
 
819
818
 
820
819
  class AuditMetadataBase(BaseModel):
821
- audit_metadata: Optional[dict[str, str]] = Field(
820
+ audit_metadata: dict[str, str] | None = Field(
822
821
  default=None,
823
822
  title="Audit metadata",
824
823
  description=(
@@ -842,29 +841,27 @@ class AuditMetadataBase(BaseModel):
842
841
 
843
842
  class BaseSearchRequest(AuditMetadataBase):
844
843
  query: str = SearchParamDefaults.query.to_pydantic_field()
845
- filter_expression: Optional[FilterExpression] = (
844
+ filter_expression: FilterExpression | None = (
846
845
  SearchParamDefaults.filter_expression.to_pydantic_field()
847
846
  )
848
847
  fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
849
- filters: Union[list[str], list[Filter]] = Field(
848
+ filters: list[str] | list[Filter] = Field(
850
849
  default=[],
851
- title="Filters",
852
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
850
+ title="Search Filters",
851
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
853
852
  )
854
853
  top_k: int = SearchParamDefaults.top_k.to_pydantic_field()
855
- min_score: Optional[Union[float, MinScore]] = Field(
854
+ min_score: float | MinScore | None = Field(
856
855
  default=None,
857
856
  title="Minimum score",
858
- description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.", # noqa: E501
857
+ description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
859
858
  )
860
- range_creation_start: Optional[DateTime] = (
861
- SearchParamDefaults.range_creation_start.to_pydantic_field()
862
- )
863
- range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
864
- range_modification_start: Optional[DateTime] = (
859
+ range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
860
+ range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
861
+ range_modification_start: DateTime | None = (
865
862
  SearchParamDefaults.range_modification_start.to_pydantic_field()
866
863
  )
867
- range_modification_end: Optional[DateTime] = (
864
+ range_modification_end: DateTime | None = (
868
865
  SearchParamDefaults.range_modification_end.to_pydantic_field()
869
866
  )
870
867
  debug: bool = SearchParamDefaults.debug.to_pydantic_field()
@@ -872,13 +869,15 @@ class BaseSearchRequest(AuditMetadataBase):
872
869
  show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
873
870
  field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
874
871
  extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
875
- vector: Optional[list[float]] = SearchParamDefaults.vector.to_pydantic_field()
876
- vectorset: Optional[str] = SearchParamDefaults.vectorset.to_pydantic_field()
872
+ vector: list[float] | None = SearchParamDefaults.vector.to_pydantic_field()
873
+ vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
877
874
  with_duplicates: bool = SearchParamDefaults.with_duplicates.to_pydantic_field()
878
875
  with_synonyms: bool = SearchParamDefaults.with_synonyms.to_pydantic_field()
879
- autofilter: bool = SearchParamDefaults.autofilter.to_pydantic_field()
876
+ # autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
877
+ # avoid breaking changes in the python sdks. Please remove on a future major release.
878
+ autofilter: SkipJsonSchema[bool] = False
880
879
  resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
881
- security: Optional[RequestSecurity] = SearchParamDefaults.security.to_pydantic_field()
880
+ security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
882
881
  show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
883
882
 
884
883
  rephrase: bool = Field(
@@ -888,7 +887,7 @@ class BaseSearchRequest(AuditMetadataBase):
888
887
  ),
889
888
  )
890
889
 
891
- rephrase_prompt: Optional[str] = Field(
890
+ rephrase_prompt: str | None = Field(
892
891
  default=None,
893
892
  title="Rephrase",
894
893
  description=(
@@ -907,7 +906,7 @@ Please return ONLY the question without any explanation. Just the rephrased ques
907
906
  Please return ONLY the question without any explanation.""",
908
907
  ],
909
908
  )
910
- query_image: Optional[Image] = Field(
909
+ query_image: Image | None = Field(
911
910
  default=None,
912
911
  title="Query image",
913
912
  description="Image that will be used together with the query text for retrieval.",
@@ -937,13 +936,33 @@ class SearchRequest(BaseSearchRequest):
937
936
  ]
938
937
  )
939
938
  faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
940
- sort: Optional[SortOptions] = SearchParamDefaults.sort.to_pydantic_field()
939
+ sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
940
+ offset: int = SearchParamDefaults.offset.to_pydantic_field()
941
941
 
942
942
  @field_validator("faceted")
943
943
  @classmethod
944
944
  def nested_facets_not_supported(cls, facets):
945
945
  return validate_facets(facets)
946
946
 
947
+ @model_validator(mode="after")
948
+ def offset_sort_only_on_keyword_indexes(self):
949
+ has_non_keyword_indexes = set(self.features) & {SearchOptions.SEMANTIC, SearchOptions.RELATIONS}
950
+ if has_non_keyword_indexes:
951
+ if self.offset > 0:
952
+ raise ValueError("offset cannot be used with the semantic or relations index")
953
+ if self.sort and self.sort.field != SortField.SCORE:
954
+ raise ValueError("sort by date cannot be used with the semantic or relations index")
955
+
956
+ return self
957
+
958
+ @field_validator("sort", mode="after")
959
+ @classmethod
960
+ def sorting_by_title_not_supported(cls, value: SortOptions | None) -> SortOptions | None:
961
+ if value and value.field == SortField.TITLE:
962
+ raise ValueError("sorting by title not supported in /search")
963
+
964
+ return value
965
+
947
966
 
948
967
  class Author(str, Enum):
949
968
  NUCLIA = "NUCLIA"
@@ -964,19 +983,19 @@ class UserPrompt(BaseModel):
964
983
 
965
984
 
966
985
  class MaxTokens(BaseModel):
967
- context: Optional[int] = Field(
986
+ context: int | None = Field(
968
987
  default=None,
969
988
  title="Maximum context tokens",
970
989
  description="Use to limit the amount of tokens used in the LLM context",
971
990
  )
972
- answer: Optional[int] = Field(
991
+ answer: int | None = Field(
973
992
  default=None,
974
993
  title="Maximum answer tokens",
975
994
  description="Use to limit the amount of tokens used in the LLM answer",
976
995
  )
977
996
 
978
997
 
979
- def parse_max_tokens(max_tokens: Optional[Union[int, MaxTokens]]) -> Optional[MaxTokens]:
998
+ def parse_max_tokens(max_tokens: int | MaxTokens | None) -> MaxTokens | None:
980
999
  if isinstance(max_tokens, int):
981
1000
  # If the max_tokens is an integer, it is interpreted as the max_tokens value for the generated answer.
982
1001
  # The max tokens for the context is set to None to use the default value for the model (comes in the
@@ -1008,6 +1027,12 @@ class Reasoning(BaseModel):
1008
1027
  )
1009
1028
 
1010
1029
 
1030
+ class CitationsType(str, Enum):
1031
+ NONE = "none"
1032
+ DEFAULT = "default"
1033
+ LLM_FOOTNOTES = "llm_footnotes"
1034
+
1035
+
1011
1036
  class ChatModel(BaseModel):
1012
1037
  """
1013
1038
  This is the model for the predict request payload on the chat endpoint
@@ -1016,7 +1041,7 @@ class ChatModel(BaseModel):
1016
1041
  question: str = Field(description="Question to ask the generative model")
1017
1042
  user_id: str
1018
1043
  retrieval: bool = True
1019
- system: Optional[str] = Field(
1044
+ system: str | None = Field(
1020
1045
  default=None,
1021
1046
  title="System prompt",
1022
1047
  description="Optional system prompt input by the user",
@@ -1025,9 +1050,9 @@ class ChatModel(BaseModel):
1025
1050
  default={},
1026
1051
  description="The information retrieval context for the current query",
1027
1052
  )
1028
- query_context_order: Optional[dict[str, int]] = Field(
1053
+ query_context_order: dict[str, int] | None = Field(
1029
1054
  default=None,
1030
- description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model", # noqa: E501
1055
+ description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
1031
1056
  )
1032
1057
  chat_history: list[ChatContextMessage] = Field(
1033
1058
  default=[], description="The chat conversation history"
@@ -1036,23 +1061,29 @@ class ChatModel(BaseModel):
1036
1061
  default=True,
1037
1062
  description="Truncate the chat context in case it doesn't fit the generative input",
1038
1063
  )
1039
- user_prompt: Optional[UserPrompt] = Field(
1064
+ user_prompt: UserPrompt | None = Field(
1040
1065
  default=None, description="Optional custom prompt input by the user"
1041
1066
  )
1042
- citations: bool = Field(default=False, description="Whether to include the citations in the answer")
1043
- citation_threshold: Optional[float] = Field(
1067
+ citations: bool | None | CitationsType = Field(
1044
1068
  default=None,
1045
- description="If citations is True, this sets the similarity threshold (0 to 1) for paragraphs to be included as citations. Lower values result in more citations. If not provided, Nuclia's default threshold is used.", # noqa: E501
1069
+ description="Whether to include citations in the response. "
1070
+ "If set to None or False, no citations will be computed. "
1071
+ "If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
1072
+ "If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
1073
+ )
1074
+ citation_threshold: float | None = Field(
1075
+ default=None,
1076
+ description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
1046
1077
  ge=0.0,
1047
1078
  le=1.0,
1048
1079
  )
1049
- generative_model: Optional[str] = Field(
1080
+ generative_model: str | None = Field(
1050
1081
  default=None,
1051
1082
  title="Generative model",
1052
- description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1083
+ description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
1053
1084
  )
1054
1085
 
1055
- max_tokens: Optional[int] = Field(default=None, description="Maximum characters to generate")
1086
+ max_tokens: int | None = Field(default=None, description="Maximum characters to generate")
1056
1087
 
1057
1088
  query_context_images: dict[str, Image] = Field(
1058
1089
  default={},
@@ -1063,7 +1094,7 @@ class ChatModel(BaseModel):
1063
1094
  default=False,
1064
1095
  description="If set to true, the response will be in markdown format",
1065
1096
  )
1066
- json_schema: Optional[dict[str, Any]] = Field(
1097
+ json_schema: dict[str, Any] | None = Field(
1067
1098
  default=None,
1068
1099
  description="The JSON schema to use for the generative model answers",
1069
1100
  )
@@ -1071,17 +1102,18 @@ class ChatModel(BaseModel):
1071
1102
  default=False,
1072
1103
  description="Whether to reorder the query context based on a reranker",
1073
1104
  )
1074
- top_k: Optional[int] = Field(default=None, description="Number of best elements to get from")
1105
+ top_k: int | None = Field(default=None, description="Number of best elements to get from")
1075
1106
 
1076
1107
  format_prompt: bool = Field(
1077
1108
  default=True,
1078
- description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively", # noqa: E501
1109
+ description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
1079
1110
  )
1080
- seed: Optional[int] = Field(
1111
+ seed: int | None = Field(
1081
1112
  default=None,
1082
1113
  description="Seed use for the generative model for a deterministic output.",
1083
1114
  )
1084
- reasoning: Union[Reasoning, bool] = Field(
1115
+ reasoning: Reasoning | bool = Field(
1116
+ title="Reasoning options",
1085
1117
  default=False,
1086
1118
  description=(
1087
1119
  "Reasoning options for the generative model. "
@@ -1095,26 +1127,25 @@ class RephraseModel(BaseModel):
1095
1127
  chat_history: list[ChatContextMessage] = []
1096
1128
  user_id: str
1097
1129
  user_context: list[str] = []
1098
- generative_model: Optional[str] = Field(
1130
+ generative_model: str | None = Field(
1099
1131
  default=None,
1100
1132
  title="Generative model",
1101
- description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1133
+ description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
1102
1134
  )
1103
- chat_history_relevance_threshold: Optional[
1135
+ chat_history_relevance_threshold: (
1104
1136
  Annotated[
1105
1137
  float,
1106
1138
  Field(
1107
1139
  ge=0.0,
1108
1140
  le=1.0,
1109
- description=(
1110
- "Threshold to determine if the past chat history is relevant to rephrase the user's question. "
1111
- "0 - Always treat previous messages as relevant (always rephrase)."
1112
- "1 Always treat previous messages as irrelevant (never rephrase)."
1113
- "Values in between adjust the sensitivity."
1114
- ),
1141
+ description="Threshold to determine if the past chat history is relevant to rephrase the user's question. "
1142
+ "0 - Always treat previous messages as relevant (always rephrase)."
1143
+ "1 - Always treat previous messages as irrelevant (never rephrase)."
1144
+ "Values in between adjust the sensitivity.",
1115
1145
  ),
1116
1146
  ]
1117
- ] = None
1147
+ | None
1148
+ ) = None
1118
1149
 
1119
1150
 
1120
1151
  class RagStrategyName:
@@ -1158,7 +1189,7 @@ ALLOWED_FIELD_TYPES: dict[str, str] = {
1158
1189
  "t": "text",
1159
1190
  "f": "file",
1160
1191
  "u": "link",
1161
- "d": "datetime",
1192
+ "c": "conversation",
1162
1193
  "a": "generic",
1163
1194
  }
1164
1195
 
@@ -1166,16 +1197,19 @@ ALLOWED_FIELD_TYPES: dict[str, str] = {
1166
1197
  class FieldExtensionStrategy(RagStrategy):
1167
1198
  name: Literal["field_extension"] = "field_extension"
1168
1199
  fields: list[str] = Field(
1200
+ default=[],
1169
1201
  title="Fields",
1170
- description="List of field ids to extend the context with. It will try to extend the retrieval context with the specified fields in the matching resources. The field ids have to be in the format `{field_type}/{field_name}`, like 'a/title', 'a/summary' for title and summary fields or 't/amend' for a text field named 'amend'.", # noqa: E501
1171
- min_length=1,
1202
+ description="List of field ids to extend the context with. It will try to extend the retrieval context with the specified fields in the matching resources. The field ids have to be in the format `{field_type}/{field_name}`, like 'a/title', 'a/summary' for title and summary fields or 't/amend' for a text field named 'amend'.",
1203
+ )
1204
+ data_augmentation_field_prefixes: list[str] = Field(
1205
+ default=[],
1206
+ description="List of prefixes for data augmentation added fields to extend the context with. For example, if the prefix is 'simpson', all fields that are a result of data augmentation with that prefix will be used to extend the context.",
1172
1207
  )
1173
1208
 
1174
- @field_validator("fields", mode="after")
1175
- @classmethod
1176
- def fields_validator(cls, fields) -> Self:
1209
+ @model_validator(mode="after")
1210
+ def field_extension_strategy_validator(self) -> Self:
1177
1211
  # Check that the fields are in the format {field_type}/{field_name}
1178
- for field in fields:
1212
+ for field in self.fields:
1179
1213
  try:
1180
1214
  field_type, _ = field.strip("/").split("/")
1181
1215
  except ValueError:
@@ -1188,21 +1222,20 @@ class FieldExtensionStrategy(RagStrategy):
1188
1222
  f"Field '{field}' does not have a valid field type. "
1189
1223
  f"Valid field types are: {allowed_field_types_part}."
1190
1224
  )
1191
-
1192
- return fields
1225
+ return self
1193
1226
 
1194
1227
 
1195
1228
  class FullResourceApplyTo(BaseModel):
1196
1229
  exclude: list[str] = Field(
1197
1230
  default_factory=list,
1198
1231
  title="Labels to exclude from full resource expansion",
1199
- description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens", # noqa: E501
1232
+ description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
1200
1233
  )
1201
1234
 
1202
1235
 
1203
1236
  class FullResourceStrategy(RagStrategy):
1204
1237
  name: Literal["full_resource"] = "full_resource"
1205
- count: Optional[int] = Field(
1238
+ count: int | None = Field(
1206
1239
  default=None,
1207
1240
  title="Count",
1208
1241
  description="Maximum number of full documents to retrieve. If not specified, all matching documents are retrieved.",
@@ -1213,7 +1246,7 @@ class FullResourceStrategy(RagStrategy):
1213
1246
  title="Include remaining text blocks",
1214
1247
  description="Whether to include the remaining text blocks after the maximum number of resources has been reached.",
1215
1248
  )
1216
- apply_to: Optional[FullResourceApplyTo] = Field(
1249
+ apply_to: FullResourceApplyTo | None = Field(
1217
1250
  default=None,
1218
1251
  title="Apply to certain resources only",
1219
1252
  description="Define which resources to exclude from serialization",
@@ -1227,6 +1260,7 @@ class HierarchyResourceStrategy(RagStrategy):
1227
1260
  title="Count",
1228
1261
  description="Number of extra characters that are added to each matching paragraph when adding to the context.",
1229
1262
  ge=0,
1263
+ le=1024,
1230
1264
  )
1231
1265
 
1232
1266
 
@@ -1318,7 +1352,7 @@ class PreQuery(BaseModel):
1318
1352
  ),
1319
1353
  ge=0,
1320
1354
  )
1321
- id: Optional[str] = Field(
1355
+ id: str | None = Field(
1322
1356
  default=None,
1323
1357
  title="Prequery id",
1324
1358
  min_length=1,
@@ -1452,10 +1486,10 @@ class TableImageStrategy(ImageRagStrategy):
1452
1486
 
1453
1487
  class PageImageStrategy(ImageRagStrategy):
1454
1488
  name: Literal["page_image"] = "page_image"
1455
- count: Optional[int] = Field(
1489
+ count: int | None = Field(
1456
1490
  default=None,
1457
1491
  title="Count",
1458
- description="Maximum number of images to retrieve from the page. By default, at most 5 images are retrieved.",
1492
+ description="Maximum number of page images to retrieve. By default, at most 5 images are retrieved.",
1459
1493
  )
1460
1494
 
1461
1495
 
@@ -1464,20 +1498,18 @@ class ParagraphImageStrategy(ImageRagStrategy):
1464
1498
 
1465
1499
 
1466
1500
  RagStrategies = Annotated[
1467
- Union[
1468
- FieldExtensionStrategy,
1469
- FullResourceStrategy,
1470
- HierarchyResourceStrategy,
1471
- NeighbouringParagraphsStrategy,
1472
- MetadataExtensionStrategy,
1473
- ConversationalStrategy,
1474
- PreQueriesStrategy,
1475
- GraphStrategy,
1476
- ],
1501
+ FieldExtensionStrategy
1502
+ | FullResourceStrategy
1503
+ | HierarchyResourceStrategy
1504
+ | NeighbouringParagraphsStrategy
1505
+ | MetadataExtensionStrategy
1506
+ | ConversationalStrategy
1507
+ | PreQueriesStrategy
1508
+ | GraphStrategy,
1477
1509
  Field(discriminator="name"),
1478
1510
  ]
1479
1511
  RagImagesStrategies = Annotated[
1480
- Union[PageImageStrategy, ParagraphImageStrategy, TableImageStrategy],
1512
+ PageImageStrategy | ParagraphImageStrategy | TableImageStrategy,
1481
1513
  Field(discriminator="name"),
1482
1514
  ]
1483
1515
  PromptContext = dict[str, str]
@@ -1486,10 +1518,10 @@ PromptContextImages = dict[str, Image]
1486
1518
 
1487
1519
 
1488
1520
  class CustomPrompt(BaseModel):
1489
- system: Optional[str] = Field(
1521
+ system: str | None = Field(
1490
1522
  default=None,
1491
1523
  title="System prompt",
1492
- description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.", # noqa: E501
1524
+ description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
1493
1525
  min_length=1,
1494
1526
  examples=[
1495
1527
  "You are a medical assistant, use medical terminology",
@@ -1498,10 +1530,10 @@ class CustomPrompt(BaseModel):
1498
1530
  "You are a financial expert, use correct terms",
1499
1531
  ],
1500
1532
  )
1501
- user: Optional[str] = Field(
1533
+ user: str | None = Field(
1502
1534
  default=None,
1503
1535
  title="User prompt",
1504
- description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.", # noqa: E501
1536
+ description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
1505
1537
  min_length=1,
1506
1538
  examples=[
1507
1539
  "Taking into account our previous conversation, and this context: {context} answer this {question}",
@@ -1510,7 +1542,7 @@ class CustomPrompt(BaseModel):
1510
1542
  "Given this context: {context}. Answer this {question} using the provided context. Please, answer always in French",
1511
1543
  ],
1512
1544
  )
1513
- rephrase: Optional[str] = Field(
1545
+ rephrase: str | None = Field(
1514
1546
  default=None,
1515
1547
  title="Rephrase",
1516
1548
  description=(
@@ -1540,23 +1572,23 @@ class AskRequest(AuditMetadataBase):
1540
1572
  le=200,
1541
1573
  description="The top most relevant results to fetch at the retrieval step. The maximum number of results allowed is 200.",
1542
1574
  )
1543
- filter_expression: Optional[FilterExpression] = (
1575
+ filter_expression: FilterExpression | None = (
1544
1576
  SearchParamDefaults.filter_expression.to_pydantic_field()
1545
1577
  )
1546
1578
  fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
1547
- filters: Union[list[str], list[Filter]] = Field(
1579
+ filters: list[str] | list[Filter] = Field(
1548
1580
  default=[],
1549
- title="Filters",
1550
- description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
1581
+ title="Search Filters",
1582
+ description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
1551
1583
  )
1552
- keyword_filters: Union[list[str], list[Filter]] = Field(
1584
+ keyword_filters: list[str] | list[Filter] = Field(
1553
1585
  default=[],
1554
1586
  title="Keyword filters",
1555
1587
  description=(
1556
1588
  "List of keyword filter expressions to apply to the retrieval step. "
1557
1589
  "The text block search will only be performed on the documents that contain the specified keywords. "
1558
1590
  "The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
1559
- "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters" # noqa: E501
1591
+ "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
1560
1592
  ),
1561
1593
  examples=[
1562
1594
  ["NLP", "BERT"],
@@ -1564,69 +1596,72 @@ class AskRequest(AuditMetadataBase):
1564
1596
  ["Friedrich Nietzsche", "Immanuel Kant"],
1565
1597
  ],
1566
1598
  )
1567
- vectorset: Optional[str] = SearchParamDefaults.vectorset.to_pydantic_field()
1568
- min_score: Optional[Union[float, MinScore]] = Field(
1599
+ vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
1600
+ min_score: float | MinScore | None = Field(
1569
1601
  default=None,
1570
1602
  title="Minimum score",
1571
- description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.", # noqa: E501
1603
+ description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
1572
1604
  )
1573
1605
  features: list[ChatOptions] = SearchParamDefaults.chat_features.to_pydantic_field()
1574
- range_creation_start: Optional[DateTime] = (
1575
- SearchParamDefaults.range_creation_start.to_pydantic_field()
1576
- )
1577
- range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
1578
- range_modification_start: Optional[DateTime] = (
1606
+ range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
1607
+ range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
1608
+ range_modification_start: DateTime | None = (
1579
1609
  SearchParamDefaults.range_modification_start.to_pydantic_field()
1580
1610
  )
1581
- range_modification_end: Optional[DateTime] = (
1611
+ range_modification_end: DateTime | None = (
1582
1612
  SearchParamDefaults.range_modification_end.to_pydantic_field()
1583
1613
  )
1584
1614
  show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
1585
1615
  field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
1586
1616
  extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
1587
- context: Optional[list[ChatContextMessage]] = SearchParamDefaults.chat_context.to_pydantic_field()
1588
- chat_history: Optional[list[ChatContextMessage]] = (
1589
- SearchParamDefaults.chat_history.to_pydantic_field()
1590
- )
1591
- extra_context: Optional[list[str]] = Field(
1617
+ context: list[ChatContextMessage] | None = SearchParamDefaults.chat_context.to_pydantic_field()
1618
+ chat_history: list[ChatContextMessage] | None = SearchParamDefaults.chat_history.to_pydantic_field()
1619
+ extra_context: list[str] | None = Field(
1592
1620
  default=None,
1593
1621
  title="Extra query context",
1594
1622
  description="""Additional context that is added to the retrieval context sent to the LLM.
1595
1623
  It allows extending the chat feature with content that may not be in the Knowledge Box.""",
1596
1624
  )
1597
- extra_context_images: Optional[list[Image]] = Field(
1625
+ extra_context_images: list[Image] | None = Field(
1598
1626
  default=None,
1599
1627
  title="Extra query context images",
1600
1628
  description="""Additional images added to the retrieval context sent to the LLM."
1601
1629
  It allows extending the chat feature with content that may not be in the Knowledge Box.""",
1602
1630
  )
1603
- query_image: Optional[Image] = Field(
1631
+ query_image: Image | None = Field(
1604
1632
  default=None,
1605
1633
  title="Query image",
1606
1634
  description="Image that will be used together with the query text for retrieval and then sent to the LLM as part of the context. "
1607
1635
  "If a query image is provided, the `extra_context_images` and `rag_images_strategies` will be disabled.",
1608
1636
  )
1609
- autofilter: bool = SearchParamDefaults.autofilter.to_pydantic_field()
1637
+
1638
+ # autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
1639
+ # avoid breaking changes in the python sdks. Please remove on a future major release.
1640
+ autofilter: SkipJsonSchema[bool] = False
1641
+
1610
1642
  highlight: bool = SearchParamDefaults.highlight.to_pydantic_field()
1611
1643
  resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
1612
- prompt: Optional[Union[str, CustomPrompt]] = Field(
1644
+ prompt: str | CustomPrompt | None = Field(
1613
1645
  default=None,
1614
1646
  title="Prompts",
1615
- description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.", # noqa: E501
1647
+ description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
1616
1648
  )
1617
- rank_fusion: Union[RankFusionName, RankFusion] = SearchParamDefaults.rank_fusion.to_pydantic_field()
1618
- reranker: Union[RerankerName, Reranker] = SearchParamDefaults.reranker.to_pydantic_field()
1619
- citations: bool = Field(
1620
- default=False,
1621
- description="Whether to include the citations for the answer in the response",
1649
+ rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
1650
+ reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
1651
+ citations: bool | None | CitationsType = Field(
1652
+ default=None,
1653
+ description="Whether to include citations in the response. "
1654
+ "If set to None or False, no citations will be computed. "
1655
+ "If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
1656
+ "If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
1622
1657
  )
1623
- citation_threshold: Optional[float] = Field(
1658
+ citation_threshold: float | None = Field(
1624
1659
  default=None,
1625
- description="If citations is True, this sets the similarity threshold (0 to 1) for paragraphs to be included as citations. Lower values result in more citations. If not provided, Nuclia's default threshold is used.",
1660
+ description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
1626
1661
  ge=0.0,
1627
1662
  le=1.0,
1628
1663
  )
1629
- security: Optional[RequestSecurity] = SearchParamDefaults.security.to_pydantic_field()
1664
+ security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
1630
1665
  show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
1631
1666
  rag_strategies: list[RagStrategies] = Field(
1632
1667
  default=[],
@@ -1691,21 +1726,21 @@ If empty, the default strategy is used, which simply adds the text of the matchi
1691
1726
  )
1692
1727
  debug: bool = SearchParamDefaults.debug.to_pydantic_field()
1693
1728
 
1694
- generative_model: Optional[str] = Field(
1729
+ generative_model: str | None = Field(
1695
1730
  default=None,
1696
1731
  title="Generative model",
1697
- description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1732
+ description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
1698
1733
  )
1699
- generative_model_seed: Optional[int] = Field(
1734
+ generative_model_seed: int | None = Field(
1700
1735
  default=None,
1701
1736
  title="Seed for the generative model",
1702
1737
  description="The seed to use for the generative model for deterministic generation. Only supported by some models.",
1703
1738
  )
1704
1739
 
1705
- max_tokens: Optional[Union[int, MaxTokens]] = Field(
1740
+ max_tokens: int | MaxTokens | None = Field(
1706
1741
  default=None,
1707
1742
  title="Maximum LLM tokens to use for the request",
1708
- description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.", # noqa: E501
1743
+ description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
1709
1744
  )
1710
1745
 
1711
1746
  rephrase: bool = Field(
@@ -1714,7 +1749,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
1714
1749
  "Rephrase the query for a more efficient retrieval. This will consume LLM tokens and make the request slower."
1715
1750
  ),
1716
1751
  )
1717
- chat_history_relevance_threshold: Optional[float] = Field(
1752
+ chat_history_relevance_threshold: float | None = Field(
1718
1753
  default=None,
1719
1754
  ge=0.0,
1720
1755
  le=1.0,
@@ -1732,7 +1767,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
1732
1767
  description="If set to true, the response will be in markdown format",
1733
1768
  )
1734
1769
 
1735
- answer_json_schema: Optional[dict[str, Any]] = Field(
1770
+ answer_json_schema: dict[str, Any] | None = Field(
1736
1771
  default=None,
1737
1772
  title="Answer JSON schema",
1738
1773
  description="""Desired JSON schema for the LLM answer.
@@ -1748,13 +1783,14 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
1748
1783
  description="Whether to generate an answer using the generative model. If set to false, the response will only contain the retrieval results.",
1749
1784
  )
1750
1785
 
1751
- search_configuration: Optional[str] = Field(
1786
+ search_configuration: str | None = Field(
1752
1787
  default=None,
1753
1788
  description="Load ask parameters from this configuration. Parameters in the request override parameters from the configuration.",
1754
1789
  )
1755
1790
 
1756
- reasoning: Union[Reasoning, bool] = Field(
1791
+ reasoning: Reasoning | bool = Field(
1757
1792
  default=False,
1793
+ title="Reasoning options",
1758
1794
  description=(
1759
1795
  "Reasoning options for the generative model. "
1760
1796
  "Set to True to enable default reasoning, False to disable, or provide a Reasoning object for custom options."
@@ -1835,8 +1871,8 @@ class SummarizeModel(BaseModel):
1835
1871
  """
1836
1872
 
1837
1873
  resources: dict[str, SummarizeResourceModel] = {}
1838
- generative_model: Optional[str] = None
1839
- user_prompt: Optional[str] = None
1874
+ generative_model: str | None = None
1875
+ user_prompt: str | None = None
1840
1876
  summary_kind: SummaryKind = SummaryKind.SIMPLE
1841
1877
 
1842
1878
 
@@ -1845,13 +1881,13 @@ class SummarizeRequest(BaseModel):
1845
1881
  Model for the request payload of the summarize endpoint
1846
1882
  """
1847
1883
 
1848
- generative_model: Optional[str] = Field(
1884
+ generative_model: str | None = Field(
1849
1885
  default=None,
1850
1886
  title="Generative model",
1851
- description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.", # noqa: E501
1887
+ description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
1852
1888
  )
1853
1889
 
1854
- user_prompt: Optional[str] = Field(
1890
+ user_prompt: str | None = Field(
1855
1891
  default=None,
1856
1892
  title="User prompt",
1857
1893
  description="Optional custom prompt input by the user",
@@ -1862,7 +1898,7 @@ class SummarizeRequest(BaseModel):
1862
1898
  min_length=1,
1863
1899
  max_length=100,
1864
1900
  title="Resources",
1865
- description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.", # noqa: E501
1901
+ description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
1866
1902
  )
1867
1903
 
1868
1904
  summary_kind: SummaryKind = Field(
@@ -1888,20 +1924,20 @@ class SummarizedResponse(BaseModel):
1888
1924
  title="Summary",
1889
1925
  description="Global summary of all resources combined.",
1890
1926
  )
1891
- consumption: Optional[Consumption] = None
1927
+ consumption: Consumption | None = None
1892
1928
 
1893
1929
 
1894
1930
  class KnowledgeGraphEntity(BaseModel):
1895
1931
  name: str
1896
- type: Optional[RelationNodeType] = None
1897
- subtype: Optional[str] = None
1932
+ type: RelationNodeType | None = None
1933
+ subtype: str | None = None
1898
1934
 
1899
1935
 
1900
1936
  class FindRequest(BaseSearchRequest):
1901
- query_entities: SkipJsonSchema[Optional[list[KnowledgeGraphEntity]]] = Field(
1937
+ query_entities: SkipJsonSchema[list[KnowledgeGraphEntity] | None] = Field(
1902
1938
  default=None, title="Query entities", description="Entities to use in a knowledge graph search"
1903
1939
  )
1904
- graph_query: Optional[GraphPathQuery] = Field(
1940
+ graph_query: GraphPathQuery | None = Field(
1905
1941
  default=None,
1906
1942
  title="Graph query",
1907
1943
  description="Query for the knowledge graph. Paths (node-relation-node) extracted from a paragraph_id will be used to extend the results",
@@ -1912,17 +1948,17 @@ class FindRequest(BaseSearchRequest):
1912
1948
  FindOptions.SEMANTIC,
1913
1949
  ]
1914
1950
  )
1915
- rank_fusion: Union[RankFusionName, RankFusion] = SearchParamDefaults.rank_fusion.to_pydantic_field()
1916
- reranker: Union[RerankerName, Reranker] = SearchParamDefaults.reranker.to_pydantic_field()
1951
+ rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
1952
+ reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
1917
1953
 
1918
- keyword_filters: Union[list[str], list[Filter]] = Field(
1954
+ keyword_filters: list[str] | list[Filter] = Field(
1919
1955
  default=[],
1920
1956
  title="Keyword filters",
1921
1957
  description=(
1922
1958
  "List of keyword filter expressions to apply to the retrieval step. "
1923
1959
  "The text block search will only be performed on the documents that contain the specified keywords. "
1924
1960
  "The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
1925
- "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters" # noqa: E501
1961
+ "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
1926
1962
  ),
1927
1963
  examples=[
1928
1964
  ["NLP", "BERT"],
@@ -1931,11 +1967,11 @@ class FindRequest(BaseSearchRequest):
1931
1967
  ],
1932
1968
  )
1933
1969
 
1934
- search_configuration: Optional[str] = Field(
1970
+ search_configuration: str | None = Field(
1935
1971
  default=None,
1936
1972
  description="Load find parameters from this configuration. Parameters in the request override parameters from the configuration.",
1937
1973
  )
1938
- generative_model: Optional[str] = Field(
1974
+ generative_model: str | None = Field(
1939
1975
  default=None,
1940
1976
  title="Generative model",
1941
1977
  description="The generative model used to rephrase the query. If not provided, the model configured for the Knowledge Box is used.",
@@ -1969,9 +2005,9 @@ class SCORE_TYPE(str, Enum):
1969
2005
 
1970
2006
 
1971
2007
  class FindTextPosition(BaseModel):
1972
- page_number: Optional[int] = None
1973
- start_seconds: Optional[list[int]] = None
1974
- end_seconds: Optional[list[int]] = None
2008
+ page_number: int | None = None
2009
+ start_seconds: list[int] | None = None
2010
+ end_seconds: list[int] | None = None
1975
2011
  index: int
1976
2012
  start: int
1977
2013
  end: int
@@ -1983,15 +2019,15 @@ class FindParagraph(BaseModel):
1983
2019
  order: int = Field(default=0, ge=0)
1984
2020
  text: str
1985
2021
  id: str
1986
- labels: Optional[list[str]] = []
1987
- position: Optional[TextPosition] = None
2022
+ labels: list[str] | None = []
2023
+ position: TextPosition | None = None
1988
2024
  fuzzy_result: bool = False
1989
2025
  page_with_visual: bool = Field(
1990
2026
  default=False,
1991
2027
  title="Page where this paragraph belongs is a visual page",
1992
2028
  description="This flag informs if the page may have information that has not been extracted",
1993
2029
  )
1994
- reference: Optional[str] = Field(
2030
+ reference: str | None = Field(
1995
2031
  default=None,
1996
2032
  title="Reference to the image that represents this text",
1997
2033
  description="Reference to the extracted image that represents this paragraph",
@@ -2001,7 +2037,7 @@ class FindParagraph(BaseModel):
2001
2037
  title="Is a table",
2002
2038
  description="The referenced image of the paragraph is a table",
2003
2039
  )
2004
- relevant_relations: Optional[Relations] = Field(
2040
+ relevant_relations: Relations | None = Field(
2005
2041
  default=None,
2006
2042
  title="Relevant relations",
2007
2043
  description="Relevant relations from which the paragraph was found, will only be filled if using the Graph RAG Strategy",
@@ -2016,17 +2052,19 @@ class FindResource(Resource):
2016
2052
  fields: dict[str, FindField]
2017
2053
 
2018
2054
  def updated_from(self, origin: Resource):
2055
+ find_resource_model_fields = self.model_fields.keys()
2019
2056
  for key in origin.model_fields.keys():
2020
- self.__setattr__(key, getattr(origin, key))
2057
+ if key in find_resource_model_fields:
2058
+ self.__setattr__(key, getattr(origin, key))
2021
2059
 
2022
2060
 
2023
2061
  class KnowledgeboxFindResults(JsonBaseModel):
2024
2062
  """Find on knowledgebox results"""
2025
2063
 
2026
2064
  resources: dict[str, FindResource]
2027
- relations: Optional[Relations] = None
2028
- query: Optional[str] = None
2029
- rephrased_query: Optional[str] = None
2065
+ relations: Relations | None = None
2066
+ query: str | None = Field(default=None, title="Find Results Query")
2067
+ rephrased_query: str | None = None
2030
2068
  total: int = 0
2031
2069
  page_number: int = Field(
2032
2070
  default=0,
@@ -2040,18 +2078,18 @@ class KnowledgeboxFindResults(JsonBaseModel):
2040
2078
  default=False,
2041
2079
  description="Pagination will be deprecated, please, refer to `top_k` in the request",
2042
2080
  )
2043
- nodes: Optional[list[dict[str, str]]] = Field(
2081
+ nodes: list[dict[str, str]] | None = Field(
2044
2082
  default=None,
2045
2083
  title="Nodes",
2046
2084
  description="List of nodes queried in the search",
2047
2085
  )
2048
- shards: Optional[list[str]] = Field(
2086
+ shards: list[str] | None = Field(
2049
2087
  default=None,
2050
2088
  title="Shards",
2051
2089
  description="The list of shard replica ids used for the search.",
2052
2090
  )
2053
2091
  autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
2054
- min_score: Optional[Union[float, MinScore]] = Field(
2092
+ min_score: float | MinScore | None = Field(
2055
2093
  default=MinScore(),
2056
2094
  title="Minimum result score",
2057
2095
  description="The minimum scores that have been used for the search operation.",
@@ -2059,9 +2097,9 @@ class KnowledgeboxFindResults(JsonBaseModel):
2059
2097
  best_matches: list[str] = Field(
2060
2098
  default=[],
2061
2099
  title="Best matches",
2062
- description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).", # noqa: E501
2100
+ description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
2063
2101
  )
2064
- metrics: Optional[dict[str, Any]] = Field(
2102
+ metrics: dict[str, Any] | None = Field(
2065
2103
  default=None,
2066
2104
  title="Metrics",
2067
2105
  description=(
@@ -2079,15 +2117,15 @@ class FeedbackTasks(str, Enum):
2079
2117
  class FeedbackRequest(BaseModel):
2080
2118
  ident: str = Field(
2081
2119
  title="Request identifier",
2082
- description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.", # noqa: E501
2120
+ description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
2083
2121
  )
2084
2122
  good: bool = Field(title="Good", description="Whether the result was good or not")
2085
2123
  task: FeedbackTasks = Field(
2086
2124
  title="Task",
2087
2125
  description="The task the feedback is for. For now, only `CHAT` task is available",
2088
2126
  )
2089
- feedback: Optional[str] = Field(None, title="Feedback", description="Feedback text")
2090
- text_block_id: Optional[str] = Field(None, title="Text block", description="Text block id")
2127
+ feedback: str | None = Field(None, title="Feedback", description="Feedback text")
2128
+ text_block_id: str | None = Field(None, title="Text block", description="Text block id")
2091
2129
 
2092
2130
 
2093
2131
  def validate_facets(facets):
@@ -2138,13 +2176,11 @@ class AugmentedTextBlock(BaseModel):
2138
2176
  text: str = Field(
2139
2177
  description="The text of the augmented text block. It may include additional metadata to enrich the context"
2140
2178
  )
2141
- position: Optional[TextPosition] = Field(
2179
+ position: TextPosition | None = Field(
2142
2180
  default=None,
2143
2181
  description="Metadata about the position of the text block in the original document.",
2144
2182
  )
2145
- parent: Optional[str] = Field(
2146
- default=None, description="The parent text block that was augmented for."
2147
- )
2183
+ parent: str | None = Field(default=None, description="The parent text block that was augmented for.")
2148
2184
  augmentation_type: TextBlockAugmentationType = Field(description="Type of augmentation.")
2149
2185
 
2150
2186
 
@@ -2168,12 +2204,12 @@ class AskTokens(BaseModel):
2168
2204
  title="Output tokens",
2169
2205
  description="Number of LLM tokens used for the answer",
2170
2206
  )
2171
- input_nuclia: Optional[float] = Field(
2207
+ input_nuclia: float | None = Field(
2172
2208
  title="Input Nuclia tokens",
2173
2209
  description="Number of Nuclia LLM tokens used for the context in the query",
2174
2210
  default=None,
2175
2211
  )
2176
- output_nuclia: Optional[float] = Field(
2212
+ output_nuclia: float | None = Field(
2177
2213
  title="Output Nuclia tokens",
2178
2214
  description="Number of Nuclia LLM tokens used for the answer",
2179
2215
  default=None,
@@ -2181,12 +2217,12 @@ class AskTokens(BaseModel):
2181
2217
 
2182
2218
 
2183
2219
  class AskTimings(BaseModel):
2184
- generative_first_chunk: Optional[float] = Field(
2220
+ generative_first_chunk: float | None = Field(
2185
2221
  default=None,
2186
2222
  title="Generative first chunk",
2187
2223
  description="Time the LLM took to generate the first chunk of the answer",
2188
2224
  )
2189
- generative_total: Optional[float] = Field(
2225
+ generative_total: float | None = Field(
2190
2226
  default=None,
2191
2227
  title="Generative total",
2192
2228
  description="Total time the LLM took to generate the answer",
@@ -2194,12 +2230,12 @@ class AskTimings(BaseModel):
2194
2230
 
2195
2231
 
2196
2232
  class SyncAskMetadata(BaseModel):
2197
- tokens: Optional[AskTokens] = Field(
2233
+ tokens: AskTokens | None = Field(
2198
2234
  default=None,
2199
2235
  title="Tokens",
2200
2236
  description="Number of tokens used in the LLM context and answer",
2201
2237
  )
2202
- timings: Optional[AskTimings] = Field(
2238
+ timings: AskTimings | None = Field(
2203
2239
  default=None,
2204
2240
  title="Timings",
2205
2241
  description="Timings of the generative model",
@@ -2218,19 +2254,19 @@ class SyncAskResponse(BaseModel):
2218
2254
  title="Answer",
2219
2255
  description="The generative answer to the query",
2220
2256
  )
2221
- reasoning: Optional[str] = Field(
2257
+ reasoning: str | None = Field(
2222
2258
  default=None,
2223
- title="Reasoning",
2224
- description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.", # noqa: E501
2259
+ title="Reasoning steps",
2260
+ description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
2225
2261
  )
2226
- answer_json: Optional[dict[str, Any]] = Field(
2262
+ answer_json: dict[str, Any] | None = Field(
2227
2263
  default=None,
2228
2264
  title="Answer JSON",
2229
- description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.", # noqa: E501
2265
+ description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
2230
2266
  )
2231
2267
  status: str = Field(
2232
2268
  title="Status",
2233
- description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'", # noqa: E501
2269
+ description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
2234
2270
  )
2235
2271
  retrieval_results: KnowledgeboxFindResults = Field(
2236
2272
  title="Retrieval results",
@@ -2241,7 +2277,7 @@ class SyncAskResponse(BaseModel):
2241
2277
  title="Retrieval best matches",
2242
2278
  description="Sorted list of best matching text blocks in the retrieval step. This includes the main query and prequeries results, if any.",
2243
2279
  )
2244
- prequeries: Optional[dict[str, KnowledgeboxFindResults]] = Field(
2280
+ prequeries: dict[str, KnowledgeboxFindResults] | None = Field(
2245
2281
  default=None,
2246
2282
  title="Prequeries",
2247
2283
  description="The retrieval results of the prequeries",
@@ -2249,41 +2285,46 @@ class SyncAskResponse(BaseModel):
2249
2285
  learning_id: str = Field(
2250
2286
  default="",
2251
2287
  title="Learning id",
2252
- description="The id of the learning request. This id can be used to provide feedback on the learning process.", # noqa: E501
2288
+ description="The id of the learning request. This id can be used to provide feedback on the learning process.",
2253
2289
  )
2254
- relations: Optional[Relations] = Field(
2290
+ relations: Relations | None = Field(
2255
2291
  default=None,
2256
2292
  title="Relations",
2257
2293
  description="The detected relations of the answer",
2258
2294
  )
2259
2295
  citations: dict[str, Any] = Field(
2260
- default={},
2296
+ default_factory=dict,
2261
2297
  title="Citations",
2262
2298
  description="The citations of the answer. List of references to the resources used to generate the answer.",
2263
2299
  )
2264
- augmented_context: Optional[AugmentedContext] = Field(
2300
+ citation_footnote_to_context: dict[str, str] = Field(
2301
+ default_factory=dict,
2302
+ title="Citation footnote to context",
2303
+ description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)""",
2304
+ )
2305
+ augmented_context: AugmentedContext | None = Field(
2265
2306
  default=None,
2266
2307
  description=(
2267
2308
  "Augmented text blocks that were sent to the LLM as part of the RAG strategies "
2268
2309
  "applied on the retrieval results in the request."
2269
2310
  ),
2270
2311
  )
2271
- prompt_context: Optional[list[str]] = Field(
2312
+ prompt_context: list[str] | None = Field(
2272
2313
  default=None,
2273
2314
  title="Prompt context",
2274
2315
  description="The prompt context used to generate the answer. Returned only if the debug flag is set to true",
2275
2316
  )
2276
- predict_request: Optional[dict[str, Any]] = Field(
2317
+ predict_request: dict[str, Any] | None = Field(
2277
2318
  default=None,
2278
2319
  title="Predict request",
2279
2320
  description="The internal predict request used to generate the answer. Returned only if the debug flag is set to true",
2280
2321
  )
2281
- metadata: Optional[SyncAskMetadata] = Field(
2322
+ metadata: SyncAskMetadata | None = Field(
2282
2323
  default=None,
2283
2324
  title="Metadata",
2284
- description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.", # noqa: E501
2325
+ description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
2285
2326
  )
2286
- consumption: Optional[Consumption] = Field(
2327
+ consumption: Consumption | None = Field(
2287
2328
  default=None,
2288
2329
  title="Consumption",
2289
2330
  description=(
@@ -2291,12 +2332,12 @@ class SyncAskResponse(BaseModel):
2291
2332
  " 'X-show-consumption' header is set to true in the request."
2292
2333
  ),
2293
2334
  )
2294
- error_details: Optional[str] = Field(
2335
+ error_details: str | None = Field(
2295
2336
  default=None,
2296
2337
  title="Error details",
2297
2338
  description="Error details message in case there was an error",
2298
2339
  )
2299
- debug: Optional[dict[str, Any]] = Field(
2340
+ debug: dict[str, Any] | None = Field(
2300
2341
  default=None,
2301
2342
  title="Debug information",
2302
2343
  description=(
@@ -2370,11 +2411,23 @@ class CitationsAskResponseItem(BaseModel):
2370
2411
  citations: dict[str, Any]
2371
2412
 
2372
2413
 
2414
+ class FootnoteCitationsAskResponseItem(BaseModel):
2415
+ type: Literal["footnote_citations"] = "footnote_citations"
2416
+ footnote_to_context: dict[str, str] = Field(
2417
+ description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)
2418
+ e.g.,
2419
+ { "block-AA": "f44f4e8acbfb1d48de3fd3c2fb04a885/f/f44f4e8acbfb1d48de3fd3c2fb04a885/73758-73972", ... }
2420
+ If the query_context is a list, it will map to 1-based indices as strings
2421
+ e.g., { "block-AA": "1", "block-AB": "2", ... }
2422
+ """
2423
+ )
2424
+
2425
+
2373
2426
  class StatusAskResponseItem(BaseModel):
2374
2427
  type: Literal["status"] = "status"
2375
2428
  code: str
2376
2429
  status: str
2377
- details: Optional[str] = None
2430
+ details: str | None = None
2378
2431
 
2379
2432
 
2380
2433
  class ErrorAskResponseItem(BaseModel):
@@ -2393,21 +2446,22 @@ class DebugAskResponseItem(BaseModel):
2393
2446
  metrics: dict[str, Any]
2394
2447
 
2395
2448
 
2396
- AskResponseItemType = Union[
2397
- AnswerAskResponseItem,
2398
- ReasoningAskResponseItem,
2399
- JSONAskResponseItem,
2400
- MetadataAskResponseItem,
2401
- AugmentedContextResponseItem,
2402
- CitationsAskResponseItem,
2403
- StatusAskResponseItem,
2404
- ErrorAskResponseItem,
2405
- RetrievalAskResponseItem,
2406
- RelationsAskResponseItem,
2407
- DebugAskResponseItem,
2408
- PrequeriesAskResponseItem,
2409
- ConsumptionResponseItem,
2410
- ]
2449
+ AskResponseItemType = (
2450
+ AnswerAskResponseItem
2451
+ | ReasoningAskResponseItem
2452
+ | JSONAskResponseItem
2453
+ | MetadataAskResponseItem
2454
+ | AugmentedContextResponseItem
2455
+ | CitationsAskResponseItem
2456
+ | FootnoteCitationsAskResponseItem
2457
+ | StatusAskResponseItem
2458
+ | ErrorAskResponseItem
2459
+ | RetrievalAskResponseItem
2460
+ | RelationsAskResponseItem
2461
+ | DebugAskResponseItem
2462
+ | PrequeriesAskResponseItem
2463
+ | ConsumptionResponseItem
2464
+ )
2411
2465
 
2412
2466
 
2413
2467
  class AskResponseItem(BaseModel):
@@ -2427,7 +2481,7 @@ def parse_custom_prompt(item: AskRequest) -> CustomPrompt:
2427
2481
  return prompt
2428
2482
 
2429
2483
 
2430
- def parse_rephrase_prompt(item: AskRequest) -> Optional[str]:
2484
+ def parse_rephrase_prompt(item: AskRequest) -> str | None:
2431
2485
  prompt = parse_custom_prompt(item)
2432
2486
  return prompt.rephrase
2433
2487
 
@@ -2438,7 +2492,7 @@ FindRequest.model_rebuild()
2438
2492
 
2439
2493
  class CatalogFacetsPrefix(BaseModel):
2440
2494
  prefix: str = Field(pattern="^((/[^/]+)*)$")
2441
- depth: Optional[int] = Field(
2495
+ depth: int | None = Field(
2442
2496
  default=None,
2443
2497
  ge=0,
2444
2498
  description="Only include facets up to this depth from the prefix, leave empty to include all depths",