nucliadb-models 6.8.1.post4983__py3-none-any.whl → 6.10.0.post5694__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb-models might be problematic. Click here for more details.
- nucliadb_models/agents/ingestion.py +4 -4
- nucliadb_models/augment.py +359 -0
- nucliadb_models/common.py +66 -57
- nucliadb_models/configuration.py +9 -9
- nucliadb_models/content_types.py +13 -11
- nucliadb_models/conversation.py +30 -29
- nucliadb_models/entities.py +17 -18
- nucliadb_models/external_index_providers.py +5 -20
- nucliadb_models/extracted.py +82 -83
- nucliadb_models/file.py +10 -11
- nucliadb_models/filters.py +78 -74
- nucliadb_models/graph/requests.py +38 -47
- nucliadb_models/hydration.py +423 -0
- nucliadb_models/internal/predict.py +7 -9
- nucliadb_models/internal/shards.py +2 -3
- nucliadb_models/labels.py +18 -11
- nucliadb_models/link.py +18 -19
- nucliadb_models/metadata.py +80 -53
- nucliadb_models/notifications.py +3 -3
- nucliadb_models/processing.py +1 -2
- nucliadb_models/resource.py +85 -102
- nucliadb_models/retrieval.py +147 -0
- nucliadb_models/search.py +360 -306
- nucliadb_models/security.py +2 -3
- nucliadb_models/text.py +7 -8
- nucliadb_models/trainset.py +1 -2
- nucliadb_models/utils.py +2 -3
- nucliadb_models/vectors.py +2 -5
- nucliadb_models/writer.py +56 -57
- {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/METADATA +2 -3
- nucliadb_models-6.10.0.post5694.dist-info/RECORD +41 -0
- nucliadb_models-6.8.1.post4983.dist-info/RECORD +0 -38
- {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/WHEEL +0 -0
- {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/top_level.txt +0 -0
nucliadb_models/search.py
CHANGED
|
@@ -14,19 +14,18 @@
|
|
|
14
14
|
#
|
|
15
15
|
import json
|
|
16
16
|
from enum import Enum
|
|
17
|
-
from typing import Any, Literal
|
|
17
|
+
from typing import Annotated, Any, Literal
|
|
18
18
|
|
|
19
19
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
20
20
|
from pydantic.aliases import AliasChoices
|
|
21
21
|
from pydantic.json_schema import SkipJsonSchema
|
|
22
|
-
from typing_extensions import
|
|
22
|
+
from typing_extensions import Self
|
|
23
23
|
|
|
24
24
|
from nucliadb_models import RelationMetadata
|
|
25
25
|
from nucliadb_models.common import FieldTypeName, ParamDefault
|
|
26
26
|
from nucliadb_models.graph.requests import GraphPathQuery
|
|
27
27
|
|
|
28
28
|
# Bw/c import to avoid breaking users
|
|
29
|
-
# noqa isort: skip
|
|
30
29
|
from nucliadb_models.metadata import RelationNodeType, RelationType, ResourceProcessingStatus
|
|
31
30
|
from nucliadb_models.resource import ExtractedDataTypeName, Resource
|
|
32
31
|
from nucliadb_models.security import RequestSecurity
|
|
@@ -79,8 +78,9 @@ ANSWER_JSON_SCHEMA_EXAMPLE = {
|
|
|
79
78
|
class ModelParamDefaults:
|
|
80
79
|
applied_autofilters = ParamDefault(
|
|
81
80
|
default=[],
|
|
82
|
-
title="
|
|
83
|
-
description="
|
|
81
|
+
title="Applied autofilters",
|
|
82
|
+
description="[deprecated] list of filters automatically applied to the search query",
|
|
83
|
+
deprecated=True,
|
|
84
84
|
)
|
|
85
85
|
|
|
86
86
|
|
|
@@ -151,12 +151,12 @@ FacetsResult = dict[str, Any]
|
|
|
151
151
|
|
|
152
152
|
|
|
153
153
|
class TextPosition(BaseModel):
|
|
154
|
-
page_number:
|
|
154
|
+
page_number: int | None = None
|
|
155
155
|
index: int
|
|
156
156
|
start: int
|
|
157
157
|
end: int
|
|
158
|
-
start_seconds:
|
|
159
|
-
end_seconds:
|
|
158
|
+
start_seconds: list[int] | None = None
|
|
159
|
+
end_seconds: list[int] | None = None
|
|
160
160
|
|
|
161
161
|
|
|
162
162
|
class Sentence(BaseModel):
|
|
@@ -165,8 +165,8 @@ class Sentence(BaseModel):
|
|
|
165
165
|
text: str
|
|
166
166
|
field_type: str
|
|
167
167
|
field: str
|
|
168
|
-
index:
|
|
169
|
-
position:
|
|
168
|
+
index: str | None = None
|
|
169
|
+
position: TextPosition | None = None
|
|
170
170
|
|
|
171
171
|
|
|
172
172
|
class Sentences(BaseModel):
|
|
@@ -176,7 +176,7 @@ class Sentences(BaseModel):
|
|
|
176
176
|
page_size: int = 20
|
|
177
177
|
min_score: float = Field(
|
|
178
178
|
title="Minimum score",
|
|
179
|
-
description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
|
|
179
|
+
description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
|
|
180
180
|
)
|
|
181
181
|
|
|
182
182
|
|
|
@@ -187,45 +187,45 @@ class Paragraph(BaseModel):
|
|
|
187
187
|
field: str
|
|
188
188
|
text: str
|
|
189
189
|
labels: list[str] = []
|
|
190
|
-
start_seconds:
|
|
191
|
-
end_seconds:
|
|
192
|
-
position:
|
|
190
|
+
start_seconds: list[int] | None = None
|
|
191
|
+
end_seconds: list[int] | None = None
|
|
192
|
+
position: TextPosition | None = None
|
|
193
193
|
fuzzy_result: bool = False
|
|
194
194
|
|
|
195
195
|
|
|
196
196
|
class Paragraphs(BaseModel):
|
|
197
197
|
results: list[Paragraph] = []
|
|
198
|
-
facets:
|
|
199
|
-
query:
|
|
198
|
+
facets: FacetsResult | None = None
|
|
199
|
+
query: str | None = Field(default=None, title="Paragraphs Query")
|
|
200
200
|
total: int = 0
|
|
201
201
|
page_number: int = 0
|
|
202
202
|
page_size: int = 20
|
|
203
203
|
next_page: bool = False
|
|
204
204
|
min_score: float = Field(
|
|
205
205
|
title="Minimum score",
|
|
206
|
-
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
206
|
+
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
207
207
|
)
|
|
208
208
|
|
|
209
209
|
|
|
210
210
|
class ResourceResult(BaseModel):
|
|
211
|
-
score:
|
|
211
|
+
score: float | int
|
|
212
212
|
rid: str
|
|
213
213
|
field_type: str
|
|
214
214
|
field: str
|
|
215
|
-
labels:
|
|
215
|
+
labels: list[str] | None = None
|
|
216
216
|
|
|
217
217
|
|
|
218
218
|
class Resources(BaseModel):
|
|
219
219
|
results: list[ResourceResult]
|
|
220
|
-
facets:
|
|
221
|
-
query:
|
|
220
|
+
facets: FacetsResult | None = None
|
|
221
|
+
query: str | None = Field(default=None, title="Resources Query")
|
|
222
222
|
total: int = 0
|
|
223
223
|
page_number: int = 0
|
|
224
224
|
page_size: int = 20
|
|
225
225
|
next_page: bool = False
|
|
226
226
|
min_score: float = Field(
|
|
227
227
|
title="Minimum score",
|
|
228
|
-
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
228
|
+
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
229
229
|
)
|
|
230
230
|
|
|
231
231
|
|
|
@@ -245,7 +245,7 @@ class DirectionalRelation(BaseModel):
|
|
|
245
245
|
relation: RelationType
|
|
246
246
|
relation_label: str
|
|
247
247
|
direction: RelationDirection
|
|
248
|
-
metadata:
|
|
248
|
+
metadata: RelationMetadata | None = None
|
|
249
249
|
resource_id: str
|
|
250
250
|
|
|
251
251
|
|
|
@@ -279,23 +279,25 @@ class RelatedEntities(BaseModel):
|
|
|
279
279
|
class ResourceSearchResults(JsonBaseModel):
|
|
280
280
|
"""Search on resource results"""
|
|
281
281
|
|
|
282
|
-
sentences:
|
|
283
|
-
paragraphs:
|
|
284
|
-
relations:
|
|
285
|
-
nodes:
|
|
286
|
-
shards:
|
|
282
|
+
sentences: Sentences | None = None
|
|
283
|
+
paragraphs: Paragraphs | None = None
|
|
284
|
+
relations: Relations | None = None
|
|
285
|
+
nodes: list[dict[str, str]] | None = None
|
|
286
|
+
shards: list[str] | None = None
|
|
287
287
|
|
|
288
288
|
|
|
289
289
|
class KnowledgeboxSearchResults(JsonBaseModel):
|
|
290
290
|
"""Search on knowledgebox results"""
|
|
291
291
|
|
|
292
292
|
resources: dict[str, Resource] = {}
|
|
293
|
-
sentences:
|
|
294
|
-
paragraphs:
|
|
295
|
-
fulltext:
|
|
296
|
-
relations:
|
|
297
|
-
nodes:
|
|
298
|
-
shards:
|
|
293
|
+
sentences: Sentences | None = None
|
|
294
|
+
paragraphs: Paragraphs | None = None
|
|
295
|
+
fulltext: Resources | None = None
|
|
296
|
+
relations: Relations | None = None
|
|
297
|
+
nodes: list[dict[str, str]] | None = None
|
|
298
|
+
shards: list[str] | None = None
|
|
299
|
+
|
|
300
|
+
# TODO: remove on a future major release
|
|
299
301
|
autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
|
|
300
302
|
|
|
301
303
|
|
|
@@ -303,16 +305,16 @@ class CatalogResponse(BaseModel):
|
|
|
303
305
|
"""Catalog results"""
|
|
304
306
|
|
|
305
307
|
resources: dict[str, Resource] = {}
|
|
306
|
-
fulltext:
|
|
307
|
-
shards:
|
|
308
|
+
fulltext: Resources | None = None
|
|
309
|
+
shards: list[str] | None = None
|
|
308
310
|
|
|
309
311
|
|
|
310
312
|
class KnowledgeboxSuggestResults(JsonBaseModel):
|
|
311
313
|
"""Suggest on resource results"""
|
|
312
314
|
|
|
313
|
-
paragraphs:
|
|
314
|
-
entities:
|
|
315
|
-
shards:
|
|
315
|
+
paragraphs: Paragraphs | None = None
|
|
316
|
+
entities: RelatedEntities | None = None
|
|
317
|
+
shards: list[str] | None = None
|
|
316
318
|
|
|
317
319
|
|
|
318
320
|
class KnowledgeboxCounters(BaseModel):
|
|
@@ -320,7 +322,7 @@ class KnowledgeboxCounters(BaseModel):
|
|
|
320
322
|
paragraphs: int
|
|
321
323
|
fields: int
|
|
322
324
|
sentences: int
|
|
323
|
-
shards:
|
|
325
|
+
shards: list[str] | None = None
|
|
324
326
|
index_size: float = Field(default=0.0, title="Index size (bytes)")
|
|
325
327
|
|
|
326
328
|
|
|
@@ -344,10 +346,12 @@ SortOrderMap = {
|
|
|
344
346
|
|
|
345
347
|
class SortOptions(BaseModel):
|
|
346
348
|
field: SortField
|
|
347
|
-
limit: Optional[int] = Field(None, gt=0)
|
|
348
349
|
order: SortOrder = SortOrder.DESC
|
|
349
350
|
|
|
350
351
|
|
|
352
|
+
MAX_RANK_FUSION_WINDOW = 500
|
|
353
|
+
|
|
354
|
+
|
|
351
355
|
class RankFusionName(str, Enum):
|
|
352
356
|
RECIPROCAL_RANK_FUSION = "rrf"
|
|
353
357
|
|
|
@@ -373,13 +377,13 @@ class ReciprocalRankFusion(_BaseRankFusion):
|
|
|
373
377
|
k: float = Field(
|
|
374
378
|
default=60.0,
|
|
375
379
|
title="RRF k parameter",
|
|
376
|
-
description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
|
|
380
|
+
description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
|
|
377
381
|
)
|
|
378
|
-
window:
|
|
382
|
+
window: int | None = Field(
|
|
379
383
|
default=None,
|
|
380
|
-
le=
|
|
384
|
+
le=MAX_RANK_FUSION_WINDOW,
|
|
381
385
|
title="RRF window",
|
|
382
|
-
description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
|
|
386
|
+
description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
|
|
383
387
|
)
|
|
384
388
|
boosting: ReciprocalRankFusionWeights = Field(
|
|
385
389
|
default_factory=ReciprocalRankFusionWeights,
|
|
@@ -390,12 +394,12 @@ Define different weights for each retriever. This allows to assign different pri
|
|
|
390
394
|
The default is 1 for each retriever, which means no extra boost for any of them. Weights below 0 can be used for negative boosting.
|
|
391
395
|
|
|
392
396
|
This kind of boosting can be useful in multilingual search, for example, where keyword search may not give good results and can degrade the final search experience
|
|
393
|
-
""",
|
|
397
|
+
""",
|
|
394
398
|
)
|
|
395
399
|
|
|
396
400
|
|
|
397
401
|
RankFusion = Annotated[
|
|
398
|
-
|
|
402
|
+
ReciprocalRankFusion,
|
|
399
403
|
Field(discriminator="name"),
|
|
400
404
|
]
|
|
401
405
|
|
|
@@ -430,15 +434,15 @@ class _BaseReranker(BaseModel):
|
|
|
430
434
|
|
|
431
435
|
class PredictReranker(_BaseReranker):
|
|
432
436
|
name: Literal[RerankerName.PREDICT_RERANKER] = RerankerName.PREDICT_RERANKER
|
|
433
|
-
window:
|
|
437
|
+
window: int | None = Field(
|
|
434
438
|
default=None,
|
|
435
439
|
le=200,
|
|
436
440
|
title="Reranker window",
|
|
437
|
-
description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
|
|
441
|
+
description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
|
|
438
442
|
)
|
|
439
443
|
|
|
440
444
|
|
|
441
|
-
Reranker = Annotated[
|
|
445
|
+
Reranker = Annotated[PredictReranker, Field(discriminator="name")]
|
|
442
446
|
|
|
443
447
|
|
|
444
448
|
class KnowledgeBoxCount(BaseModel):
|
|
@@ -467,25 +471,20 @@ class SearchParamDefaults:
|
|
|
467
471
|
)
|
|
468
472
|
filters = ParamDefault(
|
|
469
473
|
default=[],
|
|
470
|
-
title="Filters",
|
|
471
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
474
|
+
title="Search Filters",
|
|
475
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
472
476
|
)
|
|
473
477
|
resource_filters = ParamDefault(
|
|
474
478
|
default=[],
|
|
475
479
|
title="Resources filter",
|
|
476
|
-
description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
|
|
480
|
+
description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
|
|
477
481
|
)
|
|
478
482
|
faceted = ParamDefault(
|
|
479
483
|
default=[],
|
|
480
484
|
title="Faceted",
|
|
481
|
-
description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
485
|
+
description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
482
486
|
max_items=50,
|
|
483
487
|
)
|
|
484
|
-
autofilter = ParamDefault(
|
|
485
|
-
default=False,
|
|
486
|
-
title="Automatic search filtering",
|
|
487
|
-
description="If set to true, the search will automatically add filters to the query. For example, it will filter results containing the entities detected in the query", # noqa: E501
|
|
488
|
-
)
|
|
489
488
|
chat_query = ParamDefault(
|
|
490
489
|
default=...,
|
|
491
490
|
title="Query",
|
|
@@ -505,19 +504,27 @@ class SearchParamDefaults:
|
|
|
505
504
|
)
|
|
506
505
|
top_k = ParamDefault(
|
|
507
506
|
default=20,
|
|
507
|
+
gt=-1,
|
|
508
508
|
le=200,
|
|
509
509
|
title="Top k",
|
|
510
510
|
description="The number of results search should return. The maximum number of results allowed is 200.",
|
|
511
511
|
)
|
|
512
|
+
offset = ParamDefault(
|
|
513
|
+
default=0,
|
|
514
|
+
gt=-1,
|
|
515
|
+
le=1000,
|
|
516
|
+
title="Results offset",
|
|
517
|
+
description="The number of results to skip, starting from the beginning in sort order. Used for pagination. It can only be used with the keyword and fulltext indexes.",
|
|
518
|
+
)
|
|
512
519
|
highlight = ParamDefault(
|
|
513
520
|
default=False,
|
|
514
521
|
title="Highlight",
|
|
515
|
-
description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
|
|
522
|
+
description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
|
|
516
523
|
)
|
|
517
524
|
with_duplicates = ParamDefault(
|
|
518
525
|
default=False,
|
|
519
526
|
title="With duplicate paragraphs",
|
|
520
|
-
description="Whether to return duplicate paragraphs on the same document",
|
|
527
|
+
description="Whether to return duplicate paragraphs on the same document",
|
|
521
528
|
)
|
|
522
529
|
with_status = ParamDefault(
|
|
523
530
|
default=None,
|
|
@@ -527,19 +534,13 @@ class SearchParamDefaults:
|
|
|
527
534
|
with_synonyms = ParamDefault(
|
|
528
535
|
default=False,
|
|
529
536
|
title="With custom synonyms",
|
|
530
|
-
description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
|
|
537
|
+
description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
|
|
531
538
|
)
|
|
532
539
|
sort_order = ParamDefault(
|
|
533
540
|
default=SortOrder.DESC,
|
|
534
541
|
title="Sort order",
|
|
535
542
|
description="Order to sort results with",
|
|
536
543
|
)
|
|
537
|
-
sort_limit = ParamDefault(
|
|
538
|
-
default=None,
|
|
539
|
-
title="Sort limit",
|
|
540
|
-
description="",
|
|
541
|
-
gt=0,
|
|
542
|
-
)
|
|
543
544
|
sort_field = ParamDefault(
|
|
544
545
|
default=None,
|
|
545
546
|
title="Sort field",
|
|
@@ -563,12 +564,12 @@ class SearchParamDefaults:
|
|
|
563
564
|
reranker = ParamDefault(
|
|
564
565
|
default=RerankerName.PREDICT_RERANKER,
|
|
565
566
|
title="Reranker",
|
|
566
|
-
description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
|
|
567
|
+
description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
|
|
567
568
|
)
|
|
568
569
|
debug = ParamDefault(
|
|
569
570
|
default=False,
|
|
570
571
|
title="Debug mode",
|
|
571
|
-
description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
|
|
572
|
+
description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
|
|
572
573
|
)
|
|
573
574
|
show = ParamDefault(
|
|
574
575
|
default=[ResourceProperties.BASIC],
|
|
@@ -589,27 +590,27 @@ class SearchParamDefaults:
|
|
|
589
590
|
range_creation_start = ParamDefault(
|
|
590
591
|
default=None,
|
|
591
592
|
title="Resource creation range start",
|
|
592
|
-
description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
593
|
+
description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
593
594
|
)
|
|
594
595
|
range_creation_end = ParamDefault(
|
|
595
596
|
default=None,
|
|
596
597
|
title="Resource creation range end",
|
|
597
|
-
description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
598
|
+
description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
598
599
|
)
|
|
599
600
|
range_modification_start = ParamDefault(
|
|
600
601
|
default=None,
|
|
601
602
|
title="Resource modification range start",
|
|
602
|
-
description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
603
|
+
description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
603
604
|
)
|
|
604
605
|
range_modification_end = ParamDefault(
|
|
605
606
|
default=None,
|
|
606
607
|
title="Resource modification range end",
|
|
607
|
-
description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
608
|
+
description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
608
609
|
)
|
|
609
610
|
vector = ParamDefault(
|
|
610
611
|
default=None,
|
|
611
612
|
title="Search Vector",
|
|
612
|
-
description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
|
|
613
|
+
description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
|
|
613
614
|
)
|
|
614
615
|
vectorset = ParamDefault(
|
|
615
616
|
default=None,
|
|
@@ -625,12 +626,12 @@ class SearchParamDefaults:
|
|
|
625
626
|
chat_history = ParamDefault(
|
|
626
627
|
default=None,
|
|
627
628
|
title="Chat history",
|
|
628
|
-
description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
|
|
629
|
+
description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
|
|
629
630
|
)
|
|
630
631
|
chat_features = ParamDefault(
|
|
631
632
|
default=[ChatOptions.SEMANTIC, ChatOptions.KEYWORD],
|
|
632
633
|
title="Chat features",
|
|
633
|
-
description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
|
|
634
|
+
description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
|
|
634
635
|
)
|
|
635
636
|
suggest_features = ParamDefault(
|
|
636
637
|
default=[
|
|
@@ -643,17 +644,17 @@ class SearchParamDefaults:
|
|
|
643
644
|
security = ParamDefault(
|
|
644
645
|
default=None,
|
|
645
646
|
title="Security",
|
|
646
|
-
description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
|
|
647
|
+
description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
|
|
647
648
|
)
|
|
648
649
|
security_groups = ParamDefault(
|
|
649
650
|
default=[],
|
|
650
651
|
title="Security groups",
|
|
651
|
-
description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
|
|
652
|
+
description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
|
|
652
653
|
)
|
|
653
654
|
rephrase = ParamDefault(
|
|
654
655
|
default=False,
|
|
655
656
|
title="Rephrase query consuming LLMs",
|
|
656
|
-
description="Rephrase query consuming LLMs - it will make the query slower",
|
|
657
|
+
description="Rephrase query consuming LLMs - it will make the query slower",
|
|
657
658
|
)
|
|
658
659
|
prefer_markdown = ParamDefault(
|
|
659
660
|
default=False,
|
|
@@ -693,10 +694,10 @@ class SearchParamDefaults:
|
|
|
693
694
|
|
|
694
695
|
|
|
695
696
|
class Filter(BaseModel):
|
|
696
|
-
all:
|
|
697
|
-
any:
|
|
698
|
-
none:
|
|
699
|
-
not_all:
|
|
697
|
+
all: list[str] | None = Field(default=None, min_length=1)
|
|
698
|
+
any: list[str] | None = Field(default=None, min_length=1)
|
|
699
|
+
none: list[str] | None = Field(default=None, min_length=1)
|
|
700
|
+
not_all: list[str] | None = Field(default=None, min_length=1)
|
|
700
701
|
|
|
701
702
|
@model_validator(mode="after")
|
|
702
703
|
def validate_filter(self) -> Self:
|
|
@@ -738,19 +739,19 @@ class CatalogQuery(BaseModel):
|
|
|
738
739
|
|
|
739
740
|
|
|
740
741
|
class CatalogRequest(BaseModel):
|
|
741
|
-
query:
|
|
742
|
+
query: str | CatalogQuery = ParamDefault(
|
|
742
743
|
default="",
|
|
743
|
-
title="Query",
|
|
744
|
+
title="Catalog Request Query",
|
|
744
745
|
description="The query to search for",
|
|
745
746
|
).to_pydantic_field()
|
|
746
|
-
filter_expression:
|
|
747
|
+
filter_expression: CatalogFilterExpression | None = (
|
|
747
748
|
SearchParamDefaults.catalog_filter_expression.to_pydantic_field()
|
|
748
749
|
)
|
|
749
750
|
faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
|
|
750
|
-
sort:
|
|
751
|
+
sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
|
|
751
752
|
page_number: int = SearchParamDefaults.catalog_page_number.to_pydantic_field()
|
|
752
753
|
page_size: int = SearchParamDefaults.catalog_page_size.to_pydantic_field()
|
|
753
|
-
hidden:
|
|
754
|
+
hidden: bool | None = SearchParamDefaults.hidden.to_pydantic_field()
|
|
754
755
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field(
|
|
755
756
|
default=[ResourceProperties.BASIC, ResourceProperties.ERRORS]
|
|
756
757
|
)
|
|
@@ -758,32 +759,30 @@ class CatalogRequest(BaseModel):
|
|
|
758
759
|
debug: SkipJsonSchema[bool] = SearchParamDefaults.debug.to_pydantic_field()
|
|
759
760
|
|
|
760
761
|
# Deprecated filter parameters
|
|
761
|
-
filters:
|
|
762
|
+
filters: list[str] | list[Filter] = Field(
|
|
762
763
|
default=[],
|
|
763
|
-
title="Filters",
|
|
764
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
764
|
+
title="Catalog Filters",
|
|
765
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
765
766
|
deprecated="Use filter_expression instead",
|
|
766
767
|
)
|
|
767
|
-
with_status:
|
|
768
|
+
with_status: ResourceProcessingStatus | None = Field(
|
|
768
769
|
default=None,
|
|
769
770
|
title="With processing status",
|
|
770
771
|
description="Filter results by resource processing status",
|
|
771
772
|
deprecated="Use filter_expression instead",
|
|
772
773
|
)
|
|
773
|
-
range_creation_start:
|
|
774
|
-
|
|
775
|
-
deprecated="Use filter_expression instead",
|
|
776
|
-
)
|
|
774
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field(
|
|
775
|
+
deprecated="Use filter_expression instead",
|
|
777
776
|
)
|
|
778
|
-
range_creation_end:
|
|
777
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field(
|
|
779
778
|
deprecated="Use filter_expression instead",
|
|
780
779
|
)
|
|
781
|
-
range_modification_start:
|
|
780
|
+
range_modification_start: DateTime | None = (
|
|
782
781
|
SearchParamDefaults.range_modification_start.to_pydantic_field(
|
|
783
782
|
deprecated="Use filter_expression instead",
|
|
784
783
|
)
|
|
785
784
|
)
|
|
786
|
-
range_modification_end:
|
|
785
|
+
range_modification_end: DateTime | None = (
|
|
787
786
|
SearchParamDefaults.range_modification_end.to_pydantic_field(
|
|
788
787
|
deprecated="Use filter_expression instead",
|
|
789
788
|
)
|
|
@@ -796,15 +795,15 @@ class CatalogRequest(BaseModel):
|
|
|
796
795
|
|
|
797
796
|
|
|
798
797
|
class MinScore(BaseModel):
|
|
799
|
-
semantic:
|
|
798
|
+
semantic: float | None = Field(
|
|
800
799
|
default=None,
|
|
801
800
|
title="Minimum semantic score",
|
|
802
|
-
description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
801
|
+
description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
803
802
|
)
|
|
804
803
|
bm25: float = Field(
|
|
805
804
|
default=0,
|
|
806
805
|
title="Minimum bm25 score",
|
|
807
|
-
description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
806
|
+
description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
808
807
|
ge=0,
|
|
809
808
|
)
|
|
810
809
|
|
|
@@ -818,7 +817,7 @@ AUDIT_METADATA_MAX_BYTES = 1024 * 10 # 10KB
|
|
|
818
817
|
|
|
819
818
|
|
|
820
819
|
class AuditMetadataBase(BaseModel):
|
|
821
|
-
audit_metadata:
|
|
820
|
+
audit_metadata: dict[str, str] | None = Field(
|
|
822
821
|
default=None,
|
|
823
822
|
title="Audit metadata",
|
|
824
823
|
description=(
|
|
@@ -842,29 +841,27 @@ class AuditMetadataBase(BaseModel):
|
|
|
842
841
|
|
|
843
842
|
class BaseSearchRequest(AuditMetadataBase):
|
|
844
843
|
query: str = SearchParamDefaults.query.to_pydantic_field()
|
|
845
|
-
filter_expression:
|
|
844
|
+
filter_expression: FilterExpression | None = (
|
|
846
845
|
SearchParamDefaults.filter_expression.to_pydantic_field()
|
|
847
846
|
)
|
|
848
847
|
fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
|
|
849
|
-
filters:
|
|
848
|
+
filters: list[str] | list[Filter] = Field(
|
|
850
849
|
default=[],
|
|
851
|
-
title="Filters",
|
|
852
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
850
|
+
title="Search Filters",
|
|
851
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
853
852
|
)
|
|
854
853
|
top_k: int = SearchParamDefaults.top_k.to_pydantic_field()
|
|
855
|
-
min_score:
|
|
854
|
+
min_score: float | MinScore | None = Field(
|
|
856
855
|
default=None,
|
|
857
856
|
title="Minimum score",
|
|
858
|
-
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
857
|
+
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
859
858
|
)
|
|
860
|
-
range_creation_start:
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
864
|
-
range_modification_start: Optional[DateTime] = (
|
|
859
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
860
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
861
|
+
range_modification_start: DateTime | None = (
|
|
865
862
|
SearchParamDefaults.range_modification_start.to_pydantic_field()
|
|
866
863
|
)
|
|
867
|
-
range_modification_end:
|
|
864
|
+
range_modification_end: DateTime | None = (
|
|
868
865
|
SearchParamDefaults.range_modification_end.to_pydantic_field()
|
|
869
866
|
)
|
|
870
867
|
debug: bool = SearchParamDefaults.debug.to_pydantic_field()
|
|
@@ -872,13 +869,15 @@ class BaseSearchRequest(AuditMetadataBase):
|
|
|
872
869
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
|
|
873
870
|
field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
|
|
874
871
|
extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
|
|
875
|
-
vector:
|
|
876
|
-
vectorset:
|
|
872
|
+
vector: list[float] | None = SearchParamDefaults.vector.to_pydantic_field()
|
|
873
|
+
vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
|
|
877
874
|
with_duplicates: bool = SearchParamDefaults.with_duplicates.to_pydantic_field()
|
|
878
875
|
with_synonyms: bool = SearchParamDefaults.with_synonyms.to_pydantic_field()
|
|
879
|
-
autofilter
|
|
876
|
+
# autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
|
|
877
|
+
# avoid breaking changes in the python sdks. Please remove on a future major release.
|
|
878
|
+
autofilter: SkipJsonSchema[bool] = False
|
|
880
879
|
resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
|
|
881
|
-
security:
|
|
880
|
+
security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
|
|
882
881
|
show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
|
|
883
882
|
|
|
884
883
|
rephrase: bool = Field(
|
|
@@ -888,7 +887,7 @@ class BaseSearchRequest(AuditMetadataBase):
|
|
|
888
887
|
),
|
|
889
888
|
)
|
|
890
889
|
|
|
891
|
-
rephrase_prompt:
|
|
890
|
+
rephrase_prompt: str | None = Field(
|
|
892
891
|
default=None,
|
|
893
892
|
title="Rephrase",
|
|
894
893
|
description=(
|
|
@@ -907,7 +906,7 @@ Please return ONLY the question without any explanation. Just the rephrased ques
|
|
|
907
906
|
Please return ONLY the question without any explanation.""",
|
|
908
907
|
],
|
|
909
908
|
)
|
|
910
|
-
query_image:
|
|
909
|
+
query_image: Image | None = Field(
|
|
911
910
|
default=None,
|
|
912
911
|
title="Query image",
|
|
913
912
|
description="Image that will be used together with the query text for retrieval.",
|
|
@@ -937,13 +936,33 @@ class SearchRequest(BaseSearchRequest):
|
|
|
937
936
|
]
|
|
938
937
|
)
|
|
939
938
|
faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
|
|
940
|
-
sort:
|
|
939
|
+
sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
|
|
940
|
+
offset: int = SearchParamDefaults.offset.to_pydantic_field()
|
|
941
941
|
|
|
942
942
|
@field_validator("faceted")
|
|
943
943
|
@classmethod
|
|
944
944
|
def nested_facets_not_supported(cls, facets):
|
|
945
945
|
return validate_facets(facets)
|
|
946
946
|
|
|
947
|
+
@model_validator(mode="after")
|
|
948
|
+
def offset_sort_only_on_keyword_indexes(self):
|
|
949
|
+
has_non_keyword_indexes = set(self.features) & {SearchOptions.SEMANTIC, SearchOptions.RELATIONS}
|
|
950
|
+
if has_non_keyword_indexes:
|
|
951
|
+
if self.offset > 0:
|
|
952
|
+
raise ValueError("offset cannot be used with the semantic or relations index")
|
|
953
|
+
if self.sort and self.sort.field != SortField.SCORE:
|
|
954
|
+
raise ValueError("sort by date cannot be used with the semantic or relations index")
|
|
955
|
+
|
|
956
|
+
return self
|
|
957
|
+
|
|
958
|
+
@field_validator("sort", mode="after")
|
|
959
|
+
@classmethod
|
|
960
|
+
def sorting_by_title_not_supported(cls, value: SortOptions | None) -> SortOptions | None:
|
|
961
|
+
if value and value.field == SortField.TITLE:
|
|
962
|
+
raise ValueError("sorting by title not supported in /search")
|
|
963
|
+
|
|
964
|
+
return value
|
|
965
|
+
|
|
947
966
|
|
|
948
967
|
class Author(str, Enum):
|
|
949
968
|
NUCLIA = "NUCLIA"
|
|
@@ -964,19 +983,19 @@ class UserPrompt(BaseModel):
|
|
|
964
983
|
|
|
965
984
|
|
|
966
985
|
class MaxTokens(BaseModel):
|
|
967
|
-
context:
|
|
986
|
+
context: int | None = Field(
|
|
968
987
|
default=None,
|
|
969
988
|
title="Maximum context tokens",
|
|
970
989
|
description="Use to limit the amount of tokens used in the LLM context",
|
|
971
990
|
)
|
|
972
|
-
answer:
|
|
991
|
+
answer: int | None = Field(
|
|
973
992
|
default=None,
|
|
974
993
|
title="Maximum answer tokens",
|
|
975
994
|
description="Use to limit the amount of tokens used in the LLM answer",
|
|
976
995
|
)
|
|
977
996
|
|
|
978
997
|
|
|
979
|
-
def parse_max_tokens(max_tokens:
|
|
998
|
+
def parse_max_tokens(max_tokens: int | MaxTokens | None) -> MaxTokens | None:
|
|
980
999
|
if isinstance(max_tokens, int):
|
|
981
1000
|
# If the max_tokens is an integer, it is interpreted as the max_tokens value for the generated answer.
|
|
982
1001
|
# The max tokens for the context is set to None to use the default value for the model (comes in the
|
|
@@ -1008,6 +1027,12 @@ class Reasoning(BaseModel):
|
|
|
1008
1027
|
)
|
|
1009
1028
|
|
|
1010
1029
|
|
|
1030
|
+
class CitationsType(str, Enum):
|
|
1031
|
+
NONE = "none"
|
|
1032
|
+
DEFAULT = "default"
|
|
1033
|
+
LLM_FOOTNOTES = "llm_footnotes"
|
|
1034
|
+
|
|
1035
|
+
|
|
1011
1036
|
class ChatModel(BaseModel):
|
|
1012
1037
|
"""
|
|
1013
1038
|
This is the model for the predict request payload on the chat endpoint
|
|
@@ -1016,7 +1041,7 @@ class ChatModel(BaseModel):
|
|
|
1016
1041
|
question: str = Field(description="Question to ask the generative model")
|
|
1017
1042
|
user_id: str
|
|
1018
1043
|
retrieval: bool = True
|
|
1019
|
-
system:
|
|
1044
|
+
system: str | None = Field(
|
|
1020
1045
|
default=None,
|
|
1021
1046
|
title="System prompt",
|
|
1022
1047
|
description="Optional system prompt input by the user",
|
|
@@ -1025,9 +1050,9 @@ class ChatModel(BaseModel):
|
|
|
1025
1050
|
default={},
|
|
1026
1051
|
description="The information retrieval context for the current query",
|
|
1027
1052
|
)
|
|
1028
|
-
query_context_order:
|
|
1053
|
+
query_context_order: dict[str, int] | None = Field(
|
|
1029
1054
|
default=None,
|
|
1030
|
-
description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
|
|
1055
|
+
description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
|
|
1031
1056
|
)
|
|
1032
1057
|
chat_history: list[ChatContextMessage] = Field(
|
|
1033
1058
|
default=[], description="The chat conversation history"
|
|
@@ -1036,23 +1061,29 @@ class ChatModel(BaseModel):
|
|
|
1036
1061
|
default=True,
|
|
1037
1062
|
description="Truncate the chat context in case it doesn't fit the generative input",
|
|
1038
1063
|
)
|
|
1039
|
-
user_prompt:
|
|
1064
|
+
user_prompt: UserPrompt | None = Field(
|
|
1040
1065
|
default=None, description="Optional custom prompt input by the user"
|
|
1041
1066
|
)
|
|
1042
|
-
citations: bool
|
|
1043
|
-
citation_threshold: Optional[float] = Field(
|
|
1067
|
+
citations: bool | None | CitationsType = Field(
|
|
1044
1068
|
default=None,
|
|
1045
|
-
description="
|
|
1069
|
+
description="Whether to include citations in the response. "
|
|
1070
|
+
"If set to None or False, no citations will be computed. "
|
|
1071
|
+
"If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
|
|
1072
|
+
"If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
|
|
1073
|
+
)
|
|
1074
|
+
citation_threshold: float | None = Field(
|
|
1075
|
+
default=None,
|
|
1076
|
+
description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
|
|
1046
1077
|
ge=0.0,
|
|
1047
1078
|
le=1.0,
|
|
1048
1079
|
)
|
|
1049
|
-
generative_model:
|
|
1080
|
+
generative_model: str | None = Field(
|
|
1050
1081
|
default=None,
|
|
1051
1082
|
title="Generative model",
|
|
1052
|
-
description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1083
|
+
description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1053
1084
|
)
|
|
1054
1085
|
|
|
1055
|
-
max_tokens:
|
|
1086
|
+
max_tokens: int | None = Field(default=None, description="Maximum characters to generate")
|
|
1056
1087
|
|
|
1057
1088
|
query_context_images: dict[str, Image] = Field(
|
|
1058
1089
|
default={},
|
|
@@ -1063,7 +1094,7 @@ class ChatModel(BaseModel):
|
|
|
1063
1094
|
default=False,
|
|
1064
1095
|
description="If set to true, the response will be in markdown format",
|
|
1065
1096
|
)
|
|
1066
|
-
json_schema:
|
|
1097
|
+
json_schema: dict[str, Any] | None = Field(
|
|
1067
1098
|
default=None,
|
|
1068
1099
|
description="The JSON schema to use for the generative model answers",
|
|
1069
1100
|
)
|
|
@@ -1071,17 +1102,18 @@ class ChatModel(BaseModel):
|
|
|
1071
1102
|
default=False,
|
|
1072
1103
|
description="Whether to reorder the query context based on a reranker",
|
|
1073
1104
|
)
|
|
1074
|
-
top_k:
|
|
1105
|
+
top_k: int | None = Field(default=None, description="Number of best elements to get from")
|
|
1075
1106
|
|
|
1076
1107
|
format_prompt: bool = Field(
|
|
1077
1108
|
default=True,
|
|
1078
|
-
description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
|
|
1109
|
+
description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
|
|
1079
1110
|
)
|
|
1080
|
-
seed:
|
|
1111
|
+
seed: int | None = Field(
|
|
1081
1112
|
default=None,
|
|
1082
1113
|
description="Seed use for the generative model for a deterministic output.",
|
|
1083
1114
|
)
|
|
1084
|
-
reasoning:
|
|
1115
|
+
reasoning: Reasoning | bool = Field(
|
|
1116
|
+
title="Reasoning options",
|
|
1085
1117
|
default=False,
|
|
1086
1118
|
description=(
|
|
1087
1119
|
"Reasoning options for the generative model. "
|
|
@@ -1095,26 +1127,25 @@ class RephraseModel(BaseModel):
|
|
|
1095
1127
|
chat_history: list[ChatContextMessage] = []
|
|
1096
1128
|
user_id: str
|
|
1097
1129
|
user_context: list[str] = []
|
|
1098
|
-
generative_model:
|
|
1130
|
+
generative_model: str | None = Field(
|
|
1099
1131
|
default=None,
|
|
1100
1132
|
title="Generative model",
|
|
1101
|
-
description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1133
|
+
description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1102
1134
|
)
|
|
1103
|
-
chat_history_relevance_threshold:
|
|
1135
|
+
chat_history_relevance_threshold: (
|
|
1104
1136
|
Annotated[
|
|
1105
1137
|
float,
|
|
1106
1138
|
Field(
|
|
1107
1139
|
ge=0.0,
|
|
1108
1140
|
le=1.0,
|
|
1109
|
-
description=
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
"Values in between adjust the sensitivity."
|
|
1114
|
-
),
|
|
1141
|
+
description="Threshold to determine if the past chat history is relevant to rephrase the user's question. "
|
|
1142
|
+
"0 - Always treat previous messages as relevant (always rephrase)."
|
|
1143
|
+
"1 - Always treat previous messages as irrelevant (never rephrase)."
|
|
1144
|
+
"Values in between adjust the sensitivity.",
|
|
1115
1145
|
),
|
|
1116
1146
|
]
|
|
1117
|
-
|
|
1147
|
+
| None
|
|
1148
|
+
) = None
|
|
1118
1149
|
|
|
1119
1150
|
|
|
1120
1151
|
class RagStrategyName:
|
|
@@ -1158,7 +1189,7 @@ ALLOWED_FIELD_TYPES: dict[str, str] = {
|
|
|
1158
1189
|
"t": "text",
|
|
1159
1190
|
"f": "file",
|
|
1160
1191
|
"u": "link",
|
|
1161
|
-
"
|
|
1192
|
+
"c": "conversation",
|
|
1162
1193
|
"a": "generic",
|
|
1163
1194
|
}
|
|
1164
1195
|
|
|
@@ -1166,16 +1197,19 @@ ALLOWED_FIELD_TYPES: dict[str, str] = {
|
|
|
1166
1197
|
class FieldExtensionStrategy(RagStrategy):
|
|
1167
1198
|
name: Literal["field_extension"] = "field_extension"
|
|
1168
1199
|
fields: list[str] = Field(
|
|
1200
|
+
default=[],
|
|
1169
1201
|
title="Fields",
|
|
1170
|
-
description="List of field ids to extend the context with. It will try to extend the retrieval context with the specified fields in the matching resources. The field ids have to be in the format `{field_type}/{field_name}`, like 'a/title', 'a/summary' for title and summary fields or 't/amend' for a text field named 'amend'.",
|
|
1171
|
-
|
|
1202
|
+
description="List of field ids to extend the context with. It will try to extend the retrieval context with the specified fields in the matching resources. The field ids have to be in the format `{field_type}/{field_name}`, like 'a/title', 'a/summary' for title and summary fields or 't/amend' for a text field named 'amend'.",
|
|
1203
|
+
)
|
|
1204
|
+
data_augmentation_field_prefixes: list[str] = Field(
|
|
1205
|
+
default=[],
|
|
1206
|
+
description="List of prefixes for data augmentation added fields to extend the context with. For example, if the prefix is 'simpson', all fields that are a result of data augmentation with that prefix will be used to extend the context.",
|
|
1172
1207
|
)
|
|
1173
1208
|
|
|
1174
|
-
@
|
|
1175
|
-
|
|
1176
|
-
def fields_validator(cls, fields) -> Self:
|
|
1209
|
+
@model_validator(mode="after")
|
|
1210
|
+
def field_extension_strategy_validator(self) -> Self:
|
|
1177
1211
|
# Check that the fields are in the format {field_type}/{field_name}
|
|
1178
|
-
for field in fields:
|
|
1212
|
+
for field in self.fields:
|
|
1179
1213
|
try:
|
|
1180
1214
|
field_type, _ = field.strip("/").split("/")
|
|
1181
1215
|
except ValueError:
|
|
@@ -1188,21 +1222,20 @@ class FieldExtensionStrategy(RagStrategy):
|
|
|
1188
1222
|
f"Field '{field}' does not have a valid field type. "
|
|
1189
1223
|
f"Valid field types are: {allowed_field_types_part}."
|
|
1190
1224
|
)
|
|
1191
|
-
|
|
1192
|
-
return fields
|
|
1225
|
+
return self
|
|
1193
1226
|
|
|
1194
1227
|
|
|
1195
1228
|
class FullResourceApplyTo(BaseModel):
|
|
1196
1229
|
exclude: list[str] = Field(
|
|
1197
1230
|
default_factory=list,
|
|
1198
1231
|
title="Labels to exclude from full resource expansion",
|
|
1199
|
-
description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
|
|
1232
|
+
description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
|
|
1200
1233
|
)
|
|
1201
1234
|
|
|
1202
1235
|
|
|
1203
1236
|
class FullResourceStrategy(RagStrategy):
|
|
1204
1237
|
name: Literal["full_resource"] = "full_resource"
|
|
1205
|
-
count:
|
|
1238
|
+
count: int | None = Field(
|
|
1206
1239
|
default=None,
|
|
1207
1240
|
title="Count",
|
|
1208
1241
|
description="Maximum number of full documents to retrieve. If not specified, all matching documents are retrieved.",
|
|
@@ -1213,7 +1246,7 @@ class FullResourceStrategy(RagStrategy):
|
|
|
1213
1246
|
title="Include remaining text blocks",
|
|
1214
1247
|
description="Whether to include the remaining text blocks after the maximum number of resources has been reached.",
|
|
1215
1248
|
)
|
|
1216
|
-
apply_to:
|
|
1249
|
+
apply_to: FullResourceApplyTo | None = Field(
|
|
1217
1250
|
default=None,
|
|
1218
1251
|
title="Apply to certain resources only",
|
|
1219
1252
|
description="Define which resources to exclude from serialization",
|
|
@@ -1227,6 +1260,7 @@ class HierarchyResourceStrategy(RagStrategy):
|
|
|
1227
1260
|
title="Count",
|
|
1228
1261
|
description="Number of extra characters that are added to each matching paragraph when adding to the context.",
|
|
1229
1262
|
ge=0,
|
|
1263
|
+
le=1024,
|
|
1230
1264
|
)
|
|
1231
1265
|
|
|
1232
1266
|
|
|
@@ -1318,7 +1352,7 @@ class PreQuery(BaseModel):
|
|
|
1318
1352
|
),
|
|
1319
1353
|
ge=0,
|
|
1320
1354
|
)
|
|
1321
|
-
id:
|
|
1355
|
+
id: str | None = Field(
|
|
1322
1356
|
default=None,
|
|
1323
1357
|
title="Prequery id",
|
|
1324
1358
|
min_length=1,
|
|
@@ -1452,10 +1486,10 @@ class TableImageStrategy(ImageRagStrategy):
|
|
|
1452
1486
|
|
|
1453
1487
|
class PageImageStrategy(ImageRagStrategy):
|
|
1454
1488
|
name: Literal["page_image"] = "page_image"
|
|
1455
|
-
count:
|
|
1489
|
+
count: int | None = Field(
|
|
1456
1490
|
default=None,
|
|
1457
1491
|
title="Count",
|
|
1458
|
-
description="Maximum number of images to retrieve
|
|
1492
|
+
description="Maximum number of page images to retrieve. By default, at most 5 images are retrieved.",
|
|
1459
1493
|
)
|
|
1460
1494
|
|
|
1461
1495
|
|
|
@@ -1464,20 +1498,18 @@ class ParagraphImageStrategy(ImageRagStrategy):
|
|
|
1464
1498
|
|
|
1465
1499
|
|
|
1466
1500
|
RagStrategies = Annotated[
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
GraphStrategy,
|
|
1476
|
-
],
|
|
1501
|
+
FieldExtensionStrategy
|
|
1502
|
+
| FullResourceStrategy
|
|
1503
|
+
| HierarchyResourceStrategy
|
|
1504
|
+
| NeighbouringParagraphsStrategy
|
|
1505
|
+
| MetadataExtensionStrategy
|
|
1506
|
+
| ConversationalStrategy
|
|
1507
|
+
| PreQueriesStrategy
|
|
1508
|
+
| GraphStrategy,
|
|
1477
1509
|
Field(discriminator="name"),
|
|
1478
1510
|
]
|
|
1479
1511
|
RagImagesStrategies = Annotated[
|
|
1480
|
-
|
|
1512
|
+
PageImageStrategy | ParagraphImageStrategy | TableImageStrategy,
|
|
1481
1513
|
Field(discriminator="name"),
|
|
1482
1514
|
]
|
|
1483
1515
|
PromptContext = dict[str, str]
|
|
@@ -1486,10 +1518,10 @@ PromptContextImages = dict[str, Image]
|
|
|
1486
1518
|
|
|
1487
1519
|
|
|
1488
1520
|
class CustomPrompt(BaseModel):
|
|
1489
|
-
system:
|
|
1521
|
+
system: str | None = Field(
|
|
1490
1522
|
default=None,
|
|
1491
1523
|
title="System prompt",
|
|
1492
|
-
description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
|
|
1524
|
+
description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
|
|
1493
1525
|
min_length=1,
|
|
1494
1526
|
examples=[
|
|
1495
1527
|
"You are a medical assistant, use medical terminology",
|
|
@@ -1498,10 +1530,10 @@ class CustomPrompt(BaseModel):
|
|
|
1498
1530
|
"You are a financial expert, use correct terms",
|
|
1499
1531
|
],
|
|
1500
1532
|
)
|
|
1501
|
-
user:
|
|
1533
|
+
user: str | None = Field(
|
|
1502
1534
|
default=None,
|
|
1503
1535
|
title="User prompt",
|
|
1504
|
-
description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
|
|
1536
|
+
description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
|
|
1505
1537
|
min_length=1,
|
|
1506
1538
|
examples=[
|
|
1507
1539
|
"Taking into account our previous conversation, and this context: {context} answer this {question}",
|
|
@@ -1510,7 +1542,7 @@ class CustomPrompt(BaseModel):
|
|
|
1510
1542
|
"Given this context: {context}. Answer this {question} using the provided context. Please, answer always in French",
|
|
1511
1543
|
],
|
|
1512
1544
|
)
|
|
1513
|
-
rephrase:
|
|
1545
|
+
rephrase: str | None = Field(
|
|
1514
1546
|
default=None,
|
|
1515
1547
|
title="Rephrase",
|
|
1516
1548
|
description=(
|
|
@@ -1540,23 +1572,23 @@ class AskRequest(AuditMetadataBase):
|
|
|
1540
1572
|
le=200,
|
|
1541
1573
|
description="The top most relevant results to fetch at the retrieval step. The maximum number of results allowed is 200.",
|
|
1542
1574
|
)
|
|
1543
|
-
filter_expression:
|
|
1575
|
+
filter_expression: FilterExpression | None = (
|
|
1544
1576
|
SearchParamDefaults.filter_expression.to_pydantic_field()
|
|
1545
1577
|
)
|
|
1546
1578
|
fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
|
|
1547
|
-
filters:
|
|
1579
|
+
filters: list[str] | list[Filter] = Field(
|
|
1548
1580
|
default=[],
|
|
1549
|
-
title="Filters",
|
|
1550
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
1581
|
+
title="Search Filters",
|
|
1582
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
1551
1583
|
)
|
|
1552
|
-
keyword_filters:
|
|
1584
|
+
keyword_filters: list[str] | list[Filter] = Field(
|
|
1553
1585
|
default=[],
|
|
1554
1586
|
title="Keyword filters",
|
|
1555
1587
|
description=(
|
|
1556
1588
|
"List of keyword filter expressions to apply to the retrieval step. "
|
|
1557
1589
|
"The text block search will only be performed on the documents that contain the specified keywords. "
|
|
1558
1590
|
"The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
|
|
1559
|
-
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1591
|
+
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1560
1592
|
),
|
|
1561
1593
|
examples=[
|
|
1562
1594
|
["NLP", "BERT"],
|
|
@@ -1564,69 +1596,72 @@ class AskRequest(AuditMetadataBase):
|
|
|
1564
1596
|
["Friedrich Nietzsche", "Immanuel Kant"],
|
|
1565
1597
|
],
|
|
1566
1598
|
)
|
|
1567
|
-
vectorset:
|
|
1568
|
-
min_score:
|
|
1599
|
+
vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
|
|
1600
|
+
min_score: float | MinScore | None = Field(
|
|
1569
1601
|
default=None,
|
|
1570
1602
|
title="Minimum score",
|
|
1571
|
-
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
1603
|
+
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
1572
1604
|
)
|
|
1573
1605
|
features: list[ChatOptions] = SearchParamDefaults.chat_features.to_pydantic_field()
|
|
1574
|
-
range_creation_start:
|
|
1575
|
-
|
|
1576
|
-
|
|
1577
|
-
range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
1578
|
-
range_modification_start: Optional[DateTime] = (
|
|
1606
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
1607
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
1608
|
+
range_modification_start: DateTime | None = (
|
|
1579
1609
|
SearchParamDefaults.range_modification_start.to_pydantic_field()
|
|
1580
1610
|
)
|
|
1581
|
-
range_modification_end:
|
|
1611
|
+
range_modification_end: DateTime | None = (
|
|
1582
1612
|
SearchParamDefaults.range_modification_end.to_pydantic_field()
|
|
1583
1613
|
)
|
|
1584
1614
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
|
|
1585
1615
|
field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
|
|
1586
1616
|
extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
|
|
1587
|
-
context:
|
|
1588
|
-
chat_history:
|
|
1589
|
-
|
|
1590
|
-
)
|
|
1591
|
-
extra_context: Optional[list[str]] = Field(
|
|
1617
|
+
context: list[ChatContextMessage] | None = SearchParamDefaults.chat_context.to_pydantic_field()
|
|
1618
|
+
chat_history: list[ChatContextMessage] | None = SearchParamDefaults.chat_history.to_pydantic_field()
|
|
1619
|
+
extra_context: list[str] | None = Field(
|
|
1592
1620
|
default=None,
|
|
1593
1621
|
title="Extra query context",
|
|
1594
1622
|
description="""Additional context that is added to the retrieval context sent to the LLM.
|
|
1595
1623
|
It allows extending the chat feature with content that may not be in the Knowledge Box.""",
|
|
1596
1624
|
)
|
|
1597
|
-
extra_context_images:
|
|
1625
|
+
extra_context_images: list[Image] | None = Field(
|
|
1598
1626
|
default=None,
|
|
1599
1627
|
title="Extra query context images",
|
|
1600
1628
|
description="""Additional images added to the retrieval context sent to the LLM."
|
|
1601
1629
|
It allows extending the chat feature with content that may not be in the Knowledge Box.""",
|
|
1602
1630
|
)
|
|
1603
|
-
query_image:
|
|
1631
|
+
query_image: Image | None = Field(
|
|
1604
1632
|
default=None,
|
|
1605
1633
|
title="Query image",
|
|
1606
1634
|
description="Image that will be used together with the query text for retrieval and then sent to the LLM as part of the context. "
|
|
1607
1635
|
"If a query image is provided, the `extra_context_images` and `rag_images_strategies` will be disabled.",
|
|
1608
1636
|
)
|
|
1609
|
-
|
|
1637
|
+
|
|
1638
|
+
# autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
|
|
1639
|
+
# avoid breaking changes in the python sdks. Please remove on a future major release.
|
|
1640
|
+
autofilter: SkipJsonSchema[bool] = False
|
|
1641
|
+
|
|
1610
1642
|
highlight: bool = SearchParamDefaults.highlight.to_pydantic_field()
|
|
1611
1643
|
resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
|
|
1612
|
-
prompt:
|
|
1644
|
+
prompt: str | CustomPrompt | None = Field(
|
|
1613
1645
|
default=None,
|
|
1614
1646
|
title="Prompts",
|
|
1615
|
-
description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
|
|
1647
|
+
description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
|
|
1616
1648
|
)
|
|
1617
|
-
rank_fusion:
|
|
1618
|
-
reranker:
|
|
1619
|
-
citations: bool = Field(
|
|
1620
|
-
default=
|
|
1621
|
-
description="Whether to include
|
|
1649
|
+
rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
|
|
1650
|
+
reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
|
|
1651
|
+
citations: bool | None | CitationsType = Field(
|
|
1652
|
+
default=None,
|
|
1653
|
+
description="Whether to include citations in the response. "
|
|
1654
|
+
"If set to None or False, no citations will be computed. "
|
|
1655
|
+
"If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
|
|
1656
|
+
"If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
|
|
1622
1657
|
)
|
|
1623
|
-
citation_threshold:
|
|
1658
|
+
citation_threshold: float | None = Field(
|
|
1624
1659
|
default=None,
|
|
1625
|
-
description="If citations is True, this
|
|
1660
|
+
description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
|
|
1626
1661
|
ge=0.0,
|
|
1627
1662
|
le=1.0,
|
|
1628
1663
|
)
|
|
1629
|
-
security:
|
|
1664
|
+
security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
|
|
1630
1665
|
show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
|
|
1631
1666
|
rag_strategies: list[RagStrategies] = Field(
|
|
1632
1667
|
default=[],
|
|
@@ -1691,21 +1726,21 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1691
1726
|
)
|
|
1692
1727
|
debug: bool = SearchParamDefaults.debug.to_pydantic_field()
|
|
1693
1728
|
|
|
1694
|
-
generative_model:
|
|
1729
|
+
generative_model: str | None = Field(
|
|
1695
1730
|
default=None,
|
|
1696
1731
|
title="Generative model",
|
|
1697
|
-
description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1732
|
+
description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1698
1733
|
)
|
|
1699
|
-
generative_model_seed:
|
|
1734
|
+
generative_model_seed: int | None = Field(
|
|
1700
1735
|
default=None,
|
|
1701
1736
|
title="Seed for the generative model",
|
|
1702
1737
|
description="The seed to use for the generative model for deterministic generation. Only supported by some models.",
|
|
1703
1738
|
)
|
|
1704
1739
|
|
|
1705
|
-
max_tokens:
|
|
1740
|
+
max_tokens: int | MaxTokens | None = Field(
|
|
1706
1741
|
default=None,
|
|
1707
1742
|
title="Maximum LLM tokens to use for the request",
|
|
1708
|
-
description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
|
|
1743
|
+
description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
|
|
1709
1744
|
)
|
|
1710
1745
|
|
|
1711
1746
|
rephrase: bool = Field(
|
|
@@ -1714,7 +1749,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1714
1749
|
"Rephrase the query for a more efficient retrieval. This will consume LLM tokens and make the request slower."
|
|
1715
1750
|
),
|
|
1716
1751
|
)
|
|
1717
|
-
chat_history_relevance_threshold:
|
|
1752
|
+
chat_history_relevance_threshold: float | None = Field(
|
|
1718
1753
|
default=None,
|
|
1719
1754
|
ge=0.0,
|
|
1720
1755
|
le=1.0,
|
|
@@ -1732,7 +1767,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1732
1767
|
description="If set to true, the response will be in markdown format",
|
|
1733
1768
|
)
|
|
1734
1769
|
|
|
1735
|
-
answer_json_schema:
|
|
1770
|
+
answer_json_schema: dict[str, Any] | None = Field(
|
|
1736
1771
|
default=None,
|
|
1737
1772
|
title="Answer JSON schema",
|
|
1738
1773
|
description="""Desired JSON schema for the LLM answer.
|
|
@@ -1748,13 +1783,14 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
|
|
|
1748
1783
|
description="Whether to generate an answer using the generative model. If set to false, the response will only contain the retrieval results.",
|
|
1749
1784
|
)
|
|
1750
1785
|
|
|
1751
|
-
search_configuration:
|
|
1786
|
+
search_configuration: str | None = Field(
|
|
1752
1787
|
default=None,
|
|
1753
1788
|
description="Load ask parameters from this configuration. Parameters in the request override parameters from the configuration.",
|
|
1754
1789
|
)
|
|
1755
1790
|
|
|
1756
|
-
reasoning:
|
|
1791
|
+
reasoning: Reasoning | bool = Field(
|
|
1757
1792
|
default=False,
|
|
1793
|
+
title="Reasoning options",
|
|
1758
1794
|
description=(
|
|
1759
1795
|
"Reasoning options for the generative model. "
|
|
1760
1796
|
"Set to True to enable default reasoning, False to disable, or provide a Reasoning object for custom options."
|
|
@@ -1835,8 +1871,8 @@ class SummarizeModel(BaseModel):
|
|
|
1835
1871
|
"""
|
|
1836
1872
|
|
|
1837
1873
|
resources: dict[str, SummarizeResourceModel] = {}
|
|
1838
|
-
generative_model:
|
|
1839
|
-
user_prompt:
|
|
1874
|
+
generative_model: str | None = None
|
|
1875
|
+
user_prompt: str | None = None
|
|
1840
1876
|
summary_kind: SummaryKind = SummaryKind.SIMPLE
|
|
1841
1877
|
|
|
1842
1878
|
|
|
@@ -1845,13 +1881,13 @@ class SummarizeRequest(BaseModel):
|
|
|
1845
1881
|
Model for the request payload of the summarize endpoint
|
|
1846
1882
|
"""
|
|
1847
1883
|
|
|
1848
|
-
generative_model:
|
|
1884
|
+
generative_model: str | None = Field(
|
|
1849
1885
|
default=None,
|
|
1850
1886
|
title="Generative model",
|
|
1851
|
-
description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
|
|
1887
|
+
description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
|
|
1852
1888
|
)
|
|
1853
1889
|
|
|
1854
|
-
user_prompt:
|
|
1890
|
+
user_prompt: str | None = Field(
|
|
1855
1891
|
default=None,
|
|
1856
1892
|
title="User prompt",
|
|
1857
1893
|
description="Optional custom prompt input by the user",
|
|
@@ -1862,7 +1898,7 @@ class SummarizeRequest(BaseModel):
|
|
|
1862
1898
|
min_length=1,
|
|
1863
1899
|
max_length=100,
|
|
1864
1900
|
title="Resources",
|
|
1865
|
-
description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
|
|
1901
|
+
description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
|
|
1866
1902
|
)
|
|
1867
1903
|
|
|
1868
1904
|
summary_kind: SummaryKind = Field(
|
|
@@ -1888,20 +1924,20 @@ class SummarizedResponse(BaseModel):
|
|
|
1888
1924
|
title="Summary",
|
|
1889
1925
|
description="Global summary of all resources combined.",
|
|
1890
1926
|
)
|
|
1891
|
-
consumption:
|
|
1927
|
+
consumption: Consumption | None = None
|
|
1892
1928
|
|
|
1893
1929
|
|
|
1894
1930
|
class KnowledgeGraphEntity(BaseModel):
|
|
1895
1931
|
name: str
|
|
1896
|
-
type:
|
|
1897
|
-
subtype:
|
|
1932
|
+
type: RelationNodeType | None = None
|
|
1933
|
+
subtype: str | None = None
|
|
1898
1934
|
|
|
1899
1935
|
|
|
1900
1936
|
class FindRequest(BaseSearchRequest):
|
|
1901
|
-
query_entities: SkipJsonSchema[
|
|
1937
|
+
query_entities: SkipJsonSchema[list[KnowledgeGraphEntity] | None] = Field(
|
|
1902
1938
|
default=None, title="Query entities", description="Entities to use in a knowledge graph search"
|
|
1903
1939
|
)
|
|
1904
|
-
graph_query:
|
|
1940
|
+
graph_query: GraphPathQuery | None = Field(
|
|
1905
1941
|
default=None,
|
|
1906
1942
|
title="Graph query",
|
|
1907
1943
|
description="Query for the knowledge graph. Paths (node-relation-node) extracted from a paragraph_id will be used to extend the results",
|
|
@@ -1912,17 +1948,17 @@ class FindRequest(BaseSearchRequest):
|
|
|
1912
1948
|
FindOptions.SEMANTIC,
|
|
1913
1949
|
]
|
|
1914
1950
|
)
|
|
1915
|
-
rank_fusion:
|
|
1916
|
-
reranker:
|
|
1951
|
+
rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
|
|
1952
|
+
reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
|
|
1917
1953
|
|
|
1918
|
-
keyword_filters:
|
|
1954
|
+
keyword_filters: list[str] | list[Filter] = Field(
|
|
1919
1955
|
default=[],
|
|
1920
1956
|
title="Keyword filters",
|
|
1921
1957
|
description=(
|
|
1922
1958
|
"List of keyword filter expressions to apply to the retrieval step. "
|
|
1923
1959
|
"The text block search will only be performed on the documents that contain the specified keywords. "
|
|
1924
1960
|
"The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
|
|
1925
|
-
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1961
|
+
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1926
1962
|
),
|
|
1927
1963
|
examples=[
|
|
1928
1964
|
["NLP", "BERT"],
|
|
@@ -1931,11 +1967,11 @@ class FindRequest(BaseSearchRequest):
|
|
|
1931
1967
|
],
|
|
1932
1968
|
)
|
|
1933
1969
|
|
|
1934
|
-
search_configuration:
|
|
1970
|
+
search_configuration: str | None = Field(
|
|
1935
1971
|
default=None,
|
|
1936
1972
|
description="Load find parameters from this configuration. Parameters in the request override parameters from the configuration.",
|
|
1937
1973
|
)
|
|
1938
|
-
generative_model:
|
|
1974
|
+
generative_model: str | None = Field(
|
|
1939
1975
|
default=None,
|
|
1940
1976
|
title="Generative model",
|
|
1941
1977
|
description="The generative model used to rephrase the query. If not provided, the model configured for the Knowledge Box is used.",
|
|
@@ -1969,9 +2005,9 @@ class SCORE_TYPE(str, Enum):
|
|
|
1969
2005
|
|
|
1970
2006
|
|
|
1971
2007
|
class FindTextPosition(BaseModel):
|
|
1972
|
-
page_number:
|
|
1973
|
-
start_seconds:
|
|
1974
|
-
end_seconds:
|
|
2008
|
+
page_number: int | None = None
|
|
2009
|
+
start_seconds: list[int] | None = None
|
|
2010
|
+
end_seconds: list[int] | None = None
|
|
1975
2011
|
index: int
|
|
1976
2012
|
start: int
|
|
1977
2013
|
end: int
|
|
@@ -1983,15 +2019,15 @@ class FindParagraph(BaseModel):
|
|
|
1983
2019
|
order: int = Field(default=0, ge=0)
|
|
1984
2020
|
text: str
|
|
1985
2021
|
id: str
|
|
1986
|
-
labels:
|
|
1987
|
-
position:
|
|
2022
|
+
labels: list[str] | None = []
|
|
2023
|
+
position: TextPosition | None = None
|
|
1988
2024
|
fuzzy_result: bool = False
|
|
1989
2025
|
page_with_visual: bool = Field(
|
|
1990
2026
|
default=False,
|
|
1991
2027
|
title="Page where this paragraph belongs is a visual page",
|
|
1992
2028
|
description="This flag informs if the page may have information that has not been extracted",
|
|
1993
2029
|
)
|
|
1994
|
-
reference:
|
|
2030
|
+
reference: str | None = Field(
|
|
1995
2031
|
default=None,
|
|
1996
2032
|
title="Reference to the image that represents this text",
|
|
1997
2033
|
description="Reference to the extracted image that represents this paragraph",
|
|
@@ -2001,7 +2037,7 @@ class FindParagraph(BaseModel):
|
|
|
2001
2037
|
title="Is a table",
|
|
2002
2038
|
description="The referenced image of the paragraph is a table",
|
|
2003
2039
|
)
|
|
2004
|
-
relevant_relations:
|
|
2040
|
+
relevant_relations: Relations | None = Field(
|
|
2005
2041
|
default=None,
|
|
2006
2042
|
title="Relevant relations",
|
|
2007
2043
|
description="Relevant relations from which the paragraph was found, will only be filled if using the Graph RAG Strategy",
|
|
@@ -2016,17 +2052,19 @@ class FindResource(Resource):
|
|
|
2016
2052
|
fields: dict[str, FindField]
|
|
2017
2053
|
|
|
2018
2054
|
def updated_from(self, origin: Resource):
|
|
2055
|
+
find_resource_model_fields = self.model_fields.keys()
|
|
2019
2056
|
for key in origin.model_fields.keys():
|
|
2020
|
-
|
|
2057
|
+
if key in find_resource_model_fields:
|
|
2058
|
+
self.__setattr__(key, getattr(origin, key))
|
|
2021
2059
|
|
|
2022
2060
|
|
|
2023
2061
|
class KnowledgeboxFindResults(JsonBaseModel):
|
|
2024
2062
|
"""Find on knowledgebox results"""
|
|
2025
2063
|
|
|
2026
2064
|
resources: dict[str, FindResource]
|
|
2027
|
-
relations:
|
|
2028
|
-
query:
|
|
2029
|
-
rephrased_query:
|
|
2065
|
+
relations: Relations | None = None
|
|
2066
|
+
query: str | None = Field(default=None, title="Find Results Query")
|
|
2067
|
+
rephrased_query: str | None = None
|
|
2030
2068
|
total: int = 0
|
|
2031
2069
|
page_number: int = Field(
|
|
2032
2070
|
default=0,
|
|
@@ -2040,18 +2078,18 @@ class KnowledgeboxFindResults(JsonBaseModel):
|
|
|
2040
2078
|
default=False,
|
|
2041
2079
|
description="Pagination will be deprecated, please, refer to `top_k` in the request",
|
|
2042
2080
|
)
|
|
2043
|
-
nodes:
|
|
2081
|
+
nodes: list[dict[str, str]] | None = Field(
|
|
2044
2082
|
default=None,
|
|
2045
2083
|
title="Nodes",
|
|
2046
2084
|
description="List of nodes queried in the search",
|
|
2047
2085
|
)
|
|
2048
|
-
shards:
|
|
2086
|
+
shards: list[str] | None = Field(
|
|
2049
2087
|
default=None,
|
|
2050
2088
|
title="Shards",
|
|
2051
2089
|
description="The list of shard replica ids used for the search.",
|
|
2052
2090
|
)
|
|
2053
2091
|
autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
|
|
2054
|
-
min_score:
|
|
2092
|
+
min_score: float | MinScore | None = Field(
|
|
2055
2093
|
default=MinScore(),
|
|
2056
2094
|
title="Minimum result score",
|
|
2057
2095
|
description="The minimum scores that have been used for the search operation.",
|
|
@@ -2059,9 +2097,9 @@ class KnowledgeboxFindResults(JsonBaseModel):
|
|
|
2059
2097
|
best_matches: list[str] = Field(
|
|
2060
2098
|
default=[],
|
|
2061
2099
|
title="Best matches",
|
|
2062
|
-
description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
|
|
2100
|
+
description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
|
|
2063
2101
|
)
|
|
2064
|
-
metrics:
|
|
2102
|
+
metrics: dict[str, Any] | None = Field(
|
|
2065
2103
|
default=None,
|
|
2066
2104
|
title="Metrics",
|
|
2067
2105
|
description=(
|
|
@@ -2079,15 +2117,15 @@ class FeedbackTasks(str, Enum):
|
|
|
2079
2117
|
class FeedbackRequest(BaseModel):
|
|
2080
2118
|
ident: str = Field(
|
|
2081
2119
|
title="Request identifier",
|
|
2082
|
-
description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
|
|
2120
|
+
description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
|
|
2083
2121
|
)
|
|
2084
2122
|
good: bool = Field(title="Good", description="Whether the result was good or not")
|
|
2085
2123
|
task: FeedbackTasks = Field(
|
|
2086
2124
|
title="Task",
|
|
2087
2125
|
description="The task the feedback is for. For now, only `CHAT` task is available",
|
|
2088
2126
|
)
|
|
2089
|
-
feedback:
|
|
2090
|
-
text_block_id:
|
|
2127
|
+
feedback: str | None = Field(None, title="Feedback", description="Feedback text")
|
|
2128
|
+
text_block_id: str | None = Field(None, title="Text block", description="Text block id")
|
|
2091
2129
|
|
|
2092
2130
|
|
|
2093
2131
|
def validate_facets(facets):
|
|
@@ -2138,13 +2176,11 @@ class AugmentedTextBlock(BaseModel):
|
|
|
2138
2176
|
text: str = Field(
|
|
2139
2177
|
description="The text of the augmented text block. It may include additional metadata to enrich the context"
|
|
2140
2178
|
)
|
|
2141
|
-
position:
|
|
2179
|
+
position: TextPosition | None = Field(
|
|
2142
2180
|
default=None,
|
|
2143
2181
|
description="Metadata about the position of the text block in the original document.",
|
|
2144
2182
|
)
|
|
2145
|
-
parent:
|
|
2146
|
-
default=None, description="The parent text block that was augmented for."
|
|
2147
|
-
)
|
|
2183
|
+
parent: str | None = Field(default=None, description="The parent text block that was augmented for.")
|
|
2148
2184
|
augmentation_type: TextBlockAugmentationType = Field(description="Type of augmentation.")
|
|
2149
2185
|
|
|
2150
2186
|
|
|
@@ -2168,12 +2204,12 @@ class AskTokens(BaseModel):
|
|
|
2168
2204
|
title="Output tokens",
|
|
2169
2205
|
description="Number of LLM tokens used for the answer",
|
|
2170
2206
|
)
|
|
2171
|
-
input_nuclia:
|
|
2207
|
+
input_nuclia: float | None = Field(
|
|
2172
2208
|
title="Input Nuclia tokens",
|
|
2173
2209
|
description="Number of Nuclia LLM tokens used for the context in the query",
|
|
2174
2210
|
default=None,
|
|
2175
2211
|
)
|
|
2176
|
-
output_nuclia:
|
|
2212
|
+
output_nuclia: float | None = Field(
|
|
2177
2213
|
title="Output Nuclia tokens",
|
|
2178
2214
|
description="Number of Nuclia LLM tokens used for the answer",
|
|
2179
2215
|
default=None,
|
|
@@ -2181,12 +2217,12 @@ class AskTokens(BaseModel):
|
|
|
2181
2217
|
|
|
2182
2218
|
|
|
2183
2219
|
class AskTimings(BaseModel):
|
|
2184
|
-
generative_first_chunk:
|
|
2220
|
+
generative_first_chunk: float | None = Field(
|
|
2185
2221
|
default=None,
|
|
2186
2222
|
title="Generative first chunk",
|
|
2187
2223
|
description="Time the LLM took to generate the first chunk of the answer",
|
|
2188
2224
|
)
|
|
2189
|
-
generative_total:
|
|
2225
|
+
generative_total: float | None = Field(
|
|
2190
2226
|
default=None,
|
|
2191
2227
|
title="Generative total",
|
|
2192
2228
|
description="Total time the LLM took to generate the answer",
|
|
@@ -2194,12 +2230,12 @@ class AskTimings(BaseModel):
|
|
|
2194
2230
|
|
|
2195
2231
|
|
|
2196
2232
|
class SyncAskMetadata(BaseModel):
|
|
2197
|
-
tokens:
|
|
2233
|
+
tokens: AskTokens | None = Field(
|
|
2198
2234
|
default=None,
|
|
2199
2235
|
title="Tokens",
|
|
2200
2236
|
description="Number of tokens used in the LLM context and answer",
|
|
2201
2237
|
)
|
|
2202
|
-
timings:
|
|
2238
|
+
timings: AskTimings | None = Field(
|
|
2203
2239
|
default=None,
|
|
2204
2240
|
title="Timings",
|
|
2205
2241
|
description="Timings of the generative model",
|
|
@@ -2218,19 +2254,19 @@ class SyncAskResponse(BaseModel):
|
|
|
2218
2254
|
title="Answer",
|
|
2219
2255
|
description="The generative answer to the query",
|
|
2220
2256
|
)
|
|
2221
|
-
reasoning:
|
|
2257
|
+
reasoning: str | None = Field(
|
|
2222
2258
|
default=None,
|
|
2223
|
-
title="Reasoning",
|
|
2224
|
-
description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
|
|
2259
|
+
title="Reasoning steps",
|
|
2260
|
+
description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
|
|
2225
2261
|
)
|
|
2226
|
-
answer_json:
|
|
2262
|
+
answer_json: dict[str, Any] | None = Field(
|
|
2227
2263
|
default=None,
|
|
2228
2264
|
title="Answer JSON",
|
|
2229
|
-
description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
|
|
2265
|
+
description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
|
|
2230
2266
|
)
|
|
2231
2267
|
status: str = Field(
|
|
2232
2268
|
title="Status",
|
|
2233
|
-
description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
|
|
2269
|
+
description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
|
|
2234
2270
|
)
|
|
2235
2271
|
retrieval_results: KnowledgeboxFindResults = Field(
|
|
2236
2272
|
title="Retrieval results",
|
|
@@ -2241,7 +2277,7 @@ class SyncAskResponse(BaseModel):
|
|
|
2241
2277
|
title="Retrieval best matches",
|
|
2242
2278
|
description="Sorted list of best matching text blocks in the retrieval step. This includes the main query and prequeries results, if any.",
|
|
2243
2279
|
)
|
|
2244
|
-
prequeries:
|
|
2280
|
+
prequeries: dict[str, KnowledgeboxFindResults] | None = Field(
|
|
2245
2281
|
default=None,
|
|
2246
2282
|
title="Prequeries",
|
|
2247
2283
|
description="The retrieval results of the prequeries",
|
|
@@ -2249,41 +2285,46 @@ class SyncAskResponse(BaseModel):
|
|
|
2249
2285
|
learning_id: str = Field(
|
|
2250
2286
|
default="",
|
|
2251
2287
|
title="Learning id",
|
|
2252
|
-
description="The id of the learning request. This id can be used to provide feedback on the learning process.",
|
|
2288
|
+
description="The id of the learning request. This id can be used to provide feedback on the learning process.",
|
|
2253
2289
|
)
|
|
2254
|
-
relations:
|
|
2290
|
+
relations: Relations | None = Field(
|
|
2255
2291
|
default=None,
|
|
2256
2292
|
title="Relations",
|
|
2257
2293
|
description="The detected relations of the answer",
|
|
2258
2294
|
)
|
|
2259
2295
|
citations: dict[str, Any] = Field(
|
|
2260
|
-
|
|
2296
|
+
default_factory=dict,
|
|
2261
2297
|
title="Citations",
|
|
2262
2298
|
description="The citations of the answer. List of references to the resources used to generate the answer.",
|
|
2263
2299
|
)
|
|
2264
|
-
|
|
2300
|
+
citation_footnote_to_context: dict[str, str] = Field(
|
|
2301
|
+
default_factory=dict,
|
|
2302
|
+
title="Citation footnote to context",
|
|
2303
|
+
description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)""",
|
|
2304
|
+
)
|
|
2305
|
+
augmented_context: AugmentedContext | None = Field(
|
|
2265
2306
|
default=None,
|
|
2266
2307
|
description=(
|
|
2267
2308
|
"Augmented text blocks that were sent to the LLM as part of the RAG strategies "
|
|
2268
2309
|
"applied on the retrieval results in the request."
|
|
2269
2310
|
),
|
|
2270
2311
|
)
|
|
2271
|
-
prompt_context:
|
|
2312
|
+
prompt_context: list[str] | None = Field(
|
|
2272
2313
|
default=None,
|
|
2273
2314
|
title="Prompt context",
|
|
2274
2315
|
description="The prompt context used to generate the answer. Returned only if the debug flag is set to true",
|
|
2275
2316
|
)
|
|
2276
|
-
predict_request:
|
|
2317
|
+
predict_request: dict[str, Any] | None = Field(
|
|
2277
2318
|
default=None,
|
|
2278
2319
|
title="Predict request",
|
|
2279
2320
|
description="The internal predict request used to generate the answer. Returned only if the debug flag is set to true",
|
|
2280
2321
|
)
|
|
2281
|
-
metadata:
|
|
2322
|
+
metadata: SyncAskMetadata | None = Field(
|
|
2282
2323
|
default=None,
|
|
2283
2324
|
title="Metadata",
|
|
2284
|
-
description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
|
|
2325
|
+
description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
|
|
2285
2326
|
)
|
|
2286
|
-
consumption:
|
|
2327
|
+
consumption: Consumption | None = Field(
|
|
2287
2328
|
default=None,
|
|
2288
2329
|
title="Consumption",
|
|
2289
2330
|
description=(
|
|
@@ -2291,12 +2332,12 @@ class SyncAskResponse(BaseModel):
|
|
|
2291
2332
|
" 'X-show-consumption' header is set to true in the request."
|
|
2292
2333
|
),
|
|
2293
2334
|
)
|
|
2294
|
-
error_details:
|
|
2335
|
+
error_details: str | None = Field(
|
|
2295
2336
|
default=None,
|
|
2296
2337
|
title="Error details",
|
|
2297
2338
|
description="Error details message in case there was an error",
|
|
2298
2339
|
)
|
|
2299
|
-
debug:
|
|
2340
|
+
debug: dict[str, Any] | None = Field(
|
|
2300
2341
|
default=None,
|
|
2301
2342
|
title="Debug information",
|
|
2302
2343
|
description=(
|
|
@@ -2370,11 +2411,23 @@ class CitationsAskResponseItem(BaseModel):
|
|
|
2370
2411
|
citations: dict[str, Any]
|
|
2371
2412
|
|
|
2372
2413
|
|
|
2414
|
+
class FootnoteCitationsAskResponseItem(BaseModel):
|
|
2415
|
+
type: Literal["footnote_citations"] = "footnote_citations"
|
|
2416
|
+
footnote_to_context: dict[str, str] = Field(
|
|
2417
|
+
description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)
|
|
2418
|
+
e.g.,
|
|
2419
|
+
{ "block-AA": "f44f4e8acbfb1d48de3fd3c2fb04a885/f/f44f4e8acbfb1d48de3fd3c2fb04a885/73758-73972", ... }
|
|
2420
|
+
If the query_context is a list, it will map to 1-based indices as strings
|
|
2421
|
+
e.g., { "block-AA": "1", "block-AB": "2", ... }
|
|
2422
|
+
"""
|
|
2423
|
+
)
|
|
2424
|
+
|
|
2425
|
+
|
|
2373
2426
|
class StatusAskResponseItem(BaseModel):
|
|
2374
2427
|
type: Literal["status"] = "status"
|
|
2375
2428
|
code: str
|
|
2376
2429
|
status: str
|
|
2377
|
-
details:
|
|
2430
|
+
details: str | None = None
|
|
2378
2431
|
|
|
2379
2432
|
|
|
2380
2433
|
class ErrorAskResponseItem(BaseModel):
|
|
@@ -2393,21 +2446,22 @@ class DebugAskResponseItem(BaseModel):
|
|
|
2393
2446
|
metrics: dict[str, Any]
|
|
2394
2447
|
|
|
2395
2448
|
|
|
2396
|
-
AskResponseItemType =
|
|
2397
|
-
AnswerAskResponseItem
|
|
2398
|
-
ReasoningAskResponseItem
|
|
2399
|
-
JSONAskResponseItem
|
|
2400
|
-
MetadataAskResponseItem
|
|
2401
|
-
AugmentedContextResponseItem
|
|
2402
|
-
CitationsAskResponseItem
|
|
2403
|
-
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
|
|
2408
|
-
|
|
2409
|
-
|
|
2410
|
-
|
|
2449
|
+
AskResponseItemType = (
|
|
2450
|
+
AnswerAskResponseItem
|
|
2451
|
+
| ReasoningAskResponseItem
|
|
2452
|
+
| JSONAskResponseItem
|
|
2453
|
+
| MetadataAskResponseItem
|
|
2454
|
+
| AugmentedContextResponseItem
|
|
2455
|
+
| CitationsAskResponseItem
|
|
2456
|
+
| FootnoteCitationsAskResponseItem
|
|
2457
|
+
| StatusAskResponseItem
|
|
2458
|
+
| ErrorAskResponseItem
|
|
2459
|
+
| RetrievalAskResponseItem
|
|
2460
|
+
| RelationsAskResponseItem
|
|
2461
|
+
| DebugAskResponseItem
|
|
2462
|
+
| PrequeriesAskResponseItem
|
|
2463
|
+
| ConsumptionResponseItem
|
|
2464
|
+
)
|
|
2411
2465
|
|
|
2412
2466
|
|
|
2413
2467
|
class AskResponseItem(BaseModel):
|
|
@@ -2427,7 +2481,7 @@ def parse_custom_prompt(item: AskRequest) -> CustomPrompt:
|
|
|
2427
2481
|
return prompt
|
|
2428
2482
|
|
|
2429
2483
|
|
|
2430
|
-
def parse_rephrase_prompt(item: AskRequest) ->
|
|
2484
|
+
def parse_rephrase_prompt(item: AskRequest) -> str | None:
|
|
2431
2485
|
prompt = parse_custom_prompt(item)
|
|
2432
2486
|
return prompt.rephrase
|
|
2433
2487
|
|
|
@@ -2438,7 +2492,7 @@ FindRequest.model_rebuild()
|
|
|
2438
2492
|
|
|
2439
2493
|
class CatalogFacetsPrefix(BaseModel):
|
|
2440
2494
|
prefix: str = Field(pattern="^((/[^/]+)*)$")
|
|
2441
|
-
depth:
|
|
2495
|
+
depth: int | None = Field(
|
|
2442
2496
|
default=None,
|
|
2443
2497
|
ge=0,
|
|
2444
2498
|
description="Only include facets up to this depth from the prefix, leave empty to include all depths",
|