nucliadb-models 6.9.6.post5453__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb_models/agents/ingestion.py +4 -4
- nucliadb_models/augment.py +294 -24
- nucliadb_models/common.py +57 -57
- nucliadb_models/configuration.py +8 -8
- nucliadb_models/content_types.py +13 -11
- nucliadb_models/conversation.py +25 -26
- nucliadb_models/entities.py +17 -18
- nucliadb_models/external_index_providers.py +1 -2
- nucliadb_models/extracted.py +82 -83
- nucliadb_models/file.py +10 -11
- nucliadb_models/filters.py +79 -75
- nucliadb_models/graph/requests.py +40 -48
- nucliadb_models/graph/responses.py +13 -1
- nucliadb_models/hydration.py +48 -50
- nucliadb_models/internal/predict.py +7 -9
- nucliadb_models/internal/shards.py +2 -3
- nucliadb_models/labels.py +18 -11
- nucliadb_models/link.py +18 -19
- nucliadb_models/metadata.py +66 -54
- nucliadb_models/notifications.py +3 -3
- nucliadb_models/processing.py +1 -2
- nucliadb_models/resource.py +85 -102
- nucliadb_models/retrieval.py +147 -0
- nucliadb_models/search.py +300 -276
- nucliadb_models/security.py +2 -3
- nucliadb_models/text.py +7 -8
- nucliadb_models/trainset.py +1 -2
- nucliadb_models/utils.py +2 -3
- nucliadb_models/vectors.py +2 -5
- nucliadb_models/writer.py +56 -57
- {nucliadb_models-6.9.6.post5453.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/METADATA +1 -1
- nucliadb_models-6.11.1.post5822.dist-info/RECORD +41 -0
- {nucliadb_models-6.9.6.post5453.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/WHEEL +1 -1
- nucliadb_models-6.9.6.post5453.dist-info/RECORD +0 -40
- {nucliadb_models-6.9.6.post5453.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/top_level.txt +0 -0
nucliadb_models/search.py
CHANGED
|
@@ -14,19 +14,19 @@
|
|
|
14
14
|
#
|
|
15
15
|
import json
|
|
16
16
|
from enum import Enum
|
|
17
|
-
from typing import Any, Literal
|
|
17
|
+
from typing import Annotated, Any, Literal
|
|
18
|
+
from uuid import UUID
|
|
18
19
|
|
|
19
20
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
20
21
|
from pydantic.aliases import AliasChoices
|
|
21
22
|
from pydantic.json_schema import SkipJsonSchema
|
|
22
|
-
from typing_extensions import
|
|
23
|
+
from typing_extensions import Self
|
|
23
24
|
|
|
24
25
|
from nucliadb_models import RelationMetadata
|
|
25
26
|
from nucliadb_models.common import FieldTypeName, ParamDefault
|
|
26
27
|
from nucliadb_models.graph.requests import GraphPathQuery
|
|
27
28
|
|
|
28
29
|
# Bw/c import to avoid breaking users
|
|
29
|
-
# noqa isort: skip
|
|
30
30
|
from nucliadb_models.metadata import RelationNodeType, RelationType, ResourceProcessingStatus
|
|
31
31
|
from nucliadb_models.resource import ExtractedDataTypeName, Resource
|
|
32
32
|
from nucliadb_models.security import RequestSecurity
|
|
@@ -152,12 +152,12 @@ FacetsResult = dict[str, Any]
|
|
|
152
152
|
|
|
153
153
|
|
|
154
154
|
class TextPosition(BaseModel):
|
|
155
|
-
page_number:
|
|
155
|
+
page_number: int | None = None
|
|
156
156
|
index: int
|
|
157
157
|
start: int
|
|
158
158
|
end: int
|
|
159
|
-
start_seconds:
|
|
160
|
-
end_seconds:
|
|
159
|
+
start_seconds: list[int] | None = None
|
|
160
|
+
end_seconds: list[int] | None = None
|
|
161
161
|
|
|
162
162
|
|
|
163
163
|
class Sentence(BaseModel):
|
|
@@ -166,8 +166,8 @@ class Sentence(BaseModel):
|
|
|
166
166
|
text: str
|
|
167
167
|
field_type: str
|
|
168
168
|
field: str
|
|
169
|
-
index:
|
|
170
|
-
position:
|
|
169
|
+
index: str | None = None
|
|
170
|
+
position: TextPosition | None = None
|
|
171
171
|
|
|
172
172
|
|
|
173
173
|
class Sentences(BaseModel):
|
|
@@ -177,7 +177,7 @@ class Sentences(BaseModel):
|
|
|
177
177
|
page_size: int = 20
|
|
178
178
|
min_score: float = Field(
|
|
179
179
|
title="Minimum score",
|
|
180
|
-
description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
|
|
180
|
+
description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
|
|
181
181
|
)
|
|
182
182
|
|
|
183
183
|
|
|
@@ -188,45 +188,45 @@ class Paragraph(BaseModel):
|
|
|
188
188
|
field: str
|
|
189
189
|
text: str
|
|
190
190
|
labels: list[str] = []
|
|
191
|
-
start_seconds:
|
|
192
|
-
end_seconds:
|
|
193
|
-
position:
|
|
191
|
+
start_seconds: list[int] | None = None
|
|
192
|
+
end_seconds: list[int] | None = None
|
|
193
|
+
position: TextPosition | None = None
|
|
194
194
|
fuzzy_result: bool = False
|
|
195
195
|
|
|
196
196
|
|
|
197
197
|
class Paragraphs(BaseModel):
|
|
198
198
|
results: list[Paragraph] = []
|
|
199
|
-
facets:
|
|
200
|
-
query:
|
|
199
|
+
facets: FacetsResult | None = None
|
|
200
|
+
query: str | None = Field(default=None, title="Paragraphs Query")
|
|
201
201
|
total: int = 0
|
|
202
202
|
page_number: int = 0
|
|
203
203
|
page_size: int = 20
|
|
204
204
|
next_page: bool = False
|
|
205
205
|
min_score: float = Field(
|
|
206
206
|
title="Minimum score",
|
|
207
|
-
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
207
|
+
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
208
208
|
)
|
|
209
209
|
|
|
210
210
|
|
|
211
211
|
class ResourceResult(BaseModel):
|
|
212
|
-
score:
|
|
212
|
+
score: float | int
|
|
213
213
|
rid: str
|
|
214
214
|
field_type: str
|
|
215
215
|
field: str
|
|
216
|
-
labels:
|
|
216
|
+
labels: list[str] | None = None
|
|
217
217
|
|
|
218
218
|
|
|
219
219
|
class Resources(BaseModel):
|
|
220
220
|
results: list[ResourceResult]
|
|
221
|
-
facets:
|
|
222
|
-
query:
|
|
221
|
+
facets: FacetsResult | None = None
|
|
222
|
+
query: str | None = Field(default=None, title="Resources Query")
|
|
223
223
|
total: int = 0
|
|
224
224
|
page_number: int = 0
|
|
225
225
|
page_size: int = 20
|
|
226
226
|
next_page: bool = False
|
|
227
227
|
min_score: float = Field(
|
|
228
228
|
title="Minimum score",
|
|
229
|
-
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
229
|
+
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
230
230
|
)
|
|
231
231
|
|
|
232
232
|
|
|
@@ -246,7 +246,7 @@ class DirectionalRelation(BaseModel):
|
|
|
246
246
|
relation: RelationType
|
|
247
247
|
relation_label: str
|
|
248
248
|
direction: RelationDirection
|
|
249
|
-
metadata:
|
|
249
|
+
metadata: RelationMetadata | None = None
|
|
250
250
|
resource_id: str
|
|
251
251
|
|
|
252
252
|
|
|
@@ -280,23 +280,23 @@ class RelatedEntities(BaseModel):
|
|
|
280
280
|
class ResourceSearchResults(JsonBaseModel):
|
|
281
281
|
"""Search on resource results"""
|
|
282
282
|
|
|
283
|
-
sentences:
|
|
284
|
-
paragraphs:
|
|
285
|
-
relations:
|
|
286
|
-
nodes:
|
|
287
|
-
shards:
|
|
283
|
+
sentences: Sentences | None = None
|
|
284
|
+
paragraphs: Paragraphs | None = None
|
|
285
|
+
relations: Relations | None = None
|
|
286
|
+
nodes: list[dict[str, str]] | None = None
|
|
287
|
+
shards: list[str] | None = None
|
|
288
288
|
|
|
289
289
|
|
|
290
290
|
class KnowledgeboxSearchResults(JsonBaseModel):
|
|
291
291
|
"""Search on knowledgebox results"""
|
|
292
292
|
|
|
293
293
|
resources: dict[str, Resource] = {}
|
|
294
|
-
sentences:
|
|
295
|
-
paragraphs:
|
|
296
|
-
fulltext:
|
|
297
|
-
relations:
|
|
298
|
-
nodes:
|
|
299
|
-
shards:
|
|
294
|
+
sentences: Sentences | None = None
|
|
295
|
+
paragraphs: Paragraphs | None = None
|
|
296
|
+
fulltext: Resources | None = None
|
|
297
|
+
relations: Relations | None = None
|
|
298
|
+
nodes: list[dict[str, str]] | None = None
|
|
299
|
+
shards: list[str] | None = None
|
|
300
300
|
|
|
301
301
|
# TODO: remove on a future major release
|
|
302
302
|
autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
|
|
@@ -306,16 +306,16 @@ class CatalogResponse(BaseModel):
|
|
|
306
306
|
"""Catalog results"""
|
|
307
307
|
|
|
308
308
|
resources: dict[str, Resource] = {}
|
|
309
|
-
fulltext:
|
|
310
|
-
shards:
|
|
309
|
+
fulltext: Resources | None = None
|
|
310
|
+
shards: list[str] | None = None
|
|
311
311
|
|
|
312
312
|
|
|
313
313
|
class KnowledgeboxSuggestResults(JsonBaseModel):
|
|
314
314
|
"""Suggest on resource results"""
|
|
315
315
|
|
|
316
|
-
paragraphs:
|
|
317
|
-
entities:
|
|
318
|
-
shards:
|
|
316
|
+
paragraphs: Paragraphs | None = None
|
|
317
|
+
entities: RelatedEntities | None = None
|
|
318
|
+
shards: list[str] | None = None
|
|
319
319
|
|
|
320
320
|
|
|
321
321
|
class KnowledgeboxCounters(BaseModel):
|
|
@@ -323,7 +323,7 @@ class KnowledgeboxCounters(BaseModel):
|
|
|
323
323
|
paragraphs: int
|
|
324
324
|
fields: int
|
|
325
325
|
sentences: int
|
|
326
|
-
shards:
|
|
326
|
+
shards: list[str] | None = None
|
|
327
327
|
index_size: float = Field(default=0.0, title="Index size (bytes)")
|
|
328
328
|
|
|
329
329
|
|
|
@@ -378,13 +378,13 @@ class ReciprocalRankFusion(_BaseRankFusion):
|
|
|
378
378
|
k: float = Field(
|
|
379
379
|
default=60.0,
|
|
380
380
|
title="RRF k parameter",
|
|
381
|
-
description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
|
|
381
|
+
description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
|
|
382
382
|
)
|
|
383
|
-
window:
|
|
383
|
+
window: int | None = Field(
|
|
384
384
|
default=None,
|
|
385
385
|
le=MAX_RANK_FUSION_WINDOW,
|
|
386
386
|
title="RRF window",
|
|
387
|
-
description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
|
|
387
|
+
description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
|
|
388
388
|
)
|
|
389
389
|
boosting: ReciprocalRankFusionWeights = Field(
|
|
390
390
|
default_factory=ReciprocalRankFusionWeights,
|
|
@@ -395,12 +395,12 @@ Define different weights for each retriever. This allows to assign different pri
|
|
|
395
395
|
The default is 1 for each retriever, which means no extra boost for any of them. Weights below 0 can be used for negative boosting.
|
|
396
396
|
|
|
397
397
|
This kind of boosting can be useful in multilingual search, for example, where keyword search may not give good results and can degrade the final search experience
|
|
398
|
-
""",
|
|
398
|
+
""",
|
|
399
399
|
)
|
|
400
400
|
|
|
401
401
|
|
|
402
402
|
RankFusion = Annotated[
|
|
403
|
-
|
|
403
|
+
ReciprocalRankFusion,
|
|
404
404
|
Field(discriminator="name"),
|
|
405
405
|
]
|
|
406
406
|
|
|
@@ -435,15 +435,15 @@ class _BaseReranker(BaseModel):
|
|
|
435
435
|
|
|
436
436
|
class PredictReranker(_BaseReranker):
|
|
437
437
|
name: Literal[RerankerName.PREDICT_RERANKER] = RerankerName.PREDICT_RERANKER
|
|
438
|
-
window:
|
|
438
|
+
window: int | None = Field(
|
|
439
439
|
default=None,
|
|
440
440
|
le=200,
|
|
441
441
|
title="Reranker window",
|
|
442
|
-
description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
|
|
442
|
+
description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
|
|
443
443
|
)
|
|
444
444
|
|
|
445
445
|
|
|
446
|
-
Reranker = Annotated[
|
|
446
|
+
Reranker = Annotated[PredictReranker, Field(discriminator="name")]
|
|
447
447
|
|
|
448
448
|
|
|
449
449
|
class KnowledgeBoxCount(BaseModel):
|
|
@@ -472,18 +472,18 @@ class SearchParamDefaults:
|
|
|
472
472
|
)
|
|
473
473
|
filters = ParamDefault(
|
|
474
474
|
default=[],
|
|
475
|
-
title="Filters",
|
|
476
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
475
|
+
title="Search Filters",
|
|
476
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
477
477
|
)
|
|
478
478
|
resource_filters = ParamDefault(
|
|
479
479
|
default=[],
|
|
480
480
|
title="Resources filter",
|
|
481
|
-
description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
|
|
481
|
+
description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
|
|
482
482
|
)
|
|
483
483
|
faceted = ParamDefault(
|
|
484
484
|
default=[],
|
|
485
485
|
title="Faceted",
|
|
486
|
-
description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
486
|
+
description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
487
487
|
max_items=50,
|
|
488
488
|
)
|
|
489
489
|
chat_query = ParamDefault(
|
|
@@ -520,12 +520,12 @@ class SearchParamDefaults:
|
|
|
520
520
|
highlight = ParamDefault(
|
|
521
521
|
default=False,
|
|
522
522
|
title="Highlight",
|
|
523
|
-
description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
|
|
523
|
+
description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
|
|
524
524
|
)
|
|
525
525
|
with_duplicates = ParamDefault(
|
|
526
526
|
default=False,
|
|
527
527
|
title="With duplicate paragraphs",
|
|
528
|
-
description="Whether to return duplicate paragraphs on the same document",
|
|
528
|
+
description="Whether to return duplicate paragraphs on the same document",
|
|
529
529
|
)
|
|
530
530
|
with_status = ParamDefault(
|
|
531
531
|
default=None,
|
|
@@ -535,7 +535,7 @@ class SearchParamDefaults:
|
|
|
535
535
|
with_synonyms = ParamDefault(
|
|
536
536
|
default=False,
|
|
537
537
|
title="With custom synonyms",
|
|
538
|
-
description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
|
|
538
|
+
description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
|
|
539
539
|
)
|
|
540
540
|
sort_order = ParamDefault(
|
|
541
541
|
default=SortOrder.DESC,
|
|
@@ -565,12 +565,12 @@ class SearchParamDefaults:
|
|
|
565
565
|
reranker = ParamDefault(
|
|
566
566
|
default=RerankerName.PREDICT_RERANKER,
|
|
567
567
|
title="Reranker",
|
|
568
|
-
description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
|
|
568
|
+
description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
|
|
569
569
|
)
|
|
570
570
|
debug = ParamDefault(
|
|
571
571
|
default=False,
|
|
572
572
|
title="Debug mode",
|
|
573
|
-
description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
|
|
573
|
+
description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
|
|
574
574
|
)
|
|
575
575
|
show = ParamDefault(
|
|
576
576
|
default=[ResourceProperties.BASIC],
|
|
@@ -591,27 +591,27 @@ class SearchParamDefaults:
|
|
|
591
591
|
range_creation_start = ParamDefault(
|
|
592
592
|
default=None,
|
|
593
593
|
title="Resource creation range start",
|
|
594
|
-
description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
594
|
+
description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
595
595
|
)
|
|
596
596
|
range_creation_end = ParamDefault(
|
|
597
597
|
default=None,
|
|
598
598
|
title="Resource creation range end",
|
|
599
|
-
description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
599
|
+
description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
600
600
|
)
|
|
601
601
|
range_modification_start = ParamDefault(
|
|
602
602
|
default=None,
|
|
603
603
|
title="Resource modification range start",
|
|
604
|
-
description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
604
|
+
description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
605
605
|
)
|
|
606
606
|
range_modification_end = ParamDefault(
|
|
607
607
|
default=None,
|
|
608
608
|
title="Resource modification range end",
|
|
609
|
-
description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
609
|
+
description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
610
610
|
)
|
|
611
611
|
vector = ParamDefault(
|
|
612
612
|
default=None,
|
|
613
613
|
title="Search Vector",
|
|
614
|
-
description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
|
|
614
|
+
description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
|
|
615
615
|
)
|
|
616
616
|
vectorset = ParamDefault(
|
|
617
617
|
default=None,
|
|
@@ -627,12 +627,12 @@ class SearchParamDefaults:
|
|
|
627
627
|
chat_history = ParamDefault(
|
|
628
628
|
default=None,
|
|
629
629
|
title="Chat history",
|
|
630
|
-
description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
|
|
630
|
+
description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
|
|
631
631
|
)
|
|
632
632
|
chat_features = ParamDefault(
|
|
633
633
|
default=[ChatOptions.SEMANTIC, ChatOptions.KEYWORD],
|
|
634
634
|
title="Chat features",
|
|
635
|
-
description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
|
|
635
|
+
description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
|
|
636
636
|
)
|
|
637
637
|
suggest_features = ParamDefault(
|
|
638
638
|
default=[
|
|
@@ -645,17 +645,17 @@ class SearchParamDefaults:
|
|
|
645
645
|
security = ParamDefault(
|
|
646
646
|
default=None,
|
|
647
647
|
title="Security",
|
|
648
|
-
description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
|
|
648
|
+
description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
|
|
649
649
|
)
|
|
650
650
|
security_groups = ParamDefault(
|
|
651
651
|
default=[],
|
|
652
652
|
title="Security groups",
|
|
653
|
-
description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
|
|
653
|
+
description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
|
|
654
654
|
)
|
|
655
655
|
rephrase = ParamDefault(
|
|
656
656
|
default=False,
|
|
657
657
|
title="Rephrase query consuming LLMs",
|
|
658
|
-
description="Rephrase query consuming LLMs - it will make the query slower",
|
|
658
|
+
description="Rephrase query consuming LLMs - it will make the query slower",
|
|
659
659
|
)
|
|
660
660
|
prefer_markdown = ParamDefault(
|
|
661
661
|
default=False,
|
|
@@ -695,10 +695,10 @@ class SearchParamDefaults:
|
|
|
695
695
|
|
|
696
696
|
|
|
697
697
|
class Filter(BaseModel):
|
|
698
|
-
all:
|
|
699
|
-
any:
|
|
700
|
-
none:
|
|
701
|
-
not_all:
|
|
698
|
+
all: list[str] | None = Field(default=None, min_length=1)
|
|
699
|
+
any: list[str] | None = Field(default=None, min_length=1)
|
|
700
|
+
none: list[str] | None = Field(default=None, min_length=1)
|
|
701
|
+
not_all: list[str] | None = Field(default=None, min_length=1)
|
|
702
702
|
|
|
703
703
|
@model_validator(mode="after")
|
|
704
704
|
def validate_filter(self) -> Self:
|
|
@@ -740,19 +740,19 @@ class CatalogQuery(BaseModel):
|
|
|
740
740
|
|
|
741
741
|
|
|
742
742
|
class CatalogRequest(BaseModel):
|
|
743
|
-
query:
|
|
743
|
+
query: str | CatalogQuery = ParamDefault(
|
|
744
744
|
default="",
|
|
745
|
-
title="Query",
|
|
745
|
+
title="Catalog Request Query",
|
|
746
746
|
description="The query to search for",
|
|
747
747
|
).to_pydantic_field()
|
|
748
|
-
filter_expression:
|
|
748
|
+
filter_expression: CatalogFilterExpression | None = (
|
|
749
749
|
SearchParamDefaults.catalog_filter_expression.to_pydantic_field()
|
|
750
750
|
)
|
|
751
751
|
faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
|
|
752
|
-
sort:
|
|
752
|
+
sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
|
|
753
753
|
page_number: int = SearchParamDefaults.catalog_page_number.to_pydantic_field()
|
|
754
754
|
page_size: int = SearchParamDefaults.catalog_page_size.to_pydantic_field()
|
|
755
|
-
hidden:
|
|
755
|
+
hidden: bool | None = SearchParamDefaults.hidden.to_pydantic_field()
|
|
756
756
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field(
|
|
757
757
|
default=[ResourceProperties.BASIC, ResourceProperties.ERRORS]
|
|
758
758
|
)
|
|
@@ -760,32 +760,30 @@ class CatalogRequest(BaseModel):
|
|
|
760
760
|
debug: SkipJsonSchema[bool] = SearchParamDefaults.debug.to_pydantic_field()
|
|
761
761
|
|
|
762
762
|
# Deprecated filter parameters
|
|
763
|
-
filters:
|
|
763
|
+
filters: list[str] | list[Filter] = Field(
|
|
764
764
|
default=[],
|
|
765
|
-
title="Filters",
|
|
766
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
765
|
+
title="Catalog Filters",
|
|
766
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
767
767
|
deprecated="Use filter_expression instead",
|
|
768
768
|
)
|
|
769
|
-
with_status:
|
|
769
|
+
with_status: ResourceProcessingStatus | None = Field(
|
|
770
770
|
default=None,
|
|
771
771
|
title="With processing status",
|
|
772
772
|
description="Filter results by resource processing status",
|
|
773
773
|
deprecated="Use filter_expression instead",
|
|
774
774
|
)
|
|
775
|
-
range_creation_start:
|
|
776
|
-
|
|
777
|
-
deprecated="Use filter_expression instead",
|
|
778
|
-
)
|
|
775
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field(
|
|
776
|
+
deprecated="Use filter_expression instead",
|
|
779
777
|
)
|
|
780
|
-
range_creation_end:
|
|
778
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field(
|
|
781
779
|
deprecated="Use filter_expression instead",
|
|
782
780
|
)
|
|
783
|
-
range_modification_start:
|
|
781
|
+
range_modification_start: DateTime | None = (
|
|
784
782
|
SearchParamDefaults.range_modification_start.to_pydantic_field(
|
|
785
783
|
deprecated="Use filter_expression instead",
|
|
786
784
|
)
|
|
787
785
|
)
|
|
788
|
-
range_modification_end:
|
|
786
|
+
range_modification_end: DateTime | None = (
|
|
789
787
|
SearchParamDefaults.range_modification_end.to_pydantic_field(
|
|
790
788
|
deprecated="Use filter_expression instead",
|
|
791
789
|
)
|
|
@@ -798,15 +796,15 @@ class CatalogRequest(BaseModel):
|
|
|
798
796
|
|
|
799
797
|
|
|
800
798
|
class MinScore(BaseModel):
|
|
801
|
-
semantic:
|
|
799
|
+
semantic: float | None = Field(
|
|
802
800
|
default=None,
|
|
803
801
|
title="Minimum semantic score",
|
|
804
|
-
description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
802
|
+
description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
805
803
|
)
|
|
806
804
|
bm25: float = Field(
|
|
807
805
|
default=0,
|
|
808
806
|
title="Minimum bm25 score",
|
|
809
|
-
description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
807
|
+
description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
810
808
|
ge=0,
|
|
811
809
|
)
|
|
812
810
|
|
|
@@ -820,7 +818,7 @@ AUDIT_METADATA_MAX_BYTES = 1024 * 10 # 10KB
|
|
|
820
818
|
|
|
821
819
|
|
|
822
820
|
class AuditMetadataBase(BaseModel):
|
|
823
|
-
audit_metadata:
|
|
821
|
+
audit_metadata: dict[str, str] | None = Field(
|
|
824
822
|
default=None,
|
|
825
823
|
title="Audit metadata",
|
|
826
824
|
description=(
|
|
@@ -844,29 +842,27 @@ class AuditMetadataBase(BaseModel):
|
|
|
844
842
|
|
|
845
843
|
class BaseSearchRequest(AuditMetadataBase):
|
|
846
844
|
query: str = SearchParamDefaults.query.to_pydantic_field()
|
|
847
|
-
filter_expression:
|
|
845
|
+
filter_expression: FilterExpression | None = (
|
|
848
846
|
SearchParamDefaults.filter_expression.to_pydantic_field()
|
|
849
847
|
)
|
|
850
848
|
fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
|
|
851
|
-
filters:
|
|
849
|
+
filters: list[str] | list[Filter] = Field(
|
|
852
850
|
default=[],
|
|
853
|
-
title="Filters",
|
|
854
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
851
|
+
title="Search Filters",
|
|
852
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
855
853
|
)
|
|
856
854
|
top_k: int = SearchParamDefaults.top_k.to_pydantic_field()
|
|
857
|
-
min_score:
|
|
855
|
+
min_score: float | MinScore | None = Field(
|
|
858
856
|
default=None,
|
|
859
857
|
title="Minimum score",
|
|
860
|
-
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
861
|
-
)
|
|
862
|
-
range_creation_start: Optional[DateTime] = (
|
|
863
|
-
SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
858
|
+
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
864
859
|
)
|
|
865
|
-
|
|
866
|
-
|
|
860
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
861
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
862
|
+
range_modification_start: DateTime | None = (
|
|
867
863
|
SearchParamDefaults.range_modification_start.to_pydantic_field()
|
|
868
864
|
)
|
|
869
|
-
range_modification_end:
|
|
865
|
+
range_modification_end: DateTime | None = (
|
|
870
866
|
SearchParamDefaults.range_modification_end.to_pydantic_field()
|
|
871
867
|
)
|
|
872
868
|
debug: bool = SearchParamDefaults.debug.to_pydantic_field()
|
|
@@ -874,15 +870,15 @@ class BaseSearchRequest(AuditMetadataBase):
|
|
|
874
870
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
|
|
875
871
|
field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
|
|
876
872
|
extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
|
|
877
|
-
vector:
|
|
878
|
-
vectorset:
|
|
873
|
+
vector: list[float] | None = SearchParamDefaults.vector.to_pydantic_field()
|
|
874
|
+
vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
|
|
879
875
|
with_duplicates: bool = SearchParamDefaults.with_duplicates.to_pydantic_field()
|
|
880
876
|
with_synonyms: bool = SearchParamDefaults.with_synonyms.to_pydantic_field()
|
|
881
877
|
# autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
|
|
882
878
|
# avoid breaking changes in the python sdks. Please remove on a future major release.
|
|
883
879
|
autofilter: SkipJsonSchema[bool] = False
|
|
884
880
|
resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
|
|
885
|
-
security:
|
|
881
|
+
security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
|
|
886
882
|
show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
|
|
887
883
|
|
|
888
884
|
rephrase: bool = Field(
|
|
@@ -892,7 +888,7 @@ class BaseSearchRequest(AuditMetadataBase):
|
|
|
892
888
|
),
|
|
893
889
|
)
|
|
894
890
|
|
|
895
|
-
rephrase_prompt:
|
|
891
|
+
rephrase_prompt: str | None = Field(
|
|
896
892
|
default=None,
|
|
897
893
|
title="Rephrase",
|
|
898
894
|
description=(
|
|
@@ -911,7 +907,7 @@ Please return ONLY the question without any explanation. Just the rephrased ques
|
|
|
911
907
|
Please return ONLY the question without any explanation.""",
|
|
912
908
|
],
|
|
913
909
|
)
|
|
914
|
-
query_image:
|
|
910
|
+
query_image: Image | None = Field(
|
|
915
911
|
default=None,
|
|
916
912
|
title="Query image",
|
|
917
913
|
description="Image that will be used together with the query text for retrieval.",
|
|
@@ -931,6 +927,13 @@ Please return ONLY the question without any explanation. Just the rephrased ques
|
|
|
931
927
|
values["top_k"] = SearchParamDefaults.top_k.default
|
|
932
928
|
return values
|
|
933
929
|
|
|
930
|
+
@field_validator("resource_filters", mode="after")
|
|
931
|
+
def validate_resource_filters(cls, values: list[str]) -> list[str]:
|
|
932
|
+
if values is not None:
|
|
933
|
+
for v in values:
|
|
934
|
+
_validate_resource_filter(v)
|
|
935
|
+
return values
|
|
936
|
+
|
|
934
937
|
|
|
935
938
|
class SearchRequest(BaseSearchRequest):
|
|
936
939
|
features: list[SearchOptions] = SearchParamDefaults.search_features.to_pydantic_field(
|
|
@@ -941,7 +944,7 @@ class SearchRequest(BaseSearchRequest):
|
|
|
941
944
|
]
|
|
942
945
|
)
|
|
943
946
|
faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
|
|
944
|
-
sort:
|
|
947
|
+
sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
|
|
945
948
|
offset: int = SearchParamDefaults.offset.to_pydantic_field()
|
|
946
949
|
|
|
947
950
|
@field_validator("faceted")
|
|
@@ -962,7 +965,7 @@ class SearchRequest(BaseSearchRequest):
|
|
|
962
965
|
|
|
963
966
|
@field_validator("sort", mode="after")
|
|
964
967
|
@classmethod
|
|
965
|
-
def sorting_by_title_not_supported(cls, value:
|
|
968
|
+
def sorting_by_title_not_supported(cls, value: SortOptions | None) -> SortOptions | None:
|
|
966
969
|
if value and value.field == SortField.TITLE:
|
|
967
970
|
raise ValueError("sorting by title not supported in /search")
|
|
968
971
|
|
|
@@ -988,19 +991,19 @@ class UserPrompt(BaseModel):
|
|
|
988
991
|
|
|
989
992
|
|
|
990
993
|
class MaxTokens(BaseModel):
|
|
991
|
-
context:
|
|
994
|
+
context: int | None = Field(
|
|
992
995
|
default=None,
|
|
993
996
|
title="Maximum context tokens",
|
|
994
997
|
description="Use to limit the amount of tokens used in the LLM context",
|
|
995
998
|
)
|
|
996
|
-
answer:
|
|
999
|
+
answer: int | None = Field(
|
|
997
1000
|
default=None,
|
|
998
1001
|
title="Maximum answer tokens",
|
|
999
1002
|
description="Use to limit the amount of tokens used in the LLM answer",
|
|
1000
1003
|
)
|
|
1001
1004
|
|
|
1002
1005
|
|
|
1003
|
-
def parse_max_tokens(max_tokens:
|
|
1006
|
+
def parse_max_tokens(max_tokens: int | MaxTokens | None) -> MaxTokens | None:
|
|
1004
1007
|
if isinstance(max_tokens, int):
|
|
1005
1008
|
# If the max_tokens is an integer, it is interpreted as the max_tokens value for the generated answer.
|
|
1006
1009
|
# The max tokens for the context is set to None to use the default value for the model (comes in the
|
|
@@ -1046,7 +1049,7 @@ class ChatModel(BaseModel):
|
|
|
1046
1049
|
question: str = Field(description="Question to ask the generative model")
|
|
1047
1050
|
user_id: str
|
|
1048
1051
|
retrieval: bool = True
|
|
1049
|
-
system:
|
|
1052
|
+
system: str | None = Field(
|
|
1050
1053
|
default=None,
|
|
1051
1054
|
title="System prompt",
|
|
1052
1055
|
description="Optional system prompt input by the user",
|
|
@@ -1055,9 +1058,9 @@ class ChatModel(BaseModel):
|
|
|
1055
1058
|
default={},
|
|
1056
1059
|
description="The information retrieval context for the current query",
|
|
1057
1060
|
)
|
|
1058
|
-
query_context_order:
|
|
1061
|
+
query_context_order: dict[str, int] | None = Field(
|
|
1059
1062
|
default=None,
|
|
1060
|
-
description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
|
|
1063
|
+
description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
|
|
1061
1064
|
)
|
|
1062
1065
|
chat_history: list[ChatContextMessage] = Field(
|
|
1063
1066
|
default=[], description="The chat conversation history"
|
|
@@ -1066,29 +1069,29 @@ class ChatModel(BaseModel):
|
|
|
1066
1069
|
default=True,
|
|
1067
1070
|
description="Truncate the chat context in case it doesn't fit the generative input",
|
|
1068
1071
|
)
|
|
1069
|
-
user_prompt:
|
|
1072
|
+
user_prompt: UserPrompt | None = Field(
|
|
1070
1073
|
default=None, description="Optional custom prompt input by the user"
|
|
1071
1074
|
)
|
|
1072
|
-
citations:
|
|
1075
|
+
citations: bool | None | CitationsType = Field(
|
|
1073
1076
|
default=None,
|
|
1074
1077
|
description="Whether to include citations in the response. "
|
|
1075
1078
|
"If set to None or False, no citations will be computed. "
|
|
1076
1079
|
"If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
|
|
1077
1080
|
"If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
|
|
1078
1081
|
)
|
|
1079
|
-
citation_threshold:
|
|
1082
|
+
citation_threshold: float | None = Field(
|
|
1080
1083
|
default=None,
|
|
1081
1084
|
description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
|
|
1082
1085
|
ge=0.0,
|
|
1083
1086
|
le=1.0,
|
|
1084
1087
|
)
|
|
1085
|
-
generative_model:
|
|
1088
|
+
generative_model: str | None = Field(
|
|
1086
1089
|
default=None,
|
|
1087
1090
|
title="Generative model",
|
|
1088
|
-
description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1091
|
+
description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1089
1092
|
)
|
|
1090
1093
|
|
|
1091
|
-
max_tokens:
|
|
1094
|
+
max_tokens: int | None = Field(default=None, description="Maximum characters to generate")
|
|
1092
1095
|
|
|
1093
1096
|
query_context_images: dict[str, Image] = Field(
|
|
1094
1097
|
default={},
|
|
@@ -1099,7 +1102,7 @@ class ChatModel(BaseModel):
|
|
|
1099
1102
|
default=False,
|
|
1100
1103
|
description="If set to true, the response will be in markdown format",
|
|
1101
1104
|
)
|
|
1102
|
-
json_schema:
|
|
1105
|
+
json_schema: dict[str, Any] | None = Field(
|
|
1103
1106
|
default=None,
|
|
1104
1107
|
description="The JSON schema to use for the generative model answers",
|
|
1105
1108
|
)
|
|
@@ -1107,17 +1110,18 @@ class ChatModel(BaseModel):
|
|
|
1107
1110
|
default=False,
|
|
1108
1111
|
description="Whether to reorder the query context based on a reranker",
|
|
1109
1112
|
)
|
|
1110
|
-
top_k:
|
|
1113
|
+
top_k: int | None = Field(default=None, description="Number of best elements to get from")
|
|
1111
1114
|
|
|
1112
1115
|
format_prompt: bool = Field(
|
|
1113
1116
|
default=True,
|
|
1114
|
-
description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
|
|
1117
|
+
description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
|
|
1115
1118
|
)
|
|
1116
|
-
seed:
|
|
1119
|
+
seed: int | None = Field(
|
|
1117
1120
|
default=None,
|
|
1118
1121
|
description="Seed use for the generative model for a deterministic output.",
|
|
1119
1122
|
)
|
|
1120
|
-
reasoning:
|
|
1123
|
+
reasoning: Reasoning | bool = Field(
|
|
1124
|
+
title="Reasoning options",
|
|
1121
1125
|
default=False,
|
|
1122
1126
|
description=(
|
|
1123
1127
|
"Reasoning options for the generative model. "
|
|
@@ -1131,26 +1135,25 @@ class RephraseModel(BaseModel):
|
|
|
1131
1135
|
chat_history: list[ChatContextMessage] = []
|
|
1132
1136
|
user_id: str
|
|
1133
1137
|
user_context: list[str] = []
|
|
1134
|
-
generative_model:
|
|
1138
|
+
generative_model: str | None = Field(
|
|
1135
1139
|
default=None,
|
|
1136
1140
|
title="Generative model",
|
|
1137
|
-
description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1141
|
+
description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1138
1142
|
)
|
|
1139
|
-
chat_history_relevance_threshold:
|
|
1143
|
+
chat_history_relevance_threshold: (
|
|
1140
1144
|
Annotated[
|
|
1141
1145
|
float,
|
|
1142
1146
|
Field(
|
|
1143
1147
|
ge=0.0,
|
|
1144
1148
|
le=1.0,
|
|
1145
|
-
description=
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
"Values in between adjust the sensitivity."
|
|
1150
|
-
),
|
|
1149
|
+
description="Threshold to determine if the past chat history is relevant to rephrase the user's question. "
|
|
1150
|
+
"0 - Always treat previous messages as relevant (always rephrase)."
|
|
1151
|
+
"1 - Always treat previous messages as irrelevant (never rephrase)."
|
|
1152
|
+
"Values in between adjust the sensitivity.",
|
|
1151
1153
|
),
|
|
1152
1154
|
]
|
|
1153
|
-
|
|
1155
|
+
| None
|
|
1156
|
+
) = None
|
|
1154
1157
|
|
|
1155
1158
|
|
|
1156
1159
|
class RagStrategyName:
|
|
@@ -1234,13 +1237,13 @@ class FullResourceApplyTo(BaseModel):
|
|
|
1234
1237
|
exclude: list[str] = Field(
|
|
1235
1238
|
default_factory=list,
|
|
1236
1239
|
title="Labels to exclude from full resource expansion",
|
|
1237
|
-
description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
|
|
1240
|
+
description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
|
|
1238
1241
|
)
|
|
1239
1242
|
|
|
1240
1243
|
|
|
1241
1244
|
class FullResourceStrategy(RagStrategy):
|
|
1242
1245
|
name: Literal["full_resource"] = "full_resource"
|
|
1243
|
-
count:
|
|
1246
|
+
count: int | None = Field(
|
|
1244
1247
|
default=None,
|
|
1245
1248
|
title="Count",
|
|
1246
1249
|
description="Maximum number of full documents to retrieve. If not specified, all matching documents are retrieved.",
|
|
@@ -1251,7 +1254,7 @@ class FullResourceStrategy(RagStrategy):
|
|
|
1251
1254
|
title="Include remaining text blocks",
|
|
1252
1255
|
description="Whether to include the remaining text blocks after the maximum number of resources has been reached.",
|
|
1253
1256
|
)
|
|
1254
|
-
apply_to:
|
|
1257
|
+
apply_to: FullResourceApplyTo | None = Field(
|
|
1255
1258
|
default=None,
|
|
1256
1259
|
title="Apply to certain resources only",
|
|
1257
1260
|
description="Define which resources to exclude from serialization",
|
|
@@ -1357,7 +1360,7 @@ class PreQuery(BaseModel):
|
|
|
1357
1360
|
),
|
|
1358
1361
|
ge=0,
|
|
1359
1362
|
)
|
|
1360
|
-
id:
|
|
1363
|
+
id: str | None = Field(
|
|
1361
1364
|
default=None,
|
|
1362
1365
|
title="Prequery id",
|
|
1363
1366
|
min_length=1,
|
|
@@ -1491,7 +1494,7 @@ class TableImageStrategy(ImageRagStrategy):
|
|
|
1491
1494
|
|
|
1492
1495
|
class PageImageStrategy(ImageRagStrategy):
|
|
1493
1496
|
name: Literal["page_image"] = "page_image"
|
|
1494
|
-
count:
|
|
1497
|
+
count: int | None = Field(
|
|
1495
1498
|
default=None,
|
|
1496
1499
|
title="Count",
|
|
1497
1500
|
description="Maximum number of page images to retrieve. By default, at most 5 images are retrieved.",
|
|
@@ -1503,20 +1506,18 @@ class ParagraphImageStrategy(ImageRagStrategy):
|
|
|
1503
1506
|
|
|
1504
1507
|
|
|
1505
1508
|
RagStrategies = Annotated[
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
GraphStrategy,
|
|
1515
|
-
],
|
|
1509
|
+
FieldExtensionStrategy
|
|
1510
|
+
| FullResourceStrategy
|
|
1511
|
+
| HierarchyResourceStrategy
|
|
1512
|
+
| NeighbouringParagraphsStrategy
|
|
1513
|
+
| MetadataExtensionStrategy
|
|
1514
|
+
| ConversationalStrategy
|
|
1515
|
+
| PreQueriesStrategy
|
|
1516
|
+
| GraphStrategy,
|
|
1516
1517
|
Field(discriminator="name"),
|
|
1517
1518
|
]
|
|
1518
1519
|
RagImagesStrategies = Annotated[
|
|
1519
|
-
|
|
1520
|
+
PageImageStrategy | ParagraphImageStrategy | TableImageStrategy,
|
|
1520
1521
|
Field(discriminator="name"),
|
|
1521
1522
|
]
|
|
1522
1523
|
PromptContext = dict[str, str]
|
|
@@ -1525,10 +1526,10 @@ PromptContextImages = dict[str, Image]
|
|
|
1525
1526
|
|
|
1526
1527
|
|
|
1527
1528
|
class CustomPrompt(BaseModel):
|
|
1528
|
-
system:
|
|
1529
|
+
system: str | None = Field(
|
|
1529
1530
|
default=None,
|
|
1530
1531
|
title="System prompt",
|
|
1531
|
-
description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
|
|
1532
|
+
description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
|
|
1532
1533
|
min_length=1,
|
|
1533
1534
|
examples=[
|
|
1534
1535
|
"You are a medical assistant, use medical terminology",
|
|
@@ -1537,10 +1538,10 @@ class CustomPrompt(BaseModel):
|
|
|
1537
1538
|
"You are a financial expert, use correct terms",
|
|
1538
1539
|
],
|
|
1539
1540
|
)
|
|
1540
|
-
user:
|
|
1541
|
+
user: str | None = Field(
|
|
1541
1542
|
default=None,
|
|
1542
1543
|
title="User prompt",
|
|
1543
|
-
description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
|
|
1544
|
+
description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
|
|
1544
1545
|
min_length=1,
|
|
1545
1546
|
examples=[
|
|
1546
1547
|
"Taking into account our previous conversation, and this context: {context} answer this {question}",
|
|
@@ -1549,7 +1550,7 @@ class CustomPrompt(BaseModel):
|
|
|
1549
1550
|
"Given this context: {context}. Answer this {question} using the provided context. Please, answer always in French",
|
|
1550
1551
|
],
|
|
1551
1552
|
)
|
|
1552
|
-
rephrase:
|
|
1553
|
+
rephrase: str | None = Field(
|
|
1553
1554
|
default=None,
|
|
1554
1555
|
title="Rephrase",
|
|
1555
1556
|
description=(
|
|
@@ -1579,23 +1580,23 @@ class AskRequest(AuditMetadataBase):
|
|
|
1579
1580
|
le=200,
|
|
1580
1581
|
description="The top most relevant results to fetch at the retrieval step. The maximum number of results allowed is 200.",
|
|
1581
1582
|
)
|
|
1582
|
-
filter_expression:
|
|
1583
|
+
filter_expression: FilterExpression | None = (
|
|
1583
1584
|
SearchParamDefaults.filter_expression.to_pydantic_field()
|
|
1584
1585
|
)
|
|
1585
1586
|
fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
|
|
1586
|
-
filters:
|
|
1587
|
+
filters: list[str] | list[Filter] = Field(
|
|
1587
1588
|
default=[],
|
|
1588
|
-
title="Filters",
|
|
1589
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
1589
|
+
title="Search Filters",
|
|
1590
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
1590
1591
|
)
|
|
1591
|
-
keyword_filters:
|
|
1592
|
+
keyword_filters: list[str] | list[Filter] = Field(
|
|
1592
1593
|
default=[],
|
|
1593
1594
|
title="Keyword filters",
|
|
1594
1595
|
description=(
|
|
1595
1596
|
"List of keyword filter expressions to apply to the retrieval step. "
|
|
1596
1597
|
"The text block search will only be performed on the documents that contain the specified keywords. "
|
|
1597
1598
|
"The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
|
|
1598
|
-
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1599
|
+
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1599
1600
|
),
|
|
1600
1601
|
examples=[
|
|
1601
1602
|
["NLP", "BERT"],
|
|
@@ -1603,43 +1604,39 @@ class AskRequest(AuditMetadataBase):
|
|
|
1603
1604
|
["Friedrich Nietzsche", "Immanuel Kant"],
|
|
1604
1605
|
],
|
|
1605
1606
|
)
|
|
1606
|
-
vectorset:
|
|
1607
|
-
min_score:
|
|
1607
|
+
vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
|
|
1608
|
+
min_score: float | MinScore | None = Field(
|
|
1608
1609
|
default=None,
|
|
1609
1610
|
title="Minimum score",
|
|
1610
|
-
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
1611
|
+
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
1611
1612
|
)
|
|
1612
1613
|
features: list[ChatOptions] = SearchParamDefaults.chat_features.to_pydantic_field()
|
|
1613
|
-
range_creation_start:
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
1617
|
-
range_modification_start: Optional[DateTime] = (
|
|
1614
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
1615
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
1616
|
+
range_modification_start: DateTime | None = (
|
|
1618
1617
|
SearchParamDefaults.range_modification_start.to_pydantic_field()
|
|
1619
1618
|
)
|
|
1620
|
-
range_modification_end:
|
|
1619
|
+
range_modification_end: DateTime | None = (
|
|
1621
1620
|
SearchParamDefaults.range_modification_end.to_pydantic_field()
|
|
1622
1621
|
)
|
|
1623
1622
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
|
|
1624
1623
|
field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
|
|
1625
1624
|
extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
|
|
1626
|
-
context:
|
|
1627
|
-
chat_history:
|
|
1628
|
-
|
|
1629
|
-
)
|
|
1630
|
-
extra_context: Optional[list[str]] = Field(
|
|
1625
|
+
context: list[ChatContextMessage] | None = SearchParamDefaults.chat_context.to_pydantic_field()
|
|
1626
|
+
chat_history: list[ChatContextMessage] | None = SearchParamDefaults.chat_history.to_pydantic_field()
|
|
1627
|
+
extra_context: list[str] | None = Field(
|
|
1631
1628
|
default=None,
|
|
1632
1629
|
title="Extra query context",
|
|
1633
1630
|
description="""Additional context that is added to the retrieval context sent to the LLM.
|
|
1634
1631
|
It allows extending the chat feature with content that may not be in the Knowledge Box.""",
|
|
1635
1632
|
)
|
|
1636
|
-
extra_context_images:
|
|
1633
|
+
extra_context_images: list[Image] | None = Field(
|
|
1637
1634
|
default=None,
|
|
1638
1635
|
title="Extra query context images",
|
|
1639
1636
|
description="""Additional images added to the retrieval context sent to the LLM."
|
|
1640
1637
|
It allows extending the chat feature with content that may not be in the Knowledge Box.""",
|
|
1641
1638
|
)
|
|
1642
|
-
query_image:
|
|
1639
|
+
query_image: Image | None = Field(
|
|
1643
1640
|
default=None,
|
|
1644
1641
|
title="Query image",
|
|
1645
1642
|
description="Image that will be used together with the query text for retrieval and then sent to the LLM as part of the context. "
|
|
@@ -1652,27 +1649,27 @@ class AskRequest(AuditMetadataBase):
|
|
|
1652
1649
|
|
|
1653
1650
|
highlight: bool = SearchParamDefaults.highlight.to_pydantic_field()
|
|
1654
1651
|
resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
|
|
1655
|
-
prompt:
|
|
1652
|
+
prompt: str | CustomPrompt | None = Field(
|
|
1656
1653
|
default=None,
|
|
1657
1654
|
title="Prompts",
|
|
1658
|
-
description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
|
|
1655
|
+
description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
|
|
1659
1656
|
)
|
|
1660
|
-
rank_fusion:
|
|
1661
|
-
reranker:
|
|
1662
|
-
citations:
|
|
1657
|
+
rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
|
|
1658
|
+
reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
|
|
1659
|
+
citations: bool | None | CitationsType = Field(
|
|
1663
1660
|
default=None,
|
|
1664
1661
|
description="Whether to include citations in the response. "
|
|
1665
1662
|
"If set to None or False, no citations will be computed. "
|
|
1666
1663
|
"If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
|
|
1667
1664
|
"If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
|
|
1668
1665
|
)
|
|
1669
|
-
citation_threshold:
|
|
1666
|
+
citation_threshold: float | None = Field(
|
|
1670
1667
|
default=None,
|
|
1671
1668
|
description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
|
|
1672
1669
|
ge=0.0,
|
|
1673
1670
|
le=1.0,
|
|
1674
1671
|
)
|
|
1675
|
-
security:
|
|
1672
|
+
security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
|
|
1676
1673
|
show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
|
|
1677
1674
|
rag_strategies: list[RagStrategies] = Field(
|
|
1678
1675
|
default=[],
|
|
@@ -1737,21 +1734,21 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1737
1734
|
)
|
|
1738
1735
|
debug: bool = SearchParamDefaults.debug.to_pydantic_field()
|
|
1739
1736
|
|
|
1740
|
-
generative_model:
|
|
1737
|
+
generative_model: str | None = Field(
|
|
1741
1738
|
default=None,
|
|
1742
1739
|
title="Generative model",
|
|
1743
|
-
description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1740
|
+
description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1744
1741
|
)
|
|
1745
|
-
generative_model_seed:
|
|
1742
|
+
generative_model_seed: int | None = Field(
|
|
1746
1743
|
default=None,
|
|
1747
1744
|
title="Seed for the generative model",
|
|
1748
1745
|
description="The seed to use for the generative model for deterministic generation. Only supported by some models.",
|
|
1749
1746
|
)
|
|
1750
1747
|
|
|
1751
|
-
max_tokens:
|
|
1748
|
+
max_tokens: int | MaxTokens | None = Field(
|
|
1752
1749
|
default=None,
|
|
1753
1750
|
title="Maximum LLM tokens to use for the request",
|
|
1754
|
-
description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
|
|
1751
|
+
description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
|
|
1755
1752
|
)
|
|
1756
1753
|
|
|
1757
1754
|
rephrase: bool = Field(
|
|
@@ -1760,7 +1757,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1760
1757
|
"Rephrase the query for a more efficient retrieval. This will consume LLM tokens and make the request slower."
|
|
1761
1758
|
),
|
|
1762
1759
|
)
|
|
1763
|
-
chat_history_relevance_threshold:
|
|
1760
|
+
chat_history_relevance_threshold: float | None = Field(
|
|
1764
1761
|
default=None,
|
|
1765
1762
|
ge=0.0,
|
|
1766
1763
|
le=1.0,
|
|
@@ -1778,7 +1775,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1778
1775
|
description="If set to true, the response will be in markdown format",
|
|
1779
1776
|
)
|
|
1780
1777
|
|
|
1781
|
-
answer_json_schema:
|
|
1778
|
+
answer_json_schema: dict[str, Any] | None = Field(
|
|
1782
1779
|
default=None,
|
|
1783
1780
|
title="Answer JSON schema",
|
|
1784
1781
|
description="""Desired JSON schema for the LLM answer.
|
|
@@ -1794,13 +1791,14 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
|
|
|
1794
1791
|
description="Whether to generate an answer using the generative model. If set to false, the response will only contain the retrieval results.",
|
|
1795
1792
|
)
|
|
1796
1793
|
|
|
1797
|
-
search_configuration:
|
|
1794
|
+
search_configuration: str | None = Field(
|
|
1798
1795
|
default=None,
|
|
1799
1796
|
description="Load ask parameters from this configuration. Parameters in the request override parameters from the configuration.",
|
|
1800
1797
|
)
|
|
1801
1798
|
|
|
1802
|
-
reasoning:
|
|
1799
|
+
reasoning: Reasoning | bool = Field(
|
|
1803
1800
|
default=False,
|
|
1801
|
+
title="Reasoning options",
|
|
1804
1802
|
description=(
|
|
1805
1803
|
"Reasoning options for the generative model. "
|
|
1806
1804
|
"Set to True to enable default reasoning, False to disable, or provide a Reasoning object for custom options."
|
|
@@ -1860,6 +1858,13 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
|
|
|
1860
1858
|
self.context = None
|
|
1861
1859
|
return self
|
|
1862
1860
|
|
|
1861
|
+
@field_validator("resource_filters", mode="after")
|
|
1862
|
+
def validate_resource_filters(cls, values: list[str]) -> list[str]:
|
|
1863
|
+
if values is not None:
|
|
1864
|
+
for v in values:
|
|
1865
|
+
_validate_resource_filter(v)
|
|
1866
|
+
return values
|
|
1867
|
+
|
|
1863
1868
|
|
|
1864
1869
|
# Alias (for backwards compatiblity with testbed)
|
|
1865
1870
|
class ChatRequest(AskRequest):
|
|
@@ -1881,8 +1886,8 @@ class SummarizeModel(BaseModel):
|
|
|
1881
1886
|
"""
|
|
1882
1887
|
|
|
1883
1888
|
resources: dict[str, SummarizeResourceModel] = {}
|
|
1884
|
-
generative_model:
|
|
1885
|
-
user_prompt:
|
|
1889
|
+
generative_model: str | None = None
|
|
1890
|
+
user_prompt: str | None = None
|
|
1886
1891
|
summary_kind: SummaryKind = SummaryKind.SIMPLE
|
|
1887
1892
|
|
|
1888
1893
|
|
|
@@ -1891,13 +1896,13 @@ class SummarizeRequest(BaseModel):
|
|
|
1891
1896
|
Model for the request payload of the summarize endpoint
|
|
1892
1897
|
"""
|
|
1893
1898
|
|
|
1894
|
-
generative_model:
|
|
1899
|
+
generative_model: str | None = Field(
|
|
1895
1900
|
default=None,
|
|
1896
1901
|
title="Generative model",
|
|
1897
|
-
description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
|
|
1902
|
+
description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
|
|
1898
1903
|
)
|
|
1899
1904
|
|
|
1900
|
-
user_prompt:
|
|
1905
|
+
user_prompt: str | None = Field(
|
|
1901
1906
|
default=None,
|
|
1902
1907
|
title="User prompt",
|
|
1903
1908
|
description="Optional custom prompt input by the user",
|
|
@@ -1908,7 +1913,7 @@ class SummarizeRequest(BaseModel):
|
|
|
1908
1913
|
min_length=1,
|
|
1909
1914
|
max_length=100,
|
|
1910
1915
|
title="Resources",
|
|
1911
|
-
description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
|
|
1916
|
+
description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
|
|
1912
1917
|
)
|
|
1913
1918
|
|
|
1914
1919
|
summary_kind: SummaryKind = Field(
|
|
@@ -1934,20 +1939,20 @@ class SummarizedResponse(BaseModel):
|
|
|
1934
1939
|
title="Summary",
|
|
1935
1940
|
description="Global summary of all resources combined.",
|
|
1936
1941
|
)
|
|
1937
|
-
consumption:
|
|
1942
|
+
consumption: Consumption | None = None
|
|
1938
1943
|
|
|
1939
1944
|
|
|
1940
1945
|
class KnowledgeGraphEntity(BaseModel):
|
|
1941
1946
|
name: str
|
|
1942
|
-
type:
|
|
1943
|
-
subtype:
|
|
1947
|
+
type: RelationNodeType | None = None
|
|
1948
|
+
subtype: str | None = None
|
|
1944
1949
|
|
|
1945
1950
|
|
|
1946
1951
|
class FindRequest(BaseSearchRequest):
|
|
1947
|
-
query_entities: SkipJsonSchema[
|
|
1952
|
+
query_entities: SkipJsonSchema[list[KnowledgeGraphEntity] | None] = Field(
|
|
1948
1953
|
default=None, title="Query entities", description="Entities to use in a knowledge graph search"
|
|
1949
1954
|
)
|
|
1950
|
-
graph_query:
|
|
1955
|
+
graph_query: GraphPathQuery | None = Field(
|
|
1951
1956
|
default=None,
|
|
1952
1957
|
title="Graph query",
|
|
1953
1958
|
description="Query for the knowledge graph. Paths (node-relation-node) extracted from a paragraph_id will be used to extend the results",
|
|
@@ -1958,17 +1963,17 @@ class FindRequest(BaseSearchRequest):
|
|
|
1958
1963
|
FindOptions.SEMANTIC,
|
|
1959
1964
|
]
|
|
1960
1965
|
)
|
|
1961
|
-
rank_fusion:
|
|
1962
|
-
reranker:
|
|
1966
|
+
rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
|
|
1967
|
+
reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
|
|
1963
1968
|
|
|
1964
|
-
keyword_filters:
|
|
1969
|
+
keyword_filters: list[str] | list[Filter] = Field(
|
|
1965
1970
|
default=[],
|
|
1966
1971
|
title="Keyword filters",
|
|
1967
1972
|
description=(
|
|
1968
1973
|
"List of keyword filter expressions to apply to the retrieval step. "
|
|
1969
1974
|
"The text block search will only be performed on the documents that contain the specified keywords. "
|
|
1970
1975
|
"The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
|
|
1971
|
-
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1976
|
+
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1972
1977
|
),
|
|
1973
1978
|
examples=[
|
|
1974
1979
|
["NLP", "BERT"],
|
|
@@ -1977,11 +1982,11 @@ class FindRequest(BaseSearchRequest):
|
|
|
1977
1982
|
],
|
|
1978
1983
|
)
|
|
1979
1984
|
|
|
1980
|
-
search_configuration:
|
|
1985
|
+
search_configuration: str | None = Field(
|
|
1981
1986
|
default=None,
|
|
1982
1987
|
description="Load find parameters from this configuration. Parameters in the request override parameters from the configuration.",
|
|
1983
1988
|
)
|
|
1984
|
-
generative_model:
|
|
1989
|
+
generative_model: str | None = Field(
|
|
1985
1990
|
default=None,
|
|
1986
1991
|
title="Generative model",
|
|
1987
1992
|
description="The generative model used to rephrase the query. If not provided, the model configured for the Knowledge Box is used.",
|
|
@@ -2015,9 +2020,9 @@ class SCORE_TYPE(str, Enum):
|
|
|
2015
2020
|
|
|
2016
2021
|
|
|
2017
2022
|
class FindTextPosition(BaseModel):
|
|
2018
|
-
page_number:
|
|
2019
|
-
start_seconds:
|
|
2020
|
-
end_seconds:
|
|
2023
|
+
page_number: int | None = None
|
|
2024
|
+
start_seconds: list[int] | None = None
|
|
2025
|
+
end_seconds: list[int] | None = None
|
|
2021
2026
|
index: int
|
|
2022
2027
|
start: int
|
|
2023
2028
|
end: int
|
|
@@ -2029,15 +2034,15 @@ class FindParagraph(BaseModel):
|
|
|
2029
2034
|
order: int = Field(default=0, ge=0)
|
|
2030
2035
|
text: str
|
|
2031
2036
|
id: str
|
|
2032
|
-
labels:
|
|
2033
|
-
position:
|
|
2037
|
+
labels: list[str] | None = []
|
|
2038
|
+
position: TextPosition | None = None
|
|
2034
2039
|
fuzzy_result: bool = False
|
|
2035
2040
|
page_with_visual: bool = Field(
|
|
2036
2041
|
default=False,
|
|
2037
2042
|
title="Page where this paragraph belongs is a visual page",
|
|
2038
2043
|
description="This flag informs if the page may have information that has not been extracted",
|
|
2039
2044
|
)
|
|
2040
|
-
reference:
|
|
2045
|
+
reference: str | None = Field(
|
|
2041
2046
|
default=None,
|
|
2042
2047
|
title="Reference to the image that represents this text",
|
|
2043
2048
|
description="Reference to the extracted image that represents this paragraph",
|
|
@@ -2047,7 +2052,7 @@ class FindParagraph(BaseModel):
|
|
|
2047
2052
|
title="Is a table",
|
|
2048
2053
|
description="The referenced image of the paragraph is a table",
|
|
2049
2054
|
)
|
|
2050
|
-
relevant_relations:
|
|
2055
|
+
relevant_relations: Relations | None = Field(
|
|
2051
2056
|
default=None,
|
|
2052
2057
|
title="Relevant relations",
|
|
2053
2058
|
description="Relevant relations from which the paragraph was found, will only be filled if using the Graph RAG Strategy",
|
|
@@ -2062,17 +2067,19 @@ class FindResource(Resource):
|
|
|
2062
2067
|
fields: dict[str, FindField]
|
|
2063
2068
|
|
|
2064
2069
|
def updated_from(self, origin: Resource):
|
|
2070
|
+
find_resource_model_fields = self.model_fields.keys()
|
|
2065
2071
|
for key in origin.model_fields.keys():
|
|
2066
|
-
|
|
2072
|
+
if key in find_resource_model_fields:
|
|
2073
|
+
self.__setattr__(key, getattr(origin, key))
|
|
2067
2074
|
|
|
2068
2075
|
|
|
2069
2076
|
class KnowledgeboxFindResults(JsonBaseModel):
|
|
2070
2077
|
"""Find on knowledgebox results"""
|
|
2071
2078
|
|
|
2072
2079
|
resources: dict[str, FindResource]
|
|
2073
|
-
relations:
|
|
2074
|
-
query:
|
|
2075
|
-
rephrased_query:
|
|
2080
|
+
relations: Relations | None = None
|
|
2081
|
+
query: str | None = Field(default=None, title="Find Results Query")
|
|
2082
|
+
rephrased_query: str | None = None
|
|
2076
2083
|
total: int = 0
|
|
2077
2084
|
page_number: int = Field(
|
|
2078
2085
|
default=0,
|
|
@@ -2086,18 +2093,18 @@ class KnowledgeboxFindResults(JsonBaseModel):
|
|
|
2086
2093
|
default=False,
|
|
2087
2094
|
description="Pagination will be deprecated, please, refer to `top_k` in the request",
|
|
2088
2095
|
)
|
|
2089
|
-
nodes:
|
|
2096
|
+
nodes: list[dict[str, str]] | None = Field(
|
|
2090
2097
|
default=None,
|
|
2091
2098
|
title="Nodes",
|
|
2092
2099
|
description="List of nodes queried in the search",
|
|
2093
2100
|
)
|
|
2094
|
-
shards:
|
|
2101
|
+
shards: list[str] | None = Field(
|
|
2095
2102
|
default=None,
|
|
2096
2103
|
title="Shards",
|
|
2097
2104
|
description="The list of shard replica ids used for the search.",
|
|
2098
2105
|
)
|
|
2099
2106
|
autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
|
|
2100
|
-
min_score:
|
|
2107
|
+
min_score: float | MinScore | None = Field(
|
|
2101
2108
|
default=MinScore(),
|
|
2102
2109
|
title="Minimum result score",
|
|
2103
2110
|
description="The minimum scores that have been used for the search operation.",
|
|
@@ -2105,9 +2112,9 @@ class KnowledgeboxFindResults(JsonBaseModel):
|
|
|
2105
2112
|
best_matches: list[str] = Field(
|
|
2106
2113
|
default=[],
|
|
2107
2114
|
title="Best matches",
|
|
2108
|
-
description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
|
|
2115
|
+
description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
|
|
2109
2116
|
)
|
|
2110
|
-
metrics:
|
|
2117
|
+
metrics: dict[str, Any] | None = Field(
|
|
2111
2118
|
default=None,
|
|
2112
2119
|
title="Metrics",
|
|
2113
2120
|
description=(
|
|
@@ -2125,15 +2132,15 @@ class FeedbackTasks(str, Enum):
|
|
|
2125
2132
|
class FeedbackRequest(BaseModel):
|
|
2126
2133
|
ident: str = Field(
|
|
2127
2134
|
title="Request identifier",
|
|
2128
|
-
description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
|
|
2135
|
+
description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
|
|
2129
2136
|
)
|
|
2130
2137
|
good: bool = Field(title="Good", description="Whether the result was good or not")
|
|
2131
2138
|
task: FeedbackTasks = Field(
|
|
2132
2139
|
title="Task",
|
|
2133
2140
|
description="The task the feedback is for. For now, only `CHAT` task is available",
|
|
2134
2141
|
)
|
|
2135
|
-
feedback:
|
|
2136
|
-
text_block_id:
|
|
2142
|
+
feedback: str | None = Field(None, title="Feedback", description="Feedback text")
|
|
2143
|
+
text_block_id: str | None = Field(None, title="Text block", description="Text block id")
|
|
2137
2144
|
|
|
2138
2145
|
|
|
2139
2146
|
def validate_facets(facets):
|
|
@@ -2184,13 +2191,11 @@ class AugmentedTextBlock(BaseModel):
|
|
|
2184
2191
|
text: str = Field(
|
|
2185
2192
|
description="The text of the augmented text block. It may include additional metadata to enrich the context"
|
|
2186
2193
|
)
|
|
2187
|
-
position:
|
|
2194
|
+
position: TextPosition | None = Field(
|
|
2188
2195
|
default=None,
|
|
2189
2196
|
description="Metadata about the position of the text block in the original document.",
|
|
2190
2197
|
)
|
|
2191
|
-
parent:
|
|
2192
|
-
default=None, description="The parent text block that was augmented for."
|
|
2193
|
-
)
|
|
2198
|
+
parent: str | None = Field(default=None, description="The parent text block that was augmented for.")
|
|
2194
2199
|
augmentation_type: TextBlockAugmentationType = Field(description="Type of augmentation.")
|
|
2195
2200
|
|
|
2196
2201
|
|
|
@@ -2214,12 +2219,12 @@ class AskTokens(BaseModel):
|
|
|
2214
2219
|
title="Output tokens",
|
|
2215
2220
|
description="Number of LLM tokens used for the answer",
|
|
2216
2221
|
)
|
|
2217
|
-
input_nuclia:
|
|
2222
|
+
input_nuclia: float | None = Field(
|
|
2218
2223
|
title="Input Nuclia tokens",
|
|
2219
2224
|
description="Number of Nuclia LLM tokens used for the context in the query",
|
|
2220
2225
|
default=None,
|
|
2221
2226
|
)
|
|
2222
|
-
output_nuclia:
|
|
2227
|
+
output_nuclia: float | None = Field(
|
|
2223
2228
|
title="Output Nuclia tokens",
|
|
2224
2229
|
description="Number of Nuclia LLM tokens used for the answer",
|
|
2225
2230
|
default=None,
|
|
@@ -2227,12 +2232,12 @@ class AskTokens(BaseModel):
|
|
|
2227
2232
|
|
|
2228
2233
|
|
|
2229
2234
|
class AskTimings(BaseModel):
|
|
2230
|
-
generative_first_chunk:
|
|
2235
|
+
generative_first_chunk: float | None = Field(
|
|
2231
2236
|
default=None,
|
|
2232
2237
|
title="Generative first chunk",
|
|
2233
2238
|
description="Time the LLM took to generate the first chunk of the answer",
|
|
2234
2239
|
)
|
|
2235
|
-
generative_total:
|
|
2240
|
+
generative_total: float | None = Field(
|
|
2236
2241
|
default=None,
|
|
2237
2242
|
title="Generative total",
|
|
2238
2243
|
description="Total time the LLM took to generate the answer",
|
|
@@ -2240,12 +2245,12 @@ class AskTimings(BaseModel):
|
|
|
2240
2245
|
|
|
2241
2246
|
|
|
2242
2247
|
class SyncAskMetadata(BaseModel):
|
|
2243
|
-
tokens:
|
|
2248
|
+
tokens: AskTokens | None = Field(
|
|
2244
2249
|
default=None,
|
|
2245
2250
|
title="Tokens",
|
|
2246
2251
|
description="Number of tokens used in the LLM context and answer",
|
|
2247
2252
|
)
|
|
2248
|
-
timings:
|
|
2253
|
+
timings: AskTimings | None = Field(
|
|
2249
2254
|
default=None,
|
|
2250
2255
|
title="Timings",
|
|
2251
2256
|
description="Timings of the generative model",
|
|
@@ -2264,19 +2269,19 @@ class SyncAskResponse(BaseModel):
|
|
|
2264
2269
|
title="Answer",
|
|
2265
2270
|
description="The generative answer to the query",
|
|
2266
2271
|
)
|
|
2267
|
-
reasoning:
|
|
2272
|
+
reasoning: str | None = Field(
|
|
2268
2273
|
default=None,
|
|
2269
|
-
title="Reasoning",
|
|
2270
|
-
description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
|
|
2274
|
+
title="Reasoning steps",
|
|
2275
|
+
description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
|
|
2271
2276
|
)
|
|
2272
|
-
answer_json:
|
|
2277
|
+
answer_json: dict[str, Any] | None = Field(
|
|
2273
2278
|
default=None,
|
|
2274
2279
|
title="Answer JSON",
|
|
2275
|
-
description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
|
|
2280
|
+
description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
|
|
2276
2281
|
)
|
|
2277
2282
|
status: str = Field(
|
|
2278
2283
|
title="Status",
|
|
2279
|
-
description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
|
|
2284
|
+
description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
|
|
2280
2285
|
)
|
|
2281
2286
|
retrieval_results: KnowledgeboxFindResults = Field(
|
|
2282
2287
|
title="Retrieval results",
|
|
@@ -2287,7 +2292,7 @@ class SyncAskResponse(BaseModel):
|
|
|
2287
2292
|
title="Retrieval best matches",
|
|
2288
2293
|
description="Sorted list of best matching text blocks in the retrieval step. This includes the main query and prequeries results, if any.",
|
|
2289
2294
|
)
|
|
2290
|
-
prequeries:
|
|
2295
|
+
prequeries: dict[str, KnowledgeboxFindResults] | None = Field(
|
|
2291
2296
|
default=None,
|
|
2292
2297
|
title="Prequeries",
|
|
2293
2298
|
description="The retrieval results of the prequeries",
|
|
@@ -2295,9 +2300,9 @@ class SyncAskResponse(BaseModel):
|
|
|
2295
2300
|
learning_id: str = Field(
|
|
2296
2301
|
default="",
|
|
2297
2302
|
title="Learning id",
|
|
2298
|
-
description="The id of the learning request. This id can be used to provide feedback on the learning process.",
|
|
2303
|
+
description="The id of the learning request. This id can be used to provide feedback on the learning process.",
|
|
2299
2304
|
)
|
|
2300
|
-
relations:
|
|
2305
|
+
relations: Relations | None = Field(
|
|
2301
2306
|
default=None,
|
|
2302
2307
|
title="Relations",
|
|
2303
2308
|
description="The detected relations of the answer",
|
|
@@ -2312,29 +2317,29 @@ class SyncAskResponse(BaseModel):
|
|
|
2312
2317
|
title="Citation footnote to context",
|
|
2313
2318
|
description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)""",
|
|
2314
2319
|
)
|
|
2315
|
-
augmented_context:
|
|
2320
|
+
augmented_context: AugmentedContext | None = Field(
|
|
2316
2321
|
default=None,
|
|
2317
2322
|
description=(
|
|
2318
2323
|
"Augmented text blocks that were sent to the LLM as part of the RAG strategies "
|
|
2319
2324
|
"applied on the retrieval results in the request."
|
|
2320
2325
|
),
|
|
2321
2326
|
)
|
|
2322
|
-
prompt_context:
|
|
2327
|
+
prompt_context: list[str] | None = Field(
|
|
2323
2328
|
default=None,
|
|
2324
2329
|
title="Prompt context",
|
|
2325
2330
|
description="The prompt context used to generate the answer. Returned only if the debug flag is set to true",
|
|
2326
2331
|
)
|
|
2327
|
-
predict_request:
|
|
2332
|
+
predict_request: dict[str, Any] | None = Field(
|
|
2328
2333
|
default=None,
|
|
2329
2334
|
title="Predict request",
|
|
2330
2335
|
description="The internal predict request used to generate the answer. Returned only if the debug flag is set to true",
|
|
2331
2336
|
)
|
|
2332
|
-
metadata:
|
|
2337
|
+
metadata: SyncAskMetadata | None = Field(
|
|
2333
2338
|
default=None,
|
|
2334
2339
|
title="Metadata",
|
|
2335
|
-
description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
|
|
2340
|
+
description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
|
|
2336
2341
|
)
|
|
2337
|
-
consumption:
|
|
2342
|
+
consumption: Consumption | None = Field(
|
|
2338
2343
|
default=None,
|
|
2339
2344
|
title="Consumption",
|
|
2340
2345
|
description=(
|
|
@@ -2342,12 +2347,12 @@ class SyncAskResponse(BaseModel):
|
|
|
2342
2347
|
" 'X-show-consumption' header is set to true in the request."
|
|
2343
2348
|
),
|
|
2344
2349
|
)
|
|
2345
|
-
error_details:
|
|
2350
|
+
error_details: str | None = Field(
|
|
2346
2351
|
default=None,
|
|
2347
2352
|
title="Error details",
|
|
2348
2353
|
description="Error details message in case there was an error",
|
|
2349
2354
|
)
|
|
2350
|
-
debug:
|
|
2355
|
+
debug: dict[str, Any] | None = Field(
|
|
2351
2356
|
default=None,
|
|
2352
2357
|
title="Debug information",
|
|
2353
2358
|
description=(
|
|
@@ -2437,7 +2442,7 @@ class StatusAskResponseItem(BaseModel):
|
|
|
2437
2442
|
type: Literal["status"] = "status"
|
|
2438
2443
|
code: str
|
|
2439
2444
|
status: str
|
|
2440
|
-
details:
|
|
2445
|
+
details: str | None = None
|
|
2441
2446
|
|
|
2442
2447
|
|
|
2443
2448
|
class ErrorAskResponseItem(BaseModel):
|
|
@@ -2456,22 +2461,22 @@ class DebugAskResponseItem(BaseModel):
|
|
|
2456
2461
|
metrics: dict[str, Any]
|
|
2457
2462
|
|
|
2458
2463
|
|
|
2459
|
-
AskResponseItemType =
|
|
2460
|
-
AnswerAskResponseItem
|
|
2461
|
-
ReasoningAskResponseItem
|
|
2462
|
-
JSONAskResponseItem
|
|
2463
|
-
MetadataAskResponseItem
|
|
2464
|
-
AugmentedContextResponseItem
|
|
2465
|
-
CitationsAskResponseItem
|
|
2466
|
-
FootnoteCitationsAskResponseItem
|
|
2467
|
-
StatusAskResponseItem
|
|
2468
|
-
ErrorAskResponseItem
|
|
2469
|
-
RetrievalAskResponseItem
|
|
2470
|
-
RelationsAskResponseItem
|
|
2471
|
-
DebugAskResponseItem
|
|
2472
|
-
PrequeriesAskResponseItem
|
|
2473
|
-
ConsumptionResponseItem
|
|
2474
|
-
|
|
2464
|
+
AskResponseItemType = (
|
|
2465
|
+
AnswerAskResponseItem
|
|
2466
|
+
| ReasoningAskResponseItem
|
|
2467
|
+
| JSONAskResponseItem
|
|
2468
|
+
| MetadataAskResponseItem
|
|
2469
|
+
| AugmentedContextResponseItem
|
|
2470
|
+
| CitationsAskResponseItem
|
|
2471
|
+
| FootnoteCitationsAskResponseItem
|
|
2472
|
+
| StatusAskResponseItem
|
|
2473
|
+
| ErrorAskResponseItem
|
|
2474
|
+
| RetrievalAskResponseItem
|
|
2475
|
+
| RelationsAskResponseItem
|
|
2476
|
+
| DebugAskResponseItem
|
|
2477
|
+
| PrequeriesAskResponseItem
|
|
2478
|
+
| ConsumptionResponseItem
|
|
2479
|
+
)
|
|
2475
2480
|
|
|
2476
2481
|
|
|
2477
2482
|
class AskResponseItem(BaseModel):
|
|
@@ -2491,7 +2496,7 @@ def parse_custom_prompt(item: AskRequest) -> CustomPrompt:
|
|
|
2491
2496
|
return prompt
|
|
2492
2497
|
|
|
2493
2498
|
|
|
2494
|
-
def parse_rephrase_prompt(item: AskRequest) ->
|
|
2499
|
+
def parse_rephrase_prompt(item: AskRequest) -> str | None:
|
|
2495
2500
|
prompt = parse_custom_prompt(item)
|
|
2496
2501
|
return prompt.rephrase
|
|
2497
2502
|
|
|
@@ -2502,7 +2507,7 @@ FindRequest.model_rebuild()
|
|
|
2502
2507
|
|
|
2503
2508
|
class CatalogFacetsPrefix(BaseModel):
|
|
2504
2509
|
prefix: str = Field(pattern="^((/[^/]+)*)$")
|
|
2505
|
-
depth:
|
|
2510
|
+
depth: int | None = Field(
|
|
2506
2511
|
default=None,
|
|
2507
2512
|
ge=0,
|
|
2508
2513
|
description="Only include facets up to this depth from the prefix, leave empty to include all depths",
|
|
@@ -2524,3 +2529,22 @@ class CatalogFacetsRequest(BaseModel):
|
|
|
2524
2529
|
|
|
2525
2530
|
class CatalogFacetsResponse(BaseModel):
|
|
2526
2531
|
facets: dict[str, int]
|
|
2532
|
+
|
|
2533
|
+
|
|
2534
|
+
def _validate_resource_filter(v: str):
|
|
2535
|
+
parts = v.split("/")
|
|
2536
|
+
|
|
2537
|
+
rid = parts[0]
|
|
2538
|
+
try:
|
|
2539
|
+
UUID(rid)
|
|
2540
|
+
except ValueError:
|
|
2541
|
+
raise ValueError(f"resource id filter '{rid}' should be a valid UUID")
|
|
2542
|
+
|
|
2543
|
+
if len(parts) > 1:
|
|
2544
|
+
field_type = parts[1]
|
|
2545
|
+
try:
|
|
2546
|
+
FieldTypeName.from_abbreviation(field_type)
|
|
2547
|
+
except KeyError: # pragma: no cover
|
|
2548
|
+
raise ValueError(
|
|
2549
|
+
f"resource filter {v} has an invalid field type: {field_type}",
|
|
2550
|
+
)
|