nucliadb-models 6.9.7.post5583__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb_models/agents/ingestion.py +4 -4
- nucliadb_models/augment.py +100 -84
- nucliadb_models/common.py +56 -56
- nucliadb_models/configuration.py +8 -8
- nucliadb_models/content_types.py +13 -11
- nucliadb_models/conversation.py +25 -26
- nucliadb_models/entities.py +17 -18
- nucliadb_models/external_index_providers.py +1 -2
- nucliadb_models/extracted.py +82 -83
- nucliadb_models/file.py +10 -11
- nucliadb_models/filters.py +78 -74
- nucliadb_models/graph/requests.py +40 -48
- nucliadb_models/graph/responses.py +13 -1
- nucliadb_models/hydration.py +48 -50
- nucliadb_models/internal/predict.py +7 -9
- nucliadb_models/internal/shards.py +2 -3
- nucliadb_models/labels.py +18 -11
- nucliadb_models/link.py +18 -19
- nucliadb_models/metadata.py +66 -54
- nucliadb_models/notifications.py +3 -3
- nucliadb_models/processing.py +1 -2
- nucliadb_models/resource.py +85 -93
- nucliadb_models/retrieval.py +147 -0
- nucliadb_models/search.py +263 -275
- nucliadb_models/security.py +2 -3
- nucliadb_models/text.py +7 -8
- nucliadb_models/trainset.py +1 -2
- nucliadb_models/utils.py +2 -3
- nucliadb_models/vectors.py +2 -5
- nucliadb_models/writer.py +56 -57
- {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/METADATA +1 -1
- nucliadb_models-6.11.1.post5822.dist-info/RECORD +41 -0
- {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/WHEEL +1 -1
- nucliadb_models-6.9.7.post5583.dist-info/RECORD +0 -40
- {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/top_level.txt +0 -0
nucliadb_models/search.py
CHANGED
|
@@ -14,20 +14,19 @@
|
|
|
14
14
|
#
|
|
15
15
|
import json
|
|
16
16
|
from enum import Enum
|
|
17
|
-
from typing import Any, Literal
|
|
17
|
+
from typing import Annotated, Any, Literal
|
|
18
18
|
from uuid import UUID
|
|
19
19
|
|
|
20
20
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
21
21
|
from pydantic.aliases import AliasChoices
|
|
22
22
|
from pydantic.json_schema import SkipJsonSchema
|
|
23
|
-
from typing_extensions import
|
|
23
|
+
from typing_extensions import Self
|
|
24
24
|
|
|
25
25
|
from nucliadb_models import RelationMetadata
|
|
26
26
|
from nucliadb_models.common import FieldTypeName, ParamDefault
|
|
27
27
|
from nucliadb_models.graph.requests import GraphPathQuery
|
|
28
28
|
|
|
29
29
|
# Bw/c import to avoid breaking users
|
|
30
|
-
# noqa isort: skip
|
|
31
30
|
from nucliadb_models.metadata import RelationNodeType, RelationType, ResourceProcessingStatus
|
|
32
31
|
from nucliadb_models.resource import ExtractedDataTypeName, Resource
|
|
33
32
|
from nucliadb_models.security import RequestSecurity
|
|
@@ -153,12 +152,12 @@ FacetsResult = dict[str, Any]
|
|
|
153
152
|
|
|
154
153
|
|
|
155
154
|
class TextPosition(BaseModel):
|
|
156
|
-
page_number:
|
|
155
|
+
page_number: int | None = None
|
|
157
156
|
index: int
|
|
158
157
|
start: int
|
|
159
158
|
end: int
|
|
160
|
-
start_seconds:
|
|
161
|
-
end_seconds:
|
|
159
|
+
start_seconds: list[int] | None = None
|
|
160
|
+
end_seconds: list[int] | None = None
|
|
162
161
|
|
|
163
162
|
|
|
164
163
|
class Sentence(BaseModel):
|
|
@@ -167,8 +166,8 @@ class Sentence(BaseModel):
|
|
|
167
166
|
text: str
|
|
168
167
|
field_type: str
|
|
169
168
|
field: str
|
|
170
|
-
index:
|
|
171
|
-
position:
|
|
169
|
+
index: str | None = None
|
|
170
|
+
position: TextPosition | None = None
|
|
172
171
|
|
|
173
172
|
|
|
174
173
|
class Sentences(BaseModel):
|
|
@@ -178,7 +177,7 @@ class Sentences(BaseModel):
|
|
|
178
177
|
page_size: int = 20
|
|
179
178
|
min_score: float = Field(
|
|
180
179
|
title="Minimum score",
|
|
181
|
-
description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
|
|
180
|
+
description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
|
|
182
181
|
)
|
|
183
182
|
|
|
184
183
|
|
|
@@ -189,45 +188,45 @@ class Paragraph(BaseModel):
|
|
|
189
188
|
field: str
|
|
190
189
|
text: str
|
|
191
190
|
labels: list[str] = []
|
|
192
|
-
start_seconds:
|
|
193
|
-
end_seconds:
|
|
194
|
-
position:
|
|
191
|
+
start_seconds: list[int] | None = None
|
|
192
|
+
end_seconds: list[int] | None = None
|
|
193
|
+
position: TextPosition | None = None
|
|
195
194
|
fuzzy_result: bool = False
|
|
196
195
|
|
|
197
196
|
|
|
198
197
|
class Paragraphs(BaseModel):
|
|
199
198
|
results: list[Paragraph] = []
|
|
200
|
-
facets:
|
|
201
|
-
query:
|
|
199
|
+
facets: FacetsResult | None = None
|
|
200
|
+
query: str | None = Field(default=None, title="Paragraphs Query")
|
|
202
201
|
total: int = 0
|
|
203
202
|
page_number: int = 0
|
|
204
203
|
page_size: int = 20
|
|
205
204
|
next_page: bool = False
|
|
206
205
|
min_score: float = Field(
|
|
207
206
|
title="Minimum score",
|
|
208
|
-
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
207
|
+
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
209
208
|
)
|
|
210
209
|
|
|
211
210
|
|
|
212
211
|
class ResourceResult(BaseModel):
|
|
213
|
-
score:
|
|
212
|
+
score: float | int
|
|
214
213
|
rid: str
|
|
215
214
|
field_type: str
|
|
216
215
|
field: str
|
|
217
|
-
labels:
|
|
216
|
+
labels: list[str] | None = None
|
|
218
217
|
|
|
219
218
|
|
|
220
219
|
class Resources(BaseModel):
|
|
221
220
|
results: list[ResourceResult]
|
|
222
|
-
facets:
|
|
223
|
-
query:
|
|
221
|
+
facets: FacetsResult | None = None
|
|
222
|
+
query: str | None = Field(default=None, title="Resources Query")
|
|
224
223
|
total: int = 0
|
|
225
224
|
page_number: int = 0
|
|
226
225
|
page_size: int = 20
|
|
227
226
|
next_page: bool = False
|
|
228
227
|
min_score: float = Field(
|
|
229
228
|
title="Minimum score",
|
|
230
|
-
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
229
|
+
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
231
230
|
)
|
|
232
231
|
|
|
233
232
|
|
|
@@ -247,7 +246,7 @@ class DirectionalRelation(BaseModel):
|
|
|
247
246
|
relation: RelationType
|
|
248
247
|
relation_label: str
|
|
249
248
|
direction: RelationDirection
|
|
250
|
-
metadata:
|
|
249
|
+
metadata: RelationMetadata | None = None
|
|
251
250
|
resource_id: str
|
|
252
251
|
|
|
253
252
|
|
|
@@ -281,23 +280,23 @@ class RelatedEntities(BaseModel):
|
|
|
281
280
|
class ResourceSearchResults(JsonBaseModel):
|
|
282
281
|
"""Search on resource results"""
|
|
283
282
|
|
|
284
|
-
sentences:
|
|
285
|
-
paragraphs:
|
|
286
|
-
relations:
|
|
287
|
-
nodes:
|
|
288
|
-
shards:
|
|
283
|
+
sentences: Sentences | None = None
|
|
284
|
+
paragraphs: Paragraphs | None = None
|
|
285
|
+
relations: Relations | None = None
|
|
286
|
+
nodes: list[dict[str, str]] | None = None
|
|
287
|
+
shards: list[str] | None = None
|
|
289
288
|
|
|
290
289
|
|
|
291
290
|
class KnowledgeboxSearchResults(JsonBaseModel):
|
|
292
291
|
"""Search on knowledgebox results"""
|
|
293
292
|
|
|
294
293
|
resources: dict[str, Resource] = {}
|
|
295
|
-
sentences:
|
|
296
|
-
paragraphs:
|
|
297
|
-
fulltext:
|
|
298
|
-
relations:
|
|
299
|
-
nodes:
|
|
300
|
-
shards:
|
|
294
|
+
sentences: Sentences | None = None
|
|
295
|
+
paragraphs: Paragraphs | None = None
|
|
296
|
+
fulltext: Resources | None = None
|
|
297
|
+
relations: Relations | None = None
|
|
298
|
+
nodes: list[dict[str, str]] | None = None
|
|
299
|
+
shards: list[str] | None = None
|
|
301
300
|
|
|
302
301
|
# TODO: remove on a future major release
|
|
303
302
|
autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
|
|
@@ -307,16 +306,16 @@ class CatalogResponse(BaseModel):
|
|
|
307
306
|
"""Catalog results"""
|
|
308
307
|
|
|
309
308
|
resources: dict[str, Resource] = {}
|
|
310
|
-
fulltext:
|
|
311
|
-
shards:
|
|
309
|
+
fulltext: Resources | None = None
|
|
310
|
+
shards: list[str] | None = None
|
|
312
311
|
|
|
313
312
|
|
|
314
313
|
class KnowledgeboxSuggestResults(JsonBaseModel):
|
|
315
314
|
"""Suggest on resource results"""
|
|
316
315
|
|
|
317
|
-
paragraphs:
|
|
318
|
-
entities:
|
|
319
|
-
shards:
|
|
316
|
+
paragraphs: Paragraphs | None = None
|
|
317
|
+
entities: RelatedEntities | None = None
|
|
318
|
+
shards: list[str] | None = None
|
|
320
319
|
|
|
321
320
|
|
|
322
321
|
class KnowledgeboxCounters(BaseModel):
|
|
@@ -324,7 +323,7 @@ class KnowledgeboxCounters(BaseModel):
|
|
|
324
323
|
paragraphs: int
|
|
325
324
|
fields: int
|
|
326
325
|
sentences: int
|
|
327
|
-
shards:
|
|
326
|
+
shards: list[str] | None = None
|
|
328
327
|
index_size: float = Field(default=0.0, title="Index size (bytes)")
|
|
329
328
|
|
|
330
329
|
|
|
@@ -379,13 +378,13 @@ class ReciprocalRankFusion(_BaseRankFusion):
|
|
|
379
378
|
k: float = Field(
|
|
380
379
|
default=60.0,
|
|
381
380
|
title="RRF k parameter",
|
|
382
|
-
description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
|
|
381
|
+
description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
|
|
383
382
|
)
|
|
384
|
-
window:
|
|
383
|
+
window: int | None = Field(
|
|
385
384
|
default=None,
|
|
386
385
|
le=MAX_RANK_FUSION_WINDOW,
|
|
387
386
|
title="RRF window",
|
|
388
|
-
description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
|
|
387
|
+
description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
|
|
389
388
|
)
|
|
390
389
|
boosting: ReciprocalRankFusionWeights = Field(
|
|
391
390
|
default_factory=ReciprocalRankFusionWeights,
|
|
@@ -396,12 +395,12 @@ Define different weights for each retriever. This allows to assign different pri
|
|
|
396
395
|
The default is 1 for each retriever, which means no extra boost for any of them. Weights below 0 can be used for negative boosting.
|
|
397
396
|
|
|
398
397
|
This kind of boosting can be useful in multilingual search, for example, where keyword search may not give good results and can degrade the final search experience
|
|
399
|
-
""",
|
|
398
|
+
""",
|
|
400
399
|
)
|
|
401
400
|
|
|
402
401
|
|
|
403
402
|
RankFusion = Annotated[
|
|
404
|
-
|
|
403
|
+
ReciprocalRankFusion,
|
|
405
404
|
Field(discriminator="name"),
|
|
406
405
|
]
|
|
407
406
|
|
|
@@ -436,15 +435,15 @@ class _BaseReranker(BaseModel):
|
|
|
436
435
|
|
|
437
436
|
class PredictReranker(_BaseReranker):
|
|
438
437
|
name: Literal[RerankerName.PREDICT_RERANKER] = RerankerName.PREDICT_RERANKER
|
|
439
|
-
window:
|
|
438
|
+
window: int | None = Field(
|
|
440
439
|
default=None,
|
|
441
440
|
le=200,
|
|
442
441
|
title="Reranker window",
|
|
443
|
-
description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
|
|
442
|
+
description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
|
|
444
443
|
)
|
|
445
444
|
|
|
446
445
|
|
|
447
|
-
Reranker = Annotated[
|
|
446
|
+
Reranker = Annotated[PredictReranker, Field(discriminator="name")]
|
|
448
447
|
|
|
449
448
|
|
|
450
449
|
class KnowledgeBoxCount(BaseModel):
|
|
@@ -473,18 +472,18 @@ class SearchParamDefaults:
|
|
|
473
472
|
)
|
|
474
473
|
filters = ParamDefault(
|
|
475
474
|
default=[],
|
|
476
|
-
title="Filters",
|
|
477
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
475
|
+
title="Search Filters",
|
|
476
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
478
477
|
)
|
|
479
478
|
resource_filters = ParamDefault(
|
|
480
479
|
default=[],
|
|
481
480
|
title="Resources filter",
|
|
482
|
-
description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
|
|
481
|
+
description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
|
|
483
482
|
)
|
|
484
483
|
faceted = ParamDefault(
|
|
485
484
|
default=[],
|
|
486
485
|
title="Faceted",
|
|
487
|
-
description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
486
|
+
description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
488
487
|
max_items=50,
|
|
489
488
|
)
|
|
490
489
|
chat_query = ParamDefault(
|
|
@@ -521,12 +520,12 @@ class SearchParamDefaults:
|
|
|
521
520
|
highlight = ParamDefault(
|
|
522
521
|
default=False,
|
|
523
522
|
title="Highlight",
|
|
524
|
-
description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
|
|
523
|
+
description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
|
|
525
524
|
)
|
|
526
525
|
with_duplicates = ParamDefault(
|
|
527
526
|
default=False,
|
|
528
527
|
title="With duplicate paragraphs",
|
|
529
|
-
description="Whether to return duplicate paragraphs on the same document",
|
|
528
|
+
description="Whether to return duplicate paragraphs on the same document",
|
|
530
529
|
)
|
|
531
530
|
with_status = ParamDefault(
|
|
532
531
|
default=None,
|
|
@@ -536,7 +535,7 @@ class SearchParamDefaults:
|
|
|
536
535
|
with_synonyms = ParamDefault(
|
|
537
536
|
default=False,
|
|
538
537
|
title="With custom synonyms",
|
|
539
|
-
description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
|
|
538
|
+
description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
|
|
540
539
|
)
|
|
541
540
|
sort_order = ParamDefault(
|
|
542
541
|
default=SortOrder.DESC,
|
|
@@ -566,12 +565,12 @@ class SearchParamDefaults:
|
|
|
566
565
|
reranker = ParamDefault(
|
|
567
566
|
default=RerankerName.PREDICT_RERANKER,
|
|
568
567
|
title="Reranker",
|
|
569
|
-
description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
|
|
568
|
+
description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
|
|
570
569
|
)
|
|
571
570
|
debug = ParamDefault(
|
|
572
571
|
default=False,
|
|
573
572
|
title="Debug mode",
|
|
574
|
-
description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
|
|
573
|
+
description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
|
|
575
574
|
)
|
|
576
575
|
show = ParamDefault(
|
|
577
576
|
default=[ResourceProperties.BASIC],
|
|
@@ -592,27 +591,27 @@ class SearchParamDefaults:
|
|
|
592
591
|
range_creation_start = ParamDefault(
|
|
593
592
|
default=None,
|
|
594
593
|
title="Resource creation range start",
|
|
595
|
-
description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
594
|
+
description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
596
595
|
)
|
|
597
596
|
range_creation_end = ParamDefault(
|
|
598
597
|
default=None,
|
|
599
598
|
title="Resource creation range end",
|
|
600
|
-
description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
599
|
+
description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
601
600
|
)
|
|
602
601
|
range_modification_start = ParamDefault(
|
|
603
602
|
default=None,
|
|
604
603
|
title="Resource modification range start",
|
|
605
|
-
description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
604
|
+
description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
606
605
|
)
|
|
607
606
|
range_modification_end = ParamDefault(
|
|
608
607
|
default=None,
|
|
609
608
|
title="Resource modification range end",
|
|
610
|
-
description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
609
|
+
description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
611
610
|
)
|
|
612
611
|
vector = ParamDefault(
|
|
613
612
|
default=None,
|
|
614
613
|
title="Search Vector",
|
|
615
|
-
description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
|
|
614
|
+
description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
|
|
616
615
|
)
|
|
617
616
|
vectorset = ParamDefault(
|
|
618
617
|
default=None,
|
|
@@ -628,12 +627,12 @@ class SearchParamDefaults:
|
|
|
628
627
|
chat_history = ParamDefault(
|
|
629
628
|
default=None,
|
|
630
629
|
title="Chat history",
|
|
631
|
-
description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
|
|
630
|
+
description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
|
|
632
631
|
)
|
|
633
632
|
chat_features = ParamDefault(
|
|
634
633
|
default=[ChatOptions.SEMANTIC, ChatOptions.KEYWORD],
|
|
635
634
|
title="Chat features",
|
|
636
|
-
description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
|
|
635
|
+
description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
|
|
637
636
|
)
|
|
638
637
|
suggest_features = ParamDefault(
|
|
639
638
|
default=[
|
|
@@ -646,17 +645,17 @@ class SearchParamDefaults:
|
|
|
646
645
|
security = ParamDefault(
|
|
647
646
|
default=None,
|
|
648
647
|
title="Security",
|
|
649
|
-
description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
|
|
648
|
+
description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
|
|
650
649
|
)
|
|
651
650
|
security_groups = ParamDefault(
|
|
652
651
|
default=[],
|
|
653
652
|
title="Security groups",
|
|
654
|
-
description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
|
|
653
|
+
description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
|
|
655
654
|
)
|
|
656
655
|
rephrase = ParamDefault(
|
|
657
656
|
default=False,
|
|
658
657
|
title="Rephrase query consuming LLMs",
|
|
659
|
-
description="Rephrase query consuming LLMs - it will make the query slower",
|
|
658
|
+
description="Rephrase query consuming LLMs - it will make the query slower",
|
|
660
659
|
)
|
|
661
660
|
prefer_markdown = ParamDefault(
|
|
662
661
|
default=False,
|
|
@@ -696,10 +695,10 @@ class SearchParamDefaults:
|
|
|
696
695
|
|
|
697
696
|
|
|
698
697
|
class Filter(BaseModel):
|
|
699
|
-
all:
|
|
700
|
-
any:
|
|
701
|
-
none:
|
|
702
|
-
not_all:
|
|
698
|
+
all: list[str] | None = Field(default=None, min_length=1)
|
|
699
|
+
any: list[str] | None = Field(default=None, min_length=1)
|
|
700
|
+
none: list[str] | None = Field(default=None, min_length=1)
|
|
701
|
+
not_all: list[str] | None = Field(default=None, min_length=1)
|
|
703
702
|
|
|
704
703
|
@model_validator(mode="after")
|
|
705
704
|
def validate_filter(self) -> Self:
|
|
@@ -741,19 +740,19 @@ class CatalogQuery(BaseModel):
|
|
|
741
740
|
|
|
742
741
|
|
|
743
742
|
class CatalogRequest(BaseModel):
|
|
744
|
-
query:
|
|
743
|
+
query: str | CatalogQuery = ParamDefault(
|
|
745
744
|
default="",
|
|
746
|
-
title="Query",
|
|
745
|
+
title="Catalog Request Query",
|
|
747
746
|
description="The query to search for",
|
|
748
747
|
).to_pydantic_field()
|
|
749
|
-
filter_expression:
|
|
748
|
+
filter_expression: CatalogFilterExpression | None = (
|
|
750
749
|
SearchParamDefaults.catalog_filter_expression.to_pydantic_field()
|
|
751
750
|
)
|
|
752
751
|
faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
|
|
753
|
-
sort:
|
|
752
|
+
sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
|
|
754
753
|
page_number: int = SearchParamDefaults.catalog_page_number.to_pydantic_field()
|
|
755
754
|
page_size: int = SearchParamDefaults.catalog_page_size.to_pydantic_field()
|
|
756
|
-
hidden:
|
|
755
|
+
hidden: bool | None = SearchParamDefaults.hidden.to_pydantic_field()
|
|
757
756
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field(
|
|
758
757
|
default=[ResourceProperties.BASIC, ResourceProperties.ERRORS]
|
|
759
758
|
)
|
|
@@ -761,32 +760,30 @@ class CatalogRequest(BaseModel):
|
|
|
761
760
|
debug: SkipJsonSchema[bool] = SearchParamDefaults.debug.to_pydantic_field()
|
|
762
761
|
|
|
763
762
|
# Deprecated filter parameters
|
|
764
|
-
filters:
|
|
763
|
+
filters: list[str] | list[Filter] = Field(
|
|
765
764
|
default=[],
|
|
766
|
-
title="Filters",
|
|
767
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
765
|
+
title="Catalog Filters",
|
|
766
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
768
767
|
deprecated="Use filter_expression instead",
|
|
769
768
|
)
|
|
770
|
-
with_status:
|
|
769
|
+
with_status: ResourceProcessingStatus | None = Field(
|
|
771
770
|
default=None,
|
|
772
771
|
title="With processing status",
|
|
773
772
|
description="Filter results by resource processing status",
|
|
774
773
|
deprecated="Use filter_expression instead",
|
|
775
774
|
)
|
|
776
|
-
range_creation_start:
|
|
777
|
-
|
|
778
|
-
deprecated="Use filter_expression instead",
|
|
779
|
-
)
|
|
775
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field(
|
|
776
|
+
deprecated="Use filter_expression instead",
|
|
780
777
|
)
|
|
781
|
-
range_creation_end:
|
|
778
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field(
|
|
782
779
|
deprecated="Use filter_expression instead",
|
|
783
780
|
)
|
|
784
|
-
range_modification_start:
|
|
781
|
+
range_modification_start: DateTime | None = (
|
|
785
782
|
SearchParamDefaults.range_modification_start.to_pydantic_field(
|
|
786
783
|
deprecated="Use filter_expression instead",
|
|
787
784
|
)
|
|
788
785
|
)
|
|
789
|
-
range_modification_end:
|
|
786
|
+
range_modification_end: DateTime | None = (
|
|
790
787
|
SearchParamDefaults.range_modification_end.to_pydantic_field(
|
|
791
788
|
deprecated="Use filter_expression instead",
|
|
792
789
|
)
|
|
@@ -799,15 +796,15 @@ class CatalogRequest(BaseModel):
|
|
|
799
796
|
|
|
800
797
|
|
|
801
798
|
class MinScore(BaseModel):
|
|
802
|
-
semantic:
|
|
799
|
+
semantic: float | None = Field(
|
|
803
800
|
default=None,
|
|
804
801
|
title="Minimum semantic score",
|
|
805
|
-
description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
802
|
+
description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
806
803
|
)
|
|
807
804
|
bm25: float = Field(
|
|
808
805
|
default=0,
|
|
809
806
|
title="Minimum bm25 score",
|
|
810
|
-
description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
807
|
+
description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
811
808
|
ge=0,
|
|
812
809
|
)
|
|
813
810
|
|
|
@@ -821,7 +818,7 @@ AUDIT_METADATA_MAX_BYTES = 1024 * 10 # 10KB
|
|
|
821
818
|
|
|
822
819
|
|
|
823
820
|
class AuditMetadataBase(BaseModel):
|
|
824
|
-
audit_metadata:
|
|
821
|
+
audit_metadata: dict[str, str] | None = Field(
|
|
825
822
|
default=None,
|
|
826
823
|
title="Audit metadata",
|
|
827
824
|
description=(
|
|
@@ -845,29 +842,27 @@ class AuditMetadataBase(BaseModel):
|
|
|
845
842
|
|
|
846
843
|
class BaseSearchRequest(AuditMetadataBase):
|
|
847
844
|
query: str = SearchParamDefaults.query.to_pydantic_field()
|
|
848
|
-
filter_expression:
|
|
845
|
+
filter_expression: FilterExpression | None = (
|
|
849
846
|
SearchParamDefaults.filter_expression.to_pydantic_field()
|
|
850
847
|
)
|
|
851
848
|
fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
|
|
852
|
-
filters:
|
|
849
|
+
filters: list[str] | list[Filter] = Field(
|
|
853
850
|
default=[],
|
|
854
|
-
title="Filters",
|
|
855
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
851
|
+
title="Search Filters",
|
|
852
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
856
853
|
)
|
|
857
854
|
top_k: int = SearchParamDefaults.top_k.to_pydantic_field()
|
|
858
|
-
min_score:
|
|
855
|
+
min_score: float | MinScore | None = Field(
|
|
859
856
|
default=None,
|
|
860
857
|
title="Minimum score",
|
|
861
|
-
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
862
|
-
)
|
|
863
|
-
range_creation_start: Optional[DateTime] = (
|
|
864
|
-
SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
858
|
+
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
865
859
|
)
|
|
866
|
-
|
|
867
|
-
|
|
860
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
861
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
862
|
+
range_modification_start: DateTime | None = (
|
|
868
863
|
SearchParamDefaults.range_modification_start.to_pydantic_field()
|
|
869
864
|
)
|
|
870
|
-
range_modification_end:
|
|
865
|
+
range_modification_end: DateTime | None = (
|
|
871
866
|
SearchParamDefaults.range_modification_end.to_pydantic_field()
|
|
872
867
|
)
|
|
873
868
|
debug: bool = SearchParamDefaults.debug.to_pydantic_field()
|
|
@@ -875,15 +870,15 @@ class BaseSearchRequest(AuditMetadataBase):
|
|
|
875
870
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
|
|
876
871
|
field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
|
|
877
872
|
extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
|
|
878
|
-
vector:
|
|
879
|
-
vectorset:
|
|
873
|
+
vector: list[float] | None = SearchParamDefaults.vector.to_pydantic_field()
|
|
874
|
+
vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
|
|
880
875
|
with_duplicates: bool = SearchParamDefaults.with_duplicates.to_pydantic_field()
|
|
881
876
|
with_synonyms: bool = SearchParamDefaults.with_synonyms.to_pydantic_field()
|
|
882
877
|
# autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
|
|
883
878
|
# avoid breaking changes in the python sdks. Please remove on a future major release.
|
|
884
879
|
autofilter: SkipJsonSchema[bool] = False
|
|
885
880
|
resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
|
|
886
|
-
security:
|
|
881
|
+
security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
|
|
887
882
|
show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
|
|
888
883
|
|
|
889
884
|
rephrase: bool = Field(
|
|
@@ -893,7 +888,7 @@ class BaseSearchRequest(AuditMetadataBase):
|
|
|
893
888
|
),
|
|
894
889
|
)
|
|
895
890
|
|
|
896
|
-
rephrase_prompt:
|
|
891
|
+
rephrase_prompt: str | None = Field(
|
|
897
892
|
default=None,
|
|
898
893
|
title="Rephrase",
|
|
899
894
|
description=(
|
|
@@ -912,7 +907,7 @@ Please return ONLY the question without any explanation. Just the rephrased ques
|
|
|
912
907
|
Please return ONLY the question without any explanation.""",
|
|
913
908
|
],
|
|
914
909
|
)
|
|
915
|
-
query_image:
|
|
910
|
+
query_image: Image | None = Field(
|
|
916
911
|
default=None,
|
|
917
912
|
title="Query image",
|
|
918
913
|
description="Image that will be used together with the query text for retrieval.",
|
|
@@ -949,7 +944,7 @@ class SearchRequest(BaseSearchRequest):
|
|
|
949
944
|
]
|
|
950
945
|
)
|
|
951
946
|
faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
|
|
952
|
-
sort:
|
|
947
|
+
sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
|
|
953
948
|
offset: int = SearchParamDefaults.offset.to_pydantic_field()
|
|
954
949
|
|
|
955
950
|
@field_validator("faceted")
|
|
@@ -970,7 +965,7 @@ class SearchRequest(BaseSearchRequest):
|
|
|
970
965
|
|
|
971
966
|
@field_validator("sort", mode="after")
|
|
972
967
|
@classmethod
|
|
973
|
-
def sorting_by_title_not_supported(cls, value:
|
|
968
|
+
def sorting_by_title_not_supported(cls, value: SortOptions | None) -> SortOptions | None:
|
|
974
969
|
if value and value.field == SortField.TITLE:
|
|
975
970
|
raise ValueError("sorting by title not supported in /search")
|
|
976
971
|
|
|
@@ -996,19 +991,19 @@ class UserPrompt(BaseModel):
|
|
|
996
991
|
|
|
997
992
|
|
|
998
993
|
class MaxTokens(BaseModel):
|
|
999
|
-
context:
|
|
994
|
+
context: int | None = Field(
|
|
1000
995
|
default=None,
|
|
1001
996
|
title="Maximum context tokens",
|
|
1002
997
|
description="Use to limit the amount of tokens used in the LLM context",
|
|
1003
998
|
)
|
|
1004
|
-
answer:
|
|
999
|
+
answer: int | None = Field(
|
|
1005
1000
|
default=None,
|
|
1006
1001
|
title="Maximum answer tokens",
|
|
1007
1002
|
description="Use to limit the amount of tokens used in the LLM answer",
|
|
1008
1003
|
)
|
|
1009
1004
|
|
|
1010
1005
|
|
|
1011
|
-
def parse_max_tokens(max_tokens:
|
|
1006
|
+
def parse_max_tokens(max_tokens: int | MaxTokens | None) -> MaxTokens | None:
|
|
1012
1007
|
if isinstance(max_tokens, int):
|
|
1013
1008
|
# If the max_tokens is an integer, it is interpreted as the max_tokens value for the generated answer.
|
|
1014
1009
|
# The max tokens for the context is set to None to use the default value for the model (comes in the
|
|
@@ -1054,7 +1049,7 @@ class ChatModel(BaseModel):
|
|
|
1054
1049
|
question: str = Field(description="Question to ask the generative model")
|
|
1055
1050
|
user_id: str
|
|
1056
1051
|
retrieval: bool = True
|
|
1057
|
-
system:
|
|
1052
|
+
system: str | None = Field(
|
|
1058
1053
|
default=None,
|
|
1059
1054
|
title="System prompt",
|
|
1060
1055
|
description="Optional system prompt input by the user",
|
|
@@ -1063,9 +1058,9 @@ class ChatModel(BaseModel):
|
|
|
1063
1058
|
default={},
|
|
1064
1059
|
description="The information retrieval context for the current query",
|
|
1065
1060
|
)
|
|
1066
|
-
query_context_order:
|
|
1061
|
+
query_context_order: dict[str, int] | None = Field(
|
|
1067
1062
|
default=None,
|
|
1068
|
-
description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
|
|
1063
|
+
description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
|
|
1069
1064
|
)
|
|
1070
1065
|
chat_history: list[ChatContextMessage] = Field(
|
|
1071
1066
|
default=[], description="The chat conversation history"
|
|
@@ -1074,29 +1069,29 @@ class ChatModel(BaseModel):
|
|
|
1074
1069
|
default=True,
|
|
1075
1070
|
description="Truncate the chat context in case it doesn't fit the generative input",
|
|
1076
1071
|
)
|
|
1077
|
-
user_prompt:
|
|
1072
|
+
user_prompt: UserPrompt | None = Field(
|
|
1078
1073
|
default=None, description="Optional custom prompt input by the user"
|
|
1079
1074
|
)
|
|
1080
|
-
citations:
|
|
1075
|
+
citations: bool | None | CitationsType = Field(
|
|
1081
1076
|
default=None,
|
|
1082
1077
|
description="Whether to include citations in the response. "
|
|
1083
1078
|
"If set to None or False, no citations will be computed. "
|
|
1084
1079
|
"If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
|
|
1085
1080
|
"If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
|
|
1086
1081
|
)
|
|
1087
|
-
citation_threshold:
|
|
1082
|
+
citation_threshold: float | None = Field(
|
|
1088
1083
|
default=None,
|
|
1089
1084
|
description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
|
|
1090
1085
|
ge=0.0,
|
|
1091
1086
|
le=1.0,
|
|
1092
1087
|
)
|
|
1093
|
-
generative_model:
|
|
1088
|
+
generative_model: str | None = Field(
|
|
1094
1089
|
default=None,
|
|
1095
1090
|
title="Generative model",
|
|
1096
|
-
description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1091
|
+
description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1097
1092
|
)
|
|
1098
1093
|
|
|
1099
|
-
max_tokens:
|
|
1094
|
+
max_tokens: int | None = Field(default=None, description="Maximum characters to generate")
|
|
1100
1095
|
|
|
1101
1096
|
query_context_images: dict[str, Image] = Field(
|
|
1102
1097
|
default={},
|
|
@@ -1107,7 +1102,7 @@ class ChatModel(BaseModel):
|
|
|
1107
1102
|
default=False,
|
|
1108
1103
|
description="If set to true, the response will be in markdown format",
|
|
1109
1104
|
)
|
|
1110
|
-
json_schema:
|
|
1105
|
+
json_schema: dict[str, Any] | None = Field(
|
|
1111
1106
|
default=None,
|
|
1112
1107
|
description="The JSON schema to use for the generative model answers",
|
|
1113
1108
|
)
|
|
@@ -1115,17 +1110,18 @@ class ChatModel(BaseModel):
|
|
|
1115
1110
|
default=False,
|
|
1116
1111
|
description="Whether to reorder the query context based on a reranker",
|
|
1117
1112
|
)
|
|
1118
|
-
top_k:
|
|
1113
|
+
top_k: int | None = Field(default=None, description="Number of best elements to get from")
|
|
1119
1114
|
|
|
1120
1115
|
format_prompt: bool = Field(
|
|
1121
1116
|
default=True,
|
|
1122
|
-
description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
|
|
1117
|
+
description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
|
|
1123
1118
|
)
|
|
1124
|
-
seed:
|
|
1119
|
+
seed: int | None = Field(
|
|
1125
1120
|
default=None,
|
|
1126
1121
|
description="Seed use for the generative model for a deterministic output.",
|
|
1127
1122
|
)
|
|
1128
|
-
reasoning:
|
|
1123
|
+
reasoning: Reasoning | bool = Field(
|
|
1124
|
+
title="Reasoning options",
|
|
1129
1125
|
default=False,
|
|
1130
1126
|
description=(
|
|
1131
1127
|
"Reasoning options for the generative model. "
|
|
@@ -1139,26 +1135,25 @@ class RephraseModel(BaseModel):
|
|
|
1139
1135
|
chat_history: list[ChatContextMessage] = []
|
|
1140
1136
|
user_id: str
|
|
1141
1137
|
user_context: list[str] = []
|
|
1142
|
-
generative_model:
|
|
1138
|
+
generative_model: str | None = Field(
|
|
1143
1139
|
default=None,
|
|
1144
1140
|
title="Generative model",
|
|
1145
|
-
description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1141
|
+
description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1146
1142
|
)
|
|
1147
|
-
chat_history_relevance_threshold:
|
|
1143
|
+
chat_history_relevance_threshold: (
|
|
1148
1144
|
Annotated[
|
|
1149
1145
|
float,
|
|
1150
1146
|
Field(
|
|
1151
1147
|
ge=0.0,
|
|
1152
1148
|
le=1.0,
|
|
1153
|
-
description=
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
"Values in between adjust the sensitivity."
|
|
1158
|
-
),
|
|
1149
|
+
description="Threshold to determine if the past chat history is relevant to rephrase the user's question. "
|
|
1150
|
+
"0 - Always treat previous messages as relevant (always rephrase)."
|
|
1151
|
+
"1 - Always treat previous messages as irrelevant (never rephrase)."
|
|
1152
|
+
"Values in between adjust the sensitivity.",
|
|
1159
1153
|
),
|
|
1160
1154
|
]
|
|
1161
|
-
|
|
1155
|
+
| None
|
|
1156
|
+
) = None
|
|
1162
1157
|
|
|
1163
1158
|
|
|
1164
1159
|
class RagStrategyName:
|
|
@@ -1242,13 +1237,13 @@ class FullResourceApplyTo(BaseModel):
|
|
|
1242
1237
|
exclude: list[str] = Field(
|
|
1243
1238
|
default_factory=list,
|
|
1244
1239
|
title="Labels to exclude from full resource expansion",
|
|
1245
|
-
description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
|
|
1240
|
+
description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
|
|
1246
1241
|
)
|
|
1247
1242
|
|
|
1248
1243
|
|
|
1249
1244
|
class FullResourceStrategy(RagStrategy):
|
|
1250
1245
|
name: Literal["full_resource"] = "full_resource"
|
|
1251
|
-
count:
|
|
1246
|
+
count: int | None = Field(
|
|
1252
1247
|
default=None,
|
|
1253
1248
|
title="Count",
|
|
1254
1249
|
description="Maximum number of full documents to retrieve. If not specified, all matching documents are retrieved.",
|
|
@@ -1259,7 +1254,7 @@ class FullResourceStrategy(RagStrategy):
|
|
|
1259
1254
|
title="Include remaining text blocks",
|
|
1260
1255
|
description="Whether to include the remaining text blocks after the maximum number of resources has been reached.",
|
|
1261
1256
|
)
|
|
1262
|
-
apply_to:
|
|
1257
|
+
apply_to: FullResourceApplyTo | None = Field(
|
|
1263
1258
|
default=None,
|
|
1264
1259
|
title="Apply to certain resources only",
|
|
1265
1260
|
description="Define which resources to exclude from serialization",
|
|
@@ -1365,7 +1360,7 @@ class PreQuery(BaseModel):
|
|
|
1365
1360
|
),
|
|
1366
1361
|
ge=0,
|
|
1367
1362
|
)
|
|
1368
|
-
id:
|
|
1363
|
+
id: str | None = Field(
|
|
1369
1364
|
default=None,
|
|
1370
1365
|
title="Prequery id",
|
|
1371
1366
|
min_length=1,
|
|
@@ -1499,7 +1494,7 @@ class TableImageStrategy(ImageRagStrategy):
|
|
|
1499
1494
|
|
|
1500
1495
|
class PageImageStrategy(ImageRagStrategy):
|
|
1501
1496
|
name: Literal["page_image"] = "page_image"
|
|
1502
|
-
count:
|
|
1497
|
+
count: int | None = Field(
|
|
1503
1498
|
default=None,
|
|
1504
1499
|
title="Count",
|
|
1505
1500
|
description="Maximum number of page images to retrieve. By default, at most 5 images are retrieved.",
|
|
@@ -1511,20 +1506,18 @@ class ParagraphImageStrategy(ImageRagStrategy):
|
|
|
1511
1506
|
|
|
1512
1507
|
|
|
1513
1508
|
RagStrategies = Annotated[
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
GraphStrategy,
|
|
1523
|
-
],
|
|
1509
|
+
FieldExtensionStrategy
|
|
1510
|
+
| FullResourceStrategy
|
|
1511
|
+
| HierarchyResourceStrategy
|
|
1512
|
+
| NeighbouringParagraphsStrategy
|
|
1513
|
+
| MetadataExtensionStrategy
|
|
1514
|
+
| ConversationalStrategy
|
|
1515
|
+
| PreQueriesStrategy
|
|
1516
|
+
| GraphStrategy,
|
|
1524
1517
|
Field(discriminator="name"),
|
|
1525
1518
|
]
|
|
1526
1519
|
RagImagesStrategies = Annotated[
|
|
1527
|
-
|
|
1520
|
+
PageImageStrategy | ParagraphImageStrategy | TableImageStrategy,
|
|
1528
1521
|
Field(discriminator="name"),
|
|
1529
1522
|
]
|
|
1530
1523
|
PromptContext = dict[str, str]
|
|
@@ -1533,10 +1526,10 @@ PromptContextImages = dict[str, Image]
|
|
|
1533
1526
|
|
|
1534
1527
|
|
|
1535
1528
|
class CustomPrompt(BaseModel):
|
|
1536
|
-
system:
|
|
1529
|
+
system: str | None = Field(
|
|
1537
1530
|
default=None,
|
|
1538
1531
|
title="System prompt",
|
|
1539
|
-
description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
|
|
1532
|
+
description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
|
|
1540
1533
|
min_length=1,
|
|
1541
1534
|
examples=[
|
|
1542
1535
|
"You are a medical assistant, use medical terminology",
|
|
@@ -1545,10 +1538,10 @@ class CustomPrompt(BaseModel):
|
|
|
1545
1538
|
"You are a financial expert, use correct terms",
|
|
1546
1539
|
],
|
|
1547
1540
|
)
|
|
1548
|
-
user:
|
|
1541
|
+
user: str | None = Field(
|
|
1549
1542
|
default=None,
|
|
1550
1543
|
title="User prompt",
|
|
1551
|
-
description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
|
|
1544
|
+
description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
|
|
1552
1545
|
min_length=1,
|
|
1553
1546
|
examples=[
|
|
1554
1547
|
"Taking into account our previous conversation, and this context: {context} answer this {question}",
|
|
@@ -1557,7 +1550,7 @@ class CustomPrompt(BaseModel):
|
|
|
1557
1550
|
"Given this context: {context}. Answer this {question} using the provided context. Please, answer always in French",
|
|
1558
1551
|
],
|
|
1559
1552
|
)
|
|
1560
|
-
rephrase:
|
|
1553
|
+
rephrase: str | None = Field(
|
|
1561
1554
|
default=None,
|
|
1562
1555
|
title="Rephrase",
|
|
1563
1556
|
description=(
|
|
@@ -1587,23 +1580,23 @@ class AskRequest(AuditMetadataBase):
|
|
|
1587
1580
|
le=200,
|
|
1588
1581
|
description="The top most relevant results to fetch at the retrieval step. The maximum number of results allowed is 200.",
|
|
1589
1582
|
)
|
|
1590
|
-
filter_expression:
|
|
1583
|
+
filter_expression: FilterExpression | None = (
|
|
1591
1584
|
SearchParamDefaults.filter_expression.to_pydantic_field()
|
|
1592
1585
|
)
|
|
1593
1586
|
fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
|
|
1594
|
-
filters:
|
|
1587
|
+
filters: list[str] | list[Filter] = Field(
|
|
1595
1588
|
default=[],
|
|
1596
|
-
title="Filters",
|
|
1597
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
1589
|
+
title="Search Filters",
|
|
1590
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
1598
1591
|
)
|
|
1599
|
-
keyword_filters:
|
|
1592
|
+
keyword_filters: list[str] | list[Filter] = Field(
|
|
1600
1593
|
default=[],
|
|
1601
1594
|
title="Keyword filters",
|
|
1602
1595
|
description=(
|
|
1603
1596
|
"List of keyword filter expressions to apply to the retrieval step. "
|
|
1604
1597
|
"The text block search will only be performed on the documents that contain the specified keywords. "
|
|
1605
1598
|
"The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
|
|
1606
|
-
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1599
|
+
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1607
1600
|
),
|
|
1608
1601
|
examples=[
|
|
1609
1602
|
["NLP", "BERT"],
|
|
@@ -1611,43 +1604,39 @@ class AskRequest(AuditMetadataBase):
|
|
|
1611
1604
|
["Friedrich Nietzsche", "Immanuel Kant"],
|
|
1612
1605
|
],
|
|
1613
1606
|
)
|
|
1614
|
-
vectorset:
|
|
1615
|
-
min_score:
|
|
1607
|
+
vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
|
|
1608
|
+
min_score: float | MinScore | None = Field(
|
|
1616
1609
|
default=None,
|
|
1617
1610
|
title="Minimum score",
|
|
1618
|
-
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
1611
|
+
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
1619
1612
|
)
|
|
1620
1613
|
features: list[ChatOptions] = SearchParamDefaults.chat_features.to_pydantic_field()
|
|
1621
|
-
range_creation_start:
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
1625
|
-
range_modification_start: Optional[DateTime] = (
|
|
1614
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
1615
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
1616
|
+
range_modification_start: DateTime | None = (
|
|
1626
1617
|
SearchParamDefaults.range_modification_start.to_pydantic_field()
|
|
1627
1618
|
)
|
|
1628
|
-
range_modification_end:
|
|
1619
|
+
range_modification_end: DateTime | None = (
|
|
1629
1620
|
SearchParamDefaults.range_modification_end.to_pydantic_field()
|
|
1630
1621
|
)
|
|
1631
1622
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
|
|
1632
1623
|
field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
|
|
1633
1624
|
extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
|
|
1634
|
-
context:
|
|
1635
|
-
chat_history:
|
|
1636
|
-
|
|
1637
|
-
)
|
|
1638
|
-
extra_context: Optional[list[str]] = Field(
|
|
1625
|
+
context: list[ChatContextMessage] | None = SearchParamDefaults.chat_context.to_pydantic_field()
|
|
1626
|
+
chat_history: list[ChatContextMessage] | None = SearchParamDefaults.chat_history.to_pydantic_field()
|
|
1627
|
+
extra_context: list[str] | None = Field(
|
|
1639
1628
|
default=None,
|
|
1640
1629
|
title="Extra query context",
|
|
1641
1630
|
description="""Additional context that is added to the retrieval context sent to the LLM.
|
|
1642
1631
|
It allows extending the chat feature with content that may not be in the Knowledge Box.""",
|
|
1643
1632
|
)
|
|
1644
|
-
extra_context_images:
|
|
1633
|
+
extra_context_images: list[Image] | None = Field(
|
|
1645
1634
|
default=None,
|
|
1646
1635
|
title="Extra query context images",
|
|
1647
1636
|
description="""Additional images added to the retrieval context sent to the LLM."
|
|
1648
1637
|
It allows extending the chat feature with content that may not be in the Knowledge Box.""",
|
|
1649
1638
|
)
|
|
1650
|
-
query_image:
|
|
1639
|
+
query_image: Image | None = Field(
|
|
1651
1640
|
default=None,
|
|
1652
1641
|
title="Query image",
|
|
1653
1642
|
description="Image that will be used together with the query text for retrieval and then sent to the LLM as part of the context. "
|
|
@@ -1660,27 +1649,27 @@ class AskRequest(AuditMetadataBase):
|
|
|
1660
1649
|
|
|
1661
1650
|
highlight: bool = SearchParamDefaults.highlight.to_pydantic_field()
|
|
1662
1651
|
resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
|
|
1663
|
-
prompt:
|
|
1652
|
+
prompt: str | CustomPrompt | None = Field(
|
|
1664
1653
|
default=None,
|
|
1665
1654
|
title="Prompts",
|
|
1666
|
-
description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
|
|
1655
|
+
description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
|
|
1667
1656
|
)
|
|
1668
|
-
rank_fusion:
|
|
1669
|
-
reranker:
|
|
1670
|
-
citations:
|
|
1657
|
+
rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
|
|
1658
|
+
reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
|
|
1659
|
+
citations: bool | None | CitationsType = Field(
|
|
1671
1660
|
default=None,
|
|
1672
1661
|
description="Whether to include citations in the response. "
|
|
1673
1662
|
"If set to None or False, no citations will be computed. "
|
|
1674
1663
|
"If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
|
|
1675
1664
|
"If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
|
|
1676
1665
|
)
|
|
1677
|
-
citation_threshold:
|
|
1666
|
+
citation_threshold: float | None = Field(
|
|
1678
1667
|
default=None,
|
|
1679
1668
|
description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
|
|
1680
1669
|
ge=0.0,
|
|
1681
1670
|
le=1.0,
|
|
1682
1671
|
)
|
|
1683
|
-
security:
|
|
1672
|
+
security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
|
|
1684
1673
|
show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
|
|
1685
1674
|
rag_strategies: list[RagStrategies] = Field(
|
|
1686
1675
|
default=[],
|
|
@@ -1745,21 +1734,21 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1745
1734
|
)
|
|
1746
1735
|
debug: bool = SearchParamDefaults.debug.to_pydantic_field()
|
|
1747
1736
|
|
|
1748
|
-
generative_model:
|
|
1737
|
+
generative_model: str | None = Field(
|
|
1749
1738
|
default=None,
|
|
1750
1739
|
title="Generative model",
|
|
1751
|
-
description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1740
|
+
description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1752
1741
|
)
|
|
1753
|
-
generative_model_seed:
|
|
1742
|
+
generative_model_seed: int | None = Field(
|
|
1754
1743
|
default=None,
|
|
1755
1744
|
title="Seed for the generative model",
|
|
1756
1745
|
description="The seed to use for the generative model for deterministic generation. Only supported by some models.",
|
|
1757
1746
|
)
|
|
1758
1747
|
|
|
1759
|
-
max_tokens:
|
|
1748
|
+
max_tokens: int | MaxTokens | None = Field(
|
|
1760
1749
|
default=None,
|
|
1761
1750
|
title="Maximum LLM tokens to use for the request",
|
|
1762
|
-
description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
|
|
1751
|
+
description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
|
|
1763
1752
|
)
|
|
1764
1753
|
|
|
1765
1754
|
rephrase: bool = Field(
|
|
@@ -1768,7 +1757,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1768
1757
|
"Rephrase the query for a more efficient retrieval. This will consume LLM tokens and make the request slower."
|
|
1769
1758
|
),
|
|
1770
1759
|
)
|
|
1771
|
-
chat_history_relevance_threshold:
|
|
1760
|
+
chat_history_relevance_threshold: float | None = Field(
|
|
1772
1761
|
default=None,
|
|
1773
1762
|
ge=0.0,
|
|
1774
1763
|
le=1.0,
|
|
@@ -1786,7 +1775,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1786
1775
|
description="If set to true, the response will be in markdown format",
|
|
1787
1776
|
)
|
|
1788
1777
|
|
|
1789
|
-
answer_json_schema:
|
|
1778
|
+
answer_json_schema: dict[str, Any] | None = Field(
|
|
1790
1779
|
default=None,
|
|
1791
1780
|
title="Answer JSON schema",
|
|
1792
1781
|
description="""Desired JSON schema for the LLM answer.
|
|
@@ -1802,13 +1791,14 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
|
|
|
1802
1791
|
description="Whether to generate an answer using the generative model. If set to false, the response will only contain the retrieval results.",
|
|
1803
1792
|
)
|
|
1804
1793
|
|
|
1805
|
-
search_configuration:
|
|
1794
|
+
search_configuration: str | None = Field(
|
|
1806
1795
|
default=None,
|
|
1807
1796
|
description="Load ask parameters from this configuration. Parameters in the request override parameters from the configuration.",
|
|
1808
1797
|
)
|
|
1809
1798
|
|
|
1810
|
-
reasoning:
|
|
1799
|
+
reasoning: Reasoning | bool = Field(
|
|
1811
1800
|
default=False,
|
|
1801
|
+
title="Reasoning options",
|
|
1812
1802
|
description=(
|
|
1813
1803
|
"Reasoning options for the generative model. "
|
|
1814
1804
|
"Set to True to enable default reasoning, False to disable, or provide a Reasoning object for custom options."
|
|
@@ -1896,8 +1886,8 @@ class SummarizeModel(BaseModel):
|
|
|
1896
1886
|
"""
|
|
1897
1887
|
|
|
1898
1888
|
resources: dict[str, SummarizeResourceModel] = {}
|
|
1899
|
-
generative_model:
|
|
1900
|
-
user_prompt:
|
|
1889
|
+
generative_model: str | None = None
|
|
1890
|
+
user_prompt: str | None = None
|
|
1901
1891
|
summary_kind: SummaryKind = SummaryKind.SIMPLE
|
|
1902
1892
|
|
|
1903
1893
|
|
|
@@ -1906,13 +1896,13 @@ class SummarizeRequest(BaseModel):
|
|
|
1906
1896
|
Model for the request payload of the summarize endpoint
|
|
1907
1897
|
"""
|
|
1908
1898
|
|
|
1909
|
-
generative_model:
|
|
1899
|
+
generative_model: str | None = Field(
|
|
1910
1900
|
default=None,
|
|
1911
1901
|
title="Generative model",
|
|
1912
|
-
description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
|
|
1902
|
+
description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
|
|
1913
1903
|
)
|
|
1914
1904
|
|
|
1915
|
-
user_prompt:
|
|
1905
|
+
user_prompt: str | None = Field(
|
|
1916
1906
|
default=None,
|
|
1917
1907
|
title="User prompt",
|
|
1918
1908
|
description="Optional custom prompt input by the user",
|
|
@@ -1923,7 +1913,7 @@ class SummarizeRequest(BaseModel):
|
|
|
1923
1913
|
min_length=1,
|
|
1924
1914
|
max_length=100,
|
|
1925
1915
|
title="Resources",
|
|
1926
|
-
description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
|
|
1916
|
+
description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
|
|
1927
1917
|
)
|
|
1928
1918
|
|
|
1929
1919
|
summary_kind: SummaryKind = Field(
|
|
@@ -1949,20 +1939,20 @@ class SummarizedResponse(BaseModel):
|
|
|
1949
1939
|
title="Summary",
|
|
1950
1940
|
description="Global summary of all resources combined.",
|
|
1951
1941
|
)
|
|
1952
|
-
consumption:
|
|
1942
|
+
consumption: Consumption | None = None
|
|
1953
1943
|
|
|
1954
1944
|
|
|
1955
1945
|
class KnowledgeGraphEntity(BaseModel):
|
|
1956
1946
|
name: str
|
|
1957
|
-
type:
|
|
1958
|
-
subtype:
|
|
1947
|
+
type: RelationNodeType | None = None
|
|
1948
|
+
subtype: str | None = None
|
|
1959
1949
|
|
|
1960
1950
|
|
|
1961
1951
|
class FindRequest(BaseSearchRequest):
|
|
1962
|
-
query_entities: SkipJsonSchema[
|
|
1952
|
+
query_entities: SkipJsonSchema[list[KnowledgeGraphEntity] | None] = Field(
|
|
1963
1953
|
default=None, title="Query entities", description="Entities to use in a knowledge graph search"
|
|
1964
1954
|
)
|
|
1965
|
-
graph_query:
|
|
1955
|
+
graph_query: GraphPathQuery | None = Field(
|
|
1966
1956
|
default=None,
|
|
1967
1957
|
title="Graph query",
|
|
1968
1958
|
description="Query for the knowledge graph. Paths (node-relation-node) extracted from a paragraph_id will be used to extend the results",
|
|
@@ -1973,17 +1963,17 @@ class FindRequest(BaseSearchRequest):
|
|
|
1973
1963
|
FindOptions.SEMANTIC,
|
|
1974
1964
|
]
|
|
1975
1965
|
)
|
|
1976
|
-
rank_fusion:
|
|
1977
|
-
reranker:
|
|
1966
|
+
rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
|
|
1967
|
+
reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
|
|
1978
1968
|
|
|
1979
|
-
keyword_filters:
|
|
1969
|
+
keyword_filters: list[str] | list[Filter] = Field(
|
|
1980
1970
|
default=[],
|
|
1981
1971
|
title="Keyword filters",
|
|
1982
1972
|
description=(
|
|
1983
1973
|
"List of keyword filter expressions to apply to the retrieval step. "
|
|
1984
1974
|
"The text block search will only be performed on the documents that contain the specified keywords. "
|
|
1985
1975
|
"The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
|
|
1986
|
-
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1976
|
+
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1987
1977
|
),
|
|
1988
1978
|
examples=[
|
|
1989
1979
|
["NLP", "BERT"],
|
|
@@ -1992,11 +1982,11 @@ class FindRequest(BaseSearchRequest):
|
|
|
1992
1982
|
],
|
|
1993
1983
|
)
|
|
1994
1984
|
|
|
1995
|
-
search_configuration:
|
|
1985
|
+
search_configuration: str | None = Field(
|
|
1996
1986
|
default=None,
|
|
1997
1987
|
description="Load find parameters from this configuration. Parameters in the request override parameters from the configuration.",
|
|
1998
1988
|
)
|
|
1999
|
-
generative_model:
|
|
1989
|
+
generative_model: str | None = Field(
|
|
2000
1990
|
default=None,
|
|
2001
1991
|
title="Generative model",
|
|
2002
1992
|
description="The generative model used to rephrase the query. If not provided, the model configured for the Knowledge Box is used.",
|
|
@@ -2030,9 +2020,9 @@ class SCORE_TYPE(str, Enum):
|
|
|
2030
2020
|
|
|
2031
2021
|
|
|
2032
2022
|
class FindTextPosition(BaseModel):
|
|
2033
|
-
page_number:
|
|
2034
|
-
start_seconds:
|
|
2035
|
-
end_seconds:
|
|
2023
|
+
page_number: int | None = None
|
|
2024
|
+
start_seconds: list[int] | None = None
|
|
2025
|
+
end_seconds: list[int] | None = None
|
|
2036
2026
|
index: int
|
|
2037
2027
|
start: int
|
|
2038
2028
|
end: int
|
|
@@ -2044,15 +2034,15 @@ class FindParagraph(BaseModel):
|
|
|
2044
2034
|
order: int = Field(default=0, ge=0)
|
|
2045
2035
|
text: str
|
|
2046
2036
|
id: str
|
|
2047
|
-
labels:
|
|
2048
|
-
position:
|
|
2037
|
+
labels: list[str] | None = []
|
|
2038
|
+
position: TextPosition | None = None
|
|
2049
2039
|
fuzzy_result: bool = False
|
|
2050
2040
|
page_with_visual: bool = Field(
|
|
2051
2041
|
default=False,
|
|
2052
2042
|
title="Page where this paragraph belongs is a visual page",
|
|
2053
2043
|
description="This flag informs if the page may have information that has not been extracted",
|
|
2054
2044
|
)
|
|
2055
|
-
reference:
|
|
2045
|
+
reference: str | None = Field(
|
|
2056
2046
|
default=None,
|
|
2057
2047
|
title="Reference to the image that represents this text",
|
|
2058
2048
|
description="Reference to the extracted image that represents this paragraph",
|
|
@@ -2062,7 +2052,7 @@ class FindParagraph(BaseModel):
|
|
|
2062
2052
|
title="Is a table",
|
|
2063
2053
|
description="The referenced image of the paragraph is a table",
|
|
2064
2054
|
)
|
|
2065
|
-
relevant_relations:
|
|
2055
|
+
relevant_relations: Relations | None = Field(
|
|
2066
2056
|
default=None,
|
|
2067
2057
|
title="Relevant relations",
|
|
2068
2058
|
description="Relevant relations from which the paragraph was found, will only be filled if using the Graph RAG Strategy",
|
|
@@ -2087,9 +2077,9 @@ class KnowledgeboxFindResults(JsonBaseModel):
|
|
|
2087
2077
|
"""Find on knowledgebox results"""
|
|
2088
2078
|
|
|
2089
2079
|
resources: dict[str, FindResource]
|
|
2090
|
-
relations:
|
|
2091
|
-
query:
|
|
2092
|
-
rephrased_query:
|
|
2080
|
+
relations: Relations | None = None
|
|
2081
|
+
query: str | None = Field(default=None, title="Find Results Query")
|
|
2082
|
+
rephrased_query: str | None = None
|
|
2093
2083
|
total: int = 0
|
|
2094
2084
|
page_number: int = Field(
|
|
2095
2085
|
default=0,
|
|
@@ -2103,18 +2093,18 @@ class KnowledgeboxFindResults(JsonBaseModel):
|
|
|
2103
2093
|
default=False,
|
|
2104
2094
|
description="Pagination will be deprecated, please, refer to `top_k` in the request",
|
|
2105
2095
|
)
|
|
2106
|
-
nodes:
|
|
2096
|
+
nodes: list[dict[str, str]] | None = Field(
|
|
2107
2097
|
default=None,
|
|
2108
2098
|
title="Nodes",
|
|
2109
2099
|
description="List of nodes queried in the search",
|
|
2110
2100
|
)
|
|
2111
|
-
shards:
|
|
2101
|
+
shards: list[str] | None = Field(
|
|
2112
2102
|
default=None,
|
|
2113
2103
|
title="Shards",
|
|
2114
2104
|
description="The list of shard replica ids used for the search.",
|
|
2115
2105
|
)
|
|
2116
2106
|
autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
|
|
2117
|
-
min_score:
|
|
2107
|
+
min_score: float | MinScore | None = Field(
|
|
2118
2108
|
default=MinScore(),
|
|
2119
2109
|
title="Minimum result score",
|
|
2120
2110
|
description="The minimum scores that have been used for the search operation.",
|
|
@@ -2122,9 +2112,9 @@ class KnowledgeboxFindResults(JsonBaseModel):
|
|
|
2122
2112
|
best_matches: list[str] = Field(
|
|
2123
2113
|
default=[],
|
|
2124
2114
|
title="Best matches",
|
|
2125
|
-
description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
|
|
2115
|
+
description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
|
|
2126
2116
|
)
|
|
2127
|
-
metrics:
|
|
2117
|
+
metrics: dict[str, Any] | None = Field(
|
|
2128
2118
|
default=None,
|
|
2129
2119
|
title="Metrics",
|
|
2130
2120
|
description=(
|
|
@@ -2142,15 +2132,15 @@ class FeedbackTasks(str, Enum):
|
|
|
2142
2132
|
class FeedbackRequest(BaseModel):
|
|
2143
2133
|
ident: str = Field(
|
|
2144
2134
|
title="Request identifier",
|
|
2145
|
-
description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
|
|
2135
|
+
description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
|
|
2146
2136
|
)
|
|
2147
2137
|
good: bool = Field(title="Good", description="Whether the result was good or not")
|
|
2148
2138
|
task: FeedbackTasks = Field(
|
|
2149
2139
|
title="Task",
|
|
2150
2140
|
description="The task the feedback is for. For now, only `CHAT` task is available",
|
|
2151
2141
|
)
|
|
2152
|
-
feedback:
|
|
2153
|
-
text_block_id:
|
|
2142
|
+
feedback: str | None = Field(None, title="Feedback", description="Feedback text")
|
|
2143
|
+
text_block_id: str | None = Field(None, title="Text block", description="Text block id")
|
|
2154
2144
|
|
|
2155
2145
|
|
|
2156
2146
|
def validate_facets(facets):
|
|
@@ -2201,13 +2191,11 @@ class AugmentedTextBlock(BaseModel):
|
|
|
2201
2191
|
text: str = Field(
|
|
2202
2192
|
description="The text of the augmented text block. It may include additional metadata to enrich the context"
|
|
2203
2193
|
)
|
|
2204
|
-
position:
|
|
2194
|
+
position: TextPosition | None = Field(
|
|
2205
2195
|
default=None,
|
|
2206
2196
|
description="Metadata about the position of the text block in the original document.",
|
|
2207
2197
|
)
|
|
2208
|
-
parent:
|
|
2209
|
-
default=None, description="The parent text block that was augmented for."
|
|
2210
|
-
)
|
|
2198
|
+
parent: str | None = Field(default=None, description="The parent text block that was augmented for.")
|
|
2211
2199
|
augmentation_type: TextBlockAugmentationType = Field(description="Type of augmentation.")
|
|
2212
2200
|
|
|
2213
2201
|
|
|
@@ -2231,12 +2219,12 @@ class AskTokens(BaseModel):
|
|
|
2231
2219
|
title="Output tokens",
|
|
2232
2220
|
description="Number of LLM tokens used for the answer",
|
|
2233
2221
|
)
|
|
2234
|
-
input_nuclia:
|
|
2222
|
+
input_nuclia: float | None = Field(
|
|
2235
2223
|
title="Input Nuclia tokens",
|
|
2236
2224
|
description="Number of Nuclia LLM tokens used for the context in the query",
|
|
2237
2225
|
default=None,
|
|
2238
2226
|
)
|
|
2239
|
-
output_nuclia:
|
|
2227
|
+
output_nuclia: float | None = Field(
|
|
2240
2228
|
title="Output Nuclia tokens",
|
|
2241
2229
|
description="Number of Nuclia LLM tokens used for the answer",
|
|
2242
2230
|
default=None,
|
|
@@ -2244,12 +2232,12 @@ class AskTokens(BaseModel):
|
|
|
2244
2232
|
|
|
2245
2233
|
|
|
2246
2234
|
class AskTimings(BaseModel):
|
|
2247
|
-
generative_first_chunk:
|
|
2235
|
+
generative_first_chunk: float | None = Field(
|
|
2248
2236
|
default=None,
|
|
2249
2237
|
title="Generative first chunk",
|
|
2250
2238
|
description="Time the LLM took to generate the first chunk of the answer",
|
|
2251
2239
|
)
|
|
2252
|
-
generative_total:
|
|
2240
|
+
generative_total: float | None = Field(
|
|
2253
2241
|
default=None,
|
|
2254
2242
|
title="Generative total",
|
|
2255
2243
|
description="Total time the LLM took to generate the answer",
|
|
@@ -2257,12 +2245,12 @@ class AskTimings(BaseModel):
|
|
|
2257
2245
|
|
|
2258
2246
|
|
|
2259
2247
|
class SyncAskMetadata(BaseModel):
|
|
2260
|
-
tokens:
|
|
2248
|
+
tokens: AskTokens | None = Field(
|
|
2261
2249
|
default=None,
|
|
2262
2250
|
title="Tokens",
|
|
2263
2251
|
description="Number of tokens used in the LLM context and answer",
|
|
2264
2252
|
)
|
|
2265
|
-
timings:
|
|
2253
|
+
timings: AskTimings | None = Field(
|
|
2266
2254
|
default=None,
|
|
2267
2255
|
title="Timings",
|
|
2268
2256
|
description="Timings of the generative model",
|
|
@@ -2281,19 +2269,19 @@ class SyncAskResponse(BaseModel):
|
|
|
2281
2269
|
title="Answer",
|
|
2282
2270
|
description="The generative answer to the query",
|
|
2283
2271
|
)
|
|
2284
|
-
reasoning:
|
|
2272
|
+
reasoning: str | None = Field(
|
|
2285
2273
|
default=None,
|
|
2286
|
-
title="Reasoning",
|
|
2287
|
-
description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
|
|
2274
|
+
title="Reasoning steps",
|
|
2275
|
+
description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
|
|
2288
2276
|
)
|
|
2289
|
-
answer_json:
|
|
2277
|
+
answer_json: dict[str, Any] | None = Field(
|
|
2290
2278
|
default=None,
|
|
2291
2279
|
title="Answer JSON",
|
|
2292
|
-
description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
|
|
2280
|
+
description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
|
|
2293
2281
|
)
|
|
2294
2282
|
status: str = Field(
|
|
2295
2283
|
title="Status",
|
|
2296
|
-
description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
|
|
2284
|
+
description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
|
|
2297
2285
|
)
|
|
2298
2286
|
retrieval_results: KnowledgeboxFindResults = Field(
|
|
2299
2287
|
title="Retrieval results",
|
|
@@ -2304,7 +2292,7 @@ class SyncAskResponse(BaseModel):
|
|
|
2304
2292
|
title="Retrieval best matches",
|
|
2305
2293
|
description="Sorted list of best matching text blocks in the retrieval step. This includes the main query and prequeries results, if any.",
|
|
2306
2294
|
)
|
|
2307
|
-
prequeries:
|
|
2295
|
+
prequeries: dict[str, KnowledgeboxFindResults] | None = Field(
|
|
2308
2296
|
default=None,
|
|
2309
2297
|
title="Prequeries",
|
|
2310
2298
|
description="The retrieval results of the prequeries",
|
|
@@ -2312,9 +2300,9 @@ class SyncAskResponse(BaseModel):
|
|
|
2312
2300
|
learning_id: str = Field(
|
|
2313
2301
|
default="",
|
|
2314
2302
|
title="Learning id",
|
|
2315
|
-
description="The id of the learning request. This id can be used to provide feedback on the learning process.",
|
|
2303
|
+
description="The id of the learning request. This id can be used to provide feedback on the learning process.",
|
|
2316
2304
|
)
|
|
2317
|
-
relations:
|
|
2305
|
+
relations: Relations | None = Field(
|
|
2318
2306
|
default=None,
|
|
2319
2307
|
title="Relations",
|
|
2320
2308
|
description="The detected relations of the answer",
|
|
@@ -2329,29 +2317,29 @@ class SyncAskResponse(BaseModel):
|
|
|
2329
2317
|
title="Citation footnote to context",
|
|
2330
2318
|
description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)""",
|
|
2331
2319
|
)
|
|
2332
|
-
augmented_context:
|
|
2320
|
+
augmented_context: AugmentedContext | None = Field(
|
|
2333
2321
|
default=None,
|
|
2334
2322
|
description=(
|
|
2335
2323
|
"Augmented text blocks that were sent to the LLM as part of the RAG strategies "
|
|
2336
2324
|
"applied on the retrieval results in the request."
|
|
2337
2325
|
),
|
|
2338
2326
|
)
|
|
2339
|
-
prompt_context:
|
|
2327
|
+
prompt_context: list[str] | None = Field(
|
|
2340
2328
|
default=None,
|
|
2341
2329
|
title="Prompt context",
|
|
2342
2330
|
description="The prompt context used to generate the answer. Returned only if the debug flag is set to true",
|
|
2343
2331
|
)
|
|
2344
|
-
predict_request:
|
|
2332
|
+
predict_request: dict[str, Any] | None = Field(
|
|
2345
2333
|
default=None,
|
|
2346
2334
|
title="Predict request",
|
|
2347
2335
|
description="The internal predict request used to generate the answer. Returned only if the debug flag is set to true",
|
|
2348
2336
|
)
|
|
2349
|
-
metadata:
|
|
2337
|
+
metadata: SyncAskMetadata | None = Field(
|
|
2350
2338
|
default=None,
|
|
2351
2339
|
title="Metadata",
|
|
2352
|
-
description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
|
|
2340
|
+
description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
|
|
2353
2341
|
)
|
|
2354
|
-
consumption:
|
|
2342
|
+
consumption: Consumption | None = Field(
|
|
2355
2343
|
default=None,
|
|
2356
2344
|
title="Consumption",
|
|
2357
2345
|
description=(
|
|
@@ -2359,12 +2347,12 @@ class SyncAskResponse(BaseModel):
|
|
|
2359
2347
|
" 'X-show-consumption' header is set to true in the request."
|
|
2360
2348
|
),
|
|
2361
2349
|
)
|
|
2362
|
-
error_details:
|
|
2350
|
+
error_details: str | None = Field(
|
|
2363
2351
|
default=None,
|
|
2364
2352
|
title="Error details",
|
|
2365
2353
|
description="Error details message in case there was an error",
|
|
2366
2354
|
)
|
|
2367
|
-
debug:
|
|
2355
|
+
debug: dict[str, Any] | None = Field(
|
|
2368
2356
|
default=None,
|
|
2369
2357
|
title="Debug information",
|
|
2370
2358
|
description=(
|
|
@@ -2454,7 +2442,7 @@ class StatusAskResponseItem(BaseModel):
|
|
|
2454
2442
|
type: Literal["status"] = "status"
|
|
2455
2443
|
code: str
|
|
2456
2444
|
status: str
|
|
2457
|
-
details:
|
|
2445
|
+
details: str | None = None
|
|
2458
2446
|
|
|
2459
2447
|
|
|
2460
2448
|
class ErrorAskResponseItem(BaseModel):
|
|
@@ -2473,22 +2461,22 @@ class DebugAskResponseItem(BaseModel):
|
|
|
2473
2461
|
metrics: dict[str, Any]
|
|
2474
2462
|
|
|
2475
2463
|
|
|
2476
|
-
AskResponseItemType =
|
|
2477
|
-
AnswerAskResponseItem
|
|
2478
|
-
ReasoningAskResponseItem
|
|
2479
|
-
JSONAskResponseItem
|
|
2480
|
-
MetadataAskResponseItem
|
|
2481
|
-
AugmentedContextResponseItem
|
|
2482
|
-
CitationsAskResponseItem
|
|
2483
|
-
FootnoteCitationsAskResponseItem
|
|
2484
|
-
StatusAskResponseItem
|
|
2485
|
-
ErrorAskResponseItem
|
|
2486
|
-
RetrievalAskResponseItem
|
|
2487
|
-
RelationsAskResponseItem
|
|
2488
|
-
DebugAskResponseItem
|
|
2489
|
-
PrequeriesAskResponseItem
|
|
2490
|
-
ConsumptionResponseItem
|
|
2491
|
-
|
|
2464
|
+
AskResponseItemType = (
|
|
2465
|
+
AnswerAskResponseItem
|
|
2466
|
+
| ReasoningAskResponseItem
|
|
2467
|
+
| JSONAskResponseItem
|
|
2468
|
+
| MetadataAskResponseItem
|
|
2469
|
+
| AugmentedContextResponseItem
|
|
2470
|
+
| CitationsAskResponseItem
|
|
2471
|
+
| FootnoteCitationsAskResponseItem
|
|
2472
|
+
| StatusAskResponseItem
|
|
2473
|
+
| ErrorAskResponseItem
|
|
2474
|
+
| RetrievalAskResponseItem
|
|
2475
|
+
| RelationsAskResponseItem
|
|
2476
|
+
| DebugAskResponseItem
|
|
2477
|
+
| PrequeriesAskResponseItem
|
|
2478
|
+
| ConsumptionResponseItem
|
|
2479
|
+
)
|
|
2492
2480
|
|
|
2493
2481
|
|
|
2494
2482
|
class AskResponseItem(BaseModel):
|
|
@@ -2508,7 +2496,7 @@ def parse_custom_prompt(item: AskRequest) -> CustomPrompt:
|
|
|
2508
2496
|
return prompt
|
|
2509
2497
|
|
|
2510
2498
|
|
|
2511
|
-
def parse_rephrase_prompt(item: AskRequest) ->
|
|
2499
|
+
def parse_rephrase_prompt(item: AskRequest) -> str | None:
|
|
2512
2500
|
prompt = parse_custom_prompt(item)
|
|
2513
2501
|
return prompt.rephrase
|
|
2514
2502
|
|
|
@@ -2519,7 +2507,7 @@ FindRequest.model_rebuild()
|
|
|
2519
2507
|
|
|
2520
2508
|
class CatalogFacetsPrefix(BaseModel):
|
|
2521
2509
|
prefix: str = Field(pattern="^((/[^/]+)*)$")
|
|
2522
|
-
depth:
|
|
2510
|
+
depth: int | None = Field(
|
|
2523
2511
|
default=None,
|
|
2524
2512
|
ge=0,
|
|
2525
2513
|
description="Only include facets up to this depth from the prefix, leave empty to include all depths",
|