nucliadb-models 6.9.5.post5452__py3-none-any.whl → 6.10.0.post5694__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb-models might be problematic. Click here for more details.
- nucliadb_models/agents/ingestion.py +4 -4
- nucliadb_models/augment.py +297 -23
- nucliadb_models/common.py +57 -57
- nucliadb_models/configuration.py +8 -8
- nucliadb_models/content_types.py +13 -11
- nucliadb_models/conversation.py +25 -26
- nucliadb_models/entities.py +17 -18
- nucliadb_models/external_index_providers.py +1 -2
- nucliadb_models/extracted.py +82 -83
- nucliadb_models/file.py +10 -11
- nucliadb_models/filters.py +78 -74
- nucliadb_models/graph/requests.py +38 -47
- nucliadb_models/hydration.py +48 -50
- nucliadb_models/internal/predict.py +7 -9
- nucliadb_models/internal/shards.py +2 -3
- nucliadb_models/labels.py +18 -11
- nucliadb_models/link.py +18 -19
- nucliadb_models/metadata.py +65 -53
- nucliadb_models/notifications.py +3 -3
- nucliadb_models/processing.py +1 -2
- nucliadb_models/resource.py +85 -102
- nucliadb_models/retrieval.py +147 -0
- nucliadb_models/search.py +266 -276
- nucliadb_models/security.py +2 -3
- nucliadb_models/text.py +7 -8
- nucliadb_models/trainset.py +1 -2
- nucliadb_models/utils.py +2 -3
- nucliadb_models/vectors.py +2 -5
- nucliadb_models/writer.py +56 -57
- {nucliadb_models-6.9.5.post5452.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/METADATA +1 -1
- nucliadb_models-6.10.0.post5694.dist-info/RECORD +41 -0
- nucliadb_models-6.9.5.post5452.dist-info/RECORD +0 -40
- {nucliadb_models-6.9.5.post5452.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/WHEEL +0 -0
- {nucliadb_models-6.9.5.post5452.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/top_level.txt +0 -0
nucliadb_models/search.py
CHANGED
|
@@ -14,19 +14,18 @@
|
|
|
14
14
|
#
|
|
15
15
|
import json
|
|
16
16
|
from enum import Enum
|
|
17
|
-
from typing import Any, Literal
|
|
17
|
+
from typing import Annotated, Any, Literal
|
|
18
18
|
|
|
19
19
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
20
20
|
from pydantic.aliases import AliasChoices
|
|
21
21
|
from pydantic.json_schema import SkipJsonSchema
|
|
22
|
-
from typing_extensions import
|
|
22
|
+
from typing_extensions import Self
|
|
23
23
|
|
|
24
24
|
from nucliadb_models import RelationMetadata
|
|
25
25
|
from nucliadb_models.common import FieldTypeName, ParamDefault
|
|
26
26
|
from nucliadb_models.graph.requests import GraphPathQuery
|
|
27
27
|
|
|
28
28
|
# Bw/c import to avoid breaking users
|
|
29
|
-
# noqa isort: skip
|
|
30
29
|
from nucliadb_models.metadata import RelationNodeType, RelationType, ResourceProcessingStatus
|
|
31
30
|
from nucliadb_models.resource import ExtractedDataTypeName, Resource
|
|
32
31
|
from nucliadb_models.security import RequestSecurity
|
|
@@ -152,12 +151,12 @@ FacetsResult = dict[str, Any]
|
|
|
152
151
|
|
|
153
152
|
|
|
154
153
|
class TextPosition(BaseModel):
|
|
155
|
-
page_number:
|
|
154
|
+
page_number: int | None = None
|
|
156
155
|
index: int
|
|
157
156
|
start: int
|
|
158
157
|
end: int
|
|
159
|
-
start_seconds:
|
|
160
|
-
end_seconds:
|
|
158
|
+
start_seconds: list[int] | None = None
|
|
159
|
+
end_seconds: list[int] | None = None
|
|
161
160
|
|
|
162
161
|
|
|
163
162
|
class Sentence(BaseModel):
|
|
@@ -166,8 +165,8 @@ class Sentence(BaseModel):
|
|
|
166
165
|
text: str
|
|
167
166
|
field_type: str
|
|
168
167
|
field: str
|
|
169
|
-
index:
|
|
170
|
-
position:
|
|
168
|
+
index: str | None = None
|
|
169
|
+
position: TextPosition | None = None
|
|
171
170
|
|
|
172
171
|
|
|
173
172
|
class Sentences(BaseModel):
|
|
@@ -177,7 +176,7 @@ class Sentences(BaseModel):
|
|
|
177
176
|
page_size: int = 20
|
|
178
177
|
min_score: float = Field(
|
|
179
178
|
title="Minimum score",
|
|
180
|
-
description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
|
|
179
|
+
description="Minimum similarity score used to filter vector index search. Results with a lower score have been ignored.",
|
|
181
180
|
)
|
|
182
181
|
|
|
183
182
|
|
|
@@ -188,45 +187,45 @@ class Paragraph(BaseModel):
|
|
|
188
187
|
field: str
|
|
189
188
|
text: str
|
|
190
189
|
labels: list[str] = []
|
|
191
|
-
start_seconds:
|
|
192
|
-
end_seconds:
|
|
193
|
-
position:
|
|
190
|
+
start_seconds: list[int] | None = None
|
|
191
|
+
end_seconds: list[int] | None = None
|
|
192
|
+
position: TextPosition | None = None
|
|
194
193
|
fuzzy_result: bool = False
|
|
195
194
|
|
|
196
195
|
|
|
197
196
|
class Paragraphs(BaseModel):
|
|
198
197
|
results: list[Paragraph] = []
|
|
199
|
-
facets:
|
|
200
|
-
query:
|
|
198
|
+
facets: FacetsResult | None = None
|
|
199
|
+
query: str | None = Field(default=None, title="Paragraphs Query")
|
|
201
200
|
total: int = 0
|
|
202
201
|
page_number: int = 0
|
|
203
202
|
page_size: int = 20
|
|
204
203
|
next_page: bool = False
|
|
205
204
|
min_score: float = Field(
|
|
206
205
|
title="Minimum score",
|
|
207
|
-
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
206
|
+
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
208
207
|
)
|
|
209
208
|
|
|
210
209
|
|
|
211
210
|
class ResourceResult(BaseModel):
|
|
212
|
-
score:
|
|
211
|
+
score: float | int
|
|
213
212
|
rid: str
|
|
214
213
|
field_type: str
|
|
215
214
|
field: str
|
|
216
|
-
labels:
|
|
215
|
+
labels: list[str] | None = None
|
|
217
216
|
|
|
218
217
|
|
|
219
218
|
class Resources(BaseModel):
|
|
220
219
|
results: list[ResourceResult]
|
|
221
|
-
facets:
|
|
222
|
-
query:
|
|
220
|
+
facets: FacetsResult | None = None
|
|
221
|
+
query: str | None = Field(default=None, title="Resources Query")
|
|
223
222
|
total: int = 0
|
|
224
223
|
page_number: int = 0
|
|
225
224
|
page_size: int = 20
|
|
226
225
|
next_page: bool = False
|
|
227
226
|
min_score: float = Field(
|
|
228
227
|
title="Minimum score",
|
|
229
|
-
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
228
|
+
description="Minimum bm25 score used to filter bm25 index search. Results with a lower score have been ignored.",
|
|
230
229
|
)
|
|
231
230
|
|
|
232
231
|
|
|
@@ -246,7 +245,7 @@ class DirectionalRelation(BaseModel):
|
|
|
246
245
|
relation: RelationType
|
|
247
246
|
relation_label: str
|
|
248
247
|
direction: RelationDirection
|
|
249
|
-
metadata:
|
|
248
|
+
metadata: RelationMetadata | None = None
|
|
250
249
|
resource_id: str
|
|
251
250
|
|
|
252
251
|
|
|
@@ -280,23 +279,23 @@ class RelatedEntities(BaseModel):
|
|
|
280
279
|
class ResourceSearchResults(JsonBaseModel):
|
|
281
280
|
"""Search on resource results"""
|
|
282
281
|
|
|
283
|
-
sentences:
|
|
284
|
-
paragraphs:
|
|
285
|
-
relations:
|
|
286
|
-
nodes:
|
|
287
|
-
shards:
|
|
282
|
+
sentences: Sentences | None = None
|
|
283
|
+
paragraphs: Paragraphs | None = None
|
|
284
|
+
relations: Relations | None = None
|
|
285
|
+
nodes: list[dict[str, str]] | None = None
|
|
286
|
+
shards: list[str] | None = None
|
|
288
287
|
|
|
289
288
|
|
|
290
289
|
class KnowledgeboxSearchResults(JsonBaseModel):
|
|
291
290
|
"""Search on knowledgebox results"""
|
|
292
291
|
|
|
293
292
|
resources: dict[str, Resource] = {}
|
|
294
|
-
sentences:
|
|
295
|
-
paragraphs:
|
|
296
|
-
fulltext:
|
|
297
|
-
relations:
|
|
298
|
-
nodes:
|
|
299
|
-
shards:
|
|
293
|
+
sentences: Sentences | None = None
|
|
294
|
+
paragraphs: Paragraphs | None = None
|
|
295
|
+
fulltext: Resources | None = None
|
|
296
|
+
relations: Relations | None = None
|
|
297
|
+
nodes: list[dict[str, str]] | None = None
|
|
298
|
+
shards: list[str] | None = None
|
|
300
299
|
|
|
301
300
|
# TODO: remove on a future major release
|
|
302
301
|
autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
|
|
@@ -306,16 +305,16 @@ class CatalogResponse(BaseModel):
|
|
|
306
305
|
"""Catalog results"""
|
|
307
306
|
|
|
308
307
|
resources: dict[str, Resource] = {}
|
|
309
|
-
fulltext:
|
|
310
|
-
shards:
|
|
308
|
+
fulltext: Resources | None = None
|
|
309
|
+
shards: list[str] | None = None
|
|
311
310
|
|
|
312
311
|
|
|
313
312
|
class KnowledgeboxSuggestResults(JsonBaseModel):
|
|
314
313
|
"""Suggest on resource results"""
|
|
315
314
|
|
|
316
|
-
paragraphs:
|
|
317
|
-
entities:
|
|
318
|
-
shards:
|
|
315
|
+
paragraphs: Paragraphs | None = None
|
|
316
|
+
entities: RelatedEntities | None = None
|
|
317
|
+
shards: list[str] | None = None
|
|
319
318
|
|
|
320
319
|
|
|
321
320
|
class KnowledgeboxCounters(BaseModel):
|
|
@@ -323,7 +322,7 @@ class KnowledgeboxCounters(BaseModel):
|
|
|
323
322
|
paragraphs: int
|
|
324
323
|
fields: int
|
|
325
324
|
sentences: int
|
|
326
|
-
shards:
|
|
325
|
+
shards: list[str] | None = None
|
|
327
326
|
index_size: float = Field(default=0.0, title="Index size (bytes)")
|
|
328
327
|
|
|
329
328
|
|
|
@@ -378,13 +377,13 @@ class ReciprocalRankFusion(_BaseRankFusion):
|
|
|
378
377
|
k: float = Field(
|
|
379
378
|
default=60.0,
|
|
380
379
|
title="RRF k parameter",
|
|
381
|
-
description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
|
|
380
|
+
description="k parameter changes the influence top-ranked and lower-ranked elements have. Research has shown that 60 is a performant value across datasets",
|
|
382
381
|
)
|
|
383
|
-
window:
|
|
382
|
+
window: int | None = Field(
|
|
384
383
|
default=None,
|
|
385
384
|
le=MAX_RANK_FUSION_WINDOW,
|
|
386
385
|
title="RRF window",
|
|
387
|
-
description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
|
|
386
|
+
description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time",
|
|
388
387
|
)
|
|
389
388
|
boosting: ReciprocalRankFusionWeights = Field(
|
|
390
389
|
default_factory=ReciprocalRankFusionWeights,
|
|
@@ -395,12 +394,12 @@ Define different weights for each retriever. This allows to assign different pri
|
|
|
395
394
|
The default is 1 for each retriever, which means no extra boost for any of them. Weights below 0 can be used for negative boosting.
|
|
396
395
|
|
|
397
396
|
This kind of boosting can be useful in multilingual search, for example, where keyword search may not give good results and can degrade the final search experience
|
|
398
|
-
""",
|
|
397
|
+
""",
|
|
399
398
|
)
|
|
400
399
|
|
|
401
400
|
|
|
402
401
|
RankFusion = Annotated[
|
|
403
|
-
|
|
402
|
+
ReciprocalRankFusion,
|
|
404
403
|
Field(discriminator="name"),
|
|
405
404
|
]
|
|
406
405
|
|
|
@@ -435,15 +434,15 @@ class _BaseReranker(BaseModel):
|
|
|
435
434
|
|
|
436
435
|
class PredictReranker(_BaseReranker):
|
|
437
436
|
name: Literal[RerankerName.PREDICT_RERANKER] = RerankerName.PREDICT_RERANKER
|
|
438
|
-
window:
|
|
437
|
+
window: int | None = Field(
|
|
439
438
|
default=None,
|
|
440
439
|
le=200,
|
|
441
440
|
title="Reranker window",
|
|
442
|
-
description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
|
|
441
|
+
description="Number of elements reranker will use. Window must be greater or equal to top_k. Greater values will improve results at cost of retrieval and reranking time. By default, this reranker uses a default of 2 times top_k",
|
|
443
442
|
)
|
|
444
443
|
|
|
445
444
|
|
|
446
|
-
Reranker = Annotated[
|
|
445
|
+
Reranker = Annotated[PredictReranker, Field(discriminator="name")]
|
|
447
446
|
|
|
448
447
|
|
|
449
448
|
class KnowledgeBoxCount(BaseModel):
|
|
@@ -472,18 +471,18 @@ class SearchParamDefaults:
|
|
|
472
471
|
)
|
|
473
472
|
filters = ParamDefault(
|
|
474
473
|
default=[],
|
|
475
|
-
title="Filters",
|
|
476
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
474
|
+
title="Search Filters",
|
|
475
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
477
476
|
)
|
|
478
477
|
resource_filters = ParamDefault(
|
|
479
478
|
default=[],
|
|
480
479
|
title="Resources filter",
|
|
481
|
-
description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
|
|
480
|
+
description="List of resource ids to filter search results for. Only paragraphs from the specified resources will be returned.",
|
|
482
481
|
)
|
|
483
482
|
faceted = ParamDefault(
|
|
484
483
|
default=[],
|
|
485
484
|
title="Faceted",
|
|
486
|
-
description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
485
|
+
description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
487
486
|
max_items=50,
|
|
488
487
|
)
|
|
489
488
|
chat_query = ParamDefault(
|
|
@@ -520,12 +519,12 @@ class SearchParamDefaults:
|
|
|
520
519
|
highlight = ParamDefault(
|
|
521
520
|
default=False,
|
|
522
521
|
title="Highlight",
|
|
523
|
-
description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
|
|
522
|
+
description="If set to true, the query terms will be highlighted in the results between <mark>...</mark> tags",
|
|
524
523
|
)
|
|
525
524
|
with_duplicates = ParamDefault(
|
|
526
525
|
default=False,
|
|
527
526
|
title="With duplicate paragraphs",
|
|
528
|
-
description="Whether to return duplicate paragraphs on the same document",
|
|
527
|
+
description="Whether to return duplicate paragraphs on the same document",
|
|
529
528
|
)
|
|
530
529
|
with_status = ParamDefault(
|
|
531
530
|
default=None,
|
|
@@ -535,7 +534,7 @@ class SearchParamDefaults:
|
|
|
535
534
|
with_synonyms = ParamDefault(
|
|
536
535
|
default=False,
|
|
537
536
|
title="With custom synonyms",
|
|
538
|
-
description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
|
|
537
|
+
description="Whether to return matches for custom knowledge box synonyms of the query terms. Note: only supported for `keyword` and `fulltext` search options.",
|
|
539
538
|
)
|
|
540
539
|
sort_order = ParamDefault(
|
|
541
540
|
default=SortOrder.DESC,
|
|
@@ -565,12 +564,12 @@ class SearchParamDefaults:
|
|
|
565
564
|
reranker = ParamDefault(
|
|
566
565
|
default=RerankerName.PREDICT_RERANKER,
|
|
567
566
|
title="Reranker",
|
|
568
|
-
description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
|
|
567
|
+
description="Reranker let you specify which method you want to use to rerank your results at the end of retrieval",
|
|
569
568
|
)
|
|
570
569
|
debug = ParamDefault(
|
|
571
570
|
default=False,
|
|
572
571
|
title="Debug mode",
|
|
573
|
-
description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
|
|
572
|
+
description="If set, the response will include some extra metadata for debugging purposes, like the list of queried nodes.",
|
|
574
573
|
)
|
|
575
574
|
show = ParamDefault(
|
|
576
575
|
default=[ResourceProperties.BASIC],
|
|
@@ -591,27 +590,27 @@ class SearchParamDefaults:
|
|
|
591
590
|
range_creation_start = ParamDefault(
|
|
592
591
|
default=None,
|
|
593
592
|
title="Resource creation range start",
|
|
594
|
-
description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
593
|
+
description="Resources created before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
595
594
|
)
|
|
596
595
|
range_creation_end = ParamDefault(
|
|
597
596
|
default=None,
|
|
598
597
|
title="Resource creation range end",
|
|
599
|
-
description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
598
|
+
description="Resources created after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
600
599
|
)
|
|
601
600
|
range_modification_start = ParamDefault(
|
|
602
601
|
default=None,
|
|
603
602
|
title="Resource modification range start",
|
|
604
|
-
description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
603
|
+
description="Resources modified before this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
605
604
|
)
|
|
606
605
|
range_modification_end = ParamDefault(
|
|
607
606
|
default=None,
|
|
608
607
|
title="Resource modification range end",
|
|
609
|
-
description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
608
|
+
description="Resources modified after this date will be filtered out of search results. Datetime are represented as a str in ISO 8601 format, like: 2008-09-15T15:53:00+05:00.",
|
|
610
609
|
)
|
|
611
610
|
vector = ParamDefault(
|
|
612
611
|
default=None,
|
|
613
612
|
title="Search Vector",
|
|
614
|
-
description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
|
|
613
|
+
description="The vector to perform the search with. If not provided, NucliaDB will use Nuclia Predict API to create the vector off from the query.",
|
|
615
614
|
)
|
|
616
615
|
vectorset = ParamDefault(
|
|
617
616
|
default=None,
|
|
@@ -627,12 +626,12 @@ class SearchParamDefaults:
|
|
|
627
626
|
chat_history = ParamDefault(
|
|
628
627
|
default=None,
|
|
629
628
|
title="Chat history",
|
|
630
|
-
description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
|
|
629
|
+
description="Use to rephrase the new LLM query by taking into account the chat conversation history. This will be passed to the LLM so that it is aware of the previous conversation.",
|
|
631
630
|
)
|
|
632
631
|
chat_features = ParamDefault(
|
|
633
632
|
default=[ChatOptions.SEMANTIC, ChatOptions.KEYWORD],
|
|
634
633
|
title="Chat features",
|
|
635
|
-
description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
|
|
634
|
+
description="Features enabled for the chat endpoint. Semantic search is done if `semantic` is included. If `keyword` is included, the results will include matching paragraphs from the bm25 index. If `relations` is included, a graph of entities related to the answer is returned. `paragraphs` and `vectors` are deprecated, please use `keyword` and `semantic` instead",
|
|
636
635
|
)
|
|
637
636
|
suggest_features = ParamDefault(
|
|
638
637
|
default=[
|
|
@@ -645,17 +644,17 @@ class SearchParamDefaults:
|
|
|
645
644
|
security = ParamDefault(
|
|
646
645
|
default=None,
|
|
647
646
|
title="Security",
|
|
648
|
-
description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
|
|
647
|
+
description="Security metadata for the request. If not provided, the search request is done without the security lookup phase.",
|
|
649
648
|
)
|
|
650
649
|
security_groups = ParamDefault(
|
|
651
650
|
default=[],
|
|
652
651
|
title="Security groups",
|
|
653
|
-
description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
|
|
652
|
+
description="List of security groups to filter search results for. Only resources matching the query and containing the specified security groups will be returned. If empty, all resources will be considered for the search.",
|
|
654
653
|
)
|
|
655
654
|
rephrase = ParamDefault(
|
|
656
655
|
default=False,
|
|
657
656
|
title="Rephrase query consuming LLMs",
|
|
658
|
-
description="Rephrase query consuming LLMs - it will make the query slower",
|
|
657
|
+
description="Rephrase query consuming LLMs - it will make the query slower",
|
|
659
658
|
)
|
|
660
659
|
prefer_markdown = ParamDefault(
|
|
661
660
|
default=False,
|
|
@@ -695,10 +694,10 @@ class SearchParamDefaults:
|
|
|
695
694
|
|
|
696
695
|
|
|
697
696
|
class Filter(BaseModel):
|
|
698
|
-
all:
|
|
699
|
-
any:
|
|
700
|
-
none:
|
|
701
|
-
not_all:
|
|
697
|
+
all: list[str] | None = Field(default=None, min_length=1)
|
|
698
|
+
any: list[str] | None = Field(default=None, min_length=1)
|
|
699
|
+
none: list[str] | None = Field(default=None, min_length=1)
|
|
700
|
+
not_all: list[str] | None = Field(default=None, min_length=1)
|
|
702
701
|
|
|
703
702
|
@model_validator(mode="after")
|
|
704
703
|
def validate_filter(self) -> Self:
|
|
@@ -740,19 +739,19 @@ class CatalogQuery(BaseModel):
|
|
|
740
739
|
|
|
741
740
|
|
|
742
741
|
class CatalogRequest(BaseModel):
|
|
743
|
-
query:
|
|
742
|
+
query: str | CatalogQuery = ParamDefault(
|
|
744
743
|
default="",
|
|
745
|
-
title="Query",
|
|
744
|
+
title="Catalog Request Query",
|
|
746
745
|
description="The query to search for",
|
|
747
746
|
).to_pydantic_field()
|
|
748
|
-
filter_expression:
|
|
747
|
+
filter_expression: CatalogFilterExpression | None = (
|
|
749
748
|
SearchParamDefaults.catalog_filter_expression.to_pydantic_field()
|
|
750
749
|
)
|
|
751
750
|
faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
|
|
752
|
-
sort:
|
|
751
|
+
sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
|
|
753
752
|
page_number: int = SearchParamDefaults.catalog_page_number.to_pydantic_field()
|
|
754
753
|
page_size: int = SearchParamDefaults.catalog_page_size.to_pydantic_field()
|
|
755
|
-
hidden:
|
|
754
|
+
hidden: bool | None = SearchParamDefaults.hidden.to_pydantic_field()
|
|
756
755
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field(
|
|
757
756
|
default=[ResourceProperties.BASIC, ResourceProperties.ERRORS]
|
|
758
757
|
)
|
|
@@ -760,32 +759,30 @@ class CatalogRequest(BaseModel):
|
|
|
760
759
|
debug: SkipJsonSchema[bool] = SearchParamDefaults.debug.to_pydantic_field()
|
|
761
760
|
|
|
762
761
|
# Deprecated filter parameters
|
|
763
|
-
filters:
|
|
762
|
+
filters: list[str] | list[Filter] = Field(
|
|
764
763
|
default=[],
|
|
765
|
-
title="Filters",
|
|
766
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
764
|
+
title="Catalog Filters",
|
|
765
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
767
766
|
deprecated="Use filter_expression instead",
|
|
768
767
|
)
|
|
769
|
-
with_status:
|
|
768
|
+
with_status: ResourceProcessingStatus | None = Field(
|
|
770
769
|
default=None,
|
|
771
770
|
title="With processing status",
|
|
772
771
|
description="Filter results by resource processing status",
|
|
773
772
|
deprecated="Use filter_expression instead",
|
|
774
773
|
)
|
|
775
|
-
range_creation_start:
|
|
776
|
-
|
|
777
|
-
deprecated="Use filter_expression instead",
|
|
778
|
-
)
|
|
774
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field(
|
|
775
|
+
deprecated="Use filter_expression instead",
|
|
779
776
|
)
|
|
780
|
-
range_creation_end:
|
|
777
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field(
|
|
781
778
|
deprecated="Use filter_expression instead",
|
|
782
779
|
)
|
|
783
|
-
range_modification_start:
|
|
780
|
+
range_modification_start: DateTime | None = (
|
|
784
781
|
SearchParamDefaults.range_modification_start.to_pydantic_field(
|
|
785
782
|
deprecated="Use filter_expression instead",
|
|
786
783
|
)
|
|
787
784
|
)
|
|
788
|
-
range_modification_end:
|
|
785
|
+
range_modification_end: DateTime | None = (
|
|
789
786
|
SearchParamDefaults.range_modification_end.to_pydantic_field(
|
|
790
787
|
deprecated="Use filter_expression instead",
|
|
791
788
|
)
|
|
@@ -798,15 +795,15 @@ class CatalogRequest(BaseModel):
|
|
|
798
795
|
|
|
799
796
|
|
|
800
797
|
class MinScore(BaseModel):
|
|
801
|
-
semantic:
|
|
798
|
+
semantic: float | None = Field(
|
|
802
799
|
default=None,
|
|
803
800
|
title="Minimum semantic score",
|
|
804
|
-
description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
801
|
+
description="Minimum semantic similarity score used to filter vector index search. If not specified, the default minimum score of the semantic model associated to the Knowledge Box will be used. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
805
802
|
)
|
|
806
803
|
bm25: float = Field(
|
|
807
804
|
default=0,
|
|
808
805
|
title="Minimum bm25 score",
|
|
809
|
-
description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
806
|
+
description="Minimum score used to filter bm25 index search. Check out the documentation for more information on how to use this parameter: https://docs.nuclia.dev/docs/rag/advanced/search#minimum-score",
|
|
810
807
|
ge=0,
|
|
811
808
|
)
|
|
812
809
|
|
|
@@ -820,7 +817,7 @@ AUDIT_METADATA_MAX_BYTES = 1024 * 10 # 10KB
|
|
|
820
817
|
|
|
821
818
|
|
|
822
819
|
class AuditMetadataBase(BaseModel):
|
|
823
|
-
audit_metadata:
|
|
820
|
+
audit_metadata: dict[str, str] | None = Field(
|
|
824
821
|
default=None,
|
|
825
822
|
title="Audit metadata",
|
|
826
823
|
description=(
|
|
@@ -844,29 +841,27 @@ class AuditMetadataBase(BaseModel):
|
|
|
844
841
|
|
|
845
842
|
class BaseSearchRequest(AuditMetadataBase):
|
|
846
843
|
query: str = SearchParamDefaults.query.to_pydantic_field()
|
|
847
|
-
filter_expression:
|
|
844
|
+
filter_expression: FilterExpression | None = (
|
|
848
845
|
SearchParamDefaults.filter_expression.to_pydantic_field()
|
|
849
846
|
)
|
|
850
847
|
fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
|
|
851
|
-
filters:
|
|
848
|
+
filters: list[str] | list[Filter] = Field(
|
|
852
849
|
default=[],
|
|
853
|
-
title="Filters",
|
|
854
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
850
|
+
title="Search Filters",
|
|
851
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
855
852
|
)
|
|
856
853
|
top_k: int = SearchParamDefaults.top_k.to_pydantic_field()
|
|
857
|
-
min_score:
|
|
854
|
+
min_score: float | MinScore | None = Field(
|
|
858
855
|
default=None,
|
|
859
856
|
title="Minimum score",
|
|
860
|
-
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
861
|
-
)
|
|
862
|
-
range_creation_start: Optional[DateTime] = (
|
|
863
|
-
SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
857
|
+
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
864
858
|
)
|
|
865
|
-
|
|
866
|
-
|
|
859
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
860
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
861
|
+
range_modification_start: DateTime | None = (
|
|
867
862
|
SearchParamDefaults.range_modification_start.to_pydantic_field()
|
|
868
863
|
)
|
|
869
|
-
range_modification_end:
|
|
864
|
+
range_modification_end: DateTime | None = (
|
|
870
865
|
SearchParamDefaults.range_modification_end.to_pydantic_field()
|
|
871
866
|
)
|
|
872
867
|
debug: bool = SearchParamDefaults.debug.to_pydantic_field()
|
|
@@ -874,15 +869,15 @@ class BaseSearchRequest(AuditMetadataBase):
|
|
|
874
869
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
|
|
875
870
|
field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
|
|
876
871
|
extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
|
|
877
|
-
vector:
|
|
878
|
-
vectorset:
|
|
872
|
+
vector: list[float] | None = SearchParamDefaults.vector.to_pydantic_field()
|
|
873
|
+
vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
|
|
879
874
|
with_duplicates: bool = SearchParamDefaults.with_duplicates.to_pydantic_field()
|
|
880
875
|
with_synonyms: bool = SearchParamDefaults.with_synonyms.to_pydantic_field()
|
|
881
876
|
# autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
|
|
882
877
|
# avoid breaking changes in the python sdks. Please remove on a future major release.
|
|
883
878
|
autofilter: SkipJsonSchema[bool] = False
|
|
884
879
|
resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
|
|
885
|
-
security:
|
|
880
|
+
security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
|
|
886
881
|
show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
|
|
887
882
|
|
|
888
883
|
rephrase: bool = Field(
|
|
@@ -892,7 +887,7 @@ class BaseSearchRequest(AuditMetadataBase):
|
|
|
892
887
|
),
|
|
893
888
|
)
|
|
894
889
|
|
|
895
|
-
rephrase_prompt:
|
|
890
|
+
rephrase_prompt: str | None = Field(
|
|
896
891
|
default=None,
|
|
897
892
|
title="Rephrase",
|
|
898
893
|
description=(
|
|
@@ -911,7 +906,7 @@ Please return ONLY the question without any explanation. Just the rephrased ques
|
|
|
911
906
|
Please return ONLY the question without any explanation.""",
|
|
912
907
|
],
|
|
913
908
|
)
|
|
914
|
-
query_image:
|
|
909
|
+
query_image: Image | None = Field(
|
|
915
910
|
default=None,
|
|
916
911
|
title="Query image",
|
|
917
912
|
description="Image that will be used together with the query text for retrieval.",
|
|
@@ -941,7 +936,7 @@ class SearchRequest(BaseSearchRequest):
|
|
|
941
936
|
]
|
|
942
937
|
)
|
|
943
938
|
faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
|
|
944
|
-
sort:
|
|
939
|
+
sort: SortOptions | None = SearchParamDefaults.sort.to_pydantic_field()
|
|
945
940
|
offset: int = SearchParamDefaults.offset.to_pydantic_field()
|
|
946
941
|
|
|
947
942
|
@field_validator("faceted")
|
|
@@ -962,7 +957,7 @@ class SearchRequest(BaseSearchRequest):
|
|
|
962
957
|
|
|
963
958
|
@field_validator("sort", mode="after")
|
|
964
959
|
@classmethod
|
|
965
|
-
def sorting_by_title_not_supported(cls, value:
|
|
960
|
+
def sorting_by_title_not_supported(cls, value: SortOptions | None) -> SortOptions | None:
|
|
966
961
|
if value and value.field == SortField.TITLE:
|
|
967
962
|
raise ValueError("sorting by title not supported in /search")
|
|
968
963
|
|
|
@@ -988,19 +983,19 @@ class UserPrompt(BaseModel):
|
|
|
988
983
|
|
|
989
984
|
|
|
990
985
|
class MaxTokens(BaseModel):
|
|
991
|
-
context:
|
|
986
|
+
context: int | None = Field(
|
|
992
987
|
default=None,
|
|
993
988
|
title="Maximum context tokens",
|
|
994
989
|
description="Use to limit the amount of tokens used in the LLM context",
|
|
995
990
|
)
|
|
996
|
-
answer:
|
|
991
|
+
answer: int | None = Field(
|
|
997
992
|
default=None,
|
|
998
993
|
title="Maximum answer tokens",
|
|
999
994
|
description="Use to limit the amount of tokens used in the LLM answer",
|
|
1000
995
|
)
|
|
1001
996
|
|
|
1002
997
|
|
|
1003
|
-
def parse_max_tokens(max_tokens:
|
|
998
|
+
def parse_max_tokens(max_tokens: int | MaxTokens | None) -> MaxTokens | None:
|
|
1004
999
|
if isinstance(max_tokens, int):
|
|
1005
1000
|
# If the max_tokens is an integer, it is interpreted as the max_tokens value for the generated answer.
|
|
1006
1001
|
# The max tokens for the context is set to None to use the default value for the model (comes in the
|
|
@@ -1046,7 +1041,7 @@ class ChatModel(BaseModel):
|
|
|
1046
1041
|
question: str = Field(description="Question to ask the generative model")
|
|
1047
1042
|
user_id: str
|
|
1048
1043
|
retrieval: bool = True
|
|
1049
|
-
system:
|
|
1044
|
+
system: str | None = Field(
|
|
1050
1045
|
default=None,
|
|
1051
1046
|
title="System prompt",
|
|
1052
1047
|
description="Optional system prompt input by the user",
|
|
@@ -1055,9 +1050,9 @@ class ChatModel(BaseModel):
|
|
|
1055
1050
|
default={},
|
|
1056
1051
|
description="The information retrieval context for the current query",
|
|
1057
1052
|
)
|
|
1058
|
-
query_context_order:
|
|
1053
|
+
query_context_order: dict[str, int] | None = Field(
|
|
1059
1054
|
default=None,
|
|
1060
|
-
description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
|
|
1055
|
+
description="The order of the query context elements. This is used to sort the context elements by relevance before sending them to the generative model",
|
|
1061
1056
|
)
|
|
1062
1057
|
chat_history: list[ChatContextMessage] = Field(
|
|
1063
1058
|
default=[], description="The chat conversation history"
|
|
@@ -1066,29 +1061,29 @@ class ChatModel(BaseModel):
|
|
|
1066
1061
|
default=True,
|
|
1067
1062
|
description="Truncate the chat context in case it doesn't fit the generative input",
|
|
1068
1063
|
)
|
|
1069
|
-
user_prompt:
|
|
1064
|
+
user_prompt: UserPrompt | None = Field(
|
|
1070
1065
|
default=None, description="Optional custom prompt input by the user"
|
|
1071
1066
|
)
|
|
1072
|
-
citations:
|
|
1067
|
+
citations: bool | None | CitationsType = Field(
|
|
1073
1068
|
default=None,
|
|
1074
1069
|
description="Whether to include citations in the response. "
|
|
1075
1070
|
"If set to None or False, no citations will be computed. "
|
|
1076
1071
|
"If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
|
|
1077
1072
|
"If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
|
|
1078
1073
|
)
|
|
1079
|
-
citation_threshold:
|
|
1074
|
+
citation_threshold: float | None = Field(
|
|
1080
1075
|
default=None,
|
|
1081
1076
|
description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
|
|
1082
1077
|
ge=0.0,
|
|
1083
1078
|
le=1.0,
|
|
1084
1079
|
)
|
|
1085
|
-
generative_model:
|
|
1080
|
+
generative_model: str | None = Field(
|
|
1086
1081
|
default=None,
|
|
1087
1082
|
title="Generative model",
|
|
1088
|
-
description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1083
|
+
description="The generative model to use for the predict chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1089
1084
|
)
|
|
1090
1085
|
|
|
1091
|
-
max_tokens:
|
|
1086
|
+
max_tokens: int | None = Field(default=None, description="Maximum characters to generate")
|
|
1092
1087
|
|
|
1093
1088
|
query_context_images: dict[str, Image] = Field(
|
|
1094
1089
|
default={},
|
|
@@ -1099,7 +1094,7 @@ class ChatModel(BaseModel):
|
|
|
1099
1094
|
default=False,
|
|
1100
1095
|
description="If set to true, the response will be in markdown format",
|
|
1101
1096
|
)
|
|
1102
|
-
json_schema:
|
|
1097
|
+
json_schema: dict[str, Any] | None = Field(
|
|
1103
1098
|
default=None,
|
|
1104
1099
|
description="The JSON schema to use for the generative model answers",
|
|
1105
1100
|
)
|
|
@@ -1107,17 +1102,18 @@ class ChatModel(BaseModel):
|
|
|
1107
1102
|
default=False,
|
|
1108
1103
|
description="Whether to reorder the query context based on a reranker",
|
|
1109
1104
|
)
|
|
1110
|
-
top_k:
|
|
1105
|
+
top_k: int | None = Field(default=None, description="Number of best elements to get from")
|
|
1111
1106
|
|
|
1112
1107
|
format_prompt: bool = Field(
|
|
1113
1108
|
default=True,
|
|
1114
|
-
description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
|
|
1109
|
+
description="If set to false, the prompt given as `user_prompt` will be used as is, without any formatting for question or context. If set to true, the prompt must contain the placeholders {question} and {context} to be replaced by the question and context respectively",
|
|
1115
1110
|
)
|
|
1116
|
-
seed:
|
|
1111
|
+
seed: int | None = Field(
|
|
1117
1112
|
default=None,
|
|
1118
1113
|
description="Seed use for the generative model for a deterministic output.",
|
|
1119
1114
|
)
|
|
1120
|
-
reasoning:
|
|
1115
|
+
reasoning: Reasoning | bool = Field(
|
|
1116
|
+
title="Reasoning options",
|
|
1121
1117
|
default=False,
|
|
1122
1118
|
description=(
|
|
1123
1119
|
"Reasoning options for the generative model. "
|
|
@@ -1131,26 +1127,25 @@ class RephraseModel(BaseModel):
|
|
|
1131
1127
|
chat_history: list[ChatContextMessage] = []
|
|
1132
1128
|
user_id: str
|
|
1133
1129
|
user_context: list[str] = []
|
|
1134
|
-
generative_model:
|
|
1130
|
+
generative_model: str | None = Field(
|
|
1135
1131
|
default=None,
|
|
1136
1132
|
title="Generative model",
|
|
1137
|
-
description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1133
|
+
description="The generative model to use for the rephrase endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1138
1134
|
)
|
|
1139
|
-
chat_history_relevance_threshold:
|
|
1135
|
+
chat_history_relevance_threshold: (
|
|
1140
1136
|
Annotated[
|
|
1141
1137
|
float,
|
|
1142
1138
|
Field(
|
|
1143
1139
|
ge=0.0,
|
|
1144
1140
|
le=1.0,
|
|
1145
|
-
description=
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
"Values in between adjust the sensitivity."
|
|
1150
|
-
),
|
|
1141
|
+
description="Threshold to determine if the past chat history is relevant to rephrase the user's question. "
|
|
1142
|
+
"0 - Always treat previous messages as relevant (always rephrase)."
|
|
1143
|
+
"1 - Always treat previous messages as irrelevant (never rephrase)."
|
|
1144
|
+
"Values in between adjust the sensitivity.",
|
|
1151
1145
|
),
|
|
1152
1146
|
]
|
|
1153
|
-
|
|
1147
|
+
| None
|
|
1148
|
+
) = None
|
|
1154
1149
|
|
|
1155
1150
|
|
|
1156
1151
|
class RagStrategyName:
|
|
@@ -1234,13 +1229,13 @@ class FullResourceApplyTo(BaseModel):
|
|
|
1234
1229
|
exclude: list[str] = Field(
|
|
1235
1230
|
default_factory=list,
|
|
1236
1231
|
title="Labels to exclude from full resource expansion",
|
|
1237
|
-
description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
|
|
1232
|
+
description="Resources from matches containing any of these labels won't expand to the full resource. This may be useful to exclude long and not interesting resources and expend less tokens",
|
|
1238
1233
|
)
|
|
1239
1234
|
|
|
1240
1235
|
|
|
1241
1236
|
class FullResourceStrategy(RagStrategy):
|
|
1242
1237
|
name: Literal["full_resource"] = "full_resource"
|
|
1243
|
-
count:
|
|
1238
|
+
count: int | None = Field(
|
|
1244
1239
|
default=None,
|
|
1245
1240
|
title="Count",
|
|
1246
1241
|
description="Maximum number of full documents to retrieve. If not specified, all matching documents are retrieved.",
|
|
@@ -1251,7 +1246,7 @@ class FullResourceStrategy(RagStrategy):
|
|
|
1251
1246
|
title="Include remaining text blocks",
|
|
1252
1247
|
description="Whether to include the remaining text blocks after the maximum number of resources has been reached.",
|
|
1253
1248
|
)
|
|
1254
|
-
apply_to:
|
|
1249
|
+
apply_to: FullResourceApplyTo | None = Field(
|
|
1255
1250
|
default=None,
|
|
1256
1251
|
title="Apply to certain resources only",
|
|
1257
1252
|
description="Define which resources to exclude from serialization",
|
|
@@ -1357,7 +1352,7 @@ class PreQuery(BaseModel):
|
|
|
1357
1352
|
),
|
|
1358
1353
|
ge=0,
|
|
1359
1354
|
)
|
|
1360
|
-
id:
|
|
1355
|
+
id: str | None = Field(
|
|
1361
1356
|
default=None,
|
|
1362
1357
|
title="Prequery id",
|
|
1363
1358
|
min_length=1,
|
|
@@ -1491,7 +1486,7 @@ class TableImageStrategy(ImageRagStrategy):
|
|
|
1491
1486
|
|
|
1492
1487
|
class PageImageStrategy(ImageRagStrategy):
|
|
1493
1488
|
name: Literal["page_image"] = "page_image"
|
|
1494
|
-
count:
|
|
1489
|
+
count: int | None = Field(
|
|
1495
1490
|
default=None,
|
|
1496
1491
|
title="Count",
|
|
1497
1492
|
description="Maximum number of page images to retrieve. By default, at most 5 images are retrieved.",
|
|
@@ -1503,20 +1498,18 @@ class ParagraphImageStrategy(ImageRagStrategy):
|
|
|
1503
1498
|
|
|
1504
1499
|
|
|
1505
1500
|
RagStrategies = Annotated[
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
GraphStrategy,
|
|
1515
|
-
],
|
|
1501
|
+
FieldExtensionStrategy
|
|
1502
|
+
| FullResourceStrategy
|
|
1503
|
+
| HierarchyResourceStrategy
|
|
1504
|
+
| NeighbouringParagraphsStrategy
|
|
1505
|
+
| MetadataExtensionStrategy
|
|
1506
|
+
| ConversationalStrategy
|
|
1507
|
+
| PreQueriesStrategy
|
|
1508
|
+
| GraphStrategy,
|
|
1516
1509
|
Field(discriminator="name"),
|
|
1517
1510
|
]
|
|
1518
1511
|
RagImagesStrategies = Annotated[
|
|
1519
|
-
|
|
1512
|
+
PageImageStrategy | ParagraphImageStrategy | TableImageStrategy,
|
|
1520
1513
|
Field(discriminator="name"),
|
|
1521
1514
|
]
|
|
1522
1515
|
PromptContext = dict[str, str]
|
|
@@ -1525,10 +1518,10 @@ PromptContextImages = dict[str, Image]
|
|
|
1525
1518
|
|
|
1526
1519
|
|
|
1527
1520
|
class CustomPrompt(BaseModel):
|
|
1528
|
-
system:
|
|
1521
|
+
system: str | None = Field(
|
|
1529
1522
|
default=None,
|
|
1530
1523
|
title="System prompt",
|
|
1531
|
-
description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
|
|
1524
|
+
description="System prompt given to the generative model responsible of generating the answer. This can help customize the behavior of the model when generating the answer. If not specified, the default model provider's prompt is used.",
|
|
1532
1525
|
min_length=1,
|
|
1533
1526
|
examples=[
|
|
1534
1527
|
"You are a medical assistant, use medical terminology",
|
|
@@ -1537,10 +1530,10 @@ class CustomPrompt(BaseModel):
|
|
|
1537
1530
|
"You are a financial expert, use correct terms",
|
|
1538
1531
|
],
|
|
1539
1532
|
)
|
|
1540
|
-
user:
|
|
1533
|
+
user: str | None = Field(
|
|
1541
1534
|
default=None,
|
|
1542
1535
|
title="User prompt",
|
|
1543
|
-
description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
|
|
1536
|
+
description="User prompt given to the generative model responsible of generating the answer. Use the words {context} and {question} in brackets where you want those fields to be placed, in case you want them in your prompt. Context will be the data returned by the retrieval step and question will be the user's query.",
|
|
1544
1537
|
min_length=1,
|
|
1545
1538
|
examples=[
|
|
1546
1539
|
"Taking into account our previous conversation, and this context: {context} answer this {question}",
|
|
@@ -1549,7 +1542,7 @@ class CustomPrompt(BaseModel):
|
|
|
1549
1542
|
"Given this context: {context}. Answer this {question} using the provided context. Please, answer always in French",
|
|
1550
1543
|
],
|
|
1551
1544
|
)
|
|
1552
|
-
rephrase:
|
|
1545
|
+
rephrase: str | None = Field(
|
|
1553
1546
|
default=None,
|
|
1554
1547
|
title="Rephrase",
|
|
1555
1548
|
description=(
|
|
@@ -1579,23 +1572,23 @@ class AskRequest(AuditMetadataBase):
|
|
|
1579
1572
|
le=200,
|
|
1580
1573
|
description="The top most relevant results to fetch at the retrieval step. The maximum number of results allowed is 200.",
|
|
1581
1574
|
)
|
|
1582
|
-
filter_expression:
|
|
1575
|
+
filter_expression: FilterExpression | None = (
|
|
1583
1576
|
SearchParamDefaults.filter_expression.to_pydantic_field()
|
|
1584
1577
|
)
|
|
1585
1578
|
fields: list[str] = SearchParamDefaults.fields.to_pydantic_field()
|
|
1586
|
-
filters:
|
|
1579
|
+
filters: list[str] | list[Filter] = Field(
|
|
1587
1580
|
default=[],
|
|
1588
|
-
title="Filters",
|
|
1589
|
-
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
1581
|
+
title="Search Filters",
|
|
1582
|
+
description="The list of filters to apply. Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters",
|
|
1590
1583
|
)
|
|
1591
|
-
keyword_filters:
|
|
1584
|
+
keyword_filters: list[str] | list[Filter] = Field(
|
|
1592
1585
|
default=[],
|
|
1593
1586
|
title="Keyword filters",
|
|
1594
1587
|
description=(
|
|
1595
1588
|
"List of keyword filter expressions to apply to the retrieval step. "
|
|
1596
1589
|
"The text block search will only be performed on the documents that contain the specified keywords. "
|
|
1597
1590
|
"The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
|
|
1598
|
-
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1591
|
+
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1599
1592
|
),
|
|
1600
1593
|
examples=[
|
|
1601
1594
|
["NLP", "BERT"],
|
|
@@ -1603,43 +1596,39 @@ class AskRequest(AuditMetadataBase):
|
|
|
1603
1596
|
["Friedrich Nietzsche", "Immanuel Kant"],
|
|
1604
1597
|
],
|
|
1605
1598
|
)
|
|
1606
|
-
vectorset:
|
|
1607
|
-
min_score:
|
|
1599
|
+
vectorset: str | None = SearchParamDefaults.vectorset.to_pydantic_field()
|
|
1600
|
+
min_score: float | MinScore | None = Field(
|
|
1608
1601
|
default=None,
|
|
1609
1602
|
title="Minimum score",
|
|
1610
|
-
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
1603
|
+
description="Minimum score to filter search results. Results with a lower score will be ignored. Accepts either a float or a dictionary with the minimum scores for the bm25 and vector indexes. If a float is provided, it is interpreted as the minimum score for vector index search.",
|
|
1611
1604
|
)
|
|
1612
1605
|
features: list[ChatOptions] = SearchParamDefaults.chat_features.to_pydantic_field()
|
|
1613
|
-
range_creation_start:
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
range_creation_end: Optional[DateTime] = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
1617
|
-
range_modification_start: Optional[DateTime] = (
|
|
1606
|
+
range_creation_start: DateTime | None = SearchParamDefaults.range_creation_start.to_pydantic_field()
|
|
1607
|
+
range_creation_end: DateTime | None = SearchParamDefaults.range_creation_end.to_pydantic_field()
|
|
1608
|
+
range_modification_start: DateTime | None = (
|
|
1618
1609
|
SearchParamDefaults.range_modification_start.to_pydantic_field()
|
|
1619
1610
|
)
|
|
1620
|
-
range_modification_end:
|
|
1611
|
+
range_modification_end: DateTime | None = (
|
|
1621
1612
|
SearchParamDefaults.range_modification_end.to_pydantic_field()
|
|
1622
1613
|
)
|
|
1623
1614
|
show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
|
|
1624
1615
|
field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
|
|
1625
1616
|
extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
|
|
1626
|
-
context:
|
|
1627
|
-
chat_history:
|
|
1628
|
-
|
|
1629
|
-
)
|
|
1630
|
-
extra_context: Optional[list[str]] = Field(
|
|
1617
|
+
context: list[ChatContextMessage] | None = SearchParamDefaults.chat_context.to_pydantic_field()
|
|
1618
|
+
chat_history: list[ChatContextMessage] | None = SearchParamDefaults.chat_history.to_pydantic_field()
|
|
1619
|
+
extra_context: list[str] | None = Field(
|
|
1631
1620
|
default=None,
|
|
1632
1621
|
title="Extra query context",
|
|
1633
1622
|
description="""Additional context that is added to the retrieval context sent to the LLM.
|
|
1634
1623
|
It allows extending the chat feature with content that may not be in the Knowledge Box.""",
|
|
1635
1624
|
)
|
|
1636
|
-
extra_context_images:
|
|
1625
|
+
extra_context_images: list[Image] | None = Field(
|
|
1637
1626
|
default=None,
|
|
1638
1627
|
title="Extra query context images",
|
|
1639
1628
|
description="""Additional images added to the retrieval context sent to the LLM."
|
|
1640
1629
|
It allows extending the chat feature with content that may not be in the Knowledge Box.""",
|
|
1641
1630
|
)
|
|
1642
|
-
query_image:
|
|
1631
|
+
query_image: Image | None = Field(
|
|
1643
1632
|
default=None,
|
|
1644
1633
|
title="Query image",
|
|
1645
1634
|
description="Image that will be used together with the query text for retrieval and then sent to the LLM as part of the context. "
|
|
@@ -1652,27 +1641,27 @@ class AskRequest(AuditMetadataBase):
|
|
|
1652
1641
|
|
|
1653
1642
|
highlight: bool = SearchParamDefaults.highlight.to_pydantic_field()
|
|
1654
1643
|
resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
|
|
1655
|
-
prompt:
|
|
1644
|
+
prompt: str | CustomPrompt | None = Field(
|
|
1656
1645
|
default=None,
|
|
1657
1646
|
title="Prompts",
|
|
1658
|
-
description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
|
|
1647
|
+
description="Use to customize the prompts given to the generative model. Both system and user prompts can be customized. If a string is provided, it is interpreted as the user prompt.",
|
|
1659
1648
|
)
|
|
1660
|
-
rank_fusion:
|
|
1661
|
-
reranker:
|
|
1662
|
-
citations:
|
|
1649
|
+
rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
|
|
1650
|
+
reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
|
|
1651
|
+
citations: bool | None | CitationsType = Field(
|
|
1663
1652
|
default=None,
|
|
1664
1653
|
description="Whether to include citations in the response. "
|
|
1665
1654
|
"If set to None or False, no citations will be computed. "
|
|
1666
1655
|
"If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
|
|
1667
1656
|
"If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
|
|
1668
1657
|
)
|
|
1669
|
-
citation_threshold:
|
|
1658
|
+
citation_threshold: float | None = Field(
|
|
1670
1659
|
default=None,
|
|
1671
1660
|
description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
|
|
1672
1661
|
ge=0.0,
|
|
1673
1662
|
le=1.0,
|
|
1674
1663
|
)
|
|
1675
|
-
security:
|
|
1664
|
+
security: RequestSecurity | None = SearchParamDefaults.security.to_pydantic_field()
|
|
1676
1665
|
show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
|
|
1677
1666
|
rag_strategies: list[RagStrategies] = Field(
|
|
1678
1667
|
default=[],
|
|
@@ -1737,21 +1726,21 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1737
1726
|
)
|
|
1738
1727
|
debug: bool = SearchParamDefaults.debug.to_pydantic_field()
|
|
1739
1728
|
|
|
1740
|
-
generative_model:
|
|
1729
|
+
generative_model: str | None = Field(
|
|
1741
1730
|
default=None,
|
|
1742
1731
|
title="Generative model",
|
|
1743
|
-
description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1732
|
+
description="The generative model to use for the chat endpoint. If not provided, the model configured for the Knowledge Box is used.",
|
|
1744
1733
|
)
|
|
1745
|
-
generative_model_seed:
|
|
1734
|
+
generative_model_seed: int | None = Field(
|
|
1746
1735
|
default=None,
|
|
1747
1736
|
title="Seed for the generative model",
|
|
1748
1737
|
description="The seed to use for the generative model for deterministic generation. Only supported by some models.",
|
|
1749
1738
|
)
|
|
1750
1739
|
|
|
1751
|
-
max_tokens:
|
|
1740
|
+
max_tokens: int | MaxTokens | None = Field(
|
|
1752
1741
|
default=None,
|
|
1753
1742
|
title="Maximum LLM tokens to use for the request",
|
|
1754
|
-
description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
|
|
1743
|
+
description="Use to limit the amount of tokens used in the LLM context and/or for generating the answer. If not provided, the default maximum tokens of the generative model will be used. If an integer is provided, it is interpreted as the maximum tokens for the answer.",
|
|
1755
1744
|
)
|
|
1756
1745
|
|
|
1757
1746
|
rephrase: bool = Field(
|
|
@@ -1760,7 +1749,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1760
1749
|
"Rephrase the query for a more efficient retrieval. This will consume LLM tokens and make the request slower."
|
|
1761
1750
|
),
|
|
1762
1751
|
)
|
|
1763
|
-
chat_history_relevance_threshold:
|
|
1752
|
+
chat_history_relevance_threshold: float | None = Field(
|
|
1764
1753
|
default=None,
|
|
1765
1754
|
ge=0.0,
|
|
1766
1755
|
le=1.0,
|
|
@@ -1778,7 +1767,7 @@ If empty, the default strategy is used, which simply adds the text of the matchi
|
|
|
1778
1767
|
description="If set to true, the response will be in markdown format",
|
|
1779
1768
|
)
|
|
1780
1769
|
|
|
1781
|
-
answer_json_schema:
|
|
1770
|
+
answer_json_schema: dict[str, Any] | None = Field(
|
|
1782
1771
|
default=None,
|
|
1783
1772
|
title="Answer JSON schema",
|
|
1784
1773
|
description="""Desired JSON schema for the LLM answer.
|
|
@@ -1794,13 +1783,14 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
|
|
|
1794
1783
|
description="Whether to generate an answer using the generative model. If set to false, the response will only contain the retrieval results.",
|
|
1795
1784
|
)
|
|
1796
1785
|
|
|
1797
|
-
search_configuration:
|
|
1786
|
+
search_configuration: str | None = Field(
|
|
1798
1787
|
default=None,
|
|
1799
1788
|
description="Load ask parameters from this configuration. Parameters in the request override parameters from the configuration.",
|
|
1800
1789
|
)
|
|
1801
1790
|
|
|
1802
|
-
reasoning:
|
|
1791
|
+
reasoning: Reasoning | bool = Field(
|
|
1803
1792
|
default=False,
|
|
1793
|
+
title="Reasoning options",
|
|
1804
1794
|
description=(
|
|
1805
1795
|
"Reasoning options for the generative model. "
|
|
1806
1796
|
"Set to True to enable default reasoning, False to disable, or provide a Reasoning object for custom options."
|
|
@@ -1881,8 +1871,8 @@ class SummarizeModel(BaseModel):
|
|
|
1881
1871
|
"""
|
|
1882
1872
|
|
|
1883
1873
|
resources: dict[str, SummarizeResourceModel] = {}
|
|
1884
|
-
generative_model:
|
|
1885
|
-
user_prompt:
|
|
1874
|
+
generative_model: str | None = None
|
|
1875
|
+
user_prompt: str | None = None
|
|
1886
1876
|
summary_kind: SummaryKind = SummaryKind.SIMPLE
|
|
1887
1877
|
|
|
1888
1878
|
|
|
@@ -1891,13 +1881,13 @@ class SummarizeRequest(BaseModel):
|
|
|
1891
1881
|
Model for the request payload of the summarize endpoint
|
|
1892
1882
|
"""
|
|
1893
1883
|
|
|
1894
|
-
generative_model:
|
|
1884
|
+
generative_model: str | None = Field(
|
|
1895
1885
|
default=None,
|
|
1896
1886
|
title="Generative model",
|
|
1897
|
-
description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
|
|
1887
|
+
description="The generative model to use for the summarization. If not provided, the model configured for the Knowledge Box is used.",
|
|
1898
1888
|
)
|
|
1899
1889
|
|
|
1900
|
-
user_prompt:
|
|
1890
|
+
user_prompt: str | None = Field(
|
|
1901
1891
|
default=None,
|
|
1902
1892
|
title="User prompt",
|
|
1903
1893
|
description="Optional custom prompt input by the user",
|
|
@@ -1908,7 +1898,7 @@ class SummarizeRequest(BaseModel):
|
|
|
1908
1898
|
min_length=1,
|
|
1909
1899
|
max_length=100,
|
|
1910
1900
|
title="Resources",
|
|
1911
|
-
description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
|
|
1901
|
+
description="Uids or slugs of the resources to summarize. If the resources are not found, they will be ignored.",
|
|
1912
1902
|
)
|
|
1913
1903
|
|
|
1914
1904
|
summary_kind: SummaryKind = Field(
|
|
@@ -1934,20 +1924,20 @@ class SummarizedResponse(BaseModel):
|
|
|
1934
1924
|
title="Summary",
|
|
1935
1925
|
description="Global summary of all resources combined.",
|
|
1936
1926
|
)
|
|
1937
|
-
consumption:
|
|
1927
|
+
consumption: Consumption | None = None
|
|
1938
1928
|
|
|
1939
1929
|
|
|
1940
1930
|
class KnowledgeGraphEntity(BaseModel):
|
|
1941
1931
|
name: str
|
|
1942
|
-
type:
|
|
1943
|
-
subtype:
|
|
1932
|
+
type: RelationNodeType | None = None
|
|
1933
|
+
subtype: str | None = None
|
|
1944
1934
|
|
|
1945
1935
|
|
|
1946
1936
|
class FindRequest(BaseSearchRequest):
|
|
1947
|
-
query_entities: SkipJsonSchema[
|
|
1937
|
+
query_entities: SkipJsonSchema[list[KnowledgeGraphEntity] | None] = Field(
|
|
1948
1938
|
default=None, title="Query entities", description="Entities to use in a knowledge graph search"
|
|
1949
1939
|
)
|
|
1950
|
-
graph_query:
|
|
1940
|
+
graph_query: GraphPathQuery | None = Field(
|
|
1951
1941
|
default=None,
|
|
1952
1942
|
title="Graph query",
|
|
1953
1943
|
description="Query for the knowledge graph. Paths (node-relation-node) extracted from a paragraph_id will be used to extend the results",
|
|
@@ -1958,17 +1948,17 @@ class FindRequest(BaseSearchRequest):
|
|
|
1958
1948
|
FindOptions.SEMANTIC,
|
|
1959
1949
|
]
|
|
1960
1950
|
)
|
|
1961
|
-
rank_fusion:
|
|
1962
|
-
reranker:
|
|
1951
|
+
rank_fusion: RankFusionName | RankFusion = SearchParamDefaults.rank_fusion.to_pydantic_field()
|
|
1952
|
+
reranker: RerankerName | Reranker = SearchParamDefaults.reranker.to_pydantic_field()
|
|
1963
1953
|
|
|
1964
|
-
keyword_filters:
|
|
1954
|
+
keyword_filters: list[str] | list[Filter] = Field(
|
|
1965
1955
|
default=[],
|
|
1966
1956
|
title="Keyword filters",
|
|
1967
1957
|
description=(
|
|
1968
1958
|
"List of keyword filter expressions to apply to the retrieval step. "
|
|
1969
1959
|
"The text block search will only be performed on the documents that contain the specified keywords. "
|
|
1970
1960
|
"The filters are case-insensitive, and only alphanumeric characters and spaces are allowed. "
|
|
1971
|
-
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1961
|
+
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters"
|
|
1972
1962
|
),
|
|
1973
1963
|
examples=[
|
|
1974
1964
|
["NLP", "BERT"],
|
|
@@ -1977,11 +1967,11 @@ class FindRequest(BaseSearchRequest):
|
|
|
1977
1967
|
],
|
|
1978
1968
|
)
|
|
1979
1969
|
|
|
1980
|
-
search_configuration:
|
|
1970
|
+
search_configuration: str | None = Field(
|
|
1981
1971
|
default=None,
|
|
1982
1972
|
description="Load find parameters from this configuration. Parameters in the request override parameters from the configuration.",
|
|
1983
1973
|
)
|
|
1984
|
-
generative_model:
|
|
1974
|
+
generative_model: str | None = Field(
|
|
1985
1975
|
default=None,
|
|
1986
1976
|
title="Generative model",
|
|
1987
1977
|
description="The generative model used to rephrase the query. If not provided, the model configured for the Knowledge Box is used.",
|
|
@@ -2015,9 +2005,9 @@ class SCORE_TYPE(str, Enum):
|
|
|
2015
2005
|
|
|
2016
2006
|
|
|
2017
2007
|
class FindTextPosition(BaseModel):
|
|
2018
|
-
page_number:
|
|
2019
|
-
start_seconds:
|
|
2020
|
-
end_seconds:
|
|
2008
|
+
page_number: int | None = None
|
|
2009
|
+
start_seconds: list[int] | None = None
|
|
2010
|
+
end_seconds: list[int] | None = None
|
|
2021
2011
|
index: int
|
|
2022
2012
|
start: int
|
|
2023
2013
|
end: int
|
|
@@ -2029,15 +2019,15 @@ class FindParagraph(BaseModel):
|
|
|
2029
2019
|
order: int = Field(default=0, ge=0)
|
|
2030
2020
|
text: str
|
|
2031
2021
|
id: str
|
|
2032
|
-
labels:
|
|
2033
|
-
position:
|
|
2022
|
+
labels: list[str] | None = []
|
|
2023
|
+
position: TextPosition | None = None
|
|
2034
2024
|
fuzzy_result: bool = False
|
|
2035
2025
|
page_with_visual: bool = Field(
|
|
2036
2026
|
default=False,
|
|
2037
2027
|
title="Page where this paragraph belongs is a visual page",
|
|
2038
2028
|
description="This flag informs if the page may have information that has not been extracted",
|
|
2039
2029
|
)
|
|
2040
|
-
reference:
|
|
2030
|
+
reference: str | None = Field(
|
|
2041
2031
|
default=None,
|
|
2042
2032
|
title="Reference to the image that represents this text",
|
|
2043
2033
|
description="Reference to the extracted image that represents this paragraph",
|
|
@@ -2047,7 +2037,7 @@ class FindParagraph(BaseModel):
|
|
|
2047
2037
|
title="Is a table",
|
|
2048
2038
|
description="The referenced image of the paragraph is a table",
|
|
2049
2039
|
)
|
|
2050
|
-
relevant_relations:
|
|
2040
|
+
relevant_relations: Relations | None = Field(
|
|
2051
2041
|
default=None,
|
|
2052
2042
|
title="Relevant relations",
|
|
2053
2043
|
description="Relevant relations from which the paragraph was found, will only be filled if using the Graph RAG Strategy",
|
|
@@ -2062,17 +2052,19 @@ class FindResource(Resource):
|
|
|
2062
2052
|
fields: dict[str, FindField]
|
|
2063
2053
|
|
|
2064
2054
|
def updated_from(self, origin: Resource):
|
|
2055
|
+
find_resource_model_fields = self.model_fields.keys()
|
|
2065
2056
|
for key in origin.model_fields.keys():
|
|
2066
|
-
|
|
2057
|
+
if key in find_resource_model_fields:
|
|
2058
|
+
self.__setattr__(key, getattr(origin, key))
|
|
2067
2059
|
|
|
2068
2060
|
|
|
2069
2061
|
class KnowledgeboxFindResults(JsonBaseModel):
|
|
2070
2062
|
"""Find on knowledgebox results"""
|
|
2071
2063
|
|
|
2072
2064
|
resources: dict[str, FindResource]
|
|
2073
|
-
relations:
|
|
2074
|
-
query:
|
|
2075
|
-
rephrased_query:
|
|
2065
|
+
relations: Relations | None = None
|
|
2066
|
+
query: str | None = Field(default=None, title="Find Results Query")
|
|
2067
|
+
rephrased_query: str | None = None
|
|
2076
2068
|
total: int = 0
|
|
2077
2069
|
page_number: int = Field(
|
|
2078
2070
|
default=0,
|
|
@@ -2086,18 +2078,18 @@ class KnowledgeboxFindResults(JsonBaseModel):
|
|
|
2086
2078
|
default=False,
|
|
2087
2079
|
description="Pagination will be deprecated, please, refer to `top_k` in the request",
|
|
2088
2080
|
)
|
|
2089
|
-
nodes:
|
|
2081
|
+
nodes: list[dict[str, str]] | None = Field(
|
|
2090
2082
|
default=None,
|
|
2091
2083
|
title="Nodes",
|
|
2092
2084
|
description="List of nodes queried in the search",
|
|
2093
2085
|
)
|
|
2094
|
-
shards:
|
|
2086
|
+
shards: list[str] | None = Field(
|
|
2095
2087
|
default=None,
|
|
2096
2088
|
title="Shards",
|
|
2097
2089
|
description="The list of shard replica ids used for the search.",
|
|
2098
2090
|
)
|
|
2099
2091
|
autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
|
|
2100
|
-
min_score:
|
|
2092
|
+
min_score: float | MinScore | None = Field(
|
|
2101
2093
|
default=MinScore(),
|
|
2102
2094
|
title="Minimum result score",
|
|
2103
2095
|
description="The minimum scores that have been used for the search operation.",
|
|
@@ -2105,9 +2097,9 @@ class KnowledgeboxFindResults(JsonBaseModel):
|
|
|
2105
2097
|
best_matches: list[str] = Field(
|
|
2106
2098
|
default=[],
|
|
2107
2099
|
title="Best matches",
|
|
2108
|
-
description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
|
|
2100
|
+
description="List of ids of best matching paragraphs. The list is sorted by decreasing relevance (most relevant first).",
|
|
2109
2101
|
)
|
|
2110
|
-
metrics:
|
|
2102
|
+
metrics: dict[str, Any] | None = Field(
|
|
2111
2103
|
default=None,
|
|
2112
2104
|
title="Metrics",
|
|
2113
2105
|
description=(
|
|
@@ -2125,15 +2117,15 @@ class FeedbackTasks(str, Enum):
|
|
|
2125
2117
|
class FeedbackRequest(BaseModel):
|
|
2126
2118
|
ident: str = Field(
|
|
2127
2119
|
title="Request identifier",
|
|
2128
|
-
description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
|
|
2120
|
+
description="Id of the request to provide feedback for. This id is returned in the response header `Nuclia-Learning-Id` of the chat endpoint.",
|
|
2129
2121
|
)
|
|
2130
2122
|
good: bool = Field(title="Good", description="Whether the result was good or not")
|
|
2131
2123
|
task: FeedbackTasks = Field(
|
|
2132
2124
|
title="Task",
|
|
2133
2125
|
description="The task the feedback is for. For now, only `CHAT` task is available",
|
|
2134
2126
|
)
|
|
2135
|
-
feedback:
|
|
2136
|
-
text_block_id:
|
|
2127
|
+
feedback: str | None = Field(None, title="Feedback", description="Feedback text")
|
|
2128
|
+
text_block_id: str | None = Field(None, title="Text block", description="Text block id")
|
|
2137
2129
|
|
|
2138
2130
|
|
|
2139
2131
|
def validate_facets(facets):
|
|
@@ -2184,13 +2176,11 @@ class AugmentedTextBlock(BaseModel):
|
|
|
2184
2176
|
text: str = Field(
|
|
2185
2177
|
description="The text of the augmented text block. It may include additional metadata to enrich the context"
|
|
2186
2178
|
)
|
|
2187
|
-
position:
|
|
2179
|
+
position: TextPosition | None = Field(
|
|
2188
2180
|
default=None,
|
|
2189
2181
|
description="Metadata about the position of the text block in the original document.",
|
|
2190
2182
|
)
|
|
2191
|
-
parent:
|
|
2192
|
-
default=None, description="The parent text block that was augmented for."
|
|
2193
|
-
)
|
|
2183
|
+
parent: str | None = Field(default=None, description="The parent text block that was augmented for.")
|
|
2194
2184
|
augmentation_type: TextBlockAugmentationType = Field(description="Type of augmentation.")
|
|
2195
2185
|
|
|
2196
2186
|
|
|
@@ -2214,12 +2204,12 @@ class AskTokens(BaseModel):
|
|
|
2214
2204
|
title="Output tokens",
|
|
2215
2205
|
description="Number of LLM tokens used for the answer",
|
|
2216
2206
|
)
|
|
2217
|
-
input_nuclia:
|
|
2207
|
+
input_nuclia: float | None = Field(
|
|
2218
2208
|
title="Input Nuclia tokens",
|
|
2219
2209
|
description="Number of Nuclia LLM tokens used for the context in the query",
|
|
2220
2210
|
default=None,
|
|
2221
2211
|
)
|
|
2222
|
-
output_nuclia:
|
|
2212
|
+
output_nuclia: float | None = Field(
|
|
2223
2213
|
title="Output Nuclia tokens",
|
|
2224
2214
|
description="Number of Nuclia LLM tokens used for the answer",
|
|
2225
2215
|
default=None,
|
|
@@ -2227,12 +2217,12 @@ class AskTokens(BaseModel):
|
|
|
2227
2217
|
|
|
2228
2218
|
|
|
2229
2219
|
class AskTimings(BaseModel):
|
|
2230
|
-
generative_first_chunk:
|
|
2220
|
+
generative_first_chunk: float | None = Field(
|
|
2231
2221
|
default=None,
|
|
2232
2222
|
title="Generative first chunk",
|
|
2233
2223
|
description="Time the LLM took to generate the first chunk of the answer",
|
|
2234
2224
|
)
|
|
2235
|
-
generative_total:
|
|
2225
|
+
generative_total: float | None = Field(
|
|
2236
2226
|
default=None,
|
|
2237
2227
|
title="Generative total",
|
|
2238
2228
|
description="Total time the LLM took to generate the answer",
|
|
@@ -2240,12 +2230,12 @@ class AskTimings(BaseModel):
|
|
|
2240
2230
|
|
|
2241
2231
|
|
|
2242
2232
|
class SyncAskMetadata(BaseModel):
|
|
2243
|
-
tokens:
|
|
2233
|
+
tokens: AskTokens | None = Field(
|
|
2244
2234
|
default=None,
|
|
2245
2235
|
title="Tokens",
|
|
2246
2236
|
description="Number of tokens used in the LLM context and answer",
|
|
2247
2237
|
)
|
|
2248
|
-
timings:
|
|
2238
|
+
timings: AskTimings | None = Field(
|
|
2249
2239
|
default=None,
|
|
2250
2240
|
title="Timings",
|
|
2251
2241
|
description="Timings of the generative model",
|
|
@@ -2264,19 +2254,19 @@ class SyncAskResponse(BaseModel):
|
|
|
2264
2254
|
title="Answer",
|
|
2265
2255
|
description="The generative answer to the query",
|
|
2266
2256
|
)
|
|
2267
|
-
reasoning:
|
|
2257
|
+
reasoning: str | None = Field(
|
|
2268
2258
|
default=None,
|
|
2269
|
-
title="Reasoning",
|
|
2270
|
-
description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
|
|
2259
|
+
title="Reasoning steps",
|
|
2260
|
+
description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.",
|
|
2271
2261
|
)
|
|
2272
|
-
answer_json:
|
|
2262
|
+
answer_json: dict[str, Any] | None = Field(
|
|
2273
2263
|
default=None,
|
|
2274
2264
|
title="Answer JSON",
|
|
2275
|
-
description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
|
|
2265
|
+
description="The generative JSON answer to the query. This is returned only if the answer_json_schema parameter is provided in the request.",
|
|
2276
2266
|
)
|
|
2277
2267
|
status: str = Field(
|
|
2278
2268
|
title="Status",
|
|
2279
|
-
description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
|
|
2269
|
+
description="The status of the query execution. It can be 'success', 'error', 'no_context' or 'no_retrieval_data'",
|
|
2280
2270
|
)
|
|
2281
2271
|
retrieval_results: KnowledgeboxFindResults = Field(
|
|
2282
2272
|
title="Retrieval results",
|
|
@@ -2287,7 +2277,7 @@ class SyncAskResponse(BaseModel):
|
|
|
2287
2277
|
title="Retrieval best matches",
|
|
2288
2278
|
description="Sorted list of best matching text blocks in the retrieval step. This includes the main query and prequeries results, if any.",
|
|
2289
2279
|
)
|
|
2290
|
-
prequeries:
|
|
2280
|
+
prequeries: dict[str, KnowledgeboxFindResults] | None = Field(
|
|
2291
2281
|
default=None,
|
|
2292
2282
|
title="Prequeries",
|
|
2293
2283
|
description="The retrieval results of the prequeries",
|
|
@@ -2295,9 +2285,9 @@ class SyncAskResponse(BaseModel):
|
|
|
2295
2285
|
learning_id: str = Field(
|
|
2296
2286
|
default="",
|
|
2297
2287
|
title="Learning id",
|
|
2298
|
-
description="The id of the learning request. This id can be used to provide feedback on the learning process.",
|
|
2288
|
+
description="The id of the learning request. This id can be used to provide feedback on the learning process.",
|
|
2299
2289
|
)
|
|
2300
|
-
relations:
|
|
2290
|
+
relations: Relations | None = Field(
|
|
2301
2291
|
default=None,
|
|
2302
2292
|
title="Relations",
|
|
2303
2293
|
description="The detected relations of the answer",
|
|
@@ -2312,29 +2302,29 @@ class SyncAskResponse(BaseModel):
|
|
|
2312
2302
|
title="Citation footnote to context",
|
|
2313
2303
|
description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)""",
|
|
2314
2304
|
)
|
|
2315
|
-
augmented_context:
|
|
2305
|
+
augmented_context: AugmentedContext | None = Field(
|
|
2316
2306
|
default=None,
|
|
2317
2307
|
description=(
|
|
2318
2308
|
"Augmented text blocks that were sent to the LLM as part of the RAG strategies "
|
|
2319
2309
|
"applied on the retrieval results in the request."
|
|
2320
2310
|
),
|
|
2321
2311
|
)
|
|
2322
|
-
prompt_context:
|
|
2312
|
+
prompt_context: list[str] | None = Field(
|
|
2323
2313
|
default=None,
|
|
2324
2314
|
title="Prompt context",
|
|
2325
2315
|
description="The prompt context used to generate the answer. Returned only if the debug flag is set to true",
|
|
2326
2316
|
)
|
|
2327
|
-
predict_request:
|
|
2317
|
+
predict_request: dict[str, Any] | None = Field(
|
|
2328
2318
|
default=None,
|
|
2329
2319
|
title="Predict request",
|
|
2330
2320
|
description="The internal predict request used to generate the answer. Returned only if the debug flag is set to true",
|
|
2331
2321
|
)
|
|
2332
|
-
metadata:
|
|
2322
|
+
metadata: SyncAskMetadata | None = Field(
|
|
2333
2323
|
default=None,
|
|
2334
2324
|
title="Metadata",
|
|
2335
|
-
description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
|
|
2325
|
+
description="Metadata of the query execution. This includes the number of tokens used in the LLM context and answer, and the timings of the generative model.",
|
|
2336
2326
|
)
|
|
2337
|
-
consumption:
|
|
2327
|
+
consumption: Consumption | None = Field(
|
|
2338
2328
|
default=None,
|
|
2339
2329
|
title="Consumption",
|
|
2340
2330
|
description=(
|
|
@@ -2342,12 +2332,12 @@ class SyncAskResponse(BaseModel):
|
|
|
2342
2332
|
" 'X-show-consumption' header is set to true in the request."
|
|
2343
2333
|
),
|
|
2344
2334
|
)
|
|
2345
|
-
error_details:
|
|
2335
|
+
error_details: str | None = Field(
|
|
2346
2336
|
default=None,
|
|
2347
2337
|
title="Error details",
|
|
2348
2338
|
description="Error details message in case there was an error",
|
|
2349
2339
|
)
|
|
2350
|
-
debug:
|
|
2340
|
+
debug: dict[str, Any] | None = Field(
|
|
2351
2341
|
default=None,
|
|
2352
2342
|
title="Debug information",
|
|
2353
2343
|
description=(
|
|
@@ -2437,7 +2427,7 @@ class StatusAskResponseItem(BaseModel):
|
|
|
2437
2427
|
type: Literal["status"] = "status"
|
|
2438
2428
|
code: str
|
|
2439
2429
|
status: str
|
|
2440
|
-
details:
|
|
2430
|
+
details: str | None = None
|
|
2441
2431
|
|
|
2442
2432
|
|
|
2443
2433
|
class ErrorAskResponseItem(BaseModel):
|
|
@@ -2456,22 +2446,22 @@ class DebugAskResponseItem(BaseModel):
|
|
|
2456
2446
|
metrics: dict[str, Any]
|
|
2457
2447
|
|
|
2458
2448
|
|
|
2459
|
-
AskResponseItemType =
|
|
2460
|
-
AnswerAskResponseItem
|
|
2461
|
-
ReasoningAskResponseItem
|
|
2462
|
-
JSONAskResponseItem
|
|
2463
|
-
MetadataAskResponseItem
|
|
2464
|
-
AugmentedContextResponseItem
|
|
2465
|
-
CitationsAskResponseItem
|
|
2466
|
-
FootnoteCitationsAskResponseItem
|
|
2467
|
-
StatusAskResponseItem
|
|
2468
|
-
ErrorAskResponseItem
|
|
2469
|
-
RetrievalAskResponseItem
|
|
2470
|
-
RelationsAskResponseItem
|
|
2471
|
-
DebugAskResponseItem
|
|
2472
|
-
PrequeriesAskResponseItem
|
|
2473
|
-
ConsumptionResponseItem
|
|
2474
|
-
|
|
2449
|
+
AskResponseItemType = (
|
|
2450
|
+
AnswerAskResponseItem
|
|
2451
|
+
| ReasoningAskResponseItem
|
|
2452
|
+
| JSONAskResponseItem
|
|
2453
|
+
| MetadataAskResponseItem
|
|
2454
|
+
| AugmentedContextResponseItem
|
|
2455
|
+
| CitationsAskResponseItem
|
|
2456
|
+
| FootnoteCitationsAskResponseItem
|
|
2457
|
+
| StatusAskResponseItem
|
|
2458
|
+
| ErrorAskResponseItem
|
|
2459
|
+
| RetrievalAskResponseItem
|
|
2460
|
+
| RelationsAskResponseItem
|
|
2461
|
+
| DebugAskResponseItem
|
|
2462
|
+
| PrequeriesAskResponseItem
|
|
2463
|
+
| ConsumptionResponseItem
|
|
2464
|
+
)
|
|
2475
2465
|
|
|
2476
2466
|
|
|
2477
2467
|
class AskResponseItem(BaseModel):
|
|
@@ -2491,7 +2481,7 @@ def parse_custom_prompt(item: AskRequest) -> CustomPrompt:
|
|
|
2491
2481
|
return prompt
|
|
2492
2482
|
|
|
2493
2483
|
|
|
2494
|
-
def parse_rephrase_prompt(item: AskRequest) ->
|
|
2484
|
+
def parse_rephrase_prompt(item: AskRequest) -> str | None:
|
|
2495
2485
|
prompt = parse_custom_prompt(item)
|
|
2496
2486
|
return prompt.rephrase
|
|
2497
2487
|
|
|
@@ -2502,7 +2492,7 @@ FindRequest.model_rebuild()
|
|
|
2502
2492
|
|
|
2503
2493
|
class CatalogFacetsPrefix(BaseModel):
|
|
2504
2494
|
prefix: str = Field(pattern="^((/[^/]+)*)$")
|
|
2505
|
-
depth:
|
|
2495
|
+
depth: int | None = Field(
|
|
2506
2496
|
default=None,
|
|
2507
2497
|
ge=0,
|
|
2508
2498
|
description="Only include facets up to this depth from the prefix, leave empty to include all depths",
|