nucliadb-models 6.8.1.post4983__py3-none-any.whl → 6.10.0.post5694__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb-models might be problematic. Click here for more details.

Files changed (34)
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +359 -0
  3. nucliadb_models/common.py +66 -57
  4. nucliadb_models/configuration.py +9 -9
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +30 -29
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +5 -20
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +78 -74
  12. nucliadb_models/graph/requests.py +38 -47
  13. nucliadb_models/hydration.py +423 -0
  14. nucliadb_models/internal/predict.py +7 -9
  15. nucliadb_models/internal/shards.py +2 -3
  16. nucliadb_models/labels.py +18 -11
  17. nucliadb_models/link.py +18 -19
  18. nucliadb_models/metadata.py +80 -53
  19. nucliadb_models/notifications.py +3 -3
  20. nucliadb_models/processing.py +1 -2
  21. nucliadb_models/resource.py +85 -102
  22. nucliadb_models/retrieval.py +147 -0
  23. nucliadb_models/search.py +360 -306
  24. nucliadb_models/security.py +2 -3
  25. nucliadb_models/text.py +7 -8
  26. nucliadb_models/trainset.py +1 -2
  27. nucliadb_models/utils.py +2 -3
  28. nucliadb_models/vectors.py +2 -5
  29. nucliadb_models/writer.py +56 -57
  30. {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/METADATA +2 -3
  31. nucliadb_models-6.10.0.post5694.dist-info/RECORD +41 -0
  32. nucliadb_models-6.8.1.post4983.dist-info/RECORD +0 -38
  33. {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/WHEEL +0 -0
  34. {nucliadb_models-6.8.1.post4983.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/top_level.txt +0 -0
nucliadb_models/retrieval.py
@@ -0,0 +1,147 @@
1
+ # Copyright 2025 Bosutech XXI S.L.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ from enum import Enum
16
+ from typing import Literal
17
+
18
+ from pydantic import BaseModel, Field
19
+
20
+ from nucliadb_models.filters import FilterExpression
21
+ from nucliadb_models.graph.requests import GraphPathQuery
22
+ from nucliadb_models.search import RankFusion, RankFusionName, SearchParamDefaults
23
+ from nucliadb_models.security import RequestSecurity
24
+
25
+
26
# Keyword clause of a `Query`. Only `query` is required.
class KeywordQuery(BaseModel):
    # Text to search for.
    query: str
    # Defaults to 0.0. Presumably the minimum score a match must reach -- confirm with
    # the code that consumes this model.
    min_score: float = 0.0
    # Off by default. Presumably enables synonym matching -- confirm with the consumer.
    with_synonyms: bool = False
30
+
31
+
32
# Semantic clause of a `Query`. `query` and `vectorset` are both required.
class SemanticQuery(BaseModel):
    # A list of floats; presumably the already-computed query embedding -- confirm.
    query: list[float]
    # Name of the vectorset; presumably the one `query` is compared against -- confirm.
    vectorset: str
    # Defaults to -1.0 (note: differs from KeywordQuery's 0.0 default).
    min_score: float = -1.0
36
+
37
+
38
# Graph clause of a `Query`: a thin wrapper around a single required GraphPathQuery.
class GraphQuery(BaseModel):
    query: GraphPathQuery
40
+
41
+
42
# Container for the three kinds of sub-query. Each one is optional and defaults to None;
# nothing in this model requires that at least one of them is provided.
class Query(BaseModel):
    keyword: KeywordQuery | None = None
    semantic: SemanticQuery | None = None
    graph: GraphQuery | None = None
46
+
47
+
48
# Filtering options of a retrieval request. Every field has a default, so `Filters()`
# is valid with no arguments.
class Filters(BaseModel):
    # `filter_expression` and `show_hidden` take their pydantic field definitions from
    # the matching entries of the shared SearchParamDefaults.
    filter_expression: FilterExpression | None = (
        SearchParamDefaults.filter_expression.to_pydantic_field()
    )
    show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
    # Optional; None when omitted.
    security: RequestSecurity | None = None
    # Off by default. Presumably keeps duplicated matches when enabled -- confirm.
    with_duplicates: bool = False
55
+
56
+
57
# Retrieval request model: the query plus sizing, filtering and rank-fusion settings.
# Only `query` is required.
class RetrievalRequest(BaseModel):
    query: Query
    # Validated to 0 < top_k <= 500; 20 when omitted.
    top_k: int = Field(default=20, gt=0, le=500)
    # A default-constructed Filters is built per instance when omitted.
    filters: Filters = Field(default_factory=Filters)
    # Accepts either a RankFusionName or a RankFusion value.
    rank_fusion: RankFusionName | RankFusion = RankFusionName.RECIPROCAL_RANK_FUSION
62
+
63
+
64
# Allowed values for the `source` field of the score models.
# The `str` mixin makes each member compare equal to its plain string value.
class ScoreSource(str, Enum):
    INDEX = "index"
    RANK_FUSION = "rank_fusion"
    RERANKER = "reranker"
68
+
69
+
70
# Allowed values for the `type` field of the score models.
# The `str` mixin makes each member compare equal to its plain string value.
class ScoreType(str, Enum):
    SEMANTIC = "semantic"
    KEYWORD = "keyword"
    GRAPH = "graph"
    RRF = "rrf"
    # Mixed-case value is intentional; it is the string that appears on the wire.
    WCOMB_SUM = "wCombSUM"
    DEFAULT_RERANKER = "default_reranker"
77
+
78
+
79
# Score entry tagged as (source=INDEX, type=KEYWORD). The Literal annotations allow
# only those tag values, and they are also the defaults, so only `score` is required.
class KeywordScore(BaseModel):
    score: float
    source: Literal[ScoreSource.INDEX] = ScoreSource.INDEX
    type: Literal[ScoreType.KEYWORD] = ScoreType.KEYWORD
83
+
84
+
85
# Score entry tagged as (source=INDEX, type=SEMANTIC). The Literal annotations allow
# only those tag values, and they are also the defaults, so only `score` is required.
class SemanticScore(BaseModel):
    score: float
    source: Literal[ScoreSource.INDEX] = ScoreSource.INDEX
    type: Literal[ScoreType.SEMANTIC] = ScoreType.SEMANTIC
89
+
90
+
91
# Score entry tagged as (source=INDEX, type=GRAPH). The Literal annotations allow
# only those tag values, and they are also the defaults, so only `score` is required.
class GraphScore(BaseModel):
    score: float
    source: Literal[ScoreSource.INDEX] = ScoreSource.INDEX
    type: Literal[ScoreType.GRAPH] = ScoreType.GRAPH
95
+
96
+
97
# Score entry tagged as (source=RANK_FUSION, type=RRF). The Literal annotations allow
# only those tag values, and they are also the defaults, so only `score` is required.
class RrfScore(BaseModel):
    score: float
    source: Literal[ScoreSource.RANK_FUSION] = ScoreSource.RANK_FUSION
    type: Literal[ScoreType.RRF] = ScoreType.RRF
101
+
102
+
103
# Score entry tagged as (source=RANK_FUSION, type=WCOMB_SUM). The Literal annotations
# allow only those tag values, and they are also the defaults, so only `score` is required.
class WeightedCombSumScore(BaseModel):
    score: float
    source: Literal[ScoreSource.RANK_FUSION] = ScoreSource.RANK_FUSION
    type: Literal[ScoreType.WCOMB_SUM] = ScoreType.WCOMB_SUM
107
+
108
+
109
# Score entry tagged as (source=RERANKER, type=DEFAULT_RERANKER). The Literal annotations
# allow only those tag values, and they are also the defaults, so only `score` is required.
class RerankerScore(BaseModel):
    score: float
    source: Literal[ScoreSource.RERANKER] = ScoreSource.RERANKER
    type: Literal[ScoreType.DEFAULT_RERANKER] = ScoreType.DEFAULT_RERANKER
113
+
114
+
115
# Union of the six score models in this module; used as the element type of
# `Scores.history`.
Score = (
    KeywordScore
    | SemanticScore
    | GraphScore
    | RrfScore
    | WeightedCombSumScore
    | RerankerScore
)
116
+
117
+
118
# Score information attached to a match. All four fields are required.
class Scores(BaseModel):
    # `value`, `source` and `type` presumably describe the final score of the match and
    # the stage that produced it -- confirm with the code that builds this model.
    value: float
    source: ScoreSource
    type: ScoreType
    # List of individual Score entries; presumably the scores assigned along the way.
    history: list[Score]
123
+
124
+
125
# Metadata attached to a retrieval match. No field declares a default, so all of them
# must be supplied; the `| None` fields accept an explicit None.
class Metadata(BaseModel):
    field_labels: list[str]
    paragraph_labels: list[str]

    is_an_image: bool
    is_a_table: bool

    # For text extracted from visual content (OCR, inception, tables).
    source_file: str | None

    # For documents (pdf, docx, ...) only.
    page: int | None
    in_page_with_visual: bool | None
138
+
139
+
140
# One entry of `RetrievalResponse.matches`: an identifier together with its score
# information and metadata. All three fields are required.
class RetrievalMatch(BaseModel):
    # String identifier; presumably of the matched paragraph/chunk -- confirm.
    id: str
    score: Scores
    metadata: Metadata
144
+
145
+
146
# Response model for a retrieval: a required list of matches (it may be empty; no
# length constraint is declared).
class RetrievalResponse(BaseModel):
    matches: list[RetrievalMatch]