nucliadb-models 6.9.7.post5583__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (35)
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +100 -84
  3. nucliadb_models/common.py +56 -56
  4. nucliadb_models/configuration.py +8 -8
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +25 -26
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +1 -2
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +78 -74
  12. nucliadb_models/graph/requests.py +40 -48
  13. nucliadb_models/graph/responses.py +13 -1
  14. nucliadb_models/hydration.py +48 -50
  15. nucliadb_models/internal/predict.py +7 -9
  16. nucliadb_models/internal/shards.py +2 -3
  17. nucliadb_models/labels.py +18 -11
  18. nucliadb_models/link.py +18 -19
  19. nucliadb_models/metadata.py +66 -54
  20. nucliadb_models/notifications.py +3 -3
  21. nucliadb_models/processing.py +1 -2
  22. nucliadb_models/resource.py +85 -93
  23. nucliadb_models/retrieval.py +147 -0
  24. nucliadb_models/search.py +263 -275
  25. nucliadb_models/security.py +2 -3
  26. nucliadb_models/text.py +7 -8
  27. nucliadb_models/trainset.py +1 -2
  28. nucliadb_models/utils.py +2 -3
  29. nucliadb_models/vectors.py +2 -5
  30. nucliadb_models/writer.py +56 -57
  31. {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/METADATA +1 -1
  32. nucliadb_models-6.11.1.post5822.dist-info/RECORD +41 -0
  33. {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/WHEEL +1 -1
  34. nucliadb_models-6.9.7.post5583.dist-info/RECORD +0 -40
  35. {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/top_level.txt +0 -0
@@ -49,7 +49,7 @@ class ResourceIndexed(BaseModel):
49
49
  seqid: int = Field(
50
50
  ...,
51
51
  title="Sequence ID",
52
- description="Sequence ID of the resource operation. This can be used to track completion of specific operations.", # noqa: E501
52
+ description="Sequence ID of the resource operation. This can be used to track completion of specific operations.",
53
53
  )
54
54
 
55
55
 
@@ -59,7 +59,7 @@ class ResourceWritten(BaseModel):
59
59
  seqid: int = Field(
60
60
  ...,
61
61
  title="Sequence ID",
62
- description="Sequence ID of the resource operation. This can be used to track completion of specific operations.", # noqa: E501
62
+ description="Sequence ID of the resource operation. This can be used to track completion of specific operations.",
63
63
  )
64
64
  operation: ResourceOperationType = Field(
65
65
  ..., title="Operation", description="Type of resource write operation."
@@ -77,7 +77,7 @@ class ResourceProcessed(BaseModel):
77
77
  seqid: int = Field(
78
78
  ...,
79
79
  title="Sequence ID",
80
- description="Sequence ID of the resource operation. This can be used to track completion of specific operations.", # noqa: E501
80
+ description="Sequence ID of the resource operation. This can be used to track completion of specific operations.",
81
81
  )
82
82
  ingestion_succeeded: bool = Field(
83
83
  default=True,
@@ -12,10 +12,9 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional
16
15
 
17
16
  from pydantic import BaseModel
18
17
 
19
18
 
20
19
  class PushProcessingOptions(BaseModel):
21
- ml_text: Optional[bool] = True
20
+ ml_text: bool | None = True
@@ -16,7 +16,7 @@
16
16
  import string
17
17
  from datetime import datetime
18
18
  from enum import Enum
19
- from typing import Any, Dict, List, Optional, Union
19
+ from typing import Any
20
20
 
21
21
  from pydantic import BaseModel, Field, field_validator, model_validator
22
22
 
@@ -73,34 +73,34 @@ class ExtractedDataTypeName(str, Enum):
73
73
 
74
74
 
75
75
  class KnowledgeBoxConfig(BaseModel):
76
- slug: Optional[SlugString] = Field(
76
+ slug: SlugString | None = Field(
77
77
  default=None, title="Slug", description="Slug for the Knowledge Box."
78
78
  )
79
- title: Optional[str] = Field(default=None, title="Title", description="Title for the Knowledge Box.")
80
- description: Optional[str] = Field(
79
+ title: str | None = Field(default=None, title="Title", description="Title for the Knowledge Box.")
80
+ description: str | None = Field(
81
81
  default=None,
82
82
  title="Description",
83
83
  description="Description for the Knowledge Box.",
84
84
  )
85
- learning_configuration: Optional[Dict[str, Any]] = Field(
85
+ learning_configuration: dict[str, Any] | None = Field(
86
86
  default=None,
87
87
  title="Learning Configuration",
88
- description="Learning configuration for the Knowledge Box. If provided, NucliaDB will set the learning configuration for the Knowledge Box.", # noqa: E501
88
+ description="Learning configuration for the Knowledge Box. If provided, NucliaDB will set the learning configuration for the Knowledge Box.",
89
89
  )
90
90
 
91
- external_index_provider: Optional[ExternalIndexProvider] = Field(
91
+ external_index_provider: ExternalIndexProvider | None = Field(
92
92
  default=None,
93
93
  title="External Index Provider",
94
94
  description="External index provider for the Knowledge Box.",
95
95
  )
96
96
 
97
- configured_external_index_provider: Optional[dict[str, Any]] = Field(
97
+ configured_external_index_provider: dict[str, Any] | None = Field(
98
98
  default=None,
99
99
  title="Configured External Index Provider",
100
100
  description="Metadata for the configured external index provider (if any)",
101
101
  )
102
102
 
103
- similarity: Optional[VectorSimilarity] = Field(
103
+ similarity: VectorSimilarity | None = Field(
104
104
  default=None,
105
105
  description="This field is deprecated. Use 'learning_configuration' instead.",
106
106
  )
@@ -117,7 +117,7 @@ class KnowledgeBoxConfig(BaseModel):
117
117
 
118
118
  @field_validator("slug")
119
119
  @classmethod
120
- def id_check(cls, v: Optional[str]) -> Optional[str]:
120
+ def id_check(cls, v: str | None) -> str | None:
121
121
  if v is None:
122
122
  return v
123
123
 
@@ -136,7 +136,7 @@ class KnowledgeBoxConfig(BaseModel):
136
136
 
137
137
 
138
138
  class KnowledgeBoxObjSummary(BaseModel):
139
- slug: Optional[SlugString] = None
139
+ slug: SlugString | None = None
140
140
  uuid: str
141
141
 
142
142
 
@@ -149,25 +149,25 @@ class KnowledgeBoxObj(BaseModel):
149
149
  The API representation of a Knowledge Box object.
150
150
  """
151
151
 
152
- slug: Optional[SlugString] = None
152
+ slug: SlugString | None = None
153
153
  uuid: str
154
- config: Optional[KnowledgeBoxConfig] = None
155
- model: Optional[SemanticModelMetadata] = None
154
+ config: KnowledgeBoxConfig | None = None
155
+ model: SemanticModelMetadata | None = None
156
156
 
157
157
 
158
158
  class KnowledgeBoxList(BaseModel):
159
- kbs: List[KnowledgeBoxObjSummary] = []
159
+ kbs: list[KnowledgeBoxObjSummary] = []
160
160
 
161
161
 
162
162
  # Resources
163
163
 
164
164
 
165
165
  class ExtractedData(BaseModel):
166
- text: Optional[ExtractedText] = None
167
- metadata: Optional[FieldComputedMetadata] = None
168
- large_metadata: Optional[LargeComputedMetadata] = None
169
- vectors: Optional[VectorObject] = None
170
- question_answers: Optional[FieldQuestionAnswers] = None
166
+ text: ExtractedText | None = None
167
+ metadata: FieldComputedMetadata | None = None
168
+ large_metadata: LargeComputedMetadata | None = None
169
+ vectors: VectorObject | None = None
170
+ question_answers: FieldQuestionAnswers | None = None
171
171
 
172
172
 
173
173
  class TextFieldExtractedData(ExtractedData):
@@ -175,32 +175,31 @@ class TextFieldExtractedData(ExtractedData):
175
175
 
176
176
 
177
177
  class FileFieldExtractedData(ExtractedData):
178
- file: Optional[FileExtractedData] = None
178
+ file: FileExtractedData | None = None
179
179
 
180
180
 
181
181
  class LinkFieldExtractedData(ExtractedData):
182
- link: Optional[LinkExtractedData] = None
182
+ link: LinkExtractedData | None = None
183
183
 
184
184
 
185
185
  class ConversationFieldExtractedData(ExtractedData):
186
186
  pass
187
187
 
188
188
 
189
- ExtractedDataType = Optional[
190
- Union[
191
- TextFieldExtractedData,
192
- FileFieldExtractedData,
193
- LinkFieldExtractedData,
194
- ConversationFieldExtractedData,
195
- ]
196
- ]
189
+ ExtractedDataType = (
190
+ TextFieldExtractedData
191
+ | FileFieldExtractedData
192
+ | LinkFieldExtractedData
193
+ | ConversationFieldExtractedData
194
+ | None
195
+ )
197
196
 
198
197
 
199
198
  class Error(BaseModel):
200
199
  body: str
201
200
  code: int
202
201
  code_str: str
203
- created: Optional[datetime]
202
+ created: datetime | None
204
203
  severity: str
205
204
 
206
205
 
@@ -208,51 +207,51 @@ class FieldData(BaseModel): ...
208
207
 
209
208
 
210
209
  class TextFieldData(BaseModel):
211
- value: Optional[FieldText] = None
212
- extracted: Optional[TextFieldExtractedData] = None
213
- error: Optional[Error] = None
214
- status: Optional[str] = None
215
- errors: Optional[list[Error]] = None
210
+ value: FieldText | None = None
211
+ extracted: TextFieldExtractedData | None = None
212
+ error: Error | None = None
213
+ status: str | None = None
214
+ errors: list[Error] | None = None
216
215
 
217
216
 
218
217
  class FileFieldData(BaseModel):
219
- value: Optional[FieldFile] = None
220
- extracted: Optional[FileFieldExtractedData] = None
221
- error: Optional[Error] = None
222
- status: Optional[str] = None
223
- errors: Optional[list[Error]] = None
218
+ value: FieldFile | None = None
219
+ extracted: FileFieldExtractedData | None = None
220
+ error: Error | None = None
221
+ status: str | None = None
222
+ errors: list[Error] | None = None
224
223
 
225
224
 
226
225
  class LinkFieldData(BaseModel):
227
- value: Optional[FieldLink] = None
228
- extracted: Optional[LinkFieldExtractedData] = None
229
- error: Optional[Error] = None
230
- status: Optional[str] = None
231
- errors: Optional[list[Error]] = None
226
+ value: FieldLink | None = None
227
+ extracted: LinkFieldExtractedData | None = None
228
+ error: Error | None = None
229
+ status: str | None = None
230
+ errors: list[Error] | None = None
232
231
 
233
232
 
234
233
  class ConversationFieldData(BaseModel):
235
- value: Optional[FieldConversation] = None
236
- extracted: Optional[ConversationFieldExtractedData] = None
237
- error: Optional[Error] = None
238
- status: Optional[str] = None
239
- errors: Optional[list[Error]] = None
234
+ value: FieldConversation | None = None
235
+ extracted: ConversationFieldExtractedData | None = None
236
+ error: Error | None = None
237
+ status: str | None = None
238
+ errors: list[Error] | None = None
240
239
 
241
240
 
242
241
  class GenericFieldData(BaseModel):
243
- value: Optional[str] = None
244
- extracted: Optional[TextFieldExtractedData] = None
245
- error: Optional[Error] = None
246
- status: Optional[str] = None
247
- errors: Optional[list[Error]] = None
242
+ value: str | None = None
243
+ extracted: TextFieldExtractedData | None = None
244
+ error: Error | None = None
245
+ status: str | None = None
246
+ errors: list[Error] | None = None
248
247
 
249
248
 
250
249
  class ResourceData(BaseModel):
251
- texts: Optional[Dict[str, TextFieldData]] = None
252
- files: Optional[Dict[str, FileFieldData]] = None
253
- links: Optional[Dict[str, LinkFieldData]] = None
254
- conversations: Optional[Dict[str, ConversationFieldData]] = None
255
- generics: Optional[Dict[str, GenericFieldData]] = None
250
+ texts: dict[str, TextFieldData] | None = None
251
+ files: dict[str, FileFieldData] | None = None
252
+ links: dict[str, LinkFieldData] | None = None
253
+ conversations: dict[str, ConversationFieldData] | None = None
254
+ generics: dict[str, GenericFieldData] | None = None
256
255
 
257
256
 
258
257
  class QueueType(str, Enum):
@@ -264,29 +263,29 @@ class Resource(BaseModel):
264
263
  id: str
265
264
 
266
265
  # This first block of attributes correspond to Basic fields
267
- slug: Optional[str] = None
268
- title: Optional[str] = None
269
- summary: Optional[str] = None
270
- icon: Optional[str] = None
271
- thumbnail: Optional[str] = None
272
- metadata: Optional[Metadata] = None
273
- usermetadata: Optional[UserMetadata] = None
274
- fieldmetadata: Optional[List[UserFieldMetadata]] = None
275
- computedmetadata: Optional[ComputedMetadata] = None
276
- created: Optional[datetime] = None
277
- modified: Optional[datetime] = None
278
- last_seqid: Optional[int] = None
279
- last_account_seq: Optional[int] = None
280
- queue: Optional[QueueType] = None
281
- hidden: Optional[bool] = None
282
-
283
- origin: Optional[Origin] = None
284
- extra: Optional[Extra] = None
285
- relations: Optional[List[Relation]] = None
286
-
287
- data: Optional[ResourceData] = None
288
-
289
- security: Optional[ResourceSecurity] = Field(
266
+ slug: str | None = None
267
+ title: str | None = None
268
+ summary: str | None = None
269
+ icon: str | None = None
270
+ thumbnail: str | None = None
271
+ metadata: Metadata | None = None
272
+ usermetadata: UserMetadata | None = None
273
+ fieldmetadata: list[UserFieldMetadata] | None = None
274
+ computedmetadata: ComputedMetadata | None = None
275
+ created: datetime | None = None
276
+ modified: datetime | None = None
277
+ last_seqid: int | None = None
278
+ last_account_seq: int | None = None
279
+ queue: QueueType | None = None
280
+ hidden: bool | None = None
281
+
282
+ origin: Origin | None = None
283
+ extra: Extra | None = None
284
+ relations: list[Relation] | None = None
285
+
286
+ data: ResourceData | None = None
287
+
288
+ security: ResourceSecurity | None = Field(
290
289
  default=None,
291
290
  title="Security",
292
291
  description="Resource security metadata",
@@ -300,19 +299,12 @@ class ResourcePagination(BaseModel):
300
299
 
301
300
 
302
301
  class ResourceList(BaseModel):
303
- resources: List[Resource]
302
+ resources: list[Resource]
304
303
  pagination: ResourcePagination
305
304
 
306
305
 
307
306
  class ResourceField(BaseModel):
308
307
  field_type: FieldTypeName
309
308
  field_id: str
310
- value: Optional[
311
- Union[
312
- FieldText,
313
- FieldFile,
314
- FieldLink,
315
- Conversation,
316
- ]
317
- ] = None
309
+ value: FieldText | FieldFile | FieldLink | Conversation | None = None
318
310
  extracted: ExtractedDataType = None
@@ -0,0 +1,147 @@
1
+ # Copyright 2025 Bosutech XXI S.L.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ from enum import Enum
16
+ from typing import Literal
17
+
18
+ from pydantic import BaseModel, Field
19
+
20
+ from nucliadb_models.filters import FilterExpression
21
+ from nucliadb_models.graph.requests import GraphPathQuery
22
+ from nucliadb_models.search import RankFusion, RankFusionName, SearchParamDefaults
23
+ from nucliadb_models.security import RequestSecurity
24
+
25
+
26
+ class KeywordQuery(BaseModel):
27
+ query: str
28
+ min_score: float = 0.0
29
+ with_synonyms: bool = False
30
+
31
+
32
+ class SemanticQuery(BaseModel):
33
+ query: list[float]
34
+ vectorset: str
35
+ min_score: float = -1.0
36
+
37
+
38
+ class GraphQuery(BaseModel):
39
+ query: GraphPathQuery
40
+
41
+
42
+ class Query(BaseModel):
43
+ keyword: KeywordQuery | None = None
44
+ semantic: SemanticQuery | None = None
45
+ graph: GraphQuery | None = None
46
+
47
+
48
+ class Filters(BaseModel):
49
+ filter_expression: FilterExpression | None = (
50
+ SearchParamDefaults.filter_expression.to_pydantic_field()
51
+ )
52
+ show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
53
+ security: RequestSecurity | None = None
54
+ with_duplicates: bool = False
55
+
56
+
57
+ class RetrievalRequest(BaseModel):
58
+ query: Query
59
+ top_k: int = Field(default=20, gt=0, le=500)
60
+ filters: Filters = Field(default_factory=Filters)
61
+ rank_fusion: RankFusionName | RankFusion = Field(default=RankFusionName.RECIPROCAL_RANK_FUSION)
62
+
63
+
64
+ class ScoreSource(str, Enum):
65
+ INDEX = "index"
66
+ RANK_FUSION = "rank_fusion"
67
+ RERANKER = "reranker"
68
+
69
+
70
+ class ScoreType(str, Enum):
71
+ SEMANTIC = "semantic"
72
+ KEYWORD = "keyword"
73
+ GRAPH = "graph"
74
+ RRF = "rrf"
75
+ WCOMB_SUM = "wCombSUM"
76
+ DEFAULT_RERANKER = "default_reranker"
77
+
78
+
79
+ class KeywordScore(BaseModel):
80
+ score: float
81
+ source: Literal[ScoreSource.INDEX] = ScoreSource.INDEX
82
+ type: Literal[ScoreType.KEYWORD] = ScoreType.KEYWORD
83
+
84
+
85
+ class SemanticScore(BaseModel):
86
+ score: float
87
+ source: Literal[ScoreSource.INDEX] = ScoreSource.INDEX
88
+ type: Literal[ScoreType.SEMANTIC] = ScoreType.SEMANTIC
89
+
90
+
91
+ class GraphScore(BaseModel):
92
+ score: float
93
+ source: Literal[ScoreSource.INDEX] = ScoreSource.INDEX
94
+ type: Literal[ScoreType.GRAPH] = ScoreType.GRAPH
95
+
96
+
97
+ class RrfScore(BaseModel):
98
+ score: float
99
+ source: Literal[ScoreSource.RANK_FUSION] = ScoreSource.RANK_FUSION
100
+ type: Literal[ScoreType.RRF] = ScoreType.RRF
101
+
102
+
103
+ class WeightedCombSumScore(BaseModel):
104
+ score: float
105
+ source: Literal[ScoreSource.RANK_FUSION] = ScoreSource.RANK_FUSION
106
+ type: Literal[ScoreType.WCOMB_SUM] = ScoreType.WCOMB_SUM
107
+
108
+
109
+ class RerankerScore(BaseModel):
110
+ score: float
111
+ source: Literal[ScoreSource.RERANKER] = ScoreSource.RERANKER
112
+ type: Literal[ScoreType.DEFAULT_RERANKER] = ScoreType.DEFAULT_RERANKER
113
+
114
+
115
+ Score = KeywordScore | SemanticScore | GraphScore | RrfScore | WeightedCombSumScore | RerankerScore
116
+
117
+
118
+ class Scores(BaseModel):
119
+ value: float
120
+ source: ScoreSource
121
+ type: ScoreType
122
+ history: list[Score]
123
+
124
+
125
+ class Metadata(BaseModel):
126
+ field_labels: list[str]
127
+ paragraph_labels: list[str]
128
+
129
+ is_an_image: bool
130
+ is_a_table: bool
131
+
132
+ # for extracted from visual content (ocr, inception, tables)
133
+ source_file: str | None
134
+
135
+ # for documents (pdf, docx...) only
136
+ page: int | None
137
+ in_page_with_visual: bool | None
138
+
139
+
140
+ class RetrievalMatch(BaseModel):
141
+ id: str
142
+ score: Scores
143
+ metadata: Metadata
144
+
145
+
146
+ class RetrievalResponse(BaseModel):
147
+ matches: list[RetrievalMatch]