nucliadb-models 6.9.7.post5550__py3-none-any.whl → 6.10.0.post5788__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb-models might be problematic. Click here for more details.

Files changed (35)
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +187 -78
  3. nucliadb_models/common.py +56 -56
  4. nucliadb_models/configuration.py +8 -8
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +25 -26
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +1 -2
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +79 -75
  12. nucliadb_models/graph/requests.py +40 -48
  13. nucliadb_models/graph/responses.py +13 -1
  14. nucliadb_models/hydration.py +48 -50
  15. nucliadb_models/internal/predict.py +7 -9
  16. nucliadb_models/internal/shards.py +2 -3
  17. nucliadb_models/labels.py +18 -11
  18. nucliadb_models/link.py +18 -19
  19. nucliadb_models/metadata.py +66 -54
  20. nucliadb_models/notifications.py +3 -3
  21. nucliadb_models/processing.py +1 -2
  22. nucliadb_models/resource.py +85 -102
  23. nucliadb_models/retrieval.py +147 -0
  24. nucliadb_models/search.py +297 -275
  25. nucliadb_models/security.py +2 -3
  26. nucliadb_models/text.py +7 -8
  27. nucliadb_models/trainset.py +1 -2
  28. nucliadb_models/utils.py +2 -3
  29. nucliadb_models/vectors.py +2 -5
  30. nucliadb_models/writer.py +56 -57
  31. {nucliadb_models-6.9.7.post5550.dist-info → nucliadb_models-6.10.0.post5788.dist-info}/METADATA +1 -1
  32. nucliadb_models-6.10.0.post5788.dist-info/RECORD +41 -0
  33. nucliadb_models-6.9.7.post5550.dist-info/RECORD +0 -40
  34. {nucliadb_models-6.9.7.post5550.dist-info → nucliadb_models-6.10.0.post5788.dist-info}/WHEEL +0 -0
  35. {nucliadb_models-6.9.7.post5550.dist-info → nucliadb_models-6.10.0.post5788.dist-info}/top_level.txt +0 -0
@@ -49,7 +49,7 @@ class ResourceIndexed(BaseModel):
49
49
  seqid: int = Field(
50
50
  ...,
51
51
  title="Sequence ID",
52
- description="Sequence ID of the resource operation. This can be used to track completion of specific operations.", # noqa: E501
52
+ description="Sequence ID of the resource operation. This can be used to track completion of specific operations.",
53
53
  )
54
54
 
55
55
 
@@ -59,7 +59,7 @@ class ResourceWritten(BaseModel):
59
59
  seqid: int = Field(
60
60
  ...,
61
61
  title="Sequence ID",
62
- description="Sequence ID of the resource operation. This can be used to track completion of specific operations.", # noqa: E501
62
+ description="Sequence ID of the resource operation. This can be used to track completion of specific operations.",
63
63
  )
64
64
  operation: ResourceOperationType = Field(
65
65
  ..., title="Operation", description="Type of resource write operation."
@@ -77,7 +77,7 @@ class ResourceProcessed(BaseModel):
77
77
  seqid: int = Field(
78
78
  ...,
79
79
  title="Sequence ID",
80
- description="Sequence ID of the resource operation. This can be used to track completion of specific operations.", # noqa: E501
80
+ description="Sequence ID of the resource operation. This can be used to track completion of specific operations.",
81
81
  )
82
82
  ingestion_succeeded: bool = Field(
83
83
  default=True,
@@ -12,10 +12,9 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional
16
15
 
17
16
  from pydantic import BaseModel
18
17
 
19
18
 
20
19
  class PushProcessingOptions(BaseModel):
21
- ml_text: Optional[bool] = True
20
+ ml_text: bool | None = True
@@ -16,7 +16,7 @@
16
16
  import string
17
17
  from datetime import datetime
18
18
  from enum import Enum
19
- from typing import Any, Dict, List, Optional, Union
19
+ from typing import Any
20
20
 
21
21
  from pydantic import BaseModel, Field, field_validator, model_validator
22
22
 
@@ -72,44 +72,35 @@ class ExtractedDataTypeName(str, Enum):
72
72
  QA = "question_answers"
73
73
 
74
74
 
75
- class ReleaseChannel(str, Enum):
76
- """
77
- Deprecated. No longer used.
78
- """
79
-
80
- STABLE = "STABLE"
81
- EXPERIMENTAL = "EXPERIMENTAL"
82
-
83
-
84
75
  class KnowledgeBoxConfig(BaseModel):
85
- slug: Optional[SlugString] = Field(
76
+ slug: SlugString | None = Field(
86
77
  default=None, title="Slug", description="Slug for the Knowledge Box."
87
78
  )
88
- title: Optional[str] = Field(default=None, title="Title", description="Title for the Knowledge Box.")
89
- description: Optional[str] = Field(
79
+ title: str | None = Field(default=None, title="Title", description="Title for the Knowledge Box.")
80
+ description: str | None = Field(
90
81
  default=None,
91
82
  title="Description",
92
83
  description="Description for the Knowledge Box.",
93
84
  )
94
- learning_configuration: Optional[Dict[str, Any]] = Field(
85
+ learning_configuration: dict[str, Any] | None = Field(
95
86
  default=None,
96
87
  title="Learning Configuration",
97
- description="Learning configuration for the Knowledge Box. If provided, NucliaDB will set the learning configuration for the Knowledge Box.", # noqa: E501
88
+ description="Learning configuration for the Knowledge Box. If provided, NucliaDB will set the learning configuration for the Knowledge Box.",
98
89
  )
99
90
 
100
- external_index_provider: Optional[ExternalIndexProvider] = Field(
91
+ external_index_provider: ExternalIndexProvider | None = Field(
101
92
  default=None,
102
93
  title="External Index Provider",
103
94
  description="External index provider for the Knowledge Box.",
104
95
  )
105
96
 
106
- configured_external_index_provider: Optional[dict[str, Any]] = Field(
97
+ configured_external_index_provider: dict[str, Any] | None = Field(
107
98
  default=None,
108
99
  title="Configured External Index Provider",
109
100
  description="Metadata for the configured external index provider (if any)",
110
101
  )
111
102
 
112
- similarity: Optional[VectorSimilarity] = Field(
103
+ similarity: VectorSimilarity | None = Field(
113
104
  default=None,
114
105
  description="This field is deprecated. Use 'learning_configuration' instead.",
115
106
  )
@@ -126,7 +117,7 @@ class KnowledgeBoxConfig(BaseModel):
126
117
 
127
118
  @field_validator("slug")
128
119
  @classmethod
129
- def id_check(cls, v: Optional[str]) -> Optional[str]:
120
+ def id_check(cls, v: str | None) -> str | None:
130
121
  if v is None:
131
122
  return v
132
123
 
@@ -145,7 +136,7 @@ class KnowledgeBoxConfig(BaseModel):
145
136
 
146
137
 
147
138
  class KnowledgeBoxObjSummary(BaseModel):
148
- slug: Optional[SlugString] = None
139
+ slug: SlugString | None = None
149
140
  uuid: str
150
141
 
151
142
 
@@ -158,25 +149,25 @@ class KnowledgeBoxObj(BaseModel):
158
149
  The API representation of a Knowledge Box object.
159
150
  """
160
151
 
161
- slug: Optional[SlugString] = None
152
+ slug: SlugString | None = None
162
153
  uuid: str
163
- config: Optional[KnowledgeBoxConfig] = None
164
- model: Optional[SemanticModelMetadata] = None
154
+ config: KnowledgeBoxConfig | None = None
155
+ model: SemanticModelMetadata | None = None
165
156
 
166
157
 
167
158
  class KnowledgeBoxList(BaseModel):
168
- kbs: List[KnowledgeBoxObjSummary] = []
159
+ kbs: list[KnowledgeBoxObjSummary] = []
169
160
 
170
161
 
171
162
  # Resources
172
163
 
173
164
 
174
165
  class ExtractedData(BaseModel):
175
- text: Optional[ExtractedText] = None
176
- metadata: Optional[FieldComputedMetadata] = None
177
- large_metadata: Optional[LargeComputedMetadata] = None
178
- vectors: Optional[VectorObject] = None
179
- question_answers: Optional[FieldQuestionAnswers] = None
166
+ text: ExtractedText | None = None
167
+ metadata: FieldComputedMetadata | None = None
168
+ large_metadata: LargeComputedMetadata | None = None
169
+ vectors: VectorObject | None = None
170
+ question_answers: FieldQuestionAnswers | None = None
180
171
 
181
172
 
182
173
  class TextFieldExtractedData(ExtractedData):
@@ -184,32 +175,31 @@ class TextFieldExtractedData(ExtractedData):
184
175
 
185
176
 
186
177
  class FileFieldExtractedData(ExtractedData):
187
- file: Optional[FileExtractedData] = None
178
+ file: FileExtractedData | None = None
188
179
 
189
180
 
190
181
  class LinkFieldExtractedData(ExtractedData):
191
- link: Optional[LinkExtractedData] = None
182
+ link: LinkExtractedData | None = None
192
183
 
193
184
 
194
185
  class ConversationFieldExtractedData(ExtractedData):
195
186
  pass
196
187
 
197
188
 
198
- ExtractedDataType = Optional[
199
- Union[
200
- TextFieldExtractedData,
201
- FileFieldExtractedData,
202
- LinkFieldExtractedData,
203
- ConversationFieldExtractedData,
204
- ]
205
- ]
189
+ ExtractedDataType = (
190
+ TextFieldExtractedData
191
+ | FileFieldExtractedData
192
+ | LinkFieldExtractedData
193
+ | ConversationFieldExtractedData
194
+ | None
195
+ )
206
196
 
207
197
 
208
198
  class Error(BaseModel):
209
199
  body: str
210
200
  code: int
211
201
  code_str: str
212
- created: Optional[datetime]
202
+ created: datetime | None
213
203
  severity: str
214
204
 
215
205
 
@@ -217,51 +207,51 @@ class FieldData(BaseModel): ...
217
207
 
218
208
 
219
209
  class TextFieldData(BaseModel):
220
- value: Optional[FieldText] = None
221
- extracted: Optional[TextFieldExtractedData] = None
222
- error: Optional[Error] = None
223
- status: Optional[str] = None
224
- errors: Optional[list[Error]] = None
210
+ value: FieldText | None = None
211
+ extracted: TextFieldExtractedData | None = None
212
+ error: Error | None = None
213
+ status: str | None = None
214
+ errors: list[Error] | None = None
225
215
 
226
216
 
227
217
  class FileFieldData(BaseModel):
228
- value: Optional[FieldFile] = None
229
- extracted: Optional[FileFieldExtractedData] = None
230
- error: Optional[Error] = None
231
- status: Optional[str] = None
232
- errors: Optional[list[Error]] = None
218
+ value: FieldFile | None = None
219
+ extracted: FileFieldExtractedData | None = None
220
+ error: Error | None = None
221
+ status: str | None = None
222
+ errors: list[Error] | None = None
233
223
 
234
224
 
235
225
  class LinkFieldData(BaseModel):
236
- value: Optional[FieldLink] = None
237
- extracted: Optional[LinkFieldExtractedData] = None
238
- error: Optional[Error] = None
239
- status: Optional[str] = None
240
- errors: Optional[list[Error]] = None
226
+ value: FieldLink | None = None
227
+ extracted: LinkFieldExtractedData | None = None
228
+ error: Error | None = None
229
+ status: str | None = None
230
+ errors: list[Error] | None = None
241
231
 
242
232
 
243
233
  class ConversationFieldData(BaseModel):
244
- value: Optional[FieldConversation] = None
245
- extracted: Optional[ConversationFieldExtractedData] = None
246
- error: Optional[Error] = None
247
- status: Optional[str] = None
248
- errors: Optional[list[Error]] = None
234
+ value: FieldConversation | None = None
235
+ extracted: ConversationFieldExtractedData | None = None
236
+ error: Error | None = None
237
+ status: str | None = None
238
+ errors: list[Error] | None = None
249
239
 
250
240
 
251
241
  class GenericFieldData(BaseModel):
252
- value: Optional[str] = None
253
- extracted: Optional[TextFieldExtractedData] = None
254
- error: Optional[Error] = None
255
- status: Optional[str] = None
256
- errors: Optional[list[Error]] = None
242
+ value: str | None = None
243
+ extracted: TextFieldExtractedData | None = None
244
+ error: Error | None = None
245
+ status: str | None = None
246
+ errors: list[Error] | None = None
257
247
 
258
248
 
259
249
  class ResourceData(BaseModel):
260
- texts: Optional[Dict[str, TextFieldData]] = None
261
- files: Optional[Dict[str, FileFieldData]] = None
262
- links: Optional[Dict[str, LinkFieldData]] = None
263
- conversations: Optional[Dict[str, ConversationFieldData]] = None
264
- generics: Optional[Dict[str, GenericFieldData]] = None
250
+ texts: dict[str, TextFieldData] | None = None
251
+ files: dict[str, FileFieldData] | None = None
252
+ links: dict[str, LinkFieldData] | None = None
253
+ conversations: dict[str, ConversationFieldData] | None = None
254
+ generics: dict[str, GenericFieldData] | None = None
265
255
 
266
256
 
267
257
  class QueueType(str, Enum):
@@ -273,29 +263,29 @@ class Resource(BaseModel):
273
263
  id: str
274
264
 
275
265
  # This first block of attributes correspond to Basic fields
276
- slug: Optional[str] = None
277
- title: Optional[str] = None
278
- summary: Optional[str] = None
279
- icon: Optional[str] = None
280
- thumbnail: Optional[str] = None
281
- metadata: Optional[Metadata] = None
282
- usermetadata: Optional[UserMetadata] = None
283
- fieldmetadata: Optional[List[UserFieldMetadata]] = None
284
- computedmetadata: Optional[ComputedMetadata] = None
285
- created: Optional[datetime] = None
286
- modified: Optional[datetime] = None
287
- last_seqid: Optional[int] = None
288
- last_account_seq: Optional[int] = None
289
- queue: Optional[QueueType] = None
290
- hidden: Optional[bool] = None
291
-
292
- origin: Optional[Origin] = None
293
- extra: Optional[Extra] = None
294
- relations: Optional[List[Relation]] = None
295
-
296
- data: Optional[ResourceData] = None
297
-
298
- security: Optional[ResourceSecurity] = Field(
266
+ slug: str | None = None
267
+ title: str | None = None
268
+ summary: str | None = None
269
+ icon: str | None = None
270
+ thumbnail: str | None = None
271
+ metadata: Metadata | None = None
272
+ usermetadata: UserMetadata | None = None
273
+ fieldmetadata: list[UserFieldMetadata] | None = None
274
+ computedmetadata: ComputedMetadata | None = None
275
+ created: datetime | None = None
276
+ modified: datetime | None = None
277
+ last_seqid: int | None = None
278
+ last_account_seq: int | None = None
279
+ queue: QueueType | None = None
280
+ hidden: bool | None = None
281
+
282
+ origin: Origin | None = None
283
+ extra: Extra | None = None
284
+ relations: list[Relation] | None = None
285
+
286
+ data: ResourceData | None = None
287
+
288
+ security: ResourceSecurity | None = Field(
299
289
  default=None,
300
290
  title="Security",
301
291
  description="Resource security metadata",
@@ -309,19 +299,12 @@ class ResourcePagination(BaseModel):
309
299
 
310
300
 
311
301
  class ResourceList(BaseModel):
312
- resources: List[Resource]
302
+ resources: list[Resource]
313
303
  pagination: ResourcePagination
314
304
 
315
305
 
316
306
  class ResourceField(BaseModel):
317
307
  field_type: FieldTypeName
318
308
  field_id: str
319
- value: Optional[
320
- Union[
321
- FieldText,
322
- FieldFile,
323
- FieldLink,
324
- Conversation,
325
- ]
326
- ] = None
309
+ value: FieldText | FieldFile | FieldLink | Conversation | None = None
327
310
  extracted: ExtractedDataType = None
@@ -0,0 +1,147 @@
1
+ # Copyright 2025 Bosutech XXI S.L.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ from enum import Enum
16
+ from typing import Literal
17
+
18
+ from pydantic import BaseModel, Field
19
+
20
+ from nucliadb_models.filters import FilterExpression
21
+ from nucliadb_models.graph.requests import GraphPathQuery
22
+ from nucliadb_models.search import RankFusion, RankFusionName, SearchParamDefaults
23
+ from nucliadb_models.security import RequestSecurity
24
+
25
+
26
+ class KeywordQuery(BaseModel):
27
+ query: str
28
+ min_score: float = 0.0
29
+ with_synonyms: bool = False
30
+
31
+
32
+ class SemanticQuery(BaseModel):
33
+ query: list[float]
34
+ vectorset: str
35
+ min_score: float = -1.0
36
+
37
+
38
+ class GraphQuery(BaseModel):
39
+ query: GraphPathQuery
40
+
41
+
42
+ class Query(BaseModel):
43
+ keyword: KeywordQuery | None = None
44
+ semantic: SemanticQuery | None = None
45
+ graph: GraphQuery | None = None
46
+
47
+
48
+ class Filters(BaseModel):
49
+ filter_expression: FilterExpression | None = (
50
+ SearchParamDefaults.filter_expression.to_pydantic_field()
51
+ )
52
+ show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
53
+ security: RequestSecurity | None = None
54
+ with_duplicates: bool = False
55
+
56
+
57
+ class RetrievalRequest(BaseModel):
58
+ query: Query
59
+ top_k: int = Field(default=20, gt=0, le=500)
60
+ filters: Filters = Field(default_factory=Filters)
61
+ rank_fusion: RankFusionName | RankFusion = Field(default=RankFusionName.RECIPROCAL_RANK_FUSION)
62
+
63
+
64
+ class ScoreSource(str, Enum):
65
+ INDEX = "index"
66
+ RANK_FUSION = "rank_fusion"
67
+ RERANKER = "reranker"
68
+
69
+
70
+ class ScoreType(str, Enum):
71
+ SEMANTIC = "semantic"
72
+ KEYWORD = "keyword"
73
+ GRAPH = "graph"
74
+ RRF = "rrf"
75
+ WCOMB_SUM = "wCombSUM"
76
+ DEFAULT_RERANKER = "default_reranker"
77
+
78
+
79
+ class KeywordScore(BaseModel):
80
+ score: float
81
+ source: Literal[ScoreSource.INDEX] = ScoreSource.INDEX
82
+ type: Literal[ScoreType.KEYWORD] = ScoreType.KEYWORD
83
+
84
+
85
+ class SemanticScore(BaseModel):
86
+ score: float
87
+ source: Literal[ScoreSource.INDEX] = ScoreSource.INDEX
88
+ type: Literal[ScoreType.SEMANTIC] = ScoreType.SEMANTIC
89
+
90
+
91
+ class GraphScore(BaseModel):
92
+ score: float
93
+ source: Literal[ScoreSource.INDEX] = ScoreSource.INDEX
94
+ type: Literal[ScoreType.GRAPH] = ScoreType.GRAPH
95
+
96
+
97
+ class RrfScore(BaseModel):
98
+ score: float
99
+ source: Literal[ScoreSource.RANK_FUSION] = ScoreSource.RANK_FUSION
100
+ type: Literal[ScoreType.RRF] = ScoreType.RRF
101
+
102
+
103
+ class WeightedCombSumScore(BaseModel):
104
+ score: float
105
+ source: Literal[ScoreSource.RANK_FUSION] = ScoreSource.RANK_FUSION
106
+ type: Literal[ScoreType.WCOMB_SUM] = ScoreType.WCOMB_SUM
107
+
108
+
109
+ class RerankerScore(BaseModel):
110
+ score: float
111
+ source: Literal[ScoreSource.RERANKER] = ScoreSource.RERANKER
112
+ type: Literal[ScoreType.DEFAULT_RERANKER] = ScoreType.DEFAULT_RERANKER
113
+
114
+
115
+ Score = KeywordScore | SemanticScore | GraphScore | RrfScore | WeightedCombSumScore | RerankerScore
116
+
117
+
118
+ class Scores(BaseModel):
119
+ value: float
120
+ source: ScoreSource
121
+ type: ScoreType
122
+ history: list[Score]
123
+
124
+
125
+ class Metadata(BaseModel):
126
+ field_labels: list[str]
127
+ paragraph_labels: list[str]
128
+
129
+ is_an_image: bool
130
+ is_a_table: bool
131
+
132
+ # for extracted from visual content (ocr, inception, tables)
133
+ source_file: str | None
134
+
135
+ # for documents (pdf, docx...) only
136
+ page: int | None
137
+ in_page_with_visual: bool | None
138
+
139
+
140
+ class RetrievalMatch(BaseModel):
141
+ id: str
142
+ score: Scores
143
+ metadata: Metadata
144
+
145
+
146
+ class RetrievalResponse(BaseModel):
147
+ matches: list[RetrievalMatch]