nucliadb-models 6.7.1.post4848__py3-none-any.whl → 6.9.6.post5453__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,85 @@
1
+ # Copyright 2025 Bosutech XXI S.L.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+
16
+ from pydantic import BaseModel
17
+
18
+ from nucliadb_models.common import FieldTypeName
19
+ from nucliadb_models.resource import ExtractedDataTypeName, Resource
20
+ from nucliadb_models.search import Image, ResourceProperties, SearchParamDefaults
21
+
22
+ ParagraphId = str
23
+
24
+
25
+ class AugmentedParagraph(BaseModel):
26
+ text: str | None = None
27
+
28
+ neighbours_before: dict[ParagraphId, str] | None = None
29
+ neighbours_after: dict[ParagraphId, str] | None = None
30
+
31
+ image: Image | None = None
32
+
33
+
34
+ class AugmentedField(BaseModel):
35
+ page_preview_image: Image | None = None
36
+
37
+
38
class AugmentedResource(Resource):
    # Resource variant that can be refreshed in place from another Resource.

    def updated_from(self, origin: Resource) -> None:
        """Copy every model field of ``origin`` onto this instance.

        Args:
            origin: Resource whose field values overwrite the ones on ``self``.
        """
        # Read the field registry from the class: accessing `model_fields`
        # through an instance is deprecated in recent pydantic releases.
        for key in type(origin).model_fields:
            setattr(self, key, getattr(origin, key))
42
+
43
+
44
+ class AugmentResources(BaseModel):
45
+ given: list[str]
46
+
47
+ show: list[ResourceProperties] = SearchParamDefaults.show.to_pydantic_field()
48
+ extracted: list[ExtractedDataTypeName] = SearchParamDefaults.extracted.to_pydantic_field()
49
+ field_type_filter: list[FieldTypeName] = SearchParamDefaults.field_type_filter.to_pydantic_field()
50
+ # TODO: add a field name filter and a data augmentation (da) field prefix filter
51
+
52
+
53
+ class AugmentParagraph(BaseModel):
54
+ id: ParagraphId
55
+
56
+
57
+ class AugmentParagraphs(BaseModel):
58
+ given: list[AugmentParagraph]
59
+
60
+ text: bool = True
61
+
62
+ neighbours_before: int = 0
63
+ neighbours_after: int = 0
64
+
65
+ # paragraph extracted from an image, return an image
66
+ source_image: bool = False
67
+
68
+ # paragraph extracted from a table, return table image
69
+ table_image: bool = False
70
+
71
+ # return page_preview instead of table image if table image enabled
72
+ table_prefers_page_preview: bool = False
73
+
74
+ # paragraph from a page, return page preview image
75
+ page_preview_image: bool = False
76
+
77
+
78
+ class AugmentRequest(BaseModel):
79
+ resources: AugmentResources
80
+ paragraphs: AugmentParagraphs
81
+
82
+
83
+ class AugmentResponse(BaseModel):
84
+ resources: dict[str, AugmentedResource]
85
+ paragraphs: dict[str, AugmentedParagraph]
nucliadb_models/common.py CHANGED
@@ -203,6 +203,15 @@ class FieldTypeName(str, Enum):
203
203
  "a": FieldTypeName.GENERIC,
204
204
  }[abbr]
205
205
 
206
def abbreviation(self) -> str:
    """Return the one-letter abbreviation for this field type.

    Raises:
        KeyError: if this member has no abbreviation in the table below.
    """
    short_names = {
        FieldTypeName.TEXT: "t",
        FieldTypeName.FILE: "f",
        FieldTypeName.LINK: "u",
        FieldTypeName.CONVERSATION: "c",
        FieldTypeName.GENERIC: "a",
    }
    return short_names[self]
214
+
206
215
 
207
216
  class FieldRef(BaseModel):
208
217
  field_type: FieldTypeName
@@ -44,7 +44,7 @@ def _model_fields(model: type[BaseModel], skip: list[str]) -> dict[str, Any]:
44
44
  }
45
45
 
46
46
 
47
- # FindConfig is a FindConfig without `search_configuration`
47
+ # FindConfig is a FindRequest without `search_configuration`
48
48
  FindConfig = create_model("FindConfig", **_model_fields(FindRequest, skip=["search_configuration"]))
49
49
 
50
50
 
@@ -86,10 +86,10 @@ class FieldConversation(BaseModel):
86
86
 
87
87
 
88
88
  class InputMessageContent(BaseModel):
89
- text: str
89
+ text: str = Field()
90
90
  format: MessageFormat = MessageFormat.PLAIN
91
- attachments: List[FileB64] = []
92
- attachments_fields: List[FieldRef] = []
91
+ attachments: List[FileB64] = Field(default=[], max_length=50)
92
+ attachments_fields: List[FieldRef] = Field(default=[], max_length=50)
93
93
 
94
94
 
95
95
  class InputMessage(BaseModel):
@@ -102,10 +102,12 @@ class InputMessage(BaseModel):
102
102
  to: List[str] = Field(
103
103
  default_factory=list,
104
104
  description="List of recipients of the message, e.g. ['assistant'] or ['user']",
105
+ max_length=100,
105
106
  )
106
107
  content: InputMessageContent
107
108
  ident: str = Field(
108
- description="Unique identifier for the message. Must be unique within the conversation."
109
+ description="Unique identifier for the message. Must be unique within the conversation.",
110
+ max_length=128,
109
111
  )
110
112
  type_: Optional[MessageType] = Field(None, alias="type")
111
113
 
@@ -126,7 +128,7 @@ class InputMessage(BaseModel):
126
128
  class InputConversationField(BaseModel):
127
129
  messages: List[InputMessage] = Field(
128
130
  default_factory=list,
129
- description="List of messages in the conversation field. Each message must have a unique ident.",
131
+ description="List of messages in the conversation field. Each message must have a unique ident. A single conversation can contain up to 51,200 messages. You can add up to 2,048 messages per request.",
130
132
  )
131
133
  extract_strategy: Optional[str] = Field(
132
134
  default=None,
@@ -22,32 +22,18 @@ from pydantic import BaseModel
22
22
  class ExternalIndexProviderType(str, Enum):
23
23
  """
24
24
  Enum for the different external index providers.
25
- For now only Pinecone is supported, but we may add more in the future.
25
+ For now none are supported, but we may add some in the future.
26
26
  """
27
27
 
28
- PINECONE = "pinecone"
28
+ UNSET = "unset"
29
29
 
30
30
 
31
31
  class ExternalIndexProviderBase(BaseModel):
32
32
  type: ExternalIndexProviderType
33
33
 
34
34
 
35
- class PineconeServerlessCloud(str, Enum):
36
- """
37
- List of cloud providers supported by Pinecone serverless vector database.
38
- """
39
-
40
- AWS_US_EAST_1 = "aws_us_east_1"
41
- AWS_US_WEST_2 = "aws_us_west_2"
42
- AWS_EU_WEST_1 = "aws_eu_west_1"
43
- GCP_US_CENTRAL1 = "gcp_us_central1"
44
- AZURE_EASTUS2 = "azure_eastus2"
45
-
46
-
47
- class PineconeIndexProvider(ExternalIndexProviderBase):
48
- type: ExternalIndexProviderType = ExternalIndexProviderType.PINECONE
49
- api_key: str
50
- serverless_cloud: PineconeServerlessCloud
35
+ class DummyIndexProvider(ExternalIndexProviderBase):
36
+ type: ExternalIndexProviderType = ExternalIndexProviderType.UNSET
51
37
 
52
38
 
53
- ExternalIndexProvider = Union[PineconeIndexProvider,]
39
+ ExternalIndexProvider = Union[DummyIndexProvider,]
@@ -0,0 +1,425 @@
1
+ # Copyright 2025 Bosutech XXI S.L.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ #
15
+ from typing import Annotated, Optional, Union
16
+
17
+ from pydantic import BaseModel, Field, StringConstraints
18
+
19
+ from nucliadb_models.common import FieldTypeName
20
+ from nucliadb_models.metadata import Origin
21
+ from nucliadb_models.resource import FieldConversation, FieldFile, FieldLink, FieldText
22
+ from nucliadb_models.search import Image
23
+ from nucliadb_models.security import ResourceSecurity
24
+
25
+
26
+ class ResourceHydration(BaseModel, extra="forbid"):
27
+ title: bool = Field(
28
+ default=True,
29
+ description="Hydrate resource titles",
30
+ )
31
+ summary: bool = Field(
32
+ default=False,
33
+ description="Hydrate resource summaries",
34
+ )
35
+
36
+ origin: bool = Field(
37
+ default=False,
38
+ description="Hydrate resource origin",
39
+ )
40
+
41
+ security: bool = Field(
42
+ default=False,
43
+ description="Hydrate resource security metadata",
44
+ )
45
+
46
+
47
+ class TextFieldHydration(BaseModel, extra="forbid"):
48
+ value: bool = Field(
49
+ default=False,
50
+ description="Hydrate text field values. Field values are similar payloads to the ones used to create them",
51
+ )
52
+ extracted_text: bool = Field(
53
+ default=False,
54
+ description="Hydrate extracted text for text fields",
55
+ )
56
+ # TODO: what else should be interesting to add?
57
+
58
+
59
+ class FileFieldHydration(BaseModel, extra="forbid"):
60
+ value: bool = Field(
61
+ default=False,
62
+ description="Hydrate file field values. Field values are similar payloads to the ones used to create them",
63
+ )
64
+ extracted_text: bool = Field(
65
+ default=False,
66
+ description="Hydrate extracted text for file fields",
67
+ )
68
+ # TODO: what else should be interesting to add?
69
+
70
+
71
+ class LinkFieldHydration(BaseModel, extra="forbid"):
72
+ value: bool = Field(
73
+ default=False,
74
+ description="Hydrate link field values. Field values are similar payloads to the ones used to create them",
75
+ )
76
+ extracted_text: bool = Field(
77
+ default=False,
78
+ description="Hydrate extracted text for link fields",
79
+ )
80
+ # TODO: what else should be interesting to add?
81
+
82
+
83
+ class ConversationFieldHydration(BaseModel, extra="forbid"):
84
+ value: bool = Field(
85
+ default=False,
86
+ description="Hydrate conversation field values. Field values are similar payloads to the ones used to create them",
87
+ )
88
+
89
+ # TODO: add fields to hydrate conversation fields. Think about how to handle
90
+ # splits and fulfill the conversational RAG strategies
91
+
92
+ # TODO: what else should be interesting to add?
93
+
94
+
95
+ class GenericFieldHydration(BaseModel, extra="forbid"):
96
+ value: bool = Field(
97
+ default=False,
98
+ description="Hydrate generic field values. Field values are similar payloads to the ones used to create them",
99
+ )
100
+ extracted_text: bool = Field(
101
+ default=False,
102
+ description="Hydrate extracted text for generic fields",
103
+ )
104
+ # TODO: what else should be interesting to add?
105
+
106
+
107
+ class FieldHydration(BaseModel, extra="forbid"):
108
+ text: Optional[TextFieldHydration] = Field(
109
+ default_factory=TextFieldHydration,
110
+ description="Text fields hydration options",
111
+ )
112
+ file: Optional[FileFieldHydration] = Field(
113
+ default_factory=FileFieldHydration,
114
+ description="File fields hydration options",
115
+ )
116
+ link: Optional[LinkFieldHydration] = Field(
117
+ default_factory=LinkFieldHydration,
118
+ description="Link fields hydration options",
119
+ )
120
+ conversation: Optional[ConversationFieldHydration] = Field(
121
+ default_factory=ConversationFieldHydration,
122
+ description="Conversation fields hydration options",
123
+ )
124
+ generic: Optional[GenericFieldHydration] = Field(
125
+ default_factory=GenericFieldHydration,
126
+ description="Generic fields hydration options",
127
+ )
128
+
129
+
130
+ class NeighbourParagraphHydration(BaseModel, extra="forbid"):
131
+ before: int = Field(
132
+ default=2,
133
+ ge=0,
134
+ description="Number of previous paragraphs to hydrate",
135
+ )
136
+ after: int = Field(
137
+ default=2,
138
+ ge=0,
139
+ description="Number of following paragraphs to hydrate",
140
+ )
141
+
142
+
143
+ class RelatedParagraphHydration(BaseModel, extra="forbid"):
144
+ neighbours: Optional[NeighbourParagraphHydration] = Field(
145
+ default=None,
146
+ description="Hydrate extra paragraphs that surround the original one",
147
+ )
148
+
149
+ # TODO: FEATURE: implement related paragraphs by page
150
+ # page: bool = Field(
151
+ # default=False,
152
+ # description="Hydrate all paragraphs in the same page. This only applies to fields with pages",
153
+ # )
154
+
155
+ # TODO: description
156
+ # XXX: should we let users control the amount of elements?
157
+ parents: bool = False
158
+ # TODO: description
159
+ # XXX: should we let users control the amount of elements?
160
+ siblings: bool = False
161
+ # TODO: description
162
+ # XXX: should we let users control the amount of elements?
163
+ replacements: bool = False
164
+
165
+
166
+ class ImageParagraphHydration(BaseModel, extra="forbid"):
167
+ # The source image is also known as reference or reference_file in the
168
+ # paragraph context. The reference/reference_file is the filename of the
169
+ # source image from which the paragraph has been extracted
170
+ source_image: bool = Field(
171
+ default=False,
172
+ description=(
173
+ "When a paragraph has been extracted from an image (using OCR, inception...), "
174
+ "hydrate the image that represents it"
175
+ ),
176
+ )
177
+
178
+
179
+ class TableParagraphHydration(BaseModel, extra="forbid"):
180
+ # TODO: implement. ARAG uses the label "/k/table" to check whether a
181
+ # paragraph is a table or not. We can also use info on maindb
182
+ table_page_preview: bool = Field(
183
+ default=False,
184
+ description="Hydrate the page preview for the table. This will only hydrate fields with pages",
185
+ )
186
+
187
+
188
+ class ParagraphPageHydration(BaseModel, extra="forbid"):
189
+ # For some field types (file and link) learning generates previews. A
190
+ # preview is a PDF file representing the content. For a docx, for example, it is
191
+ # the PDF equivalent. Depending on the field type, the preview can
192
+ # represent, for example, a page in a document or a portion of a webpage.
193
+ page_with_visual: bool = Field(
194
+ default=False,
195
+ description=(
196
+ "When a paragraph has been extracted from a page containing visual "
197
+ "content (images, tables...), hydrate the preview of the paragraph's "
198
+ "page as an image. Not all field types have previews nor visual content"
199
+ ),
200
+ )
201
+
202
+
203
class ParagraphHydration(BaseModel, extra="forbid"):
    # Options selecting which paragraph data gets hydrated. Unknown keys are
    # rejected (extra="forbid"). Only `text` is enabled by default; the
    # optional sub-option groups default to None.
    text: bool = Field(
        default=True,
        description="Hydrate paragraph text",
    )
    image: Optional[ImageParagraphHydration] = Field(
        default=None,
        description="Hydrate options for paragraphs extracted from images (using OCR, inception...)",
    )
    table: Optional[TableParagraphHydration] = Field(
        default=None,
        description="Hydrate options for paragraphs extracted from tables",
    )

    # TODO: at some point, we should add hydration options for paragraphs from
    # audio and video

    page: Optional[ParagraphPageHydration] = Field(
        default=None,
        # Fixed typo ("Hydrte") in this user-facing description.
        description="Hydrate options for paragraphs within a page. This applies to paragraphs in fields with pages",
    )

    related: Optional[RelatedParagraphHydration] = Field(
        default=None,
        description="Hydration options for related paragraphs. For example, neighbours or sibling paragraphs",
    )
229
+
230
+
231
+ class Hydration(BaseModel, extra="forbid"):
232
+ resource: Optional[ResourceHydration] = Field(
233
+ default_factory=ResourceHydration,
234
+ description="Resource hydration options",
235
+ )
236
+ field: FieldHydration = Field(
237
+ default_factory=FieldHydration,
238
+ description="Field hydration options",
239
+ )
240
+ paragraph: ParagraphHydration = Field(
241
+ default_factory=ParagraphHydration,
242
+ description="Paragraph hydration options",
243
+ )
244
+
245
+
246
+ ParagraphId = Annotated[
247
+ str,
248
+ StringConstraints(
249
+ # resource-uuid/field-type/field-id/[split-id/]paragraph-id
250
+ pattern=r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+(/[^/]{1,128})?/[0-9]+-[0-9]+$",
251
+ min_length=32 + 1 + 1 + 1 + 1 + 0 + 0 + 1 + 3,
252
+ # max field id of 250 and 10 digit paragraphs. More than enough
253
+ max_length=32 + 1 + 1 + 1 + 250 + 1 + 128 + 1 + 21,
254
+ ),
255
+ ]
256
+
257
+
258
+ class HydrateRequest(BaseModel, extra="forbid"):
259
+ data: list[ParagraphId] = Field(
260
+ description="List of paragraph ids we want to hydrate",
261
+ max_length=50,
262
+ )
263
+ hydration: Hydration = Field(description="Description of how hydration must be performed")
264
+
265
+
266
+ ### Response models
267
+
268
+
269
+ class HydratedResource(BaseModel, extra="forbid"):
270
+ id: str = Field(description="Unique resource id")
271
+ slug: str = Field(description="Resource slug")
272
+
273
+ title: Optional[str] = None
274
+ summary: Optional[str] = None
275
+
276
+ origin: Optional[Origin] = None
277
+
278
+ security: Optional[ResourceSecurity] = None
279
+
280
+ # TODO: add resource labels to hydrated resources
281
+
282
+
283
+ class FieldExtractedData(BaseModel, extra="forbid"):
284
+ text: Optional[str] = None
285
+
286
+
287
+ class SplitFieldExtractedData(BaseModel, extra="forbid"):
288
+ texts: Optional[dict[str, str]] = None
289
+
290
+
291
class HydratedTextField(BaseModel, extra="forbid"):
    # Hydrated representation of a text field. Unknown keys are rejected.
    #
    # `id` and `resource` pass their text as `description=`: the first
    # positional argument of pydantic's Field() is the *default value*, so the
    # previous positional form made both fields silently default to their
    # own description string instead of being required.
    id: str = Field(description="Unique field id")
    resource: str = Field(description="Field resource id")
    field_type: FieldTypeName = FieldTypeName.TEXT

    # Only populated when the matching hydration option was requested
    # (presumably `value` / `extracted_text` of the request -- confirm).
    value: Optional[FieldText] = None
    extracted: Optional[FieldExtractedData] = None
298
+
299
+
300
class HydratedFileField(BaseModel, extra="forbid"):
    # Hydrated representation of a file field. Unknown keys are rejected.
    #
    # `id` and `resource` pass their text as `description=`: the first
    # positional argument of pydantic's Field() is the *default value*, so the
    # previous positional form made both fields silently default to their
    # own description string instead of being required.
    id: str = Field(description="Unique field id")
    resource: str = Field(description="Field resource id")
    field_type: FieldTypeName = FieldTypeName.FILE

    value: Optional[FieldFile] = None
    extracted: Optional[FieldExtractedData] = None

    previews: Optional[dict[str, Image]] = Field(
        default=None,
        title="Previews of specific parts of the field",
        # Adjacent string literals are concatenated verbatim, so every
        # fragment but the last ends with an explicit space.
        description=(
            "Previews for specific pages of this field. Previews are different "
            "depending on the file type. For example, for a PDF file, a preview "
            "will be an image of a single page. "
            "In this field, previews will be populated according to the hydration "
            "options requested."
        ),
    )
319
+
320
+
321
class HydratedLinkField(BaseModel, extra="forbid"):
    # Hydrated representation of a link field. Unknown keys are rejected.
    #
    # `id` and `resource` pass their text as `description=`: the first
    # positional argument of pydantic's Field() is the *default value*, so the
    # previous positional form made both fields silently default to their
    # own description string instead of being required.
    id: str = Field(description="Unique field id")
    resource: str = Field(description="Field resource id")
    field_type: FieldTypeName = FieldTypeName.LINK

    value: Optional[FieldLink] = None
    extracted: Optional[FieldExtractedData] = None
328
+
329
+
330
class HydratedConversationField(BaseModel, extra="forbid"):
    # Hydrated representation of a conversation field. Unknown keys are
    # rejected.
    #
    # `id` and `resource` pass their text as `description=`: the first
    # positional argument of pydantic's Field() is the *default value*, so the
    # previous positional form made both fields silently default to their
    # own description string instead of being required.
    id: str = Field(description="Unique field id")
    resource: str = Field(description="Field resource id")
    field_type: FieldTypeName = FieldTypeName.CONVERSATION

    value: Optional[FieldConversation] = None
    # NOTE(review): this module also defines SplitFieldExtractedData (texts
    # keyed by a string); confirm whether conversation fields should use it
    # here instead of the single-text FieldExtractedData.
    extracted: Optional[FieldExtractedData] = None
337
+
338
+
339
class HydratedGenericField(BaseModel, extra="forbid"):
    # Hydrated representation of a generic field. Unknown keys are rejected.
    #
    # `id` and `resource` pass their text as `description=`: the first
    # positional argument of pydantic's Field() is the *default value*, so the
    # previous positional form made both fields silently default to their
    # own description string instead of being required.
    id: str = Field(description="Unique field id")
    resource: str = Field(description="Field resource id")
    # Was FieldTypeName.TEXT (copy-paste from the text field model), which
    # made generic fields report themselves as text fields by default.
    field_type: FieldTypeName = FieldTypeName.GENERIC

    value: Optional[str] = None
    extracted: Optional[FieldExtractedData] = None
346
+
347
+
348
+ class RelatedNeighbourParagraphRefs(BaseModel, extra="forbid"):
349
+ before: Optional[list[str]] = None
350
+ after: Optional[list[str]] = None
351
+
352
+
353
+ class RelatedParagraphRefs(BaseModel, extra="forbid"):
354
+ neighbours: Optional[RelatedNeighbourParagraphRefs] = None
355
+ parents: Optional[list[str]] = None
356
+ siblings: Optional[list[str]] = None
357
+ replacements: Optional[list[str]] = None
358
+
359
+
360
+ class HydratedParagraphImage(BaseModel, extra="forbid"):
361
+ source_image: Optional[Image] = Field(
362
+ default=None,
363
+ description=(
364
+ "Source image for this paragraph. This only applies to paragraphs "
365
+ "extracted from an image using OCR or inception, and if this "
366
+ "hydration option has been enabled in the request"
367
+ ),
368
+ )
369
+
370
+
371
class HydratedParagraphTable(BaseModel, extra="forbid"):
    # Table-related hydrated data for a paragraph. Unknown keys are rejected.
    page_preview_ref: Optional[str] = Field(
        default=None,
        # Fixed typo ("Referento to") in this user-facing description.
        description=(
            "Reference to the page preview for this paragraph. The actual "
            "preview will be found in the previews of its field. This only "
            "applies to paragraphs generated from a table and if the "
            "corresponding hydration option has been enabled in the request"
        ),
    )
381
+
382
+
383
+ class HydratedParagraphPage(BaseModel, extra="forbid"):
384
+ page_preview_ref: Optional[str] = Field(
385
+ default=None,
386
+ description=(
387
+ "Reference to the page preview for this paragraph. The actual "
388
+ "preview will be found in the previews of its field. This only "
389
+ "applies to paragraphs extracted from a page containing visual "
390
+ "content and if the corresponding hydration option has been enabled "
391
+ "in the request"
392
+ ),
393
+ )
394
+
395
+
396
+ class HydratedParagraph(BaseModel, extra="forbid"):
397
+ id: str = Field(description="Unique paragraph id")
398
+ field: str = Field(description="Paragraph field id")
399
+ resource: str = Field(description="Paragraph resource id")
400
+
401
+ text: Optional[str] = None
402
+
403
+ # TODO: add labels to hydrated paragraphs
404
+ # labels: Optional[list[str]] = None
405
+
406
+ related: Optional[RelatedParagraphRefs] = None
407
+
408
+ image: Optional[HydratedParagraphImage] = None
409
+ table: Optional[HydratedParagraphTable] = None
410
+ page: Optional[HydratedParagraphPage] = None
411
+
412
+
413
+ class Hydrated(BaseModel, extra="forbid"):
414
+ resources: dict[str, HydratedResource]
415
+ fields: dict[
416
+ str,
417
+ Union[
418
+ HydratedTextField,
419
+ HydratedFileField,
420
+ HydratedLinkField,
421
+ HydratedConversationField,
422
+ HydratedGenericField,
423
+ ],
424
+ ]
425
+ paragraphs: dict[str, HydratedParagraph]
@@ -17,7 +17,7 @@ from datetime import datetime
17
17
  from enum import Enum
18
18
  from typing import Any, Dict, List, Optional
19
19
 
20
- from pydantic import BaseModel, Field, model_validator
20
+ from pydantic import BaseModel, Field, field_validator, model_validator
21
21
  from typing_extensions import Self
22
22
 
23
23
  from nucliadb_models.utils import DateTime
@@ -231,15 +231,24 @@ class InputOrigin(BaseModel):
231
231
  default=[],
232
232
  title="Tags",
233
233
  description="Resource tags about the origin system. It can later be used for filtering on search endpoints with '/origin.tags/{tag}'",
234
+ max_length=300,
234
235
  )
235
- collaborators: List[str] = []
236
+ collaborators: List[str] = Field(default=[], max_length=100)
236
237
  filename: Optional[str] = None
237
- related: List[str] = []
238
+ related: List[str] = Field(default=[], max_length=100)
238
239
  path: Optional[str] = Field(
239
240
  default=None,
240
241
  description="Path of the original resource. Typically used to store folder structure information of the resource at the origin system. It can be later used for filtering on search endpoints with '/origin.path/{path}'",
242
+ max_length=2048,
241
243
  )
242
244
 
245
@field_validator("tags")
@classmethod
def validate_tag_length(cls, tags):
    """Reject any tag longer than the maximum allowed length.

    Args:
        tags: Iterable of tag strings being validated for the ``tags`` field.

    Returns:
        The same ``tags`` value, unchanged, when every tag is short enough.

    Raises:
        ValueError: if any tag exceeds the limit.
    """
    # Single source of truth for the limit: the previous message claimed 1024
    # characters while the check actually enforced 512.
    max_tag_length = 512
    for tag in tags:
        if len(tag) > max_tag_length:
            raise ValueError(f"Each tag must be at most {max_tag_length} characters long")
    return tags
251
+
243
252
 
244
253
  class Origin(InputOrigin):
245
254
  # Created and modified are redefined to
@@ -247,6 +256,12 @@ class Origin(InputOrigin):
247
256
  created: Optional[datetime] = None
248
257
  modified: Optional[datetime] = None
249
258
 
259
+ tags: List[str] = Field(
260
+ default=[],
261
+ title="Tags",
262
+ description="Resource tags about the origin system. It can later be used for filtering on search endpoints with '/origin.tags/{tag}'",
263
+ )
264
+
250
265
  class Source(Enum):
251
266
  WEB = "WEB"
252
267
  DESKTOP = "DESKTOP"
nucliadb_models/search.py CHANGED
@@ -79,8 +79,9 @@ ANSWER_JSON_SCHEMA_EXAMPLE = {
79
79
  class ModelParamDefaults:
80
80
  applied_autofilters = ParamDefault(
81
81
  default=[],
82
- title="Autofilters",
83
- description="List of filters automatically applied to the search query",
82
+ title="Applied autofilters",
83
+ description="[deprecated] list of filters automatically applied to the search query",
84
+ deprecated=True,
84
85
  )
85
86
 
86
87
 
@@ -296,6 +297,8 @@ class KnowledgeboxSearchResults(JsonBaseModel):
296
297
  relations: Optional[Relations] = None
297
298
  nodes: Optional[list[dict[str, str]]] = None
298
299
  shards: Optional[list[str]] = None
300
+
301
+ # TODO: remove on a future major release
299
302
  autofilters: list[str] = ModelParamDefaults.applied_autofilters.to_pydantic_field()
300
303
 
301
304
 
@@ -344,10 +347,12 @@ SortOrderMap = {
344
347
 
345
348
  class SortOptions(BaseModel):
346
349
  field: SortField
347
- limit: Optional[int] = Field(None, gt=0)
348
350
  order: SortOrder = SortOrder.DESC
349
351
 
350
352
 
353
+ MAX_RANK_FUSION_WINDOW = 500
354
+
355
+
351
356
  class RankFusionName(str, Enum):
352
357
  RECIPROCAL_RANK_FUSION = "rrf"
353
358
 
@@ -377,7 +382,7 @@ class ReciprocalRankFusion(_BaseRankFusion):
377
382
  )
378
383
  window: Optional[int] = Field(
379
384
  default=None,
380
- le=500,
385
+ le=MAX_RANK_FUSION_WINDOW,
381
386
  title="RRF window",
382
387
  description="Number of elements for retrieval to do RRF. Window must be greater or equal to top_k. Greater values will increase probability of multi match at cost of retrieval time", # noqa: E501
383
388
  )
@@ -481,11 +486,6 @@ class SearchParamDefaults:
481
486
  description="The list of facets to calculate. The facets follow the same syntax as filters: https://docs.nuclia.dev/docs/rag/advanced/search-filters", # noqa: E501
482
487
  max_items=50,
483
488
  )
484
- autofilter = ParamDefault(
485
- default=False,
486
- title="Automatic search filtering",
487
- description="If set to true, the search will automatically add filters to the query. For example, it will filter results containing the entities detected in the query", # noqa: E501
488
- )
489
489
  chat_query = ParamDefault(
490
490
  default=...,
491
491
  title="Query",
@@ -505,10 +505,18 @@ class SearchParamDefaults:
505
505
  )
506
506
  top_k = ParamDefault(
507
507
  default=20,
508
+ gt=-1,
508
509
  le=200,
509
510
  title="Top k",
510
511
  description="The number of results search should return. The maximum number of results allowed is 200.",
511
512
  )
513
+ offset = ParamDefault(
514
+ default=0,
515
+ gt=-1,
516
+ le=1000,
517
+ title="Results offset",
518
+ description="The number of results to skip, starting from the beginning in sort order. Used for pagination. It can only be used with the keyword and fulltext indexes.",
519
+ )
512
520
  highlight = ParamDefault(
513
521
  default=False,
514
522
  title="Highlight",
@@ -534,12 +542,6 @@ class SearchParamDefaults:
534
542
  title="Sort order",
535
543
  description="Order to sort results with",
536
544
  )
537
- sort_limit = ParamDefault(
538
- default=None,
539
- title="Sort limit",
540
- description="",
541
- gt=0,
542
- )
543
545
  sort_field = ParamDefault(
544
546
  default=None,
545
547
  title="Sort field",
@@ -876,7 +878,9 @@ class BaseSearchRequest(AuditMetadataBase):
876
878
  vectorset: Optional[str] = SearchParamDefaults.vectorset.to_pydantic_field()
877
879
  with_duplicates: bool = SearchParamDefaults.with_duplicates.to_pydantic_field()
878
880
  with_synonyms: bool = SearchParamDefaults.with_synonyms.to_pydantic_field()
879
- autofilter: bool = SearchParamDefaults.autofilter.to_pydantic_field()
881
+ # autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
882
+ # avoid breaking changes in the python sdks. Please remove on a future major release.
883
+ autofilter: SkipJsonSchema[bool] = False
880
884
  resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
881
885
  security: Optional[RequestSecurity] = SearchParamDefaults.security.to_pydantic_field()
882
886
  show_hidden: bool = SearchParamDefaults.show_hidden.to_pydantic_field()
@@ -938,12 +942,32 @@ class SearchRequest(BaseSearchRequest):
938
942
  )
939
943
  faceted: list[str] = SearchParamDefaults.faceted.to_pydantic_field()
940
944
  sort: Optional[SortOptions] = SearchParamDefaults.sort.to_pydantic_field()
945
+ offset: int = SearchParamDefaults.offset.to_pydantic_field()
941
946
 
942
947
  @field_validator("faceted")
943
948
  @classmethod
944
949
  def nested_facets_not_supported(cls, facets):
945
950
  return validate_facets(facets)
946
951
 
952
@model_validator(mode="after")
def offset_sort_only_on_keyword_indexes(self):
    """Reject offset and non-score sorting when a non-keyword index is queried.

    The checks only apply when ``features`` includes the semantic or the
    relations index.

    Raises:
        ValueError: if ``offset`` is positive, or ``sort`` uses a field other
            than the score, while one of those indexes is requested.
    """
    non_keyword_indexes = {SearchOptions.SEMANTIC, SearchOptions.RELATIONS}

    # Nothing to enforce when only keyword/fulltext indexes are requested.
    if non_keyword_indexes.isdisjoint(self.features):
        return self

    if self.offset > 0:
        raise ValueError("offset cannot be used with the semantic or relations index")

    sort_options = self.sort
    if sort_options and sort_options.field != SortField.SCORE:
        raise ValueError("sort by date cannot be used with the semantic or relations index")

    return self
962
+
963
@field_validator("sort", mode="after")
@classmethod
def sorting_by_title_not_supported(cls, value: Optional[SortOptions]) -> Optional[SortOptions]:
    """Reject sort options that order results by title.

    Args:
        value: The validated ``sort`` options, or None when not provided.

    Returns:
        ``value`` unchanged when it is empty or sorts by any other field.

    Raises:
        ValueError: if the sort field is the title.
    """
    if not value:
        return value

    if value.field == SortField.TITLE:
        raise ValueError("sorting by title not supported in /search")

    return value
970
+
947
971
 
948
972
  class Author(str, Enum):
949
973
  NUCLIA = "NUCLIA"
@@ -985,6 +1009,35 @@ def parse_max_tokens(max_tokens: Optional[Union[int, MaxTokens]]) -> Optional[Ma
985
1009
  return max_tokens
986
1010
 
987
1011
 
1012
+ class Reasoning(BaseModel):
1013
+ display: bool = Field(
1014
+ default=True,
1015
+ description="Whether to display the reasoning steps in the response.",
1016
+ )
1017
+ effort: Literal["low", "medium", "high"] = Field(
1018
+ default="medium",
1019
+ description=(
1020
+ "Level of reasoning effort. Used by OpenAI models to control the depth of reasoning. "
1021
+ "This parameter will be automatically mapped to budget_tokens "
1022
+ "if the chosen model does not support effort."
1023
+ ),
1024
+ )
1025
+ budget_tokens: int = Field(
1026
+ default=15_000,
1027
+ description=(
1028
+ "Token budget for reasoning. Used by Anthropic or Google models to limit the number of "
1029
+ "tokens used for reasoning. This parameter will be automatically mapped to effort "
1030
+ "if the chosen model does not support budget_tokens."
1031
+ ),
1032
+ )
1033
+
1034
+
1035
+ class CitationsType(str, Enum):
1036
+ NONE = "none"
1037
+ DEFAULT = "default"
1038
+ LLM_FOOTNOTES = "llm_footnotes"
1039
+
1040
+
988
1041
  class ChatModel(BaseModel):
989
1042
  """
990
1043
  This is the model for the predict request payload on the chat endpoint
@@ -1016,10 +1069,16 @@ class ChatModel(BaseModel):
1016
1069
  user_prompt: Optional[UserPrompt] = Field(
1017
1070
  default=None, description="Optional custom prompt input by the user"
1018
1071
  )
1019
- citations: bool = Field(default=False, description="Whether to include the citations in the answer")
1072
+ citations: Union[bool, None, CitationsType] = Field(
1073
+ default=None,
1074
+ description="Whether to include citations in the response. "
1075
+ "If set to None or False, no citations will be computed. "
1076
+ "If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
1077
+ "If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
1078
+ )
1020
1079
  citation_threshold: Optional[float] = Field(
1021
1080
  default=None,
1022
- description="If citations is True, this sets the similarity threshold (0 to 1) for paragraphs to be included as citations. Lower values result in more citations. If not provided, Nuclia's default threshold is used.", # noqa: E501
1081
+ description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
1023
1082
  ge=0.0,
1024
1083
  le=1.0,
1025
1084
  )
@@ -1058,6 +1117,13 @@ class ChatModel(BaseModel):
1058
1117
  default=None,
1059
1118
  description="Seed use for the generative model for a deterministic output.",
1060
1119
  )
1120
+ reasoning: Union[Reasoning, bool] = Field(
1121
+ default=False,
1122
+ description=(
1123
+ "Reasoning options for the generative model. "
1124
+ "Set to True to enable default reasoning, False to disable, or provide a Reasoning object for custom options."
1125
+ ),
1126
+ )
1061
1127
 
1062
1128
 
1063
1129
  class RephraseModel(BaseModel):
@@ -1128,7 +1194,7 @@ ALLOWED_FIELD_TYPES: dict[str, str] = {
1128
1194
  "t": "text",
1129
1195
  "f": "file",
1130
1196
  "u": "link",
1131
- "d": "datetime",
1197
+ "c": "conversation",
1132
1198
  "a": "generic",
1133
1199
  }
1134
1200
 
@@ -1136,16 +1202,19 @@ ALLOWED_FIELD_TYPES: dict[str, str] = {
1136
1202
  class FieldExtensionStrategy(RagStrategy):
1137
1203
  name: Literal["field_extension"] = "field_extension"
1138
1204
  fields: list[str] = Field(
1205
+ default=[],
1139
1206
  title="Fields",
1140
- description="List of field ids to extend the context with. It will try to extend the retrieval context with the specified fields in the matching resources. The field ids have to be in the format `{field_type}/{field_name}`, like 'a/title', 'a/summary' for title and summary fields or 't/amend' for a text field named 'amend'.", # noqa: E501
1141
- min_length=1,
1207
+ description="List of field ids to extend the context with. It will try to extend the retrieval context with the specified fields in the matching resources. The field ids have to be in the format `{field_type}/{field_name}`, like 'a/title', 'a/summary' for title and summary fields or 't/amend' for a text field named 'amend'.",
1208
+ )
1209
+ data_augmentation_field_prefixes: list[str] = Field(
1210
+ default=[],
1211
+ description="List of prefixes for data augmentation added fields to extend the context with. For example, if the prefix is 'simpson', all fields that are a result of data augmentation with that prefix will be used to extend the context.",
1142
1212
  )
1143
1213
 
1144
- @field_validator("fields", mode="after")
1145
- @classmethod
1146
- def fields_validator(cls, fields) -> Self:
1214
+ @model_validator(mode="after")
1215
+ def field_extension_strategy_validator(self) -> Self:
1147
1216
  # Check that the fields are in the format {field_type}/{field_name}
1148
- for field in fields:
1217
+ for field in self.fields:
1149
1218
  try:
1150
1219
  field_type, _ = field.strip("/").split("/")
1151
1220
  except ValueError:
@@ -1158,8 +1227,7 @@ class FieldExtensionStrategy(RagStrategy):
1158
1227
  f"Field '{field}' does not have a valid field type. "
1159
1228
  f"Valid field types are: {allowed_field_types_part}."
1160
1229
  )
1161
-
1162
- return fields
1230
+ return self
1163
1231
 
1164
1232
 
1165
1233
  class FullResourceApplyTo(BaseModel):
@@ -1197,6 +1265,7 @@ class HierarchyResourceStrategy(RagStrategy):
1197
1265
  title="Count",
1198
1266
  description="Number of extra characters that are added to each matching paragraph when adding to the context.",
1199
1267
  ge=0,
1268
+ le=1024,
1200
1269
  )
1201
1270
 
1202
1271
 
@@ -1425,7 +1494,7 @@ class PageImageStrategy(ImageRagStrategy):
1425
1494
  count: Optional[int] = Field(
1426
1495
  default=None,
1427
1496
  title="Count",
1428
- description="Maximum number of images to retrieve from the page. By default, at most 5 images are retrieved.",
1497
+ description="Maximum number of page images to retrieve. By default, at most 5 images are retrieved.",
1429
1498
  )
1430
1499
 
1431
1500
 
@@ -1576,7 +1645,11 @@ class AskRequest(AuditMetadataBase):
1576
1645
  description="Image that will be used together with the query text for retrieval and then sent to the LLM as part of the context. "
1577
1646
  "If a query image is provided, the `extra_context_images` and `rag_images_strategies` will be disabled.",
1578
1647
  )
1579
- autofilter: bool = SearchParamDefaults.autofilter.to_pydantic_field()
1648
+
1649
+ # autofilter is deprecated and its logic was removed. We're just keeping it in the model definition to
1650
+ # avoid breaking changes in the python sdks. Please remove on a future major release.
1651
+ autofilter: SkipJsonSchema[bool] = False
1652
+
1580
1653
  highlight: bool = SearchParamDefaults.highlight.to_pydantic_field()
1581
1654
  resource_filters: list[str] = SearchParamDefaults.resource_filters.to_pydantic_field()
1582
1655
  prompt: Optional[Union[str, CustomPrompt]] = Field(
@@ -1586,13 +1659,16 @@ class AskRequest(AuditMetadataBase):
1586
1659
  )
1587
1660
  rank_fusion: Union[RankFusionName, RankFusion] = SearchParamDefaults.rank_fusion.to_pydantic_field()
1588
1661
  reranker: Union[RerankerName, Reranker] = SearchParamDefaults.reranker.to_pydantic_field()
1589
- citations: bool = Field(
1590
- default=False,
1591
- description="Whether to include the citations for the answer in the response",
1662
+ citations: Union[bool, None, CitationsType] = Field(
1663
+ default=None,
1664
+ description="Whether to include citations in the response. "
1665
+ "If set to None or False, no citations will be computed. "
1666
+ "If set to True or 'default', citations will be computed after answer generation and send as a separate `CitationsGenerativeResponse` chunk. "
1667
+ "If set to 'llm_footnotes', citations will be included in the LLM's response as markdown-styled footnotes. A `FootnoteCitationsGenerativeResponse` chunk will also be sent to map footnote ids to context keys in the `query_context`.",
1592
1668
  )
1593
1669
  citation_threshold: Optional[float] = Field(
1594
1670
  default=None,
1595
- description="If citations is True, this sets the similarity threshold (0 to 1) for paragraphs to be included as citations. Lower values result in more citations. If not provided, Nuclia's default threshold is used.",
1671
+ description="If citations is set to True or 'default', this will be the similarity threshold. Value between 0 and 1, lower values will produce more citations. If not set, it will be set to the optimized threshold found by Nuclia.",
1596
1672
  ge=0.0,
1597
1673
  le=1.0,
1598
1674
  )
@@ -1723,6 +1799,14 @@ Using this feature also disables the `citations` parameter. For maximal accuracy
1723
1799
  description="Load ask parameters from this configuration. Parameters in the request override parameters from the configuration.",
1724
1800
  )
1725
1801
 
1802
+ reasoning: Union[Reasoning, bool] = Field(
1803
+ default=False,
1804
+ description=(
1805
+ "Reasoning options for the generative model. "
1806
+ "Set to True to enable default reasoning, False to disable, or provide a Reasoning object for custom options."
1807
+ ),
1808
+ )
1809
+
1726
1810
  @field_validator("rag_strategies", mode="before")
1727
1811
  @classmethod
1728
1812
  def validate_rag_strategies(cls, rag_strategies: list[RagStrategies]) -> list[RagStrategies]:
@@ -2180,6 +2264,11 @@ class SyncAskResponse(BaseModel):
2180
2264
  title="Answer",
2181
2265
  description="The generative answer to the query",
2182
2266
  )
2267
+ reasoning: Optional[str] = Field(
2268
+ default=None,
2269
+ title="Reasoning",
2270
+ description="The reasoning steps followed by the LLM to generate the answer. This is returned only if the reasoning feature is enabled in the request.", # noqa: E501
2271
+ )
2183
2272
  answer_json: Optional[dict[str, Any]] = Field(
2184
2273
  default=None,
2185
2274
  title="Answer JSON",
@@ -2214,10 +2303,15 @@ class SyncAskResponse(BaseModel):
2214
2303
  description="The detected relations of the answer",
2215
2304
  )
2216
2305
  citations: dict[str, Any] = Field(
2217
- default={},
2306
+ default_factory=dict,
2218
2307
  title="Citations",
2219
2308
  description="The citations of the answer. List of references to the resources used to generate the answer.",
2220
2309
  )
2310
+ citation_footnote_to_context: dict[str, str] = Field(
2311
+ default_factory=dict,
2312
+ title="Citation footnote to context",
2313
+ description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)""",
2314
+ )
2221
2315
  augmented_context: Optional[AugmentedContext] = Field(
2222
2316
  default=None,
2223
2317
  description=(
@@ -2284,6 +2378,11 @@ class AnswerAskResponseItem(BaseModel):
2284
2378
  text: str
2285
2379
 
2286
2380
 
2381
+ class ReasoningAskResponseItem(BaseModel):
2382
+ type: Literal["reasoning"] = "reasoning"
2383
+ text: str
2384
+
2385
+
2287
2386
  class JSONAskResponseItem(BaseModel):
2288
2387
  type: Literal["answer_json"] = "answer_json"
2289
2388
  object: dict[str, Any]
@@ -2322,6 +2421,18 @@ class CitationsAskResponseItem(BaseModel):
2322
2421
  citations: dict[str, Any]
2323
2422
 
2324
2423
 
2424
+ class FootnoteCitationsAskResponseItem(BaseModel):
2425
+ type: Literal["footnote_citations"] = "footnote_citations"
2426
+ footnote_to_context: dict[str, str] = Field(
2427
+ description="""Maps ids in the footnote citations to query_context keys (normally paragraph ids)
2428
+ e.g.,
2429
+ { "block-AA": "f44f4e8acbfb1d48de3fd3c2fb04a885/f/f44f4e8acbfb1d48de3fd3c2fb04a885/73758-73972", ... }
2430
+ If the query_context is a list, it will map to 1-based indices as strings
2431
+ e.g., { "block-AA": "1", "block-AB": "2", ... }
2432
+ """
2433
+ )
2434
+
2435
+
2325
2436
  class StatusAskResponseItem(BaseModel):
2326
2437
  type: Literal["status"] = "status"
2327
2438
  code: str
@@ -2347,10 +2458,12 @@ class DebugAskResponseItem(BaseModel):
2347
2458
 
2348
2459
  AskResponseItemType = Union[
2349
2460
  AnswerAskResponseItem,
2461
+ ReasoningAskResponseItem,
2350
2462
  JSONAskResponseItem,
2351
2463
  MetadataAskResponseItem,
2352
2464
  AugmentedContextResponseItem,
2353
2465
  CitationsAskResponseItem,
2466
+ FootnoteCitationsAskResponseItem,
2354
2467
  StatusAskResponseItem,
2355
2468
  ErrorAskResponseItem,
2356
2469
  RetrievalAskResponseItem,
nucliadb_models/writer.py CHANGED
@@ -36,7 +36,7 @@ from nucliadb_models.utils import FieldIdPattern, FieldIdString, SlugString
36
36
 
37
37
 
38
38
  class FieldDefaults:
39
- title = Field(None, title="Title")
39
+ title = Field(None, title="Title", max_length=2048)
40
40
  summary = Field(None, title="Summary")
41
41
  slug = Field(
42
42
  None,
@@ -1,19 +1,18 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb_models
3
- Version: 6.7.1.post4848
3
+ Version: 6.9.6.post5453
4
4
  Author-email: Nuclia <nucliadb@nuclia.com>
5
5
  License-Expression: Apache-2.0
6
6
  Project-URL: Homepage, https://nuclia.com
7
7
  Project-URL: Repository, https://github.com/nuclia/nucliadb
8
8
  Classifier: Development Status :: 4 - Beta
9
9
  Classifier: Programming Language :: Python
10
- Classifier: Programming Language :: Python :: 3.9
11
10
  Classifier: Programming Language :: Python :: 3.10
12
11
  Classifier: Programming Language :: Python :: 3.11
13
12
  Classifier: Programming Language :: Python :: 3.12
14
13
  Classifier: Programming Language :: Python :: 3 :: Only
15
14
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
- Requires-Python: <4,>=3.9
15
+ Requires-Python: <4,>=3.10
17
16
  Description-Content-Type: text/markdown
18
17
  Requires-Dist: pydantic!=2.11.5,!=2.11.6,>=2.6
19
18
 
@@ -1,22 +1,24 @@
1
1
  nucliadb_models/__init__.py,sha256=3y8-htogKuCZcbhaUZdSjTeEjUSeec9aRWyL8AlKCyM,1077
2
- nucliadb_models/common.py,sha256=YW84w1NAQARObs2nXw6YBgdxQJeVCmTZZr5lSqj-IdQ,7904
3
- nucliadb_models/configuration.py,sha256=aTV5mBwYFlwiV1_nWyVAXaCh7F6lDVTVh28Xfwy8ox8,2448
2
+ nucliadb_models/augment.py,sha256=vAtFh4D4eC4nvfwaRTlfeuAMOL9Z9TFZnUNiRAMasss,2543
3
+ nucliadb_models/common.py,sha256=2dtKG4ZNi9p-yoNY76Uvyu1SlMeNYpH-MnuU3Q6w9Js,8169
4
+ nucliadb_models/configuration.py,sha256=BBrJsNjP324Cw_5J3dBrGwvpkHQYbXEo3TUaI9IqAOg,2449
4
5
  nucliadb_models/content_types.py,sha256=36Ga-iGf4ivCqgtXC7imFgegrwHB117s9eqP62JtGv0,3456
5
- nucliadb_models/conversation.py,sha256=i8tvQxUj6Hw0Nc2oqCGxxzSA1CCY6h7I0ucop9akops,4859
6
+ nucliadb_models/conversation.py,sha256=k9bKhkDiqhqmdrDfDPNoUfG7-2H_-KAyuOnETd8zV0E,5081
6
7
  nucliadb_models/entities.py,sha256=i-7Y8qmFRRTih5zw0ajv1U_iiXexe66M3TK8hUikQZk,2356
7
8
  nucliadb_models/export_import.py,sha256=mNm9IArOLnC6TLupkwqVFhxD5d08mpIVOVFneECv8UA,1073
8
- nucliadb_models/external_index_providers.py,sha256=IIKjJjLixWQC1zrbzam2FDcAo5UUxShZfueZSxqZu8Y,1535
9
+ nucliadb_models/external_index_providers.py,sha256=pL3leo4MkuJOnKlU1Sg6GT_mnK_VUBxGui-RPmDYVWU,1126
9
10
  nucliadb_models/extracted.py,sha256=Owz7LC3le3Dvau3TtRiO8NY84meOf6IxN-RrOqqpMPs,5593
10
11
  nucliadb_models/file.py,sha256=tXtgB9c7i2ADsnJ7HdbXyroAmXadGvOeA49htBh7BZo,2263
11
12
  nucliadb_models/filters.py,sha256=NQI2-4AFzzJuZy8NeY3jXlTbbU5wxiwMCP-5DrD-7lE,14759
13
+ nucliadb_models/hydration.py,sha256=SlAzraJE6DX0uOpZWxu2k_9-ikYorsj0t8xwsWSBQZY,14363
12
14
  nucliadb_models/labels.py,sha256=9zqRgkpZuX3kUPwsTTgCH7JyOWK7dM5pwyuHJR86YdU,3949
13
15
  nucliadb_models/link.py,sha256=PF5hHLwdOed5TMBTxtokkgWtMh1bFnORZjybh0NwVCw,2526
14
- nucliadb_models/metadata.py,sha256=MFVYnpXMBoY4ylMg029o7yDKGxhK7NB0c0FSshzJHm4,8356
16
+ nucliadb_models/metadata.py,sha256=OOKGy_83NtlG1QKQZEwMuwu4wbVEe7P30Y2QvnGSDto,8933
15
17
  nucliadb_models/notifications.py,sha256=mna8-AoD_29Wds0Thl0AF0zpERnJmYGLZX1w1fUopMY,4036
16
18
  nucliadb_models/processing.py,sha256=nhKuHQjqCdb9zJVkYGPTLub23tK9e_lwL5OCDVymZjY,719
17
19
  nucliadb_models/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
20
  nucliadb_models/resource.py,sha256=RzCos0QRgSMkaV-p7EoceSmt7UTzt9G9be5BKF-iGrQ,9021
19
- nucliadb_models/search.py,sha256=2pmXfnVkmWqs0WAlJC0XlMk3sqnhlpJx1AbtTQ_BjN8,92190
21
+ nucliadb_models/search.py,sha256=_vn3pDXcK4iwiCfim3BtlD5EaQAeXoxl2IfNDsrKesA,97514
20
22
  nucliadb_models/security.py,sha256=opxaDLfvk3aU0sjesK0jGrYLx5h4YCwlKKN0moYs_ig,1150
21
23
  nucliadb_models/synonyms.py,sha256=afbaVqSQSxGLwi2PusVaLSRpkOtA5AZmWOKd1f4nl2E,690
22
24
  nucliadb_models/text.py,sha256=60bxZnOjRHnDdezR8VfR3AZsXTOwePFPs2BKB8wxBak,3414
@@ -24,7 +26,7 @@ nucliadb_models/trainset.py,sha256=BgUfgdClpwhk6UoOq5x6mbpOopgSmqg8he2bBzEzGqg,2
24
26
  nucliadb_models/utils.py,sha256=OnWaDwZGwja8Spd_gpryuUpAMGIMhh-DNDGpoUYyb-A,2460
25
27
  nucliadb_models/vectors.py,sha256=_Z157PojPIwoeF5LStO0gz8IwxKy2styHjhdBkLd_44,1329
26
28
  nucliadb_models/vectorsets.py,sha256=XAgg9DfdfLYpfLh9OepJ_KPH0_RqRQNpVZJr74UnNh0,788
27
- nucliadb_models/writer.py,sha256=diwrarp6DxjSUoRmdEljZb68z_ghNvpOgPUGZeKg328,8220
29
+ nucliadb_models/writer.py,sha256=6hBH32XLsXUqeNWVQlzZ6X-0dLFVgkbxaMSf_s2Cga4,8237
28
30
  nucliadb_models/agents/ingestion.py,sha256=W9cJ0dQT_1vPcjeJ4_Fjb8DylnhQ6qqZrY4v8x1RqUs,3093
29
31
  nucliadb_models/graph/__init__.py,sha256=X538kZPZnndmQeEtnzzPv1hYVGUTDe9U1O7UmAqqxXU,645
30
32
  nucliadb_models/graph/requests.py,sha256=ppQ7cOnybvrw1wGC7qDps-182PfmicWU6-4vLRfK16w,7169
@@ -32,7 +34,7 @@ nucliadb_models/graph/responses.py,sha256=Sdq8OgFAL1YT-1lJyLLrkqcScvj7YTEqAUwQ-k
32
34
  nucliadb_models/internal/__init__.py,sha256=zG33bUz1rHFPtvqQPWn4rDwBJt3FJodGuQYD45quiQg,583
33
35
  nucliadb_models/internal/predict.py,sha256=Pnx6MmLfK65eExe1XnVxqmSlvMwdowewwks9BOEoqMw,2029
34
36
  nucliadb_models/internal/shards.py,sha256=__y1OZtWGiNcPQEWfSFOj8yw458WGi7mM4vZe0K-L1Y,1691
35
- nucliadb_models-6.7.1.post4848.dist-info/METADATA,sha256=8-0X6Yq7JvVI431Yo0vmjn9T6wIsD9zoJcFy92bePHQ,794
36
- nucliadb_models-6.7.1.post4848.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
37
- nucliadb_models-6.7.1.post4848.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
38
- nucliadb_models-6.7.1.post4848.dist-info/RECORD,,
37
+ nucliadb_models-6.9.6.post5453.dist-info/METADATA,sha256=x-3GFIapOwe9kpigwte5HkdfaNqPk-iL7CuE_NM3PAE,745
38
+ nucliadb_models-6.9.6.post5453.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
39
+ nucliadb_models-6.9.6.post5453.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
40
+ nucliadb_models-6.9.6.post5453.dist-info/RECORD,,