nucliadb-models 6.9.5.post5452__py3-none-any.whl → 6.10.0.post5694__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nucliadb-models might be problematic. Click here for more details.

Files changed (34)
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +297 -23
  3. nucliadb_models/common.py +57 -57
  4. nucliadb_models/configuration.py +8 -8
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +25 -26
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +1 -2
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +78 -74
  12. nucliadb_models/graph/requests.py +38 -47
  13. nucliadb_models/hydration.py +48 -50
  14. nucliadb_models/internal/predict.py +7 -9
  15. nucliadb_models/internal/shards.py +2 -3
  16. nucliadb_models/labels.py +18 -11
  17. nucliadb_models/link.py +18 -19
  18. nucliadb_models/metadata.py +65 -53
  19. nucliadb_models/notifications.py +3 -3
  20. nucliadb_models/processing.py +1 -2
  21. nucliadb_models/resource.py +85 -102
  22. nucliadb_models/retrieval.py +147 -0
  23. nucliadb_models/search.py +266 -276
  24. nucliadb_models/security.py +2 -3
  25. nucliadb_models/text.py +7 -8
  26. nucliadb_models/trainset.py +1 -2
  27. nucliadb_models/utils.py +2 -3
  28. nucliadb_models/vectors.py +2 -5
  29. nucliadb_models/writer.py +56 -57
  30. {nucliadb_models-6.9.5.post5452.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/METADATA +1 -1
  31. nucliadb_models-6.10.0.post5694.dist-info/RECORD +41 -0
  32. nucliadb_models-6.9.5.post5452.dist-info/RECORD +0 -40
  33. {nucliadb_models-6.9.5.post5452.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/WHEEL +0 -0
  34. {nucliadb_models-6.9.5.post5452.dist-info → nucliadb_models-6.10.0.post5694.dist-info}/top_level.txt +0 -0
nucliadb_models/hydration.py CHANGED
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  #
15
- from typing import Annotated, Optional, Union
15
+ from typing import Annotated
16
16
 
17
17
  from pydantic import BaseModel, Field, StringConstraints
18
18
 
@@ -105,23 +105,23 @@ class GenericFieldHydration(BaseModel, extra="forbid"):
105
105
 
106
106
 
107
107
  class FieldHydration(BaseModel, extra="forbid"):
108
- text: Optional[TextFieldHydration] = Field(
108
+ text: TextFieldHydration | None = Field(
109
109
  default_factory=TextFieldHydration,
110
110
  description="Text fields hydration options",
111
111
  )
112
- file: Optional[FileFieldHydration] = Field(
112
+ file: FileFieldHydration | None = Field(
113
113
  default_factory=FileFieldHydration,
114
114
  description="File fields hydration options",
115
115
  )
116
- link: Optional[LinkFieldHydration] = Field(
116
+ link: LinkFieldHydration | None = Field(
117
117
  default_factory=LinkFieldHydration,
118
118
  description="Link fields hydration options",
119
119
  )
120
- conversation: Optional[ConversationFieldHydration] = Field(
120
+ conversation: ConversationFieldHydration | None = Field(
121
121
  default_factory=ConversationFieldHydration,
122
122
  description="Conversation fields hydration options",
123
123
  )
124
- generic: Optional[GenericFieldHydration] = Field(
124
+ generic: GenericFieldHydration | None = Field(
125
125
  default_factory=GenericFieldHydration,
126
126
  description="Generic fields hydration options",
127
127
  )
@@ -141,7 +141,7 @@ class NeighbourParagraphHydration(BaseModel, extra="forbid"):
141
141
 
142
142
 
143
143
  class RelatedParagraphHydration(BaseModel, extra="forbid"):
144
- neighbours: Optional[NeighbourParagraphHydration] = Field(
144
+ neighbours: NeighbourParagraphHydration | None = Field(
145
145
  default=None,
146
146
  description="Hydrate extra paragraphs that surround the original one",
147
147
  )
@@ -205,11 +205,11 @@ class ParagraphHydration(BaseModel, extra="forbid"):
205
205
  default=True,
206
206
  description="Hydrate paragraph text",
207
207
  )
208
- image: Optional[ImageParagraphHydration] = Field(
208
+ image: ImageParagraphHydration | None = Field(
209
209
  default=None,
210
210
  description="Hydrate options for paragraphs extracted from images (using OCR, inception...)",
211
211
  )
212
- table: Optional[TableParagraphHydration] = Field(
212
+ table: TableParagraphHydration | None = Field(
213
213
  default=None,
214
214
  description="Hydrate options for paragraphs extracted from tables",
215
215
  )
@@ -217,19 +217,19 @@ class ParagraphHydration(BaseModel, extra="forbid"):
217
217
  # TODO: at some point, we should add hydration options for paragraphs from
218
218
  # audio and video
219
219
 
220
- page: Optional[ParagraphPageHydration] = Field(
220
+ page: ParagraphPageHydration | None = Field(
221
221
  default=None,
222
222
  description="Hydrte options for paragraphs within a page. This applies to paragraphs in fields with pages",
223
223
  )
224
224
 
225
- related: Optional[RelatedParagraphHydration] = Field(
225
+ related: RelatedParagraphHydration | None = Field(
226
226
  default=None,
227
227
  description="Hydration options for related paragraphs. For example, neighbours or sibling paragraphs",
228
228
  )
229
229
 
230
230
 
231
231
  class Hydration(BaseModel, extra="forbid"):
232
- resource: Optional[ResourceHydration] = Field(
232
+ resource: ResourceHydration | None = Field(
233
233
  default_factory=ResourceHydration,
234
234
  description="Resource hydration options",
235
235
  )
@@ -270,22 +270,22 @@ class HydratedResource(BaseModel, extra="forbid"):
270
270
  id: str = Field(description="Unique resource id")
271
271
  slug: str = Field(description="Resource slug")
272
272
 
273
- title: Optional[str] = None
274
- summary: Optional[str] = None
273
+ title: str | None = None
274
+ summary: str | None = None
275
275
 
276
- origin: Optional[Origin] = None
276
+ origin: Origin | None = None
277
277
 
278
- security: Optional[ResourceSecurity] = None
278
+ security: ResourceSecurity | None = None
279
279
 
280
280
  # TODO: add resource labels to hydrated resources
281
281
 
282
282
 
283
283
  class FieldExtractedData(BaseModel, extra="forbid"):
284
- text: Optional[str] = None
284
+ text: str | None = None
285
285
 
286
286
 
287
287
  class SplitFieldExtractedData(BaseModel, extra="forbid"):
288
- texts: Optional[dict[str, str]] = None
288
+ texts: dict[str, str] | None = None
289
289
 
290
290
 
291
291
  class HydratedTextField(BaseModel, extra="forbid"):
@@ -293,8 +293,8 @@ class HydratedTextField(BaseModel, extra="forbid"):
293
293
  resource: str = Field("Field resource id")
294
294
  field_type: FieldTypeName = FieldTypeName.TEXT
295
295
 
296
- value: Optional[FieldText] = None
297
- extracted: Optional[FieldExtractedData] = None
296
+ value: FieldText | None = None
297
+ extracted: FieldExtractedData | None = None
298
298
 
299
299
 
300
300
  class HydratedFileField(BaseModel, extra="forbid"):
@@ -302,10 +302,10 @@ class HydratedFileField(BaseModel, extra="forbid"):
302
302
  resource: str = Field("Field resource id")
303
303
  field_type: FieldTypeName = FieldTypeName.FILE
304
304
 
305
- value: Optional[FieldFile] = None
306
- extracted: Optional[FieldExtractedData] = None
305
+ value: FieldFile | None = None
306
+ extracted: FieldExtractedData | None = None
307
307
 
308
- previews: Optional[dict[str, Image]] = Field(
308
+ previews: dict[str, Image] | None = Field(
309
309
  default=None,
310
310
  title="Previews of specific parts of the field",
311
311
  description=(
@@ -323,8 +323,8 @@ class HydratedLinkField(BaseModel, extra="forbid"):
323
323
  resource: str = Field("Field resource id")
324
324
  field_type: FieldTypeName = FieldTypeName.LINK
325
325
 
326
- value: Optional[FieldLink] = None
327
- extracted: Optional[FieldExtractedData] = None
326
+ value: FieldLink | None = None
327
+ extracted: FieldExtractedData | None = None
328
328
 
329
329
 
330
330
  class HydratedConversationField(BaseModel, extra="forbid"):
@@ -332,8 +332,8 @@ class HydratedConversationField(BaseModel, extra="forbid"):
332
332
  resource: str = Field("Field resource id")
333
333
  field_type: FieldTypeName = FieldTypeName.CONVERSATION
334
334
 
335
- value: Optional[FieldConversation] = None
336
- extracted: Optional[FieldExtractedData] = None
335
+ value: FieldConversation | None = None
336
+ extracted: FieldExtractedData | None = None
337
337
 
338
338
 
339
339
  class HydratedGenericField(BaseModel, extra="forbid"):
@@ -341,24 +341,24 @@ class HydratedGenericField(BaseModel, extra="forbid"):
341
341
  resource: str = Field("Field resource id")
342
342
  field_type: FieldTypeName = FieldTypeName.TEXT
343
343
 
344
- value: Optional[str] = None
345
- extracted: Optional[FieldExtractedData] = None
344
+ value: str | None = None
345
+ extracted: FieldExtractedData | None = None
346
346
 
347
347
 
348
348
  class RelatedNeighbourParagraphRefs(BaseModel, extra="forbid"):
349
- before: Optional[list[str]] = None
350
- after: Optional[list[str]] = None
349
+ before: list[str] | None = None
350
+ after: list[str] | None = None
351
351
 
352
352
 
353
353
  class RelatedParagraphRefs(BaseModel, extra="forbid"):
354
- neighbours: Optional[RelatedNeighbourParagraphRefs] = None
355
- parents: Optional[list[str]] = None
356
- siblings: Optional[list[str]] = None
357
- replacements: Optional[list[str]] = None
354
+ neighbours: RelatedNeighbourParagraphRefs | None = None
355
+ parents: list[str] | None = None
356
+ siblings: list[str] | None = None
357
+ replacements: list[str] | None = None
358
358
 
359
359
 
360
360
  class HydratedParagraphImage(BaseModel, extra="forbid"):
361
- source_image: Optional[Image] = Field(
361
+ source_image: Image | None = Field(
362
362
  default=None,
363
363
  description=(
364
364
  "Source image for this paragraph. This only applies to paragraphs "
@@ -369,7 +369,7 @@ class HydratedParagraphImage(BaseModel, extra="forbid"):
369
369
 
370
370
 
371
371
  class HydratedParagraphTable(BaseModel, extra="forbid"):
372
- page_preview_ref: Optional[str] = Field(
372
+ page_preview_ref: str | None = Field(
373
373
  default=None,
374
374
  description=(
375
375
  "Referento to the page preview for this paragraph. The actual "
@@ -381,7 +381,7 @@ class HydratedParagraphTable(BaseModel, extra="forbid"):
381
381
 
382
382
 
383
383
  class HydratedParagraphPage(BaseModel, extra="forbid"):
384
- page_preview_ref: Optional[str] = Field(
384
+ page_preview_ref: str | None = Field(
385
385
  default=None,
386
386
  description=(
387
387
  "Reference to the page preview for this paragraph. The actual "
@@ -398,28 +398,26 @@ class HydratedParagraph(BaseModel, extra="forbid"):
398
398
  field: str = Field(description="Paragraph field id")
399
399
  resource: str = Field(description="Paragraph resource id")
400
400
 
401
- text: Optional[str] = None
401
+ text: str | None = None
402
402
 
403
403
  # TODO: add labels to hydrated paragraphs
404
404
  # labels: Optional[list[str]] = None
405
405
 
406
- related: Optional[RelatedParagraphRefs] = None
406
+ related: RelatedParagraphRefs | None = None
407
407
 
408
- image: Optional[HydratedParagraphImage] = None
409
- table: Optional[HydratedParagraphTable] = None
410
- page: Optional[HydratedParagraphPage] = None
408
+ image: HydratedParagraphImage | None = None
409
+ table: HydratedParagraphTable | None = None
410
+ page: HydratedParagraphPage | None = None
411
411
 
412
412
 
413
413
  class Hydrated(BaseModel, extra="forbid"):
414
414
  resources: dict[str, HydratedResource]
415
415
  fields: dict[
416
416
  str,
417
- Union[
418
- HydratedTextField,
419
- HydratedFileField,
420
- HydratedLinkField,
421
- HydratedConversationField,
422
- HydratedGenericField,
423
- ],
417
+ HydratedTextField
418
+ | HydratedFileField
419
+ | HydratedLinkField
420
+ | HydratedConversationField
421
+ | HydratedGenericField,
424
422
  ]
425
423
  paragraphs: dict[str, HydratedParagraph]
nucliadb_models/internal/predict.py CHANGED
@@ -19,13 +19,11 @@ Models for Predict API v1.
19
19
  ATENTION! Keep these models in sync with models on Predict API
20
20
  """
21
21
 
22
- from typing import List, Optional
23
-
24
22
  from pydantic import BaseModel, Field
25
23
 
26
24
 
27
25
  class SentenceSearch(BaseModel):
28
- vectors: dict[str, List[float]] = Field(
26
+ vectors: dict[str, list[float]] = Field(
29
27
  default_factory=dict,
30
28
  description="Sentence vectors for each semantic model",
31
29
  min_length=1,
@@ -45,14 +43,14 @@ class Ner(BaseModel):
45
43
 
46
44
 
47
45
  class TokenSearch(BaseModel):
48
- tokens: List[Ner] = []
46
+ tokens: list[Ner] = []
49
47
  time: float
50
48
  input_tokens: int = 0
51
49
 
52
50
 
53
51
  class QueryInfo(BaseModel):
54
- language: Optional[str]
55
- stop_words: List[str] = Field(default_factory=list)
52
+ language: str | None
53
+ stop_words: list[str] = Field(default_factory=list)
56
54
  semantic_thresholds: dict[str, float] = Field(
57
55
  default_factory=dict,
58
56
  description="Semantic threshold for each semantic model",
@@ -60,10 +58,10 @@ class QueryInfo(BaseModel):
60
58
  )
61
59
  visual_llm: bool
62
60
  max_context: int
63
- entities: Optional[TokenSearch]
64
- sentence: Optional[SentenceSearch]
61
+ entities: TokenSearch | None
62
+ sentence: SentenceSearch | None
65
63
  query: str
66
- rephrased_query: Optional[str] = None
64
+ rephrased_query: str | None = None
67
65
 
68
66
 
69
67
  class RerankModel(BaseModel):
nucliadb_models/internal/shards.py CHANGED
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  from enum import Enum
16
- from typing import List, Optional
17
16
 
18
17
  from pydantic import BaseModel
19
18
 
@@ -58,9 +57,9 @@ class ShardReplica(BaseModel):
58
57
 
59
58
  class ShardObject(BaseModel):
60
59
  shard: str
61
- nidx_shard_id: Optional[str]
60
+ nidx_shard_id: str | None
62
61
 
63
62
 
64
63
  class KnowledgeboxShards(BaseModel):
65
64
  kbid: str
66
- shards: List[ShardObject]
65
+ shards: list[ShardObject]
nucliadb_models/labels.py CHANGED
@@ -14,9 +14,8 @@
14
14
  #
15
15
 
16
16
  from enum import Enum
17
- from typing import Dict, List, Optional
18
17
 
19
- from pydantic import BaseModel, model_validator
18
+ from pydantic import BaseModel, Field, model_validator
20
19
  from typing_extensions import Self
21
20
 
22
21
  BASE_LABELS: dict[str, set[str]] = {
@@ -96,18 +95,26 @@ class LabelSetKind(str, Enum):
96
95
 
97
96
 
98
97
  class Label(BaseModel):
99
- title: str
100
- related: Optional[str] = None
101
- text: Optional[str] = None
102
- uri: Optional[str] = None
98
+ title: str = Field(
99
+ description="Title of the label. This is the display name for the label shown in the UI and also used for searching."
100
+ )
101
+ related: str | None = None
102
+ text: str | None = None
103
+ uri: str | None = None
103
104
 
104
105
 
105
106
  class LabelSet(BaseModel):
106
- title: Optional[str] = None
107
- color: Optional[str] = "blue"
107
+ title: str | None = Field(
108
+ default=None,
109
+ description="Title of the labelset. It is a prettier display name for the labelset shown in the UI but it is not intended to be used for searching.",
110
+ )
111
+ color: str | None = "blue"
108
112
  multiple: bool = True
109
- kind: List[LabelSetKind] = []
110
- labels: List[Label] = []
113
+ kind: list[LabelSetKind] = []
114
+ labels: list[Label] = Field(
115
+ default_factory=list,
116
+ description="List of labels in the labelset. The titles of the labels must be unique within the labelset.",
117
+ )
111
118
 
112
119
  @model_validator(mode="after")
113
120
  def check_unique_labels(self) -> Self:
@@ -123,4 +130,4 @@ class LabelSet(BaseModel):
123
130
 
124
131
  class KnowledgeBoxLabels(BaseModel):
125
132
  uuid: str
126
- labelsets: Dict[str, LabelSet] = {}
133
+ labelsets: dict[str, LabelSet] = {}
nucliadb_models/link.py CHANGED
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  from datetime import datetime
16
- from typing import Dict, Optional
17
16
 
18
17
  from pydantic import BaseModel, Field
19
18
 
@@ -25,19 +24,19 @@ from pydantic import BaseModel, Field
25
24
 
26
25
 
27
26
  class FieldLink(BaseModel):
28
- added: Optional[datetime] = None
29
- headers: Optional[Dict[str, str]] = None
30
- cookies: Optional[Dict[str, str]] = None
31
- uri: Optional[str] = None
32
- language: Optional[str] = None
33
- localstorage: Optional[Dict[str, str]] = None
34
- css_selector: Optional[str] = None
35
- xpath: Optional[str] = None
36
- extract_strategy: Optional[str] = Field(
27
+ added: datetime | None = None
28
+ headers: dict[str, str] | None = None
29
+ cookies: dict[str, str] | None = None
30
+ uri: str | None = None
31
+ language: str | None = None
32
+ localstorage: dict[str, str] | None = None
33
+ css_selector: str | None = None
34
+ xpath: str | None = None
35
+ extract_strategy: str | None = Field(
37
36
  default=None,
38
37
  description="Id of the Nuclia extract strategy used at processing time. If not set, the default strategy was used. Extract strategies are defined at the learning configuration api.",
39
38
  )
40
- split_strategy: Optional[str] = Field(
39
+ split_strategy: str | None = Field(
41
40
  default=None,
42
41
  description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
43
42
  )
@@ -47,18 +46,18 @@ class FieldLink(BaseModel):
47
46
 
48
47
 
49
48
  class LinkField(BaseModel):
50
- headers: Optional[Dict[str, str]] = {}
51
- cookies: Optional[Dict[str, str]] = {}
49
+ headers: dict[str, str] | None = {}
50
+ cookies: dict[str, str] | None = {}
52
51
  uri: str
53
- language: Optional[str] = None
54
- localstorage: Optional[Dict[str, str]] = {}
55
- css_selector: Optional[str] = None
56
- xpath: Optional[str] = None
57
- extract_strategy: Optional[str] = Field(
52
+ language: str | None = None
53
+ localstorage: dict[str, str] | None = {}
54
+ css_selector: str | None = None
55
+ xpath: str | None = None
56
+ extract_strategy: str | None = Field(
58
57
  default=None,
59
58
  description="Id of the Nuclia extract strategy to use at processing time. If not set, the default strategy will be used. Extract strategies are defined at the learning configuration api.",
60
59
  )
61
- split_strategy: Optional[str] = Field(
60
+ split_strategy: str | None = Field(
62
61
  default=None,
63
62
  description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
64
63
  )
nucliadb_models/metadata.py CHANGED
@@ -15,7 +15,7 @@
15
15
  import warnings
16
16
  from datetime import datetime
17
17
  from enum import Enum
18
- from typing import Any, Dict, List, Optional
18
+ from typing import Any
19
19
 
20
20
  from pydantic import BaseModel, Field, field_validator, model_validator
21
21
  from typing_extensions import Self
@@ -49,7 +49,7 @@ class RelationNodeType(str, Enum):
49
49
  class RelationEntity(BaseModel):
50
50
  value: str
51
51
  type: RelationNodeType
52
- group: Optional[str] = None
52
+ group: str | None = None
53
53
 
54
54
  @model_validator(mode="after")
55
55
  def check_relation_is_valid(self) -> Self:
@@ -60,20 +60,20 @@ class RelationEntity(BaseModel):
60
60
 
61
61
 
62
62
  class RelationMetadata(BaseModel):
63
- paragraph_id: Optional[str] = None
64
- source_start: Optional[int] = None
65
- source_end: Optional[int] = None
66
- to_start: Optional[int] = None
67
- to_end: Optional[int] = None
68
- data_augmentation_task_id: Optional[str] = None
63
+ paragraph_id: str | None = None
64
+ source_start: int | None = None
65
+ source_end: int | None = None
66
+ to_start: int | None = None
67
+ to_end: int | None = None
68
+ data_augmentation_task_id: str | None = None
69
69
 
70
70
 
71
71
  class Relation(BaseModel):
72
72
  relation: RelationType
73
- label: Optional[str] = None
74
- metadata: Optional[RelationMetadata] = None
73
+ label: str | None = None
74
+ metadata: RelationMetadata | None = None
75
75
 
76
- from_: Optional[RelationEntity] = Field(default=None, alias="from")
76
+ from_: RelationEntity | None = Field(default=None, alias="from")
77
77
  to: RelationEntity
78
78
 
79
79
  @model_validator(mode="after")
@@ -100,9 +100,9 @@ class Relation(BaseModel):
100
100
 
101
101
 
102
102
  class InputMetadata(BaseModel):
103
- metadata: Dict[str, str] = {}
104
- language: Optional[str] = None
105
- languages: Optional[List[str]] = Field(default=None, max_length=1024)
103
+ metadata: dict[str, str] = {}
104
+ language: str | None = None
105
+ languages: list[str] | None = Field(default=None, max_length=1024)
106
106
 
107
107
 
108
108
  class ResourceProcessingStatus(Enum):
@@ -120,7 +120,7 @@ class Metadata(InputMetadata):
120
120
 
121
121
  class FieldClassification(BaseModel):
122
122
  field: FieldID
123
- classifications: List[Classification] = []
123
+ classifications: list[Classification] = []
124
124
 
125
125
 
126
126
  class ComputedMetadata(BaseModel):
@@ -129,12 +129,12 @@ class ComputedMetadata(BaseModel):
129
129
  without having to load the whole computed metadata field.
130
130
  """
131
131
 
132
- field_classifications: List[FieldClassification] = []
132
+ field_classifications: list[FieldClassification] = []
133
133
 
134
134
 
135
135
  class UserMetadata(BaseModel):
136
- classifications: List[UserClassification] = []
137
- relations: List[Relation] = []
136
+ classifications: list[UserClassification] = []
137
+ relations: list[Relation] = []
138
138
 
139
139
 
140
140
  class TokenSplit(BaseModel):
@@ -154,7 +154,7 @@ class TokenSplit(BaseModel):
154
154
 
155
155
 
156
156
  class ParagraphAnnotation(BaseModel):
157
- classifications: List[UserClassification] = []
157
+ classifications: list[UserClassification] = []
158
158
  key: str
159
159
 
160
160
 
@@ -169,12 +169,12 @@ class VisualSelection(BaseModel):
169
169
  left: float
170
170
  right: float
171
171
  bottom: float
172
- token_ids: List[int]
172
+ token_ids: list[int]
173
173
 
174
174
 
175
175
  class PageSelections(BaseModel):
176
176
  page: int
177
- visual: List[VisualSelection]
177
+ visual: list[VisualSelection]
178
178
 
179
179
  def __init__(self, **data):
180
180
  warnings.warn(
@@ -190,57 +190,69 @@ class UserFieldMetadata(BaseModel):
190
190
  Field-level metadata set by the user via the rest api
191
191
  """
192
192
 
193
- paragraphs: List[ParagraphAnnotation] = []
194
- question_answers: List[QuestionAnswerAnnotation] = []
193
+ paragraphs: list[ParagraphAnnotation] = []
194
+ question_answers: list[QuestionAnswerAnnotation] = []
195
195
  field: FieldID
196
196
 
197
197
 
198
198
  class Basic(BaseModel):
199
- icon: Optional[str] = None
200
- title: Optional[str] = None
201
- summary: Optional[str] = None
202
- thumbnail: Optional[str] = None
203
- created: Optional[datetime] = None
204
- modified: Optional[datetime] = None
205
- metadata: Optional[Metadata] = None
206
- usermetadata: Optional[UserMetadata] = None
207
- fieldmetadata: Optional[List[UserFieldMetadata]] = None
208
- computedmetadata: Optional[ComputedMetadata] = None
209
- uuid: Optional[str] = None
210
- last_seqid: Optional[int] = None
211
- last_account_seq: Optional[int] = None
199
+ icon: str | None = None
200
+ title: str | None = None
201
+ summary: str | None = None
202
+ thumbnail: str | None = None
203
+ created: datetime | None = None
204
+ modified: datetime | None = None
205
+ metadata: Metadata | None = None
206
+ usermetadata: UserMetadata | None = None
207
+ fieldmetadata: list[UserFieldMetadata] | None = None
208
+ computedmetadata: ComputedMetadata | None = None
209
+ uuid: str | None = None
210
+ last_seqid: int | None = None
211
+ last_account_seq: int | None = None
212
+
213
+
214
+ class SyncMetadata(BaseModel):
215
+ file_id: str = Field(description="Identifier of the file in the origin cloud storage system")
216
+ auth_provider: str = Field(
217
+ description="Authentication provider used to access the origin cloud storage system"
218
+ )
212
219
 
213
220
 
214
221
  class InputOrigin(BaseModel):
215
- source_id: Optional[str] = None
216
- url: Optional[str] = None
217
- created: Optional[DateTime] = Field(
222
+ source_id: str | None = None
223
+ url: str | None = None
224
+ created: DateTime | None = Field(
218
225
  default=None,
219
226
  description="Creation date of the resource at the origin system. This can be later used for date range filtering on search endpoints. Have a look at the advanced search documentation page: https://docs.nuclia.dev/docs/rag/advanced/search/#date-filtering",
220
227
  )
221
- modified: Optional[DateTime] = Field(
228
+ modified: DateTime | None = Field(
222
229
  default=None,
223
230
  description="Modification date of the resource at the origin system. This can be later used for date range filtering on search endpoints. Have a look at the advanced search documentation page: https://docs.nuclia.dev/docs/rag/advanced/search/#date-filtering",
224
231
  )
225
- metadata: Dict[str, str] = Field(
232
+ metadata: dict[str, str] = Field(
226
233
  default={},
227
234
  title="Metadata",
228
235
  description="Generic metadata from the resource at the origin system. It can later be used for filtering on search endpoints with '/origin.metadata/{key}/{value}'",
229
236
  )
230
- tags: List[str] = Field(
237
+ tags: list[str] = Field(
231
238
  default=[],
232
239
  title="Tags",
233
240
  description="Resource tags about the origin system. It can later be used for filtering on search endpoints with '/origin.tags/{tag}'",
234
241
  max_length=300,
235
242
  )
236
- collaborators: List[str] = Field(default=[], max_length=100)
237
- filename: Optional[str] = None
238
- related: List[str] = Field(default=[], max_length=100)
239
- path: Optional[str] = Field(
243
+ collaborators: list[str] = Field(default=[], max_length=100)
244
+ filename: str | None = None
245
+ related: list[str] = Field(default=[], max_length=100)
246
+ path: str | None = Field(
240
247
  default=None,
241
248
  description="Path of the original resource. Typically used to store folder structure information of the resource at the origin system. It can be later used for filtering on search endpoints with '/origin.path/{path}'",
242
249
  max_length=2048,
243
250
  )
251
+ sync_metadata: SyncMetadata | None = Field(
252
+ default=None,
253
+ title="Sync Metadata",
254
+ description="Metadata related to the resource from the origin system fetched by the Progress Agentic RAG's Cloud Storage Sync service.",
255
+ )
244
256
 
245
257
  @field_validator("tags")
246
258
  def validate_tag_length(cls, tags):
@@ -253,10 +265,10 @@ class InputOrigin(BaseModel):
253
265
  class Origin(InputOrigin):
254
266
  # Created and modified are redefined to
255
267
  # use native datetime objects and skip validation
256
- created: Optional[datetime] = None
257
- modified: Optional[datetime] = None
268
+ created: datetime | None = None
269
+ modified: datetime | None = None
258
270
 
259
- tags: List[str] = Field(
271
+ tags: list[str] = Field(
260
272
  default=[],
261
273
  title="Tags",
262
274
  description="Resource tags about the origin system. It can later be used for filtering on search endpoints with '/origin.tags/{tag}'",
@@ -268,16 +280,16 @@ class Origin(InputOrigin):
268
280
  API = "API"
269
281
  PYSDK = "PYSDK"
270
282
 
271
- source: Optional[Source] = Source.API
283
+ source: Source | None = Source.API
272
284
 
273
285
 
274
286
  class Extra(BaseModel):
275
- metadata: Dict[Any, Any] = Field(
287
+ metadata: dict[Any, Any] = Field(
276
288
  ...,
277
289
  title="Metadata",
278
- description="Arbitrary JSON metadata provided by the user that is not meant to be searchable, but can be serialized on results.", # noqa
290
+ description="Arbitrary JSON metadata provided by the user that is not meant to be searchable, but can be serialized on results.",
279
291
  )
280
292
 
281
293
 
282
294
  class Relations(BaseModel):
283
- relations: Optional[List[Relation]] = None
295
+ relations: list[Relation] | None = None