nucliadb-models 6.9.7.post5583__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. nucliadb_models/agents/ingestion.py +4 -4
  2. nucliadb_models/augment.py +100 -84
  3. nucliadb_models/common.py +56 -56
  4. nucliadb_models/configuration.py +8 -8
  5. nucliadb_models/content_types.py +13 -11
  6. nucliadb_models/conversation.py +25 -26
  7. nucliadb_models/entities.py +17 -18
  8. nucliadb_models/external_index_providers.py +1 -2
  9. nucliadb_models/extracted.py +82 -83
  10. nucliadb_models/file.py +10 -11
  11. nucliadb_models/filters.py +78 -74
  12. nucliadb_models/graph/requests.py +40 -48
  13. nucliadb_models/graph/responses.py +13 -1
  14. nucliadb_models/hydration.py +48 -50
  15. nucliadb_models/internal/predict.py +7 -9
  16. nucliadb_models/internal/shards.py +2 -3
  17. nucliadb_models/labels.py +18 -11
  18. nucliadb_models/link.py +18 -19
  19. nucliadb_models/metadata.py +66 -54
  20. nucliadb_models/notifications.py +3 -3
  21. nucliadb_models/processing.py +1 -2
  22. nucliadb_models/resource.py +85 -93
  23. nucliadb_models/retrieval.py +147 -0
  24. nucliadb_models/search.py +263 -275
  25. nucliadb_models/security.py +2 -3
  26. nucliadb_models/text.py +7 -8
  27. nucliadb_models/trainset.py +1 -2
  28. nucliadb_models/utils.py +2 -3
  29. nucliadb_models/vectors.py +2 -5
  30. nucliadb_models/writer.py +56 -57
  31. {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/METADATA +1 -1
  32. nucliadb_models-6.11.1.post5822.dist-info/RECORD +41 -0
  33. {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/WHEEL +1 -1
  34. nucliadb_models-6.9.7.post5583.dist-info/RECORD +0 -40
  35. {nucliadb_models-6.9.7.post5583.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/top_level.txt +0 -0
nucliadb_models/conversation.py CHANGED
@@ -14,7 +14,6 @@
14
14
  #
15
15
  from datetime import datetime
16
16
  from enum import Enum
17
- from typing import List, Optional
18
17
 
19
18
  from pydantic import BaseModel, Field, field_validator
20
19
 
@@ -37,10 +36,10 @@ class MessageFormat(Enum):
37
36
 
38
37
 
39
38
  class MessageContent(BaseModel):
40
- text: Optional[str] = None
41
- format: Optional[MessageFormat] = None
42
- attachments: Optional[List[CloudLink]] = None
43
- attachments_fields: List[FieldRef] = []
39
+ text: str | None = None
40
+ format: MessageFormat | None = None
41
+ attachments: list[CloudLink] | None = None
42
+ attachments_fields: list[FieldRef] = []
44
43
 
45
44
 
46
45
  class MessageType(Enum):
@@ -50,12 +49,12 @@ class MessageType(Enum):
50
49
 
51
50
 
52
51
  class Message(BaseModel):
53
- timestamp: Optional[DateTime] = None
54
- who: Optional[str] = None
55
- to: Optional[List[str]] = []
52
+ timestamp: DateTime | None = None
53
+ who: str | None = None
54
+ to: list[str] | None = []
56
55
  content: MessageContent
57
- ident: Optional[str] = None
58
- type_: Optional[MessageType] = Field(None, alias="type")
56
+ ident: str | None = None
57
+ type_: MessageType | None = Field(None, alias="type")
59
58
 
60
59
 
61
60
  class Conversation(BaseModel):
@@ -64,7 +63,7 @@ class Conversation(BaseModel):
64
63
  a conversation in the field level.
65
64
  """
66
65
 
67
- messages: Optional[List[Message]] = []
66
+ messages: list[Message] | None = []
68
67
 
69
68
 
70
69
  class FieldConversation(BaseModel):
@@ -75,11 +74,11 @@ class FieldConversation(BaseModel):
75
74
  This class is used mainly when exposing a conversation in the resource level
76
75
  """
77
76
 
78
- pages: Optional[int] = None
79
- size: Optional[int] = None
80
- total: Optional[int] = None
81
- extract_strategy: Optional[str] = None
82
- split_strategy: Optional[str] = None
77
+ pages: int | None = None
78
+ size: int | None = None
79
+ total: int | None = None
80
+ extract_strategy: str | None = None
81
+ split_strategy: str | None = None
83
82
 
84
83
 
85
84
  # Creation and update classes (Those used on writer endpoints)
@@ -88,18 +87,18 @@ class FieldConversation(BaseModel):
88
87
  class InputMessageContent(BaseModel):
89
88
  text: str = Field()
90
89
  format: MessageFormat = MessageFormat.PLAIN
91
- attachments: List[FileB64] = Field(default=[], max_length=50)
92
- attachments_fields: List[FieldRef] = Field(default=[], max_length=50)
90
+ attachments: list[FileB64] = Field(default=[], max_length=50)
91
+ attachments_fields: list[FieldRef] = Field(default=[], max_length=50)
93
92
 
94
93
 
95
94
  class InputMessage(BaseModel):
96
- timestamp: Optional[datetime] = Field(
95
+ timestamp: datetime | None = Field(
97
96
  default=None, description="Time at which the message was sent, in ISO 8601 format."
98
97
  )
99
- who: Optional[str] = Field(
98
+ who: str | None = Field(
100
99
  default=None, description="Sender of the message, e.g. 'user' or 'assistant'"
101
100
  )
102
- to: List[str] = Field(
101
+ to: list[str] = Field(
103
102
  default_factory=list,
104
103
  description="List of recipients of the message, e.g. ['assistant'] or ['user']",
105
104
  max_length=100,
@@ -109,7 +108,7 @@ class InputMessage(BaseModel):
109
108
  description="Unique identifier for the message. Must be unique within the conversation.",
110
109
  max_length=128,
111
110
  )
112
- type_: Optional[MessageType] = Field(None, alias="type")
111
+ type_: MessageType | None = Field(None, alias="type")
113
112
 
114
113
  @field_validator("ident", mode="after")
115
114
  @classmethod
@@ -126,22 +125,22 @@ class InputMessage(BaseModel):
126
125
 
127
126
 
128
127
  class InputConversationField(BaseModel):
129
- messages: List[InputMessage] = Field(
128
+ messages: list[InputMessage] = Field(
130
129
  default_factory=list,
131
130
  description="List of messages in the conversation field. Each message must have a unique ident. A single conversation can contain up to 51,200 messages. You can add up to 2,048 messages per request.",
132
131
  )
133
- extract_strategy: Optional[str] = Field(
132
+ extract_strategy: str | None = Field(
134
133
  default=None,
135
134
  description="Id of the Nuclia extract strategy used at processing time. If not set, the default strategy was used. Extract strategies are defined at the learning configuration api.",
136
135
  )
137
- split_strategy: Optional[str] = Field(
136
+ split_strategy: str | None = Field(
138
137
  default=None,
139
138
  description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
140
139
  )
141
140
 
142
141
  @field_validator("messages", mode="after")
143
142
  @classmethod
144
- def idents_are_unique(cls, value: List[InputMessage]) -> List[InputMessage]:
143
+ def idents_are_unique(cls, value: list[InputMessage]) -> list[InputMessage]:
145
144
  seen_idents = set()
146
145
  for message in value:
147
146
  if message.ident in seen_idents:
nucliadb_models/entities.py CHANGED
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
  #
15
15
 
16
- from typing import Dict, List, Optional
17
16
 
18
17
  from pydantic import BaseModel, Field
19
18
 
@@ -21,50 +20,50 @@ from pydantic import BaseModel, Field
21
20
  class Entity(BaseModel):
22
21
  value: str
23
22
  merged: bool = False
24
- represents: List[str] = []
23
+ represents: list[str] = []
25
24
 
26
25
 
27
26
  class EntitiesGroupSummary(BaseModel):
28
- title: Optional[str] = Field(default=None, description="Title of the entities group")
29
- color: Optional[str] = Field(
27
+ title: str | None = Field(default=None, description="Title of the entities group")
28
+ color: str | None = Field(
30
29
  default=None,
31
30
  description="Color of the entities group. This is for display purposes only.",
32
31
  )
33
32
  custom: bool = Field(default=False, description="Denotes if it has been created by the user")
34
33
 
35
- entities: Dict[str, Entity] = Field(
34
+ entities: dict[str, Entity] = Field(
36
35
  default={},
37
36
  title="[Deprecated] Entities in the group",
38
- description="This field is deprecated and will be removed in future versions. It will always be empty. Use the /api/v1/kb/{kbid}/entitiesgroup/{group} endpoint to get the entities of a group.", # noqa: E501
37
+ description="This field is deprecated and will be removed in future versions. It will always be empty. Use the /api/v1/kb/{kbid}/entitiesgroup/{group} endpoint to get the entities of a group.",
39
38
  )
40
39
 
41
40
 
42
41
  class EntitiesGroup(BaseModel):
43
- title: Optional[str] = Field(default=None, description="Title of the entities group")
44
- color: Optional[str] = Field(
42
+ title: str | None = Field(default=None, description="Title of the entities group")
43
+ color: str | None = Field(
45
44
  default=None,
46
45
  description="Color of the entities group. This is for display purposes only.",
47
46
  )
48
47
  custom: bool = Field(default=False, description="Denotes if it has been created by the user")
49
- entities: Dict[str, Entity] = {}
48
+ entities: dict[str, Entity] = {}
50
49
 
51
50
 
52
51
  class KnowledgeBoxEntities(BaseModel):
53
52
  uuid: str
54
- groups: Dict[str, EntitiesGroupSummary] = {}
53
+ groups: dict[str, EntitiesGroupSummary] = {}
55
54
 
56
55
 
57
56
  class CreateEntitiesGroupPayload(BaseModel):
58
57
  group: str
59
- entities: Dict[str, Entity] = {}
60
- title: Optional[str] = None
61
- color: Optional[str] = None
58
+ entities: dict[str, Entity] = {}
59
+ title: str | None = None
60
+ color: str | None = None
62
61
 
63
62
 
64
63
  class UpdateEntitiesGroupPayload(BaseModel):
65
- title: Optional[str] = None
66
- color: Optional[str] = None
64
+ title: str | None = None
65
+ color: str | None = None
67
66
 
68
- add: Dict[str, Entity] = {}
69
- update: Dict[str, Entity] = {}
70
- delete: List[str] = []
67
+ add: dict[str, Entity] = {}
68
+ update: dict[str, Entity] = {}
69
+ delete: list[str] = []
nucliadb_models/external_index_providers.py CHANGED
@@ -14,7 +14,6 @@
14
14
  #
15
15
 
16
16
  from enum import Enum
17
- from typing import Union
18
17
 
19
18
  from pydantic import BaseModel
20
19
 
@@ -36,4 +35,4 @@ class DummyIndexProvider(ExternalIndexProviderBase):
36
35
  type: ExternalIndexProviderType = ExternalIndexProviderType.UNSET
37
36
 
38
37
 
39
- ExternalIndexProvider = Union[DummyIndexProvider,]
38
+ ExternalIndexProvider = DummyIndexProvider
nucliadb_models/extracted.py CHANGED
@@ -13,9 +13,8 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  from datetime import datetime
16
- from typing import Dict, List, Optional
17
16
 
18
- from pydantic import BaseModel
17
+ from pydantic import BaseModel, Field
19
18
 
20
19
  from .common import (
21
20
  Classification,
@@ -27,27 +26,27 @@ from .metadata import Relation
27
26
 
28
27
 
29
28
  class ExtractedText(BaseModel):
30
- text: Optional[str] = None
31
- split_text: Optional[Dict[str, str]] = None
32
- deleted_splits: Optional[List[str]] = None
29
+ text: str | None = None
30
+ split_text: dict[str, str] | None = None
31
+ deleted_splits: list[str] | None = None
33
32
 
34
33
 
35
34
  class Vector(BaseModel):
36
- start: Optional[int] = None
37
- end: Optional[int] = None
38
- start_paragraph: Optional[int] = None
39
- end_paragraph: Optional[int] = None
40
- vector: Optional[List[float]] = None
35
+ start: int | None = None
36
+ end: int | None = None
37
+ start_paragraph: int | None = None
38
+ end_paragraph: int | None = None
39
+ vector: list[float] | None = None
41
40
 
42
41
 
43
42
  class Vectors(BaseModel):
44
- vectors: Optional[List[Vector]] = None
43
+ vectors: list[Vector] | None = None
45
44
 
46
45
 
47
46
  class VectorObject(BaseModel):
48
- vectors: Optional[Vectors] = None
49
- split_vectors: Optional[Dict[str, Vectors]] = None
50
- deleted_splits: Optional[List[str]] = None
47
+ vectors: Vectors | None = None
48
+ split_vectors: dict[str, Vectors] | None = None
49
+ deleted_splits: list[str] | None = None
51
50
 
52
51
 
53
52
  class Position(BaseModel):
@@ -56,14 +55,14 @@ class Position(BaseModel):
56
55
 
57
56
 
58
57
  class Positions(BaseModel):
59
- position: List[Position]
58
+ position: list[Position]
60
59
  entity: str
61
60
 
62
61
 
63
62
  class FieldEntity(BaseModel):
64
63
  text: str
65
64
  label: str
66
- positions: List[Position]
65
+ positions: list[Position]
67
66
 
68
67
 
69
68
  class FieldEntities(BaseModel):
@@ -71,91 +70,91 @@ class FieldEntities(BaseModel):
71
70
  Wrapper for the entities extracted from a field (required because protobuf doesn't support lists of lists)
72
71
  """
73
72
 
74
- entities: List[FieldEntity]
73
+ entities: list[FieldEntity]
75
74
 
76
75
 
77
76
  class FieldMetadata(BaseModel):
78
- links: List[str]
79
- paragraphs: List[Paragraph]
80
- ner: Dict[str, str] # TODO: Remove once processor doesn't use this anymore
81
- entities: Dict[str, FieldEntities]
82
- classifications: List[Classification]
83
- last_index: Optional[datetime] = None
84
- last_understanding: Optional[datetime] = None
85
- last_extract: Optional[datetime] = None
86
- last_summary: Optional[datetime] = None
87
- last_processing_start: Optional[datetime] = None
88
- thumbnail: Optional[CloudLink] = None
89
- language: Optional[str] = None
90
- summary: Optional[str] = None
91
- positions: Dict[str, Positions] # TODO: Remove once processor doesn't use this anymore
92
- relations: Optional[List[Relation]] = None
93
- mime_type: Optional[str] = None
77
+ links: list[str]
78
+ paragraphs: list[Paragraph]
79
+ ner: dict[str, str] # TODO: Remove once processor doesn't use this anymore
80
+ entities: dict[str, FieldEntities]
81
+ classifications: list[Classification]
82
+ last_index: datetime | None = None
83
+ last_understanding: datetime | None = None
84
+ last_extract: datetime | None = None
85
+ last_summary: datetime | None = None
86
+ last_processing_start: datetime | None = None
87
+ thumbnail: CloudLink | None = None
88
+ language: str | None = None
89
+ summary: str | None = None
90
+ positions: dict[str, Positions] # TODO: Remove once processor doesn't use this anymore
91
+ relations: list[Relation] | None = None
92
+ mime_type: str | None = None
94
93
 
95
94
 
96
95
  class FieldComputedMetadata(BaseModel):
97
96
  metadata: FieldMetadata
98
- split_metadata: Optional[Dict[str, FieldMetadata]] = None
99
- deleted_splits: Optional[List[str]] = None
97
+ split_metadata: dict[str, FieldMetadata] | None = None
98
+ deleted_splits: list[str] | None = None
100
99
 
101
100
 
102
101
  class Entity(BaseModel):
103
- token: Optional[str] = None
104
- root: Optional[str] = None
105
- type: Optional[str] = None
102
+ token: str | None = None
103
+ root: str | None = None
104
+ type: str | None = None
106
105
 
107
106
 
108
107
  class FieldLargeMetadata(BaseModel):
109
- entities: Optional[List[Entity]] = None
110
- tokens: Optional[Dict[str, int]] = None
108
+ entities: list[Entity] | None = None
109
+ tokens: dict[str, int] | None = None
111
110
 
112
111
 
113
112
  class LargeComputedMetadata(BaseModel):
114
- metadata: Optional[FieldLargeMetadata] = None
115
- split_metadata: Optional[Dict[str, FieldLargeMetadata]] = None
116
- deleted_splits: Optional[List[str]] = None
113
+ metadata: FieldLargeMetadata | None = None
114
+ split_metadata: dict[str, FieldLargeMetadata] | None = None
115
+ deleted_splits: list[str] | None = None
117
116
 
118
117
 
119
118
  class LinkExtractedData(BaseModel):
120
- date: Optional[datetime] = None
121
- language: Optional[str] = None
122
- title: Optional[str] = None
123
- metadata: Optional[Dict[str, str]] = None
124
- link_thumbnail: Optional[CloudLink] = None
125
- link_preview: Optional[CloudLink] = None
126
- field: Optional[str] = None
127
- link_image: Optional[CloudLink] = None
128
- description: Optional[str] = None
129
- type: Optional[str] = None
130
- embed: Optional[str] = None
131
- file_generated: Optional[Dict[str, CloudLink]] = None
119
+ date: datetime | None = None
120
+ language: str | None = None
121
+ title: str | None = None
122
+ metadata: dict[str, str] | None = None
123
+ link_thumbnail: CloudLink | None = None
124
+ link_preview: CloudLink | None = None
125
+ field: str | None = Field(default=None, title="Link Extracted Data Field")
126
+ link_image: CloudLink | None = None
127
+ description: str | None = None
128
+ type: str | None = None
129
+ embed: str | None = None
130
+ file_generated: dict[str, CloudLink] | None = None
132
131
 
133
132
 
134
133
  class NestedPosition(BaseModel):
135
- start: Optional[int] = None
136
- end: Optional[int] = None
137
- page: Optional[int] = None
134
+ start: int | None = None
135
+ end: int | None = None
136
+ page: int | None = Field(default=None, title="Position Page")
138
137
 
139
138
 
140
139
  class NestedListPosition(BaseModel):
141
- positions: List[NestedPosition]
140
+ positions: list[NestedPosition]
142
141
 
143
142
 
144
143
  class Row(BaseModel):
145
- cell: Optional[List[str]] = None
144
+ cell: list[str] | None = None
146
145
 
147
146
 
148
147
  class Sheet(BaseModel):
149
- rows: Optional[List[Row]] = None
148
+ rows: list[Row] | None = None
150
149
 
151
150
 
152
151
  class RowsPreview(BaseModel):
153
- sheets: Optional[Dict[str, Sheet]] = None
152
+ sheets: dict[str, Sheet] | None = None
154
153
 
155
154
 
156
155
  class PagePositions(BaseModel):
157
- start: Optional[int] = None
158
- end: Optional[int] = None
156
+ start: int | None = None
157
+ end: int | None = None
159
158
 
160
159
 
161
160
  class PageStructurePage(BaseModel):
@@ -174,32 +173,32 @@ class PageStructureToken(BaseModel):
174
173
 
175
174
  class PageStructure(BaseModel):
176
175
  page: PageStructurePage
177
- tokens: List[PageStructureToken]
176
+ tokens: list[PageStructureToken]
178
177
 
179
178
 
180
179
  class FilePages(BaseModel):
181
- pages: Optional[List[CloudLink]] = None
182
- positions: Optional[List[PagePositions]] = None
183
- structures: Optional[List[PageStructure]] = None
180
+ pages: list[CloudLink] | None = None
181
+ positions: list[PagePositions] | None = None
182
+ structures: list[PageStructure] | None = None
184
183
 
185
184
 
186
185
  class FileExtractedData(BaseModel):
187
- language: Optional[str] = None
188
- md5: Optional[str] = None
189
- metadata: Optional[Dict[str, str]] = None
190
- nested: Optional[Dict[str, str]] = None
191
- file_generated: Optional[Dict[str, CloudLink]] = None
192
- file_rows_previews: Optional[Dict[str, RowsPreview]] = None
193
- file_preview: Optional[CloudLink] = None
194
- file_pages_previews: Optional[FilePages] = None
195
- file_thumbnail: Optional[CloudLink] = None
196
- field: Optional[str] = None
197
- icon: Optional[str] = None
198
- nested_position: Optional[Dict[str, NestedPosition]] = None
199
- nested_list_position: Optional[Dict[str, NestedListPosition]] = None
186
+ language: str | None = None
187
+ md5: str | None = None
188
+ metadata: dict[str, str] | None = None
189
+ nested: dict[str, str] | None = None
190
+ file_generated: dict[str, CloudLink] | None = None
191
+ file_rows_previews: dict[str, RowsPreview] | None = None
192
+ file_preview: CloudLink | None = None
193
+ file_pages_previews: FilePages | None = None
194
+ file_thumbnail: CloudLink | None = None
195
+ field: str | None = None
196
+ icon: str | None = None
197
+ nested_position: dict[str, NestedPosition] | None = None
198
+ nested_list_position: dict[str, NestedListPosition] | None = None
200
199
 
201
200
 
202
201
  class FieldQuestionAnswers(BaseModel):
203
202
  question_answers: QuestionAnswers
204
- split_question_answers: Optional[Dict[str, QuestionAnswers]] = None
205
- deleted_splits: Optional[List[str]] = None
203
+ split_question_answers: dict[str, QuestionAnswers] | None = None
204
+ deleted_splits: list[str] | None = None
nucliadb_models/file.py CHANGED
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
  #
15
15
  from datetime import datetime
16
- from typing import Optional
17
16
 
18
17
  from pydantic import BaseModel, Field
19
18
 
@@ -26,16 +25,16 @@ from nucliadb_models.common import CloudLink, File
26
25
 
27
26
 
28
27
  class FieldFile(BaseModel):
29
- added: Optional[datetime] = None
30
- file: Optional[CloudLink] = None
31
- language: Optional[str] = None
32
- password: Optional[str] = None
28
+ added: datetime | None = None
29
+ file: CloudLink | None = None
30
+ language: str | None = None
31
+ password: str | None = None
33
32
  external: bool = False
34
- extract_strategy: Optional[str] = Field(
33
+ extract_strategy: str | None = Field(
35
34
  default=None,
36
35
  description="Id of the Nuclia extract strategy used at processing time. If not set, the default strategy was used. Extract strategies are defined at the learning configuration api.",
37
36
  )
38
- split_strategy: Optional[str] = Field(
37
+ split_strategy: str | None = Field(
39
38
  default=None,
40
39
  description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
41
40
  )
@@ -45,14 +44,14 @@ class FieldFile(BaseModel):
45
44
 
46
45
 
47
46
  class FileField(BaseModel):
48
- language: Optional[str] = None
49
- password: Optional[str] = None
47
+ language: str | None = None
48
+ password: str | None = None
50
49
  file: File
51
- extract_strategy: Optional[str] = Field(
50
+ extract_strategy: str | None = Field(
52
51
  default=None,
53
52
  description="Id of the Nuclia extract strategy to use at processing time. If not set, the default strategy will be used. Extract strategies are defined at the learning configuration api.",
54
53
  )
55
- split_strategy: Optional[str] = Field(
54
+ split_strategy: str | None = Field(
56
55
  default=None,
57
56
  description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
58
57
  )