nucliadb-models 6.9.6.post5453__py3-none-any.whl → 6.11.1.post5822__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nucliadb_models/agents/ingestion.py +4 -4
- nucliadb_models/augment.py +294 -24
- nucliadb_models/common.py +57 -57
- nucliadb_models/configuration.py +8 -8
- nucliadb_models/content_types.py +13 -11
- nucliadb_models/conversation.py +25 -26
- nucliadb_models/entities.py +17 -18
- nucliadb_models/external_index_providers.py +1 -2
- nucliadb_models/extracted.py +82 -83
- nucliadb_models/file.py +10 -11
- nucliadb_models/filters.py +79 -75
- nucliadb_models/graph/requests.py +40 -48
- nucliadb_models/graph/responses.py +13 -1
- nucliadb_models/hydration.py +48 -50
- nucliadb_models/internal/predict.py +7 -9
- nucliadb_models/internal/shards.py +2 -3
- nucliadb_models/labels.py +18 -11
- nucliadb_models/link.py +18 -19
- nucliadb_models/metadata.py +66 -54
- nucliadb_models/notifications.py +3 -3
- nucliadb_models/processing.py +1 -2
- nucliadb_models/resource.py +85 -102
- nucliadb_models/retrieval.py +147 -0
- nucliadb_models/search.py +300 -276
- nucliadb_models/security.py +2 -3
- nucliadb_models/text.py +7 -8
- nucliadb_models/trainset.py +1 -2
- nucliadb_models/utils.py +2 -3
- nucliadb_models/vectors.py +2 -5
- nucliadb_models/writer.py +56 -57
- {nucliadb_models-6.9.6.post5453.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/METADATA +1 -1
- nucliadb_models-6.11.1.post5822.dist-info/RECORD +41 -0
- {nucliadb_models-6.9.6.post5453.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/WHEEL +1 -1
- nucliadb_models-6.9.6.post5453.dist-info/RECORD +0 -40
- {nucliadb_models-6.9.6.post5453.dist-info → nucliadb_models-6.11.1.post5822.dist-info}/top_level.txt +0 -0
nucliadb_models/content_types.py
CHANGED
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
import mimetypes
|
|
17
|
-
from typing import Optional
|
|
18
17
|
|
|
19
18
|
GENERIC_MIME_TYPE = "application/generic"
|
|
20
19
|
|
|
@@ -26,7 +25,9 @@ NUCLIA_CUSTOM_CONTENT_TYPES = {
|
|
|
26
25
|
|
|
27
26
|
EXTRA_VALID_CONTENT_TYPES = {
|
|
28
27
|
"application/font-woff",
|
|
28
|
+
"application/javascript",
|
|
29
29
|
"application/mp4",
|
|
30
|
+
"application/rtf",
|
|
30
31
|
"application/toml",
|
|
31
32
|
"application/vnd.jgraph.mxfile",
|
|
32
33
|
"application/vnd.ms-excel.sheet.macroenabled.12",
|
|
@@ -38,6 +39,7 @@ EXTRA_VALID_CONTENT_TYPES = {
|
|
|
38
39
|
"application/x-git",
|
|
39
40
|
"application/x-gzip",
|
|
40
41
|
"application/x-iwork-pages-sffpages",
|
|
42
|
+
"application/x-javascript",
|
|
41
43
|
"application/x-mach-binary",
|
|
42
44
|
"application/x-mobipocket-ebook",
|
|
43
45
|
"application/x-ms-shortcut",
|
|
@@ -46,10 +48,15 @@ EXTRA_VALID_CONTENT_TYPES = {
|
|
|
46
48
|
"application/x-openscad",
|
|
47
49
|
"application/x-sql",
|
|
48
50
|
"application/x-zip-compressed",
|
|
51
|
+
"application/x-zip",
|
|
49
52
|
"application/zstd",
|
|
53
|
+
"audio/m4a",
|
|
50
54
|
"audio/vnd.dlna.adts",
|
|
51
55
|
"audio/wav",
|
|
52
56
|
"audio/x-m4a",
|
|
57
|
+
"image/svg+xml",
|
|
58
|
+
"image/tif",
|
|
59
|
+
"image/x-ico",
|
|
53
60
|
"model/stl",
|
|
54
61
|
"multipart/form-data",
|
|
55
62
|
"text/jsx",
|
|
@@ -58,26 +65,21 @@ EXTRA_VALID_CONTENT_TYPES = {
|
|
|
58
65
|
"text/rtf",
|
|
59
66
|
"text/x-c++",
|
|
60
67
|
"text/x-java-source",
|
|
68
|
+
"text/x-javascript",
|
|
61
69
|
"text/x-log",
|
|
62
70
|
"text/x-python-script",
|
|
63
71
|
"text/x-ruby-script",
|
|
64
72
|
"text/yaml",
|
|
65
|
-
"video/
|
|
66
|
-
"video/YouTube",
|
|
67
|
-
"image/tif",
|
|
73
|
+
"video/mkv",
|
|
68
74
|
"video/qt",
|
|
69
75
|
"video/webp",
|
|
70
|
-
"
|
|
71
|
-
"application/x-zip",
|
|
72
|
-
"video/mkv",
|
|
73
|
-
"image/x-ico",
|
|
74
|
-
"audio/m4a",
|
|
75
|
-
"image/svg+xml",
|
|
76
|
+
"video/x-m4v",
|
|
76
77
|
"video/x-msvideo",
|
|
78
|
+
"video/YouTube",
|
|
77
79
|
} | NUCLIA_CUSTOM_CONTENT_TYPES
|
|
78
80
|
|
|
79
81
|
|
|
80
|
-
def guess(filename: str) -> Optional[str]:
|
|
82
|
+
def guess(filename: str) -> str | None:
|
|
81
83
|
"""
|
|
82
84
|
Guess the content type of a file based on its filename.
|
|
83
85
|
Returns None if the content type could not be guessed.
|
nucliadb_models/conversation.py
CHANGED
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
#
|
|
15
15
|
from datetime import datetime
|
|
16
16
|
from enum import Enum
|
|
17
|
-
from typing import List, Optional
|
|
18
17
|
|
|
19
18
|
from pydantic import BaseModel, Field, field_validator
|
|
20
19
|
|
|
@@ -37,10 +36,10 @@ class MessageFormat(Enum):
|
|
|
37
36
|
|
|
38
37
|
|
|
39
38
|
class MessageContent(BaseModel):
|
|
40
|
-
text:
|
|
41
|
-
format:
|
|
42
|
-
attachments:
|
|
43
|
-
attachments_fields:
|
|
39
|
+
text: str | None = None
|
|
40
|
+
format: MessageFormat | None = None
|
|
41
|
+
attachments: list[CloudLink] | None = None
|
|
42
|
+
attachments_fields: list[FieldRef] = []
|
|
44
43
|
|
|
45
44
|
|
|
46
45
|
class MessageType(Enum):
|
|
@@ -50,12 +49,12 @@ class MessageType(Enum):
|
|
|
50
49
|
|
|
51
50
|
|
|
52
51
|
class Message(BaseModel):
|
|
53
|
-
timestamp:
|
|
54
|
-
who:
|
|
55
|
-
to:
|
|
52
|
+
timestamp: DateTime | None = None
|
|
53
|
+
who: str | None = None
|
|
54
|
+
to: list[str] | None = []
|
|
56
55
|
content: MessageContent
|
|
57
|
-
ident:
|
|
58
|
-
type_:
|
|
56
|
+
ident: str | None = None
|
|
57
|
+
type_: MessageType | None = Field(None, alias="type")
|
|
59
58
|
|
|
60
59
|
|
|
61
60
|
class Conversation(BaseModel):
|
|
@@ -64,7 +63,7 @@ class Conversation(BaseModel):
|
|
|
64
63
|
a conversation in the field level.
|
|
65
64
|
"""
|
|
66
65
|
|
|
67
|
-
messages:
|
|
66
|
+
messages: list[Message] | None = []
|
|
68
67
|
|
|
69
68
|
|
|
70
69
|
class FieldConversation(BaseModel):
|
|
@@ -75,11 +74,11 @@ class FieldConversation(BaseModel):
|
|
|
75
74
|
This class is used mainly when exposing a conversation in the resource level
|
|
76
75
|
"""
|
|
77
76
|
|
|
78
|
-
pages:
|
|
79
|
-
size:
|
|
80
|
-
total:
|
|
81
|
-
extract_strategy:
|
|
82
|
-
split_strategy:
|
|
77
|
+
pages: int | None = None
|
|
78
|
+
size: int | None = None
|
|
79
|
+
total: int | None = None
|
|
80
|
+
extract_strategy: str | None = None
|
|
81
|
+
split_strategy: str | None = None
|
|
83
82
|
|
|
84
83
|
|
|
85
84
|
# Creation and update classes (Those used on writer endpoints)
|
|
@@ -88,18 +87,18 @@ class FieldConversation(BaseModel):
|
|
|
88
87
|
class InputMessageContent(BaseModel):
|
|
89
88
|
text: str = Field()
|
|
90
89
|
format: MessageFormat = MessageFormat.PLAIN
|
|
91
|
-
attachments:
|
|
92
|
-
attachments_fields:
|
|
90
|
+
attachments: list[FileB64] = Field(default=[], max_length=50)
|
|
91
|
+
attachments_fields: list[FieldRef] = Field(default=[], max_length=50)
|
|
93
92
|
|
|
94
93
|
|
|
95
94
|
class InputMessage(BaseModel):
|
|
96
|
-
timestamp:
|
|
95
|
+
timestamp: datetime | None = Field(
|
|
97
96
|
default=None, description="Time at which the message was sent, in ISO 8601 format."
|
|
98
97
|
)
|
|
99
|
-
who:
|
|
98
|
+
who: str | None = Field(
|
|
100
99
|
default=None, description="Sender of the message, e.g. 'user' or 'assistant'"
|
|
101
100
|
)
|
|
102
|
-
to:
|
|
101
|
+
to: list[str] = Field(
|
|
103
102
|
default_factory=list,
|
|
104
103
|
description="List of recipients of the message, e.g. ['assistant'] or ['user']",
|
|
105
104
|
max_length=100,
|
|
@@ -109,7 +108,7 @@ class InputMessage(BaseModel):
|
|
|
109
108
|
description="Unique identifier for the message. Must be unique within the conversation.",
|
|
110
109
|
max_length=128,
|
|
111
110
|
)
|
|
112
|
-
type_:
|
|
111
|
+
type_: MessageType | None = Field(None, alias="type")
|
|
113
112
|
|
|
114
113
|
@field_validator("ident", mode="after")
|
|
115
114
|
@classmethod
|
|
@@ -126,22 +125,22 @@ class InputMessage(BaseModel):
|
|
|
126
125
|
|
|
127
126
|
|
|
128
127
|
class InputConversationField(BaseModel):
|
|
129
|
-
messages:
|
|
128
|
+
messages: list[InputMessage] = Field(
|
|
130
129
|
default_factory=list,
|
|
131
130
|
description="List of messages in the conversation field. Each message must have a unique ident. A single conversation can contain up to 51,200 messages. You can add up to 2,048 messages per request.",
|
|
132
131
|
)
|
|
133
|
-
extract_strategy:
|
|
132
|
+
extract_strategy: str | None = Field(
|
|
134
133
|
default=None,
|
|
135
134
|
description="Id of the Nuclia extract strategy used at processing time. If not set, the default strategy was used. Extract strategies are defined at the learning configuration api.",
|
|
136
135
|
)
|
|
137
|
-
split_strategy:
|
|
136
|
+
split_strategy: str | None = Field(
|
|
138
137
|
default=None,
|
|
139
138
|
description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
|
|
140
139
|
)
|
|
141
140
|
|
|
142
141
|
@field_validator("messages", mode="after")
|
|
143
142
|
@classmethod
|
|
144
|
-
def idents_are_unique(cls, value:
|
|
143
|
+
def idents_are_unique(cls, value: list[InputMessage]) -> list[InputMessage]:
|
|
145
144
|
seen_idents = set()
|
|
146
145
|
for message in value:
|
|
147
146
|
if message.ident in seen_idents:
|
nucliadb_models/entities.py
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
|
|
16
|
-
from typing import Dict, List, Optional
|
|
17
16
|
|
|
18
17
|
from pydantic import BaseModel, Field
|
|
19
18
|
|
|
@@ -21,50 +20,50 @@ from pydantic import BaseModel, Field
|
|
|
21
20
|
class Entity(BaseModel):
|
|
22
21
|
value: str
|
|
23
22
|
merged: bool = False
|
|
24
|
-
represents:
|
|
23
|
+
represents: list[str] = []
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
class EntitiesGroupSummary(BaseModel):
|
|
28
|
-
title:
|
|
29
|
-
color:
|
|
27
|
+
title: str | None = Field(default=None, description="Title of the entities group")
|
|
28
|
+
color: str | None = Field(
|
|
30
29
|
default=None,
|
|
31
30
|
description="Color of the entities group. This is for display purposes only.",
|
|
32
31
|
)
|
|
33
32
|
custom: bool = Field(default=False, description="Denotes if it has been created by the user")
|
|
34
33
|
|
|
35
|
-
entities:
|
|
34
|
+
entities: dict[str, Entity] = Field(
|
|
36
35
|
default={},
|
|
37
36
|
title="[Deprecated] Entities in the group",
|
|
38
|
-
description="This field is deprecated and will be removed in future versions. It will always be empty. Use the /api/v1/kb/{kbid}/entitiesgroup/{group} endpoint to get the entities of a group.",
|
|
37
|
+
description="This field is deprecated and will be removed in future versions. It will always be empty. Use the /api/v1/kb/{kbid}/entitiesgroup/{group} endpoint to get the entities of a group.",
|
|
39
38
|
)
|
|
40
39
|
|
|
41
40
|
|
|
42
41
|
class EntitiesGroup(BaseModel):
|
|
43
|
-
title:
|
|
44
|
-
color:
|
|
42
|
+
title: str | None = Field(default=None, description="Title of the entities group")
|
|
43
|
+
color: str | None = Field(
|
|
45
44
|
default=None,
|
|
46
45
|
description="Color of the entities group. This is for display purposes only.",
|
|
47
46
|
)
|
|
48
47
|
custom: bool = Field(default=False, description="Denotes if it has been created by the user")
|
|
49
|
-
entities:
|
|
48
|
+
entities: dict[str, Entity] = {}
|
|
50
49
|
|
|
51
50
|
|
|
52
51
|
class KnowledgeBoxEntities(BaseModel):
|
|
53
52
|
uuid: str
|
|
54
|
-
groups:
|
|
53
|
+
groups: dict[str, EntitiesGroupSummary] = {}
|
|
55
54
|
|
|
56
55
|
|
|
57
56
|
class CreateEntitiesGroupPayload(BaseModel):
|
|
58
57
|
group: str
|
|
59
|
-
entities:
|
|
60
|
-
title:
|
|
61
|
-
color:
|
|
58
|
+
entities: dict[str, Entity] = {}
|
|
59
|
+
title: str | None = None
|
|
60
|
+
color: str | None = None
|
|
62
61
|
|
|
63
62
|
|
|
64
63
|
class UpdateEntitiesGroupPayload(BaseModel):
|
|
65
|
-
title:
|
|
66
|
-
color:
|
|
64
|
+
title: str | None = None
|
|
65
|
+
color: str | None = None
|
|
67
66
|
|
|
68
|
-
add:
|
|
69
|
-
update:
|
|
70
|
-
delete:
|
|
67
|
+
add: dict[str, Entity] = {}
|
|
68
|
+
update: dict[str, Entity] = {}
|
|
69
|
+
delete: list[str] = []
|
nucliadb_models/external_index_providers.py
CHANGED
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
#
|
|
15
15
|
|
|
16
16
|
from enum import Enum
|
|
17
|
-
from typing import Union
|
|
18
17
|
|
|
19
18
|
from pydantic import BaseModel
|
|
20
19
|
|
|
@@ -36,4 +35,4 @@ class DummyIndexProvider(ExternalIndexProviderBase):
|
|
|
36
35
|
type: ExternalIndexProviderType = ExternalIndexProviderType.UNSET
|
|
37
36
|
|
|
38
37
|
|
|
39
|
-
ExternalIndexProvider =
|
|
38
|
+
ExternalIndexProvider = DummyIndexProvider
|
nucliadb_models/extracted.py
CHANGED
|
@@ -13,9 +13,8 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
from datetime import datetime
|
|
16
|
-
from typing import Dict, List, Optional
|
|
17
16
|
|
|
18
|
-
from pydantic import BaseModel
|
|
17
|
+
from pydantic import BaseModel, Field
|
|
19
18
|
|
|
20
19
|
from .common import (
|
|
21
20
|
Classification,
|
|
@@ -27,27 +26,27 @@ from .metadata import Relation
|
|
|
27
26
|
|
|
28
27
|
|
|
29
28
|
class ExtractedText(BaseModel):
|
|
30
|
-
text:
|
|
31
|
-
split_text:
|
|
32
|
-
deleted_splits:
|
|
29
|
+
text: str | None = None
|
|
30
|
+
split_text: dict[str, str] | None = None
|
|
31
|
+
deleted_splits: list[str] | None = None
|
|
33
32
|
|
|
34
33
|
|
|
35
34
|
class Vector(BaseModel):
|
|
36
|
-
start:
|
|
37
|
-
end:
|
|
38
|
-
start_paragraph:
|
|
39
|
-
end_paragraph:
|
|
40
|
-
vector:
|
|
35
|
+
start: int | None = None
|
|
36
|
+
end: int | None = None
|
|
37
|
+
start_paragraph: int | None = None
|
|
38
|
+
end_paragraph: int | None = None
|
|
39
|
+
vector: list[float] | None = None
|
|
41
40
|
|
|
42
41
|
|
|
43
42
|
class Vectors(BaseModel):
|
|
44
|
-
vectors:
|
|
43
|
+
vectors: list[Vector] | None = None
|
|
45
44
|
|
|
46
45
|
|
|
47
46
|
class VectorObject(BaseModel):
|
|
48
|
-
vectors:
|
|
49
|
-
split_vectors:
|
|
50
|
-
deleted_splits:
|
|
47
|
+
vectors: Vectors | None = None
|
|
48
|
+
split_vectors: dict[str, Vectors] | None = None
|
|
49
|
+
deleted_splits: list[str] | None = None
|
|
51
50
|
|
|
52
51
|
|
|
53
52
|
class Position(BaseModel):
|
|
@@ -56,14 +55,14 @@ class Position(BaseModel):
|
|
|
56
55
|
|
|
57
56
|
|
|
58
57
|
class Positions(BaseModel):
|
|
59
|
-
position:
|
|
58
|
+
position: list[Position]
|
|
60
59
|
entity: str
|
|
61
60
|
|
|
62
61
|
|
|
63
62
|
class FieldEntity(BaseModel):
|
|
64
63
|
text: str
|
|
65
64
|
label: str
|
|
66
|
-
positions:
|
|
65
|
+
positions: list[Position]
|
|
67
66
|
|
|
68
67
|
|
|
69
68
|
class FieldEntities(BaseModel):
|
|
@@ -71,91 +70,91 @@ class FieldEntities(BaseModel):
|
|
|
71
70
|
Wrapper for the entities extracted from a field (required because protobuf doesn't support lists of lists)
|
|
72
71
|
"""
|
|
73
72
|
|
|
74
|
-
entities:
|
|
73
|
+
entities: list[FieldEntity]
|
|
75
74
|
|
|
76
75
|
|
|
77
76
|
class FieldMetadata(BaseModel):
|
|
78
|
-
links:
|
|
79
|
-
paragraphs:
|
|
80
|
-
ner:
|
|
81
|
-
entities:
|
|
82
|
-
classifications:
|
|
83
|
-
last_index:
|
|
84
|
-
last_understanding:
|
|
85
|
-
last_extract:
|
|
86
|
-
last_summary:
|
|
87
|
-
last_processing_start:
|
|
88
|
-
thumbnail:
|
|
89
|
-
language:
|
|
90
|
-
summary:
|
|
91
|
-
positions:
|
|
92
|
-
relations:
|
|
93
|
-
mime_type:
|
|
77
|
+
links: list[str]
|
|
78
|
+
paragraphs: list[Paragraph]
|
|
79
|
+
ner: dict[str, str] # TODO: Remove once processor doesn't use this anymore
|
|
80
|
+
entities: dict[str, FieldEntities]
|
|
81
|
+
classifications: list[Classification]
|
|
82
|
+
last_index: datetime | None = None
|
|
83
|
+
last_understanding: datetime | None = None
|
|
84
|
+
last_extract: datetime | None = None
|
|
85
|
+
last_summary: datetime | None = None
|
|
86
|
+
last_processing_start: datetime | None = None
|
|
87
|
+
thumbnail: CloudLink | None = None
|
|
88
|
+
language: str | None = None
|
|
89
|
+
summary: str | None = None
|
|
90
|
+
positions: dict[str, Positions] # TODO: Remove once processor doesn't use this anymore
|
|
91
|
+
relations: list[Relation] | None = None
|
|
92
|
+
mime_type: str | None = None
|
|
94
93
|
|
|
95
94
|
|
|
96
95
|
class FieldComputedMetadata(BaseModel):
|
|
97
96
|
metadata: FieldMetadata
|
|
98
|
-
split_metadata:
|
|
99
|
-
deleted_splits:
|
|
97
|
+
split_metadata: dict[str, FieldMetadata] | None = None
|
|
98
|
+
deleted_splits: list[str] | None = None
|
|
100
99
|
|
|
101
100
|
|
|
102
101
|
class Entity(BaseModel):
|
|
103
|
-
token:
|
|
104
|
-
root:
|
|
105
|
-
type:
|
|
102
|
+
token: str | None = None
|
|
103
|
+
root: str | None = None
|
|
104
|
+
type: str | None = None
|
|
106
105
|
|
|
107
106
|
|
|
108
107
|
class FieldLargeMetadata(BaseModel):
|
|
109
|
-
entities:
|
|
110
|
-
tokens:
|
|
108
|
+
entities: list[Entity] | None = None
|
|
109
|
+
tokens: dict[str, int] | None = None
|
|
111
110
|
|
|
112
111
|
|
|
113
112
|
class LargeComputedMetadata(BaseModel):
|
|
114
|
-
metadata:
|
|
115
|
-
split_metadata:
|
|
116
|
-
deleted_splits:
|
|
113
|
+
metadata: FieldLargeMetadata | None = None
|
|
114
|
+
split_metadata: dict[str, FieldLargeMetadata] | None = None
|
|
115
|
+
deleted_splits: list[str] | None = None
|
|
117
116
|
|
|
118
117
|
|
|
119
118
|
class LinkExtractedData(BaseModel):
|
|
120
|
-
date:
|
|
121
|
-
language:
|
|
122
|
-
title:
|
|
123
|
-
metadata:
|
|
124
|
-
link_thumbnail:
|
|
125
|
-
link_preview:
|
|
126
|
-
field:
|
|
127
|
-
link_image:
|
|
128
|
-
description:
|
|
129
|
-
type:
|
|
130
|
-
embed:
|
|
131
|
-
file_generated:
|
|
119
|
+
date: datetime | None = None
|
|
120
|
+
language: str | None = None
|
|
121
|
+
title: str | None = None
|
|
122
|
+
metadata: dict[str, str] | None = None
|
|
123
|
+
link_thumbnail: CloudLink | None = None
|
|
124
|
+
link_preview: CloudLink | None = None
|
|
125
|
+
field: str | None = Field(default=None, title="Link Extracted Data Field")
|
|
126
|
+
link_image: CloudLink | None = None
|
|
127
|
+
description: str | None = None
|
|
128
|
+
type: str | None = None
|
|
129
|
+
embed: str | None = None
|
|
130
|
+
file_generated: dict[str, CloudLink] | None = None
|
|
132
131
|
|
|
133
132
|
|
|
134
133
|
class NestedPosition(BaseModel):
|
|
135
|
-
start:
|
|
136
|
-
end:
|
|
137
|
-
page:
|
|
134
|
+
start: int | None = None
|
|
135
|
+
end: int | None = None
|
|
136
|
+
page: int | None = Field(default=None, title="Position Page")
|
|
138
137
|
|
|
139
138
|
|
|
140
139
|
class NestedListPosition(BaseModel):
|
|
141
|
-
positions:
|
|
140
|
+
positions: list[NestedPosition]
|
|
142
141
|
|
|
143
142
|
|
|
144
143
|
class Row(BaseModel):
|
|
145
|
-
cell:
|
|
144
|
+
cell: list[str] | None = None
|
|
146
145
|
|
|
147
146
|
|
|
148
147
|
class Sheet(BaseModel):
|
|
149
|
-
rows:
|
|
148
|
+
rows: list[Row] | None = None
|
|
150
149
|
|
|
151
150
|
|
|
152
151
|
class RowsPreview(BaseModel):
|
|
153
|
-
sheets:
|
|
152
|
+
sheets: dict[str, Sheet] | None = None
|
|
154
153
|
|
|
155
154
|
|
|
156
155
|
class PagePositions(BaseModel):
|
|
157
|
-
start:
|
|
158
|
-
end:
|
|
156
|
+
start: int | None = None
|
|
157
|
+
end: int | None = None
|
|
159
158
|
|
|
160
159
|
|
|
161
160
|
class PageStructurePage(BaseModel):
|
|
@@ -174,32 +173,32 @@ class PageStructureToken(BaseModel):
|
|
|
174
173
|
|
|
175
174
|
class PageStructure(BaseModel):
|
|
176
175
|
page: PageStructurePage
|
|
177
|
-
tokens:
|
|
176
|
+
tokens: list[PageStructureToken]
|
|
178
177
|
|
|
179
178
|
|
|
180
179
|
class FilePages(BaseModel):
|
|
181
|
-
pages:
|
|
182
|
-
positions:
|
|
183
|
-
structures:
|
|
180
|
+
pages: list[CloudLink] | None = None
|
|
181
|
+
positions: list[PagePositions] | None = None
|
|
182
|
+
structures: list[PageStructure] | None = None
|
|
184
183
|
|
|
185
184
|
|
|
186
185
|
class FileExtractedData(BaseModel):
|
|
187
|
-
language:
|
|
188
|
-
md5:
|
|
189
|
-
metadata:
|
|
190
|
-
nested:
|
|
191
|
-
file_generated:
|
|
192
|
-
file_rows_previews:
|
|
193
|
-
file_preview:
|
|
194
|
-
file_pages_previews:
|
|
195
|
-
file_thumbnail:
|
|
196
|
-
field:
|
|
197
|
-
icon:
|
|
198
|
-
nested_position:
|
|
199
|
-
nested_list_position:
|
|
186
|
+
language: str | None = None
|
|
187
|
+
md5: str | None = None
|
|
188
|
+
metadata: dict[str, str] | None = None
|
|
189
|
+
nested: dict[str, str] | None = None
|
|
190
|
+
file_generated: dict[str, CloudLink] | None = None
|
|
191
|
+
file_rows_previews: dict[str, RowsPreview] | None = None
|
|
192
|
+
file_preview: CloudLink | None = None
|
|
193
|
+
file_pages_previews: FilePages | None = None
|
|
194
|
+
file_thumbnail: CloudLink | None = None
|
|
195
|
+
field: str | None = None
|
|
196
|
+
icon: str | None = None
|
|
197
|
+
nested_position: dict[str, NestedPosition] | None = None
|
|
198
|
+
nested_list_position: dict[str, NestedListPosition] | None = None
|
|
200
199
|
|
|
201
200
|
|
|
202
201
|
class FieldQuestionAnswers(BaseModel):
|
|
203
202
|
question_answers: QuestionAnswers
|
|
204
|
-
split_question_answers:
|
|
205
|
-
deleted_splits:
|
|
203
|
+
split_question_answers: dict[str, QuestionAnswers] | None = None
|
|
204
|
+
deleted_splits: list[str] | None = None
|
nucliadb_models/file.py
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
from datetime import datetime
|
|
16
|
-
from typing import Optional
|
|
17
16
|
|
|
18
17
|
from pydantic import BaseModel, Field
|
|
19
18
|
|
|
@@ -26,16 +25,16 @@ from nucliadb_models.common import CloudLink, File
|
|
|
26
25
|
|
|
27
26
|
|
|
28
27
|
class FieldFile(BaseModel):
|
|
29
|
-
added:
|
|
30
|
-
file:
|
|
31
|
-
language:
|
|
32
|
-
password:
|
|
28
|
+
added: datetime | None = None
|
|
29
|
+
file: CloudLink | None = None
|
|
30
|
+
language: str | None = None
|
|
31
|
+
password: str | None = None
|
|
33
32
|
external: bool = False
|
|
34
|
-
extract_strategy:
|
|
33
|
+
extract_strategy: str | None = Field(
|
|
35
34
|
default=None,
|
|
36
35
|
description="Id of the Nuclia extract strategy used at processing time. If not set, the default strategy was used. Extract strategies are defined at the learning configuration api.",
|
|
37
36
|
)
|
|
38
|
-
split_strategy:
|
|
37
|
+
split_strategy: str | None = Field(
|
|
39
38
|
default=None,
|
|
40
39
|
description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
|
|
41
40
|
)
|
|
@@ -45,14 +44,14 @@ class FieldFile(BaseModel):
|
|
|
45
44
|
|
|
46
45
|
|
|
47
46
|
class FileField(BaseModel):
|
|
48
|
-
language:
|
|
49
|
-
password:
|
|
47
|
+
language: str | None = None
|
|
48
|
+
password: str | None = None
|
|
50
49
|
file: File
|
|
51
|
-
extract_strategy:
|
|
50
|
+
extract_strategy: str | None = Field(
|
|
52
51
|
default=None,
|
|
53
52
|
description="Id of the Nuclia extract strategy to use at processing time. If not set, the default strategy will be used. Extract strategies are defined at the learning configuration api.",
|
|
54
53
|
)
|
|
55
|
-
split_strategy:
|
|
54
|
+
split_strategy: str | None = Field(
|
|
56
55
|
default=None,
|
|
57
56
|
description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
|
|
58
57
|
)
|