nucliadb-models 6.9.7.post5550__py3-none-any.whl → 6.10.0.post5792__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nucliadb-models might be problematic. Click here for more details.
- nucliadb_models/agents/ingestion.py +4 -4
- nucliadb_models/augment.py +187 -78
- nucliadb_models/common.py +56 -56
- nucliadb_models/configuration.py +8 -8
- nucliadb_models/content_types.py +13 -11
- nucliadb_models/conversation.py +25 -26
- nucliadb_models/entities.py +17 -18
- nucliadb_models/external_index_providers.py +1 -2
- nucliadb_models/extracted.py +82 -83
- nucliadb_models/file.py +10 -11
- nucliadb_models/filters.py +79 -75
- nucliadb_models/graph/requests.py +40 -48
- nucliadb_models/graph/responses.py +13 -1
- nucliadb_models/hydration.py +48 -50
- nucliadb_models/internal/predict.py +7 -9
- nucliadb_models/internal/shards.py +2 -3
- nucliadb_models/labels.py +18 -11
- nucliadb_models/link.py +18 -19
- nucliadb_models/metadata.py +66 -54
- nucliadb_models/notifications.py +3 -3
- nucliadb_models/processing.py +1 -2
- nucliadb_models/resource.py +85 -102
- nucliadb_models/retrieval.py +147 -0
- nucliadb_models/search.py +297 -275
- nucliadb_models/security.py +2 -3
- nucliadb_models/text.py +7 -8
- nucliadb_models/trainset.py +1 -2
- nucliadb_models/utils.py +2 -3
- nucliadb_models/vectors.py +2 -5
- nucliadb_models/writer.py +56 -57
- {nucliadb_models-6.9.7.post5550.dist-info → nucliadb_models-6.10.0.post5792.dist-info}/METADATA +1 -1
- nucliadb_models-6.10.0.post5792.dist-info/RECORD +41 -0
- nucliadb_models-6.9.7.post5550.dist-info/RECORD +0 -40
- {nucliadb_models-6.9.7.post5550.dist-info → nucliadb_models-6.10.0.post5792.dist-info}/WHEEL +0 -0
- {nucliadb_models-6.9.7.post5550.dist-info → nucliadb_models-6.10.0.post5792.dist-info}/top_level.txt +0 -0
nucliadb_models/hydration.py
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
-
from typing import Annotated
|
|
15
|
+
from typing import Annotated
|
|
16
16
|
|
|
17
17
|
from pydantic import BaseModel, Field, StringConstraints
|
|
18
18
|
|
|
@@ -105,23 +105,23 @@ class GenericFieldHydration(BaseModel, extra="forbid"):
|
|
|
105
105
|
|
|
106
106
|
|
|
107
107
|
class FieldHydration(BaseModel, extra="forbid"):
|
|
108
|
-
text:
|
|
108
|
+
text: TextFieldHydration | None = Field(
|
|
109
109
|
default_factory=TextFieldHydration,
|
|
110
110
|
description="Text fields hydration options",
|
|
111
111
|
)
|
|
112
|
-
file:
|
|
112
|
+
file: FileFieldHydration | None = Field(
|
|
113
113
|
default_factory=FileFieldHydration,
|
|
114
114
|
description="File fields hydration options",
|
|
115
115
|
)
|
|
116
|
-
link:
|
|
116
|
+
link: LinkFieldHydration | None = Field(
|
|
117
117
|
default_factory=LinkFieldHydration,
|
|
118
118
|
description="Link fields hydration options",
|
|
119
119
|
)
|
|
120
|
-
conversation:
|
|
120
|
+
conversation: ConversationFieldHydration | None = Field(
|
|
121
121
|
default_factory=ConversationFieldHydration,
|
|
122
122
|
description="Conversation fields hydration options",
|
|
123
123
|
)
|
|
124
|
-
generic:
|
|
124
|
+
generic: GenericFieldHydration | None = Field(
|
|
125
125
|
default_factory=GenericFieldHydration,
|
|
126
126
|
description="Generic fields hydration options",
|
|
127
127
|
)
|
|
@@ -141,7 +141,7 @@ class NeighbourParagraphHydration(BaseModel, extra="forbid"):
|
|
|
141
141
|
|
|
142
142
|
|
|
143
143
|
class RelatedParagraphHydration(BaseModel, extra="forbid"):
|
|
144
|
-
neighbours:
|
|
144
|
+
neighbours: NeighbourParagraphHydration | None = Field(
|
|
145
145
|
default=None,
|
|
146
146
|
description="Hydrate extra paragraphs that surround the original one",
|
|
147
147
|
)
|
|
@@ -205,11 +205,11 @@ class ParagraphHydration(BaseModel, extra="forbid"):
|
|
|
205
205
|
default=True,
|
|
206
206
|
description="Hydrate paragraph text",
|
|
207
207
|
)
|
|
208
|
-
image:
|
|
208
|
+
image: ImageParagraphHydration | None = Field(
|
|
209
209
|
default=None,
|
|
210
210
|
description="Hydrate options for paragraphs extracted from images (using OCR, inception...)",
|
|
211
211
|
)
|
|
212
|
-
table:
|
|
212
|
+
table: TableParagraphHydration | None = Field(
|
|
213
213
|
default=None,
|
|
214
214
|
description="Hydrate options for paragraphs extracted from tables",
|
|
215
215
|
)
|
|
@@ -217,19 +217,19 @@ class ParagraphHydration(BaseModel, extra="forbid"):
|
|
|
217
217
|
# TODO: at some point, we should add hydration options for paragraphs from
|
|
218
218
|
# audio and video
|
|
219
219
|
|
|
220
|
-
page:
|
|
220
|
+
page: ParagraphPageHydration | None = Field(
|
|
221
221
|
default=None,
|
|
222
222
|
description="Hydrte options for paragraphs within a page. This applies to paragraphs in fields with pages",
|
|
223
223
|
)
|
|
224
224
|
|
|
225
|
-
related:
|
|
225
|
+
related: RelatedParagraphHydration | None = Field(
|
|
226
226
|
default=None,
|
|
227
227
|
description="Hydration options for related paragraphs. For example, neighbours or sibling paragraphs",
|
|
228
228
|
)
|
|
229
229
|
|
|
230
230
|
|
|
231
231
|
class Hydration(BaseModel, extra="forbid"):
|
|
232
|
-
resource:
|
|
232
|
+
resource: ResourceHydration | None = Field(
|
|
233
233
|
default_factory=ResourceHydration,
|
|
234
234
|
description="Resource hydration options",
|
|
235
235
|
)
|
|
@@ -270,22 +270,22 @@ class HydratedResource(BaseModel, extra="forbid"):
|
|
|
270
270
|
id: str = Field(description="Unique resource id")
|
|
271
271
|
slug: str = Field(description="Resource slug")
|
|
272
272
|
|
|
273
|
-
title:
|
|
274
|
-
summary:
|
|
273
|
+
title: str | None = None
|
|
274
|
+
summary: str | None = None
|
|
275
275
|
|
|
276
|
-
origin:
|
|
276
|
+
origin: Origin | None = None
|
|
277
277
|
|
|
278
|
-
security:
|
|
278
|
+
security: ResourceSecurity | None = None
|
|
279
279
|
|
|
280
280
|
# TODO: add resource labels to hydrated resources
|
|
281
281
|
|
|
282
282
|
|
|
283
283
|
class FieldExtractedData(BaseModel, extra="forbid"):
|
|
284
|
-
text:
|
|
284
|
+
text: str | None = None
|
|
285
285
|
|
|
286
286
|
|
|
287
287
|
class SplitFieldExtractedData(BaseModel, extra="forbid"):
|
|
288
|
-
texts:
|
|
288
|
+
texts: dict[str, str] | None = None
|
|
289
289
|
|
|
290
290
|
|
|
291
291
|
class HydratedTextField(BaseModel, extra="forbid"):
|
|
@@ -293,8 +293,8 @@ class HydratedTextField(BaseModel, extra="forbid"):
|
|
|
293
293
|
resource: str = Field("Field resource id")
|
|
294
294
|
field_type: FieldTypeName = FieldTypeName.TEXT
|
|
295
295
|
|
|
296
|
-
value:
|
|
297
|
-
extracted:
|
|
296
|
+
value: FieldText | None = None
|
|
297
|
+
extracted: FieldExtractedData | None = None
|
|
298
298
|
|
|
299
299
|
|
|
300
300
|
class HydratedFileField(BaseModel, extra="forbid"):
|
|
@@ -302,10 +302,10 @@ class HydratedFileField(BaseModel, extra="forbid"):
|
|
|
302
302
|
resource: str = Field("Field resource id")
|
|
303
303
|
field_type: FieldTypeName = FieldTypeName.FILE
|
|
304
304
|
|
|
305
|
-
value:
|
|
306
|
-
extracted:
|
|
305
|
+
value: FieldFile | None = None
|
|
306
|
+
extracted: FieldExtractedData | None = None
|
|
307
307
|
|
|
308
|
-
previews:
|
|
308
|
+
previews: dict[str, Image] | None = Field(
|
|
309
309
|
default=None,
|
|
310
310
|
title="Previews of specific parts of the field",
|
|
311
311
|
description=(
|
|
@@ -323,8 +323,8 @@ class HydratedLinkField(BaseModel, extra="forbid"):
|
|
|
323
323
|
resource: str = Field("Field resource id")
|
|
324
324
|
field_type: FieldTypeName = FieldTypeName.LINK
|
|
325
325
|
|
|
326
|
-
value:
|
|
327
|
-
extracted:
|
|
326
|
+
value: FieldLink | None = None
|
|
327
|
+
extracted: FieldExtractedData | None = None
|
|
328
328
|
|
|
329
329
|
|
|
330
330
|
class HydratedConversationField(BaseModel, extra="forbid"):
|
|
@@ -332,8 +332,8 @@ class HydratedConversationField(BaseModel, extra="forbid"):
|
|
|
332
332
|
resource: str = Field("Field resource id")
|
|
333
333
|
field_type: FieldTypeName = FieldTypeName.CONVERSATION
|
|
334
334
|
|
|
335
|
-
value:
|
|
336
|
-
extracted:
|
|
335
|
+
value: FieldConversation | None = None
|
|
336
|
+
extracted: FieldExtractedData | None = None
|
|
337
337
|
|
|
338
338
|
|
|
339
339
|
class HydratedGenericField(BaseModel, extra="forbid"):
|
|
@@ -341,24 +341,24 @@ class HydratedGenericField(BaseModel, extra="forbid"):
|
|
|
341
341
|
resource: str = Field("Field resource id")
|
|
342
342
|
field_type: FieldTypeName = FieldTypeName.TEXT
|
|
343
343
|
|
|
344
|
-
value:
|
|
345
|
-
extracted:
|
|
344
|
+
value: str | None = None
|
|
345
|
+
extracted: FieldExtractedData | None = None
|
|
346
346
|
|
|
347
347
|
|
|
348
348
|
class RelatedNeighbourParagraphRefs(BaseModel, extra="forbid"):
|
|
349
|
-
before:
|
|
350
|
-
after:
|
|
349
|
+
before: list[str] | None = None
|
|
350
|
+
after: list[str] | None = None
|
|
351
351
|
|
|
352
352
|
|
|
353
353
|
class RelatedParagraphRefs(BaseModel, extra="forbid"):
|
|
354
|
-
neighbours:
|
|
355
|
-
parents:
|
|
356
|
-
siblings:
|
|
357
|
-
replacements:
|
|
354
|
+
neighbours: RelatedNeighbourParagraphRefs | None = None
|
|
355
|
+
parents: list[str] | None = None
|
|
356
|
+
siblings: list[str] | None = None
|
|
357
|
+
replacements: list[str] | None = None
|
|
358
358
|
|
|
359
359
|
|
|
360
360
|
class HydratedParagraphImage(BaseModel, extra="forbid"):
|
|
361
|
-
source_image:
|
|
361
|
+
source_image: Image | None = Field(
|
|
362
362
|
default=None,
|
|
363
363
|
description=(
|
|
364
364
|
"Source image for this paragraph. This only applies to paragraphs "
|
|
@@ -369,7 +369,7 @@ class HydratedParagraphImage(BaseModel, extra="forbid"):
|
|
|
369
369
|
|
|
370
370
|
|
|
371
371
|
class HydratedParagraphTable(BaseModel, extra="forbid"):
|
|
372
|
-
page_preview_ref:
|
|
372
|
+
page_preview_ref: str | None = Field(
|
|
373
373
|
default=None,
|
|
374
374
|
description=(
|
|
375
375
|
"Referento to the page preview for this paragraph. The actual "
|
|
@@ -381,7 +381,7 @@ class HydratedParagraphTable(BaseModel, extra="forbid"):
|
|
|
381
381
|
|
|
382
382
|
|
|
383
383
|
class HydratedParagraphPage(BaseModel, extra="forbid"):
|
|
384
|
-
page_preview_ref:
|
|
384
|
+
page_preview_ref: str | None = Field(
|
|
385
385
|
default=None,
|
|
386
386
|
description=(
|
|
387
387
|
"Reference to the page preview for this paragraph. The actual "
|
|
@@ -398,28 +398,26 @@ class HydratedParagraph(BaseModel, extra="forbid"):
|
|
|
398
398
|
field: str = Field(description="Paragraph field id")
|
|
399
399
|
resource: str = Field(description="Paragraph resource id")
|
|
400
400
|
|
|
401
|
-
text:
|
|
401
|
+
text: str | None = None
|
|
402
402
|
|
|
403
403
|
# TODO: add labels to hydrated paragraphs
|
|
404
404
|
# labels: Optional[list[str]] = None
|
|
405
405
|
|
|
406
|
-
related:
|
|
406
|
+
related: RelatedParagraphRefs | None = None
|
|
407
407
|
|
|
408
|
-
image:
|
|
409
|
-
table:
|
|
410
|
-
page:
|
|
408
|
+
image: HydratedParagraphImage | None = None
|
|
409
|
+
table: HydratedParagraphTable | None = None
|
|
410
|
+
page: HydratedParagraphPage | None = None
|
|
411
411
|
|
|
412
412
|
|
|
413
413
|
class Hydrated(BaseModel, extra="forbid"):
|
|
414
414
|
resources: dict[str, HydratedResource]
|
|
415
415
|
fields: dict[
|
|
416
416
|
str,
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
HydratedGenericField,
|
|
423
|
-
],
|
|
417
|
+
HydratedTextField
|
|
418
|
+
| HydratedFileField
|
|
419
|
+
| HydratedLinkField
|
|
420
|
+
| HydratedConversationField
|
|
421
|
+
| HydratedGenericField,
|
|
424
422
|
]
|
|
425
423
|
paragraphs: dict[str, HydratedParagraph]
|
|
nucliadb_models/internal/predict.py
CHANGED
|
@@ -19,13 +19,11 @@ Models for Predict API v1.
|
|
|
19
19
|
ATENTION! Keep these models in sync with models on Predict API
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
from typing import List, Optional
|
|
23
|
-
|
|
24
22
|
from pydantic import BaseModel, Field
|
|
25
23
|
|
|
26
24
|
|
|
27
25
|
class SentenceSearch(BaseModel):
|
|
28
|
-
vectors: dict[str,
|
|
26
|
+
vectors: dict[str, list[float]] = Field(
|
|
29
27
|
default_factory=dict,
|
|
30
28
|
description="Sentence vectors for each semantic model",
|
|
31
29
|
min_length=1,
|
|
@@ -45,14 +43,14 @@ class Ner(BaseModel):
|
|
|
45
43
|
|
|
46
44
|
|
|
47
45
|
class TokenSearch(BaseModel):
|
|
48
|
-
tokens:
|
|
46
|
+
tokens: list[Ner] = []
|
|
49
47
|
time: float
|
|
50
48
|
input_tokens: int = 0
|
|
51
49
|
|
|
52
50
|
|
|
53
51
|
class QueryInfo(BaseModel):
|
|
54
|
-
language:
|
|
55
|
-
stop_words:
|
|
52
|
+
language: str | None
|
|
53
|
+
stop_words: list[str] = Field(default_factory=list)
|
|
56
54
|
semantic_thresholds: dict[str, float] = Field(
|
|
57
55
|
default_factory=dict,
|
|
58
56
|
description="Semantic threshold for each semantic model",
|
|
@@ -60,10 +58,10 @@ class QueryInfo(BaseModel):
|
|
|
60
58
|
)
|
|
61
59
|
visual_llm: bool
|
|
62
60
|
max_context: int
|
|
63
|
-
entities:
|
|
64
|
-
sentence:
|
|
61
|
+
entities: TokenSearch | None
|
|
62
|
+
sentence: SentenceSearch | None
|
|
65
63
|
query: str
|
|
66
|
-
rephrased_query:
|
|
64
|
+
rephrased_query: str | None = None
|
|
67
65
|
|
|
68
66
|
|
|
69
67
|
class RerankModel(BaseModel):
|
|
nucliadb_models/internal/shards.py
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
from enum import Enum
|
|
16
|
-
from typing import List, Optional
|
|
17
16
|
|
|
18
17
|
from pydantic import BaseModel
|
|
19
18
|
|
|
@@ -58,9 +57,9 @@ class ShardReplica(BaseModel):
|
|
|
58
57
|
|
|
59
58
|
class ShardObject(BaseModel):
|
|
60
59
|
shard: str
|
|
61
|
-
nidx_shard_id:
|
|
60
|
+
nidx_shard_id: str | None
|
|
62
61
|
|
|
63
62
|
|
|
64
63
|
class KnowledgeboxShards(BaseModel):
|
|
65
64
|
kbid: str
|
|
66
|
-
shards:
|
|
65
|
+
shards: list[ShardObject]
|
nucliadb_models/labels.py
CHANGED
|
@@ -14,9 +14,8 @@
|
|
|
14
14
|
#
|
|
15
15
|
|
|
16
16
|
from enum import Enum
|
|
17
|
-
from typing import Dict, List, Optional
|
|
18
17
|
|
|
19
|
-
from pydantic import BaseModel, model_validator
|
|
18
|
+
from pydantic import BaseModel, Field, model_validator
|
|
20
19
|
from typing_extensions import Self
|
|
21
20
|
|
|
22
21
|
BASE_LABELS: dict[str, set[str]] = {
|
|
@@ -96,18 +95,26 @@ class LabelSetKind(str, Enum):
|
|
|
96
95
|
|
|
97
96
|
|
|
98
97
|
class Label(BaseModel):
|
|
99
|
-
title: str
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
98
|
+
title: str = Field(
|
|
99
|
+
description="Title of the label. This is the display name for the label shown in the UI and also used for searching."
|
|
100
|
+
)
|
|
101
|
+
related: str | None = None
|
|
102
|
+
text: str | None = None
|
|
103
|
+
uri: str | None = None
|
|
103
104
|
|
|
104
105
|
|
|
105
106
|
class LabelSet(BaseModel):
|
|
106
|
-
title:
|
|
107
|
-
|
|
107
|
+
title: str | None = Field(
|
|
108
|
+
default=None,
|
|
109
|
+
description="Title of the labelset. It is a prettier display name for the labelset shown in the UI but it is not intended to be used for searching.",
|
|
110
|
+
)
|
|
111
|
+
color: str | None = "blue"
|
|
108
112
|
multiple: bool = True
|
|
109
|
-
kind:
|
|
110
|
-
labels:
|
|
113
|
+
kind: list[LabelSetKind] = []
|
|
114
|
+
labels: list[Label] = Field(
|
|
115
|
+
default_factory=list,
|
|
116
|
+
description="List of labels in the labelset. The titles of the labels must be unique within the labelset.",
|
|
117
|
+
)
|
|
111
118
|
|
|
112
119
|
@model_validator(mode="after")
|
|
113
120
|
def check_unique_labels(self) -> Self:
|
|
@@ -123,4 +130,4 @@ class LabelSet(BaseModel):
|
|
|
123
130
|
|
|
124
131
|
class KnowledgeBoxLabels(BaseModel):
|
|
125
132
|
uuid: str
|
|
126
|
-
labelsets:
|
|
133
|
+
labelsets: dict[str, LabelSet] = {}
|
nucliadb_models/link.py
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
from datetime import datetime
|
|
16
|
-
from typing import Dict, Optional
|
|
17
16
|
|
|
18
17
|
from pydantic import BaseModel, Field
|
|
19
18
|
|
|
@@ -25,19 +24,19 @@ from pydantic import BaseModel, Field
|
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
class FieldLink(BaseModel):
|
|
28
|
-
added:
|
|
29
|
-
headers:
|
|
30
|
-
cookies:
|
|
31
|
-
uri:
|
|
32
|
-
language:
|
|
33
|
-
localstorage:
|
|
34
|
-
css_selector:
|
|
35
|
-
xpath:
|
|
36
|
-
extract_strategy:
|
|
27
|
+
added: datetime | None = None
|
|
28
|
+
headers: dict[str, str] | None = None
|
|
29
|
+
cookies: dict[str, str] | None = None
|
|
30
|
+
uri: str | None = None
|
|
31
|
+
language: str | None = None
|
|
32
|
+
localstorage: dict[str, str] | None = None
|
|
33
|
+
css_selector: str | None = None
|
|
34
|
+
xpath: str | None = None
|
|
35
|
+
extract_strategy: str | None = Field(
|
|
37
36
|
default=None,
|
|
38
37
|
description="Id of the Nuclia extract strategy used at processing time. If not set, the default strategy was used. Extract strategies are defined at the learning configuration api.",
|
|
39
38
|
)
|
|
40
|
-
split_strategy:
|
|
39
|
+
split_strategy: str | None = Field(
|
|
41
40
|
default=None,
|
|
42
41
|
description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
|
|
43
42
|
)
|
|
@@ -47,18 +46,18 @@ class FieldLink(BaseModel):
|
|
|
47
46
|
|
|
48
47
|
|
|
49
48
|
class LinkField(BaseModel):
|
|
50
|
-
headers:
|
|
51
|
-
cookies:
|
|
49
|
+
headers: dict[str, str] | None = {}
|
|
50
|
+
cookies: dict[str, str] | None = {}
|
|
52
51
|
uri: str
|
|
53
|
-
language:
|
|
54
|
-
localstorage:
|
|
55
|
-
css_selector:
|
|
56
|
-
xpath:
|
|
57
|
-
extract_strategy:
|
|
52
|
+
language: str | None = None
|
|
53
|
+
localstorage: dict[str, str] | None = {}
|
|
54
|
+
css_selector: str | None = None
|
|
55
|
+
xpath: str | None = None
|
|
56
|
+
extract_strategy: str | None = Field(
|
|
58
57
|
default=None,
|
|
59
58
|
description="Id of the Nuclia extract strategy to use at processing time. If not set, the default strategy will be used. Extract strategies are defined at the learning configuration api.",
|
|
60
59
|
)
|
|
61
|
-
split_strategy:
|
|
60
|
+
split_strategy: str | None = Field(
|
|
62
61
|
default=None,
|
|
63
62
|
description="Id of the Nuclia split strategy used at processing time. If not set, the default strategy was used. Split strategies are defined at the learning configuration api.",
|
|
64
63
|
)
|
nucliadb_models/metadata.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import warnings
|
|
16
16
|
from datetime import datetime
|
|
17
17
|
from enum import Enum
|
|
18
|
-
from typing import Any
|
|
18
|
+
from typing import Any
|
|
19
19
|
|
|
20
20
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
21
21
|
from typing_extensions import Self
|
|
@@ -30,7 +30,7 @@ class EntityRelation(BaseModel):
|
|
|
30
30
|
entity_type: str
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
class RelationType(Enum):
|
|
33
|
+
class RelationType(str, Enum):
|
|
34
34
|
ABOUT = "ABOUT"
|
|
35
35
|
CHILD = "CHILD"
|
|
36
36
|
COLAB = "COLAB"
|
|
@@ -49,7 +49,7 @@ class RelationNodeType(str, Enum):
|
|
|
49
49
|
class RelationEntity(BaseModel):
|
|
50
50
|
value: str
|
|
51
51
|
type: RelationNodeType
|
|
52
|
-
group:
|
|
52
|
+
group: str | None = None
|
|
53
53
|
|
|
54
54
|
@model_validator(mode="after")
|
|
55
55
|
def check_relation_is_valid(self) -> Self:
|
|
@@ -60,20 +60,20 @@ class RelationEntity(BaseModel):
|
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
class RelationMetadata(BaseModel):
|
|
63
|
-
paragraph_id:
|
|
64
|
-
source_start:
|
|
65
|
-
source_end:
|
|
66
|
-
to_start:
|
|
67
|
-
to_end:
|
|
68
|
-
data_augmentation_task_id:
|
|
63
|
+
paragraph_id: str | None = None
|
|
64
|
+
source_start: int | None = None
|
|
65
|
+
source_end: int | None = None
|
|
66
|
+
to_start: int | None = None
|
|
67
|
+
to_end: int | None = None
|
|
68
|
+
data_augmentation_task_id: str | None = None
|
|
69
69
|
|
|
70
70
|
|
|
71
71
|
class Relation(BaseModel):
|
|
72
72
|
relation: RelationType
|
|
73
|
-
label:
|
|
74
|
-
metadata:
|
|
73
|
+
label: str | None = None
|
|
74
|
+
metadata: RelationMetadata | None = None
|
|
75
75
|
|
|
76
|
-
from_:
|
|
76
|
+
from_: RelationEntity | None = Field(default=None, alias="from")
|
|
77
77
|
to: RelationEntity
|
|
78
78
|
|
|
79
79
|
@model_validator(mode="after")
|
|
@@ -100,9 +100,9 @@ class Relation(BaseModel):
|
|
|
100
100
|
|
|
101
101
|
|
|
102
102
|
class InputMetadata(BaseModel):
|
|
103
|
-
metadata:
|
|
104
|
-
language:
|
|
105
|
-
languages:
|
|
103
|
+
metadata: dict[str, str] = {}
|
|
104
|
+
language: str | None = None
|
|
105
|
+
languages: list[str] | None = Field(default=None, max_length=1024)
|
|
106
106
|
|
|
107
107
|
|
|
108
108
|
class ResourceProcessingStatus(Enum):
|
|
@@ -120,7 +120,7 @@ class Metadata(InputMetadata):
|
|
|
120
120
|
|
|
121
121
|
class FieldClassification(BaseModel):
|
|
122
122
|
field: FieldID
|
|
123
|
-
classifications:
|
|
123
|
+
classifications: list[Classification] = []
|
|
124
124
|
|
|
125
125
|
|
|
126
126
|
class ComputedMetadata(BaseModel):
|
|
@@ -129,12 +129,12 @@ class ComputedMetadata(BaseModel):
|
|
|
129
129
|
without having to load the whole computed metadata field.
|
|
130
130
|
"""
|
|
131
131
|
|
|
132
|
-
field_classifications:
|
|
132
|
+
field_classifications: list[FieldClassification] = []
|
|
133
133
|
|
|
134
134
|
|
|
135
135
|
class UserMetadata(BaseModel):
|
|
136
|
-
classifications:
|
|
137
|
-
relations:
|
|
136
|
+
classifications: list[UserClassification] = []
|
|
137
|
+
relations: list[Relation] = []
|
|
138
138
|
|
|
139
139
|
|
|
140
140
|
class TokenSplit(BaseModel):
|
|
@@ -154,7 +154,7 @@ class TokenSplit(BaseModel):
|
|
|
154
154
|
|
|
155
155
|
|
|
156
156
|
class ParagraphAnnotation(BaseModel):
|
|
157
|
-
classifications:
|
|
157
|
+
classifications: list[UserClassification] = []
|
|
158
158
|
key: str
|
|
159
159
|
|
|
160
160
|
|
|
@@ -169,12 +169,12 @@ class VisualSelection(BaseModel):
|
|
|
169
169
|
left: float
|
|
170
170
|
right: float
|
|
171
171
|
bottom: float
|
|
172
|
-
token_ids:
|
|
172
|
+
token_ids: list[int]
|
|
173
173
|
|
|
174
174
|
|
|
175
175
|
class PageSelections(BaseModel):
|
|
176
176
|
page: int
|
|
177
|
-
visual:
|
|
177
|
+
visual: list[VisualSelection]
|
|
178
178
|
|
|
179
179
|
def __init__(self, **data):
|
|
180
180
|
warnings.warn(
|
|
@@ -190,57 +190,69 @@ class UserFieldMetadata(BaseModel):
|
|
|
190
190
|
Field-level metadata set by the user via the rest api
|
|
191
191
|
"""
|
|
192
192
|
|
|
193
|
-
paragraphs:
|
|
194
|
-
question_answers:
|
|
193
|
+
paragraphs: list[ParagraphAnnotation] = []
|
|
194
|
+
question_answers: list[QuestionAnswerAnnotation] = []
|
|
195
195
|
field: FieldID
|
|
196
196
|
|
|
197
197
|
|
|
198
198
|
class Basic(BaseModel):
|
|
199
|
-
icon:
|
|
200
|
-
title:
|
|
201
|
-
summary:
|
|
202
|
-
thumbnail:
|
|
203
|
-
created:
|
|
204
|
-
modified:
|
|
205
|
-
metadata:
|
|
206
|
-
usermetadata:
|
|
207
|
-
fieldmetadata:
|
|
208
|
-
computedmetadata:
|
|
209
|
-
uuid:
|
|
210
|
-
last_seqid:
|
|
211
|
-
last_account_seq:
|
|
199
|
+
icon: str | None = None
|
|
200
|
+
title: str | None = None
|
|
201
|
+
summary: str | None = None
|
|
202
|
+
thumbnail: str | None = None
|
|
203
|
+
created: datetime | None = None
|
|
204
|
+
modified: datetime | None = None
|
|
205
|
+
metadata: Metadata | None = None
|
|
206
|
+
usermetadata: UserMetadata | None = None
|
|
207
|
+
fieldmetadata: list[UserFieldMetadata] | None = None
|
|
208
|
+
computedmetadata: ComputedMetadata | None = None
|
|
209
|
+
uuid: str | None = None
|
|
210
|
+
last_seqid: int | None = None
|
|
211
|
+
last_account_seq: int | None = None
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class SyncMetadata(BaseModel):
|
|
215
|
+
file_id: str = Field(description="Identifier of the file in the origin cloud storage system")
|
|
216
|
+
auth_provider: str = Field(
|
|
217
|
+
description="Authentication provider used to access the origin cloud storage system"
|
|
218
|
+
)
|
|
212
219
|
|
|
213
220
|
|
|
214
221
|
class InputOrigin(BaseModel):
|
|
215
|
-
source_id:
|
|
216
|
-
url:
|
|
217
|
-
created:
|
|
222
|
+
source_id: str | None = None
|
|
223
|
+
url: str | None = None
|
|
224
|
+
created: DateTime | None = Field(
|
|
218
225
|
default=None,
|
|
219
226
|
description="Creation date of the resource at the origin system. This can be later used for date range filtering on search endpoints. Have a look at the advanced search documentation page: https://docs.nuclia.dev/docs/rag/advanced/search/#date-filtering",
|
|
220
227
|
)
|
|
221
|
-
modified:
|
|
228
|
+
modified: DateTime | None = Field(
|
|
222
229
|
default=None,
|
|
223
230
|
description="Modification date of the resource at the origin system. This can be later used for date range filtering on search endpoints. Have a look at the advanced search documentation page: https://docs.nuclia.dev/docs/rag/advanced/search/#date-filtering",
|
|
224
231
|
)
|
|
225
|
-
metadata:
|
|
232
|
+
metadata: dict[str, str] = Field(
|
|
226
233
|
default={},
|
|
227
234
|
title="Metadata",
|
|
228
235
|
description="Generic metadata from the resource at the origin system. It can later be used for filtering on search endpoints with '/origin.metadata/{key}/{value}'",
|
|
229
236
|
)
|
|
230
|
-
tags:
|
|
237
|
+
tags: list[str] = Field(
|
|
231
238
|
default=[],
|
|
232
239
|
title="Tags",
|
|
233
240
|
description="Resource tags about the origin system. It can later be used for filtering on search endpoints with '/origin.tags/{tag}'",
|
|
234
241
|
max_length=300,
|
|
235
242
|
)
|
|
236
|
-
collaborators:
|
|
237
|
-
filename:
|
|
238
|
-
related:
|
|
239
|
-
path:
|
|
243
|
+
collaborators: list[str] = Field(default=[], max_length=100)
|
|
244
|
+
filename: str | None = None
|
|
245
|
+
related: list[str] = Field(default=[], max_length=100)
|
|
246
|
+
path: str | None = Field(
|
|
240
247
|
default=None,
|
|
241
248
|
description="Path of the original resource. Typically used to store folder structure information of the resource at the origin system. It can be later used for filtering on search endpoints with '/origin.path/{path}'",
|
|
242
249
|
max_length=2048,
|
|
243
250
|
)
|
|
251
|
+
sync_metadata: SyncMetadata | None = Field(
|
|
252
|
+
default=None,
|
|
253
|
+
title="Sync Metadata",
|
|
254
|
+
description="Metadata related to the resource from the origin system fetched by the Progress Agentic RAG's Cloud Storage Sync service.",
|
|
255
|
+
)
|
|
244
256
|
|
|
245
257
|
@field_validator("tags")
|
|
246
258
|
def validate_tag_length(cls, tags):
|
|
@@ -253,10 +265,10 @@ class InputOrigin(BaseModel):
|
|
|
253
265
|
class Origin(InputOrigin):
|
|
254
266
|
# Created and modified are redefined to
|
|
255
267
|
# use native datetime objects and skip validation
|
|
256
|
-
created:
|
|
257
|
-
modified:
|
|
268
|
+
created: datetime | None = None
|
|
269
|
+
modified: datetime | None = None
|
|
258
270
|
|
|
259
|
-
tags:
|
|
271
|
+
tags: list[str] = Field(
|
|
260
272
|
default=[],
|
|
261
273
|
title="Tags",
|
|
262
274
|
description="Resource tags about the origin system. It can later be used for filtering on search endpoints with '/origin.tags/{tag}'",
|
|
@@ -268,16 +280,16 @@ class Origin(InputOrigin):
|
|
|
268
280
|
API = "API"
|
|
269
281
|
PYSDK = "PYSDK"
|
|
270
282
|
|
|
271
|
-
source:
|
|
283
|
+
source: Source | None = Source.API
|
|
272
284
|
|
|
273
285
|
|
|
274
286
|
class Extra(BaseModel):
|
|
275
|
-
metadata:
|
|
287
|
+
metadata: dict[Any, Any] = Field(
|
|
276
288
|
...,
|
|
277
289
|
title="Metadata",
|
|
278
|
-
description="Arbitrary JSON metadata provided by the user that is not meant to be searchable, but can be serialized on results.",
|
|
290
|
+
description="Arbitrary JSON metadata provided by the user that is not meant to be searchable, but can be serialized on results.",
|
|
279
291
|
)
|
|
280
292
|
|
|
281
293
|
|
|
282
294
|
class Relations(BaseModel):
|
|
283
|
-
relations:
|
|
295
|
+
relations: list[Relation] | None = None
|