nucliadb-models 6.4.2.post4379__py3-none-any.whl → 6.4.2.post4403__py3-none-any.whl
This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- nucliadb_models/common.py +10 -0
- nucliadb_models/trainset.py +45 -1
- {nucliadb_models-6.4.2.post4379.dist-info → nucliadb_models-6.4.2.post4403.dist-info}/METADATA +1 -1
- {nucliadb_models-6.4.2.post4379.dist-info → nucliadb_models-6.4.2.post4403.dist-info}/RECORD +6 -6
- {nucliadb_models-6.4.2.post4379.dist-info → nucliadb_models-6.4.2.post4403.dist-info}/WHEEL +0 -0
- {nucliadb_models-6.4.2.post4379.dist-info → nucliadb_models-6.4.2.post4403.dist-info}/top_level.txt +0 -0
nucliadb_models/common.py
CHANGED
@@ -193,6 +193,16 @@ class FieldTypeName(str, Enum):
|
|
193
193
|
CONVERSATION = "conversation"
|
194
194
|
GENERIC = "generic"
|
195
195
|
|
196
|
+
@classmethod
|
197
|
+
def from_abbreviation(cls, abbr: str) -> "FieldTypeName":
|
198
|
+
return {
|
199
|
+
"t": FieldTypeName.TEXT,
|
200
|
+
"f": FieldTypeName.FILE,
|
201
|
+
"u": FieldTypeName.LINK,
|
202
|
+
"c": FieldTypeName.CONVERSATION,
|
203
|
+
"a": FieldTypeName.GENERIC,
|
204
|
+
}[abbr]
|
205
|
+
|
196
206
|
|
197
207
|
class FieldRef(BaseModel):
|
198
208
|
field_type: FieldTypeName
|
nucliadb_models/trainset.py
CHANGED
@@ -14,8 +14,52 @@
|
|
14
14
|
#
|
15
15
|
|
16
16
|
|
17
|
-
from
|
17
|
+
from enum import Enum
|
18
|
+
from typing import Optional
|
19
|
+
|
20
|
+
from pydantic import BaseModel, Field, model_validator
|
21
|
+
|
22
|
+
from nucliadb_models.filters import FilterExpression
|
18
23
|
|
19
24
|
|
20
25
|
class TrainSetPartitions(BaseModel):
|
21
26
|
partitions: list[str]
|
27
|
+
|
28
|
+
|
29
|
+
class TrainSetType(int, Enum):
|
30
|
+
# NOTE: matches the TaskType in nucliadb_protos.dataset.proto
|
31
|
+
FIELD_CLASSIFICATION = 0
|
32
|
+
PARAGRAPH_CLASSIFICATION = 1
|
33
|
+
SENTENCE_CLASSIFICATION = 2
|
34
|
+
TOKEN_CLASSIFICATION = 3
|
35
|
+
IMAGE_CLASSIFICATION = 4
|
36
|
+
PARAGRAPH_STREAMING = 5
|
37
|
+
QUESTION_ANSWER_STREAMING = 6
|
38
|
+
FIELD_STREAMING = 7
|
39
|
+
|
40
|
+
|
41
|
+
class TrainSet(BaseModel):
|
42
|
+
type: TrainSetType = Field(..., description="Streaming type")
|
43
|
+
filter_expression: Optional[FilterExpression] = Field(
|
44
|
+
default=None,
|
45
|
+
title="Filter resource by an expression",
|
46
|
+
description=(
|
47
|
+
"Returns only documents that match this filter expression. "
|
48
|
+
"Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters. "
|
49
|
+
"It is only supported on FIELD_STREAMING types."
|
50
|
+
),
|
51
|
+
)
|
52
|
+
batch_size: int = Field(
|
53
|
+
default=5,
|
54
|
+
description="Batch size of the resulting arrow file. This affects how many rows are read simultaneously while parsing the resulting arrow file.",
|
55
|
+
)
|
56
|
+
exclude_text: bool = Field(
|
57
|
+
default=False,
|
58
|
+
description="Set to True if the extracted text is not needed for the stream and it will not be added. This is useful to reduce the amount of data streamed.",
|
59
|
+
)
|
60
|
+
|
61
|
+
@model_validator(mode="after")
|
62
|
+
def validate_filter_expressions_supported_on_stream(self):
|
63
|
+
if self.filter_expression is not None and self.type != TrainSetType.FIELD_STREAMING:
|
64
|
+
raise ValueError(f"{self.type.name} does not support `filter_expression` parameter yet.")
|
65
|
+
return self
|
{nucliadb_models-6.4.2.post4379.dist-info → nucliadb_models-6.4.2.post4403.dist-info}/RECORD
RENAMED
@@ -1,5 +1,5 @@
|
|
1
1
|
nucliadb_models/__init__.py,sha256=3y8-htogKuCZcbhaUZdSjTeEjUSeec9aRWyL8AlKCyM,1077
|
2
|
-
nucliadb_models/common.py,sha256=
|
2
|
+
nucliadb_models/common.py,sha256=E5rYsahuRKgUX3ZASIFHjR6LPBfezh453JbfJ6hbckI,7885
|
3
3
|
nucliadb_models/configuration.py,sha256=V1d4hhs_1r-_ik7uBpqBHYrpzpMZYI4QeieWpHc8iOM,2508
|
4
4
|
nucliadb_models/content_types.py,sha256=eMlBhWwzfYJFlErcWsNCvBdypbv8J9eC-MXw727QiBE,3430
|
5
5
|
nucliadb_models/conversation.py,sha256=bgePP_aLvvnLV9VK4x90Aazlkdc4ajJKFVDYEa9sOJY,3383
|
@@ -20,7 +20,7 @@ nucliadb_models/search.py,sha256=Cg7W4Ng7G5UgMOAD-pkSz6fEcaiSTBdju6q8ApUNgag,853
|
|
20
20
|
nucliadb_models/security.py,sha256=opxaDLfvk3aU0sjesK0jGrYLx5h4YCwlKKN0moYs_ig,1150
|
21
21
|
nucliadb_models/synonyms.py,sha256=afbaVqSQSxGLwi2PusVaLSRpkOtA5AZmWOKd1f4nl2E,690
|
22
22
|
nucliadb_models/text.py,sha256=kY2ub7AaGm-4vNaLX3Ju2VvRw-eKZ2LRdM9z7XCNaG0,2898
|
23
|
-
nucliadb_models/trainset.py,sha256=
|
23
|
+
nucliadb_models/trainset.py,sha256=BgUfgdClpwhk6UoOq5x6mbpOopgSmqg8he2bBzEzGqg,2406
|
24
24
|
nucliadb_models/utils.py,sha256=OnWaDwZGwja8Spd_gpryuUpAMGIMhh-DNDGpoUYyb-A,2460
|
25
25
|
nucliadb_models/vectors.py,sha256=_Z157PojPIwoeF5LStO0gz8IwxKy2styHjhdBkLd_44,1329
|
26
26
|
nucliadb_models/vectorsets.py,sha256=XAgg9DfdfLYpfLh9OepJ_KPH0_RqRQNpVZJr74UnNh0,788
|
@@ -32,7 +32,7 @@ nucliadb_models/graph/responses.py,sha256=Sdq8OgFAL1YT-1lJyLLrkqcScvj7YTEqAUwQ-k
|
|
32
32
|
nucliadb_models/internal/__init__.py,sha256=zG33bUz1rHFPtvqQPWn4rDwBJt3FJodGuQYD45quiQg,583
|
33
33
|
nucliadb_models/internal/predict.py,sha256=Pnx6MmLfK65eExe1XnVxqmSlvMwdowewwks9BOEoqMw,2029
|
34
34
|
nucliadb_models/internal/shards.py,sha256=__y1OZtWGiNcPQEWfSFOj8yw458WGi7mM4vZe0K-L1Y,1691
|
35
|
-
nucliadb_models-6.4.2.
|
36
|
-
nucliadb_models-6.4.2.
|
37
|
-
nucliadb_models-6.4.2.
|
38
|
-
nucliadb_models-6.4.2.
|
35
|
+
nucliadb_models-6.4.2.post4403.dist-info/METADATA,sha256=knxWOrma5Q5JdKoHAEy2dcV0THXPionQQriNzcKl8oQ,776
|
36
|
+
nucliadb_models-6.4.2.post4403.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
37
|
+
nucliadb_models-6.4.2.post4403.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
|
38
|
+
nucliadb_models-6.4.2.post4403.dist-info/RECORD,,
|
File without changes
|
{nucliadb_models-6.4.2.post4379.dist-info → nucliadb_models-6.4.2.post4403.dist-info}/top_level.txt
RENAMED
File without changes
|