nucliadb-models 6.4.2.post4389__py3-none-any.whl → 6.5.0.post4404__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
nucliadb_models/common.py CHANGED
@@ -193,6 +193,16 @@ class FieldTypeName(str, Enum):
193
193
  CONVERSATION = "conversation"
194
194
  GENERIC = "generic"
195
195
 
196
+ @classmethod
197
+ def from_abbreviation(cls, abbr: str) -> "FieldTypeName":
198
+ return {
199
+ "t": FieldTypeName.TEXT,
200
+ "f": FieldTypeName.FILE,
201
+ "u": FieldTypeName.LINK,
202
+ "c": FieldTypeName.CONVERSATION,
203
+ "a": FieldTypeName.GENERIC,
204
+ }[abbr]
205
+
196
206
 
197
207
  class FieldRef(BaseModel):
198
208
  field_type: FieldTypeName
nucliadb_models/trainset.py CHANGED
@@ -14,8 +14,52 @@
14
14
  #
15
15
 
16
16
 
17
- from pydantic import BaseModel
17
+ from enum import Enum
18
+ from typing import Optional
19
+
20
+ from pydantic import BaseModel, Field, model_validator
21
+
22
+ from nucliadb_models.filters import FilterExpression
18
23
 
19
24
 
20
25
  class TrainSetPartitions(BaseModel):
21
26
  partitions: list[str]
27
+
28
+
29
+ class TrainSetType(int, Enum):
30
+ # NOTE: matches the TaskType in nucliadb_protos.dataset.proto
31
+ FIELD_CLASSIFICATION = 0
32
+ PARAGRAPH_CLASSIFICATION = 1
33
+ SENTENCE_CLASSIFICATION = 2
34
+ TOKEN_CLASSIFICATION = 3
35
+ IMAGE_CLASSIFICATION = 4
36
+ PARAGRAPH_STREAMING = 5
37
+ QUESTION_ANSWER_STREAMING = 6
38
+ FIELD_STREAMING = 7
39
+
40
+
41
+ class TrainSet(BaseModel):
42
+ type: TrainSetType = Field(..., description="Streaming type")
43
+ filter_expression: Optional[FilterExpression] = Field(
44
+ default=None,
45
+ title="Filter resource by an expression",
46
+ description=(
47
+ "Returns only documents that match this filter expression. "
48
+ "Filtering examples can be found here: https://docs.nuclia.dev/docs/rag/advanced/search-filters. "
49
+ "It is only supported on FIELD_STREAMING types."
50
+ ),
51
+ )
52
+ batch_size: int = Field(
53
+ default=5,
54
+ description="Batch size of the resulting arrow file. This affects how many rows are read simultaneously while parsing the resulting arrow file.",
55
+ )
56
+ exclude_text: bool = Field(
57
+ default=False,
58
+ description="Set to True if the extracted text is not needed for the stream and it will not be added. This is useful to reduce the amount of data streamed.",
59
+ )
60
+
61
+ @model_validator(mode="after")
62
+ def validate_filter_expressions_supported_on_stream(self):
63
+ if self.filter_expression is not None and self.type != TrainSetType.FIELD_STREAMING:
64
+ raise ValueError(f"{self.type.name} does not support `filter_expression` parameter yet.")
65
+ return self
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nucliadb_models
3
- Version: 6.4.2.post4389
3
+ Version: 6.5.0.post4404
4
4
  Author-email: Nuclia <nucliadb@nuclia.com>
5
5
  License-Expression: Apache-2.0
6
6
  Project-URL: Homepage, https://nuclia.com
@@ -1,5 +1,5 @@
1
1
  nucliadb_models/__init__.py,sha256=3y8-htogKuCZcbhaUZdSjTeEjUSeec9aRWyL8AlKCyM,1077
2
- nucliadb_models/common.py,sha256=baBX72ngYjbCApmJp2flFEZ0VBBSv1lkeuz9zeTb8oE,7576
2
+ nucliadb_models/common.py,sha256=E5rYsahuRKgUX3ZASIFHjR6LPBfezh453JbfJ6hbckI,7885
3
3
  nucliadb_models/configuration.py,sha256=V1d4hhs_1r-_ik7uBpqBHYrpzpMZYI4QeieWpHc8iOM,2508
4
4
  nucliadb_models/content_types.py,sha256=eMlBhWwzfYJFlErcWsNCvBdypbv8J9eC-MXw727QiBE,3430
5
5
  nucliadb_models/conversation.py,sha256=bgePP_aLvvnLV9VK4x90Aazlkdc4ajJKFVDYEa9sOJY,3383
@@ -20,7 +20,7 @@ nucliadb_models/search.py,sha256=Cg7W4Ng7G5UgMOAD-pkSz6fEcaiSTBdju6q8ApUNgag,853
20
20
  nucliadb_models/security.py,sha256=opxaDLfvk3aU0sjesK0jGrYLx5h4YCwlKKN0moYs_ig,1150
21
21
  nucliadb_models/synonyms.py,sha256=afbaVqSQSxGLwi2PusVaLSRpkOtA5AZmWOKd1f4nl2E,690
22
22
  nucliadb_models/text.py,sha256=kY2ub7AaGm-4vNaLX3Ju2VvRw-eKZ2LRdM9z7XCNaG0,2898
23
- nucliadb_models/trainset.py,sha256=KSFh81353jvg8Yxfp6bgaZSFB_MlN42A6RTlR2eUyX8,681
23
+ nucliadb_models/trainset.py,sha256=BgUfgdClpwhk6UoOq5x6mbpOopgSmqg8he2bBzEzGqg,2406
24
24
  nucliadb_models/utils.py,sha256=OnWaDwZGwja8Spd_gpryuUpAMGIMhh-DNDGpoUYyb-A,2460
25
25
  nucliadb_models/vectors.py,sha256=_Z157PojPIwoeF5LStO0gz8IwxKy2styHjhdBkLd_44,1329
26
26
  nucliadb_models/vectorsets.py,sha256=XAgg9DfdfLYpfLh9OepJ_KPH0_RqRQNpVZJr74UnNh0,788
@@ -32,7 +32,7 @@ nucliadb_models/graph/responses.py,sha256=Sdq8OgFAL1YT-1lJyLLrkqcScvj7YTEqAUwQ-k
32
32
  nucliadb_models/internal/__init__.py,sha256=zG33bUz1rHFPtvqQPWn4rDwBJt3FJodGuQYD45quiQg,583
33
33
  nucliadb_models/internal/predict.py,sha256=Pnx6MmLfK65eExe1XnVxqmSlvMwdowewwks9BOEoqMw,2029
34
34
  nucliadb_models/internal/shards.py,sha256=__y1OZtWGiNcPQEWfSFOj8yw458WGi7mM4vZe0K-L1Y,1691
35
- nucliadb_models-6.4.2.post4389.dist-info/METADATA,sha256=j7oD7NUY20MwkoREt79u3J1GNPdrUbPAbhzSo82DApY,776
36
- nucliadb_models-6.4.2.post4389.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
37
- nucliadb_models-6.4.2.post4389.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
38
- nucliadb_models-6.4.2.post4389.dist-info/RECORD,,
35
+ nucliadb_models-6.5.0.post4404.dist-info/METADATA,sha256=TNc6ywS-RXIapZG3UPtqRSoYuNQPT73lu77BzwcDeUU,776
36
+ nucliadb_models-6.5.0.post4404.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
37
+ nucliadb_models-6.5.0.post4404.dist-info/top_level.txt,sha256=UrY1I8oeovIRwkXLYplssTrxQdUjhSEFDFbnwaIV3tA,16
38
+ nucliadb_models-6.5.0.post4404.dist-info/RECORD,,