rapidata 2.7.1__py3-none-any.whl → 2.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/api_client/__init__.py +1 -0
- rapidata/api_client/api/dataset_api.py +31 -615
- rapidata/api_client/api/rapid_api.py +252 -0
- rapidata/api_client/api/workflow_api.py +280 -1
- rapidata/api_client/models/__init__.py +1 -0
- rapidata/api_client/models/add_campaign_model.py +1 -1
- rapidata/api_client/models/add_validation_rapid_model.py +2 -2
- rapidata/api_client/models/add_validation_text_rapid_model.py +2 -2
- rapidata/api_client/models/clients_query_result.py +3 -3
- rapidata/api_client/models/compare_workflow_model1.py +2 -2
- rapidata/api_client/models/coordinate.py +2 -2
- rapidata/api_client/models/datapoint.py +9 -2
- rapidata/api_client/models/datapoint_metadata_model.py +11 -4
- rapidata/api_client/models/get_compare_ab_summary_result.py +87 -0
- rapidata/api_client/models/order_model.py +1 -1
- rapidata/api_client/models/preliminary_download_model.py +2 -2
- rapidata/api_client/models/query_validation_rapids_result.py +1 -1
- rapidata/api_client/models/read_bridge_token_keys_result.py +6 -6
- rapidata/api_client/models/report_model.py +1 -1
- rapidata/api_client/models/simple_workflow_model1.py +2 -2
- rapidata/api_client/models/update_campaign_model.py +2 -2
- rapidata/api_client/models/update_validation_rapid_model.py +2 -2
- rapidata/api_client/models/upload_files_from_s3_bucket_model.py +3 -3
- rapidata/api_client/models/upload_text_sources_to_dataset_model.py +11 -4
- rapidata/api_client_README.md +3 -2
- rapidata/rapidata_client/assets/_media_asset.py +42 -28
- rapidata/rapidata_client/order/_rapidata_dataset.py +9 -7
- rapidata/rapidata_client/order/rapidata_order.py +9 -0
- rapidata/rapidata_client/order/rapidata_order_manager.py +3 -1
- rapidata/rapidata_client/validation/validation_set_manager.py +2 -1
- {rapidata-2.7.1.dist-info → rapidata-2.8.0.dist-info}/METADATA +1 -1
- {rapidata-2.7.1.dist-info → rapidata-2.8.0.dist-info}/RECORD +34 -33
- {rapidata-2.7.1.dist-info → rapidata-2.8.0.dist-info}/LICENSE +0 -0
- {rapidata-2.7.1.dist-info → rapidata-2.8.0.dist-info}/WHEEL +0 -0
|
@@ -18,7 +18,7 @@ import re # noqa: F401
|
|
|
18
18
|
import json
|
|
19
19
|
|
|
20
20
|
from datetime import datetime
|
|
21
|
-
from pydantic import BaseModel, ConfigDict, Field, StrictStr, field_validator
|
|
21
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr, field_validator
|
|
22
22
|
from typing import Any, ClassVar, Dict, List, Optional
|
|
23
23
|
from rapidata.api_client.models.datapoint_asset import DatapointAsset
|
|
24
24
|
from typing import Optional, Set
|
|
@@ -29,13 +29,14 @@ class Datapoint(BaseModel):
|
|
|
29
29
|
Datapoint
|
|
30
30
|
""" # noqa: E501
|
|
31
31
|
t: StrictStr = Field(description="Discriminator value for Datapoint", alias="_t")
|
|
32
|
+
sort_index: Optional[StrictInt] = Field(default=None, alias="sortIndex")
|
|
32
33
|
asset: DatapointAsset
|
|
33
34
|
dataset_id: StrictStr = Field(alias="datasetId")
|
|
34
35
|
id: Optional[StrictStr] = None
|
|
35
36
|
deletion_date: Optional[datetime] = Field(default=None, alias="deletionDate")
|
|
36
37
|
deleter_id: Optional[StrictStr] = Field(default=None, alias="deleterId")
|
|
37
38
|
created_at: Optional[datetime] = Field(default=None, alias="createdAt")
|
|
38
|
-
__properties: ClassVar[List[str]] = ["_t", "asset", "datasetId", "id", "deletionDate", "deleterId", "createdAt"]
|
|
39
|
+
__properties: ClassVar[List[str]] = ["_t", "sortIndex", "asset", "datasetId", "id", "deletionDate", "deleterId", "createdAt"]
|
|
39
40
|
|
|
40
41
|
@field_validator('t')
|
|
41
42
|
def t_validate_enum(cls, value):
|
|
@@ -86,6 +87,11 @@ class Datapoint(BaseModel):
|
|
|
86
87
|
# override the default output from pydantic by calling `to_dict()` of asset
|
|
87
88
|
if self.asset:
|
|
88
89
|
_dict['asset'] = self.asset.to_dict()
|
|
90
|
+
# set to None if sort_index (nullable) is None
|
|
91
|
+
# and model_fields_set contains the field
|
|
92
|
+
if self.sort_index is None and "sort_index" in self.model_fields_set:
|
|
93
|
+
_dict['sortIndex'] = None
|
|
94
|
+
|
|
89
95
|
# set to None if deletion_date (nullable) is None
|
|
90
96
|
# and model_fields_set contains the field
|
|
91
97
|
if self.deletion_date is None and "deletion_date" in self.model_fields_set:
|
|
@@ -109,6 +115,7 @@ class Datapoint(BaseModel):
|
|
|
109
115
|
|
|
110
116
|
_obj = cls.model_validate({
|
|
111
117
|
"_t": obj.get("_t") if obj.get("_t") is not None else 'Datapoint',
|
|
118
|
+
"sortIndex": obj.get("sortIndex"),
|
|
112
119
|
"asset": DatapointAsset.from_dict(obj["asset"]) if obj.get("asset") is not None else None,
|
|
113
120
|
"datasetId": obj.get("datasetId"),
|
|
114
121
|
"id": obj.get("id"),
|
|
@@ -17,8 +17,8 @@ import pprint
|
|
|
17
17
|
import re # noqa: F401
|
|
18
18
|
import json
|
|
19
19
|
|
|
20
|
-
from pydantic import BaseModel, ConfigDict, Field, StrictStr
|
|
21
|
-
from typing import Any, ClassVar, Dict, List
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
|
|
21
|
+
from typing import Any, ClassVar, Dict, List, Optional
|
|
22
22
|
from rapidata.api_client.models.datapoint_metadata_model_metadata_inner import DatapointMetadataModelMetadataInner
|
|
23
23
|
from typing import Optional, Set
|
|
24
24
|
from typing_extensions import Self
|
|
@@ -29,7 +29,8 @@ class DatapointMetadataModel(BaseModel):
|
|
|
29
29
|
""" # noqa: E501
|
|
30
30
|
dataset_id: StrictStr = Field(description="The id of the dataset to create the datapoint in.", alias="datasetId")
|
|
31
31
|
metadata: List[DatapointMetadataModelMetadataInner] = Field(description="The metadata of the datapoint.")
|
|
32
|
-
|
|
32
|
+
sort_index: Optional[StrictInt] = Field(default=None, description="The index will be used to keep the datapoints in order. Useful if upload is parallelized", alias="sortIndex")
|
|
33
|
+
__properties: ClassVar[List[str]] = ["datasetId", "metadata", "sortIndex"]
|
|
33
34
|
|
|
34
35
|
model_config = ConfigDict(
|
|
35
36
|
populate_by_name=True,
|
|
@@ -77,6 +78,11 @@ class DatapointMetadataModel(BaseModel):
|
|
|
77
78
|
if _item_metadata:
|
|
78
79
|
_items.append(_item_metadata.to_dict())
|
|
79
80
|
_dict['metadata'] = _items
|
|
81
|
+
# set to None if sort_index (nullable) is None
|
|
82
|
+
# and model_fields_set contains the field
|
|
83
|
+
if self.sort_index is None and "sort_index" in self.model_fields_set:
|
|
84
|
+
_dict['sortIndex'] = None
|
|
85
|
+
|
|
80
86
|
return _dict
|
|
81
87
|
|
|
82
88
|
@classmethod
|
|
@@ -90,7 +96,8 @@ class DatapointMetadataModel(BaseModel):
|
|
|
90
96
|
|
|
91
97
|
_obj = cls.model_validate({
|
|
92
98
|
"datasetId": obj.get("datasetId"),
|
|
93
|
-
"metadata": [DatapointMetadataModelMetadataInner.from_dict(_item) for _item in obj["metadata"]] if obj.get("metadata") is not None else None
|
|
99
|
+
"metadata": [DatapointMetadataModelMetadataInner.from_dict(_item) for _item in obj["metadata"]] if obj.get("metadata") is not None else None,
|
|
100
|
+
"sortIndex": obj.get("sortIndex")
|
|
94
101
|
})
|
|
95
102
|
return _obj
|
|
96
103
|
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Rapidata.Dataset
|
|
5
|
+
|
|
6
|
+
No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
|
|
7
|
+
|
|
8
|
+
The version of the OpenAPI document: v1
|
|
9
|
+
Generated by OpenAPI Generator (https://openapi-generator.tech)
|
|
10
|
+
|
|
11
|
+
Do not edit the class manually.
|
|
12
|
+
""" # noqa: E501
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
import pprint
|
|
17
|
+
import re # noqa: F401
|
|
18
|
+
import json
|
|
19
|
+
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictFloat, StrictInt
|
|
21
|
+
from typing import Any, ClassVar, Dict, List, Union
|
|
22
|
+
from typing import Optional, Set
|
|
23
|
+
from typing_extensions import Self
|
|
24
|
+
|
|
25
|
+
class GetCompareAbSummaryResult(BaseModel):
|
|
26
|
+
"""
|
|
27
|
+
GetCompareAbSummaryResult
|
|
28
|
+
""" # noqa: E501
|
|
29
|
+
winner_counts: Dict[str, Union[StrictFloat, StrictInt]] = Field(alias="winnerCounts")
|
|
30
|
+
__properties: ClassVar[List[str]] = ["winnerCounts"]
|
|
31
|
+
|
|
32
|
+
model_config = ConfigDict(
|
|
33
|
+
populate_by_name=True,
|
|
34
|
+
validate_assignment=True,
|
|
35
|
+
protected_namespaces=(),
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def to_str(self) -> str:
|
|
40
|
+
"""Returns the string representation of the model using alias"""
|
|
41
|
+
return pprint.pformat(self.model_dump(by_alias=True))
|
|
42
|
+
|
|
43
|
+
def to_json(self) -> str:
|
|
44
|
+
"""Returns the JSON representation of the model using alias"""
|
|
45
|
+
# TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
|
|
46
|
+
return json.dumps(self.to_dict())
|
|
47
|
+
|
|
48
|
+
@classmethod
|
|
49
|
+
def from_json(cls, json_str: str) -> Optional[Self]:
|
|
50
|
+
"""Create an instance of GetCompareAbSummaryResult from a JSON string"""
|
|
51
|
+
return cls.from_dict(json.loads(json_str))
|
|
52
|
+
|
|
53
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
54
|
+
"""Return the dictionary representation of the model using alias.
|
|
55
|
+
|
|
56
|
+
This has the following differences from calling pydantic's
|
|
57
|
+
`self.model_dump(by_alias=True)`:
|
|
58
|
+
|
|
59
|
+
* `None` is only added to the output dict for nullable fields that
|
|
60
|
+
were set at model initialization. Other fields with value `None`
|
|
61
|
+
are ignored.
|
|
62
|
+
"""
|
|
63
|
+
excluded_fields: Set[str] = set([
|
|
64
|
+
])
|
|
65
|
+
|
|
66
|
+
_dict = self.model_dump(
|
|
67
|
+
by_alias=True,
|
|
68
|
+
exclude=excluded_fields,
|
|
69
|
+
exclude_none=True,
|
|
70
|
+
)
|
|
71
|
+
return _dict
|
|
72
|
+
|
|
73
|
+
@classmethod
|
|
74
|
+
def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
|
|
75
|
+
"""Create an instance of GetCompareAbSummaryResult from a dict"""
|
|
76
|
+
if obj is None:
|
|
77
|
+
return None
|
|
78
|
+
|
|
79
|
+
if not isinstance(obj, dict):
|
|
80
|
+
return cls.model_validate(obj)
|
|
81
|
+
|
|
82
|
+
_obj = cls.model_validate({
|
|
83
|
+
"winnerCounts": obj.get("winnerCounts")
|
|
84
|
+
})
|
|
85
|
+
return _obj
|
|
86
|
+
|
|
87
|
+
|
|
@@ -29,7 +29,7 @@ class OrderModel(BaseModel):
|
|
|
29
29
|
""" # noqa: E501
|
|
30
30
|
id: StrictStr
|
|
31
31
|
pipeline_id: StrictStr = Field(alias="pipelineId")
|
|
32
|
-
order_date: Optional[datetime] = Field(alias="orderDate")
|
|
32
|
+
order_date: Optional[datetime] = Field(default=None, alias="orderDate")
|
|
33
33
|
customer_mail: StrictStr = Field(alias="customerMail")
|
|
34
34
|
state: StrictStr
|
|
35
35
|
order_name: StrictStr = Field(alias="orderName")
|
|
@@ -24,9 +24,9 @@ from typing_extensions import Self
|
|
|
24
24
|
|
|
25
25
|
class PreliminaryDownloadModel(BaseModel):
|
|
26
26
|
"""
|
|
27
|
-
|
|
27
|
+
The model for creating a preliminary download.
|
|
28
28
|
""" # noqa: E501
|
|
29
|
-
send_email: Optional[StrictBool] = Field(default=True, alias="sendEmail")
|
|
29
|
+
send_email: Optional[StrictBool] = Field(default=True, description="Whether to email the user when the download is ready.", alias="sendEmail")
|
|
30
30
|
__properties: ClassVar[List[str]] = ["sendEmail"]
|
|
31
31
|
|
|
32
32
|
model_config = ConfigDict(
|
|
@@ -32,7 +32,7 @@ class QueryValidationRapidsResult(BaseModel):
|
|
|
32
32
|
""" # noqa: E501
|
|
33
33
|
id: StrictStr
|
|
34
34
|
type: StrictStr
|
|
35
|
-
asset: Optional[QueryValidationRapidsResultAsset]
|
|
35
|
+
asset: Optional[QueryValidationRapidsResultAsset] = None
|
|
36
36
|
truth: Optional[QueryValidationRapidsResultTruth] = None
|
|
37
37
|
payload: QueryValidationRapidsResultPayload
|
|
38
38
|
metadata: List[FileAssetModel1MetadataInner]
|
|
@@ -27,12 +27,12 @@ class ReadBridgeTokenKeysResult(BaseModel):
|
|
|
27
27
|
ReadBridgeTokenKeysResult
|
|
28
28
|
""" # noqa: E501
|
|
29
29
|
t: StrictStr = Field(description="Discriminator value for ReadBridgeTokenKeysResult", alias="_t")
|
|
30
|
-
access_token: Optional[StrictStr] = Field(alias="accessToken")
|
|
31
|
-
expires_in: Optional[StrictInt] = Field(alias="expiresIn")
|
|
32
|
-
refresh_token: Optional[StrictStr] = Field(alias="refreshToken")
|
|
33
|
-
id_token: Optional[StrictStr] = Field(alias="idToken")
|
|
34
|
-
token_type: Optional[StrictStr] = Field(alias="tokenType")
|
|
35
|
-
scope: Optional[StrictStr]
|
|
30
|
+
access_token: Optional[StrictStr] = Field(default=None, alias="accessToken")
|
|
31
|
+
expires_in: Optional[StrictInt] = Field(default=None, alias="expiresIn")
|
|
32
|
+
refresh_token: Optional[StrictStr] = Field(default=None, alias="refreshToken")
|
|
33
|
+
id_token: Optional[StrictStr] = Field(default=None, alias="idToken")
|
|
34
|
+
token_type: Optional[StrictStr] = Field(default=None, alias="tokenType")
|
|
35
|
+
scope: Optional[StrictStr] = None
|
|
36
36
|
__properties: ClassVar[List[str]] = ["_t", "accessToken", "expiresIn", "refreshToken", "idToken", "tokenType", "scope"]
|
|
37
37
|
|
|
38
38
|
@field_validator('t')
|
|
@@ -28,7 +28,7 @@ class ReportModel(BaseModel):
|
|
|
28
28
|
""" # noqa: E501
|
|
29
29
|
rapid_id: StrictStr = Field(alias="rapidId")
|
|
30
30
|
issue: StrictStr
|
|
31
|
-
message: Optional[StrictStr]
|
|
31
|
+
message: Optional[StrictStr] = None
|
|
32
32
|
__properties: ClassVar[List[str]] = ["rapidId", "issue", "message"]
|
|
33
33
|
|
|
34
34
|
@field_validator('issue')
|
|
@@ -30,12 +30,12 @@ class SimpleWorkflowModel1(BaseModel):
|
|
|
30
30
|
""" # noqa: E501
|
|
31
31
|
t: StrictStr = Field(description="Discriminator value for SimpleWorkflowModel", alias="_t")
|
|
32
32
|
id: StrictStr
|
|
33
|
-
dataset_id: Optional[StrictStr] = Field(alias="datasetId")
|
|
33
|
+
dataset_id: Optional[StrictStr] = Field(default=None, alias="datasetId")
|
|
34
34
|
state: StrictStr
|
|
35
35
|
blueprint: ValidationImportPostRequestBlueprint
|
|
36
36
|
referee: CompareWorkflowModel1Referee
|
|
37
37
|
name: StrictStr
|
|
38
|
-
owner_mail: Optional[StrictStr] = Field(alias="ownerMail")
|
|
38
|
+
owner_mail: Optional[StrictStr] = Field(default=None, alias="ownerMail")
|
|
39
39
|
__properties: ClassVar[List[str]] = ["_t", "id", "datasetId", "state", "blueprint", "referee", "name", "ownerMail"]
|
|
40
40
|
|
|
41
41
|
@field_validator('t')
|
|
@@ -26,8 +26,8 @@ class UpdateCampaignModel(BaseModel):
|
|
|
26
26
|
"""
|
|
27
27
|
The model containing the new configuration for a campaign.
|
|
28
28
|
""" # noqa: E501
|
|
29
|
-
priority: Optional[StrictInt] = Field(description="A value above 0 indicating how much the campaign should be prioritized. The higher the value the more weight it will be given during campaign selection.")
|
|
30
|
-
feature_flags: Optional[Dict[str, StrictStr]] = Field(description="The feature flags to assign this campaign.", alias="featureFlags")
|
|
29
|
+
priority: Optional[StrictInt] = Field(default=None, description="A value above 0 indicating how much the campaign should be prioritized. The higher the value the more weight it will be given during campaign selection.")
|
|
30
|
+
feature_flags: Optional[Dict[str, StrictStr]] = Field(default=None, description="The feature flags to assign this campaign.", alias="featureFlags")
|
|
31
31
|
__properties: ClassVar[List[str]] = ["priority", "featureFlags"]
|
|
32
32
|
|
|
33
33
|
model_config = ConfigDict(
|
|
@@ -28,8 +28,8 @@ class UpdateValidationRapidModel(BaseModel):
|
|
|
28
28
|
The model for updating a validation rapid.
|
|
29
29
|
""" # noqa: E501
|
|
30
30
|
truth: UpdateValidationRapidModelTruth
|
|
31
|
-
explanation: Optional[StrictStr]
|
|
32
|
-
prompt: Optional[StrictStr]
|
|
31
|
+
explanation: Optional[StrictStr] = None
|
|
32
|
+
prompt: Optional[StrictStr] = None
|
|
33
33
|
__properties: ClassVar[List[str]] = ["truth", "explanation", "prompt"]
|
|
34
34
|
|
|
35
35
|
model_config = ConfigDict(
|
|
@@ -28,10 +28,10 @@ class UploadFilesFromS3BucketModel(BaseModel):
|
|
|
28
28
|
""" # noqa: E501
|
|
29
29
|
dataset_id: StrictStr = Field(description="The id of the dataset to upload the files to.", alias="datasetId")
|
|
30
30
|
bucket_name: StrictStr = Field(description="The name of the S3 bucket to upload the files from.", alias="bucketName")
|
|
31
|
-
region: Optional[StrictStr] = Field(description="The region of the S3 bucket.")
|
|
31
|
+
region: Optional[StrictStr] = Field(default=None, description="The region of the S3 bucket.")
|
|
32
32
|
source_prefix: StrictStr = Field(description="The prefix of the files to upload.", alias="sourcePrefix")
|
|
33
|
-
access_key: Optional[StrictStr] = Field(description="The access key to use for the S3 bucket.", alias="accessKey")
|
|
34
|
-
secret_key: Optional[StrictStr] = Field(description="The secret key to use for the S3 bucket.", alias="secretKey")
|
|
33
|
+
access_key: Optional[StrictStr] = Field(default=None, description="The access key to use for the S3 bucket.", alias="accessKey")
|
|
34
|
+
secret_key: Optional[StrictStr] = Field(default=None, description="The secret key to use for the S3 bucket.", alias="secretKey")
|
|
35
35
|
use_custom_aws_credentials: StrictBool = Field(description="Whether to use custom AWS credentials.", alias="useCustomAwsCredentials")
|
|
36
36
|
clear_dataset: StrictBool = Field(description="Whether to clear the dataset before uploading the files.", alias="clearDataset")
|
|
37
37
|
__properties: ClassVar[List[str]] = ["datasetId", "bucketName", "region", "sourcePrefix", "accessKey", "secretKey", "useCustomAwsCredentials", "clearDataset"]
|
|
@@ -17,8 +17,8 @@ import pprint
|
|
|
17
17
|
import re # noqa: F401
|
|
18
18
|
import json
|
|
19
19
|
|
|
20
|
-
from pydantic import BaseModel, ConfigDict, Field, StrictStr
|
|
21
|
-
from typing import Any, ClassVar, Dict, List
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
|
|
21
|
+
from typing import Any, ClassVar, Dict, List, Optional
|
|
22
22
|
from typing import Optional, Set
|
|
23
23
|
from typing_extensions import Self
|
|
24
24
|
|
|
@@ -28,7 +28,8 @@ class UploadTextSourcesToDatasetModel(BaseModel):
|
|
|
28
28
|
""" # noqa: E501
|
|
29
29
|
dataset_id: StrictStr = Field(description="The id of the dataset to upload the text sources to.", alias="datasetId")
|
|
30
30
|
text_sources: List[StrictStr] = Field(description="The text sources to upload.", alias="textSources")
|
|
31
|
-
|
|
31
|
+
sort_index: Optional[StrictInt] = Field(default=None, description="The index will be used to keep the datapoints in order. Useful if upload is parallelized", alias="sortIndex")
|
|
32
|
+
__properties: ClassVar[List[str]] = ["datasetId", "textSources", "sortIndex"]
|
|
32
33
|
|
|
33
34
|
model_config = ConfigDict(
|
|
34
35
|
populate_by_name=True,
|
|
@@ -69,6 +70,11 @@ class UploadTextSourcesToDatasetModel(BaseModel):
|
|
|
69
70
|
exclude=excluded_fields,
|
|
70
71
|
exclude_none=True,
|
|
71
72
|
)
|
|
73
|
+
# set to None if sort_index (nullable) is None
|
|
74
|
+
# and model_fields_set contains the field
|
|
75
|
+
if self.sort_index is None and "sort_index" in self.model_fields_set:
|
|
76
|
+
_dict['sortIndex'] = None
|
|
77
|
+
|
|
72
78
|
return _dict
|
|
73
79
|
|
|
74
80
|
@classmethod
|
|
@@ -82,7 +88,8 @@ class UploadTextSourcesToDatasetModel(BaseModel):
|
|
|
82
88
|
|
|
83
89
|
_obj = cls.model_validate({
|
|
84
90
|
"datasetId": obj.get("datasetId"),
|
|
85
|
-
"textSources": obj.get("textSources")
|
|
91
|
+
"textSources": obj.get("textSources"),
|
|
92
|
+
"sortIndex": obj.get("sortIndex")
|
|
86
93
|
})
|
|
87
94
|
return _obj
|
|
88
95
|
|
rapidata/api_client_README.md
CHANGED
|
@@ -90,9 +90,7 @@ Class | Method | HTTP request | Description
|
|
|
90
90
|
*DatasetApi* | [**dataset_get_by_id_get**](rapidata/api_client/docs/DatasetApi.md#dataset_get_by_id_get) | **GET** /Dataset/GetById | Gets a dataset by its id.
|
|
91
91
|
*DatasetApi* | [**dataset_import_post**](rapidata/api_client/docs/DatasetApi.md#dataset_import_post) | **POST** /Dataset/Import | Imports datapoints from a csv file.
|
|
92
92
|
*DatasetApi* | [**dataset_update_name_post**](rapidata/api_client/docs/DatasetApi.md#dataset_update_name_post) | **POST** /Dataset/UpdateName | Updates the name of a dataset.
|
|
93
|
-
*DatasetApi* | [**dataset_upload_datapoint_post**](rapidata/api_client/docs/DatasetApi.md#dataset_upload_datapoint_post) | **POST** /Dataset/UploadDatapoint | Creates a new multi asset datapoint.
|
|
94
93
|
*DatasetApi* | [**dataset_upload_files_from_s3_post**](rapidata/api_client/docs/DatasetApi.md#dataset_upload_files_from_s3_post) | **POST** /Dataset/UploadFilesFromS3 | Uploads files from an S3 bucket to a dataset.
|
|
95
|
-
*DatasetApi* | [**dataset_upload_images_to_dataset_post**](rapidata/api_client/docs/DatasetApi.md#dataset_upload_images_to_dataset_post) | **POST** /Dataset/UploadImagesToDataset | Uploads images to a dataset.
|
|
96
94
|
*IdentityApi* | [**identity_create_bridge_token_post**](rapidata/api_client/docs/IdentityApi.md#identity_create_bridge_token_post) | **POST** /Identity/CreateBridgeToken | Creates a pair of read and write keys for a client. The write key is used to store the authentication result. The read key is used to retrieve the authentication result.
|
|
97
95
|
*IdentityApi* | [**identity_read_bridge_token_get**](rapidata/api_client/docs/IdentityApi.md#identity_read_bridge_token_get) | **GET** /Identity/ReadBridgeToken | Tries to read the bridge token keys for a given read key. The read key is used to retrieve the authentication result written by the write key.
|
|
98
96
|
*IdentityApi* | [**identity_register_temporary_post**](rapidata/api_client/docs/IdentityApi.md#identity_register_temporary_post) | **POST** /Identity/RegisterTemporary | Registers and logs in a temporary customer.
|
|
@@ -129,6 +127,7 @@ Class | Method | HTTP request | Description
|
|
|
129
127
|
*RapidApi* | [**rapid_add_user_guess_post**](rapidata/api_client/docs/RapidApi.md#rapid_add_user_guess_post) | **POST** /Rapid/AddUserGuess | Submits a user guess for a Rapid.
|
|
130
128
|
*RapidApi* | [**rapid_create_demographic_rapid_post**](rapidata/api_client/docs/RapidApi.md#rapid_create_demographic_rapid_post) | **POST** /Rapid/CreateDemographicRapid | Creates a new Demographic Rapid.
|
|
131
129
|
*RapidApi* | [**rapid_query_validation_rapids_get**](rapidata/api_client/docs/RapidApi.md#rapid_query_validation_rapids_get) | **GET** /Rapid/QueryValidationRapids | Queries the validation rapids for a specific validation set.
|
|
130
|
+
*RapidApi* | [**rapid_rapid_id_delete**](rapidata/api_client/docs/RapidApi.md#rapid_rapid_id_delete) | **DELETE** /rapid/{rapidId} | Deletes a rapid.
|
|
132
131
|
*RapidApi* | [**rapid_report_post**](rapidata/api_client/docs/RapidApi.md#rapid_report_post) | **POST** /Rapid/Report | Used to report an issue with a rapid.
|
|
133
132
|
*RapidApi* | [**rapid_skip_user_guess_post**](rapidata/api_client/docs/RapidApi.md#rapid_skip_user_guess_post) | **POST** /Rapid/SkipUserGuess | Skips a Rapid for the user.
|
|
134
133
|
*RapidApi* | [**rapid_validate_current_rapid_bag_get**](rapidata/api_client/docs/RapidApi.md#rapid_validate_current_rapid_bag_get) | **GET** /Rapid/ValidateCurrentRapidBag | Validates that the rapids associated with the current user are active.
|
|
@@ -147,6 +146,7 @@ Class | Method | HTTP request | Description
|
|
|
147
146
|
*WorkflowApi* | [**workflow_delete_delete**](rapidata/api_client/docs/WorkflowApi.md#workflow_delete_delete) | **DELETE** /Workflow/Delete | Deletes a workflow.
|
|
148
147
|
*WorkflowApi* | [**workflow_get_by_id_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_get_by_id_get) | **GET** /Workflow/GetById | Get a workflow by its ID.
|
|
149
148
|
*WorkflowApi* | [**workflow_get_progress_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_get_progress_get) | **GET** /Workflow/GetProgress | Get the progress of a workflow.
|
|
149
|
+
*WorkflowApi* | [**workflow_id_compare_ab_summary_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_id_compare_ab_summary_get) | **GET** /workflow/{id}/compare-ab-summary | Calculates a summary of the results for a simple compare workflow. The summary includes the number of times an asset at each index was the winner.
|
|
150
150
|
*WorkflowApi* | [**workflow_query_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_query_get) | **GET** /Workflow/Query | Queries workflows based on the provided filter, page, and sort criteria.
|
|
151
151
|
|
|
152
152
|
|
|
@@ -262,6 +262,7 @@ Class | Method | HTTP request | Description
|
|
|
262
262
|
- [Gender](rapidata/api_client/docs/Gender.md)
|
|
263
263
|
- [GenderUserFilterModel](rapidata/api_client/docs/GenderUserFilterModel.md)
|
|
264
264
|
- [GetAvailableValidationSetsResult](rapidata/api_client/docs/GetAvailableValidationSetsResult.md)
|
|
265
|
+
- [GetCompareAbSummaryResult](rapidata/api_client/docs/GetCompareAbSummaryResult.md)
|
|
265
266
|
- [GetCompareWorkflowResultsModel](rapidata/api_client/docs/GetCompareWorkflowResultsModel.md)
|
|
266
267
|
- [GetCompareWorkflowResultsResult](rapidata/api_client/docs/GetCompareWorkflowResultsResult.md)
|
|
267
268
|
- [GetCompareWorkflowResultsResultAsset](rapidata/api_client/docs/GetCompareWorkflowResultsResultAsset.md)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
"""Media Asset Module
|
|
1
|
+
"""Media Asset Module with Lazy Loading
|
|
2
2
|
|
|
3
3
|
Defines the MediaAsset class for handling media file paths within assets.
|
|
4
|
+
Implements lazy loading for URL-based media to prevent unnecessary downloads.
|
|
4
5
|
"""
|
|
5
6
|
|
|
6
7
|
import os
|
|
@@ -12,18 +13,20 @@ from PIL import Image
|
|
|
12
13
|
from tinytag import TinyTag
|
|
13
14
|
import tempfile
|
|
14
15
|
from pydantic import StrictStr, StrictBytes
|
|
15
|
-
from typing import Optional
|
|
16
|
+
from typing import Optional, cast
|
|
16
17
|
import logging
|
|
18
|
+
from functools import cached_property
|
|
17
19
|
|
|
18
20
|
|
|
19
21
|
class MediaAsset(BaseAsset):
|
|
20
|
-
"""MediaAsset Class
|
|
22
|
+
"""MediaAsset Class with Lazy Loading
|
|
21
23
|
|
|
22
|
-
Represents a media asset by storing the file path.
|
|
24
|
+
Represents a media asset by storing the file path or URL.
|
|
25
|
+
Only downloads URL content when needed.
|
|
23
26
|
Supports local files and URLs for images, MP3, and MP4.
|
|
24
27
|
|
|
25
28
|
Args:
|
|
26
|
-
path (str): The file system path to the media asset.
|
|
29
|
+
path (str): The file system path to the media asset or URL.
|
|
27
30
|
|
|
28
31
|
Raises:
|
|
29
32
|
FileNotFoundError: If the provided file path does not exist.
|
|
@@ -67,23 +70,40 @@ class MediaAsset(BaseAsset):
|
|
|
67
70
|
|
|
68
71
|
Raises:
|
|
69
72
|
FileNotFoundError: If the provided file path does not exist.
|
|
70
|
-
ValueError: If
|
|
73
|
+
ValueError: If path is not a string.
|
|
71
74
|
"""
|
|
72
75
|
if not isinstance(path, str):
|
|
73
76
|
raise ValueError("Media must be a string, either a local file path or a URL")
|
|
74
77
|
|
|
78
|
+
self._url = None
|
|
79
|
+
self._content = None
|
|
80
|
+
|
|
75
81
|
if re.match(r'^https?://', path):
|
|
76
|
-
self.
|
|
82
|
+
self._url = path
|
|
77
83
|
self.name = path.split('/')[-1]
|
|
78
84
|
self.name = self.__check_name_ending(self.name)
|
|
85
|
+
self.path = None # Will be set when content is downloaded
|
|
79
86
|
return
|
|
80
87
|
|
|
81
88
|
if not os.path.exists(path):
|
|
82
89
|
raise FileNotFoundError(f"File not found: {path}")
|
|
83
90
|
|
|
84
|
-
self.path
|
|
91
|
+
self.path = path
|
|
85
92
|
self.name = path
|
|
86
93
|
|
|
94
|
+
@cached_property
|
|
95
|
+
def content(self) -> bytes:
|
|
96
|
+
"""
|
|
97
|
+
Lazy loader for URL content. Only downloads when first accessed.
|
|
98
|
+
Uses cached_property to store the result after first download.
|
|
99
|
+
"""
|
|
100
|
+
if self._url is None:
|
|
101
|
+
self.path = cast(str, self.path)
|
|
102
|
+
with open(self.path, 'rb') as f:
|
|
103
|
+
return f.read()
|
|
104
|
+
|
|
105
|
+
return self.__get_media_bytes(self._url)
|
|
106
|
+
|
|
87
107
|
def get_duration(self) -> int:
|
|
88
108
|
"""
|
|
89
109
|
Get the duration of audio/video files in milliseconds.
|
|
@@ -97,27 +117,22 @@ class MediaAsset(BaseAsset):
|
|
|
97
117
|
"""
|
|
98
118
|
path_to_check = self.name.lower()
|
|
99
119
|
|
|
100
|
-
# Return 0 for
|
|
120
|
+
# Return 0 for static images
|
|
101
121
|
if any(path_to_check.endswith(ext) for ext in ('.jpg', '.jpeg', '.png', '.webp', '.gif')):
|
|
102
122
|
return 0
|
|
103
123
|
|
|
104
124
|
try:
|
|
105
|
-
#
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
# Close the file so it can be read
|
|
111
|
-
tmp_path = tmp.name
|
|
125
|
+
# Create temporary file from content
|
|
126
|
+
with tempfile.NamedTemporaryFile(suffix=os.path.splitext(self.name)[1], delete=False) as tmp:
|
|
127
|
+
tmp.write(self.content)
|
|
128
|
+
tmp.flush()
|
|
129
|
+
tmp_path = tmp.name
|
|
112
130
|
|
|
113
131
|
try:
|
|
114
132
|
tag = TinyTag.get(tmp_path)
|
|
115
133
|
finally:
|
|
116
134
|
# Clean up the temporary file
|
|
117
135
|
os.unlink(tmp_path)
|
|
118
|
-
else:
|
|
119
|
-
# For local files, use path directly
|
|
120
|
-
tag = TinyTag.get(self.path)
|
|
121
136
|
|
|
122
137
|
if tag.duration is None:
|
|
123
138
|
raise ValueError("Could not read duration from file")
|
|
@@ -136,17 +151,14 @@ class MediaAsset(BaseAsset):
|
|
|
136
151
|
return None
|
|
137
152
|
|
|
138
153
|
try:
|
|
139
|
-
|
|
140
|
-
img = Image.open(BytesIO(self.path))
|
|
141
|
-
else:
|
|
142
|
-
img = Image.open(self.path)
|
|
154
|
+
img = Image.open(BytesIO(self.content))
|
|
143
155
|
return img.size
|
|
144
156
|
except Exception:
|
|
145
157
|
return None
|
|
146
158
|
|
|
147
159
|
def set_custom_name(self, name: str) -> 'MediaAsset':
|
|
148
160
|
"""Set a custom name for the media asset (only works with URLs)."""
|
|
149
|
-
if
|
|
161
|
+
if self._url is not None:
|
|
150
162
|
self.name = self.__check_name_ending(name)
|
|
151
163
|
else:
|
|
152
164
|
raise ValueError("Custom name can only be set for URLs.")
|
|
@@ -265,8 +277,10 @@ class MediaAsset(BaseAsset):
|
|
|
265
277
|
self._logger.error(error_msg)
|
|
266
278
|
raise ValueError(error_msg)
|
|
267
279
|
|
|
268
|
-
def to_file(self) -> StrictStr | tuple[StrictStr, StrictBytes] | StrictBytes:
|
|
269
|
-
|
|
280
|
+
def to_file(self) -> StrictStr | tuple[StrictStr, StrictBytes] | StrictBytes:
|
|
281
|
+
"""Convert the media asset to a file representation."""
|
|
282
|
+
if self._url is None:
|
|
283
|
+
self.path = cast(str, self.path)
|
|
270
284
|
return self.path
|
|
271
|
-
else:
|
|
272
|
-
return (self.name, self.
|
|
285
|
+
else:
|
|
286
|
+
return (self.name, self.content)
|
|
@@ -36,7 +36,7 @@ class RapidataDataset:
|
|
|
36
36
|
isinstance(asset, TextAsset) for asset in text_asset.assets
|
|
37
37
|
), "All assets in a MultiAsset must be of type TextAsset."
|
|
38
38
|
|
|
39
|
-
def upload_text_datapoint(text_asset: TextAsset | MultiAsset) -> None:
|
|
39
|
+
def upload_text_datapoint(text_asset: TextAsset | MultiAsset, index: int) -> None:
|
|
40
40
|
if isinstance(text_asset, TextAsset):
|
|
41
41
|
texts = [text_asset.text]
|
|
42
42
|
elif isinstance(text_asset, MultiAsset):
|
|
@@ -46,7 +46,8 @@ class RapidataDataset:
|
|
|
46
46
|
|
|
47
47
|
model = UploadTextSourcesToDatasetModel(
|
|
48
48
|
datasetId=self.dataset_id,
|
|
49
|
-
textSources=texts
|
|
49
|
+
textSources=texts,
|
|
50
|
+
sortIndex=index,
|
|
50
51
|
)
|
|
51
52
|
|
|
52
53
|
upload_response = self.openapi_service.dataset_api.dataset_creat_text_datapoint_post(model)
|
|
@@ -57,8 +58,8 @@ class RapidataDataset:
|
|
|
57
58
|
total_uploads = len(text_assets)
|
|
58
59
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
59
60
|
futures = [
|
|
60
|
-
executor.submit(upload_text_datapoint, text_asset)
|
|
61
|
-
for text_asset in text_assets
|
|
61
|
+
executor.submit(upload_text_datapoint, text_asset, index=i)
|
|
62
|
+
for i, text_asset in enumerate(text_assets)
|
|
62
63
|
]
|
|
63
64
|
|
|
64
65
|
with tqdm(total=total_uploads, desc="Uploading text datapoints") as pbar:
|
|
@@ -83,7 +84,7 @@ class RapidataDataset:
|
|
|
83
84
|
isinstance(asset, MediaAsset) for asset in media_path.assets
|
|
84
85
|
), "All assets in a MultiAsset must be of type MediaAsset."
|
|
85
86
|
|
|
86
|
-
def upload_datapoint(media_asset: MediaAsset | MultiAsset, meta: Metadata | None) -> None:
|
|
87
|
+
def upload_datapoint(media_asset: MediaAsset | MultiAsset, meta: Metadata | None, index: int) -> None:
|
|
87
88
|
if isinstance(media_asset, MediaAsset):
|
|
88
89
|
assets = [media_asset]
|
|
89
90
|
elif isinstance(media_asset, MultiAsset):
|
|
@@ -99,6 +100,7 @@ class RapidataDataset:
|
|
|
99
100
|
if meta_model
|
|
100
101
|
else []
|
|
101
102
|
),
|
|
103
|
+
sortIndex=index,
|
|
102
104
|
)
|
|
103
105
|
|
|
104
106
|
files: list[tuple[StrictStr, StrictBytes] | StrictStr | StrictBytes] = []
|
|
@@ -116,8 +118,8 @@ class RapidataDataset:
|
|
|
116
118
|
total_uploads = len(media_paths)
|
|
117
119
|
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
118
120
|
futures = [
|
|
119
|
-
executor.submit(upload_datapoint, media_asset, meta)
|
|
120
|
-
for media_asset, meta in zip_longest(media_paths, metadata or [])
|
|
121
|
+
executor.submit(upload_datapoint, media_asset, meta, index=i)
|
|
122
|
+
for i, (media_asset, meta) in enumerate(zip_longest(media_paths, metadata or []))
|
|
121
123
|
]
|
|
122
124
|
|
|
123
125
|
with tqdm(total=total_uploads, desc="Uploading datapoints") as pbar:
|
|
@@ -65,6 +65,15 @@ class RapidataOrder:
|
|
|
65
65
|
def get_status(self) -> str:
|
|
66
66
|
"""
|
|
67
67
|
Gets the status of the order.
|
|
68
|
+
|
|
69
|
+
Different states are:
|
|
70
|
+
Created: The order has been created but not started yet.\n
|
|
71
|
+
Submitted: The order has been submitted and is being reviewed.\n
|
|
72
|
+
ManualReview: The order is in manual review - something went wrong with the automatic approval.\n
|
|
73
|
+
Processing: The order is actively being processed.\n
|
|
74
|
+
Paused: The order has been paused.\n
|
|
75
|
+
Completed: The order has been completed.\n
|
|
76
|
+
Failed: The order has failed.
|
|
68
77
|
|
|
69
78
|
Returns:
|
|
70
79
|
The status of the order.
|
|
@@ -36,6 +36,8 @@ from rapidata.api_client.models.root_filter import RootFilter
|
|
|
36
36
|
from rapidata.api_client.models.filter import Filter
|
|
37
37
|
from rapidata.api_client.models.sort_criterion import SortCriterion
|
|
38
38
|
|
|
39
|
+
from tqdm import tqdm
|
|
40
|
+
|
|
39
41
|
|
|
40
42
|
class RapidataOrderManager:
|
|
41
43
|
"""
|
|
@@ -444,7 +446,7 @@ class RapidataOrderManager:
|
|
|
444
446
|
|
|
445
447
|
assets = [MediaAsset(path=path) for path in datapoints]
|
|
446
448
|
|
|
447
|
-
for asset in assets:
|
|
449
|
+
for asset in tqdm(assets, desc="Downloading assets and checking duration"):
|
|
448
450
|
if not asset.get_duration():
|
|
449
451
|
raise ValueError("The datapoints for this order must have a duration. (e.g. video or audio)")
|
|
450
452
|
|
|
@@ -15,6 +15,7 @@ from urllib3._collections import HTTPHeaderDict
|
|
|
15
15
|
from rapidata.rapidata_client.validation.rapids.box import Box
|
|
16
16
|
|
|
17
17
|
from rapidata.api_client.models.query_validation_set_model import QueryValidationSetModel
|
|
18
|
+
from tqdm import tqdm
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
class ValidationSetManager:
|
|
@@ -404,7 +405,7 @@ class ValidationSetManager:
|
|
|
404
405
|
openapi_service=self.__openapi_service
|
|
405
406
|
)
|
|
406
407
|
|
|
407
|
-
for rapid in rapids:
|
|
408
|
+
for rapid in tqdm(rapids, desc="Uploading validation tasks"):
|
|
408
409
|
validation_set.add_rapid(rapid)
|
|
409
410
|
|
|
410
411
|
return validation_set
|