rapidata-2.7.1-py3-none-any.whl → rapidata-2.8.0-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rapidata might be problematic. Click here for more details.

Files changed (34):
  1. rapidata/api_client/__init__.py +1 -0
  2. rapidata/api_client/api/dataset_api.py +31 -615
  3. rapidata/api_client/api/rapid_api.py +252 -0
  4. rapidata/api_client/api/workflow_api.py +280 -1
  5. rapidata/api_client/models/__init__.py +1 -0
  6. rapidata/api_client/models/add_campaign_model.py +1 -1
  7. rapidata/api_client/models/add_validation_rapid_model.py +2 -2
  8. rapidata/api_client/models/add_validation_text_rapid_model.py +2 -2
  9. rapidata/api_client/models/clients_query_result.py +3 -3
  10. rapidata/api_client/models/compare_workflow_model1.py +2 -2
  11. rapidata/api_client/models/coordinate.py +2 -2
  12. rapidata/api_client/models/datapoint.py +9 -2
  13. rapidata/api_client/models/datapoint_metadata_model.py +11 -4
  14. rapidata/api_client/models/get_compare_ab_summary_result.py +87 -0
  15. rapidata/api_client/models/order_model.py +1 -1
  16. rapidata/api_client/models/preliminary_download_model.py +2 -2
  17. rapidata/api_client/models/query_validation_rapids_result.py +1 -1
  18. rapidata/api_client/models/read_bridge_token_keys_result.py +6 -6
  19. rapidata/api_client/models/report_model.py +1 -1
  20. rapidata/api_client/models/simple_workflow_model1.py +2 -2
  21. rapidata/api_client/models/update_campaign_model.py +2 -2
  22. rapidata/api_client/models/update_validation_rapid_model.py +2 -2
  23. rapidata/api_client/models/upload_files_from_s3_bucket_model.py +3 -3
  24. rapidata/api_client/models/upload_text_sources_to_dataset_model.py +11 -4
  25. rapidata/api_client_README.md +3 -2
  26. rapidata/rapidata_client/assets/_media_asset.py +42 -28
  27. rapidata/rapidata_client/order/_rapidata_dataset.py +9 -7
  28. rapidata/rapidata_client/order/rapidata_order.py +9 -0
  29. rapidata/rapidata_client/order/rapidata_order_manager.py +3 -1
  30. rapidata/rapidata_client/validation/validation_set_manager.py +2 -1
  31. {rapidata-2.7.1.dist-info → rapidata-2.8.0.dist-info}/METADATA +1 -1
  32. {rapidata-2.7.1.dist-info → rapidata-2.8.0.dist-info}/RECORD +34 -33
  33. {rapidata-2.7.1.dist-info → rapidata-2.8.0.dist-info}/LICENSE +0 -0
  34. {rapidata-2.7.1.dist-info → rapidata-2.8.0.dist-info}/WHEEL +0 -0
@@ -18,7 +18,7 @@ import re # noqa: F401
18
18
  import json
19
19
 
20
20
  from datetime import datetime
21
- from pydantic import BaseModel, ConfigDict, Field, StrictStr, field_validator
21
+ from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr, field_validator
22
22
  from typing import Any, ClassVar, Dict, List, Optional
23
23
  from rapidata.api_client.models.datapoint_asset import DatapointAsset
24
24
  from typing import Optional, Set
@@ -29,13 +29,14 @@ class Datapoint(BaseModel):
29
29
  Datapoint
30
30
  """ # noqa: E501
31
31
  t: StrictStr = Field(description="Discriminator value for Datapoint", alias="_t")
32
+ sort_index: Optional[StrictInt] = Field(default=None, alias="sortIndex")
32
33
  asset: DatapointAsset
33
34
  dataset_id: StrictStr = Field(alias="datasetId")
34
35
  id: Optional[StrictStr] = None
35
36
  deletion_date: Optional[datetime] = Field(default=None, alias="deletionDate")
36
37
  deleter_id: Optional[StrictStr] = Field(default=None, alias="deleterId")
37
38
  created_at: Optional[datetime] = Field(default=None, alias="createdAt")
38
- __properties: ClassVar[List[str]] = ["_t", "asset", "datasetId", "id", "deletionDate", "deleterId", "createdAt"]
39
+ __properties: ClassVar[List[str]] = ["_t", "sortIndex", "asset", "datasetId", "id", "deletionDate", "deleterId", "createdAt"]
39
40
 
40
41
  @field_validator('t')
41
42
  def t_validate_enum(cls, value):
@@ -86,6 +87,11 @@ class Datapoint(BaseModel):
86
87
  # override the default output from pydantic by calling `to_dict()` of asset
87
88
  if self.asset:
88
89
  _dict['asset'] = self.asset.to_dict()
90
+ # set to None if sort_index (nullable) is None
91
+ # and model_fields_set contains the field
92
+ if self.sort_index is None and "sort_index" in self.model_fields_set:
93
+ _dict['sortIndex'] = None
94
+
89
95
  # set to None if deletion_date (nullable) is None
90
96
  # and model_fields_set contains the field
91
97
  if self.deletion_date is None and "deletion_date" in self.model_fields_set:
@@ -109,6 +115,7 @@ class Datapoint(BaseModel):
109
115
 
110
116
  _obj = cls.model_validate({
111
117
  "_t": obj.get("_t") if obj.get("_t") is not None else 'Datapoint',
118
+ "sortIndex": obj.get("sortIndex"),
112
119
  "asset": DatapointAsset.from_dict(obj["asset"]) if obj.get("asset") is not None else None,
113
120
  "datasetId": obj.get("datasetId"),
114
121
  "id": obj.get("id"),
@@ -17,8 +17,8 @@ import pprint
17
17
  import re # noqa: F401
18
18
  import json
19
19
 
20
- from pydantic import BaseModel, ConfigDict, Field, StrictStr
21
- from typing import Any, ClassVar, Dict, List
20
+ from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
21
+ from typing import Any, ClassVar, Dict, List, Optional
22
22
  from rapidata.api_client.models.datapoint_metadata_model_metadata_inner import DatapointMetadataModelMetadataInner
23
23
  from typing import Optional, Set
24
24
  from typing_extensions import Self
@@ -29,7 +29,8 @@ class DatapointMetadataModel(BaseModel):
29
29
  """ # noqa: E501
30
30
  dataset_id: StrictStr = Field(description="The id of the dataset to create the datapoint in.", alias="datasetId")
31
31
  metadata: List[DatapointMetadataModelMetadataInner] = Field(description="The metadata of the datapoint.")
32
- __properties: ClassVar[List[str]] = ["datasetId", "metadata"]
32
+ sort_index: Optional[StrictInt] = Field(default=None, description="The index will be used to keep the datapoints in order. Useful if upload is parallelized", alias="sortIndex")
33
+ __properties: ClassVar[List[str]] = ["datasetId", "metadata", "sortIndex"]
33
34
 
34
35
  model_config = ConfigDict(
35
36
  populate_by_name=True,
@@ -77,6 +78,11 @@ class DatapointMetadataModel(BaseModel):
77
78
  if _item_metadata:
78
79
  _items.append(_item_metadata.to_dict())
79
80
  _dict['metadata'] = _items
81
+ # set to None if sort_index (nullable) is None
82
+ # and model_fields_set contains the field
83
+ if self.sort_index is None and "sort_index" in self.model_fields_set:
84
+ _dict['sortIndex'] = None
85
+
80
86
  return _dict
81
87
 
82
88
  @classmethod
@@ -90,7 +96,8 @@ class DatapointMetadataModel(BaseModel):
90
96
 
91
97
  _obj = cls.model_validate({
92
98
  "datasetId": obj.get("datasetId"),
93
- "metadata": [DatapointMetadataModelMetadataInner.from_dict(_item) for _item in obj["metadata"]] if obj.get("metadata") is not None else None
99
+ "metadata": [DatapointMetadataModelMetadataInner.from_dict(_item) for _item in obj["metadata"]] if obj.get("metadata") is not None else None,
100
+ "sortIndex": obj.get("sortIndex")
94
101
  })
95
102
  return _obj
96
103
 
@@ -0,0 +1,87 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ Rapidata.Dataset
5
+
6
+ No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
7
+
8
+ The version of the OpenAPI document: v1
9
+ Generated by OpenAPI Generator (https://openapi-generator.tech)
10
+
11
+ Do not edit the class manually.
12
+ """ # noqa: E501
13
+
14
+
15
+ from __future__ import annotations
16
+ import pprint
17
+ import re # noqa: F401
18
+ import json
19
+
20
+ from pydantic import BaseModel, ConfigDict, Field, StrictFloat, StrictInt
21
+ from typing import Any, ClassVar, Dict, List, Union
22
+ from typing import Optional, Set
23
+ from typing_extensions import Self
24
+
25
+ class GetCompareAbSummaryResult(BaseModel):
26
+ """
27
+ GetCompareAbSummaryResult
28
+ """ # noqa: E501
29
+ winner_counts: Dict[str, Union[StrictFloat, StrictInt]] = Field(alias="winnerCounts")
30
+ __properties: ClassVar[List[str]] = ["winnerCounts"]
31
+
32
+ model_config = ConfigDict(
33
+ populate_by_name=True,
34
+ validate_assignment=True,
35
+ protected_namespaces=(),
36
+ )
37
+
38
+
39
+ def to_str(self) -> str:
40
+ """Returns the string representation of the model using alias"""
41
+ return pprint.pformat(self.model_dump(by_alias=True))
42
+
43
+ def to_json(self) -> str:
44
+ """Returns the JSON representation of the model using alias"""
45
+ # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
46
+ return json.dumps(self.to_dict())
47
+
48
+ @classmethod
49
+ def from_json(cls, json_str: str) -> Optional[Self]:
50
+ """Create an instance of GetCompareAbSummaryResult from a JSON string"""
51
+ return cls.from_dict(json.loads(json_str))
52
+
53
+ def to_dict(self) -> Dict[str, Any]:
54
+ """Return the dictionary representation of the model using alias.
55
+
56
+ This has the following differences from calling pydantic's
57
+ `self.model_dump(by_alias=True)`:
58
+
59
+ * `None` is only added to the output dict for nullable fields that
60
+ were set at model initialization. Other fields with value `None`
61
+ are ignored.
62
+ """
63
+ excluded_fields: Set[str] = set([
64
+ ])
65
+
66
+ _dict = self.model_dump(
67
+ by_alias=True,
68
+ exclude=excluded_fields,
69
+ exclude_none=True,
70
+ )
71
+ return _dict
72
+
73
+ @classmethod
74
+ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
75
+ """Create an instance of GetCompareAbSummaryResult from a dict"""
76
+ if obj is None:
77
+ return None
78
+
79
+ if not isinstance(obj, dict):
80
+ return cls.model_validate(obj)
81
+
82
+ _obj = cls.model_validate({
83
+ "winnerCounts": obj.get("winnerCounts")
84
+ })
85
+ return _obj
86
+
87
+
@@ -29,7 +29,7 @@ class OrderModel(BaseModel):
29
29
  """ # noqa: E501
30
30
  id: StrictStr
31
31
  pipeline_id: StrictStr = Field(alias="pipelineId")
32
- order_date: Optional[datetime] = Field(alias="orderDate")
32
+ order_date: Optional[datetime] = Field(default=None, alias="orderDate")
33
33
  customer_mail: StrictStr = Field(alias="customerMail")
34
34
  state: StrictStr
35
35
  order_name: StrictStr = Field(alias="orderName")
@@ -24,9 +24,9 @@ from typing_extensions import Self
24
24
 
25
25
  class PreliminaryDownloadModel(BaseModel):
26
26
  """
27
- PreliminaryDownloadModel
27
+ The model for creating a preliminary download.
28
28
  """ # noqa: E501
29
- send_email: Optional[StrictBool] = Field(default=True, alias="sendEmail")
29
+ send_email: Optional[StrictBool] = Field(default=True, description="Whether to email the user when the download is ready.", alias="sendEmail")
30
30
  __properties: ClassVar[List[str]] = ["sendEmail"]
31
31
 
32
32
  model_config = ConfigDict(
@@ -32,7 +32,7 @@ class QueryValidationRapidsResult(BaseModel):
32
32
  """ # noqa: E501
33
33
  id: StrictStr
34
34
  type: StrictStr
35
- asset: Optional[QueryValidationRapidsResultAsset]
35
+ asset: Optional[QueryValidationRapidsResultAsset] = None
36
36
  truth: Optional[QueryValidationRapidsResultTruth] = None
37
37
  payload: QueryValidationRapidsResultPayload
38
38
  metadata: List[FileAssetModel1MetadataInner]
@@ -27,12 +27,12 @@ class ReadBridgeTokenKeysResult(BaseModel):
27
27
  ReadBridgeTokenKeysResult
28
28
  """ # noqa: E501
29
29
  t: StrictStr = Field(description="Discriminator value for ReadBridgeTokenKeysResult", alias="_t")
30
- access_token: Optional[StrictStr] = Field(alias="accessToken")
31
- expires_in: Optional[StrictInt] = Field(alias="expiresIn")
32
- refresh_token: Optional[StrictStr] = Field(alias="refreshToken")
33
- id_token: Optional[StrictStr] = Field(alias="idToken")
34
- token_type: Optional[StrictStr] = Field(alias="tokenType")
35
- scope: Optional[StrictStr]
30
+ access_token: Optional[StrictStr] = Field(default=None, alias="accessToken")
31
+ expires_in: Optional[StrictInt] = Field(default=None, alias="expiresIn")
32
+ refresh_token: Optional[StrictStr] = Field(default=None, alias="refreshToken")
33
+ id_token: Optional[StrictStr] = Field(default=None, alias="idToken")
34
+ token_type: Optional[StrictStr] = Field(default=None, alias="tokenType")
35
+ scope: Optional[StrictStr] = None
36
36
  __properties: ClassVar[List[str]] = ["_t", "accessToken", "expiresIn", "refreshToken", "idToken", "tokenType", "scope"]
37
37
 
38
38
  @field_validator('t')
@@ -28,7 +28,7 @@ class ReportModel(BaseModel):
28
28
  """ # noqa: E501
29
29
  rapid_id: StrictStr = Field(alias="rapidId")
30
30
  issue: StrictStr
31
- message: Optional[StrictStr]
31
+ message: Optional[StrictStr] = None
32
32
  __properties: ClassVar[List[str]] = ["rapidId", "issue", "message"]
33
33
 
34
34
  @field_validator('issue')
@@ -30,12 +30,12 @@ class SimpleWorkflowModel1(BaseModel):
30
30
  """ # noqa: E501
31
31
  t: StrictStr = Field(description="Discriminator value for SimpleWorkflowModel", alias="_t")
32
32
  id: StrictStr
33
- dataset_id: Optional[StrictStr] = Field(alias="datasetId")
33
+ dataset_id: Optional[StrictStr] = Field(default=None, alias="datasetId")
34
34
  state: StrictStr
35
35
  blueprint: ValidationImportPostRequestBlueprint
36
36
  referee: CompareWorkflowModel1Referee
37
37
  name: StrictStr
38
- owner_mail: Optional[StrictStr] = Field(alias="ownerMail")
38
+ owner_mail: Optional[StrictStr] = Field(default=None, alias="ownerMail")
39
39
  __properties: ClassVar[List[str]] = ["_t", "id", "datasetId", "state", "blueprint", "referee", "name", "ownerMail"]
40
40
 
41
41
  @field_validator('t')
@@ -26,8 +26,8 @@ class UpdateCampaignModel(BaseModel):
26
26
  """
27
27
  The model containing the new configuration for a campaign.
28
28
  """ # noqa: E501
29
- priority: Optional[StrictInt] = Field(description="A value above 0 indicating how much the campaign should be prioritized. The higher the value the more weight it will be given during campaign selection.")
30
- feature_flags: Optional[Dict[str, StrictStr]] = Field(description="The feature flags to assign this campaign.", alias="featureFlags")
29
+ priority: Optional[StrictInt] = Field(default=None, description="A value above 0 indicating how much the campaign should be prioritized. The higher the value the more weight it will be given during campaign selection.")
30
+ feature_flags: Optional[Dict[str, StrictStr]] = Field(default=None, description="The feature flags to assign this campaign.", alias="featureFlags")
31
31
  __properties: ClassVar[List[str]] = ["priority", "featureFlags"]
32
32
 
33
33
  model_config = ConfigDict(
@@ -28,8 +28,8 @@ class UpdateValidationRapidModel(BaseModel):
28
28
  The model for updating a validation rapid.
29
29
  """ # noqa: E501
30
30
  truth: UpdateValidationRapidModelTruth
31
- explanation: Optional[StrictStr]
32
- prompt: Optional[StrictStr]
31
+ explanation: Optional[StrictStr] = None
32
+ prompt: Optional[StrictStr] = None
33
33
  __properties: ClassVar[List[str]] = ["truth", "explanation", "prompt"]
34
34
 
35
35
  model_config = ConfigDict(
@@ -28,10 +28,10 @@ class UploadFilesFromS3BucketModel(BaseModel):
28
28
  """ # noqa: E501
29
29
  dataset_id: StrictStr = Field(description="The id of the dataset to upload the files to.", alias="datasetId")
30
30
  bucket_name: StrictStr = Field(description="The name of the S3 bucket to upload the files from.", alias="bucketName")
31
- region: Optional[StrictStr] = Field(description="The region of the S3 bucket.")
31
+ region: Optional[StrictStr] = Field(default=None, description="The region of the S3 bucket.")
32
32
  source_prefix: StrictStr = Field(description="The prefix of the files to upload.", alias="sourcePrefix")
33
- access_key: Optional[StrictStr] = Field(description="The access key to use for the S3 bucket.", alias="accessKey")
34
- secret_key: Optional[StrictStr] = Field(description="The secret key to use for the S3 bucket.", alias="secretKey")
33
+ access_key: Optional[StrictStr] = Field(default=None, description="The access key to use for the S3 bucket.", alias="accessKey")
34
+ secret_key: Optional[StrictStr] = Field(default=None, description="The secret key to use for the S3 bucket.", alias="secretKey")
35
35
  use_custom_aws_credentials: StrictBool = Field(description="Whether to use custom AWS credentials.", alias="useCustomAwsCredentials")
36
36
  clear_dataset: StrictBool = Field(description="Whether to clear the dataset before uploading the files.", alias="clearDataset")
37
37
  __properties: ClassVar[List[str]] = ["datasetId", "bucketName", "region", "sourcePrefix", "accessKey", "secretKey", "useCustomAwsCredentials", "clearDataset"]
@@ -17,8 +17,8 @@ import pprint
17
17
  import re # noqa: F401
18
18
  import json
19
19
 
20
- from pydantic import BaseModel, ConfigDict, Field, StrictStr
21
- from typing import Any, ClassVar, Dict, List
20
+ from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
21
+ from typing import Any, ClassVar, Dict, List, Optional
22
22
  from typing import Optional, Set
23
23
  from typing_extensions import Self
24
24
 
@@ -28,7 +28,8 @@ class UploadTextSourcesToDatasetModel(BaseModel):
28
28
  """ # noqa: E501
29
29
  dataset_id: StrictStr = Field(description="The id of the dataset to upload the text sources to.", alias="datasetId")
30
30
  text_sources: List[StrictStr] = Field(description="The text sources to upload.", alias="textSources")
31
- __properties: ClassVar[List[str]] = ["datasetId", "textSources"]
31
+ sort_index: Optional[StrictInt] = Field(default=None, description="The index will be used to keep the datapoints in order. Useful if upload is parallelized", alias="sortIndex")
32
+ __properties: ClassVar[List[str]] = ["datasetId", "textSources", "sortIndex"]
32
33
 
33
34
  model_config = ConfigDict(
34
35
  populate_by_name=True,
@@ -69,6 +70,11 @@ class UploadTextSourcesToDatasetModel(BaseModel):
69
70
  exclude=excluded_fields,
70
71
  exclude_none=True,
71
72
  )
73
+ # set to None if sort_index (nullable) is None
74
+ # and model_fields_set contains the field
75
+ if self.sort_index is None and "sort_index" in self.model_fields_set:
76
+ _dict['sortIndex'] = None
77
+
72
78
  return _dict
73
79
 
74
80
  @classmethod
@@ -82,7 +88,8 @@ class UploadTextSourcesToDatasetModel(BaseModel):
82
88
 
83
89
  _obj = cls.model_validate({
84
90
  "datasetId": obj.get("datasetId"),
85
- "textSources": obj.get("textSources")
91
+ "textSources": obj.get("textSources"),
92
+ "sortIndex": obj.get("sortIndex")
86
93
  })
87
94
  return _obj
88
95
 
@@ -90,9 +90,7 @@ Class | Method | HTTP request | Description
90
90
  *DatasetApi* | [**dataset_get_by_id_get**](rapidata/api_client/docs/DatasetApi.md#dataset_get_by_id_get) | **GET** /Dataset/GetById | Gets a dataset by its id.
91
91
  *DatasetApi* | [**dataset_import_post**](rapidata/api_client/docs/DatasetApi.md#dataset_import_post) | **POST** /Dataset/Import | Imports datapoints from a csv file.
92
92
  *DatasetApi* | [**dataset_update_name_post**](rapidata/api_client/docs/DatasetApi.md#dataset_update_name_post) | **POST** /Dataset/UpdateName | Updates the name of a dataset.
93
- *DatasetApi* | [**dataset_upload_datapoint_post**](rapidata/api_client/docs/DatasetApi.md#dataset_upload_datapoint_post) | **POST** /Dataset/UploadDatapoint | Creates a new multi asset datapoint.
94
93
  *DatasetApi* | [**dataset_upload_files_from_s3_post**](rapidata/api_client/docs/DatasetApi.md#dataset_upload_files_from_s3_post) | **POST** /Dataset/UploadFilesFromS3 | Uploads files from an S3 bucket to a dataset.
95
- *DatasetApi* | [**dataset_upload_images_to_dataset_post**](rapidata/api_client/docs/DatasetApi.md#dataset_upload_images_to_dataset_post) | **POST** /Dataset/UploadImagesToDataset | Uploads images to a dataset.
96
94
  *IdentityApi* | [**identity_create_bridge_token_post**](rapidata/api_client/docs/IdentityApi.md#identity_create_bridge_token_post) | **POST** /Identity/CreateBridgeToken | Creates a pair of read and write keys for a client. The write key is used to store the authentication result. The read key is used to retrieve the authentication result.
97
95
  *IdentityApi* | [**identity_read_bridge_token_get**](rapidata/api_client/docs/IdentityApi.md#identity_read_bridge_token_get) | **GET** /Identity/ReadBridgeToken | Tries to read the bridge token keys for a given read key. The read key is used to retrieve the authentication result written by the write key.
98
96
  *IdentityApi* | [**identity_register_temporary_post**](rapidata/api_client/docs/IdentityApi.md#identity_register_temporary_post) | **POST** /Identity/RegisterTemporary | Registers and logs in a temporary customer.
@@ -129,6 +127,7 @@ Class | Method | HTTP request | Description
129
127
  *RapidApi* | [**rapid_add_user_guess_post**](rapidata/api_client/docs/RapidApi.md#rapid_add_user_guess_post) | **POST** /Rapid/AddUserGuess | Submits a user guess for a Rapid.
130
128
  *RapidApi* | [**rapid_create_demographic_rapid_post**](rapidata/api_client/docs/RapidApi.md#rapid_create_demographic_rapid_post) | **POST** /Rapid/CreateDemographicRapid | Creates a new Demographic Rapid.
131
129
  *RapidApi* | [**rapid_query_validation_rapids_get**](rapidata/api_client/docs/RapidApi.md#rapid_query_validation_rapids_get) | **GET** /Rapid/QueryValidationRapids | Queries the validation rapids for a specific validation set.
130
+ *RapidApi* | [**rapid_rapid_id_delete**](rapidata/api_client/docs/RapidApi.md#rapid_rapid_id_delete) | **DELETE** /rapid/{rapidId} | Deletes a rapid.
132
131
  *RapidApi* | [**rapid_report_post**](rapidata/api_client/docs/RapidApi.md#rapid_report_post) | **POST** /Rapid/Report | Used to report an issue with a rapid.
133
132
  *RapidApi* | [**rapid_skip_user_guess_post**](rapidata/api_client/docs/RapidApi.md#rapid_skip_user_guess_post) | **POST** /Rapid/SkipUserGuess | Skips a Rapid for the user.
134
133
  *RapidApi* | [**rapid_validate_current_rapid_bag_get**](rapidata/api_client/docs/RapidApi.md#rapid_validate_current_rapid_bag_get) | **GET** /Rapid/ValidateCurrentRapidBag | Validates that the rapids associated with the current user are active.
@@ -147,6 +146,7 @@ Class | Method | HTTP request | Description
147
146
  *WorkflowApi* | [**workflow_delete_delete**](rapidata/api_client/docs/WorkflowApi.md#workflow_delete_delete) | **DELETE** /Workflow/Delete | Deletes a workflow.
148
147
  *WorkflowApi* | [**workflow_get_by_id_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_get_by_id_get) | **GET** /Workflow/GetById | Get a workflow by its ID.
149
148
  *WorkflowApi* | [**workflow_get_progress_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_get_progress_get) | **GET** /Workflow/GetProgress | Get the progress of a workflow.
149
+ *WorkflowApi* | [**workflow_id_compare_ab_summary_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_id_compare_ab_summary_get) | **GET** /workflow/{id}/compare-ab-summary | Calculates a summary of the results for a simple compare workflow. The summary includes the number of times an asset at each index was the winner.
150
150
  *WorkflowApi* | [**workflow_query_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_query_get) | **GET** /Workflow/Query | Queries workflows based on the provided filter, page, and sort criteria.
151
151
 
152
152
 
@@ -262,6 +262,7 @@ Class | Method | HTTP request | Description
262
262
  - [Gender](rapidata/api_client/docs/Gender.md)
263
263
  - [GenderUserFilterModel](rapidata/api_client/docs/GenderUserFilterModel.md)
264
264
  - [GetAvailableValidationSetsResult](rapidata/api_client/docs/GetAvailableValidationSetsResult.md)
265
+ - [GetCompareAbSummaryResult](rapidata/api_client/docs/GetCompareAbSummaryResult.md)
265
266
  - [GetCompareWorkflowResultsModel](rapidata/api_client/docs/GetCompareWorkflowResultsModel.md)
266
267
  - [GetCompareWorkflowResultsResult](rapidata/api_client/docs/GetCompareWorkflowResultsResult.md)
267
268
  - [GetCompareWorkflowResultsResultAsset](rapidata/api_client/docs/GetCompareWorkflowResultsResultAsset.md)
@@ -1,6 +1,7 @@
1
- """Media Asset Module
1
+ """Media Asset Module with Lazy Loading
2
2
 
3
3
  Defines the MediaAsset class for handling media file paths within assets.
4
+ Implements lazy loading for URL-based media to prevent unnecessary downloads.
4
5
  """
5
6
 
6
7
  import os
@@ -12,18 +13,20 @@ from PIL import Image
12
13
  from tinytag import TinyTag
13
14
  import tempfile
14
15
  from pydantic import StrictStr, StrictBytes
15
- from typing import Optional
16
+ from typing import Optional, cast
16
17
  import logging
18
+ from functools import cached_property
17
19
 
18
20
 
19
21
  class MediaAsset(BaseAsset):
20
- """MediaAsset Class
22
+ """MediaAsset Class with Lazy Loading
21
23
 
22
- Represents a media asset by storing the file path.
24
+ Represents a media asset by storing the file path or URL.
25
+ Only downloads URL content when needed.
23
26
  Supports local files and URLs for images, MP3, and MP4.
24
27
 
25
28
  Args:
26
- path (str): The file system path to the media asset.
29
+ path (str): The file system path to the media asset or URL.
27
30
 
28
31
  Raises:
29
32
  FileNotFoundError: If the provided file path does not exist.
@@ -67,23 +70,40 @@ class MediaAsset(BaseAsset):
67
70
 
68
71
  Raises:
69
72
  FileNotFoundError: If the provided file path does not exist.
70
- ValueError: If media type is unsupported or duration exceeds 25 seconds.
73
+ ValueError: If path is not a string.
71
74
  """
72
75
  if not isinstance(path, str):
73
76
  raise ValueError("Media must be a string, either a local file path or a URL")
74
77
 
78
+ self._url = None
79
+ self._content = None
80
+
75
81
  if re.match(r'^https?://', path):
76
- self.path = self.__get_media_bytes(path)
82
+ self._url = path
77
83
  self.name = path.split('/')[-1]
78
84
  self.name = self.__check_name_ending(self.name)
85
+ self.path = None # Will be set when content is downloaded
79
86
  return
80
87
 
81
88
  if not os.path.exists(path):
82
89
  raise FileNotFoundError(f"File not found: {path}")
83
90
 
84
- self.path: str | bytes = path
91
+ self.path = path
85
92
  self.name = path
86
93
 
94
+ @cached_property
95
+ def content(self) -> bytes:
96
+ """
97
+ Lazy loader for URL content. Only downloads when first accessed.
98
+ Uses cached_property to store the result after first download.
99
+ """
100
+ if self._url is None:
101
+ self.path = cast(str, self.path)
102
+ with open(self.path, 'rb') as f:
103
+ return f.read()
104
+
105
+ return self.__get_media_bytes(self._url)
106
+
87
107
  def get_duration(self) -> int:
88
108
  """
89
109
  Get the duration of audio/video files in milliseconds.
@@ -97,27 +117,22 @@ class MediaAsset(BaseAsset):
97
117
  """
98
118
  path_to_check = self.name.lower()
99
119
 
100
- # Return 0 for other static images
120
+ # Return 0 for static images
101
121
  if any(path_to_check.endswith(ext) for ext in ('.jpg', '.jpeg', '.png', '.webp', '.gif')):
102
122
  return 0
103
123
 
104
124
  try:
105
- # For URL downloads (bytes), write to temporary file first
106
- if isinstance(self.path, bytes):
107
- with tempfile.NamedTemporaryFile(suffix=os.path.splitext(self.name)[1], delete=False) as tmp:
108
- tmp.write(self.path)
109
- tmp.flush()
110
- # Close the file so it can be read
111
- tmp_path = tmp.name
125
+ # Create temporary file from content
126
+ with tempfile.NamedTemporaryFile(suffix=os.path.splitext(self.name)[1], delete=False) as tmp:
127
+ tmp.write(self.content)
128
+ tmp.flush()
129
+ tmp_path = tmp.name
112
130
 
113
131
  try:
114
132
  tag = TinyTag.get(tmp_path)
115
133
  finally:
116
134
  # Clean up the temporary file
117
135
  os.unlink(tmp_path)
118
- else:
119
- # For local files, use path directly
120
- tag = TinyTag.get(self.path)
121
136
 
122
137
  if tag.duration is None:
123
138
  raise ValueError("Could not read duration from file")
@@ -136,17 +151,14 @@ class MediaAsset(BaseAsset):
136
151
  return None
137
152
 
138
153
  try:
139
- if isinstance(self.path, bytes):
140
- img = Image.open(BytesIO(self.path))
141
- else:
142
- img = Image.open(self.path)
154
+ img = Image.open(BytesIO(self.content))
143
155
  return img.size
144
156
  except Exception:
145
157
  return None
146
158
 
147
159
  def set_custom_name(self, name: str) -> 'MediaAsset':
148
160
  """Set a custom name for the media asset (only works with URLs)."""
149
- if isinstance(self.path, bytes):
161
+ if self._url is not None:
150
162
  self.name = self.__check_name_ending(name)
151
163
  else:
152
164
  raise ValueError("Custom name can only be set for URLs.")
@@ -265,8 +277,10 @@ class MediaAsset(BaseAsset):
265
277
  self._logger.error(error_msg)
266
278
  raise ValueError(error_msg)
267
279
 
268
- def to_file(self) -> StrictStr | tuple[StrictStr, StrictBytes] | StrictBytes: # types for autogenerated models
269
- if isinstance(self.path, str):
280
+ def to_file(self) -> StrictStr | tuple[StrictStr, StrictBytes] | StrictBytes:
281
+ """Convert the media asset to a file representation."""
282
+ if self._url is None:
283
+ self.path = cast(str, self.path)
270
284
  return self.path
271
- else: # isinstance(self.path, bytes)
272
- return (self.name, self.path)
285
+ else:
286
+ return (self.name, self.content)
@@ -36,7 +36,7 @@ class RapidataDataset:
36
36
  isinstance(asset, TextAsset) for asset in text_asset.assets
37
37
  ), "All assets in a MultiAsset must be of type TextAsset."
38
38
 
39
- def upload_text_datapoint(text_asset: TextAsset | MultiAsset) -> None:
39
+ def upload_text_datapoint(text_asset: TextAsset | MultiAsset, index: int) -> None:
40
40
  if isinstance(text_asset, TextAsset):
41
41
  texts = [text_asset.text]
42
42
  elif isinstance(text_asset, MultiAsset):
@@ -46,7 +46,8 @@ class RapidataDataset:
46
46
 
47
47
  model = UploadTextSourcesToDatasetModel(
48
48
  datasetId=self.dataset_id,
49
- textSources=texts
49
+ textSources=texts,
50
+ sortIndex=index,
50
51
  )
51
52
 
52
53
  upload_response = self.openapi_service.dataset_api.dataset_creat_text_datapoint_post(model)
@@ -57,8 +58,8 @@ class RapidataDataset:
57
58
  total_uploads = len(text_assets)
58
59
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
59
60
  futures = [
60
- executor.submit(upload_text_datapoint, text_asset)
61
- for text_asset in text_assets
61
+ executor.submit(upload_text_datapoint, text_asset, index=i)
62
+ for i, text_asset in enumerate(text_assets)
62
63
  ]
63
64
 
64
65
  with tqdm(total=total_uploads, desc="Uploading text datapoints") as pbar:
@@ -83,7 +84,7 @@ class RapidataDataset:
83
84
  isinstance(asset, MediaAsset) for asset in media_path.assets
84
85
  ), "All assets in a MultiAsset must be of type MediaAsset."
85
86
 
86
- def upload_datapoint(media_asset: MediaAsset | MultiAsset, meta: Metadata | None) -> None:
87
+ def upload_datapoint(media_asset: MediaAsset | MultiAsset, meta: Metadata | None, index: int) -> None:
87
88
  if isinstance(media_asset, MediaAsset):
88
89
  assets = [media_asset]
89
90
  elif isinstance(media_asset, MultiAsset):
@@ -99,6 +100,7 @@ class RapidataDataset:
99
100
  if meta_model
100
101
  else []
101
102
  ),
103
+ sortIndex=index,
102
104
  )
103
105
 
104
106
  files: list[tuple[StrictStr, StrictBytes] | StrictStr | StrictBytes] = []
@@ -116,8 +118,8 @@ class RapidataDataset:
116
118
  total_uploads = len(media_paths)
117
119
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
118
120
  futures = [
119
- executor.submit(upload_datapoint, media_asset, meta)
120
- for media_asset, meta in zip_longest(media_paths, metadata or [])
121
+ executor.submit(upload_datapoint, media_asset, meta, index=i)
122
+ for i, (media_asset, meta) in enumerate(zip_longest(media_paths, metadata or []))
121
123
  ]
122
124
 
123
125
  with tqdm(total=total_uploads, desc="Uploading datapoints") as pbar:
@@ -65,6 +65,15 @@ class RapidataOrder:
65
65
  def get_status(self) -> str:
66
66
  """
67
67
  Gets the status of the order.
68
+
69
+ Different states are:
70
+ Created: The order has been created but not started yet.\n
71
+ Submitted: The order has been submitted and is being reviewed.\n
72
+ ManualReview: The order is in manual review - something went wrong with the automatic approval.\n
73
+ Processing: The order is actively being processed.\n
74
+ Paused: The order has been paused.\n
75
+ Completed: The order has been completed.\n
76
+ Failed: The order has failed.
68
77
 
69
78
  Returns:
70
79
  The status of the order.
@@ -36,6 +36,8 @@ from rapidata.api_client.models.root_filter import RootFilter
36
36
  from rapidata.api_client.models.filter import Filter
37
37
  from rapidata.api_client.models.sort_criterion import SortCriterion
38
38
 
39
+ from tqdm import tqdm
40
+
39
41
 
40
42
  class RapidataOrderManager:
41
43
  """
@@ -444,7 +446,7 @@ class RapidataOrderManager:
444
446
 
445
447
  assets = [MediaAsset(path=path) for path in datapoints]
446
448
 
447
- for asset in assets:
449
+ for asset in tqdm(assets, desc="Downloading assets and checking duration"):
448
450
  if not asset.get_duration():
449
451
  raise ValueError("The datapoints for this order must have a duration. (e.g. video or audio)")
450
452
 
@@ -15,6 +15,7 @@ from urllib3._collections import HTTPHeaderDict
15
15
  from rapidata.rapidata_client.validation.rapids.box import Box
16
16
 
17
17
  from rapidata.api_client.models.query_validation_set_model import QueryValidationSetModel
18
+ from tqdm import tqdm
18
19
 
19
20
 
20
21
  class ValidationSetManager:
@@ -404,7 +405,7 @@ class ValidationSetManager:
404
405
  openapi_service=self.__openapi_service
405
406
  )
406
407
 
407
- for rapid in rapids:
408
+ for rapid in tqdm(rapids, desc="Uploading validation tasks"):
408
409
  validation_set.add_rapid(rapid)
409
410
 
410
411
  return validation_set
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: rapidata
3
- Version: 2.7.1
3
+ Version: 2.8.0
4
4
  Summary: Rapidata package containing the Rapidata Python Client to interact with the Rapidata Web API in an easy way.
5
5
  License: Apache-2.0
6
6
  Author: Rapidata AG