rapidata-2.7.2-py3-none-any.whl → rapidata-2.9.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of rapidata might be problematic.
- rapidata/api_client/__init__.py +1 -0
- rapidata/api_client/api/dataset_api.py +31 -615
- rapidata/api_client/api/rapid_api.py +252 -0
- rapidata/api_client/api/workflow_api.py +280 -1
- rapidata/api_client/models/__init__.py +1 -0
- rapidata/api_client/models/add_campaign_model.py +1 -1
- rapidata/api_client/models/add_validation_rapid_model.py +2 -2
- rapidata/api_client/models/add_validation_text_rapid_model.py +2 -2
- rapidata/api_client/models/clients_query_result.py +3 -3
- rapidata/api_client/models/compare_workflow_model1.py +2 -2
- rapidata/api_client/models/coordinate.py +2 -2
- rapidata/api_client/models/datapoint.py +9 -2
- rapidata/api_client/models/datapoint_metadata_model.py +11 -4
- rapidata/api_client/models/get_compare_ab_summary_result.py +87 -0
- rapidata/api_client/models/order_model.py +1 -1
- rapidata/api_client/models/preliminary_download_model.py +2 -2
- rapidata/api_client/models/query_validation_rapids_result.py +1 -1
- rapidata/api_client/models/read_bridge_token_keys_result.py +6 -6
- rapidata/api_client/models/report_model.py +1 -1
- rapidata/api_client/models/simple_workflow_model1.py +2 -2
- rapidata/api_client/models/update_campaign_model.py +2 -2
- rapidata/api_client/models/update_validation_rapid_model.py +2 -2
- rapidata/api_client/models/upload_files_from_s3_bucket_model.py +3 -3
- rapidata/api_client/models/upload_text_sources_to_dataset_model.py +11 -4
- rapidata/api_client_README.md +3 -2
- rapidata/rapidata_client/order/_rapidata_dataset.py +9 -7
- rapidata/rapidata_client/order/rapidata_order.py +14 -4
- rapidata/rapidata_client/order/rapidata_results.py +143 -0
- rapidata/rapidata_client/validation/validation_set_manager.py +11 -8
- {rapidata-2.7.2.dist-info → rapidata-2.9.0.dist-info}/METADATA +2 -1
- {rapidata-2.7.2.dist-info → rapidata-2.9.0.dist-info}/RECORD +33 -31
- {rapidata-2.7.2.dist-info → rapidata-2.9.0.dist-info}/WHEEL +1 -1
- {rapidata-2.7.2.dist-info → rapidata-2.9.0.dist-info}/LICENSE +0 -0
rapidata/api_client/models/datapoint.py
CHANGED

@@ -18,7 +18,7 @@ import re  # noqa: F401
 import json
 
 from datetime import datetime
-from pydantic import BaseModel, ConfigDict, Field, StrictStr, field_validator
+from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr, field_validator
 from typing import Any, ClassVar, Dict, List, Optional
 from rapidata.api_client.models.datapoint_asset import DatapointAsset
 from typing import Optional, Set
@@ -29,13 +29,14 @@ class Datapoint(BaseModel):
     Datapoint
     """ # noqa: E501
     t: StrictStr = Field(description="Discriminator value for Datapoint", alias="_t")
+    sort_index: Optional[StrictInt] = Field(default=None, alias="sortIndex")
     asset: DatapointAsset
     dataset_id: StrictStr = Field(alias="datasetId")
     id: Optional[StrictStr] = None
     deletion_date: Optional[datetime] = Field(default=None, alias="deletionDate")
     deleter_id: Optional[StrictStr] = Field(default=None, alias="deleterId")
     created_at: Optional[datetime] = Field(default=None, alias="createdAt")
-    __properties: ClassVar[List[str]] = ["_t", "asset", "datasetId", "id", "deletionDate", "deleterId", "createdAt"]
+    __properties: ClassVar[List[str]] = ["_t", "sortIndex", "asset", "datasetId", "id", "deletionDate", "deleterId", "createdAt"]
 
     @field_validator('t')
     def t_validate_enum(cls, value):
@@ -86,6 +87,11 @@ class Datapoint(BaseModel):
         # override the default output from pydantic by calling `to_dict()` of asset
         if self.asset:
             _dict['asset'] = self.asset.to_dict()
+        # set to None if sort_index (nullable) is None
+        # and model_fields_set contains the field
+        if self.sort_index is None and "sort_index" in self.model_fields_set:
+            _dict['sortIndex'] = None
+
         # set to None if deletion_date (nullable) is None
         # and model_fields_set contains the field
         if self.deletion_date is None and "deletion_date" in self.model_fields_set:
@@ -109,6 +115,7 @@ class Datapoint(BaseModel):
 
         _obj = cls.model_validate({
             "_t": obj.get("_t") if obj.get("_t") is not None else 'Datapoint',
+            "sortIndex": obj.get("sortIndex"),
            "asset": DatapointAsset.from_dict(obj["asset"]) if obj.get("asset") is not None else None,
            "datasetId": obj.get("datasetId"),
            "id": obj.get("id"),
rapidata/api_client/models/datapoint_metadata_model.py
CHANGED

@@ -17,8 +17,8 @@ import pprint
 import re  # noqa: F401
 import json
 
-from pydantic import BaseModel, ConfigDict, Field, StrictStr
-from typing import Any, ClassVar, Dict, List
+from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
 from rapidata.api_client.models.datapoint_metadata_model_metadata_inner import DatapointMetadataModelMetadataInner
 from typing import Optional, Set
 from typing_extensions import Self
@@ -29,7 +29,8 @@ class DatapointMetadataModel(BaseModel):
     """ # noqa: E501
     dataset_id: StrictStr = Field(description="The id of the dataset to create the datapoint in.", alias="datasetId")
     metadata: List[DatapointMetadataModelMetadataInner] = Field(description="The metadata of the datapoint.")
-    __properties: ClassVar[List[str]] = ["datasetId", "metadata"]
+    sort_index: Optional[StrictInt] = Field(default=None, description="The index will be used to keep the datapoints in order. Useful if upload is parallelized", alias="sortIndex")
+    __properties: ClassVar[List[str]] = ["datasetId", "metadata", "sortIndex"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -77,6 +78,11 @@ class DatapointMetadataModel(BaseModel):
                 if _item_metadata:
                     _items.append(_item_metadata.to_dict())
             _dict['metadata'] = _items
+        # set to None if sort_index (nullable) is None
+        # and model_fields_set contains the field
+        if self.sort_index is None and "sort_index" in self.model_fields_set:
+            _dict['sortIndex'] = None
+
         return _dict
 
     @classmethod
@@ -90,7 +96,8 @@ class DatapointMetadataModel(BaseModel):
 
         _obj = cls.model_validate({
             "datasetId": obj.get("datasetId"),
-            "metadata": [DatapointMetadataModelMetadataInner.from_dict(_item) for _item in obj["metadata"]] if obj.get("metadata") is not None else None
+            "metadata": [DatapointMetadataModelMetadataInner.from_dict(_item) for _item in obj["metadata"]] if obj.get("metadata") is not None else None,
+            "sortIndex": obj.get("sortIndex")
         })
         return _obj
 
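The `sortIndex` plumbing above is symmetric: `to_dict()` only emits the key when the field was explicitly set, and `from_dict()` reads it back. A minimal round-trip sketch, assuming rapidata 2.9.0 is installed; the dataset id is a placeholder:

```python
# Round-tripping the new sortIndex field (dataset id is a placeholder).
from rapidata.api_client.models.datapoint_metadata_model import DatapointMetadataModel

# sortIndex is optional; omitting it keeps the pre-2.9.0 behavior.
model = DatapointMetadataModel(datasetId="my-dataset-id", metadata=[], sortIndex=3)
print(model.to_dict())  # includes 'sortIndex': 3 alongside datasetId/metadata

# from_dict restores the snake_case attribute from the camelCase key.
restored = DatapointMetadataModel.from_dict(model.to_dict())
assert restored is not None and restored.sort_index == 3
```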
rapidata/api_client/models/get_compare_ab_summary_result.py
ADDED

@@ -0,0 +1,87 @@
+# coding: utf-8
+
+"""
+    Rapidata.Dataset
+
+    No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
+
+    The version of the OpenAPI document: v1
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re  # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, Field, StrictFloat, StrictInt
+from typing import Any, ClassVar, Dict, List, Union
+from typing import Optional, Set
+from typing_extensions import Self
+
+class GetCompareAbSummaryResult(BaseModel):
+    """
+    GetCompareAbSummaryResult
+    """ # noqa: E501
+    winner_counts: Dict[str, Union[StrictFloat, StrictInt]] = Field(alias="winnerCounts")
+    __properties: ClassVar[List[str]] = ["winnerCounts"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of GetCompareAbSummaryResult from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of GetCompareAbSummaryResult from a dict"""
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj)
+
+        _obj = cls.model_validate({
+            "winnerCounts": obj.get("winnerCounts")
+        })
+        return _obj
+
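Per the endpoint description in the README diff later on this page, `winner_counts` maps an asset index to the number of times the asset at that index won the A/B comparison. A minimal parsing sketch, assuming rapidata 2.9.0 is installed; the JSON payload is illustrative, not a captured API response:

```python
# Parsing an illustrative compare-ab-summary payload.
from rapidata.api_client.models.get_compare_ab_summary_result import GetCompareAbSummaryResult

summary = GetCompareAbSummaryResult.from_json('{"winnerCounts": {"0": 41, "1": 59}}')
assert summary is not None
print(summary.winner_counts)  # {'0': 41, '1': 59}
```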
rapidata/api_client/models/order_model.py
CHANGED

@@ -29,7 +29,7 @@ class OrderModel(BaseModel):
     """ # noqa: E501
     id: StrictStr
     pipeline_id: StrictStr = Field(alias="pipelineId")
-    order_date: Optional[datetime] = Field(alias="orderDate")
+    order_date: Optional[datetime] = Field(default=None, alias="orderDate")
     customer_mail: StrictStr = Field(alias="customerMail")
     state: StrictStr
     order_name: StrictStr = Field(alias="orderName")
rapidata/api_client/models/preliminary_download_model.py
CHANGED

@@ -24,9 +24,9 @@ from typing_extensions import Self
 
 class PreliminaryDownloadModel(BaseModel):
     """
-
+    The model for creating a preliminary download.
     """ # noqa: E501
-    send_email: Optional[StrictBool] = Field(default=True, alias="sendEmail")
+    send_email: Optional[StrictBool] = Field(default=True, description="Whether to email the user when the download is ready.", alias="sendEmail")
     __properties: ClassVar[List[str]] = ["sendEmail"]
 
     model_config = ConfigDict(
rapidata/api_client/models/query_validation_rapids_result.py
CHANGED

@@ -32,7 +32,7 @@ class QueryValidationRapidsResult(BaseModel):
     """ # noqa: E501
     id: StrictStr
     type: StrictStr
-    asset: Optional[QueryValidationRapidsResultAsset]
+    asset: Optional[QueryValidationRapidsResultAsset] = None
     truth: Optional[QueryValidationRapidsResultTruth] = None
     payload: QueryValidationRapidsResultPayload
     metadata: List[FileAssetModel1MetadataInner]
rapidata/api_client/models/read_bridge_token_keys_result.py
CHANGED

@@ -27,12 +27,12 @@ class ReadBridgeTokenKeysResult(BaseModel):
     ReadBridgeTokenKeysResult
     """ # noqa: E501
     t: StrictStr = Field(description="Discriminator value for ReadBridgeTokenKeysResult", alias="_t")
-    access_token: Optional[StrictStr] = Field(alias="accessToken")
-    expires_in: Optional[StrictInt] = Field(alias="expiresIn")
-    refresh_token: Optional[StrictStr] = Field(alias="refreshToken")
-    id_token: Optional[StrictStr] = Field(alias="idToken")
-    token_type: Optional[StrictStr] = Field(alias="tokenType")
-    scope: Optional[StrictStr]
+    access_token: Optional[StrictStr] = Field(default=None, alias="accessToken")
+    expires_in: Optional[StrictInt] = Field(default=None, alias="expiresIn")
+    refresh_token: Optional[StrictStr] = Field(default=None, alias="refreshToken")
+    id_token: Optional[StrictStr] = Field(default=None, alias="idToken")
+    token_type: Optional[StrictStr] = Field(default=None, alias="tokenType")
+    scope: Optional[StrictStr] = None
     __properties: ClassVar[List[str]] = ["_t", "accessToken", "expiresIn", "refreshToken", "idToken", "tokenType", "scope"]
 
     @field_validator('t')
rapidata/api_client/models/report_model.py
CHANGED

@@ -28,7 +28,7 @@ class ReportModel(BaseModel):
     """ # noqa: E501
     rapid_id: StrictStr = Field(alias="rapidId")
     issue: StrictStr
-    message: Optional[StrictStr]
+    message: Optional[StrictStr] = None
     __properties: ClassVar[List[str]] = ["rapidId", "issue", "message"]
 
     @field_validator('issue')
rapidata/api_client/models/simple_workflow_model1.py
CHANGED

@@ -30,12 +30,12 @@ class SimpleWorkflowModel1(BaseModel):
     """ # noqa: E501
     t: StrictStr = Field(description="Discriminator value for SimpleWorkflowModel", alias="_t")
     id: StrictStr
-    dataset_id: Optional[StrictStr] = Field(alias="datasetId")
+    dataset_id: Optional[StrictStr] = Field(default=None, alias="datasetId")
     state: StrictStr
     blueprint: ValidationImportPostRequestBlueprint
     referee: CompareWorkflowModel1Referee
     name: StrictStr
-    owner_mail: Optional[StrictStr] = Field(alias="ownerMail")
+    owner_mail: Optional[StrictStr] = Field(default=None, alias="ownerMail")
     __properties: ClassVar[List[str]] = ["_t", "id", "datasetId", "state", "blueprint", "referee", "name", "ownerMail"]
 
     @field_validator('t')
rapidata/api_client/models/update_campaign_model.py
CHANGED

@@ -26,8 +26,8 @@ class UpdateCampaignModel(BaseModel):
     """
     The model containing the new configuration for a campaign.
     """ # noqa: E501
-    priority: Optional[StrictInt] = Field(description="A value above 0 indicating how much the campaign should be prioritized. The higher the value the more weight it will be given during campaign selection.")
-    feature_flags: Optional[Dict[str, StrictStr]] = Field(description="The feature flags to assign this campaign.", alias="featureFlags")
+    priority: Optional[StrictInt] = Field(default=None, description="A value above 0 indicating how much the campaign should be prioritized. The higher the value the more weight it will be given during campaign selection.")
+    feature_flags: Optional[Dict[str, StrictStr]] = Field(default=None, description="The feature flags to assign this campaign.", alias="featureFlags")
     __properties: ClassVar[List[str]] = ["priority", "featureFlags"]
 
     model_config = ConfigDict(
rapidata/api_client/models/update_validation_rapid_model.py
CHANGED

@@ -28,8 +28,8 @@ class UpdateValidationRapidModel(BaseModel):
     The model for updating a validation rapid.
     """ # noqa: E501
     truth: UpdateValidationRapidModelTruth
-    explanation: Optional[StrictStr]
-    prompt: Optional[StrictStr]
+    explanation: Optional[StrictStr] = None
+    prompt: Optional[StrictStr] = None
     __properties: ClassVar[List[str]] = ["truth", "explanation", "prompt"]
 
     model_config = ConfigDict(
rapidata/api_client/models/upload_files_from_s3_bucket_model.py
CHANGED

@@ -28,10 +28,10 @@ class UploadFilesFromS3BucketModel(BaseModel):
     """ # noqa: E501
     dataset_id: StrictStr = Field(description="The id of the dataset to upload the files to.", alias="datasetId")
     bucket_name: StrictStr = Field(description="The name of the S3 bucket to upload the files from.", alias="bucketName")
-    region: Optional[StrictStr] = Field(description="The region of the S3 bucket.")
+    region: Optional[StrictStr] = Field(default=None, description="The region of the S3 bucket.")
     source_prefix: StrictStr = Field(description="The prefix of the files to upload.", alias="sourcePrefix")
-    access_key: Optional[StrictStr] = Field(description="The access key to use for the S3 bucket.", alias="accessKey")
-    secret_key: Optional[StrictStr] = Field(description="The secret key to use for the S3 bucket.", alias="secretKey")
+    access_key: Optional[StrictStr] = Field(default=None, description="The access key to use for the S3 bucket.", alias="accessKey")
+    secret_key: Optional[StrictStr] = Field(default=None, description="The secret key to use for the S3 bucket.", alias="secretKey")
     use_custom_aws_credentials: StrictBool = Field(description="Whether to use custom AWS credentials.", alias="useCustomAwsCredentials")
     clear_dataset: StrictBool = Field(description="Whether to clear the dataset before uploading the files.", alias="clearDataset")
     __properties: ClassVar[List[str]] = ["datasetId", "bucketName", "region", "sourcePrefix", "accessKey", "secretKey", "useCustomAwsCredentials", "clearDataset"]
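The practical effect of adding `default=None` here is that callers may now simply omit `region`, `accessKey`, and `secretKey` rather than passing them explicitly. A minimal construction sketch, assuming rapidata 2.9.0 is installed; the dataset id and bucket name are placeholders:

```python
# Constructing the S3 upload model without custom credentials (ids are placeholders).
from rapidata.api_client.models.upload_files_from_s3_bucket_model import UploadFilesFromS3BucketModel

# region/accessKey/secretKey now default to None and can be left out when
# useCustomAwsCredentials is False.
model = UploadFilesFromS3BucketModel(
    datasetId="my-dataset-id",
    bucketName="my-bucket",
    sourcePrefix="images/",
    useCustomAwsCredentials=False,
    clearDataset=False,
)
print(model.to_json())
```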
rapidata/api_client/models/upload_text_sources_to_dataset_model.py
CHANGED

@@ -17,8 +17,8 @@ import pprint
 import re  # noqa: F401
 import json
 
-from pydantic import BaseModel, ConfigDict, Field, StrictStr
-from typing import Any, ClassVar, Dict, List
+from pydantic import BaseModel, ConfigDict, Field, StrictInt, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
 from typing import Optional, Set
 from typing_extensions import Self
 
@@ -28,7 +28,8 @@ class UploadTextSourcesToDatasetModel(BaseModel):
     """ # noqa: E501
     dataset_id: StrictStr = Field(description="The id of the dataset to upload the text sources to.", alias="datasetId")
     text_sources: List[StrictStr] = Field(description="The text sources to upload.", alias="textSources")
-    __properties: ClassVar[List[str]] = ["datasetId", "textSources"]
+    sort_index: Optional[StrictInt] = Field(default=None, description="The index will be used to keep the datapoints in order. Useful if upload is parallelized", alias="sortIndex")
+    __properties: ClassVar[List[str]] = ["datasetId", "textSources", "sortIndex"]
 
     model_config = ConfigDict(
         populate_by_name=True,
@@ -69,6 +70,11 @@ class UploadTextSourcesToDatasetModel(BaseModel):
             exclude=excluded_fields,
             exclude_none=True,
         )
+        # set to None if sort_index (nullable) is None
+        # and model_fields_set contains the field
+        if self.sort_index is None and "sort_index" in self.model_fields_set:
+            _dict['sortIndex'] = None
+
         return _dict
 
     @classmethod
@@ -82,7 +88,8 @@ class UploadTextSourcesToDatasetModel(BaseModel):
 
         _obj = cls.model_validate({
             "datasetId": obj.get("datasetId"),
-            "textSources": obj.get("textSources")
+            "textSources": obj.get("textSources"),
+            "sortIndex": obj.get("sortIndex")
         })
         return _obj
 
rapidata/api_client_README.md
CHANGED

@@ -90,9 +90,7 @@ Class | Method | HTTP request | Description
 *DatasetApi* | [**dataset_get_by_id_get**](rapidata/api_client/docs/DatasetApi.md#dataset_get_by_id_get) | **GET** /Dataset/GetById | Gets a dataset by its id.
 *DatasetApi* | [**dataset_import_post**](rapidata/api_client/docs/DatasetApi.md#dataset_import_post) | **POST** /Dataset/Import | Imports datapoints from a csv file.
 *DatasetApi* | [**dataset_update_name_post**](rapidata/api_client/docs/DatasetApi.md#dataset_update_name_post) | **POST** /Dataset/UpdateName | Updates the name of a dataset.
-*DatasetApi* | [**dataset_upload_datapoint_post**](rapidata/api_client/docs/DatasetApi.md#dataset_upload_datapoint_post) | **POST** /Dataset/UploadDatapoint | Creates a new multi asset datapoint.
 *DatasetApi* | [**dataset_upload_files_from_s3_post**](rapidata/api_client/docs/DatasetApi.md#dataset_upload_files_from_s3_post) | **POST** /Dataset/UploadFilesFromS3 | Uploads files from an S3 bucket to a dataset.
-*DatasetApi* | [**dataset_upload_images_to_dataset_post**](rapidata/api_client/docs/DatasetApi.md#dataset_upload_images_to_dataset_post) | **POST** /Dataset/UploadImagesToDataset | Uploads images to a dataset.
 *IdentityApi* | [**identity_create_bridge_token_post**](rapidata/api_client/docs/IdentityApi.md#identity_create_bridge_token_post) | **POST** /Identity/CreateBridgeToken | Creates a pair of read and write keys for a client. The write key is used to store the authentication result. The read key is used to retrieve the authentication result.
 *IdentityApi* | [**identity_read_bridge_token_get**](rapidata/api_client/docs/IdentityApi.md#identity_read_bridge_token_get) | **GET** /Identity/ReadBridgeToken | Tries to read the bridge token keys for a given read key. The read key is used to retrieve the authentication result written by the write key.
 *IdentityApi* | [**identity_register_temporary_post**](rapidata/api_client/docs/IdentityApi.md#identity_register_temporary_post) | **POST** /Identity/RegisterTemporary | Registers and logs in a temporary customer.
@@ -129,6 +127,7 @@ Class | Method | HTTP request | Description
 *RapidApi* | [**rapid_add_user_guess_post**](rapidata/api_client/docs/RapidApi.md#rapid_add_user_guess_post) | **POST** /Rapid/AddUserGuess | Submits a user guess for a Rapid.
 *RapidApi* | [**rapid_create_demographic_rapid_post**](rapidata/api_client/docs/RapidApi.md#rapid_create_demographic_rapid_post) | **POST** /Rapid/CreateDemographicRapid | Creates a new Demographic Rapid.
 *RapidApi* | [**rapid_query_validation_rapids_get**](rapidata/api_client/docs/RapidApi.md#rapid_query_validation_rapids_get) | **GET** /Rapid/QueryValidationRapids | Queries the validation rapids for a specific validation set.
+*RapidApi* | [**rapid_rapid_id_delete**](rapidata/api_client/docs/RapidApi.md#rapid_rapid_id_delete) | **DELETE** /rapid/{rapidId} | Deletes a rapid.
 *RapidApi* | [**rapid_report_post**](rapidata/api_client/docs/RapidApi.md#rapid_report_post) | **POST** /Rapid/Report | Used to report an issue with a rapid.
 *RapidApi* | [**rapid_skip_user_guess_post**](rapidata/api_client/docs/RapidApi.md#rapid_skip_user_guess_post) | **POST** /Rapid/SkipUserGuess | Skips a Rapid for the user.
 *RapidApi* | [**rapid_validate_current_rapid_bag_get**](rapidata/api_client/docs/RapidApi.md#rapid_validate_current_rapid_bag_get) | **GET** /Rapid/ValidateCurrentRapidBag | Validates that the rapids associated with the current user are active.
@@ -147,6 +146,7 @@ Class | Method | HTTP request | Description
 *WorkflowApi* | [**workflow_delete_delete**](rapidata/api_client/docs/WorkflowApi.md#workflow_delete_delete) | **DELETE** /Workflow/Delete | Deletes a workflow.
 *WorkflowApi* | [**workflow_get_by_id_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_get_by_id_get) | **GET** /Workflow/GetById | Get a workflow by its ID.
 *WorkflowApi* | [**workflow_get_progress_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_get_progress_get) | **GET** /Workflow/GetProgress | Get the progress of a workflow.
+*WorkflowApi* | [**workflow_id_compare_ab_summary_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_id_compare_ab_summary_get) | **GET** /workflow/{id}/compare-ab-summary | Calculates a summary of the results for a simple compare workflow. The summary includes the number of times an asset at each index was the winner.
 *WorkflowApi* | [**workflow_query_get**](rapidata/api_client/docs/WorkflowApi.md#workflow_query_get) | **GET** /Workflow/Query | Queries workflows based on the provided filter, page, and sort criteria.
 
 
@@ -262,6 +262,7 @@ Class | Method | HTTP request | Description
 - [Gender](rapidata/api_client/docs/Gender.md)
 - [GenderUserFilterModel](rapidata/api_client/docs/GenderUserFilterModel.md)
 - [GetAvailableValidationSetsResult](rapidata/api_client/docs/GetAvailableValidationSetsResult.md)
+- [GetCompareAbSummaryResult](rapidata/api_client/docs/GetCompareAbSummaryResult.md)
 - [GetCompareWorkflowResultsModel](rapidata/api_client/docs/GetCompareWorkflowResultsModel.md)
 - [GetCompareWorkflowResultsResult](rapidata/api_client/docs/GetCompareWorkflowResultsResult.md)
 - [GetCompareWorkflowResultsResultAsset](rapidata/api_client/docs/GetCompareWorkflowResultsResultAsset.md)
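A hedged sketch of calling the new compare-ab-summary endpoint through the generated client. The import paths follow the usual openapi-generator package layout; the host, the auth setup (elided), and the `id` parameter name are assumptions read off the route, not confirmed signatures:

```python
# Calling GET /workflow/{id}/compare-ab-summary via the generated client (sketch).
from rapidata.api_client import ApiClient, Configuration
from rapidata.api_client.api.workflow_api import WorkflowApi

with ApiClient(Configuration(host="https://api.rapidata.ai")) as client:  # host assumed
    workflow_api = WorkflowApi(client)
    # Parameter name `id` assumed from the route; workflow id is a placeholder.
    summary = workflow_api.workflow_id_compare_ab_summary_get(id="workflow-id")
    print(summary.winner_counts)
```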
rapidata/rapidata_client/order/_rapidata_dataset.py
CHANGED

@@ -36,7 +36,7 @@ class RapidataDataset:
            isinstance(asset, TextAsset) for asset in text_asset.assets
        ), "All assets in a MultiAsset must be of type TextAsset."
 
-        def upload_text_datapoint(text_asset: TextAsset | MultiAsset) -> None:
+        def upload_text_datapoint(text_asset: TextAsset | MultiAsset, index: int) -> None:
             if isinstance(text_asset, TextAsset):
                 texts = [text_asset.text]
             elif isinstance(text_asset, MultiAsset):
@@ -46,7 +46,8 @@ class RapidataDataset:
 
             model = UploadTextSourcesToDatasetModel(
                 datasetId=self.dataset_id,
-                textSources=texts
+                textSources=texts,
+                sortIndex=index,
             )
 
             upload_response = self.openapi_service.dataset_api.dataset_creat_text_datapoint_post(model)
@@ -57,8 +58,8 @@ class RapidataDataset:
         total_uploads = len(text_assets)
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             futures = [
-                executor.submit(upload_text_datapoint, text_asset)
-                for text_asset in text_assets
+                executor.submit(upload_text_datapoint, text_asset, index=i)
+                for i, text_asset in enumerate(text_assets)
             ]
 
             with tqdm(total=total_uploads, desc="Uploading text datapoints") as pbar:
@@ -83,7 +84,7 @@ class RapidataDataset:
            isinstance(asset, MediaAsset) for asset in media_path.assets
        ), "All assets in a MultiAsset must be of type MediaAsset."
 
-        def upload_datapoint(media_asset: MediaAsset | MultiAsset, meta: Metadata | None) -> None:
+        def upload_datapoint(media_asset: MediaAsset | MultiAsset, meta: Metadata | None, index: int) -> None:
             if isinstance(media_asset, MediaAsset):
                 assets = [media_asset]
             elif isinstance(media_asset, MultiAsset):
@@ -99,6 +100,7 @@ class RapidataDataset:
                     if meta_model
                     else []
                 ),
+                sortIndex=index,
             )
 
             files: list[tuple[StrictStr, StrictBytes] | StrictStr | StrictBytes] = []
@@ -116,8 +118,8 @@ class RapidataDataset:
         total_uploads = len(media_paths)
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             futures = [
-                executor.submit(upload_datapoint, media_asset, meta)
-                for media_asset, meta in zip_longest(media_paths, metadata or [])
+                executor.submit(upload_datapoint, media_asset, meta, index=i)
+                for i, (media_asset, meta) in enumerate(zip_longest(media_paths, metadata or []))
             ]
 
             with tqdm(total=total_uploads, desc="Uploading datapoints") as pbar:
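The reason `enumerate` is enough here: the index is bound at submit time, so however the thread pool interleaves completions, each datapoint still carries the position it had in the input list. A self-contained sketch of the pattern, with no rapidata dependency:

```python
# Demonstrates that an index bound at submit time survives out-of-order completion.
from concurrent.futures import ThreadPoolExecutor, as_completed
import random
import time

def upload(item: str, index: int) -> tuple[int, str]:
    time.sleep(random.random() / 100)  # simulate variable network latency
    return index, item                 # the server would store index as sortIndex

items = ["a", "b", "c", "d"]
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = [executor.submit(upload, item, index=i) for i, item in enumerate(items)]
    received = [f.result() for f in as_completed(futures)]  # completion order varies

# Sorting by the submitted index always recovers the original input order.
assert sorted(received) == [(0, "a"), (1, "b"), (2, "c"), (3, "d")]
```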
rapidata/rapidata_client/order/rapidata_order.py
CHANGED

@@ -8,6 +8,7 @@ from typing import Optional, cast, Any
 from rapidata.api_client.models.workflow_artifact_model import WorkflowArtifactModel
 from rapidata.api_client.models.preliminary_download_model import PreliminaryDownloadModel
 from tqdm import tqdm
+from rapidata.rapidata_client.order.rapidata_results import RapidataResults
 
 class RapidataOrder:
     """
@@ -65,6 +66,15 @@ class RapidataOrder:
     def get_status(self) -> str:
         """
         Gets the status of the order.
+
+        Different states are:
+        Created: The order has been created but not started yet.\n
+        Submitted: The order has been submitted and is being reviewed.\n
+        ManualReview: The order is in manual review - something went wrong with the automatic approval.\n
+        Processing: The order is actively being processed.\n
+        Paused: The order has been paused.\n
+        Completed: The order has been completed.\n
+        Failed: The order has failed.
 
         Returns:
             The status of the order.
@@ -145,13 +155,13 @@ class RapidataOrder:
 
         return progress
 
-    def __get_preliminary_results(self) ->
+    def __get_preliminary_results(self) -> RapidataResults:
         pipeline_id = self.__get_pipeline_id()
         try:
             download_id = self.__openapi_service.pipeline_api.pipeline_pipeline_id_preliminary_download_post(pipeline_id, PreliminaryDownloadModel(sendEmail=False)).download_id
             while not (preliminary_results := self.__openapi_service.pipeline_api.pipeline_preliminary_download_preliminary_download_id_get(preliminary_download_id=download_id)):
                 sleep(1)
-            return json.loads(preliminary_results.decode())
+            return RapidataResults(json.loads(preliminary_results.decode()))
 
         except ApiException as e:
             # Handle API exceptions
@@ -160,7 +170,7 @@ class RapidataOrder:
             # Handle JSON parsing errors
             raise Exception(f"Failed to parse preliminary order results: {str(e)}") from e
 
-    def get_results(self, preliminary_results=False) ->
+    def get_results(self, preliminary_results: bool=False) -> RapidataResults:
         """
         Gets the results of the order.
         If the order is still processing, this method will block until the order is completed and then return the results.
@@ -185,7 +195,7 @@ class RapidataOrder:
 
         try:
             # Get the raw result string
-            return self.__openapi_service.order_api.order_get_order_results_get(id=self.order_id)  # type: ignore
+            return RapidataResults(self.__openapi_service.order_api.order_get_order_results_get(id=self.order_id))  # type: ignore
 
         except ApiException as e:
             # Handle API exceptions
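Taken together, `get_results` now returns a `RapidataResults` instead of a bare dict, so the conversion helpers hang directly off the return value. A hedged usage sketch; `order` stands in for a `RapidataOrder` obtained from your own client setup:

```python
# Usage sketch; `order` is a placeholder for a RapidataOrder from your setup.
results = order.get_results()           # blocks until the order completes
df = results.to_pandas()                # RapidataResults adds DataFrame export
results.to_json("order_results.json")   # ...and JSON persistence

# While the order is still running, partial results can be fetched instead:
partial = order.get_results(preliminary_results=True)
```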
rapidata/rapidata_client/order/rapidata_results.py
ADDED

@@ -0,0 +1,143 @@
+import pandas as pd
+from typing import Any
+from pandas.core.indexes.base import Index
+import json
+
+class RapidataResults(dict):
+    """
+    A specialized dictionary class for handling Rapidata API results.
+    Extends the built-in dict class with specialized methods.
+    """
+    def to_pandas(self) -> pd.DataFrame:
+        """
+        Converts the results to a pandas DataFrame.
+
+        For Compare results, creates standardized A/B columns for metrics like:
+        - aggregatedResults
+        - aggregatedResultsRatios
+        - summedUserScores
+        - summedUserScoresRatios
+
+        For regular results, flattens nested dictionaries into columns with underscore-separated names.
+
+        Returns:
+            pd.DataFrame: A DataFrame containing the processed results
+        """
+        if "results" not in self or not self["results"]:
+            return pd.DataFrame()
+
+        if self["info"].get("orderType") == "Compare":
+            return self._compare_to_pandas()
+
+        if self["info"].get("orderType") is None:
+            print("Warning: Results are old and Order type is not specified. Dataframe might be wrong.")
+
+        # Get the structure from first item
+        first_item = self["results"][0]
+        columns = []
+        path_map = {}  # Maps flattened column names to paths to reach the values
+
+        # Build the column structure once
+        self._build_column_structure(first_item, columns, path_map)
+
+        # Extract data using the known structure
+        data = []
+        for item in self["results"]:
+            row = []
+            for path in path_map.values():
+                value = self._get_value_from_path(item, path)
+                row.append(value)
+            data.append(row)
+
+        return pd.DataFrame(data, columns=Index(columns))
+
+    def _build_column_structure(
+        self,
+        d: dict[str, Any],
+        columns: list[str],
+        path_map: dict[str, list[str]],
+        parent_key: str = '',
+        current_path: list[str] | None = None
+    ) -> None:
+        """
+        Builds the column structure and paths to reach values in nested dictionaries.
+
+        Args:
+            d: The dictionary to analyze
+            columns: List to store column names
+            path_map: Dictionary mapping column names to paths for accessing values
+            parent_key: The parent key for nested dictionaries
+            current_path: The current path in the dictionary structure
+        """
+        if current_path is None:
+            current_path = []
+
+        for key, value in d.items():
+            new_key = f"{parent_key}_{key}" if parent_key else key
+            new_path: list[str] = current_path + [key]
+
+            if isinstance(value, dict):
+                self._build_column_structure(value, columns, path_map, new_key, new_path)
+            else:
+                columns.append(new_key)
+                path_map[new_key] = new_path
+
+    def _get_value_from_path(self, d: dict[str, Any], path: list[str]) -> Any:
+        """
+        Retrieves a value from a nested dictionary using a path list.
+
+        Args:
+            d: The dictionary to retrieve the value from
+            path: List of keys forming the path to the desired value
+
+        Returns:
+            The value at the specified path, or None if the path doesn't exist
+        """
+        for key in path[:-1]:
+            d = d.get(key, {})
+        return d.get(path[-1])
+
+    def _compare_to_pandas(self):
+        """
+        Converts Compare results to a pandas DataFrame dynamically.
+        """
+        if not self.get("results"):
+            return pd.DataFrame()
+
+        rows = []
+        for result in self["results"]:
+            # Get the image names from the first metric we find
+            for key in result:
+                if isinstance(result[key], dict) and len(result[key]) == 2:
+                    assets = list(result[key].keys())
+                    break
+            else:
+                continue
+
+            asset_a, asset_b = assets[0], assets[1]
+
+            # Initialize row with non-comparative fields
+            row = {
+                key: value for key, value in result.items()
+                if not isinstance(value, dict)
+            }
+
+            # Handle comparative metrics
+            for key, values in result.items():
+                if isinstance(values, dict) and len(values) == 2:
+                    row[f'A_{key}'] = values[asset_a]
+                    row[f'B_{key}'] = values[asset_b]
+
+            rows.append(row)
+
+        return pd.DataFrame(rows)
+
+    def to_json(self, path: str="./results.json"):
+        """
+        Saves the results to a JSON file.
+
+        Args:
+            path: The file path where the JSON should be saved. Defaults to "./results.json".
+        """
+        with open(path, 'w') as f:
+            json.dump(self, f)