rapidata 2.31.1__py3-none-any.whl → 2.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of rapidata might be problematic. Click here for more details.
- rapidata/__init__.py +3 -1
- rapidata/api_client/__init__.py +2 -0
- rapidata/api_client/api/__init__.py +1 -0
- rapidata/api_client/api/benchmark_api.py +26 -297
- rapidata/api_client/api/participant_api.py +1404 -0
- rapidata/api_client/models/__init__.py +1 -0
- rapidata/api_client/models/create_sample_model.py +87 -0
- rapidata/api_client_README.md +6 -1
- rapidata/rapidata_client/__init__.py +9 -7
- rapidata/rapidata_client/benchmark/rapidata_benchmark.py +4 -3
- rapidata/rapidata_client/datapoints/__init__.py +3 -0
- rapidata/rapidata_client/{assets → datapoints/assets}/_media_asset.py +2 -2
- rapidata/rapidata_client/{assets → datapoints/assets}/_multi_asset.py +2 -2
- rapidata/rapidata_client/{assets → datapoints/assets}/_text_asset.py +1 -1
- rapidata/rapidata_client/datapoints/datapoint.py +108 -0
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_media_asset_metadata.py +1 -1
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_private_text_metadata.py +1 -1
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_prompt_identifier_metadata.py +1 -1
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_prompt_metadata.py +1 -1
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_public_text_metadata.py +1 -1
- rapidata/rapidata_client/{metadata → datapoints/metadata}/_select_words_metadata.py +1 -1
- rapidata/rapidata_client/demographic/demographic_manager.py +1 -1
- rapidata/rapidata_client/exceptions/__init__.py +1 -0
- rapidata/rapidata_client/exceptions/failed_upload_exception.py +19 -0
- rapidata/rapidata_client/order/_rapidata_dataset.py +69 -134
- rapidata/rapidata_client/order/_rapidata_order_builder.py +32 -65
- rapidata/rapidata_client/order/rapidata_order.py +0 -5
- rapidata/rapidata_client/order/rapidata_order_manager.py +5 -5
- rapidata/rapidata_client/validation/rapidata_validation_set.py +1 -1
- rapidata/rapidata_client/validation/rapids/rapids.py +3 -3
- rapidata/rapidata_client/validation/rapids/rapids_manager.py +2 -2
- rapidata/rapidata_client/validation/validation_set_manager.py +1 -1
- rapidata/rapidata_client/workflow/_ranking_workflow.py +1 -1
- {rapidata-2.31.1.dist-info → rapidata-2.32.0.dist-info}/METADATA +1 -1
- {rapidata-2.31.1.dist-info → rapidata-2.32.0.dist-info}/RECORD +43 -37
- /rapidata/rapidata_client/{assets → datapoints/assets}/__init__.py +0 -0
- /rapidata/rapidata_client/{assets → datapoints/assets}/_base_asset.py +0 -0
- /rapidata/rapidata_client/{assets → datapoints/assets}/_sessions.py +0 -0
- /rapidata/rapidata_client/{assets → datapoints/assets}/data_type_enum.py +0 -0
- /rapidata/rapidata_client/{metadata → datapoints/metadata}/__init__.py +0 -0
- /rapidata/rapidata_client/{metadata → datapoints/metadata}/_base_metadata.py +0 -0
- {rapidata-2.31.1.dist-info → rapidata-2.32.0.dist-info}/LICENSE +0 -0
- {rapidata-2.31.1.dist-info → rapidata-2.32.0.dist-info}/WHEEL +0 -0
|
@@ -109,6 +109,7 @@ from rapidata.api_client.models.create_order_model_referee import CreateOrderMod
|
|
|
109
109
|
from rapidata.api_client.models.create_order_model_workflow import CreateOrderModelWorkflow
|
|
110
110
|
from rapidata.api_client.models.create_order_result import CreateOrderResult
|
|
111
111
|
from rapidata.api_client.models.create_rapid_result import CreateRapidResult
|
|
112
|
+
from rapidata.api_client.models.create_sample_model import CreateSampleModel
|
|
112
113
|
from rapidata.api_client.models.create_simple_pipeline_model import CreateSimplePipelineModel
|
|
113
114
|
from rapidata.api_client.models.create_simple_pipeline_model_artifacts_inner import CreateSimplePipelineModelArtifactsInner
|
|
114
115
|
from rapidata.api_client.models.create_simple_pipeline_model_pipeline_steps_inner import CreateSimplePipelineModelPipelineStepsInner
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Rapidata.Dataset
|
|
5
|
+
|
|
6
|
+
No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator)
|
|
7
|
+
|
|
8
|
+
The version of the OpenAPI document: v1
|
|
9
|
+
Generated by OpenAPI Generator (https://openapi-generator.tech)
|
|
10
|
+
|
|
11
|
+
Do not edit the class manually.
|
|
12
|
+
""" # noqa: E501
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
import pprint
|
|
17
|
+
import re # noqa: F401
|
|
18
|
+
import json
|
|
19
|
+
|
|
20
|
+
from pydantic import BaseModel, ConfigDict, Field, StrictStr
|
|
21
|
+
from typing import Any, ClassVar, Dict, List
|
|
22
|
+
from typing import Optional, Set
|
|
23
|
+
from typing_extensions import Self
|
|
24
|
+
|
|
25
|
+
class CreateSampleModel(BaseModel):
    """
    The model used to create a sample to a participant.
    """ # noqa: E501
    identifier: StrictStr = Field(description="The identifier used to correlate samples of different participants.")
    # Names of the serialized properties of this model (class-level, not a pydantic field).
    __properties: ClassVar[List[str]] = ["identifier"]

    model_config = ConfigDict(
        populate_by_name=True,
        validate_assignment=True,
        protected_namespaces=(),
    )

    def to_str(self) -> str:
        """Pretty-print the alias-keyed dump of this model."""
        dumped = self.model_dump(by_alias=True)
        return pprint.pformat(dumped)

    def to_json(self) -> str:
        """Serialize the result of `to_dict` to a JSON string."""
        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
        as_dict = self.to_dict()
        return json.dumps(as_dict)

    @classmethod
    def from_json(cls, json_str: str) -> Optional[Self]:
        """Parse `json_str` and build a CreateSampleModel via `from_dict`."""
        payload = json.loads(json_str)
        return cls.from_dict(payload)

    def to_dict(self) -> Dict[str, Any]:
        """Dump the model to a dict keyed by alias.

        Differs from a plain `self.model_dump(by_alias=True)` in that
        entries whose value is `None` are left out (`exclude_none=True`).
        No field is excluded by name for this model.
        """
        nothing_excluded: Set[str] = set()
        return self.model_dump(
            by_alias=True,
            exclude=nothing_excluded,
            exclude_none=True,
        )

    @classmethod
    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
        """Build a CreateSampleModel from `obj`.

        Returns `None` when `obj` is `None`. A dict contributes only its
        "identifier" entry; any other object is handed to `model_validate`
        unchanged.
        """
        if obj is None:
            return None

        if isinstance(obj, dict):
            return cls.model_validate({"identifier": obj.get("identifier")})

        return cls.model_validate(obj)
|
|
86
|
+
|
|
87
|
+
|
rapidata/api_client_README.md
CHANGED
|
@@ -76,7 +76,6 @@ Class | Method | HTTP request | Description
|
|
|
76
76
|
*BenchmarkApi* | [**benchmark_benchmark_id_get**](rapidata/api_client/docs/BenchmarkApi.md#benchmark_benchmark_id_get) | **GET** /benchmark/{benchmarkId} | Returns a single benchmark by its ID.
|
|
77
77
|
*BenchmarkApi* | [**benchmark_benchmark_id_name_put**](rapidata/api_client/docs/BenchmarkApi.md#benchmark_benchmark_id_name_put) | **PUT** /benchmark/{benchmarkId}/name | Updates the name of a benchmark.
|
|
78
78
|
*BenchmarkApi* | [**benchmark_benchmark_id_participant_participant_id_delete**](rapidata/api_client/docs/BenchmarkApi.md#benchmark_benchmark_id_participant_participant_id_delete) | **DELETE** /benchmark/{benchmarkId}/participant/{participantId} | Deletes a participant on a benchmark.
|
|
79
|
-
*BenchmarkApi* | [**benchmark_benchmark_id_participant_participant_id_get**](rapidata/api_client/docs/BenchmarkApi.md#benchmark_benchmark_id_participant_participant_id_get) | **GET** /benchmark/{benchmarkId}/participant/{participantId} | Gets a participant by its ID.
|
|
80
79
|
*BenchmarkApi* | [**benchmark_benchmark_id_participants_get**](rapidata/api_client/docs/BenchmarkApi.md#benchmark_benchmark_id_participants_get) | **GET** /benchmark/{benchmarkId}/participants | Query all participants within a benchmark
|
|
81
80
|
*BenchmarkApi* | [**benchmark_benchmark_id_participants_participant_id_disable_post**](rapidata/api_client/docs/BenchmarkApi.md#benchmark_benchmark_id_participants_participant_id_disable_post) | **POST** /benchmark/{benchmarkId}/participants/{participantId}/disable | This endpoint disables a participant in a benchmark. This means that the participant will no longer be actively matched up against other participants and will not collect further results. It will still be visible in the leaderboard.
|
|
82
81
|
*BenchmarkApi* | [**benchmark_benchmark_id_participants_participant_id_submit_post**](rapidata/api_client/docs/BenchmarkApi.md#benchmark_benchmark_id_participants_participant_id_submit_post) | **POST** /benchmark/{benchmarkId}/participants/{participantId}/submit | Submits a participant to a benchmark.
|
|
@@ -163,6 +162,11 @@ Class | Method | HTTP request | Description
|
|
|
163
162
|
*OrderApi* | [**order_unsupported_post**](rapidata/api_client/docs/OrderApi.md#order_unsupported_post) | **POST** /order/unsupported | Notifies the admins that a user wants to create an order with an unsupported label type or data type.
|
|
164
163
|
*OrderApi* | [**orders_get**](rapidata/api_client/docs/OrderApi.md#orders_get) | **GET** /orders | Queries orders based on a filter, page, and sort criteria.
|
|
165
164
|
*OrderApi* | [**orders_public_get**](rapidata/api_client/docs/OrderApi.md#orders_public_get) | **GET** /orders/public | Retrieves orders that are public and can be cloned by any user.
|
|
165
|
+
*ParticipantApi* | [**participant_participant_id_delete**](rapidata/api_client/docs/ParticipantApi.md#participant_participant_id_delete) | **DELETE** /participant/{participantId} | Deletes a participant on a benchmark.
|
|
166
|
+
*ParticipantApi* | [**participant_participant_id_get**](rapidata/api_client/docs/ParticipantApi.md#participant_participant_id_get) | **GET** /participant/{participantId} | Gets a participant by its ID.
|
|
167
|
+
*ParticipantApi* | [**participant_participant_id_sample_post**](rapidata/api_client/docs/ParticipantApi.md#participant_participant_id_sample_post) | **POST** /participant/{participantId}/sample | Adds a sample to a participant.
|
|
168
|
+
*ParticipantApi* | [**participant_sample_sample_id_delete**](rapidata/api_client/docs/ParticipantApi.md#participant_sample_sample_id_delete) | **DELETE** /participant-sample/{sampleId} | Deletes a sample.
|
|
169
|
+
*ParticipantApi* | [**participants_participant_id_submit_post**](rapidata/api_client/docs/ParticipantApi.md#participants_participant_id_submit_post) | **POST** /participants/{participantId}/submit | Submits a participant to a benchmark.
|
|
166
170
|
*PipelineApi* | [**pipeline_id_workflow_config_artifact_id_put**](rapidata/api_client/docs/PipelineApi.md#pipeline_id_workflow_config_artifact_id_put) | **PUT** /pipeline/{id}/workflow-config/{artifactId} | Updates the workflow configuration for a pipeline.
|
|
167
171
|
*PipelineApi* | [**pipeline_id_workflow_config_put**](rapidata/api_client/docs/PipelineApi.md#pipeline_id_workflow_config_put) | **PUT** /pipeline/{id}/workflow-config | Updates the workflow configuration for a pipeline.
|
|
168
172
|
*PipelineApi* | [**pipeline_pipeline_id_campaign_artifact_id_put**](rapidata/api_client/docs/PipelineApi.md#pipeline_pipeline_id_campaign_artifact_id_put) | **PUT** /pipeline/{pipelineId}/campaign/{artifactId} | Updates a specific campaign for a pipeline.
|
|
@@ -297,6 +301,7 @@ Class | Method | HTTP request | Description
|
|
|
297
301
|
- [CreateOrderModelWorkflow](rapidata/api_client/docs/CreateOrderModelWorkflow.md)
|
|
298
302
|
- [CreateOrderResult](rapidata/api_client/docs/CreateOrderResult.md)
|
|
299
303
|
- [CreateRapidResult](rapidata/api_client/docs/CreateRapidResult.md)
|
|
304
|
+
- [CreateSampleModel](rapidata/api_client/docs/CreateSampleModel.md)
|
|
300
305
|
- [CreateSimplePipelineModel](rapidata/api_client/docs/CreateSimplePipelineModel.md)
|
|
301
306
|
- [CreateSimplePipelineModelArtifactsInner](rapidata/api_client/docs/CreateSimplePipelineModelArtifactsInner.md)
|
|
302
307
|
- [CreateSimplePipelineModelPipelineStepsInner](rapidata/api_client/docs/CreateSimplePipelineModelPipelineStepsInner.md)
|
|
@@ -9,12 +9,19 @@ from .selection import (
|
|
|
9
9
|
RetrievalMode,
|
|
10
10
|
EffortEstimationSelection,
|
|
11
11
|
)
|
|
12
|
-
from .
|
|
12
|
+
from .datapoints import Datapoint
|
|
13
|
+
from .datapoints.metadata import (
|
|
13
14
|
PrivateTextMetadata,
|
|
14
15
|
PublicTextMetadata,
|
|
15
16
|
PromptMetadata,
|
|
16
17
|
SelectWordsMetadata,
|
|
17
18
|
)
|
|
19
|
+
from .datapoints.assets import (
|
|
20
|
+
MediaAsset,
|
|
21
|
+
TextAsset,
|
|
22
|
+
MultiAsset,
|
|
23
|
+
RapidataDataTypes
|
|
24
|
+
)
|
|
18
25
|
from .settings import (
|
|
19
26
|
RapidataSettings,
|
|
20
27
|
TranslationBehaviourOptions,
|
|
@@ -27,12 +34,6 @@ from .settings import (
|
|
|
27
34
|
AllowNeitherBoth,
|
|
28
35
|
)
|
|
29
36
|
from .country_codes import CountryCodes
|
|
30
|
-
from .assets import (
|
|
31
|
-
MediaAsset,
|
|
32
|
-
TextAsset,
|
|
33
|
-
MultiAsset,
|
|
34
|
-
RapidataDataTypes
|
|
35
|
-
)
|
|
36
37
|
from .filter import (
|
|
37
38
|
CountryFilter,
|
|
38
39
|
LanguageFilter,
|
|
@@ -55,3 +56,4 @@ from .logging import (
|
|
|
55
56
|
)
|
|
56
57
|
|
|
57
58
|
from .validation import Box
|
|
59
|
+
from .exceptions import FailedUploadException
|
|
@@ -15,9 +15,10 @@ from rapidata.rapidata_client.logging import logger
|
|
|
15
15
|
from rapidata.service.openapi_service import OpenAPIService
|
|
16
16
|
|
|
17
17
|
from rapidata.rapidata_client.benchmark.leaderboard.rapidata_leaderboard import RapidataLeaderboard
|
|
18
|
-
from rapidata.rapidata_client.metadata import PromptIdentifierMetadata
|
|
19
|
-
from rapidata.rapidata_client.assets import MediaAsset
|
|
18
|
+
from rapidata.rapidata_client.datapoints.metadata import PromptIdentifierMetadata
|
|
19
|
+
from rapidata.rapidata_client.datapoints.assets import MediaAsset
|
|
20
20
|
from rapidata.rapidata_client.order._rapidata_dataset import RapidataDataset
|
|
21
|
+
from rapidata.rapidata_client.datapoints.datapoint import Datapoint
|
|
21
22
|
|
|
22
23
|
class RapidataBenchmark:
|
|
23
24
|
"""
|
|
@@ -287,7 +288,7 @@ class RapidataBenchmark:
|
|
|
287
288
|
dataset = RapidataDataset(participant_result.dataset_id, self.__openapi_service)
|
|
288
289
|
|
|
289
290
|
try:
|
|
290
|
-
dataset.
|
|
291
|
+
dataset.add_datapoints([Datapoint(asset=asset, metadata=metadata) for asset, metadata in zip(assets, prompts_metadata)])
|
|
291
292
|
except Exception as e:
|
|
292
293
|
logger.warning(f"An error occurred while adding datapoints to the dataset: {e}")
|
|
293
294
|
upload_progress = self.__openapi_service.dataset_api.dataset_dataset_id_progress_get(
|
|
@@ -7,7 +7,7 @@ Implements lazy loading for URL-based media to prevent unnecessary downloads.
|
|
|
7
7
|
from typing import Optional, cast
|
|
8
8
|
import os
|
|
9
9
|
from io import BytesIO
|
|
10
|
-
from rapidata.rapidata_client.assets._base_asset import BaseAsset
|
|
10
|
+
from rapidata.rapidata_client.datapoints.assets._base_asset import BaseAsset
|
|
11
11
|
import requests
|
|
12
12
|
import re
|
|
13
13
|
from PIL import Image
|
|
@@ -16,7 +16,7 @@ import tempfile
|
|
|
16
16
|
from pydantic import StrictStr, StrictBytes
|
|
17
17
|
import logging
|
|
18
18
|
from functools import cached_property
|
|
19
|
-
from rapidata.rapidata_client.assets._sessions import SessionManager
|
|
19
|
+
from rapidata.rapidata_client.datapoints.assets._sessions import SessionManager
|
|
20
20
|
from rapidata.rapidata_client.logging import logger
|
|
21
21
|
|
|
22
22
|
class MediaAsset(BaseAsset):
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
Defines the MultiAsset class for handling multiple BaseAsset instances.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
-
from rapidata.rapidata_client.assets._base_asset import BaseAsset
|
|
7
|
-
from rapidata.rapidata_client.assets import MediaAsset, TextAsset
|
|
6
|
+
from rapidata.rapidata_client.datapoints.assets._base_asset import BaseAsset
|
|
7
|
+
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset
|
|
8
8
|
from typing import Iterator, Sequence, cast
|
|
9
9
|
|
|
10
10
|
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
from typing import Sequence, cast
|
|
2
|
+
from rapidata.rapidata_client.datapoints.assets import MediaAsset, TextAsset, MultiAsset, BaseAsset
|
|
3
|
+
from rapidata.rapidata_client.datapoints.metadata import Metadata
|
|
4
|
+
from rapidata.api_client.models.dataset_dataset_id_datapoints_post_request_metadata_inner import DatasetDatasetIdDatapointsPostRequestMetadataInner
|
|
5
|
+
from rapidata.api_client.models.create_datapoint_from_text_sources_model import CreateDatapointFromTextSourcesModel
|
|
6
|
+
from pydantic import StrictStr, StrictBytes
|
|
7
|
+
|
|
8
|
+
class Datapoint:
    """An asset paired with an optional sequence of metadata entries.

    Attributes are assigned in `__init__`:
      asset    -- the MediaAsset, TextAsset or MultiAsset passed in.
      metadata -- the metadata sequence passed in, or None.
    """

    def __init__(self, asset: MediaAsset | TextAsset | MultiAsset, metadata: Sequence[Metadata] | None = None):
        """Validate and store the asset and its metadata.

        Raises:
            TypeError: if `asset` is not a MediaAsset, TextAsset or MultiAsset,
                or if a truthy `metadata` is not a Sequence made up solely of
                Metadata instances.
        """
        if not isinstance(asset, (MediaAsset, TextAsset, MultiAsset)):
            raise TypeError("Asset must be of type MediaAsset, TextAsset, or MultiAsset.")

        # Falsy metadata (None, empty sequence) is stored without further checks.
        if metadata:
            if not isinstance(metadata, Sequence):
                raise TypeError("Metadata must be a list of Metadata objects.")
            if any(not isinstance(entry, Metadata) for entry in metadata):
                raise TypeError("All metadata objects must be of type Metadata.")

        self.asset = asset
        self.metadata = metadata

    def _get_effective_asset_type(self) -> type:
        """Return the asset's type; a MultiAsset is represented by its first child."""
        current = self.asset
        representative = current.assets[0] if isinstance(current, MultiAsset) else current
        return type(representative)

    def is_media_asset(self) -> bool:
        """Tell whether the effective asset type is MediaAsset (or a subclass)."""
        return issubclass(self._get_effective_asset_type(), MediaAsset)

    def is_text_asset(self) -> bool:
        """Tell whether the effective asset type is TextAsset (or a subclass)."""
        return issubclass(self._get_effective_asset_type(), TextAsset)

    def get_texts(self) -> list[str]:
        """Collect the `text` of a TextAsset, or of every TextAsset inside a MultiAsset.

        Raises:
            ValueError: if the asset is neither a TextAsset nor a MultiAsset.
        """
        if isinstance(self.asset, TextAsset):
            return [self.asset.text]
        if isinstance(self.asset, MultiAsset):
            return [child.text for child in self.asset.assets if isinstance(child, TextAsset)]
        raise ValueError(f"Cannot extract text from asset type: {type(self.asset)}")

    def get_media_assets(self) -> list[MediaAsset]:
        """Collect the MediaAsset itself, or every MediaAsset inside a MultiAsset.

        Raises:
            ValueError: if the asset is neither a MediaAsset nor a MultiAsset.
        """
        if isinstance(self.asset, MediaAsset):
            return [self.asset]
        if isinstance(self.asset, MultiAsset):
            return [child for child in self.asset.assets if isinstance(child, MediaAsset)]
        raise ValueError(f"Cannot extract media assets from asset type: {type(self.asset)}")

    def get_local_file_paths(self) -> list[StrictStr | tuple[StrictStr, StrictBytes] | StrictBytes]:
        """Return `to_file()` for each media asset whose `is_local()` is true.

        An empty list is returned when this is not a media datapoint.
        """
        if self.is_media_asset():
            return [media.to_file() for media in self.get_media_assets() if media.is_local()]
        return []

    def get_urls(self) -> list[str]:
        """Return `path` for each media asset whose `is_local()` is false.

        An empty list is returned when this is not a media datapoint.
        """
        if self.is_media_asset():
            return [media.path for media in self.get_media_assets() if not media.is_local()]
        return []

    def get_prepared_metadata(self) -> list[DatasetDatasetIdDatapointsPostRequestMetadataInner]:
        """Convert the stored metadata into request models for the API.

        Each truthy entry is converted with `to_model()`; truthy results are
        wrapped in DatasetDatasetIdDatapointsPostRequestMetadataInner. Falsy
        entries and falsy models are skipped.
        """
        if not self.metadata:
            return []

        # Lazy generator keeps conversion and wrapping interleaved per entry.
        converted = (entry.to_model() if entry else None for entry in self.metadata)
        return [
            DatasetDatasetIdDatapointsPostRequestMetadataInner(model)
            for model in converted
            if model
        ]

    def create_text_upload_model(self, index: int) -> CreateDatapointFromTextSourcesModel:
        """Build the text-upload model for this datapoint, using `index` as sortIndex.

        Raises:
            ValueError: if the effective asset type is not TextAsset.
        """
        if not self.is_text_asset():
            raise ValueError("Cannot create text upload model for non-text asset")

        text_sources = self.get_texts()
        prepared = self.get_prepared_metadata()
        return CreateDatapointFromTextSourcesModel(
            textSources=text_sources,
            sortIndex=index,
            metadata=prepared,
        )

    def __str__(self):
        """Short description showing only the asset."""
        return f"Datapoint(asset={self.asset})"

    def __repr__(self):
        """Same text as `__str__`."""
        return self.__str__()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from rapidata.api_client.models.prompt_asset_metadata_input import PromptAssetMetadataInput
|
|
2
2
|
from rapidata.api_client.models.url_asset_input import UrlAssetInput
|
|
3
|
-
from rapidata.rapidata_client.metadata._base_metadata import Metadata
|
|
3
|
+
from rapidata.rapidata_client.datapoints.metadata._base_metadata import Metadata
|
|
4
4
|
from rapidata.api_client.models.prompt_asset_metadata_input_asset import PromptAssetMetadataInputAsset
|
|
5
5
|
|
|
6
6
|
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from rapidata.api_client.models.private_text_metadata_input import (
|
|
2
2
|
PrivateTextMetadataInput,
|
|
3
3
|
)
|
|
4
|
-
from rapidata.rapidata_client.metadata._base_metadata import Metadata
|
|
4
|
+
from rapidata.rapidata_client.datapoints.metadata._base_metadata import Metadata
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class PrivateTextMetadata(Metadata):
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from rapidata.api_client.models.public_text_metadata_input import (
|
|
2
2
|
PublicTextMetadataInput,
|
|
3
3
|
)
|
|
4
|
-
from rapidata.rapidata_client.metadata._base_metadata import Metadata
|
|
4
|
+
from rapidata.rapidata_client.datapoints.metadata._base_metadata import Metadata
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class PublicTextMetadata(Metadata):
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from rapidata.api_client.models.transcription_metadata_input import (
|
|
2
2
|
TranscriptionMetadataInput,
|
|
3
3
|
)
|
|
4
|
-
from rapidata.rapidata_client.metadata._base_metadata import Metadata
|
|
4
|
+
from rapidata.rapidata_client.datapoints.metadata._base_metadata import Metadata
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class SelectWordsMetadata(Metadata):
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from rapidata.service.openapi_service import OpenAPIService
|
|
2
|
-
from rapidata.rapidata_client.assets import MediaAsset
|
|
2
|
+
from rapidata.rapidata_client.datapoints.assets import MediaAsset
|
|
3
3
|
from rapidata.api_client.models.create_demographic_rapid_model import CreateDemographicRapidModel
|
|
4
4
|
from rapidata.api_client.models.classify_payload import ClassifyPayload
|
|
5
5
|
from rapidata.rapidata_client.logging import logger
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .failed_upload_exception import FailedUploadException
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from rapidata.rapidata_client.datapoints.datapoint import Datapoint
|
|
2
|
+
from rapidata.rapidata_client.order._rapidata_dataset import RapidataDataset
|
|
3
|
+
from rapidata.rapidata_client.order.rapidata_order import RapidataOrder
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class FailedUploadException(Exception):
    """Custom error class for Failed Uploads to the Rapidata order.

    Attributes (assigned in `__init__`):
        dataset: the dataset passed in by the raiser.
        order: the order passed in by the raiser.
        failed_uploads: the datapoints passed in as failed.
    """

    def __init__(
        self,
        dataset: RapidataDataset,
        order: RapidataOrder,
        failed_uploads: list[Datapoint]
    ):
        # Forward the values to Exception so `args` is populated even when the
        # exception is built with keyword arguments. Without this, `args` is
        # empty for keyword construction and copy/pickle cannot rebuild it.
        super().__init__(dataset, order, failed_uploads)
        self.dataset = dataset
        self.order = order
        self.failed_uploads = failed_uploads

    def __str__(self) -> str:
        """Return a message listing the failed uploads."""
        return f"Failed to upload {self.failed_uploads}"
|