PyPI - rapidata - Versions diffs - 2.1.4__py3-none-any.whl → 2.2.0__py3-none-any.whl - Mend

rapidata 2.1.4__py3-none-any.whl → 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rapidata might be problematic. Click here for more details.

Files changed (35) hide show

rapidata/api_client/models/validation_import_post_request_blueprint.py CHANGED Viewed

@@ -25,12 +25,13 @@ from rapidata.api_client.models.line_rapid_blueprint import LineRapidBlueprint
 from rapidata.api_client.models.locate_rapid_blueprint import LocateRapidBlueprint
 from rapidata.api_client.models.named_entity_rapid_blueprint import NamedEntityRapidBlueprint
 from rapidata.api_client.models.polygon_rapid_blueprint import PolygonRapidBlueprint
+from rapidata.api_client.models.scrub_rapid_blueprint import ScrubRapidBlueprint
 from rapidata.api_client.models.transcription_rapid_blueprint import TranscriptionRapidBlueprint
 from pydantic import StrictStr, Field
 from typing import Union, List, Set, Optional, Dict
 from typing_extensions import Literal, Self
-VALIDATIONIMPORTPOSTREQUESTBLUEPRINT_ONE_OF_SCHEMAS = ["AttachCategoryRapidBlueprint", "BoundingBoxRapidBlueprint", "CompareRapidBlueprint", "FreeTextRapidBlueprint", "LineRapidBlueprint", "LocateRapidBlueprint", "NamedEntityRapidBlueprint", "PolygonRapidBlueprint", "TranscriptionRapidBlueprint"]
+VALIDATIONIMPORTPOSTREQUESTBLUEPRINT_ONE_OF_SCHEMAS = ["AttachCategoryRapidBlueprint", "BoundingBoxRapidBlueprint", "CompareRapidBlueprint", "FreeTextRapidBlueprint", "LineRapidBlueprint", "LocateRapidBlueprint", "NamedEntityRapidBlueprint", "PolygonRapidBlueprint", "ScrubRapidBlueprint", "TranscriptionRapidBlueprint"]
 class ValidationImportPostRequestBlueprint(BaseModel):
     """
@@ -38,24 +39,26 @@ class ValidationImportPostRequestBlueprint(BaseModel):
     """
     # data type: TranscriptionRapidBlueprint
     oneof_schema_1_validator: Optional[TranscriptionRapidBlueprint] = None
+    # data type: ScrubRapidBlueprint
+    oneof_schema_2_validator: Optional[ScrubRapidBlueprint] = None
     # data type: PolygonRapidBlueprint
-    oneof_schema_2_validator: Optional[PolygonRapidBlueprint] = None
+    oneof_schema_3_validator: Optional[PolygonRapidBlueprint] = None
     # data type: NamedEntityRapidBlueprint
-    oneof_schema_3_validator: Optional[NamedEntityRapidBlueprint] = None
+    oneof_schema_4_validator: Optional[NamedEntityRapidBlueprint] = None
     # data type: LocateRapidBlueprint
-    oneof_schema_4_validator: Optional[LocateRapidBlueprint] = None
+    oneof_schema_5_validator: Optional[LocateRapidBlueprint] = None
     # data type: LineRapidBlueprint
-    oneof_schema_5_validator: Optional[LineRapidBlueprint] = None
+    oneof_schema_6_validator: Optional[LineRapidBlueprint] = None
     # data type: FreeTextRapidBlueprint
-    oneof_schema_6_validator: Optional[FreeTextRapidBlueprint] = None
+    oneof_schema_7_validator: Optional[FreeTextRapidBlueprint] = None
     # data type: CompareRapidBlueprint
-    oneof_schema_7_validator: Optional[CompareRapidBlueprint] = None
+    oneof_schema_8_validator: Optional[CompareRapidBlueprint] = None
     # data type: AttachCategoryRapidBlueprint
-    oneof_schema_8_validator: Optional[AttachCategoryRapidBlueprint] = None
+    oneof_schema_9_validator: Optional[AttachCategoryRapidBlueprint] = None
     # data type: BoundingBoxRapidBlueprint
-    oneof_schema_9_validator: Optional[BoundingBoxRapidBlueprint] = None
-    actual_instance: Optional[Union[AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, TranscriptionRapidBlueprint]] = None
-    one_of_schemas: Set[str] = { "AttachCategoryRapidBlueprint", "BoundingBoxRapidBlueprint", "CompareRapidBlueprint", "FreeTextRapidBlueprint", "LineRapidBlueprint", "LocateRapidBlueprint", "NamedEntityRapidBlueprint", "PolygonRapidBlueprint", "TranscriptionRapidBlueprint" }
+    oneof_schema_10_validator: Optional[BoundingBoxRapidBlueprint] = None
+    actual_instance: Optional[Union[AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, ScrubRapidBlueprint, TranscriptionRapidBlueprint]] = None
+    one_of_schemas: Set[str] = { "AttachCategoryRapidBlueprint", "BoundingBoxRapidBlueprint", "CompareRapidBlueprint", "FreeTextRapidBlueprint", "LineRapidBlueprint", "LocateRapidBlueprint", "NamedEntityRapidBlueprint", "PolygonRapidBlueprint", "ScrubRapidBlueprint", "TranscriptionRapidBlueprint" }
     model_config = ConfigDict(
         validate_assignment=True,
@@ -86,6 +89,11 @@ class ValidationImportPostRequestBlueprint(BaseModel):
             error_messages.append(f"Error! Input type `{type(v)}` is not `TranscriptionRapidBlueprint`")
         else:
             match += 1
+        # validate data type: ScrubRapidBlueprint
+        if not isinstance(v, ScrubRapidBlueprint):
+            error_messages.append(f"Error! Input type `{type(v)}` is not `ScrubRapidBlueprint`")
+        else:
+            match += 1
         # validate data type: PolygonRapidBlueprint
         if not isinstance(v, PolygonRapidBlueprint):
             error_messages.append(f"Error! Input type `{type(v)}` is not `PolygonRapidBlueprint`")
@@ -128,10 +136,10 @@ class ValidationImportPostRequestBlueprint(BaseModel):
             match += 1
         if match > 1:
             # more than 1 match
-            raise ValueError("Multiple matches found when setting `actual_instance` in ValidationImportPostRequestBlueprint with oneOf schemas: AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, TranscriptionRapidBlueprint. Details: " + ", ".join(error_messages))
+            raise ValueError("Multiple matches found when setting `actual_instance` in ValidationImportPostRequestBlueprint with oneOf schemas: AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, ScrubRapidBlueprint, TranscriptionRapidBlueprint. Details: " + ", ".join(error_messages))
         elif match == 0:
             # no match
-            raise ValueError("No match found when setting `actual_instance` in ValidationImportPostRequestBlueprint with oneOf schemas: AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, TranscriptionRapidBlueprint. Details: " + ", ".join(error_messages))
+            raise ValueError("No match found when setting `actual_instance` in ValidationImportPostRequestBlueprint with oneOf schemas: AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, ScrubRapidBlueprint, TranscriptionRapidBlueprint. Details: " + ", ".join(error_messages))
         else:
             return v
@@ -152,6 +160,12 @@ class ValidationImportPostRequestBlueprint(BaseModel):
             match += 1
         except (ValidationError, ValueError) as e:
             error_messages.append(str(e))
+        # deserialize data into ScrubRapidBlueprint
+        try:
+            instance.actual_instance = ScrubRapidBlueprint.from_json(json_str)
+            match += 1
+        except (ValidationError, ValueError) as e:
+            error_messages.append(str(e))
         # deserialize data into PolygonRapidBlueprint
         try:
             instance.actual_instance = PolygonRapidBlueprint.from_json(json_str)
@@ -203,10 +217,10 @@ class ValidationImportPostRequestBlueprint(BaseModel):
         if match > 1:
             # more than 1 match
-            raise ValueError("Multiple matches found when deserializing the JSON string into ValidationImportPostRequestBlueprint with oneOf schemas: AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, TranscriptionRapidBlueprint. Details: " + ", ".join(error_messages))
+            raise ValueError("Multiple matches found when deserializing the JSON string into ValidationImportPostRequestBlueprint with oneOf schemas: AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, ScrubRapidBlueprint, TranscriptionRapidBlueprint. Details: " + ", ".join(error_messages))
         elif match == 0:
             # no match
-            raise ValueError("No match found when deserializing the JSON string into ValidationImportPostRequestBlueprint with oneOf schemas: AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, TranscriptionRapidBlueprint. Details: " + ", ".join(error_messages))
+            raise ValueError("No match found when deserializing the JSON string into ValidationImportPostRequestBlueprint with oneOf schemas: AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, ScrubRapidBlueprint, TranscriptionRapidBlueprint. Details: " + ", ".join(error_messages))
         else:
             return instance
@@ -220,7 +234,7 @@ class ValidationImportPostRequestBlueprint(BaseModel):
         else:
             return json.dumps(self.actual_instance)
-    def to_dict(self) -> Optional[Union[Dict[str, Any], AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, TranscriptionRapidBlueprint]]:
+    def to_dict(self) -> Optional[Union[Dict[str, Any], AttachCategoryRapidBlueprint, BoundingBoxRapidBlueprint, CompareRapidBlueprint, FreeTextRapidBlueprint, LineRapidBlueprint, LocateRapidBlueprint, NamedEntityRapidBlueprint, PolygonRapidBlueprint, ScrubRapidBlueprint, TranscriptionRapidBlueprint]]:
         """Returns the dict representation of the actual instance"""
         if self.actual_instance is None:
             return None

rapidata/api_client_README.md CHANGED Viewed

@@ -349,6 +349,8 @@ Class | Method | HTTP request | Description
  - [QueryValidationRapidsResult](rapidata/api_client/docs/QueryValidationRapidsResult.md)
  - [QueryValidationRapidsResultAsset](rapidata/api_client/docs/QueryValidationRapidsResultAsset.md)
  - [QueryValidationRapidsResultPagedResult](rapidata/api_client/docs/QueryValidationRapidsResultPagedResult.md)
+ - [QueryValidationRapidsResultPayload](rapidata/api_client/docs/QueryValidationRapidsResultPayload.md)
+ - [QueryValidationRapidsResultTruth](rapidata/api_client/docs/QueryValidationRapidsResultTruth.md)
  - [QueryValidationSetModel](rapidata/api_client/docs/QueryValidationSetModel.md)
  - [QueryWorkflowsModel](rapidata/api_client/docs/QueryWorkflowsModel.md)
  - [RankedDatapointModel](rapidata/api_client/docs/RankedDatapointModel.md)
@@ -361,6 +363,11 @@ Class | Method | HTTP request | Description
  - [RegisterTemporaryCustomerModel](rapidata/api_client/docs/RegisterTemporaryCustomerModel.md)
  - [RegisterTemporaryCustomerResult](rapidata/api_client/docs/RegisterTemporaryCustomerResult.md)
  - [RootFilter](rapidata/api_client/docs/RootFilter.md)
+ - [ScrubPayload](rapidata/api_client/docs/ScrubPayload.md)
+ - [ScrubRange](rapidata/api_client/docs/ScrubRange.md)
+ - [ScrubRapidBlueprint](rapidata/api_client/docs/ScrubRapidBlueprint.md)
+ - [ScrubResult](rapidata/api_client/docs/ScrubResult.md)
+ - [ScrubTruth](rapidata/api_client/docs/ScrubTruth.md)
  - [SendCompletionMailStepModel](rapidata/api_client/docs/SendCompletionMailStepModel.md)
  - [Shape](rapidata/api_client/docs/Shape.md)
  - [SimpleWorkflowConfig](rapidata/api_client/docs/SimpleWorkflowConfig.md)

rapidata/rapidata_client/assets/_media_asset.py CHANGED Viewed

@@ -9,6 +9,8 @@ from rapidata.rapidata_client.assets._base_asset import BaseAsset
 import requests
 import re
 from PIL import Image
+from tinytag import TinyTag
+import tempfile
 class MediaAsset(BaseAsset):
     """MediaAsset Class
@@ -55,6 +57,49 @@ class MediaAsset(BaseAsset):
         self.path: str | bytes = path
         self.name = path
+    def get_duration(self) -> int:
+        """
+        Get the duration of audio/video files in milliseconds.
+        Returns 0 for static images.
+        Returns:
+            int: Duration in milliseconds for audio/video, 0 for static images
+        Raises:
+            ValueError: If the duration cannot be determined
+        """
+        path_to_check = self.name.lower()
+        # Return 0 for other static images
+        if any(path_to_check.endswith(ext) for ext in ('.jpg', '.jpeg', '.png', '.webp', '.gif')):
+            return 0
+        try:
+            # For URL downloads (bytes), write to temporary file first
+            if isinstance(self.path, bytes):
+                with tempfile.NamedTemporaryFile(suffix=os.path.splitext(self.name)[1], delete=False) as tmp:
+                    tmp.write(self.path)
+                    tmp.flush()
+                    # Close the file so it can be read
+                    tmp_path = tmp.name
+                try:
+                    tag = TinyTag.get(tmp_path)
+                finally:
+                    # Clean up the temporary file
+                    os.unlink(tmp_path)
+            else:
+                # For local files, use path directly
+                tag = TinyTag.get(self.path)
+            if tag.duration is None:
+                raise ValueError("Could not read duration from file")
+            return int(tag.duration * 1000)  # Convert to milliseconds
+        except Exception as e:
+            raise ValueError(f"Could not determine media duration: {str(e)}")
     def get_image_dimension(self) -> tuple[int, int] | None:
         """
         Get the dimensions (width, height) of an image file.

rapidata/rapidata_client/order/_rapidata_dataset.py CHANGED Viewed

@@ -48,7 +48,10 @@ class RapidataDataset:
                 textSources=texts
             )
-            self.openapi_service.dataset_api.dataset_creat_text_datapoint_post(model)
+            upload_response = self.openapi_service.dataset_api.dataset_creat_text_datapoint_post(model)
+            if upload_response.errors:
+                raise ValueError(f"Error uploading text datapoint: {upload_response.errors}")
         total_uploads = len(text_assets)
         with ThreadPoolExecutor(max_workers=max_workers) as executor:

rapidata/rapidata_client/order/rapidata_order_manager.py CHANGED Viewed

@@ -19,7 +19,8 @@ from rapidata.rapidata_client.workflow import (
     FreeTextWorkflow,
     SelectWordsWorkflow,
     LocateWorkflow,
-    DrawWorkflow)
+    DrawWorkflow,
+    TimestampWorkflow)
 from rapidata.rapidata_client.selection.validation_selection import ValidationSelection
 from rapidata.rapidata_client.selection.labeling_selection import LabelingSelection
 from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
@@ -413,6 +414,50 @@ class RapidataOrderManager:
             selections=selections,
             settings=settings
         )
+    def create_timestamp_order(self,
+            name: str,
+            instruction: str,
+            datapoints: list[str],
+            responses_per_datapoint: int = 10,
+            contexts: list[str] | None = None,
+            filters: Sequence[RapidataFilter] = [],
+            settings: Sequence[RapidataSetting] = [],
+            selections: Sequence[RapidataSelection] | None = None,
+        ) -> RapidataOrder:
+        """Create a timestamp order.
+        Args:
+            name (str): The name of the order.
+            instruction (str): The instruction for the timestamp task. Will be shown along side each datapoint.
+            datapoints (list[str]): The list of datapoints for the timestamp - each datapoint will be labeled.
+            responses_per_datapoint (int, optional): The number of responses that will be collected per datapoint. Defaults to 10.
+            contexts (list[str], optional): The list of contexts for the comparison. Defaults to None.\n
+                If provided has to be the same length as datapoints and will be shown in addition to the instruction. (Therefore will be different for each datapoint)
+                Will be match up with the datapoints using the list index.
+            filters (Sequence[RapidataFilter], optional): The list of filters for the timestamp. Defaults to []. Decides who the tasks should be shown to.
+            settings (Sequence[RapidataSetting], optional): The list of settings for the timestamp. Defaults to []. Decides how the tasks should be shown.
+            selections (Sequence[RapidataSelection], optional): The list of selections for the timestamp. Defaults to None. Decides in what order the tasks should be shown.
+        """
+        assets = [MediaAsset(path=path) for path in datapoints]
+        for asset in assets:
+            if not asset.get_duration():
+                raise ValueError("The datapoints for this order must have a duration. (e.g. video or audio)")
+        return self.__create_general_order(
+            name=name,
+            workflow=TimestampWorkflow(
+                instruction=instruction
+            ),
+            assets=assets,
+            responses_per_datapoint=responses_per_datapoint,
+            contexts=contexts,
+            filters=filters,
+            selections=selections,
+            settings=settings
+        )
     def get_order_by_id(self, order_id: str) -> RapidataOrder:
         """Get an order by ID.

rapidata/rapidata_client/selection/rapidata_selections.py CHANGED Viewed

@@ -8,14 +8,16 @@ from rapidata.rapidata_client.selection import (
 class RapidataSelections:
     """RapidataSelections Classes
+    Selections are used to define what type of tasks and in what order they are shown to the user.
     Attributes:
-        demographic (DemographicSelection): The DemographicSelection instance.
         labeling (LabelingSelection): The LabelingSelection instance.
         validation (ValidationSelection): The ValidationSelection instance.
         conditional_validation (ConditionalValidationSelection): The ConditionalValidationSelection instance.
+        demographic (DemographicSelection): The DemographicSelection instance.
         capped (CappedSelection): The CappedSelection instance."""
-    demographic = DemographicSelection
     labeling = LabelingSelection
     validation = ValidationSelection
     conditional_validation = ConditionalValidationSelection
+    demographic = DemographicSelection
     capped = CappedSelection

rapidata/rapidata_client/validation/_validation_rapid_parts.py CHANGED Viewed

@@ -19,6 +19,9 @@ from rapidata.api_client.models.polygon_payload import PolygonPayload
 from rapidata.api_client.models.polygon_truth import PolygonTruth
 from rapidata.api_client.models.transcription_payload import TranscriptionPayload
 from rapidata.api_client.models.transcription_truth import TranscriptionTruth
+from rapidata.api_client.models.scrub_payload import ScrubPayload
+from rapidata.api_client.models.scrub_truth import ScrubTruth
 from rapidata.rapidata_client.assets._media_asset import MediaAsset
 from rapidata.rapidata_client.assets._multi_asset import MultiAsset
 from rapidata.rapidata_client.assets._text_asset import TextAsset
@@ -40,6 +43,7 @@ class ValidatioRapidParts:
         | NamedEntityPayload
         | PolygonPayload
         | TranscriptionPayload
+        | ScrubPayload
     )
     truths: (
         AttachCategoryTruth
@@ -51,6 +55,7 @@ class ValidatioRapidParts:
         | NamedEntityTruth
         | PolygonTruth
         | TranscriptionTruth
+        | ScrubTruth
     )
     metadata: Sequence[Metadata]
     randomCorrectProbability: float

rapidata/rapidata_client/validation/_validation_set_builder.py CHANGED Viewed

@@ -6,6 +6,9 @@ from rapidata.api_client.models.compare_truth import CompareTruth
 from rapidata.api_client.models.transcription_payload import TranscriptionPayload
 from rapidata.api_client.models.transcription_truth import TranscriptionTruth
 from rapidata.api_client.models.transcription_word import TranscriptionWord
+from rapidata.api_client.models.scrub_payload import ScrubPayload
+from rapidata.api_client.models.scrub_truth import ScrubTruth
+from rapidata.api_client.models.scrub_range import ScrubRange
 from rapidata.api_client.models.locate_payload import LocatePayload
 from rapidata.api_client.models.locate_box_truth import LocateBoxTruth
 from rapidata.api_client.models.line_payload import LinePayload
@@ -26,7 +29,8 @@ from rapidata.rapidata_client.validation.rapids.rapids import (
     CompareRapid,
     SelectWordsRapid,
     LocateRapid,
-    DrawRapid
+    DrawRapid,
+    TimestampRapid
 )
 from typing import Sequence
@@ -106,10 +110,13 @@ class ValidationSetBuilder:
             self.__add_select_words_rapid(rapid.asset, rapid.instruction, rapid.sentence, rapid.truths, rapid.strict_grading)
         elif isinstance(rapid, LocateRapid):
-            self.__add_locate_rapid(rapid.asset, rapid.instruction, rapid.truths)
+            self.__add_locate_rapid(rapid.asset, rapid.instruction, rapid.truths, rapid.metadata)
         elif isinstance(rapid, DrawRapid):
-            self.__add_draw_rapid(rapid.asset, rapid.instruction, rapid.truths)
+            self.__add_draw_rapid(rapid.asset, rapid.instruction, rapid.truths, rapid.metadata)
+        elif isinstance(rapid, TimestampRapid):
+            self.__add_timestamp_rapid(rapid.asset, rapid.instruction, rapid.truths, rapid.metadata)
         else:
             raise ValueError("Unsupported rapid type")
@@ -262,7 +269,8 @@ class ValidationSetBuilder:
         self,
         asset: MediaAsset,
         instruction: str,
-        truths: list[Box]
+        truths: list[Box],
+        metadata: Sequence[Metadata] = [],
     ):
         """Add a locate rapid to the validation set.
@@ -301,7 +309,7 @@ class ValidationSetBuilder:
                 instruction=instruction,
                 payload=payload,
                 truths=model_truth,
-                metadata=[],
+                metadata=metadata,
                 randomCorrectProbability=coverage,
                 asset=asset,
             )
@@ -311,7 +319,8 @@ class ValidationSetBuilder:
         self,
         asset: MediaAsset,
         instruction: str,
-        truths: list[Box]
+        truths: list[Box],
+        metadata: Sequence[Metadata] = [],
     ):
         """Add a draw rapid to the validation set.
@@ -348,12 +357,62 @@ class ValidationSetBuilder:
                 instruction=instruction,
                 payload=payload,
                 truths=model_truth,
-                metadata=[],
+                metadata=metadata,
                 randomCorrectProbability=coverage,
                 asset=asset,
             )
         )
+    def __add_timestamp_rapid(
+        self,
+        asset: MediaAsset,
+        instruction: str,
+        truths: list[tuple[int, int]],
+        metadata: Sequence[Metadata] = [],
+    ):
+        """Add a timestamp rapid to the validation set.
+        Args:
+            instruction (str): The instruction for the timestamp rapid.
+            asset (MediaAsset): The asset for the rapid.
+            truths (list[tuple[int, int]]): The truths for the rapid.
+                This is a list of tuples where the first element is the start of the interval and the second element is the end of the interval.
+                The intervals are in miliseconds.
+            metadata (Sequence[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
+        Returns:
+            ValidationSetBuilder: The ValidationSetBuilder instance.
+        """
+        for truth in truths:
+            if len(truth) != 2:
+                raise ValueError("The truths per datapoint must be a tuple of exactly two integers.")
+            if truth[0] > truth[1]:
+                raise ValueError("The start of the interval must be smaller than the end of the interval.")
+        payload = ScrubPayload(
+            _t="ScrubPayload",
+            target=instruction
+        )
+        model_truth = ScrubTruth(
+            _t="ScrubTruth",
+            validRanges=[ScrubRange(
+                start=truth[0],
+                end=truth[1]
+            ) for truth in truths]
+        )
+        self._rapid_parts.append(
+            ValidatioRapidParts(
+                instruction=instruction,
+                payload=payload,
+                truths=model_truth,
+                metadata=metadata,
+                randomCorrectProbability=self._calculate_coverage_ratio(asset.get_duration(), truths),
+                asset=asset,
+            )
+        )
     def _calculate_boxes_coverage(self, boxes: list[Box], image_width: int, image_height: int) -> float:
         if not boxes:
@@ -369,3 +428,44 @@ class ValidationSetBuilder:
         total_covered = len(pixels)
         return total_covered / (image_width * image_height)
+    def _calculate_coverage_ratio(self, total_duration: int, subsections: list[tuple[int, int]]) -> float:
+        """
+        Calculate the ratio of total_duration that is covered by subsections, handling overlaps.
+        Args:
+            total_duration: The total duration to consider
+            subsections: List of tuples containing (start, end) times
+        Returns:
+            float: Ratio of coverage (0 to 1)
+        """
+        if not subsections:
+            return 0.0
+        # Sort subsections by start time and clamp to valid range
+        sorted_ranges = sorted(
+            (max(0, start), min(end, total_duration))
+            for start, end in subsections
+        )
+        # Merge overlapping ranges
+        merged_ranges = []
+        current_range = list(sorted_ranges[0])
+        for next_start, next_end in sorted_ranges[1:]:
+            current_start, current_end = current_range
+            # If ranges overlap or are adjacent
+            if next_start <= current_end:
+                current_range[1] = max(current_end, next_end)
+            else:
+                merged_ranges.append(current_range)
+                current_range = [next_start, next_end]
+        merged_ranges.append(current_range)
+        # Calculate total coverage
+        total_coverage = sum(end - start for start, end in merged_ranges)
+        return total_coverage / total_duration

rapidata/rapidata_client/validation/rapidata_validation_set.py CHANGED Viewed

@@ -34,6 +34,8 @@ from rapidata.api_client.models.polygon_truth import PolygonTruth
 from rapidata.api_client.models.transcription_payload import TranscriptionPayload
 from rapidata.api_client.models.transcription_truth import TranscriptionTruth
 from rapidata.api_client.models.transcription_word import TranscriptionWord
+from rapidata.api_client.models.scrub_payload import ScrubPayload
+from rapidata.api_client.models.scrub_truth import ScrubTruth
 from rapidata.rapidata_client.assets._media_asset import MediaAsset
 from rapidata.rapidata_client.assets._multi_asset import MultiAsset
 from rapidata.rapidata_client.assets._text_asset import TextAsset
@@ -90,6 +92,7 @@ class RapidataValidationSet:
             | NamedEntityPayload
             | PolygonPayload
             | TranscriptionPayload
+            | ScrubPayload
         ),
         truths: (
             AttachCategoryTruth
@@ -101,6 +104,7 @@ class RapidataValidationSet:
             | NamedEntityTruth
             | PolygonTruth
             | TranscriptionTruth
+            | ScrubTruth
         ),
         metadata: Sequence[Metadata],
         asset: MediaAsset | TextAsset | MultiAsset,

rapidata/rapidata_client/validation/rapids/rapids.py CHANGED Viewed

@@ -92,3 +92,37 @@ class DrawRapid(Rapid):
         self.instruction = instruction
         self.asset = asset
         self.truths = truths
+        self.metadata = metadata
+class TimestampRapid(Rapid):
+    """
+    Used to have the labeler timestamp a video or audio file.
+    Args:
+        instruction (str): The instruction for the labeler.
+        truths (list[tuple[int, int]]): The possible accepted timestamps intervals for the labeler (in miliseconds).
+            The first element of the tuple is the start of the interval and the second element is the end of the interval.
+        asset (MediaAsset): The asset that the labeler is timestamping.
+        metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
+    """
+    def __init__(self, instruction: str, truths: list[tuple[int, int]], asset: MediaAsset, metadata: Sequence[Metadata]):
+        if not asset.get_duration():
+            raise ValueError("The datapoints must have a duration. (e.g. video or audio)")
+        if not isinstance(truths, list):
+            raise ValueError("The truths must be a list of tuples.")
+        for truth in truths:
+            if len(truth) != 2 or not all(isinstance(x, int) for x in truth):
+                raise ValueError("The truths per datapoint must be a tuple of exactly two integers.")
+            if truth[0] >= truth[1]:
+                raise ValueError("The start of the interval must be smaller than the end of the interval.")
+            if truth[0] < 0:
+                raise ValueError("The start of the interval must be greater than or equal to 0.")
+            if truth[1] > asset.get_duration():
+                raise ValueError("The end of the interval can not be greater than the duration of the datapoint.")
+        self.instruction = instruction
+        self.truths = truths
+        self.asset = asset
+        self.metadata = metadata

rapidata/rapidata_client/validation/rapids/rapids_manager.py CHANGED Viewed

@@ -4,7 +4,8 @@ from rapidata.rapidata_client.validation.rapids.rapids import (
     CompareRapid,
     SelectWordsRapid,
     LocateRapid,
-    DrawRapid)
+    DrawRapid,
+    TimestampRapid)
 from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
 from rapidata.rapidata_client.metadata import Metadata
 from rapidata.rapidata_client.validation.rapids.box import Box
@@ -161,3 +162,30 @@ class RapidsManager:
                 asset=asset,
                 metadata=metadata,
                 )
+    def timestamp_rapid(self,
+            instruction: str,
+            truths: list[tuple[int, int]],
+            datapoint: str,
+            metadata: Sequence[Metadata] = []
+    ) -> TimestampRapid:
+        """Build a timestamp rapid
+        Args:
+            instruction (str): The instruction for the labeler.
+            truths (list[tuple[int, int]]): The possible accepted timestamps intervals for the labeler (in miliseconds).
+                The first element of the tuple is the start of the interval and the second element is the end of the interval.
+            datapoint (str): The asset that the labeler will be timestamping.
+            metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+        """
+        asset = MediaAsset(datapoint)
+        return TimestampRapid(
+                instruction=instruction,
+                truths=truths,
+                asset=asset,
+                metadata=metadata,
+                )

rapidata/rapidata_client/validation/validation_set_manager.py CHANGED Viewed

@@ -291,6 +291,56 @@ class ValidationSetManager:
         return validation_set_builder._submit(print_confirmation)
+    def create_timestamp_set(self,
+        name: str,
+        instruction: str,
+        truths: list[list[tuple[int, int]]],
+        datapoints: list[str],
+        contexts: list[str] | None = None,
+        print_confirmation: bool = True
+    ) -> RapidataValidationSet:
+        """Create a timestamp validation set.
+        Args:
+            name (str): The name of the validation set. (will not be shown to the labeler)
+            instruction (str): The instruction to show to the labeler.
+            truths (list[list[tuple[int, int]]]): The truths for each datapoint defined as start and endpoint based on miliseconds.
+            Outher list is for each datapoint, inner list is for each truth.\n
+                example:
+                    datapoints: ["datapoint1", "datapoint2"]
+                    truths: [[(0, 10)], [(20, 30)]] -> first datapoint the correct interval is from 0 to 10, second datapoint the correct interval is from 20 to 30
+            datapoints (list[str]): The datapoints that will be used for validation.
+            contexts (list[str], optional): The contexts for each datapoint. Defaults to None.
+            print_confirmation (bool, optional): Whether to print a confirmation message that validation set has been created. Defaults to True.
+        """
+        if len(datapoints) != len(truths):
+            raise ValueError("The number of datapoints and truths must be equal")
+        if not all([isinstance(truth, list) for truth in truths]):
+            raise ValueError("Truths must be a list of lists")
+        if contexts and len(contexts) != len(datapoints):
+            raise ValueError("The number of contexts and datapoints must be equal")
+        rapids = []
+        for i in range(len(datapoints)):
+            rapids.append(
+                self.rapid.timestamp_rapid(
+                    instruction=instruction,
+                    truths=truths[i],
+                    datapoint=datapoints[i],
+                    metadata=[PromptMetadata(contexts[i])] if contexts else []
+                )
+            )
+        validation_set_builder = ValidationSetBuilder(name, self.__openapi_service)
+        for rapid in rapids:
+            validation_set_builder._add_rapid(rapid)
+        return validation_set_builder._submit(print_confirmation)
     def create_mixed_set(self,
         name: str,
         rapids: Sequence[Rapid],

rapidata/rapidata_client/workflow/__init__.py CHANGED Viewed

@@ -6,3 +6,4 @@ from ._compare_workflow import CompareWorkflow
 from ._free_text_workflow import FreeTextWorkflow
 from ._select_words_workflow import SelectWordsWorkflow
 from ._evaluation_workflow import EvaluationWorkflow
+from ._timestamp_workflow import TimestampWorkflow

rapidata 2.1.4__py3-none-any.whl → 2.2.0__py3-none-any.whl

Potentially problematic release.

rapidata 2.1.4__py3-none-any.whl → 2.2.0__py3-none-any.whl