PyPI - rapidata - Versions diffs - 1.7.1__py3-none-any.whl → 1.8.1__py3-none-any.whl - Mend

rapidata 1.7.1__py3-none-any.whl → 1.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rapidata might be problematic. Click here for more details.

Files changed (39)

rapidata/rapidata_client/dataset/rapid_builders/compare_rapid_builders.py ADDED Viewed

@@ -0,0 +1,145 @@
+from rapidata.rapidata_client.assets import MultiAsset, TextAsset, MediaAsset
+from rapidata.rapidata_client.metadata import PromptMetadata
+from rapidata.rapidata_client.dataset.rapid_builders.rapids import CompareRapid
+import re
+class CompareRapidBuilder:
+    """Final builder class for comparison rapid.
+    This class handles the final construction of a comparison rapid with all required parameters.
+    """
+    def __init__(self, criteria: str, truth: str, asset: MultiAsset):
+        """Initialize the comparison rapid builder.
+        Args:
+            criteria (str): The criteria for comparison
+            truth (str): The correct answer
+            asset (MultiAsset): Collection of assets to be compared
+        """
+        self._criteria = criteria
+        self._truth = truth
+        self._asset = asset
+        self._metadata = []
+    def prompt(self, prompt: str):
+        """Add a prompt to provide additional context for the comparison.
+        Args:
+            prompt (str): Additional instructions or context
+        Returns:
+            CompareRapidBuilder: The builder instance for method chaining
+        """
+        self._metadata.append(PromptMetadata(prompt))
+        return self
+    def build(self):
+        """Constructs and returns the final comparison rapid.
+        Returns:
+            CompareRapid: The constructed comparison rapid
+        """
+        return CompareRapid(
+            criteria=self._criteria,
+            asset=self._asset,
+            truth=self._truth,
+            metadata=self._metadata
+        )
+class CompareRapidTruthBuilder:
+    """Builder class for the truth of the comparison rapid.
+    This adds the truth to the comparison rapid.
+    """
+    def __init__(self, criteria: str, asset: MultiAsset):
+        self._criteria = criteria
+        self._asset = asset
+        self._truth = None
+    def truth(self, truth: str):
+        """Set the truth for the comparison rapid.
+        Args:
+            truth (str): The correct answer for the comparison task. Is the string of the correct media/text asset"""
+        if not isinstance(truth, str):
+            raise ValueError("Truth must be a string.")
+        self._truth = MediaAsset(truth).name
+        return self._build()
+    def _build(self):
+        if self._truth is None:
+            raise ValueError("Truth is required")
+        return CompareRapidBuilder(
+            criteria=self._criteria,
+            asset=self._asset,
+            truth=self._truth,
+        )
+class CompareRapidAssetBuilder:
+    """Builder class for the asset of the comparison rapid.
+    This adds the asset to the comparison rapid.
+    """
+    def __init__(self, criteria: str):
+        self._criteria = criteria
+        self._asset: MultiAsset | None = None
+    def media(self, medias: list[str]):
+        """Set the media assets for the comparison rapid.
+        Args:
+            medias (list[str]): The local file paths or links of the media assets to be compared"""
+        media_assets = [MediaAsset(media) for media in medias]
+        self._asset = MultiAsset(media_assets)
+        return self._build()
+    def text(self, texts: list[str]):
+        """Set the text assets for the comparison rapid.
+        Args:
+            texts (list[str]): The texts to be compared"""
+        text_assets = [TextAsset(text) for text in texts]
+        self._asset = MultiAsset(text_assets)
+        return self._build()
+    def _build(self):
+        if self._asset is None:
+            raise ValueError("Asset is required")
+        return CompareRapidTruthBuilder(
+            criteria=self._criteria,
+            asset=self._asset,
+        )
+class CompareRapidCriteriaBuilder:
+    """Builder class for the criteria of the comparison rapid.
+    This adds the criteria to the comparison rapid."""
+    def __init__(self):
+        self._criteria = None
+    def criteria(self, criteria: str):
+        """Set the criteria for the comparison rapid.
+        Args:
+            criteria (str): The criteria for comparison"""
+        if not isinstance(criteria, str):
+            raise ValueError("Criteria must be a string")
+        self._criteria = criteria
+        return self._build()
+    def _build(self):
+        if self._criteria is None:
+            raise ValueError("Criteria is required")
+        return CompareRapidAssetBuilder(
+            criteria=self._criteria,
+        )

rapidata/rapidata_client/dataset/rapid_builders/rapids.py ADDED Viewed

@@ -0,0 +1,32 @@
+from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
+from rapidata.rapidata_client.metadata import Metadata
+class Rapid:
+    pass
+class ClassificationRapid(Rapid):
+    """A classification rapid. This represents the question, options, truths, asset and metadata that will be given to the user."""
+    def __init__(self, question: str, options: list[str], truths: list[str], asset: MediaAsset | TextAsset, metadata: list[Metadata]):
+        self.question = question
+        self.options = options
+        self.truths = truths
+        self.asset = asset
+        self.metadata = metadata
+class CompareRapid(Rapid):
+    """A comparison rapid. This represents the criteria, asset, truth and metadata that will be given to the user."""
+    def __init__(self, criteria: str, truth: str, asset: MultiAsset, metadata: list[Metadata]):
+        self.criteria = criteria
+        self.asset = asset
+        self.truth = truth
+        self.metadata = metadata
+class TranscriptionRapid(Rapid):
+    """A transcription rapid. This represents the instruction, truths, asset, transcription and strict grading that will be given to the user."""
+    def __init__(self, instruction: str, truths: list[int], asset: MediaAsset, transcription: str, strict_grading: bool):
+        self.instruction = instruction
+        self.truths = truths
+        self.asset = asset
+        self.transcription = transcription
+        self.strict_grading = strict_grading

rapidata/rapidata_client/dataset/rapid_builders/transcription_rapid_builders.py ADDED Viewed

@@ -0,0 +1,132 @@
+from rapidata.rapidata_client.assets import MediaAsset
+from rapidata.rapidata_client.dataset.rapid_builders.rapids import TranscriptionRapid
+class TranscriptionRapidBuilder:
+    """Final builder class for transcription rapid.
+    This class handles the final construction of a transcription rapid with all required parameters.
+    """
+    def __init__(self, instruction: str, truths: list[int], asset: MediaAsset, transcription_text: str):
+        self._instruction = instruction
+        self._truths = truths
+        self._asset = asset
+        self._transcription_text = transcription_text
+        self._strict_grading = True
+    def strict_grading(self, strict_grading: bool = True):
+        """Set whether to use strict grading for the transcription.
+        Strict grading true: In order to be correct, you must select all of the right words
+        Strict grading false: In order to be correct, you must select at least one right word
+        In both cases it will be incorrect if you select any wrong words
+        Args:
+            strict_grading (bool): Whether to use strict grading. Defaults to True.
+        Returns:
+            TranscriptionRapidBuilder: The builder instance for method chaining
+        """
+        self._strict_grading = strict_grading
+        return self
+    def build(self):
+        """Constructs and returns the final transcription rapid.
+        Returns:
+            TranscriptionRapid: The constructed transcription rapid
+        """
+        return TranscriptionRapid(
+            instruction=self._instruction,
+            truths=self._truths,
+            asset=self._asset,
+            transcription=self._transcription_text,
+            strict_grading=self._strict_grading
+        )
+class TranscriptionRapidTruthsBuilder:
+    """Builder class for the truths of the transcription rapid.
+    This adds the truths to the transcription rapid.
+    """
+    def __init__(self, instruction: str, media: MediaAsset, transcription_text: str):
+        self._instruction = instruction
+        self._media = media
+        self._transcription_text = transcription_text
+        self._truths = None
+    def truths(self, truths: list[int]):
+        """Set the truths for the transcription rapid.
+        Args:
+            truths (list[int]): The correct answers for the transcription task. \
+                Each integer represents the index of the correct word in the transcription text."""
+        if not isinstance(truths, list) or not all(isinstance(truth, int) for truth in truths):
+            raise ValueError("Truths must be a list of integers")
+        self._truths = truths
+        return self._build()
+    def _build(self):
+        if self._truths is None:
+            raise ValueError("Truths are required")
+        return TranscriptionRapidBuilder(
+            instruction=self._instruction,
+            truths=self._truths,
+            asset=self._media,
+            transcription_text=self._transcription_text
+        )
+class TranscriptionRapidAssetBuilder:
+    """Builder class for the asset of the transcription rapid.
+    This adds the asset to the transcription rapid.
+    """
+    def __init__(self, instruction: str):
+        self._instruction = instruction
+    def media(self, media: str, transcription_text: str):
+        """Set the media asset for the transcription rapid.
+        Args:
+            media (str): The local file path of the audio or video file to be transcribed
+            transcription_text (str): The text to be transcribed from the media asset""" # is video file okay?
+        self._asset = MediaAsset(media)
+        self._transcription_text = transcription_text
+        return self._build()
+    def _build(self):
+        if not self._asset:
+            raise ValueError("Media is required")
+        return TranscriptionRapidTruthsBuilder(
+            instruction=self._instruction,
+            media=self._asset,
+            transcription_text=self._transcription_text
+        )
+class TranscriptionRapidInstructionBuilder:
+    def __init__(self):
+        self._instruction = None
+    def instruction(self, instruction: str):
+        """Set the instruction for the transcription rapid.
+        Args:
+            instruction (str): The instruction for the transcription task"""
+        if not isinstance(instruction, str):
+            raise ValueError("Instruction must be a string")
+        self._instruction = instruction
+        return self._build()
+    def _build(self):
+        if self._instruction is None:
+            raise ValueError("Instruction is required")
+        return TranscriptionRapidAssetBuilder(
+            instruction=self._instruction,
+        )

rapidata/rapidata_client/dataset/rapidata_dataset.py CHANGED Viewed

@@ -106,10 +106,12 @@ class RapidataDataset:
                     else:
                         files.append(cast(str, asset.path))
-            self.openapi_service.dataset_api.dataset_create_datapoint_post(
+            upload_response = self.openapi_service.dataset_api.dataset_create_datapoint_post(
                 model=model,
                 files=files # type: ignore
             )
+            if upload_response.errors:
+                raise ValueError(f"Error uploading datapoint: {upload_response.errors}")
         total_uploads = len(media_paths)
         with ThreadPoolExecutor(max_workers=max_workers) as executor:

rapidata/rapidata_client/dataset/rapidata_validation_set.py CHANGED Viewed

@@ -52,6 +52,27 @@ class RapidataValidationSet:
         self.openapi_service = openapi_service
         self.name = name
+    def upload_files(self, model: AddValidationRapidModel, assets: list[MediaAsset]):
+        """Upload a file to the validation set.
+        Args:
+            asset list[(MediaAsset)]: The asset to upload.
+        Returns:
+            str: The path to the uploaded file.
+        """
+        files = []
+        for asset in assets:
+            if isinstance(asset.path, str):
+                files.append(asset.path)
+            elif isinstance(asset.path, bytes):
+                files.append((asset.name, asset.path))
+            else:
+                raise ValueError("upload file failed")
+        self.openapi_service.validation_api.validation_add_validation_rapid_post(
+            model=model, files=files
+        )
     def add_general_validation_rapid(
         self,
         payload: (
@@ -107,9 +128,7 @@ class RapidataValidationSet:
             randomCorrectProbability=randomCorrectProbability,
         )
         if isinstance(asset, MediaAsset):
-            self.openapi_service.validation_api.validation_add_validation_rapid_post(
-                model=model, files=[asset.path]
-            )
+            self.upload_files(model=model, assets=[asset])
         elif isinstance(asset, TextAsset):
             model = AddValidationTextRapidModel(
@@ -128,12 +147,10 @@ class RapidataValidationSet:
             )
         elif isinstance(asset, MultiAsset):
-            files = [a.path for a in asset if isinstance(a, MediaAsset)]
+            files = [a for a in asset if isinstance(a, MediaAsset)]
             texts = [a.text for a in asset if isinstance(a, TextAsset)]
             if files:
-                self.openapi_service.validation_api.validation_add_validation_rapid_post(
-                    model=model, files=files # type: ignore
-                )
+                self.upload_files(model=model, assets=files)
             if texts:
                 model = AddValidationTextRapidModel(
                     validationSetId=self.id,

rapidata/rapidata_client/dataset/validation_set_builder.py CHANGED Viewed

@@ -16,6 +16,14 @@ from rapidata.rapidata_client.dataset.validation_rapid_parts import ValidatioRap
 from rapidata.rapidata_client.metadata.base_metadata import Metadata
 from rapidata.service.openapi_service import OpenAPIService
+from rapidata.rapidata_client.dataset.rapid_builders.rapids import (
+    Rapid,
+    ClassificationRapid,
+    CompareRapid,
+    TranscriptionRapid
+)
+from deprecated import deprecated
 class ValidationSetBuilder:
     """The ValidationSetBuilder is used to build a validation set.
@@ -71,7 +79,29 @@ class ValidationSetBuilder:
             )
         return validation_set
+    def add_rapid(self, rapid: Rapid):
+        """Add a rapid to the validation set.
+        To create the Rapid, use the RapidataClient.rapid_builder instance.
+        Args:
+            rapid (Rapid): The rapid to add to the validation set.
+        """
+        if not isinstance(rapid, Rapid):
+            raise ValueError("This method only accepts Rapid instances")
+        if isinstance(rapid, ClassificationRapid):
+            self._add_classify_rapid(rapid.asset, rapid.question, rapid.options, rapid.truths, rapid.metadata)
+        if isinstance(rapid, CompareRapid):
+            self._add_compare_rapid(rapid.asset, rapid.criteria, rapid.truth, rapid.metadata)
+        if isinstance(rapid, TranscriptionRapid):
+            self._add_transcription_rapid(rapid.asset, rapid.instruction, rapid.transcription, rapid.truths, rapid.strict_grading)
+        return self
+    @deprecated("Use add_rapid instead")
     def add_classify_rapid(
         self,
         asset: MediaAsset | TextAsset,
@@ -82,6 +112,33 @@ class ValidationSetBuilder:
     ):
         """Add a classify rapid to the validation set.
+        Args:
+            asset (MediaAsset | TextAsset): The asset for the rapid.
+            question (str): The question for the rapid.
+            categories (list[str]): The list of categories for the rapid.
+            truths (list[str]): The list of truths for the rapid.
+            metadata (list[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
+        Returns:
+            ValidationSetBuilder: The ValidationSetBuilder instance.
+        Raises:
+            ValueError: If the lengths of categories and truths are inconsistent.
+        """
+        self._add_classify_rapid(asset, question, categories, truths, metadata)
+        return self
+    def _add_classify_rapid(
+        self,
+        asset: MediaAsset | TextAsset,
+        question: str,
+        categories: list[str],
+        truths: list[str],
+        metadata: list[Metadata] = [],
+    ):
+        """Add a classify rapid to the validation set.
         Args:
             asset (MediaAsset | TextAsset): The asset for the rapid.
             question (str): The question for the rapid.
@@ -113,8 +170,7 @@ class ValidationSetBuilder:
             )
         )
-        return self
+    @deprecated("Use add_rapid instead")
     def add_compare_rapid(
         self,
         asset: MultiAsset,
@@ -136,7 +192,32 @@ class ValidationSetBuilder:
         Raises:
             ValueError: If the number of assets is not exactly two.
         """
-        payload = ComparePayload(_t="ComparePayload", criteria=question)
+        self._add_compare_rapid(asset, question, truth, metadata)
+        return self
+    def _add_compare_rapid(
+        self,
+        asset: MultiAsset,
+        criteria: str,
+        truth: str,
+        metadata: list[Metadata] = [],
+    ):
+        """Add a compare rapid to the validation set.
+        Args:
+            asset (MultiAsset): The assets for the rapid.
+            criteria (str): The criteria for the comparison.
+            truth (str): The truth identifier for the rapid.
+            metadata (list[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
+        Returns:
+            ValidationSetBuilder: The ValidationSetBuilder instance.
+        Raises:
+            ValueError: If the number of assets is not exactly two.
+        """
+        payload = ComparePayload(_t="ComparePayload", criteria=criteria)
         # take only last part of truth path
         truth = os.path.basename(truth)
         model_truth = CompareTruth(_t="CompareTruth", winnerId=truth)
@@ -146,7 +227,7 @@ class ValidationSetBuilder:
         self._rapid_parts.append(
             ValidatioRapidParts(
-                question=question,
+                question=criteria,
                 payload=payload,
                 truths=model_truth,
                 metadata=metadata,
@@ -155,8 +236,7 @@ class ValidationSetBuilder:
             )
         )
-        return self
+    @deprecated("Use add_rapid instead")
     def add_transcription_rapid(
         self,
         asset: MediaAsset | TextAsset,
@@ -168,6 +248,35 @@ class ValidationSetBuilder:
     ):
         """Add a transcription rapid to the validation set.
+        Args:
+            asset (MediaAsset | TextAsset): The asset for the rapid.
+            question (str): The question for the rapid.
+            transcription (list[str]): The transcription for the rapid.
+            truths (list[int]): The list of indices of the true word selections.
+            strict_grading (bool | None, optional): The strict grading for the rapid. Defaults to None.
+            metadata (list[Metadata], optional): The metadata for the rapid.
+        Returns:
+            ValidationSetBuilder: The ValidationSetBuilder instance.
+        Raises:
+            ValueError: If a correct word is not found in the transcription.
+        """
+        self._add_transcription_rapid(asset, question, transcription, truths, strict_grading, metadata)
+        return self
+    def _add_transcription_rapid(
+        self,
+        asset: MediaAsset | TextAsset,
+        question: str,
+        transcription: str,
+        truths: list[int],
+        strict_grading: bool | None = None,
+        metadata: list[Metadata] = [],
+    ):
+        """Add a transcription rapid to the validation set.
         Args:
             asset (MediaAsset | TextAsset): The asset for the rapid.
             question (str): The question for the rapid.
@@ -213,5 +322,3 @@ class ValidationSetBuilder:
                 randomCorrectProbability = 1 / len(transcription_words),
             )
         )
-        return self

rapidata/rapidata_client/filter/country_filter.py CHANGED Viewed

@@ -7,6 +7,9 @@ class CountryFilter(Filter):
     def __init__(self, country_codes: list[str]):
         # check that all characters in the country codes are uppercase
+        if not isinstance(country_codes, list):
+            raise ValueError("Country codes must be a list")
         if not all([code.isupper() for code in country_codes]):
             raise ValueError("Country codes must be uppercase")

rapidata/rapidata_client/filter/language_filter.py CHANGED Viewed

@@ -8,6 +8,9 @@ from rapidata.api_client.models.language_user_filter_model import (
 class LanguageFilter(Filter):
     def __init__(self, language_codes: list[str]):
+        if not isinstance(language_codes, list):
+            raise ValueError("Language codes must be a list")
         # check that all characters in the language codes are lowercase
         if not all([code.islower() for code in language_codes]):
             raise ValueError("Language codes must be lowercase")

rapidata/rapidata_client/metadata/prompt_metadata.py CHANGED Viewed

@@ -6,8 +6,12 @@ class PromptMetadata(Metadata):
     def __init__(self, prompt: str, identifier: str = "prompt"):
         super().__init__(identifier=identifier)
+        if not isinstance(prompt, str):
+            raise ValueError("Prompt must be a string")
         self._prompt = prompt
     def to_model(self):
-        return PromptMetadataInput(_t="PromptMetadataInput", identifier=self._identifier, prompt=self._prompt)
+        return PromptMetadataInput(_t="PromptMetadataInput", identifier=self._identifier, prompt=self._prompt)

rapidata/rapidata_client/order/rapidata_order.py CHANGED Viewed

@@ -106,7 +106,7 @@ class RapidataOrder:
         Returns:
             The results of the order.
         """
-        while self.get_status() == "Processing":
+        while self.get_status() not in ["Completed", "Paused", "ManuelReview"]:
             sleep(5)
         try:

rapidata/rapidata_client/order/rapidata_order_builder.py CHANGED Viewed

@@ -27,7 +27,7 @@ from rapidata.service.openapi_service import OpenAPIService
 from rapidata.rapidata_client.workflow.compare_workflow import CompareWorkflow
-from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
+from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset, BaseAsset
 from typing import Optional, cast, Sequence
@@ -69,7 +69,7 @@ class RapidataOrderBuilder:
         self._selections: list[Selection] = []
         self._rapids_per_bag: int = 2
         self._priority: int = 50
-        self._assets: list[MediaAsset] | list[TextAsset] | list[MultiAsset] = []
+        self._assets: Sequence[BaseAsset] = []
     def _to_model(self) -> CreateOrderModel:
         """
@@ -202,7 +202,7 @@ class RapidataOrderBuilder:
             order.submit()
         if not disable_link:
-            print(f"Order '{self._name}' is now viewable under https://app.rapidata.ai/order/detail/{order.order_id}.")
+            print(f"Order '{self._name}' is now viewable under: https://app.{self._openapi_service.enviroment}/order/detail/{order.order_id}")
         return order
@@ -240,8 +240,8 @@ class RapidataOrderBuilder:
     def media(
         self,
-        asset: list[MediaAsset] | list[TextAsset] | list[MultiAsset],
-        metadata: Sequence[Metadata] | None = None,
+        asset: Sequence[BaseAsset],
+        metadata: Sequence[Metadata] | None = None, # make this a list of metadata on next major release
     ) -> "RapidataOrderBuilder":
         """
         Set the media assets for the order.

rapidata 1.7.1__py3-none-any.whl → 1.8.1__py3-none-any.whl

Potentially problematic release.

rapidata 1.7.1__py3-none-any.whl → 1.8.1__py3-none-any.whl