PyPI - rapidata - Versions diffs - 1.10.1__py3-none-any.whl → 2.0.0__py3-none-any.whl - Mend

rapidata 1.10.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rapidata might be problematic. Click here for more details.

Files changed (138) hide show

rapidata/rapidata_client/{dataset → validation}/rapidata_validation_set.py RENAMED Viewed

@@ -34,32 +34,37 @@ from rapidata.api_client.models.polygon_truth import PolygonTruth
 from rapidata.api_client.models.transcription_payload import TranscriptionPayload
 from rapidata.api_client.models.transcription_truth import TranscriptionTruth
 from rapidata.api_client.models.transcription_word import TranscriptionWord
-from rapidata.rapidata_client.assets.media_asset import MediaAsset
-from rapidata.rapidata_client.assets.multi_asset import MultiAsset
-from rapidata.rapidata_client.assets.text_asset import TextAsset
-from rapidata.rapidata_client.metadata.base_metadata import Metadata
+from rapidata.rapidata_client.assets._media_asset import MediaAsset
+from rapidata.rapidata_client.assets._multi_asset import MultiAsset
+from rapidata.rapidata_client.assets._text_asset import TextAsset
+from rapidata.rapidata_client.metadata._base_metadata import Metadata
 from rapidata.service.openapi_service import OpenAPIService
+from typing import Sequence
 class RapidataValidationSet:
     """A class for interacting with a Rapidata validation set.
-    Get a `ValidationSet` either by using `rapi.get_validation_set(id)` to get an existing validation set or by using `rapi.new_validation_set(name)` to create a new validation set.
+    Represents a set of all the validation tasks that can be added to an order.
+    When added to an order, the tasks will be selected randomly from the set.
+    Attributes:
+        id (str): The ID of the validation set.
+        name (str): The name of the validation set.
     """
     def __init__(self, validation_set_id, openapi_service: OpenAPIService, name: str):
         self.id = validation_set_id
-        self.openapi_service = openapi_service
         self.name = name
+        self.__openapi_service = openapi_service
-    def upload_files(self, model: AddValidationRapidModel, assets: list[MediaAsset]):
+    def __upload_files(self, model: AddValidationRapidModel, assets: list[MediaAsset]):
         """Upload a file to the validation set.
         Args:
-            asset list[(MediaAsset)]: The asset to upload.
-        Returns:
-            str: The path to the uploaded file.
+            assets: list[(MediaAsset)]: The asset to upload.
         """
         files = []
         for asset in assets:
@@ -69,11 +74,11 @@ class RapidataValidationSet:
                 files.append((asset.name, asset.path))
             else:
                 raise ValueError("upload file failed")
-        self.openapi_service.validation_api.validation_add_validation_rapid_post(
+        self.__openapi_service.validation_api.validation_add_validation_rapid_post(
             model=model, files=files
         )
-    def add_general_validation_rapid(
+    def _add_general_validation_rapid(
         self,
         payload: (
             BoundingBoxPayload
@@ -97,7 +102,7 @@ class RapidataValidationSet:
             | PolygonTruth
             | TranscriptionTruth
         ),
-        metadata: list[Metadata],
+        metadata: Sequence[Metadata],
         asset: MediaAsset | TextAsset | MultiAsset,
         randomCorrectProbability: float,
     ) -> None:
@@ -122,13 +127,13 @@ class RapidataValidationSet:
             payload=AddValidationRapidModelPayload(payload),
             truth=AddValidationRapidModelTruth(truths),
             metadata=[
-                DatapointMetadataModelMetadataInner(meta.to_model())
+                DatapointMetadataModelMetadataInner(meta._to_model())
                 for meta in metadata
             ],
             randomCorrectProbability=randomCorrectProbability,
         )
         if isinstance(asset, MediaAsset):
-            self.upload_files(model=model, assets=[asset])
+            self.__upload_files(model=model, assets=[asset])
         elif isinstance(asset, TextAsset):
             model = AddValidationTextRapidModel(
@@ -136,13 +141,13 @@ class RapidataValidationSet:
                 payload=AddValidationRapidModelPayload(payload),
                 truth=AddValidationRapidModelTruth(truths),
                 metadata=[
-                    DatapointMetadataModelMetadataInner(meta.to_model())
+                    DatapointMetadataModelMetadataInner(meta._to_model())
                     for meta in metadata
                 ],
                 randomCorrectProbability=randomCorrectProbability,
                 texts=[asset.text],
             )
-            self.openapi_service.validation_api.validation_add_validation_text_rapid_post(
+            self.__openapi_service.validation_api.validation_add_validation_text_rapid_post(
                 add_validation_text_rapid_model=model
             )
@@ -150,55 +155,54 @@ class RapidataValidationSet:
             files = [a for a in asset if isinstance(a, MediaAsset)]
             texts = [a.text for a in asset if isinstance(a, TextAsset)]
             if files:
-                self.upload_files(model=model, assets=files)
+                self.__upload_files(model=model, assets=files)
             if texts:
                 model = AddValidationTextRapidModel(
                     validationSetId=self.id,
                     payload=AddValidationRapidModelPayload(payload),
                     truth=AddValidationRapidModelTruth(truths),
                     metadata=[
-                        DatapointMetadataModelMetadataInner(meta.to_model())
+                        DatapointMetadataModelMetadataInner(meta._to_model())
                         for meta in metadata
                     ],
                     randomCorrectProbability=randomCorrectProbability,
                     texts=texts,
                 )
-                self.openapi_service.validation_api.validation_add_validation_text_rapid_post(
+                self.__openapi_service.validation_api.validation_add_validation_text_rapid_post(
                     add_validation_text_rapid_model=model
                 )
         else:
             raise ValueError("Invalid asset type")
-    def add_classify_rapid(
+    def _add_classify_rapid(
         self,
         asset: MediaAsset | TextAsset,
-        question: str,
+        instruction: str,
         categories: list[str],
         truths: list[str],
-        metadata: list[Metadata] = [],
+        metadata: Sequence[Metadata] = [],
     ) -> None:
         """Add a classify rapid to the validation set.
         Args:
             asset (MediaAsset | TextAsset): The asset for the rapid.
-            question (str): The question for the rapid.
+            instruction (str): The instruction for the rapid.
             categories (list[str]): The list of categories for the rapid.
             truths (list[str]): The list of truths for the rapid.
-            metadata (list[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
+            metadata (Sequence[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
         Returns:
             None
         """
         payload = ClassifyPayload(
-            _t="ClassifyPayload", possibleCategories=categories, title=question
+            _t="ClassifyPayload", possibleCategories=categories, title=instruction
         )
         model_truth = AttachCategoryTruth(
             correctCategories=truths, _t="AttachCategoryTruth"
         )
-        self.add_general_validation_rapid(
+        self._add_general_validation_rapid(
             payload=payload,
             truths=model_truth,
             metadata=metadata,
@@ -206,20 +210,20 @@ class RapidataValidationSet:
             randomCorrectProbability=len(truths) / len(categories),
         )
-    def add_compare_rapid(
+    def _add_compare_rapid(
         self,
         asset: MultiAsset,
-        question: str,
+        instruction: str,
         truth: str,
-        metadata: list[Metadata] = [],
+        metadata: Sequence[Metadata] = [],
     ) -> None:
         """Add a compare rapid to the validation set.
         Args:
             asset (MultiAsset): The assets for the rapid.
-            question (str): The question for the rapid.
+            instruction (str): The instruction for the rapid.
             truth (str): The path to the truth file.
-            metadata (list[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
+            metadata (Sequence[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
         Returns:
             None
@@ -227,7 +231,7 @@ class RapidataValidationSet:
         Raises:
             ValueError: If the number of assets is not exactly two.
         """
-        payload = ComparePayload(_t="ComparePayload", criteria=question)
+        payload = ComparePayload(_t="ComparePayload", criteria=instruction)
         # take only last part of truth path
         truth = os.path.basename(truth)
         model_truth = CompareTruth(_t="CompareTruth", winnerId=truth)
@@ -235,7 +239,7 @@ class RapidataValidationSet:
         if len(asset) != 2:
             raise ValueError("Compare rapid requires exactly two media paths")
-        self.add_general_validation_rapid(
+        self._add_general_validation_rapid(
             payload=payload,
             truths=model_truth,
             metadata=metadata,
@@ -243,24 +247,24 @@ class RapidataValidationSet:
             randomCorrectProbability=1 / len(asset),
         )
-    def add_transcription_rapid(
+    def _add_transcription_rapid(
         self,
         asset: MediaAsset | TextAsset,
-        question: str,
-        transcription: list[str],
+        instruction: str,
+        text: list[str],
         correct_words: list[str],
         strict_grading: bool | None = None,
-        metadata: list[Metadata] = [],
+        metadata: Sequence[Metadata] = [],
     ) -> None:
         """Add a transcription rapid to the validation set.
         Args:
             asset (MediaAsset | TextAsset): The asset for the rapid.
-            question (str): The question for the rapid.
-            transcription (list[str]): The transcription for the rapid.
+            instruction (str): The instruction for the rapid.
+            text (list[str]): The text for the rapid.
             correct_words (list[str]): The list of correct words for the rapid.
             strict_grading (bool | None, optional): The strict grading for the rapid. Defaults to None.
-            metadata (list[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
+            metadata (Sequence[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
         Returns:
             None
@@ -270,19 +274,19 @@ class RapidataValidationSet:
         """
         transcription_words = [
             TranscriptionWord(word=word, wordIndex=i)
-            for i, word in enumerate(transcription)
+            for i, word in enumerate(text)
         ]
         correct_transcription_words = []
         for word in correct_words:
-            if word not in transcription:
+            if word not in text:
                 raise ValueError(f"Correct word '{word}' not found in transcription")
             correct_transcription_words.append(
-                TranscriptionWord(word=word, wordIndex=transcription.index(word))
+                TranscriptionWord(word=word, wordIndex=text.index(word))
             )
         payload = TranscriptionPayload(
-            _t="TranscriptionPayload", title=question, transcription=transcription_words
+            _t="TranscriptionPayload", title=instruction, transcription=transcription_words
         )
         model_truth = TranscriptionTruth(
@@ -291,12 +295,12 @@ class RapidataValidationSet:
             strictGrading=strict_grading,
         )
-        self.add_general_validation_rapid(
+        self._add_general_validation_rapid(
             payload=payload,
             truths=model_truth,
             metadata=metadata,
             asset=asset,
-            randomCorrectProbability=len(correct_words) / len(transcription),
+            randomCorrectProbability=len(correct_words) / len(text),
         )
     def __str__(self):

rapidata/rapidata_client/validation/rapids/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .box import Box

rapidata/rapidata_client/validation/rapids/box.py ADDED Viewed

@@ -0,0 +1,17 @@
+from rapidata.api_client.models.box_shape import BoxShape
+class Box:
+    """
+    Used in the Locate and Draw Validation sets. All coordinates are in pixels.
+    Args:
+        x_min (float): The minimum x value of the box.
+        y_min (float): The minimum y value of the box.
+        x_max (float): The maximum x value of the box.
+        y_max (float): The maximum y value of the box.
+    """
+    def __init__(self, x_min: float, y_min: float, x_max: float, y_max: float):
+        self.x_min = x_min
+        self.y_min = y_min
+        self.x_max = x_max
+        self.y_max = y_max

rapidata/rapidata_client/validation/rapids/rapids.py ADDED Viewed

@@ -0,0 +1,94 @@
+from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
+from rapidata.rapidata_client.metadata import Metadata
+from typing import Sequence
+from rapidata.rapidata_client.validation.rapids.box import Box
+class Rapid:
+    pass
+class ClassificationRapid(Rapid):
+    """
+    A classification rapid. Used as a multiple choice question for the labeler to answer.
+    Args:
+        instruction (str): The instruction how to choose the options.
+        answer_options (list[str]): The options that the labeler can choose from.
+        truths (list[str]): The correct answers to the question.
+        asset (MediaAsset | TextAsset): The asset that the labeler will be labeling.
+        metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
+    """
+    def __init__(self, instruction: str, answer_options: list[str], truths: list[str], asset: MediaAsset | TextAsset, metadata: Sequence[Metadata]):
+        self.instruction = instruction
+        self.answer_options = answer_options
+        self.truths = truths
+        self.asset = asset
+        self.metadata = metadata
+class CompareRapid(Rapid):
+    """
+    Used as a comparison of two assets for the labeler to compare.
+    Args:
+        instruction (str): The instruction that the labeler will be comparing the assets on.
+        truth (str): The correct answer to the comparison. (has to be one of the assets)
+        asset (MultiAsset): The assets that the labeler will be comparing.
+        metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
+    """
+    def __init__(self, instruction: str, truth: str, asset: MultiAsset, metadata: Sequence[Metadata]):
+        self.instruction = instruction
+        self.asset = asset
+        self.truth = truth
+        self.metadata = metadata
+class SelectWordsRapid(Rapid):
+    """
+    Used to give the labeler a text and have them select words from it.
+    Args:
+        instruction (str): The instruction for the labeler.
+        truths (list[int]): The indices of the words that are the correct answers.
+        asset (MediaAsset): The asset that the labeler will be selecting words from.
+        sentence (str): The sentence that the labeler will be selecting words from. (split up by spaces)
+        strict_grading (bool): Whether the grading should be strict or not.
+            True means that all correct words and no wrong words have to be selected for the rapid to be marked as correct.
+            False means that at least one correct word and no wrong words have to be selected for the rapid to be marked as correct.
+    """
+    def __init__(self, instruction: str, truths: list[int], asset: MediaAsset, sentence: str, strict_grading: bool):
+        self.instruction = instruction
+        self.truths = truths
+        self.asset = asset
+        self.sentence = sentence
+        self.strict_grading = strict_grading
+class LocateRapid(Rapid):
+    """
+    Used to have the labeler locate a specific object in an image.
+    Args:
+        instruction (str): The instructions on what the labeler should do.
+        truths (list[Box]): The boxes that the object is located in.
+        asset (MediaAsset): The image that the labeler is locating the object in.
+        metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
+    """
+    def __init__(self, instruction: str, truths: list[Box], asset: MediaAsset, metadata: Sequence[Metadata]):
+        self.instruction = instruction
+        self.asset = asset
+        self.truths = truths
+        self.metadata = metadata
+class DrawRapid(Rapid):
+    """
+    Used to have the labeler draw a specific object in an image.
+    Args:
+        instruction (str): The instructions on what the labeler should do.
+        truths (list[Box]): The boxes that the object is located in.
+        asset (MediaAsset): The image that the labeler is drawing the object in.
+        metadata (Sequence[Metadata]): The metadata that is attached to the rapid.
+    """
+    def __init__(self, instruction: str, truths: list[Box], asset: MediaAsset, metadata: Sequence[Metadata]):
+        self.instruction = instruction
+        self.asset = asset
+        self.truths = truths

rapidata/rapidata_client/validation/rapids/rapids_manager.py ADDED Viewed

@@ -0,0 +1,163 @@
+from rapidata.rapidata_client.assets.data_type_enum import RapidataDataTypes
+from rapidata.rapidata_client.validation.rapids.rapids import (
+    ClassificationRapid,
+    CompareRapid,
+    SelectWordsRapid,
+    LocateRapid,
+    DrawRapid)
+from rapidata.rapidata_client.assets import MediaAsset, TextAsset, MultiAsset
+from rapidata.rapidata_client.metadata import Metadata
+from rapidata.rapidata_client.validation.rapids.box import Box
+from typing import Sequence
+class RapidsManager:
+    """
+    Can be used to build different types of rapids. That can then be added to Validation sets
+    """
+    def __init__(self):
+        pass
+    def classification_rapid(self,
+            instruction: str,
+            answer_options: list[str],
+            datapoint: str,
+            truths: list[str],
+            data_type: str = RapidataDataTypes.MEDIA,
+            metadata: Sequence[Metadata] = [],
+    ) -> ClassificationRapid:
+        """Build a classification rapid
+        Args:
+            instruction (str): The instruction/question to be shown to the labeler.
+            answer_options (list[str]): The options that the labeler can choose from to answer the question.
+            datapoint (str): The datapoint that the labeler will be labeling.
+            truths (list[str]): The correct answers to the question.
+            data_type (str, optional): The type of the datapoint. Defaults to RapidataDataTypes.MEDIA.
+            metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+        """
+        if data_type == RapidataDataTypes.MEDIA:
+            asset = MediaAsset(datapoint)
+        elif data_type == RapidataDataTypes.TEXT:
+            asset = TextAsset(datapoint)
+        else:
+            raise ValueError(f"Unsupported data type: {data_type}")
+        return ClassificationRapid(
+                instruction=instruction,
+                answer_options=answer_options,
+                asset=asset,
+                truths=truths,
+                metadata=metadata,
+                )
+    def compare_rapid(self,
+            instruction: str,
+            truth: str,
+            datapoint: list[str],
+            data_type: str = RapidataDataTypes.MEDIA,
+            metadata: Sequence[Metadata] = [],
+    ) -> CompareRapid:
+        """Build a compare rapid
+        Args:
+            instruction (str): The instruction that the labeler will be comparing the assets on.
+            truth (str): The correct answer to the comparison. (has to be one of the assets)
+            datapoint (list[str]): The two assets that the labeler will be comparing.
+            data_type (str, optional): The type of the datapoint. Defaults to RapidataDataTypes.MEDIA.
+            metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+        """
+        if data_type == RapidataDataTypes.MEDIA:
+            assets = [MediaAsset(image) for image in datapoint]
+        elif data_type == RapidataDataTypes.TEXT:
+            assets = [TextAsset(text) for text in datapoint]
+        else:
+            raise ValueError(f"Unsupported data type: {data_type}")
+        asset = MultiAsset(assets)
+        return CompareRapid(
+                instruction=instruction,
+                asset=asset,
+                truth=truth,
+                metadata=metadata,
+                )
+    def select_words_rapid(self,
+            instruction: str,
+            truths: list[int],
+            datapoint: str,
+            sentence: str,
+            strict_grading: bool = True,
+    ) -> SelectWordsRapid:
+        """Build a select words rapid
+        Args:
+            instruction (str): The instruction for the labeler.
+            truths (list[int]): The indices of the words that are the correct answers.
+            datapoint (str): The asset that the labeler will be selecting words from.
+            sentence (str): The sentence that the labeler will be selecting words from. (split up by spaces)
+            strict_grading (bool, optional): Whether the grading should be strict or not.
+                True means that all correct words and no wrong words have to be selected for the rapid to be marked as correct.
+                False means that at least one correct word and no wrong words have to be selected for the rapid to be marked as correct. Defaults to True.
+        """
+        asset = MediaAsset(datapoint)
+        return SelectWordsRapid(
+                instruction=instruction,
+                truths=truths,
+                asset=asset,
+                sentence=sentence,
+                strict_grading=strict_grading,
+                )
+    def locate_rapid(self,
+            instruction: str,
+            truths: list[Box],
+            datapoint: str,
+            metadata: Sequence[Metadata] = [],
+    ) -> LocateRapid:
+        """Build a locate rapid
+        Args:
+            instruction (str): The instruction on what the labeler should do.
+            truths (list[Box]): The bounding boxes of the object that the labeler ought to be locating.
+            datapoint (str): The asset that the labeler will be locating the object in.
+            metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+        """
+        asset = MediaAsset(datapoint)
+        return LocateRapid(
+                instruction=instruction,
+                truths=truths,
+                asset=asset,
+                metadata=metadata,
+                )
+    def draw_rapid(self,
+            instruction: str,
+            truths: list[Box],
+            datapoint: str,
+            metadata: Sequence[Metadata] = [],
+    ) -> DrawRapid:
+        """Build a draw rapid
+        Args:
+            instruction (str): The instructions on what the labeler
+            truths (list[Box]): The bounding boxes of the object that the labeler ought to be drawing.
+            datapoint (str): The asset that the labeler will be drawing the object in.
+            metadata (Sequence[Metadata], optional): The metadata that is attached to the rapid. Defaults to [].
+        """
+        asset = MediaAsset(datapoint)
+        return DrawRapid(
+                instruction=instruction,
+                truths=truths,
+                asset=asset,
+                metadata=metadata,
+                )

rapidata 1.10.1__py3-none-any.whl → 2.0.0__py3-none-any.whl

Potentially problematic release.

rapidata 1.10.1__py3-none-any.whl → 2.0.0__py3-none-any.whl