PyPI - rapidata - Versions diffs - 1.8.3__py3-none-any.whl → 1.10.0__py3-none-any.whl - Mend

rapidata 1.8.3__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of rapidata might be problematic. Click here for more details.

Files changed (27) hide show

rapidata/__init__.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from .rapidata_client import (
     RapidataClient,
     ClassifyWorkflow,
-    TranscriptionWorkflow,
+    SelectWordsWorkflow,
     CompareWorkflow,
     FreeTextWorkflow,
     DemographicSelection,
@@ -14,7 +14,7 @@ from .rapidata_client import (
     PrivateTextMetadata,
     PublicTextMetadata,
     PromptMetadata,
-    TranscriptionMetadata,
+    SelectWordsMetadata,
     Settings,
     FeatureFlags, # remove next major version
     CountryCodes,

rapidata/api_client/models/rapid_answer.py CHANGED Viewed

@@ -28,11 +28,12 @@ class RapidAnswer(BaseModel):
     RapidAnswer
     """ # noqa: E501
     id: StrictStr
+    user_id: StrictStr = Field(alias="userId")
     country: StrictStr
     result: RapidAnswerResult
     user_score: Union[StrictFloat, StrictInt] = Field(alias="userScore")
     demographic_information: Dict[str, StrictStr] = Field(alias="demographicInformation")
-    __properties: ClassVar[List[str]] = ["id", "country", "result", "userScore", "demographicInformation"]
+    __properties: ClassVar[List[str]] = ["id", "userId", "country", "result", "userScore", "demographicInformation"]
     model_config = ConfigDict(
         populate_by_name=True,
@@ -89,6 +90,7 @@ class RapidAnswer(BaseModel):
         _obj = cls.model_validate({
             "id": obj.get("id"),
+            "userId": obj.get("userId"),
             "country": obj.get("country"),
             "result": RapidAnswerResult.from_dict(obj["result"]) if obj.get("result") is not None else None,
             "userScore": obj.get("userScore"),

rapidata/rapidata_client/__init__.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from .rapidata_client import RapidataClient
 from .workflow import (
     ClassifyWorkflow,
-    TranscriptionWorkflow,
+    SelectWordsWorkflow,
     CompareWorkflow,
     FreeTextWorkflow,
 )
@@ -17,7 +17,7 @@ from .metadata import (
     PrivateTextMetadata,
     PublicTextMetadata,
     PromptMetadata,
-    TranscriptionMetadata,
+    SelectWordsMetadata,
 )
 from .settings import Settings, FeatureFlags # remove FeatureFlags next major version
 from .country_codes import CountryCodes

rapidata/rapidata_client/assets/media_asset.py CHANGED Viewed

@@ -13,6 +13,7 @@ class MediaAsset(BaseAsset):
     """MediaAsset Class
     Represents a media asset by storing the file path.
+    Supports local files and URLs for images, MP3, and MP4.
     Args:
         path (str): The file system path to the media asset.
@@ -21,68 +22,76 @@ class MediaAsset(BaseAsset):
         FileNotFoundError: If the provided file path does not exist.
     """
+    ALLOWED_TYPES = [
+        'image/',
+        'audio/mp3',      # MP3
+        'video/mp4',       # MP4
+    ]
     def __init__(self, path: str):
         """
         Initialize a MediaAsset instance.
         Args:
-            path (str): The file system path to the media asset or a link to an image.
+            path (str): The file system path to the media asset or a URL.
         Raises:
             FileNotFoundError: If the provided file path does not exist.
+            ValueError: If media type is unsupported or duration exceeds 25 seconds.
         """
         if not isinstance(path, str):
-            raise ValueError("Media must be a string, either a local file path or an image URL")
+            raise ValueError("Media must be a string, either a local file path or a URL")
         if re.match(r'^https?://', path):
-            self.path = MediaAsset.get_image_bytes(path)
+            self.path = self._get_media_bytes(path)
             self.name = path.split('/')[-1]
-            if not self.name.endswith(('.jpg', '.jpeg', '.png', '.gif')):
-                self.name += '.jpg'
+            if not self.name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.mp3', '.mp4', '.webp')):
+                raise ValueError("Supported file types for custom names: jpg, jpeg, png, gif, mp3, mp4")
             return
         if not os.path.exists(path):
-            raise FileNotFoundError(f"File not found: {path}, please provide a valid local file path.")
+            raise FileNotFoundError(f"File not found: {path}")
         self.path: str | bytes = path
         self.name = path
     def set_custom_name(self, name: str) -> 'MediaAsset':
-        """
-        Set a custom name for the media asset, will only work with links.
-        Args:
-            name (str): The custom name to be set.
-        """
+        """Set a custom name for the media asset (only works with URLs)."""
         if isinstance(self.path, bytes):
-            if not name.endswith(('.jpg', '.jpeg', '.png', '.gif')):
-                name += '.jpg'
+            if not name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.mp3', '.mp4', '.webp')):
+                raise ValueError("Supported file types for custom names: jpg, jpeg, png, gif, mp3, mp4")
             self.name = name
         else:
-            raise ValueError("Custom name can only be set for links.")
+            raise ValueError("Custom name can only be set for URLs.")
         return self
-    @staticmethod
-    def get_image_bytes(image_url: str) -> bytes:
+    def _get_media_bytes(self, url: str) -> bytes:
         """
-        Downloads an image from a URL and converts it to bytes.
-        Validates that the URL points to an actual image.
+        Downloads media files from URL and validates type and duration.
         Args:
-            image_url (str): URL of the image
+            url: URL of the media file
         Returns:
-            bytes: Image data as bytes
+            bytes: Media data
         Raises:
-            ValueError: If URL doesn't point to an image
+            ValueError: If media type is unsupported or duration exceeds limit
             requests.exceptions.RequestException: If download fails
         """
-        response = requests.get(image_url)
+        response = requests.get(url, stream=False)  # Don't stream, we need full file
         response.raise_for_status()
+        content_type = response.headers.get('content-type', '').lower()
-        content_type = response.headers.get('content-type', '')
-        if not content_type.startswith('image/'):
-            raise ValueError(f'URL does not point to an image. Content-Type: {content_type}')
-        return BytesIO(response.content).getvalue()
+        # Validate content type
+        if not any(content_type.startswith(t) for t in self.ALLOWED_TYPES):
+            raise ValueError(
+                f'URL does not point to an allowed media type.\n'
+                f'Content-Type: {content_type}\n'
+                f'Allowed types: {self.ALLOWED_TYPES}'
+            )
+        content = BytesIO(response.content)
+        return content.getvalue()

rapidata/rapidata_client/dataset/rapid_builders/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 from .classify_rapid_builders import ClassifyRapidQuestionBuilder
 from .compare_rapid_builders import CompareRapidCriteriaBuilder
-from .transcription_rapid_builders import TranscriptionRapidInstructionBuilder
+from .select_words_rapid_builders import SelectWordsRapidInstructionBuilder
 from .base_rapid_builder import BaseRapidBuilder

rapidata/rapidata_client/dataset/rapid_builders/base_rapid_builder.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from rapidata.service.openapi_service import OpenAPIService
-from rapidata.rapidata_client.dataset.rapid_builders import ClassifyRapidQuestionBuilder, CompareRapidCriteriaBuilder, TranscriptionRapidInstructionBuilder
+from rapidata.rapidata_client.dataset.rapid_builders import ClassifyRapidQuestionBuilder, CompareRapidCriteriaBuilder, SelectWordsRapidInstructionBuilder
 class BaseRapidBuilder:
     """Base class for creating different types of rapids.
@@ -23,11 +23,11 @@ class BaseRapidBuilder:
         """
         return CompareRapidCriteriaBuilder()
-    def transcription_rapid(self):
-        """Creates a transcription rapid.
+    def select_words_rapid(self):
+        """Creates a select words rapid.
         Returns:
-            TranscriptionRapidInstructionBuilder: A builder for creating the transcription instruction.
+            SelectWordsRapidInstructionBuilder: A builder for creating the select words instruction.
         """
-        return TranscriptionRapidInstructionBuilder()
+        return SelectWordsRapidInstructionBuilder()

rapidata/rapidata_client/dataset/rapid_builders/rapids.py CHANGED Viewed

@@ -5,7 +5,7 @@ class Rapid:
     pass
 class ClassificationRapid(Rapid):
-    """A classification rapid. This represents the question, options, truths, asset and metadata that will be given to the user."""
+    """A classification rapid. This represents the question, options, truths, asset and metadata that will be given to the labeler."""
     def __init__(self, question: str, options: list[str], truths: list[str], asset: MediaAsset | TextAsset, metadata: list[Metadata]):
         self.question = question
         self.options = options
@@ -14,19 +14,20 @@ class ClassificationRapid(Rapid):
         self.metadata = metadata
 class CompareRapid(Rapid):
-    """A comparison rapid. This represents the criteria, asset, truth and metadata that will be given to the user."""
+    """A comparison rapid. This represents the criteria, asset, truth and metadata that will be given to the labeler."""
     def __init__(self, criteria: str, truth: str, asset: MultiAsset, metadata: list[Metadata]):
         self.criteria = criteria
         self.asset = asset
         self.truth = truth
         self.metadata = metadata
-class TranscriptionRapid(Rapid):
-    """A transcription rapid. This represents the instruction, truths, asset, transcription and strict grading that will be given to the user."""
-    def __init__(self, instruction: str, truths: list[int], asset: MediaAsset, transcription: str, strict_grading: bool):
+class SelectWordsRapid(Rapid):
+    """A transcription rapid. This represents the instruction, truths, asset, transcription and strict grading that will be given to the labeler."""
+    def __init__(self, instruction: str, truths: list[int], asset: MediaAsset, text: str, strict_grading: bool):
+        """The text will be split up by spaces to be selected by the labeler."""
         self.instruction = instruction
         self.truths = truths
         self.asset = asset
-        self.transcription = transcription
+        self.text = text
         self.strict_grading = strict_grading

rapidata/rapidata_client/dataset/rapid_builders/{transcription_rapid_builders.py → select_words_rapid_builders.py} RENAMED Viewed

@@ -1,64 +1,56 @@
 from rapidata.rapidata_client.assets import MediaAsset
-from rapidata.rapidata_client.dataset.rapid_builders.rapids import TranscriptionRapid
+from rapidata.rapidata_client.dataset.rapid_builders.rapids import SelectWordsRapid
-class TranscriptionRapidBuilder:
-    """Final builder class for transcription rapid.
+class SelectWordsRapidBuilder:
+    """Final builder class for rapid.
-    This class handles the final construction of a transcription rapid with all required parameters.
+    This class handles the final construction of a rapid with all required parameters.
     """
-    def __init__(self, instruction: str, truths: list[int], asset: MediaAsset, transcription_text: str):
+    def __init__(self, instruction: str, truths: list[int], asset: MediaAsset, text: str):
         self._instruction = instruction
         self._truths = truths
         self._asset = asset
-        self._transcription_text = transcription_text
+        self._text = text
         self._strict_grading = True
     def strict_grading(self, strict_grading: bool = True):
-        """Set whether to use strict grading for the transcription.
+        """Set whether to use strict grading for the select words.
         Strict grading true: In order to be correct, you must select all of the right words
         Strict grading false: In order to be correct, you must select at least one right word
         In both cases it will be incorrect if you select any wrong words
         Args:
-            strict_grading (bool): Whether to use strict grading. Defaults to True.
-        Returns:
-            TranscriptionRapidBuilder: The builder instance for method chaining
-        """
+            strict_grading (bool): Whether to use strict grading. Defaults to True."""
         self._strict_grading = strict_grading
         return self
     def build(self):
-        """Constructs and returns the final transcription rapid.
-        Returns:
-            TranscriptionRapid: The constructed transcription rapid
-        """
-        return TranscriptionRapid(
+        """Constructs and returns the final rapid."""
+        return SelectWordsRapid(
             instruction=self._instruction,
             truths=self._truths,
             asset=self._asset,
-            transcription=self._transcription_text,
+            text=self._text,
             strict_grading=self._strict_grading
         )
-class TranscriptionRapidTruthsBuilder:
-    """Builder class for the truths of the transcription rapid.
+class SelectWordsRapidTruthsBuilder:
+    """Builder class for the truths of the rapid.
-    This adds the truths to the transcription rapid.
+    This adds the truths to the rapid.
     """
-    def __init__(self, instruction: str, media: MediaAsset, transcription_text: str):
+    def __init__(self, instruction: str, media: MediaAsset, text: str):
         self._instruction = instruction
         self._media = media
-        self._transcription_text = transcription_text
+        self._text = text
         self._truths = None
     def truths(self, truths: list[int]):
-        """Set the truths for the transcription rapid.
+        """Set the truths for the rapid.
         Args:
-            truths (list[int]): The correct answers for the transcription task. \
-                Each integer represents the index of the correct word in the transcription text."""
+            truths (list[int]): The correct answers for the task. \
+                Each integer represents the index of the correct word in the text."""
         if not isinstance(truths, list) or not all(isinstance(truth, int) for truth in truths):
             raise ValueError("Truths must be a list of integers")
@@ -70,30 +62,30 @@ class TranscriptionRapidTruthsBuilder:
         if self._truths is None:
             raise ValueError("Truths are required")
-        return TranscriptionRapidBuilder(
+        return SelectWordsRapidBuilder(
             instruction=self._instruction,
             truths=self._truths,
             asset=self._media,
-            transcription_text=self._transcription_text
+            text=self._text
         )
-class TranscriptionRapidAssetBuilder:
-    """Builder class for the asset of the transcription rapid.
+class SelectWordsRapidAssetBuilder:
+    """Builder class for the asset of the rapid.
-    This adds the asset to the transcription rapid.
+    This adds the asset to the rapid.
     """
     def __init__(self, instruction: str):
         self._instruction = instruction
-    def media(self, media: str, transcription_text: str):
-        """Set the media asset for the transcription rapid.
+    def media(self, media: str, text: str):
+        """Set the media asset for the rapid.
         Args:
-            media (str): The local file path of the audio or video file to be transcribed
-            transcription_text (str): The text to be transcribed from the media asset""" # is video file okay?
+            media (str): The local path (image, video, audio) or URL (image) of the media asset.
+            text (str): The text will be split up by spaces and the labeler will be able to select the words"""
         self._asset = MediaAsset(media)
-        self._transcription_text = transcription_text
+        self._text = text
         return self._build()
@@ -101,21 +93,21 @@ class TranscriptionRapidAssetBuilder:
         if not self._asset:
             raise ValueError("Media is required")
-        return TranscriptionRapidTruthsBuilder(
+        return SelectWordsRapidTruthsBuilder(
             instruction=self._instruction,
             media=self._asset,
-            transcription_text=self._transcription_text
+            text=self._text
         )
-class TranscriptionRapidInstructionBuilder:
+class SelectWordsRapidInstructionBuilder:
     def __init__(self):
         self._instruction = None
     def instruction(self, instruction: str):
-        """Set the instruction for the transcription rapid.
+        """Set the instruction for the rapid.
         Args:
-            instruction (str): The instruction for the transcription task"""
+            instruction (str): The instruction for the task"""
         if not isinstance(instruction, str):
             raise ValueError("Instruction must be a string")
@@ -127,6 +119,6 @@ class TranscriptionRapidInstructionBuilder:
         if self._instruction is None:
             raise ValueError("Instruction is required")
-        return TranscriptionRapidAssetBuilder(
+        return SelectWordsRapidAssetBuilder(
             instruction=self._instruction,
         )

rapidata/rapidata_client/dataset/rapidata_validation_set.py CHANGED Viewed

@@ -298,3 +298,9 @@ class RapidataValidationSet:
             asset=asset,
             randomCorrectProbability=len(correct_words) / len(transcription),
         )
+    def __str__(self):
+        return f"name: '{self.name}' id: {self.id}"
+    def __repr__(self):
+        return f"name: '{self.name}' id: {self.id}"

rapidata/rapidata_client/dataset/validation_set_builder.py CHANGED Viewed

@@ -20,7 +20,7 @@ from rapidata.rapidata_client.dataset.rapid_builders.rapids import (
     Rapid,
     ClassificationRapid,
     CompareRapid,
-    TranscriptionRapid
+    SelectWordsRapid
 )
 from deprecated import deprecated
@@ -44,7 +44,7 @@ class ValidationSetBuilder:
         self.validation_set_id: str | None = None
         self._rapid_parts: list[ValidatioRapidParts] = []
-    def create(self) -> RapidataValidationSet:
+    def submit(self) -> RapidataValidationSet:
         """Create the validation set by executing all HTTP requests. This should be the last method called on the builder.
         Returns:
@@ -79,6 +79,18 @@ class ValidationSetBuilder:
             )
         return validation_set
+    @deprecated("Use submit instead")
+    def create(self) -> RapidataValidationSet:
+        """Create the validation set by executing all HTTP requests. This should be the last method called on the builder.
+        Returns:
+            RapidataValidationSet: A RapidataValidationSet instance.
+        Raises:
+            ValueError: If the validation set creation fails.
+        """
+        return self.submit()
     def add_rapid(self, rapid: Rapid):
         """Add a rapid to the validation set.
@@ -96,8 +108,8 @@ class ValidationSetBuilder:
         if isinstance(rapid, CompareRapid):
             self._add_compare_rapid(rapid.asset, rapid.criteria, rapid.truth, rapid.metadata)
-        if isinstance(rapid, TranscriptionRapid):
-            self._add_transcription_rapid(rapid.asset, rapid.instruction, rapid.transcription, rapid.truths, rapid.strict_grading)
+        if isinstance(rapid, SelectWordsRapid):
+            self._add_select_words_rapid(rapid.asset, rapid.instruction, rapid.text, rapid.truths, rapid.strict_grading)
         return self
@@ -237,21 +249,21 @@ class ValidationSetBuilder:
         )
     @deprecated("Use add_rapid instead")
-    def add_transcription_rapid(
+    def add_select_words_rapid(
         self,
         asset: MediaAsset | TextAsset,
         question: str,
-        transcription: str,
+        select_words: str,
         truths: list[int],
         strict_grading: bool | None = None,
         metadata: list[Metadata] = [],
     ):
-        """Add a transcription rapid to the validation set.
+        """Add a select words rapid to the validation set.
         Args:
             asset (MediaAsset | TextAsset): The asset for the rapid.
             question (str): The question for the rapid.
-            transcription (list[str]): The transcription for the rapid.
+            select words (list[str]): The select words for the rapid.
             truths (list[int]): The list of indices of the true word selections.
             strict_grading (bool | None, optional): The strict grading for the rapid. Defaults to None.
             metadata (list[Metadata], optional): The metadata for the rapid.
@@ -260,27 +272,27 @@ class ValidationSetBuilder:
             ValidationSetBuilder: The ValidationSetBuilder instance.
         Raises:
-            ValueError: If a correct word is not found in the transcription.
+            ValueError: If a correct word is not found in the select words.
         """
-        self._add_transcription_rapid(asset, question, transcription, truths, strict_grading, metadata)
+        self._add_select_words_rapid(asset, question, select_words, truths, strict_grading, metadata)
         return self
-    def _add_transcription_rapid(
+    def _add_select_words_rapid(
         self,
         asset: MediaAsset | TextAsset,
         question: str,
-        transcription: str,
+        select_words: str,
         truths: list[int],
         strict_grading: bool | None = None,
         metadata: list[Metadata] = [],
     ):
-        """Add a transcription rapid to the validation set.
+        """Add a select words rapid to the validation set.
         Args:
             asset (MediaAsset | TextAsset): The asset for the rapid.
             question (str): The question for the rapid.
-            transcription (list[str]): The transcription for the rapid.
+            select words (list[str]): The select words for the rapid.
             truths (list[int]): The list of indices of the true word selections.
             strict_grading (bool | None, optional): The strict grading for the rapid. Defaults to None.
             metadata (list[Metadata], optional): The metadata for the rapid.
@@ -289,11 +301,11 @@ class ValidationSetBuilder:
             ValidationSetBuilder: The ValidationSetBuilder instance.
         Raises:
-            ValueError: If a correct word is not found in the transcription.
+            ValueError: If a correct word is not found in the select words.
         """
         transcription_words = [
             TranscriptionWord(word=word, wordIndex=i)
-            for i, word in enumerate(transcription.split())
+            for i, word in enumerate(select_words.split())
         ]
         true_words = []

rapidata/rapidata_client/metadata/__init__.py CHANGED Viewed

@@ -2,4 +2,4 @@ from .base_metadata import Metadata
 from .private_text_metadata import PrivateTextMetadata
 from .public_text_metadata import PublicTextMetadata
 from .prompt_metadata import PromptMetadata
-from .transcription_metadata import TranscriptionMetadata
+from .select_words_metadata import SelectWordsMetadata

rapidata/rapidata_client/metadata/{transcription_metadata.py → select_words_metadata.py} RENAMED Viewed

@@ -4,16 +4,16 @@ from rapidata.api_client.models.transcription_metadata_input import (
 from rapidata.rapidata_client.metadata.base_metadata import Metadata
-class TranscriptionMetadata(Metadata):
+class SelectWordsMetadata(Metadata):
-    def __init__(self, transcription: str, identifier: str = "transcription"):
+    def __init__(self, select_words: str, identifier: str = "transcription"):
         super().__init__(identifier=identifier)
         self.identifier = identifier
-        self.transcription = transcription
+        self.select_words = select_words
     def to_model(self):
         return TranscriptionMetadataInput(
             _t="TranscriptionMetadataInput",
             identifier=self.identifier,
-            transcription=self.transcription,
+            transcription=self.select_words,
         )

rapidata/rapidata_client/order/order_builder.py ADDED Viewed

@@ -0,0 +1,25 @@
+from rapidata.service.openapi_service import OpenAPIService
+from rapidata.rapidata_client.simple_builders.simple_classification_builders import ClassificationQuestionBuilder
+from rapidata.rapidata_client.simple_builders.simple_compare_builders import CompareCriteriaBuilder
+from rapidata.rapidata_client.simple_builders.simple_free_text_builders import FreeTextQuestionBuilder
+from rapidata.rapidata_client.simple_builders.simple_select_words_builders import SelectWordsInstructionBuilder
+from rapidata.rapidata_client.order.rapidata_order_builder import RapidataOrderBuilder
+class BaseOrderBuilder():
+    def __init__(self, openapi_service: OpenAPIService):
+        self.openapi_service = openapi_service
+    def classify_order(self, name: str):
+        return ClassificationQuestionBuilder(name, self.openapi_service)
+    def compare_order(self, name: str):
+        return CompareCriteriaBuilder(name, self.openapi_service)
+    def free_text_order(self, name: str):
+        return FreeTextQuestionBuilder(name, self.openapi_service)
+    def select_words_order(self, name: str):
+        return SelectWordsInstructionBuilder(name, self.openapi_service)
+    def advanced_order(self, name: str):
+        return RapidataOrderBuilder(name, self.openapi_service)

rapidata/rapidata_client/order/rapidata_order.py CHANGED Viewed

@@ -106,7 +106,7 @@ class RapidataOrder:
         Returns:
             The results of the order.
         """
-        while self.get_status() not in ["Completed", "Paused", "ManuelReview"]:
+        while self.get_status() not in ["Completed", "Paused", "ManuelReview", "Failed"]:
             sleep(5)
         try:

rapidata/rapidata_client/order/rapidata_order_builder.py CHANGED Viewed

@@ -46,8 +46,8 @@ class RapidataOrderBuilder:
     def __init__(
         self,
-        openapi_service: OpenAPIService,
         name: str,
+        openapi_service: OpenAPIService,
     ):
         """
         Initialize the RapidataOrderBuilder.

rapidata 1.8.3__py3-none-any.whl → 1.10.0__py3-none-any.whl

Potentially problematic release.

rapidata 1.8.3__py3-none-any.whl → 1.10.0__py3-none-any.whl