PyPI - rapidata - Versions diffs - 0.1.24__py3-none-any.whl → 0.2.0__py3-none-any.whl - Mend

rapidata 0.1.24__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

rapidata/rapidata_client/dataset/rapidata_dataset.py CHANGED Viewed

@@ -11,6 +11,8 @@ from rapidata.api_client.models.upload_text_sources_to_dataset_model import (
 from rapidata.rapidata_client.metadata.base_metadata import Metadata
 from rapidata.service import LocalFileService
 from rapidata.service.openapi_service import OpenAPIService
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from tqdm import tqdm
 class RapidataDataset:
@@ -32,19 +34,18 @@ class RapidataDataset:
         self,
         image_paths: list[str | list[str]],
         metadata: list[Metadata] | None = None,
+        max_workers: int = 10,
     ):
         if metadata is not None and len(metadata) != len(image_paths):
             raise ValueError(
                 "metadata must be None or have the same length as image_paths"
             )
-        for media_paths_rapid, meta in zip_longest(image_paths, metadata or []):
+        def upload_datapoint(media_paths_rapid: str | list[str], meta: Metadata | None) -> None:
             if isinstance(media_paths_rapid, list) and not all(
                 os.path.exists(media_path) for media_path in media_paths_rapid
             ):
                 raise FileNotFoundError(f"File not found: {media_paths_rapid}")
             elif isinstance(media_paths_rapid, str) and not os.path.exists(
                 media_paths_rapid
             ):
@@ -60,4 +61,19 @@ class RapidataDataset:
                 ),
             )
-            self.openapi_service.dataset_api.dataset_create_datapoint_post(model=model, files=media_paths_rapid if isinstance(media_paths_rapid, list) else [media_paths_rapid])  # type: ignore
+            self.openapi_service.dataset_api.dataset_create_datapoint_post(
+                model=model,
+                files=media_paths_rapid if isinstance(media_paths_rapid, list) else [media_paths_rapid] # type: ignore
+            )
+        total_uploads = len(image_paths)
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = [
+                executor.submit(upload_datapoint, media_paths, meta)
+                for media_paths, meta in zip_longest(image_paths, metadata or [])
+            ]
+            with tqdm(total=total_uploads, desc="Uploading datapoints") as pbar:
+                for future in as_completed(futures):
+                    future.result()  # This will raise any exceptions that occurred during execution
+                    pbar.update(1)

rapidata/rapidata_client/dataset/rapidata_validation_set.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import os
 from typing import Any
 from rapidata.api_client.models.add_validation_rapid_model import (
     AddValidationRapidModel,
@@ -29,6 +30,7 @@ from rapidata.api_client.models.polygon_payload import PolygonPayload
 from rapidata.api_client.models.polygon_truth import PolygonTruth
 from rapidata.api_client.models.transcription_payload import TranscriptionPayload
 from rapidata.api_client.models.transcription_truth import TranscriptionTruth
+from rapidata.api_client.models.transcription_word import TranscriptionWord
 from rapidata.rapidata_client.metadata.base_metadata import Metadata
 from rapidata.service.openapi_service import OpenAPIService
@@ -43,7 +45,7 @@ class RapidataValidationSet:
         self.id = validation_set_id
         self.openapi_service = openapi_service
-    def add_validation_rapid(
+    def add_general_validation_rapid(
         self,
         payload: (
             BoundingBoxPayload
@@ -70,15 +72,18 @@ class RapidataValidationSet:
         metadata: list[Metadata],
         media_paths: str | list[str],
         randomCorrectProbability: float,
-    ):
+    ) -> None:
         """Add a validation rapid to the validation set.
         Args:
-            payload (BoundingBoxPayload | ClassifyPayload | ComparePayload | FreeTextPayload | LinePayload | LocatePayload | NamedEntityPayload | PolygonPayload | TranscriptionPayload): The payload for the rapid.
-            truths (AttachCategoryTruth | BoundingBoxTruth | CompareTruth | EmptyValidationTruth | LineTruth | LocateBoxTruth | NamedEntityTruth | PolygonTruth | TranscriptionTruth): The truths for the rapid.
+            payload (Union[BoundingBoxPayload, ClassifyPayload, ComparePayload, FreeTextPayload, LinePayload, LocatePayload, NamedEntityPayload, PolygonPayload, TranscriptionPayload]): The payload for the rapid.
+            truths (Union[AttachCategoryTruth, BoundingBoxTruth, CompareTruth, EmptyValidationTruth, LineTruth, LocateBoxTruth, NamedEntityTruth, PolygonTruth, TranscriptionTruth]): The truths for the rapid.
             metadata (list[Metadata]): The metadata for the rapid.
-            media_paths (str | list[str]): The media paths for the rapid.
+            media_paths (Union[str, list[str]]): The media paths for the rapid.
             randomCorrectProbability (float): The random correct probability for the rapid.
+        Returns:
+            None
         """
         model = AddValidationRapidModel(
             validationSetId=self.id,
@@ -94,3 +99,137 @@ class RapidataValidationSet:
         self.openapi_service.validation_api.validation_add_validation_rapid_post(
             model=model, files=media_paths if isinstance(media_paths, list) else [media_paths]  # type: ignore
         )
+    def add_classify_validation_rapid(
+        self,
+        media_path: str,
+        question: str,
+        categories: list[str],
+        truths: list[str],
+        metadata: list[Metadata] = [],
+    ) -> None:
+        """Add a classify rapid to the validation set.
+        Args:
+            media_path (str): The path to the media file.
+            question (str): The question for the rapid.
+            categories (list[str]): The list of categories for the rapid.
+            truths (list[str]): The list of truths for the rapid.
+            metadata (list[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
+        Returns:
+            None
+        """
+        payload = ClassifyPayload(
+            _t="ClassifyPayload", possibleCategories=categories, title=question
+        )
+        model_truth = AttachCategoryTruth(
+            correctCategories=truths, _t="AttachCategoryTruth"
+        )
+        self.add_general_validation_rapid(
+            payload=payload,
+            truths=model_truth,
+            metadata=metadata,
+            media_paths=media_path,
+            randomCorrectProbability=len(truths) / len(categories),
+        )
+    def add_compare_validation_rapid(
+        self,
+        media_paths: list[str],
+        question: str,
+        truth: str,
+        metadata: list[Metadata] = [],
+    ) -> None:
+        """Add a compare rapid to the validation set.
+        Args:
+            media_paths (list[str]): The list of media paths for the rapid.
+            question (str): The question for the rapid.
+            truth (str): The path to the truth file.
+            metadata (list[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
+        Returns:
+            None
+        Raises:
+            ValueError: If the number of media paths is not exactly two.
+            FileNotFoundError: If any of the specified files are not found.
+        """
+        payload = ComparePayload(_t="ComparePayload", criteria=question)
+        # take only last part of truth path
+        truth = os.path.basename(truth)
+        model_truth = CompareTruth(_t="CompareTruth", winnerId=truth)
+        if len(media_paths) != 2:
+            raise ValueError("Compare rapid requires exactly two media paths")
+        # check that files exist
+        for media_path in media_paths:
+            if not os.path.exists(media_path):
+                raise FileNotFoundError(f"File not found: {media_path}")
+        self.add_general_validation_rapid(
+            payload=payload,
+            truths=model_truth,
+            metadata=metadata,
+            media_paths=media_paths,
+            randomCorrectProbability=1 / len(media_paths),
+        )
+    def add_transcription_validation_rapid(
+        self,
+        media_path: str,
+        question: str,
+        transcription: list[str],
+        correct_words: list[str],
+        strict_grading: bool | None = None,
+        metadata: list[Metadata] = [],
+    ) -> None:
+        """Add a transcription rapid to the validation set.
+        Args:
+            media_path (str): The path to the media file.
+            question (str): The question for the rapid.
+            transcription (list[str]): The transcription for the rapid.
+            correct_words (list[str]): The list of correct words for the rapid.
+            strict_grading (Optional[bool], optional): The strict grading for the rapid. Defaults to None.
+            metadata (list[Metadata], optional): The metadata for the rapid. Defaults to an empty list.
+        Returns:
+            None
+        Raises:
+            ValueError: If a correct word is not found in the transcription.
+        """
+        transcription_words = [
+            TranscriptionWord(word=word, wordIndex=i)
+            for i, word in enumerate(transcription)
+        ]
+        correct_transcription_words = []
+        for word in correct_words:
+            if word not in transcription:
+                raise ValueError(f"Correct word '{word}' not found in transcription")
+            correct_transcription_words.append(
+                TranscriptionWord(word=word, wordIndex=transcription.index(word))
+            )
+        payload = TranscriptionPayload(
+            _t="TranscriptionPayload", title=question, transcription=transcription_words
+        )
+        model_truth = TranscriptionTruth(
+            _t="TranscriptionTruth",
+            correctWords=correct_transcription_words,
+            strictGrading=strict_grading,
+        )
+        self.add_general_validation_rapid(
+            payload=payload,
+            truths=model_truth,
+            metadata=metadata,
+            media_paths=media_path,
+            randomCorrectProbability=len(correct_words) / len(transcription),
+        )

rapidata/rapidata_client/dataset/validation_set_builder.py CHANGED Viewed

@@ -48,7 +48,7 @@ class ValidationSetBuilder:
         )
         for rapid_part in self._rapid_parts:
-            validation_set.add_validation_rapid(
+            validation_set.add_general_validation_rapid(
                 payload=rapid_part.payload,
                 truths=rapid_part.truths,
                 metadata=rapid_part.metadata,

rapidata/rapidata_client/order/rapidata_order_builder.py CHANGED Viewed

@@ -87,7 +87,7 @@ class RapidataOrderBuilder:
             ],
         )
-    def create(self, submit=True) -> RapidataOrder:
+    def create(self, submit=True, max_workers=10) -> RapidataOrder:
         """Actually makes the API calls to create the order based on how the order builder was configured.
         Args:
@@ -113,7 +113,7 @@ class RapidataOrderBuilder:
             openapi_service=self._openapi_service,
         )
-        order.dataset.add_media_from_paths(self._media_paths, self._metadata)
+        order.dataset.add_media_from_paths(self._media_paths, self._metadata, max_workers)
         if submit:
             order.submit()

{rapidata-0.1.24.dist-info → rapidata-0.2.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: rapidata
-Version: 0.1.24
+Version: 0.2.0
 Summary: Rapidata package containing the Rapidata Python Client to interact with the Rapidata Web API in an easy way.
 License: Apache-2.0
 Author: Rapidata AG

{rapidata-0.1.24.dist-info → rapidata-0.2.0.dist-info}/RECORD RENAMED Viewed

@@ -222,10 +222,10 @@ rapidata/rapidata_client/config.py,sha256=tQLgN6k_ATOX1GzZh38At2rgBDLStV6rJ6z0vs
 rapidata/rapidata_client/country_codes/__init__.py,sha256=Y8qeG2IMjvMGvhaPydq0nhwRQHb6dQqilctlEXu0_PE,55
 rapidata/rapidata_client/country_codes/country_codes.py,sha256=Q0HMX7uHJQDeLCFPP5bq4iYi6pgcDWEcl2ONGhjgoeU,286
 rapidata/rapidata_client/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-rapidata/rapidata_client/dataset/rapidata_dataset.py,sha256=J5jlSIbdswhqXHOq8Qf9gtzVF70cImq9sioDG7S9Mxs,2457
-rapidata/rapidata_client/dataset/rapidata_validation_set.py,sha256=oQUtAF9ouLWg9AXkHXlnbEsPb8w9zxeKTw2YbSIk3ic,4475
+rapidata/rapidata_client/dataset/rapidata_dataset.py,sha256=QDsl7ZCZxuG02yfEpBSphfSDZh_qHz6m5HUCGwZflfw,3205
+rapidata/rapidata_client/dataset/rapidata_validation_set.py,sha256=YrnUzia9AXgq2z917FtztFxj4fD5EgTWXVPBzLVIujY,9374
 rapidata/rapidata_client/dataset/validation_rapid_parts.py,sha256=SIeQesEXPPOW5kclxYLNWaKllBXHm7DQKBdMU-GXnfc,2104
-rapidata/rapidata_client/dataset/validation_set_builder.py,sha256=_g7acP7lqYMI_5U9q1T6YHuUQ1ZDfZbLJ-QoqsGME4o,7463
+rapidata/rapidata_client/dataset/validation_set_builder.py,sha256=B9D-uNCo_PO0NCUHju_7dsWtz_KcOmvFIsxUgQ67Q2M,7471
 rapidata/rapidata_client/feature_flags/__init__.py,sha256=BNG_NQ4CrrC61fAWliImr8r581pIvegrkepVVbxcBw8,55
 rapidata/rapidata_client/feature_flags/feature_flags.py,sha256=hcS9YRzpsPWpZfw-3QwSuf2TaVg-MOHBxY788oNqIW4,3957
 rapidata/rapidata_client/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -235,7 +235,7 @@ rapidata/rapidata_client/metadata/prompt_metadata.py,sha256=_FypjKWrC3iKUO_G2CVw
 rapidata/rapidata_client/metadata/transcription_metadata.py,sha256=THtDEVCON4UlcXHmXrjilaOLHys4TrktUOPGWnXaCcc,631
 rapidata/rapidata_client/order/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rapidata/rapidata_client/order/rapidata_order.py,sha256=VRDLTPBf2k6UihF0DnWq3nfBLWExfWHzh3T-cJgFF1w,2437
-rapidata/rapidata_client/order/rapidata_order_builder.py,sha256=qGfdmO3wdQMDwlW5NTM8x5ll8YmC0UUuUkGtyzGMObU,8105
+rapidata/rapidata_client/order/rapidata_order_builder.py,sha256=Wx7lhThTd6SwJNSbXzhGCsEgMhizjsriGz2zMjbQyEI,8134
 rapidata/rapidata_client/rapidata_client.py,sha256=z3vz5_GNivnShj7kqii-eUff16rvwSy62zwi8WZqAWo,2776
 rapidata/rapidata_client/referee/__init__.py,sha256=x0AxGCsR6TlDjfqQ00lB9V7QVS9EZCJzweNEIzx42PI,207
 rapidata/rapidata_client/referee/base_referee.py,sha256=bMy7cw0a-pGNbFu6u_1_Jplu0A483Ubj4oDQzh8vu8k,493
@@ -259,7 +259,7 @@ rapidata/service/local_file_service.py,sha256=pgorvlWcx52Uh3cEG6VrdMK_t__7dacQ_5
 rapidata/service/openapi_service.py,sha256=-vrM2jEzQxr9KAerOYkVhpvMEeHwjzRwm9L_VFyzOT0,1537
 rapidata/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 rapidata/utils/image_utils.py,sha256=TldO3eJWG8IhfJjm5MfNGO0mEDm1mQTsRoA0HLU1Uxs,404
-rapidata-0.1.24.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-rapidata-0.1.24.dist-info/METADATA,sha256=vrxhs737rJg2Z4F7GLvOgpDhxXc0tJcxs_WPpxL7ZjY,924
-rapidata-0.1.24.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-rapidata-0.1.24.dist-info/RECORD,,
+rapidata-0.2.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+rapidata-0.2.0.dist-info/METADATA,sha256=Oymi81UjDR9fM9f-axaTKUWOCnlExpeA1i1voXC7ruU,923
+rapidata-0.2.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+rapidata-0.2.0.dist-info/RECORD,,

{rapidata-0.1.24.dist-info → rapidata-0.2.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{rapidata-0.1.24.dist-info → rapidata-0.2.0.dist-info}/WHEEL RENAMED Viewed

File without changes

rapidata 0.1.24__py3-none-any.whl → 0.2.0__py3-none-any.whl

rapidata 0.1.24__py3-none-any.whl → 0.2.0__py3-none-any.whl