PyPI - hirundo - Versions diffs - 0.1.18__py3-none-any.whl → 0.2.3.post1__py3-none-any.whl - Mend

hirundo 0.1.18__py3-none-any.whl → 0.2.3.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

hirundo/__init__.py +28 -8
hirundo/_constraints.py +3 -4
hirundo/_headers.py +1 -1
hirundo/_http.py +53 -0
hirundo/_iter_sse_retrying.py +8 -5
hirundo/_llm_pipeline.py +153 -0
hirundo/_run_checking.py +283 -0
hirundo/_urls.py +1 -0
hirundo/cli.py +8 -11
hirundo/dataset_enum.py +2 -0
hirundo/{dataset_optimization.py → dataset_qa.py} +213 -256
hirundo/{dataset_optimization_results.py → dataset_qa_results.py} +7 -7
hirundo/git.py +8 -10
hirundo/labeling.py +22 -19
hirundo/storage.py +26 -26
hirundo/unlearning_llm.py +599 -0
hirundo/unzip.py +12 -13
{hirundo-0.1.18.dist-info → hirundo-0.2.3.post1.dist-info}/METADATA +59 -20
hirundo-0.2.3.post1.dist-info/RECORD +28 -0
{hirundo-0.1.18.dist-info → hirundo-0.2.3.post1.dist-info}/WHEEL +1 -1
hirundo-0.1.18.dist-info/RECORD +0 -25
{hirundo-0.1.18.dist-info → hirundo-0.2.3.post1.dist-info}/entry_points.txt +0 -0
{hirundo-0.1.18.dist-info → hirundo-0.2.3.post1.dist-info}/licenses/LICENSE +0 -0
{hirundo-0.1.18.dist-info → hirundo-0.2.3.post1.dist-info}/top_level.txt +0 -0

hirundo/{dataset_optimization.py → dataset_qa.py} RENAMED Viewed

@@ -1,12 +1,9 @@
 import datetime
-import json
 import typing
 from collections.abc import AsyncGenerator, Generator
 from enum import Enum
 from typing import overload
-import httpx
-import requests
 from pydantic import BaseModel, Field, model_validator
 from tqdm import tqdm
 from tqdm.contrib.logging import logging_redirect_tqdm
@@ -14,12 +11,21 @@ from tqdm.contrib.logging import logging_redirect_tqdm
 from hirundo._constraints import validate_labeling_info, validate_url
 from hirundo._env import API_HOST
 from hirundo._headers import get_headers
-from hirundo._http import raise_for_status_with_reason
-from hirundo._iter_sse_retrying import aiter_sse_retrying, iter_sse_retrying
+from hirundo._http import raise_for_status_with_reason, requests
+from hirundo._run_checking import (
+    STATUS_TO_PROGRESS_MAP,
+    RunStatus,
+    aiter_run_events,
+    build_status_text_map,
+    get_state,
+    handle_run_failure,
+    iter_run_events,
+    update_progress_from_result,
+)
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
 from hirundo._urls import HirundoUrl
 from hirundo.dataset_enum import DatasetMetadataType, LabelingType
-from hirundo.dataset_optimization_results import DatasetOptimizationResults
+from hirundo.dataset_qa_results import DatasetQAResults
 from hirundo.labeling import YOLO, LabelingInfo
 from hirundo.logger import get_logger
 from hirundo.storage import ResponseStorageConfig, StorageConfig
@@ -30,75 +36,63 @@ logger = get_logger(__name__)
 class HirundoError(Exception):
     """
-    Custom exception used to indicate errors in `hirundo` dataset optimization runs
+    Custom exception used to indicate errors in `hirundo` dataset QA runs
     """
     pass
-MAX_RETRIES = 200  # Max 200 retries for HTTP SSE connection
+STATUS_TO_TEXT_MAP = build_status_text_map(
+    "Dataset QA",
+    started_detail="Dataset QA run in progress. Downloading dataset",
+)
-class RunStatus(Enum):
-    PENDING = "PENDING"
-    STARTED = "STARTED"
-    SUCCESS = "SUCCESS"
-    FAILURE = "FAILURE"
-    AWAITING_MANUAL_APPROVAL = "AWAITING MANUAL APPROVAL"
-    REVOKED = "REVOKED"
-    REJECTED = "REJECTED"
-    RETRY = "RETRY"
-STATUS_TO_TEXT_MAP = {
-    RunStatus.STARTED.value: "Optimization run in progress. Downloading dataset",
-    RunStatus.PENDING.value: "Optimization run queued and not yet started",
-    RunStatus.SUCCESS.value: "Optimization run completed successfully",
-    RunStatus.FAILURE.value: "Optimization run failed",
-    RunStatus.AWAITING_MANUAL_APPROVAL.value: "Awaiting manual approval",
-    RunStatus.RETRY.value: "Optimization run failed. Retrying",
-    RunStatus.REVOKED.value: "Optimization run was cancelled",
-    RunStatus.REJECTED.value: "Optimization run was rejected",
-}
-STATUS_TO_PROGRESS_MAP = {
-    RunStatus.STARTED.value: 0.0,
-    RunStatus.PENDING.value: 0.0,
-    RunStatus.SUCCESS.value: 100.0,
-    RunStatus.FAILURE.value: 100.0,
-    RunStatus.AWAITING_MANUAL_APPROVAL.value: 100.0,
-    RunStatus.RETRY.value: 0.0,
-    RunStatus.REVOKED.value: 100.0,
-    RunStatus.REJECTED.value: 0.0,
-}
-class VisionRunArgs(BaseModel):
-    upsample: bool = False
+class ClassificationRunArgs(BaseModel):
+    image_size: tuple[int, int] | None = (224, 224)
+    """
+    Size (width, height) to which to resize classification images.
+    It is recommended to keep this value at (224, 224) unless your classes are differentiated by very small differences.
+    """
+    upsample: bool | None = False
     """
     Whether to upsample the dataset to attempt to balance the classes.
     """
-    min_abs_bbox_size: int = 0
+class ObjectDetectionRunArgs(ClassificationRunArgs):
+    min_abs_bbox_size: int | None = None
     """
-    Minimum valid size (in pixels) of a bounding box to keep it in the dataset for optimization.
+    Minimum valid size (in pixels) of a bounding box to keep it in the dataset for QA.
     """
-    min_abs_bbox_area: int = 0
+    min_abs_bbox_area: int | None = None
     """
-    Minimum valid absolute area (in pixels²) of a bounding box to keep it in the dataset for optimization.
+    Minimum valid absolute area (in pixels²) of a bounding box to keep it in the dataset for QA.
     """
-    min_rel_bbox_size: float = 0.0
+    min_rel_bbox_size: float | None = None
     """
     Minimum valid size (as a fraction of both image height and width) for a bounding box
-    to keep it in the dataset for optimization, relative to the corresponding dimension size,
+    to keep it in the dataset for QA, relative to the corresponding dimension size,
     i.e. if the bounding box is 10% of the image width and 5% of the image height, it will be kept if this value is 0.05, but not if the
     value is 0.06 (since both width and height are checked).
     """
-    min_rel_bbox_area: float = 0.0
+    min_rel_bbox_area: float | None = None
+    """
+    Minimum valid relative area (as a fraction of the image area) of a bounding box to keep it in the dataset for QA.
+    """
+    crop_ratio: float | None = None
     """
-    Minimum valid relative area (as a fraction of the image area) of a bounding box to keep it in the dataset for optimization.
+    Ratio of the bounding box to crop.
+    Change this value at your own risk. It is recommended to keep it at 1.0 unless you know what you are doing.
+    """
+    add_mask_channel: bool | None = None
+    """
+    Whether to add a mask channel to the image.
+    Change at your own risk. It is recommended to keep it at False unless you know what you are doing.
     """
-RunArgs = typing.Union[VisionRunArgs]
+RunArgs = ClassificationRunArgs | ObjectDetectionRunArgs
 class AugmentationName(str, Enum):
@@ -111,14 +105,32 @@ class AugmentationName(str, Enum):
     GAUSSIAN_BLUR = "GaussianBlur"
-class Modality(str, Enum):
-    IMAGE = "Image"
-    RADAR = "Radar"
-    EKG = "EKG"
+class ModalityType(str, Enum):
+    RADAR = "RADAR"
+    VISION = "VISION"
+    SPEECH = "SPEECH"
+    TABULAR = "TABULAR"
+MODALITY_TO_SUPPORTED_LABELING_TYPES = {
+    ModalityType.RADAR: [
+        LabelingType.SINGLE_LABEL_CLASSIFICATION,
+        LabelingType.OBJECT_DETECTION,
+    ],
+    ModalityType.VISION: [
+        LabelingType.SINGLE_LABEL_CLASSIFICATION,
+        LabelingType.OBJECT_DETECTION,
+        LabelingType.OBJECT_SEGMENTATION,
+        LabelingType.SEMANTIC_SEGMENTATION,
+        LabelingType.PANOPTIC_SEGMENTATION,
+    ],
+    ModalityType.SPEECH: [LabelingType.SPEECH_TO_TEXT],
+    ModalityType.TABULAR: [LabelingType.SINGLE_LABEL_CLASSIFICATION],
+}
-class OptimizationDataset(BaseModel):
-    id: typing.Optional[int] = Field(default=None)
+class QADataset(BaseModel):
+    id: int | None = Field(default=None)
     """
     The ID of the dataset created on the server.
     """
@@ -134,17 +146,15 @@ class OptimizationDataset(BaseModel):
     - `LabelingType.OBJECT_DETECTION`: Indicates that the dataset is for object detection tasks
     - `LabelingType.SPEECH_TO_TEXT`: Indicates that the dataset is for speech-to-text tasks
     """
-    language: typing.Optional[str] = None
+    language: str | None = None
     """
     Language of the Speech-to-Text audio dataset. This is required for Speech-to-Text datasets.
     """
-    storage_config_id: typing.Optional[int] = None
+    storage_config_id: int | None = None
     """
     The ID of the storage config used to store the dataset and metadata.
     """
-    storage_config: typing.Optional[
-        typing.Union[StorageConfig, ResponseStorageConfig]
-    ] = None
+    storage_config: StorageConfig | ResponseStorageConfig | None = None
     """
     The `StorageConfig` instance to link to.
     """
@@ -158,34 +168,45 @@ class OptimizationDataset(BaseModel):
     Note: All CSV `image_path` entries in the metadata file should be relative to this folder.
     """
-    classes: typing.Optional[list[str]] = None
+    classes: list[str] | None = None
     """
     A full list of possible classes used in classification / object detection.
     It is currently required for clarity and performance.
     """
-    labeling_info: typing.Union[LabelingInfo, list[LabelingInfo]]
+    labeling_info: LabelingInfo | list[LabelingInfo]
-    augmentations: typing.Optional[list[AugmentationName]] = None
+    augmentations: list[AugmentationName] | None = None
     """
     Used to define which augmentations are apply to a vision dataset.
     For audio datasets, this field is ignored.
     If no value is provided, all augmentations are applied to vision datasets.
     """
-    modality: Modality = Modality.IMAGE
+    modality: ModalityType = ModalityType.VISION
     """
     Used to define the modality of the dataset.
     Defaults to Image.
     """
-    run_id: typing.Optional[str] = Field(default=None, init=False)
+    run_id: str | None = Field(default=None, init=False)
     """
-    The ID of the Dataset Optimization run created on the server.
+    The ID of the Dataset QA run created on the server.
     """
-    status: typing.Optional[RunStatus] = None
+    status: RunStatus | None = None
     @model_validator(mode="after")
     def validate_dataset(self):
+        if self.modality not in MODALITY_TO_SUPPORTED_LABELING_TYPES:
+            raise ValueError(
+                f"Modality {self.modality} is not supported. Supported modalities are: {list(MODALITY_TO_SUPPORTED_LABELING_TYPES.keys())}"
+            )
+        if (
+            self.labeling_type
+            not in MODALITY_TO_SUPPORTED_LABELING_TYPES[self.modality]
+        ):
+            raise ValueError(
+                f"Labeling type {self.labeling_type} is not supported for modality {self.modality}. Supported labeling types are: {MODALITY_TO_SUPPORTED_LABELING_TYPES[self.modality]}"
+            )
         if self.storage_config is None and self.storage_config_id is None:
             raise ValueError(
                 "No dataset storage has been provided. Provide one via `storage_config` or `storage_config_id`"
@@ -229,52 +250,52 @@ class OptimizationDataset(BaseModel):
         return self
     @staticmethod
-    def get_by_id(dataset_id: int) -> "OptimizationDataset":
+    def get_by_id(dataset_id: int) -> "QADataset":
         """
-        Get a `OptimizationDataset` instance from the server by its ID
+        Get a `QADataset` instance from the server by its ID
         Args:
-            dataset_id: The ID of the `OptimizationDataset` instance to get
+            dataset_id: The ID of the `QADataset` instance to get
         """
         response = requests.get(
-            f"{API_HOST}/dataset-optimization/dataset/{dataset_id}",
+            f"{API_HOST}/dataset-qa/dataset/{dataset_id}",
             headers=get_headers(),
             timeout=READ_TIMEOUT,
         )
         raise_for_status_with_reason(response)
         dataset = response.json()
-        return OptimizationDataset(**dataset)
+        return QADataset(**dataset)
     @staticmethod
-    def get_by_name(name: str) -> "OptimizationDataset":
+    def get_by_name(name: str) -> "QADataset":
         """
-        Get a `OptimizationDataset` instance from the server by its name
+        Get a `QADataset` instance from the server by its name
         Args:
-            name: The name of the `OptimizationDataset` instance to get
+            name: The name of the `QADataset` instance to get
         """
         response = requests.get(
-            f"{API_HOST}/dataset-optimization/dataset/by-name/{name}",
+            f"{API_HOST}/dataset-qa/dataset/by-name/{name}",
             headers=get_headers(),
             timeout=READ_TIMEOUT,
         )
         raise_for_status_with_reason(response)
         dataset = response.json()
-        return OptimizationDataset(**dataset)
+        return QADataset(**dataset)
     @staticmethod
     def list_datasets(
-        organization_id: typing.Optional[int] = None,
-    ) -> list["DataOptimizationDatasetOut"]:
+        organization_id: int | None = None,
+    ) -> list["QADatasetOut"]:
         """
-        Lists all the optimization datasets created by user's default organization
+        Lists all the datasets created by user's default organization
         or the `organization_id` passed
         Args:
             organization_id: The ID of the organization to list the datasets for.
         """
         response = requests.get(
-            f"{API_HOST}/dataset-optimization/dataset/",
+            f"{API_HOST}/dataset-qa/dataset/",
             params={"dataset_organization_id": organization_id},
             headers=get_headers(),
             timeout=READ_TIMEOUT,
@@ -282,7 +303,7 @@ class OptimizationDataset(BaseModel):
         raise_for_status_with_reason(response)
         datasets = response.json()
         return [
-            DataOptimizationDatasetOut(
+            QADatasetOut(
                 **ds,
             )
             for ds in datasets
@@ -290,26 +311,28 @@ class OptimizationDataset(BaseModel):
     @staticmethod
     def list_runs(
-        organization_id: typing.Optional[int] = None,
-    ) -> list["DataOptimizationRunOut"]:
+        organization_id: int | None = None,
+        archived: bool | None = False,
+    ) -> list["DataQARunOut"]:
         """
-        Lists all the `OptimizationDataset` instances created by user's default organization
+        Lists all the `QADataset` instances created by user's default organization
         or the `organization_id` passed
-        Note: The return type is `list[dict]` and not `list[OptimizationDataset]`
+        Note: The return type is `list[dict]` and not `list[QADataset]`
         Args:
             organization_id: The ID of the organization to list the datasets for.
+            archived: Whether to list archived runs.
         """
         response = requests.get(
-            f"{API_HOST}/dataset-optimization/run/list",
-            params={"dataset_organization_id": organization_id},
+            f"{API_HOST}/dataset-qa/run/list",
+            params={"dataset_organization_id": organization_id, "archived": archived},
             headers=get_headers(),
             timeout=READ_TIMEOUT,
         )
         raise_for_status_with_reason(response)
         runs = response.json()
         return [
-            DataOptimizationRunOut(
+            DataQARunOut(
                 **run,
             )
             for run in runs
@@ -318,13 +341,13 @@ class OptimizationDataset(BaseModel):
     @staticmethod
     def delete_by_id(dataset_id: int) -> None:
         """
-        Deletes a `OptimizationDataset` instance from the server by its ID
+        Deletes a `QADataset` instance from the server by its ID
         Args:
-            dataset_id: The ID of the `OptimizationDataset` instance to delete
+            dataset_id: The ID of the `QADataset` instance to delete
         """
         response = requests.delete(
-            f"{API_HOST}/dataset-optimization/dataset/{dataset_id}",
+            f"{API_HOST}/dataset-qa/dataset/{dataset_id}",
             headers=get_headers(),
             timeout=MODIFY_TIMEOUT,
         )
@@ -333,14 +356,14 @@ class OptimizationDataset(BaseModel):
     def delete(self, storage_config=True) -> None:
         """
-        Deletes the active `OptimizationDataset` instance from the server.
-        It can only be used on a `OptimizationDataset` instance that has been created.
+        Deletes the active `QADataset` instance from the server.
+        It can only be used on a `QADataset` instance that has been created.
         Args:
-            storage_config: If True, the `OptimizationDataset`'s `StorageConfig` will also be deleted
+            storage_config: If True, the `QADataset`'s `StorageConfig` will also be deleted
         Note: If `storage_config` is not set to `False` then the `storage_config_id` must be set
-        This can either be set manually or by creating the `StorageConfig` instance via the `OptimizationDataset`'s
+        This can either be set manually or by creating the `StorageConfig` instance via the `QADataset`'s
         `create` method
         """
         if storage_config:
@@ -353,11 +376,11 @@ class OptimizationDataset(BaseModel):
     def create(
         self,
-        organization_id: typing.Optional[int] = None,
+        organization_id: int | None = None,
         replace_if_exists: bool = False,
     ) -> int:
         """
-        Create a `OptimizationDataset` instance on the server.
+        Create a `QADataset` instance on the server.
         If the `storage_config_id` field is not set, the storage config will also be created and the field will be set.
         Args:
@@ -366,7 +389,7 @@ class OptimizationDataset(BaseModel):
                 (this is determined by a dataset of the same name in the same organization).
         Returns:
-            The ID of the created `OptimizationDataset` instance
+            The ID of the created `QADataset` instance
         """
         if self.storage_config is None and self.storage_config_id is None:
             raise ValueError("No dataset storage has been provided")
@@ -391,7 +414,7 @@ class OptimizationDataset(BaseModel):
         model_dict = self.model_dump(mode="json")
         # ⬆️ Get dict of model fields from Pydantic model instance
         dataset_response = requests.post(
-            f"{API_HOST}/dataset-optimization/dataset/",
+            f"{API_HOST}/dataset-qa/dataset/",
             json={
                 **{k: model_dict[k] for k in model_dict.keys() - {"storage_config"}},
                 "organization_id": organization_id,
@@ -408,17 +431,17 @@ class OptimizationDataset(BaseModel):
         return self.id
     @staticmethod
-    def launch_optimization_run(
+    def launch_qa_run(
         dataset_id: int,
-        organization_id: typing.Optional[int] = None,
-        run_args: typing.Optional[RunArgs] = None,
+        organization_id: int | None = None,
+        run_args: RunArgs | None = None,
     ) -> str:
         """
-        Run the dataset optimization process on the server using the dataset with the given ID
+        Run the dataset QA process on the server using the dataset with the given ID
         i.e. `dataset_id`.
         Args:
-            dataset_id: The ID of the dataset to run optimization on.
+            dataset_id: The ID of the dataset to run QA on.
         Returns:
             ID of the run (`run_id`).
@@ -429,7 +452,7 @@ class OptimizationDataset(BaseModel):
         if run_args:
             run_info["run_args"] = run_args.model_dump(mode="json")
         run_response = requests.post(
-            f"{API_HOST}/dataset-optimization/run/{dataset_id}",
+            f"{API_HOST}/dataset-qa/run/{dataset_id}",
             json=run_info if len(run_info) > 0 else None,
             headers=get_headers(),
             timeout=MODIFY_TIMEOUT,
@@ -440,12 +463,16 @@ class OptimizationDataset(BaseModel):
     def _validate_run_args(self, run_args: RunArgs) -> None:
         if self.labeling_type == LabelingType.SPEECH_TO_TEXT:
             raise Exception("Speech to text cannot have `run_args` set")
-        if self.labeling_type != LabelingType.OBJECT_DETECTION and any(
-            (
-                run_args.min_abs_bbox_size != 0,
-                run_args.min_abs_bbox_area != 0,
-                run_args.min_rel_bbox_size != 0,
-                run_args.min_rel_bbox_area != 0,
+        if (
+            self.labeling_type != LabelingType.OBJECT_DETECTION
+            and isinstance(run_args, ObjectDetectionRunArgs)
+            and any(
+                (
+                    run_args.min_abs_bbox_size != 0,
+                    run_args.min_abs_bbox_area != 0,
+                    run_args.min_rel_bbox_size != 0,
+                    run_args.min_rel_bbox_area != 0,
+                )
             )
         ):
             raise Exception(
@@ -454,21 +481,21 @@ class OptimizationDataset(BaseModel):
                 + f"labeling type {self.labeling_type}"
             )
-    def run_optimization(
+    def run_qa(
         self,
-        organization_id: typing.Optional[int] = None,
+        organization_id: int | None = None,
         replace_dataset_if_exists: bool = False,
-        run_args: typing.Optional[RunArgs] = None,
+        run_args: RunArgs | None = None,
     ) -> str:
         """
         If the dataset was not created on the server yet, it is created.
-        Run the dataset optimization process on the server using the active `OptimizationDataset` instance
+        Run the dataset QA process on the server using the active `QADataset` instance
         Args:
-            organization_id: The ID of the organization to run the optimization for.
+            organization_id: The ID of the organization to run the QA for.
             replace_dataset_if_exists: If True, the dataset will be replaced if it already exists
                 (this is determined by a dataset of the same name in the same organization).
-            run_args: The run arguments to use for the optimization run
+            run_args: The run arguments to use for the QA run
         Returns:
             An ID of the run (`run_id`) and stores that `run_id` on the instance
@@ -478,7 +505,7 @@ class OptimizationDataset(BaseModel):
                 self.id = self.create(replace_if_exists=replace_dataset_if_exists)
             if run_args is not None:
                 self._validate_run_args(run_args)
-            run_id = self.launch_optimization_run(self.id, organization_id, run_args)
+            run_id = self.launch_qa_run(self.id, organization_id, run_args)
             self.run_id = run_id
             logger.info("Started the run with ID: %s", run_id)
             return run_id
@@ -509,83 +536,46 @@ class OptimizationDataset(BaseModel):
     @staticmethod
     def _check_run_by_id(run_id: str, retry=0) -> Generator[dict, None, None]:
-        if retry > MAX_RETRIES:
-            raise HirundoError("Max retries reached")
-        last_event = None
-        with httpx.Client(timeout=httpx.Timeout(None, connect=5.0)) as client:
-            for sse in iter_sse_retrying(
-                client,
-                "GET",
-                f"{API_HOST}/dataset-optimization/run/{run_id}",
-                headers=get_headers(),
-            ):
-                if sse.event == "ping":
-                    continue
-                logger.debug(
-                    "[SYNC] received event: %s with data: %s and ID: %s and retry: %s",
-                    sse.event,
-                    sse.data,
-                    sse.id,
-                    sse.retry,
-                )
-                last_event = json.loads(sse.data)
-                if not last_event:
-                    continue
-                if "data" in last_event:
-                    data = last_event["data"]
-                else:
-                    if "detail" in last_event:
-                        raise HirundoError(last_event["detail"])
-                    elif "reason" in last_event:
-                        raise HirundoError(last_event["reason"])
-                    else:
-                        raise HirundoError("Unknown error")
-                yield data
-        if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
-            OptimizationDataset._check_run_by_id(run_id, retry + 1)
-    @staticmethod
-    def _handle_failure(iteration: dict):
-        if iteration["result"]:
-            raise HirundoError(
-                f"Optimization run failed with error: {iteration['result']}"
-            )
-        else:
-            raise HirundoError(
-                "Optimization run failed with an unknown error in _handle_failure"
-            )
+        yield from iter_run_events(
+            f"{API_HOST}/dataset-qa/run/{run_id}",
+            headers=get_headers(),
+            retry=retry,
+            status_keys=("state",),
+            error_cls=HirundoError,
+            log=logger,
+        )
     @staticmethod
     @overload
     def check_run_by_id(
         run_id: str, stop_on_manual_approval: typing.Literal[True]
-    ) -> typing.Optional[DatasetOptimizationResults]: ...
+    ) -> DatasetQAResults | None: ...
     @staticmethod
     @overload
     def check_run_by_id(
         run_id: str, stop_on_manual_approval: typing.Literal[False] = False
-    ) -> DatasetOptimizationResults: ...
+    ) -> DatasetQAResults: ...
     @staticmethod
     @overload
     def check_run_by_id(
         run_id: str, stop_on_manual_approval: bool
-    ) -> typing.Optional[DatasetOptimizationResults]: ...
+    ) -> DatasetQAResults | None: ...
     @staticmethod
     def check_run_by_id(
         run_id: str, stop_on_manual_approval: bool = False
-    ) -> typing.Optional[DatasetOptimizationResults]:
+    ) -> DatasetQAResults | None:
         """
         Check the status of a run given its ID
         Args:
-            run_id: The `run_id` produced by a `run_optimization` call
+            run_id: The `run_id` produced by a `run_qa` call
             stop_on_manual_approval: If True, the function will return `None` if the run is awaiting manual approval
         Returns:
-            A DatasetOptimizationResults object with the results of the optimization run
+            A DatasetQAResults object with the results of the QA run
         Raises:
             HirundoError: If the maximum number of retries is reached or if the run fails
@@ -593,87 +583,67 @@ class OptimizationDataset(BaseModel):
         logger.debug("Checking run with ID: %s", run_id)
         with logging_redirect_tqdm():
             t = tqdm(total=100.0)
-            for iteration in OptimizationDataset._check_run_by_id(run_id):
-                if iteration["state"] in STATUS_TO_PROGRESS_MAP:
-                    t.set_description(STATUS_TO_TEXT_MAP[iteration["state"]])
-                    t.n = STATUS_TO_PROGRESS_MAP[iteration["state"]]
+            for iteration in QADataset._check_run_by_id(run_id):
+                state = get_state(iteration, ("state",))
+                if state in STATUS_TO_PROGRESS_MAP:
+                    t.set_description(STATUS_TO_TEXT_MAP[state])
+                    t.n = STATUS_TO_PROGRESS_MAP[state]
                     logger.debug("Setting progress to %s", t.n)
                     t.refresh()
-                    if iteration["state"] in [
+                    if state in [
                         RunStatus.FAILURE.value,
                         RunStatus.REJECTED.value,
                         RunStatus.REVOKED.value,
                     ]:
                         logger.error(
                             "State is failure, rejected, or revoked: %s",
-                            iteration["state"],
+                            state,
                         )
-                        OptimizationDataset._handle_failure(iteration)
-                    elif iteration["state"] == RunStatus.SUCCESS.value:
+                        handle_run_failure(
+                            iteration, error_cls=HirundoError, run_label="QA"
+                        )
+                    elif state == RunStatus.SUCCESS.value:
                         t.close()
                         zip_temporary_url = iteration["result"]
-                        logger.debug("Optimization run completed. Downloading results")
+                        logger.debug("QA run completed. Downloading results")
                         return download_and_extract_zip(
                             run_id,
                             zip_temporary_url,
                         )
                     elif (
-                        iteration["state"] == RunStatus.AWAITING_MANUAL_APPROVAL.value
+                        state == RunStatus.AWAITING_MANUAL_APPROVAL.value
                         and stop_on_manual_approval
                     ):
                         t.close()
                         return None
-                elif iteration["state"] is None:
-                    if (
-                        iteration["result"]
-                        and isinstance(iteration["result"], dict)
-                        and iteration["result"]["result"]
-                        and isinstance(iteration["result"]["result"], str)
-                    ):
-                        result_info = iteration["result"]["result"].split(":")
-                        if len(result_info) > 1:
-                            stage = result_info[0]
-                            current_progress_percentage = float(
-                                result_info[1].removeprefix(" ").removesuffix("% done")
-                            )
-                        elif len(result_info) == 1:
-                            stage = result_info[0]
-                            current_progress_percentage = t.n  # Keep the same progress
-                        else:
-                            stage = "Unknown progress state"
-                            current_progress_percentage = t.n  # Keep the same progress
-                        desc = (
-                            "Optimization run completed. Uploading results"
-                            if current_progress_percentage == 100.0
-                            else stage
-                        )
-                        t.set_description(desc)
-                        t.n = current_progress_percentage
-                        logger.debug("Setting progress to %s", t.n)
-                        t.refresh()
-        raise HirundoError(
-            "Optimization run failed with an unknown error in check_run_by_id"
-        )
+                elif state is None:
+                    update_progress_from_result(
+                        iteration,
+                        t,
+                        uploading_text="QA run completed. Uploading results",
+                        log=logger,
+                    )
+        raise HirundoError("QA run failed with an unknown error in check_run_by_id")
     @overload
     def check_run(
         self, stop_on_manual_approval: typing.Literal[True]
-    ) -> typing.Optional[DatasetOptimizationResults]: ...
+    ) -> DatasetQAResults | None: ...
     @overload
     def check_run(
         self, stop_on_manual_approval: typing.Literal[False] = False
-    ) -> DatasetOptimizationResults: ...
+    ) -> DatasetQAResults: ...
     def check_run(
         self, stop_on_manual_approval: bool = False
-    ) -> typing.Optional[DatasetOptimizationResults]:
+    ) -> DatasetQAResults | None:
         """
         Check the status of the current active instance's run.
         Returns:
-            A pandas DataFrame with the results of the optimization run
+            A pandas DataFrame with the results of the QA run
         """
         if not self.run_id:
@@ -690,7 +660,7 @@ class OptimizationDataset(BaseModel):
         This generator will produce values to show progress of the run.
         Args:
-            run_id: The `run_id` produced by a `run_optimization` call
+            run_id: The `run_id` produced by a `run_qa` call
             retry: A number used to track the number of retries to limit re-checks. *Do not* provide this value manually.
         Yields:
@@ -700,32 +670,15 @@ class OptimizationDataset(BaseModel):
         """
         logger.debug("Checking run with ID: %s", run_id)
-        if retry > MAX_RETRIES:
-            raise HirundoError("Max retries reached")
-        last_event = None
-        async with httpx.AsyncClient(
-            timeout=httpx.Timeout(None, connect=5.0)
-        ) as client:
-            async_iterator = await aiter_sse_retrying(
-                client,
-                "GET",
-                f"{API_HOST}/dataset-optimization/run/{run_id}",
-                headers=get_headers(),
-            )
-            async for sse in async_iterator:
-                if sse.event == "ping":
-                    continue
-                logger.debug(
-                    "[ASYNC] Received event: %s with data: %s and ID: %s and retry: %s",
-                    sse.event,
-                    sse.data,
-                    sse.id,
-                    sse.retry,
-                )
-                last_event = json.loads(sse.data)
-                yield last_event["data"]
-        if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
-            OptimizationDataset.acheck_run_by_id(run_id, retry + 1)
+        async for iteration in aiter_run_events(
+            f"{API_HOST}/dataset-qa/run/{run_id}",
+            headers=get_headers(),
+            retry=retry,
+            status_keys=("state",),
+            error_cls=HirundoError,
+            log=logger,
+        ):
+            yield iteration
     async def acheck_run(self) -> AsyncGenerator[dict, None]:
         """
@@ -735,6 +688,8 @@ class OptimizationDataset(BaseModel):
         This generator will produce values to show progress of the run.
+        Note: This function does not handle errors nor show progress. It is expected that you do that.
         Yields:
             Each event will be a dict, where:
             - `"state"` is PENDING, STARTED, RETRY, FAILURE or SUCCESS
@@ -749,14 +704,14 @@ class OptimizationDataset(BaseModel):
     @staticmethod
     def cancel_by_id(run_id: str) -> None:
         """
-        Cancel the dataset optimization run for the given `run_id`.
+        Cancel the dataset QA run for the given `run_id`.
         Args:
             run_id: The ID of the run to cancel
         """
         logger.info("Cancelling run with ID: %s", run_id)
         response = requests.delete(
-            f"{API_HOST}/dataset-optimization/run/{run_id}",
+            f"{API_HOST}/dataset-qa/run/{run_id}",
             headers=get_headers(),
             timeout=MODIFY_TIMEOUT,
         )
@@ -773,14 +728,14 @@ class OptimizationDataset(BaseModel):
     @staticmethod
     def archive_run_by_id(run_id: str) -> None:
         """
-        Archive the dataset optimization run for the given `run_id`.
+        Archive the dataset QA run for the given `run_id`.
         Args:
             run_id: The ID of the run to archive
         """
         logger.info("Archiving run with ID: %s", run_id)
         response = requests.patch(
-            f"{API_HOST}/dataset-optimization/run/archive/{run_id}",
+            f"{API_HOST}/dataset-qa/run/archive/{run_id}",
             headers=get_headers(),
             timeout=MODIFY_TIMEOUT,
         )
@@ -795,7 +750,7 @@ class OptimizationDataset(BaseModel):
         self.archive_run_by_id(self.run_id)
-class DataOptimizationDatasetOut(BaseModel):
+class QADatasetOut(BaseModel):
     id: int
     name: str
@@ -805,16 +760,16 @@ class DataOptimizationDatasetOut(BaseModel):
     data_root_url: HirundoUrl
-    classes: typing.Optional[list[str]] = None
-    labeling_info: typing.Union[LabelingInfo, list[LabelingInfo]]
+    classes: list[str] | None = None
+    labeling_info: LabelingInfo | list[LabelingInfo]
-    organization_id: typing.Optional[int]
-    creator_id: typing.Optional[int]
+    organization_id: int | None
+    creator_id: int | None
     created_at: datetime.datetime
     updated_at: datetime.datetime
-class DataOptimizationRunOut(BaseModel):
+class DataQARunOut(BaseModel):
     id: int
     name: str
     dataset_id: int
@@ -822,4 +777,6 @@ class DataOptimizationRunOut(BaseModel):
     status: RunStatus
     approved: bool
     created_at: datetime.datetime
-    run_args: typing.Optional[RunArgs]
+    run_args: RunArgs | None
+    deleted_at: datetime.datetime | None = None

hirundo 0.1.18__py3-none-any.whl → 0.2.3.post1__py3-none-any.whl

hirundo 0.1.18__py3-none-any.whl → 0.2.3.post1__py3-none-any.whl