hirundo 0.1.16__py3-none-any.whl → 0.1.21__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
hirundo/__init__.py CHANGED
@@ -1,38 +1,54 @@
 from .dataset_enum import (
     DatasetMetadataType,
     LabelingType,
+    StorageTypes,
 )
-from .dataset_optimization import (
-    COCO,
-    YOLO,
-    HirundoCSV,
+from .dataset_qa import (
+    ClassificationRunArgs,
+    Domain,
     HirundoError,
-    OptimizationDataset,
+    ObjectDetectionRunArgs,
+    QADataset,
     RunArgs,
-    VisionRunArgs,
 )
-from .dataset_optimization_results import DatasetOptimizationResults
+from .dataset_qa_results import DatasetQAResults
 from .git import GitPlainAuth, GitRepo, GitSSHAuth
+from .labeling import (
+    COCO,
+    YOLO,
+    HirundoCSV,
+    KeylabsAuth,
+    KeylabsObjDetImages,
+    KeylabsObjDetVideo,
+    KeylabsObjSegImages,
+    KeylabsObjSegVideo,
+)
 from .storage import (
     StorageConfig,
     StorageGCP,
     # StorageAzure, TODO: Azure storage is coming soon
     StorageGit,
     StorageS3,
-    StorageTypes,
 )
 from .unzip import load_df, load_from_zip
 
 __all__ = [
     "COCO",
     "YOLO",
-    "HirundoCSV",
     "HirundoError",
-    "OptimizationDataset",
+    "HirundoCSV",
+    "KeylabsAuth",
+    "KeylabsObjDetImages",
+    "KeylabsObjDetVideo",
+    "KeylabsObjSegImages",
+    "KeylabsObjSegVideo",
+    "QADataset",
+    "Domain",
     "RunArgs",
-    "VisionRunArgs",
-    "LabelingType",
+    "ClassificationRunArgs",
+    "ObjectDetectionRunArgs",
     "DatasetMetadataType",
+    "LabelingType",
     "GitPlainAuth",
     "GitRepo",
     "GitSSHAuth",
@@ -42,9 +58,9 @@ __all__ = [
     # "StorageAzure", TODO: Azure storage is coming soon
     "StorageGit",
     "StorageConfig",
-    "DatasetOptimizationResults",
+    "DatasetQAResults",
     "load_df",
     "load_from_zip",
 ]
 
-__version__ = "0.1.16"
+__version__ = "0.1.21"
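
The headline change in this release is the rename of the dataset-optimization API to a QA API. A minimal migration sketch based only on the exports above; the pairing of old and new RunArgs classes is inferred from the names, not stated in this diff:

# 0.1.16
from hirundo import DatasetOptimizationResults, OptimizationDataset, VisionRunArgs

# 0.1.21
from hirundo import (
    ClassificationRunArgs,   # with ObjectDetectionRunArgs, replaces VisionRunArgs (inferred)
    DatasetQAResults,        # replaces DatasetOptimizationResults
    ObjectDetectionRunArgs,
    QADataset,               # replaces OptimizationDataset
)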
hirundo/_constraints.py CHANGED
@@ -1,53 +1,164 @@
-from typing import Annotated
-
-from pydantic import StringConstraints, UrlConstraints
-from pydantic_core import Url
-
-S3BucketUrl = Annotated[
-    str,
-    StringConstraints(
-        min_length=8,
-        max_length=1023,
-        pattern=r"s3?://[a-z0-9.-]{3,64}[/]?",  # Only allow real S3 bucket URLs
-    ),
-]
-
-StorageConfigName = Annotated[
-    str,
-    StringConstraints(
-        min_length=1,
-        max_length=255,
-        pattern=r"^[a-zA-Z0-9-_]+$",
-    ),
-]
-
-S3_MIN_LENGTH = 8
-S3_MAX_LENGTH = 1023
-S3_PATTERN = r"s3://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
-GCP_MIN_LENGTH = 8
-GCP_MAX_LENGTH = 1023
-GCP_PATTERN = r"gs://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
-
-RepoUrl = Annotated[
-    Url,
-    UrlConstraints(
-        allowed_schemes=[
-            "ssh",
-            "https",
-            "http",
-        ]
-    ),
-]
-HirundoUrl = Annotated[
-    Url,
-    UrlConstraints(
-        allowed_schemes=[
-            "file",
-            "https",
-            "http",
-            "s3",
-            "gs",
-            "ssh",
-        ]
-    ),
-]
+import re
+import typing
+from typing import TYPE_CHECKING
+
+from hirundo._urls import (
+    LENGTH_CONSTRAINTS,
+    STORAGE_PATTERNS,
+)
+from hirundo.dataset_enum import DatasetMetadataType, LabelingType, StorageTypes
+from hirundo.labeling import COCO, YOLO, HirundoCSV, Keylabs
+
+if TYPE_CHECKING:
+    from hirundo._urls import HirundoUrl
+    from hirundo.dataset_qa import LabelingInfo
+    from hirundo.storage import (
+        ResponseStorageConfig,
+        StorageConfig,
+        StorageGCP,
+        StorageGCPOut,
+        StorageS3,
+        StorageS3Out,
+    )
+
+LABELING_TYPES_TO_DATASET_METADATA_TYPES = {
+    LabelingType.SINGLE_LABEL_CLASSIFICATION: [
+        DatasetMetadataType.HIRUNDO_CSV,
+    ],
+    LabelingType.OBJECT_DETECTION: [
+        DatasetMetadataType.HIRUNDO_CSV,
+        DatasetMetadataType.COCO,
+        DatasetMetadataType.YOLO,
+        DatasetMetadataType.KeylabsObjDetImages,
+        DatasetMetadataType.KeylabsObjDetVideo,
+    ],
+    LabelingType.OBJECT_SEGMENTATION: [
+        DatasetMetadataType.HIRUNDO_CSV,
+        DatasetMetadataType.KeylabsObjSegImages,
+        DatasetMetadataType.KeylabsObjSegVideo,
+    ],
+    LabelingType.SEMANTIC_SEGMENTATION: [
+        DatasetMetadataType.HIRUNDO_CSV,
+    ],
+    LabelingType.PANOPTIC_SEGMENTATION: [
+        DatasetMetadataType.HIRUNDO_CSV,
+    ],
+    LabelingType.SPEECH_TO_TEXT: [
+        DatasetMetadataType.HIRUNDO_CSV,
+    ],
+}
+
+
+def validate_s3_url(str_url: str, s3_config: "StorageS3 | StorageS3Out"):
+    if (
+        len(str_url) < LENGTH_CONSTRAINTS[StorageTypes.S3]["min_length"]
+        or len(str_url) > LENGTH_CONSTRAINTS[StorageTypes.S3]["max_length"]
+    ):
+        raise ValueError("S3 URL must be between 8 and 1023 characters")
+    elif not re.match(STORAGE_PATTERNS[StorageTypes.S3], str_url):
+        raise ValueError(
+            f"Invalid S3 URL. Pattern must match: {STORAGE_PATTERNS[StorageTypes.S3]}"
+        )
+    elif not str_url.startswith(f"{s3_config.bucket_url}/"):
+        raise ValueError(f"S3 URL must start with {s3_config.bucket_url}/")
+
+
+def validate_gcp_url(str_url: str, gcp_config: "StorageGCP | StorageGCPOut"):
+    matches = re.match(STORAGE_PATTERNS[StorageTypes.GCP], str_url)
+    if (
+        len(str_url) < LENGTH_CONSTRAINTS[StorageTypes.GCP]["min_length"]
+        or len(str_url) > LENGTH_CONSTRAINTS[StorageTypes.GCP]["max_length"]
+    ):
+        raise ValueError(
+            f"GCP URL must be between {LENGTH_CONSTRAINTS[StorageTypes.GCP]['min_length']}"
+            + f" and {LENGTH_CONSTRAINTS[StorageTypes.GCP]['max_length']} characters"
+        )
+    elif not matches:
+        raise ValueError(
+            f"Invalid GCP URL. Pattern must match: {STORAGE_PATTERNS[StorageTypes.GCP]}"
+        )
+    elif (
+        matches
+        and len(matches.group(1))
+        > LENGTH_CONSTRAINTS[StorageTypes.GCP]["bucket_max_length"]
+    ):
+        raise ValueError(
+            f"GCP bucket name must be between {LENGTH_CONSTRAINTS[StorageTypes.GCP]['bucket_min_length']} "
+            + f"and {LENGTH_CONSTRAINTS[StorageTypes.GCP]['bucket_max_length']} characters"
+        )
+    elif not str_url.startswith(f"gs://{gcp_config.bucket_name}/"):
+        raise ValueError(f"GCP URL must start with gs://{gcp_config.bucket_name}")
+
+
+def validate_url(
+    url: "HirundoUrl",
+    storage_config: "StorageConfig | ResponseStorageConfig",
+) -> "HirundoUrl":
+    s3_config = storage_config.s3
+    gcp_config = storage_config.gcp
+    git_config = storage_config.git
+    str_url = str(url)
+
+    if s3_config is not None:
+        validate_s3_url(str_url, s3_config)
+    elif gcp_config is not None:
+        validate_gcp_url(str_url, gcp_config)
+    elif (
+        git_config is not None
+        and not str_url.startswith("https://")
+        and not str_url.startswith("ssh://")
+    ):
+        raise ValueError("Git URL must start with https:// or ssh://")
+    elif storage_config.type == StorageTypes.LOCAL and not str_url.startswith(
+        "file:///datasets/"
+    ):
+        raise ValueError("Local URL must start with file:///datasets/")
+    return url
+
+
+def validate_labeling_type(
+    labeling_type: "LabelingType", labeling_info: "LabelingInfo"
+) -> None:
+    """
+    Validate that the labeling type is compatible with the labeling info
+
+    Args:
+        labeling_type: The type of labeling that will be performed
+        labeling_info: The labeling info to validate
+    """
+    dataset_metadata_types = LABELING_TYPES_TO_DATASET_METADATA_TYPES[labeling_type]
+    if labeling_info.type not in dataset_metadata_types:
+        raise ValueError(
+            f"Cannot use {labeling_info.type.name} labeling info with {labeling_type.name} datasets"
+        )
+
+
+def validate_labeling_info(
+    labeling_type: "LabelingType",
+    labeling_info: "typing.Union[LabelingInfo, list[LabelingInfo]]",
+    storage_config: "typing.Union[StorageConfig, ResponseStorageConfig]",
+) -> None:
+    """
+    Validate the labeling info for a dataset
+
+    Args:
+        labeling_type: The type of labeling that will be performed
+        labeling_info: The labeling info to validate
+        storage_config: The storage configuration for the dataset.
+            StorageConfig is used to validate the URLs in the labeling info
+    """
+    if isinstance(labeling_info, list):
+        for labeling in labeling_info:
+            validate_labeling_info(labeling_type, labeling, storage_config)
+        return
+    elif isinstance(labeling_info, HirundoCSV):
+        validate_url(labeling_info.csv_url, storage_config)
+    elif isinstance(labeling_info, COCO):
+        validate_url(labeling_info.json_url, storage_config)
+    elif isinstance(labeling_info, YOLO):
+        validate_url(labeling_info.labels_dir_url, storage_config)
+        if labeling_info.data_yaml_url is not None:
+            validate_url(labeling_info.data_yaml_url, storage_config)
+    elif isinstance(labeling_info, Keylabs):
+        validate_url(labeling_info.labels_dir_url, storage_config)
+    validate_labeling_type(labeling_type, labeling_info)
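
To see what the new compatibility check enforces, a short sketch; constructing YOLO with these keyword arguments and relying on its default `type` of DatasetMetadataType.YOLO are assumptions, and the bucket URLs are hypothetical:

from hirundo._constraints import validate_labeling_type
from hirundo.dataset_enum import LabelingType
from hirundo.labeling import YOLO

labeling = YOLO(
    labels_dir_url="s3://my-bucket/labels/",   # hypothetical bucket
    data_yaml_url="s3://my-bucket/data.yaml",  # hypothetical path
)
# YOLO appears only under OBJECT_DETECTION in LABELING_TYPES_TO_DATASET_METADATA_TYPES,
# so pairing it with a segmentation labeling type raises:
# ValueError: Cannot use YOLO labeling info with SEMANTIC_SEGMENTATION datasets
validate_labeling_type(LabelingType.SEMANTIC_SEGMENTATION, labeling)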
hirundo/_headers.py CHANGED
@@ -1,6 +1,6 @@
 from hirundo._env import API_KEY, check_api_key
 
-HIRUNDO_API_VERSION = "0.2"
+HIRUNDO_API_VERSION = "0.3"
 
 _json_headers = {
     "Content-Type": "application/json",
hirundo/_http.py CHANGED
@@ -1,4 +1,7 @@
+import requests as _requests
 from requests import Response
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
 
 import hirundo.logger
 
@@ -7,6 +10,56 @@ logger = hirundo.logger.get_logger(__name__)
 MINIMUM_CLIENT_SERVER_ERROR_CODE = 400
 
 
+def _build_retrying_session() -> _requests.Session:
+    # No more than 10 tries total (including the initial attempt)
+    # urllib3 Retry.total counts retries, not total attempts, so use 9 retries
+    retries = Retry(
+        total=9,
+        backoff_factor=1.0,
+        status_forcelist=(429,),
+        allowed_methods=("HEAD", "GET", "PUT", "POST", "PATCH", "DELETE", "OPTIONS"),
+        respect_retry_after_header=True,
+        raise_on_status=False,
+    )
+    adapter = HTTPAdapter(max_retries=retries)
+    session = _requests.Session()
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+    return session
+
+
+_SESSION = _build_retrying_session()
+
+
+class _RequestsShim:
+    """Shim exposing a subset of the requests API but backed by a retrying Session."""
+
+    HTTPError = _requests.HTTPError
+    Response = _requests.Response
+
+    def request(self, method: str, url: str, **kwargs) -> Response:
+        return _SESSION.request(method=method, url=url, **kwargs)
+
+    def get(self, url: str, **kwargs) -> Response:
+        return _SESSION.get(url, **kwargs)
+
+    def post(self, url: str, **kwargs) -> Response:
+        return _SESSION.post(url, **kwargs)
+
+    def delete(self, url: str, **kwargs) -> Response:
+        return _SESSION.delete(url, **kwargs)
+
+    def patch(self, url: str, **kwargs) -> Response:
+        return _SESSION.patch(url, **kwargs)
+
+    def put(self, url: str, **kwargs) -> Response:
+        return _SESSION.put(url, **kwargs)
+
+
+# Public shim to be imported by modules instead of the raw requests package
+requests = _RequestsShim()
+
+
 def raise_for_status_with_reason(response: Response):
     try:
         if response.status_code >= MINIMUM_CLIENT_SERVER_ERROR_CODE:
@@ -5,11 +5,11 @@ import uuid
 from collections.abc import AsyncGenerator, Generator
 
 import httpx
-import requests
 import urllib3
 from httpx_sse import ServerSentEvent, SSEError, aconnect_sse, connect_sse
 from stamina import retry
 
+from hirundo._http import requests
 from hirundo._timeouts import READ_TIMEOUT
 from hirundo.logger import get_logger
 
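
The second hunk above shows the intended usage at call sites: swap `import requests` for the shim import and keep the same call shape. A sketch of the net effect, with a hypothetical URL:

from hirundo._http import requests

# Same signature as requests.get, but the call now runs through the retrying
# Session: HTTP 429 responses are retried with exponential backoff, honouring
# Retry-After, up to 10 attempts in total.
response = requests.get("https://api.example.com/health")
response.raise_for_status()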
hirundo/_urls.py ADDED
@@ -0,0 +1,59 @@
+from typing import Annotated
+
+from pydantic import StringConstraints, UrlConstraints
+from pydantic_core import Url
+
+from hirundo.dataset_enum import StorageTypes
+
+S3BucketUrl = Annotated[
+    str,
+    StringConstraints(
+        min_length=8,
+        max_length=1023,
+        pattern=r"s3?://[a-z0-9.-]{3,64}[/]?",  # Only allow real S3 bucket URLs
+    ),
+]
+
+StorageConfigName = Annotated[
+    str,
+    StringConstraints(
+        min_length=1,
+        max_length=255,
+        pattern=r"^[a-zA-Z0-9-_]+$",
+    ),
+]
+
+STORAGE_PATTERNS: dict[StorageTypes, str] = {
+    StorageTypes.S3: r"^s3:\/\/[a-z0-9\.\-]{3,63}/[a-zA-Z0-9!\-\/_\.\*'\(\)]+$",
+    StorageTypes.GCP: r"^gs:\/\/([a-z0-9][a-z0-9_-]{1,61}[a-z0-9](\.[a-z0-9][a-z0-9_-]{1,61}[a-z0-9])*)\/[^\x00-\x1F\x7F-\x9F\r\n]*$",
+}
+
+
+LENGTH_CONSTRAINTS: dict[StorageTypes, dict] = {
+    StorageTypes.S3: {"min_length": 8, "max_length": 1023, "bucket_max_length": None},
+    StorageTypes.GCP: {"min_length": 8, "max_length": 1023, "bucket_max_length": 222},
+}
+
+RepoUrl = Annotated[
+    Url,
+    UrlConstraints(
+        allowed_schemes=[
+            "ssh",
+            "https",
+            "http",
+        ]
+    ),
+]
+HirundoUrl = Annotated[
+    Url,
+    UrlConstraints(
+        allowed_schemes=[
+            "file",
+            "https",
+            "http",
+            "s3",
+            "gs",
+            "ssh",
+        ]
+    ),
+]
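
Because RepoUrl and HirundoUrl are Annotated pydantic types, they can be exercised directly with a TypeAdapter; a quick sketch with hypothetical bucket and path names:

from pydantic import TypeAdapter

from hirundo._urls import HirundoUrl

adapter = TypeAdapter(HirundoUrl)
adapter.validate_python("gs://my-bucket/metadata.csv")  # OK: "gs" is an allowed scheme
adapter.validate_python("ftp://host/metadata.csv")      # raises ValidationError: scheme not allowed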
hirundo/cli.py CHANGED
@@ -88,7 +88,7 @@ def setup_api_key(
     ],
 ):
     """
-    Setup the API key for the Hirundo client library.
+    Setup the API key for the Hirundo Python SDK.
     Values are saved to a .env file in the current directory for use by the library in requests.
     """
     saved_to = upsert_env("API_KEY", api_key)
@@ -115,7 +115,7 @@ def change_api_remote(
     ],
 ):
     """
-    Change the API server address for the Hirundo client library.
+    Change the API server address for the Hirundo Python SDK.
     This is the same address where you access the Hirundo web interface.
     """
     api_host = fix_api_host(api_host)
@@ -151,7 +151,7 @@ def setup(
     ],
 ):
     """
-    Setup the Hirundo client library.
+    Setup the Hirundo Python SDK.
     """
     api_host = fix_api_host(api_host)
     api_host_saved_to = upsert_env("API_HOST", api_host)
@@ -198,9 +198,9 @@ def check_run(
     """
     Check the status of a run.
    """
-    from hirundo.dataset_optimization import OptimizationDataset
+    from hirundo.dataset_qa import QADataset
 
-    results = OptimizationDataset.check_run_by_id(run_id)
+    results = QADataset.check_run_by_id(run_id)
     print(f"Run results saved to {results.cached_zip_path}")
 
 
@@ -209,9 +209,9 @@ def list_runs():
     """
     List all runs available.
     """
-    from hirundo.dataset_optimization import OptimizationDataset
+    from hirundo.dataset_qa import QADataset
 
-    runs = OptimizationDataset.list_runs()
+    runs = QADataset.list_runs()
 
     console = Console()
     table = Table(
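
Both CLI commands now delegate to QADataset rather than OptimizationDataset. The equivalent SDK calls, sketched with a hypothetical run ID:

from hirundo.dataset_qa import QADataset

results = QADataset.check_run_by_id("hypothetical-run-id")
print(f"Run results saved to {results.cached_zip_path}")

runs = QADataset.list_runs()  # what `list_runs` renders into its table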
hirundo/dataset_enum.py CHANGED
@@ -10,6 +10,9 @@ class LabelingType(str, Enum):
     SINGLE_LABEL_CLASSIFICATION = "SingleLabelClassification"
     OBJECT_DETECTION = "ObjectDetection"
     SPEECH_TO_TEXT = "SpeechToText"
+    OBJECT_SEGMENTATION = "ObjectSegmentation"
+    SEMANTIC_SEGMENTATION = "SemanticSegmentation"
+    PANOPTIC_SEGMENTATION = "PanopticSegmentation"
 
 
 class DatasetMetadataType(str, Enum):
@@ -21,3 +24,23 @@ class DatasetMetadataType(str, Enum):
     HIRUNDO_CSV = "HirundoCSV"
     COCO = "COCO"
     YOLO = "YOLO"
+    KeylabsObjDetImages = "KeylabsObjDetImages"
+    KeylabsObjDetVideo = "KeylabsObjDetVideo"
+    KeylabsObjSegImages = "KeylabsObjSegImages"
+    KeylabsObjSegVideo = "KeylabsObjSegVideo"
+
+
+class StorageTypes(str, Enum):
+    """
+    Enum for the different types of storage configs.
+    Supported types are:
+    """
+
+    S3 = "S3"
+    GCP = "GCP"
+    # AZURE = "Azure" TODO: Azure storage config is coming soon
+    GIT = "Git"
+    LOCAL = "Local"
+    """
+    Local storage config is only supported for on-premises installations.
+    """
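
StorageTypes now lives in hirundo.dataset_enum rather than hirundo.storage and is re-exported from the package root (see the __init__.py hunk above), so `from hirundo import StorageTypes` keeps working. The new members at a glance; the values are exactly the strings defined above:

from hirundo.dataset_enum import DatasetMetadataType, LabelingType, StorageTypes

LabelingType.OBJECT_SEGMENTATION.value        # "ObjectSegmentation"
DatasetMetadataType.KeylabsObjSegVideo.value  # "KeylabsObjSegVideo"
StorageTypes.LOCAL.value                      # "Local" (on-premises installations only)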