hirundo 0.1.16__py3-none-any.whl → 0.1.21__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
hirundo/__init__.py CHANGED
@@ -1,38 +1,54 @@
 from .dataset_enum import (
     DatasetMetadataType,
     LabelingType,
+    StorageTypes,
 )
-from .dataset_optimization import (
-    COCO,
-    YOLO,
-    HirundoCSV,
+from .dataset_qa import (
+    ClassificationRunArgs,
+    Domain,
     HirundoError,
-    OptimizationDataset,
+    ObjectDetectionRunArgs,
+    QADataset,
     RunArgs,
-    VisionRunArgs,
 )
-from .dataset_optimization_results import DatasetOptimizationResults
+from .dataset_qa_results import DatasetQAResults
 from .git import GitPlainAuth, GitRepo, GitSSHAuth
+from .labeling import (
+    COCO,
+    YOLO,
+    HirundoCSV,
+    KeylabsAuth,
+    KeylabsObjDetImages,
+    KeylabsObjDetVideo,
+    KeylabsObjSegImages,
+    KeylabsObjSegVideo,
+)
 from .storage import (
     StorageConfig,
     StorageGCP,
     # StorageAzure, TODO: Azure storage is coming soon
     StorageGit,
     StorageS3,
-    StorageTypes,
 )
 from .unzip import load_df, load_from_zip
 
 __all__ = [
     "COCO",
     "YOLO",
-    "HirundoCSV",
     "HirundoError",
-    "OptimizationDataset",
+    "HirundoCSV",
+    "KeylabsAuth",
+    "KeylabsObjDetImages",
+    "KeylabsObjDetVideo",
+    "KeylabsObjSegImages",
+    "KeylabsObjSegVideo",
+    "QADataset",
+    "Domain",
     "RunArgs",
-    "VisionRunArgs",
-    "LabelingType",
+    "ClassificationRunArgs",
+    "ObjectDetectionRunArgs",
     "DatasetMetadataType",
+    "LabelingType",
     "GitPlainAuth",
     "GitRepo",
     "GitSSHAuth",
@@ -42,9 +58,9 @@ __all__ = [
     # "StorageAzure", TODO: Azure storage is coming soon
     "StorageGit",
     "StorageConfig",
-    "DatasetOptimizationResults",
+    "DatasetQAResults",
     "load_df",
     "load_from_zip",
 ]
 
-__version__ = "0.1.16"
+__version__ = "0.1.21"
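
The headline change in this release is the rename of the dataset-optimization API to a QA API. A minimal migration sketch based only on the exports above; the pairing of old and new RunArgs classes is inferred from the names, not stated in this diff:

# 0.1.16
from hirundo import DatasetOptimizationResults, OptimizationDataset, VisionRunArgs

# 0.1.21
from hirundo import (
    ClassificationRunArgs,   # with ObjectDetectionRunArgs, replaces VisionRunArgs (inferred)
    DatasetQAResults,        # replaces DatasetOptimizationResults
    ObjectDetectionRunArgs,
    QADataset,               # replaces OptimizationDataset
)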
hirundo/_constraints.py CHANGED
@@ -1,53 +1,164 @@
-from typing import Annotated
-
-from pydantic import StringConstraints, UrlConstraints
-from pydantic_core import Url
-
-S3BucketUrl = Annotated[
-    str,
-    StringConstraints(
-        min_length=8,
-        max_length=1023,
-        pattern=r"s3?://[a-z0-9.-]{3,64}[/]?",  # Only allow real S3 bucket URLs
-    ),
-]
-
-StorageConfigName = Annotated[
-    str,
-    StringConstraints(
-        min_length=1,
-        max_length=255,
-        pattern=r"^[a-zA-Z0-9-_]+$",
-    ),
-]
-
-S3_MIN_LENGTH = 8
-S3_MAX_LENGTH = 1023
-S3_PATTERN = r"s3://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
-GCP_MIN_LENGTH = 8
-GCP_MAX_LENGTH = 1023
-GCP_PATTERN = r"gs://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
-
-RepoUrl = Annotated[
-    Url,
-    UrlConstraints(
-        allowed_schemes=[
-            "ssh",
-            "https",
-            "http",
-        ]
-    ),
-]
-HirundoUrl = Annotated[
-    Url,
-    UrlConstraints(
-        allowed_schemes=[
-            "file",
-            "https",
-            "http",
-            "s3",
-            "gs",
-            "ssh",
-        ]
-    ),
-]
+import re
+import typing
+from typing import TYPE_CHECKING
+
+from hirundo._urls import (
+    LENGTH_CONSTRAINTS,
+    STORAGE_PATTERNS,
+)
+from hirundo.dataset_enum import DatasetMetadataType, LabelingType, StorageTypes
+from hirundo.labeling import COCO, YOLO, HirundoCSV, Keylabs
+
+if TYPE_CHECKING:
+    from hirundo._urls import HirundoUrl
+    from hirundo.dataset_qa import LabelingInfo
+    from hirundo.storage import (
+        ResponseStorageConfig,
+        StorageConfig,
+        StorageGCP,
+        StorageGCPOut,
+        StorageS3,
+        StorageS3Out,
+    )
+
+LABELING_TYPES_TO_DATASET_METADATA_TYPES = {
+    LabelingType.SINGLE_LABEL_CLASSIFICATION: [
+        DatasetMetadataType.HIRUNDO_CSV,
+    ],
+    LabelingType.OBJECT_DETECTION: [
+        DatasetMetadataType.HIRUNDO_CSV,
+        DatasetMetadataType.COCO,
+        DatasetMetadataType.YOLO,
+        DatasetMetadataType.KeylabsObjDetImages,
+        DatasetMetadataType.KeylabsObjDetVideo,
+    ],
+    LabelingType.OBJECT_SEGMENTATION: [
+        DatasetMetadataType.HIRUNDO_CSV,
+        DatasetMetadataType.KeylabsObjSegImages,
+        DatasetMetadataType.KeylabsObjSegVideo,
+    ],
+    LabelingType.SEMANTIC_SEGMENTATION: [
+        DatasetMetadataType.HIRUNDO_CSV,
+    ],
+    LabelingType.PANOPTIC_SEGMENTATION: [
+        DatasetMetadataType.HIRUNDO_CSV,
+    ],
+    LabelingType.SPEECH_TO_TEXT: [
+        DatasetMetadataType.HIRUNDO_CSV,
+    ],
+}
+
+
+def validate_s3_url(str_url: str, s3_config: "StorageS3 | StorageS3Out"):
+    if (
+        len(str_url) < LENGTH_CONSTRAINTS[StorageTypes.S3]["min_length"]
+        or len(str_url) > LENGTH_CONSTRAINTS[StorageTypes.S3]["max_length"]
+    ):
+        raise ValueError("S3 URL must be between 8 and 1023 characters")
+    elif not re.match(STORAGE_PATTERNS[StorageTypes.S3], str_url):
+        raise ValueError(
+            f"Invalid S3 URL. Pattern must match: {STORAGE_PATTERNS[StorageTypes.S3]}"
+        )
+    elif not str_url.startswith(f"{s3_config.bucket_url}/"):
+        raise ValueError(f"S3 URL must start with {s3_config.bucket_url}/")
+
+
+def validate_gcp_url(str_url: str, gcp_config: "StorageGCP | StorageGCPOut"):
+    matches = re.match(STORAGE_PATTERNS[StorageTypes.GCP], str_url)
+    if (
+        len(str_url) < LENGTH_CONSTRAINTS[StorageTypes.GCP]["min_length"]
+        or len(str_url) > LENGTH_CONSTRAINTS[StorageTypes.GCP]["max_length"]
+    ):
+        raise ValueError(
+            f"GCP URL must be between {LENGTH_CONSTRAINTS[StorageTypes.GCP]['min_length']}"
+            + f" and {LENGTH_CONSTRAINTS[StorageTypes.GCP]['max_length']} characters"
+        )
+    elif not matches:
+        raise ValueError(
+            f"Invalid GCP URL. Pattern must match: {STORAGE_PATTERNS[StorageTypes.GCP]}"
+        )
+    elif (
+        matches
+        and len(matches.group(1))
+        > LENGTH_CONSTRAINTS[StorageTypes.GCP]["bucket_max_length"]
+    ):
+        raise ValueError(
+            f"GCP bucket name must be between {LENGTH_CONSTRAINTS[StorageTypes.GCP]['bucket_min_length']} "
+            + f"and {LENGTH_CONSTRAINTS[StorageTypes.GCP]['bucket_max_length']} characters"
+        )
+    elif not str_url.startswith(f"gs://{gcp_config.bucket_name}/"):
+        raise ValueError(f"GCP URL must start with gs://{gcp_config.bucket_name}")
+
+
+def validate_url(
+    url: "HirundoUrl",
+    storage_config: "StorageConfig | ResponseStorageConfig",
+) -> "HirundoUrl":
+    s3_config = storage_config.s3
+    gcp_config = storage_config.gcp
+    git_config = storage_config.git
+    str_url = str(url)
+
+    if s3_config is not None:
+        validate_s3_url(str_url, s3_config)
+    elif gcp_config is not None:
+        validate_gcp_url(str_url, gcp_config)
+    elif (
+        git_config is not None
+        and not str_url.startswith("https://")
+        and not str_url.startswith("ssh://")
+    ):
+        raise ValueError("Git URL must start with https:// or ssh://")
+    elif storage_config.type == StorageTypes.LOCAL and not str_url.startswith(
+        "file:///datasets/"
+    ):
+        raise ValueError("Local URL must start with file:///datasets/")
+    return url
+
+
+def validate_labeling_type(
+    labeling_type: "LabelingType", labeling_info: "LabelingInfo"
+) -> None:
+    """
+    Validate that the labeling type is compatible with the labeling info
+
+    Args:
+        labeling_type: The type of labeling that will be performed
+        labeling_info: The labeling info to validate
+    """
+    dataset_metadata_types = LABELING_TYPES_TO_DATASET_METADATA_TYPES[labeling_type]
+    if labeling_info.type not in dataset_metadata_types:
+        raise ValueError(
+            f"Cannot use {labeling_info.type.name} labeling info with {labeling_type.name} datasets"
+        )
+
+
+def validate_labeling_info(
+    labeling_type: "LabelingType",
+    labeling_info: "typing.Union[LabelingInfo, list[LabelingInfo]]",
+    storage_config: "typing.Union[StorageConfig, ResponseStorageConfig]",
+) -> None:
+    """
+    Validate the labeling info for a dataset
+
+    Args:
+        labeling_type: The type of labeling that will be performed
+        labeling_info: The labeling info to validate
+        storage_config: The storage configuration for the dataset.
+            StorageConfig is used to validate the URLs in the labeling info
+    """
+    if isinstance(labeling_info, list):
+        for labeling in labeling_info:
+            validate_labeling_info(labeling_type, labeling, storage_config)
+        return
+    elif isinstance(labeling_info, HirundoCSV):
+        validate_url(labeling_info.csv_url, storage_config)
+    elif isinstance(labeling_info, COCO):
+        validate_url(labeling_info.json_url, storage_config)
+    elif isinstance(labeling_info, YOLO):
+        validate_url(labeling_info.labels_dir_url, storage_config)
+        if labeling_info.data_yaml_url is not None:
+            validate_url(labeling_info.data_yaml_url, storage_config)
+    elif isinstance(labeling_info, Keylabs):
+        validate_url(labeling_info.labels_dir_url, storage_config)
+    validate_labeling_type(labeling_type, labeling_info)
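
To see what the new compatibility check enforces, a short sketch; constructing YOLO with these keyword arguments and relying on its default `type` of DatasetMetadataType.YOLO are assumptions, and the bucket URLs are hypothetical:

from hirundo._constraints import validate_labeling_type
from hirundo.dataset_enum import LabelingType
from hirundo.labeling import YOLO

labeling = YOLO(
    labels_dir_url="s3://my-bucket/labels/",   # hypothetical bucket
    data_yaml_url="s3://my-bucket/data.yaml",  # hypothetical path
)
# YOLO appears only under OBJECT_DETECTION in LABELING_TYPES_TO_DATASET_METADATA_TYPES,
# so pairing it with a segmentation labeling type raises:
# ValueError: Cannot use YOLO labeling info with SEMANTIC_SEGMENTATION datasets
validate_labeling_type(LabelingType.SEMANTIC_SEGMENTATION, labeling)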
hirundo/_headers.py CHANGED
@@ -1,6 +1,6 @@
 from hirundo._env import API_KEY, check_api_key
 
-HIRUNDO_API_VERSION = "0.2"
+HIRUNDO_API_VERSION = "0.3"
 
 _json_headers = {
     "Content-Type": "application/json",
hirundo/_http.py CHANGED
@@ -1,4 +1,7 @@
+import requests as _requests
 from requests import Response
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
 
 import hirundo.logger
 
@@ -7,6 +10,56 @@ logger = hirundo.logger.get_logger(__name__)
 MINIMUM_CLIENT_SERVER_ERROR_CODE = 400
 
 
+def _build_retrying_session() -> _requests.Session:
+    # No more than 10 tries total (including the initial attempt)
+    # urllib3 Retry.total counts retries, not total attempts, so use 9 retries
+    retries = Retry(
+        total=9,
+        backoff_factor=1.0,
+        status_forcelist=(429,),
+        allowed_methods=("HEAD", "GET", "PUT", "POST", "PATCH", "DELETE", "OPTIONS"),
+        respect_retry_after_header=True,
+        raise_on_status=False,
+    )
+    adapter = HTTPAdapter(max_retries=retries)
+    session = _requests.Session()
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+    return session
+
+
+_SESSION = _build_retrying_session()
+
+
+class _RequestsShim:
+    """Shim exposing a subset of the requests API but backed by a retrying Session."""
+
+    HTTPError = _requests.HTTPError
+    Response = _requests.Response
+
+    def request(self, method: str, url: str, **kwargs) -> Response:
+        return _SESSION.request(method=method, url=url, **kwargs)
+
+    def get(self, url: str, **kwargs) -> Response:
+        return _SESSION.get(url, **kwargs)
+
+    def post(self, url: str, **kwargs) -> Response:
+        return _SESSION.post(url, **kwargs)
+
+    def delete(self, url: str, **kwargs) -> Response:
+        return _SESSION.delete(url, **kwargs)
+
+    def patch(self, url: str, **kwargs) -> Response:
+        return _SESSION.patch(url, **kwargs)
+
+    def put(self, url: str, **kwargs) -> Response:
+        return _SESSION.put(url, **kwargs)
+
+
+# Public shim to be imported by modules instead of the raw requests package
+requests = _RequestsShim()
+
+
 def raise_for_status_with_reason(response: Response):
     try:
         if response.status_code >= MINIMUM_CLIENT_SERVER_ERROR_CODE:
@@ -5,11 +5,11 @@ import uuid
 from collections.abc import AsyncGenerator, Generator
 
 import httpx
-import requests
 import urllib3
 from httpx_sse import ServerSentEvent, SSEError, aconnect_sse, connect_sse
 from stamina import retry
 
+from hirundo._http import requests
 from hirundo._timeouts import READ_TIMEOUT
 from hirundo.logger import get_logger
 
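
The second hunk above shows the intended usage at call sites: swap `import requests` for the shim import and keep the same call shape. A sketch of the net effect, with a hypothetical URL:

from hirundo._http import requests

# Same signature as requests.get, but the call now runs through the retrying
# Session: HTTP 429 responses are retried with exponential backoff, honouring
# Retry-After, up to 10 attempts in total.
response = requests.get("https://api.example.com/health")
response.raise_for_status()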
hirundo/_urls.py ADDED
@@ -0,0 +1,59 @@
+from typing import Annotated
+
+from pydantic import StringConstraints, UrlConstraints
+from pydantic_core import Url
+
+from hirundo.dataset_enum import StorageTypes
+
+S3BucketUrl = Annotated[
+    str,
+    StringConstraints(
+        min_length=8,
+        max_length=1023,
+        pattern=r"s3?://[a-z0-9.-]{3,64}[/]?",  # Only allow real S3 bucket URLs
+    ),
+]
+
+StorageConfigName = Annotated[
+    str,
+    StringConstraints(
+        min_length=1,
+        max_length=255,
+        pattern=r"^[a-zA-Z0-9-_]+$",
+    ),
+]
+
+STORAGE_PATTERNS: dict[StorageTypes, str] = {
+    StorageTypes.S3: r"^s3:\/\/[a-z0-9\.\-]{3,63}/[a-zA-Z0-9!\-\/_\.\*'\(\)]+$",
+    StorageTypes.GCP: r"^gs:\/\/([a-z0-9][a-z0-9_-]{1,61}[a-z0-9](\.[a-z0-9][a-z0-9_-]{1,61}[a-z0-9])*)\/[^\x00-\x1F\x7F-\x9F\r\n]*$",
+}
+
+
+LENGTH_CONSTRAINTS: dict[StorageTypes, dict] = {
+    StorageTypes.S3: {"min_length": 8, "max_length": 1023, "bucket_max_length": None},
+    StorageTypes.GCP: {"min_length": 8, "max_length": 1023, "bucket_max_length": 222},
+}
+
+RepoUrl = Annotated[
+    Url,
+    UrlConstraints(
+        allowed_schemes=[
+            "ssh",
+            "https",
+            "http",
+        ]
+    ),
+]
+HirundoUrl = Annotated[
+    Url,
+    UrlConstraints(
+        allowed_schemes=[
+            "file",
+            "https",
+            "http",
+            "s3",
+            "gs",
+            "ssh",
+        ]
+    ),
+]
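
Because RepoUrl and HirundoUrl are Annotated pydantic types, they can be exercised directly with a TypeAdapter; a quick sketch with hypothetical bucket and path names:

from pydantic import TypeAdapter

from hirundo._urls import HirundoUrl

adapter = TypeAdapter(HirundoUrl)
adapter.validate_python("gs://my-bucket/metadata.csv")  # OK: "gs" is an allowed scheme
adapter.validate_python("ftp://host/metadata.csv")      # raises ValidationError: scheme not allowed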
hirundo/cli.py CHANGED
@@ -88,7 +88,7 @@ def setup_api_key(
     ],
 ):
     """
-    Setup the API key for the Hirundo client library.
+    Setup the API key for the Hirundo Python SDK.
     Values are saved to a .env file in the current directory for use by the library in requests.
     """
     saved_to = upsert_env("API_KEY", api_key)
@@ -115,7 +115,7 @@ def change_api_remote(
     ],
 ):
     """
-    Change the API server address for the Hirundo client library.
+    Change the API server address for the Hirundo Python SDK.
     This is the same address where you access the Hirundo web interface.
     """
     api_host = fix_api_host(api_host)
@@ -151,7 +151,7 @@ def setup(
     ],
 ):
     """
-    Setup the Hirundo client library.
+    Setup the Hirundo Python SDK.
     """
     api_host = fix_api_host(api_host)
     api_host_saved_to = upsert_env("API_HOST", api_host)
@@ -198,9 +198,9 @@ def check_run(
     """
     Check the status of a run.
    """
-    from hirundo.dataset_optimization import OptimizationDataset
+    from hirundo.dataset_qa import QADataset
 
-    results = OptimizationDataset.check_run_by_id(run_id)
+    results = QADataset.check_run_by_id(run_id)
     print(f"Run results saved to {results.cached_zip_path}")
 
 
@@ -209,9 +209,9 @@ def list_runs():
     """
     List all runs available.
     """
-    from hirundo.dataset_optimization import OptimizationDataset
+    from hirundo.dataset_qa import QADataset
 
-    runs = OptimizationDataset.list_runs()
+    runs = QADataset.list_runs()
 
     console = Console()
     table = Table(
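
Both CLI commands now delegate to QADataset rather than OptimizationDataset. The equivalent SDK calls, sketched with a hypothetical run ID:

from hirundo.dataset_qa import QADataset

results = QADataset.check_run_by_id("hypothetical-run-id")
print(f"Run results saved to {results.cached_zip_path}")

runs = QADataset.list_runs()  # what `list_runs` renders into its table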
hirundo/dataset_enum.py CHANGED
@@ -10,6 +10,9 @@ class LabelingType(str, Enum):
     SINGLE_LABEL_CLASSIFICATION = "SingleLabelClassification"
     OBJECT_DETECTION = "ObjectDetection"
     SPEECH_TO_TEXT = "SpeechToText"
+    OBJECT_SEGMENTATION = "ObjectSegmentation"
+    SEMANTIC_SEGMENTATION = "SemanticSegmentation"
+    PANOPTIC_SEGMENTATION = "PanopticSegmentation"
 
 
 class DatasetMetadataType(str, Enum):
@@ -21,3 +24,23 @@ class DatasetMetadataType(str, Enum):
     HIRUNDO_CSV = "HirundoCSV"
     COCO = "COCO"
     YOLO = "YOLO"
+    KeylabsObjDetImages = "KeylabsObjDetImages"
+    KeylabsObjDetVideo = "KeylabsObjDetVideo"
+    KeylabsObjSegImages = "KeylabsObjSegImages"
+    KeylabsObjSegVideo = "KeylabsObjSegVideo"
+
+
+class StorageTypes(str, Enum):
+    """
+    Enum for the different types of storage configs.
+    Supported types are:
+    """
+
+    S3 = "S3"
+    GCP = "GCP"
+    # AZURE = "Azure" TODO: Azure storage config is coming soon
+    GIT = "Git"
+    LOCAL = "Local"
+    """
+    Local storage config is only supported for on-premises installations.
+    """
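
StorageTypes now lives in hirundo.dataset_enum rather than hirundo.storage and is re-exported from the package root (see the __init__.py hunk above), so `from hirundo import StorageTypes` keeps working. The new members at a glance; the values are exactly the strings defined above:

from hirundo.dataset_enum import DatasetMetadataType, LabelingType, StorageTypes

LabelingType.OBJECT_SEGMENTATION.value        # "ObjectSegmentation"
DatasetMetadataType.KeylabsObjSegVideo.value  # "KeylabsObjSegVideo"
StorageTypes.LOCAL.value                      # "Local" (on-premises installations only)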