hirundo 0.1.8__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hirundo/__init__.py CHANGED
@@ -1,35 +1,50 @@
1
+ from .dataset_enum import (
2
+ DatasetMetadataType,
3
+ LabelingType,
4
+ )
1
5
  from .dataset_optimization import (
6
+ COCO,
7
+ YOLO,
8
+ HirundoCSV,
2
9
  HirundoError,
3
10
  OptimizationDataset,
11
+ RunArgs,
12
+ VisionRunArgs,
4
13
  )
5
- from .enum import (
6
- DatasetMetadataType,
7
- LabellingType,
8
- )
9
- from .git import GitRepo
14
+ from .dataset_optimization_results import DatasetOptimizationResults
15
+ from .git import GitPlainAuth, GitRepo, GitSSHAuth
10
16
  from .storage import (
17
+ StorageConfig,
11
18
  StorageGCP,
12
- # StorageAzure, TODO: Azure storage integration is coming soon
19
+ # StorageAzure, TODO: Azure storage is coming soon
13
20
  StorageGit,
14
- StorageIntegration,
15
- StorageLink,
16
21
  StorageS3,
17
22
  StorageTypes,
18
23
  )
24
+ from .unzip import load_df, load_from_zip
19
25
 
20
26
  __all__ = [
27
+ "COCO",
28
+ "YOLO",
29
+ "HirundoCSV",
21
30
  "HirundoError",
22
31
  "OptimizationDataset",
23
- "LabellingType",
32
+ "RunArgs",
33
+ "VisionRunArgs",
34
+ "LabelingType",
24
35
  "DatasetMetadataType",
36
+ "GitPlainAuth",
25
37
  "GitRepo",
26
- "StorageLink",
38
+ "GitSSHAuth",
27
39
  "StorageTypes",
28
40
  "StorageS3",
29
41
  "StorageGCP",
30
- # "StorageAzure", TODO: Azure storage integration is coming soon
42
+ # "StorageAzure", TODO: Azure storage is coming soon
31
43
  "StorageGit",
32
- "StorageIntegration",
44
+ "StorageConfig",
45
+ "DatasetOptimizationResults",
46
+ "load_df",
47
+ "load_from_zip",
33
48
  ]
34
49
 
35
- __version__ = "0.1.8"
50
+ __version__ = "0.1.16"
hirundo/_constraints.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from typing import Annotated
2
2
 
3
- from pydantic import StringConstraints
3
+ from pydantic import StringConstraints, UrlConstraints
4
+ from pydantic_core import Url
4
5
 
5
6
  S3BucketUrl = Annotated[
6
7
  str,
@@ -11,7 +12,7 @@ S3BucketUrl = Annotated[
11
12
  ),
12
13
  ]
13
14
 
14
- StorageIntegrationName = Annotated[
15
+ StorageConfigName = Annotated[
15
16
  str,
16
17
  StringConstraints(
17
18
  min_length=1,
@@ -19,3 +20,34 @@ StorageIntegrationName = Annotated[
19
20
  pattern=r"^[a-zA-Z0-9-_]+$",
20
21
  ),
21
22
  ]
23
+
24
+ S3_MIN_LENGTH = 8
25
+ S3_MAX_LENGTH = 1023
26
+ S3_PATTERN = r"s3://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
27
+ GCP_MIN_LENGTH = 8
28
+ GCP_MAX_LENGTH = 1023
29
+ GCP_PATTERN = r"gs://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
30
+
31
+ RepoUrl = Annotated[
32
+ Url,
33
+ UrlConstraints(
34
+ allowed_schemes=[
35
+ "ssh",
36
+ "https",
37
+ "http",
38
+ ]
39
+ ),
40
+ ]
41
+ HirundoUrl = Annotated[
42
+ Url,
43
+ UrlConstraints(
44
+ allowed_schemes=[
45
+ "file",
46
+ "https",
47
+ "http",
48
+ "s3",
49
+ "gs",
50
+ "ssh",
51
+ ]
52
+ ),
53
+ ]
hirundo/_dataframe.py ADDED
@@ -0,0 +1,43 @@
1
+ has_pandas = False
2
+ has_polars = False
3
+
4
+ pd = None
5
+ pl = None
6
+ int32 = type[None]
7
+ float32 = type[None]
8
+ string = type[None]
9
+ # ⬆️ These are just placeholders for the int32, float32 and string types
10
+ # for when neither pandas nor polars are available
11
+
12
+ try:
13
+ import numpy as np
14
+ import pandas as pd
15
+
16
+ has_pandas = True
17
+ int32 = np.int32
18
+ float32 = np.float32
19
+ string = str
20
+ except ImportError:
21
+ pass
22
+
23
+ try:
24
+ import polars as pl
25
+ import polars.datatypes as pl_datatypes
26
+
27
+ has_polars = True
28
+ int32 = pl_datatypes.Int32
29
+ float32 = pl_datatypes.Float32
30
+ string = pl_datatypes.String
31
+ except ImportError:
32
+ pass
33
+
34
+
35
+ __all__ = [
36
+ "has_polars",
37
+ "has_pandas",
38
+ "pd",
39
+ "pl",
40
+ "int32",
41
+ "float32",
42
+ "string",
43
+ ]
hirundo/_env.py CHANGED
@@ -2,11 +2,11 @@ import enum
2
2
  import os
3
3
  from pathlib import Path
4
4
 
5
- from dotenv import load_dotenv
5
+ from dotenv import find_dotenv, load_dotenv
6
6
 
7
7
 
8
8
  class EnvLocation(enum.Enum):
9
- DOTENV = Path.cwd() / ".env"
9
+ DOTENV = find_dotenv(".env")
10
10
  HOME = Path.home() / ".hirundo.conf"
11
11
 
12
12
 
hirundo/_headers.py CHANGED
@@ -1,13 +1,29 @@
1
1
  from hirundo._env import API_KEY, check_api_key
2
2
 
3
- json_headers = {
3
+ HIRUNDO_API_VERSION = "0.2"
4
+
5
+ _json_headers = {
4
6
  "Content-Type": "application/json",
5
7
  "Accept": "application/json",
6
8
  }
7
9
 
8
10
 
9
- def get_auth_headers():
11
+ def _get_auth_headers():
10
12
  check_api_key()
11
13
  return {
12
14
  "Authorization": f"Bearer {API_KEY}",
13
15
  }
16
+
17
+
18
+ def _get_api_version_header():
19
+ return {
20
+ "HIRUNDO-API-VERSION": HIRUNDO_API_VERSION,
21
+ }
22
+
23
+
24
+ def get_headers():
25
+ return {
26
+ **_json_headers,
27
+ **_get_auth_headers(),
28
+ **_get_api_version_header(),
29
+ }
hirundo/_http.py CHANGED
@@ -4,11 +4,16 @@ import hirundo.logger
4
4
 
5
5
  logger = hirundo.logger.get_logger(__name__)
6
6
 
7
+ MINIMUM_CLIENT_SERVER_ERROR_CODE = 400
8
+
7
9
 
8
10
  def raise_for_status_with_reason(response: Response):
9
11
  try:
10
- response.reason = response.json().get("reason", None)
12
+ if response.status_code >= MINIMUM_CLIENT_SERVER_ERROR_CODE:
13
+ response.reason = response.json().get("reason", None)
14
+ if response.reason is None:
15
+ response.reason = response.json().get("detail", None)
11
16
  except Exception as e:
12
- logger.debug("Failed to parse response as JSON: %s", e)
17
+ logger.debug("Could not parse response as JSON: %s", e)
13
18
 
14
19
  response.raise_for_status()
@@ -1,12 +1,20 @@
1
1
  import asyncio
2
2
  import time
3
3
  import typing
4
+ import uuid
4
5
  from collections.abc import AsyncGenerator, Generator
5
6
 
6
7
  import httpx
7
- from httpx_sse import ServerSentEvent, aconnect_sse, connect_sse
8
+ import requests
9
+ import urllib3
10
+ from httpx_sse import ServerSentEvent, SSEError, aconnect_sse, connect_sse
8
11
  from stamina import retry
9
12
 
13
+ from hirundo._timeouts import READ_TIMEOUT
14
+ from hirundo.logger import get_logger
15
+
16
+ logger = get_logger(__name__)
17
+
10
18
 
11
19
  # Credit: https://github.com/florimondmanca/httpx-sse/blob/master/README.md#handling-reconnections
12
20
  def iter_sse_retrying(
@@ -28,7 +36,13 @@ def iter_sse_retrying(
28
36
  # This may happen when the server is overloaded and closes the connection or
29
37
  # when Kubernetes restarts / replaces a pod.
30
38
  # Likewise, this will likely be temporary, hence the retries.
31
- @retry(on=(httpx.ReadError, httpx.RemoteProtocolError))
39
+ @retry(
40
+ on=(
41
+ httpx.ReadError,
42
+ httpx.RemoteProtocolError,
43
+ urllib3.exceptions.ReadTimeoutError,
44
+ )
45
+ )
32
46
  def _iter_sse():
33
47
  nonlocal last_event_id, reconnection_delay
34
48
 
@@ -44,13 +58,27 @@ def iter_sse_retrying(
44
58
  connect_headers["Last-Event-ID"] = last_event_id
45
59
 
46
60
  with connect_sse(client, method, url, headers=connect_headers) as event_source:
47
- for sse in event_source.iter_sse():
48
- last_event_id = sse.id
49
-
50
- if sse.retry is not None:
51
- reconnection_delay = sse.retry / 1000
52
-
53
- yield sse
61
+ try:
62
+ for sse in event_source.iter_sse():
63
+ last_event_id = sse.id
64
+
65
+ if sse.retry is not None:
66
+ reconnection_delay = sse.retry / 1000
67
+
68
+ yield sse
69
+ except SSEError:
70
+ logger.error("SSE error occurred. Trying regular request")
71
+ response = requests.get(
72
+ url,
73
+ headers=connect_headers,
74
+ timeout=READ_TIMEOUT,
75
+ )
76
+ yield ServerSentEvent(
77
+ event="",
78
+ data=response.text,
79
+ id=uuid.uuid4().hex,
80
+ retry=None,
81
+ )
54
82
 
55
83
  return _iter_sse()
56
84
 
@@ -72,7 +100,13 @@ async def aiter_sse_retrying(
72
100
  # This may happen when the server is overloaded and closes the connection or
73
101
  # when Kubernetes restarts / replaces a pod.
74
102
  # Likewise, this will likely be temporary, hence the retries.
75
- @retry(on=(httpx.ReadError, httpx.RemoteProtocolError))
103
+ @retry(
104
+ on=(
105
+ httpx.ReadError,
106
+ httpx.RemoteProtocolError,
107
+ urllib3.exceptions.ReadTimeoutError,
108
+ )
109
+ )
76
110
  async def _iter_sse() -> AsyncGenerator[ServerSentEvent, None]:
77
111
  nonlocal last_event_id, reconnection_delay
78
112
 
@@ -86,12 +120,22 @@ async def aiter_sse_retrying(
86
120
  async with aconnect_sse(
87
121
  client, method, url, headers=connect_headers
88
122
  ) as event_source:
89
- async for sse in event_source.aiter_sse():
90
- last_event_id = sse.id
91
-
92
- if sse.retry is not None:
93
- reconnection_delay = sse.retry / 1000
94
-
95
- yield sse
123
+ try:
124
+ async for sse in event_source.aiter_sse():
125
+ last_event_id = sse.id
126
+
127
+ if sse.retry is not None:
128
+ reconnection_delay = sse.retry / 1000
129
+
130
+ yield sse
131
+ except SSEError:
132
+ logger.error("SSE error occurred. Trying regular request")
133
+ response = await client.get(url, headers=connect_headers)
134
+ yield ServerSentEvent(
135
+ event="",
136
+ data=response.text,
137
+ id=uuid.uuid4().hex,
138
+ retry=None,
139
+ )
96
140
 
97
141
  return _iter_sse()
hirundo/_timeouts.py CHANGED
@@ -1,2 +1,3 @@
1
1
  READ_TIMEOUT = 30.0
2
2
  MODIFY_TIMEOUT = 60.0
3
+ DOWNLOAD_READ_TIMEOUT = 600.0 # 10 minutes
hirundo/cli.py CHANGED
@@ -7,6 +7,8 @@ from typing import Annotated
7
7
  from urllib.parse import urlparse
8
8
 
9
9
  import typer
10
+ from rich.console import Console
11
+ from rich.table import Table
10
12
 
11
13
  from hirundo._env import API_HOST, EnvLocation
12
14
 
@@ -189,6 +191,56 @@ def setup(
189
191
  )
190
192
 
191
193
 
194
+ @app.command("check-run", epilog=hirundo_epilog)
195
+ def check_run(
196
+ run_id: str,
197
+ ):
198
+ """
199
+ Check the status of a run.
200
+ """
201
+ from hirundo.dataset_optimization import OptimizationDataset
202
+
203
+ results = OptimizationDataset.check_run_by_id(run_id)
204
+ print(f"Run results saved to {results.cached_zip_path}")
205
+
206
+
207
+ @app.command("list-runs", epilog=hirundo_epilog)
208
+ def list_runs():
209
+ """
210
+ List all runs available.
211
+ """
212
+ from hirundo.dataset_optimization import OptimizationDataset
213
+
214
+ runs = OptimizationDataset.list_runs()
215
+
216
+ console = Console()
217
+ table = Table(
218
+ title="Runs:",
219
+ expand=True,
220
+ )
221
+ cols = (
222
+ "Dataset name",
223
+ "Run ID",
224
+ "Status",
225
+ "Created At",
226
+ "Run Args",
227
+ )
228
+ for col in cols:
229
+ table.add_column(
230
+ col,
231
+ overflow="fold",
232
+ )
233
+ for run in runs:
234
+ table.add_row(
235
+ str(run.name),
236
+ str(run.id),
237
+ str(run.status),
238
+ run.created_at.isoformat(),
239
+ run.run_args.model_dump_json() if run.run_args else None,
240
+ )
241
+ console.print(table)
242
+
243
+
192
244
  typer_click_object = typer.main.get_command(app)
193
245
 
194
246
  if __name__ == "__main__":
@@ -0,0 +1,23 @@
1
+ from enum import Enum
2
+
3
+
4
+ class LabelingType(str, Enum):
5
+ """
6
+ Enum indicating what type of labeling is used for the given dataset.
7
+ Supported types are:
8
+ """
9
+
10
+ SINGLE_LABEL_CLASSIFICATION = "SingleLabelClassification"
11
+ OBJECT_DETECTION = "ObjectDetection"
12
+ SPEECH_TO_TEXT = "SpeechToText"
13
+
14
+
15
+ class DatasetMetadataType(str, Enum):
16
+ """
17
+ Enum indicating what type of metadata is provided for the given dataset.
18
+ Supported types are:
19
+ """
20
+
21
+ HIRUNDO_CSV = "HirundoCSV"
22
+ COCO = "COCO"
23
+ YOLO = "YOLO"