hirundo 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hirundo/__init__.py +1 -1
- hirundo/_env.py +12 -1
- hirundo/_http.py +14 -0
- hirundo/_iter_sse_retrying.py +2 -2
- hirundo/cli.py +75 -16
- hirundo/dataset_optimization.py +111 -57
- hirundo/git.py +11 -9
- hirundo/logger.py +3 -1
- hirundo/storage.py +17 -17
- hirundo-0.1.8.dist-info/METADATA +176 -0
- hirundo-0.1.8.dist-info/RECORD +20 -0
- {hirundo-0.1.7.dist-info → hirundo-0.1.8.dist-info}/WHEEL +1 -1
- hirundo-0.1.7.dist-info/METADATA +0 -118
- hirundo-0.1.7.dist-info/RECORD +0 -19
- {hirundo-0.1.7.dist-info → hirundo-0.1.8.dist-info}/LICENSE +0 -0
- {hirundo-0.1.7.dist-info → hirundo-0.1.8.dist-info}/entry_points.txt +0 -0
- {hirundo-0.1.7.dist-info → hirundo-0.1.8.dist-info}/top_level.txt +0 -0
hirundo/__init__.py
CHANGED
hirundo/_env.py
CHANGED
|
@@ -1,8 +1,19 @@
|
|
|
1
|
+
import enum
|
|
1
2
|
import os
|
|
3
|
+
from pathlib import Path
|
|
2
4
|
|
|
3
5
|
from dotenv import load_dotenv
|
|
4
6
|
|
|
5
|
-
|
|
7
|
+
|
|
8
|
+
class EnvLocation(enum.Enum):
|
|
9
|
+
DOTENV = Path.cwd() / ".env"
|
|
10
|
+
HOME = Path.home() / ".hirundo.conf"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
if os.path.exists(EnvLocation.DOTENV.value):
|
|
14
|
+
load_dotenv(EnvLocation.DOTENV.value)
|
|
15
|
+
elif os.path.exists(EnvLocation.HOME.value):
|
|
16
|
+
load_dotenv(EnvLocation.HOME.value)
|
|
6
17
|
|
|
7
18
|
API_HOST = os.getenv("API_HOST", "https://api.hirundo.io")
|
|
8
19
|
API_KEY = os.getenv("API_KEY")
|
hirundo/_http.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from requests import Response
|
|
2
|
+
|
|
3
|
+
import hirundo.logger
|
|
4
|
+
|
|
5
|
+
logger = hirundo.logger.get_logger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def raise_for_status_with_reason(response: Response):
|
|
9
|
+
try:
|
|
10
|
+
response.reason = response.json().get("reason", None)
|
|
11
|
+
except Exception as e:
|
|
12
|
+
logger.debug("Failed to parse response as JSON: %s", e)
|
|
13
|
+
|
|
14
|
+
response.raise_for_status()
|
hirundo/_iter_sse_retrying.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import time
|
|
3
|
+
import typing
|
|
3
4
|
from collections.abc import AsyncGenerator, Generator
|
|
4
|
-
from typing import Union
|
|
5
5
|
|
|
6
6
|
import httpx
|
|
7
7
|
from httpx_sse import ServerSentEvent, aconnect_sse, connect_sse
|
|
@@ -13,7 +13,7 @@ def iter_sse_retrying(
|
|
|
13
13
|
client: httpx.Client,
|
|
14
14
|
method: str,
|
|
15
15
|
url: str,
|
|
16
|
-
headers:
|
|
16
|
+
headers: typing.Optional[dict[str, str]] = None,
|
|
17
17
|
) -> Generator[ServerSentEvent, None, None]:
|
|
18
18
|
if headers is None:
|
|
19
19
|
headers = {}
|
hirundo/cli.py
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
|
+
import os
|
|
1
2
|
import re
|
|
2
3
|
import sys
|
|
4
|
+
import typing
|
|
5
|
+
from pathlib import Path
|
|
3
6
|
from typing import Annotated
|
|
4
7
|
from urllib.parse import urlparse
|
|
5
8
|
|
|
6
9
|
import typer
|
|
7
10
|
|
|
8
|
-
from hirundo._env import API_HOST
|
|
11
|
+
from hirundo._env import API_HOST, EnvLocation
|
|
9
12
|
|
|
10
13
|
docs = "sphinx" in sys.modules
|
|
11
14
|
hirundo_epilog = (
|
|
@@ -23,7 +26,9 @@ app = typer.Typer(
|
|
|
23
26
|
)
|
|
24
27
|
|
|
25
28
|
|
|
26
|
-
def
|
|
29
|
+
def _upsert_env(
|
|
30
|
+
dotenv_filepath: typing.Union[str, Path], var_name: str, var_value: str
|
|
31
|
+
):
|
|
27
32
|
"""
|
|
28
33
|
Change an environment variable in the .env file.
|
|
29
34
|
If the variable does not exist, it will be added.
|
|
@@ -32,18 +37,30 @@ def upsert_env(var_name: str, var_value: str):
|
|
|
32
37
|
var_name: The name of the environment variable to change.
|
|
33
38
|
var_value: The new value of the environment variable.
|
|
34
39
|
"""
|
|
35
|
-
dotenv = "./.env"
|
|
36
40
|
regex = re.compile(rf"^{var_name}=.*$")
|
|
37
|
-
|
|
38
|
-
|
|
41
|
+
lines = []
|
|
42
|
+
if os.path.exists(dotenv_filepath):
|
|
43
|
+
with open(dotenv_filepath) as f:
|
|
44
|
+
lines = f.readlines()
|
|
39
45
|
|
|
40
|
-
with open(
|
|
46
|
+
with open(dotenv_filepath, "w") as f:
|
|
41
47
|
f.writelines(line for line in lines if not regex.search(line) and line != "\n")
|
|
42
48
|
|
|
43
|
-
with open(
|
|
49
|
+
with open(dotenv_filepath, "a") as f:
|
|
44
50
|
f.writelines(f"\n{var_name}={var_value}")
|
|
45
51
|
|
|
46
52
|
|
|
53
|
+
def upsert_env(var_name: str, var_value: str):
|
|
54
|
+
if os.path.exists(EnvLocation.DOTENV.value):
|
|
55
|
+
# If a `.env` file exists, re-use it
|
|
56
|
+
_upsert_env(EnvLocation.DOTENV.value, var_name, var_value)
|
|
57
|
+
return EnvLocation.DOTENV.name
|
|
58
|
+
else:
|
|
59
|
+
# Create a `.hirundo.conf` file with environment variables in the home directory
|
|
60
|
+
_upsert_env(EnvLocation.HOME.value, var_name, var_value)
|
|
61
|
+
return EnvLocation.HOME.name
|
|
62
|
+
|
|
63
|
+
|
|
47
64
|
def fix_api_host(api_host: str):
|
|
48
65
|
if not api_host.startswith("http") and not api_host.startswith("https"):
|
|
49
66
|
api_host = f"https://{api_host}"
|
|
@@ -72,8 +89,15 @@ def setup_api_key(
|
|
|
72
89
|
Setup the API key for the Hirundo client library.
|
|
73
90
|
Values are saved to a .env file in the current directory for use by the library in requests.
|
|
74
91
|
"""
|
|
75
|
-
upsert_env("API_KEY", api_key)
|
|
76
|
-
|
|
92
|
+
saved_to = upsert_env("API_KEY", api_key)
|
|
93
|
+
if saved_to == EnvLocation.HOME.name:
|
|
94
|
+
print(
|
|
95
|
+
"API key saved to ~/.hirundo.conf for future use. Please do not share the ~/.hirundo.conf file since it contains your secret API key."
|
|
96
|
+
)
|
|
97
|
+
elif saved_to == EnvLocation.DOTENV.name:
|
|
98
|
+
print(
|
|
99
|
+
"API key saved to local .env file for future use. Please do not share the .env file since it contains your secret API key."
|
|
100
|
+
)
|
|
77
101
|
|
|
78
102
|
|
|
79
103
|
@app.command("change-remote", epilog=hirundo_epilog)
|
|
@@ -94,8 +118,13 @@ def change_api_remote(
|
|
|
94
118
|
"""
|
|
95
119
|
api_host = fix_api_host(api_host)
|
|
96
120
|
|
|
97
|
-
upsert_env("API_HOST", api_host)
|
|
98
|
-
|
|
121
|
+
saved_to = upsert_env("API_HOST", api_host)
|
|
122
|
+
if saved_to == EnvLocation.HOME.name:
|
|
123
|
+
print(
|
|
124
|
+
"API host saved to ~/.hirundo.conf for future use. Please do not share the ~/.hirundo.conf file"
|
|
125
|
+
)
|
|
126
|
+
elif saved_to == EnvLocation.DOTENV.name:
|
|
127
|
+
print("API host saved to .env for future use. Please do not share this file")
|
|
99
128
|
|
|
100
129
|
|
|
101
130
|
@app.command("setup", epilog=hirundo_epilog)
|
|
@@ -123,11 +152,41 @@ def setup(
|
|
|
123
152
|
Setup the Hirundo client library.
|
|
124
153
|
"""
|
|
125
154
|
api_host = fix_api_host(api_host)
|
|
126
|
-
upsert_env("API_HOST", api_host)
|
|
127
|
-
upsert_env("API_KEY", api_key)
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
155
|
+
api_host_saved_to = upsert_env("API_HOST", api_host)
|
|
156
|
+
api_key_saved_to = upsert_env("API_KEY", api_key)
|
|
157
|
+
if api_host_saved_to != api_key_saved_to:
|
|
158
|
+
print(
|
|
159
|
+
"API host and API key saved to different locations. This should not happen. Please report this issue."
|
|
160
|
+
)
|
|
161
|
+
if (
|
|
162
|
+
api_host_saved_to == EnvLocation.HOME.name
|
|
163
|
+
and api_key_saved_to == EnvLocation.DOTENV.name
|
|
164
|
+
):
|
|
165
|
+
print(
|
|
166
|
+
"API host saved to ~/.hirundo.conf for future use. Please do not share the ~/.hirundo.conf file"
|
|
167
|
+
)
|
|
168
|
+
print(
|
|
169
|
+
"API key saved to local .env file for future use. Please do not share the .env file since it contains your secret API key."
|
|
170
|
+
)
|
|
171
|
+
elif (
|
|
172
|
+
api_host_saved_to == EnvLocation.DOTENV.name
|
|
173
|
+
and api_key_saved_to == EnvLocation.HOME.name
|
|
174
|
+
):
|
|
175
|
+
print(
|
|
176
|
+
"API host saved to .env for future use. Please do not share this file"
|
|
177
|
+
)
|
|
178
|
+
print(
|
|
179
|
+
"API key saved to ~/.hirundo.conf for future use. Please do not share the ~/.hirundo.conf file since it contains your secret API key."
|
|
180
|
+
)
|
|
181
|
+
return
|
|
182
|
+
if api_host_saved_to == EnvLocation.HOME.name:
|
|
183
|
+
print(
|
|
184
|
+
"API host and API key saved to ~/.hirundo.conf for future use. Please do not share the ~/.hirundo.conf file since it contains your secret API key."
|
|
185
|
+
)
|
|
186
|
+
elif api_host_saved_to == EnvLocation.DOTENV.name:
|
|
187
|
+
print(
|
|
188
|
+
"API host and API key saved to .env for future use. Please do not share this file since it contains your secret API key."
|
|
189
|
+
)
|
|
131
190
|
|
|
132
191
|
|
|
133
192
|
typer_click_object = typer.main.get_command(app)
|
hirundo/dataset_optimization.py
CHANGED
|
@@ -3,17 +3,20 @@ import typing
|
|
|
3
3
|
from collections.abc import AsyncGenerator, Generator
|
|
4
4
|
from enum import Enum
|
|
5
5
|
from io import StringIO
|
|
6
|
-
from typing import
|
|
6
|
+
from typing import overload
|
|
7
7
|
|
|
8
8
|
import httpx
|
|
9
|
+
import numpy as np
|
|
9
10
|
import pandas as pd
|
|
10
11
|
import requests
|
|
12
|
+
from pandas._typing import DtypeArg
|
|
11
13
|
from pydantic import BaseModel, Field, model_validator
|
|
12
14
|
from tqdm import tqdm
|
|
13
15
|
from tqdm.contrib.logging import logging_redirect_tqdm
|
|
14
16
|
|
|
15
17
|
from hirundo._env import API_HOST
|
|
16
18
|
from hirundo._headers import get_auth_headers, json_headers
|
|
19
|
+
from hirundo._http import raise_for_status_with_reason
|
|
17
20
|
from hirundo._iter_sse_retrying import aiter_sse_retrying, iter_sse_retrying
|
|
18
21
|
from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
|
|
19
22
|
from hirundo.enum import DatasetMetadataType, LabellingType
|
|
@@ -40,6 +43,58 @@ class RunStatus(Enum):
|
|
|
40
43
|
SUCCESS = "SUCCESS"
|
|
41
44
|
FAILURE = "FAILURE"
|
|
42
45
|
AWAITING_MANUAL_APPROVAL = "AWAITING MANUAL APPROVAL"
|
|
46
|
+
RETRYING = "RETRYING"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
STATUS_TO_TEXT_MAP = {
|
|
50
|
+
RunStatus.STARTED.value: "Optimization run in progress. Downloading dataset",
|
|
51
|
+
RunStatus.PENDING.value: "Optimization run queued and not yet started",
|
|
52
|
+
RunStatus.SUCCESS.value: "Optimization run completed successfully",
|
|
53
|
+
RunStatus.FAILURE.value: "Optimization run failed",
|
|
54
|
+
RunStatus.AWAITING_MANUAL_APPROVAL.value: "Awaiting manual approval",
|
|
55
|
+
RunStatus.RETRYING.value: "Optimization run failed. Retrying",
|
|
56
|
+
}
|
|
57
|
+
STATUS_TO_PROGRESS_MAP = {
|
|
58
|
+
RunStatus.STARTED.value: 0.0,
|
|
59
|
+
RunStatus.PENDING.value: 0.0,
|
|
60
|
+
RunStatus.SUCCESS.value: 100.0,
|
|
61
|
+
RunStatus.FAILURE.value: 100.0,
|
|
62
|
+
RunStatus.AWAITING_MANUAL_APPROVAL.value: 100.0,
|
|
63
|
+
RunStatus.RETRYING.value: 0.0,
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class DatasetOptimizationResults(BaseModel):
|
|
68
|
+
model_config = {"arbitrary_types_allowed": True}
|
|
69
|
+
|
|
70
|
+
suspects: pd.DataFrame
|
|
71
|
+
"""
|
|
72
|
+
A pandas DataFrame containing the results of the optimization run
|
|
73
|
+
"""
|
|
74
|
+
warnings_and_errors: pd.DataFrame
|
|
75
|
+
"""
|
|
76
|
+
A pandas DataFrame containing the warnings and errors of the optimization run
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
CUSTOMER_INTERCHANGE_DTYPES: DtypeArg = {
|
|
81
|
+
"image_path": str,
|
|
82
|
+
"label_path": str,
|
|
83
|
+
"segments_mask_path": str,
|
|
84
|
+
"segment_id": np.int32,
|
|
85
|
+
"label": str,
|
|
86
|
+
"bbox_id": str,
|
|
87
|
+
"xmin": np.int32,
|
|
88
|
+
"ymin": np.int32,
|
|
89
|
+
"xmax": np.int32,
|
|
90
|
+
"ymax": np.int32,
|
|
91
|
+
"suspect_level": np.float32, # If exists, must be one of the values in the enum below
|
|
92
|
+
"suggested_label": str,
|
|
93
|
+
"suggested_label_conf": np.float32,
|
|
94
|
+
"status": str,
|
|
95
|
+
# ⬆️ If exists, must be one of the following:
|
|
96
|
+
# NO_LABELS/MISSING_IMAGE/INVALID_IMAGE/INVALID_BBOX/INVALID_BBOX_SIZE/INVALID_SEG/INVALID_SEG_SIZE
|
|
97
|
+
}
|
|
43
98
|
|
|
44
99
|
|
|
45
100
|
class OptimizationDataset(BaseModel):
|
|
@@ -54,13 +109,13 @@ class OptimizationDataset(BaseModel):
|
|
|
54
109
|
- `LabellingType.SingleLabelClassification`: Indicates that the dataset is for classification tasks
|
|
55
110
|
- `LabellingType.ObjectDetection`: Indicates that the dataset is for object detection tasks
|
|
56
111
|
"""
|
|
57
|
-
dataset_storage:
|
|
112
|
+
dataset_storage: typing.Optional[StorageLink]
|
|
58
113
|
"""
|
|
59
114
|
The storage link to the dataset. This can be a link to a file or a directory containing the dataset.
|
|
60
115
|
If `None`, the `dataset_id` field must be set.
|
|
61
116
|
"""
|
|
62
117
|
|
|
63
|
-
classes: list[str]
|
|
118
|
+
classes: typing.Optional[list[str]] = None
|
|
64
119
|
"""
|
|
65
120
|
A full list of possible classes used in classification / object detection.
|
|
66
121
|
It is currently required for clarity and performance.
|
|
@@ -78,15 +133,15 @@ class OptimizationDataset(BaseModel):
|
|
|
78
133
|
Currently no other formats are supported. Future versions of `hirundo` may support additional formats.
|
|
79
134
|
"""
|
|
80
135
|
|
|
81
|
-
storage_integration_id:
|
|
136
|
+
storage_integration_id: typing.Optional[int] = Field(default=None, init=False)
|
|
82
137
|
"""
|
|
83
138
|
The ID of the storage integration used to store the dataset and metadata.
|
|
84
139
|
"""
|
|
85
|
-
dataset_id:
|
|
140
|
+
dataset_id: typing.Optional[int] = Field(default=None, init=False)
|
|
86
141
|
"""
|
|
87
142
|
The ID of the dataset created on the server.
|
|
88
143
|
"""
|
|
89
|
-
run_id:
|
|
144
|
+
run_id: typing.Optional[str] = Field(default=None, init=False)
|
|
90
145
|
"""
|
|
91
146
|
The ID of the Dataset Optimization run created on the server.
|
|
92
147
|
"""
|
|
@@ -98,7 +153,7 @@ class OptimizationDataset(BaseModel):
|
|
|
98
153
|
return self
|
|
99
154
|
|
|
100
155
|
@staticmethod
|
|
101
|
-
def list(organization_id:
|
|
156
|
+
def list(organization_id: typing.Optional[int] = None) -> list[dict]:
|
|
102
157
|
"""
|
|
103
158
|
Lists all the `OptimizationDataset` instances created by user's default organization
|
|
104
159
|
or the `organization_id` passed
|
|
@@ -113,7 +168,7 @@ class OptimizationDataset(BaseModel):
|
|
|
113
168
|
headers=get_auth_headers(),
|
|
114
169
|
timeout=READ_TIMEOUT,
|
|
115
170
|
)
|
|
116
|
-
response
|
|
171
|
+
raise_for_status_with_reason(response)
|
|
117
172
|
return response.json()
|
|
118
173
|
|
|
119
174
|
@staticmethod
|
|
@@ -129,7 +184,7 @@ class OptimizationDataset(BaseModel):
|
|
|
129
184
|
headers=get_auth_headers(),
|
|
130
185
|
timeout=MODIFY_TIMEOUT,
|
|
131
186
|
)
|
|
132
|
-
response
|
|
187
|
+
raise_for_status_with_reason(response)
|
|
133
188
|
logger.info("Deleted dataset with ID: %s", dataset_id)
|
|
134
189
|
|
|
135
190
|
def delete(self, storage_integration=True) -> None:
|
|
@@ -184,7 +239,7 @@ class OptimizationDataset(BaseModel):
|
|
|
184
239
|
},
|
|
185
240
|
timeout=MODIFY_TIMEOUT,
|
|
186
241
|
)
|
|
187
|
-
dataset_response
|
|
242
|
+
raise_for_status_with_reason(dataset_response)
|
|
188
243
|
self.dataset_id = dataset_response.json()["id"]
|
|
189
244
|
if not self.dataset_id:
|
|
190
245
|
raise HirundoError("Failed to create the dataset")
|
|
@@ -208,7 +263,7 @@ class OptimizationDataset(BaseModel):
|
|
|
208
263
|
headers=get_auth_headers(),
|
|
209
264
|
timeout=MODIFY_TIMEOUT,
|
|
210
265
|
)
|
|
211
|
-
run_response
|
|
266
|
+
raise_for_status_with_reason(run_response)
|
|
212
267
|
return run_response.json()["run_id"]
|
|
213
268
|
|
|
214
269
|
def run_optimization(self) -> str:
|
|
@@ -274,10 +329,19 @@ class OptimizationDataset(BaseModel):
|
|
|
274
329
|
return df
|
|
275
330
|
|
|
276
331
|
@staticmethod
|
|
277
|
-
def
|
|
332
|
+
def _read_csvs_to_df(data: dict):
|
|
278
333
|
if data["state"] == RunStatus.SUCCESS.value:
|
|
279
|
-
data["result"] = OptimizationDataset._clean_df_index(
|
|
280
|
-
pd.read_csv(
|
|
334
|
+
data["result"]["suspects"] = OptimizationDataset._clean_df_index(
|
|
335
|
+
pd.read_csv(
|
|
336
|
+
StringIO(data["result"]["suspects"]),
|
|
337
|
+
dtype=CUSTOMER_INTERCHANGE_DTYPES,
|
|
338
|
+
)
|
|
339
|
+
)
|
|
340
|
+
data["result"]["warnings_and_errors"] = OptimizationDataset._clean_df_index(
|
|
341
|
+
pd.read_csv(
|
|
342
|
+
StringIO(data["result"]["warnings_and_errors"]),
|
|
343
|
+
dtype=CUSTOMER_INTERCHANGE_DTYPES,
|
|
344
|
+
)
|
|
281
345
|
)
|
|
282
346
|
else:
|
|
283
347
|
pass
|
|
@@ -307,7 +371,7 @@ class OptimizationDataset(BaseModel):
|
|
|
307
371
|
if not last_event:
|
|
308
372
|
continue
|
|
309
373
|
data = last_event["data"]
|
|
310
|
-
OptimizationDataset.
|
|
374
|
+
OptimizationDataset._read_csvs_to_df(data)
|
|
311
375
|
yield data
|
|
312
376
|
if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
|
|
313
377
|
OptimizationDataset._check_run_by_id(run_id, retry + 1)
|
|
@@ -316,27 +380,24 @@ class OptimizationDataset(BaseModel):
|
|
|
316
380
|
@overload
|
|
317
381
|
def check_run_by_id(
|
|
318
382
|
run_id: str, stop_on_manual_approval: typing.Literal[True]
|
|
319
|
-
) -> typing.Optional[
|
|
320
|
-
...
|
|
383
|
+
) -> typing.Optional[DatasetOptimizationResults]: ...
|
|
321
384
|
|
|
322
385
|
@staticmethod
|
|
323
386
|
@overload
|
|
324
387
|
def check_run_by_id(
|
|
325
388
|
run_id: str, stop_on_manual_approval: typing.Literal[False] = False
|
|
326
|
-
) ->
|
|
327
|
-
...
|
|
389
|
+
) -> DatasetOptimizationResults: ...
|
|
328
390
|
|
|
329
391
|
@staticmethod
|
|
330
392
|
@overload
|
|
331
393
|
def check_run_by_id(
|
|
332
394
|
run_id: str, stop_on_manual_approval: bool
|
|
333
|
-
) -> typing.Optional[
|
|
334
|
-
...
|
|
395
|
+
) -> typing.Optional[DatasetOptimizationResults]: ...
|
|
335
396
|
|
|
336
397
|
@staticmethod
|
|
337
398
|
def check_run_by_id(
|
|
338
399
|
run_id: str, stop_on_manual_approval: bool = False
|
|
339
|
-
) -> typing.Optional[
|
|
400
|
+
) -> typing.Optional[DatasetOptimizationResults]:
|
|
340
401
|
"""
|
|
341
402
|
Check the status of a run given its ID
|
|
342
403
|
|
|
@@ -345,7 +406,7 @@ class OptimizationDataset(BaseModel):
|
|
|
345
406
|
stop_on_manual_approval: If True, the function will return `None` if the run is awaiting manual approval
|
|
346
407
|
|
|
347
408
|
Returns:
|
|
348
|
-
A
|
|
409
|
+
A DatasetOptimizationResults object with the results of the optimization run
|
|
349
410
|
|
|
350
411
|
Raises:
|
|
351
412
|
HirundoError: If the maximum number of retries is reached or if the run fails
|
|
@@ -354,22 +415,29 @@ class OptimizationDataset(BaseModel):
|
|
|
354
415
|
with logging_redirect_tqdm():
|
|
355
416
|
t = tqdm(total=100.0)
|
|
356
417
|
for iteration in OptimizationDataset._check_run_by_id(run_id):
|
|
357
|
-
if iteration["state"]
|
|
358
|
-
t.set_description("
|
|
359
|
-
t.n =
|
|
360
|
-
t.
|
|
361
|
-
t.close()
|
|
362
|
-
return iteration["result"]
|
|
363
|
-
elif iteration["state"] == RunStatus.PENDING.value:
|
|
364
|
-
t.set_description("Optimization run queued and not yet started")
|
|
365
|
-
t.n = 0.0
|
|
366
|
-
t.refresh()
|
|
367
|
-
elif iteration["state"] == RunStatus.STARTED.value:
|
|
368
|
-
t.set_description(
|
|
369
|
-
"Optimization run in progress. Downloading dataset"
|
|
370
|
-
)
|
|
371
|
-
t.n = 0.0
|
|
418
|
+
if iteration["state"] in STATUS_TO_PROGRESS_MAP:
|
|
419
|
+
t.set_description(STATUS_TO_TEXT_MAP[iteration["state"]])
|
|
420
|
+
t.n = STATUS_TO_PROGRESS_MAP[iteration["state"]]
|
|
421
|
+
logger.debug("Setting progress to %s", t.n)
|
|
372
422
|
t.refresh()
|
|
423
|
+
if iteration["state"] == RunStatus.FAILURE.value:
|
|
424
|
+
raise HirundoError(
|
|
425
|
+
f"Optimization run failed with error: {iteration['result']}"
|
|
426
|
+
)
|
|
427
|
+
elif iteration["state"] == RunStatus.SUCCESS.value:
|
|
428
|
+
t.close()
|
|
429
|
+
return DatasetOptimizationResults(
|
|
430
|
+
suspects=iteration["result"]["suspects"],
|
|
431
|
+
warnings_and_errors=iteration["result"][
|
|
432
|
+
"warnings_and_errors"
|
|
433
|
+
],
|
|
434
|
+
)
|
|
435
|
+
elif (
|
|
436
|
+
iteration["state"] == RunStatus.AWAITING_MANUAL_APPROVAL.value
|
|
437
|
+
and stop_on_manual_approval
|
|
438
|
+
):
|
|
439
|
+
t.close()
|
|
440
|
+
return None
|
|
373
441
|
elif iteration["state"] is None:
|
|
374
442
|
if (
|
|
375
443
|
iteration["result"]
|
|
@@ -387,37 +455,23 @@ class OptimizationDataset(BaseModel):
|
|
|
387
455
|
)
|
|
388
456
|
t.set_description(desc)
|
|
389
457
|
t.n = current_progress_percentage
|
|
458
|
+
logger.debug("Setting progress to %s", t.n)
|
|
390
459
|
t.refresh()
|
|
391
|
-
elif iteration["state"] == RunStatus.AWAITING_MANUAL_APPROVAL.value:
|
|
392
|
-
t.set_description("Awaiting manual approval")
|
|
393
|
-
t.n = 100.0
|
|
394
|
-
t.refresh()
|
|
395
|
-
if stop_on_manual_approval:
|
|
396
|
-
t.close()
|
|
397
|
-
return None
|
|
398
|
-
elif iteration["state"] == RunStatus.FAILURE.value:
|
|
399
|
-
t.set_description("Optimization run failed")
|
|
400
|
-
t.close()
|
|
401
|
-
raise HirundoError(
|
|
402
|
-
f"Optimization run failed with error: {iteration['result']}"
|
|
403
|
-
)
|
|
404
460
|
raise HirundoError("Optimization run failed with an unknown error")
|
|
405
461
|
|
|
406
462
|
@overload
|
|
407
463
|
def check_run(
|
|
408
464
|
self, stop_on_manual_approval: typing.Literal[True]
|
|
409
|
-
) -> typing.
|
|
410
|
-
...
|
|
465
|
+
) -> typing.Optional[DatasetOptimizationResults]: ...
|
|
411
466
|
|
|
412
467
|
@overload
|
|
413
468
|
def check_run(
|
|
414
469
|
self, stop_on_manual_approval: typing.Literal[False] = False
|
|
415
|
-
) ->
|
|
416
|
-
...
|
|
470
|
+
) -> DatasetOptimizationResults: ...
|
|
417
471
|
|
|
418
472
|
def check_run(
|
|
419
473
|
self, stop_on_manual_approval: bool = False
|
|
420
|
-
) -> typing.
|
|
474
|
+
) -> typing.Optional[DatasetOptimizationResults]:
|
|
421
475
|
"""
|
|
422
476
|
Check the status of the current active instance's run.
|
|
423
477
|
|
|
@@ -511,7 +565,7 @@ class OptimizationDataset(BaseModel):
|
|
|
511
565
|
headers=get_auth_headers(),
|
|
512
566
|
timeout=MODIFY_TIMEOUT,
|
|
513
567
|
)
|
|
514
|
-
response
|
|
568
|
+
raise_for_status_with_reason(response)
|
|
515
569
|
|
|
516
570
|
def cancel(self) -> None:
|
|
517
571
|
"""
|
hirundo/git.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
|
-
|
|
2
|
+
import typing
|
|
3
|
+
from typing import Annotated
|
|
3
4
|
|
|
4
5
|
import pydantic
|
|
5
6
|
import requests
|
|
@@ -8,6 +9,7 @@ from pydantic_core import Url
|
|
|
8
9
|
|
|
9
10
|
from hirundo._env import API_HOST
|
|
10
11
|
from hirundo._headers import get_auth_headers, json_headers
|
|
12
|
+
from hirundo._http import raise_for_status_with_reason
|
|
11
13
|
from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
|
|
12
14
|
from hirundo.logger import get_logger
|
|
13
15
|
|
|
@@ -30,14 +32,14 @@ class GitSSHAuthBase(BaseModel):
|
|
|
30
32
|
"""
|
|
31
33
|
The SSH key for the Git repository
|
|
32
34
|
"""
|
|
33
|
-
ssh_password:
|
|
35
|
+
ssh_password: typing.Optional[str]
|
|
34
36
|
"""
|
|
35
37
|
The password for the SSH key for the Git repository.
|
|
36
38
|
"""
|
|
37
39
|
|
|
38
40
|
|
|
39
41
|
class GitRepo(BaseModel):
|
|
40
|
-
id:
|
|
42
|
+
id: typing.Optional[int] = None
|
|
41
43
|
"""
|
|
42
44
|
The ID of the Git repository.
|
|
43
45
|
"""
|
|
@@ -51,20 +53,20 @@ class GitRepo(BaseModel):
|
|
|
51
53
|
The URL of the Git repository, it should start with `ssh://` or `https://` or be in the form `user@host:path`.
|
|
52
54
|
If it is in the form `user@host:path`, it will be rewritten to `ssh://user@host:path`.
|
|
53
55
|
"""
|
|
54
|
-
organization_id:
|
|
56
|
+
organization_id: typing.Optional[int] = None
|
|
55
57
|
"""
|
|
56
58
|
The ID of the organization that the Git repository belongs to.
|
|
57
59
|
If not provided, it will be assigned to your default organization.
|
|
58
60
|
"""
|
|
59
61
|
|
|
60
|
-
plain_auth:
|
|
62
|
+
plain_auth: typing.Optional[GitPlainAuthBase] = pydantic.Field(
|
|
61
63
|
default=None, examples=[None, {"username": "ben", "password": "password"}]
|
|
62
64
|
)
|
|
63
65
|
"""
|
|
64
66
|
The plain authentication details for the Git repository.
|
|
65
67
|
Use this if using a special user with a username and password for authentication.
|
|
66
68
|
"""
|
|
67
|
-
ssh_auth:
|
|
69
|
+
ssh_auth: typing.Optional[GitSSHAuthBase] = pydantic.Field(
|
|
68
70
|
default=None,
|
|
69
71
|
examples=[
|
|
70
72
|
{
|
|
@@ -112,7 +114,7 @@ class GitRepo(BaseModel):
|
|
|
112
114
|
},
|
|
113
115
|
timeout=MODIFY_TIMEOUT,
|
|
114
116
|
)
|
|
115
|
-
git_repo
|
|
117
|
+
raise_for_status_with_reason(git_repo)
|
|
116
118
|
git_repo_id = git_repo.json()["id"]
|
|
117
119
|
self.id = git_repo_id
|
|
118
120
|
return git_repo_id
|
|
@@ -129,7 +131,7 @@ class GitRepo(BaseModel):
|
|
|
129
131
|
},
|
|
130
132
|
timeout=READ_TIMEOUT,
|
|
131
133
|
)
|
|
132
|
-
git_repos
|
|
134
|
+
raise_for_status_with_reason(git_repos)
|
|
133
135
|
return git_repos.json()
|
|
134
136
|
|
|
135
137
|
@staticmethod
|
|
@@ -147,7 +149,7 @@ class GitRepo(BaseModel):
|
|
|
147
149
|
},
|
|
148
150
|
timeout=MODIFY_TIMEOUT,
|
|
149
151
|
)
|
|
150
|
-
git_repo
|
|
152
|
+
raise_for_status_with_reason(git_repo)
|
|
151
153
|
|
|
152
154
|
def delete(self):
|
|
153
155
|
"""
|
hirundo/logger.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import os
|
|
2
3
|
|
|
3
4
|
|
|
4
5
|
def get_logger(name: str) -> logging.Logger:
|
|
5
6
|
logger = logging.getLogger(name)
|
|
6
|
-
|
|
7
|
+
log_level = os.getenv("LOG_LEVEL")
|
|
8
|
+
logger.setLevel(log_level if log_level else logging.INFO)
|
|
7
9
|
logger.addHandler(logging.StreamHandler())
|
|
8
10
|
return logger
|
hirundo/storage.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import typing
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import Union
|
|
4
3
|
|
|
5
4
|
import pydantic
|
|
6
5
|
import requests
|
|
@@ -10,6 +9,7 @@ from pydantic_core import Url
|
|
|
10
9
|
from hirundo._constraints import S3BucketUrl, StorageIntegrationName
|
|
11
10
|
from hirundo._env import API_HOST
|
|
12
11
|
from hirundo._headers import get_auth_headers, json_headers
|
|
12
|
+
from hirundo._http import raise_for_status_with_reason
|
|
13
13
|
from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
|
|
14
14
|
from hirundo.git import GitRepo
|
|
15
15
|
from hirundo.logger import get_logger
|
|
@@ -18,18 +18,18 @@ logger = get_logger(__name__)
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class StorageS3(BaseModel):
|
|
21
|
-
endpoint_url:
|
|
21
|
+
endpoint_url: typing.Optional[Url] = None
|
|
22
22
|
bucket_url: S3BucketUrl
|
|
23
23
|
region_name: str
|
|
24
24
|
# ⬆️ We could restrict this, but if we're allowing custom endpoints then the validation may be wrong
|
|
25
|
-
access_key_id:
|
|
26
|
-
secret_access_key:
|
|
25
|
+
access_key_id: typing.Optional[str] = None
|
|
26
|
+
secret_access_key: typing.Optional[str] = None
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
class StorageGCP(BaseModel):
|
|
30
30
|
bucket_name: str
|
|
31
31
|
project: str
|
|
32
|
-
credentials_json:
|
|
32
|
+
credentials_json: typing.Optional[dict] = None
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
# TODO: Azure storage integration is coming soon
|
|
@@ -40,12 +40,12 @@ class StorageGCP(BaseModel):
|
|
|
40
40
|
|
|
41
41
|
|
|
42
42
|
class StorageGit(BaseModel):
|
|
43
|
-
repo_id:
|
|
43
|
+
repo_id: typing.Optional[int] = None
|
|
44
44
|
"""
|
|
45
45
|
The ID of the Git repository in the Hirundo system.
|
|
46
46
|
Either `repo_id` or `repo` must be provided.
|
|
47
47
|
"""
|
|
48
|
-
repo:
|
|
48
|
+
repo: typing.Optional[GitRepo] = None
|
|
49
49
|
"""
|
|
50
50
|
The Git repository to link to.
|
|
51
51
|
Either `repo_id` or `repo` must be provided.
|
|
@@ -79,9 +79,9 @@ class StorageTypes(str, Enum):
|
|
|
79
79
|
|
|
80
80
|
|
|
81
81
|
class StorageIntegration(BaseModel):
|
|
82
|
-
id:
|
|
82
|
+
id: typing.Optional[int] = None
|
|
83
83
|
|
|
84
|
-
organization_id:
|
|
84
|
+
organization_id: typing.Optional[int] = None
|
|
85
85
|
"""
|
|
86
86
|
The ID of the organization that the `StorageIntegration` belongs to.
|
|
87
87
|
If not provided, it will be assigned to your default organization.
|
|
@@ -107,7 +107,7 @@ class StorageIntegration(BaseModel):
|
|
|
107
107
|
- `Azure` (coming soon)
|
|
108
108
|
- `Git`
|
|
109
109
|
"""
|
|
110
|
-
s3:
|
|
110
|
+
s3: typing.Optional[StorageS3] = pydantic.Field(
|
|
111
111
|
default=None,
|
|
112
112
|
examples=[
|
|
113
113
|
{
|
|
@@ -125,7 +125,7 @@ class StorageIntegration(BaseModel):
|
|
|
125
125
|
The Amazon Web Services (AWS) S3 storage integration details.
|
|
126
126
|
Use this if you want to link to an S3 bucket.
|
|
127
127
|
"""
|
|
128
|
-
gcp:
|
|
128
|
+
gcp: typing.Optional[StorageGCP] = pydantic.Field(
|
|
129
129
|
default=None,
|
|
130
130
|
examples=[
|
|
131
131
|
None,
|
|
@@ -155,7 +155,7 @@ class StorageIntegration(BaseModel):
|
|
|
155
155
|
Use this if you want to link to an GCS bucket.
|
|
156
156
|
"""
|
|
157
157
|
azure: None = None
|
|
158
|
-
# azure:
|
|
158
|
+
# azure: typing.Optional[StorageAzure] = pydantic.Field(
|
|
159
159
|
# default=None,
|
|
160
160
|
# examples=[
|
|
161
161
|
# None,
|
|
@@ -168,7 +168,7 @@ class StorageIntegration(BaseModel):
|
|
|
168
168
|
# None,
|
|
169
169
|
# ],
|
|
170
170
|
# ) TODO: Azure storage integration is coming soon
|
|
171
|
-
git:
|
|
171
|
+
git: typing.Optional[StorageGit] = pydantic.Field(
|
|
172
172
|
default=None,
|
|
173
173
|
examples=[
|
|
174
174
|
None,
|
|
@@ -191,7 +191,7 @@ class StorageIntegration(BaseModel):
|
|
|
191
191
|
"""
|
|
192
192
|
|
|
193
193
|
@staticmethod
|
|
194
|
-
def list(organization_id: typing.
|
|
194
|
+
def list(organization_id: typing.Optional[int] = None) -> list[dict]:
|
|
195
195
|
"""
|
|
196
196
|
Lists all the `StorageIntegration`'s created by user's default organization
|
|
197
197
|
Note: The return type is `list[dict]` and not `list[StorageIntegration]`
|
|
@@ -206,7 +206,7 @@ class StorageIntegration(BaseModel):
|
|
|
206
206
|
headers=get_auth_headers(),
|
|
207
207
|
timeout=READ_TIMEOUT,
|
|
208
208
|
)
|
|
209
|
-
storage_integrations
|
|
209
|
+
raise_for_status_with_reason(storage_integrations)
|
|
210
210
|
return storage_integrations.json()
|
|
211
211
|
|
|
212
212
|
@staticmethod
|
|
@@ -222,7 +222,7 @@ class StorageIntegration(BaseModel):
|
|
|
222
222
|
headers=get_auth_headers(),
|
|
223
223
|
timeout=MODIFY_TIMEOUT,
|
|
224
224
|
)
|
|
225
|
-
storage_integration
|
|
225
|
+
raise_for_status_with_reason(storage_integration)
|
|
226
226
|
logger.info("Deleted storage integration with ID: %s", storage_integration_id)
|
|
227
227
|
|
|
228
228
|
def delete(self) -> None:
|
|
@@ -248,7 +248,7 @@ class StorageIntegration(BaseModel):
|
|
|
248
248
|
},
|
|
249
249
|
timeout=MODIFY_TIMEOUT,
|
|
250
250
|
)
|
|
251
|
-
storage_integration
|
|
251
|
+
raise_for_status_with_reason(storage_integration)
|
|
252
252
|
storage_integration_id = storage_integration.json()["id"]
|
|
253
253
|
self.id = storage_integration_id
|
|
254
254
|
logger.info("Created storage integration with ID: %s", storage_integration_id)
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: hirundo
|
|
3
|
+
Version: 0.1.8
|
|
4
|
+
Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
|
|
5
|
+
Author-email: Hirundo <dev@hirundo.io>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024, Hirundo
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
13
|
+
|
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
15
|
+
|
|
16
|
+
Project-URL: Homepage, https://github.com/Hirundo-io/hirundo-client
|
|
17
|
+
Keywords: dataset,machine learning,data science,data engineering
|
|
18
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
19
|
+
Classifier: Programming Language :: Python
|
|
20
|
+
Classifier: Programming Language :: Python :: 3
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: pyyaml>=6.0.1
|
|
25
|
+
Requires-Dist: types-PyYAML>=6.0.12
|
|
26
|
+
Requires-Dist: pydantic>=2.7.1
|
|
27
|
+
Requires-Dist: twine>=5.0.0
|
|
28
|
+
Requires-Dist: python-dotenv>=1.0.1
|
|
29
|
+
Requires-Dist: types-requests>=2.31.0
|
|
30
|
+
Requires-Dist: typer>=0.12.3
|
|
31
|
+
Requires-Dist: httpx>=0.27.0
|
|
32
|
+
Requires-Dist: stamina>=24.2.0
|
|
33
|
+
Requires-Dist: httpx-sse>=0.4.0
|
|
34
|
+
Requires-Dist: pandas>=2.2.2
|
|
35
|
+
Requires-Dist: tqdm>=4.66.5
|
|
36
|
+
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: pyyaml>=6.0.1; extra == "dev"
|
|
38
|
+
Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
|
|
39
|
+
Requires-Dist: pydantic>=2.7.1; extra == "dev"
|
|
40
|
+
Requires-Dist: twine>=5.0.0; extra == "dev"
|
|
41
|
+
Requires-Dist: python-dotenv>=1.0.1; extra == "dev"
|
|
42
|
+
Requires-Dist: types-requests>=2.31.0; extra == "dev"
|
|
43
|
+
Requires-Dist: types-setuptools>=69.5.0; extra == "dev"
|
|
44
|
+
Requires-Dist: typer>=0.12.3; extra == "dev"
|
|
45
|
+
Requires-Dist: httpx>=0.27.0; extra == "dev"
|
|
46
|
+
Requires-Dist: stamina>=24.2.0; extra == "dev"
|
|
47
|
+
Requires-Dist: httpx-sse>=0.4.0; extra == "dev"
|
|
48
|
+
Requires-Dist: pytest>=8.2.0; extra == "dev"
|
|
49
|
+
Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
|
|
50
|
+
Requires-Dist: uv; extra == "dev"
|
|
51
|
+
Requires-Dist: pre-commit>=3.7.1; extra == "dev"
|
|
52
|
+
Requires-Dist: ruff==0.6.5; extra == "dev"
|
|
53
|
+
Requires-Dist: bumpver; extra == "dev"
|
|
54
|
+
Provides-Extra: docs
|
|
55
|
+
Requires-Dist: sphinx>=7.4.7; extra == "docs"
|
|
56
|
+
Requires-Dist: sphinx-autobuild>=2024.4.16; extra == "docs"
|
|
57
|
+
Requires-Dist: sphinx-click>=5.0.1; extra == "docs"
|
|
58
|
+
Requires-Dist: autodoc-pydantic>=2.2.0; extra == "docs"
|
|
59
|
+
Requires-Dist: furo; extra == "docs"
|
|
60
|
+
Requires-Dist: sphinx-multiversion; extra == "docs"
|
|
61
|
+
|
|
62
|
+
# Hirundo
|
|
63
|
+
|
|
64
|
+
This package exposes access to Hirundo APIs for dataset optimization for Machine Learning.
|
|
65
|
+
|
|
66
|
+
Dataset optimization is currently available for datasets labelled for classification and object detection.
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
Supported dataset storage integrations include:
|
|
70
|
+
- Google Cloud (GCP) Storage
|
|
71
|
+
- Amazon Web Services (AWS) S3
|
|
72
|
+
- Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
|
|
73
|
+
|
|
74
|
+
Optimizing a classification dataset
|
|
75
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
76
|
+
|
|
77
|
+
Currently ``hirundo`` requires a CSV file with the following columns (all columns are required):
|
|
78
|
+
- ``image_path``: The location of the image within the dataset ``root``
|
|
79
|
+
- ``label``: The label of the image, i.e. which the class that was annotated for this image
|
|
80
|
+
|
|
81
|
+
And outputs a CSV with the same columns and:
|
|
82
|
+
- ``suspect_level``: mislabel suspect level
|
|
83
|
+
- ``suggested_label``: suggested label
|
|
84
|
+
- ``suggested_label_conf``: suggested label confidence
|
|
85
|
+
|
|
86
|
+
Optimizing an object detection (OD) dataset
|
|
87
|
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|
88
|
+
|
|
89
|
+
Currently ``hirundo`` requires a CSV file with the following columns (all columns are required):
|
|
90
|
+
- ``image_path``: The location of the image within the dataset ``root``
|
|
91
|
+
- ``bbox_id``: The index of the bounding box within the dataset. Used to indicate label suspects
|
|
92
|
+
- ``label``: The label of the image, i.e. the class that was annotated for this image
|
|
93
|
+
- ``x1``, ``y1``, ``x2``, ``y2``: The bounding box coordinates of the object within the image
|
|
94
|
+
|
|
95
|
+
And outputs a CSV with the same columns and:
|
|
96
|
+
- ``suspect_level``: object mislabel suspect level
|
|
97
|
+
- ``suggested_label``: suggested object label
|
|
98
|
+
- ``suggested_label_conf``: suggested object label confidence
|
|
99
|
+
|
|
100
|
+
Note: This Python package must be used alongside a Hirundo server, either the SaaS platform, a custom VPC deployment or an on-premises installation.
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
## Installation
|
|
104
|
+
|
|
105
|
+
You can install the codebase with a simple `pip install hirundo` to install the latest version of this package. If you prefer to install from the Git repository and/or need a specific version or branch, you can simply clone the repository, check out the relevant commit and then run `pip install .` to install that version. A full list of dependencies can be found in `requirements.txt`, but these will be installed automatically by either of these commands.
|
|
106
|
+
|
|
107
|
+
## Usage
|
|
108
|
+
|
|
109
|
+
Classification example:
|
|
110
|
+
```
|
|
111
|
+
from hirundo.dataset_optimization import OptimizationDataset
|
|
112
|
+
from hirundo.enum import LabellingType
|
|
113
|
+
from hirundo.storage import StorageIntegration, StorageLink, StorageTypes
|
|
114
|
+
|
|
115
|
+
test_dataset = OptimizationDataset(
|
|
116
|
+
name="TEST-GCP cifar 100 classification dataset",
|
|
117
|
+
labelling_type=LabellingType.SingleLabelClassification,
|
|
118
|
+
dataset_storage=StorageLink(
|
|
119
|
+
storage_integration=StorageIntegration(
|
|
120
|
+
name="cifar100bucket",
|
|
121
|
+
type=StorageTypes.GCP,
|
|
122
|
+
gcp=StorageGCP(
|
|
123
|
+
bucket_name="cifar100bucket",
|
|
124
|
+
project="Hirundo-global",
|
|
125
|
+
credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
|
|
126
|
+
),
|
|
127
|
+
),
|
|
128
|
+
path="/pytorch-cifar/data",
|
|
129
|
+
),
|
|
130
|
+
dataset_metadata_path="cifar100.csv",
|
|
131
|
+
classes=cifar100_classes,
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
test_dataset.run_optimization()
|
|
135
|
+
results = test_dataset.check_run()
|
|
136
|
+
print(results)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
Object detection example:
|
|
141
|
+
|
|
142
|
+
```
|
|
143
|
+
from hirundo.dataset_optimization import OptimizationDataset
|
|
144
|
+
from hirundo.enum import LabellingType
|
|
145
|
+
from hirundo.storage import StorageIntegration, StorageLink, StorageTypes
|
|
146
|
+
|
|
147
|
+
test_dataset = OptimizationDataset(
|
|
148
|
+
name=f"TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset{unique_id}",
|
|
149
|
+
labelling_type=LabellingType.ObjectDetection,
|
|
150
|
+
dataset_storage=StorageLink(
|
|
151
|
+
storage_integration=StorageIntegration(
|
|
152
|
+
name=f"BDD-100k-validation-dataset{unique_id}",
|
|
153
|
+
type=StorageTypes.GIT,
|
|
154
|
+
git=StorageGit(
|
|
155
|
+
repo=GitRepo(
|
|
156
|
+
name=f"BDD-100k-validation-dataset{unique_id}",
|
|
157
|
+
repository_url="https://git@hf.co/datasets/hirundo-io/bdd100k-validation-only",
|
|
158
|
+
),
|
|
159
|
+
branch="main",
|
|
160
|
+
),
|
|
161
|
+
),
|
|
162
|
+
path="/BDD100K Val from Hirundo.zip/bdd100k",
|
|
163
|
+
),
|
|
164
|
+
dataset_metadata_path="bdd100k.csv",
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
test_dataset.run_optimization()
|
|
168
|
+
results = test_dataset.check_run()
|
|
169
|
+
print(results)
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyPy support may be introduced in the future.
|
|
173
|
+
|
|
174
|
+
## Further documentation
|
|
175
|
+
|
|
176
|
+
To learn more about how to use this library, please visit the [documentation](http://docs.hirundo.io/) or see the Google Colab examples.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
hirundo/__init__.py,sha256=EINZmdlmNjdW_dM85wksapRxGL-pPC49OYvKUBRtxQk,707
|
|
2
|
+
hirundo/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
|
|
3
|
+
hirundo/_constraints.py,sha256=-RAUV9GnCsaT9pLGSqYglKOeK0joPBBexGTo87j5nkI,425
|
|
4
|
+
hirundo/_env.py,sha256=dXUFPeEL1zPe-eBdWD4_WZvlgiY2cpWuVDzf41Qjuto,609
|
|
5
|
+
hirundo/_headers.py,sha256=ggTyBwVT3nGyPidCcmYMX6pv0idzMxCI2S1BJQE-Bbs,253
|
|
6
|
+
hirundo/_http.py,sha256=INrHX7ncpXS9vdyjrske3B5vUKL5ke9SIY6daffahtE,350
|
|
7
|
+
hirundo/_iter_sse_retrying.py,sha256=0u-jJe5vHCZegImKBB1rpI9O1BnN7oWJytdabl34ih4,3345
|
|
8
|
+
hirundo/_timeouts.py,sha256=IfX8-mrLp809-A_xSLv1DhIqZnO-Qvy4FcTtOtvqLog,42
|
|
9
|
+
hirundo/cli.py,sha256=4-pdV483zqRJl8d-R9p_9YOGlehOnoMJzb3XAAdPRb0,6634
|
|
10
|
+
hirundo/dataset_optimization.py,sha256=I2AzkSns_MLwlwI4mGGxaJB6OUG3pv7VJ5uFAtcJdTM,21825
|
|
11
|
+
hirundo/enum.py,sha256=-3w09g-_yRYIMiM8VA_Nb07WoQXf5IjyERTGonzNDs0,457
|
|
12
|
+
hirundo/git.py,sha256=Dbp0ALJYhLDgkmI_5u9iVyE_xEHIxoUTeZdpU8iau_4,4884
|
|
13
|
+
hirundo/logger.py,sha256=MUqrYp0fBlxWFhGl6P5t19_uqO7T_PNhrLN5bqY3i7s,275
|
|
14
|
+
hirundo/storage.py,sha256=xifT6xuFCJpVp5wB-ZZkzKz9HbVcMNrllj10vXlU1vU,9845
|
|
15
|
+
hirundo-0.1.8.dist-info/LICENSE,sha256=fusGGjqT2RGlU6kbkaOk7d-gDnsjk17wq67AO0mwBZI,1065
|
|
16
|
+
hirundo-0.1.8.dist-info/METADATA,sha256=heoP6t876hsxEih-RzaIjGtcLZl8UOpcwExnjQ8thU4,7841
|
|
17
|
+
hirundo-0.1.8.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
18
|
+
hirundo-0.1.8.dist-info/entry_points.txt,sha256=4ZtnA_Nl1Af8fLnHp3lwjbGDEGU1S6ujb_JwtuQ7ZPM,44
|
|
19
|
+
hirundo-0.1.8.dist-info/top_level.txt,sha256=cmyNqrNZOAYxnywJGFI1AJBLe4SkH8HGsfFx6ncdrbI,8
|
|
20
|
+
hirundo-0.1.8.dist-info/RECORD,,
|
hirundo-0.1.7.dist-info/METADATA
DELETED
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: hirundo
|
|
3
|
-
Version: 0.1.7
|
|
4
|
-
Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
|
|
5
|
-
Author-email: Hirundo <dev@hirundo.io>
|
|
6
|
-
License: MIT License
|
|
7
|
-
|
|
8
|
-
Copyright (c) 2024, Hirundo
|
|
9
|
-
|
|
10
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
13
|
-
|
|
14
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
15
|
-
|
|
16
|
-
Project-URL: Homepage, https://github.com/Hirundo-io/hirundo-client
|
|
17
|
-
Keywords: dataset,machine learning,data science,data engineering
|
|
18
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
19
|
-
Classifier: Programming Language :: Python
|
|
20
|
-
Classifier: Programming Language :: Python :: 3
|
|
21
|
-
Requires-Python: >=3.9
|
|
22
|
-
Description-Content-Type: text/markdown
|
|
23
|
-
License-File: LICENSE
|
|
24
|
-
Requires-Dist: pyyaml>=6.0.1
|
|
25
|
-
Requires-Dist: types-PyYAML>=6.0.12
|
|
26
|
-
Requires-Dist: pydantic>=2.7.1
|
|
27
|
-
Requires-Dist: twine>=5.0.0
|
|
28
|
-
Requires-Dist: python-dotenv>=1.0.1
|
|
29
|
-
Requires-Dist: types-requests>=2.31.0
|
|
30
|
-
Requires-Dist: typer>=0.12.3
|
|
31
|
-
Requires-Dist: httpx>=0.27.0
|
|
32
|
-
Requires-Dist: stamina>=24.2.0
|
|
33
|
-
Requires-Dist: httpx-sse>=0.4.0
|
|
34
|
-
Requires-Dist: pandas>=2.2.2
|
|
35
|
-
Requires-Dist: tqdm>=4.66.5
|
|
36
|
-
Provides-Extra: dev
|
|
37
|
-
Requires-Dist: pyyaml>=6.0.1; extra == "dev"
|
|
38
|
-
Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
|
|
39
|
-
Requires-Dist: pydantic>=2.7.1; extra == "dev"
|
|
40
|
-
Requires-Dist: twine>=5.0.0; extra == "dev"
|
|
41
|
-
Requires-Dist: python-dotenv>=1.0.1; extra == "dev"
|
|
42
|
-
Requires-Dist: types-requests>=2.31.0; extra == "dev"
|
|
43
|
-
Requires-Dist: types-setuptools>=69.5.0; extra == "dev"
|
|
44
|
-
Requires-Dist: typer>=0.12.3; extra == "dev"
|
|
45
|
-
Requires-Dist: httpx>=0.27.0; extra == "dev"
|
|
46
|
-
Requires-Dist: stamina>=24.2.0; extra == "dev"
|
|
47
|
-
Requires-Dist: httpx-sse>=0.4.0; extra == "dev"
|
|
48
|
-
Requires-Dist: pytest>=8.2.0; extra == "dev"
|
|
49
|
-
Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
|
|
50
|
-
Requires-Dist: uv; extra == "dev"
|
|
51
|
-
Requires-Dist: pre-commit>=3.7.1; extra == "dev"
|
|
52
|
-
Requires-Dist: ruff; extra == "dev"
|
|
53
|
-
Requires-Dist: bumpver; extra == "dev"
|
|
54
|
-
Provides-Extra: docs
|
|
55
|
-
Requires-Dist: sphinx>=7.4.7; extra == "docs"
|
|
56
|
-
Requires-Dist: sphinx-autobuild>=2024.4.16; extra == "docs"
|
|
57
|
-
Requires-Dist: sphinx-click>=5.0.1; extra == "docs"
|
|
58
|
-
Requires-Dist: autodoc-pydantic>=2.2.0; extra == "docs"
|
|
59
|
-
Requires-Dist: furo; extra == "docs"
|
|
60
|
-
Requires-Dist: sphinx-multiversion; extra == "docs"
|
|
61
|
-
|
|
62
|
-
# Hirundo client
|
|
63
|
-
|
|
64
|
-
This repo contains the source code for the Hirundo client library
|
|
65
|
-
|
|
66
|
-
## Usage:
|
|
67
|
-
|
|
68
|
-
To learn about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
|
|
69
|
-
|
|
70
|
-
Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyPy support may be introduced in the future.
|
|
71
|
-
|
|
72
|
-
## Development:
|
|
73
|
-
|
|
74
|
-
### Install dev dependencies
|
|
75
|
-
|
|
76
|
-
```bash
|
|
77
|
-
pip install -r dev-requirements.txt
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
Note: You can install and use `uv` as a faster drop-in replacement for `pip`. We have it as part of our dev dependencies for this reason.
|
|
81
|
-
|
|
82
|
-
### Install `git` hooks (optional)
|
|
83
|
-
|
|
84
|
-
```bash
|
|
85
|
-
pre-commit install
|
|
86
|
-
```
|
|
87
|
-
|
|
88
|
-
### Check lint and apply formatting with Ruff (optional; pre-commit hooks run this automatically)
|
|
89
|
-
|
|
90
|
-
```bash
|
|
91
|
-
ruff check
|
|
92
|
-
ruff format
|
|
93
|
-
```
|
|
94
|
-
|
|
95
|
-
### Change packages
|
|
96
|
-
|
|
97
|
-
#### Update `requirements.txt` files
|
|
98
|
-
|
|
99
|
-
```bash
|
|
100
|
-
uv pip compile pyproject.toml
|
|
101
|
-
uv pip compile --extra dev -o dev-requirements.txt -c requirements.txt pyproject.toml
|
|
102
|
-
uv pip compile --extra docs -o docs-requirements.txt -c requirements.txt pyproject.toml
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
#### Sync installed packages
|
|
106
|
-
|
|
107
|
-
```bash
|
|
108
|
-
uv pip sync dev-requirements.txt
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
### Build process
|
|
112
|
-
|
|
113
|
-
To build the package, run:
|
|
114
|
-
`python -m build`
|
|
115
|
-
|
|
116
|
-
### Publish documentation & releases
|
|
117
|
-
|
|
118
|
-
Documentation & releases are published via GitHub Actions on merges to `main`.
|
hirundo-0.1.7.dist-info/RECORD
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
hirundo/__init__.py,sha256=K0TcpPS937MazldFEgHnWLIKKmf-nt2k6Vjef58aKNs,707
|
|
2
|
-
hirundo/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
|
|
3
|
-
hirundo/_constraints.py,sha256=-RAUV9GnCsaT9pLGSqYglKOeK0joPBBexGTo87j5nkI,425
|
|
4
|
-
hirundo/_env.py,sha256=6RVEn80KelYa-v2Tc18KTQlAZx0cYv3Y1HFS3H7gDt0,307
|
|
5
|
-
hirundo/_headers.py,sha256=ggTyBwVT3nGyPidCcmYMX6pv0idzMxCI2S1BJQE-Bbs,253
|
|
6
|
-
hirundo/_iter_sse_retrying.py,sha256=WLp_lw8ycBuAxoJkkGBu4y74Ajhcu11r1X-vd5_571A,3352
|
|
7
|
-
hirundo/_timeouts.py,sha256=IfX8-mrLp809-A_xSLv1DhIqZnO-Qvy4FcTtOtvqLog,42
|
|
8
|
-
hirundo/cli.py,sha256=pNRaeH__3qvdfcAWhqnGyZDE4uqtqW8BQ6fy4mD7aM4,3936
|
|
9
|
-
hirundo/dataset_optimization.py,sha256=qevme8Gavuk6dBvR-Q5KffMzt9--hTfxzoUnSx4tqFI,19594
|
|
10
|
-
hirundo/enum.py,sha256=-3w09g-_yRYIMiM8VA_Nb07WoQXf5IjyERTGonzNDs0,457
|
|
11
|
-
hirundo/git.py,sha256=-Z_uFHsFfVR-_XqEq85wiiGZtgqIV81PWoC1B6UBSww,4769
|
|
12
|
-
hirundo/logger.py,sha256=e_Kn6dic8DCqjQnDw60z25xnfofOypNoUdlnunySARs,198
|
|
13
|
-
hirundo/storage.py,sha256=Dfi-LEjfrZJZM9mWE_CgQ4upP2lrN6-3HHGd-0znfqE,9737
|
|
14
|
-
hirundo-0.1.7.dist-info/LICENSE,sha256=fusGGjqT2RGlU6kbkaOk7d-gDnsjk17wq67AO0mwBZI,1065
|
|
15
|
-
hirundo-0.1.7.dist-info/METADATA,sha256=EKswSiESzpSqV9qL9o5uqJNsCUgK7UduvAr_mlXO9hg,4558
|
|
16
|
-
hirundo-0.1.7.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
|
|
17
|
-
hirundo-0.1.7.dist-info/entry_points.txt,sha256=4ZtnA_Nl1Af8fLnHp3lwjbGDEGU1S6ujb_JwtuQ7ZPM,44
|
|
18
|
-
hirundo-0.1.7.dist-info/top_level.txt,sha256=cmyNqrNZOAYxnywJGFI1AJBLe4SkH8HGsfFx6ncdrbI,8
|
|
19
|
-
hirundo-0.1.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|