hirundo 0.1.6__tar.gz → 0.1.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- {hirundo-0.1.6 → hirundo-0.1.7}/PKG-INFO +2 -1
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo/__init__.py +1 -1
- hirundo-0.1.7/hirundo/_env.py +15 -0
- hirundo-0.1.7/hirundo/_headers.py +13 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo/cli.py +5 -1
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo/dataset_optimization.py +167 -41
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo/git.py +6 -6
- hirundo-0.1.7/hirundo/logger.py +8 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo/storage.py +40 -5
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo.egg-info/PKG-INFO +2 -1
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo.egg-info/SOURCES.txt +1 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo.egg-info/requires.txt +1 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/pyproject.toml +2 -1
- hirundo-0.1.6/hirundo/_env.py +0 -12
- hirundo-0.1.6/hirundo/_headers.py +0 -9
- {hirundo-0.1.6 → hirundo-0.1.7}/LICENSE +0 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/README.md +0 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo/__main__.py +0 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo/_constraints.py +0 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo/_iter_sse_retrying.py +0 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo/_timeouts.py +0 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo/enum.py +0 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo.egg-info/dependency_links.txt +0 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo.egg-info/entry_points.txt +0 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/hirundo.egg-info/top_level.txt +0 -0
- {hirundo-0.1.6 → hirundo-0.1.7}/setup.cfg +0 -0
{hirundo-0.1.6 → hirundo-0.1.7}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hirundo
-Version: 0.1.6
+Version: 0.1.7
 Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
 Author-email: Hirundo <dev@hirundo.io>
 License: MIT License
@@ -32,6 +32,7 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: stamina>=24.2.0
 Requires-Dist: httpx-sse>=0.4.0
 Requires-Dist: pandas>=2.2.2
+Requires-Dist: tqdm>=4.66.5
 Provides-Extra: dev
 Requires-Dist: pyyaml>=6.0.1; extra == "dev"
 Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
hirundo-0.1.7/hirundo/_env.py
ADDED

@@ -0,0 +1,15 @@
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+API_HOST = os.getenv("API_HOST", "https://api.hirundo.io")
+API_KEY = os.getenv("API_KEY")
+
+
+def check_api_key():
+    if not API_KEY:
+        raise ValueError(
+            "API_KEY is not set. Please run `hirundo setup` to set the API key"
+        )
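The net effect of this new module (compare the deleted 0.1.6 version at the bottom of this diff): importing `hirundo._env` no longer raises when `API_KEY` is unset; the check is deferred to an explicit `check_api_key()` call. A minimal sketch of the new flow — the caller code here is hypothetical:

    # Hypothetical caller illustrating the 0.1.7 behaviour: the import itself
    # now succeeds even when API_KEY is missing from the environment/.env file.
    from hirundo._env import API_HOST, check_api_key

    check_api_key()  # only this call raises ValueError if API_KEY is unset
    print(f"Using API host: {API_HOST}")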
{hirundo-0.1.6 → hirundo-0.1.7}/hirundo/cli.py

@@ -14,8 +14,12 @@ hirundo_epilog = (
     else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information."
 )
 
+
 app = typer.Typer(
-    name="hirundo",
+    name="hirundo",
+    no_args_is_help=True,
+    rich_markup_mode="rich",
+    epilog=hirundo_epilog,
 )
 
 
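The new `typer.Typer` options are stock Typer features: `no_args_is_help=True` prints the help screen when the CLI is invoked without arguments, `rich_markup_mode="rich"` enables Rich markup in help text, and `epilog` appends `hirundo_epilog` to the bottom of `--help` output. A self-contained sketch of the same pattern on a toy CLI (not Hirundo's actual commands):

    import typer

    app = typer.Typer(
        name="demo",
        no_args_is_help=True,      # bare `demo` shows help instead of erroring
        rich_markup_mode="rich",   # allow [bold]Rich[/bold] markup in help strings
        epilog="Made with care.",  # printed after the options in --help
    )

    @app.command()
    def hello(name: str):
        typer.echo(f"Hello {name}")

    if __name__ == "__main__":
        app()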
{hirundo-0.1.6 → hirundo-0.1.7}/hirundo/dataset_optimization.py

@@ -1,22 +1,26 @@
 import json
-import
+import typing
 from collections.abc import AsyncGenerator, Generator
+from enum import Enum
 from io import StringIO
-from typing import Union
+from typing import Union, overload
 
 import httpx
 import pandas as pd
 import requests
 from pydantic import BaseModel, Field, model_validator
+from tqdm import tqdm
+from tqdm.contrib.logging import logging_redirect_tqdm
 
 from hirundo._env import API_HOST
-from hirundo._headers import
+from hirundo._headers import get_auth_headers, json_headers
 from hirundo._iter_sse_retrying import aiter_sse_retrying, iter_sse_retrying
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
 from hirundo.enum import DatasetMetadataType, LabellingType
+from hirundo.logger import get_logger
 from hirundo.storage import StorageIntegration, StorageLink
 
-logger =
+logger = get_logger(__name__)
 
 
 class HirundoError(Exception):
@@ -30,6 +34,14 @@ class HirundoError(Exception):
 MAX_RETRIES = 200  # Max 200 retries for HTTP SSE connection
 
 
+class RunStatus(Enum):
+    STARTED = "STARTED"
+    PENDING = "PENDING"
+    SUCCESS = "SUCCESS"
+    FAILURE = "FAILURE"
+    AWAITING_MANUAL_APPROVAL = "AWAITING MANUAL APPROVAL"
+
+
 class OptimizationDataset(BaseModel):
     name: str
     """
@@ -98,7 +110,7 @@ class OptimizationDataset(BaseModel):
         response = requests.get(
             f"{API_HOST}/dataset-optimization/dataset/",
             params={"dataset_organization_id": organization_id},
-            headers=
+            headers=get_auth_headers(),
             timeout=READ_TIMEOUT,
         )
         response.raise_for_status()
@@ -114,10 +126,11 @@ class OptimizationDataset(BaseModel):
         """
         response = requests.delete(
             f"{API_HOST}/dataset-optimization/dataset/{dataset_id}",
-            headers=
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         response.raise_for_status()
+        logger.info("Deleted dataset with ID: %s", dataset_id)
 
     def delete(self, storage_integration=True) -> None:
         """
@@ -167,7 +180,7 @@ class OptimizationDataset(BaseModel):
             },
             headers={
                 **json_headers,
-                **
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
@@ -175,6 +188,7 @@ class OptimizationDataset(BaseModel):
         self.dataset_id = dataset_response.json()["id"]
         if not self.dataset_id:
             raise HirundoError("Failed to create the dataset")
+        logger.info("Created dataset with ID: %s", self.dataset_id)
         return self.dataset_id
 
     @staticmethod
@@ -191,7 +205,7 @@ class OptimizationDataset(BaseModel):
         """
         run_response = requests.post(
             f"{API_HOST}/dataset-optimization/run/{dataset_id}",
-            headers=
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         run_response.raise_for_status()
@@ -210,6 +224,7 @@ class OptimizationDataset(BaseModel):
             self.dataset_id = self.create()
             run_id = self.launch_optimization_run(self.dataset_id)
             self.run_id = run_id
+            logger.info("Started the run with ID: %s", run_id)
             return run_id
         except requests.HTTPError as error:
             try:
@@ -237,30 +252,38 @@ class OptimizationDataset(BaseModel):
         self.run_id = None
 
     @staticmethod
-    def
-        if data["state"] == "SUCCESS":
-            data["result"] = pd.read_csv(StringIO(data["result"]))
-        else:
-            pass
-
-    @staticmethod
-    def check_run_by_id(run_id: str, retry=0) -> Generator[dict, None, None]:
+    def _clean_df_index(df: "pd.DataFrame") -> "pd.DataFrame":
         """
-
-
-        This generator will produce values to show progress of the run.
+        Clean the index of a dataframe in case it has unnamed columns.
 
         Args:
-
-            retry: A number used to track the number of retries to limit re-checks. *Do not* provide this value manually.
-
-        Yields:
-            Each event will be a dict, where:
-            - `"state"` is PENDING, STARTED, RETRY, FAILURE or SUCCESS
-            - `"result"` is a string describing the progress as a percentage for a PENDING state,
-            or the error for a FAILURE state or the results for a SUCCESS state
+            df (DataFrame): Dataframe to clean
 
+        Returns:
+            DataFrame: Cleaned dataframe
         """
+        index_cols = sorted(
+            [col for col in df.columns if col.startswith("Unnamed")], reverse=True
+        )
+        if len(index_cols) > 0:
+            df.set_index(index_cols.pop(), inplace=True)
+            df.rename_axis(index=None, columns=None, inplace=True)
+        if len(index_cols) > 0:
+            df.drop(columns=index_cols, inplace=True)
+
+        return df
+
+    @staticmethod
+    def _read_csv_to_df(data: dict):
+        if data["state"] == RunStatus.SUCCESS.value:
+            data["result"] = OptimizationDataset._clean_df_index(
+                pd.read_csv(StringIO(data["result"]))
+            )
+        else:
+            pass
+
+    @staticmethod
+    def _check_run_by_id(run_id: str, retry=0) -> Generator[dict, None, None]:
         if retry > MAX_RETRIES:
             raise HirundoError("Max retries reached")
         last_event = None
@@ -269,7 +292,7 @@ class OptimizationDataset(BaseModel):
             client,
             "GET",
             f"{API_HOST}/dataset-optimization/run/{run_id}",
-            headers=
+            headers=get_auth_headers(),
         ):
             if sse.event == "ping":
                 continue
@@ -286,24 +309,125 @@ class OptimizationDataset(BaseModel):
             data = last_event["data"]
             OptimizationDataset._read_csv_to_df(data)
             yield data
-        if not last_event or last_event["data"]["state"] ==
-            OptimizationDataset.
+        if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
+            OptimizationDataset._check_run_by_id(run_id, retry + 1)
+
+    @staticmethod
+    @overload
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: typing.Literal[True]
+    ) -> typing.Optional[pd.DataFrame]:
+        ...
 
-
+    @staticmethod
+    @overload
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: typing.Literal[False] = False
+    ) -> pd.DataFrame:
+        ...
+
+    @staticmethod
+    @overload
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: bool
+    ) -> typing.Optional[pd.DataFrame]:
+        ...
+
+    @staticmethod
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: bool = False
+    ) -> typing.Optional[pd.DataFrame]:
         """
-        Check the status of
+        Check the status of a run given its ID
 
-
+        Args:
+            run_id: The `run_id` produced by a `run_optimization` call
+            stop_on_manual_approval: If True, the function will return `None` if the run is awaiting manual approval
 
-
-
-
-
+        Returns:
+            A pandas DataFrame with the results of the optimization run
+
+        Raises:
+            HirundoError: If the maximum number of retries is reached or if the run fails
+        """
+        logger.debug("Checking run with ID: %s", run_id)
+        with logging_redirect_tqdm():
+            t = tqdm(total=100.0)
+            for iteration in OptimizationDataset._check_run_by_id(run_id):
+                if iteration["state"] == RunStatus.SUCCESS.value:
+                    t.set_description("Optimization run completed successfully")
+                    t.n = 100.0
+                    t.refresh()
+                    t.close()
+                    return iteration["result"]
+                elif iteration["state"] == RunStatus.PENDING.value:
+                    t.set_description("Optimization run queued and not yet started")
+                    t.n = 0.0
+                    t.refresh()
+                elif iteration["state"] == RunStatus.STARTED.value:
+                    t.set_description(
+                        "Optimization run in progress. Downloading dataset"
+                    )
+                    t.n = 0.0
+                    t.refresh()
+                elif iteration["state"] is None:
+                    if (
+                        iteration["result"]
+                        and isinstance(iteration["result"], dict)
+                        and iteration["result"]["result"]
+                        and isinstance(iteration["result"]["result"], str)
+                    ):
+                        current_progress_percentage = float(
+                            iteration["result"]["result"].removesuffix("% done")
+                        )
+                        desc = (
+                            "Optimization run completed. Uploading results"
+                            if current_progress_percentage == 100.0
+                            else "Optimization run in progress"
+                        )
+                        t.set_description(desc)
+                        t.n = current_progress_percentage
+                        t.refresh()
+                elif iteration["state"] == RunStatus.AWAITING_MANUAL_APPROVAL.value:
+                    t.set_description("Awaiting manual approval")
+                    t.n = 100.0
+                    t.refresh()
+                    if stop_on_manual_approval:
+                        t.close()
+                        return None
+                elif iteration["state"] == RunStatus.FAILURE.value:
+                    t.set_description("Optimization run failed")
+                    t.close()
+                    raise HirundoError(
+                        f"Optimization run failed with error: {iteration['result']}"
+                    )
+            raise HirundoError("Optimization run failed with an unknown error")
+
+    @overload
+    def check_run(
+        self, stop_on_manual_approval: typing.Literal[True]
+    ) -> typing.Union[pd.DataFrame, None]:
+        ...
+
+    @overload
+    def check_run(
+        self, stop_on_manual_approval: typing.Literal[False] = False
+    ) -> pd.DataFrame:
+        ...
+
+    def check_run(
+        self, stop_on_manual_approval: bool = False
+    ) -> typing.Union[pd.DataFrame, None]:
+        """
+        Check the status of the current active instance's run.
+
+        Returns:
+            A pandas DataFrame with the results of the optimization run
 
         """
         if not self.run_id:
             raise ValueError("No run has been started")
-        return self.check_run_by_id(self.run_id)
+        return self.check_run_by_id(self.run_id, stop_on_manual_approval)
 
     @staticmethod
     async def acheck_run_by_id(run_id: str, retry=0) -> AsyncGenerator[dict, None]:
@@ -324,6 +448,7 @@ class OptimizationDataset(BaseModel):
         - `"result"` is a string describing the progress as a percentage for a PENDING state, or the error for a FAILURE state or the results for a SUCCESS state
 
         """
+        logger.debug("Checking run with ID: %s", run_id)
         if retry > MAX_RETRIES:
             raise HirundoError("Max retries reached")
         last_event = None
@@ -334,7 +459,7 @@ class OptimizationDataset(BaseModel):
             client,
             "GET",
             f"{API_HOST}/dataset-optimization/run/{run_id}",
-            headers=
+            headers=get_auth_headers(),
         )
         async for sse in async_iterator:
             if sse.event == "ping":
@@ -348,7 +473,7 @@ class OptimizationDataset(BaseModel):
             )
             last_event = json.loads(sse.data)
             yield last_event["data"]
-        if not last_event or last_event["data"]["state"] ==
+        if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
             OptimizationDataset.acheck_run_by_id(run_id, retry + 1)
 
     async def acheck_run(self) -> AsyncGenerator[dict, None]:
@@ -380,9 +505,10 @@ class OptimizationDataset(BaseModel):
         """
         if not run_id:
             raise ValueError("No run has been started")
+        logger.info("Cancelling run with ID: %s", run_id)
         response = requests.delete(
             f"{API_HOST}/dataset-optimization/run/{run_id}",
-            headers=
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         response.raise_for_status()
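Taken together, these changes replace the bare SSE generator that `check_run_by_id` used to return with a blocking helper: the generator moves to the private `_check_run_by_id`, while the public method drives it, renders progress with tqdm, and returns the parsed results. A hedged usage sketch — the run ID is made up for illustration:

    # Hypothetical usage of the 0.1.7 API; "some-run-id" is a placeholder.
    from hirundo.dataset_optimization import OptimizationDataset

    # Blocks with a tqdm progress bar; returns a cleaned DataFrame on SUCCESS,
    # or None when stop_on_manual_approval=True and the run pauses for approval.
    result = OptimizationDataset.check_run_by_id(
        "some-run-id", stop_on_manual_approval=True
    )
    if result is None:
        print("Run is awaiting manual approval")
    else:
        print(result.head())  # unnamed index columns stripped by _clean_df_index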
{hirundo-0.1.6 → hirundo-0.1.7}/hirundo/git.py

@@ -1,4 +1,3 @@
-import logging
 import re
 from typing import Annotated, Union
 
@@ -8,10 +7,11 @@ from pydantic import BaseModel, field_validator
 from pydantic_core import Url
 
 from hirundo._env import API_HOST
-from hirundo._headers import
+from hirundo._headers import get_auth_headers, json_headers
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
+from hirundo.logger import get_logger
 
-logger =
+logger = get_logger(__name__)
 
 
 class GitPlainAuthBase(BaseModel):
@@ -108,7 +108,7 @@ class GitRepo(BaseModel):
             json=self.model_dump(),
             headers={
                 **json_headers,
-                **
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
@@ -125,7 +125,7 @@ class GitRepo(BaseModel):
         git_repos = requests.get(
             f"{API_HOST}/git-repo/",
             headers={
-                **
+                **get_auth_headers(),
             },
             timeout=READ_TIMEOUT,
         )
@@ -143,7 +143,7 @@ class GitRepo(BaseModel):
         git_repo = requests.delete(
             f"{API_HOST}/git-repo/{git_repo_id}",
             headers={
-                **
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
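The new `hirundo/logger.py` (+8 lines per the file list above) is not expanded in this diff, so its exact contents are unknown. Judging only from the call sites — `get_logger(__name__)` returning an object used with `.info`/`.debug` and `%s` formatting — a minimal stdlib-based helper of roughly this shape would satisfy them; this is a guess, not the actual file:

    # Hypothetical reconstruction; the real hirundo/logger.py is not shown here.
    import logging


    def get_logger(name: str) -> logging.Logger:
        # A module-scoped stdlib logger matches every usage in this diff.
        return logging.getLogger(name)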
{hirundo-0.1.6 → hirundo-0.1.7}/hirundo/storage.py

@@ -9,9 +9,12 @@ from pydantic_core import Url
 
 from hirundo._constraints import S3BucketUrl, StorageIntegrationName
 from hirundo._env import API_HOST
-from hirundo._headers import
+from hirundo._headers import get_auth_headers, json_headers
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
 from hirundo.git import GitRepo
+from hirundo.logger import get_logger
+
+logger = get_logger(__name__)
 
 
 class StorageS3(BaseModel):
@@ -69,6 +72,10 @@ class StorageTypes(str, Enum):
     GCP = "GCP"
     # AZURE = "Azure" TODO: Azure storage integration is coming soon
     GIT = "Git"
+    LOCAL = "Local"
+    """
+    Local storage integration is only supported for on-premises installations.
+    """
 
 
 class StorageIntegration(BaseModel):
@@ -84,7 +91,7 @@ class StorageIntegration(BaseModel):
     """
     A name to identify the `StorageIntegration` in the Hirundo system.
    """
-    type: StorageTypes = pydantic.Field(
+    type: typing.Optional[StorageTypes] = pydantic.Field(
         examples=[
             StorageTypes.S3,
             StorageTypes.GCP,
@@ -196,7 +203,7 @@ class StorageIntegration(BaseModel):
         storage_integrations = requests.get(
             f"{API_HOST}/storage-integration/",
             params={"storage_integration_organization_id": organization_id},
-            headers=
+            headers=get_auth_headers(),
             timeout=READ_TIMEOUT,
         )
         storage_integrations.raise_for_status()
@@ -212,10 +219,11 @@ class StorageIntegration(BaseModel):
         """
         storage_integration = requests.delete(
             f"{API_HOST}/storage-integration/{storage_integration_id}",
-            headers=
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         storage_integration.raise_for_status()
+        logger.info("Deleted storage integration with ID: %s", storage_integration_id)
 
     def delete(self) -> None:
         """
@@ -236,15 +244,42 @@ class StorageIntegration(BaseModel):
             json=self.model_dump(),
             headers={
                 **json_headers,
-                **
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
         storage_integration.raise_for_status()
         storage_integration_id = storage_integration.json()["id"]
         self.id = storage_integration_id
+        logger.info("Created storage integration with ID: %s", storage_integration_id)
         return storage_integration_id
 
+    @model_validator(mode="after")
+    def validate_storage_type(self):
+        if self.type != StorageTypes.LOCAL and (
+            [self.s3, self.gcp, self.git].count(None) != 2
+        ):
+            raise ValueError("Exactly one of S3, GCP, or Git must be provided")
+        if self.type == StorageTypes.S3 and self.s3 is None:
+            raise ValueError("S3 storage details must be provided")
+        elif self.type == StorageTypes.GCP and self.gcp is None:
+            raise ValueError("GCP storage details must be provided")
+        elif self.type == StorageTypes.GIT and self.git is None:
+            raise ValueError("Git storage details must be provided")
+        if not self.type and not any([self.s3, self.gcp, self.git]):
+            raise ValueError("Storage type must be provided")
+        elif not self.type:
+            self.type = (
+                StorageTypes.S3
+                if self.s3 is not None
+                else StorageTypes.GCP
+                if self.gcp is not None
+                else StorageTypes.GIT
+                if self.git is not None
+                else StorageTypes.LOCAL
+            )
+        return self
+
 
 class StorageLink(BaseModel):
     storage_integration: StorageIntegration
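With `type` now optional, the new `validate_storage_type` validator both enforces that exactly one backend's details are supplied and infers `type` when it is omitted. The same `model_validator(mode="after")` pattern in isolation, on a toy two-backend model rather than Hirundo's actual classes:

    # Standalone sketch of the after-validator pattern; Integration/Backend are toys.
    import typing
    from enum import Enum

    from pydantic import BaseModel, model_validator


    class Backend(str, Enum):
        S3 = "S3"
        GCP = "GCP"


    class Integration(BaseModel):
        type: typing.Optional[Backend] = None
        s3: typing.Optional[dict] = None
        gcp: typing.Optional[dict] = None

        @model_validator(mode="after")
        def infer_type(self):
            # Require exactly one backend config, then fill `type` in if omitted.
            if [self.s3, self.gcp].count(None) != 1:
                raise ValueError("Exactly one of S3 or GCP must be provided")
            if self.type is None:
                self.type = Backend.S3 if self.s3 is not None else Backend.GCP
            return self


    print(Integration(s3={"bucket": "example"}).type)  # Backend.S3 (inferred)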
{hirundo-0.1.6 → hirundo-0.1.7}/hirundo.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hirundo
-Version: 0.1.6
+Version: 0.1.7
 Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
 Author-email: Hirundo <dev@hirundo.io>
 License: MIT License
@@ -32,6 +32,7 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: stamina>=24.2.0
 Requires-Dist: httpx-sse>=0.4.0
 Requires-Dist: pandas>=2.2.2
+Requires-Dist: tqdm>=4.66.5
 Provides-Extra: dev
 Requires-Dist: pyyaml>=6.0.1; extra == "dev"
 Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
@@ -7,7 +7,7 @@ packages = ["hirundo"]
|
|
|
7
7
|
|
|
8
8
|
[project]
|
|
9
9
|
name = "hirundo"
|
|
10
|
-
version = "0.1.
|
|
10
|
+
version = "0.1.7"
|
|
11
11
|
description = "This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets."
|
|
12
12
|
authors = [{ name = "Hirundo", email = "dev@hirundo.io" }]
|
|
13
13
|
readme = "README.md"
|
|
@@ -36,6 +36,7 @@ dependencies = [
|
|
|
36
36
|
"stamina>=24.2.0",
|
|
37
37
|
"httpx-sse>=0.4.0",
|
|
38
38
|
"pandas>=2.2.2",
|
|
39
|
+
"tqdm>=4.66.5",
|
|
39
40
|
]
|
|
40
41
|
|
|
41
42
|
[project.scripts]
|
hirundo-0.1.6/hirundo/_env.py
DELETED

@@ -1,12 +0,0 @@
-import os
-
-from dotenv import load_dotenv
-
-load_dotenv()
-
-API_HOST = os.getenv("API_HOST", "https://api.hirundo.io")
-API_KEY = os.getenv("API_KEY")
-if not API_KEY:
-    raise ValueError(
-        "API_KEY is not set. Please run `hirundo setup` to set the API key"
-    )