hirundo: 0.1.5-py3-none-any.whl → 0.1.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hirundo/__init__.py +1 -1
- hirundo/_env.py +7 -4
- hirundo/_headers.py +8 -4
- hirundo/cli.py +5 -1
- hirundo/dataset_optimization.py +174 -35
- hirundo/git.py +6 -6
- hirundo/logger.py +8 -0
- hirundo/storage.py +40 -5
- {hirundo-0.1.5.dist-info → hirundo-0.1.7.dist-info}/METADATA +38 -34
- hirundo-0.1.7.dist-info/RECORD +19 -0
- {hirundo-0.1.5.dist-info → hirundo-0.1.7.dist-info}/WHEEL +1 -1
- hirundo-0.1.5.dist-info/RECORD +0 -18
- {hirundo-0.1.5.dist-info → hirundo-0.1.7.dist-info}/LICENSE +0 -0
- {hirundo-0.1.5.dist-info → hirundo-0.1.7.dist-info}/entry_points.txt +0 -0
- {hirundo-0.1.5.dist-info → hirundo-0.1.7.dist-info}/top_level.txt +0 -0
hirundo/__init__.py
CHANGED
hirundo/_env.py
CHANGED
@@ -6,7 +6,10 @@ load_dotenv()
 
 API_HOST = os.getenv("API_HOST", "https://api.hirundo.io")
 API_KEY = os.getenv("API_KEY")
-
-
-
-
+
+
+def check_api_key():
+    if not API_KEY:
+        raise ValueError(
+            "API_KEY is not set. Please run `hirundo setup` to set the API key"
+        )
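
How the new guard behaves (a minimal sketch, not taken from the package; it simply replays the `check_api_key` logic added above):

    import os

    os.environ.pop("API_KEY", None)  # simulate a missing key
    API_KEY = os.getenv("API_KEY")

    def check_api_key():
        if not API_KEY:
            raise ValueError(
                "API_KEY is not set. Please run `hirundo setup` to set the API key"
            )

    try:
        check_api_key()
    except ValueError as err:
        # -> API_KEY is not set. Please run `hirundo setup` to set the API key
        print(err)
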
hirundo/_headers.py
CHANGED
@@ -1,9 +1,13 @@
-from hirundo._env import API_KEY
+from hirundo._env import API_KEY, check_api_key
 
 json_headers = {
     "Content-Type": "application/json",
     "Accept": "application/json",
 }
-
-
-
+
+
+def get_auth_headers():
+    check_api_key()
+    return {
+        "Authorization": f"Bearer {API_KEY}",
+    }
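
The later hunks in this release replace a module-level headers value (truncated in this rendering) with per-request `get_auth_headers()` calls, so a missing key fails fast with the setup hint. A minimal sketch of the merge pattern those call sites use (the commented-out request and URL are illustrative, not from the package):

    import requests

    json_headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
    }

    API_KEY = "example-key"  # assumed set; check_api_key() would raise otherwise

    def get_auth_headers():
        return {"Authorization": f"Bearer {API_KEY}"}

    # JSON endpoints merge both dicts, as the dataset/git/storage hunks show:
    headers = {**json_headers, **get_auth_headers()}
    # requests.post("https://api.hirundo.io/...", json={}, headers=headers)
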
hirundo/cli.py
CHANGED
@@ -14,8 +14,12 @@ hirundo_epilog = (
     else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information."
 )
 
+
 app = typer.Typer(
-    name="hirundo",
+    name="hirundo",
+    no_args_is_help=True,
+    rich_markup_mode="rich",
+    epilog=hirundo_epilog,
 )
 
 
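
For context, the three new keyword arguments are standard Typer options: `no_args_is_help=True` prints the help text when `hirundo` is invoked bare, `rich_markup_mode="rich"` lets help strings use Rich markup, and `epilog` appends the "Made with ❤️ by Hirundo" footer to every help screen. A minimal sketch (the `hello` command is illustrative, not part of the package):

    import typer

    app = typer.Typer(
        name="hirundo",
        no_args_is_help=True,  # bare `hirundo` shows help instead of erroring
        rich_markup_mode="rich",
        epilog="Made with ❤️ by Hirundo.",
    )

    @app.command()
    def hello() -> None:
        """Print a greeting."""
        typer.echo("Hello from hirundo")

    if __name__ == "__main__":
        app()
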
hirundo/dataset_optimization.py
CHANGED
@@ -1,20 +1,26 @@
 import json
-import
+import typing
 from collections.abc import AsyncGenerator, Generator
-from
+from enum import Enum
+from io import StringIO
+from typing import Union, overload
 
 import httpx
+import pandas as pd
 import requests
 from pydantic import BaseModel, Field, model_validator
+from tqdm import tqdm
+from tqdm.contrib.logging import logging_redirect_tqdm
 
 from hirundo._env import API_HOST
-from hirundo._headers import
+from hirundo._headers import get_auth_headers, json_headers
 from hirundo._iter_sse_retrying import aiter_sse_retrying, iter_sse_retrying
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
 from hirundo.enum import DatasetMetadataType, LabellingType
+from hirundo.logger import get_logger
 from hirundo.storage import StorageIntegration, StorageLink
 
-logger =
+logger = get_logger(__name__)
 
 
 class HirundoError(Exception):
@@ -28,6 +34,14 @@ class HirundoError(Exception):
 MAX_RETRIES = 200  # Max 200 retries for HTTP SSE connection
 
 
+class RunStatus(Enum):
+    STARTED = "STARTED"
+    PENDING = "PENDING"
+    SUCCESS = "SUCCESS"
+    FAILURE = "FAILURE"
+    AWAITING_MANUAL_APPROVAL = "AWAITING MANUAL APPROVAL"
+
+
 class OptimizationDataset(BaseModel):
     name: str
     """
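
The SSE payloads carry the run state as a plain string, which is why the later hunks compare against `RunStatus.<MEMBER>.value` rather than the enum member itself. A minimal sketch (the decoded event is illustrative):

    from enum import Enum

    class RunStatus(Enum):
        STARTED = "STARTED"
        PENDING = "PENDING"
        SUCCESS = "SUCCESS"
        FAILURE = "FAILURE"
        AWAITING_MANUAL_APPROVAL = "AWAITING MANUAL APPROVAL"

    event = {"state": "SUCCESS"}  # illustrative decoded SSE payload
    print(event["state"] == RunStatus.SUCCESS.value)  # True
    print(event["state"] == RunStatus.SUCCESS)        # False: str vs Enum member
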
@@ -96,7 +110,7 @@ class OptimizationDataset(BaseModel):
         response = requests.get(
             f"{API_HOST}/dataset-optimization/dataset/",
             params={"dataset_organization_id": organization_id},
-            headers=
+            headers=get_auth_headers(),
             timeout=READ_TIMEOUT,
         )
         response.raise_for_status()
@@ -112,10 +126,11 @@ class OptimizationDataset(BaseModel):
         """
         response = requests.delete(
             f"{API_HOST}/dataset-optimization/dataset/{dataset_id}",
-            headers=
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         response.raise_for_status()
+        logger.info("Deleted dataset with ID: %s", dataset_id)
 
     def delete(self, storage_integration=True) -> None:
         """
@@ -165,7 +180,7 @@ class OptimizationDataset(BaseModel):
             },
             headers={
                 **json_headers,
-                **
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
@@ -173,6 +188,7 @@ class OptimizationDataset(BaseModel):
         self.dataset_id = dataset_response.json()["id"]
         if not self.dataset_id:
             raise HirundoError("Failed to create the dataset")
+        logger.info("Created dataset with ID: %s", self.dataset_id)
         return self.dataset_id
 
     @staticmethod
@@ -189,7 +205,7 @@ class OptimizationDataset(BaseModel):
         """
         run_response = requests.post(
             f"{API_HOST}/dataset-optimization/run/{dataset_id}",
-            headers=
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         run_response.raise_for_status()
@@ -208,6 +224,7 @@ class OptimizationDataset(BaseModel):
             self.dataset_id = self.create()
             run_id = self.launch_optimization_run(self.dataset_id)
             self.run_id = run_id
+            logger.info("Started the run with ID: %s", run_id)
             return run_id
         except requests.HTTPError as error:
             try:
@@ -235,23 +252,38 @@ class OptimizationDataset(BaseModel):
         self.run_id = None
 
     @staticmethod
-    def
+    def _clean_df_index(df: "pd.DataFrame") -> "pd.DataFrame":
         """
-
-
-        This generator will produce values to show progress of the run.
+        Clean the index of a dataframe in case it has unnamed columns.
 
         Args:
-
-            retry: A number used to track the number of retries to limit re-checks. *Do not* provide this value manually.
-
-        Yields:
-            Each event will be a dict, where:
-            - `"state"` is PENDING, STARTED, RETRY, FAILURE or SUCCESS
-            - `"result"` is a string describing the progress as a percentage for a PENDING state,
-            or the error for a FAILURE state or the results for a SUCCESS state
+            df (DataFrame): Dataframe to clean
 
+        Returns:
+            DataFrame: Cleaned dataframe
         """
+        index_cols = sorted(
+            [col for col in df.columns if col.startswith("Unnamed")], reverse=True
+        )
+        if len(index_cols) > 0:
+            df.set_index(index_cols.pop(), inplace=True)
+            df.rename_axis(index=None, columns=None, inplace=True)
+        if len(index_cols) > 0:
+            df.drop(columns=index_cols, inplace=True)
+
+        return df
+
+    @staticmethod
+    def _read_csv_to_df(data: dict):
+        if data["state"] == RunStatus.SUCCESS.value:
+            data["result"] = OptimizationDataset._clean_df_index(
+                pd.read_csv(StringIO(data["result"]))
+            )
+        else:
+            pass
+
+    @staticmethod
+    def _check_run_by_id(run_id: str, retry=0) -> Generator[dict, None, None]:
         if retry > MAX_RETRIES:
             raise HirundoError("Max retries reached")
         last_event = None
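
What `_clean_df_index` compensates for: when the results CSV is written with its index, `pd.read_csv` surfaces that index as an "Unnamed: 0" column. A worked example of the cleanup (the sample column names are illustrative):

    from io import StringIO

    import pandas as pd

    csv = StringIO(",image,suspect_rank\n0,a.jpg,1\n1,b.jpg,2\n")
    df = pd.read_csv(csv)
    print(list(df.columns))  # ['Unnamed: 0', 'image', 'suspect_rank']

    # Mirror the hunk above: pop() on the reverse-sorted list promotes the
    # lowest-numbered "Unnamed" column to the index; any remaining ones are dropped.
    index_cols = sorted(
        [col for col in df.columns if col.startswith("Unnamed")], reverse=True
    )
    if len(index_cols) > 0:
        df.set_index(index_cols.pop(), inplace=True)
        df.rename_axis(index=None, columns=None, inplace=True)
    if len(index_cols) > 0:
        df.drop(columns=index_cols, inplace=True)
    print(list(df.columns))  # ['image', 'suspect_rank']
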
@@ -260,7 +292,7 @@ class OptimizationDataset(BaseModel):
             client,
             "GET",
             f"{API_HOST}/dataset-optimization/run/{run_id}",
-            headers=
+            headers=get_auth_headers(),
         ):
             if sse.event == "ping":
                 continue
@@ -272,25 +304,130 @@ class OptimizationDataset(BaseModel):
                 sse.retry,
             )
             last_event = json.loads(sse.data)
-
-
-
+            if not last_event:
+                continue
+            data = last_event["data"]
+            OptimizationDataset._read_csv_to_df(data)
+            yield data
+        if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
+            OptimizationDataset._check_run_by_id(run_id, retry + 1)
+
+    @staticmethod
+    @overload
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: typing.Literal[True]
+    ) -> typing.Optional[pd.DataFrame]:
+        ...
+
+    @staticmethod
+    @overload
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: typing.Literal[False] = False
+    ) -> pd.DataFrame:
+        ...
 
-
+    @staticmethod
+    @overload
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: bool
+    ) -> typing.Optional[pd.DataFrame]:
+        ...
+
+    @staticmethod
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: bool = False
+    ) -> typing.Optional[pd.DataFrame]:
         """
-        Check the status of
+        Check the status of a run given its ID
 
-
+        Args:
+            run_id: The `run_id` produced by a `run_optimization` call
+            stop_on_manual_approval: If True, the function will return `None` if the run is awaiting manual approval
 
-
-
-
-
+        Returns:
+            A pandas DataFrame with the results of the optimization run
+
+        Raises:
+            HirundoError: If the maximum number of retries is reached or if the run fails
+        """
+        logger.debug("Checking run with ID: %s", run_id)
+        with logging_redirect_tqdm():
+            t = tqdm(total=100.0)
+            for iteration in OptimizationDataset._check_run_by_id(run_id):
+                if iteration["state"] == RunStatus.SUCCESS.value:
+                    t.set_description("Optimization run completed successfully")
+                    t.n = 100.0
+                    t.refresh()
+                    t.close()
+                    return iteration["result"]
+                elif iteration["state"] == RunStatus.PENDING.value:
+                    t.set_description("Optimization run queued and not yet started")
+                    t.n = 0.0
+                    t.refresh()
+                elif iteration["state"] == RunStatus.STARTED.value:
+                    t.set_description(
+                        "Optimization run in progress. Downloading dataset"
+                    )
+                    t.n = 0.0
+                    t.refresh()
+                elif iteration["state"] is None:
+                    if (
+                        iteration["result"]
+                        and isinstance(iteration["result"], dict)
+                        and iteration["result"]["result"]
+                        and isinstance(iteration["result"]["result"], str)
+                    ):
+                        current_progress_percentage = float(
+                            iteration["result"]["result"].removesuffix("% done")
+                        )
+                        desc = (
+                            "Optimization run completed. Uploading results"
+                            if current_progress_percentage == 100.0
+                            else "Optimization run in progress"
+                        )
+                        t.set_description(desc)
+                        t.n = current_progress_percentage
+                        t.refresh()
+                elif iteration["state"] == RunStatus.AWAITING_MANUAL_APPROVAL.value:
+                    t.set_description("Awaiting manual approval")
+                    t.n = 100.0
+                    t.refresh()
+                    if stop_on_manual_approval:
+                        t.close()
+                        return None
+                elif iteration["state"] == RunStatus.FAILURE.value:
+                    t.set_description("Optimization run failed")
+                    t.close()
+                    raise HirundoError(
+                        f"Optimization run failed with error: {iteration['result']}"
+                    )
+            raise HirundoError("Optimization run failed with an unknown error")
+
+    @overload
+    def check_run(
+        self, stop_on_manual_approval: typing.Literal[True]
+    ) -> typing.Union[pd.DataFrame, None]:
+        ...
+
+    @overload
+    def check_run(
+        self, stop_on_manual_approval: typing.Literal[False] = False
+    ) -> pd.DataFrame:
+        ...
+
+    def check_run(
+        self, stop_on_manual_approval: bool = False
+    ) -> typing.Union[pd.DataFrame, None]:
+        """
+        Check the status of the current active instance's run.
+
+        Returns:
+            A pandas DataFrame with the results of the optimization run
 
         """
         if not self.run_id:
             raise ValueError("No run has been started")
-        return self.check_run_by_id(self.run_id)
+        return self.check_run_by_id(self.run_id, stop_on_manual_approval)
 
     @staticmethod
     async def acheck_run_by_id(run_id: str, retry=0) -> AsyncGenerator[dict, None]:
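
The three `@overload` stubs exist purely for type checkers: with the default `stop_on_manual_approval=False` the call can only end in a DataFrame or an exception, so the return type narrows to `pd.DataFrame`; passing `True` admits the `None` early return. A self-contained sketch of the same pattern, using `str` instead of a DataFrame to stay dependency-free (names are illustrative):

    import typing
    from typing import overload

    @overload
    def check(run_id: str, stop_on_manual_approval: typing.Literal[True]) -> typing.Optional[str]: ...
    @overload
    def check(run_id: str, stop_on_manual_approval: typing.Literal[False] = False) -> str: ...
    @overload
    def check(run_id: str, stop_on_manual_approval: bool) -> typing.Optional[str]: ...

    def check(run_id: str, stop_on_manual_approval: bool = False) -> typing.Optional[str]:
        # Stand-in body: the real method streams progress and returns None
        # only when stop_on_manual_approval is True.
        return None if stop_on_manual_approval else f"results for {run_id}"

    result = check("run-123")                               # typed as str
    maybe = check("run-123", stop_on_manual_approval=True)  # typed as Optional[str]
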
@@ -311,6 +448,7 @@ class OptimizationDataset(BaseModel):
         - `"result"` is a string describing the progress as a percentage for a PENDING state, or the error for a FAILURE state or the results for a SUCCESS state
 
         """
+        logger.debug("Checking run with ID: %s", run_id)
         if retry > MAX_RETRIES:
             raise HirundoError("Max retries reached")
         last_event = None
@@ -321,7 +459,7 @@ class OptimizationDataset(BaseModel):
             client,
             "GET",
             f"{API_HOST}/dataset-optimization/run/{run_id}",
-            headers=
+            headers=get_auth_headers(),
         )
         async for sse in async_iterator:
             if sse.event == "ping":
@@ -335,7 +473,7 @@ class OptimizationDataset(BaseModel):
             )
             last_event = json.loads(sse.data)
             yield last_event["data"]
-        if not last_event or last_event["data"]["state"] ==
+        if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
             OptimizationDataset.acheck_run_by_id(run_id, retry + 1)
 
     async def acheck_run(self) -> AsyncGenerator[dict, None]:
@@ -367,9 +505,10 @@ class OptimizationDataset(BaseModel):
         """
         if not run_id:
             raise ValueError("No run has been started")
+        logger.info("Cancelling run with ID: %s", run_id)
         response = requests.delete(
             f"{API_HOST}/dataset-optimization/run/{run_id}",
-            headers=
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         response.raise_for_status()
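
Taken together, the new surface of this module reads roughly as follows. This is a sketch assembled from the hunks above, not a verified quick-start: the constructor fields are elided, `run_optimization` is the method name implied by the docstring, and the cancellation method's name is not visible in the last hunk.

    from hirundo.dataset_optimization import OptimizationDataset

    dataset = OptimizationDataset(...)  # fields elided; see the class definition

    # Creates the dataset, launches the run, and logs "Started the run with ID: ..."
    run_id = dataset.run_optimization()

    # Streams SSE progress through a tqdm bar; returns a pandas DataFrame on SUCCESS,
    # or None when stop_on_manual_approval=True and the run pauses for approval.
    df = dataset.check_run()
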
hirundo/git.py
CHANGED
@@ -1,4 +1,3 @@
-import logging
 import re
 from typing import Annotated, Union
 
@@ -8,10 +7,11 @@ from pydantic import BaseModel, field_validator
 from pydantic_core import Url
 
 from hirundo._env import API_HOST
-from hirundo._headers import
+from hirundo._headers import get_auth_headers, json_headers
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
+from hirundo.logger import get_logger
 
-logger =
+logger = get_logger(__name__)
 
 
 class GitPlainAuthBase(BaseModel):
@@ -108,7 +108,7 @@ class GitRepo(BaseModel):
             json=self.model_dump(),
             headers={
                 **json_headers,
-                **
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
@@ -125,7 +125,7 @@ class GitRepo(BaseModel):
         git_repos = requests.get(
             f"{API_HOST}/git-repo/",
             headers={
-                **
+                **get_auth_headers(),
             },
             timeout=READ_TIMEOUT,
         )
@@ -143,7 +143,7 @@ class GitRepo(BaseModel):
         git_repo = requests.delete(
             f"{API_HOST}/git-repo/{git_repo_id}",
             headers={
-                **
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
 
hirundo/logger.py
ADDED
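
The diff does not expand this new file, but the call sites (`get_logger(__name__)` in dataset_optimization.py, git.py, and storage.py) and its 198-byte entry in the new RECORD suggest a small factory around the stdlib logging module. A speculative sketch only, not the actual file contents:

    import logging

    def get_logger(name: str) -> logging.Logger:
        # Hypothetical reconstruction: return a namespaced stdlib logger so the
        # logger.info(...) / logger.debug(...) calls added in this release work.
        return logging.getLogger(name)
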
hirundo/storage.py
CHANGED
@@ -9,9 +9,12 @@ from pydantic_core import Url
 
 from hirundo._constraints import S3BucketUrl, StorageIntegrationName
 from hirundo._env import API_HOST
-from hirundo._headers import
+from hirundo._headers import get_auth_headers, json_headers
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
 from hirundo.git import GitRepo
+from hirundo.logger import get_logger
+
+logger = get_logger(__name__)
 
 
 class StorageS3(BaseModel):
@@ -69,6 +72,10 @@ class StorageTypes(str, Enum):
     GCP = "GCP"
     # AZURE = "Azure" TODO: Azure storage integration is coming soon
     GIT = "Git"
+    LOCAL = "Local"
+    """
+    Local storage integration is only supported for on-premises installations.
+    """
 
 
 class StorageIntegration(BaseModel):
@@ -84,7 +91,7 @@ class StorageIntegration(BaseModel):
     """
     A name to identify the `StorageIntegration` in the Hirundo system.
     """
-    type: StorageTypes = pydantic.Field(
+    type: typing.Optional[StorageTypes] = pydantic.Field(
         examples=[
             StorageTypes.S3,
             StorageTypes.GCP,
@@ -196,7 +203,7 @@ class StorageIntegration(BaseModel):
         storage_integrations = requests.get(
             f"{API_HOST}/storage-integration/",
             params={"storage_integration_organization_id": organization_id},
-            headers=
+            headers=get_auth_headers(),
             timeout=READ_TIMEOUT,
         )
         storage_integrations.raise_for_status()
@@ -212,10 +219,11 @@ class StorageIntegration(BaseModel):
         """
         storage_integration = requests.delete(
             f"{API_HOST}/storage-integration/{storage_integration_id}",
-            headers=
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         storage_integration.raise_for_status()
+        logger.info("Deleted storage integration with ID: %s", storage_integration_id)
 
     def delete(self) -> None:
         """
@@ -236,15 +244,42 @@ class StorageIntegration(BaseModel):
             json=self.model_dump(),
             headers={
                 **json_headers,
-                **
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
         storage_integration.raise_for_status()
         storage_integration_id = storage_integration.json()["id"]
         self.id = storage_integration_id
+        logger.info("Created storage integration with ID: %s", storage_integration_id)
         return storage_integration_id
 
+    @model_validator(mode="after")
+    def validate_storage_type(self):
+        if self.type != StorageTypes.LOCAL and (
+            [self.s3, self.gcp, self.git].count(None) != 2
+        ):
+            raise ValueError("Exactly one of S3, GCP, or Git must be provided")
+        if self.type == StorageTypes.S3 and self.s3 is None:
+            raise ValueError("S3 storage details must be provided")
+        elif self.type == StorageTypes.GCP and self.gcp is None:
+            raise ValueError("GCP storage details must be provided")
+        elif self.type == StorageTypes.GIT and self.git is None:
+            raise ValueError("Git storage details must be provided")
+        if not self.type and not any([self.s3, self.gcp, self.git]):
+            raise ValueError("Storage type must be provided")
+        elif not self.type:
+            self.type = (
+                StorageTypes.S3
+                if self.s3 is not None
+                else StorageTypes.GCP
+                if self.gcp is not None
+                else StorageTypes.GIT
+                if self.git is not None
+                else StorageTypes.LOCAL
+            )
+        return self
+
 
 class StorageLink(BaseModel):
     storage_integration: StorageIntegration
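
The validator makes `type` optional in practice: supply exactly one of `s3`, `gcp`, or `git` and the type is inferred from it; supply none and `type` must be set explicitly (which, given the first check, effectively means `StorageTypes.LOCAL`). A standalone re-statement of the inference branch, not an import from the package (the `S3` member's string value is assumed; the diff only shows the `GCP`, `Git`, and `Local` values):

    from enum import Enum
    from typing import Optional

    class StorageTypes(str, Enum):
        S3 = "S3"  # assumed value; not shown in this diff
        GCP = "GCP"
        GIT = "Git"
        LOCAL = "Local"

    def infer_type(
        s3: Optional[object], gcp: Optional[object], git: Optional[object]
    ) -> StorageTypes:
        # Mirrors the chained conditional expression in validate_storage_type
        return (
            StorageTypes.S3
            if s3 is not None
            else StorageTypes.GCP
            if gcp is not None
            else StorageTypes.GIT
            if git is not None
            else StorageTypes.LOCAL
        )

    print(infer_type(s3=object(), gcp=None, git=None))  # StorageTypes.S3
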
{hirundo-0.1.5.dist-info → hirundo-0.1.7.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hirundo
-Version: 0.1.5
+Version: 0.1.7
 Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
 Author-email: Hirundo <dev@hirundo.io>
 License: MIT License
@@ -21,41 +21,43 @@ Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: pyyaml
-Requires-Dist: types-PyYAML
-Requires-Dist: pydantic
-Requires-Dist: twine
-Requires-Dist: python-dotenv
-Requires-Dist: types-requests
-Requires-Dist: typer
-Requires-Dist: httpx
-Requires-Dist: stamina
-Requires-Dist: httpx-sse
+Requires-Dist: pyyaml>=6.0.1
+Requires-Dist: types-PyYAML>=6.0.12
+Requires-Dist: pydantic>=2.7.1
+Requires-Dist: twine>=5.0.0
+Requires-Dist: python-dotenv>=1.0.1
+Requires-Dist: types-requests>=2.31.0
+Requires-Dist: typer>=0.12.3
+Requires-Dist: httpx>=0.27.0
+Requires-Dist: stamina>=24.2.0
+Requires-Dist: httpx-sse>=0.4.0
+Requires-Dist: pandas>=2.2.2
+Requires-Dist: tqdm>=4.66.5
 Provides-Extra: dev
-Requires-Dist: pyyaml
-Requires-Dist: types-PyYAML
-Requires-Dist: pydantic
-Requires-Dist: twine
-Requires-Dist: python-dotenv
-Requires-Dist: types-requests
-Requires-Dist: types-setuptools
-Requires-Dist: typer
-Requires-Dist: httpx
-Requires-Dist: stamina
-Requires-Dist: httpx-sse
-Requires-Dist: pytest
-Requires-Dist: pytest-asyncio
-Requires-Dist: uv
-Requires-Dist: pre-commit
-Requires-Dist: ruff
-Requires-Dist: bumpver
+Requires-Dist: pyyaml>=6.0.1; extra == "dev"
+Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
+Requires-Dist: pydantic>=2.7.1; extra == "dev"
+Requires-Dist: twine>=5.0.0; extra == "dev"
+Requires-Dist: python-dotenv>=1.0.1; extra == "dev"
+Requires-Dist: types-requests>=2.31.0; extra == "dev"
+Requires-Dist: types-setuptools>=69.5.0; extra == "dev"
+Requires-Dist: typer>=0.12.3; extra == "dev"
+Requires-Dist: httpx>=0.27.0; extra == "dev"
+Requires-Dist: stamina>=24.2.0; extra == "dev"
+Requires-Dist: httpx-sse>=0.4.0; extra == "dev"
+Requires-Dist: pytest>=8.2.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
+Requires-Dist: uv; extra == "dev"
+Requires-Dist: pre-commit>=3.7.1; extra == "dev"
+Requires-Dist: ruff; extra == "dev"
+Requires-Dist: bumpver; extra == "dev"
 Provides-Extra: docs
-Requires-Dist: sphinx
-Requires-Dist: sphinx-autobuild
-Requires-Dist: sphinx-click
-Requires-Dist: autodoc-pydantic
-Requires-Dist: furo
-Requires-Dist: sphinx-multiversion
+Requires-Dist: sphinx>=7.4.7; extra == "docs"
+Requires-Dist: sphinx-autobuild>=2024.4.16; extra == "docs"
+Requires-Dist: sphinx-click>=5.0.1; extra == "docs"
+Requires-Dist: autodoc-pydantic>=2.2.0; extra == "docs"
+Requires-Dist: furo; extra == "docs"
+Requires-Dist: sphinx-multiversion; extra == "docs"
 
 # Hirundo client
 
@@ -65,6 +67,8 @@ This repo contains the source code for the Hirundo client library
 
 To learn about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
 
+Note: Currently we only support the main CPython releases 3.9, 3.10 and 3.11. PyPy support may be introduced in the future.
+
 ## Development:
 
 ### Install dev dependencies

hirundo-0.1.7.dist-info/RECORD
ADDED
@@ -0,0 +1,19 @@
+hirundo/__init__.py,sha256=K0TcpPS937MazldFEgHnWLIKKmf-nt2k6Vjef58aKNs,707
+hirundo/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
+hirundo/_constraints.py,sha256=-RAUV9GnCsaT9pLGSqYglKOeK0joPBBexGTo87j5nkI,425
+hirundo/_env.py,sha256=6RVEn80KelYa-v2Tc18KTQlAZx0cYv3Y1HFS3H7gDt0,307
+hirundo/_headers.py,sha256=ggTyBwVT3nGyPidCcmYMX6pv0idzMxCI2S1BJQE-Bbs,253
+hirundo/_iter_sse_retrying.py,sha256=WLp_lw8ycBuAxoJkkGBu4y74Ajhcu11r1X-vd5_571A,3352
+hirundo/_timeouts.py,sha256=IfX8-mrLp809-A_xSLv1DhIqZnO-Qvy4FcTtOtvqLog,42
+hirundo/cli.py,sha256=pNRaeH__3qvdfcAWhqnGyZDE4uqtqW8BQ6fy4mD7aM4,3936
+hirundo/dataset_optimization.py,sha256=qevme8Gavuk6dBvR-Q5KffMzt9--hTfxzoUnSx4tqFI,19594
+hirundo/enum.py,sha256=-3w09g-_yRYIMiM8VA_Nb07WoQXf5IjyERTGonzNDs0,457
+hirundo/git.py,sha256=-Z_uFHsFfVR-_XqEq85wiiGZtgqIV81PWoC1B6UBSww,4769
+hirundo/logger.py,sha256=e_Kn6dic8DCqjQnDw60z25xnfofOypNoUdlnunySARs,198
+hirundo/storage.py,sha256=Dfi-LEjfrZJZM9mWE_CgQ4upP2lrN6-3HHGd-0znfqE,9737
+hirundo-0.1.7.dist-info/LICENSE,sha256=fusGGjqT2RGlU6kbkaOk7d-gDnsjk17wq67AO0mwBZI,1065
+hirundo-0.1.7.dist-info/METADATA,sha256=EKswSiESzpSqV9qL9o5uqJNsCUgK7UduvAr_mlXO9hg,4558
+hirundo-0.1.7.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
+hirundo-0.1.7.dist-info/entry_points.txt,sha256=4ZtnA_Nl1Af8fLnHp3lwjbGDEGU1S6ujb_JwtuQ7ZPM,44
+hirundo-0.1.7.dist-info/top_level.txt,sha256=cmyNqrNZOAYxnywJGFI1AJBLe4SkH8HGsfFx6ncdrbI,8
+hirundo-0.1.7.dist-info/RECORD,,

hirundo-0.1.5.dist-info/RECORD
DELETED
@@ -1,18 +0,0 @@
-hirundo/__init__.py,sha256=pYe3c1CYHXebdHb7kWrK37-ZZa2yMCfXbq45lRIAIrI,707
-hirundo/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
-hirundo/_constraints.py,sha256=-RAUV9GnCsaT9pLGSqYglKOeK0joPBBexGTo87j5nkI,425
-hirundo/_env.py,sha256=aObkRVLo9NBZiByd2FcoLrk3m8tnswuYzP4Tnj3EE-o,268
-hirundo/_headers.py,sha256=htxHRjtD91C5D0svyk-zqhKV9LwQCEZauIa4ZTAfe5k,188
-hirundo/_iter_sse_retrying.py,sha256=WLp_lw8ycBuAxoJkkGBu4y74Ajhcu11r1X-vd5_571A,3352
-hirundo/_timeouts.py,sha256=IfX8-mrLp809-A_xSLv1DhIqZnO-Qvy4FcTtOtvqLog,42
-hirundo/cli.py,sha256=qj1Txt6lOU3V10SLtzH4uEWJ4DdkdOIEQaKn8wiJMss,3922
-hirundo/dataset_optimization.py,sha256=CLo9eclW_trDwzfr6uZlJ8JQb6XpWcKtACqSTaAF_fo,14583
-hirundo/enum.py,sha256=-3w09g-_yRYIMiM8VA_Nb07WoQXf5IjyERTGonzNDs0,457
-hirundo/git.py,sha256=GtowxPL78KleVhSY3QISu7-cUPrFbWC4YWBAuzuzryw,4731
-hirundo/storage.py,sha256=CxRdSnZGf4mtzNV2Ge_hwowd9pDP7NT9-xvWTbl187M,8185
-hirundo-0.1.5.dist-info/LICENSE,sha256=fusGGjqT2RGlU6kbkaOk7d-gDnsjk17wq67AO0mwBZI,1065
-hirundo-0.1.5.dist-info/METADATA,sha256=a94x33-X2G3GrEyusmNTSmx_aerNiTmhZoUL6XAE_9g,4428
-hirundo-0.1.5.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-hirundo-0.1.5.dist-info/entry_points.txt,sha256=4ZtnA_Nl1Af8fLnHp3lwjbGDEGU1S6ujb_JwtuQ7ZPM,44
-hirundo-0.1.5.dist-info/top_level.txt,sha256=cmyNqrNZOAYxnywJGFI1AJBLe4SkH8HGsfFx6ncdrbI,8
-hirundo-0.1.5.dist-info/RECORD,,

{hirundo-0.1.5.dist-info → hirundo-0.1.7.dist-info}/LICENSE
File without changes

{hirundo-0.1.5.dist-info → hirundo-0.1.7.dist-info}/entry_points.txt
File without changes

{hirundo-0.1.5.dist-info → hirundo-0.1.7.dist-info}/top_level.txt
File without changes