hirundo 0.1.5__tar.gz → 0.1.7__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (26)
  1. {hirundo-0.1.5 → hirundo-0.1.7}/PKG-INFO +5 -1
  2. {hirundo-0.1.5 → hirundo-0.1.7}/README.md +2 -0
  3. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo/__init__.py +1 -1
  4. hirundo-0.1.7/hirundo/_env.py +15 -0
  5. hirundo-0.1.7/hirundo/_headers.py +13 -0
  6. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo/cli.py +5 -1
  7. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo/dataset_optimization.py +174 -35
  8. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo/git.py +6 -6
  9. hirundo-0.1.7/hirundo/logger.py +8 -0
  10. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo/storage.py +40 -5
  11. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo.egg-info/PKG-INFO +5 -1
  12. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo.egg-info/SOURCES.txt +1 -0
  13. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo.egg-info/requires.txt +2 -0
  14. {hirundo-0.1.5 → hirundo-0.1.7}/pyproject.toml +3 -1
  15. hirundo-0.1.5/hirundo/_env.py +0 -12
  16. hirundo-0.1.5/hirundo/_headers.py +0 -9
  17. {hirundo-0.1.5 → hirundo-0.1.7}/LICENSE +0 -0
  18. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo/__main__.py +0 -0
  19. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo/_constraints.py +0 -0
  20. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo/_iter_sse_retrying.py +0 -0
  21. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo/_timeouts.py +0 -0
  22. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo/enum.py +0 -0
  23. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo.egg-info/dependency_links.txt +0 -0
  24. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo.egg-info/entry_points.txt +0 -0
  25. {hirundo-0.1.5 → hirundo-0.1.7}/hirundo.egg-info/top_level.txt +0 -0
  26. {hirundo-0.1.5 → hirundo-0.1.7}/setup.cfg +0 -0
{hirundo-0.1.5 → hirundo-0.1.7}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hirundo
-Version: 0.1.5
+Version: 0.1.7
 Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
 Author-email: Hirundo <dev@hirundo.io>
 License: MIT License
@@ -31,6 +31,8 @@ Requires-Dist: typer>=0.12.3
 Requires-Dist: httpx>=0.27.0
 Requires-Dist: stamina>=24.2.0
 Requires-Dist: httpx-sse>=0.4.0
+Requires-Dist: pandas>=2.2.2
+Requires-Dist: tqdm>=4.66.5
 Provides-Extra: dev
 Requires-Dist: pyyaml>=6.0.1; extra == "dev"
 Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
@@ -65,6 +67,8 @@ This repo contains the source code for the Hirundo client library
 
 To learn about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
 
+Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyPy support may be introduced in the future.
+
 ## Development:
 
 ### Install dev dependencies
{hirundo-0.1.5 → hirundo-0.1.7}/README.md

@@ -6,6 +6,8 @@ This repo contains the source code for the Hirundo client library
 
 To learn about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
 
+Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyPy support may be introduced in the future.
+
 ## Development:
 
 ### Install dev dependencies
{hirundo-0.1.5 → hirundo-0.1.7}/hirundo/__init__.py

@@ -32,4 +32,4 @@ __all__ = [
     "StorageIntegration",
 ]
 
-__version__ = "0.1.5"
+__version__ = "0.1.7"
hirundo-0.1.7/hirundo/_env.py

@@ -0,0 +1,15 @@
+import os
+
+from dotenv import load_dotenv
+
+load_dotenv()
+
+API_HOST = os.getenv("API_HOST", "https://api.hirundo.io")
+API_KEY = os.getenv("API_KEY")
+
+
+def check_api_key():
+    if not API_KEY:
+        raise ValueError(
+            "API_KEY is not set. Please run `hirundo setup` to set the API key"
+        )
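
Worth noting: in 0.1.5 this module raised `ValueError` at import time when `API_KEY` was unset (see the deleted `hirundo-0.1.5/hirundo/_env.py` at the bottom of this diff), which made `import hirundo` fail on unconfigured machines. 0.1.7 defers the check to `check_api_key()`. A minimal sketch of the new behaviour (assumes no `.env` file supplies the key):

```python
import os

os.environ.pop("API_KEY", None)  # simulate an unconfigured machine

from hirundo._env import check_api_key  # the import itself no longer raises

try:
    check_api_key()
except ValueError as err:
    print(err)  # API_KEY is not set. Please run `hirundo setup` to set the API key
```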
hirundo-0.1.7/hirundo/_headers.py

@@ -0,0 +1,13 @@
+from hirundo._env import API_KEY, check_api_key
+
+json_headers = {
+    "Content-Type": "application/json",
+    "Accept": "application/json",
+}
+
+
+def get_auth_headers():
+    check_api_key()
+    return {
+        "Authorization": f"Bearer {API_KEY}",
+    }
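
The old module-level `auth_headers` dict (built once at import, see the deleted `hirundo-0.1.5/hirundo/_headers.py` at the bottom of this diff) is replaced by a per-call `get_auth_headers()`, so the presence of the key is now checked each time headers are built rather than at import. A sketch of how callers combine the two header sets (the key value is hypothetical, and must be in the environment before `hirundo._env` is first imported):

```python
import os

os.environ["API_KEY"] = "hyp-key-123"  # hypothetical key

from hirundo._headers import get_auth_headers, json_headers

headers = {**json_headers, **get_auth_headers()}
# {'Content-Type': 'application/json', 'Accept': 'application/json',
#  'Authorization': 'Bearer hyp-key-123'}
```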
{hirundo-0.1.5 → hirundo-0.1.7}/hirundo/cli.py

@@ -14,8 +14,12 @@ hirundo_epilog = (
     else "Made with ❤️ by Hirundo. Visit https://www.hirundo.io for more information."
 )
 
+
 app = typer.Typer(
-    name="hirundo", no_args_is_help=True, rich_markup_mode="rich", epilog=hirundo_epilog
+    name="hirundo",
+    no_args_is_help=True,
+    rich_markup_mode="rich",
+    epilog=hirundo_epilog,
 )
 
 
{hirundo-0.1.5 → hirundo-0.1.7}/hirundo/dataset_optimization.py

@@ -1,20 +1,26 @@
 import json
-import logging
+import typing
 from collections.abc import AsyncGenerator, Generator
-from typing import Union
+from enum import Enum
+from io import StringIO
+from typing import Union, overload
 
 import httpx
+import pandas as pd
 import requests
 from pydantic import BaseModel, Field, model_validator
+from tqdm import tqdm
+from tqdm.contrib.logging import logging_redirect_tqdm
 
 from hirundo._env import API_HOST
-from hirundo._headers import auth_headers, json_headers
+from hirundo._headers import get_auth_headers, json_headers
 from hirundo._iter_sse_retrying import aiter_sse_retrying, iter_sse_retrying
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
 from hirundo.enum import DatasetMetadataType, LabellingType
+from hirundo.logger import get_logger
 from hirundo.storage import StorageIntegration, StorageLink
 
-logger = logging.getLogger(__name__)
+logger = get_logger(__name__)
 
 
 class HirundoError(Exception):
@@ -28,6 +34,14 @@ class HirundoError(Exception):
 MAX_RETRIES = 200 # Max 200 retries for HTTP SSE connection
 
 
+class RunStatus(Enum):
+    STARTED = "STARTED"
+    PENDING = "PENDING"
+    SUCCESS = "SUCCESS"
+    FAILURE = "FAILURE"
+    AWAITING_MANUAL_APPROVAL = "AWAITING MANUAL APPROVAL"
+
+
 class OptimizationDataset(BaseModel):
     name: str
     """
@@ -96,7 +110,7 @@ class OptimizationDataset(BaseModel):
         response = requests.get(
             f"{API_HOST}/dataset-optimization/dataset/",
             params={"dataset_organization_id": organization_id},
-            headers=auth_headers,
+            headers=get_auth_headers(),
             timeout=READ_TIMEOUT,
         )
         response.raise_for_status()
@@ -112,10 +126,11 @@ class OptimizationDataset(BaseModel):
         """
         response = requests.delete(
             f"{API_HOST}/dataset-optimization/dataset/{dataset_id}",
-            headers=auth_headers,
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         response.raise_for_status()
+        logger.info("Deleted dataset with ID: %s", dataset_id)
 
     def delete(self, storage_integration=True) -> None:
         """
@@ -165,7 +180,7 @@ class OptimizationDataset(BaseModel):
             },
             headers={
                 **json_headers,
-                **auth_headers,
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
@@ -173,6 +188,7 @@ class OptimizationDataset(BaseModel):
         self.dataset_id = dataset_response.json()["id"]
         if not self.dataset_id:
             raise HirundoError("Failed to create the dataset")
+        logger.info("Created dataset with ID: %s", self.dataset_id)
         return self.dataset_id
 
     @staticmethod
@@ -189,7 +205,7 @@ class OptimizationDataset(BaseModel):
         """
         run_response = requests.post(
             f"{API_HOST}/dataset-optimization/run/{dataset_id}",
-            headers=auth_headers,
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         run_response.raise_for_status()
@@ -208,6 +224,7 @@ class OptimizationDataset(BaseModel):
             self.dataset_id = self.create()
             run_id = self.launch_optimization_run(self.dataset_id)
             self.run_id = run_id
+            logger.info("Started the run with ID: %s", run_id)
             return run_id
         except requests.HTTPError as error:
             try:
@@ -235,23 +252,38 @@ class OptimizationDataset(BaseModel):
         self.run_id = None
 
     @staticmethod
-    def check_run_by_id(run_id: str, retry=0) -> Generator[dict, None, None]:
+    def _clean_df_index(df: "pd.DataFrame") -> "pd.DataFrame":
         """
-        Check the status of a run given its ID
-
-        This generator will produce values to show progress of the run.
+        Clean the index of a dataframe in case it has unnamed columns.
 
         Args:
-            run_id: The `run_id` produced by a `run_optimization` call
-            retry: A number used to track the number of retries to limit re-checks. *Do not* provide this value manually.
-
-        Yields:
-            Each event will be a dict, where:
-            - `"state"` is PENDING, STARTED, RETRY, FAILURE or SUCCESS
-            - `"result"` is a string describing the progress as a percentage for a PENDING state,
-            or the error for a FAILURE state or the results for a SUCCESS state
+            df (DataFrame): Dataframe to clean
 
+        Returns:
+            DataFrame: Cleaned dataframe
         """
+        index_cols = sorted(
+            [col for col in df.columns if col.startswith("Unnamed")], reverse=True
+        )
+        if len(index_cols) > 0:
+            df.set_index(index_cols.pop(), inplace=True)
+            df.rename_axis(index=None, columns=None, inplace=True)
+        if len(index_cols) > 0:
+            df.drop(columns=index_cols, inplace=True)
+
+        return df
+
+    @staticmethod
+    def _read_csv_to_df(data: dict):
+        if data["state"] == RunStatus.SUCCESS.value:
+            data["result"] = OptimizationDataset._clean_df_index(
+                pd.read_csv(StringIO(data["result"]))
+            )
+        else:
+            pass
+
+    @staticmethod
+    def _check_run_by_id(run_id: str, retry=0) -> Generator[dict, None, None]:
         if retry > MAX_RETRIES:
             raise HirundoError("Max retries reached")
         last_event = None
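
`_read_csv_to_df` turns a successful run's CSV payload into a DataFrame, and `_clean_df_index` strips the `Unnamed: …` columns that `pandas.read_csv` produces when a CSV was written with its index: the lowest-numbered one becomes the index and any others are dropped. A self-contained sketch (the column names here are invented, not the platform's actual result schema):

```python
from io import StringIO

import pandas as pd

from hirundo.dataset_optimization import OptimizationDataset

# A CSV written with `df.to_csv()` round-trips with an extra "Unnamed: 0" column.
csv_result = "Unnamed: 0,image,suspect_level\n0,img_001.jpg,0.97\n1,img_002.jpg,0.12\n"
df = pd.read_csv(StringIO(csv_result))
print(list(df.columns))  # ['Unnamed: 0', 'image', 'suspect_level']

clean = OptimizationDataset._clean_df_index(df)
print(list(clean.columns))  # ['image', 'suspect_level']
```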
@@ -260,7 +292,7 @@ class OptimizationDataset(BaseModel):
             client,
             "GET",
             f"{API_HOST}/dataset-optimization/run/{run_id}",
-            headers=auth_headers,
+            headers=get_auth_headers(),
         ):
             if sse.event == "ping":
                 continue
@@ -272,25 +304,130 @@ class OptimizationDataset(BaseModel):
                     sse.retry,
                 )
                 last_event = json.loads(sse.data)
-                yield last_event["data"]
-        if not last_event or last_event["data"]["state"] == "PENDING":
-            OptimizationDataset.check_run_by_id(run_id, retry + 1)
+                if not last_event:
+                    continue
+                data = last_event["data"]
+                OptimizationDataset._read_csv_to_df(data)
+                yield data
+        if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
+            OptimizationDataset._check_run_by_id(run_id, retry + 1)
+
+    @staticmethod
+    @overload
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: typing.Literal[True]
+    ) -> typing.Optional[pd.DataFrame]:
+        ...
+
+    @staticmethod
+    @overload
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: typing.Literal[False] = False
+    ) -> pd.DataFrame:
+        ...
 
-    def check_run(self) -> Generator[dict, None, None]:
+    @staticmethod
+    @overload
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: bool
+    ) -> typing.Optional[pd.DataFrame]:
+        ...
+
+    @staticmethod
+    def check_run_by_id(
+        run_id: str, stop_on_manual_approval: bool = False
+    ) -> typing.Optional[pd.DataFrame]:
         """
-        Check the status of the current active instance's run.
+        Check the status of a run given its ID
 
-        This generator will produce values to show progress of the run.
+        Args:
+            run_id: The `run_id` produced by a `run_optimization` call
+            stop_on_manual_approval: If True, the function will return `None` if the run is awaiting manual approval
 
-        Yields:
-            Each event will be a dict, where:
-            - `"state"` is PENDING, STARTED, RETRY, FAILURE or SUCCESS
-            - `"result"` is a string describing the progress as a percentage for a PENDING state, or the error for a FAILURE state or the results for a SUCCESS state
+        Returns:
+            A pandas DataFrame with the results of the optimization run
+
+        Raises:
+            HirundoError: If the maximum number of retries is reached or if the run fails
+        """
+        logger.debug("Checking run with ID: %s", run_id)
+        with logging_redirect_tqdm():
+            t = tqdm(total=100.0)
+            for iteration in OptimizationDataset._check_run_by_id(run_id):
+                if iteration["state"] == RunStatus.SUCCESS.value:
+                    t.set_description("Optimization run completed successfully")
+                    t.n = 100.0
+                    t.refresh()
+                    t.close()
+                    return iteration["result"]
+                elif iteration["state"] == RunStatus.PENDING.value:
+                    t.set_description("Optimization run queued and not yet started")
+                    t.n = 0.0
+                    t.refresh()
+                elif iteration["state"] == RunStatus.STARTED.value:
+                    t.set_description(
+                        "Optimization run in progress. Downloading dataset"
+                    )
+                    t.n = 0.0
+                    t.refresh()
+                elif iteration["state"] is None:
+                    if (
+                        iteration["result"]
+                        and isinstance(iteration["result"], dict)
+                        and iteration["result"]["result"]
+                        and isinstance(iteration["result"]["result"], str)
+                    ):
+                        current_progress_percentage = float(
+                            iteration["result"]["result"].removesuffix("% done")
+                        )
+                        desc = (
+                            "Optimization run completed. Uploading results"
+                            if current_progress_percentage == 100.0
+                            else "Optimization run in progress"
+                        )
+                        t.set_description(desc)
+                        t.n = current_progress_percentage
+                        t.refresh()
+                elif iteration["state"] == RunStatus.AWAITING_MANUAL_APPROVAL.value:
+                    t.set_description("Awaiting manual approval")
+                    t.n = 100.0
+                    t.refresh()
+                    if stop_on_manual_approval:
+                        t.close()
+                        return None
+                elif iteration["state"] == RunStatus.FAILURE.value:
+                    t.set_description("Optimization run failed")
+                    t.close()
+                    raise HirundoError(
+                        f"Optimization run failed with error: {iteration['result']}"
+                    )
+        raise HirundoError("Optimization run failed with an unknown error")
+
+    @overload
+    def check_run(
+        self, stop_on_manual_approval: typing.Literal[True]
+    ) -> typing.Union[pd.DataFrame, None]:
+        ...
+
+    @overload
+    def check_run(
+        self, stop_on_manual_approval: typing.Literal[False] = False
+    ) -> pd.DataFrame:
+        ...
+
+    def check_run(
+        self, stop_on_manual_approval: bool = False
+    ) -> typing.Union[pd.DataFrame, None]:
+        """
+        Check the status of the current active instance's run.
+
+        Returns:
+            A pandas DataFrame with the results of the optimization run
 
         """
         if not self.run_id:
             raise ValueError("No run has been started")
-        return self.check_run_by_id(self.run_id)
+        return self.check_run_by_id(self.run_id, stop_on_manual_approval)
 
     @staticmethod
     async def acheck_run_by_id(run_id: str, retry=0) -> AsyncGenerator[dict, None]:
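
This is the core behavioural change of 0.1.7: `check_run_by_id` is no longer a generator of raw SSE event dicts. It blocks, renders progress on a `tqdm` bar, returns the SUCCESS result (already parsed to a DataFrame by `_read_csv_to_df`), and raises `HirundoError` on failure; the `typing.overload` stubs encode that `None` can only come back when `stop_on_manual_approval=True`. Hypothetical usage (the run ID is made up):

```python
from hirundo.dataset_optimization import OptimizationDataset

run_id = "hypothetical-run-id"  # as returned by OptimizationDataset.run_optimization()

df = OptimizationDataset.check_run_by_id(run_id)  # blocks until SUCCESS or raises
print(df.head())

maybe_df = OptimizationDataset.check_run_by_id(run_id, stop_on_manual_approval=True)
if maybe_df is None:
    print("Run is paused, awaiting manual approval")
```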
@@ -311,6 +448,7 @@ class OptimizationDataset(BaseModel):
            - `"result"` is a string describing the progress as a percentage for a PENDING state, or the error for a FAILURE state or the results for a SUCCESS state
 
         """
+        logger.debug("Checking run with ID: %s", run_id)
         if retry > MAX_RETRIES:
             raise HirundoError("Max retries reached")
         last_event = None
@@ -321,7 +459,7 @@ class OptimizationDataset(BaseModel):
             client,
             "GET",
             f"{API_HOST}/dataset-optimization/run/{run_id}",
-            headers=auth_headers,
+            headers=get_auth_headers(),
         )
         async for sse in async_iterator:
             if sse.event == "ping":
@@ -335,7 +473,7 @@ class OptimizationDataset(BaseModel):
                 )
                 last_event = json.loads(sse.data)
                 yield last_event["data"]
-        if not last_event or last_event["data"]["state"] == "PENDING":
+        if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
             OptimizationDataset.acheck_run_by_id(run_id, retry + 1)
 
     async def acheck_run(self) -> AsyncGenerator[dict, None]:
@@ -367,9 +505,10 @@ class OptimizationDataset(BaseModel):
         """
         if not run_id:
             raise ValueError("No run has been started")
+        logger.info("Cancelling run with ID: %s", run_id)
         response = requests.delete(
             f"{API_HOST}/dataset-optimization/run/{run_id}",
-            headers=auth_headers,
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         response.raise_for_status()
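
Unlike the sync path, the async API keeps its 0.1.5 shape: `acheck_run_by_id` still yields raw event dicts with `"state"` and `"result"` keys. A usage sketch (the run ID is made up):

```python
import asyncio

from hirundo.dataset_optimization import OptimizationDataset, RunStatus


async def watch(run_id: str) -> None:
    # Only the sync API was reshaped around DataFrames in 0.1.7;
    # here each event is still a plain dict.
    async for event in OptimizationDataset.acheck_run_by_id(run_id):
        print(event["state"], event["result"])
        if event["state"] == RunStatus.SUCCESS.value:
            break


asyncio.run(watch("hypothetical-run-id"))
```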
{hirundo-0.1.5 → hirundo-0.1.7}/hirundo/git.py

@@ -1,4 +1,3 @@
-import logging
 import re
 from typing import Annotated, Union
 
@@ -8,10 +7,11 @@ from pydantic import BaseModel, field_validator
 from pydantic_core import Url
 
 from hirundo._env import API_HOST
-from hirundo._headers import auth_headers, json_headers
+from hirundo._headers import get_auth_headers, json_headers
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
+from hirundo.logger import get_logger
 
-logger = logging.getLogger(__name__)
+logger = get_logger(__name__)
 
 
 class GitPlainAuthBase(BaseModel):
@@ -108,7 +108,7 @@ class GitRepo(BaseModel):
             json=self.model_dump(),
             headers={
                 **json_headers,
-                **auth_headers,
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
@@ -125,7 +125,7 @@ class GitRepo(BaseModel):
         git_repos = requests.get(
             f"{API_HOST}/git-repo/",
             headers={
-                **auth_headers,
+                **get_auth_headers(),
             },
             timeout=READ_TIMEOUT,
         )
@@ -143,7 +143,7 @@ class GitRepo(BaseModel):
         git_repo = requests.delete(
             f"{API_HOST}/git-repo/{git_repo_id}",
             headers={
-                **auth_headers,
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
hirundo-0.1.7/hirundo/logger.py

@@ -0,0 +1,8 @@
+import logging
+
+
+def get_logger(name: str) -> logging.Logger:
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.INFO)
+    logger.addHandler(logging.StreamHandler())
+    return logger
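
Two things to know about this helper as a consumer: each module logger is pinned to `INFO` explicitly, so changing the root or the `hirundo` parent logger alone will not silence them, and every `get_logger` call attaches a fresh `StreamHandler` (calling it twice for one name would duplicate output; within the package each module calls it once at import). To tune verbosity from an application, adjust the module loggers directly (a sketch):

```python
import logging

# The hirundo module loggers carry their own explicit levels, so set them directly:
for name in ("hirundo.dataset_optimization", "hirundo.git", "hirundo.storage"):
    logging.getLogger(name).setLevel(logging.WARNING)
```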
{hirundo-0.1.5 → hirundo-0.1.7}/hirundo/storage.py

@@ -9,9 +9,12 @@ from pydantic_core import Url
 
 from hirundo._constraints import S3BucketUrl, StorageIntegrationName
 from hirundo._env import API_HOST
-from hirundo._headers import auth_headers, json_headers
+from hirundo._headers import get_auth_headers, json_headers
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
 from hirundo.git import GitRepo
+from hirundo.logger import get_logger
+
+logger = get_logger(__name__)
 
 
 class StorageS3(BaseModel):
@@ -69,6 +72,10 @@ class StorageTypes(str, Enum):
     GCP = "GCP"
     # AZURE = "Azure" TODO: Azure storage integration is coming soon
     GIT = "Git"
+    LOCAL = "Local"
+    """
+    Local storage integration is only supported for on-premises installations.
+    """
 
 
 class StorageIntegration(BaseModel):
@@ -84,7 +91,7 @@ class StorageIntegration(BaseModel):
     """
     A name to identify the `StorageIntegration` in the Hirundo system.
     """
-    type: StorageTypes = pydantic.Field(
+    type: typing.Optional[StorageTypes] = pydantic.Field(
         examples=[
             StorageTypes.S3,
             StorageTypes.GCP,
@@ -196,7 +203,7 @@ class StorageIntegration(BaseModel):
         storage_integrations = requests.get(
             f"{API_HOST}/storage-integration/",
             params={"storage_integration_organization_id": organization_id},
-            headers=auth_headers,
+            headers=get_auth_headers(),
             timeout=READ_TIMEOUT,
         )
         storage_integrations.raise_for_status()
@@ -212,10 +219,11 @@ class StorageIntegration(BaseModel):
         """
         storage_integration = requests.delete(
             f"{API_HOST}/storage-integration/{storage_integration_id}",
-            headers=auth_headers,
+            headers=get_auth_headers(),
             timeout=MODIFY_TIMEOUT,
         )
         storage_integration.raise_for_status()
+        logger.info("Deleted storage integration with ID: %s", storage_integration_id)
 
     def delete(self) -> None:
         """
@@ -236,15 +244,42 @@ class StorageIntegration(BaseModel):
             json=self.model_dump(),
             headers={
                 **json_headers,
-                **auth_headers,
+                **get_auth_headers(),
             },
             timeout=MODIFY_TIMEOUT,
         )
         storage_integration.raise_for_status()
         storage_integration_id = storage_integration.json()["id"]
         self.id = storage_integration_id
+        logger.info("Created storage integration with ID: %s", storage_integration_id)
         return storage_integration_id
 
+    @model_validator(mode="after")
+    def validate_storage_type(self):
+        if self.type != StorageTypes.LOCAL and (
+            [self.s3, self.gcp, self.git].count(None) != 2
+        ):
+            raise ValueError("Exactly one of S3, GCP, or Git must be provided")
+        if self.type == StorageTypes.S3 and self.s3 is None:
+            raise ValueError("S3 storage details must be provided")
+        elif self.type == StorageTypes.GCP and self.gcp is None:
+            raise ValueError("GCP storage details must be provided")
+        elif self.type == StorageTypes.GIT and self.git is None:
+            raise ValueError("Git storage details must be provided")
+        if not self.type and not any([self.s3, self.gcp, self.git]):
+            raise ValueError("Storage type must be provided")
+        elif not self.type:
+            self.type = (
+                StorageTypes.S3
+                if self.s3 is not None
+                else StorageTypes.GCP
+                if self.gcp is not None
+                else StorageTypes.GIT
+                if self.git is not None
+                else StorageTypes.LOCAL
+            )
+        return self
+
 
 class StorageLink(BaseModel):
     storage_integration: StorageIntegration
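
The new `validate_storage_type` validator both enforces consistency (exactly one detail block unless the type is `Local`, and the block must match the declared type) and infers a missing `type` from whichever block is set. The inference is a chained conditional expression; written out as a plain function it reads as below (a standalone sketch, not package code). Note that an integration with neither a `type` nor any detail block fails the earlier checks, so the final `Local` fallback is never actually reached through inference:

```python
def infer_storage_type(s3, gcp, git) -> str:
    # Mirrors the validator's chained conditional: the first non-None
    # detail block wins, checked in S3 -> GCP -> Git order.
    if s3 is not None:
        return "S3"
    if gcp is not None:
        return "GCP"
    if git is not None:
        return "Git"
    return "Local"  # unreachable via inference, see note above


assert infer_storage_type("s3-details", None, None) == "S3"
assert infer_storage_type(None, None, "git-details") == "Git"
```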
{hirundo-0.1.5 → hirundo-0.1.7}/hirundo.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hirundo
-Version: 0.1.5
+Version: 0.1.7
 Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
 Author-email: Hirundo <dev@hirundo.io>
 License: MIT License
@@ -31,6 +31,8 @@ Requires-Dist: typer>=0.12.3
 Requires-Dist: httpx>=0.27.0
 Requires-Dist: stamina>=24.2.0
 Requires-Dist: httpx-sse>=0.4.0
+Requires-Dist: pandas>=2.2.2
+Requires-Dist: tqdm>=4.66.5
 Provides-Extra: dev
 Requires-Dist: pyyaml>=6.0.1; extra == "dev"
 Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
@@ -65,6 +67,8 @@ This repo contains the source code for the Hirundo client library
 
 To learn about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
 
+Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyPy support may be introduced in the future.
+
 ## Development:
 
 ### Install dev dependencies
{hirundo-0.1.5 → hirundo-0.1.7}/hirundo.egg-info/SOURCES.txt

@@ -12,6 +12,7 @@ hirundo/cli.py
 hirundo/dataset_optimization.py
 hirundo/enum.py
 hirundo/git.py
+hirundo/logger.py
 hirundo/storage.py
 hirundo.egg-info/PKG-INFO
 hirundo.egg-info/SOURCES.txt
{hirundo-0.1.5 → hirundo-0.1.7}/hirundo.egg-info/requires.txt

@@ -8,6 +8,8 @@ typer>=0.12.3
 httpx>=0.27.0
 stamina>=24.2.0
 httpx-sse>=0.4.0
+pandas>=2.2.2
+tqdm>=4.66.5
 
 [dev]
 pyyaml>=6.0.1
{hirundo-0.1.5 → hirundo-0.1.7}/pyproject.toml

@@ -7,7 +7,7 @@ packages = ["hirundo"]
 
 [project]
 name = "hirundo"
-version = "0.1.5"
+version = "0.1.7"
 description = "This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets."
 authors = [{ name = "Hirundo", email = "dev@hirundo.io" }]
 readme = "README.md"
@@ -35,6 +35,8 @@ dependencies = [
     "httpx>=0.27.0",
     "stamina>=24.2.0",
     "httpx-sse>=0.4.0",
+    "pandas>=2.2.2",
+    "tqdm>=4.66.5",
 ]
 
 [project.scripts]
hirundo-0.1.5/hirundo/_env.py

@@ -1,12 +0,0 @@
-import os
-
-from dotenv import load_dotenv
-
-load_dotenv()
-
-API_HOST = os.getenv("API_HOST", "https://api.hirundo.io")
-API_KEY = os.getenv("API_KEY")
-if not API_KEY:
-    raise ValueError(
-        "API_KEY is not set. Please run `hirundo setup` to set the API key"
-    )
hirundo-0.1.5/hirundo/_headers.py

@@ -1,9 +0,0 @@
-from hirundo._env import API_KEY
-
-json_headers = {
-    "Content-Type": "application/json",
-    "Accept": "application/json",
-}
-auth_headers = {
-    "Authorization": f"Bearer {API_KEY}",
-}