hirundo 0.1.9__tar.gz → 0.1.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {hirundo-0.1.9 → hirundo-0.1.16}/PKG-INFO +9 -5
  2. {hirundo-0.1.9 → hirundo-0.1.16}/README.md +1 -1
  3. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/__init__.py +13 -6
  4. hirundo-0.1.16/hirundo/_dataframe.py +43 -0
  5. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/_env.py +2 -2
  6. hirundo-0.1.16/hirundo/_headers.py +29 -0
  7. hirundo-0.1.16/hirundo/_timeouts.py +3 -0
  8. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/cli.py +52 -0
  9. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/dataset_optimization.py +31 -106
  10. hirundo-0.1.16/hirundo/dataset_optimization_results.py +42 -0
  11. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/git.py +11 -18
  12. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/storage.py +13 -16
  13. hirundo-0.1.16/hirundo/unzip.py +247 -0
  14. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo.egg-info/PKG-INFO +9 -5
  15. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo.egg-info/SOURCES.txt +4 -1
  16. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo.egg-info/requires.txt +7 -2
  17. {hirundo-0.1.9 → hirundo-0.1.16}/pyproject.toml +12 -3
  18. hirundo-0.1.9/hirundo/_headers.py +0 -13
  19. hirundo-0.1.9/hirundo/_timeouts.py +0 -2
  20. {hirundo-0.1.9 → hirundo-0.1.16}/LICENSE +0 -0
  21. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/__main__.py +0 -0
  22. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/_constraints.py +0 -0
  23. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/_http.py +0 -0
  24. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/_iter_sse_retrying.py +0 -0
  25. /hirundo-0.1.9/hirundo/enum.py → /hirundo-0.1.16/hirundo/dataset_enum.py +0 -0
  26. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo/logger.py +0 -0
  27. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo.egg-info/dependency_links.txt +0 -0
  28. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo.egg-info/entry_points.txt +0 -0
  29. {hirundo-0.1.9 → hirundo-0.1.16}/hirundo.egg-info/top_level.txt +0 -0
  30. {hirundo-0.1.9 → hirundo-0.1.16}/setup.cfg +0 -0
{hirundo-0.1.9 → hirundo-0.1.16}/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: hirundo
- Version: 0.1.9
+ Version: 0.1.16
  Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
  Author-email: Hirundo <dev@hirundo.io>
  License: MIT License
@@ -31,7 +31,6 @@ Requires-Dist: typer>=0.12.3
  Requires-Dist: httpx>=0.27.0
  Requires-Dist: stamina>=24.2.0
  Requires-Dist: httpx-sse>=0.4.0
- Requires-Dist: pandas>=2.2.2
  Requires-Dist: tqdm>=4.66.5
  Provides-Extra: dev
  Requires-Dist: pyyaml>=6.0.1; extra == "dev"
@@ -50,7 +49,7 @@ Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
  Requires-Dist: uv>=0.5.8; extra == "dev"
  Requires-Dist: pre-commit>=3.7.1; extra == "dev"
  Requires-Dist: virtualenv>=20.6.6; extra == "dev"
- Requires-Dist: ruff>=0.8.2; extra == "dev"
+ Requires-Dist: ruff>=0.11.6; extra == "dev"
  Requires-Dist: bumpver; extra == "dev"
  Requires-Dist: platformdirs>=4.3.6; extra == "dev"
  Requires-Dist: safety>=3.2.13; extra == "dev"
@@ -64,6 +63,11 @@ Requires-Dist: sphinx-multiversion; extra == "docs"
  Requires-Dist: esbonio; extra == "docs"
  Requires-Dist: starlette>0.40.0; extra == "docs"
  Requires-Dist: markupsafe>=3.0.2; extra == "docs"
+ Provides-Extra: pandas
+ Requires-Dist: pandas>=2.2.2; extra == "pandas"
+ Provides-Extra: polars
+ Requires-Dist: polars>=1.0.0; extra == "polars"
+ Dynamic: license-file

  # Hirundo

@@ -165,7 +169,7 @@ from hirundo import (
  git_storage = StorageGit(
      repo=GitRepo(
          name="BDD-100k-validation-dataset",
-         repository_url="https://git@hf.co/datasets/hirundo-io/bdd100k-validation-only.git",
+         repository_url="https://huggingface.co/datasets/hirundo-io/bdd100k-validation-only",
      ),
      branch="main",
  )
{hirundo-0.1.9 → hirundo-0.1.16}/README.md

@@ -98,7 +98,7 @@ from hirundo import (
  git_storage = StorageGit(
      repo=GitRepo(
          name="BDD-100k-validation-dataset",
-         repository_url="https://git@hf.co/datasets/hirundo-io/bdd100k-validation-only.git",
+         repository_url="https://huggingface.co/datasets/hirundo-io/bdd100k-validation-only",
      ),
      branch="main",
  )
{hirundo-0.1.9 → hirundo-0.1.16}/hirundo/__init__.py

@@ -1,3 +1,7 @@
+ from .dataset_enum import (
+     DatasetMetadataType,
+     LabelingType,
+ )
  from .dataset_optimization import (
      COCO,
      YOLO,
@@ -7,11 +11,8 @@ from .dataset_optimization import (
      RunArgs,
      VisionRunArgs,
  )
- from .enum import (
-     DatasetMetadataType,
-     LabelingType,
- )
- from .git import GitRepo
+ from .dataset_optimization_results import DatasetOptimizationResults
+ from .git import GitPlainAuth, GitRepo, GitSSHAuth
  from .storage import (
      StorageConfig,
      StorageGCP,
@@ -20,6 +21,7 @@ from .storage import (
      StorageS3,
      StorageTypes,
  )
+ from .unzip import load_df, load_from_zip

  __all__ = [
      "COCO",
@@ -31,13 +33,18 @@ __all__ = [
      "VisionRunArgs",
      "LabelingType",
      "DatasetMetadataType",
+     "GitPlainAuth",
      "GitRepo",
+     "GitSSHAuth",
      "StorageTypes",
      "StorageS3",
      "StorageGCP",
      # "StorageAzure", TODO: Azure storage is coming soon
      "StorageGit",
      "StorageConfig",
+     "DatasetOptimizationResults",
+     "load_df",
+     "load_from_zip",
  ]

- __version__ = "0.1.9"
+ __version__ = "0.1.16"
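Taken together, 0.1.16 widens the package's public surface. A minimal sketch of the new top-level imports, with names taken straight from the `__all__` above:

    # All of these are newly exported from the package root in 0.1.16:
    from hirundo import (
        DatasetOptimizationResults,
        GitPlainAuth,
        GitSSHAuth,
        load_df,
        load_from_zip,
    )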
hirundo-0.1.16/hirundo/_dataframe.py (new file)

@@ -0,0 +1,43 @@
+ has_pandas = False
+ has_polars = False
+
+ pd = None
+ pl = None
+ int32 = type[None]
+ float32 = type[None]
+ string = type[None]
+ # ⬆️ These are just placeholders for the int32, float32 and string types
+ # for when neither pandas nor polars are available
+
+ try:
+     import numpy as np
+     import pandas as pd
+
+     has_pandas = True
+     int32 = np.int32
+     float32 = np.float32
+     string = str
+ except ImportError:
+     pass
+
+ try:
+     import polars as pl
+     import polars.datatypes as pl_datatypes
+
+     has_polars = True
+     int32 = pl_datatypes.Int32
+     float32 = pl_datatypes.Float32
+     string = pl_datatypes.String
+ except ImportError:
+     pass
+
+
+ __all__ = [
+     "has_polars",
+     "has_pandas",
+     "pd",
+     "pl",
+     "int32",
+     "float32",
+     "string",
+ ]
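The module resolves whichever DataFrame backend is installed at import time; when both are present, polars wins for the dtype aliases because its `try` block runs last. A hedged sketch of branching on these flags downstream (nothing here beyond the imported names is part of the package):

    from hirundo._dataframe import has_pandas, has_polars, pd, pl

    if has_polars:
        df = pl.DataFrame({"label": ["cat", "dog"]})   # polars preferred when both are installed
    elif has_pandas:
        df = pd.DataFrame({"label": ["cat", "dog"]})
    else:
        df = None  # neither backend installed: the CSV loaders below return None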
{hirundo-0.1.9 → hirundo-0.1.16}/hirundo/_env.py

@@ -2,11 +2,11 @@ import enum
  import os
  from pathlib import Path

- from dotenv import load_dotenv
+ from dotenv import find_dotenv, load_dotenv


  class EnvLocation(enum.Enum):
-     DOTENV = Path.cwd() / ".env"
+     DOTENV = find_dotenv(".env")
      HOME = Path.home() / ".hirundo.conf"

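Note that python-dotenv's `find_dotenv` searches upward through parent directories rather than looking only at `./.env`, so a `.env` file at a project root should now be found even when the interpreter is launched from a subdirectory.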
hirundo-0.1.16/hirundo/_headers.py (new file)

@@ -0,0 +1,29 @@
+ from hirundo._env import API_KEY, check_api_key
+
+ HIRUNDO_API_VERSION = "0.2"
+
+ _json_headers = {
+     "Content-Type": "application/json",
+     "Accept": "application/json",
+ }
+
+
+ def _get_auth_headers():
+     check_api_key()
+     return {
+         "Authorization": f"Bearer {API_KEY}",
+     }
+
+
+ def _get_api_version_header():
+     return {
+         "HIRUNDO-API-VERSION": HIRUNDO_API_VERSION,
+     }
+
+
+ def get_headers():
+     return {
+         **_json_headers,
+         **_get_auth_headers(),
+         **_get_api_version_header(),
+     }
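Every request in the package now goes through this single helper. Assuming `API_KEY` is set, `get_headers()` merges the three pieces into one dict, roughly:

    # Illustrative merged result of get_headers(); the token value is a placeholder.
    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Authorization": "Bearer <API_KEY>",   # from _get_auth_headers()
        "HIRUNDO-API-VERSION": "0.2",          # from _get_api_version_header()
    }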
hirundo-0.1.16/hirundo/_timeouts.py (new file)

@@ -0,0 +1,3 @@
+ READ_TIMEOUT = 30.0
+ MODIFY_TIMEOUT = 60.0
+ DOWNLOAD_READ_TIMEOUT = 600.0  # 10 minutes
{hirundo-0.1.9 → hirundo-0.1.16}/hirundo/cli.py

@@ -7,6 +7,8 @@ from typing import Annotated
  from urllib.parse import urlparse

  import typer
+ from rich.console import Console
+ from rich.table import Table

  from hirundo._env import API_HOST, EnvLocation

@@ -189,6 +191,56 @@ def setup(
      )


+ @app.command("check-run", epilog=hirundo_epilog)
+ def check_run(
+     run_id: str,
+ ):
+     """
+     Check the status of a run.
+     """
+     from hirundo.dataset_optimization import OptimizationDataset
+
+     results = OptimizationDataset.check_run_by_id(run_id)
+     print(f"Run results saved to {results.cached_zip_path}")
+
+
+ @app.command("list-runs", epilog=hirundo_epilog)
+ def list_runs():
+     """
+     List all runs available.
+     """
+     from hirundo.dataset_optimization import OptimizationDataset
+
+     runs = OptimizationDataset.list_runs()
+
+     console = Console()
+     table = Table(
+         title="Runs:",
+         expand=True,
+     )
+     cols = (
+         "Dataset name",
+         "Run ID",
+         "Status",
+         "Created At",
+         "Run Args",
+     )
+     for col in cols:
+         table.add_column(
+             col,
+             overflow="fold",
+         )
+     for run in runs:
+         table.add_row(
+             str(run.name),
+             str(run.id),
+             str(run.status),
+             run.created_at.isoformat(),
+             run.run_args.model_dump_json() if run.run_args else None,
+         )
+     console.print(table)
+
+
  typer_click_object = typer.main.get_command(app)

  if __name__ == "__main__":
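The two new subcommands are thin wrappers over static methods on `OptimizationDataset`, so the same flow is available from Python. A hedged sketch (the run ID is a placeholder):

    from hirundo.dataset_optimization import OptimizationDataset

    runs = OptimizationDataset.list_runs()            # what `list-runs` renders as a table
    for run in runs:
        print(run.run_id, run.status, run.created_at)

    results = OptimizationDataset.check_run_by_id("some-run-id")  # what `check-run` wraps
    print(results.cached_zip_path)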
{hirundo-0.1.9 → hirundo-0.1.16}/hirundo/dataset_optimization.py

@@ -4,27 +4,25 @@ import typing
  from abc import ABC, abstractmethod
  from collections.abc import AsyncGenerator, Generator
  from enum import Enum
- from io import StringIO
  from typing import overload

  import httpx
- import numpy as np
- import pandas as pd
  import requests
- from pandas._typing import DtypeArg
  from pydantic import BaseModel, Field, model_validator
  from tqdm import tqdm
  from tqdm.contrib.logging import logging_redirect_tqdm

  from hirundo._constraints import HirundoUrl
  from hirundo._env import API_HOST
- from hirundo._headers import get_auth_headers, json_headers
+ from hirundo._headers import get_headers
  from hirundo._http import raise_for_status_with_reason
  from hirundo._iter_sse_retrying import aiter_sse_retrying, iter_sse_retrying
  from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
- from hirundo.enum import DatasetMetadataType, LabelingType
+ from hirundo.dataset_enum import DatasetMetadataType, LabelingType
+ from hirundo.dataset_optimization_results import DatasetOptimizationResults
  from hirundo.logger import get_logger
  from hirundo.storage import ResponseStorageConfig, StorageConfig
+ from hirundo.unzip import download_and_extract_zip

  logger = get_logger(__name__)

@@ -73,39 +71,6 @@ STATUS_TO_PROGRESS_MAP = {
  }


- class DatasetOptimizationResults(BaseModel):
-     model_config = {"arbitrary_types_allowed": True}
-
-     suspects: pd.DataFrame
-     """
-     A pandas DataFrame containing the results of the optimization run
-     """
-     warnings_and_errors: pd.DataFrame
-     """
-     A pandas DataFrame containing the warnings and errors of the optimization run
-     """
-
-
- CUSTOMER_INTERCHANGE_DTYPES: DtypeArg = {
-     "image_path": str,
-     "label_path": str,
-     "segments_mask_path": str,
-     "segment_id": np.int32,
-     "label": str,
-     "bbox_id": str,
-     "xmin": np.float32,
-     "ymin": np.float32,
-     "xmax": np.float32,
-     "ymax": np.float32,
-     "suspect_level": np.float32,  # If exists, must be one of the values in the enum below
-     "suggested_label": str,
-     "suggested_label_conf": np.float32,
-     "status": str,
-     # ⬆️ If exists, must be one of the following:
-     # NO_LABELS/MISSING_IMAGE/INVALID_IMAGE/INVALID_BBOX/INVALID_BBOX_SIZE/INVALID_SEG/INVALID_SEG_SIZE
- }
-
-
  class Metadata(BaseModel, ABC):
      type: DatasetMetadataType

@@ -201,13 +166,14 @@ class VisionRunArgs(BaseModel):
  RunArgs = typing.Union[VisionRunArgs]


- class AugmentationNames(str, Enum):
-     RandomHorizontalFlip = "RandomHorizontalFlip"
-     RandomVerticalFlip = "RandomVerticalFlip"
-     RandomRotation = "RandomRotation"
-     ColorJitter = "ColorJitter"
-     RandomAffine = "RandomAffine"
-     RandomPerspective = "RandomPerspective"
+ class AugmentationName(str, Enum):
+     RANDOM_HORIZONTAL_FLIP = "RandomHorizontalFlip"
+     RANDOM_VERTICAL_FLIP = "RandomVerticalFlip"
+     RANDOM_ROTATION = "RandomRotation"
+     RANDOM_PERSPECTIVE = "RandomPerspective"
+     GAUSSIAN_NOISE = "GaussianNoise"
+     RANDOM_GRAYSCALE = "RandomGrayscale"
+     GAUSSIAN_BLUR = "GaussianBlur"


  class Modality(str, Enum):
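The enum was renamed (`AugmentationNames` → `AugmentationName`) and its members moved to UPPER_SNAKE_CASE while the string values keep their original CamelCase, so serialized payloads are unchanged for the surviving entries; `ColorJitter` and `RandomAffine` are dropped and three augmentations are new. A minimal sketch with the new names:

    # Member names come straight from the 0.1.16 enum above.
    from hirundo.dataset_optimization import AugmentationName

    augmentations = [
        AugmentationName.RANDOM_HORIZONTAL_FLIP,  # .value == "RandomHorizontalFlip"
        AugmentationName.GAUSSIAN_BLUR,           # new in 0.1.16
    ]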
@@ -264,7 +230,7 @@ class OptimizationDataset(BaseModel):
      """
      labeling_info: LabelingInfo

-     augmentations: typing.Optional[list[AugmentationNames]] = None
+     augmentations: typing.Optional[list[AugmentationName]] = None
      """
      Used to define which augmentations are apply to a vision dataset.
      For audio datasets, this field is ignored.
@@ -323,7 +289,7 @@ class OptimizationDataset(BaseModel):
          """
          response = requests.get(
              f"{API_HOST}/dataset-optimization/dataset/{dataset_id}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=READ_TIMEOUT,
          )
          raise_for_status_with_reason(response)
@@ -340,7 +306,7 @@ class OptimizationDataset(BaseModel):
          """
          response = requests.get(
              f"{API_HOST}/dataset-optimization/dataset/by-name/{name}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=READ_TIMEOUT,
          )
          raise_for_status_with_reason(response)
@@ -361,7 +327,7 @@ class OptimizationDataset(BaseModel):
          response = requests.get(
              f"{API_HOST}/dataset-optimization/dataset/",
              params={"dataset_organization_id": organization_id},
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=READ_TIMEOUT,
          )
          raise_for_status_with_reason(response)
@@ -388,7 +354,7 @@ class OptimizationDataset(BaseModel):
          response = requests.get(
              f"{API_HOST}/dataset-optimization/run/list",
              params={"dataset_organization_id": organization_id},
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=READ_TIMEOUT,
          )
          raise_for_status_with_reason(response)
@@ -410,7 +376,7 @@ class OptimizationDataset(BaseModel):
          """
          response = requests.delete(
              f"{API_HOST}/dataset-optimization/dataset/{dataset_id}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=MODIFY_TIMEOUT,
          )
          raise_for_status_with_reason(response)
@@ -482,10 +448,7 @@ class OptimizationDataset(BaseModel):
                  "organization_id": organization_id,
                  "replace_if_exists": replace_if_exists,
              },
-             headers={
-                 **json_headers,
-                 **get_auth_headers(),
-             },
+             headers=get_headers(),
              timeout=MODIFY_TIMEOUT,
          )
          raise_for_status_with_reason(dataset_response)
@@ -519,7 +482,7 @@ class OptimizationDataset(BaseModel):
          run_response = requests.post(
              f"{API_HOST}/dataset-optimization/run/{dataset_id}",
              json=run_info if len(run_info) > 0 else None,
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=MODIFY_TIMEOUT,
          )
          raise_for_status_with_reason(run_response)
@@ -595,46 +558,6 @@ class OptimizationDataset(BaseModel):
          self.id = None
          self.run_id = None

-     @staticmethod
-     def _clean_df_index(df: "pd.DataFrame") -> "pd.DataFrame":
-         """
-         Clean the index of a dataframe in case it has unnamed columns.
-
-         Args:
-             df (DataFrame): Dataframe to clean
-
-         Returns:
-             DataFrame: Cleaned dataframe
-         """
-         index_cols = sorted(
-             [col for col in df.columns if col.startswith("Unnamed")], reverse=True
-         )
-         if len(index_cols) > 0:
-             df.set_index(index_cols.pop(), inplace=True)
-             df.rename_axis(index=None, columns=None, inplace=True)
-             if len(index_cols) > 0:
-                 df.drop(columns=index_cols, inplace=True)
-
-         return df
-
-     @staticmethod
-     def _read_csvs_to_df(data: dict):
-         if data["state"] == RunStatus.SUCCESS.value:
-             data["result"]["suspects"] = OptimizationDataset._clean_df_index(
-                 pd.read_csv(
-                     StringIO(data["result"]["suspects"]),
-                     dtype=CUSTOMER_INTERCHANGE_DTYPES,
-                 )
-             )
-             data["result"]["warnings_and_errors"] = OptimizationDataset._clean_df_index(
-                 pd.read_csv(
-                     StringIO(data["result"]["warnings_and_errors"]),
-                     dtype=CUSTOMER_INTERCHANGE_DTYPES,
-                 )
-             )
-         else:
-             pass
-
      @staticmethod
      def _check_run_by_id(run_id: str, retry=0) -> Generator[dict, None, None]:
          if retry > MAX_RETRIES:
@@ -645,7 +568,7 @@ class OptimizationDataset(BaseModel):
              client,
              "GET",
              f"{API_HOST}/dataset-optimization/run/{run_id}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
          ):
              if sse.event == "ping":
                  continue
@@ -668,7 +591,6 @@ class OptimizationDataset(BaseModel):
                      raise HirundoError(last_event["reason"])
                  else:
                      raise HirundoError("Unknown error")
-             OptimizationDataset._read_csvs_to_df(data)
              yield data
          if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
              OptimizationDataset._check_run_by_id(run_id, retry + 1)
@@ -727,11 +649,12 @@ class OptimizationDataset(BaseModel):
                      )
                  elif iteration["state"] == RunStatus.SUCCESS.value:
                      t.close()
-                     return DatasetOptimizationResults(
-                         suspects=iteration["result"]["suspects"],
-                         warnings_and_errors=iteration["result"][
-                             "warnings_and_errors"
-                         ],
+                     zip_temporary_url = iteration["result"]
+                     logger.debug("Optimization run completed. Downloading results")
+
+                     return download_and_extract_zip(
+                         run_id,
+                         zip_temporary_url,
                      )
                  elif (
                      iteration["state"] == RunStatus.AWAITING_MANUAL_APPROVAL.value
@@ -823,7 +746,7 @@ class OptimizationDataset(BaseModel):
              client,
              "GET",
              f"{API_HOST}/dataset-optimization/run/{run_id}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
          )
          async for sse in async_iterator:
              if sse.event == "ping":
@@ -872,7 +795,7 @@ class OptimizationDataset(BaseModel):
          logger.info("Cancelling run with ID: %s", run_id)
          response = requests.delete(
              f"{API_HOST}/dataset-optimization/run/{run_id}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=MODIFY_TIMEOUT,
          )
          raise_for_status_with_reason(response)
@@ -908,7 +831,9 @@ class DataOptimizationDatasetOut(BaseModel):
  class DataOptimizationRunOut(BaseModel):
      id: int
      name: str
+     dataset_id: int
      run_id: str
      status: RunStatus
      approved: bool
      created_at: datetime.datetime
+     run_args: typing.Optional[RunArgs]
hirundo-0.1.16/hirundo/dataset_optimization_results.py (new file)

@@ -0,0 +1,42 @@
+ import typing
+ from pathlib import Path
+
+ from pydantic import BaseModel
+ from typing_extensions import TypeAliasType
+
+ from hirundo._dataframe import has_pandas, has_polars
+
+ DataFrameType = TypeAliasType("DataFrameType", None)
+
+ if has_pandas:
+     from hirundo._dataframe import pd
+
+     DataFrameType = TypeAliasType("DataFrameType", typing.Union[pd.DataFrame, None])
+ if has_polars:
+     from hirundo._dataframe import pl
+
+     DataFrameType = TypeAliasType("DataFrameType", typing.Union[pl.DataFrame, None])
+
+
+ T = typing.TypeVar("T")
+
+
+ class DatasetOptimizationResults(BaseModel, typing.Generic[T]):
+     model_config = {"arbitrary_types_allowed": True}
+
+     cached_zip_path: Path
+     """
+     The path to the cached zip file of the results
+     """
+     suspects: T
+     """
+     A polars/pandas DataFrame containing the results of the optimization run
+     """
+     object_suspects: typing.Optional[T]
+     """
+     A polars/pandas DataFrame containing the object-level results of the optimization run
+     """
+     warnings_and_errors: T
+     """
+     A polars/pandas DataFrame containing the warnings and errors of the optimization run
+     """
{hirundo-0.1.9 → hirundo-0.1.16}/hirundo/git.py

@@ -9,7 +9,7 @@ from pydantic_core import Url

  from hirundo._constraints import RepoUrl
  from hirundo._env import API_HOST
- from hirundo._headers import get_auth_headers, json_headers
+ from hirundo._headers import get_headers
  from hirundo._http import raise_for_status_with_reason
  from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
  from hirundo.logger import get_logger
@@ -17,7 +17,7 @@ from hirundo.logger import get_logger
  logger = get_logger(__name__)


- class GitPlainAuthBase(BaseModel):
+ class GitPlainAuth(BaseModel):
      username: str
      """
      The username for the Git repository
@@ -28,7 +28,7 @@ class GitPlainAuthBase(BaseModel):
      """


- class GitSSHAuthBase(BaseModel):
+ class GitSSHAuth(BaseModel):
      ssh_key: str
      """
      The SSH key for the Git repository
@@ -52,7 +52,7 @@ class GitRepo(BaseModel):
      repository_url: typing.Union[str, RepoUrl]
      """
      The URL of the Git repository, it should start with `ssh://` or `https://` or be in the form `user@host:path`.
-     If it is in the form `user@host:path`, it will be rewritten to `ssh://user@host:path`.
+     If it is in the form `user@host:path`, it will be rewritten to `ssh://user@host/path`.
      """
      organization_id: typing.Optional[int] = None
      """
@@ -60,14 +60,14 @@ class GitRepo(BaseModel):
      If not provided, it will be assigned to your default organization.
      """

-     plain_auth: typing.Optional[GitPlainAuthBase] = pydantic.Field(
+     plain_auth: typing.Optional[GitPlainAuth] = pydantic.Field(
          default=None, examples=[None, {"username": "ben", "password": "password"}]
      )
      """
      The plain authentication details for the Git repository.
      Use this if using a special user with a username and password for authentication.
      """
-     ssh_auth: typing.Optional[GitSSHAuthBase] = pydantic.Field(
+     ssh_auth: typing.Optional[GitSSHAuth] = pydantic.Field(
          default=None,
          examples=[
              {
@@ -124,10 +124,7 @@ class GitRepo(BaseModel):
                  **self.model_dump(mode="json"),
                  "replace_if_exists": replace_if_exists,
              },
-             headers={
-                 **json_headers,
-                 **get_auth_headers(),
-             },
+             headers=get_headers(),
              timeout=MODIFY_TIMEOUT,
          )
          raise_for_status_with_reason(git_repo)
@@ -145,7 +142,7 @@ class GitRepo(BaseModel):
          """
          git_repo = requests.get(
              f"{API_HOST}/git-repo/{git_repo_id}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=READ_TIMEOUT,
          )
          raise_for_status_with_reason(git_repo)
@@ -163,7 +160,7 @@ class GitRepo(BaseModel):
          """
          git_repo = requests.get(
              f"{API_HOST}/git-repo/by-name/{name}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=READ_TIMEOUT,
          )
          raise_for_status_with_reason(git_repo)
@@ -176,9 +173,7 @@ class GitRepo(BaseModel):
          """
          git_repos = requests.get(
              f"{API_HOST}/git-repo/",
-             headers={
-                 **get_auth_headers(),
-             },
+             headers=get_headers(),
              timeout=READ_TIMEOUT,
          )
          raise_for_status_with_reason(git_repos)
@@ -200,9 +195,7 @@ class GitRepo(BaseModel):
          """
          git_repo = requests.delete(
              f"{API_HOST}/git-repo/{git_repo_id}",
-             headers={
-                 **get_auth_headers(),
-             },
+             headers=get_headers(),
              timeout=MODIFY_TIMEOUT,
          )
          raise_for_status_with_reason(git_repo)
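With the `*Base` suffix gone and both auth models exported from the package root, a configured private repo looks roughly like this (the repo name and URL are illustrative; the credentials mirror the field examples above):

    from hirundo import GitPlainAuth, GitRepo

    repo = GitRepo(
        name="my-private-dataset",                       # hypothetical repo name
        repository_url="https://example.com/org/data.git",
        plain_auth=GitPlainAuth(username="ben", password="password"),
    )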
{hirundo-0.1.9 → hirundo-0.1.16}/hirundo/storage.py

@@ -9,7 +9,7 @@ from pydantic_core import Url

  from hirundo._constraints import S3BucketUrl, StorageConfigName
  from hirundo._env import API_HOST
- from hirundo._headers import get_auth_headers, json_headers
+ from hirundo._headers import get_headers
  from hirundo._http import raise_for_status_with_reason
  from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
  from hirundo.git import GitRepo, GitRepoOut
@@ -34,7 +34,7 @@ class StorageS3Base(BaseModel):
          Chains the bucket URL with the path, ensuring that the path is formatted correctly

          Args:
-             - path: The path to the file in the S3 bucket, e.g. `my-file.txt` or `/my-folder/my-file.txt`
+             path: The path to the file in the S3 bucket, e.g. `my-file.txt` or `/my-folder/my-file.txt`

          Returns:
              The full URL to the file in the S3 bucket, e.g. `s3://my-bucket/my-file.txt` or `s3://my-bucket/my-folder/my-file.txt`,
@@ -64,7 +64,7 @@ class StorageGCPBase(BaseModel):
          Chains the bucket URL with the path, ensuring that the path is formatted correctly

          Args:
-             - path: The path to the file in the GCP bucket, e.g. `my-file.txt` or `/my-folder/my-file.txt`
+             path: The path to the file in the GCP bucket, e.g. `my-file.txt` or `/my-folder/my-file.txt`

          Returns:
              The full URL to the file in the GCP bucket, e.g. `gs://my-bucket/my-file.txt` or `gs://my-bucket/my-folder/my-file.txt`,
@@ -94,7 +94,7 @@ class StorageGCPOut(StorageGCPBase):
      # Chains the container URL with the path, ensuring that the path is formatted correctly

      # Args:
-     #     - path: The path to the file in the Azure container, e.g. `my-file.txt` or `/my-folder/my-file.txt`
+     #     path: The path to the file in the Azure container, e.g. `my-file.txt` or `/my-folder/my-file.txt`

      # Returns:
      #     The full URL to the file in the Azure container
@@ -114,8 +114,8 @@ def get_git_repo_url(
      Chains the repository URL with the path, ensuring that the path is formatted correctly

      Args:
-         - repo_url: The URL of the git repository, e.g. `https://my-git-repository.com`
-         - path: The path to the file in the git repository, e.g. `my-file.txt` or `/my-folder/my-file.txt`
+         repo_url: The URL of the git repository, e.g. `https://my-git-repository.com`
+         path: The path to the file in the git repository, e.g. `my-file.txt` or `/my-folder/my-file.txt`

      Returns:
          The full URL to the file in the git repository, e.g. `https://my-git-repository.com/my-file.txt` or `https://my-git-repository.com/my-folder/my-file.txt`
@@ -156,7 +156,7 @@ class StorageGit(BaseModel):
          Chains the repository URL with the path, ensuring that the path is formatted correctly

          Args:
-             - path: The path to the file in the git repository, e.g. `my-file.txt` or `/my-folder/my-file.txt`
+             path: The path to the file in the git repository, e.g. `my-file.txt` or `/my-folder/my-file.txt`

          Returns:
              The full URL to the file in the git repository, e.g. `https://my-git-repository.com/my-file.txt` or `https://my-git-repository.com/my-folder/my-file.txt`,
@@ -179,7 +179,7 @@ class StorageGitOut(BaseModel):
          Chains the repository URL with the path, ensuring that the path is formatted correctly

          Args:
-             - path: The path to the file in the git repository, e.g. `my-file.txt` or `/my-folder/my-file.txt`
+             path: The path to the file in the git repository, e.g. `my-file.txt` or `/my-folder/my-file.txt`

          Returns:
              The full URL to the file in the git repository, e.g. `https://my-git-repository.com/my-file.txt` or `https://my-git-repository.com/my-folder/my-file.txt`,
@@ -330,7 +330,7 @@ class StorageConfig(BaseModel):
          """
          storage_config = requests.get(
              f"{API_HOST}/storage-config/{storage_config_id}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=READ_TIMEOUT,
          )
          raise_for_status_with_reason(storage_config)
@@ -349,7 +349,7 @@ class StorageConfig(BaseModel):
          """
          storage_config = requests.get(
              f"{API_HOST}/storage-config/by-name/{name}?storage_type={storage_type.value}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=READ_TIMEOUT,
          )
          raise_for_status_with_reason(storage_config)
@@ -370,7 +370,7 @@ class StorageConfig(BaseModel):
          storage_configs = requests.get(
              f"{API_HOST}/storage-config/",
              params={"storage_config_organization_id": organization_id},
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=READ_TIMEOUT,
          )
          raise_for_status_with_reason(storage_configs)
@@ -386,7 +386,7 @@ class StorageConfig(BaseModel):
          """
          storage_config = requests.delete(
              f"{API_HOST}/storage-config/{storage_config_id}",
-             headers=get_auth_headers(),
+             headers=get_headers(),
              timeout=MODIFY_TIMEOUT,
          )
          raise_for_status_with_reason(storage_config)
@@ -415,10 +415,7 @@ class StorageConfig(BaseModel):
                  **self.model_dump(mode="json"),
                  "replace_if_exists": replace_if_exists,
              },
-             headers={
-                 **json_headers,
-                 **get_auth_headers(),
-             },
+             headers=get_headers(),
              timeout=MODIFY_TIMEOUT,
          )
          raise_for_status_with_reason(storage_config)
hirundo-0.1.16/hirundo/unzip.py (new file)

@@ -0,0 +1,247 @@
+ import typing
+ import zipfile
+ from collections.abc import Mapping
+ from pathlib import Path
+ from typing import IO, cast
+
+ import requests
+ from pydantic_core import Url
+
+ from hirundo._dataframe import (
+     float32,
+     has_pandas,
+     has_polars,
+     int32,
+     pd,
+     pl,
+     string,
+ )
+ from hirundo._env import API_HOST
+ from hirundo._headers import _get_auth_headers
+ from hirundo._timeouts import DOWNLOAD_READ_TIMEOUT
+ from hirundo.dataset_optimization_results import (
+     DataFrameType,
+     DatasetOptimizationResults,
+ )
+ from hirundo.logger import get_logger
+
+ ZIP_FILE_CHUNK_SIZE = 50 * 1024 * 1024  # 50 MB
+
+ Dtype = typing.Union[type[int32], type[float32], type[string]]
+
+
+ CUSTOMER_INTERCHANGE_DTYPES: Mapping[str, Dtype] = {
+     "image_path": string,
+     "label_path": string,
+     "segments_mask_path": string,
+     "segment_id": int32,
+     "label": string,
+     "bbox_id": string,
+     "xmin": float32,
+     "ymin": float32,
+     "xmax": float32,
+     "ymax": float32,
+     "suspect_level": float32,  # If exists, must be one of the values in the enum below
+     "suggested_label": string,
+     "suggested_label_conf": float32,
+     "status": string,
+     # ⬆️ If exists, must be one of the following:
+     # NO_LABELS/MISSING_IMAGE/INVALID_IMAGE/INVALID_BBOX/INVALID_BBOX_SIZE/INVALID_SEG/INVALID_SEG_SIZE
+ }
+
+ logger = get_logger(__name__)
+
+
+ def _clean_df_index(df: "pd.DataFrame") -> "pd.DataFrame":
+     """
+     Clean the index of a DataFrame in case it has unnamed columns.
+
+     Args:
+         df (DataFrame): DataFrame to clean
+
+     Returns:
+         Cleaned Pandas DataFrame
+     """
+     index_cols = sorted(
+         [col for col in df.columns if col.startswith("Unnamed")], reverse=True
+     )
+     if len(index_cols) > 0:
+         df.set_index(index_cols.pop(), inplace=True)
+         df.rename_axis(index=None, columns=None, inplace=True)
+         if len(index_cols) > 0:
+             df.drop(columns=index_cols, inplace=True)
+
+     return df
+
+
+ def load_df(
+     file: "typing.Union[str, IO[bytes]]",
+ ) -> "DataFrameType":
+     """
+     Load a DataFrame from a CSV file.
+
+     Args:
+         file_name: The name of the CSV file to load.
+         dtypes: The data types of the columns in the DataFrame.
+
+     Returns:
+         The loaded DataFrame or `None` if neither Polars nor Pandas is available.
+     """
+     if has_polars:
+         return pl.read_csv(file, schema_overrides=CUSTOMER_INTERCHANGE_DTYPES)
+     elif has_pandas:
+         if typing.TYPE_CHECKING:
+             from pandas._typing import DtypeArg
+
+         dtype = cast("DtypeArg", CUSTOMER_INTERCHANGE_DTYPES)
+         # ⬆️ Casting since CUSTOMER_INTERCHANGE_DTYPES is a Mapping[str, Dtype] in this case
+         df = pd.read_csv(file, dtype=dtype)
+         return cast("DataFrameType", _clean_df_index(df))
+         # ⬆️ Casting since the return type is pd.DataFrame, but this is what DataFrameType is in this case
+     else:
+         return None
+
+
+ def get_mislabel_suspect_filename(filenames: list[str]):
+     mislabel_suspect_filename = "mislabel_suspects.csv"
+     if mislabel_suspect_filename not in filenames:
+         mislabel_suspect_filename = "image_mislabel_suspects.csv"
+     if mislabel_suspect_filename not in filenames:
+         mislabel_suspect_filename = "suspects.csv"
+     if mislabel_suspect_filename not in filenames:
+         raise ValueError(
+             "None of mislabel_suspects.csv, image_mislabel_suspects.csv or suspects.csv were found in the zip file"
+         )
+     return mislabel_suspect_filename
+
+
+ def download_and_extract_zip(
+     run_id: str, zip_url: str
+ ) -> DatasetOptimizationResults[DataFrameType]:
+     """
+     Download and extract the zip file from the given URL.
+
+     Note: It will only extract the `mislabel_suspects.csv` (vision - classification)
+     or `image_mislabel_suspects.csv` & `object_mislabel_suspects.csv` (vision - OD)
+     or `suspects.csv` (STT)
+     and `warnings_and_errors.csv` files from the zip file.
+
+     Args:
+         run_id: The ID of the optimization run.
+         zip_url: The URL of the zip file to download.
+
+     Returns:
+         The dataset optimization results object.
+     """
+     # Define the local file path
+     cache_dir = Path.home() / ".hirundo" / "cache"
+     cache_dir.mkdir(parents=True, exist_ok=True)
+     zip_file_path = cache_dir / f"{run_id}.zip"
+
+     headers = None
+     if Url(zip_url).scheme == "file":
+         zip_url = (
+             f"{API_HOST}/dataset-optimization/run/local-download"
+             + zip_url.replace("file://", "")
+         )
+         headers = _get_auth_headers()
+     # Stream the zip file download
+     with requests.get(
+         zip_url,
+         headers=headers,
+         timeout=DOWNLOAD_READ_TIMEOUT,
+         stream=True,
+     ) as r:
+         r.raise_for_status()
+         with open(zip_file_path, "wb") as f:
+             for chunk in r.iter_content(chunk_size=ZIP_FILE_CHUNK_SIZE):
+                 f.write(chunk)
+         logger.info(
+             "Successfully downloaded the result zip file for run ID %s to %s",
+             run_id,
+             zip_file_path,
+         )
+
+     with zipfile.ZipFile(zip_file_path, "r") as z:
+         # Extract suspects file
+         suspects_df = None
+         object_suspects_df = None
+         warnings_and_errors_df = None
+
+         filenames = []
+         try:
+             filenames = [file.filename for file in z.filelist]
+         except Exception as e:
+             logger.error("Failed to get filenames from ZIP", exc_info=e)
+
+         try:
+             mislabel_suspect_filename = get_mislabel_suspect_filename(filenames)
+             with z.open(mislabel_suspect_filename) as suspects_file:
+                 suspects_df = load_df(suspects_file)
+             logger.debug(
+                 "Successfully loaded mislabel suspects into DataFrame for run ID %s",
+                 run_id,
+             )
+         except Exception as e:
+             logger.error(
+                 "Failed to load mislabel suspects into DataFrame", exc_info=e
+             )
+
+         object_mislabel_suspects_filename = "object_mislabel_suspects.csv"
+         if object_mislabel_suspects_filename in filenames:
+             try:
+                 with z.open(
+                     object_mislabel_suspects_filename
+                 ) as object_suspects_file:
+                     object_suspects_df = load_df(object_suspects_file)
+                 logger.debug(
+                     "Successfully loaded object mislabel suspects into DataFrame for run ID %s",
+                     run_id,
+                 )
+             except Exception as e:
+                 logger.error(
+                     "Failed to load object mislabel suspects into DataFrame",
+                     exc_info=e,
+                 )
+
+         try:
+             # Extract warnings_and_errors file
+             with z.open("warnings_and_errors.csv") as warnings_file:
+                 warnings_and_errors_df = load_df(warnings_file)
+             logger.debug(
+                 "Successfully loaded warnings and errors into DataFrame for run ID %s",
+                 run_id,
+             )
+         except Exception as e:
+             logger.error(
+                 "Failed to load warnings and errors into DataFrame", exc_info=e
+             )
+
+     return DatasetOptimizationResults[DataFrameType](
+         cached_zip_path=zip_file_path,
+         suspects=suspects_df,
+         object_suspects=object_suspects_df,
+         warnings_and_errors=warnings_and_errors_df,
+     )
+
+
+ def load_from_zip(
+     zip_path: Path, file_name: str
+ ) -> "typing.Union[pd.DataFrame, pl.DataFrame, None]":
+     """
+     Load a given file from a given zip file.
+
+     Args:
+         zip_path: The path to the zip file.
+         file_name: The name of the file to load.
+
+     Returns:
+         The loaded DataFrame or `None` if neither Polars nor Pandas is available.
+     """
+     with zipfile.ZipFile(zip_path, "r") as z:
+         try:
+             with z.open(file_name) as file:
+                 return load_df(file)
+         except Exception as e:
+             logger.error("Failed to load %s from zip file", file_name, exc_info=e)
+             return None
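Because the full zip stays cached on disk, any CSV inside it can be re-read later without another download. A hedged sketch (the run ID is a placeholder; the cache layout comes from `download_and_extract_zip` above):

    from pathlib import Path
    from hirundo import load_from_zip

    zip_path = Path.home() / ".hirundo" / "cache" / "abc123.zip"  # "abc123" is a placeholder run ID
    df = load_from_zip(zip_path, "warnings_and_errors.csv")
    if df is not None:
        print(df.head())  # both pandas and polars DataFrames support head()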
{hirundo-0.1.9 → hirundo-0.1.16}/hirundo.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: hirundo
- Version: 0.1.9
+ Version: 0.1.16
  Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
  Author-email: Hirundo <dev@hirundo.io>
  License: MIT License
@@ -31,7 +31,6 @@ Requires-Dist: typer>=0.12.3
  Requires-Dist: httpx>=0.27.0
  Requires-Dist: stamina>=24.2.0
  Requires-Dist: httpx-sse>=0.4.0
- Requires-Dist: pandas>=2.2.2
  Requires-Dist: tqdm>=4.66.5
  Provides-Extra: dev
  Requires-Dist: pyyaml>=6.0.1; extra == "dev"
@@ -50,7 +49,7 @@ Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
  Requires-Dist: uv>=0.5.8; extra == "dev"
  Requires-Dist: pre-commit>=3.7.1; extra == "dev"
  Requires-Dist: virtualenv>=20.6.6; extra == "dev"
- Requires-Dist: ruff>=0.8.2; extra == "dev"
+ Requires-Dist: ruff>=0.11.6; extra == "dev"
  Requires-Dist: bumpver; extra == "dev"
  Requires-Dist: platformdirs>=4.3.6; extra == "dev"
  Requires-Dist: safety>=3.2.13; extra == "dev"
@@ -64,6 +63,11 @@ Requires-Dist: sphinx-multiversion; extra == "docs"
  Requires-Dist: esbonio; extra == "docs"
  Requires-Dist: starlette>0.40.0; extra == "docs"
  Requires-Dist: markupsafe>=3.0.2; extra == "docs"
+ Provides-Extra: pandas
+ Requires-Dist: pandas>=2.2.2; extra == "pandas"
+ Provides-Extra: polars
+ Requires-Dist: polars>=1.0.0; extra == "polars"
+ Dynamic: license-file

  # Hirundo

@@ -165,7 +169,7 @@ from hirundo import (
  git_storage = StorageGit(
      repo=GitRepo(
          name="BDD-100k-validation-dataset",
-         repository_url="https://git@hf.co/datasets/hirundo-io/bdd100k-validation-only.git",
+         repository_url="https://huggingface.co/datasets/hirundo-io/bdd100k-validation-only",
      ),
      branch="main",
  )
{hirundo-0.1.9 → hirundo-0.1.16}/hirundo.egg-info/SOURCES.txt

@@ -4,17 +4,20 @@ pyproject.toml
  hirundo/__init__.py
  hirundo/__main__.py
  hirundo/_constraints.py
+ hirundo/_dataframe.py
  hirundo/_env.py
  hirundo/_headers.py
  hirundo/_http.py
  hirundo/_iter_sse_retrying.py
  hirundo/_timeouts.py
  hirundo/cli.py
+ hirundo/dataset_enum.py
  hirundo/dataset_optimization.py
- hirundo/enum.py
+ hirundo/dataset_optimization_results.py
  hirundo/git.py
  hirundo/logger.py
  hirundo/storage.py
+ hirundo/unzip.py
  hirundo.egg-info/PKG-INFO
  hirundo.egg-info/SOURCES.txt
  hirundo.egg-info/dependency_links.txt
{hirundo-0.1.9 → hirundo-0.1.16}/hirundo.egg-info/requires.txt

@@ -8,7 +8,6 @@ typer>=0.12.3
  httpx>=0.27.0
  stamina>=24.2.0
  httpx-sse>=0.4.0
- pandas>=2.2.2
  tqdm>=4.66.5

  [dev]
@@ -28,7 +27,7 @@ pytest-asyncio>=0.23.6
  uv>=0.5.8
  pre-commit>=3.7.1
  virtualenv>=20.6.6
- ruff>=0.8.2
+ ruff>=0.11.6
  bumpver
  platformdirs>=4.3.6
  safety>=3.2.13
@@ -43,3 +42,9 @@ sphinx-multiversion
  esbonio
  starlette>0.40.0
  markupsafe>=3.0.2
+
+ [pandas]
+ pandas>=2.2.2
+
+ [polars]
+ polars>=1.0.0
{hirundo-0.1.9 → hirundo-0.1.16}/pyproject.toml

@@ -7,7 +7,7 @@ packages = ["hirundo"]

  [project]
  name = "hirundo"
- version = "0.1.9"
+ version = "0.1.16"
  description = "This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets."
  authors = [{ name = "Hirundo", email = "dev@hirundo.io" }]
  readme = "README.md"
@@ -35,7 +35,6 @@ dependencies = [
      "httpx>=0.27.0",
      "stamina>=24.2.0",
      "httpx-sse>=0.4.0",
-     "pandas>=2.2.2",
      "tqdm>=4.66.5",
  ]

@@ -64,7 +63,7 @@ dev = [
      "pre-commit>=3.7.1",
      "virtualenv>=20.6.6",
      # ⬆️ Needed for `pre-commit` and locking version for `safety-cli`
-     "ruff>=0.8.2",
+     "ruff>=0.11.6",
      "bumpver",
      "platformdirs>=4.3.6",
      "safety>=3.2.13",
@@ -83,6 +82,12 @@ docs = [
      "markupsafe>=3.0.2",
      # Force `starlette` and `markupsafe` to versions compatible with `dev` dependencies.
  ]
+ pandas = [
+     "pandas>=2.2.2",
+ ]
+ polars = [
+     "polars>=1.0.0",
+ ]

  [tool.bumpver]
  current_version = "0.1.3b1"
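With this split, the DataFrame backends become opt-in: `pip install "hirundo[pandas]"` or `pip install "hirundo[polars]"` pulls in the corresponding backend, while a bare `pip install hirundo` installs neither, in which case the results loaders return `None`.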
@@ -174,3 +179,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
  [tool.ruff.lint.per-file-ignores]
  "tests/*.py" = ["S101"]
  "notebooks/**/*.ipynb" = ["S324"]
+
+ [tool.pyright]
+ typeCheckingMode = "standard"
+ autoSearchPaths = true
@@ -1,13 +0,0 @@
1
- from hirundo._env import API_KEY, check_api_key
2
-
3
- json_headers = {
4
- "Content-Type": "application/json",
5
- "Accept": "application/json",
6
- }
7
-
8
-
9
- def get_auth_headers():
10
- check_api_key()
11
- return {
12
- "Authorization": f"Bearer {API_KEY}",
13
- }
@@ -1,2 +0,0 @@
1
- READ_TIMEOUT = 30.0
2
- MODIFY_TIMEOUT = 60.0