hirundo 0.1.8__py3-none-any.whl → 0.1.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: hirundo
3
- Version: 0.1.8
3
+ Version: 0.1.16
4
4
  Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
5
5
  Author-email: Hirundo <dev@hirundo.io>
6
6
  License: MIT License
@@ -31,7 +31,6 @@ Requires-Dist: typer>=0.12.3
31
31
  Requires-Dist: httpx>=0.27.0
32
32
  Requires-Dist: stamina>=24.2.0
33
33
  Requires-Dist: httpx-sse>=0.4.0
34
- Requires-Dist: pandas>=2.2.2
35
34
  Requires-Dist: tqdm>=4.66.5
36
35
  Provides-Extra: dev
37
36
  Requires-Dist: pyyaml>=6.0.1; extra == "dev"
@@ -47,17 +46,28 @@ Requires-Dist: stamina>=24.2.0; extra == "dev"
47
46
  Requires-Dist: httpx-sse>=0.4.0; extra == "dev"
48
47
  Requires-Dist: pytest>=8.2.0; extra == "dev"
49
48
  Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
50
- Requires-Dist: uv; extra == "dev"
49
+ Requires-Dist: uv>=0.5.8; extra == "dev"
51
50
  Requires-Dist: pre-commit>=3.7.1; extra == "dev"
52
- Requires-Dist: ruff==0.6.5; extra == "dev"
51
+ Requires-Dist: virtualenv>=20.6.6; extra == "dev"
52
+ Requires-Dist: ruff>=0.11.6; extra == "dev"
53
53
  Requires-Dist: bumpver; extra == "dev"
54
+ Requires-Dist: platformdirs>=4.3.6; extra == "dev"
55
+ Requires-Dist: safety>=3.2.13; extra == "dev"
54
56
  Provides-Extra: docs
55
57
  Requires-Dist: sphinx>=7.4.7; extra == "docs"
56
58
  Requires-Dist: sphinx-autobuild>=2024.4.16; extra == "docs"
57
59
  Requires-Dist: sphinx-click>=5.0.1; extra == "docs"
58
- Requires-Dist: autodoc-pydantic>=2.2.0; extra == "docs"
60
+ Requires-Dist: autodoc_pydantic>=2.2.0; extra == "docs"
59
61
  Requires-Dist: furo; extra == "docs"
60
62
  Requires-Dist: sphinx-multiversion; extra == "docs"
63
+ Requires-Dist: esbonio; extra == "docs"
64
+ Requires-Dist: starlette>0.40.0; extra == "docs"
65
+ Requires-Dist: markupsafe>=3.0.2; extra == "docs"
66
+ Provides-Extra: pandas
67
+ Requires-Dist: pandas>=2.2.2; extra == "pandas"
68
+ Provides-Extra: polars
69
+ Requires-Dist: polars>=1.0.0; extra == "polars"
70
+ Dynamic: license-file
61
71
 
62
72
  # Hirundo
63
73
 
@@ -66,7 +76,7 @@ This package exposes access to Hirundo APIs for dataset optimization for Machine
66
76
  Dataset optimization is currently available for datasets labelled for classification and object detection.
67
77
 
68
78
 
69
- Support dataset storage integrations include:
79
+ Supported dataset storage configs include:
70
80
  - Google Cloud (GCP) Storage
71
81
  - Amazon Web Services (AWS) S3
72
82
  - Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
@@ -107,27 +117,33 @@ You can install the codebase with a simple `pip install hirundo` to install the
107
117
  ## Usage
108
118
 
109
119
  Classification example:
110
- ```
111
- from hirundo.dataset_optimization import OptimizationDataset
112
- from hirundo.enum import LabellingType
113
- from hirundo.storage import StorageIntegration, StorageLink, StorageTypes
120
+ ```python
121
+ from hirundo import (
122
+ HirundoCSV,
123
+ LabelingType,
124
+ OptimizationDataset,
125
+ StorageGCP,
126
+ StorageConfig,
127
+ StorageTypes,
128
+ )
114
129
 
130
+ gcp_bucket = StorageGCP(
131
+ bucket_name="cifar100bucket",
132
+ project="Hirundo-global",
133
+ credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
134
+ )
115
135
  test_dataset = OptimizationDataset(
116
136
  name="TEST-GCP cifar 100 classification dataset",
117
- labelling_type=LabellingType.SingleLabelClassification,
118
- dataset_storage=StorageLink(
119
- storage_integration=StorageIntegration(
120
- name="cifar100bucket",
121
- type=StorageTypes.GCP,
122
- gcp=StorageGCP(
123
- bucket_name="cifar100bucket",
124
- project="Hirundo-global",
125
- credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
126
- ),
127
- ),
128
- path="/pytorch-cifar/data",
137
+ labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
138
+ storage_config=StorageConfig(
139
+ name="cifar100bucket",
140
+ type=StorageTypes.GCP,
141
+ gcp=gcp_bucket,
142
+ ),
143
+ data_root_url=gcp_bucket.get_url(path="/pytorch-cifar/data"),
144
+ labeling_info=HirundoCSV(
145
+ csv_url=gcp_bucket.get_url(path="/pytorch-cifar/data/cifar100.csv"),
129
146
  ),
130
- dataset_metadata_path="cifar100.csv",
131
147
  classes=cifar100_classes,
132
148
  )
133
149
 
@@ -139,29 +155,53 @@ print(results)
139
155
 
140
156
  Object detection example:
141
157
 
142
- ```
143
- from hirundo.dataset_optimization import OptimizationDataset
144
- from hirundo.enum import LabellingType
145
- from hirundo.storage import StorageIntegration, StorageLink, StorageTypes
158
+ ```python
159
+ from hirundo import (
160
+ GitRepo,
161
+ HirundoCSV,
162
+ LabelingType,
163
+ OptimizationDataset,
164
+ StorageGit,
165
+ StorageConfig,
166
+ StorageTypes,
167
+ )
146
168
 
169
+ git_storage = StorageGit(
170
+ repo=GitRepo(
171
+ name="BDD-100k-validation-dataset",
172
+ repository_url="https://huggingface.co/datasets/hirundo-io/bdd100k-validation-only",
173
+ ),
174
+ branch="main",
175
+ )
147
176
  test_dataset = OptimizationDataset(
148
- name=f"TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset{unique_id}",
149
- labelling_type=LabellingType.ObjectDetection,
150
- dataset_storage=StorageLink(
151
- storage_integration=StorageIntegration(
152
- name=f"BDD-100k-validation-dataset{unique_id}",
153
- type=StorageTypes.GIT,
154
- git=StorageGit(
155
- repo=GitRepo(
156
- name=f"BDD-100k-validation-dataset{unique_id}",
157
- repository_url="https://git@hf.co/datasets/hirundo-io/bdd100k-validation-only",
158
- ),
159
- branch="main",
160
- ),
177
+ name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
178
+ labeling_type=LabelingType.OBJECT_DETECTION,
179
+ storage_config=StorageConfig(
180
+ name="BDD-100k-validation-dataset",
181
+ type=StorageTypes.GIT,
182
+ git=git_storage,
183
+ ),
184
+ data_root_url=git_storage.get_url(path="/BDD100K Val from Hirundo.zip/bdd100k"),
185
+ labeling_info=HirundoCSV(
186
+ csv_url=git_storage.get_url(
187
+ path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
161
188
  ),
162
- path="/BDD100K Val from Hirundo.zip/bdd100k",
163
189
  ),
164
- dataset_metadata_path="bdd100k.csv",
190
+ classes=[
191
+ "traffic light",
192
+ "traffic sign",
193
+ "car",
194
+ "pedestrian",
195
+ "bus",
196
+ "truck",
197
+ "rider",
198
+ "bicycle",
199
+ "motorcycle",
200
+ "train",
201
+ "other vehicle",
202
+ "other person",
203
+ "trailer",
204
+ ],
165
205
  )
166
206
 
167
207
  test_dataset.run_optimization()
@@ -173,4 +213,4 @@ Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyP
173
213
 
174
214
  ## Further documentation
175
215
 
176
- To learn about mroe how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
216
+ To learn more about how to use this library, please visit the [documentation](http://docs.hirundo.io/) or see the Google Colab examples.
@@ -0,0 +1,23 @@
1
+ hirundo/__init__.py,sha256=qKC89bNReZSjGtmf7l3PZD2JoptyVphpsD0Kf2PNXvY,1035
2
+ hirundo/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
3
+ hirundo/_constraints.py,sha256=gRv7fXwtjPGqYWIhkVYxu1B__3PdlYRqFyDkTpa9f74,1032
4
+ hirundo/_dataframe.py,sha256=sXEEbCNcLi83wyU9ii884YikCzfASo_3nnrDxhuCv7U,758
5
+ hirundo/_env.py,sha256=efX2sjvYlHkFr2Lcstelei67YSTFpVGT0l08ZsfiMuE,622
6
+ hirundo/_headers.py,sha256=3hybpD_X4SODv3cFZPt9AjGY2vvZaag5OKT3z1SHSjA,521
7
+ hirundo/_http.py,sha256=izlnuxStyPugjTAbD8Lo30tA4lZJ5d3kOENNduqrbX4,573
8
+ hirundo/_iter_sse_retrying.py,sha256=U331_wZRIbVzi-jnMqo8bp9jBC8MtFBLEs-X0ZvhSDw,4634
9
+ hirundo/_timeouts.py,sha256=gE58NU0t2e4KgKq2sk5rZcezDJAkgvRIbM5AVYFY6Ho,86
10
+ hirundo/cli.py,sha256=5Tn0eXZGG92BR9HJYUaYozjFbS1t6UTw_I2R0tZBE04,7824
11
+ hirundo/dataset_enum.py,sha256=ZEYBP-lrlVqfNWptlmw7JgLNhCyDirtWWPtoMvtg2AE,531
12
+ hirundo/dataset_optimization.py,sha256=jR4ZOlKKl05jrA4cq9L1IQuKVPJ3ytXkhOJEg6efFqI,31390
13
+ hirundo/dataset_optimization_results.py,sha256=A9YyF5zaZXVtzeDE08I_05v90dhZQADpSjDcS_6eLMc,1129
14
+ hirundo/git.py,sha256=6h1hFPlw5FfYMGWXPCitnTqGICmBKmQtb5qKGe3Icmk,6580
15
+ hirundo/logger.py,sha256=MUqrYp0fBlxWFhGl6P5t19_uqO7T_PNhrLN5bqY3i7s,275
16
+ hirundo/storage.py,sha256=kO-LWlQAM3qTnALEl8s79AiFMYqCG9Sem4MIFQcyvAg,15950
17
+ hirundo/unzip.py,sha256=XJqvt2m5pWR-G-fnzgW75VOdd-K4_Rw2r4wiEhZgKZA,8245
18
+ hirundo-0.1.16.dist-info/licenses/LICENSE,sha256=fusGGjqT2RGlU6kbkaOk7d-gDnsjk17wq67AO0mwBZI,1065
19
+ hirundo-0.1.16.dist-info/METADATA,sha256=CxdCbzafRuVRf1BGsS_tgjodO0g745uuNBl7y4UFMj8,8501
20
+ hirundo-0.1.16.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
21
+ hirundo-0.1.16.dist-info/entry_points.txt,sha256=4ZtnA_Nl1Af8fLnHp3lwjbGDEGU1S6ujb_JwtuQ7ZPM,44
22
+ hirundo-0.1.16.dist-info/top_level.txt,sha256=cmyNqrNZOAYxnywJGFI1AJBLe4SkH8HGsfFx6ncdrbI,8
23
+ hirundo-0.1.16.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.1.0)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
hirundo/enum.py DELETED
@@ -1,20 +0,0 @@
1
- from enum import Enum
2
-
3
-
4
- class LabellingType(str, Enum):
5
- """
6
- Enum indicate what type of labelling is used for the given dataset.
7
- Supported types are:
8
- """
9
-
10
- SingleLabelClassification = "SingleLabelClassification"
11
- ObjectDetection = "ObjectDetection"
12
-
13
-
14
- class DatasetMetadataType(str, Enum):
15
- """
16
- Enum indicate what type of metadata is provided for the given dataset.
17
- Supported types are:
18
- """
19
-
20
- HirundoCSV = "HirundoCSV"
@@ -1,20 +0,0 @@
1
- hirundo/__init__.py,sha256=EINZmdlmNjdW_dM85wksapRxGL-pPC49OYvKUBRtxQk,707
2
- hirundo/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
3
- hirundo/_constraints.py,sha256=-RAUV9GnCsaT9pLGSqYglKOeK0joPBBexGTo87j5nkI,425
4
- hirundo/_env.py,sha256=dXUFPeEL1zPe-eBdWD4_WZvlgiY2cpWuVDzf41Qjuto,609
5
- hirundo/_headers.py,sha256=ggTyBwVT3nGyPidCcmYMX6pv0idzMxCI2S1BJQE-Bbs,253
6
- hirundo/_http.py,sha256=INrHX7ncpXS9vdyjrske3B5vUKL5ke9SIY6daffahtE,350
7
- hirundo/_iter_sse_retrying.py,sha256=0u-jJe5vHCZegImKBB1rpI9O1BnN7oWJytdabl34ih4,3345
8
- hirundo/_timeouts.py,sha256=IfX8-mrLp809-A_xSLv1DhIqZnO-Qvy4FcTtOtvqLog,42
9
- hirundo/cli.py,sha256=4-pdV483zqRJl8d-R9p_9YOGlehOnoMJzb3XAAdPRb0,6634
10
- hirundo/dataset_optimization.py,sha256=I2AzkSns_MLwlwI4mGGxaJB6OUG3pv7VJ5uFAtcJdTM,21825
11
- hirundo/enum.py,sha256=-3w09g-_yRYIMiM8VA_Nb07WoQXf5IjyERTGonzNDs0,457
12
- hirundo/git.py,sha256=Dbp0ALJYhLDgkmI_5u9iVyE_xEHIxoUTeZdpU8iau_4,4884
13
- hirundo/logger.py,sha256=MUqrYp0fBlxWFhGl6P5t19_uqO7T_PNhrLN5bqY3i7s,275
14
- hirundo/storage.py,sha256=xifT6xuFCJpVp5wB-ZZkzKz9HbVcMNrllj10vXlU1vU,9845
15
- hirundo-0.1.8.dist-info/LICENSE,sha256=fusGGjqT2RGlU6kbkaOk7d-gDnsjk17wq67AO0mwBZI,1065
16
- hirundo-0.1.8.dist-info/METADATA,sha256=heoP6t876hsxEih-RzaIjGtcLZl8UOpcwExnjQ8thU4,7841
17
- hirundo-0.1.8.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
18
- hirundo-0.1.8.dist-info/entry_points.txt,sha256=4ZtnA_Nl1Af8fLnHp3lwjbGDEGU1S6ujb_JwtuQ7ZPM,44
19
- hirundo-0.1.8.dist-info/top_level.txt,sha256=cmyNqrNZOAYxnywJGFI1AJBLe4SkH8HGsfFx6ncdrbI,8
20
- hirundo-0.1.8.dist-info/RECORD,,