hirundo 0.1.7__tar.gz → 0.1.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hirundo-0.1.9/PKG-INFO +212 -0
- hirundo-0.1.9/README.md +145 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo/__init__.py +17 -9
- hirundo-0.1.9/hirundo/_constraints.py +53 -0
- hirundo-0.1.9/hirundo/_env.py +26 -0
- hirundo-0.1.9/hirundo/_http.py +19 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo/_iter_sse_retrying.py +63 -19
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo/cli.py +75 -16
- hirundo-0.1.9/hirundo/dataset_optimization.py +914 -0
- hirundo-0.1.9/hirundo/enum.py +23 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo/git.py +95 -28
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo/logger.py +3 -1
- hirundo-0.1.9/hirundo/storage.py +466 -0
- hirundo-0.1.9/hirundo.egg-info/PKG-INFO +212 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo.egg-info/SOURCES.txt +1 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo.egg-info/requires.txt +8 -2
- {hirundo-0.1.7 → hirundo-0.1.9}/pyproject.toml +13 -3
- hirundo-0.1.7/PKG-INFO +0 -118
- hirundo-0.1.7/README.md +0 -57
- hirundo-0.1.7/hirundo/_constraints.py +0 -21
- hirundo-0.1.7/hirundo/_env.py +0 -15
- hirundo-0.1.7/hirundo/dataset_optimization.py +0 -522
- hirundo-0.1.7/hirundo/enum.py +0 -20
- hirundo-0.1.7/hirundo/storage.py +0 -295
- hirundo-0.1.7/hirundo.egg-info/PKG-INFO +0 -118
- {hirundo-0.1.7 → hirundo-0.1.9}/LICENSE +0 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo/__main__.py +0 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo/_headers.py +0 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo/_timeouts.py +0 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo.egg-info/dependency_links.txt +0 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo.egg-info/entry_points.txt +0 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/hirundo.egg-info/top_level.txt +0 -0
- {hirundo-0.1.7 → hirundo-0.1.9}/setup.cfg +0 -0
hirundo-0.1.9/PKG-INFO
ADDED
@@ -0,0 +1,212 @@
+Metadata-Version: 2.1
+Name: hirundo
+Version: 0.1.9
+Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
+Author-email: Hirundo <dev@hirundo.io>
+License: MIT License
+
+Copyright (c) 2024, Hirundo
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+Project-URL: Homepage, https://github.com/Hirundo-io/hirundo-client
+Keywords: dataset,machine learning,data science,data engineering
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pyyaml>=6.0.1
+Requires-Dist: types-PyYAML>=6.0.12
+Requires-Dist: pydantic>=2.7.1
+Requires-Dist: twine>=5.0.0
+Requires-Dist: python-dotenv>=1.0.1
+Requires-Dist: types-requests>=2.31.0
+Requires-Dist: typer>=0.12.3
+Requires-Dist: httpx>=0.27.0
+Requires-Dist: stamina>=24.2.0
+Requires-Dist: httpx-sse>=0.4.0
+Requires-Dist: pandas>=2.2.2
+Requires-Dist: tqdm>=4.66.5
+Provides-Extra: dev
+Requires-Dist: pyyaml>=6.0.1; extra == "dev"
+Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
+Requires-Dist: pydantic>=2.7.1; extra == "dev"
+Requires-Dist: twine>=5.0.0; extra == "dev"
+Requires-Dist: python-dotenv>=1.0.1; extra == "dev"
+Requires-Dist: types-requests>=2.31.0; extra == "dev"
+Requires-Dist: types-setuptools>=69.5.0; extra == "dev"
+Requires-Dist: typer>=0.12.3; extra == "dev"
+Requires-Dist: httpx>=0.27.0; extra == "dev"
+Requires-Dist: stamina>=24.2.0; extra == "dev"
+Requires-Dist: httpx-sse>=0.4.0; extra == "dev"
+Requires-Dist: pytest>=8.2.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
+Requires-Dist: uv>=0.5.8; extra == "dev"
+Requires-Dist: pre-commit>=3.7.1; extra == "dev"
+Requires-Dist: virtualenv>=20.6.6; extra == "dev"
+Requires-Dist: ruff>=0.8.2; extra == "dev"
+Requires-Dist: bumpver; extra == "dev"
+Requires-Dist: platformdirs>=4.3.6; extra == "dev"
+Requires-Dist: safety>=3.2.13; extra == "dev"
+Provides-Extra: docs
+Requires-Dist: sphinx>=7.4.7; extra == "docs"
+Requires-Dist: sphinx-autobuild>=2024.4.16; extra == "docs"
+Requires-Dist: sphinx-click>=5.0.1; extra == "docs"
+Requires-Dist: autodoc_pydantic>=2.2.0; extra == "docs"
+Requires-Dist: furo; extra == "docs"
+Requires-Dist: sphinx-multiversion; extra == "docs"
+Requires-Dist: esbonio; extra == "docs"
+Requires-Dist: starlette>0.40.0; extra == "docs"
+Requires-Dist: markupsafe>=3.0.2; extra == "docs"
+
+# Hirundo
+
+This package exposes access to Hirundo APIs for dataset optimization for Machine Learning.
+
+Dataset optimization is currently available for datasets labelled for classification and object detection.
+
+Supported dataset storage configs include:
+- Google Cloud (GCP) Storage
+- Amazon Web Services (AWS) S3
+- Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
+
+## Optimizing a classification dataset
+
+Currently ``hirundo`` requires a CSV file with the following columns (all columns are required):
+- ``image_path``: The location of the image within the dataset ``root``
+- ``label``: The label of the image, i.e. the class that was annotated for this image
+
+And outputs a CSV with the same columns and:
+- ``suspect_level``: mislabel suspect level
+- ``suggested_label``: suggested label
+- ``suggested_label_conf``: suggested label confidence
+
+## Optimizing an object detection (OD) dataset
+
+Currently ``hirundo`` requires a CSV file with the following columns (all columns are required):
+- ``image_path``: The location of the image within the dataset ``root``
+- ``bbox_id``: The index of the bounding box within the dataset. Used to indicate label suspects
+- ``label``: The label of the bounding box, i.e. the class that was annotated for this object
+- ``x1``, ``y1``, ``x2``, ``y2``: The bounding box coordinates of the object within the image
+
+And outputs a CSV with the same columns and:
+- ``suspect_level``: object mislabel suspect level
+- ``suggested_label``: suggested object label
+- ``suggested_label_conf``: suggested object label confidence
+
+Note: This Python package must be used alongside a Hirundo server, either the SaaS platform, a custom VPC deployment or an on-premises installation.
+
+## Installation
+
+You can install the latest version of this package with a simple `pip install hirundo`. If you prefer to install from the Git repository and/or need a specific version or branch, clone the repository, check out the relevant commit and run `pip install .`. A full list of dependencies can be found in `requirements.txt`, but they will be installed automatically by either of these commands.
+
+## Usage
+
+Classification example:
+
+```python
+import json
+import os
+
+from hirundo import (
+    HirundoCSV,
+    LabelingType,
+    OptimizationDataset,
+    StorageGCP,
+    StorageConfig,
+    StorageTypes,
+)
+
+gcp_bucket = StorageGCP(
+    bucket_name="cifar100bucket",
+    project="Hirundo-global",
+    credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
+)
+test_dataset = OptimizationDataset(
+    name="TEST-GCP cifar 100 classification dataset",
+    labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
+    storage_config=StorageConfig(
+        name="cifar100bucket",
+        type=StorageTypes.GCP,
+        gcp=gcp_bucket,
+    ),
+    data_root_url=gcp_bucket.get_url(path="/pytorch-cifar/data"),
+    labeling_info=HirundoCSV(
+        csv_url=gcp_bucket.get_url(path="/pytorch-cifar/data/cifar100.csv"),
+    ),
+    classes=cifar100_classes,
+)
+
+test_dataset.run_optimization()
+results = test_dataset.check_run()
+print(results)
+```
+
+Object detection example:
+
+```python
+from hirundo import (
+    GitRepo,
+    HirundoCSV,
+    LabelingType,
+    OptimizationDataset,
+    StorageGit,
+    StorageConfig,
+    StorageTypes,
+)
+
+git_storage = StorageGit(
+    repo=GitRepo(
+        name="BDD-100k-validation-dataset",
+        repository_url="https://git@hf.co/datasets/hirundo-io/bdd100k-validation-only.git",
+    ),
+    branch="main",
+)
+test_dataset = OptimizationDataset(
+    name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
+    labeling_type=LabelingType.OBJECT_DETECTION,
+    storage_config=StorageConfig(
+        name="BDD-100k-validation-dataset",
+        type=StorageTypes.GIT,
+        git=git_storage,
+    ),
+    data_root_url=git_storage.get_url(path="/BDD100K Val from Hirundo.zip/bdd100k"),
+    labeling_info=HirundoCSV(
+        csv_url=git_storage.get_url(
+            path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
+        ),
+    ),
+    classes=[
+        "traffic light",
+        "traffic sign",
+        "car",
+        "pedestrian",
+        "bus",
+        "truck",
+        "rider",
+        "bicycle",
+        "motorcycle",
+        "train",
+        "other vehicle",
+        "other person",
+        "trailer",
+    ],
+)
+
+test_dataset.run_optimization()
+results = test_dataset.check_run()
+print(results)
+```
+
+Note: Currently we only support the main CPython releases 3.9, 3.10 and 3.11. PyPy support may be introduced in the future.
+
+## Further documentation
+
+To learn more about how to use this library, please visit the [documentation](http://docs.hirundo.io/) or see the Google Colab examples.
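The output schema above maps cleanly onto a pandas DataFrame (pandas>=2.2.2 is a declared dependency). Below is a minimal triage sketch, not code from the package; `suspects.csv` stands in for wherever you save the results returned by `check_run()`:

```python
# Hypothetical triage of the classification output CSV described above.
import pandas as pd

results = pd.read_csv("suspects.csv")  # placeholder path for saved check_run() output
# Surface the most strongly suspected mislabels first.
top = results.sort_values("suspect_level", ascending=False)
print(top[["image_path", "label", "suggested_label", "suggested_label_conf"]].head(10))
```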
hirundo-0.1.9/README.md
ADDED
@@ -0,0 +1,145 @@
+# Hirundo
+
+This package exposes access to Hirundo APIs for dataset optimization for Machine Learning.
+
+Dataset optimization is currently available for datasets labelled for classification and object detection.
+
+Supported dataset storage configs include:
+- Google Cloud (GCP) Storage
+- Amazon Web Services (AWS) S3
+- Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
+
+## Optimizing a classification dataset
+
+Currently ``hirundo`` requires a CSV file with the following columns (all columns are required):
+- ``image_path``: The location of the image within the dataset ``root``
+- ``label``: The label of the image, i.e. the class that was annotated for this image
+
+And outputs a CSV with the same columns and:
+- ``suspect_level``: mislabel suspect level
+- ``suggested_label``: suggested label
+- ``suggested_label_conf``: suggested label confidence
+
+## Optimizing an object detection (OD) dataset
+
+Currently ``hirundo`` requires a CSV file with the following columns (all columns are required):
+- ``image_path``: The location of the image within the dataset ``root``
+- ``bbox_id``: The index of the bounding box within the dataset. Used to indicate label suspects
+- ``label``: The label of the bounding box, i.e. the class that was annotated for this object
+- ``x1``, ``y1``, ``x2``, ``y2``: The bounding box coordinates of the object within the image
+
+And outputs a CSV with the same columns and:
+- ``suspect_level``: object mislabel suspect level
+- ``suggested_label``: suggested object label
+- ``suggested_label_conf``: suggested object label confidence
+
+Note: This Python package must be used alongside a Hirundo server, either the SaaS platform, a custom VPC deployment or an on-premises installation.
+
+## Installation
+
+You can install the latest version of this package with a simple `pip install hirundo`. If you prefer to install from the Git repository and/or need a specific version or branch, clone the repository, check out the relevant commit and run `pip install .`. A full list of dependencies can be found in `requirements.txt`, but they will be installed automatically by either of these commands.
+
+## Usage
+
+Classification example:
+
+```python
+import json
+import os
+
+from hirundo import (
+    HirundoCSV,
+    LabelingType,
+    OptimizationDataset,
+    StorageGCP,
+    StorageConfig,
+    StorageTypes,
+)
+
+gcp_bucket = StorageGCP(
+    bucket_name="cifar100bucket",
+    project="Hirundo-global",
+    credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
+)
+test_dataset = OptimizationDataset(
+    name="TEST-GCP cifar 100 classification dataset",
+    labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
+    storage_config=StorageConfig(
+        name="cifar100bucket",
+        type=StorageTypes.GCP,
+        gcp=gcp_bucket,
+    ),
+    data_root_url=gcp_bucket.get_url(path="/pytorch-cifar/data"),
+    labeling_info=HirundoCSV(
+        csv_url=gcp_bucket.get_url(path="/pytorch-cifar/data/cifar100.csv"),
+    ),
+    classes=cifar100_classes,
+)
+
+test_dataset.run_optimization()
+results = test_dataset.check_run()
+print(results)
+```
+
+Object detection example:
+
+```python
+from hirundo import (
+    GitRepo,
+    HirundoCSV,
+    LabelingType,
+    OptimizationDataset,
+    StorageGit,
+    StorageConfig,
+    StorageTypes,
+)
+
+git_storage = StorageGit(
+    repo=GitRepo(
+        name="BDD-100k-validation-dataset",
+        repository_url="https://git@hf.co/datasets/hirundo-io/bdd100k-validation-only.git",
+    ),
+    branch="main",
+)
+test_dataset = OptimizationDataset(
+    name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
+    labeling_type=LabelingType.OBJECT_DETECTION,
+    storage_config=StorageConfig(
+        name="BDD-100k-validation-dataset",
+        type=StorageTypes.GIT,
+        git=git_storage,
+    ),
+    data_root_url=git_storage.get_url(path="/BDD100K Val from Hirundo.zip/bdd100k"),
+    labeling_info=HirundoCSV(
+        csv_url=git_storage.get_url(
+            path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
+        ),
+    ),
+    classes=[
+        "traffic light",
+        "traffic sign",
+        "car",
+        "pedestrian",
+        "bus",
+        "truck",
+        "rider",
+        "bicycle",
+        "motorcycle",
+        "train",
+        "other vehicle",
+        "other person",
+        "trailer",
+    ],
+)
+
+test_dataset.run_optimization()
+results = test_dataset.check_run()
+print(results)
+```
+
+Note: Currently we only support the main CPython releases 3.9, 3.10 and 3.11. PyPy support may be introduced in the future.
+
+## Further documentation
+
+To learn more about how to use this library, please visit the [documentation](http://docs.hirundo.io/) or see the Google Colab examples.
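For the OD schema repeated above, the required input CSV can be assembled with pandas. A sketch, not from the package; the image paths, boxes, and the `bdd100k.csv` output name are made up:

```python
# Hypothetical construction of an OD input CSV with the required columns.
import pandas as pd

rows = [
    # image_path is relative to the dataset root; one row per bounding box.
    {"image_path": "images/0001.jpg", "bbox_id": 0, "label": "car",
     "x1": 14, "y1": 30, "x2": 120, "y2": 96},
    {"image_path": "images/0001.jpg", "bbox_id": 1, "label": "pedestrian",
     "x1": 200, "y1": 42, "x2": 238, "y2": 140},
]
pd.DataFrame(rows).to_csv("bdd100k.csv", index=False)
```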
{hirundo-0.1.7 → hirundo-0.1.9}/hirundo/__init__.py
CHANGED
@@ -1,35 +1,43 @@
 from .dataset_optimization import (
+    COCO,
+    YOLO,
+    HirundoCSV,
     HirundoError,
     OptimizationDataset,
+    RunArgs,
+    VisionRunArgs,
 )
 from .enum import (
     DatasetMetadataType,
-
+    LabelingType,
 )
 from .git import GitRepo
 from .storage import (
+    StorageConfig,
     StorageGCP,
-    # StorageAzure, TODO: Azure storage
+    # StorageAzure, TODO: Azure storage is coming soon
     StorageGit,
-    StorageIntegration,
-    StorageLink,
     StorageS3,
     StorageTypes,
 )
 
 __all__ = [
+    "COCO",
+    "YOLO",
+    "HirundoCSV",
     "HirundoError",
     "OptimizationDataset",
-    "
+    "RunArgs",
+    "VisionRunArgs",
+    "LabelingType",
     "DatasetMetadataType",
     "GitRepo",
-    "StorageLink",
     "StorageTypes",
     "StorageS3",
     "StorageGCP",
-    # "StorageAzure", TODO: Azure storage
+    # "StorageAzure", TODO: Azure storage is coming soon
     "StorageGit",
-    "
+    "StorageConfig",
 ]
 
-__version__ = "0.1.
+__version__ = "0.1.9"
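Judging only from the export list above, the new labeling-info types are importable from the package root as of 0.1.9. A hedged sketch; the bucket URL is a placeholder:

```python
# Sketch: the 0.1.9 package root now re-exports COCO, YOLO, and HirundoCSV.
from hirundo import COCO, YOLO, HirundoCSV  # noqa: F401

labeling_info = HirundoCSV(
    csv_url="gs://my-bucket/data/labels.csv",  # hypothetical bucket path
)
```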
hirundo-0.1.9/hirundo/_constraints.py
ADDED
@@ -0,0 +1,53 @@
+from typing import Annotated
+
+from pydantic import StringConstraints, UrlConstraints
+from pydantic_core import Url
+
+S3BucketUrl = Annotated[
+    str,
+    StringConstraints(
+        min_length=8,
+        max_length=1023,
+        pattern=r"s3?://[a-z0-9.-]{3,64}[/]?",  # Only allow real S3 bucket URLs
+    ),
+]
+
+StorageConfigName = Annotated[
+    str,
+    StringConstraints(
+        min_length=1,
+        max_length=255,
+        pattern=r"^[a-zA-Z0-9-_]+$",
+    ),
+]
+
+S3_MIN_LENGTH = 8
+S3_MAX_LENGTH = 1023
+S3_PATTERN = r"s3://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
+GCP_MIN_LENGTH = 8
+GCP_MAX_LENGTH = 1023
+GCP_PATTERN = r"gs://[a-zA-Z0-9.-]{3,64}/[a-zA-Z0-9.-/]+"
+
+RepoUrl = Annotated[
+    Url,
+    UrlConstraints(
+        allowed_schemes=[
+            "ssh",
+            "https",
+            "http",
+        ]
+    ),
+]
+HirundoUrl = Annotated[
+    Url,
+    UrlConstraints(
+        allowed_schemes=[
+            "file",
+            "https",
+            "http",
+            "s3",
+            "gs",
+            "ssh",
+        ]
+    ),
+]
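These `Annotated` types plug into any pydantic v2 model and can also be exercised directly with pydantic's `TypeAdapter` (the package requires pydantic>=2.7.1). A sketch, not code from the package:

```python
# Sketch: validating the new constraint types with pydantic v2's TypeAdapter.
from pydantic import TypeAdapter, ValidationError

from hirundo._constraints import RepoUrl, S3BucketUrl

repo = TypeAdapter(RepoUrl).validate_python(
    "https://github.com/Hirundo-io/hirundo-client"
)  # accepted: https is an allowed scheme

try:
    TypeAdapter(S3BucketUrl).validate_python("not a bucket")
except ValidationError:
    print("rejected: does not match the s3:// pattern")
```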
hirundo-0.1.9/hirundo/_env.py
ADDED
@@ -0,0 +1,26 @@
+import enum
+import os
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+
+class EnvLocation(enum.Enum):
+    DOTENV = Path.cwd() / ".env"
+    HOME = Path.home() / ".hirundo.conf"
+
+
+if os.path.exists(EnvLocation.DOTENV.value):
+    load_dotenv(EnvLocation.DOTENV.value)
+elif os.path.exists(EnvLocation.HOME.value):
+    load_dotenv(EnvLocation.HOME.value)
+
+API_HOST = os.getenv("API_HOST", "https://api.hirundo.io")
+API_KEY = os.getenv("API_KEY")
+
+
+def check_api_key():
+    if not API_KEY:
+        raise ValueError(
+            "API_KEY is not set. Please run `hirundo setup` to set the API key"
+        )
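The module resolves configuration once at import time: `./.env` wins over `~/.hirundo.conf`, and `API_HOST` falls back to the SaaS endpoint. A sketch of relying on that order; the key value is a placeholder, and note that `load_dotenv` does not override variables already set in the environment:

```python
# Sketch: variables already in the environment take precedence over both files.
import os

os.environ.setdefault("API_KEY", "hir_example_key")  # placeholder key

from hirundo import _env  # triggers the .env / ~/.hirundo.conf lookup

_env.check_api_key()  # passes once API_KEY is set
print(_env.API_HOST)  # "https://api.hirundo.io" unless overridden
```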
hirundo-0.1.9/hirundo/_http.py
ADDED
@@ -0,0 +1,19 @@
+from requests import Response
+
+import hirundo.logger
+
+logger = hirundo.logger.get_logger(__name__)
+
+MINIMUM_CLIENT_SERVER_ERROR_CODE = 400
+
+
+def raise_for_status_with_reason(response: Response):
+    try:
+        if response.status_code >= MINIMUM_CLIENT_SERVER_ERROR_CODE:
+            response.reason = response.json().get("reason", None)
+            if response.reason is None:
+                response.reason = response.json().get("detail", None)
+    except Exception as e:
+        logger.debug("Could not parse response as JSON: %s", e)
+
+    response.raise_for_status()
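The helper is a drop-in replacement for a bare `raise_for_status()` that first copies the server's JSON `reason` or `detail` field onto the response, so the raised `HTTPError` carries the server's explanation. A usage sketch; the endpoint path is hypothetical:

```python
# Sketch: surfacing the server-provided error reason on a failed request.
import requests

from hirundo._http import raise_for_status_with_reason

response = requests.get("https://api.hirundo.io/example", timeout=30)  # placeholder path
raise_for_status_with_reason(response)  # on >=400, HTTPError includes "reason"/"detail"
```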
{hirundo-0.1.7 → hirundo-0.1.9}/hirundo/_iter_sse_retrying.py
CHANGED
@@ -1,19 +1,27 @@
 import asyncio
 import time
+import typing
+import uuid
 from collections.abc import AsyncGenerator, Generator
-from typing import Union
 
 import httpx
-from httpx_sse import ServerSentEvent, aconnect_sse, connect_sse
+import requests
+import urllib3
+from httpx_sse import ServerSentEvent, SSEError, aconnect_sse, connect_sse
 from stamina import retry
+
+from hirundo._timeouts import READ_TIMEOUT
+from hirundo.logger import get_logger
+
+logger = get_logger(__name__)
 
 
 # Credit: https://github.com/florimondmanca/httpx-sse/blob/master/README.md#handling-reconnections
 def iter_sse_retrying(
     client: httpx.Client,
     method: str,
     url: str,
-    headers:
+    headers: typing.Optional[dict[str, str]] = None,
 ) -> Generator[ServerSentEvent, None, None]:
     if headers is None:
         headers = {}
@@ -28,7 +36,13 @@ def iter_sse_retrying(
     # This may happen when the server is overloaded and closes the connection or
     # when Kubernetes restarts / replaces a pod.
     # Likewise, this will likely be temporary, hence the retries.
-    @retry(
+    @retry(
+        on=(
+            httpx.ReadError,
+            httpx.RemoteProtocolError,
+            urllib3.exceptions.ReadTimeoutError,
+        )
+    )
     def _iter_sse():
         nonlocal last_event_id, reconnection_delay
 
@@ -44,13 +58,27 @@ def iter_sse_retrying(
             connect_headers["Last-Event-ID"] = last_event_id
 
         with connect_sse(client, method, url, headers=connect_headers) as event_source:
-            for sse in event_source.iter_sse():
-                last_event_id = sse.id
-
-                if sse.retry is not None:
-                    reconnection_delay = sse.retry / 1000
-
-                yield sse
+            try:
+                for sse in event_source.iter_sse():
+                    last_event_id = sse.id
+
+                    if sse.retry is not None:
+                        reconnection_delay = sse.retry / 1000
+
+                    yield sse
+            except SSEError:
+                logger.error("SSE error occurred. Trying regular request")
+                response = requests.get(
+                    url,
+                    headers=connect_headers,
+                    timeout=READ_TIMEOUT,
+                )
+                yield ServerSentEvent(
+                    event="",
+                    data=response.text,
+                    id=uuid.uuid4().hex,
+                    retry=None,
+                )
 
     return _iter_sse()
 
@@ -72,7 +100,13 @@ async def aiter_sse_retrying(
     # This may happen when the server is overloaded and closes the connection or
     # when Kubernetes restarts / replaces a pod.
     # Likewise, this will likely be temporary, hence the retries.
-    @retry(
+    @retry(
+        on=(
+            httpx.ReadError,
+            httpx.RemoteProtocolError,
+            urllib3.exceptions.ReadTimeoutError,
+        )
+    )
     async def _iter_sse() -> AsyncGenerator[ServerSentEvent, None]:
         nonlocal last_event_id, reconnection_delay
 
@@ -86,12 +120,22 @@ async def aiter_sse_retrying(
         async with aconnect_sse(
             client, method, url, headers=connect_headers
         ) as event_source:
-            async for sse in event_source.aiter_sse():
-                last_event_id = sse.id
-
-                if sse.retry is not None:
-                    reconnection_delay = sse.retry / 1000
-
-                yield sse
+            try:
+                async for sse in event_source.aiter_sse():
+                    last_event_id = sse.id
+
+                    if sse.retry is not None:
+                        reconnection_delay = sse.retry / 1000
+
+                    yield sse
+            except SSEError:
+                logger.error("SSE error occurred. Trying regular request")
+                response = await client.get(url, headers=connect_headers)
+                yield ServerSentEvent(
+                    event="",
+                    data=response.text,
+                    id=uuid.uuid4().hex,
+                    retry=None,
+                )
 
     return _iter_sse()
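A minimal consumer of the retrying iterator, matching the signature in the diff above; the `/events` path is hypothetical:

```python
# Sketch: consuming server-sent events with automatic reconnection.
import httpx

from hirundo._iter_sse_retrying import iter_sse_retrying

with httpx.Client(base_url="https://api.hirundo.io", timeout=None) as client:
    for sse in iter_sse_retrying(client, "GET", "/events"):  # hypothetical path
        print(sse.event, sse.data)
```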