hirundo 0.1.16__tar.gz → 0.1.21__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. {hirundo-0.1.16 → hirundo-0.1.21}/PKG-INFO +67 -53
  2. {hirundo-0.1.16 → hirundo-0.1.21}/README.md +53 -46
  3. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/__init__.py +30 -14
  4. hirundo-0.1.21/hirundo/_constraints.py +164 -0
  5. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/_headers.py +1 -1
  6. hirundo-0.1.21/hirundo/_http.py +72 -0
  7. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/_iter_sse_retrying.py +1 -1
  8. hirundo-0.1.16/hirundo/_constraints.py → hirundo-0.1.21/hirundo/_urls.py +12 -6
  9. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/cli.py +7 -7
  10. hirundo-0.1.21/hirundo/dataset_enum.py +46 -0
  11. hirundo-0.1.16/hirundo/dataset_optimization.py → hirundo-0.1.21/hirundo/dataset_qa.py +195 -168
  12. hirundo-0.1.16/hirundo/dataset_optimization_results.py → hirundo-0.1.21/hirundo/dataset_qa_results.py +4 -4
  13. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/git.py +2 -3
  14. hirundo-0.1.21/hirundo/labeling.py +140 -0
  15. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/storage.py +43 -60
  16. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/unzip.py +9 -10
  17. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo.egg-info/PKG-INFO +67 -53
  18. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo.egg-info/SOURCES.txt +4 -2
  19. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo.egg-info/requires.txt +12 -5
  20. {hirundo-0.1.16 → hirundo-0.1.21}/pyproject.toml +36 -31
  21. hirundo-0.1.16/hirundo/_http.py +0 -19
  22. hirundo-0.1.16/hirundo/dataset_enum.py +0 -23
  23. {hirundo-0.1.16 → hirundo-0.1.21}/LICENSE +0 -0
  24. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/__main__.py +0 -0
  25. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/_dataframe.py +0 -0
  26. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/_env.py +0 -0
  27. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/_timeouts.py +0 -0
  28. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo/logger.py +0 -0
  29. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo.egg-info/dependency_links.txt +0 -0
  30. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo.egg-info/entry_points.txt +0 -0
  31. {hirundo-0.1.16 → hirundo-0.1.21}/hirundo.egg-info/top_level.txt +0 -0
  32. {hirundo-0.1.16 → hirundo-0.1.21}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hirundo
3
- Version: 0.1.16
3
+ Version: 0.1.21
4
4
  Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
5
5
  Author-email: Hirundo <dev@hirundo.io>
6
6
  License: MIT License
@@ -13,7 +13,7 @@ License: MIT License
13
13
 
14
14
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15
15
 
16
- Project-URL: Homepage, https://github.com/Hirundo-io/hirundo-client
16
+ Project-URL: Homepage, https://github.com/Hirundo-io/hirundo-python-sdk
17
17
  Keywords: dataset,machine learning,data science,data engineering
18
18
  Classifier: License :: OSI Approved :: MIT License
19
19
  Classifier: Programming Language :: Python
@@ -32,6 +32,10 @@ Requires-Dist: httpx>=0.27.0
32
32
  Requires-Dist: stamina>=24.2.0
33
33
  Requires-Dist: httpx-sse>=0.4.0
34
34
  Requires-Dist: tqdm>=4.66.5
35
+ Requires-Dist: h11>=0.16.0
36
+ Requires-Dist: requests>=2.32.4
37
+ Requires-Dist: urllib3>=2.5.0
38
+ Requires-Dist: setuptools>=78.1.1
35
39
  Provides-Extra: dev
36
40
  Requires-Dist: pyyaml>=6.0.1; extra == "dev"
37
41
  Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
@@ -46,69 +50,94 @@ Requires-Dist: stamina>=24.2.0; extra == "dev"
46
50
  Requires-Dist: httpx-sse>=0.4.0; extra == "dev"
47
51
  Requires-Dist: pytest>=8.2.0; extra == "dev"
48
52
  Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
49
- Requires-Dist: uv>=0.5.8; extra == "dev"
53
+ Requires-Dist: uv>=0.8.6; extra == "dev"
50
54
  Requires-Dist: pre-commit>=3.7.1; extra == "dev"
51
55
  Requires-Dist: virtualenv>=20.6.6; extra == "dev"
52
- Requires-Dist: ruff>=0.11.6; extra == "dev"
56
+ Requires-Dist: ruff>=0.12.0; extra == "dev"
53
57
  Requires-Dist: bumpver; extra == "dev"
54
58
  Requires-Dist: platformdirs>=4.3.6; extra == "dev"
55
59
  Requires-Dist: safety>=3.2.13; extra == "dev"
60
+ Requires-Dist: cryptography>=44.0.1; extra == "dev"
61
+ Requires-Dist: jinja2>=3.1.6; extra == "dev"
56
62
  Provides-Extra: docs
57
63
  Requires-Dist: sphinx>=7.4.7; extra == "docs"
58
- Requires-Dist: sphinx-autobuild>=2024.4.16; extra == "docs"
64
+ Requires-Dist: sphinx-autobuild>=2024.9.3; extra == "docs"
59
65
  Requires-Dist: sphinx-click>=5.0.1; extra == "docs"
60
66
  Requires-Dist: autodoc_pydantic>=2.2.0; extra == "docs"
61
67
  Requires-Dist: furo; extra == "docs"
62
68
  Requires-Dist: sphinx-multiversion; extra == "docs"
63
69
  Requires-Dist: esbonio; extra == "docs"
64
- Requires-Dist: starlette>0.40.0; extra == "docs"
70
+ Requires-Dist: starlette>=0.47.2; extra == "docs"
65
71
  Requires-Dist: markupsafe>=3.0.2; extra == "docs"
72
+ Requires-Dist: jinja2>=3.1.6; extra == "docs"
66
73
  Provides-Extra: pandas
67
- Requires-Dist: pandas>=2.2.2; extra == "pandas"
74
+ Requires-Dist: pandas>=2.2.3; extra == "pandas"
68
75
  Provides-Extra: polars
69
76
  Requires-Dist: polars>=1.0.0; extra == "polars"
70
77
  Dynamic: license-file
71
78
 
72
79
  # Hirundo
73
80
 
74
- This package exposes access to Hirundo APIs for dataset optimization for Machine Learning.
75
-
76
- Dataset optimization is currently available for datasets labelled for classification and object detection.
81
+ This package exposes access to Hirundo APIs for dataset QA for Machine Learning.
77
82
 
83
+ Dataset QA is currently available for datasets labelled for classification and object detection.
78
84
 
79
85
  Supported dataset storage configs include:
80
- - Google Cloud (GCP) Storage
81
- - Amazon Web Services (AWS) S3
82
- - Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
86
+
87
+ - Google Cloud (GCP) Storage
88
+ - Amazon Web Services (AWS) S3
89
+ - Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
90
+
91
+ Note: This Python package must be used alongside a Hirundo server, either the SaaS platform, a custom VPC deployment or an on-premises installation.
83
92
 
84
93
  Optimizing a classification dataset
85
94
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
86
95
 
87
- Currently ``hirundo`` requires a CSV file with the following columns (all columns are required):
88
- - ``image_path``: The location of the image within the dataset ``root``
89
- - ``label``: The label of the image, i.e. which the class that was annotated for this image
96
+ Currently `hirundo` requires a CSV file with the following columns (all columns are required):
97
+
98
+ - `image_path`: The location of the image within the dataset `data_root_url`
99
+ - `class_name`: The semantic label, i.e. the class name of the class that the image was annotated as belonging to
100
+
101
+ And outputs two Pandas DataFrames with the dataset columns as well as:
102
+
103
+ Suspect DataFrame (filename: `mislabel_suspects.csv`) columns:
90
104
 
91
- And outputs a CSV with the same columns and:
92
- - ``suspect_level``: mislabel suspect level
93
- - ``suggested_label``: suggested label
94
- - ``suggested_label_conf``: suggested label confidence
105
+ - ``suspect_score``: mislabel suspect score
106
+ - ``suspect_level``: mislabel suspect level
107
+ - ``suspect_rank``: mislabel suspect ranking
108
+ - ``suggested_class_name``: suggested semantic label
109
+ - ``suggested_class_conf``: suggested semantic label confidence
110
+
111
+ Errors and warnings DataFrame (filename: `invalid_data.csv`) columns:
112
+
113
+ - ``status``: status message (one of ``NO_LABELS`` / ``MISSING_IMAGE`` / ``INVALID_IMAGE``)
95
114
 
96
115
  Optimizing an object detection (OD) dataset
97
116
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
98
117
 
99
118
  Currently ``hirundo`` requires a CSV file with the following columns (all columns are required):
100
- - ``image_path``: The location of the image within the dataset ``root``
101
- - ``bbox_id``: The index of the bounding box within the dataset. Used to indicate label suspects
102
- - ``label``: The label of the image, i.e. which the class that was annotated for this image
103
- - ``x1``, ``y1``, ``x2``, ``y2``: The bounding box coordinates of the object within the image
104
119
 
105
- And outputs a CSV with the same columns and:
106
- - ``suspect_level``: object mislabel suspect level
107
- - ``suggested_label``: suggested object label
108
- - ``suggested_label_conf``: suggested object label confidence
120
+ - ``image_path``: The location of the image within the dataset ``data_root_url``
121
+ - ``object_id``: The ID of the bounding box within the dataset. Used to indicate object suspects
122
+ - ``class_name``: Object semantic label, i.e. the class name of the object that was annotated
123
+ - ``xmin``: leftmost horizontal pixel coordinate of the object's bounding box
124
+ - ``ymin``: uppermost vertical pixel coordinate of the object's bounding box
125
+ - ``xmax``: rightmost horizontal pixel coordinate of the object's bounding box
126
+ - ``ymax``: lowermost vertical pixel coordinate of the object's bounding box
109
127
 
110
- Note: This Python package must be used alongside a Hirundo server, either the SaaS platform, a custom VPC deployment or an on-premises installation.
111
128
 
129
+ And outputs two Pandas DataFrames with the dataset columns as well as:
130
+
131
+ Suspect DataFrame (filename: `mislabel_suspects.csv`) columns:
132
+
133
+ - ``suspect_score``: object mislabel suspect score
134
+ - ``suspect_level``: object mislabel suspect level
135
+ - ``suspect_rank``: object mislabel suspect ranking
136
+ - ``suggested_class_name``: suggested object semantic label
137
+ - ``suggested_class_conf``: suggested object semantic label confidence
138
+
139
+ Errors and warnings DataFrame (filename: `invalid_data.csv`) columns:
140
+ - ``status``: status message (one of ``NO_LABELS`` / ``MISSING_IMAGE`` / ``INVALID_IMAGE`` / ``INVALID_BBOX`` / ``INVALID_BBOX_SIZE``)
112
141
 
113
142
  ## Installation
114
143
 
@@ -117,11 +146,12 @@ You can install the codebase with a simple `pip install hirundo` to install the
117
146
  ## Usage
118
147
 
119
148
  Classification example:
149
+
120
150
  ```python
121
151
  from hirundo import (
122
152
  HirundoCSV,
123
153
  LabelingType,
124
- OptimizationDataset,
154
+ QADataset,
125
155
  StorageGCP,
126
156
  StorageConfig,
127
157
  StorageTypes,
@@ -132,7 +162,7 @@ gcp_bucket = StorageGCP(
132
162
  project="Hirundo-global",
133
163
  credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
134
164
  )
135
- test_dataset = OptimizationDataset(
165
+ test_dataset = QADataset(
136
166
  name="TEST-GCP cifar 100 classification dataset",
137
167
  labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
138
168
  storage_config=StorageConfig(
@@ -147,12 +177,11 @@ test_dataset = OptimizationDataset(
147
177
  classes=cifar100_classes,
148
178
  )
149
179
 
150
- test_dataset.run_optimization()
180
+ test_dataset.run_qa()
151
181
  results = test_dataset.check_run()
152
182
  print(results)
153
183
  ```
154
184
 
155
-
156
185
  Object detection example:
157
186
 
158
187
  ```python
@@ -160,7 +189,7 @@ from hirundo import (
160
189
  GitRepo,
161
190
  HirundoCSV,
162
191
  LabelingType,
163
- OptimizationDataset,
192
+ QADataset,
164
193
  StorageGit,
165
194
  StorageConfig,
166
195
  StorageTypes,
@@ -173,7 +202,7 @@ git_storage = StorageGit(
173
202
  ),
174
203
  branch="main",
175
204
  )
176
- test_dataset = OptimizationDataset(
205
+ test_dataset = QADataset(
177
206
  name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
178
207
  labeling_type=LabelingType.OBJECT_DETECTION,
179
208
  storage_config=StorageConfig(
@@ -187,30 +216,15 @@ test_dataset = OptimizationDataset(
187
216
  path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
188
217
  ),
189
218
  ),
190
- classes=[
191
- "traffic light",
192
- "traffic sign",
193
- "car",
194
- "pedestrian",
195
- "bus",
196
- "truck",
197
- "rider",
198
- "bicycle",
199
- "motorcycle",
200
- "train",
201
- "other vehicle",
202
- "other person",
203
- "trailer",
204
- ],
205
219
  )
206
220
 
207
- test_dataset.run_optimization()
221
+ test_dataset.run_qa()
208
222
  results = test_dataset.check_run()
209
223
  print(results)
210
224
  ```
211
225
 
212
- Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyPy support may be introduced in the future.
226
+ Note: Currently we only support the main CPython releases 3.9, 3.10, 3.11, 3.12 and 3.13. PyPy support may be introduced in the future.
213
227
 
214
228
  ## Further documentation
215
229
 
216
- To learn more about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
230
+ To learn more about how to use this library, please visit the [documentation](http://docs.hirundo.io/) or see the [Google Colab examples](https://github.com/Hirundo-io/hirundo-python-sdk/tree/main/notebooks).
@@ -1,43 +1,65 @@
1
1
  # Hirundo
2
2
 
3
- This package exposes access to Hirundo APIs for dataset optimization for Machine Learning.
4
-
5
- Dataset optimization is currently available for datasets labelled for classification and object detection.
3
+ This package exposes access to Hirundo APIs for dataset QA for Machine Learning.
6
4
 
5
+ Dataset QA is currently available for datasets labelled for classification and object detection.
7
6
 
8
7
  Supported dataset storage configs include:
9
- - Google Cloud (GCP) Storage
10
- - Amazon Web Services (AWS) S3
11
- - Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
8
+
9
+ - Google Cloud (GCP) Storage
10
+ - Amazon Web Services (AWS) S3
11
+ - Git LFS (Large File Storage) repositories (e.g. GitHub or HuggingFace)
12
+
13
+ Note: This Python package must be used alongside a Hirundo server, either the SaaS platform, a custom VPC deployment or an on-premises installation.
12
14
 
13
15
  Optimizing a classification dataset
14
16
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
15
17
 
16
- Currently ``hirundo`` requires a CSV file with the following columns (all columns are required):
17
- - ``image_path``: The location of the image within the dataset ``root``
18
- - ``label``: The label of the image, i.e. which the class that was annotated for this image
18
+ Currently `hirundo` requires a CSV file with the following columns (all columns are required):
19
+
20
+ - `image_path`: The location of the image within the dataset `data_root_url`
21
+ - `class_name`: The semantic label, i.e. the class name of the class that the image was annotated as belonging to
22
+
23
+ And outputs two Pandas DataFrames with the dataset columns as well as:
24
+
25
+ Suspect DataFrame (filename: `mislabel_suspects.csv`) columns:
19
26
 
20
- And outputs a CSV with the same columns and:
21
- - ``suspect_level``: mislabel suspect level
22
- - ``suggested_label``: suggested label
23
- - ``suggested_label_conf``: suggested label confidence
27
+ - ``suspect_score``: mislabel suspect score
28
+ - ``suspect_level``: mislabel suspect level
29
+ - ``suspect_rank``: mislabel suspect ranking
30
+ - ``suggested_class_name``: suggested semantic label
31
+ - ``suggested_class_conf``: suggested semantic label confidence
32
+
33
+ Errors and warnings DataFrame (filename: `invalid_data.csv`) columns:
34
+
35
+ - ``status``: status message (one of ``NO_LABELS`` / ``MISSING_IMAGE`` / ``INVALID_IMAGE``)
24
36
 
25
37
  Optimizing an object detection (OD) dataset
26
38
  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
27
39
 
28
40
  Currently ``hirundo`` requires a CSV file with the following columns (all columns are required):
29
- - ``image_path``: The location of the image within the dataset ``root``
30
- - ``bbox_id``: The index of the bounding box within the dataset. Used to indicate label suspects
31
- - ``label``: The label of the image, i.e. which the class that was annotated for this image
32
- - ``x1``, ``y1``, ``x2``, ``y2``: The bounding box coordinates of the object within the image
33
41
 
34
- And outputs a CSV with the same columns and:
35
- - ``suspect_level``: object mislabel suspect level
36
- - ``suggested_label``: suggested object label
37
- - ``suggested_label_conf``: suggested object label confidence
42
+ - ``image_path``: The location of the image within the dataset ``data_root_url``
43
+ - ``object_id``: The ID of the bounding box within the dataset. Used to indicate object suspects
44
+ - ``class_name``: Object semantic label, i.e. the class name of the object that was annotated
45
+ - ``xmin``: leftmost horizontal pixel coordinate of the object's bounding box
46
+ - ``ymin``: uppermost vertical pixel coordinate of the object's bounding box
47
+ - ``xmax``: rightmost horizontal pixel coordinate of the object's bounding box
48
+ - ``ymax``: lowermost vertical pixel coordinate of the object's bounding box
38
49
 
39
- Note: This Python package must be used alongside a Hirundo server, either the SaaS platform, a custom VPC deployment or an on-premises installation.
40
50
 
51
+ And outputs two Pandas DataFrames with the dataset columns as well as:
52
+
53
+ Suspect DataFrame (filename: `mislabel_suspects.csv`) columns:
54
+
55
+ - ``suspect_score``: object mislabel suspect score
56
+ - ``suspect_level``: object mislabel suspect level
57
+ - ``suspect_rank``: object mislabel suspect ranking
58
+ - ``suggested_class_name``: suggested object semantic label
59
+ - ``suggested_class_conf``: suggested object semantic label confidence
60
+
61
+ Errors and warnings DataFrame (filename: `invalid_data.csv`) columns:
62
+ - ``status``: status message (one of ``NO_LABELS`` / ``MISSING_IMAGE`` / ``INVALID_IMAGE`` / ``INVALID_BBOX`` / ``INVALID_BBOX_SIZE``)
41
63
 
42
64
  ## Installation
43
65
 
@@ -46,11 +68,12 @@ You can install the codebase with a simple `pip install hirundo` to install the
46
68
  ## Usage
47
69
 
48
70
  Classification example:
71
+
49
72
  ```python
50
73
  from hirundo import (
51
74
  HirundoCSV,
52
75
  LabelingType,
53
- OptimizationDataset,
76
+ QADataset,
54
77
  StorageGCP,
55
78
  StorageConfig,
56
79
  StorageTypes,
@@ -61,7 +84,7 @@ gcp_bucket = StorageGCP(
61
84
  project="Hirundo-global",
62
85
  credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
63
86
  )
64
- test_dataset = OptimizationDataset(
87
+ test_dataset = QADataset(
65
88
  name="TEST-GCP cifar 100 classification dataset",
66
89
  labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
67
90
  storage_config=StorageConfig(
@@ -76,12 +99,11 @@ test_dataset = OptimizationDataset(
76
99
  classes=cifar100_classes,
77
100
  )
78
101
 
79
- test_dataset.run_optimization()
102
+ test_dataset.run_qa()
80
103
  results = test_dataset.check_run()
81
104
  print(results)
82
105
  ```
83
106
 
84
-
85
107
  Object detection example:
86
108
 
87
109
  ```python
@@ -89,7 +111,7 @@ from hirundo import (
89
111
  GitRepo,
90
112
  HirundoCSV,
91
113
  LabelingType,
92
- OptimizationDataset,
114
+ QADataset,
93
115
  StorageGit,
94
116
  StorageConfig,
95
117
  StorageTypes,
@@ -102,7 +124,7 @@ git_storage = StorageGit(
102
124
  ),
103
125
  branch="main",
104
126
  )
105
- test_dataset = OptimizationDataset(
127
+ test_dataset = QADataset(
106
128
  name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
107
129
  labeling_type=LabelingType.OBJECT_DETECTION,
108
130
  storage_config=StorageConfig(
@@ -116,30 +138,15 @@ test_dataset = OptimizationDataset(
116
138
  path="/BDD100K Val from Hirundo.zip/bdd100k/bdd100k.csv"
117
139
  ),
118
140
  ),
119
- classes=[
120
- "traffic light",
121
- "traffic sign",
122
- "car",
123
- "pedestrian",
124
- "bus",
125
- "truck",
126
- "rider",
127
- "bicycle",
128
- "motorcycle",
129
- "train",
130
- "other vehicle",
131
- "other person",
132
- "trailer",
133
- ],
134
141
  )
135
142
 
136
- test_dataset.run_optimization()
143
+ test_dataset.run_qa()
137
144
  results = test_dataset.check_run()
138
145
  print(results)
139
146
  ```
140
147
 
141
- Note: Currently we only support the main CPython release 3.9, 3.10 and 3.11. PyPy support may be introduced in the future.
148
+ Note: Currently we only support the main CPython releases 3.9, 3.10, 3.11, 3.12 and 3.13. PyPy support may be introduced in the future.
142
149
 
143
150
  ## Further documentation
144
151
 
145
- To learn more about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the Google Colab examples.
152
+ To learn more about how to use this library, please visit the [documentation](http://docs.hirundo.io/) or see the [Google Colab examples](https://github.com/Hirundo-io/hirundo-python-sdk/tree/main/notebooks).
@@ -1,38 +1,54 @@
1
1
  from .dataset_enum import (
2
2
  DatasetMetadataType,
3
3
  LabelingType,
4
+ StorageTypes,
4
5
  )
5
- from .dataset_optimization import (
6
- COCO,
7
- YOLO,
8
- HirundoCSV,
6
+ from .dataset_qa import (
7
+ ClassificationRunArgs,
8
+ Domain,
9
9
  HirundoError,
10
- OptimizationDataset,
10
+ ObjectDetectionRunArgs,
11
+ QADataset,
11
12
  RunArgs,
12
- VisionRunArgs,
13
13
  )
14
- from .dataset_optimization_results import DatasetOptimizationResults
14
+ from .dataset_qa_results import DatasetQAResults
15
15
  from .git import GitPlainAuth, GitRepo, GitSSHAuth
16
+ from .labeling import (
17
+ COCO,
18
+ YOLO,
19
+ HirundoCSV,
20
+ KeylabsAuth,
21
+ KeylabsObjDetImages,
22
+ KeylabsObjDetVideo,
23
+ KeylabsObjSegImages,
24
+ KeylabsObjSegVideo,
25
+ )
16
26
  from .storage import (
17
27
  StorageConfig,
18
28
  StorageGCP,
19
29
  # StorageAzure, TODO: Azure storage is coming soon
20
30
  StorageGit,
21
31
  StorageS3,
22
- StorageTypes,
23
32
  )
24
33
  from .unzip import load_df, load_from_zip
25
34
 
26
35
  __all__ = [
27
36
  "COCO",
28
37
  "YOLO",
29
- "HirundoCSV",
30
38
  "HirundoError",
31
- "OptimizationDataset",
39
+ "HirundoCSV",
40
+ "KeylabsAuth",
41
+ "KeylabsObjDetImages",
42
+ "KeylabsObjDetVideo",
43
+ "KeylabsObjSegImages",
44
+ "KeylabsObjSegVideo",
45
+ "QADataset",
46
+ "Domain",
32
47
  "RunArgs",
33
- "VisionRunArgs",
34
- "LabelingType",
48
+ "ClassificationRunArgs",
49
+ "ObjectDetectionRunArgs",
35
50
  "DatasetMetadataType",
51
+ "LabelingType",
36
52
  "GitPlainAuth",
37
53
  "GitRepo",
38
54
  "GitSSHAuth",
@@ -42,9 +58,9 @@ __all__ = [
42
58
  # "StorageAzure", TODO: Azure storage is coming soon
43
59
  "StorageGit",
44
60
  "StorageConfig",
45
- "DatasetOptimizationResults",
61
+ "DatasetQAResults",
46
62
  "load_df",
47
63
  "load_from_zip",
48
64
  ]
49
65
 
50
- __version__ = "0.1.16"
66
+ __version__ = "0.1.21"
@@ -0,0 +1,164 @@
1
+ import re
2
+ import typing
3
+ from typing import TYPE_CHECKING
4
+
5
+ from hirundo._urls import (
6
+ LENGTH_CONSTRAINTS,
7
+ STORAGE_PATTERNS,
8
+ )
9
+ from hirundo.dataset_enum import DatasetMetadataType, LabelingType, StorageTypes
10
+ from hirundo.labeling import COCO, YOLO, HirundoCSV, Keylabs
11
+
12
+ if TYPE_CHECKING:
13
+ from hirundo._urls import HirundoUrl
14
+ from hirundo.dataset_qa import LabelingInfo
15
+ from hirundo.storage import (
16
+ ResponseStorageConfig,
17
+ StorageConfig,
18
+ StorageGCP,
19
+ StorageGCPOut,
20
+ StorageS3,
21
+ StorageS3Out,
22
+ )
23
+
24
+ LABELING_TYPES_TO_DATASET_METADATA_TYPES = {
25
+ LabelingType.SINGLE_LABEL_CLASSIFICATION: [
26
+ DatasetMetadataType.HIRUNDO_CSV,
27
+ ],
28
+ LabelingType.OBJECT_DETECTION: [
29
+ DatasetMetadataType.HIRUNDO_CSV,
30
+ DatasetMetadataType.COCO,
31
+ DatasetMetadataType.YOLO,
32
+ DatasetMetadataType.KeylabsObjDetImages,
33
+ DatasetMetadataType.KeylabsObjDetVideo,
34
+ ],
35
+ LabelingType.OBJECT_SEGMENTATION: [
36
+ DatasetMetadataType.HIRUNDO_CSV,
37
+ DatasetMetadataType.KeylabsObjSegImages,
38
+ DatasetMetadataType.KeylabsObjSegVideo,
39
+ ],
40
+ LabelingType.SEMANTIC_SEGMENTATION: [
41
+ DatasetMetadataType.HIRUNDO_CSV,
42
+ ],
43
+ LabelingType.PANOPTIC_SEGMENTATION: [
44
+ DatasetMetadataType.HIRUNDO_CSV,
45
+ ],
46
+ LabelingType.SPEECH_TO_TEXT: [
47
+ DatasetMetadataType.HIRUNDO_CSV,
48
+ ],
49
+ }
50
+
51
+
52
+ def validate_s3_url(str_url: str, s3_config: "StorageS3 | StorageS3Out"):
53
+ if (
54
+ len(str_url) < LENGTH_CONSTRAINTS[StorageTypes.S3]["min_length"]
55
+ or len(str_url) > LENGTH_CONSTRAINTS[StorageTypes.S3]["max_length"]
56
+ ):
57
+ raise ValueError("S3 URL must be between 8 and 1023 characters")
58
+ elif not re.match(STORAGE_PATTERNS[StorageTypes.S3], str_url):
59
+ raise ValueError(
60
+ f"Invalid S3 URL. Pattern must match: {STORAGE_PATTERNS[StorageTypes.S3]}"
61
+ )
62
+ elif not str_url.startswith(f"{s3_config.bucket_url}/"):
63
+ raise ValueError(f"S3 URL must start with {s3_config.bucket_url}/")
64
+
65
+
66
+ def validate_gcp_url(str_url: str, gcp_config: "StorageGCP | StorageGCPOut"):
67
+ matches = re.match(STORAGE_PATTERNS[StorageTypes.GCP], str_url)
68
+ if (
69
+ len(str_url) < LENGTH_CONSTRAINTS[StorageTypes.GCP]["min_length"]
70
+ or len(str_url) > LENGTH_CONSTRAINTS[StorageTypes.GCP]["max_length"]
71
+ ):
72
+ raise ValueError(
73
+ f"GCP URL must be between {LENGTH_CONSTRAINTS[StorageTypes.GCP]['min_length']}"
74
+ + f" and {LENGTH_CONSTRAINTS[StorageTypes.GCP]['max_length']} characters"
75
+ )
76
+ elif not matches:
77
+ raise ValueError(
78
+ f"Invalid GCP URL. Pattern must match: {STORAGE_PATTERNS[StorageTypes.GCP]}"
79
+ )
80
+ elif (
81
+ matches
82
+ and len(matches.group(1))
83
+ > LENGTH_CONSTRAINTS[StorageTypes.GCP]["bucket_max_length"]
84
+ ):
85
+ raise ValueError(
86
+ f"GCP bucket name must be between {LENGTH_CONSTRAINTS[StorageTypes.GCP]['bucket_min_length']} "
87
+ + f"and {LENGTH_CONSTRAINTS[StorageTypes.GCP]['bucket_max_length']} characters"
88
+ )
89
+ elif not str_url.startswith(f"gs://{gcp_config.bucket_name}/"):
90
+ raise ValueError(f"GCP URL must start with gs://{gcp_config.bucket_name}")
91
+
92
+
93
+ def validate_url(
94
+ url: "HirundoUrl",
95
+ storage_config: "StorageConfig | ResponseStorageConfig",
96
+ ) -> "HirundoUrl":
97
+ s3_config = storage_config.s3
98
+ gcp_config = storage_config.gcp
99
+ git_config = storage_config.git
100
+ str_url = str(url)
101
+
102
+ if s3_config is not None:
103
+ validate_s3_url(str_url, s3_config)
104
+ elif gcp_config is not None:
105
+ validate_gcp_url(str_url, gcp_config)
106
+ elif (
107
+ git_config is not None
108
+ and not str_url.startswith("https://")
109
+ and not str_url.startswith("ssh://")
110
+ ):
111
+ raise ValueError("Git URL must start with https:// or ssh://")
112
+ elif storage_config.type == StorageTypes.LOCAL and not str_url.startswith(
113
+ "file:///datasets/"
114
+ ):
115
+ raise ValueError("Local URL must start with file:///datasets/")
116
+ return url
117
+
118
+
119
+ def validate_labeling_type(
120
+ labeling_type: "LabelingType", labeling_info: "LabelingInfo"
121
+ ) -> None:
122
+ """
123
+ Validate that the labeling type is compatible with the labeling info
124
+
125
+ Args:
126
+ labeling_type: The type of labeling that will be performed
127
+ labeling_info: The labeling info to validate
128
+ """
129
+ dataset_metadata_types = LABELING_TYPES_TO_DATASET_METADATA_TYPES[labeling_type]
130
+ if labeling_info.type not in dataset_metadata_types:
131
+ raise ValueError(
132
+ f"Cannot use {labeling_info.type.name} labeling info with {labeling_type.name} datasets"
133
+ )
134
+
135
+
136
+ def validate_labeling_info(
137
+ labeling_type: "LabelingType",
138
+ labeling_info: "typing.Union[LabelingInfo, list[LabelingInfo]]",
139
+ storage_config: "typing.Union[StorageConfig, ResponseStorageConfig]",
140
+ ) -> None:
141
+ """
142
+ Validate the labeling info for a dataset
143
+
144
+ Args:
145
+ labeling_type: The type of labeling that will be performed
146
+ labeling_info: The labeling info to validate
147
+ storage_config: The storage configuration for the dataset.
148
+ StorageConfig is used to validate the URLs in the labeling info
149
+ """
150
+ if isinstance(labeling_info, list):
151
+ for labeling in labeling_info:
152
+ validate_labeling_info(labeling_type, labeling, storage_config)
153
+ return
154
+ elif isinstance(labeling_info, HirundoCSV):
155
+ validate_url(labeling_info.csv_url, storage_config)
156
+ elif isinstance(labeling_info, COCO):
157
+ validate_url(labeling_info.json_url, storage_config)
158
+ elif isinstance(labeling_info, YOLO):
159
+ validate_url(labeling_info.labels_dir_url, storage_config)
160
+ if labeling_info.data_yaml_url is not None:
161
+ validate_url(labeling_info.data_yaml_url, storage_config)
162
+ elif isinstance(labeling_info, Keylabs):
163
+ validate_url(labeling_info.labels_dir_url, storage_config)
164
+ validate_labeling_type(labeling_type, labeling_info)
@@ -1,6 +1,6 @@
1
1
  from hirundo._env import API_KEY, check_api_key
2
2
 
3
- HIRUNDO_API_VERSION = "0.2"
3
+ HIRUNDO_API_VERSION = "0.3"
4
4
 
5
5
  _json_headers = {
6
6
  "Content-Type": "application/json",