hirundo-0.1.18-py3-none-any.whl → hirundo-0.1.21-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hirundo/__init__.py CHANGED
@@ -3,13 +3,15 @@ from .dataset_enum import (
     LabelingType,
     StorageTypes,
 )
-from .dataset_optimization import (
+from .dataset_qa import (
+    ClassificationRunArgs,
+    Domain,
     HirundoError,
-    OptimizationDataset,
+    ObjectDetectionRunArgs,
+    QADataset,
     RunArgs,
-    VisionRunArgs,
 )
-from .dataset_optimization_results import DatasetOptimizationResults
+from .dataset_qa_results import DatasetQAResults
 from .git import GitPlainAuth, GitRepo, GitSSHAuth
 from .labeling import (
     COCO,
@@ -40,9 +42,11 @@ __all__ = [
     "KeylabsObjDetVideo",
     "KeylabsObjSegImages",
     "KeylabsObjSegVideo",
-    "OptimizationDataset",
+    "QADataset",
+    "Domain",
     "RunArgs",
-    "VisionRunArgs",
+    "ClassificationRunArgs",
+    "ObjectDetectionRunArgs",
     "DatasetMetadataType",
     "LabelingType",
     "GitPlainAuth",
@@ -54,9 +58,9 @@ __all__ = [
     # "StorageAzure", TODO: Azure storage is coming soon
     "StorageGit",
     "StorageConfig",
-    "DatasetOptimizationResults",
+    "DatasetQAResults",
     "load_df",
     "load_from_zip",
 ]
 
-__version__ = "0.1.18"
+__version__ = "0.1.21"
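The `__init__.py` changes amount to a straight rename of the public API. A hedged migration sketch for downstream code, using only the names visible in the `__all__` diff above:

```python
# 0.1.18 (old names, removed in 0.1.21):
# from hirundo import OptimizationDataset, VisionRunArgs

# 0.1.21 (new names):
from hirundo import ClassificationRunArgs, Domain, ObjectDetectionRunArgs, QADataset
```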
hirundo/_constraints.py CHANGED
@@ -11,7 +11,7 @@ from hirundo.labeling import COCO, YOLO, HirundoCSV, Keylabs
 
 if TYPE_CHECKING:
     from hirundo._urls import HirundoUrl
-    from hirundo.dataset_optimization import LabelingInfo
+    from hirundo.dataset_qa import LabelingInfo
     from hirundo.storage import (
         ResponseStorageConfig,
         StorageConfig,
hirundo/_headers.py CHANGED
@@ -1,6 +1,6 @@
 from hirundo._env import API_KEY, check_api_key
 
-HIRUNDO_API_VERSION = "0.2"
+HIRUNDO_API_VERSION = "0.3"
 
 _json_headers = {
     "Content-Type": "application/json",
hirundo/_http.py CHANGED
@@ -1,4 +1,7 @@
+import requests as _requests
 from requests import Response
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
 
 import hirundo.logger
 
@@ -7,6 +10,56 @@ logger = hirundo.logger.get_logger(__name__)
 MINIMUM_CLIENT_SERVER_ERROR_CODE = 400
 
 
+def _build_retrying_session() -> _requests.Session:
+    # No more than 10 tries total (including the initial attempt)
+    # urllib3 Retry.total counts retries, not total attempts, so use 9 retries
+    retries = Retry(
+        total=9,
+        backoff_factor=1.0,
+        status_forcelist=(429,),
+        allowed_methods=("HEAD", "GET", "PUT", "POST", "PATCH", "DELETE", "OPTIONS"),
+        respect_retry_after_header=True,
+        raise_on_status=False,
+    )
+    adapter = HTTPAdapter(max_retries=retries)
+    session = _requests.Session()
+    session.mount("http://", adapter)
+    session.mount("https://", adapter)
+    return session
+
+
+_SESSION = _build_retrying_session()
+
+
+class _RequestsShim:
+    """Shim exposing a subset of the requests API but backed by a retrying Session."""
+
+    HTTPError = _requests.HTTPError
+    Response = _requests.Response
+
+    def request(self, method: str, url: str, **kwargs) -> Response:
+        return _SESSION.request(method=method, url=url, **kwargs)
+
+    def get(self, url: str, **kwargs) -> Response:
+        return _SESSION.get(url, **kwargs)
+
+    def post(self, url: str, **kwargs) -> Response:
+        return _SESSION.post(url, **kwargs)
+
+    def delete(self, url: str, **kwargs) -> Response:
+        return _SESSION.delete(url, **kwargs)
+
+    def patch(self, url: str, **kwargs) -> Response:
+        return _SESSION.patch(url, **kwargs)
+
+    def put(self, url: str, **kwargs) -> Response:
+        return _SESSION.put(url, **kwargs)
+
+
+# Public shim to be imported by modules instead of the raw requests package
+requests = _RequestsShim()
+
+
 def raise_for_status_with_reason(response: Response):
     try:
         if response.status_code >= MINIMUM_CLIENT_SERVER_ERROR_CODE:
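The new shim keeps existing call sites unchanged while routing every HTTP call through a retrying `Session`. A minimal usage sketch (the URL is illustrative; the retry schedule follows urllib3's exponential backoff and honours any `Retry-After` header the server sends, since `respect_retry_after_header=True`):

```python
from hirundo._http import requests

# Drop-in replacement for `import requests`: HTTP 429 responses are retried
# up to 9 times (10 attempts total) with exponential backoff between attempts.
# raise_on_status=False means exhausted retries surface as a normal Response
# rather than a retry error.
response = requests.get("https://api.example.com/health", timeout=30)  # illustrative URL
response.raise_for_status()
```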
hirundo/_iter_sse_retrying.py CHANGED
@@ -5,11 +5,11 @@ import uuid
 from collections.abc import AsyncGenerator, Generator
 
 import httpx
-import requests
 import urllib3
 from httpx_sse import ServerSentEvent, SSEError, aconnect_sse, connect_sse
 from stamina import retry
 
+from hirundo._http import requests
 from hirundo._timeouts import READ_TIMEOUT
 from hirundo.logger import get_logger
 
hirundo/cli.py CHANGED
@@ -88,7 +88,7 @@ def setup_api_key(
     ],
 ):
     """
-    Setup the API key for the Hirundo client library.
+    Setup the API key for the Hirundo Python SDK.
     Values are saved to a .env file in the current directory for use by the library in requests.
     """
     saved_to = upsert_env("API_KEY", api_key)
@@ -115,7 +115,7 @@ def change_api_remote(
     ],
 ):
     """
-    Change the API server address for the Hirundo client library.
+    Change the API server address for the Hirundo Python SDK.
     This is the same address where you access the Hirundo web interface.
     """
     api_host = fix_api_host(api_host)
@@ -151,7 +151,7 @@ def setup(
     ],
 ):
     """
-    Setup the Hirundo client library.
+    Setup the Hirundo Python SDK.
     """
     api_host = fix_api_host(api_host)
     api_host_saved_to = upsert_env("API_HOST", api_host)
@@ -198,9 +198,9 @@ def check_run(
     """
     Check the status of a run.
     """
-    from hirundo.dataset_optimization import OptimizationDataset
+    from hirundo.dataset_qa import QADataset
 
-    results = OptimizationDataset.check_run_by_id(run_id)
+    results = QADataset.check_run_by_id(run_id)
     print(f"Run results saved to {results.cached_zip_path}")
 
 
@@ -209,9 +209,9 @@ def list_runs():
     """
     List all runs available.
     """
-    from hirundo.dataset_optimization import OptimizationDataset
+    from hirundo.dataset_qa import QADataset
 
-    runs = OptimizationDataset.list_runs()
+    runs = QADataset.list_runs()
 
     console = Console()
     table = Table(
hirundo/dataset_optimization.py → hirundo/dataset_qa.py RENAMED
@@ -6,7 +6,6 @@ from enum import Enum
 from typing import overload
 
 import httpx
-import requests
 from pydantic import BaseModel, Field, model_validator
 from tqdm import tqdm
 from tqdm.contrib.logging import logging_redirect_tqdm
@@ -14,12 +13,12 @@ from tqdm.contrib.logging import logging_redirect_tqdm
 from hirundo._constraints import validate_labeling_info, validate_url
 from hirundo._env import API_HOST
 from hirundo._headers import get_headers
-from hirundo._http import raise_for_status_with_reason
+from hirundo._http import raise_for_status_with_reason, requests
 from hirundo._iter_sse_retrying import aiter_sse_retrying, iter_sse_retrying
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
 from hirundo._urls import HirundoUrl
 from hirundo.dataset_enum import DatasetMetadataType, LabelingType
-from hirundo.dataset_optimization_results import DatasetOptimizationResults
+from hirundo.dataset_qa_results import DatasetQAResults
 from hirundo.labeling import YOLO, LabelingInfo
 from hirundo.logger import get_logger
 from hirundo.storage import ResponseStorageConfig, StorageConfig
@@ -30,7 +29,7 @@ logger = get_logger(__name__)
 
 class HirundoError(Exception):
     """
-    Custom exception used to indicate errors in `hirundo` dataset optimization runs
+    Custom exception used to indicate errors in `hirundo` dataset QA runs
     """
 
     pass
@@ -51,14 +50,14 @@ class RunStatus(Enum):
 
 
 STATUS_TO_TEXT_MAP = {
-    RunStatus.STARTED.value: "Optimization run in progress. Downloading dataset",
-    RunStatus.PENDING.value: "Optimization run queued and not yet started",
-    RunStatus.SUCCESS.value: "Optimization run completed successfully",
-    RunStatus.FAILURE.value: "Optimization run failed",
+    RunStatus.STARTED.value: "Dataset QA run in progress. Downloading dataset",
+    RunStatus.PENDING.value: "Dataset QA run queued and not yet started",
+    RunStatus.SUCCESS.value: "Dataset QA run completed successfully",
+    RunStatus.FAILURE.value: "Dataset QA run failed",
     RunStatus.AWAITING_MANUAL_APPROVAL.value: "Awaiting manual approval",
-    RunStatus.RETRY.value: "Optimization run failed. Retrying",
-    RunStatus.REVOKED.value: "Optimization run was cancelled",
-    RunStatus.REJECTED.value: "Optimization run was rejected",
+    RunStatus.RETRY.value: "Dataset QA run failed. Retrying",
+    RunStatus.REVOKED.value: "Dataset QA run was cancelled",
+    RunStatus.REJECTED.value: "Dataset QA run was rejected",
 }
 STATUS_TO_PROGRESS_MAP = {
     RunStatus.STARTED.value: 0.0,
@@ -72,33 +71,51 @@ STATUS_TO_PROGRESS_MAP = {
 }
 
 
-class VisionRunArgs(BaseModel):
-    upsample: bool = False
+class ClassificationRunArgs(BaseModel):
+    image_size: typing.Optional[tuple[int, int]] = (224, 224)
+    """
+    Size (width, height) to which to resize classification images.
+    It is recommended to keep this value at (224, 224) unless your classes are differentiated by very small differences.
+    """
+    upsample: typing.Optional[bool] = False
     """
     Whether to upsample the dataset to attempt to balance the classes.
     """
-    min_abs_bbox_size: int = 0
+
+
+class ObjectDetectionRunArgs(ClassificationRunArgs):
+    min_abs_bbox_size: typing.Optional[int] = None
     """
-    Minimum valid size (in pixels) of a bounding box to keep it in the dataset for optimization.
+    Minimum valid size (in pixels) of a bounding box to keep it in the dataset for QA.
     """
-    min_abs_bbox_area: int = 0
+    min_abs_bbox_area: typing.Optional[int] = None
     """
-    Minimum valid absolute area (in pixels²) of a bounding box to keep it in the dataset for optimization.
+    Minimum valid absolute area (in pixels²) of a bounding box to keep it in the dataset for QA.
     """
-    min_rel_bbox_size: float = 0.0
+    min_rel_bbox_size: typing.Optional[float] = None
     """
     Minimum valid size (as a fraction of both image height and width) for a bounding box
-    to keep it in the dataset for optimization, relative to the corresponding dimension size,
+    to keep it in the dataset for QA, relative to the corresponding dimension size,
     i.e. if the bounding box is 10% of the image width and 5% of the image height, it will be kept if this value is 0.05, but not if the
     value is 0.06 (since both width and height are checked).
     """
-    min_rel_bbox_area: float = 0.0
+    min_rel_bbox_area: typing.Optional[float] = None
+    """
+    Minimum valid relative area (as a fraction of the image area) of a bounding box to keep it in the dataset for QA.
+    """
+    crop_ratio: typing.Optional[float] = None
     """
-    Minimum valid relative area (as a fraction of the image area) of a bounding box to keep it in the dataset for optimization.
+    Ratio of the bounding box to crop.
+    Change this value at your own risk. It is recommended to keep it at 1.0 unless you know what you are doing.
+    """
+    add_mask_channel: typing.Optional[bool] = None
+    """
+    Whether to add a mask channel to the image.
+    Change at your own risk. It is recommended to keep it at False unless you know what you are doing.
     """
 
 
-RunArgs = typing.Union[VisionRunArgs]
+RunArgs = typing.Union[ClassificationRunArgs, ObjectDetectionRunArgs]
 
 
 class AugmentationName(str, Enum):
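`VisionRunArgs` is split into a classification base class and an object-detection subclass. A hedged construction sketch using only fields shown in the diff above; `ObjectDetectionRunArgs` inherits `image_size` and `upsample` from `ClassificationRunArgs` and adds the bounding-box filters:

```python
from hirundo import ClassificationRunArgs, ObjectDetectionRunArgs

cls_args = ClassificationRunArgs(image_size=(224, 224), upsample=True)
od_args = ObjectDetectionRunArgs(
    upsample=False,
    min_abs_bbox_size=8,      # drop boxes narrower or shorter than 8 px
    min_rel_bbox_area=0.001,  # drop boxes under 0.1% of the image area
)
```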
@@ -111,13 +128,31 @@ class AugmentationName(str, Enum):
     GAUSSIAN_BLUR = "GaussianBlur"
 
 
-class Modality(str, Enum):
-    IMAGE = "Image"
-    RADAR = "Radar"
-    EKG = "EKG"
+class Domain(str, Enum):
+    RADAR = "RADAR"
+    VISION = "VISION"
+    SPEECH = "SPEECH"
+    TABULAR = "TABULAR"
+
+
+DOMAIN_TO_SUPPORTED_LABELING_TYPES = {
+    Domain.RADAR: [
+        LabelingType.SINGLE_LABEL_CLASSIFICATION,
+        LabelingType.OBJECT_DETECTION,
+    ],
+    Domain.VISION: [
+        LabelingType.SINGLE_LABEL_CLASSIFICATION,
+        LabelingType.OBJECT_DETECTION,
+        LabelingType.OBJECT_SEGMENTATION,
+        LabelingType.SEMANTIC_SEGMENTATION,
+        LabelingType.PANOPTIC_SEGMENTATION,
+    ],
+    Domain.SPEECH: [LabelingType.SPEECH_TO_TEXT],
+    Domain.TABULAR: [LabelingType.SINGLE_LABEL_CLASSIFICATION],
+}
 
 
-class OptimizationDataset(BaseModel):
+class QADataset(BaseModel):
     id: typing.Optional[int] = Field(default=None)
     """
     The ID of the dataset created on the server.
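The `Modality` enum is replaced by `Domain`, paired with an explicit support matrix that the model validator checks (see the next hunk). A small sketch of the mapping's shape, with the module path following the rename above:

```python
from hirundo import Domain, LabelingType
from hirundo.dataset_qa import DOMAIN_TO_SUPPORTED_LABELING_TYPES

# SPEECH supports only speech-to-text labeling; VISION supports five types.
assert LabelingType.SPEECH_TO_TEXT in DOMAIN_TO_SUPPORTED_LABELING_TYPES[Domain.SPEECH]
assert len(DOMAIN_TO_SUPPORTED_LABELING_TYPES[Domain.VISION]) == 5
```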
@@ -171,21 +206,29 @@ class OptimizationDataset(BaseModel):
     For audio datasets, this field is ignored.
     If no value is provided, all augmentations are applied to vision datasets.
     """
-    modality: Modality = Modality.IMAGE
+    domain: Domain = Domain.VISION
     """
-    Used to define the modality of the dataset.
+    Used to define the domain of the dataset.
     Defaults to Image.
     """
 
     run_id: typing.Optional[str] = Field(default=None, init=False)
     """
-    The ID of the Dataset Optimization run created on the server.
+    The ID of the Dataset QA run created on the server.
     """
 
     status: typing.Optional[RunStatus] = None
 
     @model_validator(mode="after")
     def validate_dataset(self):
+        if self.domain not in DOMAIN_TO_SUPPORTED_LABELING_TYPES:
+            raise ValueError(
+                f"Domain {self.domain} is not supported. Supported domains are: {list(DOMAIN_TO_SUPPORTED_LABELING_TYPES.keys())}"
+            )
+        if self.labeling_type not in DOMAIN_TO_SUPPORTED_LABELING_TYPES[self.domain]:
+            raise ValueError(
+                f"Labeling type {self.labeling_type} is not supported for domain {self.domain}. Supported labeling types are: {DOMAIN_TO_SUPPORTED_LABELING_TYPES[self.domain]}"
+            )
         if self.storage_config is None and self.storage_config_id is None:
             raise ValueError(
                 "No dataset storage has been provided. Provide one via `storage_config` or `storage_config_id`"
@@ -229,52 +272,52 @@ class OptimizationDataset(BaseModel):
         return self
 
     @staticmethod
-    def get_by_id(dataset_id: int) -> "OptimizationDataset":
+    def get_by_id(dataset_id: int) -> "QADataset":
         """
-        Get a `OptimizationDataset` instance from the server by its ID
+        Get a `QADataset` instance from the server by its ID
 
         Args:
-            dataset_id: The ID of the `OptimizationDataset` instance to get
+            dataset_id: The ID of the `QADataset` instance to get
         """
         response = requests.get(
-            f"{API_HOST}/dataset-optimization/dataset/{dataset_id}",
+            f"{API_HOST}/dataset-qa/dataset/{dataset_id}",
             headers=get_headers(),
             timeout=READ_TIMEOUT,
         )
         raise_for_status_with_reason(response)
         dataset = response.json()
-        return OptimizationDataset(**dataset)
+        return QADataset(**dataset)
 
     @staticmethod
-    def get_by_name(name: str) -> "OptimizationDataset":
+    def get_by_name(name: str) -> "QADataset":
         """
-        Get a `OptimizationDataset` instance from the server by its name
+        Get a `QADataset` instance from the server by its name
 
         Args:
-            name: The name of the `OptimizationDataset` instance to get
+            name: The name of the `QADataset` instance to get
         """
         response = requests.get(
-            f"{API_HOST}/dataset-optimization/dataset/by-name/{name}",
+            f"{API_HOST}/dataset-qa/dataset/by-name/{name}",
             headers=get_headers(),
             timeout=READ_TIMEOUT,
         )
         raise_for_status_with_reason(response)
         dataset = response.json()
-        return OptimizationDataset(**dataset)
+        return QADataset(**dataset)
 
     @staticmethod
     def list_datasets(
         organization_id: typing.Optional[int] = None,
-    ) -> list["DataOptimizationDatasetOut"]:
+    ) -> list["QADatasetOut"]:
         """
-        Lists all the optimization datasets created by user's default organization
+        Lists all the datasets created by user's default organization
         or the `organization_id` passed
 
         Args:
             organization_id: The ID of the organization to list the datasets for.
         """
         response = requests.get(
-            f"{API_HOST}/dataset-optimization/dataset/",
+            f"{API_HOST}/dataset-qa/dataset/",
             params={"dataset_organization_id": organization_id},
             headers=get_headers(),
             timeout=READ_TIMEOUT,
@@ -282,7 +325,7 @@ class OptimizationDataset(BaseModel):
         raise_for_status_with_reason(response)
         datasets = response.json()
         return [
-            DataOptimizationDatasetOut(
+            QADatasetOut(
                 **ds,
             )
             for ds in datasets
@@ -291,17 +334,17 @@ class OptimizationDataset(BaseModel):
     @staticmethod
     def list_runs(
         organization_id: typing.Optional[int] = None,
-    ) -> list["DataOptimizationRunOut"]:
+    ) -> list["DataQARunOut"]:
         """
-        Lists all the `OptimizationDataset` instances created by user's default organization
+        Lists all the `QADataset` instances created by user's default organization
         or the `organization_id` passed
-        Note: The return type is `list[dict]` and not `list[OptimizationDataset]`
+        Note: The return type is `list[dict]` and not `list[QADataset]`
 
         Args:
             organization_id: The ID of the organization to list the datasets for.
         """
         response = requests.get(
-            f"{API_HOST}/dataset-optimization/run/list",
+            f"{API_HOST}/dataset-qa/run/list",
             params={"dataset_organization_id": organization_id},
             headers=get_headers(),
             timeout=READ_TIMEOUT,
@@ -309,7 +352,7 @@ class OptimizationDataset(BaseModel):
         raise_for_status_with_reason(response)
         runs = response.json()
         return [
-            DataOptimizationRunOut(
+            DataQARunOut(
                 **run,
             )
             for run in runs
@@ -318,13 +361,13 @@ class OptimizationDataset(BaseModel):
     @staticmethod
     def delete_by_id(dataset_id: int) -> None:
         """
-        Deletes a `OptimizationDataset` instance from the server by its ID
+        Deletes a `QADataset` instance from the server by its ID
 
         Args:
-            dataset_id: The ID of the `OptimizationDataset` instance to delete
+            dataset_id: The ID of the `QADataset` instance to delete
         """
         response = requests.delete(
-            f"{API_HOST}/dataset-optimization/dataset/{dataset_id}",
+            f"{API_HOST}/dataset-qa/dataset/{dataset_id}",
             headers=get_headers(),
             timeout=MODIFY_TIMEOUT,
         )
@@ -333,14 +376,14 @@ class OptimizationDataset(BaseModel):
 
     def delete(self, storage_config=True) -> None:
         """
-        Deletes the active `OptimizationDataset` instance from the server.
-        It can only be used on a `OptimizationDataset` instance that has been created.
+        Deletes the active `QADataset` instance from the server.
+        It can only be used on a `QADataset` instance that has been created.
 
         Args:
-            storage_config: If True, the `OptimizationDataset`'s `StorageConfig` will also be deleted
+            storage_config: If True, the `QADataset`'s `StorageConfig` will also be deleted
 
         Note: If `storage_config` is not set to `False` then the `storage_config_id` must be set
-        This can either be set manually or by creating the `StorageConfig` instance via the `OptimizationDataset`'s
+        This can either be set manually or by creating the `StorageConfig` instance via the `QADataset`'s
         `create` method
         """
         if storage_config:
@@ -357,7 +400,7 @@ class OptimizationDataset(BaseModel):
         replace_if_exists: bool = False,
     ) -> int:
         """
-        Create a `OptimizationDataset` instance on the server.
+        Create a `QADataset` instance on the server.
         If the `storage_config_id` field is not set, the storage config will also be created and the field will be set.
 
         Args:
@@ -366,7 +409,7 @@ class OptimizationDataset(BaseModel):
             (this is determined by a dataset of the same name in the same organization).
 
         Returns:
-            The ID of the created `OptimizationDataset` instance
+            The ID of the created `QADataset` instance
         """
         if self.storage_config is None and self.storage_config_id is None:
             raise ValueError("No dataset storage has been provided")
@@ -391,7 +434,7 @@ class OptimizationDataset(BaseModel):
         model_dict = self.model_dump(mode="json")
         # ⬆️ Get dict of model fields from Pydantic model instance
         dataset_response = requests.post(
-            f"{API_HOST}/dataset-optimization/dataset/",
+            f"{API_HOST}/dataset-qa/dataset/",
             json={
                 **{k: model_dict[k] for k in model_dict.keys() - {"storage_config"}},
                 "organization_id": organization_id,
@@ -408,17 +451,17 @@ class OptimizationDataset(BaseModel):
         return self.id
 
     @staticmethod
-    def launch_optimization_run(
+    def launch_qa_run(
         dataset_id: int,
         organization_id: typing.Optional[int] = None,
         run_args: typing.Optional[RunArgs] = None,
     ) -> str:
         """
-        Run the dataset optimization process on the server using the dataset with the given ID
+        Run the dataset QA process on the server using the dataset with the given ID
         i.e. `dataset_id`.
 
         Args:
-            dataset_id: The ID of the dataset to run optimization on.
+            dataset_id: The ID of the dataset to run QA on.
 
         Returns:
             ID of the run (`run_id`).
@@ -429,7 +472,7 @@ class OptimizationDataset(BaseModel):
         if run_args:
             run_info["run_args"] = run_args.model_dump(mode="json")
         run_response = requests.post(
-            f"{API_HOST}/dataset-optimization/run/{dataset_id}",
+            f"{API_HOST}/dataset-qa/run/{dataset_id}",
             json=run_info if len(run_info) > 0 else None,
             headers=get_headers(),
             timeout=MODIFY_TIMEOUT,
@@ -440,12 +483,16 @@ class OptimizationDataset(BaseModel):
 
     def _validate_run_args(self, run_args: RunArgs) -> None:
         if self.labeling_type == LabelingType.SPEECH_TO_TEXT:
             raise Exception("Speech to text cannot have `run_args` set")
-        if self.labeling_type != LabelingType.OBJECT_DETECTION and any(
-            (
-                run_args.min_abs_bbox_size != 0,
-                run_args.min_abs_bbox_area != 0,
-                run_args.min_rel_bbox_size != 0,
-                run_args.min_rel_bbox_area != 0,
+        if (
+            self.labeling_type != LabelingType.OBJECT_DETECTION
+            and isinstance(run_args, ObjectDetectionRunArgs)
+            and any(
+                (
+                    run_args.min_abs_bbox_size != 0,
+                    run_args.min_abs_bbox_area != 0,
+                    run_args.min_rel_bbox_size != 0,
+                    run_args.min_rel_bbox_area != 0,
+                )
             )
         ):
             raise Exception(
@@ -454,7 +501,7 @@ class OptimizationDataset(BaseModel):
                 + f"labeling type {self.labeling_type}"
             )
 
-    def run_optimization(
+    def run_qa(
         self,
         organization_id: typing.Optional[int] = None,
         replace_dataset_if_exists: bool = False,
@@ -462,13 +509,13 @@ class OptimizationDataset(BaseModel):
     ) -> str:
         """
         If the dataset was not created on the server yet, it is created.
-        Run the dataset optimization process on the server using the active `OptimizationDataset` instance
+        Run the dataset QA process on the server using the active `QADataset` instance
 
         Args:
-            organization_id: The ID of the organization to run the optimization for.
+            organization_id: The ID of the organization to run the QA for.
             replace_dataset_if_exists: If True, the dataset will be replaced if it already exists
             (this is determined by a dataset of the same name in the same organization).
-            run_args: The run arguments to use for the optimization run
+            run_args: The run arguments to use for the QA run
 
         Returns:
             An ID of the run (`run_id`) and stores that `run_id` on the instance
@@ -478,7 +525,7 @@ class OptimizationDataset(BaseModel):
             self.id = self.create(replace_if_exists=replace_dataset_if_exists)
         if run_args is not None:
             self._validate_run_args(run_args)
-        run_id = self.launch_optimization_run(self.id, organization_id, run_args)
+        run_id = self.launch_qa_run(self.id, organization_id, run_args)
         self.run_id = run_id
         logger.info("Started the run with ID: %s", run_id)
         return run_id
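For callers the rename is mechanical. A hedged before/after sketch (the dataset ID is hypothetical, and a configured API key is assumed):

```python
from hirundo import QADataset

# 0.1.18: run_id = OptimizationDataset.launch_optimization_run(dataset_id=1)
# 0.1.21:
run_id = QADataset.launch_qa_run(dataset_id=1)  # hypothetical dataset ID
results = QADataset.check_run_by_id(run_id)
```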
@@ -516,7 +563,7 @@ class OptimizationDataset(BaseModel):
             for sse in iter_sse_retrying(
                 client,
                 "GET",
-                f"{API_HOST}/dataset-optimization/run/{run_id}",
+                f"{API_HOST}/dataset-qa/run/{run_id}",
                 headers=get_headers(),
             ):
                 if sse.event == "ping":
@@ -542,50 +589,46 @@ class OptimizationDataset(BaseModel):
                 raise HirundoError("Unknown error")
             yield data
         if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
-            OptimizationDataset._check_run_by_id(run_id, retry + 1)
+            QADataset._check_run_by_id(run_id, retry + 1)
 
     @staticmethod
     def _handle_failure(iteration: dict):
         if iteration["result"]:
-            raise HirundoError(
-                f"Optimization run failed with error: {iteration['result']}"
-            )
+            raise HirundoError(f"QA run failed with error: {iteration['result']}")
         else:
-            raise HirundoError(
-                "Optimization run failed with an unknown error in _handle_failure"
-            )
+            raise HirundoError("QA run failed with an unknown error in _handle_failure")
 
     @staticmethod
     @overload
     def check_run_by_id(
         run_id: str, stop_on_manual_approval: typing.Literal[True]
-    ) -> typing.Optional[DatasetOptimizationResults]: ...
+    ) -> typing.Optional[DatasetQAResults]: ...
 
     @staticmethod
     @overload
     def check_run_by_id(
         run_id: str, stop_on_manual_approval: typing.Literal[False] = False
-    ) -> DatasetOptimizationResults: ...
+    ) -> DatasetQAResults: ...
 
     @staticmethod
     @overload
     def check_run_by_id(
         run_id: str, stop_on_manual_approval: bool
-    ) -> typing.Optional[DatasetOptimizationResults]: ...
+    ) -> typing.Optional[DatasetQAResults]: ...
 
     @staticmethod
     def check_run_by_id(
         run_id: str, stop_on_manual_approval: bool = False
-    ) -> typing.Optional[DatasetOptimizationResults]:
+    ) -> typing.Optional[DatasetQAResults]:
         """
         Check the status of a run given its ID
 
         Args:
-            run_id: The `run_id` produced by a `run_optimization` call
+            run_id: The `run_id` produced by a `run_qa` call
             stop_on_manual_approval: If True, the function will return `None` if the run is awaiting manual approval
 
         Returns:
-            A DatasetOptimizationResults object with the results of the optimization run
+            A DatasetQAResults object with the results of the QA run
 
         Raises:
             HirundoError: If the maximum number of retries is reached or if the run fails
@@ -593,7 +636,7 @@ class OptimizationDataset(BaseModel):
         logger.debug("Checking run with ID: %s", run_id)
         with logging_redirect_tqdm():
             t = tqdm(total=100.0)
-            for iteration in OptimizationDataset._check_run_by_id(run_id):
+            for iteration in QADataset._check_run_by_id(run_id):
                 if iteration["state"] in STATUS_TO_PROGRESS_MAP:
                     t.set_description(STATUS_TO_TEXT_MAP[iteration["state"]])
                     t.n = STATUS_TO_PROGRESS_MAP[iteration["state"]]
@@ -608,11 +651,11 @@ class OptimizationDataset(BaseModel):
                         "State is failure, rejected, or revoked: %s",
                         iteration["state"],
                     )
-                    OptimizationDataset._handle_failure(iteration)
+                    QADataset._handle_failure(iteration)
                 elif iteration["state"] == RunStatus.SUCCESS.value:
                     t.close()
                     zip_temporary_url = iteration["result"]
-                    logger.debug("Optimization run completed. Downloading results")
+                    logger.debug("QA run completed. Downloading results")
 
                     return download_and_extract_zip(
                         run_id,
@@ -644,7 +687,7 @@ class OptimizationDataset(BaseModel):
                         stage = "Unknown progress state"
                         current_progress_percentage = t.n  # Keep the same progress
                     desc = (
-                        "Optimization run completed. Uploading results"
+                        "QA run completed. Uploading results"
                         if current_progress_percentage == 100.0
                         else stage
                     )
@@ -652,28 +695,26 @@ class OptimizationDataset(BaseModel):
                     t.n = current_progress_percentage
                     logger.debug("Setting progress to %s", t.n)
                     t.refresh()
-        raise HirundoError(
-            "Optimization run failed with an unknown error in check_run_by_id"
-        )
+        raise HirundoError("QA run failed with an unknown error in check_run_by_id")
 
     @overload
     def check_run(
         self, stop_on_manual_approval: typing.Literal[True]
-    ) -> typing.Optional[DatasetOptimizationResults]: ...
+    ) -> typing.Optional[DatasetQAResults]: ...
 
     @overload
     def check_run(
         self, stop_on_manual_approval: typing.Literal[False] = False
-    ) -> DatasetOptimizationResults: ...
+    ) -> DatasetQAResults: ...
 
     def check_run(
         self, stop_on_manual_approval: bool = False
-    ) -> typing.Optional[DatasetOptimizationResults]:
+    ) -> typing.Optional[DatasetQAResults]:
         """
         Check the status of the current active instance's run.
 
         Returns:
-            A pandas DataFrame with the results of the optimization run
+            A pandas DataFrame with the results of the QA run
 
         """
         if not self.run_id:
@@ -690,7 +731,7 @@ class OptimizationDataset(BaseModel):
         This generator will produce values to show progress of the run.
 
         Args:
-            run_id: The `run_id` produced by a `run_optimization` call
+            run_id: The `run_id` produced by a `run_qa` call
             retry: A number used to track the number of retries to limit re-checks. *Do not* provide this value manually.
 
         Yields:
@@ -709,7 +750,7 @@ class OptimizationDataset(BaseModel):
             async_iterator = await aiter_sse_retrying(
                 client,
                 "GET",
-                f"{API_HOST}/dataset-optimization/run/{run_id}",
+                f"{API_HOST}/dataset-qa/run/{run_id}",
                 headers=get_headers(),
             )
             async for sse in async_iterator:
@@ -725,7 +766,7 @@ class OptimizationDataset(BaseModel):
                 last_event = json.loads(sse.data)
                 yield last_event["data"]
         if not last_event or last_event["data"]["state"] == RunStatus.PENDING.value:
-            OptimizationDataset.acheck_run_by_id(run_id, retry + 1)
+            QADataset.acheck_run_by_id(run_id, retry + 1)
 
     async def acheck_run(self) -> AsyncGenerator[dict, None]:
         """
@@ -749,14 +790,14 @@ class OptimizationDataset(BaseModel):
     @staticmethod
     def cancel_by_id(run_id: str) -> None:
         """
-        Cancel the dataset optimization run for the given `run_id`.
+        Cancel the dataset QA run for the given `run_id`.
 
         Args:
             run_id: The ID of the run to cancel
         """
         logger.info("Cancelling run with ID: %s", run_id)
         response = requests.delete(
-            f"{API_HOST}/dataset-optimization/run/{run_id}",
+            f"{API_HOST}/dataset-qa/run/{run_id}",
             headers=get_headers(),
             timeout=MODIFY_TIMEOUT,
         )
@@ -773,14 +814,14 @@ class OptimizationDataset(BaseModel):
     @staticmethod
     def archive_run_by_id(run_id: str) -> None:
         """
-        Archive the dataset optimization run for the given `run_id`.
+        Archive the dataset QA run for the given `run_id`.
 
         Args:
             run_id: The ID of the run to archive
         """
         logger.info("Archiving run with ID: %s", run_id)
         response = requests.patch(
-            f"{API_HOST}/dataset-optimization/run/archive/{run_id}",
+            f"{API_HOST}/dataset-qa/run/archive/{run_id}",
             headers=get_headers(),
             timeout=MODIFY_TIMEOUT,
         )
@@ -795,7 +836,7 @@ class OptimizationDataset(BaseModel):
         self.archive_run_by_id(self.run_id)
 
 
-class DataOptimizationDatasetOut(BaseModel):
+class QADatasetOut(BaseModel):
     id: int
 
     name: str
@@ -814,7 +855,7 @@ class DataOptimizationDatasetOut(BaseModel):
     updated_at: datetime.datetime
 
 
-class DataOptimizationRunOut(BaseModel):
+class DataQARunOut(BaseModel):
     id: int
     name: str
     dataset_id: int
hirundo/dataset_optimization_results.py → hirundo/dataset_qa_results.py RENAMED
@@ -21,7 +21,7 @@ if has_polars:
 T = typing.TypeVar("T")
 
 
-class DatasetOptimizationResults(BaseModel, typing.Generic[T]):
+class DatasetQAResults(BaseModel, typing.Generic[T]):
     model_config = {"arbitrary_types_allowed": True}
 
     cached_zip_path: Path
@@ -30,13 +30,13 @@ class DatasetOptimizationResults(BaseModel, typing.Generic[T]):
     """
     suspects: T
     """
-    A polars/pandas DataFrame containing the results of the optimization run
+    A polars/pandas DataFrame containing the results of the data QA run
     """
     object_suspects: typing.Optional[T]
     """
-    A polars/pandas DataFrame containing the object-level results of the optimization run
+    A polars/pandas DataFrame containing the object-level results of the data QA run
     """
     warnings_and_errors: T
     """
-    A polars/pandas DataFrame containing the warnings and errors of the optimization run
+    A polars/pandas DataFrame containing the warnings and errors of the data QA run
     """
hirundo/git.py CHANGED
@@ -3,13 +3,12 @@ import re
 import typing
 
 import pydantic
-import requests
 from pydantic import BaseModel, field_validator
 from pydantic_core import Url
 
 from hirundo._env import API_HOST
 from hirundo._headers import get_headers
-from hirundo._http import raise_for_status_with_reason
+from hirundo._http import raise_for_status_with_reason, requests
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
 from hirundo._urls import RepoUrl
 from hirundo.logger import get_logger
hirundo/storage.py CHANGED
@@ -2,13 +2,12 @@ import typing
 from pathlib import Path
 
 import pydantic
-import requests
 from pydantic import BaseModel, model_validator
 from pydantic_core import Url
 
 from hirundo._env import API_HOST
 from hirundo._headers import get_headers
-from hirundo._http import raise_for_status_with_reason
+from hirundo._http import raise_for_status_with_reason, requests
 from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
 from hirundo._urls import S3BucketUrl, StorageConfigName
 from hirundo.dataset_enum import StorageTypes
hirundo/unzip.py CHANGED
@@ -4,7 +4,6 @@ from collections.abc import Mapping
 from pathlib import Path
 from typing import IO, cast
 
-import requests
 from pydantic_core import Url
 
 from hirundo._dataframe import (
@@ -18,10 +17,11 @@ from hirundo._dataframe import (
 )
 from hirundo._env import API_HOST
 from hirundo._headers import _get_auth_headers
+from hirundo._http import requests
 from hirundo._timeouts import DOWNLOAD_READ_TIMEOUT
-from hirundo.dataset_optimization_results import (
+from hirundo.dataset_qa_results import (
     DataFrameType,
-    DatasetOptimizationResults,
+    DatasetQAResults,
 )
 from hirundo.logger import get_logger
 
@@ -117,7 +117,7 @@ def get_mislabel_suspect_filename(filenames: list[str]):
 
 def download_and_extract_zip(
     run_id: str, zip_url: str
-) -> DatasetOptimizationResults[DataFrameType]:
+) -> DatasetQAResults[DataFrameType]:
     """
     Download and extract the zip file from the given URL.
 
@@ -127,11 +127,11 @@ def download_and_extract_zip(
     and `warnings_and_errors.csv` files from the zip file.
 
     Args:
-        run_id: The ID of the optimization run.
+        run_id: The ID of the dataset QA run.
         zip_url: The URL of the zip file to download.
 
     Returns:
-        The dataset optimization results object.
+        The dataset QA results object.
     """
     # Define the local file path
     cache_dir = Path.home() / ".hirundo" / "cache"
@@ -140,9 +140,8 @@ def download_and_extract_zip(
 
     headers = None
     if Url(zip_url).scheme == "file":
-        zip_url = (
-            f"{API_HOST}/dataset-optimization/run/local-download"
-            + zip_url.replace("file://", "")
+        zip_url = f"{API_HOST}/dataset-qa/run/local-download" + zip_url.replace(
+            "file://", ""
         )
         headers = _get_auth_headers()
     # Stream the zip file download
@@ -217,7 +216,7 @@ def download_and_extract_zip(
                 "Failed to load warnings and errors into DataFrame", exc_info=e
             )
 
-    return DatasetOptimizationResults[DataFrameType](
+    return DatasetQAResults[DataFrameType](
         cached_zip_path=zip_file_path,
         suspects=suspects_df,
         object_suspects=object_suspects_df,
{hirundo-0.1.18.dist-info → hirundo-0.1.21.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hirundo
-Version: 0.1.18
+Version: 0.1.21
 Summary: This package is used to interface with Hirundo's platform. It provides a simple API to optimize your ML datasets.
 Author-email: Hirundo <dev@hirundo.io>
 License: MIT License
@@ -13,7 +13,7 @@ License: MIT License
 
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
-Project-URL: Homepage, https://github.com/Hirundo-io/hirundo-client
+Project-URL: Homepage, https://github.com/Hirundo-io/hirundo-python-sdk
 Keywords: dataset,machine learning,data science,data engineering
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python
@@ -32,6 +32,10 @@ Requires-Dist: httpx>=0.27.0
 Requires-Dist: stamina>=24.2.0
 Requires-Dist: httpx-sse>=0.4.0
 Requires-Dist: tqdm>=4.66.5
+Requires-Dist: h11>=0.16.0
+Requires-Dist: requests>=2.32.4
+Requires-Dist: urllib3>=2.5.0
+Requires-Dist: setuptools>=78.1.1
 Provides-Extra: dev
 Requires-Dist: pyyaml>=6.0.1; extra == "dev"
 Requires-Dist: types-PyYAML>=6.0.12; extra == "dev"
@@ -46,13 +50,15 @@ Requires-Dist: stamina>=24.2.0; extra == "dev"
 Requires-Dist: httpx-sse>=0.4.0; extra == "dev"
 Requires-Dist: pytest>=8.2.0; extra == "dev"
 Requires-Dist: pytest-asyncio>=0.23.6; extra == "dev"
-Requires-Dist: uv>=0.5.8; extra == "dev"
+Requires-Dist: uv>=0.8.6; extra == "dev"
 Requires-Dist: pre-commit>=3.7.1; extra == "dev"
 Requires-Dist: virtualenv>=20.6.6; extra == "dev"
-Requires-Dist: ruff>=0.11.6; extra == "dev"
+Requires-Dist: ruff>=0.12.0; extra == "dev"
 Requires-Dist: bumpver; extra == "dev"
 Requires-Dist: platformdirs>=4.3.6; extra == "dev"
 Requires-Dist: safety>=3.2.13; extra == "dev"
+Requires-Dist: cryptography>=44.0.1; extra == "dev"
+Requires-Dist: jinja2>=3.1.6; extra == "dev"
 Provides-Extra: docs
 Requires-Dist: sphinx>=7.4.7; extra == "docs"
 Requires-Dist: sphinx-autobuild>=2024.9.3; extra == "docs"
@@ -61,8 +67,9 @@ Requires-Dist: autodoc_pydantic>=2.2.0; extra == "docs"
 Requires-Dist: furo; extra == "docs"
 Requires-Dist: sphinx-multiversion; extra == "docs"
 Requires-Dist: esbonio; extra == "docs"
-Requires-Dist: starlette>0.40.0; extra == "docs"
+Requires-Dist: starlette>=0.47.2; extra == "docs"
 Requires-Dist: markupsafe>=3.0.2; extra == "docs"
+Requires-Dist: jinja2>=3.1.6; extra == "docs"
 Provides-Extra: pandas
 Requires-Dist: pandas>=2.2.3; extra == "pandas"
 Provides-Extra: polars
@@ -71,9 +78,9 @@ Dynamic: license-file
 
 # Hirundo
 
-This package exposes access to Hirundo APIs for dataset optimization for Machine Learning.
+This package exposes access to Hirundo APIs for dataset QA for Machine Learning.
 
-Dataset optimization is currently available for datasets labelled for classification and object detection.
+Dataset QA is currently available for datasets labelled for classification and object detection.
 
 Support dataset storage configs include:
 
@@ -144,7 +151,7 @@ Classification example:
 from hirundo import (
     HirundoCSV,
     LabelingType,
-    OptimizationDataset,
+    QADataset,
     StorageGCP,
     StorageConfig,
     StorageTypes,
@@ -155,7 +162,7 @@ gcp_bucket = StorageGCP(
     project="Hirundo-global",
     credentials_json=json.loads(os.environ["GCP_CREDENTIALS"]),
 )
-test_dataset = OptimizationDataset(
+test_dataset = QADataset(
     name="TEST-GCP cifar 100 classification dataset",
     labeling_type=LabelingType.SINGLE_LABEL_CLASSIFICATION,
     storage_config=StorageConfig(
@@ -170,7 +177,7 @@ test_dataset = OptimizationDataset(
     classes=cifar100_classes,
 )
 
-test_dataset.run_optimization()
+test_dataset.run_qa()
 results = test_dataset.check_run()
 print(results)
 ```
@@ -182,7 +189,7 @@ from hirundo import (
     GitRepo,
     HirundoCSV,
     LabelingType,
-    OptimizationDataset,
+    QADataset,
     StorageGit,
     StorageConfig,
     StorageTypes,
@@ -195,7 +202,7 @@ git_storage = StorageGit(
     ),
     branch="main",
 )
-test_dataset = OptimizationDataset(
+test_dataset = QADataset(
     name="TEST-HuggingFace-BDD-100k-validation-OD-validation-dataset",
     labeling_type=LabelingType.OBJECT_DETECTION,
     storage_config=StorageConfig(
@@ -211,7 +218,7 @@ test_dataset = OptimizationDataset(
     ),
 )
 
-test_dataset.run_optimization()
+test_dataset.run_qa()
 results = test_dataset.check_run()
 print(results)
 ```
@@ -220,4 +227,4 @@ Note: Currently we only support the main CPython release 3.9, 3.10, 3.11, 3.12 &
 
 ## Further documentation
 
-To learn more about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the [Google Colab examples](https://github.com/Hirundo-io/hirundo-client/tree/main/notebooks).
+To learn more about how to use this library, please visit the [http://docs.hirundo.io/](documentation) or see the [Google Colab examples](https://github.com/Hirundo-io/hirundo-python-sdk/tree/main/notebooks).
hirundo-0.1.21.dist-info/RECORD ADDED
@@ -0,0 +1,25 @@
+hirundo/__init__.py,sha256=GxRK_DHPKG1aqxNa19imqspHRAvBHSAQ5Q0fDwJPCDE,1341
+hirundo/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
+hirundo/_constraints.py,sha256=slW7Rk9Ml5fuwjnXTLUvHIhnY_9hmcUUy57v9hFog1o,6003
+hirundo/_dataframe.py,sha256=sXEEbCNcLi83wyU9ii884YikCzfASo_3nnrDxhuCv7U,758
+hirundo/_env.py,sha256=efX2sjvYlHkFr2Lcstelei67YSTFpVGT0l08ZsfiMuE,622
+hirundo/_headers.py,sha256=Cwha8gXEQNXL2lc9Lb1klLotkMLD82XOpAdX33TLVj8,521
+hirundo/_http.py,sha256=0kfoznumU3jinHhJIpB6qn5Mt4a3kso59GNXVbpWH7M,2267
+hirundo/_iter_sse_retrying.py,sha256=xNpf3W5qAHkKPJz8H4NZjKE3CrI_8b3m1iYeahdpdEc,4653
+hirundo/_timeouts.py,sha256=gE58NU0t2e4KgKq2sk5rZcezDJAkgvRIbM5AVYFY6Ho,86
+hirundo/_urls.py,sha256=0C85EbL0T-Bj25vJwjNs_obUG8ROSADpmbFdTAyhzlw,1375
+hirundo/cli.py,sha256=u-LsrN17-J7temjrq6NeUGnJ4mO04tMCiQYqVMm6el8,7752
+hirundo/dataset_enum.py,sha256=QnS3fy1OF4wvUtiIAHubKRhc611idS8huopEEolgqEM,1217
+hirundo/dataset_qa.py,sha256=U7cqV4JbYkaByXEf2XdoJrQZ_rI9pgDxrXVbQLc50R8,32470
+hirundo/dataset_qa_results.py,sha256=1F7JhRf7TQomwW9tjbNn8OBrhWHwEaWOND80r39l5uY,1104
+hirundo/git.py,sha256=cBjP7kPnaUHR77FI5ZaERst38eTUDy8q1gAQzy45EB4,6567
+hirundo/labeling.py,sha256=zXQCaqfdaLIG4qbzFGbb94L3FDdRMpdzHwbrDJE07Yk,5006
+hirundo/logger.py,sha256=MUqrYp0fBlxWFhGl6P5t19_uqO7T_PNhrLN5bqY3i7s,275
+hirundo/storage.py,sha256=MPKxkhrBmX84Yuexd4QoLDdVIJHrll9RosCLUsz5q3c,15936
+hirundo/unzip.py,sha256=3aPOsBvF-ZgAumHnQ6hq7JtbFUe9eRRRFsiI6K8cRDE,8188
+hirundo-0.1.21.dist-info/licenses/LICENSE,sha256=fusGGjqT2RGlU6kbkaOk7d-gDnsjk17wq67AO0mwBZI,1065
+hirundo-0.1.21.dist-info/METADATA,sha256=3m7R5dMN5h_C-L2Wl76lzYjpreP5upyHcEkIoAZF1lY,9497
+hirundo-0.1.21.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hirundo-0.1.21.dist-info/entry_points.txt,sha256=4ZtnA_Nl1Af8fLnHp3lwjbGDEGU1S6ujb_JwtuQ7ZPM,44
+hirundo-0.1.21.dist-info/top_level.txt,sha256=cmyNqrNZOAYxnywJGFI1AJBLe4SkH8HGsfFx6ncdrbI,8
+hirundo-0.1.21.dist-info/RECORD,,
hirundo-0.1.18.dist-info/RECORD DELETED
@@ -1,25 +0,0 @@
-hirundo/__init__.py,sha256=1Uy9UZhaZPQQSMfAOJ0A_Of70tM8_MDq-HHdhrmpO6g,1301
-hirundo/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
-hirundo/_constraints.py,sha256=tgJfvp7ydyXilT8ViNk837rNRlpOVXLLeCSMt_YUUYA,6013
-hirundo/_dataframe.py,sha256=sXEEbCNcLi83wyU9ii884YikCzfASo_3nnrDxhuCv7U,758
-hirundo/_env.py,sha256=efX2sjvYlHkFr2Lcstelei67YSTFpVGT0l08ZsfiMuE,622
-hirundo/_headers.py,sha256=3hybpD_X4SODv3cFZPt9AjGY2vvZaag5OKT3z1SHSjA,521
-hirundo/_http.py,sha256=izlnuxStyPugjTAbD8Lo30tA4lZJ5d3kOENNduqrbX4,573
-hirundo/_iter_sse_retrying.py,sha256=U331_wZRIbVzi-jnMqo8bp9jBC8MtFBLEs-X0ZvhSDw,4634
-hirundo/_timeouts.py,sha256=gE58NU0t2e4KgKq2sk5rZcezDJAkgvRIbM5AVYFY6Ho,86
-hirundo/_urls.py,sha256=0C85EbL0T-Bj25vJwjNs_obUG8ROSADpmbFdTAyhzlw,1375
-hirundo/cli.py,sha256=5Tn0eXZGG92BR9HJYUaYozjFbS1t6UTw_I2R0tZBE04,7824
-hirundo/dataset_enum.py,sha256=QnS3fy1OF4wvUtiIAHubKRhc611idS8huopEEolgqEM,1217
-hirundo/dataset_optimization.py,sha256=fXi8MeI0PWwSyc5NuOzCrkgXT_mz24NV-dGOHDPkBR0,31256
-hirundo/dataset_optimization_results.py,sha256=A9YyF5zaZXVtzeDE08I_05v90dhZQADpSjDcS_6eLMc,1129
-hirundo/git.py,sha256=8LVnF4WCjZsxMHoRaVxbLiDAKpGCBEwlcZp7a30n9Zo,6573
-hirundo/labeling.py,sha256=zXQCaqfdaLIG4qbzFGbb94L3FDdRMpdzHwbrDJE07Yk,5006
-hirundo/logger.py,sha256=MUqrYp0fBlxWFhGl6P5t19_uqO7T_PNhrLN5bqY3i7s,275
-hirundo/storage.py,sha256=y7cr_dngkfZq0gKnwWxrSqUXb1SycGpwFRVmS9Cn3h8,15942
-hirundo/unzip.py,sha256=XJqvt2m5pWR-G-fnzgW75VOdd-K4_Rw2r4wiEhZgKZA,8245
-hirundo-0.1.18.dist-info/licenses/LICENSE,sha256=fusGGjqT2RGlU6kbkaOk7d-gDnsjk17wq67AO0mwBZI,1065
-hirundo-0.1.18.dist-info/METADATA,sha256=F_F0-EfUxVVCcgFue_hwCtxfIfmqBlwnpvzELuhMkAc,9302
-hirundo-0.1.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-hirundo-0.1.18.dist-info/entry_points.txt,sha256=4ZtnA_Nl1Af8fLnHp3lwjbGDEGU1S6ujb_JwtuQ7ZPM,44
-hirundo-0.1.18.dist-info/top_level.txt,sha256=cmyNqrNZOAYxnywJGFI1AJBLe4SkH8HGsfFx6ncdrbI,8
-hirundo-0.1.18.dist-info/RECORD,,