hirundo 0.1.18__py3-none-any.whl → 0.2.3.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,17 +11,17 @@ DataFrameType = TypeAliasType("DataFrameType", None)
11
11
  if has_pandas:
12
12
  from hirundo._dataframe import pd
13
13
 
14
- DataFrameType = TypeAliasType("DataFrameType", typing.Union[pd.DataFrame, None])
14
+ DataFrameType = TypeAliasType("DataFrameType", pd.DataFrame | None)
15
15
  if has_polars:
16
16
  from hirundo._dataframe import pl
17
17
 
18
- DataFrameType = TypeAliasType("DataFrameType", typing.Union[pl.DataFrame, None])
18
+ DataFrameType = TypeAliasType("DataFrameType", pl.DataFrame | None)
19
19
 
20
20
 
21
21
  T = typing.TypeVar("T")
22
22
 
23
23
 
24
- class DatasetOptimizationResults(BaseModel, typing.Generic[T]):
24
+ class DatasetQAResults(BaseModel, typing.Generic[T]):
25
25
  model_config = {"arbitrary_types_allowed": True}
26
26
 
27
27
  cached_zip_path: Path
@@ -30,13 +30,13 @@ class DatasetOptimizationResults(BaseModel, typing.Generic[T]):
30
30
  """
31
31
  suspects: T
32
32
  """
33
- A polars/pandas DataFrame containing the results of the optimization run
33
+ A polars/pandas DataFrame containing the results of the data QA run
34
34
  """
35
- object_suspects: typing.Optional[T]
35
+ object_suspects: T | None
36
36
  """
37
- A polars/pandas DataFrame containing the object-level results of the optimization run
37
+ A polars/pandas DataFrame containing the object-level results of the data QA run
38
38
  """
39
39
  warnings_and_errors: T
40
40
  """
41
- A polars/pandas DataFrame containing the warnings and errors of the optimization run
41
+ A polars/pandas DataFrame containing the warnings and errors of the data QA run
42
42
  """
hirundo/git.py CHANGED
@@ -1,15 +1,13 @@
1
1
  import datetime
2
2
  import re
3
- import typing
4
3
 
5
4
  import pydantic
6
- import requests
7
5
  from pydantic import BaseModel, field_validator
8
6
  from pydantic_core import Url
9
7
 
10
8
  from hirundo._env import API_HOST
11
9
  from hirundo._headers import get_headers
12
- from hirundo._http import raise_for_status_with_reason
10
+ from hirundo._http import raise_for_status_with_reason, requests
13
11
  from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
14
12
  from hirundo._urls import RepoUrl
15
13
  from hirundo.logger import get_logger
@@ -33,14 +31,14 @@ class GitSSHAuth(BaseModel):
33
31
  """
34
32
  The SSH key for the Git repository
35
33
  """
36
- ssh_password: typing.Optional[str]
34
+ ssh_password: str | None
37
35
  """
38
36
  The password for the SSH key for the Git repository.
39
37
  """
40
38
 
41
39
 
42
40
  class GitRepo(BaseModel):
43
- id: typing.Optional[int] = None
41
+ id: int | None = None
44
42
  """
45
43
  The ID of the Git repository.
46
44
  """
@@ -49,25 +47,25 @@ class GitRepo(BaseModel):
49
47
  """
50
48
  A name to identify the Git repository in the Hirundo system.
51
49
  """
52
- repository_url: typing.Union[str, RepoUrl]
50
+ repository_url: str | RepoUrl
53
51
  """
54
52
  The URL of the Git repository, it should start with `ssh://` or `https://` or be in the form `user@host:path`.
55
53
  If it is in the form `user@host:path`, it will be rewritten to `ssh://user@host/path`.
56
54
  """
57
- organization_id: typing.Optional[int] = None
55
+ organization_id: int | None = None
58
56
  """
59
57
  The ID of the organization that the Git repository belongs to.
60
58
  If not provided, it will be assigned to your default organization.
61
59
  """
62
60
 
63
- plain_auth: typing.Optional[GitPlainAuth] = pydantic.Field(
61
+ plain_auth: GitPlainAuth | None = pydantic.Field(
64
62
  default=None, examples=[None, {"username": "ben", "password": "password"}]
65
63
  )
66
64
  """
67
65
  The plain authentication details for the Git repository.
68
66
  Use this if using a special user with a username and password for authentication.
69
67
  """
70
- ssh_auth: typing.Optional[GitSSHAuth] = pydantic.Field(
68
+ ssh_auth: GitSSHAuth | None = pydantic.Field(
71
69
  default=None,
72
70
  examples=[
73
71
  {
@@ -85,7 +83,7 @@ class GitRepo(BaseModel):
85
83
 
86
84
  @field_validator("repository_url", mode="before", check_fields=True)
87
85
  @classmethod
88
- def check_valid_repository_url(cls, repository_url: typing.Union[str, RepoUrl]):
86
+ def check_valid_repository_url(cls, repository_url: str | RepoUrl):
89
87
  # Check if the URL has the `@` and `:` pattern with a non-numeric section before the next slash
90
88
  match = re.match("([^@]+@[^:]+):([^0-9/][^/]*)/(.+)", str(repository_url))
91
89
  if match:
hirundo/labeling.py CHANGED
@@ -3,11 +3,9 @@ from abc import ABC
3
3
 
4
4
  from pydantic import BaseModel, Field
5
5
 
6
+ from hirundo._urls import HirundoUrl
6
7
  from hirundo.dataset_enum import DatasetMetadataType
7
8
 
8
- if typing.TYPE_CHECKING:
9
- from hirundo._urls import HirundoUrl
10
-
11
9
 
12
10
  class Metadata(BaseModel, ABC, frozen=True):
13
11
  type: DatasetMetadataType
@@ -21,7 +19,7 @@ class HirundoCSV(Metadata, frozen=True):
21
19
  type: typing.Literal[DatasetMetadataType.HIRUNDO_CSV] = (
22
20
  DatasetMetadataType.HIRUNDO_CSV
23
21
  )
24
- csv_url: "HirundoUrl"
22
+ csv_url: HirundoUrl
25
23
  """
26
24
  The URL to access the dataset metadata CSV file.
27
25
  e.g. `s3://my-bucket-name/my-folder/my-metadata.csv`, `gs://my-bucket-name/my-folder/my-metadata.csv`,
@@ -36,7 +34,7 @@ class COCO(Metadata, frozen=True):
36
34
  """
37
35
 
38
36
  type: typing.Literal[DatasetMetadataType.COCO] = DatasetMetadataType.COCO
39
- json_url: "HirundoUrl"
37
+ json_url: HirundoUrl
40
38
  """
41
39
  The URL to access the dataset metadata JSON file.
42
40
  e.g. `s3://my-bucket-name/my-folder/my-metadata.json`, `gs://my-bucket-name/my-folder/my-metadata.json`,
@@ -47,8 +45,18 @@ class COCO(Metadata, frozen=True):
47
45
 
48
46
  class YOLO(Metadata, frozen=True):
49
47
  type: typing.Literal[DatasetMetadataType.YOLO] = DatasetMetadataType.YOLO
50
- data_yaml_url: "typing.Optional[HirundoUrl]" = None
51
- labels_dir_url: "HirundoUrl"
48
+ data_yaml_url: HirundoUrl | None = None
49
+ labels_dir_url: HirundoUrl
50
+
51
+
52
+ class HuggingFaceAudio(Metadata, frozen=True):
53
+ type: typing.Literal[DatasetMetadataType.HuggingFaceAudio] = (
54
+ DatasetMetadataType.HuggingFaceAudio
55
+ )
56
+ audio_column: str
57
+ text_column: str
58
+ subset: str | None = None
59
+ split: str | None = None
52
60
 
53
61
 
54
62
  class KeylabsAuth(BaseModel):
@@ -63,7 +71,7 @@ class Keylabs(Metadata, frozen=True):
63
71
  Keylabs project ID.
64
72
  """
65
73
 
66
- labels_dir_url: "HirundoUrl"
74
+ labels_dir_url: HirundoUrl
67
75
  """
68
76
  URL to the directory containing the Keylabs labels.
69
77
  """
@@ -73,11 +81,11 @@ class Keylabs(Metadata, frozen=True):
73
81
  Whether to include attributes in the class name.
74
82
  """
75
83
 
76
- project_name: typing.Optional[str] = None
84
+ project_name: str | None = None
77
85
  """
78
86
  Keylabs project name (optional; added to output CSV if provided).
79
87
  """
80
- keylabs_auth: typing.Optional[KeylabsAuth] = None
88
+ keylabs_auth: KeylabsAuth | None = None
81
89
  """
82
90
  Keylabs authentication credentials (optional; if provided, used to provide links to each sample).
83
91
  """
@@ -107,9 +115,9 @@ class KeylabsObjSegVideo(Keylabs, frozen=True):
107
115
  )
108
116
 
109
117
 
110
- KeylabsInfo = typing.Union[
111
- KeylabsObjDetImages, KeylabsObjDetVideo, KeylabsObjSegImages, KeylabsObjSegVideo
112
- ]
118
+ KeylabsInfo = (
119
+ KeylabsObjDetImages | KeylabsObjDetVideo | KeylabsObjSegImages | KeylabsObjSegVideo
120
+ )
113
121
  """
114
122
  The dataset labeling info for Keylabs. The dataset labeling info can be one of the following:
115
123
  - `DatasetMetadataType.KeylabsObjDetImages`: Indicates that the dataset metadata file is in the Keylabs object detection image format
@@ -118,12 +126,7 @@ The dataset labeling info for Keylabs. The dataset labeling info can be one of t
118
126
  - `DatasetMetadataType.KeylabsObjSegVideo`: Indicates that the dataset metadata file is in the Keylabs object segmentation video format
119
127
  """
120
128
  LabelingInfo = typing.Annotated[
121
- typing.Union[
122
- HirundoCSV,
123
- COCO,
124
- YOLO,
125
- KeylabsInfo,
126
- ],
129
+ HirundoCSV | COCO | YOLO | KeylabsInfo | HuggingFaceAudio,
127
130
  Field(discriminator="type"),
128
131
  ]
129
132
  """
hirundo/storage.py CHANGED
@@ -1,14 +1,13 @@
1
- import typing
1
+ import datetime
2
2
  from pathlib import Path
3
3
 
4
4
  import pydantic
5
- import requests
6
5
  from pydantic import BaseModel, model_validator
7
6
  from pydantic_core import Url
8
7
 
9
8
  from hirundo._env import API_HOST
10
9
  from hirundo._headers import get_headers
11
- from hirundo._http import raise_for_status_with_reason
10
+ from hirundo._http import raise_for_status_with_reason, requests
12
11
  from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
13
12
  from hirundo._urls import S3BucketUrl, StorageConfigName
14
13
  from hirundo.dataset_enum import StorageTypes
@@ -21,13 +20,13 @@ S3_PREFIX = "s3://"
21
20
 
22
21
 
23
22
  class StorageS3Base(BaseModel):
24
- endpoint_url: typing.Optional[Url] = None
23
+ endpoint_url: Url | None = None
25
24
  bucket_url: S3BucketUrl
26
25
  region_name: str
27
26
  # ⬆️ We could restrict this, but if we're allowing custom endpoints then the validation may be wrong
28
- access_key_id: typing.Optional[str] = None
27
+ access_key_id: str | None = None
29
28
 
30
- def get_url(self, path: typing.Union[str, Path]) -> Url:
29
+ def get_url(self, path: str | Path) -> Url:
31
30
  """
32
31
  Get the full URL for a file in the S3 bucket
33
32
 
@@ -46,7 +45,7 @@ class StorageS3Base(BaseModel):
46
45
 
47
46
 
48
47
  class StorageS3(StorageS3Base):
49
- secret_access_key: typing.Optional[str] = None
48
+ secret_access_key: str | None = None
50
49
 
51
50
 
52
51
  class StorageS3Out(StorageS3Base):
@@ -57,7 +56,7 @@ class StorageGCPBase(BaseModel):
57
56
  bucket_name: str
58
57
  project: str
59
58
 
60
- def get_url(self, path: typing.Union[str, Path]) -> Url:
59
+ def get_url(self, path: str | Path) -> Url:
61
60
  """
62
61
  Get the full URL for a file in the GCP bucket
63
62
 
@@ -74,7 +73,7 @@ class StorageGCPBase(BaseModel):
74
73
 
75
74
 
76
75
  class StorageGCP(StorageGCPBase):
77
- credentials_json: typing.Optional[dict] = None
76
+ credentials_json: dict | None = None
78
77
 
79
78
 
80
79
  class StorageGCPOut(StorageGCPBase):
@@ -105,9 +104,7 @@ class StorageGCPOut(StorageGCPBase):
105
104
  # account_url: str
106
105
 
107
106
 
108
- def get_git_repo_url(
109
- repo_url: typing.Union[str, Url], path: typing.Union[str, Path]
110
- ) -> Url:
107
+ def get_git_repo_url(repo_url: str | Url, path: str | Path) -> Url:
111
108
  """
112
109
  Get the full URL for a file in the git repository
113
110
 
@@ -128,12 +125,12 @@ def get_git_repo_url(
128
125
 
129
126
 
130
127
  class StorageGit(BaseModel):
131
- repo_id: typing.Optional[int] = None
128
+ repo_id: int | None = None
132
129
  """
133
130
  The ID of the Git repository in the Hirundo system.
134
131
  Either :code:`repo_id` or :code:`repo` must be provided.
135
132
  """
136
- repo: typing.Optional[GitRepo] = None
133
+ repo: GitRepo | None = None
137
134
  """
138
135
  The Git repository to link to.
139
136
  Either :code:`repo_id` or :code:`repo` must be provided.
@@ -149,7 +146,7 @@ class StorageGit(BaseModel):
149
146
  raise ValueError("Either repo_id or repo must be provided")
150
147
  return self
151
148
 
152
- def get_url(self, path: typing.Union[str, Path]) -> Url:
149
+ def get_url(self, path: str | Path) -> Url:
153
150
  """
154
151
  Get the full URL for a file in the git repository
155
152
 
@@ -172,7 +169,7 @@ class StorageGitOut(BaseModel):
172
169
  repo: GitRepoOut
173
170
  branch: str
174
171
 
175
- def get_url(self, path: typing.Union[str, Path]) -> Url:
172
+ def get_url(self, path: str | Path) -> Url:
176
173
  """
177
174
  Get the full URL for a file in the git repository
178
175
 
@@ -190,12 +187,12 @@ class StorageGitOut(BaseModel):
190
187
 
191
188
 
192
189
  class StorageConfig(BaseModel):
193
- id: typing.Optional[int] = None
190
+ id: int | None = None
194
191
  """
195
192
  The ID of the :code:`StorageConfig` in the Hirundo system.
196
193
  """
197
194
 
198
- organization_id: typing.Optional[int] = None
195
+ organization_id: int | None = None
199
196
  """
200
197
  The ID of the organization that the :code:`StorageConfig` belongs to.
201
198
  If not provided, it will be assigned to your default organization.
@@ -205,7 +202,7 @@ class StorageConfig(BaseModel):
205
202
  """
206
203
  A name to identify the :code:`StorageConfig` in the Hirundo system.
207
204
  """
208
- type: typing.Optional[StorageTypes] = pydantic.Field(
205
+ type: StorageTypes | None = pydantic.Field(
209
206
  examples=[
210
207
  StorageTypes.S3,
211
208
  StorageTypes.GCP,
@@ -221,7 +218,7 @@ class StorageConfig(BaseModel):
221
218
  - :code:`Azure` (coming soon)
222
219
  - :code:`Git`
223
220
  """
224
- s3: typing.Optional[StorageS3] = pydantic.Field(
221
+ s3: StorageS3 | None = pydantic.Field(
225
222
  default=None,
226
223
  examples=[
227
224
  {
@@ -239,7 +236,7 @@ class StorageConfig(BaseModel):
239
236
  The Amazon Web Services (AWS) S3 storage config details.
240
237
  Use this if you want to link to an S3 bucket.
241
238
  """
242
- gcp: typing.Optional[StorageGCP] = pydantic.Field(
239
+ gcp: StorageGCP | None = pydantic.Field(
243
240
  default=None,
244
241
  examples=[
245
242
  None,
@@ -282,7 +279,7 @@ class StorageConfig(BaseModel):
282
279
  # None,
283
280
  # ],
284
281
  # ) TODO: Azure storage config is coming soon
285
- git: typing.Optional[StorageGit] = pydantic.Field(
282
+ git: StorageGit | None = pydantic.Field(
286
283
  default=None,
287
284
  examples=[
288
285
  None,
@@ -341,7 +338,7 @@ class StorageConfig(BaseModel):
341
338
 
342
339
  @staticmethod
343
340
  def list(
344
- organization_id: typing.Optional[int] = None,
341
+ organization_id: int | None = None,
345
342
  ) -> list["ResponseStorageConfig"]:
346
343
  """
347
344
  Lists all the :code:`StorageConfig`'s created by user's default organization
@@ -441,7 +438,10 @@ class ResponseStorageConfig(BaseModel):
441
438
  type: StorageTypes
442
439
  organization_name: str
443
440
  creator_name: str
444
- s3: typing.Optional[StorageS3Out]
445
- gcp: typing.Optional[StorageGCPOut]
441
+ s3: StorageS3Out | None
442
+ gcp: StorageGCPOut | None
446
443
  # azure: typing.Optional[StorageAzureOut]
447
- git: typing.Optional[StorageGitOut]
444
+ git: StorageGitOut | None
445
+
446
+ created_at: datetime.datetime
447
+ updated_at: datetime.datetime