hirundo 0.1.21__py3-none-any.whl → 0.2.3.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hirundo/__init__.py +19 -3
- hirundo/_constraints.py +2 -3
- hirundo/_iter_sse_retrying.py +7 -4
- hirundo/_llm_pipeline.py +153 -0
- hirundo/_run_checking.py +283 -0
- hirundo/_urls.py +1 -0
- hirundo/cli.py +1 -4
- hirundo/dataset_enum.py +2 -0
- hirundo/dataset_qa.py +106 -190
- hirundo/dataset_qa_results.py +3 -3
- hirundo/git.py +7 -8
- hirundo/labeling.py +22 -19
- hirundo/storage.py +25 -24
- hirundo/unlearning_llm.py +599 -0
- hirundo/unzip.py +3 -3
- {hirundo-0.1.21.dist-info → hirundo-0.2.3.post1.dist-info}/METADATA +42 -10
- hirundo-0.2.3.post1.dist-info/RECORD +28 -0
- {hirundo-0.1.21.dist-info → hirundo-0.2.3.post1.dist-info}/WHEEL +1 -1
- hirundo-0.1.21.dist-info/RECORD +0 -25
- {hirundo-0.1.21.dist-info → hirundo-0.2.3.post1.dist-info}/entry_points.txt +0 -0
- {hirundo-0.1.21.dist-info → hirundo-0.2.3.post1.dist-info}/licenses/LICENSE +0 -0
- {hirundo-0.1.21.dist-info → hirundo-0.2.3.post1.dist-info}/top_level.txt +0 -0
hirundo/labeling.py
CHANGED
|
@@ -3,11 +3,9 @@ from abc import ABC
|
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel, Field
|
|
5
5
|
|
|
6
|
+
from hirundo._urls import HirundoUrl
|
|
6
7
|
from hirundo.dataset_enum import DatasetMetadataType
|
|
7
8
|
|
|
8
|
-
if typing.TYPE_CHECKING:
|
|
9
|
-
from hirundo._urls import HirundoUrl
|
|
10
|
-
|
|
11
9
|
|
|
12
10
|
class Metadata(BaseModel, ABC, frozen=True):
|
|
13
11
|
type: DatasetMetadataType
|
|
@@ -21,7 +19,7 @@ class HirundoCSV(Metadata, frozen=True):
|
|
|
21
19
|
type: typing.Literal[DatasetMetadataType.HIRUNDO_CSV] = (
|
|
22
20
|
DatasetMetadataType.HIRUNDO_CSV
|
|
23
21
|
)
|
|
24
|
-
csv_url:
|
|
22
|
+
csv_url: HirundoUrl
|
|
25
23
|
"""
|
|
26
24
|
The URL to access the dataset metadata CSV file.
|
|
27
25
|
e.g. `s3://my-bucket-name/my-folder/my-metadata.csv`, `gs://my-bucket-name/my-folder/my-metadata.csv`,
|
|
@@ -36,7 +34,7 @@ class COCO(Metadata, frozen=True):
|
|
|
36
34
|
"""
|
|
37
35
|
|
|
38
36
|
type: typing.Literal[DatasetMetadataType.COCO] = DatasetMetadataType.COCO
|
|
39
|
-
json_url:
|
|
37
|
+
json_url: HirundoUrl
|
|
40
38
|
"""
|
|
41
39
|
The URL to access the dataset metadata JSON file.
|
|
42
40
|
e.g. `s3://my-bucket-name/my-folder/my-metadata.json`, `gs://my-bucket-name/my-folder/my-metadata.json`,
|
|
@@ -47,8 +45,18 @@ class COCO(Metadata, frozen=True):
|
|
|
47
45
|
|
|
48
46
|
class YOLO(Metadata, frozen=True):
|
|
49
47
|
type: typing.Literal[DatasetMetadataType.YOLO] = DatasetMetadataType.YOLO
|
|
50
|
-
data_yaml_url:
|
|
51
|
-
labels_dir_url:
|
|
48
|
+
data_yaml_url: HirundoUrl | None = None
|
|
49
|
+
labels_dir_url: HirundoUrl
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class HuggingFaceAudio(Metadata, frozen=True):
|
|
53
|
+
type: typing.Literal[DatasetMetadataType.HuggingFaceAudio] = (
|
|
54
|
+
DatasetMetadataType.HuggingFaceAudio
|
|
55
|
+
)
|
|
56
|
+
audio_column: str
|
|
57
|
+
text_column: str
|
|
58
|
+
subset: str | None = None
|
|
59
|
+
split: str | None = None
|
|
52
60
|
|
|
53
61
|
|
|
54
62
|
class KeylabsAuth(BaseModel):
|
|
@@ -63,7 +71,7 @@ class Keylabs(Metadata, frozen=True):
|
|
|
63
71
|
Keylabs project ID.
|
|
64
72
|
"""
|
|
65
73
|
|
|
66
|
-
labels_dir_url:
|
|
74
|
+
labels_dir_url: HirundoUrl
|
|
67
75
|
"""
|
|
68
76
|
URL to the directory containing the Keylabs labels.
|
|
69
77
|
"""
|
|
@@ -73,11 +81,11 @@ class Keylabs(Metadata, frozen=True):
|
|
|
73
81
|
Whether to include attributes in the class name.
|
|
74
82
|
"""
|
|
75
83
|
|
|
76
|
-
project_name:
|
|
84
|
+
project_name: str | None = None
|
|
77
85
|
"""
|
|
78
86
|
Keylabs project name (optional; added to output CSV if provided).
|
|
79
87
|
"""
|
|
80
|
-
keylabs_auth:
|
|
88
|
+
keylabs_auth: KeylabsAuth | None = None
|
|
81
89
|
"""
|
|
82
90
|
Keylabs authentication credentials (optional; if provided, used to provide links to each sample).
|
|
83
91
|
"""
|
|
@@ -107,9 +115,9 @@ class KeylabsObjSegVideo(Keylabs, frozen=True):
|
|
|
107
115
|
)
|
|
108
116
|
|
|
109
117
|
|
|
110
|
-
KeylabsInfo =
|
|
111
|
-
KeylabsObjDetImages
|
|
112
|
-
|
|
118
|
+
KeylabsInfo = (
|
|
119
|
+
KeylabsObjDetImages | KeylabsObjDetVideo | KeylabsObjSegImages | KeylabsObjSegVideo
|
|
120
|
+
)
|
|
113
121
|
"""
|
|
114
122
|
The dataset labeling info for Keylabs. The dataset labeling info can be one of the following:
|
|
115
123
|
- `DatasetMetadataType.KeylabsObjDetImages`: Indicates that the dataset metadata file is in the Keylabs object detection image format
|
|
@@ -118,12 +126,7 @@ The dataset labeling info for Keylabs. The dataset labeling info can be one of t
|
|
|
118
126
|
- `DatasetMetadataType.KeylabsObjSegVideo`: Indicates that the dataset metadata file is in the Keylabs object segmentation video format
|
|
119
127
|
"""
|
|
120
128
|
LabelingInfo = typing.Annotated[
|
|
121
|
-
|
|
122
|
-
HirundoCSV,
|
|
123
|
-
COCO,
|
|
124
|
-
YOLO,
|
|
125
|
-
KeylabsInfo,
|
|
126
|
-
],
|
|
129
|
+
HirundoCSV | COCO | YOLO | KeylabsInfo | HuggingFaceAudio,
|
|
127
130
|
Field(discriminator="type"),
|
|
128
131
|
]
|
|
129
132
|
"""
|
hirundo/storage.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import datetime
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
|
|
4
4
|
import pydantic
|
|
@@ -20,13 +20,13 @@ S3_PREFIX = "s3://"
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class StorageS3Base(BaseModel):
|
|
23
|
-
endpoint_url:
|
|
23
|
+
endpoint_url: Url | None = None
|
|
24
24
|
bucket_url: S3BucketUrl
|
|
25
25
|
region_name: str
|
|
26
26
|
# ⬆️ We could restrict this, but if we're allowing custom endpoints then the validation may be wrong
|
|
27
|
-
access_key_id:
|
|
27
|
+
access_key_id: str | None = None
|
|
28
28
|
|
|
29
|
-
def get_url(self, path:
|
|
29
|
+
def get_url(self, path: str | Path) -> Url:
|
|
30
30
|
"""
|
|
31
31
|
Get the full URL for a file in the S3 bucket
|
|
32
32
|
|
|
@@ -45,7 +45,7 @@ class StorageS3Base(BaseModel):
|
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
class StorageS3(StorageS3Base):
|
|
48
|
-
secret_access_key:
|
|
48
|
+
secret_access_key: str | None = None
|
|
49
49
|
|
|
50
50
|
|
|
51
51
|
class StorageS3Out(StorageS3Base):
|
|
@@ -56,7 +56,7 @@ class StorageGCPBase(BaseModel):
|
|
|
56
56
|
bucket_name: str
|
|
57
57
|
project: str
|
|
58
58
|
|
|
59
|
-
def get_url(self, path:
|
|
59
|
+
def get_url(self, path: str | Path) -> Url:
|
|
60
60
|
"""
|
|
61
61
|
Get the full URL for a file in the GCP bucket
|
|
62
62
|
|
|
@@ -73,7 +73,7 @@ class StorageGCPBase(BaseModel):
|
|
|
73
73
|
|
|
74
74
|
|
|
75
75
|
class StorageGCP(StorageGCPBase):
|
|
76
|
-
credentials_json:
|
|
76
|
+
credentials_json: dict | None = None
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
class StorageGCPOut(StorageGCPBase):
|
|
@@ -104,9 +104,7 @@ class StorageGCPOut(StorageGCPBase):
|
|
|
104
104
|
# account_url: str
|
|
105
105
|
|
|
106
106
|
|
|
107
|
-
def get_git_repo_url(
|
|
108
|
-
repo_url: typing.Union[str, Url], path: typing.Union[str, Path]
|
|
109
|
-
) -> Url:
|
|
107
|
+
def get_git_repo_url(repo_url: str | Url, path: str | Path) -> Url:
|
|
110
108
|
"""
|
|
111
109
|
Get the full URL for a file in the git repository
|
|
112
110
|
|
|
@@ -127,12 +125,12 @@ def get_git_repo_url(
|
|
|
127
125
|
|
|
128
126
|
|
|
129
127
|
class StorageGit(BaseModel):
|
|
130
|
-
repo_id:
|
|
128
|
+
repo_id: int | None = None
|
|
131
129
|
"""
|
|
132
130
|
The ID of the Git repository in the Hirundo system.
|
|
133
131
|
Either :code:`repo_id` or :code:`repo` must be provided.
|
|
134
132
|
"""
|
|
135
|
-
repo:
|
|
133
|
+
repo: GitRepo | None = None
|
|
136
134
|
"""
|
|
137
135
|
The Git repository to link to.
|
|
138
136
|
Either :code:`repo_id` or :code:`repo` must be provided.
|
|
@@ -148,7 +146,7 @@ class StorageGit(BaseModel):
|
|
|
148
146
|
raise ValueError("Either repo_id or repo must be provided")
|
|
149
147
|
return self
|
|
150
148
|
|
|
151
|
-
def get_url(self, path:
|
|
149
|
+
def get_url(self, path: str | Path) -> Url:
|
|
152
150
|
"""
|
|
153
151
|
Get the full URL for a file in the git repository
|
|
154
152
|
|
|
@@ -171,7 +169,7 @@ class StorageGitOut(BaseModel):
|
|
|
171
169
|
repo: GitRepoOut
|
|
172
170
|
branch: str
|
|
173
171
|
|
|
174
|
-
def get_url(self, path:
|
|
172
|
+
def get_url(self, path: str | Path) -> Url:
|
|
175
173
|
"""
|
|
176
174
|
Get the full URL for a file in the git repository
|
|
177
175
|
|
|
@@ -189,12 +187,12 @@ class StorageGitOut(BaseModel):
|
|
|
189
187
|
|
|
190
188
|
|
|
191
189
|
class StorageConfig(BaseModel):
|
|
192
|
-
id:
|
|
190
|
+
id: int | None = None
|
|
193
191
|
"""
|
|
194
192
|
The ID of the :code:`StorageConfig` in the Hirundo system.
|
|
195
193
|
"""
|
|
196
194
|
|
|
197
|
-
organization_id:
|
|
195
|
+
organization_id: int | None = None
|
|
198
196
|
"""
|
|
199
197
|
The ID of the organization that the :code:`StorageConfig` belongs to.
|
|
200
198
|
If not provided, it will be assigned to your default organization.
|
|
@@ -204,7 +202,7 @@ class StorageConfig(BaseModel):
|
|
|
204
202
|
"""
|
|
205
203
|
A name to identify the :code:`StorageConfig` in the Hirundo system.
|
|
206
204
|
"""
|
|
207
|
-
type:
|
|
205
|
+
type: StorageTypes | None = pydantic.Field(
|
|
208
206
|
examples=[
|
|
209
207
|
StorageTypes.S3,
|
|
210
208
|
StorageTypes.GCP,
|
|
@@ -220,7 +218,7 @@ class StorageConfig(BaseModel):
|
|
|
220
218
|
- :code:`Azure` (coming soon)
|
|
221
219
|
- :code:`Git`
|
|
222
220
|
"""
|
|
223
|
-
s3:
|
|
221
|
+
s3: StorageS3 | None = pydantic.Field(
|
|
224
222
|
default=None,
|
|
225
223
|
examples=[
|
|
226
224
|
{
|
|
@@ -238,7 +236,7 @@ class StorageConfig(BaseModel):
|
|
|
238
236
|
The Amazon Web Services (AWS) S3 storage config details.
|
|
239
237
|
Use this if you want to link to an S3 bucket.
|
|
240
238
|
"""
|
|
241
|
-
gcp:
|
|
239
|
+
gcp: StorageGCP | None = pydantic.Field(
|
|
242
240
|
default=None,
|
|
243
241
|
examples=[
|
|
244
242
|
None,
|
|
@@ -281,7 +279,7 @@ class StorageConfig(BaseModel):
|
|
|
281
279
|
# None,
|
|
282
280
|
# ],
|
|
283
281
|
# ) TODO: Azure storage config is coming soon
|
|
284
|
-
git:
|
|
282
|
+
git: StorageGit | None = pydantic.Field(
|
|
285
283
|
default=None,
|
|
286
284
|
examples=[
|
|
287
285
|
None,
|
|
@@ -340,7 +338,7 @@ class StorageConfig(BaseModel):
|
|
|
340
338
|
|
|
341
339
|
@staticmethod
|
|
342
340
|
def list(
|
|
343
|
-
organization_id:
|
|
341
|
+
organization_id: int | None = None,
|
|
344
342
|
) -> list["ResponseStorageConfig"]:
|
|
345
343
|
"""
|
|
346
344
|
Lists all the :code:`StorageConfig`'s created by user's default organization
|
|
@@ -440,7 +438,10 @@ class ResponseStorageConfig(BaseModel):
|
|
|
440
438
|
type: StorageTypes
|
|
441
439
|
organization_name: str
|
|
442
440
|
creator_name: str
|
|
443
|
-
s3:
|
|
444
|
-
gcp:
|
|
441
|
+
s3: StorageS3Out | None
|
|
442
|
+
gcp: StorageGCPOut | None
|
|
445
443
|
# azure: typing.Optional[StorageAzureOut]
|
|
446
|
-
git:
|
|
444
|
+
git: StorageGitOut | None
|
|
445
|
+
|
|
446
|
+
created_at: datetime.datetime
|
|
447
|
+
updated_at: datetime.datetime
|