hirundo 0.1.16__py3-none-any.whl → 0.1.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hirundo/__init__.py +30 -14
- hirundo/_constraints.py +164 -53
- hirundo/_headers.py +1 -1
- hirundo/_http.py +53 -0
- hirundo/_iter_sse_retrying.py +1 -1
- hirundo/_urls.py +59 -0
- hirundo/cli.py +7 -7
- hirundo/dataset_enum.py +23 -0
- hirundo/{dataset_optimization.py → dataset_qa.py} +195 -168
- hirundo/{dataset_optimization_results.py → dataset_qa_results.py} +4 -4
- hirundo/git.py +2 -3
- hirundo/labeling.py +140 -0
- hirundo/storage.py +43 -60
- hirundo/unzip.py +9 -10
- {hirundo-0.1.16.dist-info → hirundo-0.1.21.dist-info}/METADATA +67 -53
- hirundo-0.1.21.dist-info/RECORD +25 -0
- {hirundo-0.1.16.dist-info → hirundo-0.1.21.dist-info}/WHEEL +1 -1
- hirundo-0.1.16.dist-info/RECORD +0 -23
- {hirundo-0.1.16.dist-info → hirundo-0.1.21.dist-info}/entry_points.txt +0 -0
- {hirundo-0.1.16.dist-info → hirundo-0.1.21.dist-info}/licenses/LICENSE +0 -0
- {hirundo-0.1.16.dist-info → hirundo-0.1.21.dist-info}/top_level.txt +0 -0
|
@@ -21,7 +21,7 @@ if has_polars:
|
|
|
21
21
|
T = typing.TypeVar("T")
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class DatasetOptimizationResults(BaseModel, typing.Generic[T]):
|
|
24
|
+
class DatasetQAResults(BaseModel, typing.Generic[T]):
|
|
25
25
|
model_config = {"arbitrary_types_allowed": True}
|
|
26
26
|
|
|
27
27
|
cached_zip_path: Path
|
|
@@ -30,13 +30,13 @@ class DatasetOptimizationResults(BaseModel, typing.Generic[T]):
|
|
|
30
30
|
"""
|
|
31
31
|
suspects: T
|
|
32
32
|
"""
|
|
33
|
-
A polars/pandas DataFrame containing the results of the
|
|
33
|
+
A polars/pandas DataFrame containing the results of the data QA run
|
|
34
34
|
"""
|
|
35
35
|
object_suspects: typing.Optional[T]
|
|
36
36
|
"""
|
|
37
|
-
A polars/pandas DataFrame containing the object-level results of the
|
|
37
|
+
A polars/pandas DataFrame containing the object-level results of the data QA run
|
|
38
38
|
"""
|
|
39
39
|
warnings_and_errors: T
|
|
40
40
|
"""
|
|
41
|
-
A polars/pandas DataFrame containing the warnings and errors of the
|
|
41
|
+
A polars/pandas DataFrame containing the warnings and errors of the data QA run
|
|
42
42
|
"""
|
hirundo/git.py
CHANGED
|
@@ -3,15 +3,14 @@ import re
|
|
|
3
3
|
import typing
|
|
4
4
|
|
|
5
5
|
import pydantic
|
|
6
|
-
import requests
|
|
7
6
|
from pydantic import BaseModel, field_validator
|
|
8
7
|
from pydantic_core import Url
|
|
9
8
|
|
|
10
|
-
from hirundo._constraints import RepoUrl
|
|
11
9
|
from hirundo._env import API_HOST
|
|
12
10
|
from hirundo._headers import get_headers
|
|
13
|
-
from hirundo._http import raise_for_status_with_reason
|
|
11
|
+
from hirundo._http import raise_for_status_with_reason, requests
|
|
14
12
|
from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
|
|
13
|
+
from hirundo._urls import RepoUrl
|
|
15
14
|
from hirundo.logger import get_logger
|
|
16
15
|
|
|
17
16
|
logger = get_logger(__name__)
|
hirundo/labeling.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
from abc import ABC
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
from hirundo.dataset_enum import DatasetMetadataType
|
|
7
|
+
|
|
8
|
+
if typing.TYPE_CHECKING:
|
|
9
|
+
from hirundo._urls import HirundoUrl
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Metadata(BaseModel, ABC, frozen=True):
|
|
13
|
+
type: DatasetMetadataType
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class HirundoCSV(Metadata, frozen=True):
|
|
17
|
+
"""
|
|
18
|
+
A dataset metadata file in the Hirundo CSV format
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
type: typing.Literal[DatasetMetadataType.HIRUNDO_CSV] = (
|
|
22
|
+
DatasetMetadataType.HIRUNDO_CSV
|
|
23
|
+
)
|
|
24
|
+
csv_url: "HirundoUrl"
|
|
25
|
+
"""
|
|
26
|
+
The URL to access the dataset metadata CSV file.
|
|
27
|
+
e.g. `s3://my-bucket-name/my-folder/my-metadata.csv`, `gs://my-bucket-name/my-folder/my-metadata.csv`,
|
|
28
|
+
or `ssh://my-username@my-repo-name/my-folder/my-metadata.csv`
|
|
29
|
+
(or `file:///datasets/my-folder/my-metadata.csv` if using LOCAL storage type with on-premises installation)
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class COCO(Metadata, frozen=True):
|
|
34
|
+
"""
|
|
35
|
+
A dataset metadata file in the COCO format
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
type: typing.Literal[DatasetMetadataType.COCO] = DatasetMetadataType.COCO
|
|
39
|
+
json_url: "HirundoUrl"
|
|
40
|
+
"""
|
|
41
|
+
The URL to access the dataset metadata JSON file.
|
|
42
|
+
e.g. `s3://my-bucket-name/my-folder/my-metadata.json`, `gs://my-bucket-name/my-folder/my-metadata.json`,
|
|
43
|
+
or `ssh://my-username@my-repo-name/my-folder/my-metadata.json`
|
|
44
|
+
(or `file:///datasets/my-folder/my-metadata.json` if using LOCAL storage type with on-premises installation)
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class YOLO(Metadata, frozen=True):
|
|
49
|
+
type: typing.Literal[DatasetMetadataType.YOLO] = DatasetMetadataType.YOLO
|
|
50
|
+
data_yaml_url: "typing.Optional[HirundoUrl]" = None
|
|
51
|
+
labels_dir_url: "HirundoUrl"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class KeylabsAuth(BaseModel):
|
|
55
|
+
username: str
|
|
56
|
+
password: str
|
|
57
|
+
instance: str
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class Keylabs(Metadata, frozen=True):
|
|
61
|
+
project_id: str
|
|
62
|
+
"""
|
|
63
|
+
Keylabs project ID.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
labels_dir_url: "HirundoUrl"
|
|
67
|
+
"""
|
|
68
|
+
URL to the directory containing the Keylabs labels.
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
with_attributes: bool = True
|
|
72
|
+
"""
|
|
73
|
+
Whether to include attributes in the class name.
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
project_name: typing.Optional[str] = None
|
|
77
|
+
"""
|
|
78
|
+
Keylabs project name (optional; added to output CSV if provided).
|
|
79
|
+
"""
|
|
80
|
+
keylabs_auth: typing.Optional[KeylabsAuth] = None
|
|
81
|
+
"""
|
|
82
|
+
Keylabs authentication credentials (optional; if provided, used to provide links to each sample).
|
|
83
|
+
"""
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class KeylabsObjDetImages(Keylabs, frozen=True):
|
|
87
|
+
type: typing.Literal[DatasetMetadataType.KeylabsObjDetImages] = (
|
|
88
|
+
DatasetMetadataType.KeylabsObjDetImages
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class KeylabsObjDetVideo(Keylabs, frozen=True):
|
|
93
|
+
type: typing.Literal[DatasetMetadataType.KeylabsObjDetVideo] = (
|
|
94
|
+
DatasetMetadataType.KeylabsObjDetVideo
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class KeylabsObjSegImages(Keylabs, frozen=True):
|
|
99
|
+
type: typing.Literal[DatasetMetadataType.KeylabsObjSegImages] = (
|
|
100
|
+
DatasetMetadataType.KeylabsObjSegImages
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class KeylabsObjSegVideo(Keylabs, frozen=True):
|
|
105
|
+
type: typing.Literal[DatasetMetadataType.KeylabsObjSegVideo] = (
|
|
106
|
+
DatasetMetadataType.KeylabsObjSegVideo
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
KeylabsInfo = typing.Union[
|
|
111
|
+
KeylabsObjDetImages, KeylabsObjDetVideo, KeylabsObjSegImages, KeylabsObjSegVideo
|
|
112
|
+
]
|
|
113
|
+
"""
|
|
114
|
+
The dataset labeling info for Keylabs. The dataset labeling info can be one of the following:
|
|
115
|
+
- `DatasetMetadataType.KeylabsObjDetImages`: Indicates that the dataset metadata file is in the Keylabs object detection image format
|
|
116
|
+
- `DatasetMetadataType.KeylabsObjDetVideo`: Indicates that the dataset metadata file is in the Keylabs object detection video format
|
|
117
|
+
- `DatasetMetadataType.KeylabsObjSegImages`: Indicates that the dataset metadata file is in the Keylabs object segmentation image format
|
|
118
|
+
- `DatasetMetadataType.KeylabsObjSegVideo`: Indicates that the dataset metadata file is in the Keylabs object segmentation video format
|
|
119
|
+
"""
|
|
120
|
+
LabelingInfo = typing.Annotated[
|
|
121
|
+
typing.Union[
|
|
122
|
+
HirundoCSV,
|
|
123
|
+
COCO,
|
|
124
|
+
YOLO,
|
|
125
|
+
KeylabsInfo,
|
|
126
|
+
],
|
|
127
|
+
Field(discriminator="type"),
|
|
128
|
+
]
|
|
129
|
+
"""
|
|
130
|
+
The dataset labeling info. The dataset labeling info can be one of the following:
|
|
131
|
+
- `DatasetMetadataType.HIRUNDO_CSV`: Indicates that the dataset metadata file is a CSV file with the Hirundo format
|
|
132
|
+
- `DatasetMetadataType.COCO`: Indicates that the dataset metadata file is a JSON file with the COCO format
|
|
133
|
+
- `DatasetMetadataType.YOLO`: Indicates that the dataset metadata file is in the YOLO format
|
|
134
|
+
- `DatasetMetadataType.KeylabsObjDetImages`: Indicates that the dataset metadata file is in the Keylabs object detection image format
|
|
135
|
+
- `DatasetMetadataType.KeylabsObjDetVideo`: Indicates that the dataset metadata file is in the Keylabs object detection video format
|
|
136
|
+
- `DatasetMetadataType.KeylabsObjSegImages`: Indicates that the dataset metadata file is in the Keylabs object segmentation image format
|
|
137
|
+
- `DatasetMetadataType.KeylabsObjSegVideo`: Indicates that the dataset metadata file is in the Keylabs object segmentation video format
|
|
138
|
+
|
|
139
|
+
Currently no other formats are supported. Future versions of `hirundo` may support additional formats.
|
|
140
|
+
"""
|
hirundo/storage.py
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
import typing
|
|
2
|
-
from enum import Enum
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
|
|
5
4
|
import pydantic
|
|
6
|
-
import requests
|
|
7
5
|
from pydantic import BaseModel, model_validator
|
|
8
6
|
from pydantic_core import Url
|
|
9
7
|
|
|
10
|
-
from hirundo._constraints import S3BucketUrl, StorageConfigName
|
|
11
8
|
from hirundo._env import API_HOST
|
|
12
9
|
from hirundo._headers import get_headers
|
|
13
|
-
from hirundo._http import raise_for_status_with_reason
|
|
10
|
+
from hirundo._http import raise_for_status_with_reason, requests
|
|
14
11
|
from hirundo._timeouts import MODIFY_TIMEOUT, READ_TIMEOUT
|
|
12
|
+
from hirundo._urls import S3BucketUrl, StorageConfigName
|
|
13
|
+
from hirundo.dataset_enum import StorageTypes
|
|
15
14
|
from hirundo.git import GitRepo, GitRepoOut
|
|
16
15
|
from hirundo.logger import get_logger
|
|
17
16
|
|
|
@@ -34,11 +33,11 @@ class StorageS3Base(BaseModel):
|
|
|
34
33
|
Chains the bucket URL with the path, ensuring that the path is formatted correctly
|
|
35
34
|
|
|
36
35
|
Args:
|
|
37
|
-
path: The path to the file in the S3 bucket, e.g.
|
|
36
|
+
path: The path to the file in the S3 bucket, e.g. :file:`my-file.txt` or :file:`/my-folder/my-file.txt`
|
|
38
37
|
|
|
39
38
|
Returns:
|
|
40
|
-
The full URL to the file in the S3 bucket, e.g.
|
|
41
|
-
where
|
|
39
|
+
The full URL to the file in the S3 bucket, e.g. :file:`s3://my-bucket/my-file.txt` or :file:`s3://my-bucket/my-folder/my-file.txt`,
|
|
40
|
+
where :file:`s3://my-bucket` is the bucket URL provided in the S3 storage config
|
|
42
41
|
"""
|
|
43
42
|
return Url(
|
|
44
43
|
f"{S3_PREFIX}{self.bucket_url.removeprefix(S3_PREFIX).removesuffix('/')}/{str(path).removeprefix('/')}"
|
|
@@ -64,11 +63,11 @@ class StorageGCPBase(BaseModel):
|
|
|
64
63
|
Chains the bucket URL with the path, ensuring that the path is formatted correctly
|
|
65
64
|
|
|
66
65
|
Args:
|
|
67
|
-
path: The path to the file in the GCP bucket, e.g.
|
|
66
|
+
path: The path to the file in the GCP bucket, e.g. :file:`my-file.txt` or :file:`/my-folder/my-file.txt`
|
|
68
67
|
|
|
69
68
|
Returns:
|
|
70
|
-
The full URL to the file in the GCP bucket, e.g.
|
|
71
|
-
where
|
|
69
|
+
The full URL to the file in the GCP bucket, e.g. :file:`gs://my-bucket/my-file.txt` or :file:`gs://my-bucket/my-folder/my-file.txt`,
|
|
70
|
+
where :file:`my-bucket` is the bucket name provided in the GCP storage config
|
|
72
71
|
"""
|
|
73
72
|
return Url(f"gs://{self.bucket_name}/{str(path).removeprefix('/')}")
|
|
74
73
|
|
|
@@ -94,7 +93,7 @@ class StorageGCPOut(StorageGCPBase):
|
|
|
94
93
|
# Chains the container URL with the path, ensuring that the path is formatted correctly
|
|
95
94
|
|
|
96
95
|
# Args:
|
|
97
|
-
# path: The path to the file in the Azure container, e.g.
|
|
96
|
+
# path: The path to the file in the Azure container, e.g. :file:`my-file.txt` or :file:`/my-folder/my-file.txt`
|
|
98
97
|
|
|
99
98
|
# Returns:
|
|
100
99
|
# The full URL to the file in the Azure container
|
|
@@ -114,11 +113,11 @@ def get_git_repo_url(
|
|
|
114
113
|
Chains the repository URL with the path, ensuring that the path is formatted correctly
|
|
115
114
|
|
|
116
115
|
Args:
|
|
117
|
-
repo_url: The URL of the git repository, e.g.
|
|
118
|
-
path: The path to the file in the git repository, e.g.
|
|
116
|
+
repo_url: The URL of the git repository, e.g. :file:`https://my-git-repository.com`
|
|
117
|
+
path: The path to the file in the git repository, e.g. :file:`my-file.txt` or :file:`/my-folder/my-file.txt`
|
|
119
118
|
|
|
120
119
|
Returns:
|
|
121
|
-
The full URL to the file in the git repository, e.g.
|
|
120
|
+
The full URL to the file in the git repository, e.g. :file:`https://my-git-repository.com/my-file.txt` or :file:`https://my-git-repository.com/my-folder/my-file.txt`
|
|
122
121
|
"""
|
|
123
122
|
if not isinstance(repo_url, Url):
|
|
124
123
|
repo_url = Url(repo_url)
|
|
@@ -131,12 +130,12 @@ class StorageGit(BaseModel):
|
|
|
131
130
|
repo_id: typing.Optional[int] = None
|
|
132
131
|
"""
|
|
133
132
|
The ID of the Git repository in the Hirundo system.
|
|
134
|
-
Either
|
|
133
|
+
Either :code:`repo_id` or :code:`repo` must be provided.
|
|
135
134
|
"""
|
|
136
135
|
repo: typing.Optional[GitRepo] = None
|
|
137
136
|
"""
|
|
138
137
|
The Git repository to link to.
|
|
139
|
-
Either
|
|
138
|
+
Either :code:`repo_id` or :code:`repo` must be provided.
|
|
140
139
|
"""
|
|
141
140
|
branch: str
|
|
142
141
|
"""
|
|
@@ -156,11 +155,11 @@ class StorageGit(BaseModel):
|
|
|
156
155
|
Chains the repository URL with the path, ensuring that the path is formatted correctly
|
|
157
156
|
|
|
158
157
|
Args:
|
|
159
|
-
path: The path to the file in the git repository, e.g.
|
|
158
|
+
path: The path to the file in the git repository, e.g. :file:`my-file.txt` or :file:`/my-folder/my-file.txt`
|
|
160
159
|
|
|
161
160
|
Returns:
|
|
162
|
-
The full URL to the file in the git repository, e.g.
|
|
163
|
-
where
|
|
161
|
+
The full URL to the file in the git repository, e.g. :file:`https://my-git-repository.com/my-file.txt` or :file:`https://my-git-repository.com/my-folder/my-file.txt`,
|
|
162
|
+
where :file:`https://my-git-repository.com` is the repository URL provided in the git storage config's git repo
|
|
164
163
|
"""
|
|
165
164
|
if not self.repo:
|
|
166
165
|
raise ValueError("Repo must be provided to use `get_url`")
|
|
@@ -179,47 +178,31 @@ class StorageGitOut(BaseModel):
|
|
|
179
178
|
Chains the repository URL with the path, ensuring that the path is formatted correctly
|
|
180
179
|
|
|
181
180
|
Args:
|
|
182
|
-
path: The path to the file in the git repository, e.g.
|
|
181
|
+
path: The path to the file in the git repository, e.g. :file:`my-file.txt` or :file:`/my-folder/my-file.txt`
|
|
183
182
|
|
|
184
183
|
Returns:
|
|
185
|
-
The full URL to the file in the git repository, e.g.
|
|
186
|
-
where
|
|
184
|
+
The full URL to the file in the git repository, e.g. :file:`https://my-git-repository.com/my-file.txt` or :file:`https://my-git-repository.com/my-folder/my-file.txt`,
|
|
185
|
+
where :file:`https://my-git-repository.com` is the repository URL provided in the git storage config's git repo
|
|
187
186
|
"""
|
|
188
187
|
repo_url = self.repo.repository_url
|
|
189
188
|
return get_git_repo_url(repo_url, path)
|
|
190
189
|
|
|
191
190
|
|
|
192
|
-
class StorageTypes(str, Enum):
|
|
193
|
-
"""
|
|
194
|
-
Enum for the different types of storage configs.
|
|
195
|
-
Supported types are:
|
|
196
|
-
"""
|
|
197
|
-
|
|
198
|
-
S3 = "S3"
|
|
199
|
-
GCP = "GCP"
|
|
200
|
-
# AZURE = "Azure" TODO: Azure storage config is coming soon
|
|
201
|
-
GIT = "Git"
|
|
202
|
-
LOCAL = "Local"
|
|
203
|
-
"""
|
|
204
|
-
Local storage config is only supported for on-premises installations.
|
|
205
|
-
"""
|
|
206
|
-
|
|
207
|
-
|
|
208
191
|
class StorageConfig(BaseModel):
|
|
209
192
|
id: typing.Optional[int] = None
|
|
210
193
|
"""
|
|
211
|
-
The ID of the
|
|
194
|
+
The ID of the :code:`StorageConfig` in the Hirundo system.
|
|
212
195
|
"""
|
|
213
196
|
|
|
214
197
|
organization_id: typing.Optional[int] = None
|
|
215
198
|
"""
|
|
216
|
-
The ID of the organization that the
|
|
199
|
+
The ID of the organization that the :code:`StorageConfig` belongs to.
|
|
217
200
|
If not provided, it will be assigned to your default organization.
|
|
218
201
|
"""
|
|
219
202
|
|
|
220
203
|
name: StorageConfigName
|
|
221
204
|
"""
|
|
222
|
-
A name to identify the
|
|
205
|
+
A name to identify the :code:`StorageConfig` in the Hirundo system.
|
|
223
206
|
"""
|
|
224
207
|
type: typing.Optional[StorageTypes] = pydantic.Field(
|
|
225
208
|
examples=[
|
|
@@ -230,12 +213,12 @@ class StorageConfig(BaseModel):
|
|
|
230
213
|
]
|
|
231
214
|
)
|
|
232
215
|
"""
|
|
233
|
-
The type of the
|
|
216
|
+
The type of the :code:`StorageConfig`.
|
|
234
217
|
Supported types are:
|
|
235
|
-
-
|
|
236
|
-
-
|
|
237
|
-
-
|
|
238
|
-
-
|
|
218
|
+
- :code:`S3`
|
|
219
|
+
- :code:`GCP`
|
|
220
|
+
- :code:`Azure` (coming soon)
|
|
221
|
+
- :code:`Git`
|
|
239
222
|
"""
|
|
240
223
|
s3: typing.Optional[StorageS3] = pydantic.Field(
|
|
241
224
|
default=None,
|
|
@@ -323,10 +306,10 @@ class StorageConfig(BaseModel):
|
|
|
323
306
|
@staticmethod
|
|
324
307
|
def get_by_id(storage_config_id: int) -> "ResponseStorageConfig":
|
|
325
308
|
"""
|
|
326
|
-
Retrieves a
|
|
309
|
+
Retrieves a :code:`StorageConfig` instance from the server by its ID
|
|
327
310
|
|
|
328
311
|
Args:
|
|
329
|
-
storage_config_id: The ID of the
|
|
312
|
+
storage_config_id: The ID of the :code:`StorageConfig` to retrieve
|
|
330
313
|
"""
|
|
331
314
|
storage_config = requests.get(
|
|
332
315
|
f"{API_HOST}/storage-config/{storage_config_id}",
|
|
@@ -339,11 +322,11 @@ class StorageConfig(BaseModel):
|
|
|
339
322
|
@staticmethod
|
|
340
323
|
def get_by_name(name: str, storage_type: StorageTypes) -> "ResponseStorageConfig":
|
|
341
324
|
"""
|
|
342
|
-
Retrieves a
|
|
325
|
+
Retrieves a :code:`StorageConfig` instance from the server by its name
|
|
343
326
|
|
|
344
327
|
Args:
|
|
345
|
-
name: The name of the
|
|
346
|
-
storage_type: The type of the
|
|
328
|
+
name: The name of the :code:`StorageConfig` to retrieve
|
|
329
|
+
storage_type: The type of the :code:`StorageConfig` to retrieve
|
|
347
330
|
|
|
348
331
|
Note: The type is required because the name is not unique across different storage types
|
|
349
332
|
"""
|
|
@@ -360,12 +343,12 @@ class StorageConfig(BaseModel):
|
|
|
360
343
|
organization_id: typing.Optional[int] = None,
|
|
361
344
|
) -> list["ResponseStorageConfig"]:
|
|
362
345
|
"""
|
|
363
|
-
Lists all the
|
|
364
|
-
Note: The return type is
|
|
346
|
+
Lists all the :code:`StorageConfig` instances created by the user's default organization
|
|
347
|
+
Note: The return type is :code:`list[ResponseStorageConfig]` and not :code:`list[StorageConfig]`
|
|
365
348
|
|
|
366
349
|
Args:
|
|
367
|
-
organization_id: The ID of the organization to list
|
|
368
|
-
If not provided, it will list
|
|
350
|
+
organization_id: The ID of the organization to list :code:`StorageConfig` instances for.
|
|
351
|
+
If not provided, it will list :code:`StorageConfig` instances for the default organization.
|
|
369
352
|
"""
|
|
370
353
|
storage_configs = requests.get(
|
|
371
354
|
f"{API_HOST}/storage-config/",
|
|
@@ -379,10 +362,10 @@ class StorageConfig(BaseModel):
|
|
|
379
362
|
@staticmethod
|
|
380
363
|
def delete_by_id(storage_config_id) -> None:
|
|
381
364
|
"""
|
|
382
|
-
Deletes a
|
|
365
|
+
Deletes a :code:`StorageConfig` instance from the server by its ID
|
|
383
366
|
|
|
384
367
|
Args:
|
|
385
|
-
storage_config_id: The ID of the
|
|
368
|
+
storage_config_id: The ID of the :code:`StorageConfig` to delete
|
|
386
369
|
"""
|
|
387
370
|
storage_config = requests.delete(
|
|
388
371
|
f"{API_HOST}/storage-config/{storage_config_id}",
|
|
@@ -394,7 +377,7 @@ class StorageConfig(BaseModel):
|
|
|
394
377
|
|
|
395
378
|
def delete(self) -> None:
|
|
396
379
|
"""
|
|
397
|
-
Deletes the
|
|
380
|
+
Deletes the :code:`StorageConfig` instance from the server
|
|
398
381
|
"""
|
|
399
382
|
if not self.id:
|
|
400
383
|
raise ValueError("No StorageConfig has been created")
|
|
@@ -402,10 +385,10 @@ class StorageConfig(BaseModel):
|
|
|
402
385
|
|
|
403
386
|
def create(self, replace_if_exists: bool = False) -> int:
|
|
404
387
|
"""
|
|
405
|
-
Create a
|
|
388
|
+
Create a :code:`StorageConfig` instance on the server
|
|
406
389
|
|
|
407
390
|
Args:
|
|
408
|
-
replace_if_exists: If a
|
|
391
|
+
replace_if_exists: If a :code:`StorageConfig` with the same name and type already exists, replace it.
|
|
409
392
|
"""
|
|
410
393
|
if self.git and self.git.repo:
|
|
411
394
|
self.git.repo_id = self.git.repo.create(replace_if_exists=replace_if_exists)
|
hirundo/unzip.py
CHANGED
|
@@ -4,7 +4,6 @@ from collections.abc import Mapping
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import IO, cast
|
|
6
6
|
|
|
7
|
-
import requests
|
|
8
7
|
from pydantic_core import Url
|
|
9
8
|
|
|
10
9
|
from hirundo._dataframe import (
|
|
@@ -18,10 +17,11 @@ from hirundo._dataframe import (
|
|
|
18
17
|
)
|
|
19
18
|
from hirundo._env import API_HOST
|
|
20
19
|
from hirundo._headers import _get_auth_headers
|
|
20
|
+
from hirundo._http import requests
|
|
21
21
|
from hirundo._timeouts import DOWNLOAD_READ_TIMEOUT
|
|
22
|
-
from hirundo.dataset_optimization_results import (
|
|
22
|
+
from hirundo.dataset_qa_results import (
|
|
23
23
|
DataFrameType,
|
|
24
|
-
|
|
24
|
+
DatasetQAResults,
|
|
25
25
|
)
|
|
26
26
|
from hirundo.logger import get_logger
|
|
27
27
|
|
|
@@ -117,7 +117,7 @@ def get_mislabel_suspect_filename(filenames: list[str]):
|
|
|
117
117
|
|
|
118
118
|
def download_and_extract_zip(
|
|
119
119
|
run_id: str, zip_url: str
|
|
120
|
-
) -> DatasetOptimizationResults[DataFrameType]:
|
|
120
|
+
) -> DatasetQAResults[DataFrameType]:
|
|
121
121
|
"""
|
|
122
122
|
Download and extract the zip file from the given URL.
|
|
123
123
|
|
|
@@ -127,11 +127,11 @@ def download_and_extract_zip(
|
|
|
127
127
|
and `warnings_and_errors.csv` files from the zip file.
|
|
128
128
|
|
|
129
129
|
Args:
|
|
130
|
-
run_id: The ID of the
|
|
130
|
+
run_id: The ID of the dataset QA run.
|
|
131
131
|
zip_url: The URL of the zip file to download.
|
|
132
132
|
|
|
133
133
|
Returns:
|
|
134
|
-
The dataset
|
|
134
|
+
The dataset QA results object.
|
|
135
135
|
"""
|
|
136
136
|
# Define the local file path
|
|
137
137
|
cache_dir = Path.home() / ".hirundo" / "cache"
|
|
@@ -140,9 +140,8 @@ def download_and_extract_zip(
|
|
|
140
140
|
|
|
141
141
|
headers = None
|
|
142
142
|
if Url(zip_url).scheme == "file":
|
|
143
|
-
zip_url = (
|
|
144
|
-
|
|
145
|
-
+ zip_url.replace("file://", "")
|
|
143
|
+
zip_url = f"{API_HOST}/dataset-qa/run/local-download" + zip_url.replace(
|
|
144
|
+
"file://", ""
|
|
146
145
|
)
|
|
147
146
|
headers = _get_auth_headers()
|
|
148
147
|
# Stream the zip file download
|
|
@@ -217,7 +216,7 @@ def download_and_extract_zip(
|
|
|
217
216
|
"Failed to load warnings and errors into DataFrame", exc_info=e
|
|
218
217
|
)
|
|
219
218
|
|
|
220
|
-
return DatasetOptimizationResults[DataFrameType](
|
|
219
|
+
return DatasetQAResults[DataFrameType](
|
|
221
220
|
cached_zip_path=zip_file_path,
|
|
222
221
|
suspects=suspects_df,
|
|
223
222
|
object_suspects=object_suspects_df,
|