intellif-aihub 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between versions as they appear in the public registry.
Potentially problematic release: this version of intellif-aihub might be problematic.
- aihub/__init__.py +1 -1
- aihub/client.py +91 -87
- aihub/exceptions.py +18 -18
- aihub/models/artifact.py +137 -137
- aihub/models/common.py +13 -13
- aihub/models/dataset_management.py +99 -99
- aihub/models/document_center.py +28 -28
- aihub/models/labelfree.py +31 -31
- aihub/models/model_training_platform.py +230 -0
- aihub/models/tag_resource_management.py +50 -0
- aihub/models/task_center.py +117 -117
- aihub/models/user_system.py +262 -0
- aihub/services/artifact.py +353 -332
- aihub/services/dataset_management.py +240 -240
- aihub/services/document_center.py +43 -43
- aihub/services/labelfree.py +44 -44
- aihub/services/model_training_platform.py +135 -0
- aihub/services/quota_schedule_management.py +18 -18
- aihub/services/reporter.py +20 -20
- aihub/services/tag_resource_management.py +55 -0
- aihub/services/task_center.py +190 -190
- aihub/services/user_system.py +339 -0
- aihub/utils/download.py +69 -69
- aihub/utils/http.py +13 -13
- aihub/utils/s3.py +77 -77
- {intellif_aihub-0.1.2.dist-info → intellif_aihub-0.1.3.dist-info}/METADATA +2 -2
- intellif_aihub-0.1.3.dist-info/RECORD +34 -0
- {intellif_aihub-0.1.2.dist-info → intellif_aihub-0.1.3.dist-info}/licenses/LICENSE +200 -200
- aihub/models/tag_management.py +0 -21
- aihub/models/user.py +0 -46
- aihub/services/tag_management.py +0 -35
- aihub/services/user.py +0 -47
- intellif_aihub-0.1.2.dist-info/RECORD +0 -32
- {intellif_aihub-0.1.2.dist-info → intellif_aihub-0.1.3.dist-info}/WHEEL +0 -0
- {intellif_aihub-0.1.2.dist-info → intellif_aihub-0.1.3.dist-info}/top_level.txt +0 -0
aihub/services/user_system.py
ADDED
@@ -0,0 +1,339 @@
+# !/usr/bin/env python
+# -*-coding:utf-8 -*-
+from __future__ import annotations
+
+import httpx
+
+from ..exceptions import APIError
+from ..models.common import APIWrapper
+from ..models.user_system import *
+
+
+class UserSystemService:
+
+    def __init__(self, http: httpx.Client):
+        self._auth = _Auth(http)
+        self._menu = _Menu(http)
+        self._role = _Role(http)
+        self._user = _User(http)
+
+    # ==================================================
+    # AUTH top-level methods
+    # ==================================================
+    def login(self, payload: LoginRequest) -> LoginResponse:
+        return self._auth.login(payload)
+
+    def signup(self, payload: SignupRequest) -> SignupResponse:
+        return self._auth.signup(payload)
+
+    # ==================================================
+    # MENU top-level methods
+    # ==================================================
+    def list_menus(self, need_roles: bool = False) -> ListMenusResponse:
+        return self._menu.list(need_roles)
+
+    def get_menu(self, menu_id: int) -> Menu:
+        return self._menu.get(menu_id)
+
+    def create_menu(self, payload: CreateMenuRequest) -> int:
+        return self._menu.create(payload)
+
+    def update_menu(self, menu_id: int, payload: UpdateMenuRequest) -> None:
+        self._menu.update(menu_id, payload)
+
+    def delete_menu(self, menu_id: int) -> None:
+        self._menu.delete(menu_id)
+
+    def get_menu_roles(self, menu_id: int) -> List[int]:
+        return self._menu.get_roles(menu_id)
+
+    def set_menu_roles(self, menu_id: int, role_ids: List[int]) -> None:
+        self._menu.set_roles(menu_id, role_ids)
+
+    # ==================================================
+    # ROLE top-level methods
+    # ==================================================
+    def list_roles(self, payload: ListRolesRequest) -> ListRolesResponse:
+        return self._role.list(payload)
+
+    def get_role(self, role_id: int) -> Role:
+        return self._role.get(role_id)
+
+    def create_role(self, payload: CreateRoleRequest) -> int:
+        return self._role.create(payload)
+
+    def update_role(self, role_id: int, payload: UpdateRoleRequest) -> None:
+        self._role.update(role_id, payload)
+
+    def delete_role(self, role_id: int) -> None:
+        self._role.delete(role_id)
+
+    def get_role_menus(self, role_id: int) -> List[int]:
+        return self._role.get_menus(role_id)
+
+    def set_role_menus(self, role_id: int, menu_ids: List[int]) -> None:
+        self._role.set_menus(role_id, menu_ids)
+
+    def search_roles(self, payload: SearchRolesRequest) -> SearchRolesResponse:
+        return self._role.search(payload)
+
+    # ==================================================
+    # USER top-level methods
+    # ==================================================
+    def list_users(self, payload: ListUsersRequest) -> ListUsersResponse:
+        return self._user.list(payload)
+
+    def get_user(self, user_id: int) -> User:
+        return self._user.get(user_id)
+
+    def create_user(self, payload: CreateUserRequest) -> int:
+        return self._user.create(payload)
+
+    def update_user(self, user_id: int, payload: UpdateUserRequest) -> None:
+        self._user.update(user_id, payload)
+
+    def delete_user(self, user_id: int) -> None:
+        self._user.delete(user_id)
+
+    def set_user_roles(self, user_id: int, payload: SetUserRolesRequest) -> None:
+        self._user.set_roles(user_id, payload)
+
+    def get_user_menus(self, user_id: int, parent_id: int | None = None, auth: str | None = None) -> List[TreeMenu]:
+        return self._user.get_menus(user_id, parent_id=parent_id, auth=auth)
+
+    def search_users(self, payload: SearchUsersRequest) -> SearchUsersResponse:
+        return self._user.search(payload)
+
+    def search_one(self, payload: SearchUsersRequest) -> int:
+        return self._user.search_one(payload)
+
+    @property
+    def auth(self) -> _Auth:
+        return self._auth
+
+    @property
+    def menu(self) -> _Menu:
+        return self._menu
+
+    @property
+    def role(self) -> _Role:
+        return self._role
+
+    @property
+    def user(self) -> _User:
+        return self._user
+
+
+class _Auth:
+    _base = "/api/v1/auth"
+
+    def __init__(self, http: httpx.Client):
+        self._http = http
+
+    def login(self, req: LoginRequest) -> LoginResponse:
+        resp = self._http.post(
+            f"{self._base}/login",
+            json=req.model_dump(by_alias=True, exclude_none=True),
+        )
+        wrapper = APIWrapper[LoginResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data
+
+    def signup(self, req: SignupRequest) -> SignupResponse:
+        resp = self._http.post(
+            f"{self._base}/signup",
+            json=req.model_dump(by_alias=True, exclude_none=True),
+        )
+        wrapper = APIWrapper[SignupResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data
+
+
+class _Menu:
+    _base = "/api/v1/menus"
+
+    def __init__(self, http: httpx.Client):
+        self._http = http
+
+    def list(self, need_roles: bool) -> ListMenusResponse:
+        resp = self._http.get(self._base, params={"need_roles": str(need_roles).lower()})
+        wrapper = APIWrapper[ListMenusResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data
+
+    def get(self, menu_id: int) -> Menu:
+        resp = self._http.get(f"{self._base}/{menu_id}")
+        wrapper = APIWrapper[Menu].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data
+
+    def create(self, req: CreateMenuRequest) -> int:
+        resp = self._http.post(self._base, json=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[CreateMenuResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data.id
+
+    def update(self, menu_id: int, req: UpdateMenuRequest) -> None:
+        resp = self._http.put(f"{self._base}/{menu_id}", json=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[dict].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+
+    def delete(self, menu_id: int) -> None:
+        resp = self._http.delete(f"{self._base}/{menu_id}")
+        wrapper = APIWrapper[dict].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+
+    def get_roles(self, menu_id: int) -> List[int]:
+        resp = self._http.get(f"{self._base}/{menu_id}/roles")
+        wrapper = APIWrapper[GetMenuRolesResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data.role_ids
+
+    def set_roles(self, menu_id: int, role_ids: List[int]) -> None:
+        resp = self._http.put(f"{self._base}/{menu_id}/roles", json={"role_ids": role_ids})
+        wrapper = APIWrapper[dict].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+
+
+class _Role:
+    _base = "/api/v1/roles"
+    _search = "/api/v1/search-roles"
+
+    def __init__(self, http: httpx.Client):
+        self._http = http
+
+    def list(self, req: ListRolesRequest) -> ListRolesResponse:
+        resp = self._http.get(self._base, params=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[ListRolesResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data
+
+    def get(self, role_id: int) -> Role:
+        resp = self._http.get(f"{self._base}/{role_id}")
+        wrapper = APIWrapper[Role].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data
+
+    def create(self, req: CreateRoleRequest) -> int:
+        resp = self._http.post(self._base, json=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[CreateRoleResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data.id
+
+    def update(self, role_id: int, req: UpdateRoleRequest) -> None:
+        resp = self._http.put(f"{self._base}/{role_id}", json=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[dict].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+
+    def delete(self, role_id: int) -> None:
+        resp = self._http.delete(f"{self._base}/{role_id}")
+        wrapper = APIWrapper[dict].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+
+    def get_menus(self, role_id: int) -> List[int]:
+        resp = self._http.get(f"{self._base}/{role_id}/menus")
+        wrapper = APIWrapper[GetRoleMenusResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data.menu_ids
+
+    def set_menus(self, role_id: int, menu_ids: List[int]) -> None:
+        resp = self._http.put(f"{self._base}/{role_id}/menus", json={"menu_ids": menu_ids})
+        wrapper = APIWrapper[dict].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+
+    def search(self, req: SearchRolesRequest) -> SearchRolesResponse:
+        resp = self._http.post(self._search, json=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[SearchRolesResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data
+
+
+class _User:
+    _base = "/api/v1/users"
+    _search = "/api/v1/search-users"
+
+    def __init__(self, http: httpx.Client):
+        self._http = http
+
+    def list(self, req: ListUsersRequest) -> ListUsersResponse:
+        resp = self._http.get(self._base, params=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[ListUsersResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data
+
+    def get(self, user_id: int) -> User:
+        resp = self._http.get(f"{self._base}/{user_id}")
+        wrapper = APIWrapper[User].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data
+
+    def create(self, req: CreateUserRequest) -> int:
+        resp = self._http.post(self._base, json=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[CreateUserResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data.id
+
+    def update(self, user_id: int, req: UpdateUserRequest) -> None:
+        resp = self._http.put(f"{self._base}/{user_id}", json=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[dict].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+
+    def delete(self, user_id: int) -> None:
+        resp = self._http.delete(f"{self._base}/{user_id}")
+        wrapper = APIWrapper[dict].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+
+    def set_roles(self, user_id: int, req: SetUserRolesRequest) -> None:
+        resp = self._http.put(f"{self._base}/{user_id}/roles", json=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[dict].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+
+    def get_menus(self, user_id: int, parent_id: int | None = None, auth: str | None = None) -> List[TreeMenu]:
+        params = {}
+        if parent_id is not None:
+            params["parent_id"] = parent_id
+        if auth:
+            params["auth"] = auth
+
+        resp = self._http.get(f"{self._base}/{user_id}/menus", params=params)
+        wrapper = APIWrapper[GetUserMenusResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data.menus
+
+    def search(self, req: SearchUsersRequest) -> SearchUsersResponse:
+        resp = self._http.post(self._search, json=req.model_dump(by_alias=True, exclude_none=True))
+        wrapper = APIWrapper[SearchUsersResponse].model_validate(resp.json())
+        if wrapper.code != 0:
+            raise APIError(f"backend code {wrapper.code}: {wrapper.msg}")
+        return wrapper.data
+
+    def search_one(self, req: SearchUsersRequest) -> int:
+        resp = self.search(req)
+        for user in resp.data:
+            if user.nickname == req.nickname:
+                return user.id
+        raise APIError("no user found")
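For orientation, here is a minimal sketch of driving the new service directly. It assumes you construct it with an httpx.Client pointed at your deployment; the base URL and IDs below are placeholders, and in normal use the SDK client in aihub/client.py presumably wires this up for you.

import httpx

from aihub.exceptions import APIError
from aihub.services.user_system import UserSystemService

# Placeholder endpoint; substitute your own AI-hub deployment.
http = httpx.Client(base_url="http://aihub.example.com")
svc = UserSystemService(http)

try:
    # GET /api/v1/users/42/menus (parent_id and auth filters are optional).
    menus = svc.get_user_menus(user_id=42)
    # GET /api/v1/menus/1/roles returns the bound role IDs.
    role_ids = svc.get_menu_roles(menu_id=1)
    print(menus, role_ids)
except APIError as exc:
    # Every method raises APIError when the backend envelope's code != 0.
    print(f"request rejected: {exc}")

The sub-clients are also exposed as properties (svc.auth, svc.menu, svc.role, svc.user) for anything not mirrored on the facade.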
aihub/utils/download.py
CHANGED
@@ -1,69 +1,69 @@
As rendered, every line of this file is deleted and re-added with identical content, which suggests a whitespace or line-ending-only change. The file in 0.1.3:

from __future__ import annotations

import concurrent.futures
import os
import tempfile
from typing import List, TypedDict

import pyarrow.parquet as pq
from tqdm import tqdm

from .http import http_download_file
from .s3 import s3_to_url


class DatasetParquetMeta(TypedDict):
    parent_dir: str
    name: str
    s3path: str
    type: int  # 0=file, 1=dir


_ENUM_FILE = 0


def _read_parquet_index(file_path: str) -> List[DatasetParquetMeta]:
    table = pq.read_table(file_path)
    return table.to_pylist()  # convert each row to a dict


def _safe_rel(part: str) -> str:
    if not part:
        return ""
    drive, tail = os.path.splitdrive(part)
    return tail.lstrip(r"\/")


def dataset_download(index_url: str, local_dir: str, worker: int = 4) -> None:
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_file = os.path.join(tmpdir, "index.parquet")
        http_download_file(index_url, tmp_file)
        rows = _read_parquet_index(tmp_file)

        host = (index_url.split("//", 1)[-1]).split("/", 1)[0]

        files = [
            (
                os.path.join(
                    local_dir,
                    _safe_rel(row["parent_dir"]),
                    _safe_rel(row["name"]),
                ),
                s3_to_url(row["s3path"], host),
            )
            for row in rows if row["type"] == _ENUM_FILE
        ]

        if worker < 1:
            worker = 1

        with tqdm(total=len(files), desc="Downloading dataset") as bar, \
                concurrent.futures.ThreadPoolExecutor(max_workers=worker) as pool:

            def _one(flocal: str, furl: str):
                http_download_file(furl, flocal)
                bar.update()

            futures = [pool.submit(_one, p, u) for p, u in files]
            for fut in concurrent.futures.as_completed(futures):
                fut.result()
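A usage sketch for the downloader; the index URL is a placeholder.

from aihub.utils.download import dataset_download

# Fetches index.parquet into a temp dir, then downloads every
# type == 0 (file) row concurrently with up to `worker` threads,
# reporting progress through a tqdm bar.
dataset_download(
    index_url="http://minio.example.com/datasets/demo/index.parquet",
    local_dir="./demo_dataset",
    worker=8,
)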
aihub/utils/http.py
CHANGED
@@ -1,13 +1,13 @@
As rendered, every line of this file is deleted and re-added with identical content, which suggests a whitespace or line-ending-only change. The file in 0.1.3:

from __future__ import annotations

import httpx
import os


def http_download_file(url: str, dst_path: str, chunk: int = 1 << 16) -> None:
    os.makedirs(os.path.dirname(dst_path), exist_ok=True)
    with httpx.stream("GET", url, follow_redirects=True, timeout=None) as r:
        r.raise_for_status()
        with open(dst_path, "wb") as f:
            for block in r.iter_bytes(chunk):
                f.write(block)
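A sketch of the helper on its own. One caveat visible in the code: dst_path must include a directory component, because os.path.dirname("model.bin") is "" and os.makedirs("") raises.

from aihub.utils.http import http_download_file

# Streams the body in 64 KiB chunks (chunk=1 << 16 by default),
# follows redirects, disables the read timeout, and creates the
# parent directory of dst_path first.
http_download_file("http://files.example.com/model.bin", "./out/model.bin")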
aihub/utils/s3.py
CHANGED
@@ -1,77 +1,77 @@
As rendered, every line of this file is deleted and re-added with identical content, which suggests a whitespace or line-ending-only change. The file in 0.1.3:

from __future__ import annotations

import os
import re
from pathlib import Path
from typing import Any

from loguru import logger
from minio import Minio


def s3_to_url(s3_path: str, host: str) -> str:
    key = s3_path.replace("s3://", "").lstrip("/")
    return f"http://{host.rstrip('/')}/{key}"


def S3_path_to_info(s3_path) -> tuple[str | Any, str | Any] | None:
    if not s3_path.startswith("s3://"):
        return None

    pattern = r"s3://(?P<bucket>\w+)/(?P<objectname>.+)"

    match = re.match(pattern, s3_path)

    if match:
        bucket = match.group("bucket")
        objectname = match.group("objectname")
        return bucket, objectname
    return None


def local_path_to_s3_key(work_dir: str, local_path: str) -> str:
    work_dir = Path(work_dir)
    local_path = Path(local_path)
    s3_key = str(local_path.relative_to(work_dir))
    return s3_key


def upload_dir_to_s3(
    s3_client: Minio, local_dir: str, bucket: str, object_prefix: str
) -> None:
    logger.info(
        f"Uploading directory {local_dir} to S3 bucket {bucket} with prefix {object_prefix}"
    )

    for root, dirs, files in os.walk(local_dir):
        for file in files:
            local_path = Path(root) / file
            s3_key = local_path_to_s3_key(local_dir, str(local_path))
            s3_client.fput_object(
                bucket, os.path.join(object_prefix, s3_key), str(local_path)
            )

    logger.info(
        f"Uploaded directory {local_dir} to S3 bucket {bucket} with prefix {object_prefix}"
    )
    return


def download_dir_from_s3(
    s3_client: Minio, bucket: str, object_prefix: str, local_dir: str
) -> None:
    logger.info(
        f"Downloading directory from S3 bucket {bucket} with prefix {object_prefix} to {local_dir}"
    )
    objs = s3_client.list_objects(bucket, object_prefix, recursive=True)

    for obj in objs:
        file_name = Path(obj.object_name).relative_to(object_prefix)
        s3_client.fget_object(
            bucket, obj.object_name, os.path.join(local_dir, file_name)
        )

    logger.info(
        f"Downloaded directory from S3 bucket {bucket} with prefix {object_prefix} to {local_dir}"
    )
    return
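Quick examples of the two pure helpers; the host and paths are placeholders.

from aihub.utils.s3 import S3_path_to_info, s3_to_url

# s3://bucket/key is rewritten as a plain HTTP URL on the given host.
print(s3_to_url("s3://datasets/images/0001.jpg", "minio.example.com"))
# -> http://minio.example.com/datasets/images/0001.jpg

# The same path split into (bucket, object), or None if it is not s3://.
print(S3_path_to_info("s3://datasets/images/0001.jpg"))
# -> ('datasets', 'images/0001.jpg')
# Note: the \w+ bucket pattern will not match hyphenated bucket names.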
{intellif_aihub-0.1.2.dist-info → intellif_aihub-0.1.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: intellif-aihub
-Version: 0.1.2
+Version: 0.1.3
 Summary: Intellif AI-hub SDK.
 Author-email: Platform Team <aihub@example.com>
 License-Expression: Apache-2.0
@@ -13,7 +13,7 @@ License-File: LICENSE
 Requires-Dist: httpx<0.28,>=0.27
 Requires-Dist: pydantic<3.0,>=2.5.3
 Requires-Dist: typing-extensions<5.0,>=4.13.2
-Requires-Dist: pyarrow<16.0,>=15.0
+Requires-Dist: pyarrow<16.0,>=15.0.2
 Requires-Dist: tqdm<5.0,>=4.66
 Requires-Dist: loguru>=0.7.3
 Requires-Dist: minio>=7.2.7
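A quick way to confirm an installed environment matches the tightened pin, using only the standard library:

from importlib.metadata import version

# 0.1.3 raises the pyarrow floor to 15.0.2 (the <16.0 ceiling is unchanged).
print(version("intellif-aihub"))  # expect 0.1.3
print(version("pyarrow"))         # expect >= 15.0.2 and < 16.0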