lsdataset 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lsdataset/__init__.py +4 -0
- lsdataset/api/__init__.py +1 -0
- lsdataset/api/routes.py +233 -0
- lsdataset/core/__init__.py +1 -0
- lsdataset/core/dataset.py +1631 -0
- lsdataset/core/manager.py +442 -0
- lsdataset/io/__init__.py +1 -0
- lsdataset/io/imwriter.py +250 -0
- lsdataset/io/utils.py +340 -0
- lsdataset/io/video_utils.py +591 -0
- lsdataset/ldp/__init__.py +15 -0
- lsdataset/ldp/ldp_proxy.py +613 -0
- lsdataset/logger.py +10 -0
- lsdataset/schemas/__init__.py +1 -0
- lsdataset/schemas/types.py +81 -0
- lsdataset-0.1.0.dist-info/METADATA +88 -0
- lsdataset-0.1.0.dist-info/RECORD +19 -0
- lsdataset-0.1.0.dist-info/WHEEL +5 -0
- lsdataset-0.1.0.dist-info/top_level.txt +1 -0
lsdataset/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""FastAPI routes for `/lsdataset/*` (see `routes.py`)."""
|
lsdataset/api/routes.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
3
|
+
from functools import wraps
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Callable, List, Optional
|
|
6
|
+
|
|
7
|
+
from fastapi import APIRouter, HTTPException, Query
|
|
8
|
+
from fastapi.responses import FileResponse
|
|
9
|
+
|
|
10
|
+
from ..core.manager import manager
|
|
11
|
+
from ..io.utils import get_disk_usage
|
|
12
|
+
from ..ldp.ldp_proxy import (
|
|
13
|
+
get_upload_progress,
|
|
14
|
+
ldp_login,
|
|
15
|
+
ldp_precheck_upload_datasets,
|
|
16
|
+
ldp_upload_datasets,
|
|
17
|
+
)
|
|
18
|
+
from ..logger import logger
|
|
19
|
+
from ..schemas.types import (
|
|
20
|
+
CommonOperationResp,
|
|
21
|
+
ConfigDatasetReq,
|
|
22
|
+
DatasetsListResp,
|
|
23
|
+
DeleteDatasetReq,
|
|
24
|
+
DeleteEpisodeReq,
|
|
25
|
+
LdpLoginReq,
|
|
26
|
+
LdpUploadPrecheckReq,
|
|
27
|
+
LdpUploadReq,
|
|
28
|
+
StartRecordingReq,
|
|
29
|
+
StopRecordingReq,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
router = APIRouter(tags=["Datasets"])

# Thread pool for running blocking operations off the event loop
# (process-wide singleton; shut down via shutdown_routes_executor in the app lifespan).
_executor = ThreadPoolExecutor(max_workers=10, thread_name_prefix="lsdataset")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def shutdown_routes_executor(*, wait: bool = True, cancel_futures: bool = True) -> None:
    """Shut down the ``ThreadPoolExecutor`` owned by this module.

    Call once from the host application's **lifespan shutdown** phase (or just
    before process exit); on CPython 3.9+ calling ``shutdown`` repeatedly is a
    safe no-op. Passing ``cancel_futures=True`` cancels queued tasks that have
    not started yet, so shutdown does not block for a long time.
    """
    try:
        _executor.shutdown(wait=wait, cancel_futures=cancel_futures)
    except Exception as exc:
        # Best-effort: a failing shutdown should never crash the app teardown.
        logger.warning("shutdown_routes_executor: %s", exc, exc_info=True)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def run_in_executor(func, *args, **kwargs):
    """Run a synchronous callable on the shared thread pool and await its result."""
    loop = asyncio.get_running_loop()
    # loop.run_in_executor only forwards positional args; close over kwargs when present.
    if not kwargs:
        return await loop.run_in_executor(_executor, func, *args)
    return await loop.run_in_executor(_executor, lambda: func(*args, **kwargs))
|
|
57
|
+
|
|
58
|
+
def catch_http_exceptions(err_info: str) -> Callable:
    """Decorator for async routes: let ``HTTPException`` propagate untouched and
    convert any other exception into an HTTP 500 after logging it with *err_info*.
    """
    def decorator(func: Callable) -> Callable:
        @wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except HTTPException:
                # Already an intentional HTTP error — re-raise as-is.
                raise
            except Exception as exc:
                logger.error(f"{err_info}: {exc}", exc_info=True)
                raise HTTPException(status_code=500, detail=str(exc))
        return wrapper
    return decorator
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@router.get("/lsdataset/disk-usage")
@catch_http_exceptions("Error getting disk usage")
async def get_lsdataset_disk_usage():
    """Report disk usage for the current working directory's filesystem."""
    return await run_in_executor(get_disk_usage, Path("."))
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@router.get("/lsdataset/datasets-with-details", response_model=DatasetsListResp)
@catch_http_exceptions("Error listing datasets")
async def list_datasets_with_details(
    dataset_type: str = Query(..., description="数据集类型"),
    detail_keys: Optional[List[str]] = Query(None, description="需要返回的详情键列表")
):
    """List datasets of the given type, optionally restricted to selected detail keys."""
    found = await run_in_executor(manager.list_datasets, dataset_type, detail_keys)
    return DatasetsListResp(datasets=found)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@router.get("/lsdataset/view/{dataset_type}/{repo_id}")
@catch_http_exceptions("Error getting dataset info")
async def view_dataset(dataset_type: str, repo_id: str):
    """Return information about a single dataset.

    Delegates to ``manager.get_dataset_info`` on the thread pool so the
    (potentially blocking) lookup never stalls the event loop.
    """
    return await run_in_executor(manager.get_dataset_info, dataset_type, repo_id)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
@router.get("/lsdataset/view/{dataset_type}/{repo_id}/{episode_index}")
@catch_http_exceptions("Error getting episode data")
async def view_episode(dataset_type: str, repo_id: str, episode_index: int):
    """Return the visualization data for one episode of a dataset."""
    return await run_in_executor(
        manager.get_episode_data, dataset_type, repo_id, episode_index
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@router.get("/lsdataset/view/{dataset_type}/{repo_id}/{episode_index}/{video_key}.mp4")
@catch_http_exceptions("Error getting episode videos")
async def view_episode_video(
    dataset_type: str,
    repo_id: str,
    episode_index: int,
    video_key: str,
):
    """Stream the MP4 video for one camera key of a dataset episode."""
    video_path = await run_in_executor(
        manager.get_episode_video, dataset_type, repo_id, episode_index, video_key
    )
    return FileResponse(video_path, media_type="video/mp4")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@router.get("/lsdataset/internal/config-initialized")
@catch_http_exceptions("Error checking config initialized")
async def internal_config_initialized() -> bool:
    """Tell whether the dataset configuration has been initialized yet."""
    # Plain attribute read — no executor hop needed.
    return manager.config_initialized
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@router.post("/lsdataset/internal/config", response_model=CommonOperationResp)
@catch_http_exceptions("Error configuring dataset")
async def internal_config_dataset(config: ConfigDatasetReq) -> CommonOperationResp:
    """Apply dataset configuration parameters via the manager."""
    await run_in_executor(manager.config_dataset, config)
    return CommonOperationResp(ok=True, message="数据集配置已更新")
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
@router.post("/lsdataset/internal/start_recording", response_model=CommonOperationResp)
@catch_http_exceptions("Error starting recording")
async def internal_start_recording(req: StartRecordingReq) -> CommonOperationResp:
    """Start recording into the dataset identified by type and repo id."""
    await run_in_executor(manager.start_recording, req.dataset_type, req.repo_id)
    return CommonOperationResp(ok=True, message="数据集记录已开始")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@router.post("/lsdataset/internal/stop_recording", response_model=CommonOperationResp)
@catch_http_exceptions("Error stopping recording")
async def internal_stop_recording(req: StopRecordingReq) -> CommonOperationResp:
    """Stop recording the dataset (``req.abort`` is forwarded to the manager)."""
    await run_in_executor(manager.stop_recording, req.abort)
    return CommonOperationResp(ok=True, message="数据集记录已停止")
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@router.post("/lsdataset/delete_episode", response_model=CommonOperationResp)
@catch_http_exceptions("Error deleting episode")
async def delete_episode(req: DeleteEpisodeReq) -> CommonOperationResp:
    """Delete one episode from a dataset."""
    await run_in_executor(
        manager.delete_episode, req.dataset_type, req.repo_id, req.episode_index
    )
    return CommonOperationResp(ok=True, message="数据集 episode 已删除")
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@router.post("/lsdataset/delete_dataset", response_model=CommonOperationResp)
@catch_http_exceptions("Error deleting dataset")
async def delete_dataset(req: DeleteDatasetReq) -> CommonOperationResp:
    """Delete an entire dataset."""
    await run_in_executor(manager.delete_dataset, req.dataset_type, req.repo_id)
    return CommonOperationResp(ok=True, message="数据集已删除")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# LDP 代理接口(对接灵生数据平台)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
@router.post("/lsdataset/ldp/login")
@catch_http_exceptions("Error proxying LDP login")
async def ldp_proxy_login(req: LdpLoginReq):
    """Proxy the LDP login endpoint; returns the access_token payload."""
    return await run_in_executor(ldp_login, req.ldp_base_url, req.phone, req.password)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
@router.post("/lsdataset/ldp/upload")
@catch_http_exceptions("Error proxying LDP upload")
async def ldp_proxy_upload(req: LdpUploadReq):
    """Proxy upload of local LCP datasets to LDP.

    Per the upstream contract, the collection device defaults to a
    hardware-derived machine code (device tree / DMI) and the data type
    defaults to ``lerobot``.
    """
    selected = [
        {"dataset_type": item.dataset_type, "repo_id": item.repo_id}
        for item in req.dataset_ids
    ]
    results = await run_in_executor(
        ldp_upload_datasets,
        req.ldp_base_url,
        req.access_token,
        selected,
        req.collection_device,
        req.data_type,
        req.source,
    )
    return {"results": results}
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
@router.post("/lsdataset/ldp/upload/precheck")
@catch_http_exceptions("Error proxying LDP upload precheck")
async def ldp_proxy_upload_precheck(req: LdpUploadPrecheckReq):
    """Pre-upload check: report whether the selected datasets may cause an overwrite."""
    selected = [
        {"dataset_type": item.dataset_type, "repo_id": item.repo_id}
        for item in req.dataset_ids
    ]
    results = await run_in_executor(
        ldp_precheck_upload_datasets,
        req.ldp_base_url,
        req.access_token,
        selected,
        req.collection_device,
        req.data_type,
    )
    return {"results": results}
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
@router.get("/lsdataset/ldp/upload/progress")
@catch_http_exceptions("Error getting LDP upload progress")
async def ldp_proxy_upload_progress(dataset_name: str):
    """Return the current upload progress for *dataset_name*.

    Only covers uploads started through ``/lsdataset/ldp/upload``.

    Fix: this was the only route in the module missing
    ``@catch_http_exceptions``, so unexpected errors bypassed the shared
    logging / HTTP-500 conversion used by every other endpoint.
    """
    return get_upload_progress(dataset_name)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Dataset domain (`LsRobotDataset`) and process-wide `LsDatasetManager`."""
|