dataset-toolkit 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataset_toolkit/__init__.py +8 -1
- dataset_toolkit/exporters/yolo_exporter.py +155 -0
- {dataset_toolkit-0.1.1.dist-info → dataset_toolkit-0.1.2.dist-info}/METADATA +1 -1
- {dataset_toolkit-0.1.1.dist-info → dataset_toolkit-0.1.2.dist-info}/RECORD +7 -6
- {dataset_toolkit-0.1.1.dist-info → dataset_toolkit-0.1.2.dist-info}/WHEEL +0 -0
- {dataset_toolkit-0.1.1.dist-info → dataset_toolkit-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {dataset_toolkit-0.1.1.dist-info → dataset_toolkit-0.1.2.dist-info}/top_level.txt +0 -0
dataset_toolkit/__init__.py
CHANGED
@@ -15,7 +15,7 @@ Dataset Toolkit - 计算机视觉数据集处理工具包
|
|
15
15
|
>>> export_to_coco(dataset, "output.json")
|
16
16
|
"""
|
17
17
|
|
18
|
-
__version__ = "0.1.1"
|
18
|
+
__version__ = "0.1.2"
|
19
19
|
__author__ = "wenxiang.han"
|
20
20
|
__email__ = "wenxiang.han@anker-in.com"
|
21
21
|
|
@@ -43,6 +43,11 @@ from dataset_toolkit.exporters.txt_exporter import (
|
|
43
43
|
export_to_txt
|
44
44
|
)
|
45
45
|
|
46
|
+
from dataset_toolkit.exporters.yolo_exporter import (
|
47
|
+
export_to_yolo_format,
|
48
|
+
export_to_yolo_and_txt
|
49
|
+
)
|
50
|
+
|
46
51
|
from dataset_toolkit.utils.coords import (
|
47
52
|
yolo_to_absolute_bbox
|
48
53
|
)
|
@@ -71,6 +76,8 @@ __all__ = [
|
|
71
76
|
# 导出器
|
72
77
|
"export_to_coco",
|
73
78
|
"export_to_txt",
|
79
|
+
"export_to_yolo_format",
|
80
|
+
"export_to_yolo_and_txt",
|
74
81
|
|
75
82
|
# 工具函数
|
76
83
|
"yolo_to_absolute_bbox",
|
@@ -0,0 +1,155 @@
|
|
1
|
+
# dataset_toolkit/exporters/yolo_exporter.py
|
2
|
+
"""
|
3
|
+
导出为 YOLO 格式(完整的 images/ + labels/ 目录结构)
|
4
|
+
"""
|
5
|
+
import os
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Optional
|
8
|
+
|
9
|
+
|
10
|
+
def export_to_yolo_format(
    dataset,
    output_dir: str,
    use_symlinks: bool = True,
    overwrite: bool = False
):
    """
    Export a dataset as a complete YOLO directory layout.

    Args:
        dataset: Object exposing ``images``. Each image must provide ``path``,
            ``width``, ``height`` and ``annotations``; each annotation must
            provide ``category_id`` and ``bbox`` as
            ``[x_min, y_min, width, height]`` in absolute pixels.
        output_dir: Destination directory; ``images/`` and ``labels/`` are
            created inside it.
        use_symlinks: Symlink the images (True) or copy them (False).
        overwrite: Replace image files that already exist in ``images/``.
            Label files are always rewritten.

    Returns:
        ``output_dir`` as a ``pathlib.Path``.

    Output layout:
        output_dir/
        ├── images/
        │   ├── img1.jpg
        │   └── img2.jpg
        └── labels/
            ├── img1.txt
            └── img2.txt

    A failure on one image is reported on stdout and counted; it does not
    abort the export of the remaining images.
    """
    output_path = Path(output_dir)
    images_dir = output_path / 'images'
    labels_dir = output_path / 'labels'

    images_dir.mkdir(parents=True, exist_ok=True)
    labels_dir.mkdir(parents=True, exist_ok=True)

    print(f"导出 YOLO 格式到: {output_path}")
    print(f" 使用软链接: {use_symlinks}")

    success_count = 0
    error_count = 0

    for img in dataset.images:
        try:
            img_path = Path(img.path)

            # Compute the label lines before touching the filesystem so that a
            # bad image size or malformed bbox cannot leave a truncated label
            # file (or an image without its labels) behind.
            label_lines = _yolo_label_lines(img)

            _place_image(img_path, images_dir / img_path.name, use_symlinks, overwrite)

            label_path = labels_dir / f"{img_path.stem}.txt"
            with open(label_path, 'w', encoding='utf-8') as f:
                f.writelines(label_lines)

            success_count += 1

        except Exception as e:
            # Deliberate best-effort: report the image and keep exporting.
            print(f"警告: 处理图片失败 {img.path}: {e}")
            error_count += 1

    print("✓ 导出完成:")
    print(f" 成功: {success_count} 张图片")
    if error_count > 0:
        print(f" 失败: {error_count} 张图片")
    print(f" 图片目录: {images_dir}")
    print(f" 标注目录: {labels_dir}")

    return output_path


def _yolo_label_lines(img):
    """Return the YOLO label lines (``class x_center y_center w h``) for one image."""
    lines = []
    for ann in img.annotations:
        # Internal format: [x_min, y_min, width, height] in absolute pixels.
        x_min, y_min, width, height = ann.bbox

        # YOLO format: box centre and size, normalised by the image size.
        x_center = (x_min + width / 2) / img.width
        y_center = (y_min + height / 2) / img.height
        norm_width = width / img.width
        norm_height = height / img.height

        lines.append(
            f"{ann.category_id} {x_center:.6f} {y_center:.6f} "
            f"{norm_width:.6f} {norm_height:.6f}\n"
        )
    return lines


def _place_image(source, target, use_symlinks, overwrite):
    """Put ``source`` at ``target`` as a symlink or a copy, honouring ``overwrite``."""
    import shutil

    target_is_link = target.is_symlink()
    # exists() follows symlinks, so it is False for a dangling link.
    target_resolves = target.exists()

    if target_resolves and not overwrite:
        # A usable file is already there; keep it.
        return

    if target_resolves and not target_is_link and os.path.samefile(source, target):
        # The source already lives at the target path; unlinking it would
        # destroy the image itself.
        return

    if target_is_link or target_resolves:
        # Also removes dangling links, which would otherwise make symlink_to
        # fail, and links to the source, which would make copy2 raise
        # SameFileError.
        target.unlink()

    if use_symlinks:
        target.symlink_to(source.resolve())
    else:
        shutil.copy2(source, target)
|
106
|
+
|
107
|
+
|
108
|
+
def export_to_yolo_and_txt(
    dataset,
    yolo_dir: str,
    txt_file: str,
    use_symlinks: bool = True,
    use_relative_paths: bool = False
):
    """
    Export a dataset in YOLO layout and write a matching image-list file.

    Args:
        dataset: Dataset object; its ``images`` are iterated and each
            image's ``path`` is read.
        yolo_dir: Output directory for the YOLO layout.
        txt_file: Path of the list file to write, one image path per line.
        use_symlinks: Forwarded to ``export_to_yolo_format``.
        use_relative_paths: Write paths relative to the list file's
            directory instead of absolute paths.

    Returns:
        The YOLO output directory as returned by ``export_to_yolo_format``.
    """
    # Step 1: build the images/ + labels/ tree.
    yolo_path = export_to_yolo_format(dataset, yolo_dir, use_symlinks=use_symlinks)

    # Step 2: write the list file; entries point into <yolo_dir>/images.
    list_path = Path(txt_file)
    list_dir = list_path.parent
    list_dir.mkdir(parents=True, exist_ok=True)
    yolo_images = yolo_path / 'images'

    print(f"\n生成 txt 列表: {txt_file}")

    with open(txt_file, 'w') as out:
        for entry in dataset.images:
            # Path inside the YOLO tree (possibly a symlink).
            exported = yolo_images / Path(entry.path).name

            if use_relative_paths:
                line = os.path.relpath(exported, list_dir)
            else:
                # absolute() rather than resolve(): keep the path inside the
                # YOLO tree instead of following a symlink back to the source.
                line = str(exported.absolute())

            out.write(f"{line}\n")

    print(f"✓ txt 列表已生成: {len(dataset.images)} 行")

    return yolo_path
|
155
|
+
|
@@ -1,17 +1,18 @@
|
|
1
|
-
dataset_toolkit/__init__.py,sha256=
|
1
|
+
dataset_toolkit/__init__.py,sha256=3KUOm1r2ldAGf6V0zPbx69QaoJvC9fsuasAvstcN4Vs,1846
|
2
2
|
dataset_toolkit/models.py,sha256=9HD2lAOPuEytFb1qRejODLJAD-uKHc8Ya1n9nbGhRpg,830
|
3
3
|
dataset_toolkit/pipeline.py,sha256=iBJD7SemEVFTwzHxRQrjpUIQQcVdPSZnD4sB_y56Md0,5697
|
4
4
|
dataset_toolkit/exporters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
dataset_toolkit/exporters/coco_exporter.py,sha256=l5sfj7rOcvcMC0-4LNOEJ4PeklGQORDflU_um5GGnxA,2120
|
6
6
|
dataset_toolkit/exporters/txt_exporter.py,sha256=9nTWs6M89MdKJhlODtmfzeZqWkliXac9NMWPgVUrE7c,1246
|
7
|
+
dataset_toolkit/exporters/yolo_exporter.py,sha256=g0jaY6cg7I4ZfxXkE_vmK87OS1-DP6dXEjsP2iVP9D4,5144
|
7
8
|
dataset_toolkit/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
9
|
dataset_toolkit/loaders/local_loader.py,sha256=SCOYG5pursEIL_m3QYGcm-2skXoapiOA4yhqqa2wrDM,7468
|
9
10
|
dataset_toolkit/processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
11
|
dataset_toolkit/processors/merger.py,sha256=h8qQNgSmkPrhoQ3QiWEyIl11CmmjT5K1-8TzNb7_jbk,2834
|
11
12
|
dataset_toolkit/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
13
|
dataset_toolkit/utils/coords.py,sha256=GtTQz2gFyFQfXhKfecI8tzqWFjraJY6Xo85-kRXYAYc,614
|
13
|
-
dataset_toolkit-0.1.
|
14
|
-
dataset_toolkit-0.1.
|
15
|
-
dataset_toolkit-0.1.
|
16
|
-
dataset_toolkit-0.1.
|
17
|
-
dataset_toolkit-0.1.
|
14
|
+
dataset_toolkit-0.1.2.dist-info/licenses/LICENSE,sha256=8_up1FX6vk2DRcusQEZ4pWJGkgkjvEkD14xB1hdLe3c,1067
|
15
|
+
dataset_toolkit-0.1.2.dist-info/METADATA,sha256=qH0LX-6NPdZEd0htGIxuIMHueDmHxRsZmWbrGnbpEtA,7236
|
16
|
+
dataset_toolkit-0.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
17
|
+
dataset_toolkit-0.1.2.dist-info/top_level.txt,sha256=B4D5vMLjUNJBZDdL7Utc0FYIfYoWbzyIGBMVYaeMd3U,16
|
18
|
+
dataset_toolkit-0.1.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|