segment-toolkit 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- segment_toolkit/__init__.py +60 -0
- segment_toolkit/cli.py +192 -0
- segment_toolkit/helpers.py +201 -0
- segment_toolkit/source.py +566 -0
- segment_toolkit-1.0.0.dist-info/METADATA +217 -0
- segment_toolkit-1.0.0.dist-info/RECORD +9 -0
- segment_toolkit-1.0.0.dist-info/WHEEL +5 -0
- segment_toolkit-1.0.0.dist-info/entry_points.txt +2 -0
- segment_toolkit-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Segment Toolkit: A library and CLI tool for converting binary segmentation masks to YOLO labels and vice versa.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
__version__ = "1.0.0"
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import subprocess
|
|
9
|
+
|
|
10
|
+
def _ensure_dependencies():
    """
    Check for required third-party modules and try to install missing ones via pip.

    Every module in the table below is imported; the ones that raise
    ImportError are collected and installed with
    ``<sys.executable> -m pip install ...``. All messages go to stderr.
    A failed installation is reported but never raised, so the submodule
    imports that follow surface the real ImportError.

    Setting the environment variable ``SEGMENT_TOOLKIT_NO_AUTO_INSTALL`` to a
    non-empty value skips the pip step and only reports what is missing.
    """
    import importlib
    import os

    # import name -> distribution name to hand to pip
    dependencies = {
        "numpy": "numpy",
        "cv2": "opencv-python",
        "PIL": "pillow",
        "pandas": "pandas",
        "matplotlib": "matplotlib",
    }

    missing = []
    for module, pip_name in dependencies.items():
        try:
            importlib.import_module(module)
        except ImportError:
            missing.append(pip_name)

    if not missing:
        return

    # Name the exact packages: an installed wheel ships no requirements.txt
    # that the user could pass to pip.
    manual_hint = (
        "[segment_toolkit] Please install them manually using: "
        f"pip install {' '.join(missing)}"
    )

    print(f"[segment_toolkit] Missing required package(s): {', '.join(missing)}", file=sys.stderr)

    # NOTE(security): running pip at import time modifies the user's
    # environment without explicit consent; this switch lets callers opt out.
    if os.environ.get("SEGMENT_TOOLKIT_NO_AUTO_INSTALL"):
        print(manual_hint, file=sys.stderr)
        return

    print("[segment_toolkit] Attempting to auto-install dependencies...", file=sys.stderr)
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", *missing])
    except Exception as err:
        # Deliberately broad: this runs during package import and must stay
        # best-effort rather than abort the import with an unrelated error.
        print(f"[segment_toolkit] Error: Auto-installation of dependencies failed: {err}", file=sys.stderr)
        print(manual_hint, file=sys.stderr)
        return

    # Packages installed after interpreter start may be hidden by the import
    # system's finder caches; refresh them before the submodule imports run.
    importlib.invalidate_caches()
    print("[segment_toolkit] Dependencies installed successfully.", file=sys.stderr)
|
|
37
|
+
|
|
38
|
+
# Check and install dependencies before importing other submodules
|
|
39
|
+
_ensure_dependencies()
|
|
40
|
+
|
|
41
|
+
from .source import MaskToYoloConverter, YoloToMaskConverter
|
|
42
|
+
from .helpers import (
|
|
43
|
+
safe_read_image,
|
|
44
|
+
preprocess_image,
|
|
45
|
+
get_largest_contour,
|
|
46
|
+
calculate_bbox,
|
|
47
|
+
normalize_coordinates,
|
|
48
|
+
denormalize_coordinates,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
__all__ = [
|
|
52
|
+
"MaskToYoloConverter",
|
|
53
|
+
"YoloToMaskConverter",
|
|
54
|
+
"safe_read_image",
|
|
55
|
+
"preprocess_image",
|
|
56
|
+
"get_largest_contour",
|
|
57
|
+
"calculate_bbox",
|
|
58
|
+
"normalize_coordinates",
|
|
59
|
+
"denormalize_coordinates",
|
|
60
|
+
]
|
segment_toolkit/cli.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command Line Interface (CLI) for segment_toolkit.
|
|
3
|
+
Exposes mask-to-yolo, yolo-to-mask, split, and visualize commands.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import sys
|
|
8
|
+
import logging
|
|
9
|
+
from typing import List, Optional
|
|
10
|
+
|
|
11
|
+
from .source import MaskToYoloConverter, YoloToMaskConverter
|
|
12
|
+
|
|
13
|
+
# Configure basic logging
|
|
14
|
+
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def parse_args(args: List[str]) -> argparse.Namespace:
    """
    Build the segment_toolkit parser and parse the given argument list.

    Four subcommands are registered: ``mask-to-yolo``, ``yolo-to-mask``,
    ``split`` and ``visualize``. The chosen one is stored in ``command``.

    Args:
        args: Argument strings, without the program name.

    Returns:
        argparse.Namespace: The parsed options.
    """
    root = argparse.ArgumentParser(
        description="Segment Toolkit: Convert segmentation masks to/from YOLO format annotations."
    )
    commands = root.add_subparsers(dest="command", required=True, help="Subcommands")

    def add_text_options(sub, options, **extra):
        # Register a run of plain string options in the given order.
        for flag, text in options:
            sub.add_argument(flag, type=str, help=text, **extra)

    def add_resize(sub, text):
        # WIDTH HEIGHT pair shared by three subcommands; fresh default list each time.
        sub.add_argument(
            "--resize",
            type=int,
            nargs=2,
            default=[640, 640],
            metavar=("WIDTH", "HEIGHT"),
            help=text,
        )

    # subcommand: mask-to-yolo
    to_yolo = commands.add_parser(
        "mask-to-yolo", help="Convert binary mask(s) to YOLO format labels."
    )
    add_text_options(
        to_yolo,
        [
            ("--image", "Path to a single input image."),
            ("--mask", "Path to a single input binary mask."),
            ("--output-txt", "Output file path for single YOLO label txt."),
            ("--image-dir", "Directory containing input images."),
            ("--mask-dir", "Directory containing input masks."),
            ("--output-dir", "Directory to save generated YOLO label files."),
        ],
    )
    to_yolo.add_argument("--class-id", type=int, default=0, help="Default Class ID to write (default: 0).")
    add_text_options(
        to_yolo,
        [("--ground-truth", "Path to GroundTruth.csv mapping images to multi-class columns.")],
    )
    to_yolo.add_argument(
        "--rotated",
        action="store_true",
        help="Use rotated minimum area rectangle (minAreaRect) instead of axis-aligned.",
    )
    add_resize(to_yolo, "Target dimensions for resizing (default: 640 640).")

    # subcommand: yolo-to-mask
    to_mask = commands.add_parser(
        "yolo-to-mask", help="Convert YOLO format label(s) to binary mask(s)."
    )
    add_text_options(
        to_mask,
        [
            ("--label", "Path to a single input YOLO label txt file."),
            ("--output-mask", "Output path for single binary mask png."),
            ("--label-dir", "Directory containing YOLO label txt files."),
            ("--output-dir", "Directory to save generated binary mask png files."),
        ],
    )
    add_resize(to_mask, "Output mask dimensions (default: 640 640).")

    # subcommand: split
    splitter = commands.add_parser(
        "split", help="Randomly split a dataset of images and labels into train/test subfolders."
    )
    add_text_options(
        splitter,
        [
            ("--images", "Directory of source images."),
            ("--labels", "Directory of source YOLO label txt files."),
            ("--output", "Root directory for split dataset outputs."),
        ],
        required=True,
    )
    splitter.add_argument(
        "--ratio", type=float, default=0.8, help="Split ratio for training partition (default: 0.8)."
    )
    splitter.add_argument("--seed", type=int, default=42, help="Seed value for reproduction (default: 42).")

    # subcommand: visualize
    visualizer = commands.add_parser(
        "visualize", help="Draw bounding boxes from a YOLO label file onto the source image."
    )
    add_text_options(
        visualizer,
        [
            ("--image", "Path to the source image."),
            ("--label", "Path to the YOLO label file."),
            ("--output", "Path to save output visualization image."),
        ],
        required=True,
    )
    add_resize(visualizer, "Resize image for visualization (default: 640 640).")

    return root.parse_args(args)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def main(args: Optional[List[str]] = None) -> int:
    """
    Run the command-line interface and return a process exit code.

    Args:
        args: Argument strings without the program name. When None,
            ``sys.argv[1:]`` is used.

    Returns:
        int: 0 on success, 1 when the arguments are incomplete, a single
        conversion or visualization reports failure, or an exception is
        raised while executing the command.
    """
    argv = sys.argv[1:] if args is None else args

    try:
        parsed = parse_args(argv)
        command = parsed.command

        if command == "mask-to-yolo":
            box_style = "standard"
            if parsed.rotated:
                box_style = "rotated"
            converter = MaskToYoloConverter(
                target_size=(parsed.resize[0], parsed.resize[1]),
                bbox_type=box_style,
            )

            # Single-file options take precedence over directory options.
            single_opts = (parsed.image, parsed.mask, parsed.output_txt)
            folder_opts = (parsed.image_dir, parsed.mask_dir, parsed.output_dir)

            if any(single_opts):
                if not all(single_opts):
                    logger.error("Error: --image, --mask, and --output-txt must all be specified for single conversion.")
                    return 1
                ok = converter.convert_single(
                    parsed.image, parsed.mask, parsed.output_txt, class_id=parsed.class_id
                )
                return 0 if ok else 1

            if any(folder_opts):
                if not all(folder_opts):
                    logger.error("Error: --image-dir, --mask-dir, and --output-dir must all be specified for folder conversion.")
                    return 1
                converter.convert_dataset(
                    parsed.image_dir,
                    parsed.mask_dir,
                    parsed.output_dir,
                    default_class_id=parsed.class_id,
                    ground_truth=parsed.ground_truth,
                )
                return 0

            logger.error("Error: Must specify either single file arguments (--image, --mask, --output-txt) or directory arguments (--image-dir, --mask-dir, --output-dir).")
            return 1

        if command == "yolo-to-mask":
            converter = YoloToMaskConverter(target_size=(parsed.resize[0], parsed.resize[1]))

            single_opts = (parsed.label, parsed.output_mask)
            folder_opts = (parsed.label_dir, parsed.output_dir)

            if any(single_opts):
                if not all(single_opts):
                    logger.error("Error: Both --label and --output-mask must be specified for single conversion.")
                    return 1
                ok = converter.convert_single(parsed.label, parsed.output_mask)
                return 0 if ok else 1

            if any(folder_opts):
                if not all(folder_opts):
                    logger.error("Error: Both --label-dir and --output-dir must be specified for folder conversion.")
                    return 1
                converter.convert_dataset(parsed.label_dir, parsed.output_dir)
                return 0

            logger.error("Error: Must specify either single file arguments (--label, --output-mask) or directory arguments (--label-dir, --output-dir).")
            return 1

        if command == "split":
            converter = MaskToYoloConverter()
            converter.split_dataset(
                images_dir=parsed.images,
                labels_dir=parsed.labels,
                output_dataset_dir=parsed.output,
                split_ratio=parsed.ratio,
                seed=parsed.seed,
            )
            return 0

        if command == "visualize":
            converter = YoloToMaskConverter(target_size=(parsed.resize[0], parsed.resize[1]))
            ok = converter.visualize_label(parsed.image, parsed.label, parsed.output)
            return 0 if ok else 1

    except Exception as exc:
        # Top-level boundary: report the failure and turn it into an exit code.
        logger.error(f"Execution failed: {str(exc)}")
        return 1

    return 0
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
if __name__ == "__main__":
|
|
192
|
+
sys.exit(main())
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Helper utilities for segment_toolkit.
|
|
3
|
+
Provides image preprocessing, contour extraction, bounding box computation,
|
|
4
|
+
coordinate normalization, and visualization helpers.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from typing import Tuple, Optional, Union
|
|
9
|
+
import cv2
|
|
10
|
+
import numpy as np
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def safe_read_image(path: Union[str, os.PathLike]) -> np.ndarray:
    """
    Read an image with OpenCV, turning failures into explicit exceptions.

    Args:
        path: Path to the image file.

    Returns:
        np.ndarray: The array returned by ``cv2.imread``.

    Raises:
        FileNotFoundError: If the path does not exist.
        ValueError: If ``cv2.imread`` raises, or returns None because the file
            could not be decoded.
    """
    path_str = str(path)
    if not os.path.exists(path_str):
        raise FileNotFoundError(f"Image file not found: {path_str}")

    # Only the OpenCV call sits inside the try. The decode check below must
    # not be caught and re-wrapped, which used to produce a doubled message
    # ("Error reading image X: Failed to decode image: X").
    try:
        image = cv2.imread(path_str)
    except Exception as e:
        raise ValueError(f"Error reading image {path_str}: {str(e)}") from e

    if image is None:
        raise ValueError(f"Failed to decode image: {path_str}")
    return image
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def preprocess_image(image: np.ndarray, target_size: Tuple[int, int], is_mask: bool = False) -> np.ndarray:
    """
    Resize an image or mask to ``target_size`` with ``cv2.resize``.

    Masks are resized with nearest-neighbor interpolation and regular images
    with linear interpolation.

    Args:
        image: Input image array.
        target_size: Target (width, height), passed to ``cv2.resize`` as-is.
        is_mask: True when the input is a mask.

    Returns:
        np.ndarray: The resized array.

    Raises:
        RuntimeError: If the resize fails for any reason.
    """
    try:
        if is_mask:
            mode = cv2.INTER_NEAREST
        else:
            mode = cv2.INTER_LINEAR
        return cv2.resize(image, target_size, interpolation=mode)
    except Exception as e:
        raise RuntimeError(f"Failed to preprocess/resize image: {str(e)}") from e
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_largest_contour(mask: np.ndarray) -> Optional[np.ndarray]:
    """
    Find the external contours of a mask and return the largest one by area.

    The mask is reduced to a single channel if needed, thresholded at 127 and
    searched with ``cv2.RETR_EXTERNAL``.

    Args:
        mask: Mask array, either 2-D or 3-D with 1, 3 or 4 channels.

    Returns:
        Optional[np.ndarray]: The contour with the largest ``cv2.contourArea``,
        or None if no contours are found.

    Raises:
        RuntimeError: If any OpenCV step fails.
    """
    try:
        # Reduce to a single channel; pick the conversion that matches the
        # channel count instead of assuming BGR for every 3-D array.
        if mask.ndim == 3:
            channels = mask.shape[2]
            if channels == 1:
                mask = np.ascontiguousarray(mask[:, :, 0])
            elif channels == 4:
                mask = cv2.cvtColor(mask, cv2.COLOR_BGRA2GRAY)
            else:
                mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)

        # Threshold to ensure binary mask
        _, binary = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)

        # OpenCV 3.x returns (image, contours, hierarchy) and 4.x returns
        # (contours, hierarchy); the contours are second from the end in both.
        found = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contours = found[-2]
        if len(contours) == 0:
            return None

        return max(contours, key=cv2.contourArea)
    except Exception as e:
        raise RuntimeError(f"Error extracting contour from mask: {str(e)}") from e
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def calculate_bbox(contour: np.ndarray, bbox_type: str = "standard") -> Tuple[float, float, float, float]:
    """
    Compute a center-based bounding box for a contour.

    With ``bbox_type == "rotated"`` the center and size come from
    ``cv2.minAreaRect`` and the rectangle's angle is discarded. Any other
    value uses the axis-aligned ``cv2.boundingRect``.

    Args:
        contour: Contour points array.
        bbox_type: 'rotated' for the minimum-area rectangle; anything else
            selects the axis-aligned box.

    Returns:
        Tuple[float, float, float, float]: (x_center, y_center, w, h) in pixel coordinates.

    Raises:
        RuntimeError: If the OpenCV computation fails.
    """
    try:
        if bbox_type != "rotated":
            # boundingRect yields the top-left corner plus size.
            left, top, width, height = cv2.boundingRect(contour)
            cx = left + width / 2.0
            cy = top + height / 2.0
        else:
            # minAreaRect yields ((cx, cy), (w, h), angle).
            (cx, cy), (width, height), _angle = cv2.minAreaRect(contour)
        return float(cx), float(cy), float(width), float(height)
    except Exception as e:
        raise RuntimeError(f"Error calculating bounding box: {str(e)}") from e
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def normalize_coordinates(
    x: float, y: float, w: float, h: float, img_width: int, img_height: int
) -> Tuple[float, float, float, float]:
    """
    Scale a pixel-space center box by the image size and clamp each value to [0, 1].

    Args:
        x: Center x in pixels.
        y: Center y in pixels.
        w: Box width in pixels.
        h: Box height in pixels.
        img_width: Image width used to scale x and w.
        img_height: Image height used to scale y and h.

    Returns:
        Tuple[float, float, float, float]: (x_norm, y_norm, w_norm, h_norm).
    """

    def _unit(value: float) -> float:
        # Clamp a ratio into the closed interval [0, 1].
        return max(0.0, min(1.0, value))

    return (
        _unit(x / img_width),
        _unit(y / img_height),
        _unit(w / img_width),
        _unit(h / img_height),
    )
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def denormalize_coordinates(
    x_norm: float, y_norm: float, w_norm: float, h_norm: float, img_width: int, img_height: int
) -> Tuple[int, int, int, int]:
    """
    Convert a normalized center-based box to clamped pixel corner coordinates.

    Corners are rounded to the nearest pixel rather than truncated. Normalized
    values that were rounded to a few decimals often land just below the
    intended integer (e.g. 14.99968), and truncation would move that corner
    one pixel inwards.

    Args:
        x_norm: Normalized center x.
        y_norm: Normalized center y.
        w_norm: Normalized width.
        h_norm: Normalized height.
        img_width: Target image width.
        img_height: Target image height.

    Returns:
        Tuple[int, int, int, int]: (x1, y1, x2, y2) pixel corners, each clamped
        to [0, img_width - 1] or [0, img_height - 1].
    """
    x_center = x_norm * img_width
    y_center = y_norm * img_height
    half_w = (w_norm * img_width) / 2
    half_h = (h_norm * img_height) / 2

    def _to_pixel(value: float, limit: int) -> int:
        # Round to the nearest pixel, then clamp to the valid index range.
        return max(0, min(limit - 1, int(round(value))))

    x1 = _to_pixel(x_center - half_w, img_width)
    y1 = _to_pixel(y_center - half_h, img_height)
    x2 = _to_pixel(x_center + half_w, img_width)
    y2 = _to_pixel(y_center + half_h, img_height)

    return x1, y1, x2, y2
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def draw_bbox_on_image(
    image: np.ndarray, bbox: Tuple[int, int, int, int], color: Tuple[int, int, int] = (0, 255, 255), thickness: int = 2
) -> np.ndarray:
    """
    Draw a rectangle for ``bbox`` with ``cv2.rectangle`` and return the image.

    The array passed in is the one handed to OpenCV and the one returned;
    no copy is made here.

    Args:
        image: Image array to draw on.
        bbox: (x1, y1, x2, y2) pixel coordinates of two opposite corners.
        color: Box color tuple (default: (0, 255, 255)).
        thickness: Box border thickness.

    Returns:
        np.ndarray: The same ``image`` object that was passed in.
    """
    left, top, right, bottom = bbox
    first_corner = (left, top)
    opposite_corner = (right, bottom)
    cv2.rectangle(image, first_corner, opposite_corner, color, thickness)
    return image
|
|
201
|
+
|
|
@@ -0,0 +1,566 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core conversion classes for segment_toolkit.
|
|
3
|
+
Includes MaskToYoloConverter and YoloToMaskConverter.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import shutil
|
|
8
|
+
import random
|
|
9
|
+
import logging
|
|
10
|
+
from typing import Tuple, Optional, Dict, List, Union
|
|
11
|
+
import numpy as np
|
|
12
|
+
import pandas as pd
|
|
13
|
+
from PIL import Image
|
|
14
|
+
import cv2
|
|
15
|
+
|
|
16
|
+
from .helpers import (
|
|
17
|
+
safe_read_image,
|
|
18
|
+
preprocess_image,
|
|
19
|
+
get_largest_contour,
|
|
20
|
+
calculate_bbox,
|
|
21
|
+
normalize_coordinates,
|
|
22
|
+
denormalize_coordinates,
|
|
23
|
+
draw_bbox_on_image,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class MaskToYoloConverter:
|
|
30
|
+
"""
|
|
31
|
+
Converts binary segmentation masks into YOLO bounding box labels.
|
|
32
|
+
Supports single file conversion, full dataset directory batch conversion,
|
|
33
|
+
ground truth CSV classification mapping, and train/test dataset splitting.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
def __init__(self, target_size: Tuple[int, int] = (640, 640), bbox_type: str = "standard"):
    """
    Store the conversion settings on the instance.

    Args:
        target_size: (width, height) that images and masks are resized to.
        bbox_type: Bounding box style, either 'standard' or 'rotated'.
    """
    self.target_size, self.bbox_type = target_size, bbox_type
|
|
46
|
+
|
|
47
|
+
def convert_single(
    self, image_path: str, mask_path: str, output_txt_path: str, class_id: int = 0
) -> bool:
    """
    Convert one image/mask pair into a YOLO bounding box label file.

    The mask is resized to ``self.target_size``, its largest contour is boxed
    according to ``self.bbox_type``, and one line
    ``class_id x_center y_center width height`` (normalized, 6 decimals) is
    written, overwriting any existing file.

    Args:
        image_path: Path to the input image. It is read only to confirm that
            it exists and decodes.
        mask_path: Path to the input binary mask.
        output_txt_path: Path of the YOLO text file to write.
        class_id: The class ID to write to the label file.

    Returns:
        bool: True if the label was written. False if the mask has no contour
        or any step raised; failures are logged, not propagated.
    """
    try:
        # Validate the image. Its pixels and original size are not needed,
        # because normalization is relative to the resized mask.
        safe_read_image(image_path)

        mask = safe_read_image(mask_path)
        resized_mask = preprocess_image(mask, self.target_size, is_mask=True)

        contour = get_largest_contour(resized_mask)
        if contour is None:
            logger.warning(f"No contour/objects found in mask: {mask_path}")
            return False

        # NOTE(review): for bbox_type 'rotated' calculate_bbox discards the
        # rectangle angle, so the label is still a 5-field row.
        x_center, y_center, w, h = calculate_bbox(contour, bbox_type=self.bbox_type)

        target_w, target_h = self.target_size[0], self.target_size[1]
        x_norm, y_norm, w_norm, h_norm = normalize_coordinates(
            x_center, y_center, w, h, target_w, target_h
        )

        # abspath makes dirname non-empty even for a bare file name.
        os.makedirs(os.path.dirname(os.path.abspath(output_txt_path)), exist_ok=True)

        # One bounding box per file, so the file is overwritten rather than appended.
        with open(output_txt_path, "w", encoding="utf-8") as f:
            f.write(f"{class_id} {x_norm:.6f} {y_norm:.6f} {w_norm:.6f} {h_norm:.6f}\n")

        logger.info(f"Successfully generated label: {output_txt_path}")
        return True

    except Exception as e:
        logger.error(f"Failed to convert mask {mask_path} to YOLO label: {str(e)}")
        return False
|
|
103
|
+
|
|
104
|
+
def _load_ground_truth(self, file_path: str) -> Dict[str, int]:
    """
    Parse a ground-truth CSV or JSON file into an image-ID -> class-ID mapping.

    Files whose name ends in ``.json`` (any letter case) are read as JSON;
    everything else is read with ``pandas.read_csv``.

    JSON may be a dict keyed by image ID (value: int, digit string, ISIC class
    name, or a dict of indicator columns) or a list of record dicts. In a
    record, the first key containing "image", "file", "name" or "id" is taken
    as the image ID, falling back to the record's first key.

    In a CSV the first column is the image ID and every other column is a
    class indicator; the first column equal to 1 in a row decides the class.
    Rows with no indicator set are left out of the mapping.

    Args:
        file_path: Path to the CSV or JSON file.

    Returns:
        Dict[str, int]: Stripped image ID -> class ID.
    """
    import json

    class_lookup: Dict[str, int] = {}
    isic_mapping = {
        "AKIEC": 0,
        "BCC": 1,
        "BKL": 2,
        "DF": 3,
        "MEL": 4,
        "NV": 5,
        "VASC": 6,
    }

    def is_set(value) -> bool:
        # An indicator counts as set for 1, 1.0 or the string "1".
        return value == 1 or value == 1.0 or str(value).strip() == "1"

    def get_class_id_from_indicators(indicators: dict) -> int:
        # One-hot ISIC columns win; otherwise look for an explicit class field.
        for col, val in indicators.items():
            if is_set(val) and col in isic_mapping:
                return isic_mapping[col]
        for key in ["class_id", "class", "label"]:
            if key in indicators:
                v = indicators[key]
                if isinstance(v, int):
                    return v
                elif str(v).isdigit():
                    return int(v)
                elif str(v) in isic_mapping:
                    return isic_mapping[str(v)]
        return 0

    # Compare case-insensitively so "labels.JSON" is not sent to the CSV parser.
    if file_path.lower().endswith(".json"):
        logger.info(f"Parsing ground truth metadata JSON: {file_path}")
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        if isinstance(data, dict):
            for img_id, val in data.items():
                img_id = str(img_id).strip()
                if isinstance(val, dict):
                    class_lookup[img_id] = get_class_id_from_indicators(val)
                elif isinstance(val, int):
                    class_lookup[img_id] = val
                elif str(val).isdigit():
                    class_lookup[img_id] = int(val)
                elif str(val) in isic_mapping:
                    class_lookup[img_id] = isic_mapping[str(val)]
                else:
                    class_lookup[img_id] = 0
        elif isinstance(data, list):
            for item in data:
                if not isinstance(item, dict) or not item:
                    continue
                keys = list(item.keys())
                id_key = keys[0]
                for k in keys:
                    if any(x in k.lower() for x in ["image", "file", "name", "id"]):
                        id_key = k
                        break
                img_id = str(item[id_key]).strip()
                remaining = {k: v for k, v in item.items() if k != id_key}
                class_lookup[img_id] = get_class_id_from_indicators(remaining)
    else:
        logger.info(f"Parsing ground truth metadata CSV: {file_path}")
        df = pd.read_csv(file_path)
        id_col = df.columns[0]
        class_cols = list(df.columns[1:])

        # ISIC columns keep their fixed IDs. Any other column starts from its
        # position-based ID and is bumped past IDs already in use, so two
        # columns can never share a class (previously "BCC, foo" both got 1).
        taken = {isic_mapping[col] for col in class_cols if col in isic_mapping}
        col_to_class = {}
        for col in class_cols:
            if col in isic_mapping:
                col_to_class[col] = isic_mapping[col]
                continue
            candidate = len(col_to_class)
            while candidate in taken:
                candidate += 1
            col_to_class[col] = candidate
            taken.add(candidate)

        for _, row in df.iterrows():
            img_id = str(row[id_col]).strip()
            for col in class_cols:
                if is_set(row[col]):
                    class_lookup[img_id] = col_to_class[col]
                    break
    return class_lookup
|
|
190
|
+
|
|
191
|
+
def convert_dataset(
    self,
    images_dir: str,
    masks_dir: str,
    output_labels_dir: str,
    default_class_id: int = 0,
    ground_truth: Optional[str] = None,
) -> int:
    """
    Batch convert a folder of masks into YOLO labels.

    For every .jpg/.jpeg/.png/.bmp file in ``images_dir`` a mask is looked up
    in ``masks_dir``, first by a fixed list of name patterns and then by name
    prefix. ``convert_single`` then writes ``<base_name>.txt`` into
    ``output_labels_dir``.

    Args:
        images_dir: Directory containing original images.
        masks_dir: Directory containing binary masks.
        output_labels_dir: Directory where YOLO labels will be written (created if missing).
        default_class_id: Class ID to use if no ground truth metadata is provided or matches.
        ground_truth: Optional path to GroundTruth CSV or JSON mapping filenames to classes.

    Returns:
        int: Number of successfully generated label files.

    Raises:
        FileNotFoundError: If ``images_dir`` or ``masks_dir`` does not exist.
        Exception: Any other error is logged and re-raised unchanged.
    """
    try:
        if not os.path.exists(images_dir):
            raise FileNotFoundError(f"Images directory not found: {images_dir}")
        if not os.path.exists(masks_dir):
            raise FileNotFoundError(f"Masks directory not found: {masks_dir}")

        os.makedirs(output_labels_dir, exist_ok=True)

        # Parse Ground Truth CSV or JSON if provided
        class_lookup = {}
        if ground_truth:
            class_lookup = self._load_ground_truth(ground_truth)

        image_files = [
            f
            for f in os.listdir(images_dir)
            if os.path.isfile(os.path.join(images_dir, f))
            and f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))
        ]

        logger.info(f"Found {len(image_files)} images to convert.")

        # The fallback search used to re-list the mask directory for every
        # unmatched image. The listing is now read on first use and cached,
        # and it is sorted so the chosen mask does not depend on OS order.
        mask_listing: Optional[List[str]] = None
        success_count = 0

        for img_file in image_files:
            base_name, _ = os.path.splitext(img_file)

            # Try the known naming variants, extension by extension.
            stems = (
                base_name,
                f"{base_name}_segmentation",
                base_name.replace("_segmentation", ""),
            )
            mask_file = None
            for mask_ext in [".png", ".jpg", ".jpeg", "_segmentation.png", "_mask.png"]:
                for stem in stems:
                    candidate = os.path.join(masks_dir, f"{stem}{mask_ext}")
                    if os.path.exists(candidate):
                        mask_file = candidate
                        break
                if mask_file:
                    break

            # Fallback: first mask whose name starts with the image base name.
            # NOTE(review): a prefix match can pair "img1" with "img10_mask.png".
            if not mask_file:
                if mask_listing is None:
                    mask_listing = sorted(os.listdir(masks_dir))
                for m in mask_listing:
                    if m.startswith(base_name) and m.lower().endswith((".png", ".jpg", ".jpeg")):
                        mask_file = os.path.join(masks_dir, m)
                        break

            if not mask_file:
                logger.warning(f"No matching mask found for image: {img_file}")
                continue

            # Determine Class ID: exact base name first, then the first
            # ground-truth key that is a prefix of the base name or vice versa.
            class_id = default_class_id
            if ground_truth:
                if base_name in class_lookup:
                    class_id = class_lookup[base_name]
                else:
                    matched = False
                    for k, v in class_lookup.items():
                        if base_name.startswith(k) or k.startswith(base_name):
                            class_id = v
                            matched = True
                            break
                    if not matched:
                        logger.debug(f"Metadata not found in CSV/JSON for {base_name}. Using default Class ID {default_class_id}")

            output_txt = os.path.join(output_labels_dir, f"{base_name}.txt")
            if self.convert_single(
                os.path.join(images_dir, img_file), mask_file, output_txt, class_id=class_id
            ):
                success_count += 1

        logger.info(f"Dataset conversion completed. Generated {success_count} label files.")
        return success_count

    except Exception as e:
        logger.error(f"Error during dataset batch conversion: {str(e)}")
        raise
|
|
298
|
+
|
|
299
|
+
def split_dataset(
    self,
    images_dir: str,
    labels_dir: str,
    output_dataset_dir: str,
    split_ratio: float = 0.8,
    seed: int = 42,
) -> Dict[str, int]:
    """
    Splits images and labels into train/test splits under the output directory.
    Generates standard YOLO data.yaml configuration.

    An image is paired with the label file that shares its base name
    (``<base>.txt`` inside ``labels_dir``); images without a label are skipped
    with a warning. Pairs are shuffled with a private RNG seeded by ``seed``,
    then the first ``int(len(pairs) * split_ratio)`` pairs go to ``train`` and
    the rest to ``test``. Files are copied, not moved.

    Args:
        images_dir: Directory containing images.
        labels_dir: Directory containing generated text labels.
        output_dataset_dir: Root output directory for structured dataset.
        split_ratio: Fraction of data allocated for training, in [0.0, 1.0]
            (default: 0.8).
        seed: Random seed for reproducibility.

    Returns:
        Dict[str, int]: Dictionary with counts of train and test images.

    Raises:
        FileNotFoundError: If ``images_dir`` or ``labels_dir`` does not exist.
        ValueError: If ``split_ratio`` is outside [0.0, 1.0] or no image/label
            pair is found.
    """
    try:
        # A ratio outside [0, 1] would silently yield a nonsensical slice
        # (negative values slice from the end of the list).
        if not 0.0 <= split_ratio <= 1.0:
            raise ValueError(f"split_ratio must be between 0.0 and 1.0, got {split_ratio}")

        if not os.path.exists(images_dir):
            raise FileNotFoundError(f"Images directory not found: {images_dir}")
        if not os.path.exists(labels_dir):
            raise FileNotFoundError(f"Labels directory not found: {labels_dir}")

        # Collect valid image files that have matching label files.
        # os.listdir returns entries in arbitrary order, so sort first;
        # otherwise the same seed could give different splits on different
        # machines or filesystems.
        valid_pairs: List[Tuple[str, str]] = []
        for img_file in sorted(os.listdir(images_dir)):
            if not os.path.isfile(os.path.join(images_dir, img_file)):
                # Subdirectories cannot be copied as images.
                continue
            base_name, ext = os.path.splitext(img_file)
            if ext.lower() == ".txt":
                # A label file is not an image (matters when both directories coincide).
                continue
            label_file = f"{base_name}.txt"
            if os.path.exists(os.path.join(labels_dir, label_file)):
                valid_pairs.append((img_file, label_file))
            else:
                logger.warning(f"Skipping split for {img_file}: label file {label_file} not found.")

        if not valid_pairs:
            raise ValueError("No matching image and label file pairs found to split.")

        # Use a private generator so the caller's global random state is untouched.
        random.Random(seed).shuffle(valid_pairs)
        split_idx = int(len(valid_pairs) * split_ratio)
        train_pairs = valid_pairs[:split_idx]
        test_pairs = valid_pairs[split_idx:]

        # Establish folders
        for split in ("train", "test"):
            os.makedirs(os.path.join(output_dataset_dir, split, "images"), exist_ok=True)
            os.makedirs(os.path.join(output_dataset_dir, split, "labels"), exist_ok=True)

        def copy_pairs(pairs: List[Tuple[str, str]], split: str):
            """Copy every (image, label) pair into the given split's folders."""
            images_out = os.path.join(output_dataset_dir, split, "images")
            labels_out = os.path.join(output_dataset_dir, split, "labels")
            for img, lbl in pairs:
                shutil.copy2(os.path.join(images_dir, img), os.path.join(images_out, img))
                shutil.copy2(os.path.join(labels_dir, lbl), os.path.join(labels_out, lbl))

        copy_pairs(train_pairs, "train")
        copy_pairs(test_pairs, "test")

        # Collect unique class IDs (first token of every label line) so that
        # nc and names in data.yaml can be derived automatically.
        unique_classes = set()
        for _, lbl in valid_pairs:
            lbl_path = os.path.join(labels_dir, lbl)
            try:
                with open(lbl_path, "r", encoding="utf-8") as f:
                    for line in f:
                        parts = line.split()
                        if not parts:
                            continue
                        try:
                            unique_classes.add(int(parts[0]))
                        except ValueError:
                            # Skip only the bad line; the rest of the file still counts.
                            logger.warning(f"Ignoring non-integer class ID in {lbl_path}: '{line.strip()}'")
            except (OSError, UnicodeDecodeError) as read_err:
                # Best effort: an unreadable label must not abort the split,
                # but it should not be hidden either.
                logger.warning(f"Could not read classes from {lbl_path}: {read_err}")

        num_classes = max(unique_classes) + 1 if unique_classes else 1
        # Standard class names
        class_names = [f"class_{i}" for i in range(num_classes)]
        # NOTE(review): any dataset with exactly 7 classes is treated as ISIC
        # and gets the ISIC class names; confirm this heuristic is intended.
        if num_classes == 7:
            class_names = ["AKIEC", "BCC", "BKL", "DF", "MEL", "NV", "VASC"]

        yaml_path = os.path.join(output_dataset_dir, "data.yaml")
        yaml_lines = [
            "train: ./train/images\n",
            # No separate validation split is produced; val points at the test split.
            "val: ./test/images\n",
            "test: ./test/images\n\n",
            f"nc: {num_classes}\n",
            f"names: {class_names}\n",
        ]
        with open(yaml_path, "w", encoding="utf-8") as yaml_file:
            yaml_file.writelines(yaml_lines)

        logger.info(f"Splitting done. Train: {len(train_pairs)}, Test: {len(test_pairs)}.")
        return {"train": len(train_pairs), "test": len(test_pairs)}

    except Exception as e:
        logger.error(f"Error splitting dataset: {str(e)}")
        raise
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
class YoloToMaskConverter:
    """
    Converts YOLO format bounding box label files back into binary segmentation masks.
    Supports single file generation, batch directory conversion, and bbox overlay visualization.
    """

    def __init__(self, target_size: Tuple[int, int] = (640, 640)):
        """
        Initialize the converter.

        Args:
            target_size: Target mask dimensions (width, height) to generate.
        """
        self.target_size = target_size

    def _read_boxes(self, label_path: str) -> List[Tuple]:
        """
        Parse a YOLO label file into pixel-space boxes.

        Each usable line has five whitespace-separated fields:
        ``class_id x_center y_center width height``. Blank lines are ignored.
        Lines with a different field count or non-numeric fields are skipped
        with a warning, so one bad line does not discard the whole file.

        Args:
            label_path: Path to the YOLO label text file.

        Returns:
            List of ``(class_id, x1, y1, x2, y2)`` tuples, where the corners are
            whatever ``denormalize_coordinates`` returns for ``self.target_size``
            (presumably integer pixel coordinates; confirm against the helper).
        """
        img_w, img_h = self.target_size
        boxes = []
        with open(label_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                parts = line.split()
                if len(parts) != 5:
                    logger.warning(f"Malformed label line in {label_path}: '{line}'")
                    continue

                try:
                    class_id = int(parts[0])
                    x_norm, y_norm, w_norm, h_norm = map(float, parts[1:])
                except ValueError:
                    logger.warning(f"Malformed label line in {label_path}: '{line}'")
                    continue

                # Denormalize coordinates to top-left and bottom-right corners
                x1, y1, x2, y2 = denormalize_coordinates(
                    x_norm, y_norm, w_norm, h_norm, img_w, img_h
                )
                boxes.append((class_id, x1, y1, x2, y2))
        return boxes

    def convert_single(self, label_path: str, output_mask_path: str) -> bool:
        """
        Create a binary mask image from a YOLO label file.

        Every valid box is drawn as a filled white (255) rectangle on a black
        canvas of ``self.target_size``. The mask is saved even when no box was
        drawn; that case only logs a warning.

        Args:
            label_path: Path to the input YOLO label text file.
            output_mask_path: Path to save the generated PNG mask.

        Returns:
            bool: True if successful, False otherwise.
        """
        try:
            if not os.path.exists(label_path):
                raise FileNotFoundError(f"Label file not found: {label_path}")

            # Create empty black canvas mask; numpy shape is (rows, cols) = (height, width)
            img_w, img_h = self.target_size
            mask = np.zeros((img_h, img_w), dtype=np.uint8)

            has_drawn = False
            for _class_id, x1, y1, x2, y2 in self._read_boxes(label_path):
                # Draw solid rectangle on mask canvas
                if x1 < x2 and y1 < y2:
                    cv2.rectangle(mask, (x1, y1), (x2, y2), 255, thickness=-1)
                    has_drawn = True
                else:
                    logger.warning(f"Invalid bounding box in {label_path}: ({x1}, {y1}) to ({x2}, {y2})")

            # Save mask file
            os.makedirs(os.path.dirname(os.path.abspath(output_mask_path)), exist_ok=True)
            Image.fromarray(mask).save(output_mask_path)

            if not has_drawn:
                logger.warning(f"No bounding boxes were drawn to mask: {output_mask_path}")

            logger.info(f"Successfully generated mask: {output_mask_path}")
            return True

        except Exception as e:
            logger.error(f"Failed to convert label {label_path} to mask: {str(e)}")
            return False

    def convert_dataset(self, labels_dir: str, output_masks_dir: str) -> int:
        """
        Batch convert a folder of YOLO label text files into binary mask images.

        Every ``*.txt`` file in ``labels_dir`` becomes ``<base>.png`` in
        ``output_masks_dir``. Files are processed in sorted order so logs and
        results are deterministic.

        Args:
            labels_dir: Directory containing YOLO text label files.
            output_masks_dir: Directory where binary masks will be saved.

        Returns:
            int: Number of masks successfully converted.

        Raises:
            FileNotFoundError: If ``labels_dir`` does not exist.
        """
        try:
            if not os.path.exists(labels_dir):
                raise FileNotFoundError(f"Labels directory not found: {labels_dir}")

            os.makedirs(output_masks_dir, exist_ok=True)

            label_files = sorted(f for f in os.listdir(labels_dir) if f.endswith(".txt"))
            success_count = 0

            for lbl in label_files:
                base_name = os.path.splitext(lbl)[0]
                output_mask = os.path.join(output_masks_dir, f"{base_name}.png")
                if self.convert_single(os.path.join(labels_dir, lbl), output_mask):
                    success_count += 1

            logger.info(f"Dataset conversion completed. Generated {success_count} masks.")
            return success_count

        except Exception as e:
            logger.error(f"Error during label dataset conversion: {str(e)}")
            raise

    def visualize_label(self, image_path: str, label_path: str, output_image_path: str) -> bool:
        """
        Overlays bounding box coordinates from a YOLO label onto the original image.

        The image is resized to ``self.target_size`` before drawing, so the
        saved overlay has the target dimensions rather than the original ones.

        Args:
            image_path: Path to input image.
            label_path: Path to YOLO label file.
            output_image_path: Path to save the visualized image output.

        Returns:
            bool: True if success, False otherwise.
        """
        try:
            # Check the cheap precondition before loading and resizing the image.
            if not os.path.exists(label_path):
                logger.warning(f"Label file not found for visualization: {label_path}")
                return False

            image = safe_read_image(image_path)
            # Standardize image size for visualization matching target size
            resized_image = preprocess_image(image, self.target_size, is_mask=False)

            box_color = (0, 255, 255)
            for class_id, x1, y1, x2, y2 in self._read_boxes(label_path):
                # Draw bounding box
                cv2.rectangle(resized_image, (x1, y1), (x2, y2), box_color, 2)
                # Class caption above the box; clamped so it stays inside the image
                cv2.putText(
                    resized_image,
                    f"Class {class_id}",
                    (x1, max(y1 - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.5,
                    box_color,
                    1,
                )

            os.makedirs(os.path.dirname(os.path.abspath(output_image_path)), exist_ok=True)
            # cv2.imwrite reports failure through its return value, not only
            # by raising, so a False result must not be logged as success.
            if not cv2.imwrite(output_image_path, resized_image):
                logger.error(f"Error visualizing label {label_path}: could not write {output_image_path}")
                return False

            logger.info(f"Saved visualization overlay to: {output_image_path}")
            return True

        except Exception as e:
            logger.error(f"Error visualizing label {label_path}: {str(e)}")
            return False
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: segment_toolkit
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A Python toolkit to convert between binary segmentation masks and YOLO labels
|
|
5
|
+
Author: Antigravity
|
|
6
|
+
Requires-Python: >=3.6
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: numpy
|
|
9
|
+
Requires-Dist: opencv-python
|
|
10
|
+
Requires-Dist: pillow
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Requires-Dist: matplotlib
|
|
13
|
+
Dynamic: author
|
|
14
|
+
Dynamic: description
|
|
15
|
+
Dynamic: description-content-type
|
|
16
|
+
Dynamic: requires-dist
|
|
17
|
+
Dynamic: requires-python
|
|
18
|
+
Dynamic: summary
|
|
19
|
+
|
|
20
|
+
# Segment Toolkit 🛠️
|
|
21
|
+
|
|
22
|
+
A modern, robust, and premium Python package designed to bridge the gap between pixel-level **binary segmentation masks** and **YOLO bounding box labels**. It provides a bidirectional pipeline with exception handling, extensive logging, a command-line interface (CLI), and a Python API.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## 📌 Features
|
|
27
|
+
|
|
28
|
+
- **Bidirectional Conversion**:
|
|
29
|
+
- **Forward Pipeline**: Convert binary masks to YOLO format labels (supports standard axis-aligned or advanced minimum area rotated bounding boxes).
|
|
30
|
+
- **Reverse Pipeline**: Reconstruct binary masks from YOLO labels.
|
|
31
|
+
- **Automatic Dependency Installer**: Missing required packages (`numpy`, `opencv-python`, `pillow`, `pandas`, `matplotlib`) are automatically detected and installed via `pip` upon package import or script execution.
|
|
32
|
+
- **Robust Exception Handling**: `try`/`except` blocks wrap file I/O, contour finding, and resizing so that corrupted or missing files are logged instead of crashing the application.
|
|
33
|
+
- **Dynamic Dataset Matching**: Read classification mappings (in **CSV** or **JSON** format) to automatically assign multi-class IDs matching standard dataset schemas (like the ISIC dataset).
|
|
34
|
+
- **YOLO Dataset Splitting**: Automatically shuffles and partitions images & labels into training and testing sets with customizable split ratios, creating standard `data.yaml` configs.
|
|
35
|
+
- **Overlay Visualizer**: Overlay bounding boxes and class indicators directly onto source images for annotation inspection.
|
|
36
|
+
- **Dual Interface**: Use as a command-line application (`segment-toolkit`) or import as a Python library (`import segment_toolkit`).
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## 📂 Installation
|
|
41
|
+
|
|
42
|
+
To install the toolkit locally in editable mode (missing dependencies will install automatically):
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install -e .
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Manual Installation
|
|
49
|
+
If you prefer to install dependencies manually before installing the toolkit:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install -r requirements.txt
|
|
53
|
+
pip install .
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## 🚀 Usage
|
|
59
|
+
|
|
60
|
+
### 1. Command Line Interface (CLI)
|
|
61
|
+
|
|
62
|
+
The package installs a console script called `segment-toolkit`.
|
|
63
|
+
|
|
64
|
+
#### Convert Masks to YOLO Labels
|
|
65
|
+
- **Single File Conversion**:
|
|
66
|
+
```bash
|
|
67
|
+
segment-toolkit mask-to-yolo \
|
|
68
|
+
--image images/ISIC_0024310.jpg \
|
|
69
|
+
--mask mask/ISIC_0024310_segmentation.png \
|
|
70
|
+
--output-txt labels/ISIC_0024310.txt \
|
|
71
|
+
--class-id 4
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
- **Batch Directory Conversion**:
|
|
75
|
+
```bash
|
|
76
|
+
segment-toolkit mask-to-yolo \
|
|
77
|
+
--image-dir images/ \
|
|
78
|
+
--mask-dir mask/ \
|
|
79
|
+
--output-dir labels/ \
|
|
80
|
+
--ground-truth GroundTruth.csv
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
- **Options**:
|
|
84
|
+
- `--rotated`: Use rotated minimum area rectangles (`cv2.minAreaRect`) instead of standard axis-aligned rectangles.
|
|
85
|
+
- `--resize WIDTH HEIGHT`: Set target size for image and mask resizing (default: `640 640`).
|
|
86
|
+
|
|
87
|
+
#### Convert YOLO Labels to Masks
|
|
88
|
+
- **Single File Conversion**:
|
|
89
|
+
```bash
|
|
90
|
+
segment-toolkit yolo-to-mask \
|
|
91
|
+
--label labels/ISIC_0024310.txt \
|
|
92
|
+
--output-mask masks_reconstructed/ISIC_0024310_segmentation.png
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
- **Batch Directory Conversion**:
|
|
96
|
+
```bash
|
|
97
|
+
segment-toolkit yolo-to-mask \
|
|
98
|
+
--label-dir labels/ \
|
|
99
|
+
--output-dir masks_reconstructed/
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
#### Visualize Bounding Boxes
|
|
103
|
+
Draw YOLO labels on top of the original image:
|
|
104
|
+
```bash
|
|
105
|
+
segment-toolkit visualize \
|
|
106
|
+
--image images/ISIC_0024310.jpg \
|
|
107
|
+
--label labels/ISIC_0024310.txt \
|
|
108
|
+
--output visualization.png
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
#### Split Dataset
|
|
112
|
+
Organize images and labels into a YOLO-compliant structure (`dataset/train` and `dataset/test` splits) and write `data.yaml`:
|
|
113
|
+
```bash
|
|
114
|
+
segment-toolkit split \
|
|
115
|
+
--images images/ \
|
|
116
|
+
--labels labels/ \
|
|
117
|
+
--output dataset/ \
|
|
118
|
+
--ratio 0.8 \
|
|
119
|
+
--seed 42
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
### 2. Ground Truth Formats
|
|
125
|
+
|
|
126
|
+
The `--ground-truth` parameter in batch conversion supports both CSV and JSON formats.
|
|
127
|
+
|
|
128
|
+
#### CSV Format
|
|
129
|
+
Assumes the first column contains the image identifier/filename, and the subsequent columns represent binary indicator classes (where `1` indicates class presence).
|
|
130
|
+
Example `GroundTruth.csv`:
|
|
131
|
+
```csv
|
|
132
|
+
image,MEL,NV,BCC,AKIEC,BKL,DF,VASC
|
|
133
|
+
ISIC_0024306,0,1,0,0,0,0,0
|
|
134
|
+
ISIC_0024310,1,0,0,0,0,0,0
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
#### JSON Format
|
|
138
|
+
Supports three distinct schemas:
|
|
139
|
+
|
|
140
|
+
1. **Flat Dictionary (Format A)**:
|
|
141
|
+
Maps image IDs directly to class integers or class name strings.
|
|
142
|
+
```json
|
|
143
|
+
{
|
|
144
|
+
"ISIC_0024306": 5,
|
|
145
|
+
"ISIC_0024310": "MEL"
|
|
146
|
+
}
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
2. **Nested Indicators (Format B)**:
|
|
150
|
+
Maps image IDs to dictionaries of binary class indicators.
|
|
151
|
+
```json
|
|
152
|
+
{
|
|
153
|
+
"ISIC_0024306": { "MEL": 0, "NV": 1, "BCC": 0 },
|
|
154
|
+
"ISIC_0024310": { "MEL": 1, "NV": 0, "BCC": 0 }
|
|
155
|
+
}
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
3. **List of Records (Format C)**:
|
|
159
|
+
A list of objects containing image IDs and class descriptors.
|
|
160
|
+
```json
|
|
161
|
+
[
|
|
162
|
+
{ "image": "ISIC_0024306", "class_id": 5 },
|
|
163
|
+
{ "image": "ISIC_0024310", "MEL": 1, "NV": 0 }
|
|
164
|
+
]
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
*Note: Class name strings (like `"MEL"`, `"NV"`) are automatically mapped to standard ISIC IDs (`AKIEC=0, BCC=1, BKL=2, DF=3, MEL=4, NV=5, VASC=6`). Custom column names default to index-based IDs.*
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
### 3. Python API
|
|
172
|
+
|
|
173
|
+
Import classes directly into your code to programmatically build custom pipelines:
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
from segment_toolkit import MaskToYoloConverter, YoloToMaskConverter
|
|
177
|
+
|
|
178
|
+
# 1. Convert mask to YOLO label
|
|
179
|
+
yolo_conv = MaskToYoloConverter(target_size=(640, 640), bbox_type="standard")
|
|
180
|
+
yolo_conv.convert_single(
|
|
181
|
+
image_path="images/ISIC_0024310.jpg",
|
|
182
|
+
mask_path="mask/ISIC_0024310_segmentation.png",
|
|
183
|
+
output_txt_path="labels/ISIC_0024310.txt",
|
|
184
|
+
class_id=4
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
# 2. Batch convert a folder of masks with a JSON ground truth
|
|
188
|
+
yolo_conv.convert_dataset(
|
|
189
|
+
images_dir="images",
|
|
190
|
+
masks_dir="mask",
|
|
191
|
+
output_labels_dir="labels",
|
|
192
|
+
ground_truth="GroundTruth.json"
|
|
193
|
+
)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
## 🧠 Technical Details
|
|
199
|
+
|
|
200
|
+
### Coordinate Conversion Math
|
|
201
|
+
|
|
202
|
+
#### Bounding Box Center Calculation (Pixel Space)
|
|
203
|
+
For standard bounding boxes, the pixel coordinates from `boundingRect` are $(x_{min}, y_{min}, w_{pixel}, h_{pixel})$.
|
|
204
|
+
$$\text{Center } X \quad x_{center} = x_{min} + \frac{w_{pixel}}{2.0}$$
|
|
205
|
+
$$\text{Center } Y \quad y_{center} = y_{min} + \frac{h_{pixel}}{2.0}$$
|
|
206
|
+
|
|
207
|
+
#### Coordinate Normalization (YOLO Format)
|
|
208
|
+
All coordinates are normalized to the range $[0.0, 1.0]$:
|
|
209
|
+
$$x_{norm} = \frac{x_{center}}{img\_width}, \quad y_{norm} = \frac{y_{center}}{img\_height}$$
|
|
210
|
+
$$w_{norm} = \frac{w_{pixel}}{img\_width}, \quad h_{norm} = \frac{h_{pixel}}{img\_height}$$
|
|
211
|
+
|
|
212
|
+
---
|
|
213
|
+
|
|
214
|
+
## 🧑💻 Author
|
|
215
|
+
**Zakria Gamal**
|
|
216
|
+
- Computer Vision & AI Engineer
|
|
217
|
+
- 🧠 LinkedIn: [Zakria Gamal](https://www.linkedin.com/in/zkaria-gamal-82b486267/)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
segment_toolkit/__init__.py,sha256=SAJCWjYekhFC9tUllztl_jqWuTHrEdl40IP5wemlxlQ,1892
|
|
2
|
+
segment_toolkit/cli.py,sha256=25EnYGv8raqu4mibUdhYiVNgmJaaCk5coTj22GG0yO8,8203
|
|
3
|
+
segment_toolkit/helpers.py,sha256=ZlQepDWWwVUn4YQzDCT4n00YGU0wCYl3EF3G1KjuhxQ,6338
|
|
4
|
+
segment_toolkit/source.py,sha256=_CF9iBEQqI9LeeoM173f3Ysqu_3XrMzNGE6clEhfFUY,23023
|
|
5
|
+
segment_toolkit-1.0.0.dist-info/METADATA,sha256=so-tEZInRXj3R4o87WEU02tAO7bkQX-x0k4HjqS5v8U,6925
|
|
6
|
+
segment_toolkit-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
segment_toolkit-1.0.0.dist-info/entry_points.txt,sha256=5FFKlx8pkcqt3R5Xch2w3WRAles919fevfdY9VcbQzk,61
|
|
8
|
+
segment_toolkit-1.0.0.dist-info/top_level.txt,sha256=_TfxcDKObXH5ClqjkITFsxWGjMy0ERiAzCvkDd_CRAE,16
|
|
9
|
+
segment_toolkit-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
segment_toolkit
|