mirc-dataset-handler 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mirc_dataset_handler-0.1.0/LICENSE +30 -0
- mirc_dataset_handler-0.1.0/PKG-INFO +228 -0
- mirc_dataset_handler-0.1.0/README.md +204 -0
- mirc_dataset_handler-0.1.0/mirc_dataset_handler.egg-info/PKG-INFO +228 -0
- mirc_dataset_handler-0.1.0/mirc_dataset_handler.egg-info/SOURCES.txt +11 -0
- mirc_dataset_handler-0.1.0/mirc_dataset_handler.egg-info/dependency_links.txt +1 -0
- mirc_dataset_handler-0.1.0/mirc_dataset_handler.egg-info/requires.txt +13 -0
- mirc_dataset_handler-0.1.0/mirc_dataset_handler.egg-info/top_level.txt +1 -0
- mirc_dataset_handler-0.1.0/mircdataset/__init__.py +4 -0
- mirc_dataset_handler-0.1.0/mircdataset/data_utils.py +207 -0
- mirc_dataset_handler-0.1.0/mircdataset/mirc_torch_dataset.py +106 -0
- mirc_dataset_handler-0.1.0/pyproject.toml +33 -0
- mirc_dataset_handler-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Iago Rodrigues
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
This license applies only to the source code, packaging files, and documentation
|
|
16
|
+
contained in this repository for the Python package "mirc-dataset-handler".
|
|
17
|
+
|
|
18
|
+
It does not grant any rights over the MIRC dataset itself, including but not
|
|
19
|
+
limited to images, videos, annotations, derived data, redistributed copies, or
|
|
20
|
+
third-party mirrors of the dataset. Any use of the MIRC dataset must follow the
|
|
21
|
+
separate dataset terms, ethics requirements, and repository policies published
|
|
22
|
+
in the official dataset repository and/or official distribution channels.
|
|
23
|
+
|
|
24
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
25
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
26
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
27
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
28
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
29
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
30
|
+
SOFTWARE.
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mirc-dataset-handler
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Utilities for loading MIRC annotations, creating subject-aware cross-validation splits, and building PyTorch DataLoaders for semantic segmentation workflows.
|
|
5
|
+
Author: Iago Rodrigues
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: Pillow
|
|
12
|
+
Requires-Dist: tqdm
|
|
13
|
+
Requires-Dist: opencv-python
|
|
14
|
+
Requires-Dist: matplotlib
|
|
15
|
+
Requires-Dist: pandas
|
|
16
|
+
Requires-Dist: scikit-learn
|
|
17
|
+
Requires-Dist: segmentation-models-pytorch
|
|
18
|
+
Requires-Dist: torch-pruning==0.2.1
|
|
19
|
+
Requires-Dist: notebook
|
|
20
|
+
Requires-Dist: torch
|
|
21
|
+
Requires-Dist: torchvision
|
|
22
|
+
Requires-Dist: torchaudio
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# mirc-dataset-handler
|
|
26
|
+
|
|
27
|
+
Utilities for loading, organizing, splitting, and creating PyTorch DataLoaders for the MIRC dataset.
|
|
28
|
+
|
|
29
|
+
## Features
|
|
30
|
+
|
|
31
|
+
- Recursive loading of polygon annotations from JSON files
|
|
32
|
+
- Compact in-memory annotation structure
|
|
33
|
+
- Subject-aware cross-validation split
|
|
34
|
+
- Fold-based PyTorch DataLoader creation
|
|
35
|
+
- Support for semantic masks with classes:
|
|
36
|
+
- `0`: background
|
|
37
|
+
- `1`: person
|
|
38
|
+
- `2`: robot
|
|
39
|
+
|
|
40
|
+
## Repository Structure
|
|
41
|
+
|
|
42
|
+
mirc-dataset-handler/
|
|
43
|
+
├── mircdataset/
|
|
44
|
+
│ ├── __init__.py
|
|
45
|
+
│ ├── data_utils.py
|
|
46
|
+
│ └── mirc_torch_dataset.py
|
|
47
|
+
├── README.md
|
|
48
|
+
├── pyproject.toml
|
|
49
|
+
└── LICENSE
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
### Environment setup
|
|
54
|
+
|
|
55
|
+
For a better environment setup, before installing mirc-dataset-handler, consider creating an environment as follows:
|
|
56
|
+
|
|
57
|
+
- conda create -n test python=3.9
|
|
58
|
+
- conda activate test
|
|
59
|
+
- conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
|
|
60
|
+
|
|
61
|
+
### Local development
|
|
62
|
+
|
|
63
|
+
pip install -e .
|
|
64
|
+
|
|
65
|
+
### From PyPI
|
|
66
|
+
|
|
67
|
+
pip install mirc-dataset-handler
|
|
68
|
+
|
|
69
|
+
## Package Import
|
|
70
|
+
|
|
71
|
+
from mircdataset import data_utils
|
|
72
|
+
|
|
73
|
+
## Expected Annotation Folder Layout
|
|
74
|
+
|
|
75
|
+
annotations/
|
|
76
|
+
└── multi-person/
|
|
77
|
+
└── subject6/
|
|
78
|
+
└── activity1/
|
|
79
|
+
└── routine01_cam1/
|
|
80
|
+
├── frame_crop_cam1_000000.json
|
|
81
|
+
├── frame_crop_cam1_000015.json
|
|
82
|
+
└── ...
|
|
83
|
+
|
|
84
|
+
## Generated Key Format
|
|
85
|
+
|
|
86
|
+
Each loaded JSON is stored in a dictionary using the key format:
|
|
87
|
+
|
|
88
|
+
subjectid_activityid_routineid_camid_frameid
|
|
89
|
+
|
|
90
|
+
Example:
|
|
91
|
+
|
|
92
|
+
6_1_1_1_000000
|
|
93
|
+
6_1_1_1_000015
|
|
94
|
+
|
|
95
|
+
## Stored Annotation Structure
|
|
96
|
+
|
|
97
|
+
Each key maps to a compact dictionary like:
|
|
98
|
+
|
|
99
|
+
{
|
|
100
|
+
"objects": [
|
|
101
|
+
{
|
|
102
|
+
"label": "person",
|
|
103
|
+
"points": [[x1, y1], [x2, y2], ...]
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
"label": "robot",
|
|
107
|
+
"points": [[x1, y1], [x2, y2], ...]
|
|
108
|
+
}
|
|
109
|
+
],
|
|
110
|
+
"image_height": 720,
|
|
111
|
+
"image_width": 1280,
|
|
112
|
+
"image_path": "/absolute/path/to/frame_crop_cam1_000000.jpg"
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
## Basic Usage
|
|
116
|
+
|
|
117
|
+
from mircdataset import data_utils
|
|
118
|
+
|
|
119
|
+
root_folder = "/media/mirc-dataset/annotations/multi-person/"
|
|
120
|
+
|
|
121
|
+
dataset_masks = data_utils.load_dataset_masks(root_folder)
|
|
122
|
+
cv_dataset_masks = data_utils.split_dataset_by_subject_cross_validation(dataset_masks, k=5)
|
|
123
|
+
dataloaders = data_utils.create_dataloaders(cv_dataset_masks, batch_size=4)
|
|
124
|
+
|
|
125
|
+
## Cross-Validation Behavior
|
|
126
|
+
|
|
127
|
+
The split is subject-aware, so subjects are never mixed between training and validation folds.
|
|
128
|
+
|
|
129
|
+
Example with subjects `1..9` and `k=5`:
|
|
130
|
+
|
|
131
|
+
fold 0: [1, 2]
|
|
132
|
+
fold 1: [3, 4]
|
|
133
|
+
fold 2: [5, 6]
|
|
134
|
+
fold 3: [7, 8]
|
|
135
|
+
fold 4: [9]
|
|
136
|
+
|
|
137
|
+
Example with subjects `1..6` and `k=5`:
|
|
138
|
+
|
|
139
|
+
fold 0: [1, 2]
|
|
140
|
+
fold 1: [3]
|
|
141
|
+
fold 2: [4]
|
|
142
|
+
fold 3: [5]
|
|
143
|
+
fold 4: [6]
|
|
144
|
+
|
|
145
|
+
## DataLoader Output
|
|
146
|
+
|
|
147
|
+
Each sample returned by the dataset has the format:
|
|
148
|
+
|
|
149
|
+
{
|
|
150
|
+
"image": torch.FloatTensor,
|
|
151
|
+
"mask": torch.LongTensor
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
Expected shapes:
|
|
155
|
+
|
|
156
|
+
image -> [3, 256, 256]
|
|
157
|
+
mask -> [256, 256]
|
|
158
|
+
|
|
159
|
+
Mask classes:
|
|
160
|
+
|
|
161
|
+
0 -> background
|
|
162
|
+
1 -> person
|
|
163
|
+
2 -> robot
|
|
164
|
+
|
|
165
|
+
## Accessing a Fold
|
|
166
|
+
|
|
167
|
+
train_loader, val_loader = dataloaders[0]
|
|
168
|
+
|
|
169
|
+
If `k=5`, then:
|
|
170
|
+
|
|
171
|
+
- `dataloaders[0]` uses fold 0 as validation
|
|
172
|
+
- `dataloaders[1]` uses fold 1 as validation
|
|
173
|
+
- `dataloaders[2]` uses fold 2 as validation
|
|
174
|
+
- `dataloaders[3]` uses fold 3 as validation
|
|
175
|
+
- `dataloaders[4]` uses fold 4 as validation
|
|
176
|
+
|
|
177
|
+
## Inspecting a Batch
|
|
178
|
+
|
|
179
|
+
import matplotlib.pyplot as plt
|
|
180
|
+
|
|
181
|
+
train_loader, val_loader = dataloaders[0]
|
|
182
|
+
|
|
183
|
+
batch = next(iter(train_loader))
|
|
184
|
+
|
|
185
|
+
images = batch["image"]
|
|
186
|
+
masks = batch["mask"]
|
|
187
|
+
|
|
188
|
+
image = images[0].cpu().numpy().transpose(1, 2, 0)
|
|
189
|
+
mask = masks[0].cpu().numpy()
|
|
190
|
+
|
|
191
|
+
plt.figure(figsize=(10, 4))
|
|
192
|
+
|
|
193
|
+
plt.subplot(1, 2, 1)
|
|
194
|
+
plt.imshow(image)
|
|
195
|
+
plt.title("Image")
|
|
196
|
+
plt.axis("off")
|
|
197
|
+
|
|
198
|
+
plt.subplot(1, 2, 2)
|
|
199
|
+
plt.imshow(mask, vmin=0, vmax=2)
|
|
200
|
+
plt.title("Mask")
|
|
201
|
+
plt.axis("off")
|
|
202
|
+
|
|
203
|
+
plt.tight_layout()
|
|
204
|
+
plt.show()
|
|
205
|
+
|
|
206
|
+
## Main Public Functions
|
|
207
|
+
|
|
208
|
+
### `load_dataset_masks(root_folder)`
|
|
209
|
+
|
|
210
|
+
Recursively loads all JSON annotation files and returns a compact dictionary indexed by:
|
|
211
|
+
|
|
212
|
+
subjectid_activityid_routineid_camid_frameid
|
|
213
|
+
|
|
214
|
+
### `split_dataset_by_subject_cross_validation(dataset_dict, k)`
|
|
215
|
+
|
|
216
|
+
Splits the dataset into `k` subject-aware folds.
|
|
217
|
+
|
|
218
|
+
### `create_dataloaders(dataset_folds, batch_size=32, transform=None, mask_mode=None, num_workers=0, pin_memory=False)`
|
|
219
|
+
|
|
220
|
+
Creates a list of `(train_loader, val_loader)` tuples for cross-validation.
|
|
221
|
+
|
|
222
|
+
## Notes
|
|
223
|
+
|
|
224
|
+
- Images are resized to `256x256`
|
|
225
|
+
- Masks are generated from polygons and rasterized at `256x256`
|
|
226
|
+
- Training DataLoaders use `shuffle=True`
|
|
227
|
+
- Validation DataLoaders use `shuffle=False`
|
|
228
|
+
- Subjects are never mixed across folds
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
# mirc-dataset-handler
|
|
2
|
+
|
|
3
|
+
Utilities for loading, organizing, splitting, and creating PyTorch DataLoaders for the MIRC dataset.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- Recursive loading of polygon annotations from JSON files
|
|
8
|
+
- Compact in-memory annotation structure
|
|
9
|
+
- Subject-aware cross-validation split
|
|
10
|
+
- Fold-based PyTorch DataLoader creation
|
|
11
|
+
- Support for semantic masks with classes:
|
|
12
|
+
- `0`: background
|
|
13
|
+
- `1`: person
|
|
14
|
+
- `2`: robot
|
|
15
|
+
|
|
16
|
+
## Repository Structure
|
|
17
|
+
|
|
18
|
+
mirc-dataset-handler/
|
|
19
|
+
├── mircdataset/
|
|
20
|
+
│ ├── __init__.py
|
|
21
|
+
│ ├── data_utils.py
|
|
22
|
+
│ └── mirc_torch_dataset.py
|
|
23
|
+
├── README.md
|
|
24
|
+
├── pyproject.toml
|
|
25
|
+
└── LICENSE
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
### Environment setup
|
|
30
|
+
|
|
31
|
+
For a better environment setup, before installing mirc-dataset-handler, consider creating an environment as follows:
|
|
32
|
+
|
|
33
|
+
- conda create -n test python=3.9
|
|
34
|
+
- conda activate test
|
|
35
|
+
- conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
|
|
36
|
+
|
|
37
|
+
### Local development
|
|
38
|
+
|
|
39
|
+
pip install -e .
|
|
40
|
+
|
|
41
|
+
### From PyPI
|
|
42
|
+
|
|
43
|
+
pip install mirc-dataset-handler
|
|
44
|
+
|
|
45
|
+
## Package Import
|
|
46
|
+
|
|
47
|
+
from mircdataset import data_utils
|
|
48
|
+
|
|
49
|
+
## Expected Annotation Folder Layout
|
|
50
|
+
|
|
51
|
+
annotations/
|
|
52
|
+
└── multi-person/
|
|
53
|
+
└── subject6/
|
|
54
|
+
└── activity1/
|
|
55
|
+
└── routine01_cam1/
|
|
56
|
+
├── frame_crop_cam1_000000.json
|
|
57
|
+
├── frame_crop_cam1_000015.json
|
|
58
|
+
└── ...
|
|
59
|
+
|
|
60
|
+
## Generated Key Format
|
|
61
|
+
|
|
62
|
+
Each loaded JSON is stored in a dictionary using the key format:
|
|
63
|
+
|
|
64
|
+
subjectid_activityid_routineid_camid_frameid
|
|
65
|
+
|
|
66
|
+
Example:
|
|
67
|
+
|
|
68
|
+
6_1_1_1_000000
|
|
69
|
+
6_1_1_1_000015
|
|
70
|
+
|
|
71
|
+
## Stored Annotation Structure
|
|
72
|
+
|
|
73
|
+
Each key maps to a compact dictionary like:
|
|
74
|
+
|
|
75
|
+
{
|
|
76
|
+
"objects": [
|
|
77
|
+
{
|
|
78
|
+
"label": "person",
|
|
79
|
+
"points": [[x1, y1], [x2, y2], ...]
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"label": "robot",
|
|
83
|
+
"points": [[x1, y1], [x2, y2], ...]
|
|
84
|
+
}
|
|
85
|
+
],
|
|
86
|
+
"image_height": 720,
|
|
87
|
+
"image_width": 1280,
|
|
88
|
+
"image_path": "/absolute/path/to/frame_crop_cam1_000000.jpg"
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
## Basic Usage
|
|
92
|
+
|
|
93
|
+
from mircdataset import data_utils
|
|
94
|
+
|
|
95
|
+
root_folder = "/media/mirc-dataset/annotations/multi-person/"
|
|
96
|
+
|
|
97
|
+
dataset_masks = data_utils.load_dataset_masks(root_folder)
|
|
98
|
+
cv_dataset_masks = data_utils.split_dataset_by_subject_cross_validation(dataset_masks, k=5)
|
|
99
|
+
dataloaders = data_utils.create_dataloaders(cv_dataset_masks, batch_size=4)
|
|
100
|
+
|
|
101
|
+
## Cross-Validation Behavior
|
|
102
|
+
|
|
103
|
+
The split is subject-aware, so subjects are never mixed between training and validation folds.
|
|
104
|
+
|
|
105
|
+
Example with subjects `1..9` and `k=5`:
|
|
106
|
+
|
|
107
|
+
fold 0: [1, 2]
|
|
108
|
+
fold 1: [3, 4]
|
|
109
|
+
fold 2: [5, 6]
|
|
110
|
+
fold 3: [7, 8]
|
|
111
|
+
fold 4: [9]
|
|
112
|
+
|
|
113
|
+
Example with subjects `1..6` and `k=5`:
|
|
114
|
+
|
|
115
|
+
fold 0: [1, 2]
|
|
116
|
+
fold 1: [3]
|
|
117
|
+
fold 2: [4]
|
|
118
|
+
fold 3: [5]
|
|
119
|
+
fold 4: [6]
|
|
120
|
+
|
|
121
|
+
## DataLoader Output
|
|
122
|
+
|
|
123
|
+
Each sample returned by the dataset has the format:
|
|
124
|
+
|
|
125
|
+
{
|
|
126
|
+
"image": torch.FloatTensor,
|
|
127
|
+
"mask": torch.LongTensor
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
Expected shapes:
|
|
131
|
+
|
|
132
|
+
image -> [3, 256, 256]
|
|
133
|
+
mask -> [256, 256]
|
|
134
|
+
|
|
135
|
+
Mask classes:
|
|
136
|
+
|
|
137
|
+
0 -> background
|
|
138
|
+
1 -> person
|
|
139
|
+
2 -> robot
|
|
140
|
+
|
|
141
|
+
## Accessing a Fold
|
|
142
|
+
|
|
143
|
+
train_loader, val_loader = dataloaders[0]
|
|
144
|
+
|
|
145
|
+
If `k=5`, then:
|
|
146
|
+
|
|
147
|
+
- `dataloaders[0]` uses fold 0 as validation
|
|
148
|
+
- `dataloaders[1]` uses fold 1 as validation
|
|
149
|
+
- `dataloaders[2]` uses fold 2 as validation
|
|
150
|
+
- `dataloaders[3]` uses fold 3 as validation
|
|
151
|
+
- `dataloaders[4]` uses fold 4 as validation
|
|
152
|
+
|
|
153
|
+
## Inspecting a Batch
|
|
154
|
+
|
|
155
|
+
import matplotlib.pyplot as plt
|
|
156
|
+
|
|
157
|
+
train_loader, val_loader = dataloaders[0]
|
|
158
|
+
|
|
159
|
+
batch = next(iter(train_loader))
|
|
160
|
+
|
|
161
|
+
images = batch["image"]
|
|
162
|
+
masks = batch["mask"]
|
|
163
|
+
|
|
164
|
+
image = images[0].cpu().numpy().transpose(1, 2, 0)
|
|
165
|
+
mask = masks[0].cpu().numpy()
|
|
166
|
+
|
|
167
|
+
plt.figure(figsize=(10, 4))
|
|
168
|
+
|
|
169
|
+
plt.subplot(1, 2, 1)
|
|
170
|
+
plt.imshow(image)
|
|
171
|
+
plt.title("Image")
|
|
172
|
+
plt.axis("off")
|
|
173
|
+
|
|
174
|
+
plt.subplot(1, 2, 2)
|
|
175
|
+
plt.imshow(mask, vmin=0, vmax=2)
|
|
176
|
+
plt.title("Mask")
|
|
177
|
+
plt.axis("off")
|
|
178
|
+
|
|
179
|
+
plt.tight_layout()
|
|
180
|
+
plt.show()
|
|
181
|
+
|
|
182
|
+
## Main Public Functions
|
|
183
|
+
|
|
184
|
+
### `load_dataset_masks(root_folder)`
|
|
185
|
+
|
|
186
|
+
Recursively loads all JSON annotation files and returns a compact dictionary indexed by:
|
|
187
|
+
|
|
188
|
+
subjectid_activityid_routineid_camid_frameid
|
|
189
|
+
|
|
190
|
+
### `split_dataset_by_subject_cross_validation(dataset_dict, k)`
|
|
191
|
+
|
|
192
|
+
Splits the dataset into `k` subject-aware folds.
|
|
193
|
+
|
|
194
|
+
### `create_dataloaders(dataset_folds, batch_size=32, transform=None, mask_mode=None, num_workers=0, pin_memory=False)`
|
|
195
|
+
|
|
196
|
+
Creates a list of `(train_loader, val_loader)` tuples for cross-validation.
|
|
197
|
+
|
|
198
|
+
## Notes
|
|
199
|
+
|
|
200
|
+
- Images are resized to `256x256`
|
|
201
|
+
- Masks are generated from polygons and rasterized at `256x256`
|
|
202
|
+
- Training DataLoaders use `shuffle=True`
|
|
203
|
+
- Validation DataLoaders use `shuffle=False`
|
|
204
|
+
- Subjects are never mixed across folds
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mirc-dataset-handler
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Utilities for loading MIRC annotations, creating subject-aware cross-validation splits, and building PyTorch DataLoaders for semantic segmentation workflows.
|
|
5
|
+
Author: Iago Rodrigues
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: Pillow
|
|
12
|
+
Requires-Dist: tqdm
|
|
13
|
+
Requires-Dist: opencv-python
|
|
14
|
+
Requires-Dist: matplotlib
|
|
15
|
+
Requires-Dist: pandas
|
|
16
|
+
Requires-Dist: scikit-learn
|
|
17
|
+
Requires-Dist: segmentation-models-pytorch
|
|
18
|
+
Requires-Dist: torch-pruning==0.2.1
|
|
19
|
+
Requires-Dist: notebook
|
|
20
|
+
Requires-Dist: torch
|
|
21
|
+
Requires-Dist: torchvision
|
|
22
|
+
Requires-Dist: torchaudio
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# mirc-dataset-handler
|
|
26
|
+
|
|
27
|
+
Utilities for loading, organizing, splitting, and creating PyTorch DataLoaders for the MIRC dataset.
|
|
28
|
+
|
|
29
|
+
## Features
|
|
30
|
+
|
|
31
|
+
- Recursive loading of polygon annotations from JSON files
|
|
32
|
+
- Compact in-memory annotation structure
|
|
33
|
+
- Subject-aware cross-validation split
|
|
34
|
+
- Fold-based PyTorch DataLoader creation
|
|
35
|
+
- Support for semantic masks with classes:
|
|
36
|
+
- `0`: background
|
|
37
|
+
- `1`: person
|
|
38
|
+
- `2`: robot
|
|
39
|
+
|
|
40
|
+
## Repository Structure
|
|
41
|
+
|
|
42
|
+
mirc-dataset-handler/
|
|
43
|
+
├── mircdataset/
|
|
44
|
+
│ ├── __init__.py
|
|
45
|
+
│ ├── data_utils.py
|
|
46
|
+
│ └── mirc_torch_dataset.py
|
|
47
|
+
├── README.md
|
|
48
|
+
├── pyproject.toml
|
|
49
|
+
└── LICENSE
|
|
50
|
+
|
|
51
|
+
## Installation
|
|
52
|
+
|
|
53
|
+
### Environment setup
|
|
54
|
+
|
|
55
|
+
For a better environment setup, before installing mirc-dataset-handler, consider creating an environment as follows:
|
|
56
|
+
|
|
57
|
+
- conda create -n test python=3.9
|
|
58
|
+
- conda activate test
|
|
59
|
+
- conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
|
|
60
|
+
|
|
61
|
+
### Local development
|
|
62
|
+
|
|
63
|
+
pip install -e .
|
|
64
|
+
|
|
65
|
+
### From PyPI
|
|
66
|
+
|
|
67
|
+
pip install mirc-dataset-handler
|
|
68
|
+
|
|
69
|
+
## Package Import
|
|
70
|
+
|
|
71
|
+
from mircdataset import data_utils
|
|
72
|
+
|
|
73
|
+
## Expected Annotation Folder Layout
|
|
74
|
+
|
|
75
|
+
annotations/
|
|
76
|
+
└── multi-person/
|
|
77
|
+
└── subject6/
|
|
78
|
+
└── activity1/
|
|
79
|
+
└── routine01_cam1/
|
|
80
|
+
├── frame_crop_cam1_000000.json
|
|
81
|
+
├── frame_crop_cam1_000015.json
|
|
82
|
+
└── ...
|
|
83
|
+
|
|
84
|
+
## Generated Key Format
|
|
85
|
+
|
|
86
|
+
Each loaded JSON is stored in a dictionary using the key format:
|
|
87
|
+
|
|
88
|
+
subjectid_activityid_routineid_camid_frameid
|
|
89
|
+
|
|
90
|
+
Example:
|
|
91
|
+
|
|
92
|
+
6_1_1_1_000000
|
|
93
|
+
6_1_1_1_000015
|
|
94
|
+
|
|
95
|
+
## Stored Annotation Structure
|
|
96
|
+
|
|
97
|
+
Each key maps to a compact dictionary like:
|
|
98
|
+
|
|
99
|
+
{
|
|
100
|
+
"objects": [
|
|
101
|
+
{
|
|
102
|
+
"label": "person",
|
|
103
|
+
"points": [[x1, y1], [x2, y2], ...]
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
"label": "robot",
|
|
107
|
+
"points": [[x1, y1], [x2, y2], ...]
|
|
108
|
+
}
|
|
109
|
+
],
|
|
110
|
+
"image_height": 720,
|
|
111
|
+
"image_width": 1280,
|
|
112
|
+
"image_path": "/absolute/path/to/frame_crop_cam1_000000.jpg"
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
## Basic Usage
|
|
116
|
+
|
|
117
|
+
from mircdataset import data_utils
|
|
118
|
+
|
|
119
|
+
root_folder = "/media/mirc-dataset/annotations/multi-person/"
|
|
120
|
+
|
|
121
|
+
dataset_masks = data_utils.load_dataset_masks(root_folder)
|
|
122
|
+
cv_dataset_masks = data_utils.split_dataset_by_subject_cross_validation(dataset_masks, k=5)
|
|
123
|
+
dataloaders = data_utils.create_dataloaders(cv_dataset_masks, batch_size=4)
|
|
124
|
+
|
|
125
|
+
## Cross-Validation Behavior
|
|
126
|
+
|
|
127
|
+
The split is subject-aware, so subjects are never mixed between training and validation folds.
|
|
128
|
+
|
|
129
|
+
Example with subjects `1..9` and `k=5`:
|
|
130
|
+
|
|
131
|
+
fold 0: [1, 2]
|
|
132
|
+
fold 1: [3, 4]
|
|
133
|
+
fold 2: [5, 6]
|
|
134
|
+
fold 3: [7, 8]
|
|
135
|
+
fold 4: [9]
|
|
136
|
+
|
|
137
|
+
Example with subjects `1..6` and `k=5`:
|
|
138
|
+
|
|
139
|
+
fold 0: [1, 2]
|
|
140
|
+
fold 1: [3]
|
|
141
|
+
fold 2: [4]
|
|
142
|
+
fold 3: [5]
|
|
143
|
+
fold 4: [6]
|
|
144
|
+
|
|
145
|
+
## DataLoader Output
|
|
146
|
+
|
|
147
|
+
Each sample returned by the dataset has the format:
|
|
148
|
+
|
|
149
|
+
{
|
|
150
|
+
"image": torch.FloatTensor,
|
|
151
|
+
"mask": torch.LongTensor
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
Expected shapes:
|
|
155
|
+
|
|
156
|
+
image -> [3, 256, 256]
|
|
157
|
+
mask -> [256, 256]
|
|
158
|
+
|
|
159
|
+
Mask classes:
|
|
160
|
+
|
|
161
|
+
0 -> background
|
|
162
|
+
1 -> person
|
|
163
|
+
2 -> robot
|
|
164
|
+
|
|
165
|
+
## Accessing a Fold
|
|
166
|
+
|
|
167
|
+
train_loader, val_loader = dataloaders[0]
|
|
168
|
+
|
|
169
|
+
If `k=5`, then:
|
|
170
|
+
|
|
171
|
+
- `dataloaders[0]` uses fold 0 as validation
|
|
172
|
+
- `dataloaders[1]` uses fold 1 as validation
|
|
173
|
+
- `dataloaders[2]` uses fold 2 as validation
|
|
174
|
+
- `dataloaders[3]` uses fold 3 as validation
|
|
175
|
+
- `dataloaders[4]` uses fold 4 as validation
|
|
176
|
+
|
|
177
|
+
## Inspecting a Batch
|
|
178
|
+
|
|
179
|
+
import matplotlib.pyplot as plt
|
|
180
|
+
|
|
181
|
+
train_loader, val_loader = dataloaders[0]
|
|
182
|
+
|
|
183
|
+
batch = next(iter(train_loader))
|
|
184
|
+
|
|
185
|
+
images = batch["image"]
|
|
186
|
+
masks = batch["mask"]
|
|
187
|
+
|
|
188
|
+
image = images[0].cpu().numpy().transpose(1, 2, 0)
|
|
189
|
+
mask = masks[0].cpu().numpy()
|
|
190
|
+
|
|
191
|
+
plt.figure(figsize=(10, 4))
|
|
192
|
+
|
|
193
|
+
plt.subplot(1, 2, 1)
|
|
194
|
+
plt.imshow(image)
|
|
195
|
+
plt.title("Image")
|
|
196
|
+
plt.axis("off")
|
|
197
|
+
|
|
198
|
+
plt.subplot(1, 2, 2)
|
|
199
|
+
plt.imshow(mask, vmin=0, vmax=2)
|
|
200
|
+
plt.title("Mask")
|
|
201
|
+
plt.axis("off")
|
|
202
|
+
|
|
203
|
+
plt.tight_layout()
|
|
204
|
+
plt.show()
|
|
205
|
+
|
|
206
|
+
## Main Public Functions
|
|
207
|
+
|
|
208
|
+
### `load_dataset_masks(root_folder)`
|
|
209
|
+
|
|
210
|
+
Recursively loads all JSON annotation files and returns a compact dictionary indexed by:
|
|
211
|
+
|
|
212
|
+
subjectid_activityid_routineid_camid_frameid
|
|
213
|
+
|
|
214
|
+
### `split_dataset_by_subject_cross_validation(dataset_dict, k)`
|
|
215
|
+
|
|
216
|
+
Splits the dataset into `k` subject-aware folds.
|
|
217
|
+
|
|
218
|
+
### `create_dataloaders(dataset_folds, batch_size=32, transform=None, mask_mode=None, num_workers=0, pin_memory=False)`
|
|
219
|
+
|
|
220
|
+
Creates a list of `(train_loader, val_loader)` tuples for cross-validation.
|
|
221
|
+
|
|
222
|
+
## Notes
|
|
223
|
+
|
|
224
|
+
- Images are resized to `256x256`
|
|
225
|
+
- Masks are generated from polygons and rasterized at `256x256`
|
|
226
|
+
- Training DataLoaders use `shuffle=True`
|
|
227
|
+
- Validation DataLoaders use `shuffle=False`
|
|
228
|
+
- Subjects are never mixed across folds
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
mirc_dataset_handler.egg-info/PKG-INFO
|
|
5
|
+
mirc_dataset_handler.egg-info/SOURCES.txt
|
|
6
|
+
mirc_dataset_handler.egg-info/dependency_links.txt
|
|
7
|
+
mirc_dataset_handler.egg-info/requires.txt
|
|
8
|
+
mirc_dataset_handler.egg-info/top_level.txt
|
|
9
|
+
mircdataset/__init__.py
|
|
10
|
+
mircdataset/data_utils.py
|
|
11
|
+
mircdataset/mirc_torch_dataset.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
mircdataset
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import json
|
|
4
|
+
from tqdm import tqdm
|
|
5
|
+
from torch.utils.data import DataLoader
|
|
6
|
+
from .mirc_torch_dataset import MIRCTorchDataset
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_dataset_masks(root_folder):
    """Recursively load every JSON annotation file under *root_folder*.

    Walks the directory tree, reads each ``*.json`` file, and stores a
    simplified annotation record per file, keyed as
    ``subjectid_activityid_routineid_camid_frameid``.

    Args:
        root_folder: Root directory of the annotation tree
            (e.g. ``.../annotations/multi-person/``).

    Returns:
        dict: Mapping from the composite key to the simplified content
        produced by ``_build_simplified_json_content``.
    """
    json_data = {}

    # Collect all JSON paths up front so tqdm can show a total progress bar.
    all_json_files = []
    for current_root, _, files in os.walk(root_folder):
        for file_name in files:
            if file_name.endswith(".json"):
                all_json_files.append(os.path.join(current_root, file_name))

    for file_path in tqdm(all_json_files):
        file_name = os.path.basename(file_path)

        with open(file_path, "r", encoding="utf-8") as f:
            raw_content = json.load(f)

        key = _process_json_key(file_path, file_name)
        # The simplified record keeps only labels/points plus image metadata;
        # raw_content is discarded once the next iteration rebinds it.
        json_data[key] = _build_simplified_json_content(
            folder_path=os.path.dirname(file_path),
            raw_content=raw_content,
        )

    return json_data
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _process_json_key(file_path, file_name):
    """Compose the dataset key for one annotation file.

    The key format is ``subjectid_activityid_routineid_camid_frameid``,
    where the first four IDs come from the directory path and the frame
    ID comes from the file name.
    """
    ids = _extract_ids_from_path(file_path)
    frame_id = _extract_frame_id_from_filename(file_name)
    return "_".join(str(part) for part in ids) + f"_{frame_id}"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _extract_ids_from_path(file_path):
|
|
47
|
+
normalized_path = os.path.normpath(file_path)
|
|
48
|
+
|
|
49
|
+
pattern = r"subject(\d+)[/\\]+activity(\d+)[/\\]+routine(\d+)_cam(\d+)"
|
|
50
|
+
match = re.search(pattern, normalized_path, re.IGNORECASE)
|
|
51
|
+
|
|
52
|
+
if not match:
|
|
53
|
+
raise ValueError(f"Not possible to extract IDs from the folder: {file_path}")
|
|
54
|
+
|
|
55
|
+
subject_id, activity_id, routine_id, cam_id = match.groups()
|
|
56
|
+
|
|
57
|
+
return int(subject_id), int(activity_id), int(routine_id), int(cam_id)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _extract_frame_id_from_filename(file_name):
|
|
61
|
+
pattern = r"_(\d+)\.json$"
|
|
62
|
+
match = re.search(pattern, file_name, re.IGNORECASE)
|
|
63
|
+
|
|
64
|
+
if not match:
|
|
65
|
+
raise ValueError(f"Not possible to extract info from file: {file_name}")
|
|
66
|
+
|
|
67
|
+
return match.group(1)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _build_simplified_json_content(folder_path, raw_content):
|
|
71
|
+
objects = []
|
|
72
|
+
|
|
73
|
+
for shape in raw_content.get("shapes", []):
|
|
74
|
+
objects.append({
|
|
75
|
+
"label": shape.get("label"),
|
|
76
|
+
"points": shape.get("points", [])
|
|
77
|
+
})
|
|
78
|
+
|
|
79
|
+
image_path_in_json = raw_content.get("imagePath", "")
|
|
80
|
+
full_image_path = os.path.join(folder_path, image_path_in_json)
|
|
81
|
+
|
|
82
|
+
simplified_content = {
|
|
83
|
+
"objects": objects,
|
|
84
|
+
"image_height": raw_content.get("imageHeight"),
|
|
85
|
+
"image_width": raw_content.get("imageWidth"),
|
|
86
|
+
"image_path": full_image_path
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
return simplified_content
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def split_dataset_by_subject_cross_validation(dataset_dict, k):
    """Split *dataset_dict* into ``k`` subject-aware partitions.

    All samples belonging to one subject end up in exactly one partition,
    so subjects are never mixed across folds.

    NOTE: entries are *moved* out of ``dataset_dict`` (it ends up empty).

    Args:
        dataset_dict: Mapping of ``subject_activity_routine_cam_frame``
            keys to annotation records.
        k: Number of folds; must be greater than 1.

    Returns:
        list[dict]: One dict of samples per fold.

    Raises:
        ValueError: On malformed keys or a subject missing from the fold map.
    """
    subject_folds = _split_subjects_for_cross_validation(dataset_dict, k)

    # Invert fold -> subjects into subject -> fold for O(1) lookup per key.
    fold_of_subject = {
        subject: fold_idx
        for fold_idx, subjects in enumerate(subject_folds)
        for subject in subjects
    }

    partitions = [{} for _ in subject_folds]

    for key in list(dataset_dict):
        fields = key.split("_")
        if len(fields) < 5:
            raise ValueError(f"Invalid key format: {key}")

        subject = int(fields[0])
        if subject not in fold_of_subject:
            raise ValueError(f"Subject index {subject} not found in fold mapping")

        # pop() moves the entry so the sample is stored only once overall.
        partitions[fold_of_subject[subject]][key] = dataset_dict.pop(key)

    return partitions
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _split_subjects_for_cross_validation(dataset_dict, k):
    """Distribute the unique subject IDs into ``k`` ordered folds.

    Subjects are sorted and assigned contiguously; when the count does not
    divide evenly, the first ``total % k`` folds receive one extra subject.
    Folds beyond the subject count stay empty.

    Returns:
        list[list[int]]: ``k`` lists of subject IDs, or ``[]`` when the
        dataset contains no subjects.

    Raises:
        ValueError: If ``k`` is not greater than 1.
    """
    if k <= 1:
        raise ValueError("k must be greater than 1")

    subjects = _get_unique_subject_indices(dataset_dict)
    if not subjects:
        return []

    base, remainder = divmod(len(subjects), k)

    folds = []
    cursor = 0
    for fold_idx in range(k):
        size = base + (1 if fold_idx < remainder else 0)
        folds.append(subjects[cursor:cursor + size])
        cursor += size

    return folds
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _get_unique_subject_indices(dataset_dict):
|
|
151
|
+
subject_indices = set()
|
|
152
|
+
|
|
153
|
+
for key in dataset_dict.keys():
|
|
154
|
+
parts = key.split("_")
|
|
155
|
+
|
|
156
|
+
if len(parts) < 5:
|
|
157
|
+
raise ValueError(f"Invalid key format: {key}")
|
|
158
|
+
|
|
159
|
+
subject_indices.add(int(parts[0]))
|
|
160
|
+
|
|
161
|
+
return sorted(subject_indices)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def create_dataloaders(dataset_folds, batch_size=32, transform=None, mask_mode=None, num_workers=0, pin_memory=False):
    """Build one ``(train_loader, val_loader)`` pair per cross-validation fold.

    For fold ``i`` the validation set is ``dataset_folds[i]`` and the
    training set is the union of every other fold. Training loaders
    shuffle; validation loaders do not. Datasets are created with a fixed
    256x256 target size.

    Args:
        dataset_folds: Sequence of per-fold sample dicts (as produced by
            ``split_dataset_by_subject_cross_validation``); needs >= 2 folds.
        batch_size: Batch size for both loaders.
        transform: Optional transform forwarded to ``MIRCTorchDataset``.
        mask_mode: Mask-mode option forwarded to ``MIRCTorchDataset``.
        num_workers: DataLoader worker-process count.
        pin_memory: Whether DataLoaders pin host memory.

    Returns:
        list[tuple]: ``(train_loader, val_loader)`` per fold.

    Raises:
        ValueError: If fewer than two folds are supplied.
    """
    if len(dataset_folds) <= 1:
        raise ValueError("dataset_folds must contain at least 2 folds")

    loader_pairs = []

    for held_out, val_fold in enumerate(dataset_folds):
        train_parts = [fold for idx, fold in enumerate(dataset_folds) if idx != held_out]

        train_dataset = MIRCTorchDataset(
            dataset_partitions=train_parts,
            transform=transform,
            mask_mode=mask_mode,
            target_size=(256, 256),
        )
        val_dataset = MIRCTorchDataset(
            dataset_partitions=[val_fold],
            transform=transform,
            mask_mode=mask_mode,
            target_size=(256, 256),
        )

        train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            pin_memory=pin_memory,
        )
        val_loader = DataLoader(
            val_dataset,
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers,
            pin_memory=pin_memory,
        )

        loader_pairs.append((train_loader, val_loader))

    return loader_pairs
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from PIL import Image
|
|
3
|
+
from cv2 import fillPoly as cv2_fillPoly
|
|
4
|
+
from torch import from_numpy as torch_from_numpy
|
|
5
|
+
from torch.utils.data import Dataset
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MIRCTorchDataset(Dataset):
    """PyTorch Dataset serving MIRC images with a 3-class segmentation mask.

    Each sample dict must provide "image_path", "image_width",
    "image_height", and "objects" (a list of {"label", "points"} polygon
    annotations). Images are resized to ``target_size`` and polygons are
    rasterized into a label map with 0 = background, 1 = person, 2 = robot.
    """

    def __init__(self, dataset_partitions, transform=None, mask_mode=None, target_size=(256, 256)):
        # Accept a single partition dict for convenience; normalize to a list.
        if isinstance(dataset_partitions, dict):
            dataset_partitions = [dataset_partitions]

        self.dataset_partitions = dataset_partitions
        # NOTE(review): `transform` is stored but never applied anywhere in
        # this class (including __getitem__) — confirm whether it should be
        # invoked on the sample before returning.
        self.transform = transform
        self.mask_mode = mask_mode
        self.target_width = target_size[0]
        self.target_height = target_size[1]
        # Flat index of (partition_index, sample_key) pairs so __getitem__
        # can address samples across all partitions with one integer index.
        self.sample_index = []

        for partition_index, partition in enumerate(self.dataset_partitions):
            for key in partition.keys():
                self.sample_index.append((partition_index, key))

    def __len__(self):
        # Total sample count across every partition.
        return len(self.sample_index)

    def __getitem__(self, idx):
        """Return {"image": float32 CHW tensor in [0, 1], "mask": int64 HW tensor}."""
        partition_index, sample_key = self.sample_index[idx]
        sample = self.dataset_partitions[partition_index][sample_key]

        image_nd = self.__load_image(sample["image_path"])
        mask_human, mask_robot = self.__build_binary_masks(sample)
        mask_nd = self.__transform_masks(mask_human, mask_robot)

        # HWC uint8 -> CHW float32 scaled to [0, 1].
        image_chw = image_nd.transpose((2, 0, 1)).astype(np.float32) / 255.0

        return {
            "image": torch_from_numpy(image_chw).float(),
            "mask": torch_from_numpy(mask_nd).long()
        }

    def __load_image(self, image_path):
        # Load as RGB, resize to the target resolution with bilinear
        # interpolation, and return an HWC uint8 array.
        with Image.open(image_path) as image:
            image = image.convert("RGB")
            image = image.resize((self.target_width, self.target_height), Image.BILINEAR)
            image_nd = np.array(image, dtype=np.uint8)

        return image_nd

    def __build_binary_masks(self, sample):
        """Rasterize polygon annotations into per-class binary masks.

        Polygons are annotated in original-image coordinates and scaled to
        the target resolution before filling. Labels other than "person"
        and "robot" are silently ignored.
        """
        mask_human = np.zeros((self.target_height, self.target_width), dtype=np.uint8)
        mask_robot = np.zeros((self.target_height, self.target_width), dtype=np.uint8)

        image_width = sample["image_width"]
        image_height = sample["image_height"]

        scale_x = self.target_width / image_width
        scale_y = self.target_height / image_height

        for obj in sample["objects"]:
            label = obj["label"]
            points = obj["points"]

            # Skip annotations with no vertices.
            if not points:
                continue

            polygon = self.__scale_polygon(points, scale_x, scale_y)

            # __scale_polygon returns None for malformed point lists.
            if polygon is None or len(polygon) == 0:
                continue

            if label == "person":
                cv2_fillPoly(mask_human, [polygon], 1)
            elif label == "robot":
                cv2_fillPoly(mask_robot, [polygon], 1)

        return mask_human, mask_robot

    def __scale_polygon(self, points, scale_x, scale_y):
        # Scale (x, y) vertices to the target resolution, round to integer
        # pixel coordinates, and clamp inside the mask bounds. Returns None
        # when the input is not a non-empty (N, 2) array.
        polygon = np.array(points, dtype=np.float32)

        if polygon.ndim != 2 or polygon.shape[0] == 0 or polygon.shape[1] != 2:
            return None

        polygon[:, 0] = polygon[:, 0] * scale_x
        polygon[:, 1] = polygon[:, 1] * scale_y

        polygon = np.rint(polygon).astype(np.int32)

        polygon[:, 0] = np.clip(polygon[:, 0], 0, self.target_width - 1)
        polygon[:, 1] = np.clip(polygon[:, 1], 0, self.target_height - 1)

        return polygon

    def __transform_masks(self, mask_human, mask_robot):
        """Merge per-class binary masks into a single label map (0/1/2).

        Robot pixels take priority over person pixels where polygons
        overlap, in both branches.
        NOTE(review): the "entropy" branch yields the same label map as the
        default branch — confirm whether a distinct encoding was intended
        for mask_mode == "entropy".
        """
        mask_human = np.where(mask_human > 0, 1, 0).astype(np.uint8)
        mask_robot = np.where(mask_robot > 0, 1, 0).astype(np.uint8)

        if self.mask_mode == "entropy":
            mask = np.where(mask_robot == 1, 2, mask_human).astype(np.uint8)
        else:
            mask = np.zeros_like(mask_human, dtype=np.uint8)
            mask[mask_human == 1] = 1
            mask[mask_robot == 1] = 2

        return mask
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "mirc-dataset-handler"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Utilities for loading MIRC annotations, creating subject-aware cross-validation splits, and building PyTorch DataLoaders for semantic segmentation workflows."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.9"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
license-files = ["LICENSE"]
|
|
9
|
+
authors = [
|
|
10
|
+
{ name = "Iago Rodrigues" }
|
|
11
|
+
]
|
|
12
|
+
dependencies = [
|
|
13
|
+
"numpy",
|
|
14
|
+
"Pillow",
|
|
15
|
+
"tqdm",
|
|
16
|
+
"opencv-python",
|
|
17
|
+
"matplotlib",
|
|
18
|
+
"pandas",
|
|
19
|
+
"scikit-learn",
|
|
20
|
+
"segmentation-models-pytorch",
|
|
21
|
+
"torch-pruning==0.2.1",
|
|
22
|
+
"notebook",
|
|
23
|
+
"torch",
|
|
24
|
+
"torchvision",
|
|
25
|
+
"torchaudio"
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[build-system]
|
|
29
|
+
requires = ["setuptools>=77.0.3", "wheel"]
|
|
30
|
+
build-backend = "setuptools.build_meta"
|
|
31
|
+
|
|
32
|
+
[tool.setuptools.packages.find]
|
|
33
|
+
include = ["mircdataset*"]
|