datamint 2.0.2__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of datamint might be problematic.
- datamint/client_cmd_tools/datamint_config.py +263 -7
- datamint/dataset/base_dataset.py +25 -26
- datamint/dataset/dataset.py +63 -38
- {datamint-2.0.2.dist-info → datamint-2.1.1.dist-info}/METADATA +2 -2
- {datamint-2.0.2.dist-info → datamint-2.1.1.dist-info}/RECORD +7 -7
- {datamint-2.0.2.dist-info → datamint-2.1.1.dist-info}/WHEEL +0 -0
- {datamint-2.0.2.dist-info → datamint-2.1.1.dist-info}/entry_points.txt +0 -0
datamint/client_cmd_tools/datamint_config.py
CHANGED

@@ -4,6 +4,10 @@ from datamint import configs
 from datamint.utils.logging_utils import load_cmdline_logging_config, ConsoleWrapperHandler
 from rich.prompt import Prompt, Confirm
 from rich.console import Console
+import os
+import shutil
+from pathlib import Path
+from rich.table import Table

 _LOGGER = logging.getLogger(__name__)
 _USER_LOGGER = logging.getLogger('user_logger')
@@ -85,18 +89,246 @@ def configure_api_key():
 def test_connection():
     """Test the API connection with current settings."""
     try:
-        from datamint import
+        from datamint import Api
         console.print("[accent]🔄 Testing connection...[/accent]")
-
-
-        projects = api.get_projects()
-        console.print(f"[success]✅ Connection successful! Found {len(projects)} projects.[/success]")
+        Api(check_connection=True)
+        console.print(f"[success]✅ Connection successful![/success]")
     except ImportError:
         console.print("[error]❌ Full API not available. Install with: pip install datamint[/error]")
     except Exception as e:
         console.print(f"[error]❌ Connection failed: {e}[/error]")

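The new implementation no longer lists projects to verify credentials; it delegates the check to the client constructor. A minimal sketch of how the new check can be used on its own, taking Api(check_connection=True) from the diff above (the wrapper function and its error handling are illustrative, not part of the package):

    from datamint import Api

    def datamint_connection_ok() -> bool:
        """Return True if the configured API key and URL can reach the server."""
        try:
            # Per the diff, constructing Api with check_connection=True
            # validates the connection and raises on failure.
            Api(check_connection=True)
            return True
        except Exception as exc:
            print(f"Connection failed: {exc}")
            return False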
+def discover_local_datasets() -> list[dict[str, str]]:
+    """Discover locally downloaded datasets.
+
+    Returns:
+        List of dictionaries containing dataset info with keys: 'name', 'path', 'size'
+    """
+    from datamint.dataset.base_dataset import DatamintBaseDataset
+
+    # Check default datamint directory
+    default_root = os.path.join(
+        os.path.expanduser("~"),
+        DatamintBaseDataset.DATAMINT_DEFAULT_DIR,
+        DatamintBaseDataset.DATAMINT_DATASETS_DIR
+    )
+
+    datasets = []
+
+    if not os.path.exists(default_root):
+        return datasets
+
+    for item in os.listdir(default_root):
+        dataset_path = os.path.join(default_root, item)
+        if os.path.isdir(dataset_path):
+            # Check if it has a dataset.json file (indicating it's a datamint dataset)
+            dataset_json = os.path.join(dataset_path, 'dataset.json')
+            if os.path.exists(dataset_json):
+                # Calculate directory size
+                total_size = sum(
+                    os.path.getsize(os.path.join(dirpath, filename))
+                    for dirpath, dirnames, filenames in os.walk(dataset_path)
+                    for filename in filenames
+                )
+
+                datasets.append({
+                    'name': item,
+                    'path': dataset_path,
+                    'size': _format_size(total_size),
+                    'size_bytes': total_size
+                })
+
+    return sorted(datasets, key=lambda x: x['name'])
+
+
+def _format_size(size_bytes: int) -> str:
+    """Format size in bytes to human readable format."""
+    if size_bytes == 0:
+        return "0 B"
+
+    size_names = ["B", "KB", "MB", "GB", "TB"]
+    i = 0
+    while size_bytes >= 1024 and i < len(size_names) - 1:
+        size_bytes /= 1024.0
+        i += 1
+
+    return f"{size_bytes:.1f} {size_names[i]}"
+
+
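_format_size repeatedly divides by 1024 until the value fits the current unit, capping at TB. A quick standalone check of the expected output (the calls below are illustrative; the helper itself is private to the module):

    # Same logic as _format_size above, exercised with sample values.
    def format_size(size_bytes: float) -> str:
        if size_bytes == 0:
            return "0 B"
        size_names = ["B", "KB", "MB", "GB", "TB"]
        i = 0
        while size_bytes >= 1024 and i < len(size_names) - 1:
            size_bytes /= 1024.0
            i += 1
        return f"{size_bytes:.1f} {size_names[i]}"

    print(format_size(0))            # 0 B
    print(format_size(1536))         # 1.5 KB
    print(format_size(3 * 1024**3))  # 3.0 GB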
+def show_local_datasets() -> list[dict[str, str]]:
+    """Display all locally downloaded datasets."""
+    datasets = discover_local_datasets()
+
+    if not datasets:
+        console.print("[dim]No local datasets found.[/dim]")
+        return datasets
+
+    console.print("[title]📁 Local Datasets:[/title]")
+
+    table = Table(show_header=True, header_style="bold blue")
+    table.add_column("Dataset Name", style="cyan")
+    table.add_column("Size", justify="right", style="green")
+    table.add_column("Path", style="dim")
+
+    total_size = 0
+    for dataset in datasets:
+        table.add_row(dataset['name'], dataset['size'], dataset['path'])
+        total_size += dataset['size_bytes']
+
+    console.print(table)
+    console.print(f"\n[bold]Total size:[/bold] {_format_size(total_size)}")
+
+    return datasets
+
+
+def clean_dataset(dataset_name: str) -> bool:
+    """Clean a specific dataset.
+
+    Args:
+        dataset_name: Name of the dataset to clean
+
+    Returns:
+        True if dataset was cleaned, False otherwise
+    """
+    datasets = discover_local_datasets()
+    dataset_to_clean = None
+
+    for dataset in datasets:
+        if dataset['name'] == dataset_name:
+            dataset_to_clean = dataset
+            break
+
+    if dataset_to_clean is None:
+        console.print(f"[error]❌ Dataset '{dataset_name}' not found locally.[/error]")
+        return False
+
+    console.print(f"[warning]⚠️ About to delete dataset: {dataset_name}[/warning]")
+    console.print(f"[dim]Path: {dataset_to_clean['path']}[/dim]")
+    console.print(f"[dim]Size: {dataset_to_clean['size']}[/dim]")
+
+    confirmed = Confirm.ask("Are you sure you want to delete this dataset?",
+                            default=False, console=console)
+
+    if not confirmed:
+        console.print("[dim]Operation cancelled.[/dim]")
+        return False
+
+    try:
+        shutil.rmtree(dataset_to_clean['path'])
+        console.print(f"[success]✅ Dataset '{dataset_name}' has been deleted.[/success]")
+        return True
+    except Exception as e:
+        console.print(f"[error]❌ Error deleting dataset: {e}[/error]")
+        _LOGGER.exception(e)
+        return False
+
+
+def clean_all_datasets() -> bool:
+    """Clean all locally downloaded datasets.
+
+    Returns:
+        True if datasets were cleaned, False otherwise
+    """
+    datasets = discover_local_datasets()
+
+    if not datasets:
+        console.print("[dim]No local datasets found to clean.[/dim]")
+        return True
+
+    console.print(f"[warning]⚠️ About to delete {len(datasets)} dataset(s):[/warning]")
+
+    table = Table(show_header=True, header_style="bold red")
+    table.add_column("Dataset Name", style="cyan")
+    table.add_column("Size", justify="right", style="green")
+
+    total_size = 0
+    for dataset in datasets:
+        table.add_row(dataset['name'], dataset['size'])
+        total_size += dataset['size_bytes']
+
+    console.print(table)
+    console.print(f"\n[bold red]Total size to be deleted:[/bold red] {_format_size(total_size)}")
+
+    confirmed = Confirm.ask("Are you sure you want to delete ALL local datasets? (this does not affect remote datasets)",
+                            default=False, console=console)
+
+    if not confirmed:
+        console.print("[dim]Operation cancelled.[/dim]")
+        return False
+
+    success_count = 0
+    for dataset in datasets:
+        try:
+            shutil.rmtree(dataset['path'])
+            console.print(f"[success]✅ Deleted: {dataset['name']}[/success]")
+            success_count += 1
+        except Exception as e:
+            console.print(f"[error]❌ Failed to delete {dataset['name']}: {e}[/error]")
+            _LOGGER.exception(e)
+
+    if success_count == len(datasets):
+        console.print(f"[success]✅ Successfully deleted all {success_count} datasets.[/success]")
+        return True
+    else:
+        console.print(f"[warning]⚠️ Deleted {success_count} out of {len(datasets)} datasets.[/warning]")
+        return False
+
+
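Both cleaners follow the same confirm-then-delete pattern: show what will be removed, ask via rich's Confirm with a safe default of False, and only then call shutil.rmtree. A self-contained sketch of that pattern (names are illustrative, not part of the package):

    import shutil
    from rich.console import Console
    from rich.prompt import Confirm

    console = Console()

    def delete_dir_with_confirmation(path: str) -> bool:
        """Ask before irreversibly removing a directory tree."""
        if not Confirm.ask(f"Delete {path}?", default=False, console=console):
            console.print("[dim]Operation cancelled.[/dim]")
            return False
        shutil.rmtree(path)
        return True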
+def interactive_dataset_cleaning() -> None:
+    """Interactive dataset cleaning menu."""
+    datasets = show_local_datasets()
+
+    if not datasets:
+        return
+
+    console.print("\n[title]🧹 Dataset Cleaning Options:[/title]")
+    console.print(" [accent](1)[/accent] Clean a specific dataset")
+    console.print(" [accent](2)[/accent] Clean all datasets")
+    console.print(" [accent](b)[/accent] Back to main menu")
+
+    try:
+        choice = Prompt.ask("Enter your choice", console=console).lower().strip()
+
+        # Handle ESC key (appears as escape sequence)
+        if choice in ('', '\x1b', 'esc', 'escape'):
+            return
+
+        if choice == '1':
+            dataset_names = [d['name'] for d in datasets]
+            console.print("\n[title]Available datasets:[/title]")
+            for i, name in enumerate(dataset_names, 1):
+                console.print(f" [accent]({i})[/accent] {name}")
+
+            dataset_choice = Prompt.ask("Enter dataset number or name", console=console).strip()
+
+            # Handle ESC key in dataset selection
+            if dataset_choice in ('', '\x1b', 'esc', 'escape'):
+                return
+
+            # Handle numeric choice
+            try:
+                dataset_idx = int(dataset_choice) - 1
+                if 0 <= dataset_idx < len(dataset_names):
+                    clean_dataset(dataset_names[dataset_idx])
+                    return
+            except ValueError:
+                pass
+
+            # Handle name choice
+            if dataset_choice in dataset_names:
+                clean_dataset(dataset_choice)
+            else:
+                console.print("[error]❌ Invalid dataset selection.[/error]")
+
+        elif choice == '2':
+            clean_all_datasets()
+        elif choice != 'b':
+            console.print("[error]❌ Invalid choice.[/error]")
+    except KeyboardInterrupt:
+        pass
+
+
 def interactive_mode():
     """Run the interactive configuration mode."""
     console.print("[title]🔧 Datamint Configuration Tool[/title]")
@@ -113,6 +345,7 @@ def interactive_mode():
             console.print(" [accent](3)[/accent] Show all configuration settings")
             console.print(" [accent](4)[/accent] Clear all configuration settings")
             console.print(" [accent](5)[/accent] Test connection")
+            console.print(" [accent](6)[/accent] Manage/Show local datasets...")
             console.print(" [accent](q)[/accent] Exit")
             choice = Prompt.ask("Enter your choice", console=console).lower().strip()

@@ -126,15 +359,18 @@ def interactive_mode():
                 clear_all_configurations()
             elif choice == '5':
                 test_connection()
+            elif choice == '6':
+                interactive_dataset_cleaning()
             elif choice in ('q', 'exit', 'quit'):
                 break
             else:
-                console.print("[error]❌ Invalid choice. Please enter a number between 1 and
+                console.print("[error]❌ Invalid choice. Please enter a number between 1 and 7 or 'q' to quit.[/error]")
    except KeyboardInterrupt:
        console.print('')

    console.print("[success]👋 Goodbye![/success]")

+
 def main():
     """Main entry point for the configuration tool."""
     global console
@@ -146,6 +382,9 @@ def main():
 Examples:
   datamint-config                       # Interactive mode
   datamint-config --api-key YOUR_KEY    # Set API key
+  datamint-config --list-datasets       # Show local datasets
+  datamint-config --clean-dataset NAME  # Clean specific dataset
+  datamint-config --clean-all-datasets  # Clean all datasets

 More Documentation: https://sonanceai.github.io/datamint-python-api/command_line_tools.html
 """,
@@ -155,6 +394,12 @@ More Documentation: https://sonanceai.github.io/datamint-python-api/command_line
     parser.add_argument('--default-url', '--url', type=str, help='Default URL to set')
     parser.add_argument('-i', '--interactive', action='store_true',
                         help='Interactive mode (default if no other arguments provided)')
+    parser.add_argument('--list-datasets', action='store_true',
+                        help='List all locally downloaded datasets')
+    parser.add_argument('--clean-dataset', type=str, metavar='DATASET_NAME',
+                        help='Clean a specific dataset by name')
+    parser.add_argument('--clean-all-datasets', action='store_true',
+                        help='Clean all locally downloaded datasets')

     args = parser.parse_args()

@@ -170,7 +415,18 @@ More Documentation: https://sonanceai.github.io/datamint-python-api/command_line
         configs.set_value(configs.APIURL_KEY, args.default_url)
         console.print("[success]✅ Default URL saved.[/success]")

-
+    if args.list_datasets:
+        show_local_datasets()
+
+    if args.clean_dataset:
+        clean_dataset(args.clean_dataset)
+
+    if args.clean_all_datasets:
+        clean_all_datasets()
+
+    no_arguments_provided = (args.api_key is None and args.default_url is None and
+                             not args.list_datasets and not args.clean_dataset and
+                             not args.clean_all_datasets)

     if no_arguments_provided or args.interactive:
         interactive_mode()
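Taken together, the release turns datamint-config into a local cache manager as well as a credential tool. The new flags map one-to-one onto the functions above; typical invocations (the dataset name is a placeholder):

    # List locally cached datasets with sizes and paths
    datamint-config --list-datasets

    # Delete one cached dataset after an interactive confirmation
    datamint-config --clean-dataset MY_PROJECT

    # Delete every locally cached dataset; remote data is not affected
    datamint-config --clean-all-datasets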
datamint/dataset/base_dataset.py
CHANGED
@@ -1,7 +1,7 @@
 import os
 import requests
 from tqdm.auto import tqdm
-from typing import Optional, Callable, Any, Literal
+from typing import Optional, Callable, Any, Literal, Sequence
 import logging
 import shutil
 import json
@@ -66,8 +66,8 @@ class DatamintBaseDataset:
                  project_name: str,
                  root: str | None = None,
                  auto_update: bool = True,
-                 api_key:
-                 server_url:
+                 api_key: str | None = None,
+                 server_url: str | None = None,
                  return_dicom: bool = False,
                  return_metainfo: bool = True,
                  return_annotations: bool = True,
@@ -75,14 +75,14 @@ class DatamintBaseDataset:
                  include_unannotated: bool = True,
                  all_annotations: bool = False,
                  # Filtering parameters
-                 include_annotators:
-                 exclude_annotators:
-                 include_segmentation_names:
-                 exclude_segmentation_names:
-                 include_image_label_names:
-                 exclude_image_label_names:
-                 include_frame_label_names:
-                 exclude_frame_label_names:
+                 include_annotators: list[str] | None = None,
+                 exclude_annotators: list[str] | None = None,
+                 include_segmentation_names: list[str] | None = None,
+                 exclude_segmentation_names: list[str] | None = None,
+                 include_image_label_names: list[str] | None = None,
+                 exclude_image_label_names: list[str] | None = None,
+                 include_frame_label_names: list[str] | None = None,
+                 exclude_frame_label_names: list[str] | None = None,
                  ):
         self._validate_inputs(project_name, include_annotators, exclude_annotators,
                               include_segmentation_names, exclude_segmentation_names,
@@ -106,14 +106,14 @@ class DatamintBaseDataset:
     def _validate_inputs(
             self,
             project_name: str,
-            include_annotators:
-            exclude_annotators:
-            include_segmentation_names:
-            exclude_segmentation_names:
-            include_image_label_names:
-            exclude_image_label_names:
-            include_frame_label_names:
-            exclude_frame_label_names:
+            include_annotators: Sequence[str] | None,
+            exclude_annotators: Sequence[str] | None,
+            include_segmentation_names: Sequence[str] | None,
+            exclude_segmentation_names: Sequence[str] | None,
+            include_image_label_names: Sequence[str] | None,
+            exclude_image_label_names: Sequence[str] | None,
+            include_frame_label_names: Sequence[str] | None,
+            exclude_frame_label_names: Sequence[str] | None,
     ) -> None:
         """Validate input parameters."""
         if project_name is None:
@@ -360,11 +360,13 @@ class DatamintBaseDataset:
     @property
     def segmentation_labels_set(self) -> list[str]:
         """Returns the set of segmentation labels in the dataset."""
-
+        a = set(self.frame_lsets['segmentation'])
+        b = set(self.image_lsets['segmentation'])
+        return list(a.union(b))

     def _get_annotations_internal(
             self,
-            annotations:
+            annotations: Sequence[Annotation],
             type: Literal['label', 'category', 'segmentation', 'all'] = 'all',
             scope: Literal['frame', 'image', 'all'] = 'all'
     ) -> list[Annotation]:
@@ -441,10 +443,8 @@ class DatamintBaseDataset:

     def get_resources_ids(self) -> list[str]:
         """Get list of resource IDs."""
-        return [
-
-            for i in self.subset_indices
-        ]
+        return [self._get_image_metainfo(i, bypass_subset_indices=True)['metainfo']['id']
+                for i in self.subset_indices]

     def _get_labels_set(self, framed: bool) -> tuple[dict, dict[str, dict[str, int]]]:
         """Returns the set of labels and mappings to integers.
@@ -992,7 +992,6 @@ class DatamintBaseDataset:
             return Path(resource['file'])
         else:
             # ext = guess_extension(resource['mimetype'])
-            # _LOGGER.debug(f"Guessed extension for resource {resource['id']}|{resource['mimetype']}: {ext}")
             # if ext is None:
             #     _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
             #     ext = ''
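The 2.1.1 signature makes the previously untyped filter arguments explicit: the constructor takes list[str] | None (all defaulting to None), while _validate_inputs accepts any Sequence[str]. A hedged construction example using the parameter names from the signature above (the project and filter values are placeholders, and instantiating the class normally requires a configured API key and server access):

    from datamint.dataset.base_dataset import DatamintBaseDataset

    ds = DatamintBaseDataset(
        project_name="my-project",                 # required
        include_annotators=["alice@example.com"],  # keep only these annotators
        exclude_segmentation_names=["draft"],      # drop matching segmentations
    )
    # New in this release: union of frame-level and image-level labels.
    print(ds.segmentation_labels_set)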
datamint/dataset/dataset.py
CHANGED
@@ -1,5 +1,5 @@
 from .base_dataset import DatamintBaseDataset
-from typing import List, Optional, Callable, Any, Dict, Literal
+from typing import List, Optional, Callable, Any, Dict, Literal, Sequence
 import torch
 from torch import Tensor
 import os
@@ -8,6 +8,7 @@ import logging
 from PIL import Image
 import albumentations
 from datamint.entities.annotation import Annotation
+from medimgkit.readers import read_array_normalized

 _LOGGER = logging.getLogger(__name__)

@@ -117,7 +118,9 @@ class DatamintDataset(DatamintBaseDataset):
         if semantic_seg_merge_strategy is not None and not return_as_semantic_segmentation:
             raise ValueError("semantic_seg_merge_strategy can only be used if return_as_semantic_segmentation is True")

-    def _load_segmentations(self,
+    def _load_segmentations(self,
+                            annotations: list[Annotation],
+                            img_shape) -> tuple[dict[str, list], dict[str, list]]:
         """
         Load segmentations from annotations.

@@ -152,19 +155,27 @@ class DatamintDataset(DatamintBaseDataset):

             segfilepath = ann.file  # png file
             segfilepath = os.path.join(self.dataset_dir, segfilepath)
-
-            seg
-
-
-
+            seg = read_array_normalized(segfilepath)  # (frames, C, H, W)
+            if seg.shape[1] != 1:
+                raise ValueError(f"Segmentation file must have 1 channel, got {seg.shape} in {segfilepath}")
+            seg = seg[:, 0, :, :]  # (frames, H, W)
+
+            # # FIXME: avoid enforcing resizing the mask
+            # seg = (Image.open(segfilepath)
+            #        .convert('L')
+            #        .resize((w, h), Image.Resampling.NEAREST)
+            #        )
+            # seg = np.array(seg)

             seg = torch.from_numpy(seg)
             seg = seg == 255  # binary mask
             # map the segmentation label to the code
-            seg_code = self.frame_lcodes['segmentation'][ann.name]
             if self.return_frame_by_frame:
                 frame_index = 0
+                if seg.shape[0] != 1:
+                    raise NotImplementedError(
+                        "Volume segmentations are not supported yet when return_frame_by_frame is True")
+                seg = seg[0:1]  # (#frames, H, W) -> (1, H, W)
             else:
                 frame_index = ann.index

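The loader now goes through medimgkit's read_array_normalized, which, per the inline comment, returns a (frames, C, H, W) array; masks must be single-channel, are squeezed to (frames, H, W), and binarized against 255. A standalone sketch of that normalization with the reader stubbed by a NumPy array (shapes and values are illustrative):

    import numpy as np
    import torch

    # Stand-in for read_array_normalized(segfilepath): (frames, C, H, W)
    seg = np.zeros((10, 1, 64, 64), dtype=np.uint8)
    seg[3, 0, 20:40, 20:40] = 255  # one labeled region on frame 3

    if seg.shape[1] != 1:
        raise ValueError(f"Segmentation file must have 1 channel, got {seg.shape}")
    seg = seg[:, 0, :, :]                  # (frames, H, W)
    mask = torch.from_numpy(seg) == 255    # boolean per-frame mask
    print(mask.shape, int(mask[3].sum()))  # torch.Size([10, 64, 64]) 400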
@@ -174,12 +185,25 @@ class DatamintDataset(DatamintBaseDataset):
             author_segs = segmentations[author]
             author_labels = seg_labels[author]

-            if
-
-
-
-
+            if frame_index is not None and ann.scope == 'frame':
+                seg_code = self.frame_lcodes['segmentation'][ann.name]
+                if author_segs[frame_index] is None:
+                    author_segs[frame_index] = []
+                    author_labels[frame_index] = []
+                s = seg[0] if seg.shape[0] == 1 else seg[frame_index]
+                author_segs[frame_index].append(s)
+                author_labels[frame_index].append(seg_code)
+            elif frame_index is None and ann.scope == 'image':
+                seg_code = self.image_lcodes['segmentation'][ann.name]
+                # apply to all frames
+                for i in range(nframes):
+                    if author_segs[i] is None:
+                        author_segs[i] = []
+                        author_labels[i] = []
+                    author_segs[i].append(seg[i])
+                    author_labels[i].append(seg_code)
+            else:
+                raise ValueError(f"Invalid segmentation annotation: {ann}")

         # convert to tensor
         for author in segmentations.keys():
@@ -196,8 +220,8 @@ class DatamintDataset(DatamintBaseDataset):
         return segmentations, seg_labels

     def _instanceseg2semanticseg(self,
-                                 segmentations:
-                                 seg_labels:
+                                 segmentations: Sequence[Tensor],
+                                 seg_labels: Sequence[Tensor]) -> Tensor:
         """
         Convert instance segmentation to semantic segmentation.

@@ -208,25 +232,26 @@ class DatamintDataset(DatamintBaseDataset):
         Returns:
             Tensor: tensor of shape (n, num_labels, H, W), where `n` is the number of frames.
         """
-        if segmentations is
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if segmentations is None:
+            return None
+
+        if len(segmentations) != len(seg_labels):
+            raise ValueError("segmentations and seg_labels must have the same length")
+
+        h, w = segmentations[0].shape[1:]
+        new_shape = (len(segmentations),
+                     len(self.segmentation_labels_set)+1,  # +1 for background
+                     h, w)
+        new_segmentations = torch.zeros(new_shape, dtype=torch.uint8)
+        # for each frame
+        for i in range(len(segmentations)):
+            # for each instance
+            for j in range(len(segmentations[i])):
+                new_segmentations[i, seg_labels[i][j]] += segmentations[i][j]
+        new_segmentations = new_segmentations > 0
+        # pixels that are not in any segmentation are labeled as background
+        new_segmentations[:, 0] = new_segmentations.sum(dim=1) == 0
+        return new_segmentations.float()

     def apply_semantic_seg_merge_strategy(self, segmentations: dict[str, Tensor],
                                           nframes: int,
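The rewritten _instanceseg2semanticseg allocates num_labels + 1 channels (channel 0 reserved for background), ORs each instance mask into the channel given by its label code, then marks pixels covered by no instance as background. A toy reproduction of that logic outside the class (label codes and shapes are illustrative; the real method takes the label count from self.segmentation_labels_set):

    import torch

    num_labels = 2
    frames, h, w = 1, 4, 4
    inst_a = torch.zeros(h, w, dtype=torch.uint8); inst_a[0, :2] = 1
    inst_b = torch.zeros(h, w, dtype=torch.uint8); inst_b[3, 2:] = 1
    segmentations = [[inst_a, inst_b]]  # per frame: list of instance masks
    seg_labels = [[1, 1]]               # label code of each instance

    out = torch.zeros((frames, num_labels + 1, h, w), dtype=torch.uint8)
    for i in range(frames):
        for j in range(len(segmentations[i])):
            out[i, seg_labels[i][j]] += segmentations[i][j]
    out = out > 0
    out[:, 0] = out.sum(dim=1) == 0  # background where nothing was labeled
    print(out.float()[0, 1])  # both instances merged into label channel 1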
@@ -338,7 +363,7 @@ class DatamintDataset(DatamintBaseDataset):
         if isinstance(labels, Tensor):
             # single tensor for the author
             seg_names[author] = [code_to_name[code.item()-1] for code in labels]
-        elif isinstance(labels,
+        elif isinstance(labels, Sequence):
             # list of frame tensors
             seg_names[author] = [[code_to_name[code.item()-1] for code in frame_labels]
                                  for frame_labels in labels]
@@ -477,7 +502,7 @@ class DatamintDataset(DatamintBaseDataset):
         return new_item

     def _convert_labels_annotations(self,
-                                    annotations:
+                                    annotations: Sequence[Annotation],
                                     num_frames: int | None = None) -> dict[str, torch.Tensor]:
         """
         Converts the annotations, of the same type and scope, to tensor of shape (num_frames, num_labels)
{datamint-2.0.2.dist-info → datamint-2.1.1.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: datamint
-Version: 2.0.2
+Version: 2.1.1
 Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
 Requires-Python: >=3.10
 Classifier: Programming Language :: Python :: 3
@@ -21,7 +21,7 @@ Requires-Dist: humanize (>=4.0.0,<5.0.0)
 Requires-Dist: lazy-loader (>=0.3.0)
 Requires-Dist: lightning
 Requires-Dist: matplotlib
-Requires-Dist: medimgkit (>=0.6.
+Requires-Dist: medimgkit (>=0.6.3)
 Requires-Dist: nest-asyncio (>=1.0.0,<2.0.0)
 Requires-Dist: nibabel (>=4.0.0)
 Requires-Dist: numpy
{datamint-2.0.2.dist-info → datamint-2.1.1.dist-info}/RECORD
CHANGED

@@ -19,13 +19,13 @@ datamint/apihandler/dto/annotation_dto.py,sha256=KUeHbxLYols16q-ANNxC48eH4EA8Tc-
 datamint/apihandler/exp_api_handler.py,sha256=hFUgUgBc5rL7odK7gTW3MnrvMY1pVfJUpUdzRNobMQE,6226
 datamint/apihandler/root_api_handler.py,sha256=jBof_XPTeq4o41CW-l-I5GHQKVa76kaX75RovS_qAM4,63384
 datamint/client_cmd_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamint/client_cmd_tools/datamint_config.py,sha256=
+datamint/client_cmd_tools/datamint_config.py,sha256=MpR5UHv_xpElOOYyEESBkDg2n3JjP_PNLI2jqmZgYQ8,16222
 datamint/client_cmd_tools/datamint_upload.py,sha256=jPzvlNeBZfOOxuG6ryswJ8OG4jXuTrPtArUetoKVGj0,36073
 datamint/configs.py,sha256=Bdp6NydYwyCJ2dk19_gf_o3M2ZyQOmMHpLi8wEWNHUk,1426
 datamint/dataset/__init__.py,sha256=4PlUKSvVhdfQvvuq8jQXrkdqnot-iTTizM3aM1vgSwg,47
 datamint/dataset/annotation.py,sha256=qN1IMjdfLD2ceQ6va3l76jOXA8Vb_c-eBk1oWQu6hW0,7994
-datamint/dataset/base_dataset.py,sha256=
-datamint/dataset/dataset.py,sha256=
+datamint/dataset/base_dataset.py,sha256=xdWnYNZ6yKi__YGkrJ3hssiljRV3InGlJn8zDcgJnp8,49547
+datamint/dataset/dataset.py,sha256=huUOyBRGVtcx0tcpX2FrsWn7Vsqy5i5e_J52gxR_29A,28637
 datamint/entities/__init__.py,sha256=tbHE7rZb0R9Hm-Dc8VWEq3PlRl7BYOzffumrV0ZdsMs,444
 datamint/entities/annotation.py,sha256=ochAEh_JqxAe_FyYTNUfPT47KiIAG7CkBTim52bu7M8,6636
 datamint/entities/base_entity.py,sha256=DniakCgJ-gV7Hz8VKQA_dRYTp4DU5rcjLBVOuD1aZuA,1902
@@ -44,7 +44,7 @@ datamint/logging.yaml,sha256=tOMxtc2UmwlIMTK6ljtnBwTco1PNrPeq3mx2iMuSbiw,482
 datamint/utils/logging_utils.py,sha256=9pRoaPrWu2jOdDCiAoUsjEdP5ZwaealWL3hjUqFvx9g,4022
 datamint/utils/torchmetrics.py,sha256=lwU0nOtsSWfebyp7dvjlAggaqXtj5ohSEUXOg3L0hJE,2837
 datamint/utils/visualization.py,sha256=yaUVAOHar59VrGUjpAWv5eVvQSfztFG0eP9p5Vt3l-M,4470
-datamint-2.0.2.dist-info/METADATA,sha256=
-datamint-2.0.2.dist-info/WHEEL,sha256=
-datamint-2.0.2.dist-info/entry_points.txt,sha256=
-datamint-2.0.2.dist-info/RECORD,,
+datamint-2.1.1.dist-info/METADATA,sha256=FBdoHxxRVesJl4EX2IO2LL82e86sy_cJ8Uhuvkm8Sug,4203
+datamint-2.1.1.dist-info/WHEEL,sha256=M5asmiAlL6HEcOq52Yi5mmk9KmTVjY2RDPtO4p9DMrc,88
+datamint-2.1.1.dist-info/entry_points.txt,sha256=mn5H6jPjO-rY0W0CAZ6Z_KKWhMLvyVaSpoqk77jlTI4,145
+datamint-2.1.1.dist-info/RECORD,,

{datamint-2.0.2.dist-info → datamint-2.1.1.dist-info}/WHEEL: file without changes
{datamint-2.0.2.dist-info → datamint-2.1.1.dist-info}/entry_points.txt: file without changes