junifer 0.0.5.dev68__py3-none-any.whl → 0.0.5.dev93__py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- junifer/_version.py +2 -2
- junifer/api/functions.py +1 -1
- junifer/configs/juseless/datagrabbers/tests/test_ucla.py +1 -3
- junifer/configs/juseless/datagrabbers/ucla.py +9 -9
- junifer/data/masks.py +10 -22
- junifer/data/parcellations.py +1 -1
- junifer/data/tests/test_masks.py +8 -28
- junifer/datagrabber/aomic/id1000.py +34 -38
- junifer/datagrabber/aomic/piop1.py +33 -37
- junifer/datagrabber/aomic/piop2.py +35 -39
- junifer/datagrabber/aomic/tests/test_id1000.py +10 -11
- junifer/datagrabber/aomic/tests/test_piop1.py +10 -11
- junifer/datagrabber/aomic/tests/test_piop2.py +10 -11
- junifer/datagrabber/datalad_base.py +10 -1
- junifer/datagrabber/dmcc13_benchmark.py +36 -54
- junifer/datagrabber/pattern.py +116 -46
- junifer/datagrabber/pattern_datalad.py +22 -12
- junifer/datagrabber/tests/test_datagrabber_utils.py +15 -9
- junifer/datagrabber/tests/test_dmcc13_benchmark.py +46 -19
- junifer/datagrabber/utils.py +127 -54
- junifer/datareader/default.py +91 -42
- junifer/preprocess/base.py +2 -2
- junifer/preprocess/confounds/fmriprep_confound_remover.py +44 -60
- junifer/preprocess/confounds/tests/test_fmriprep_confound_remover.py +72 -113
- junifer/storage/base.py +37 -1
- junifer/storage/hdf5.py +68 -9
- junifer/storage/tests/test_hdf5.py +82 -10
- junifer/testing/datagrabbers.py +5 -5
- junifer/testing/tests/test_partlycloudytesting_datagrabber.py +7 -7
- {junifer-0.0.5.dev68.dist-info → junifer-0.0.5.dev93.dist-info}/METADATA +1 -1
- {junifer-0.0.5.dev68.dist-info → junifer-0.0.5.dev93.dist-info}/RECORD +36 -36
- {junifer-0.0.5.dev68.dist-info → junifer-0.0.5.dev93.dist-info}/AUTHORS.rst +0 -0
- {junifer-0.0.5.dev68.dist-info → junifer-0.0.5.dev93.dist-info}/LICENSE.md +0 -0
- {junifer-0.0.5.dev68.dist-info → junifer-0.0.5.dev93.dist-info}/WHEEL +0 -0
- {junifer-0.0.5.dev68.dist-info → junifer-0.0.5.dev93.dist-info}/entry_points.txt +0 -0
- {junifer-0.0.5.dev68.dist-info → junifer-0.0.5.dev93.dist-info}/top_level.txt +0 -0
junifer/preprocess/confounds/tests/test_fmriprep_confound_remover.py
CHANGED
@@ -20,6 +20,7 @@ from junifer.testing import get_testing_data
 from junifer.testing.datagrabbers import (
     OasisVBMTestingDataGrabber,
     PartlyCloudyTestingDataGrabber,
+    SPMAuditoryTestingDataGrabber,
 )
 
 
@@ -42,35 +43,10 @@ def test_fMRIPrepConfoundRemover_init() -> None:
 @pytest.mark.parametrize(
     "input_",
     [
-        ["T1w"],
         ["BOLD"],
         ["T1w", "BOLD"],
     ],
 )
-def test_fMRIPrepConfoundRemover_validate_input_errors(
-    input_: List[str],
-) -> None:
-    """Test errors for fMRIPrepConfoundRemover validate_input.
-
-    Parameters
-    ----------
-    input_ : list of str
-        The input data types.
-
-    """
-    confound_remover = fMRIPrepConfoundRemover()
-
-    with pytest.raises(ValueError, match="not have the required data"):
-        confound_remover.validate_input(input_)
-
-
-@pytest.mark.parametrize(
-    "input_",
-    [
-        ["BOLD", "BOLD_confounds"],
-        ["T1w", "BOLD", "BOLD_confounds"],
-    ],
-)
 def test_fMRIPrepConfoundRemover_validate_input(input_: List[str]) -> None:
     """Test fMRIPrepConfoundRemover validate_input.
 
@@ -302,13 +278,13 @@ def test_fMRIPRepConfoundRemover__pick_confounds_fmriprep() -> None:
     with PartlyCloudyTestingDataGrabber() as dg:
         input = dg["sub-01"]
         input = reader.fit_transform(input)
-        out1 = confound_remover._pick_confounds(input["BOLD_confounds"])
+        out1 = confound_remover._pick_confounds(input["BOLD"]["confounds"])
         assert set(out1.columns) == {*fmriprep_all_vars, "spike"}
 
     with PartlyCloudyTestingDataGrabber(reduce_confounds=False) as dg:
         input = dg["sub-01"]
         input = reader.fit_transform(input)
-        out2 = confound_remover._pick_confounds(input["BOLD_confounds"])
+        out2 = confound_remover._pick_confounds(input["BOLD"]["confounds"])
         assert set(out2.columns) == {*fmriprep_all_vars, "spike"}
 
     assert_frame_equal(out1, out2)
@@ -348,123 +324,106 @@ def test_fMRIPRepConfoundRemover__pick_confounds_fmriprep_compute() -> None:
 def test_fMRIPrepConfoundRemover__validate_data() -> None:
     """Test fMRIPrepConfoundRemover validate data."""
     confound_remover = fMRIPrepConfoundRemover(strategy={"wm_csf": "full"})
-
+    # Check correct data type
     with OasisVBMTestingDataGrabber() as dg:
         element_data = DefaultDataReader().fit_transform(dg["sub-01"])
         vbm = element_data["VBM_GM"]
         with pytest.raises(
             DimensionError, match="incompatible dimensionality"
         ):
-            confound_remover._validate_data(vbm, None)
-
-    with SPMAuditoryTestingDataGrabber() as dg:
+            confound_remover._validate_data(vbm)
+    # Check missing nested type in correct data type
+    with SPMAuditoryTestingDataGrabber() as dg:
         element_data = DefaultDataReader().fit_transform(dg["sub-01"])
         bold = element_data["BOLD"]
-
-        with pytest.raises(ValueError, match="No extra input"):
-            confound_remover._validate_data(bold, None)
+        # Test confound type
         with pytest.raises(
-            ValueError, match="`BOLD_confounds` data type not provided"
+            ValueError, match="`BOLD.confounds` data type not provided"
         ):
-            confound_remover._validate_data(bold, {})
+            confound_remover._validate_data(bold)
+        # Test confound data
+        bold["confounds"] = {}
         with pytest.raises(
-            ValueError, match="`BOLD_confounds.data` not provided"
+            ValueError, match="`BOLD.confounds.data` not provided"
         ):
-            confound_remover._validate_data(bold, {"BOLD_confounds": {}})
-
-
-
-
-
-
-            confound_remover._validate_data(bold, extra_input)
-
-        extra_input = {"BOLD_confounds": {"data": pd.DataFrame()}}
+            confound_remover._validate_data(bold)
+        # Test confound data is valid type
+        bold["confounds"] = {"data": None}
+        with pytest.raises(ValueError, match="must be a `pandas.DataFrame`"):
+            confound_remover._validate_data(bold)
+        # Test confound data dimension mismatch with BOLD
+        bold["confounds"] = {"data": pd.DataFrame()}
         with pytest.raises(ValueError, match="Image time series and"):
-            confound_remover._validate_data(bold, extra_input)
-
-
-
+            confound_remover._validate_data(bold)
+    # Check nested type variations
+    with PartlyCloudyTestingDataGrabber(reduce_confounds=False) as dg:
+        element_data = DefaultDataReader().fit_transform(dg["sub-01"])
+        # Test format
+        modified_bold = {
+            "data": element_data["BOLD"]["data"],
+            "confounds": {
+                "data": element_data["BOLD"]["confounds"]["data"],
+                "format": "adhoc",
+            },
         }
+        # Test incorrect format
+        modified_bold["confounds"].update({"format": "wrong"})
+        with pytest.raises(ValueError, match="Invalid confounds format"):
+            confound_remover._validate_data(modified_bold)
+        # Test missing mappings for adhoc
+        modified_bold["confounds"].update({"format": "adhoc"})
         with pytest.raises(
-            ValueError, match="`BOLD_confounds.mappings` need to be set"
+            ValueError, match="`BOLD.confounds.mappings` need to be set"
         ):
-            confound_remover._validate_data(bold, extra_input)
-
-
-
-
-
-
-
-
-
-
-        extra_input = {
-            "BOLD_confounds": {
-                "data": element_data["BOLD_confounds"]["data"],
-                "format": "adhoc",
-            }
-        }
-        with pytest.raises(ValueError, match="need to be set"):
-            confound_remover._validate_data(bold, extra_input)
-
-        extra_input = {
-            "BOLD_confounds": {
-                "data": element_data["BOLD_confounds"]["data"],
-                "format": "adhoc",
-                "mappings": {},
-            }
-        }
-        with pytest.raises(ValueError, match="need to be set"):
-            confound_remover._validate_data(bold, extra_input)
-
-        extra_input = {
-            "BOLD_confounds": {
-                "data": element_data["BOLD_confounds"]["data"],
-                "format": "adhoc",
+            confound_remover._validate_data(modified_bold)
+        # Test missing fmriprep mappings for adhoc
+        modified_bold["confounds"].update({"mappings": {}})
+        with pytest.raises(
+            ValueError,
+            match="`BOLD.confounds.mappings.fmriprep` need to be set",
+        ):
+            confound_remover._validate_data(modified_bold)
+        # Test incorrect fmriprep mappings for adhoc
+        modified_bold["confounds"].update(
+            {
                 "mappings": {
                     "fmriprep": {
                         "rot_x": "wrong",
                         "rot_y": "rot_z",
                         "rot_z": "rot_y",
-                    }
-                }
+                    },
+                }
             }
-        }
+        )
         with pytest.raises(ValueError, match=r"names: \['wrong'\]"):
-            confound_remover._validate_data(bold, extra_input)
-
-        extra_input = {
-            "BOLD_confounds": {
-                "data": element_data["BOLD_confounds"]["data"],
-                "format": "adhoc",
+            confound_remover._validate_data(modified_bold)
+        # Test missing fmriprep mappings for adhoc
+        modified_bold["confounds"].update(
+            {
                 "mappings": {
                     "fmriprep": {
                         "wrong": "rot_x",
                         "rot_y": "rot_z",
                         "rot_z": "rot_y",
-                    }
-                }
+                    },
+                }
             }
-        }
+        )
         with pytest.raises(ValueError, match=r"Missing columns: \['wrong'\]"):
-            confound_remover._validate_data(bold, extra_input)
-
-        extra_input = {
-            "BOLD_confounds": {
-                "data": element_data["BOLD_confounds"]["data"],
-                "format": "adhoc",
+            confound_remover._validate_data(modified_bold)
+        # Test correct adhoc format
+        modified_bold["confounds"].update(
+            {
                 "mappings": {
                     "fmriprep": {
                         "rot_x": "rot_x",
                         "rot_y": "rot_z",
                         "rot_z": "rot_y",
-                    }
-                }
+                    },
+                }
             }
-        }
-        confound_remover._validate_data(bold, extra_input)
+        )
+        confound_remover._validate_data(modified_bold)
 
 
 def test_fMRIPrepConfoundRemover_preprocess() -> None:
@@ -476,7 +435,9 @@ def test_fMRIPrepConfoundRemover_preprocess() -> None:
         element_data = DefaultDataReader().fit_transform(dg["sub-01"])
         orig_bold = element_data["BOLD"]["data"].get_fdata().copy()
         pre_input = element_data["BOLD"]
-        pre_extra_input = {"BOLD_confounds": element_data["BOLD_confounds"]}
+        pre_extra_input = {
+            "BOLD": {"confounds": element_data["BOLD"]["confounds"]}
+        }
         output, _ = confound_remover.preprocess(pre_input, pre_extra_input)
         trans_bold = output["data"].get_fdata()
         # Transformation is in place
@@ -530,7 +491,7 @@ def test_fMRIPrepConfoundRemover_fit_transform() -> None:
     assert t_meta["t_r"] is None
     assert t_meta["masks"] is None
 
-    assert "BOLD_mask" not in output
+    assert "mask" not in output["BOLD"]
 
     assert "dependencies" in output["BOLD"]["meta"]
     dependencies = output["BOLD"]["meta"]["dependencies"]
@@ -582,9 +543,7 @@ def test_fMRIPrepConfoundRemover_fit_transform_masks() -> None:
     assert "threshold" in t_meta["masks"]["compute_brain_mask"]
     assert t_meta["masks"]["compute_brain_mask"]["threshold"] == 0.2
 
-    assert "BOLD_mask" in output
-    assert "mask_item" in output["BOLD"]
-    assert output["BOLD"]["mask_item"] == "BOLD_mask"
+    assert "mask" in output["BOLD"]
 
     assert "dependencies" in output["BOLD"]["meta"]
     dependencies = output["BOLD"]["meta"]["dependencies"]
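
Taken together, these test changes track the new nested data layout: confounds no longer travel as a separate "BOLD_confounds" data type handed over via extra_input, but as a "confounds" dictionary nested inside the "BOLD" data type. A minimal sketch of that shape, assuming an illustrative confounds DataFrame built here for the example (this is not junifer documentation, just the structure the tests above exercise):

    import pandas as pd

    # Illustrative confounds table; in junifer this comes from the datareader.
    df = pd.DataFrame(
        {"rot_x": [0.1, 0.2], "rot_y": [0.0, 0.1], "rot_z": [0.2, 0.1]}
    )
    bold = {
        "data": None,  # placeholder for the 4D BOLD image object
        "confounds": {
            "data": df,
            "format": "adhoc",  # or "fmriprep"
            "mappings": {
                # column in your table -> fmriprep variable name, as the
                # "Missing columns"/"names" checks above suggest
                "fmriprep": {"rot_x": "rot_x", "rot_y": "rot_y", "rot_z": "rot_z"},
            },
        },
    }
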
junifer/storage/base.py
CHANGED
@@ -189,7 +189,7 @@ class BaseFeatureStorage(ABC):
 
         Parameters
         ----------
-        kind : {"matrix", "timeseries", "vector"}
+        kind : {"matrix", "timeseries", "vector", "scalar_table"}
             The storage kind.
         **kwargs
             The keyword arguments.
@@ -218,6 +218,10 @@
             )
         elif kind == "vector":
             self.store_vector(meta_md5=meta_md5, element=t_element, **kwargs)
+        elif kind == "scalar_table":
+            self.store_scalar_table(
+                meta_md5=meta_md5, element=t_element, **kwargs
+            )
 
     def store_matrix(
         self,
@@ -313,6 +317,38 @@
             klass=NotImplementedError,
         )
 
+    def store_scalar_table(
+        self,
+        meta_md5: str,
+        element: Dict,
+        data: np.ndarray,
+        col_names: Optional[Iterable[str]] = None,
+        row_names: Optional[Iterable[str]] = None,
+        row_header_col_name: Optional[str] = "feature",
+    ) -> None:
+        """Store table with scalar values.
+
+        Parameters
+        ----------
+        meta_md5 : str
+            The metadata MD5 hash.
+        element : dict
+            The element as a dictionary.
+        data : numpy.ndarray
+            The scalar table data to store.
+        col_names : list or tuple of str, optional
+            The column labels (default None).
+        row_names : list or tuple of str, optional
+            The row labels (default None).
+        row_header_col_name : str, optional
+            The column name for the row header column (default "feature").
+
+        """
+        raise_error(
+            msg="Concrete classes need to implement store_scalar_table().",
+            klass=NotImplementedError,
+        )
+
     @abstractmethod
     def collect(self) -> None:
         """Collect data."""
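
Since the base-class store_scalar_table() above only raises NotImplementedError, each concrete storage backend opts in by overriding it. A minimal sketch of such an override, using a hypothetical InMemoryStorage class (not part of junifer) purely to show the expected signature:

    from typing import Dict, Iterable, Optional

    import numpy as np

    class InMemoryStorage:  # hypothetical, for illustration only
        def __init__(self) -> None:
            self._tables: Dict[str, Dict] = {}

        def store_scalar_table(
            self,
            meta_md5: str,
            element: Dict,
            data: np.ndarray,
            col_names: Optional[Iterable[str]] = None,
            row_names: Optional[Iterable[str]] = None,
            row_header_col_name: Optional[str] = "feature",
        ) -> None:
            # Keep one stored table per (feature, element) pair.
            key = f"{meta_md5}/{sorted(element.items())!r}"
            self._tables[key] = {
                "data": data,
                "col_names": list(col_names) if col_names else None,
                "row_names": list(row_names) if row_names else None,
                "row_header_col_name": row_header_col_name,
            }
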
junifer/storage/hdf5.py
CHANGED
@@ -56,7 +56,8 @@ def _create_chunk(
     Raises
     ------
     ValueError
-        If `kind` is not one of ['vector', 'matrix', 'timeseries'].
+        If `kind` is not one of ['vector', 'matrix', 'timeseries',
+        'scalar_table'].
 
     """
     if kind in ["vector", "matrix"]:
@@ -77,7 +78,7 @@
             chunk_size=tuple(array_chunk_size),
             n_chunk=i_chunk,
         )
-    elif kind == "timeseries":
+    elif kind in ["timeseries", "scalar_table"]:
         out = ChunkedList(
             data=chunk_data,
             size=element_count,
@@ -86,7 +87,8 @@
     else:
         raise_error(
             f"Invalid kind: {kind}. "
-            "Must be one of ['vector', 'matrix', 'timeseries']."
+            "Must be one of ['vector', 'matrix', 'timeseries',"
+            "'scalar_table']."
         )
     return out
 
@@ -146,7 +148,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
         uri.parent.mkdir(parents=True, exist_ok=True)
 
         # Available storage kinds
-        storage_types = ["vector", "timeseries", "matrix"]
+        storage_types = ["vector", "timeseries", "matrix", "scalar_table"]
 
         super().__init__(
             uri=uri,
@@ -169,7 +171,7 @@
            storage.
 
         """
-        return ["matrix", "vector", "timeseries"]
+        return ["matrix", "vector", "timeseries", "scalar_table"]
 
     def _fetch_correct_uri_for_io(self, element: Optional[Dict]) -> str:
         """Return proper URI for I/O based on `element`.
@@ -508,6 +510,26 @@
             columns = hdf_data["column_headers"]
             # Convert data from 3D to 2D
             reshaped_data = np.concatenate(all_data, axis=0)
+        elif hdf_data["kind"] == "scalar_table":
+            # Create dictionary for aggregating index data
+            element_idx = defaultdict(list)
+            all_data = []
+            for idx, element in enumerate(hdf_data["element"]):
+                # Get row count for the element
+                t_data = hdf_data["data"][idx]
+                all_data.append(t_data)
+                n_rows = len(hdf_data["row_headers"])
+                # Set rows for the index
+                for key, val in element.items():
+                    element_idx[key].extend([val] * n_rows)
+                # Add extra column for row header column name
+                element_idx[hdf_data["row_header_column_name"]].extend(
+                    hdf_data["row_headers"]
+                )
+            # Set column headers for dataframe
+            columns = hdf_data["column_headers"]
+            # Convert data from 3D to 2D
+            reshaped_data = np.concatenate(all_data, axis=0)
 
         # Create dataframe for index
         idx_df = pd.DataFrame(data=element_idx)  # type: ignore
@@ -643,7 +665,7 @@
 
         Parameters
         ----------
-        kind : {"matrix", "vector", "timeseries"}
+        kind : {"matrix", "vector", "timeseries", "scalar_table"}
             The storage kind.
         meta_md5 : str
             The metadata MD5 hash.
@@ -739,8 +761,8 @@
             )
 
             t_data = stored_data["data"]
-            if kind == "timeseries":
-                t_data += data
+            if kind in ["timeseries", "scalar_table"]:
+                t_data += data
             else:
                 t_data = np.concatenate((t_data, data), axis=-1)
             # Existing entry; append to existing
@@ -921,6 +943,43 @@
             row_header_column_name="timepoint",
         )
 
+    def store_scalar_table(
+        self,
+        meta_md5: str,
+        element: Dict,
+        data: np.ndarray,
+        col_names: Optional[Iterable[str]] = None,
+        row_names: Optional[Iterable[str]] = None,
+        row_header_col_name: Optional[str] = "feature",
+    ) -> None:
+        """Store table with scalar values.
+
+        Parameters
+        ----------
+        meta_md5 : str
+            The metadata MD5 hash.
+        element : dict
+            The element as a dictionary.
+        data : numpy.ndarray
+            The scalar table data to store.
+        col_names : list or tuple of str, optional
+            The column labels (default None).
+        row_names : list or tuple of str, optional
+            The row labels (default None).
+        row_header_col_name : str, optional
+            The column name for the row header column (default "feature").
+
+        """
+        self._store_data(
+            kind="scalar_table",
+            meta_md5=meta_md5,
+            element=[element],  # convert to list
+            data=[data],  # convert to list
+            column_headers=col_names,
+            row_headers=row_names,
+            row_header_column_name=row_header_col_name,
+        )
+
     def collect(self) -> None:
         """Implement data collection.
 
@@ -1029,7 +1088,7 @@
             kind = static_data["kind"]
 
             # Append the "dynamic" data
-            if kind == "timeseries":
+            if kind in ["timeseries", "scalar_table"]:
                 chunk_data.extend(t_data["data"])
             else:
                 chunk_data.append(t_data["data"])
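
Throughout this file the scalar_table kind reuses the timeseries plumbing: each element contributes one 2D array, appends are plain list concatenation (t_data += data), and the read path stacks the per-element arrays row-wise. A small sketch of that arithmetic with plain numpy, independent of the HDF5 machinery:

    import numpy as np

    # Two elements, each storing a 3x2 scalar table (rows x columns).
    t_data = [np.arange(6).reshape(3, 2)]    # already-stored list of tables
    data = [np.arange(6, 12).reshape(3, 2)]  # newly stored element
    t_data += data                           # list append, as in _store_data()

    # Read path: concatenate per-element tables into one long 2D table.
    reshaped_data = np.concatenate(t_data, axis=0)
    assert reshaped_data.shape == (6, 2)
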
junifer/storage/tests/test_hdf5.py
CHANGED
@@ -25,7 +25,12 @@ from junifer.storage.utils import (
 def test_get_valid_inputs() -> None:
     """Test valid inputs."""
     storage = HDF5FeatureStorage(uri="/tmp")
-    assert storage.get_valid_inputs() == ["matrix", "vector", "timeseries"]
+    assert storage.get_valid_inputs() == [
+        "matrix",
+        "vector",
+        "timeseries",
+        "scalar_table",
+    ]
 
 
 def test_single_output(tmp_path: Path) -> None:
@@ -808,7 +813,7 @@ def test_store_timeseries(tmp_path: Path) -> None:
     data = np.array([[10], [20], [30], [40], [50]])
     col_names = ["signal"]
 
-    # Store
+    # Store timeseries
     storage.store_timeseries(
         meta_md5=meta_md5,
         element=element_to_store,
@@ -822,6 +827,53 @@ def test_store_timeseries(tmp_path: Path) -> None:
     assert_array_equal(read_df.values, data)
 
 
+def test_store_scalar_table(tmp_path: Path) -> None:
+    """Test scalar table store.
+
+    Parameters
+    ----------
+    tmp_path : pathlib.Path
+        The path to the test directory.
+
+    """
+    uri = tmp_path / "test_store_scalar_table.hdf5"
+    storage = HDF5FeatureStorage(uri=uri)
+    # Metadata to store
+    element = {"subject": "test"}
+    meta = {
+        "element": element,
+        "dependencies": ["numpy"],
+        "marker": {"name": "brainprint"},
+        "type": "FreeSurfer",
+    }
+    # Process the metadata
+    meta_md5, meta_to_store, element_to_store = process_meta(meta)
+    # Store metadata
+    storage.store_metadata(
+        meta_md5=meta_md5, element=element_to_store, meta=meta_to_store
+    )
+
+    # Data to store
+    data = np.array([[10, 20], [30, 40], [50, 60]])
+    col_names = ["roi1", "roi2"]
+    row_names = ["ev1", "ev2", "ev3"]
+
+    # Store scalar table
+    storage.store_scalar_table(
+        meta_md5=meta_md5,
+        element=element_to_store,
+        data=data,
+        col_names=col_names,
+        row_names=row_names,
+        row_header_col_name="eigenvalue",
+    )
+
+    # Read into dataframe
+    read_df = storage.read_df(feature_md5=meta_md5)
+    # Check if data are equal
+    assert_array_equal(read_df.values, data)
+
+
 def _create_data_to_store(n_elements: int, kind: str) -> Tuple[str, Dict]:
     """Create data to store.
 
@@ -854,13 +906,19 @@ def _create_data_to_store(n_elements: int, kind: str) -> Tuple[str, Dict]:
             "col_names": [f"col-{i}" for i in range(10)],
             "matrix_kind": "full",
         }
-    elif kind == "timeseries":
+    elif kind in "timeseries":
         data_to_store = {
             "data": np.arange(20).reshape(2, 10),
             "col_names": [f"col-{i}" for i in range(10)],
         }
-
-
+    elif kind in "scalar_table":
+        data_to_store = {
+            "data": np.arange(50).reshape(5, 10),
+            "row_names": [f"row-{i}" for i in range(5)],
+            "col_names": [f"col-{i}" for i in range(10)],
+            "row_header_col_name": "row",
+        }
+
     for i in range(n_elements):
         element = {"subject": f"sub-{i // 2}", "session": f"ses-{i % 2}"}
         meta = {
@@ -903,6 +961,7 @@
         (10, 3, "matrix"),
         (10, 5, "matrix"),
         (10, 5, "timeseries"),
+        (10, 5, "scalar_table"),
     ],
 )
 def test_multi_output_store_and_collect(
@@ -930,21 +989,20 @@
     meta_md5, all_data = _create_data_to_store(n_elements, kind)
 
     for t_data in all_data:
-        # Store metadata
+        # Store metadata
         storage.store_metadata(
             meta_md5=meta_md5,
             element=t_data["element"],
             meta=t_data["meta"],
         )
+        # Store data
         if kind == "vector":
-            # Store tables
             storage.store_vector(
                 meta_md5=meta_md5,
                 element=t_data["element"],
                 **t_data["data"],
             )
         elif kind == "matrix":
-            # Store tables
             storage.store_matrix(
                 meta_md5=meta_md5,
                 element=t_data["element"],
@@ -956,11 +1014,17 @@
                 element=t_data["element"],
                 **t_data["data"],
             )
+        elif kind == "scalar_table":
+            storage.store_scalar_table(
+                meta_md5=meta_md5,
+                element=t_data["element"],
+                **t_data["data"],
+            )
     # Check that base URI does not exist yet
     assert not uri.exists()
 
     for t_data in all_data:
-        # Convert element to
+        # Convert element to prefix
         prefix = element_to_prefix(t_data["element"])
         # URIs for data storage
         elem_uri = uri.parent / f"{prefix}{uri.name}"
@@ -977,7 +1041,7 @@
     # Check that base URI exists now
     assert uri.exists()
 
-    #
+    # Read unified metadata
     read_unified_meta = storage.list_features()
     assert meta_md5 in read_unified_meta
 
@@ -989,6 +1053,10 @@
         data_size = np.sum([x["data"]["data"].shape[0] for x in all_data])
         assert len(all_df) == data_size
         idx_names = [x for x in all_df.index.names if x != "timepoint"]
+    elif kind == "scalar_table":
+        data_size = np.sum([x["data"]["data"].shape[0] for x in all_data])
+        assert len(all_df) == data_size
+        idx_names = [x for x in all_df.index.names if x != "row"]
     else:
         assert len(all_df) == len(all_data)
         idx_names = all_df.index.names
@@ -1013,6 +1081,10 @@
             assert_array_equal(t_series.values, t_data["data"]["data"])
             series_names = t_series.columns.values.tolist()
             assert series_names == t_data["data"]["col_names"]
+        elif kind == "scalar_table":
+            assert_array_equal(t_series.values, t_data["data"]["data"])
+            series_names = t_series.columns.values.tolist()
+            assert series_names == t_data["data"]["col_names"]
 
 
 def test_collect_error_single_output() -> None:
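
For scalar tables, the dataframe read back by read_df() is long-format: the element keys are repeated once per row label, and the row header column ("eigenvalue" in the store test above, "row" in the parametrized helper) joins them in the index. A sketch of that index construction with plain pandas, mirroring the aggregation loop in hdf5.py rather than quoting it:

    import numpy as np
    import pandas as pd

    element = {"subject": "test"}
    row_headers = ["ev1", "ev2", "ev3"]
    # Repeat each element key once per row, then add the row header column.
    idx = {key: [val] * len(row_headers) for key, val in element.items()}
    idx["eigenvalue"] = row_headers

    df = pd.DataFrame(
        data=np.array([[10, 20], [30, 40], [50, 60]]),
        index=pd.MultiIndex.from_frame(pd.DataFrame(idx)),
        columns=["roi1", "roi2"],
    )
    assert list(df.index.names) == ["subject", "eigenvalue"]
    assert len(df) == 3  # one row per row label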
|