junifer 0.0.5__py3-none-any.whl → 0.0.5.dev11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- junifer/__init__.py +0 -17
- junifer/_version.py +2 -2
- junifer/api/__init__.py +1 -4
- junifer/api/cli.py +1 -91
- junifer/api/decorators.py +0 -9
- junifer/api/functions.py +10 -56
- junifer/api/parser.py +0 -3
- junifer/api/queue_context/__init__.py +1 -4
- junifer/api/res/afni/run_afni_docker.sh +1 -1
- junifer/api/res/ants/run_ants_docker.sh +1 -1
- junifer/api/res/fsl/run_fsl_docker.sh +1 -1
- junifer/api/tests/test_api_utils.py +2 -4
- junifer/api/tests/test_cli.py +0 -83
- junifer/api/tests/test_functions.py +2 -27
- junifer/configs/__init__.py +1 -1
- junifer/configs/juseless/__init__.py +1 -4
- junifer/configs/juseless/datagrabbers/__init__.py +1 -10
- junifer/configs/juseless/datagrabbers/aomic_id1000_vbm.py +0 -3
- junifer/configs/juseless/datagrabbers/camcan_vbm.py +0 -3
- junifer/configs/juseless/datagrabbers/ixi_vbm.py +0 -3
- junifer/configs/juseless/datagrabbers/tests/test_ucla.py +3 -1
- junifer/configs/juseless/datagrabbers/ucla.py +9 -12
- junifer/configs/juseless/datagrabbers/ukb_vbm.py +0 -3
- junifer/data/__init__.py +1 -21
- junifer/data/coordinates.py +19 -10
- junifer/data/masks.py +87 -58
- junifer/data/parcellations.py +3 -14
- junifer/data/template_spaces.py +1 -4
- junifer/data/tests/test_masks.py +37 -26
- junifer/data/utils.py +0 -3
- junifer/datagrabber/__init__.py +1 -18
- junifer/datagrabber/aomic/__init__.py +0 -3
- junifer/datagrabber/aomic/id1000.py +37 -70
- junifer/datagrabber/aomic/piop1.py +36 -69
- junifer/datagrabber/aomic/piop2.py +38 -71
- junifer/datagrabber/aomic/tests/test_id1000.py +99 -44
- junifer/datagrabber/aomic/tests/test_piop1.py +108 -65
- junifer/datagrabber/aomic/tests/test_piop2.py +102 -45
- junifer/datagrabber/base.py +6 -13
- junifer/datagrabber/datalad_base.py +1 -13
- junifer/datagrabber/dmcc13_benchmark.py +53 -36
- junifer/datagrabber/hcp1200/__init__.py +0 -3
- junifer/datagrabber/hcp1200/datalad_hcp1200.py +0 -3
- junifer/datagrabber/hcp1200/hcp1200.py +1 -4
- junifer/datagrabber/multiple.py +6 -45
- junifer/datagrabber/pattern.py +62 -170
- junifer/datagrabber/pattern_datalad.py +12 -25
- junifer/datagrabber/tests/test_datagrabber_utils.py +218 -0
- junifer/datagrabber/tests/test_datalad_base.py +4 -4
- junifer/datagrabber/tests/test_dmcc13_benchmark.py +19 -46
- junifer/datagrabber/tests/test_multiple.py +84 -161
- junifer/datagrabber/tests/test_pattern.py +0 -45
- junifer/datagrabber/tests/test_pattern_datalad.py +4 -4
- junifer/datagrabber/utils.py +230 -0
- junifer/datareader/__init__.py +1 -4
- junifer/datareader/default.py +43 -95
- junifer/external/__init__.py +1 -1
- junifer/external/nilearn/__init__.py +1 -5
- junifer/external/nilearn/junifer_nifti_spheres_masker.py +9 -23
- junifer/external/nilearn/tests/test_junifer_nifti_spheres_masker.py +1 -76
- junifer/markers/__init__.py +1 -23
- junifer/markers/base.py +28 -68
- junifer/markers/collection.py +2 -10
- junifer/markers/complexity/__init__.py +0 -10
- junifer/markers/complexity/complexity_base.py +43 -26
- junifer/markers/complexity/hurst_exponent.py +0 -3
- junifer/markers/complexity/multiscale_entropy_auc.py +0 -3
- junifer/markers/complexity/perm_entropy.py +0 -3
- junifer/markers/complexity/range_entropy.py +0 -3
- junifer/markers/complexity/range_entropy_auc.py +0 -3
- junifer/markers/complexity/sample_entropy.py +0 -3
- junifer/markers/complexity/tests/test_hurst_exponent.py +3 -11
- junifer/markers/complexity/tests/test_multiscale_entropy_auc.py +3 -11
- junifer/markers/complexity/tests/test_perm_entropy.py +3 -11
- junifer/markers/complexity/tests/test_range_entropy.py +3 -11
- junifer/markers/complexity/tests/test_range_entropy_auc.py +3 -11
- junifer/markers/complexity/tests/test_sample_entropy.py +3 -11
- junifer/markers/complexity/tests/test_weighted_perm_entropy.py +3 -11
- junifer/markers/complexity/weighted_perm_entropy.py +0 -3
- junifer/markers/ets_rss.py +42 -27
- junifer/markers/falff/__init__.py +0 -3
- junifer/markers/falff/_afni_falff.py +2 -5
- junifer/markers/falff/_junifer_falff.py +0 -3
- junifer/markers/falff/falff_base.py +46 -20
- junifer/markers/falff/falff_parcels.py +27 -56
- junifer/markers/falff/falff_spheres.py +29 -60
- junifer/markers/falff/tests/test_falff_parcels.py +23 -39
- junifer/markers/falff/tests/test_falff_spheres.py +23 -39
- junifer/markers/functional_connectivity/__init__.py +0 -9
- junifer/markers/functional_connectivity/crossparcellation_functional_connectivity.py +60 -63
- junifer/markers/functional_connectivity/edge_functional_connectivity_parcels.py +32 -45
- junifer/markers/functional_connectivity/edge_functional_connectivity_spheres.py +36 -49
- junifer/markers/functional_connectivity/functional_connectivity_base.py +70 -71
- junifer/markers/functional_connectivity/functional_connectivity_parcels.py +25 -34
- junifer/markers/functional_connectivity/functional_connectivity_spheres.py +30 -40
- junifer/markers/functional_connectivity/tests/test_crossparcellation_functional_connectivity.py +7 -11
- junifer/markers/functional_connectivity/tests/test_edge_functional_connectivity_parcels.py +7 -27
- junifer/markers/functional_connectivity/tests/test_edge_functional_connectivity_spheres.py +12 -28
- junifer/markers/functional_connectivity/tests/test_functional_connectivity_parcels.py +11 -35
- junifer/markers/functional_connectivity/tests/test_functional_connectivity_spheres.py +62 -36
- junifer/markers/parcel_aggregation.py +61 -47
- junifer/markers/reho/__init__.py +0 -3
- junifer/markers/reho/_afni_reho.py +2 -5
- junifer/markers/reho/_junifer_reho.py +1 -4
- junifer/markers/reho/reho_base.py +27 -8
- junifer/markers/reho/reho_parcels.py +17 -28
- junifer/markers/reho/reho_spheres.py +18 -27
- junifer/markers/reho/tests/test_reho_parcels.py +3 -8
- junifer/markers/reho/tests/test_reho_spheres.py +3 -8
- junifer/markers/sphere_aggregation.py +59 -43
- junifer/markers/temporal_snr/__init__.py +0 -3
- junifer/markers/temporal_snr/temporal_snr_base.py +32 -23
- junifer/markers/temporal_snr/temporal_snr_parcels.py +6 -9
- junifer/markers/temporal_snr/temporal_snr_spheres.py +6 -9
- junifer/markers/temporal_snr/tests/test_temporal_snr_parcels.py +3 -6
- junifer/markers/temporal_snr/tests/test_temporal_snr_spheres.py +3 -6
- junifer/markers/tests/test_collection.py +8 -9
- junifer/markers/tests/test_ets_rss.py +9 -15
- junifer/markers/tests/test_markers_base.py +18 -17
- junifer/markers/tests/test_parcel_aggregation.py +32 -93
- junifer/markers/tests/test_sphere_aggregation.py +19 -72
- junifer/onthefly/__init__.py +1 -4
- junifer/onthefly/read_transform.py +0 -3
- junifer/pipeline/__init__.py +1 -9
- junifer/pipeline/pipeline_step_mixin.py +4 -21
- junifer/pipeline/registry.py +0 -3
- junifer/pipeline/singleton.py +0 -3
- junifer/pipeline/tests/test_registry.py +1 -1
- junifer/pipeline/update_meta_mixin.py +0 -3
- junifer/pipeline/utils.py +1 -67
- junifer/pipeline/workdir_manager.py +0 -3
- junifer/preprocess/__init__.py +2 -10
- junifer/preprocess/ants/__init__.py +4 -0
- junifer/preprocess/ants/ants_apply_transforms_warper.py +185 -0
- junifer/preprocess/ants/tests/test_ants_apply_transforms_warper.py +56 -0
- junifer/preprocess/base.py +3 -6
- junifer/preprocess/bold_warper.py +265 -0
- junifer/preprocess/confounds/__init__.py +0 -3
- junifer/preprocess/confounds/fmriprep_confound_remover.py +60 -47
- junifer/preprocess/confounds/tests/test_fmriprep_confound_remover.py +113 -72
- junifer/preprocess/fsl/__init__.py +4 -0
- junifer/preprocess/fsl/apply_warper.py +179 -0
- junifer/preprocess/fsl/tests/test_apply_warper.py +45 -0
- junifer/preprocess/tests/test_bold_warper.py +159 -0
- junifer/preprocess/warping/__init__.py +0 -3
- junifer/preprocess/warping/_ants_warper.py +0 -3
- junifer/preprocess/warping/_fsl_warper.py +0 -3
- junifer/stats.py +1 -4
- junifer/storage/__init__.py +1 -9
- junifer/storage/base.py +1 -40
- junifer/storage/hdf5.py +9 -71
- junifer/storage/pandas_base.py +0 -3
- junifer/storage/sqlite.py +0 -3
- junifer/storage/tests/test_hdf5.py +10 -82
- junifer/storage/utils.py +0 -9
- junifer/testing/__init__.py +1 -4
- junifer/testing/datagrabbers.py +6 -13
- junifer/testing/tests/test_partlycloudytesting_datagrabber.py +7 -7
- junifer/testing/utils.py +0 -3
- junifer/utils/__init__.py +2 -13
- junifer/utils/fs.py +0 -3
- junifer/utils/helpers.py +1 -32
- junifer/utils/logging.py +4 -33
- junifer/utils/tests/test_logging.py +0 -8
- {junifer-0.0.5.dist-info → junifer-0.0.5.dev11.dist-info}/METADATA +16 -17
- junifer-0.0.5.dev11.dist-info/RECORD +259 -0
- {junifer-0.0.5.dist-info → junifer-0.0.5.dev11.dist-info}/WHEEL +1 -1
- junifer/api/res/freesurfer/mri_binarize +0 -3
- junifer/api/res/freesurfer/mri_mc +0 -3
- junifer/api/res/freesurfer/mri_pretess +0 -3
- junifer/api/res/freesurfer/mris_convert +0 -3
- junifer/api/res/freesurfer/run_freesurfer_docker.sh +0 -61
- junifer/data/masks/ukb/UKB_15K_GM_template.nii.gz +0 -0
- junifer/datagrabber/pattern_validation_mixin.py +0 -388
- junifer/datagrabber/tests/test_pattern_validation_mixin.py +0 -249
- junifer/external/BrainPrint/brainprint/__init__.py +0 -4
- junifer/external/BrainPrint/brainprint/_version.py +0 -3
- junifer/external/BrainPrint/brainprint/asymmetry.py +0 -91
- junifer/external/BrainPrint/brainprint/brainprint.py +0 -441
- junifer/external/BrainPrint/brainprint/surfaces.py +0 -258
- junifer/external/BrainPrint/brainprint/utils/__init__.py +0 -1
- junifer/external/BrainPrint/brainprint/utils/_config.py +0 -112
- junifer/external/BrainPrint/brainprint/utils/utils.py +0 -188
- junifer/external/nilearn/junifer_connectivity_measure.py +0 -483
- junifer/external/nilearn/tests/test_junifer_connectivity_measure.py +0 -1089
- junifer/markers/brainprint.py +0 -459
- junifer/markers/tests/test_brainprint.py +0 -58
- junifer/preprocess/smoothing/__init__.py +0 -9
- junifer/preprocess/smoothing/_afni_smoothing.py +0 -119
- junifer/preprocess/smoothing/_fsl_smoothing.py +0 -116
- junifer/preprocess/smoothing/_nilearn_smoothing.py +0 -69
- junifer/preprocess/smoothing/smoothing.py +0 -174
- junifer/preprocess/smoothing/tests/test_smoothing.py +0 -94
- junifer-0.0.5.dist-info/RECORD +0 -275
- {junifer-0.0.5.dist-info → junifer-0.0.5.dev11.dist-info}/AUTHORS.rst +0 -0
- {junifer-0.0.5.dist-info → junifer-0.0.5.dev11.dist-info}/LICENSE.md +0 -0
- {junifer-0.0.5.dist-info → junifer-0.0.5.dev11.dist-info}/entry_points.txt +0 -0
- {junifer-0.0.5.dist-info → junifer-0.0.5.dev11.dist-info}/top_level.txt +0 -0
junifer/preprocess/fsl/tests/test_apply_warper.py
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
"""Provide tests for ApplyWarper."""
|
2
|
+
|
3
|
+
# Authors: Synchon Mandal <s.mandal@fz-juelich.de>
|
4
|
+
# License: AGPL
|
5
|
+
|
6
|
+
import socket
|
7
|
+
|
8
|
+
import pytest
|
9
|
+
|
10
|
+
from junifer.datagrabber import DataladHCP1200
|
11
|
+
from junifer.datareader import DefaultDataReader
|
12
|
+
from junifer.pipeline.utils import _check_fsl
|
13
|
+
from junifer.preprocess.fsl.apply_warper import _ApplyWarper
|
14
|
+
|
15
|
+
|
16
|
+
def test_ApplyWarper_init() -> None:
|
17
|
+
"""Test ApplyWarper init."""
|
18
|
+
apply_warper = _ApplyWarper(reference="T1w", on="BOLD")
|
19
|
+
assert apply_warper.ref == "T1w"
|
20
|
+
assert apply_warper.on == "BOLD"
|
21
|
+
|
22
|
+
|
23
|
+
@pytest.mark.skipif(_check_fsl() is False, reason="requires FSL to be in PATH")
|
24
|
+
@pytest.mark.skipif(
|
25
|
+
socket.gethostname() != "juseless",
|
26
|
+
reason="only for juseless",
|
27
|
+
)
|
28
|
+
def test_ApplyWarper_preprocess() -> None:
|
29
|
+
"""Test ApplyWarper preprocess."""
|
30
|
+
with DataladHCP1200(
|
31
|
+
tasks=["REST1"],
|
32
|
+
phase_encodings=["LR"],
|
33
|
+
ica_fix=True,
|
34
|
+
) as dg:
|
35
|
+
# Read data
|
36
|
+
element_data = DefaultDataReader().fit_transform(
|
37
|
+
dg[("100206", "REST1", "LR")]
|
38
|
+
)
|
39
|
+
# Preprocess data
|
40
|
+
data_type, data = _ApplyWarper(reference="T1w", on="BOLD").preprocess(
|
41
|
+
input=element_data["BOLD"],
|
42
|
+
extra_input=element_data,
|
43
|
+
)
|
44
|
+
assert isinstance(data_type, str)
|
45
|
+
assert isinstance(data, dict)
|
junifer/preprocess/tests/test_bold_warper.py
ADDED
@@ -0,0 +1,159 @@
|
|
1
|
+
"""Provide tests for BOLDWarper."""
|
2
|
+
|
3
|
+
# Authors: Synchon Mandal <s.mandal@fz-juelich.de>
|
4
|
+
# License: AGPL
|
5
|
+
|
6
|
+
import socket
|
7
|
+
from typing import TYPE_CHECKING, Tuple
|
8
|
+
|
9
|
+
import pytest
|
10
|
+
from numpy.testing import assert_array_equal, assert_raises
|
11
|
+
|
12
|
+
from junifer.datagrabber import DataladHCP1200, DMCC13Benchmark
|
13
|
+
from junifer.datareader import DefaultDataReader
|
14
|
+
from junifer.pipeline.utils import _check_ants, _check_fsl
|
15
|
+
from junifer.preprocess import BOLDWarper
|
16
|
+
|
17
|
+
|
18
|
+
if TYPE_CHECKING:
|
19
|
+
from junifer.datagrabber import BaseDataGrabber
|
20
|
+
|
21
|
+
|
22
|
+
def test_BOLDWarper_init() -> None:
|
23
|
+
"""Test BOLDWarper init."""
|
24
|
+
bold_warper = BOLDWarper(using="ants", reference="T1w")
|
25
|
+
assert bold_warper._on == ["BOLD"]
|
26
|
+
|
27
|
+
|
28
|
+
def test_BOLDWarper_get_valid_inputs() -> None:
|
29
|
+
"""Test BOLDWarper get_valid_inputs."""
|
30
|
+
bold_warper = BOLDWarper(using="ants", reference="T1w")
|
31
|
+
assert bold_warper.get_valid_inputs() == ["BOLD"]
|
32
|
+
|
33
|
+
|
34
|
+
def test_BOLDWarper_get_output_type() -> None:
|
35
|
+
"""Test BOLDWarper get_output_type."""
|
36
|
+
bold_warper = BOLDWarper(using="ants", reference="T1w")
|
37
|
+
assert bold_warper.get_output_type("BOLD") == "BOLD"
|
38
|
+
|
39
|
+
|
40
|
+
@pytest.mark.parametrize(
|
41
|
+
"datagrabber, element",
|
42
|
+
[
|
43
|
+
[
|
44
|
+
DMCC13Benchmark(
|
45
|
+
types=["BOLD", "T1w", "Warp"],
|
46
|
+
sessions=["ses-wave1bas"],
|
47
|
+
tasks=["Rest"],
|
48
|
+
phase_encodings=["AP"],
|
49
|
+
runs=["1"],
|
50
|
+
native_t1w=True,
|
51
|
+
),
|
52
|
+
("sub-f9057kp", "ses-wave1bas", "Rest", "AP", "1"),
|
53
|
+
],
|
54
|
+
[
|
55
|
+
DataladHCP1200(
|
56
|
+
tasks=["REST1"],
|
57
|
+
phase_encodings=["LR"],
|
58
|
+
ica_fix=True,
|
59
|
+
),
|
60
|
+
("100206", "REST1", "LR"),
|
61
|
+
],
|
62
|
+
],
|
63
|
+
)
|
64
|
+
@pytest.mark.skipif(_check_fsl() is False, reason="requires FSL to be in PATH")
|
65
|
+
@pytest.mark.skipif(
|
66
|
+
_check_ants() is False, reason="requires ANTs to be in PATH"
|
67
|
+
)
|
68
|
+
@pytest.mark.skipif(
|
69
|
+
socket.gethostname() != "juseless",
|
70
|
+
reason="only for juseless",
|
71
|
+
)
|
72
|
+
def test_BOLDWarper_preprocess_to_native(
|
73
|
+
datagrabber: "BaseDataGrabber", element: Tuple[str, ...]
|
74
|
+
) -> None:
|
75
|
+
"""Test BOLDWarper preprocess.
|
76
|
+
|
77
|
+
Parameters
|
78
|
+
----------
|
79
|
+
datagrabber : DataGrabber-like object
|
80
|
+
The parametrized DataGrabber objects.
|
81
|
+
element : tuple of str
|
82
|
+
The parametrized elements.
|
83
|
+
|
84
|
+
"""
|
85
|
+
with datagrabber as dg:
|
86
|
+
# Read data
|
87
|
+
element_data = DefaultDataReader().fit_transform(dg[element])
|
88
|
+
# Preprocess data
|
89
|
+
data, _ = BOLDWarper(reference="T1w").preprocess(
|
90
|
+
input=element_data["BOLD"],
|
91
|
+
extra_input=element_data,
|
92
|
+
)
|
93
|
+
assert isinstance(data, dict)
|
94
|
+
|
95
|
+
|
96
|
+
@pytest.mark.parametrize(
|
97
|
+
"datagrabber, element, space",
|
98
|
+
[
|
99
|
+
[
|
100
|
+
DMCC13Benchmark(
|
101
|
+
types=["BOLD"],
|
102
|
+
sessions=["ses-wave1bas"],
|
103
|
+
tasks=["Rest"],
|
104
|
+
phase_encodings=["AP"],
|
105
|
+
runs=["1"],
|
106
|
+
native_t1w=False,
|
107
|
+
),
|
108
|
+
("sub-f9057kp", "ses-wave1bas", "Rest", "AP", "1"),
|
109
|
+
"MNI152NLin2009aAsym",
|
110
|
+
],
|
111
|
+
[
|
112
|
+
DMCC13Benchmark(
|
113
|
+
types=["BOLD"],
|
114
|
+
sessions=["ses-wave1bas"],
|
115
|
+
tasks=["Rest"],
|
116
|
+
phase_encodings=["AP"],
|
117
|
+
runs=["1"],
|
118
|
+
native_t1w=False,
|
119
|
+
),
|
120
|
+
("sub-f9057kp", "ses-wave1bas", "Rest", "AP", "1"),
|
121
|
+
"MNI152NLin6Asym",
|
122
|
+
],
|
123
|
+
],
|
124
|
+
)
|
125
|
+
@pytest.mark.skipif(
|
126
|
+
_check_ants() is False, reason="requires ANTs to be in PATH"
|
127
|
+
)
|
128
|
+
@pytest.mark.skipif(
|
129
|
+
socket.gethostname() != "juseless",
|
130
|
+
reason="only for juseless",
|
131
|
+
)
|
132
|
+
def test_BOLDWarper_preprocess_to_multi_mni(
|
133
|
+
datagrabber: "BaseDataGrabber", element: Tuple[str, ...], space: str
|
134
|
+
) -> None:
|
135
|
+
"""Test BOLDWarper preprocess.
|
136
|
+
|
137
|
+
Parameters
|
138
|
+
----------
|
139
|
+
datagrabber : DataGrabber-like object
|
140
|
+
The parametrized DataGrabber objects.
|
141
|
+
element : tuple of str
|
142
|
+
The parametrized elements.
|
143
|
+
space : str
|
144
|
+
The parametrized template space to transform to.
|
145
|
+
|
146
|
+
"""
|
147
|
+
with datagrabber as dg:
|
148
|
+
# Read data
|
149
|
+
element_data = DefaultDataReader().fit_transform(dg[element])
|
150
|
+
pre_xfm_data = element_data["BOLD"]["data"].get_fdata().copy()
|
151
|
+
# Preprocess data
|
152
|
+
data, _ = BOLDWarper(reference=space).preprocess(
|
153
|
+
input=element_data["BOLD"],
|
154
|
+
extra_input=element_data,
|
155
|
+
)
|
156
|
+
assert isinstance(data, dict)
|
157
|
+
assert data["space"] == space
|
158
|
+
with assert_raises(AssertionError):
|
159
|
+
assert_array_equal(pre_xfm_data, data["data"])
|
junifer/stats.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
"""
|
1
|
+
"""Provide functions for statistics."""
|
2
2
|
|
3
3
|
# Authors: Federico Raimondo <f.raimondo@fz-juelich.de>
|
4
4
|
# Synchon Mandal <s.mandal@fz-juelich.de>
|
@@ -13,9 +13,6 @@ from scipy.stats.mstats import winsorize
|
|
13
13
|
from .utils import logger, raise_error
|
14
14
|
|
15
15
|
|
16
|
-
__all__ = ["get_aggfunc_by_name", "count", "winsorized_mean", "select"]
|
17
|
-
|
18
|
-
|
19
16
|
def get_aggfunc_by_name(
|
20
17
|
name: str, func_params: Optional[Dict[str, Any]] = None
|
21
18
|
) -> Callable:
|
junifer/storage/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
"""
|
1
|
+
"""Provide imports for storage sub-package."""
|
2
2
|
|
3
3
|
# Authors: Federico Raimondo <f.raimondo@fz-juelich.de>
|
4
4
|
# Synchon Mandal <s.mandal@fz-juelich.de>
|
@@ -8,11 +8,3 @@ from .base import BaseFeatureStorage
|
|
8
8
|
from .pandas_base import PandasBaseFeatureStorage
|
9
9
|
from .sqlite import SQLiteFeatureStorage
|
10
10
|
from .hdf5 import HDF5FeatureStorage
|
11
|
-
|
12
|
-
|
13
|
-
__all__ = [
|
14
|
-
"BaseFeatureStorage",
|
15
|
-
"PandasBaseFeatureStorage",
|
16
|
-
"SQLiteFeatureStorage",
|
17
|
-
"HDF5FeatureStorage",
|
18
|
-
]
|
junifer/storage/base.py
CHANGED
@@ -15,9 +15,6 @@ from ..utils import raise_error
|
|
15
15
|
from .utils import process_meta
|
16
16
|
|
17
17
|
|
18
|
-
__all__ = ["BaseFeatureStorage"]
|
19
|
-
|
20
|
-
|
21
18
|
class BaseFeatureStorage(ABC):
|
22
19
|
"""Abstract base class for feature storage.
|
23
20
|
|
@@ -192,7 +189,7 @@ class BaseFeatureStorage(ABC):
|
|
192
189
|
|
193
190
|
Parameters
|
194
191
|
----------
|
195
|
-
kind : {"matrix", "timeseries", "vector", "scalar_table"}
|
192
|
+
kind : {"matrix", "timeseries", "vector"}
|
196
193
|
The storage kind.
|
197
194
|
**kwargs
|
198
195
|
The keyword arguments.
|
@@ -221,10 +218,6 @@ class BaseFeatureStorage(ABC):
|
|
221
218
|
)
|
222
219
|
elif kind == "vector":
|
223
220
|
self.store_vector(meta_md5=meta_md5, element=t_element, **kwargs)
|
224
|
-
elif kind == "scalar_table":
|
225
|
-
self.store_scalar_table(
|
226
|
-
meta_md5=meta_md5, element=t_element, **kwargs
|
227
|
-
)
|
228
221
|
|
229
222
|
def store_matrix(
|
230
223
|
self,
|
@@ -320,38 +313,6 @@ class BaseFeatureStorage(ABC):
|
|
320
313
|
klass=NotImplementedError,
|
321
314
|
)
|
322
315
|
|
323
|
-
def store_scalar_table(
|
324
|
-
self,
|
325
|
-
meta_md5: str,
|
326
|
-
element: Dict,
|
327
|
-
data: np.ndarray,
|
328
|
-
col_names: Optional[Iterable[str]] = None,
|
329
|
-
row_names: Optional[Iterable[str]] = None,
|
330
|
-
row_header_col_name: Optional[str] = "feature",
|
331
|
-
) -> None:
|
332
|
-
"""Store table with scalar values.
|
333
|
-
|
334
|
-
Parameters
|
335
|
-
----------
|
336
|
-
meta_md5 : str
|
337
|
-
The metadata MD5 hash.
|
338
|
-
element : dict
|
339
|
-
The element as a dictionary.
|
340
|
-
data : numpy.ndarray
|
341
|
-
The timeseries data to store.
|
342
|
-
col_names : list or tuple of str, optional
|
343
|
-
The column labels (default None).
|
344
|
-
row_names : str, optional
|
345
|
-
The row labels (default None).
|
346
|
-
row_header_col_name : str, optional
|
347
|
-
The column name for the row header column (default "feature").
|
348
|
-
|
349
|
-
"""
|
350
|
-
raise_error(
|
351
|
-
msg="Concrete classes need to implement store_scalar_table().",
|
352
|
-
klass=NotImplementedError,
|
353
|
-
)
|
354
|
-
|
355
316
|
@abstractmethod
|
356
317
|
def collect(self) -> None:
|
357
318
|
"""Collect data."""
|
junifer/storage/hdf5.py
CHANGED
@@ -26,9 +26,6 @@ from .base import BaseFeatureStorage
|
|
26
26
|
from .utils import element_to_prefix, matrix_to_vector, store_matrix_checks
|
27
27
|
|
28
28
|
|
29
|
-
__all__ = ["HDF5FeatureStorage"]
|
30
|
-
|
31
|
-
|
32
29
|
def _create_chunk(
|
33
30
|
chunk_data: List[np.ndarray],
|
34
31
|
kind: str,
|
@@ -59,8 +56,7 @@ def _create_chunk(
|
|
59
56
|
Raises
|
60
57
|
------
|
61
58
|
ValueError
|
62
|
-
If `kind` is not one of ['vector', 'matrix', 'timeseries',
|
63
|
-
'scalar_table'].
|
59
|
+
If `kind` is not one of ['vector', 'matrix', 'timeseries'].
|
64
60
|
|
65
61
|
"""
|
66
62
|
if kind in ["vector", "matrix"]:
|
@@ -81,7 +77,7 @@ def _create_chunk(
|
|
81
77
|
chunk_size=tuple(array_chunk_size),
|
82
78
|
n_chunk=i_chunk,
|
83
79
|
)
|
84
|
-
elif kind in ["timeseries", "scalar_table"]:
|
80
|
+
elif kind == "timeseries":
|
85
81
|
out = ChunkedList(
|
86
82
|
data=chunk_data,
|
87
83
|
size=element_count,
|
@@ -90,8 +86,7 @@ def _create_chunk(
|
|
90
86
|
else:
|
91
87
|
raise_error(
|
92
88
|
f"Invalid kind: {kind}. "
|
93
|
-
"Must be one of ['vector', 'matrix', 'timeseries', "
|
94
|
-
"'scalar_table']."
|
89
|
+
"Must be one of ['vector', 'matrix', 'timeseries']."
|
95
90
|
)
|
96
91
|
return out
|
97
92
|
|
@@ -151,7 +146,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
151
146
|
uri.parent.mkdir(parents=True, exist_ok=True)
|
152
147
|
|
153
148
|
# Available storage kinds
|
154
|
-
storage_types = ["vector", "timeseries", "matrix", "scalar_table"]
|
149
|
+
storage_types = ["vector", "timeseries", "matrix"]
|
155
150
|
|
156
151
|
super().__init__(
|
157
152
|
uri=uri,
|
@@ -174,7 +169,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
174
169
|
storage.
|
175
170
|
|
176
171
|
"""
|
177
|
-
return ["matrix", "vector", "timeseries", "scalar_table"]
|
172
|
+
return ["matrix", "vector", "timeseries"]
|
178
173
|
|
179
174
|
def _fetch_correct_uri_for_io(self, element: Optional[Dict]) -> str:
|
180
175
|
"""Return proper URI for I/O based on `element`.
|
@@ -513,26 +508,6 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
513
508
|
columns = hdf_data["column_headers"]
|
514
509
|
# Convert data from 3D to 2D
|
515
510
|
reshaped_data = np.concatenate(all_data, axis=0)
|
516
|
-
elif hdf_data["kind"] == "scalar_table":
|
517
|
-
# Create dictionary for aggregating index data
|
518
|
-
element_idx = defaultdict(list)
|
519
|
-
all_data = []
|
520
|
-
for idx, element in enumerate(hdf_data["element"]):
|
521
|
-
# Get row count for the element
|
522
|
-
t_data = hdf_data["data"][idx]
|
523
|
-
all_data.append(t_data)
|
524
|
-
n_rows = len(hdf_data["row_headers"])
|
525
|
-
# Set rows for the index
|
526
|
-
for key, val in element.items():
|
527
|
-
element_idx[key].extend([val] * n_rows)
|
528
|
-
# Add extra column for row header column name
|
529
|
-
element_idx[hdf_data["row_header_column_name"]].extend(
|
530
|
-
hdf_data["row_headers"]
|
531
|
-
)
|
532
|
-
# Set column headers for dataframe
|
533
|
-
columns = hdf_data["column_headers"]
|
534
|
-
# Convert data from 3D to 2D
|
535
|
-
reshaped_data = np.concatenate(all_data, axis=0)
|
536
511
|
|
537
512
|
# Create dataframe for index
|
538
513
|
idx_df = pd.DataFrame(data=element_idx) # type: ignore
|
@@ -668,7 +643,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
668
643
|
|
669
644
|
Parameters
|
670
645
|
----------
|
671
|
-
kind : {"matrix", "vector", "timeseries", "scalar_table"}
|
646
|
+
kind : {"matrix", "vector", "timeseries"}
|
672
647
|
The storage kind.
|
673
648
|
meta_md5 : str
|
674
649
|
The metadata MD5 hash.
|
@@ -764,8 +739,8 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
764
739
|
)
|
765
740
|
|
766
741
|
t_data = stored_data["data"]
|
767
|
-
if kind
|
768
|
-
t_data
|
742
|
+
if kind == "timeseries":
|
743
|
+
t_data.append(data)
|
769
744
|
else:
|
770
745
|
t_data = np.concatenate((t_data, data), axis=-1)
|
771
746
|
# Existing entry; append to existing
|
@@ -946,43 +921,6 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
946
921
|
row_header_column_name="timepoint",
|
947
922
|
)
|
948
923
|
|
949
|
-
def store_scalar_table(
|
950
|
-
self,
|
951
|
-
meta_md5: str,
|
952
|
-
element: Dict,
|
953
|
-
data: np.ndarray,
|
954
|
-
col_names: Optional[Iterable[str]] = None,
|
955
|
-
row_names: Optional[Iterable[str]] = None,
|
956
|
-
row_header_col_name: Optional[str] = "feature",
|
957
|
-
) -> None:
|
958
|
-
"""Store table with scalar values.
|
959
|
-
|
960
|
-
Parameters
|
961
|
-
----------
|
962
|
-
meta_md5 : str
|
963
|
-
The metadata MD5 hash.
|
964
|
-
element : dict
|
965
|
-
The element as a dictionary.
|
966
|
-
data : numpy.ndarray
|
967
|
-
The scalar table data to store.
|
968
|
-
col_names : list or tuple of str, optional
|
969
|
-
The column labels (default None).
|
970
|
-
row_names : str, optional
|
971
|
-
The row labels (default None).
|
972
|
-
row_header_col_name : str, optional
|
973
|
-
The column name for the row header column (default "feature").
|
974
|
-
|
975
|
-
"""
|
976
|
-
self._store_data(
|
977
|
-
kind="scalar_table",
|
978
|
-
meta_md5=meta_md5,
|
979
|
-
element=[element], # convert to list
|
980
|
-
data=[data], # convert to list
|
981
|
-
column_headers=col_names,
|
982
|
-
row_headers=row_names,
|
983
|
-
row_header_column_name=row_header_col_name,
|
984
|
-
)
|
985
|
-
|
986
924
|
def collect(self) -> None:
|
987
925
|
"""Implement data collection.
|
988
926
|
|
@@ -1091,7 +1029,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
1091
1029
|
kind = static_data["kind"]
|
1092
1030
|
|
1093
1031
|
# Append the "dynamic" data
|
1094
|
-
if kind in ["timeseries", "scalar_table"]:
|
1032
|
+
if kind == "timeseries":
|
1095
1033
|
chunk_data.extend(t_data["data"])
|
1096
1034
|
else:
|
1097
1035
|
chunk_data.append(t_data["data"])
|
junifer/storage/pandas_base.py
CHANGED
junifer/storage/sqlite.py
CHANGED
junifer/storage/tests/test_hdf5.py
CHANGED
@@ -25,12 +25,7 @@ from junifer.storage.utils import (
|
|
25
25
|
def test_get_valid_inputs() -> None:
|
26
26
|
"""Test valid inputs."""
|
27
27
|
storage = HDF5FeatureStorage(uri="/tmp")
|
28
|
-
assert storage.get_valid_inputs() == [
|
29
|
-
"matrix",
|
30
|
-
"vector",
|
31
|
-
"timeseries",
|
32
|
-
"scalar_table",
|
33
|
-
]
|
28
|
+
assert storage.get_valid_inputs() == ["matrix", "vector", "timeseries"]
|
34
29
|
|
35
30
|
|
36
31
|
def test_single_output(tmp_path: Path) -> None:
|
@@ -813,7 +808,7 @@ def test_store_timeseries(tmp_path: Path) -> None:
|
|
813
808
|
data = np.array([[10], [20], [30], [40], [50]])
|
814
809
|
col_names = ["signal"]
|
815
810
|
|
816
|
-
# Store
|
811
|
+
# Store vector
|
817
812
|
storage.store_timeseries(
|
818
813
|
meta_md5=meta_md5,
|
819
814
|
element=element_to_store,
|
@@ -827,53 +822,6 @@ def test_store_timeseries(tmp_path: Path) -> None:
|
|
827
822
|
assert_array_equal(read_df.values, data)
|
828
823
|
|
829
824
|
|
830
|
-
def test_store_scalar_table(tmp_path: Path) -> None:
|
831
|
-
"""Test scalar table store.
|
832
|
-
|
833
|
-
Parameters
|
834
|
-
----------
|
835
|
-
tmp_path : pathlib.Path
|
836
|
-
The path to the test directory.
|
837
|
-
|
838
|
-
"""
|
839
|
-
uri = tmp_path / "test_store_scalar_table.hdf5"
|
840
|
-
storage = HDF5FeatureStorage(uri=uri)
|
841
|
-
# Metadata to store
|
842
|
-
element = {"subject": "test"}
|
843
|
-
meta = {
|
844
|
-
"element": element,
|
845
|
-
"dependencies": ["numpy"],
|
846
|
-
"marker": {"name": "brainprint"},
|
847
|
-
"type": "FreeSurfer",
|
848
|
-
}
|
849
|
-
# Process the metadata
|
850
|
-
meta_md5, meta_to_store, element_to_store = process_meta(meta)
|
851
|
-
# Store metadata
|
852
|
-
storage.store_metadata(
|
853
|
-
meta_md5=meta_md5, element=element_to_store, meta=meta_to_store
|
854
|
-
)
|
855
|
-
|
856
|
-
# Data to store
|
857
|
-
data = np.array([[10, 20], [30, 40], [50, 60]])
|
858
|
-
col_names = ["roi1", "roi2"]
|
859
|
-
row_names = ["ev1", "ev2", "ev3"]
|
860
|
-
|
861
|
-
# Store timeseries
|
862
|
-
storage.store_scalar_table(
|
863
|
-
meta_md5=meta_md5,
|
864
|
-
element=element_to_store,
|
865
|
-
data=data,
|
866
|
-
col_names=col_names,
|
867
|
-
row_names=row_names,
|
868
|
-
row_header_col_name="eigenvalue",
|
869
|
-
)
|
870
|
-
|
871
|
-
# Read into dataframe
|
872
|
-
read_df = storage.read_df(feature_md5=meta_md5)
|
873
|
-
# Check if data are equal
|
874
|
-
assert_array_equal(read_df.values, data)
|
875
|
-
|
876
|
-
|
877
825
|
def _create_data_to_store(n_elements: int, kind: str) -> Tuple[str, Dict]:
|
878
826
|
"""Create data to store.
|
879
827
|
|
@@ -906,19 +854,13 @@ def _create_data_to_store(n_elements: int, kind: str) -> Tuple[str, Dict]:
|
|
906
854
|
"col_names": [f"col-{i}" for i in range(10)],
|
907
855
|
"matrix_kind": "full",
|
908
856
|
}
|
909
|
-
elif kind
|
857
|
+
elif kind == "timeseries":
|
910
858
|
data_to_store = {
|
911
859
|
"data": np.arange(20).reshape(2, 10),
|
912
860
|
"col_names": [f"col-{i}" for i in range(10)],
|
913
861
|
}
|
914
|
-
|
915
|
-
|
916
|
-
"data": np.arange(50).reshape(5, 10),
|
917
|
-
"row_names": [f"row-{i}" for i in range(5)],
|
918
|
-
"col_names": [f"col-{i}" for i in range(10)],
|
919
|
-
"row_header_col_name": "row",
|
920
|
-
}
|
921
|
-
|
862
|
+
else:
|
863
|
+
raise ValueError(f"Unknown kind {kind}.")
|
922
864
|
for i in range(n_elements):
|
923
865
|
element = {"subject": f"sub-{i // 2}", "session": f"ses-{i % 2}"}
|
924
866
|
meta = {
|
@@ -961,7 +903,6 @@ def _create_data_to_store(n_elements: int, kind: str) -> Tuple[str, Dict]:
|
|
961
903
|
(10, 3, "matrix"),
|
962
904
|
(10, 5, "matrix"),
|
963
905
|
(10, 5, "timeseries"),
|
964
|
-
(10, 5, "scalar_table"),
|
965
906
|
],
|
966
907
|
)
|
967
908
|
def test_multi_output_store_and_collect(
|
@@ -989,20 +930,21 @@ def test_multi_output_store_and_collect(
|
|
989
930
|
meta_md5, all_data = _create_data_to_store(n_elements, kind)
|
990
931
|
|
991
932
|
for t_data in all_data:
|
992
|
-
# Store metadata
|
933
|
+
# Store metadata for tables
|
993
934
|
storage.store_metadata(
|
994
935
|
meta_md5=meta_md5,
|
995
936
|
element=t_data["element"],
|
996
937
|
meta=t_data["meta"],
|
997
938
|
)
|
998
|
-
# Store data
|
999
939
|
if kind == "vector":
|
940
|
+
# Store tables
|
1000
941
|
storage.store_vector(
|
1001
942
|
meta_md5=meta_md5,
|
1002
943
|
element=t_data["element"],
|
1003
944
|
**t_data["data"],
|
1004
945
|
)
|
1005
946
|
elif kind == "matrix":
|
947
|
+
# Store tables
|
1006
948
|
storage.store_matrix(
|
1007
949
|
meta_md5=meta_md5,
|
1008
950
|
element=t_data["element"],
|
@@ -1014,17 +956,11 @@ def test_multi_output_store_and_collect(
|
|
1014
956
|
element=t_data["element"],
|
1015
957
|
**t_data["data"],
|
1016
958
|
)
|
1017
|
-
elif kind == "scalar_table":
|
1018
|
-
storage.store_scalar_table(
|
1019
|
-
meta_md5=meta_md5,
|
1020
|
-
element=t_data["element"],
|
1021
|
-
**t_data["data"],
|
1022
|
-
)
|
1023
959
|
# Check that base URI does not exist yet
|
1024
960
|
assert not uri.exists()
|
1025
961
|
|
1026
962
|
for t_data in all_data:
|
1027
|
-
# Convert element to
|
963
|
+
# Convert element to preifx
|
1028
964
|
prefix = element_to_prefix(t_data["element"])
|
1029
965
|
# URIs for data storage
|
1030
966
|
elem_uri = uri.parent / f"{prefix}{uri.name}"
|
@@ -1041,7 +977,7 @@ def test_multi_output_store_and_collect(
|
|
1041
977
|
# Check that base URI exists now
|
1042
978
|
assert uri.exists()
|
1043
979
|
|
1044
|
-
# Read unified metadata
|
980
|
+
# # Read unified metadata
|
1045
981
|
read_unified_meta = storage.list_features()
|
1046
982
|
assert meta_md5 in read_unified_meta
|
1047
983
|
|
@@ -1053,10 +989,6 @@ def test_multi_output_store_and_collect(
|
|
1053
989
|
data_size = np.sum([x["data"]["data"].shape[0] for x in all_data])
|
1054
990
|
assert len(all_df) == data_size
|
1055
991
|
idx_names = [x for x in all_df.index.names if x != "timepoint"]
|
1056
|
-
elif kind == "scalar_table":
|
1057
|
-
data_size = np.sum([x["data"]["data"].shape[0] for x in all_data])
|
1058
|
-
assert len(all_df) == data_size
|
1059
|
-
idx_names = [x for x in all_df.index.names if x != "row"]
|
1060
992
|
else:
|
1061
993
|
assert len(all_df) == len(all_data)
|
1062
994
|
idx_names = all_df.index.names
|
@@ -1081,10 +1013,6 @@ def test_multi_output_store_and_collect(
|
|
1081
1013
|
assert_array_equal(t_series.values, t_data["data"]["data"])
|
1082
1014
|
series_names = t_series.columns.values.tolist()
|
1083
1015
|
assert series_names == t_data["data"]["col_names"]
|
1084
|
-
elif kind == "scalar_table":
|
1085
|
-
assert_array_equal(t_series.values, t_data["data"]["data"])
|
1086
|
-
series_names = t_series.columns.values.tolist()
|
1087
|
-
assert series_names == t_data["data"]["col_names"]
|
1088
1016
|
|
1089
1017
|
|
1090
1018
|
def test_collect_error_single_output() -> None:
|