junifer 0.0.5.dev240__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- junifer/__init__.py +2 -31
- junifer/__init__.pyi +37 -0
- junifer/_version.py +9 -4
- junifer/api/__init__.py +3 -5
- junifer/api/__init__.pyi +4 -0
- junifer/api/decorators.py +14 -19
- junifer/api/functions.py +165 -109
- junifer/api/py.typed +0 -0
- junifer/api/queue_context/__init__.py +2 -4
- junifer/api/queue_context/__init__.pyi +5 -0
- junifer/api/queue_context/gnu_parallel_local_adapter.py +22 -6
- junifer/api/queue_context/htcondor_adapter.py +23 -6
- junifer/api/queue_context/py.typed +0 -0
- junifer/api/queue_context/tests/test_gnu_parallel_local_adapter.py +3 -3
- junifer/api/queue_context/tests/test_htcondor_adapter.py +3 -3
- junifer/api/tests/test_functions.py +168 -74
- junifer/cli/__init__.py +24 -0
- junifer/cli/__init__.pyi +3 -0
- junifer/{api → cli}/cli.py +141 -125
- junifer/cli/parser.py +235 -0
- junifer/cli/py.typed +0 -0
- junifer/{api → cli}/tests/test_cli.py +8 -8
- junifer/{api/tests/test_api_utils.py → cli/tests/test_cli_utils.py} +5 -4
- junifer/{api → cli}/tests/test_parser.py +2 -2
- junifer/{api → cli}/utils.py +6 -16
- junifer/configs/juseless/__init__.py +2 -2
- junifer/configs/juseless/__init__.pyi +3 -0
- junifer/configs/juseless/datagrabbers/__init__.py +2 -12
- junifer/configs/juseless/datagrabbers/__init__.pyi +13 -0
- junifer/configs/juseless/datagrabbers/ixi_vbm.py +2 -2
- junifer/configs/juseless/datagrabbers/py.typed +0 -0
- junifer/configs/juseless/datagrabbers/tests/test_ucla.py +2 -2
- junifer/configs/juseless/datagrabbers/ucla.py +4 -4
- junifer/configs/juseless/py.typed +0 -0
- junifer/conftest.py +25 -0
- junifer/data/__init__.py +2 -42
- junifer/data/__init__.pyi +29 -0
- junifer/data/_dispatch.py +248 -0
- junifer/data/coordinates/__init__.py +9 -0
- junifer/data/coordinates/__init__.pyi +5 -0
- junifer/data/coordinates/_ants_coordinates_warper.py +104 -0
- junifer/data/coordinates/_coordinates.py +385 -0
- junifer/data/coordinates/_fsl_coordinates_warper.py +81 -0
- junifer/data/{tests → coordinates/tests}/test_coordinates.py +26 -33
- junifer/data/masks/__init__.py +9 -0
- junifer/data/masks/__init__.pyi +6 -0
- junifer/data/masks/_ants_mask_warper.py +177 -0
- junifer/data/masks/_fsl_mask_warper.py +106 -0
- junifer/data/masks/_masks.py +802 -0
- junifer/data/{tests → masks/tests}/test_masks.py +67 -63
- junifer/data/parcellations/__init__.py +9 -0
- junifer/data/parcellations/__init__.pyi +6 -0
- junifer/data/parcellations/_ants_parcellation_warper.py +166 -0
- junifer/data/parcellations/_fsl_parcellation_warper.py +89 -0
- junifer/data/parcellations/_parcellations.py +1388 -0
- junifer/data/{tests → parcellations/tests}/test_parcellations.py +165 -295
- junifer/data/pipeline_data_registry_base.py +76 -0
- junifer/data/py.typed +0 -0
- junifer/data/template_spaces.py +44 -79
- junifer/data/tests/test_data_utils.py +1 -2
- junifer/data/tests/test_template_spaces.py +8 -4
- junifer/data/utils.py +109 -4
- junifer/datagrabber/__init__.py +2 -26
- junifer/datagrabber/__init__.pyi +27 -0
- junifer/datagrabber/aomic/__init__.py +2 -4
- junifer/datagrabber/aomic/__init__.pyi +5 -0
- junifer/datagrabber/aomic/id1000.py +81 -52
- junifer/datagrabber/aomic/piop1.py +83 -55
- junifer/datagrabber/aomic/piop2.py +85 -56
- junifer/datagrabber/aomic/py.typed +0 -0
- junifer/datagrabber/aomic/tests/test_id1000.py +19 -12
- junifer/datagrabber/aomic/tests/test_piop1.py +52 -18
- junifer/datagrabber/aomic/tests/test_piop2.py +50 -17
- junifer/datagrabber/base.py +22 -18
- junifer/datagrabber/datalad_base.py +71 -34
- junifer/datagrabber/dmcc13_benchmark.py +31 -18
- junifer/datagrabber/hcp1200/__init__.py +2 -3
- junifer/datagrabber/hcp1200/__init__.pyi +4 -0
- junifer/datagrabber/hcp1200/datalad_hcp1200.py +3 -3
- junifer/datagrabber/hcp1200/hcp1200.py +26 -15
- junifer/datagrabber/hcp1200/py.typed +0 -0
- junifer/datagrabber/hcp1200/tests/test_hcp1200.py +8 -2
- junifer/datagrabber/multiple.py +14 -9
- junifer/datagrabber/pattern.py +132 -96
- junifer/datagrabber/pattern_validation_mixin.py +206 -94
- junifer/datagrabber/py.typed +0 -0
- junifer/datagrabber/tests/test_datalad_base.py +27 -12
- junifer/datagrabber/tests/test_dmcc13_benchmark.py +28 -11
- junifer/datagrabber/tests/test_multiple.py +48 -2
- junifer/datagrabber/tests/test_pattern_datalad.py +1 -1
- junifer/datagrabber/tests/test_pattern_validation_mixin.py +6 -6
- junifer/datareader/__init__.py +2 -2
- junifer/datareader/__init__.pyi +3 -0
- junifer/datareader/default.py +6 -6
- junifer/datareader/py.typed +0 -0
- junifer/external/nilearn/__init__.py +2 -3
- junifer/external/nilearn/__init__.pyi +4 -0
- junifer/external/nilearn/junifer_connectivity_measure.py +25 -17
- junifer/external/nilearn/junifer_nifti_spheres_masker.py +4 -4
- junifer/external/nilearn/py.typed +0 -0
- junifer/external/nilearn/tests/test_junifer_connectivity_measure.py +17 -16
- junifer/external/nilearn/tests/test_junifer_nifti_spheres_masker.py +2 -3
- junifer/markers/__init__.py +2 -38
- junifer/markers/__init__.pyi +37 -0
- junifer/markers/base.py +11 -14
- junifer/markers/brainprint.py +12 -14
- junifer/markers/complexity/__init__.py +2 -18
- junifer/markers/complexity/__init__.pyi +17 -0
- junifer/markers/complexity/complexity_base.py +9 -11
- junifer/markers/complexity/hurst_exponent.py +7 -7
- junifer/markers/complexity/multiscale_entropy_auc.py +7 -7
- junifer/markers/complexity/perm_entropy.py +7 -7
- junifer/markers/complexity/py.typed +0 -0
- junifer/markers/complexity/range_entropy.py +7 -7
- junifer/markers/complexity/range_entropy_auc.py +7 -7
- junifer/markers/complexity/sample_entropy.py +7 -7
- junifer/markers/complexity/tests/test_complexity_base.py +1 -1
- junifer/markers/complexity/tests/test_hurst_exponent.py +5 -5
- junifer/markers/complexity/tests/test_multiscale_entropy_auc.py +5 -5
- junifer/markers/complexity/tests/test_perm_entropy.py +5 -5
- junifer/markers/complexity/tests/test_range_entropy.py +5 -5
- junifer/markers/complexity/tests/test_range_entropy_auc.py +5 -5
- junifer/markers/complexity/tests/test_sample_entropy.py +5 -5
- junifer/markers/complexity/tests/test_weighted_perm_entropy.py +5 -5
- junifer/markers/complexity/weighted_perm_entropy.py +7 -7
- junifer/markers/ets_rss.py +12 -11
- junifer/markers/falff/__init__.py +2 -3
- junifer/markers/falff/__init__.pyi +4 -0
- junifer/markers/falff/_afni_falff.py +38 -45
- junifer/markers/falff/_junifer_falff.py +16 -19
- junifer/markers/falff/falff_base.py +7 -11
- junifer/markers/falff/falff_parcels.py +9 -9
- junifer/markers/falff/falff_spheres.py +8 -8
- junifer/markers/falff/py.typed +0 -0
- junifer/markers/falff/tests/test_falff_spheres.py +3 -1
- junifer/markers/functional_connectivity/__init__.py +2 -12
- junifer/markers/functional_connectivity/__init__.pyi +13 -0
- junifer/markers/functional_connectivity/crossparcellation_functional_connectivity.py +9 -8
- junifer/markers/functional_connectivity/edge_functional_connectivity_parcels.py +8 -8
- junifer/markers/functional_connectivity/edge_functional_connectivity_spheres.py +7 -7
- junifer/markers/functional_connectivity/functional_connectivity_base.py +13 -12
- junifer/markers/functional_connectivity/functional_connectivity_parcels.py +8 -8
- junifer/markers/functional_connectivity/functional_connectivity_spheres.py +7 -7
- junifer/markers/functional_connectivity/py.typed +0 -0
- junifer/markers/functional_connectivity/tests/test_edge_functional_connectivity_parcels.py +1 -2
- junifer/markers/functional_connectivity/tests/test_edge_functional_connectivity_spheres.py +1 -2
- junifer/markers/functional_connectivity/tests/test_functional_connectivity_parcels.py +6 -6
- junifer/markers/functional_connectivity/tests/test_functional_connectivity_spheres.py +5 -5
- junifer/markers/parcel_aggregation.py +22 -17
- junifer/markers/py.typed +0 -0
- junifer/markers/reho/__init__.py +2 -3
- junifer/markers/reho/__init__.pyi +4 -0
- junifer/markers/reho/_afni_reho.py +29 -35
- junifer/markers/reho/_junifer_reho.py +13 -14
- junifer/markers/reho/py.typed +0 -0
- junifer/markers/reho/reho_base.py +7 -11
- junifer/markers/reho/reho_parcels.py +10 -10
- junifer/markers/reho/reho_spheres.py +9 -9
- junifer/markers/sphere_aggregation.py +22 -17
- junifer/markers/temporal_snr/__init__.py +2 -3
- junifer/markers/temporal_snr/__init__.pyi +4 -0
- junifer/markers/temporal_snr/py.typed +0 -0
- junifer/markers/temporal_snr/temporal_snr_base.py +11 -10
- junifer/markers/temporal_snr/temporal_snr_parcels.py +8 -8
- junifer/markers/temporal_snr/temporal_snr_spheres.py +7 -7
- junifer/markers/tests/test_ets_rss.py +3 -3
- junifer/markers/tests/test_parcel_aggregation.py +24 -24
- junifer/markers/tests/test_sphere_aggregation.py +6 -6
- junifer/markers/utils.py +3 -3
- junifer/onthefly/__init__.py +2 -1
- junifer/onthefly/_brainprint.py +138 -0
- junifer/onthefly/read_transform.py +5 -8
- junifer/pipeline/__init__.py +2 -10
- junifer/pipeline/__init__.pyi +13 -0
- junifer/{markers/collection.py → pipeline/marker_collection.py} +8 -14
- junifer/pipeline/pipeline_component_registry.py +294 -0
- junifer/pipeline/pipeline_step_mixin.py +15 -11
- junifer/pipeline/py.typed +0 -0
- junifer/{markers/tests/test_collection.py → pipeline/tests/test_marker_collection.py} +2 -3
- junifer/pipeline/tests/test_pipeline_component_registry.py +200 -0
- junifer/pipeline/tests/test_pipeline_step_mixin.py +36 -37
- junifer/pipeline/tests/test_update_meta_mixin.py +4 -4
- junifer/pipeline/tests/test_workdir_manager.py +43 -0
- junifer/pipeline/update_meta_mixin.py +21 -17
- junifer/pipeline/utils.py +6 -6
- junifer/pipeline/workdir_manager.py +19 -5
- junifer/preprocess/__init__.py +2 -10
- junifer/preprocess/__init__.pyi +11 -0
- junifer/preprocess/base.py +10 -10
- junifer/preprocess/confounds/__init__.py +2 -2
- junifer/preprocess/confounds/__init__.pyi +3 -0
- junifer/preprocess/confounds/fmriprep_confound_remover.py +243 -64
- junifer/preprocess/confounds/py.typed +0 -0
- junifer/preprocess/confounds/tests/test_fmriprep_confound_remover.py +121 -14
- junifer/preprocess/py.typed +0 -0
- junifer/preprocess/smoothing/__init__.py +2 -2
- junifer/preprocess/smoothing/__init__.pyi +3 -0
- junifer/preprocess/smoothing/_afni_smoothing.py +40 -40
- junifer/preprocess/smoothing/_fsl_smoothing.py +22 -32
- junifer/preprocess/smoothing/_nilearn_smoothing.py +35 -14
- junifer/preprocess/smoothing/py.typed +0 -0
- junifer/preprocess/smoothing/smoothing.py +11 -13
- junifer/preprocess/warping/__init__.py +2 -2
- junifer/preprocess/warping/__init__.pyi +3 -0
- junifer/preprocess/warping/_ants_warper.py +136 -32
- junifer/preprocess/warping/_fsl_warper.py +73 -22
- junifer/preprocess/warping/py.typed +0 -0
- junifer/preprocess/warping/space_warper.py +39 -11
- junifer/preprocess/warping/tests/test_space_warper.py +5 -9
- junifer/py.typed +0 -0
- junifer/stats.py +5 -5
- junifer/storage/__init__.py +2 -10
- junifer/storage/__init__.pyi +11 -0
- junifer/storage/base.py +47 -13
- junifer/storage/hdf5.py +95 -33
- junifer/storage/pandas_base.py +12 -11
- junifer/storage/py.typed +0 -0
- junifer/storage/sqlite.py +11 -11
- junifer/storage/tests/test_hdf5.py +86 -4
- junifer/storage/tests/test_sqlite.py +2 -2
- junifer/storage/tests/test_storage_base.py +5 -2
- junifer/storage/tests/test_utils.py +33 -7
- junifer/storage/utils.py +95 -9
- junifer/testing/__init__.py +2 -3
- junifer/testing/__init__.pyi +4 -0
- junifer/testing/datagrabbers.py +10 -11
- junifer/testing/py.typed +0 -0
- junifer/testing/registry.py +4 -7
- junifer/testing/tests/test_testing_registry.py +9 -17
- junifer/tests/test_stats.py +2 -2
- junifer/typing/__init__.py +9 -0
- junifer/typing/__init__.pyi +31 -0
- junifer/typing/_typing.py +68 -0
- junifer/utils/__init__.py +2 -12
- junifer/utils/__init__.pyi +18 -0
- junifer/utils/_config.py +110 -0
- junifer/utils/_yaml.py +16 -0
- junifer/utils/helpers.py +6 -6
- junifer/utils/logging.py +117 -8
- junifer/utils/py.typed +0 -0
- junifer/{pipeline → utils}/singleton.py +19 -14
- junifer/utils/tests/test_config.py +59 -0
- {junifer-0.0.5.dev240.dist-info → junifer-0.0.6.dist-info}/METADATA +43 -38
- junifer-0.0.6.dist-info/RECORD +350 -0
- {junifer-0.0.5.dev240.dist-info → junifer-0.0.6.dist-info}/WHEEL +1 -1
- junifer-0.0.6.dist-info/entry_points.txt +2 -0
- junifer/api/parser.py +0 -118
- junifer/data/coordinates.py +0 -408
- junifer/data/masks.py +0 -670
- junifer/data/parcellations.py +0 -1828
- junifer/pipeline/registry.py +0 -177
- junifer/pipeline/tests/test_registry.py +0 -150
- junifer-0.0.5.dev240.dist-info/RECORD +0 -275
- junifer-0.0.5.dev240.dist-info/entry_points.txt +0 -2
- /junifer/{api → cli}/tests/data/gmd_mean.yaml +0 -0
- /junifer/{api → cli}/tests/data/gmd_mean_htcondor.yaml +0 -0
- /junifer/{api → cli}/tests/data/partly_cloudy_agg_mean_tian.yml +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/AutobiographicalMemory_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/CogAC_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/CogAR_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/DMNBuckner_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/Dosenbach2010_MNI_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/Empathy_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/Motor_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/MultiTask_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/PhysioStress_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/Power2011_MNI_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/Power2013_MNI_VOIs.tsv +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/Rew_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/Somatosensory_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/ToM_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/VigAtt_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/WM_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/eMDN_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/eSAD_VOIs.txt +0 -0
- /junifer/data/{VOIs → coordinates/VOIs}/meta/extDMN_VOIs.txt +0 -0
- {junifer-0.0.5.dev240.dist-info → junifer-0.0.6.dist-info/licenses}/AUTHORS.rst +0 -0
- {junifer-0.0.5.dev240.dist-info → junifer-0.0.6.dist-info/licenses}/LICENSE.md +0 -0
- {junifer-0.0.5.dev240.dist-info → junifer-0.0.6.dist-info}/top_level.txt +0 -0
junifer/storage/base.py
CHANGED
@@ -5,8 +5,9 @@
|
|
5
5
|
# License: AGPL
|
6
6
|
|
7
7
|
from abc import ABC, abstractmethod
|
8
|
+
from collections.abc import Iterable
|
8
9
|
from pathlib import Path
|
9
|
-
from typing import Any,
|
10
|
+
from typing import Any, Optional, Union
|
10
11
|
|
11
12
|
import numpy as np
|
12
13
|
import pandas as pd
|
@@ -43,7 +44,7 @@ class BaseFeatureStorage(ABC):
|
|
43
44
|
def __init__(
|
44
45
|
self,
|
45
46
|
uri: Union[str, Path],
|
46
|
-
storage_types: Union[
|
47
|
+
storage_types: Union[list[str], str],
|
47
48
|
single_output: bool = True,
|
48
49
|
) -> None:
|
49
50
|
self.uri = uri
|
@@ -61,7 +62,7 @@ class BaseFeatureStorage(ABC):
|
|
61
62
|
self._valid_inputs = storage_types
|
62
63
|
self.single_output = single_output
|
63
64
|
|
64
|
-
def get_valid_inputs(self) ->
|
65
|
+
def get_valid_inputs(self) -> list[str]:
|
65
66
|
"""Get valid storage types for input.
|
66
67
|
|
67
68
|
Returns
|
@@ -76,7 +77,7 @@ class BaseFeatureStorage(ABC):
|
|
76
77
|
klass=NotImplementedError,
|
77
78
|
)
|
78
79
|
|
79
|
-
def validate(self, input_:
|
80
|
+
def validate(self, input_: list[str]) -> None:
|
80
81
|
"""Validate the input to the pipeline step.
|
81
82
|
|
82
83
|
Parameters
|
@@ -98,7 +99,7 @@ class BaseFeatureStorage(ABC):
|
|
98
99
|
)
|
99
100
|
|
100
101
|
@abstractmethod
|
101
|
-
def list_features(self) ->
|
102
|
+
def list_features(self) -> dict[str, dict[str, Any]]:
|
102
103
|
"""List the features in the storage.
|
103
104
|
|
104
105
|
Returns
|
@@ -119,8 +120,8 @@ class BaseFeatureStorage(ABC):
|
|
119
120
|
self,
|
120
121
|
feature_name: Optional[str] = None,
|
121
122
|
feature_md5: Optional[str] = None,
|
122
|
-
) ->
|
123
|
-
str, Union[str,
|
123
|
+
) -> dict[
|
124
|
+
str, Union[str, list[Union[int, str, dict[str, str]]], np.ndarray]
|
124
125
|
]:
|
125
126
|
"""Read stored feature.
|
126
127
|
|
@@ -169,7 +170,7 @@ class BaseFeatureStorage(ABC):
|
|
169
170
|
)
|
170
171
|
|
171
172
|
@abstractmethod
|
172
|
-
def store_metadata(self, meta_md5: str, element:
|
173
|
+
def store_metadata(self, meta_md5: str, element: dict, meta: dict) -> None:
|
173
174
|
"""Store metadata.
|
174
175
|
|
175
176
|
Parameters
|
@@ -225,11 +226,15 @@ class BaseFeatureStorage(ABC):
|
|
225
226
|
self.store_scalar_table(
|
226
227
|
meta_md5=meta_md5, element=t_element, **kwargs
|
227
228
|
)
|
229
|
+
elif kind == "timeseries_2d":
|
230
|
+
self.store_timeseries_2d(
|
231
|
+
meta_md5=meta_md5, element=t_element, **kwargs
|
232
|
+
)
|
228
233
|
|
229
234
|
def store_matrix(
|
230
235
|
self,
|
231
236
|
meta_md5: str,
|
232
|
-
element:
|
237
|
+
element: dict,
|
233
238
|
data: np.ndarray,
|
234
239
|
col_names: Optional[Iterable[str]] = None,
|
235
240
|
row_names: Optional[Iterable[str]] = None,
|
@@ -271,8 +276,8 @@ class BaseFeatureStorage(ABC):
|
|
271
276
|
def store_vector(
|
272
277
|
self,
|
273
278
|
meta_md5: str,
|
274
|
-
element:
|
275
|
-
data: Union[np.ndarray,
|
279
|
+
element: dict,
|
280
|
+
data: Union[np.ndarray, list],
|
276
281
|
col_names: Optional[Iterable[str]] = None,
|
277
282
|
) -> None:
|
278
283
|
"""Store vector.
|
@@ -297,7 +302,7 @@ class BaseFeatureStorage(ABC):
|
|
297
302
|
def store_timeseries(
|
298
303
|
self,
|
299
304
|
meta_md5: str,
|
300
|
-
element:
|
305
|
+
element: dict,
|
301
306
|
data: np.ndarray,
|
302
307
|
col_names: Optional[Iterable[str]] = None,
|
303
308
|
) -> None:
|
@@ -320,10 +325,39 @@ class BaseFeatureStorage(ABC):
|
|
320
325
|
klass=NotImplementedError,
|
321
326
|
)
|
322
327
|
|
328
|
+
def store_timeseries_2d(
|
329
|
+
self,
|
330
|
+
meta_md5: str,
|
331
|
+
element: dict,
|
332
|
+
data: np.ndarray,
|
333
|
+
col_names: Optional[Iterable[str]] = None,
|
334
|
+
row_names: Optional[Iterable[str]] = None,
|
335
|
+
) -> None:
|
336
|
+
"""Store 2D timeseries.
|
337
|
+
|
338
|
+
Parameters
|
339
|
+
----------
|
340
|
+
meta_md5 : str
|
341
|
+
The metadata MD5 hash.
|
342
|
+
element : dict
|
343
|
+
The element as a dictionary.
|
344
|
+
data : numpy.ndarray
|
345
|
+
The timeseries data to store.
|
346
|
+
col_names : list or tuple of str, optional
|
347
|
+
The column labels (default None).
|
348
|
+
row_names : list or tuple of str, optional
|
349
|
+
The row labels (default None).
|
350
|
+
|
351
|
+
"""
|
352
|
+
raise_error(
|
353
|
+
msg="Concrete classes need to implement store_timeseries_2d().",
|
354
|
+
klass=NotImplementedError,
|
355
|
+
)
|
356
|
+
|
323
357
|
def store_scalar_table(
|
324
358
|
self,
|
325
359
|
meta_md5: str,
|
326
|
-
element:
|
360
|
+
element: dict,
|
327
361
|
data: np.ndarray,
|
328
362
|
col_names: Optional[Iterable[str]] = None,
|
329
363
|
row_names: Optional[Iterable[str]] = None,
|
junifer/storage/hdf5.py
CHANGED
@@ -4,10 +4,10 @@
|
|
4
4
|
# Federico Raimondo <f.raimondo@fz-juelich.de>
|
5
5
|
# License: AGPL
|
6
6
|
|
7
|
-
|
8
7
|
from collections import defaultdict
|
8
|
+
from collections.abc import Iterable
|
9
9
|
from pathlib import Path
|
10
|
-
from typing import Any,
|
10
|
+
from typing import Any, Optional, Union
|
11
11
|
|
12
12
|
import numpy as np
|
13
13
|
import pandas as pd
|
@@ -23,14 +23,20 @@ from ..external.h5io.h5io import (
|
|
23
23
|
)
|
24
24
|
from ..utils import logger, raise_error
|
25
25
|
from .base import BaseFeatureStorage
|
26
|
-
from .utils import
|
26
|
+
from .utils import (
|
27
|
+
element_to_prefix,
|
28
|
+
matrix_to_vector,
|
29
|
+
store_matrix_checks,
|
30
|
+
store_timeseries_2d_checks,
|
31
|
+
timeseries2d_to_vector,
|
32
|
+
)
|
27
33
|
|
28
34
|
|
29
35
|
__all__ = ["HDF5FeatureStorage"]
|
30
36
|
|
31
37
|
|
32
38
|
def _create_chunk(
|
33
|
-
chunk_data:
|
39
|
+
chunk_data: list[np.ndarray],
|
34
40
|
kind: str,
|
35
41
|
element_count: int,
|
36
42
|
chunk_size: int,
|
@@ -81,7 +87,7 @@ def _create_chunk(
|
|
81
87
|
chunk_size=tuple(array_chunk_size),
|
82
88
|
n_chunk=i_chunk,
|
83
89
|
)
|
84
|
-
elif kind in ["timeseries", "scalar_table"]:
|
90
|
+
elif kind in ["timeseries", "scalar_table", "timeseries_2d"]:
|
85
91
|
out = ChunkedList(
|
86
92
|
data=chunk_data,
|
87
93
|
size=element_count,
|
@@ -90,8 +96,8 @@ def _create_chunk(
|
|
90
96
|
else:
|
91
97
|
raise_error(
|
92
98
|
f"Invalid kind: {kind}. "
|
93
|
-
"Must be one of ['vector', 'matrix', 'timeseries',"
|
94
|
-
"'scalar_table']."
|
99
|
+
"Must be one of ['vector', 'matrix', 'timeseries', "
|
100
|
+
"'timeseries_2d', 'scalar_table']."
|
95
101
|
)
|
96
102
|
return out
|
97
103
|
|
@@ -164,7 +170,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
164
170
|
self.force_float32 = force_float32
|
165
171
|
self.chunk_size = chunk_size
|
166
172
|
|
167
|
-
def get_valid_inputs(self) ->
|
173
|
+
def get_valid_inputs(self) -> list[str]:
|
168
174
|
"""Get valid storage types for input.
|
169
175
|
|
170
176
|
Returns
|
@@ -176,7 +182,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
176
182
|
"""
|
177
183
|
return ["matrix", "vector", "timeseries", "scalar_table"]
|
178
184
|
|
179
|
-
def _fetch_correct_uri_for_io(self, element: Optional[
|
185
|
+
def _fetch_correct_uri_for_io(self, element: Optional[dict]) -> str:
|
180
186
|
"""Return proper URI for I/O based on `element`.
|
181
187
|
|
182
188
|
If `element` is None, will return `self.uri`.
|
@@ -195,8 +201,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
195
201
|
if not self.single_output and not element:
|
196
202
|
raise_error(
|
197
203
|
msg=(
|
198
|
-
"`element` must be provided when `single_output` "
|
199
|
-
"is False"
|
204
|
+
"`element` must be provided when `single_output` is False"
|
200
205
|
),
|
201
206
|
klass=RuntimeError,
|
202
207
|
)
|
@@ -210,8 +215,8 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
210
215
|
return f"{self.uri.parent}/{prefix}{self.uri.name}" # type: ignore
|
211
216
|
|
212
217
|
def _read_metadata(
|
213
|
-
self, element: Optional[
|
214
|
-
) ->
|
218
|
+
self, element: Optional[dict[str, str]] = None
|
219
|
+
) -> dict[str, dict[str, Any]]:
|
215
220
|
"""Read metadata (should not be called directly).
|
216
221
|
|
217
222
|
Parameters
|
@@ -261,7 +266,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
261
266
|
|
262
267
|
return metadata
|
263
268
|
|
264
|
-
def list_features(self) ->
|
269
|
+
def list_features(self) -> dict[str, dict[str, Any]]:
|
265
270
|
"""List the features in the storage.
|
266
271
|
|
267
272
|
Returns
|
@@ -281,8 +286,8 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
281
286
|
return metadata
|
282
287
|
|
283
288
|
def _read_data(
|
284
|
-
self, md5: str, element: Optional[
|
285
|
-
) ->
|
289
|
+
self, md5: str, element: Optional[dict[str, str]] = None
|
290
|
+
) -> dict[str, Any]:
|
286
291
|
"""Read data (should not be called directly).
|
287
292
|
|
288
293
|
Parameters
|
@@ -338,8 +343,8 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
338
343
|
self,
|
339
344
|
feature_name: Optional[str] = None,
|
340
345
|
feature_md5: Optional[str] = None,
|
341
|
-
) ->
|
342
|
-
str, Union[str,
|
346
|
+
) -> dict[
|
347
|
+
str, Union[str, list[Union[int, str, dict[str, str]]], np.ndarray]
|
343
348
|
]:
|
344
349
|
"""Read stored feature.
|
345
350
|
|
@@ -513,6 +518,27 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
513
518
|
columns = hdf_data["column_headers"]
|
514
519
|
# Convert data from 3D to 2D
|
515
520
|
reshaped_data = np.concatenate(all_data, axis=0)
|
521
|
+
elif hdf_data["kind"] == "timeseries_2d":
|
522
|
+
# Create dictionary for aggregating index data
|
523
|
+
element_idx = defaultdict(list)
|
524
|
+
all_data = []
|
525
|
+
for idx, element in enumerate(hdf_data["element"]):
|
526
|
+
# Get row count for the element
|
527
|
+
t_data = hdf_data["data"][idx]
|
528
|
+
flat_data, columns = timeseries2d_to_vector(
|
529
|
+
data=t_data,
|
530
|
+
col_names=hdf_data["column_headers"],
|
531
|
+
row_names=hdf_data["row_headers"],
|
532
|
+
)
|
533
|
+
all_data.append(flat_data)
|
534
|
+
n_timepoints = flat_data.shape[0]
|
535
|
+
# Set rows for the index
|
536
|
+
for key, val in element.items():
|
537
|
+
element_idx[key].extend([val] * n_timepoints)
|
538
|
+
# Add extra column for timepoints
|
539
|
+
element_idx["timepoint"].extend(np.arange(n_timepoints))
|
540
|
+
# Convert data from 3D to 2D
|
541
|
+
reshaped_data = np.concatenate(all_data, axis=0)
|
516
542
|
elif hdf_data["kind"] == "scalar_table":
|
517
543
|
# Create dictionary for aggregating index data
|
518
544
|
element_idx = defaultdict(list)
|
@@ -562,7 +588,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
562
588
|
return df
|
563
589
|
|
564
590
|
def _write_processed_data(
|
565
|
-
self, fname: str, processed_data:
|
591
|
+
self, fname: str, processed_data: dict[str, Any], title: str
|
566
592
|
) -> None:
|
567
593
|
"""Write processed data to HDF5 (should not be called directly).
|
568
594
|
|
@@ -594,8 +620,8 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
594
620
|
def store_metadata(
|
595
621
|
self,
|
596
622
|
meta_md5: str,
|
597
|
-
element:
|
598
|
-
meta:
|
623
|
+
element: dict[str, str],
|
624
|
+
meta: dict[str, Any],
|
599
625
|
) -> None:
|
600
626
|
"""Store metadata.
|
601
627
|
|
@@ -655,7 +681,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
655
681
|
self,
|
656
682
|
kind: str,
|
657
683
|
meta_md5: str,
|
658
|
-
element:
|
684
|
+
element: list[dict[str, str]],
|
659
685
|
data: np.ndarray,
|
660
686
|
**kwargs: Any,
|
661
687
|
) -> None:
|
@@ -764,7 +790,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
764
790
|
)
|
765
791
|
|
766
792
|
t_data = stored_data["data"]
|
767
|
-
if kind in ["timeseries", "scalar_table"]:
|
793
|
+
if kind in ["timeseries", "scalar_table", "timeseries_2d"]:
|
768
794
|
t_data += data
|
769
795
|
else:
|
770
796
|
t_data = np.concatenate((t_data, data), axis=-1)
|
@@ -797,7 +823,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
797
823
|
def store_matrix(
|
798
824
|
self,
|
799
825
|
meta_md5: str,
|
800
|
-
element:
|
826
|
+
element: dict[str, str],
|
801
827
|
data: np.ndarray,
|
802
828
|
col_names: Optional[Iterable[str]] = None,
|
803
829
|
row_names: Optional[Iterable[str]] = None,
|
@@ -876,8 +902,8 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
876
902
|
def store_vector(
|
877
903
|
self,
|
878
904
|
meta_md5: str,
|
879
|
-
element:
|
880
|
-
data: Union[np.ndarray,
|
905
|
+
element: dict[str, str],
|
906
|
+
data: Union[np.ndarray, list],
|
881
907
|
col_names: Optional[Iterable[str]] = None,
|
882
908
|
) -> None:
|
883
909
|
"""Store vector.
|
@@ -919,7 +945,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
919
945
|
def store_timeseries(
|
920
946
|
self,
|
921
947
|
meta_md5: str,
|
922
|
-
element:
|
948
|
+
element: dict[str, str],
|
923
949
|
data: np.ndarray,
|
924
950
|
col_names: Optional[Iterable[str]] = None,
|
925
951
|
) -> None:
|
@@ -946,10 +972,48 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
946
972
|
row_header_column_name="timepoint",
|
947
973
|
)
|
948
974
|
|
975
|
+
def store_timeseries_2d(
|
976
|
+
self,
|
977
|
+
meta_md5: str,
|
978
|
+
element: dict[str, str],
|
979
|
+
data: np.ndarray,
|
980
|
+
col_names: Optional[Iterable[str]] = None,
|
981
|
+
row_names: Optional[Iterable[str]] = None,
|
982
|
+
) -> None:
|
983
|
+
"""Store a 2D timeseries.
|
984
|
+
|
985
|
+
Parameters
|
986
|
+
----------
|
987
|
+
meta_md5 : str
|
988
|
+
The metadata MD5 hash.
|
989
|
+
element : dict
|
990
|
+
The element as dictionary.
|
991
|
+
data : numpy.ndarray
|
992
|
+
The 2D timeseries data to store.
|
993
|
+
col_names : list or tuple of str, optional
|
994
|
+
The column labels (default None).
|
995
|
+
row_names : list or tuple of str, optional
|
996
|
+
The row labels (default None).
|
997
|
+
|
998
|
+
"""
|
999
|
+
store_timeseries_2d_checks(
|
1000
|
+
data_shape=data.shape,
|
1001
|
+
row_names_len=len(row_names), # type: ignore
|
1002
|
+
col_names_len=len(col_names), # type: ignore
|
1003
|
+
)
|
1004
|
+
self._store_data(
|
1005
|
+
kind="timeseries_2d",
|
1006
|
+
meta_md5=meta_md5,
|
1007
|
+
element=[element], # convert to list
|
1008
|
+
data=[data], # convert to list
|
1009
|
+
column_headers=col_names,
|
1010
|
+
row_headers=row_names,
|
1011
|
+
)
|
1012
|
+
|
949
1013
|
def store_scalar_table(
|
950
1014
|
self,
|
951
1015
|
meta_md5: str,
|
952
|
-
element:
|
1016
|
+
element: dict,
|
953
1017
|
data: np.ndarray,
|
954
1018
|
col_names: Optional[Iterable[str]] = None,
|
955
1019
|
row_names: Optional[Iterable[str]] = None,
|
@@ -1013,8 +1077,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
1013
1077
|
|
1014
1078
|
# Run loop to collect metadata
|
1015
1079
|
logger.info(
|
1016
|
-
"Collecting metadata from "
|
1017
|
-
f"{self.uri.parent}/*_{self.uri.name}" # type: ignore
|
1080
|
+
f"Collecting metadata from {self.uri.parent}/*_{self.uri.name}" # type: ignore
|
1018
1081
|
)
|
1019
1082
|
# Collect element files per feature MD5
|
1020
1083
|
elements_per_feature_md5 = defaultdict(list)
|
@@ -1045,8 +1108,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
1045
1108
|
|
1046
1109
|
# Run loop to collect data per feature per file
|
1047
1110
|
logger.info(
|
1048
|
-
"Collecting data from "
|
1049
|
-
f"{self.uri.parent}/*_{self.uri.name}" # type: ignore
|
1111
|
+
f"Collecting data from {self.uri.parent}/*_{self.uri.name}" # type: ignore
|
1050
1112
|
)
|
1051
1113
|
logger.info(f"Will collect {len(elements_per_feature_md5)} features.")
|
1052
1114
|
|
@@ -1091,7 +1153,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
|
|
1091
1153
|
kind = static_data["kind"]
|
1092
1154
|
|
1093
1155
|
# Append the "dynamic" data
|
1094
|
-
if kind in ["timeseries", "scalar_table"]:
|
1156
|
+
if kind in ["timeseries", "scalar_table", "timeseries_2d"]:
|
1095
1157
|
chunk_data.extend(t_data["data"])
|
1096
1158
|
else:
|
1097
1159
|
chunk_data.append(t_data["data"])
|
junifer/storage/pandas_base.py
CHANGED
@@ -5,8 +5,9 @@
|
|
5
5
|
# License: AGPL
|
6
6
|
|
7
7
|
import json
|
8
|
+
from collections.abc import Iterable
|
8
9
|
from pathlib import Path
|
9
|
-
from typing import
|
10
|
+
from typing import Optional, Union
|
10
11
|
|
11
12
|
import numpy as np
|
12
13
|
import pandas as pd
|
@@ -44,7 +45,7 @@ class PandasBaseFeatureStorage(BaseFeatureStorage):
|
|
44
45
|
) -> None:
|
45
46
|
super().__init__(uri=uri, single_output=single_output, **kwargs)
|
46
47
|
|
47
|
-
def get_valid_inputs(self) ->
|
48
|
+
def get_valid_inputs(self) -> list[str]:
|
48
49
|
"""Get valid storage types for input.
|
49
50
|
|
50
51
|
Returns
|
@@ -56,7 +57,7 @@ class PandasBaseFeatureStorage(BaseFeatureStorage):
|
|
56
57
|
"""
|
57
58
|
return ["matrix", "vector", "timeseries"]
|
58
59
|
|
59
|
-
def _meta_row(self, meta:
|
60
|
+
def _meta_row(self, meta: dict, meta_md5: str) -> pd.DataFrame:
|
60
61
|
"""Convert the metadata to a pandas DataFrame.
|
61
62
|
|
62
63
|
Parameters
|
@@ -80,7 +81,7 @@ class PandasBaseFeatureStorage(BaseFeatureStorage):
|
|
80
81
|
|
81
82
|
@staticmethod
|
82
83
|
def element_to_index(
|
83
|
-
element:
|
84
|
+
element: dict, n_rows: int = 1, rows_col_name: Optional[str] = None
|
84
85
|
) -> Union[pd.Index, pd.MultiIndex]:
|
85
86
|
"""Convert the element metadata to index.
|
86
87
|
|
@@ -101,7 +102,7 @@ class PandasBaseFeatureStorage(BaseFeatureStorage):
|
|
101
102
|
|
102
103
|
"""
|
103
104
|
# Make mapping between element access keys and values
|
104
|
-
elem_idx:
|
105
|
+
elem_idx: dict[str, Iterable[str]] = {
|
105
106
|
k: [v] * n_rows for k, v in element.items()
|
106
107
|
}
|
107
108
|
|
@@ -129,7 +130,7 @@ class PandasBaseFeatureStorage(BaseFeatureStorage):
|
|
129
130
|
return index
|
130
131
|
|
131
132
|
def store_df(
|
132
|
-
self, meta_md5: str, element:
|
133
|
+
self, meta_md5: str, element: dict, df: Union[pd.DataFrame, pd.Series]
|
133
134
|
) -> None:
|
134
135
|
"""Implement pandas DataFrame storing.
|
135
136
|
|
@@ -157,8 +158,8 @@ class PandasBaseFeatureStorage(BaseFeatureStorage):
|
|
157
158
|
def _store_2d(
|
158
159
|
self,
|
159
160
|
meta_md5: str,
|
160
|
-
element:
|
161
|
-
data: Union[np.ndarray,
|
161
|
+
element: dict,
|
162
|
+
data: Union[np.ndarray, list],
|
162
163
|
col_names: Optional[Iterable[str]] = None,
|
163
164
|
rows_col_name: Optional[str] = None,
|
164
165
|
) -> None:
|
@@ -194,8 +195,8 @@ class PandasBaseFeatureStorage(BaseFeatureStorage):
|
|
194
195
|
def store_vector(
|
195
196
|
self,
|
196
197
|
meta_md5: str,
|
197
|
-
element:
|
198
|
-
data: Union[np.ndarray,
|
198
|
+
element: dict,
|
199
|
+
data: Union[np.ndarray, list],
|
199
200
|
col_names: Optional[Iterable[str]] = None,
|
200
201
|
) -> None:
|
201
202
|
"""Store vector.
|
@@ -232,7 +233,7 @@ class PandasBaseFeatureStorage(BaseFeatureStorage):
|
|
232
233
|
def store_timeseries(
|
233
234
|
self,
|
234
235
|
meta_md5: str,
|
235
|
-
element:
|
236
|
+
element: dict,
|
236
237
|
data: np.ndarray,
|
237
238
|
col_names: Optional[Iterable[str]] = None,
|
238
239
|
) -> None:
|
junifer/storage/py.typed
ADDED
File without changes
|
junifer/storage/sqlite.py
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
import json
|
8
8
|
from pathlib import Path
|
9
|
-
from typing import TYPE_CHECKING, Any,
|
9
|
+
from typing import TYPE_CHECKING, Any, Optional, Union
|
10
10
|
|
11
11
|
import numpy as np
|
12
12
|
import pandas as pd
|
@@ -92,7 +92,7 @@ class SQLiteFeatureStorage(PandasBaseFeatureStorage):
|
|
92
92
|
# Set upsert
|
93
93
|
self._upsert = upsert
|
94
94
|
|
95
|
-
def get_engine(self, element: Optional[
|
95
|
+
def get_engine(self, element: Optional[dict] = None) -> "Engine":
|
96
96
|
"""Get engine.
|
97
97
|
|
98
98
|
Parameters
|
@@ -209,7 +209,7 @@ class SQLiteFeatureStorage(PandasBaseFeatureStorage):
|
|
209
209
|
msg=f"Invalid option {if_exists} for if_exists."
|
210
210
|
)
|
211
211
|
|
212
|
-
def list_features(self) ->
|
212
|
+
def list_features(self) -> dict[str, dict[str, Any]]:
|
213
213
|
"""List the features in the storage.
|
214
214
|
|
215
215
|
Returns
|
@@ -229,7 +229,7 @@ class SQLiteFeatureStorage(PandasBaseFeatureStorage):
|
|
229
229
|
# Format index names for retrieved data
|
230
230
|
meta_df.index = meta_df.index.str.replace(r"meta_", "")
|
231
231
|
# Convert dataframe to dictionary
|
232
|
-
out:
|
232
|
+
out: dict[str, dict[str, str]] = meta_df.to_dict(
|
233
233
|
orient="index"
|
234
234
|
) # type: ignore
|
235
235
|
# Format output
|
@@ -242,8 +242,8 @@ class SQLiteFeatureStorage(PandasBaseFeatureStorage):
|
|
242
242
|
self,
|
243
243
|
feature_name: Optional[str] = None,
|
244
244
|
feature_md5: Optional[str] = None,
|
245
|
-
) ->
|
246
|
-
str, Union[str,
|
245
|
+
) -> dict[
|
246
|
+
str, Union[str, list[Union[int, str, dict[str, str]]], np.ndarray]
|
247
247
|
]:
|
248
248
|
"""Read stored feature.
|
249
249
|
|
@@ -358,7 +358,7 @@ class SQLiteFeatureStorage(PandasBaseFeatureStorage):
|
|
358
358
|
df = df.set_index(index_names)
|
359
359
|
return df
|
360
360
|
|
361
|
-
def store_metadata(self, meta_md5: str, element:
|
361
|
+
def store_metadata(self, meta_md5: str, element: dict, meta: dict) -> None:
|
362
362
|
"""Implement metadata storing in the storage.
|
363
363
|
|
364
364
|
Parameters
|
@@ -381,7 +381,7 @@ class SQLiteFeatureStorage(PandasBaseFeatureStorage):
|
|
381
381
|
self._save_upsert(meta_df, "meta", engine)
|
382
382
|
|
383
383
|
def store_df(
|
384
|
-
self, meta_md5: str, element:
|
384
|
+
self, meta_md5: str, element: dict, df: Union[pd.DataFrame, pd.Series]
|
385
385
|
) -> None:
|
386
386
|
"""Implement pandas DataFrame storing.
|
387
387
|
|
@@ -434,10 +434,10 @@ class SQLiteFeatureStorage(PandasBaseFeatureStorage):
|
|
434
434
|
def store_matrix(
|
435
435
|
self,
|
436
436
|
meta_md5: str,
|
437
|
-
element:
|
437
|
+
element: dict,
|
438
438
|
data: np.ndarray,
|
439
|
-
col_names: Optional[
|
440
|
-
row_names: Optional[
|
439
|
+
col_names: Optional[list[str]] = None,
|
440
|
+
row_names: Optional[list[str]] = None,
|
441
441
|
matrix_kind: str = "full",
|
442
442
|
diagonal: bool = True,
|
443
443
|
) -> None:
|