junifer 0.0.5.dev86__py3-none-any.whl → 0.0.5.dev93__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
junifer/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.0.5.dev86'
16
- __version_tuple__ = version_tuple = (0, 0, 5, 'dev86')
15
+ __version__ = version = '0.0.5.dev93'
16
+ __version_tuple__ = version_tuple = (0, 0, 5, 'dev93')
junifer/storage/base.py CHANGED
@@ -189,7 +189,7 @@ class BaseFeatureStorage(ABC):
189
189
 
190
190
  Parameters
191
191
  ----------
192
- kind : {"matrix", "timeseries", "vector"}
192
+ kind : {"matrix", "timeseries", "vector", "scalar_table"}
193
193
  The storage kind.
194
194
  **kwargs
195
195
  The keyword arguments.
@@ -218,6 +218,10 @@ class BaseFeatureStorage(ABC):
218
218
  )
219
219
  elif kind == "vector":
220
220
  self.store_vector(meta_md5=meta_md5, element=t_element, **kwargs)
221
+ elif kind == "scalar_table":
222
+ self.store_scalar_table(
223
+ meta_md5=meta_md5, element=t_element, **kwargs
224
+ )
221
225
 
222
226
  def store_matrix(
223
227
  self,
@@ -313,6 +317,38 @@ class BaseFeatureStorage(ABC):
313
317
  klass=NotImplementedError,
314
318
  )
315
319
 
320
+ def store_scalar_table(
321
+ self,
322
+ meta_md5: str,
323
+ element: Dict,
324
+ data: np.ndarray,
325
+ col_names: Optional[Iterable[str]] = None,
326
+ row_names: Optional[Iterable[str]] = None,
327
+ row_header_col_name: Optional[str] = "feature",
328
+ ) -> None:
329
+ """Store table with scalar values.
330
+
331
+ Parameters
332
+ ----------
333
+ meta_md5 : str
334
+ The metadata MD5 hash.
335
+ element : dict
336
+ The element as a dictionary.
337
+ data : numpy.ndarray
338
+ The scalar table data to store.
339
+ col_names : list or tuple of str, optional
340
+ The column labels (default None).
341
+ row_names : list or tuple of str, optional
342
+ The row labels (default None).
343
+ row_header_col_name : str, optional
344
+ The column name for the row header column (default "feature").
345
+
346
+ """
347
+ raise_error(
348
+ msg="Concrete classes need to implement store_scalar_table().",
349
+ klass=NotImplementedError,
350
+ )
351
+
316
352
  @abstractmethod
317
353
  def collect(self) -> None:
318
354
  """Collect data."""
junifer/storage/hdf5.py CHANGED
@@ -56,7 +56,8 @@ def _create_chunk(
56
56
  Raises
57
57
  ------
58
58
  ValueError
59
- If `kind` is not one of ['vector', 'matrix', 'timeseries'].
59
+ If `kind` is not one of ['vector', 'matrix', 'timeseries',
60
+ 'scalar_table'].
60
61
 
61
62
  """
62
63
  if kind in ["vector", "matrix"]:
@@ -77,7 +78,7 @@ def _create_chunk(
77
78
  chunk_size=tuple(array_chunk_size),
78
79
  n_chunk=i_chunk,
79
80
  )
80
- elif kind == "timeseries":
81
+ elif kind in ["timeseries", "scalar_table"]:
81
82
  out = ChunkedList(
82
83
  data=chunk_data,
83
84
  size=element_count,
@@ -86,7 +87,8 @@ def _create_chunk(
86
87
  else:
87
88
  raise_error(
88
89
  f"Invalid kind: {kind}. "
89
- "Must be one of ['vector', 'matrix', 'timeseries']."
90
+ "Must be one of ['vector', 'matrix', 'timeseries',"
91
+ "'scalar_table']."
90
92
  )
91
93
  return out
92
94
 
@@ -146,7 +148,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
146
148
  uri.parent.mkdir(parents=True, exist_ok=True)
147
149
 
148
150
  # Available storage kinds
149
- storage_types = ["vector", "timeseries", "matrix"]
151
+ storage_types = ["vector", "timeseries", "matrix", "scalar_table"]
150
152
 
151
153
  super().__init__(
152
154
  uri=uri,
@@ -169,7 +171,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
169
171
  storage.
170
172
 
171
173
  """
172
- return ["matrix", "vector", "timeseries"]
174
+ return ["matrix", "vector", "timeseries", "scalar_table"]
173
175
 
174
176
  def _fetch_correct_uri_for_io(self, element: Optional[Dict]) -> str:
175
177
  """Return proper URI for I/O based on `element`.
@@ -508,6 +510,26 @@ class HDF5FeatureStorage(BaseFeatureStorage):
508
510
  columns = hdf_data["column_headers"]
509
511
  # Convert data from 3D to 2D
510
512
  reshaped_data = np.concatenate(all_data, axis=0)
513
+ elif hdf_data["kind"] == "scalar_table":
514
+ # Create dictionary for aggregating index data
515
+ element_idx = defaultdict(list)
516
+ all_data = []
517
+ for idx, element in enumerate(hdf_data["element"]):
518
+ # Collect data and get row count for the element
519
+ t_data = hdf_data["data"][idx]
520
+ all_data.append(t_data)
521
+ n_rows = len(hdf_data["row_headers"])
522
+ # Set rows for the index
523
+ for key, val in element.items():
524
+ element_idx[key].extend([val] * n_rows)
525
+ # Add extra column for row header column name
526
+ element_idx[hdf_data["row_header_column_name"]].extend(
527
+ hdf_data["row_headers"]
528
+ )
529
+ # Set column headers for dataframe
530
+ columns = hdf_data["column_headers"]
531
+ # Convert data from 3D to 2D
532
+ reshaped_data = np.concatenate(all_data, axis=0)
511
533
 
512
534
  # Create dataframe for index
513
535
  idx_df = pd.DataFrame(data=element_idx) # type: ignore
@@ -643,7 +665,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
643
665
 
644
666
  Parameters
645
667
  ----------
646
- kind : {"matrix", "vector", "timeseries"}
668
+ kind : {"matrix", "vector", "timeseries", "scalar_table"}
647
669
  The storage kind.
648
670
  meta_md5 : str
649
671
  The metadata MD5 hash.
@@ -739,8 +761,8 @@ class HDF5FeatureStorage(BaseFeatureStorage):
739
761
  )
740
762
 
741
763
  t_data = stored_data["data"]
742
- if kind == "timeseries":
743
- t_data.append(data)
764
+ if kind in ["timeseries", "scalar_table"]:
765
+ t_data += data
744
766
  else:
745
767
  t_data = np.concatenate((t_data, data), axis=-1)
746
768
  # Existing entry; append to existing
@@ -921,6 +943,43 @@ class HDF5FeatureStorage(BaseFeatureStorage):
921
943
  row_header_column_name="timepoint",
922
944
  )
923
945
 
946
+ def store_scalar_table(
947
+ self,
948
+ meta_md5: str,
949
+ element: Dict,
950
+ data: np.ndarray,
951
+ col_names: Optional[Iterable[str]] = None,
952
+ row_names: Optional[Iterable[str]] = None,
953
+ row_header_col_name: Optional[str] = "feature",
954
+ ) -> None:
955
+ """Store table with scalar values.
956
+
957
+ Parameters
958
+ ----------
959
+ meta_md5 : str
960
+ The metadata MD5 hash.
961
+ element : dict
962
+ The element as a dictionary.
963
+ data : numpy.ndarray
964
+ The scalar table data to store.
965
+ col_names : list or tuple of str, optional
966
+ The column labels (default None).
967
+ row_names : list or tuple of str, optional
968
+ The row labels (default None).
969
+ row_header_col_name : str, optional
970
+ The column name for the row header column (default "feature").
971
+
972
+ """
973
+ self._store_data(
974
+ kind="scalar_table",
975
+ meta_md5=meta_md5,
976
+ element=[element], # convert to list
977
+ data=[data], # convert to list
978
+ column_headers=col_names,
979
+ row_headers=row_names,
980
+ row_header_column_name=row_header_col_name,
981
+ )
982
+
924
983
  def collect(self) -> None:
925
984
  """Implement data collection.
926
985
 
@@ -1029,7 +1088,7 @@ class HDF5FeatureStorage(BaseFeatureStorage):
1029
1088
  kind = static_data["kind"]
1030
1089
 
1031
1090
  # Append the "dynamic" data
1032
- if kind == "timeseries":
1091
+ if kind in ["timeseries", "scalar_table"]:
1033
1092
  chunk_data.extend(t_data["data"])
1034
1093
  else:
1035
1094
  chunk_data.append(t_data["data"])
@@ -25,7 +25,12 @@ from junifer.storage.utils import (
25
25
  def test_get_valid_inputs() -> None:
26
26
  """Test valid inputs."""
27
27
  storage = HDF5FeatureStorage(uri="/tmp")
28
- assert storage.get_valid_inputs() == ["matrix", "vector", "timeseries"]
28
+ assert storage.get_valid_inputs() == [
29
+ "matrix",
30
+ "vector",
31
+ "timeseries",
32
+ "scalar_table",
33
+ ]
29
34
 
30
35
 
31
36
  def test_single_output(tmp_path: Path) -> None:
@@ -808,7 +813,7 @@ def test_store_timeseries(tmp_path: Path) -> None:
808
813
  data = np.array([[10], [20], [30], [40], [50]])
809
814
  col_names = ["signal"]
810
815
 
811
- # Store vector
816
+ # Store timeseries
812
817
  storage.store_timeseries(
813
818
  meta_md5=meta_md5,
814
819
  element=element_to_store,
@@ -822,6 +827,53 @@ def test_store_timeseries(tmp_path: Path) -> None:
822
827
  assert_array_equal(read_df.values, data)
823
828
 
824
829
 
830
+ def test_store_scalar_table(tmp_path: Path) -> None:
831
+ """Test scalar table store.
832
+
833
+ Parameters
834
+ ----------
835
+ tmp_path : pathlib.Path
836
+ The path to the test directory.
837
+
838
+ """
839
+ uri = tmp_path / "test_store_scalar_table.hdf5"
840
+ storage = HDF5FeatureStorage(uri=uri)
841
+ # Metadata to store
842
+ element = {"subject": "test"}
843
+ meta = {
844
+ "element": element,
845
+ "dependencies": ["numpy"],
846
+ "marker": {"name": "brainprint"},
847
+ "type": "FreeSurfer",
848
+ }
849
+ # Process the metadata
850
+ meta_md5, meta_to_store, element_to_store = process_meta(meta)
851
+ # Store metadata
852
+ storage.store_metadata(
853
+ meta_md5=meta_md5, element=element_to_store, meta=meta_to_store
854
+ )
855
+
856
+ # Data to store
857
+ data = np.array([[10, 20], [30, 40], [50, 60]])
858
+ col_names = ["roi1", "roi2"]
859
+ row_names = ["ev1", "ev2", "ev3"]
860
+
861
+ # Store scalar table
862
+ storage.store_scalar_table(
863
+ meta_md5=meta_md5,
864
+ element=element_to_store,
865
+ data=data,
866
+ col_names=col_names,
867
+ row_names=row_names,
868
+ row_header_col_name="eigenvalue",
869
+ )
870
+
871
+ # Read into dataframe
872
+ read_df = storage.read_df(feature_md5=meta_md5)
873
+ # Check if data are equal
874
+ assert_array_equal(read_df.values, data)
875
+
876
+
825
877
  def _create_data_to_store(n_elements: int, kind: str) -> Tuple[str, Dict]:
826
878
  """Create data to store.
827
879
 
@@ -854,13 +906,19 @@ def _create_data_to_store(n_elements: int, kind: str) -> Tuple[str, Dict]:
854
906
  "col_names": [f"col-{i}" for i in range(10)],
855
907
  "matrix_kind": "full",
856
908
  }
857
- elif kind == "timeseries":
909
+ elif kind in "timeseries":
858
910
  data_to_store = {
859
911
  "data": np.arange(20).reshape(2, 10),
860
912
  "col_names": [f"col-{i}" for i in range(10)],
861
913
  }
862
- else:
863
- raise ValueError(f"Unknown kind {kind}.")
914
+ elif kind in "scalar_table":
915
+ data_to_store = {
916
+ "data": np.arange(50).reshape(5, 10),
917
+ "row_names": [f"row-{i}" for i in range(5)],
918
+ "col_names": [f"col-{i}" for i in range(10)],
919
+ "row_header_col_name": "row",
920
+ }
921
+
864
922
  for i in range(n_elements):
865
923
  element = {"subject": f"sub-{i // 2}", "session": f"ses-{i % 2}"}
866
924
  meta = {
@@ -903,6 +961,7 @@ def _create_data_to_store(n_elements: int, kind: str) -> Tuple[str, Dict]:
903
961
  (10, 3, "matrix"),
904
962
  (10, 5, "matrix"),
905
963
  (10, 5, "timeseries"),
964
+ (10, 5, "scalar_table"),
906
965
  ],
907
966
  )
908
967
  def test_multi_output_store_and_collect(
@@ -930,21 +989,20 @@ def test_multi_output_store_and_collect(
930
989
  meta_md5, all_data = _create_data_to_store(n_elements, kind)
931
990
 
932
991
  for t_data in all_data:
933
- # Store metadata for tables
992
+ # Store metadata
934
993
  storage.store_metadata(
935
994
  meta_md5=meta_md5,
936
995
  element=t_data["element"],
937
996
  meta=t_data["meta"],
938
997
  )
998
+ # Store data
939
999
  if kind == "vector":
940
- # Store tables
941
1000
  storage.store_vector(
942
1001
  meta_md5=meta_md5,
943
1002
  element=t_data["element"],
944
1003
  **t_data["data"],
945
1004
  )
946
1005
  elif kind == "matrix":
947
- # Store tables
948
1006
  storage.store_matrix(
949
1007
  meta_md5=meta_md5,
950
1008
  element=t_data["element"],
@@ -956,11 +1014,17 @@ def test_multi_output_store_and_collect(
956
1014
  element=t_data["element"],
957
1015
  **t_data["data"],
958
1016
  )
1017
+ elif kind == "scalar_table":
1018
+ storage.store_scalar_table(
1019
+ meta_md5=meta_md5,
1020
+ element=t_data["element"],
1021
+ **t_data["data"],
1022
+ )
959
1023
  # Check that base URI does not exist yet
960
1024
  assert not uri.exists()
961
1025
 
962
1026
  for t_data in all_data:
963
- # Convert element to preifx
1027
+ # Convert element to prefix
964
1028
  prefix = element_to_prefix(t_data["element"])
965
1029
  # URIs for data storage
966
1030
  elem_uri = uri.parent / f"{prefix}{uri.name}"
@@ -977,7 +1041,7 @@ def test_multi_output_store_and_collect(
977
1041
  # Check that base URI exists now
978
1042
  assert uri.exists()
979
1043
 
980
- # # Read unified metadata
1044
+ # Read unified metadata
981
1045
  read_unified_meta = storage.list_features()
982
1046
  assert meta_md5 in read_unified_meta
983
1047
 
@@ -989,6 +1053,10 @@ def test_multi_output_store_and_collect(
989
1053
  data_size = np.sum([x["data"]["data"].shape[0] for x in all_data])
990
1054
  assert len(all_df) == data_size
991
1055
  idx_names = [x for x in all_df.index.names if x != "timepoint"]
1056
+ elif kind == "scalar_table":
1057
+ data_size = np.sum([x["data"]["data"].shape[0] for x in all_data])
1058
+ assert len(all_df) == data_size
1059
+ idx_names = [x for x in all_df.index.names if x != "row"]
992
1060
  else:
993
1061
  assert len(all_df) == len(all_data)
994
1062
  idx_names = all_df.index.names
@@ -1013,6 +1081,10 @@ def test_multi_output_store_and_collect(
1013
1081
  assert_array_equal(t_series.values, t_data["data"]["data"])
1014
1082
  series_names = t_series.columns.values.tolist()
1015
1083
  assert series_names == t_data["data"]["col_names"]
1084
+ elif kind == "scalar_table":
1085
+ assert_array_equal(t_series.values, t_data["data"]["data"])
1086
+ series_names = t_series.columns.values.tolist()
1087
+ assert series_names == t_data["data"]["col_names"]
1016
1088
 
1017
1089
 
1018
1090
  def test_collect_error_single_output() -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: junifer
3
- Version: 0.0.5.dev86
3
+ Version: 0.0.5.dev93
4
4
  Summary: JUelich NeuroImaging FEature extractoR
5
5
  Author-email: Fede Raimondo <f.raimondo@fz-juelich.de>, Synchon Mandal <s.mandal@fz-juelich.de>
6
6
  Maintainer-email: Fede Raimondo <f.raimondo@fz-juelich.de>, Synchon Mandal <s.mandal@fz-juelich.de>
@@ -1,5 +1,5 @@
1
1
  junifer/__init__.py,sha256=x1UR2jUcrUdm2HNl-3Qvyi4UUrU6ms5qm2qcmNY7zZk,391
2
- junifer/_version.py,sha256=tHlKxu4A7NZdSkP4AlDO2Vd1oZbG-OnTzclmDpcl29M,426
2
+ junifer/_version.py,sha256=OuEXOAHKhbHnm8jc8By5AoScd2qmQjT4HRDoRe1JlYM,426
3
3
  junifer/stats.py,sha256=jN22_qFvWYBU9ZIMnCSzN4iOscWnWrcrUPIdLeDkV64,6163
4
4
  junifer/api/__init__.py,sha256=pSj8V8tmwOAQ3sshWJfRfB-n3z5bcJj3pHOBX4-8ONc,251
5
5
  junifer/api/cli.py,sha256=53pews3mXkJ7DUDSkV51PbitYnuVAdQRkWG-gjO08Uw,16142
@@ -225,12 +225,12 @@ junifer/preprocess/warping/_fsl_warper.py,sha256=eELmS44LYYANQaWR3VDKv8iwpEC2qnF
225
225
  junifer/preprocess/warping/space_warper.py,sha256=BW7ymZdr4h7lJRtPLi3RT7qwgmu-HFJFqzZNUl341YU,6589
226
226
  junifer/preprocess/warping/tests/test_space_warper.py,sha256=hHF97XUrMeAu8pIPBUrqD77PijbSv5_dAj9-Zte7UZM,5622
227
227
  junifer/storage/__init__.py,sha256=5ve0Vy1stGmn9iQv0c4w-76-sg-gFqZE2IlRlJhFWpQ,337
228
- junifer/storage/base.py,sha256=UxDvj81gSmqqHspbSs1X_i9HvW5wXysDippI7HWM7aM,9654
229
- junifer/storage/hdf5.py,sha256=oxdPuCG0hxzSDNH0uHnYFwVr_wp0g9yvgZf8bv3PkJM,35631
228
+ junifer/storage/base.py,sha256=X4AYx21tW1MGDhiMA8AFilbzbrMbDzcuzdAuMjM4Az0,10836
229
+ junifer/storage/hdf5.py,sha256=BVvojmiRNICOcxrmqpDdIsBT94wqLNQ6pk6MGLaHmXo,37951
230
230
  junifer/storage/pandas_base.py,sha256=Qu3Az-xEaFftsiZwordONnOF2UBO1JgkrP8tmxhXUN4,7473
231
231
  junifer/storage/sqlite.py,sha256=P-eKlrptY-vRxA4mDAC5UyJzqCWNC2C_rgKlZFGjiKg,21244
232
232
  junifer/storage/utils.py,sha256=I-is6bGE29bNwlqv2P8B5QN2zAnY7A7fm8Y-AwMJ0MU,7258
233
- junifer/storage/tests/test_hdf5.py,sha256=ACTGcO6p9iP_HVGBK9RJ4wF3sixR_aFewSL8PK8OBSI,29338
233
+ junifer/storage/tests/test_hdf5.py,sha256=PybSXK0VA1UUFve1xqV-_Wqc7UyCDZGj_C58Z48c6tc,31512
234
234
  junifer/storage/tests/test_pandas_base.py,sha256=y_TfUGpuXkj_39yVon3rMDxMeBrZXs58ZW6OSty5LNw,4058
235
235
  junifer/storage/tests/test_sqlite.py,sha256=JPfE6r34o86XkKaB6yjMVmO_2vUV40DjsaHICagUtjk,28318
236
236
  junifer/storage/tests/test_storage_base.py,sha256=YzgfspuggzXejyPIoRCPST3ZzH9Pi7dgl0IHN7kynXM,3071
@@ -253,10 +253,10 @@ junifer/utils/logging.py,sha256=furcU3XIUpUvnpe4PEwzWWIWgmH4j2ZA4MQdvSGWjj0,9216
253
253
  junifer/utils/tests/test_fs.py,sha256=WQS7cKlKEZ742CIuiOYYpueeAhY9PqlastfDVpVVtvE,923
254
254
  junifer/utils/tests/test_helpers.py,sha256=k5qqfxK8dFyuewTJyR1Qn6-nFaYNuVr0ysc18bfPjyU,929
255
255
  junifer/utils/tests/test_logging.py,sha256=l8oo-AiBV7H6_IzlsNcj__cLeZBUvgIGoaMszD9VaJg,7754
256
- junifer-0.0.5.dev86.dist-info/AUTHORS.rst,sha256=rmULKpchpSol4ExWFdm-qu4fkpSZPYqIESVJBZtGb6E,163
257
- junifer-0.0.5.dev86.dist-info/LICENSE.md,sha256=MqCnOBu8uXsEOzRZWh9EBVfVz-kE9NkXcLCrtGXo2yU,34354
258
- junifer-0.0.5.dev86.dist-info/METADATA,sha256=czsUrw6gz_iDxuAzASxfrFxc1EZzRwLbZliDG4dE0Cc,8234
259
- junifer-0.0.5.dev86.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
260
- junifer-0.0.5.dev86.dist-info/entry_points.txt,sha256=DxFvKq0pOqRunAK0FxwJcoDfV1-dZvsFDpD5HRqSDhw,48
261
- junifer-0.0.5.dev86.dist-info/top_level.txt,sha256=4bAq1R2QFQ4b3hohjys2JBvxrl0GKk5LNFzYvz9VGcA,8
262
- junifer-0.0.5.dev86.dist-info/RECORD,,
256
+ junifer-0.0.5.dev93.dist-info/AUTHORS.rst,sha256=rmULKpchpSol4ExWFdm-qu4fkpSZPYqIESVJBZtGb6E,163
257
+ junifer-0.0.5.dev93.dist-info/LICENSE.md,sha256=MqCnOBu8uXsEOzRZWh9EBVfVz-kE9NkXcLCrtGXo2yU,34354
258
+ junifer-0.0.5.dev93.dist-info/METADATA,sha256=GPQBJL5Fvtivl1q6VYeAV6eoIDVCjubyMZCizh3mn9k,8234
259
+ junifer-0.0.5.dev93.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
260
+ junifer-0.0.5.dev93.dist-info/entry_points.txt,sha256=DxFvKq0pOqRunAK0FxwJcoDfV1-dZvsFDpD5HRqSDhw,48
261
+ junifer-0.0.5.dev93.dist-info/top_level.txt,sha256=4bAq1R2QFQ4b3hohjys2JBvxrl0GKk5LNFzYvz9VGcA,8
262
+ junifer-0.0.5.dev93.dist-info/RECORD,,