eegdash 0.4.0.dev173498563__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -18,22 +18,41 @@ from braindecode.datasets.base import (
 
 from ..logging import logger
 
+__all__ = [
+    "FeaturesDataset",
+    "FeaturesConcatDataset",
+]
+
 
 class FeaturesDataset(EEGWindowsDataset):
-    """Returns samples from a pandas DataFrame object along with a target.
+    """A dataset of features extracted from EEG windows.
 
-    Dataset which serves samples from a pandas DataFrame object along with a
-    target. The target is unique for the dataset, and is obtained through the
-    `description` attribute.
+    This class holds features in a pandas DataFrame and provides an interface
+    compatible with braindecode's dataset structure. Each row in the feature
+    DataFrame corresponds to a single sample (e.g., an EEG window).
 
     Parameters
     ----------
-    features : a pandas DataFrame
-        Tabular data.
-    description : dict | pandas.Series | None
-        Holds additional description about the continuous signal / subject.
-    transform : callable | None
-        On-the-fly transform applied to the example before it is returned.
+    features : pandas.DataFrame
+        A DataFrame where each row is a sample and each column is a feature.
+    metadata : pandas.DataFrame, optional
+        A DataFrame containing metadata for each sample, indexed consistently
+        with `features`. Must include columns 'i_window_in_trial',
+        'i_start_in_trial', 'i_stop_in_trial', and 'target'.
+    description : dict or pandas.Series, optional
+        Additional high-level information about the dataset (e.g., subject ID).
+    transform : callable, optional
+        A function or transform to apply to the feature data on-the-fly.
+    raw_info : dict, optional
+        Information about the original raw recording, for provenance.
+    raw_preproc_kwargs : dict, optional
+        Keyword arguments used for preprocessing the raw data.
+    window_kwargs : dict, optional
+        Keyword arguments used for windowing the data.
+    window_preproc_kwargs : dict, optional
+        Keyword arguments used for preprocessing the windowed data.
+    features_kwargs : dict, optional
+        Keyword arguments used for feature extraction.
 
     """
 
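As a reading aid for the new `FeaturesDataset` docstring above, here is a minimal standalone sketch of the expected inputs; the feature names and values are hypothetical and are not taken from the package:

import pandas as pd

# Hypothetical feature table: one row per EEG window, one column per feature.
features = pd.DataFrame(
    {"bandpower_alpha": [0.12, 0.31, 0.27], "bandpower_beta": [0.05, 0.09, 0.11]}
)

# Metadata aligned row-for-row with `features`, with the columns the docstring
# requires: window/crop indices plus the target.
metadata = pd.DataFrame(
    {
        "i_window_in_trial": [0, 1, 2],
        "i_start_in_trial": [0, 200, 400],
        "i_stop_in_trial": [200, 400, 600],
        "target": [0, 1, 0],
    }
)

assert len(features) == len(metadata)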
@@ -65,7 +84,21 @@ class FeaturesDataset(EEGWindowsDataset):
         ].to_numpy()
         self.y = metadata.loc[:, "target"].to_list()
 
-    def __getitem__(self, index):
+    def __getitem__(self, index: int) -> tuple[np.ndarray, int, list]:
+        """Get a single sample from the dataset.
+
+        Parameters
+        ----------
+        index : int
+            The index of the sample to retrieve.
+
+        Returns
+        -------
+        tuple
+            A tuple containing the feature vector (X), the target (y), and the
+            cropping indices.
+
+        """
         crop_inds = self.crop_inds[index].tolist()
         X = self.features.iloc[index].to_numpy()
         X = X.copy()
@@ -75,18 +108,27 @@ class FeaturesDataset(EEGWindowsDataset):
         y = self.y[index]
         return X, y, crop_inds
 
-    def __len__(self):
+    def __len__(self) -> int:
+        """Return the number of samples in the dataset.
+
+        Returns
+        -------
+        int
+            The total number of feature samples.
+
+        """
         return len(self.features.index)
 
 
 def _compute_stats(
     ds: FeaturesDataset,
-    return_count=False,
-    return_mean=False,
-    return_var=False,
-    ddof=1,
-    numeric_only=False,
-):
+    return_count: bool = False,
+    return_mean: bool = False,
+    return_var: bool = False,
+    ddof: int = 1,
+    numeric_only: bool = False,
+) -> tuple:
+    """Compute statistics for a single FeaturesDataset."""
     res = []
     if return_count:
         res.append(ds.features.count(numeric_only=numeric_only))
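The `__getitem__` docstring above documents a `(X, y, crop_inds)` return value. A plain pandas sketch of that retrieval pattern, mirroring the context lines shown rather than calling the class itself (illustrative values only):

import pandas as pd

features = pd.DataFrame({"f0": [0.1, 0.2], "f1": [1.0, 2.0]})
metadata = pd.DataFrame(
    {
        "i_window_in_trial": [0, 1],
        "i_start_in_trial": [0, 200],
        "i_stop_in_trial": [200, 400],
        "target": [0, 1],
    }
)

index = 1
X = features.iloc[index].to_numpy().copy()  # feature vector
y = metadata.loc[index, "target"]  # target
crop_inds = metadata.loc[
    index, ["i_window_in_trial", "i_start_in_trial", "i_stop_in_trial"]
].tolist()  # cropping indices
print(X, y, crop_inds)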
@@ -97,7 +139,14 @@ def _compute_stats(
     return tuple(res)
 
 
-def _pooled_var(counts, means, variances, ddof, ddof_in=None):
+def _pooled_var(
+    counts: np.ndarray,
+    means: np.ndarray,
+    variances: np.ndarray,
+    ddof: int,
+    ddof_in: int | None = None,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Compute pooled variance across multiple datasets."""
     if ddof_in is None:
         ddof_in = ddof
     count = counts.sum(axis=0)
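`_pooled_var` combines per-dataset counts, means, and variances into global statistics without revisiting the raw samples. A self-contained NumPy sketch of the same pooling identity (not the package's implementation), checked against a direct computation on the concatenated data:

import numpy as np

rng = np.random.default_rng(0)
parts = [rng.normal(size=(50, 3)), rng.normal(size=(80, 3)), rng.normal(size=(30, 3))]

counts = np.array([p.shape[0] for p in parts])[:, None]       # n_i per dataset
means = np.array([p.mean(axis=0) for p in parts])             # m_i per dataset
variances = np.array([p.var(axis=0, ddof=0) for p in parts])  # v_i with ddof_in=0

ddof = 1
count = counts.sum(axis=0)
mean = (counts * means).sum(axis=0) / count
# Within-group plus between-group sums of squares, divided by (N - ddof).
var = (
    (counts * variances).sum(axis=0)
    + (counts * (means - mean) ** 2).sum(axis=0)
) / (count - ddof)

all_data = np.concatenate(parts, axis=0)
assert np.allclose(mean, all_data.mean(axis=0))
assert np.allclose(var, all_data.var(axis=0, ddof=1))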
@@ -110,17 +159,20 @@ def _pooled_var(counts, means, variances, ddof, ddof_in=None):
 
 
 class FeaturesConcatDataset(BaseConcatDataset):
-    """A base class for concatenated datasets.
+    """A concatenated dataset of `FeaturesDataset` objects.
+
+    This class holds a list of :class:`FeaturesDataset` instances and allows
+    them to be treated as a single, larger dataset. It provides methods for
 
-    Holds either mne.Raw or mne.Epoch in self.datasets and has
-    a pandas DataFrame with additional description.
+    splitting, saving, and performing DataFrame-like operations (e.g., `mean`,
+    `var`, `fillna`) across all contained datasets.
 
     Parameters
     ----------
-    list_of_ds : list
-        list of BaseDataset, BaseConcatDataset or WindowsDataset
-    target_transform : callable | None
-        Optional function to call on targets before returning them.
+    list_of_ds : list of FeaturesDataset
+        A list of :class:`FeaturesDataset` objects to concatenate.
+    target_transform : callable, optional
+        A function to apply to the target values before they are returned.
 
     """
 
@@ -140,26 +192,28 @@ class FeaturesConcatDataset(BaseConcatDataset):
         self,
         by: str | list[int] | list[list[int]] | dict[str, list[int]],
     ) -> dict[str, FeaturesConcatDataset]:
-        """Split the dataset based on information listed in its description.
+        """Split the dataset into subsets.
 
-        The format could be based on a DataFrame or based on indices.
+        The splitting can be done based on a column in the description
+        DataFrame or by providing explicit indices for each split.
 
         Parameters
        ----------
-        by : str | list | dict
-            If ``by`` is a string, splitting is performed based on the
-            description DataFrame column with this name.
-            If ``by`` is a (list of) list of integers, the position in the first
-            list corresponds to the split id and the integers to the
-            datapoints of that split.
-            If a dict then each key will be used in the returned
-            splits dict and each value should be a list of int.
+        by : str or list or dict
+            - If a string, splits are created for each unique value in the
+              description column `by`.
+            - If a list of integers, a single split is created containing the
+              datasets at the specified indices.
+            - If a list of lists of integers, multiple splits are created, one
+              for each sublist of indices.
+            - If a dictionary, keys are used as split names and values are
+              lists of dataset indices.
 
         Returns
         -------
-        splits : dict
-            A dictionary with the name of the split (a string) as key and the
-            dataset as value.
+        dict[str, FeaturesConcatDataset]
+            A dictionary where keys are split names and values are the new
+            :class:`FeaturesConcatDataset` subsets.
 
         """
         if isinstance(by, str):
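As a small illustration of the string form of `by` documented above, this sketch reproduces only the grouping logic with a hypothetical description table (it is not the package's code):

import pandas as pd

# Hypothetical per-dataset description: one row per contained dataset.
description = pd.DataFrame({"subject": ["S01", "S01", "S02", "S03"]})

# Group dataset indices by the unique values of the chosen column.
splits = {
    str(value): indices.tolist()
    for value, indices in description.groupby("subject").groups.items()
}
print(splits)  # {'S01': [0, 1], 'S02': [2], 'S03': [3]}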
@@ -184,14 +238,21 @@ class FeaturesConcatDataset(BaseConcatDataset):
         }
 
     def get_metadata(self) -> pd.DataFrame:
-        """Concatenate the metadata and description of the wrapped Epochs.
+        """Get the metadata of all datasets as a single DataFrame.
+
+        Concatenates the metadata from all contained datasets and adds columns
+        from their `description` attributes.
 
         Returns
         -------
-        metadata : pd.DataFrame
-            DataFrame containing as many rows as there are windows in the
-            BaseConcatDataset, with the metadata and description information
-            for each window.
+        pandas.DataFrame
+            A DataFrame containing the metadata for every sample in the
+            concatenated dataset.
+
+        Raises
+        ------
+        TypeError
+            If any of the contained datasets is not a :class:`FeaturesDataset`.
 
         """
         if not all([isinstance(ds, FeaturesDataset) for ds in self.datasets]):
@@ -202,60 +263,59 @@ class FeaturesConcatDataset(BaseConcatDataset):
 
         all_dfs = list()
         for ds in self.datasets:
-            df = ds.metadata
+            df = ds.metadata.copy()
             for k, v in ds.description.items():
                 df[k] = v
             all_dfs.append(df)
 
         return pd.concat(all_dfs)
 
-    def save(self, path: str, overwrite: bool = False, offset: int = 0):
-        """Save datasets to files by creating one subdirectory for each dataset:
-        path/
-            0/
-                0-feat.parquet
-                metadata_df.pkl
-                description.json
-                raw-info.fif (if raw info was saved)
-                raw_preproc_kwargs.json (if raws were preprocessed)
-                window_kwargs.json (if this is a windowed dataset)
-                window_preproc_kwargs.json (if windows were preprocessed)
-                features_kwargs.json
-            1/
-                1-feat.parquet
-                metadata_df.pkl
-                description.json
-                raw-info.fif (if raw info was saved)
-                raw_preproc_kwargs.json (if raws were preprocessed)
-                window_kwargs.json (if this is a windowed dataset)
-                window_preproc_kwargs.json (if windows were preprocessed)
-                features_kwargs.json
+    def save(self, path: str, overwrite: bool = False, offset: int = 0) -> None:
+        """Save the concatenated dataset to a directory.
+
+        Creates a directory structure where each contained dataset is saved in
+        its own numbered subdirectory.
+
+        .. code-block::
+
+            path/
+                0/
+                    0-feat.parquet
+                    metadata_df.pkl
+                    description.json
+                    ...
+                1/
+                    1-feat.parquet
+                    ...
 
         Parameters
         ----------
         path : str
-            Directory in which subdirectories are created to store
-            -feat.parquet and .json files to.
-        overwrite : bool
-            Whether to delete old subdirectories that will be saved to in this
-            call.
-        offset : int
-            If provided, the integer is added to the id of the dataset in the
-            concat. This is useful in the setting of very large datasets, where
-            one dataset has to be processed and saved at a time to account for
-            its original position.
+            The directory where the dataset will be saved.
+        overwrite : bool, default False
+            If True, any existing subdirectories that conflict with the new
+            ones will be removed.
+        offset : int, default 0
+            An integer to add to the subdirectory names. Useful for saving
+            datasets in chunks.
+
+        Raises
+        ------
+        ValueError
+            If the dataset is empty.
+        FileExistsError
+            If a subdirectory already exists and `overwrite` is False.
 
         """
         if len(self.datasets) == 0:
             raise ValueError("Expect at least one dataset")
         path_contents = os.listdir(path)
-        n_sub_dirs = len([os.path.isdir(e) for e in path_contents])
+        n_sub_dirs = len([os.path.isdir(os.path.join(path, e)) for e in path_contents])
         for i_ds, ds in enumerate(self.datasets):
-            # remove subdirectory from list of untouched files / subdirectories
-            if str(i_ds + offset) in path_contents:
-                path_contents.remove(str(i_ds + offset))
-            # save_dir/i_ds/
-            sub_dir = os.path.join(path, str(i_ds + offset))
+            sub_dir_name = str(i_ds + offset)
+            if sub_dir_name in path_contents:
+                path_contents.remove(sub_dir_name)
+            sub_dir = os.path.join(path, sub_dir_name)
             if os.path.exists(sub_dir):
                 if overwrite:
                     shutil.rmtree(sub_dir)
@@ -265,35 +325,21 @@ class FeaturesConcatDataset(BaseConcatDataset):
                         f" a different directory, set overwrite=True, or "
                         f"resolve manually."
                     )
-            # save_dir/{i_ds+offset}/
             os.makedirs(sub_dir)
-            # save_dir/{i_ds+offset}/{i_ds+offset}-feat.parquet
             self._save_features(sub_dir, ds, i_ds, offset)
-            # save_dir/{i_ds+offset}/metadata_df.pkl
             self._save_metadata(sub_dir, ds)
-            # save_dir/{i_ds+offset}/description.json
             self._save_description(sub_dir, ds.description)
-            # save_dir/{i_ds+offset}/raw-info.fif
             self._save_raw_info(sub_dir, ds)
-            # save_dir/{i_ds+offset}/raw_preproc_kwargs.json
-            # save_dir/{i_ds+offset}/window_kwargs.json
-            # save_dir/{i_ds+offset}/window_preproc_kwargs.json
-            # save_dir/{i_ds+offset}/features_kwargs.json
             self._save_kwargs(sub_dir, ds)
-            if overwrite:
-                # the following will be True for all datasets preprocessed and
-                # stored in parallel with braindecode.preprocessing.preprocess
-                if i_ds + 1 + offset < n_sub_dirs:
-                    logger.warning(
-                        f"The number of saved datasets ({i_ds + 1 + offset}) "
-                        f"does not match the number of existing "
-                        f"subdirectories ({n_sub_dirs}). You may now "
-                        f"encounter a mix of differently preprocessed "
-                        f"datasets!",
-                        UserWarning,
-                    )
-        # if path contains files or directories that were not touched, raise
-        # warning
+            if overwrite and i_ds + 1 + offset < n_sub_dirs:
+                logger.warning(
+                    f"The number of saved datasets ({i_ds + 1 + offset}) "
+                    f"does not match the number of existing "
+                    f"subdirectories ({n_sub_dirs}). You may now "
+                    f"encounter a mix of differently preprocessed "
+                    f"datasets!",
+                    UserWarning,
+                )
         if path_contents:
             logger.warning(
                 f"Chosen directory {path} contains other "
@@ -301,20 +347,37 @@ class FeaturesConcatDataset(BaseConcatDataset):
             )
 
     @staticmethod
-    def _save_features(sub_dir, ds, i_ds, offset):
+    def _save_features(sub_dir: str, ds: FeaturesDataset, i_ds: int, offset: int):
+        """Save the feature DataFrame to a Parquet file."""
         parquet_file_name = f"{i_ds + offset}-feat.parquet"
         parquet_file_path = os.path.join(sub_dir, parquet_file_name)
         ds.features.to_parquet(parquet_file_path)
 
     @staticmethod
-    def _save_raw_info(sub_dir, ds):
-        if hasattr(ds, "raw_info"):
+    def _save_metadata(sub_dir: str, ds: FeaturesDataset):
+        """Save the metadata DataFrame to a pickle file."""
+        metadata_file_name = "metadata_df.pkl"
+        metadata_file_path = os.path.join(sub_dir, metadata_file_name)
+        ds.metadata.to_pickle(metadata_file_path)
+
+    @staticmethod
+    def _save_description(sub_dir: str, description: pd.Series):
+        """Save the description Series to a JSON file."""
+        desc_file_name = "description.json"
+        desc_file_path = os.path.join(sub_dir, desc_file_name)
+        description.to_json(desc_file_path)
+
+    @staticmethod
+    def _save_raw_info(sub_dir: str, ds: FeaturesDataset):
+        """Save the raw info dictionary to a FIF file if it exists."""
+        if hasattr(ds, "raw_info") and ds.raw_info is not None:
             fif_file_name = "raw-info.fif"
             fif_file_path = os.path.join(sub_dir, fif_file_name)
-            ds.raw_info.save(fif_file_path)
+            ds.raw_info.save(fif_file_path, overwrite=True)
 
     @staticmethod
-    def _save_kwargs(sub_dir, ds):
+    def _save_kwargs(sub_dir: str, ds: FeaturesDataset):
+        """Save various keyword argument dictionaries to JSON files."""
         for kwargs_name in [
             "raw_preproc_kwargs",
             "window_kwargs",
@@ -322,10 +385,10 @@ class FeaturesConcatDataset(BaseConcatDataset):
             "features_kwargs",
         ]:
             if hasattr(ds, kwargs_name):
-                kwargs_file_name = ".".join([kwargs_name, "json"])
-                kwargs_file_path = os.path.join(sub_dir, kwargs_file_name)
                 kwargs = getattr(ds, kwargs_name)
                 if kwargs is not None:
+                    kwargs_file_name = ".".join([kwargs_name, "json"])
+                    kwargs_file_path = os.path.join(sub_dir, kwargs_file_name)
                     with open(kwargs_file_path, "w") as f:
                         json.dump(kwargs, f)
 
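The `_save_*` helpers above write one Parquet, pickle, and JSON file per dataset into a numbered subdirectory. A standalone sketch of that on-disk layout using plain pandas and json (hypothetical contents; writing Parquet assumes an engine such as pyarrow is installed):

import json
import os
import tempfile

import pandas as pd

out_dir = tempfile.mkdtemp()
sub_dir = os.path.join(out_dir, "0")  # one numbered subdirectory per dataset
os.makedirs(sub_dir)

features = pd.DataFrame({"f0": [0.1, 0.2]})
metadata = pd.DataFrame({"target": [0, 1]})
description = pd.Series({"subject": "S01"})
features_kwargs = {"bands": ["alpha", "beta"]}  # hypothetical kwargs

features.to_parquet(os.path.join(sub_dir, "0-feat.parquet"))
metadata.to_pickle(os.path.join(sub_dir, "metadata_df.pkl"))
description.to_json(os.path.join(sub_dir, "description.json"))
with open(os.path.join(sub_dir, "features_kwargs.json"), "w") as f:
    json.dump(features_kwargs, f)

print(sorted(os.listdir(sub_dir)))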
@@ -334,7 +397,25 @@ class FeaturesConcatDataset(BaseConcatDataset):
         include_metadata: bool | str | List[str] = False,
         include_target: bool = False,
         include_crop_inds: bool = False,
-    ):
+    ) -> pd.DataFrame:
+        """Convert the dataset to a single pandas DataFrame.
+
+        Parameters
+        ----------
+        include_metadata : bool or str or list of str, default False
+            If True, include all metadata columns. If a string or list of
+            strings, include only the specified metadata columns.
+        include_target : bool, default False
+            If True, include the 'target' column.
+        include_crop_inds : bool, default False
+            If True, include window cropping index columns.
+
+        Returns
+        -------
+        pandas.DataFrame
+            A DataFrame containing the features and requested metadata.
+
+        """
         if (
             not isinstance(include_metadata, bool)
             or include_metadata
@@ -343,7 +424,7 @@ class FeaturesConcatDataset(BaseConcatDataset):
             include_dataset = False
             if isinstance(include_metadata, bool) and include_metadata:
                 include_dataset = True
-                cols = self.datasets[0].metadata.columns
+                cols = self.datasets[0].metadata.columns.tolist()
             else:
                 cols = include_metadata
             if isinstance(cols, bool) and not cols:
@@ -352,13 +433,14 @@ class FeaturesConcatDataset(BaseConcatDataset):
                 cols = [cols]
             cols = set(cols)
             if include_crop_inds:
-                cols = {
-                    "i_dataset",
-                    "i_window_in_trial",
-                    "i_start_in_trial",
-                    "i_stop_in_trial",
-                    *cols,
-                }
+                cols.update(
+                    {
+                        "i_dataset",
+                        "i_window_in_trial",
+                        "i_start_in_trial",
+                        "i_stop_in_trial",
+                    }
+                )
             if include_target:
                 cols.add("target")
             cols = list(cols)
@@ -381,10 +463,26 @@ class FeaturesConcatDataset(BaseConcatDataset):
         dataframes = [ds.features for ds in self.datasets]
         return pd.concat(dataframes, axis=0, ignore_index=True)
 
-    def _numeric_columns(self):
+    def _numeric_columns(self) -> pd.Index:
+        """Get the names of numeric columns from the feature DataFrames."""
         return self.datasets[0].features.select_dtypes(include=np.number).columns
 
-    def count(self, numeric_only=False, n_jobs=1):
+    def count(self, numeric_only: bool = False, n_jobs: int = 1) -> pd.Series:
+        """Count non-NA cells for each feature column.
+
+        Parameters
+        ----------
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        n_jobs : int, default 1
+            Number of jobs to run in parallel.
+
+        Returns
+        -------
+        pandas.Series
+            The count of non-NA cells for each column.
+
+        """
         stats = Parallel(n_jobs)(
             delayed(_compute_stats)(ds, return_count=True, numeric_only=numeric_only)
             for ds in self.datasets
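`count` fans the per-dataset computation out with joblib and sums the partial results. A minimal sketch of that pattern on plain DataFrames (illustrative values only, not the package's code):

import numpy as np
import pandas as pd
from joblib import Parallel, delayed


def count_non_na(df: pd.DataFrame) -> pd.Series:
    # Per-dataset partial result: non-NA count per column.
    return df.count()


frames = [
    pd.DataFrame({"f0": [1.0, np.nan], "f1": [2.0, 3.0]}),
    pd.DataFrame({"f0": [4.0, 5.0], "f1": [np.nan, np.nan]}),
]

partials = Parallel(n_jobs=1)(delayed(count_non_na)(df) for df in frames)
total = np.stack(partials).sum(axis=0)
print(pd.Series(total, index=frames[0].columns))  # f0 -> 3, f1 -> 2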
@@ -393,7 +491,22 @@ class FeaturesConcatDataset(BaseConcatDataset):
         count = counts.sum(axis=0)
         return pd.Series(count, index=self._numeric_columns())
 
-    def mean(self, numeric_only=False, n_jobs=1):
+    def mean(self, numeric_only: bool = False, n_jobs: int = 1) -> pd.Series:
+        """Compute the mean for each feature column.
+
+        Parameters
+        ----------
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        n_jobs : int, default 1
+            Number of jobs to run in parallel.
+
+        Returns
+        -------
+        pandas.Series
+            The mean of each column.
+
+        """
         stats = Parallel(n_jobs)(
             delayed(_compute_stats)(
                 ds, return_count=True, return_mean=True, numeric_only=numeric_only
@@ -405,7 +518,26 @@ class FeaturesConcatDataset(BaseConcatDataset):
         mean = np.sum((counts / count) * means, axis=0)
         return pd.Series(mean, index=self._numeric_columns())
 
-    def var(self, ddof=1, numeric_only=False, n_jobs=1):
+    def var(
+        self, ddof: int = 1, numeric_only: bool = False, n_jobs: int = 1
+    ) -> pd.Series:
+        """Compute the variance for each feature column.
+
+        Parameters
+        ----------
+        ddof : int, default 1
+            Delta Degrees of Freedom. The divisor used in calculations is N - ddof.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        n_jobs : int, default 1
+            Number of jobs to run in parallel.
+
+        Returns
+        -------
+        pandas.Series
+            The variance of each column.
+
+        """
         stats = Parallel(n_jobs)(
             delayed(_compute_stats)(
                 ds,
@@ -425,12 +557,50 @@ class FeaturesConcatDataset(BaseConcatDataset):
         _, _, var = _pooled_var(counts, means, variances, ddof, ddof_in=0)
         return pd.Series(var, index=self._numeric_columns())
 
-    def std(self, ddof=1, numeric_only=False, eps=0, n_jobs=1):
+    def std(
+        self, ddof: int = 1, numeric_only: bool = False, eps: float = 0, n_jobs: int = 1
+    ) -> pd.Series:
+        """Compute the standard deviation for each feature column.
+
+        Parameters
+        ----------
+        ddof : int, default 1
+            Delta Degrees of Freedom.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        eps : float, default 0
+            A small epsilon value to add to the variance before taking the
+            square root to avoid numerical instability.
+        n_jobs : int, default 1
+            Number of jobs to run in parallel.
+
+        Returns
+        -------
+        pandas.Series
+            The standard deviation of each column.
+
+        """
         return np.sqrt(
             self.var(ddof=ddof, numeric_only=numeric_only, n_jobs=n_jobs) + eps
         )
 
-    def zscore(self, ddof=1, numeric_only=False, eps=0, n_jobs=1):
+    def zscore(
+        self, ddof: int = 1, numeric_only: bool = False, eps: float = 0, n_jobs: int = 1
+    ) -> None:
+        """Apply z-score normalization to numeric columns in-place.
+
+        Parameters
+        ----------
+        ddof : int, default 1
+            Delta Degrees of Freedom for variance calculation.
+        numeric_only : bool, default False
+            Include only float, int, boolean columns.
+        eps : float, default 0
+            Epsilon for numerical stability.
+        n_jobs : int, default 1
+            Number of jobs to run in parallel for statistics computation.
+
+        """
         stats = Parallel(n_jobs)(
             delayed(_compute_stats)(
                 ds,
@@ -450,10 +620,13 @@ class FeaturesConcatDataset(BaseConcatDataset):
         _, mean, var = _pooled_var(counts, means, variances, ddof, ddof_in=0)
         std = np.sqrt(var + eps)
         for ds in self.datasets:
-            ds.features = (ds.features - mean) / std
+            ds.features.loc[:, self._numeric_columns()] = (
+                ds.features.loc[:, self._numeric_columns()] - mean
+            ) / std
 
     @staticmethod
-    def _enforce_inplace_operations(func_name, kwargs):
+    def _enforce_inplace_operations(func_name: str, kwargs: dict):
+        """Raise an error if 'inplace=False' is passed to a method."""
         if "inplace" in kwargs and kwargs["inplace"] is False:
             raise ValueError(
                 f"{func_name} only works inplace, please change "
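The updated `zscore` body above assigns through `.loc` so that only numeric columns are modified. A plain pandas sketch of that pattern (illustrative, not the package's code):

import numpy as np
import pandas as pd

df = pd.DataFrame(
    {"f0": [1.0, 2.0, 3.0], "f1": [10.0, 20.0, 30.0], "label": ["a", "b", "c"]}
)

numeric_cols = df.select_dtypes(include=np.number).columns
mean = df[numeric_cols].mean()
std = df[numeric_cols].std(ddof=1)

# Assigning through .loc updates the numeric columns in place and leaves
# non-numeric columns (here "label") untouched.
df.loc[:, numeric_cols] = (df.loc[:, numeric_cols] - mean) / std
print(df)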
@@ -461,33 +634,49 @@ class FeaturesConcatDataset(BaseConcatDataset):
             )
         kwargs["inplace"] = True
 
-    def fillna(self, *args, **kwargs):
+    def fillna(self, *args, **kwargs) -> None:
+        """Fill NA/NaN values in-place. See :meth:`pandas.DataFrame.fillna`."""
         FeaturesConcatDataset._enforce_inplace_operations("fillna", kwargs)
         for ds in self.datasets:
             ds.features.fillna(*args, **kwargs)
 
-    def replace(self, *args, **kwargs):
+    def replace(self, *args, **kwargs) -> None:
+        """Replace values in-place. See :meth:`pandas.DataFrame.replace`."""
         FeaturesConcatDataset._enforce_inplace_operations("replace", kwargs)
         for ds in self.datasets:
             ds.features.replace(*args, **kwargs)
 
-    def interpolate(self, *args, **kwargs):
+    def interpolate(self, *args, **kwargs) -> None:
+        """Interpolate values in-place. See :meth:`pandas.DataFrame.interpolate`."""
         FeaturesConcatDataset._enforce_inplace_operations("interpolate", kwargs)
         for ds in self.datasets:
             ds.features.interpolate(*args, **kwargs)
 
-    def dropna(self, *args, **kwargs):
+    def dropna(self, *args, **kwargs) -> None:
+        """Remove missing values in-place. See :meth:`pandas.DataFrame.dropna`."""
         FeaturesConcatDataset._enforce_inplace_operations("dropna", kwargs)
         for ds in self.datasets:
             ds.features.dropna(*args, **kwargs)
 
-    def drop(self, *args, **kwargs):
+    def drop(self, *args, **kwargs) -> None:
+        """Drop specified labels from rows or columns in-place. See :meth:`pandas.DataFrame.drop`."""
         FeaturesConcatDataset._enforce_inplace_operations("drop", kwargs)
         for ds in self.datasets:
             ds.features.drop(*args, **kwargs)
 
-    def join(self, concat_dataset: FeaturesConcatDataset, **kwargs):
+    def join(self, concat_dataset: FeaturesConcatDataset, **kwargs) -> None:
+        """Join columns with other FeaturesConcatDataset in-place.
+
+        Parameters
+        ----------
+        concat_dataset : FeaturesConcatDataset
+            The dataset to join with. Must have the same number of datasets,
+            and each corresponding dataset must have the same length.
+        **kwargs
+            Keyword arguments to pass to :meth:`pandas.DataFrame.join`.
+
+        """
         assert len(self.datasets) == len(concat_dataset.datasets)
         for ds1, ds2 in zip(self.datasets, concat_dataset.datasets):
             assert len(ds1) == len(ds2)
-            ds1.features.join(ds2, **kwargs)
+            ds1.features = ds1.features.join(ds2.features, **kwargs)
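The final change matters because `pandas.DataFrame.join` returns a new DataFrame rather than modifying its receiver; a two-line pandas check of that behavior:

import pandas as pd

left = pd.DataFrame({"f0": [0.1, 0.2]})
right = pd.DataFrame({"f1": [1.0, 2.0]})

# join() does not mutate `left`; the result must be assigned back,
# which is what the fixed `join` method above now does.
left = left.join(right)
print(list(left.columns))  # ['f0', 'f1']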