paradigma 1.0.3__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/__init__.py +10 -1
- paradigma/classification.py +38 -21
- paradigma/config.py +187 -123
- paradigma/constants.py +48 -35
- paradigma/feature_extraction.py +345 -255
- paradigma/load.py +476 -0
- paradigma/orchestrator.py +670 -0
- paradigma/pipelines/gait_pipeline.py +685 -246
- paradigma/pipelines/pulse_rate_pipeline.py +456 -155
- paradigma/pipelines/pulse_rate_utils.py +289 -248
- paradigma/pipelines/tremor_pipeline.py +405 -132
- paradigma/prepare_data.py +409 -0
- paradigma/preprocessing.py +500 -163
- paradigma/segmenting.py +180 -140
- paradigma/testing.py +370 -178
- paradigma/util.py +190 -101
- paradigma-1.1.0.dist-info/METADATA +229 -0
- paradigma-1.1.0.dist-info/RECORD +26 -0
- {paradigma-1.0.3.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
- paradigma-1.1.0.dist-info/entry_points.txt +4 -0
- {paradigma-1.0.3.dist-info → paradigma-1.1.0.dist-info/licenses}/LICENSE +0 -1
- paradigma-1.0.3.dist-info/METADATA +0 -138
- paradigma-1.0.3.dist-info/RECORD +0 -22
paradigma/segmenting.py
CHANGED
@@ -1,26 +1,27 @@
-import pandas as pd
 import numpy as np
+import pandas as pd
 
-from typing import List
 from paradigma.constants import DataColumns
+from paradigma.util import deprecated
 
-import numpy as np
 
 def tabulate_windows(
-
-
-
-
-
-
+    df: pd.DataFrame,
+    columns: list[str],
+    window_length_s: float,
+    window_step_length_s: float,
+    fs: int,
+) -> np.ndarray:
     """
-    Split the given DataFrame into overlapping windows of specified length
+    Split the given DataFrame into overlapping windows of specified length
+    and step size.
 
-    This function extracts windows of data from the specified columns of the
-    the window length and step size provided in the
-    a 3D NumPy array, where the
-
-
+    This function extracts windows of data from the specified columns of the
+    DataFrame, based on the window length and step size provided in the
+    configuration. The windows are returned in a 3D NumPy array, where the
+    first dimension represents the window index, the second dimension
+    represents the time steps within the window, and the third dimension
+    represents the columns of the data.
 
     Parameters
     ----------
@@ -39,17 +40,22 @@ def tabulate_windows(
     -------
     np.ndarray
         A 3D NumPy array of shape (n_windows, window_size, n_columns), where:
-        - `n_windows` is the number of windows that can be formed from the
-
-        - `
-
-
+        - `n_windows` is the number of windows that can be formed from the
+          data.
+        - `window_size` is the length of each window in terms of the number
+          of time steps.
+        - `n_columns` is the number of columns in the input DataFrame
+          specified by `columns`.
+
+        If the length of the data is shorter than the specified window size,
+        an empty array is returned.
 
     Notes
     -----
-    This function uses `np.lib.stride_tricks.sliding_window_view` to
-    The step size is applied to extract
-    If the data is insufficient for at least one
+    This function uses `np.lib.stride_tricks.sliding_window_view` to
+    generate sliding windows of data. The step size is applied to extract
+    windows at intervals. If the data is insufficient for at least one
+    window, an empty array will be returned.
 
     Example
     -------
@@ -66,12 +72,14 @@ def tabulate_windows(
 
     # Check if data length is sufficient
     if len(data) < window_size:
-        return np.empty(
-
+        return np.empty(
+            (0, window_size, n_columns)
+        )  # Return an empty array if insufficient data
+
     windows = np.lib.stride_tricks.sliding_window_view(
         data, window_shape=(window_size, n_columns)
-
-
+    )[::window_step_size].squeeze()
+
     # Ensure 3D shape (n_windows, window_size, n_columns)
     if windows.ndim == 2:  # Single window case
         windows = windows[np.newaxis, :, :]  # Add a new axis at the start
|
|
|
79
87
|
return windows
|
|
80
88
|
|
|
81
89
|
|
|
82
|
-
def tabulate_windows_legacy(config, df, agg_func=
|
|
90
|
+
def tabulate_windows_legacy(config, df, agg_func="first"):
|
|
83
91
|
"""
|
|
84
|
-
Efficiently creates a windowed dataframe from the input dataframe using
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
92
|
+
Efficiently creates a windowed dataframe from the input dataframe using
|
|
93
|
+
vectorized operations.
|
|
94
|
+
|
|
95
|
+
Parameters
|
|
96
|
+
----------
|
|
97
|
+
config : object
|
|
98
|
+
A configuration object containing:
|
|
99
|
+
- `window_length_s`: The number of seconds per window.
|
|
100
|
+
- `window_step_length_s`: The number of seconds to shift between windows.
|
|
101
|
+
- `sampling_frequency`: The sampling frequency in Hz.
|
|
102
|
+
- `single_value_colnames`: List of column names where a single value
|
|
103
|
+
(e.g., mean) is needed.
|
|
104
|
+
- `list_value_colnames`: List of column names where all 600 values
|
|
105
|
+
should be stored in a list.
|
|
106
|
+
agg_func : str or callable, optional
|
|
107
|
+
Aggregation function for single-value columns. Can be 'mean',
|
|
108
|
+
'first', or a custom callable. Default is 'first'.
|
|
109
|
+
|
|
110
|
+
Returns
|
|
111
|
+
-------
|
|
112
|
+
pd.DataFrame
|
|
113
|
+
A new DataFrame where each row corresponds to a window, containing:
|
|
114
|
+
- `window_nr`: The window number (starting from 1).
|
|
115
|
+
- `window_start`: The start time of the window.
|
|
116
|
+
- `window_end`: The end time of the window.
|
|
117
|
+
- Aggregated values for `single_value_colnames`.
|
|
118
|
+
- Lists of values for `list_value_colnames`.
|
|
119
|
+
|
|
96
120
|
"""
|
|
97
|
-
# If
|
|
98
|
-
if config.
|
|
99
|
-
config.
|
|
100
|
-
if config.
|
|
101
|
-
config.
|
|
121
|
+
# If single_value_colnames or list_value_colnames is None, default to an empty list
|
|
122
|
+
if config.single_value_colnames is None:
|
|
123
|
+
config.single_value_colnames = []
|
|
124
|
+
if config.list_value_colnames is None:
|
|
125
|
+
config.list_value_colnames = []
|
|
102
126
|
|
|
103
127
|
window_length = int(config.window_length_s * config.sampling_frequency)
|
|
104
128
|
window_step_size = int(config.window_step_length_s * config.sampling_frequency)
|
|
105
129
|
|
|
106
130
|
n_rows = len(df)
|
|
107
131
|
if window_length > n_rows:
|
|
108
|
-
raise ValueError(
|
|
109
|
-
|
|
110
|
-
|
|
132
|
+
raise ValueError(
|
|
133
|
+
f"Window size ({window_length}) cannot be greater than the "
|
|
134
|
+
f"number of rows ({n_rows}) in the dataframe."
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
# Create indices for window start positions
|
|
111
138
|
window_starts = np.arange(0, n_rows - window_length + 1, window_step_size)
|
|
112
|
-
|
|
139
|
+
|
|
113
140
|
# Prepare the result for the final DataFrame
|
|
114
141
|
result = []
|
|
115
|
-
|
|
142
|
+
|
|
116
143
|
# Handle single value columns with vectorized operations
|
|
117
144
|
agg_func_map = {
|
|
118
|
-
|
|
119
|
-
|
|
145
|
+
"mean": np.mean,
|
|
146
|
+
"first": lambda x: x[0],
|
|
120
147
|
}
|
|
121
148
|
|
|
122
149
|
# Check if agg_func is a callable (custom function) or get the function from the map
|
|
123
150
|
if callable(agg_func):
|
|
124
151
|
agg_func_np = agg_func
|
|
125
152
|
else:
|
|
126
|
-
agg_func_np = agg_func_map.get(
|
|
153
|
+
agg_func_np = agg_func_map.get(
|
|
154
|
+
agg_func, agg_func_map["mean"]
|
|
155
|
+
) # Default to 'mean' if agg_func is not recognized
|
|
127
156
|
|
|
128
|
-
|
|
129
157
|
for window_nr, start in enumerate(window_starts, 1):
|
|
130
158
|
end = start + window_length
|
|
131
159
|
window = df.iloc[start:end]
|
|
132
160
|
|
|
133
161
|
agg_data = {
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
162
|
+
"window_nr": window_nr,
|
|
163
|
+
"window_start": window[DataColumns.TIME].iloc[0],
|
|
164
|
+
"window_end": window[DataColumns.TIME].iloc[-1],
|
|
137
165
|
}
|
|
138
|
-
|
|
166
|
+
|
|
139
167
|
# Aggregate single-value columns
|
|
140
|
-
for col in config.
|
|
168
|
+
for col in config.single_value_colnames:
|
|
141
169
|
if col in window.columns: # Only process columns that exist in the window
|
|
142
170
|
agg_data[col] = agg_func_np(window[col].values)
|
|
143
|
-
|
|
171
|
+
|
|
144
172
|
# Collect list-value columns efficiently using numpy slicing
|
|
145
|
-
for col in config.
|
|
173
|
+
for col in config.list_value_colnames:
|
|
146
174
|
if col in window.columns: # Only process columns that exist in the window
|
|
147
175
|
agg_data[col] = window[col].values.tolist()
|
|
148
176
|
|
|
149
177
|
result.append(agg_data)
|
|
150
|
-
|
|
178
|
+
|
|
151
179
|
# Convert result list into a DataFrame
|
|
152
180
|
windowed_df = pd.DataFrame(result)
|
|
153
|
-
|
|
154
|
-
# Ensure the column order is as desired: window_nr, window_start,
|
|
155
|
-
|
|
156
|
-
|
|
181
|
+
|
|
182
|
+
# Ensure the column order is as desired: window_nr, window_start,
|
|
183
|
+
# window_end, pre_or_post, and then the rest
|
|
184
|
+
desired_order = (
|
|
185
|
+
["window_nr", "window_start", "window_end"]
|
|
186
|
+
+ config.single_value_colnames
|
|
187
|
+
+ config.list_value_colnames
|
|
188
|
+
)
|
|
189
|
+
|
|
157
190
|
return windowed_df[desired_order]
|
|
158
191
|
|
|
159
192
|
|
|
160
193
|
def create_segments(
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
194
|
+
time_array: np.ndarray,
|
|
195
|
+
max_segment_gap_s: float,
|
|
196
|
+
):
|
|
164
197
|
# Calculate the difference between consecutive time values
|
|
165
198
|
time_diff = np.diff(time_array, prepend=0.0)
|
|
166
199
|
|
|
@@ -168,23 +201,23 @@ def create_segments(
|
|
|
168
201
|
gap_exceeds = time_diff > max_segment_gap_s
|
|
169
202
|
|
|
170
203
|
# Create the segment number based on the cumulative sum of the gap_exceeds mask
|
|
171
|
-
segments = gap_exceeds.cumsum()
|
|
204
|
+
segments = gap_exceeds.cumsum() + 1
|
|
172
205
|
|
|
173
206
|
return segments
|
|
174
207
|
|
|
175
208
|
|
|
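A short sketch of the behavioural change in `create_segments`: with the `+ 1` added to the cumulative sum, segment numbering now starts at 1 instead of 0 (the time values below are illustrative):

    import numpy as np

    from paradigma.segmenting import create_segments

    # Two bursts of samples separated by a gap larger than max_segment_gap_s.
    time_array = np.array([0.00, 0.01, 0.02, 5.00, 5.01])

    segments = create_segments(time_array, max_segment_gap_s=1.5)
    print(segments)  # [1 1 1 2 2]; in 1.0.3 this was [0 0 0 1 1]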
 def discard_segments(
-
-
-
-
-
-
+    df: pd.DataFrame,
+    segment_nr_colname: str,
+    min_segment_length_s: float,
+    fs: int,
+    format: str = "timestamps",
+) -> pd.DataFrame:
     """
     Remove segments smaller than a specified size and reset segment enumeration.
 
-    This function filters out segments from the DataFrame that are smaller than a
-    given minimum size, based on the configuration. After removing small segments,
+    This function filters out segments from the DataFrame that are smaller than a
+    given minimum size, based on the configuration. After removing small segments,
     the segment numbers are reset to start from 1.
 
     Parameters
@@ -201,12 +234,13 @@ def discard_segments(
     Returns
     -------
     pd.DataFrame
-        A filtered DataFrame where small segments have been removed and segment
+        A filtered DataFrame where small segments have been removed and segment
         numbers have been reset to start from 1.
 
     Example
     -------
-    config = Config(min_segment_length_s=2, sampling_frequency=100,
+    config = Config(min_segment_length_s=2, sampling_frequency=100,
+                    segment_nr_colname='segment')
     df = pd.DataFrame({
         'segment': [1, 1, 2, 2, 2],
         'time': [0, 1, 2, 3, 4]
@@ -221,43 +255,44 @@ def discard_segments(
     # 4 2 4
     """
     # Minimum segment size in number of samples
-    if format ==
-        min_samples = min_segment_length_s * fs
-    elif format ==
-        min_samples = min_segment_length_s
+    if format == "timestamps":
+        min_samples = int(min_segment_length_s * fs)
+    elif format == "windows":
+        min_samples = int(min_segment_length_s)
     else:
         raise ValueError("Invalid format. Must be 'timestamps' or 'windows'.")
 
-    #
-
-        df.groupby(segment_nr_colname)[segment_nr_colname]
-        .transform('size') >= min_samples
-    )
+    # Count samples per segment
+    segment_counts = df.groupby(segment_nr_colname).size()
 
-
+    # Filter rows for valid segments (>= min samples)
+    counts_map = segment_counts.to_dict()
+    df = df[df[segment_nr_colname].map(counts_map) >= min_samples].copy()
 
     if df.empty:
-        raise ValueError(
+        raise ValueError(
+            f"All segments were removed: no segment ≥ {min_samples} samples."
+        )
 
-    # Reset segment numbers
-
-    df[segment_nr_colname] = unique_segments
+    # Reset segment numbers
+    df[segment_nr_colname] = pd.factorize(df[segment_nr_colname])[0] + 1
 
     return df
 
 
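A minimal sketch of the reworked `discard_segments`, which now counts samples per segment, drops segments shorter than the minimum length, and renumbers the survivors with `pd.factorize`; the data and column name below are illustrative:

    import pandas as pd

    from paradigma.segmenting import discard_segments

    fs = 2  # illustrative: at 2 Hz a 2 s minimum corresponds to 4 samples
    df = pd.DataFrame({
        "segment_nr": [1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3],
        "time": [0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0],
    })

    filtered = discard_segments(
        df,
        segment_nr_colname="segment_nr",
        min_segment_length_s=2,
        fs=fs,
        format="timestamps",
    )
    # Segment 1 (2 samples) is dropped; segments 2 and 3 are renumbered to 1 and 2.
    print(filtered["segment_nr"].unique())  # [1 2]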
-
+@deprecated("This will be removed in v1.1.")
+def categorize_segments(df, fs, format="timestamps", window_step_length_s=None):
     """
     Categorize segments based on their duration.
 
-    This function categorizes segments into four categories based on their duration
+    This function categorizes segments into four categories based on their duration
     in seconds. The categories are defined as:
     - Category 1: Segments shorter than 5 seconds
     - Category 2: Segments between 5 and 10 seconds
     - Category 3: Segments between 10 and 20 seconds
     - Category 4: Segments longer than 20 seconds
 
-    The duration of each segment is calculated based on the sampling frequency and
+    The duration of each segment is calculated based on the sampling frequency and
     the number of rows (data points) in the segment.
 
     Parameters
@@ -278,44 +313,46 @@ def categorize_segments(df, fs, format='timestamps', window_step_length_s=None):
     - 'long' for segments between 10 and 20 seconds
     - 'very_long' for segments > 20 seconds
     """
-    if format ==
+    if format == "windows" and window_step_length_s is None:
         raise ValueError("Window step length must be provided for 'windows' format.")
-
+
     # Define duration thresholds in seconds
-    d_max_duration = {
-
-        'moderately_long': 10,
-        'long': 20
-    }
-
+    d_max_duration = {"short": 5, "moderately_long": 10, "long": 20}
+
     # Convert thresholds to rows if format is 'timestamps'
-    if format ==
+    if format == "timestamps":
        d_max_duration = {k: v * fs for k, v in d_max_duration.items()}
 
     # Count rows per segment
-    segment_sizes = df[DataColumns.
+    segment_sizes = df[DataColumns.GAIT_SEGMENT_NR].value_counts()
 
     # Convert segment sizes to duration in seconds
-    if format ==
+    if format == "windows":
         segment_sizes *= window_step_length_s
 
     # Group by the segment column and apply the categorization
     def categorize(segment_size):
-        if segment_size < d_max_duration[
-            return
-        elif segment_size < d_max_duration[
-            return
-        elif segment_size < d_max_duration[
-            return
+        if segment_size < d_max_duration["short"]:
+            return "short"
+        elif segment_size < d_max_duration["moderately_long"]:
+            return "moderately_long"
+        elif segment_size < d_max_duration["long"]:
+            return "long"
         else:
-            return
+            return "very_long"
 
     # Apply categorization to the DataFrame
-    return
+    return (
+        df[DataColumns.GAIT_SEGMENT_NR]
+        .map(segment_sizes)
+        .map(categorize)
+        .astype("category")
+    )
+
 
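For reference, a sketch of how the now-deprecated `categorize_segments` maps segment sizes to duration categories, using the `DataColumns.GAIT_SEGMENT_NR` column referenced in the diff; the segment layout is illustrative:

    import pandas as pd

    from paradigma.constants import DataColumns
    from paradigma.segmenting import categorize_segments

    fs = 1  # illustrative: at 1 Hz each row corresponds to one second
    # Segment 1 lasts 3 s, segment 2 lasts 12 s.
    df = pd.DataFrame({DataColumns.GAIT_SEGMENT_NR: [1] * 3 + [2] * 12})

    categories = categorize_segments(df, fs=fs, format="timestamps")
    print(sorted(set(categories)))  # ['long', 'short']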
 class WindowedDataExtractor:
     """
-    A utility class for extracting specific column indices and slices
+    A utility class for extracting specific column indices and slices
     from a list of windowed column names.
 
     Attributes
@@ -325,31 +362,31 @@ class WindowedDataExtractor:
 
     Methods
     -------
-    get_index(
-        Returns the index of a specific column.
-    get_slice(
-        Returns a slice object for a range of consecutive
+    get_index(colname)
+        Returns the index of a specific column name.
+    get_slice(colnames)
+        Returns a slice object for a range of consecutive column names.
     """
 
-    def __init__(self,
+    def __init__(self, windowed_colnames: list[str]):
         """
         Initialize the WindowedDataExtractor.
 
         Parameters
         ----------
-
+        windowed_colnames : list of str
             A list of column names in the windowed data.
 
         Raises
         ------
         ValueError
-            If the list of `
+            If the list of `windowed_colnames` is empty.
         """
-        if not
+        if not windowed_colnames:
             raise ValueError("The list of windowed columns cannot be empty.")
-        self.column_indices = {col: idx for idx, col in enumerate(
+        self.column_indices = {col: idx for idx, col in enumerate(windowed_colnames)}
 
-    def get_index(self,
+    def get_index(self, colname: str) -> int:
         """
         Get the index of a specific column.
 
@@ -366,19 +403,19 @@ class WindowedDataExtractor:
         Raises
         ------
         ValueError
-            If the column is not found in the `
+            If the column is not found in the `windowed_colnames` list.
         """
-        if
-            raise ValueError(f"Column '{
-        return self.column_indices[
+        if colname not in self.column_indices:
+            raise ValueError(f"Column name '{colname}' not found in windowed_colnames.")
+        return self.column_indices[colname]
 
-    def get_slice(self,
+    def get_slice(self, colnames: list[str]) -> slice:
         """
         Get a slice object for a range of consecutive columns.
 
         Parameters
         ----------
-
+        colnames : list of str
             A list of consecutive column names to define the slice.
 
         Returns
@@ -389,11 +426,14 @@ class WindowedDataExtractor:
         Raises
         ------
         ValueError
-            If one or more columns in `
+            If one or more columns in `colnames` are not found in the
+            `windowed_colnames` list.
         """
-        if not all(col in self.column_indices for col in
-            missing = [col for col in
-            raise ValueError(
-
-
-
+        if not all(col in self.column_indices for col in colnames):
+            missing = [col for col in colnames if col not in self.column_indices]
+            raise ValueError(
+                f"The following columns are missing from windowed_colnames: {missing}"
+            )
+        start_idx = self.column_indices[colnames[0]]
+        end_idx = self.column_indices[colnames[-1]] + 1
+        return slice(start_idx, end_idx)
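Finally, a usage sketch for `WindowedDataExtractor` with a handful of illustrative column names, applied to a windowed array of the shape produced by `tabulate_windows`:

    import numpy as np

    from paradigma.segmenting import WindowedDataExtractor

    colnames = ["time", "accelerometer_x", "accelerometer_y", "accelerometer_z"]
    extractor = WindowedDataExtractor(colnames)

    idx_time = extractor.get_index("time")  # 0
    acc_slice = extractor.get_slice(
        ["accelerometer_x", "accelerometer_y", "accelerometer_z"]
    )  # slice(1, 4)

    # Select the accelerometer channels from a windowed array of shape
    # (n_windows, window_size, n_columns).
    windows = np.zeros((5, 600, len(colnames)))
    print(idx_time, acc_slice, windows[:, :, acc_slice].shape)  # 0 slice(1, 4, None) (5, 600, 3)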