paradigma-1.0.4-py3-none-any.whl → paradigma-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- paradigma/__init__.py +10 -1
- paradigma/classification.py +14 -14
- paradigma/config.py +38 -29
- paradigma/constants.py +10 -2
- paradigma/feature_extraction.py +106 -75
- paradigma/load.py +476 -0
- paradigma/orchestrator.py +670 -0
- paradigma/pipelines/gait_pipeline.py +488 -97
- paradigma/pipelines/pulse_rate_pipeline.py +278 -46
- paradigma/pipelines/pulse_rate_utils.py +176 -137
- paradigma/pipelines/tremor_pipeline.py +292 -72
- paradigma/prepare_data.py +409 -0
- paradigma/preprocessing.py +345 -77
- paradigma/segmenting.py +57 -42
- paradigma/testing.py +14 -9
- paradigma/util.py +36 -22
- paradigma-1.1.0.dist-info/METADATA +229 -0
- paradigma-1.1.0.dist-info/RECORD +26 -0
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/WHEEL +1 -1
- paradigma-1.0.4.dist-info/METADATA +0 -140
- paradigma-1.0.4.dist-info/RECORD +0 -23
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/entry_points.txt +0 -0
- {paradigma-1.0.4.dist-info → paradigma-1.1.0.dist-info}/licenses/LICENSE +0 -0
paradigma/load.py
ADDED
@@ -0,0 +1,476 @@
"""
Data loading module for ParaDigMa toolbox.

This module provides functions to load sensor data from various formats:
- Raw data: TSDF (.meta/.bin), Empatica (.avro), Axivity (.CWA)
- Prepared data: parquet, pickle, csv

Based on the device_specific_data_loading tutorial.
"""

import logging
import pickle
from pathlib import Path

import pandas as pd
from avro.datafile import DataFileReader
from avro.io import DatumReader

from paradigma.util import load_tsdf_dataframe

logger = logging.getLogger(__name__)


def load_tsdf_data(
    data_path: str | Path,
    prefix: str = "IMU",
) -> tuple[pd.DataFrame, dict, dict]:
    """
    Load TSDF data from .meta and .bin files.

    Parameters
    ----------
    data_path : str or Path
        Path to directory containing TSDF files.
    prefix : str, default "IMU"
        Prefix for TSDF files (e.g., "IMU_segment0001").

    Returns
    -------
    tuple
        Tuple containing (DataFrame with loaded data, time metadata
        dict, values metadata dict)
    """
    data_path = Path(data_path)
    logger.info(f"Loading TSDF data from {data_path} with prefix '{prefix}'")

    df, time_meta, values_meta = load_tsdf_dataframe(
        path_to_data=data_path, prefix=prefix
    )

    logger.info(f"Loaded TSDF data: {df.shape[0]} rows, {df.shape[1]} columns")
    return df, time_meta, values_meta


def load_empatica_data(
    file_path: str | Path,
) -> pd.DataFrame:
    """
    Load Empatica .avro file.

    Parameters
    ----------
    file_path : str or Path
        Path to .avro file.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns: time, time_dt, accelerometer_x/y/z,
        gyroscope_x/y/z (if available).
    """
    file_path = Path(file_path)
    logger.info(f"Loading Empatica data from {file_path}")

    with open(file_path, "rb") as f:
        reader = DataFileReader(f, DatumReader())
        empatica_data = next(reader)

    accel_data = empatica_data["rawData"]["accelerometer"]

    # Check for gyroscope data
    gyro_data = None
    if (
        "gyroscope" in empatica_data["rawData"]
        and len(empatica_data["rawData"]["gyroscope"]["x"]) > 0
    ):
        gyro_data = empatica_data["rawData"]["gyroscope"]
    else:
        raise ValueError("Gyroscope data not found in Empatica file.")

    # Check Avro schema version for conversion
    avro_version = (
        empatica_data["schemaVersion"]["major"],
        empatica_data["schemaVersion"]["minor"],
        empatica_data["schemaVersion"]["patch"],
    )

    # Convert accelerometer data based on schema version
    if avro_version < (6, 5, 0):
        physical_range = (
            accel_data["imuParams"]["physicalMax"]
            - accel_data["imuParams"]["physicalMin"]
        )
        digital_range = (
            accel_data["imuParams"]["digitalMax"]
            - accel_data["imuParams"]["digitalMin"]
        )
        conversion_factor = physical_range / digital_range
    else:
        conversion_factor = accel_data["imuParams"]["conversionFactor"]

    accel_x = [val * conversion_factor for val in accel_data["x"]]
    accel_y = [val * conversion_factor for val in accel_data["y"]]
    accel_z = [val * conversion_factor for val in accel_data["z"]]

    sampling_frequency = accel_data["samplingFrequency"]
    nrows = len(accel_x)

    # Create time arrays
    t_start = accel_data["timestampStart"]
    t_array = [t_start + i * (1e6 / sampling_frequency) for i in range(nrows)]
    t_from_0_array = [(x - t_array[0]) / 1e6 for x in t_array]

    # Build DataFrame
    df_data = {
        "time": t_from_0_array,
        "time_dt": pd.to_datetime(t_array, unit="us"),
        "accelerometer_x": accel_x,
        "accelerometer_y": accel_y,
        "accelerometer_z": accel_z,
    }

    # Add gyroscope data if available
    if gyro_data:
        # Apply same conversion to gyroscope
        gyro_x = [val * conversion_factor for val in gyro_data["x"]]
        gyro_y = [val * conversion_factor for val in gyro_data["y"]]
        gyro_z = [val * conversion_factor for val in gyro_data["z"]]

        df_data.update(
            {
                "gyroscope_x": gyro_x,
                "gyroscope_y": gyro_y,
                "gyroscope_z": gyro_z,
            }
        )

    df = pd.DataFrame(df_data)

    logger.info(f"Loaded Empatica data: {nrows} rows at {sampling_frequency} Hz")
    logger.debug(f"Start time: {pd.to_datetime(t_start, unit='us')}")
    logger.debug(f"Columns: {list(df.columns)}")

    return df


def load_axivity_data(
    file_path: str | Path,
) -> pd.DataFrame:
    """
    Load Axivity .CWA file.

    Parameters
    ----------
    file_path : str or Path
        Path to .CWA file.

    Returns
    -------
    pd.DataFrame
        DataFrame with columns: time, time_dt, accelerometer_x/y/z,
        gyroscope_x/y/z (if available).
    """
    try:
        from openmovement.load import CwaData
    except ImportError:
        raise ImportError(
            "openmovement package required for Axivity data loading. "
            "Install with: pip install git+https://github.com/digitalinteraction/openmovement-python.git@master"
        )

    file_path = Path(file_path)
    logger.info(f"Loading Axivity data from {file_path}")

    with CwaData(
        filename=file_path,
        include_gyro=True,  # Set to False for AX3 devices without gyroscope
        include_temperature=False,
    ) as cwa_data:
        logger.debug(f"Data format info: {cwa_data.data_format}")
        df = cwa_data.get_samples()

    # Set time to start at 0 seconds
    df["time_dt"] = df["time"].copy()
    df["time"] = (df["time"] - df["time"].iloc[0]).dt.total_seconds()

    # Standardize column names
    column_mapping = {}
    if "accel_x" in df.columns:
        column_mapping.update(
            {
                "accel_x": "accelerometer_x",
                "accel_y": "accelerometer_y",
                "accel_z": "accelerometer_z",
            }
        )
    if "gyro_x" in df.columns:
        column_mapping.update(
            {"gyro_x": "gyroscope_x", "gyro_y": "gyroscope_y", "gyro_z": "gyroscope_z"}
        )

    df = df.rename(columns=column_mapping)

    logger.info(f"Loaded Axivity data: {df.shape[0]} rows, {df.shape[1]} columns")
    logger.debug(f"Columns: {list(df.columns)}")

    return df


def load_prepared_data(
    file_path: str | Path,
) -> pd.DataFrame:
    """
    Load prepared data from various formats (parquet, pickle, csv, json).
    If json, expects TSDF format with corresponding .bin files.

    Parameters
    ----------
    file_path : str or Path
        Path to prepared data file.

    Returns
    -------
    pd.DataFrame
        DataFrame with prepared data.
    """
    file_path = Path(file_path)
    logger.info(f"Loading prepared data from {file_path}")

    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    # Determine file format and load accordingly
    suffix = file_path.suffix.lower()

    if suffix == ".parquet":
        df = pd.read_parquet(file_path)
    elif suffix == ".csv":
        df = pd.read_csv(file_path)
    elif suffix in [".pkl", ".pickle"]:
        with open(file_path, "rb") as f:
            df = pickle.load(f)
    elif suffix == ".json":
        # Load TSDF from JSON and corresponding .bin files
        df, _, _ = load_tsdf_dataframe(
            path_to_data=file_path.parent,
            prefix=file_path.stem.replace("_meta", ""),
        )
    else:
        raise ValueError(
            f"Unsupported file format: {suffix}. "
            "Supported: .parquet, .csv, .pkl, .pickle, .json"
        )

    logger.info(f"Loaded {file_path.name}: {df.shape[0]} rows, {df.shape[1]} columns")
    logger.debug(f"Columns: {list(df.columns)}")

    return df


def detect_file_format(file_path: str | Path) -> str:
    """
    Detect the format of a data file based on its extension.

    Parameters
    ----------
    file_path : str or Path
        Path to data file

    Returns
    -------
    str
        File format: 'tsdf', 'empatica', 'axivity', 'prepared'
    """
    file_path = Path(file_path)
    suffix = file_path.suffix.lower()

    if suffix == ".json":
        return "tsdf"
    elif suffix == ".avro":
        return "empatica"
    elif suffix == ".cwa":
        return "axivity"
    elif suffix in [".parquet", ".csv", ".pkl", ".pickle"]:
        return "prepared"
    else:
        raise ValueError(f"Unknown file format: {suffix}")


def get_data_file_paths(
    data_path: str | Path,
    file_patterns: list[str] | str | None = None,
) -> list[Path]:
    """
    Get list of data file paths without loading them.

    This function is useful for memory-efficient processing where you want to
    load and process files one at a time instead of loading all at once.

    Parameters
    ----------
    data_path : str or Path
        Path to directory containing data files
    file_patterns : str or list of str, optional
        File extensions to consider (e.g. ["parquet", "csv", "cwa"]).
        If None, all supported formats are considered.

    Returns
    -------
    list of Path
        List of file paths found in the directory
    """
    data_path = Path(data_path)

    if not data_path.exists():
        raise FileNotFoundError(f"Directory not found: {data_path}")

    valid_file_patterns = ["parquet", "csv", "pkl", "pickle", "json", "avro", "cwa"]

    if file_patterns is None:
        file_patterns = valid_file_patterns
    elif isinstance(file_patterns, str):
        file_patterns = [file_patterns]

    # Collect candidate files
    all_files = [
        f
        for f in data_path.iterdir()
        if f.is_file() and f.suffix[1:].lower() in file_patterns
    ]

    logger.info(f"Found {len(all_files)} data files in {data_path}")

    return all_files


def load_single_data_file(
    file_path: str | Path,
) -> tuple[str, pd.DataFrame]:
    """
    Load a single data file with automatic format detection.

    Parameters
    ----------
    file_path : str or Path
        Path to data file

    Returns
    -------
    tuple
        Tuple of (file_key, DataFrame) where file_key is the file name
        without extension
    """
    file_path = Path(file_path)

    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    try:
        file_format = detect_file_format(file_path)

        if file_format == "tsdf":
            # For TSDF, load based on .meta file and infer prefix
            if file_path.suffix.lower() == ".json":
                prefix = file_path.stem.replace("_meta", "")
                df, _, _ = load_tsdf_data(file_path.parent, prefix)
                return prefix, df

        elif file_format == "empatica":
            df = load_empatica_data(file_path)
            return file_path.stem, df

        elif file_format == "axivity":
            df = load_axivity_data(file_path)
            return file_path.stem, df

        elif file_format == "prepared":
            df = load_prepared_data(file_path)
            prefix = file_path.stem.replace("_meta", "")
            return prefix, df

        else:
            raise ValueError(f"Unknown file format for {file_path}")

    except Exception as e:
        logger.error(f"Failed to load {file_path}: {e}")
        raise


def load_data_files(
    data_path: str | Path,
    file_patterns: list[str] | str | None = None,
) -> dict[str, pd.DataFrame]:
    """
    Load all data files from a directory with automatic format detection.

    Note: This function loads all files into memory at once. For large datasets,
    consider using get_data_file_paths() and load_single_data_file() to process
    files one at a time.

    Parameters
    ----------
    data_path : str or Path
        Path to directory containing data files
    file_patterns : str or list of str, optional
        File extensions to consider (e.g. ["parquet", "csv", "cwa"]).
        If None, all supported formats are considered.

    Returns
    -------
    dict
        Dictionary mapping file names (without extension) to DataFrames
    """
    # Get all file paths
    all_files = get_data_file_paths(data_path, file_patterns)

    loaded_files = {}

    # Load each file
    for file_path in all_files:
        try:
            file_key, df = load_single_data_file(file_path)
            loaded_files[file_key] = df
        except Exception as e:
            logger.warning(f"Failed to load {file_path}: {e}")

    if len(loaded_files) == 0:
        logger.warning("No data files were loaded.")
    else:
        logger.info(f"Successfully loaded {len(loaded_files)} files")

    return loaded_files


def save_prepared_data(
    df: pd.DataFrame,
    file_path: str | Path,
    file_format: str = "parquet",
) -> None:
    """
    Save prepared data to file.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame to save
    file_path : str or Path
        Path for output file
    file_format : str, default 'parquet'
        Output format: 'parquet', 'csv', 'pickle'
    """
    file_path = Path(file_path)

    # Ensure directory exists
    file_path.parent.mkdir(parents=True, exist_ok=True)

    if file_format == "parquet":
        df.to_parquet(file_path, index=False)
    elif file_format == "csv":
        df.to_csv(file_path, index=False)
    elif file_format == "pickle":
        with open(file_path, "wb") as f:
            pickle.dump(df, f)
    else:
        raise ValueError(f"Unsupported file_format: {file_format}")

    logger.info(f"Saved prepared data to {file_path}")
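
For orientation, the new loaders are designed to compose; below is a minimal usage sketch of the memory-efficient pattern the docstrings describe. The raw_data/ and prepared_data/ directories and the chosen extensions are hypothetical; only the functions come from the module above.

from paradigma.load import (
    get_data_file_paths,
    load_single_data_file,
    save_prepared_data,
)

# Hypothetical directory of raw recordings (e.g., Axivity .cwa or Empatica .avro)
raw_dir = "raw_data"

# List files first, then load and convert them one at a time
for file_path in get_data_file_paths(raw_dir, file_patterns=["cwa", "avro"]):
    file_key, df = load_single_data_file(file_path)
    # save_prepared_data creates the output directory if needed
    save_prepared_data(df, f"prepared_data/{file_key}.parquet", file_format="parquet")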