mcp-server-mcsa 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,428 +1,429 @@
1
- """File I/O for loading real‑world motor‑current signals.
2
-
3
- Supports CSV, WAV, NumPy binary (.npy), and TDMS formats commonly used
4
- in industrial data‑acquisition systems. Each loader returns a standardised
5
- dictionary with ``signal``, ``sampling_freq_hz``, ``n_samples``, ``duration_s``
6
- plus format‑specific metadata.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import csv
12
- import os
13
- import struct
14
- import wave
15
- from pathlib import Path
16
-
17
- import numpy as np
18
- from numpy.typing import NDArray
19
-
20
-
21
- # ---------------------------------------------------------------------------
22
- # CSV loader
23
- # ---------------------------------------------------------------------------
24
-
25
def load_csv(
    file_path: str,
    signal_column: int | str = 1,
    time_column: int | str | None = 0,
    sampling_freq_hz: float | None = None,
    delimiter: str = ",",
    skip_header: int = 1,
    max_rows: int | None = None,
) -> dict:
    """Load a current signal from a delimited text (CSV / TSV) file.

    The file may contain a time column and one or more data columns. When
    ``sampling_freq_hz`` is not given, it is inferred as the reciprocal of
    the median time step of the time column.

    Rows whose signal cell (or time cell, when a time column is used) is
    missing or non-numeric are skipped as a whole, so ``signal`` and
    ``time_s`` always stay aligned sample-for-sample.

    Args:
        file_path: Absolute or relative path to the file.
        signal_column: Column index (0-based int) or header name containing
            the current signal.
        time_column: Column index or header name for time (seconds). Use
            ``None`` if the file has no time column (``sampling_freq_hz``
            is then required).
        sampling_freq_hz: Explicit sampling frequency. If ``None``, it is
            inferred from the time column.
        delimiter: Column delimiter (default ``","``).
        skip_header: Number of leading rows to skip. The last skipped row
            is used as the header when columns are addressed by name.
        max_rows: Maximum number of data rows to read (``None`` means all).

    Returns:
        ``dict`` with keys ``signal``, ``time_s`` (or ``None``),
        ``sampling_freq_hz``, ``n_samples``, ``duration_s``, ``file_path``
        and ``format``.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the file has no data rows, a named column is not in
            the header, ``sampling_freq_hz`` is not positive, the time
            column is not increasing, or the sampling frequency cannot be
            determined.
    """
    path = Path(file_path).resolve()
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")
    if sampling_freq_hz is not None and sampling_freq_hz <= 0:
        # Fail early instead of a ZeroDivisionError / negative duration below.
        raise ValueError(f"sampling_freq_hz must be positive, got {sampling_freq_hz}")

    # ---- Read raw rows ----
    rows: list[list[str]] = []
    header_row: list[str] | None = None
    with open(path, newline="", encoding="utf-8-sig") as fh:
        for i, row in enumerate(csv.reader(fh, delimiter=delimiter)):
            if i < skip_header:
                header_row = row
                continue
            if max_rows is not None and len(rows) >= max_rows:
                break
            rows.append(row)

    if not rows:
        raise ValueError(f"No data rows found in {path}")

    # ---- Resolve column indices ----
    def _resolve_col(col: int | str | None, header: list[str] | None) -> int | None:
        """Map a column spec (index, header name or ``None``) to an index."""
        if col is None or isinstance(col, int):
            return col
        if header is not None:
            stripped = [h.strip() for h in header]
            if col in stripped:
                return stripped.index(col)
        raise ValueError(f"Column '{col}' not found in header: {header}")

    sig_idx = _resolve_col(signal_column, header_row)
    time_idx = _resolve_col(time_column, header_row)

    # ---- Parse numeric data ----
    signal_vals: list[float] = []
    time_vals: list[float] | None = [] if time_idx is not None else None

    for row in rows:
        try:
            # Parse every needed cell BEFORE appending anything, so that a
            # bad time cell cannot leave an orphaned signal sample behind.
            sig_val = float(row[sig_idx])  # type: ignore[index]
            t_val = float(row[time_idx]) if time_idx is not None else None
        except (IndexError, ValueError):
            continue  # skip malformed rows
        signal_vals.append(sig_val)
        if time_vals is not None and t_val is not None:
            time_vals.append(t_val)

    signal = np.array(signal_vals, dtype=np.float64)

    # ---- Infer sampling frequency ----
    time_arr: NDArray[np.floating] | None = None
    if time_vals is not None and len(time_vals) > 1:
        time_arr = np.array(time_vals, dtype=np.float64)
        if sampling_freq_hz is None:
            dt = np.median(np.diff(time_arr))
            if dt <= 0:
                raise ValueError("Time column is not monotonically increasing")
            sampling_freq_hz = float(1.0 / dt)

    if sampling_freq_hz is None:
        raise ValueError(
            "Cannot determine sampling frequency — provide sampling_freq_hz "
            "or include a time column in the CSV."
        )

    n = len(signal)
    duration = n / sampling_freq_hz

    return {
        "signal": signal.tolist(),
        "time_s": time_arr.tolist() if time_arr is not None else None,
        "sampling_freq_hz": float(sampling_freq_hz),
        "n_samples": n,
        "duration_s": round(duration, 6),
        "file_path": str(path),
        "format": "csv",
    }
139
-
140
-
141
- # ---------------------------------------------------------------------------
142
- # WAV loader
143
- # ---------------------------------------------------------------------------
144
-
145
def load_wav(
    file_path: str,
    channel: int = 0,
) -> dict:
    """Load a current signal from a WAV audio file.

    Samples are decoded according to the file's sample width (1, 2, 3 or
    4 bytes) and scaled by the full-scale value of that width, so the
    returned signal lies in roughly the range -1 to +1. 8-bit data is
    treated as unsigned with a mid-point of 128; wider data as signed.

    Args:
        file_path: Path to the WAV file.
        channel: Channel index for multi-channel files (default 0).

    Returns:
        ``dict`` with keys ``signal``, ``time_s`` (always ``None``),
        ``sampling_freq_hz``, ``n_samples``, ``duration_s``, ``file_path``,
        ``format`` and ``metadata`` (channel count, selected channel,
        sample width in bytes and bit depth).

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the sample width is unsupported or ``channel`` is
            not below the number of channels in the file.
    """
    path = Path(file_path).resolve()
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    with wave.open(str(path), "rb") as wf:
        n_channels = wf.getnchannels()
        sampwidth = wf.getsampwidth()
        fs = wf.getframerate()
        raw = wf.readframes(wf.getnframes())

    # ---- Decode raw bytes to float64 samples (still interleaved) ----
    if sampwidth == 1:
        # 8-bit WAV is unsigned; recentre around zero.
        data = np.frombuffer(raw, dtype=np.uint8).astype(np.float64) - 128
        max_val = 128.0
    elif sampwidth == 2:
        data = np.frombuffer(raw, dtype=np.int16).astype(np.float64)
        max_val = 32768.0
    elif sampwidth == 3:
        # 24-bit has no native NumPy dtype: assemble little-endian byte
        # triplets into int32 and sign-extend, all vectorised.
        n_samples_total = len(raw) // 3
        triplets = (
            np.frombuffer(memoryview(raw)[: 3 * n_samples_total], dtype=np.uint8)
            .reshape(-1, 3)
            .astype(np.int32)
        )
        packed = triplets[:, 0] | (triplets[:, 1] << 8) | (triplets[:, 2] << 16)
        # Two's-complement sign extension from 24 to 32 bits.
        data = ((packed ^ 0x800000) - 0x800000).astype(np.float64)
        max_val = 8388608.0
    elif sampwidth == 4:
        data = np.frombuffer(raw, dtype=np.int32).astype(np.float64)
        max_val = 2147483648.0
    else:
        raise ValueError(f"Unsupported WAV sample width: {sampwidth}")

    # Validate the channel for mono files too; otherwise a request for a
    # non-existent channel would silently return channel 0.
    if channel >= n_channels:
        raise ValueError(f"Channel {channel} out of range (file has {n_channels})")

    # ---- De-interleave channels ----
    if n_channels > 1:
        data = data.reshape(-1, n_channels)[:, channel]

    # ---- Normalise to full scale ----
    signal_arr = data / max_val

    n = len(signal_arr)
    duration = n / fs

    return {
        "signal": signal_arr.tolist(),
        "time_s": None,
        "sampling_freq_hz": float(fs),
        "n_samples": n,
        "duration_s": round(duration, 6),
        "file_path": str(path),
        "format": "wav",
        "metadata": {
            "channels": n_channels,
            "selected_channel": channel,
            "sample_width_bytes": sampwidth,
            "bit_depth": sampwidth * 8,
        },
    }
234
-
235
-
236
- # ---------------------------------------------------------------------------
237
- # NumPy binary loader
238
- # ---------------------------------------------------------------------------
239
-
240
def load_npy(
    file_path: str,
    sampling_freq_hz: float,
    column: int = 0,
) -> dict:
    """Load a current signal from a NumPy ``.npy`` binary file.

    The file must hold a 1-D array (used as-is) or a 2-D array (one column
    is selected). The samples are converted to ``float64``.

    Args:
        file_path: Path to the ``.npy`` file.
        sampling_freq_hz: Sampling frequency in Hz. Must be positive; it
            cannot be inferred from a raw array.
        column: If the array is 2-D, select this column.

    Returns:
        ``dict`` with keys ``signal``, ``time_s`` (always ``None``),
        ``sampling_freq_hz``, ``n_samples``, ``duration_s``, ``file_path``,
        ``format`` and ``metadata`` (original shape and selected column).

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If ``sampling_freq_hz`` is not positive, ``column`` is
            beyond the last column, or the array is not 1-D or 2-D.
    """
    path = Path(file_path).resolve()
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")
    if sampling_freq_hz <= 0:
        # Fail early instead of a ZeroDivisionError / negative duration below.
        raise ValueError(f"sampling_freq_hz must be positive, got {sampling_freq_hz}")

    arr = np.load(str(path))

    if arr.ndim == 1:
        signal = arr.astype(np.float64)
    elif arr.ndim == 2:
        if column >= arr.shape[1]:
            raise ValueError(f"Column {column} out of range (array has {arr.shape[1]} cols)")
        signal = arr[:, column].astype(np.float64)
    else:
        raise ValueError(f"Expected 1‑D or 2‑D array, got {arr.ndim}‑D")

    n = len(signal)
    duration = n / sampling_freq_hz

    return {
        "signal": signal.tolist(),
        "time_s": None,
        "sampling_freq_hz": float(sampling_freq_hz),
        "n_samples": n,
        "duration_s": round(duration, 6),
        "file_path": str(path),
        "format": "npy",
        "metadata": {
            "original_shape": list(arr.shape),
            "selected_column": column if arr.ndim == 2 else None,
        },
    }
286
-
287
-
288
- # ---------------------------------------------------------------------------
289
- # Unified loader (auto‑detect by extension)
290
- # ---------------------------------------------------------------------------
291
-
292
- SUPPORTED_EXTENSIONS = {".csv", ".tsv", ".txt", ".wav", ".npy"}
293
-
294
-
295
def load_signal(
    file_path: str,
    sampling_freq_hz: float | None = None,
    signal_column: int | str = 1,
    time_column: int | str | None = 0,
    delimiter: str | None = None,
    channel: int = 0,
    skip_header: int = 1,
    max_rows: int | None = None,
) -> dict:
    """Load a motor-current signal, picking the loader from the extension.

    The lower-cased file suffix decides which specialised loader is
    called: ``.csv`` / ``.tsv`` / ``.txt`` go to ``load_csv``, ``.wav`` to
    ``load_wav`` and ``.npy`` to ``load_npy``.

    Args:
        file_path: Path to the signal file.
        sampling_freq_hz: Sampling frequency. Required for ``.npy``;
            forwarded to the CSV loader; not used for WAV.
        signal_column: CSV column (index or name) containing the signal.
        time_column: CSV column for time. ``None`` means no time column.
        delimiter: CSV delimiter. ``None`` means choose by extension
            (``,`` for .csv, ``\\t`` for .tsv/.txt).
        channel: WAV channel index.
        skip_header: CSV header rows to skip.
        max_rows: Maximum rows to read from CSV.

    Returns:
        The dictionary produced by the selected loader.

    Raises:
        ValueError: If the extension is not supported, or the file is
            ``.npy`` and ``sampling_freq_hz`` is ``None``.
    """
    resolved = Path(file_path).resolve()
    suffix = resolved.suffix.lower()
    target = str(resolved)

    if suffix == ".wav":
        return load_wav(target, channel=channel)

    if suffix == ".npy":
        if sampling_freq_hz is None:
            raise ValueError("sampling_freq_hz is required for .npy files")
        return load_npy(target, sampling_freq_hz)

    if suffix in (".csv", ".tsv", ".txt"):
        # Only fall back to an extension-based separator when none was given.
        separator = delimiter
        if separator is None:
            separator = "," if suffix == ".csv" else "\t"
        return load_csv(
            target,
            signal_column=signal_column,
            time_column=time_column,
            sampling_freq_hz=sampling_freq_hz,
            delimiter=separator,
            skip_header=skip_header,
            max_rows=max_rows,
        )

    raise ValueError(
        f"Unsupported file format: '{suffix}'. "
        f"Supported: {', '.join(sorted(SUPPORTED_EXTENSIONS))}"
    )
351
-
352
-
353
def get_signal_file_info(file_path: str) -> dict:
    """Return file metadata without loading the signal samples.

    Only cheap inspection is done: the WAV header, the ``.npy`` array
    header, or a line count plus the first two lines of a text file.
    Any error raised while inspecting the content is reported under the
    ``"error"`` key instead of being propagated (best-effort).

    Args:
        file_path: Path to the signal file.

    Returns:
        Dictionary that always contains ``file_path``, ``file_name``,
        ``extension``, ``size_bytes`` and ``size_mb``, plus
        format-specific keys:

        * WAV: ``format``, ``channels``, ``sampling_freq_hz``,
          ``n_samples``, ``duration_s``, ``sample_width_bytes``,
          ``bit_depth``.
        * NPY: ``format``, ``shape``, ``dtype``, ``n_samples``.
        * CSV / TSV / TXT: ``format``, ``header_line``,
          ``sample_data_line``, ``total_lines``, ``estimated_data_rows``.
        * Anything else: ``format`` set to ``"unknown"``.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    path = Path(file_path).resolve()
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    ext = path.suffix.lower()
    size_bytes = path.stat().st_size
    info: dict = {
        "file_path": str(path),
        "file_name": path.name,
        "extension": ext,
        "size_bytes": size_bytes,
        "size_mb": round(size_bytes / (1024 * 1024), 2),
    }

    if ext == ".wav":
        try:
            with wave.open(str(path), "rb") as wf:
                info["format"] = "wav"
                info["channels"] = wf.getnchannels()
                info["sampling_freq_hz"] = wf.getframerate()
                info["n_samples"] = wf.getnframes()
                info["duration_s"] = round(wf.getnframes() / wf.getframerate(), 3)
                info["sample_width_bytes"] = wf.getsampwidth()
                info["bit_depth"] = wf.getsampwidth() * 8
        except Exception as e:
            info["error"] = str(e)
    elif ext == ".npy":
        try:
            # Read only the array header, never the data. The private
            # version-aware helper is tried first; if this NumPy build
            # does not expose it, use the public per-version readers.
            with open(path, "rb") as fh:
                version = np.lib.format.read_magic(fh)
                try:
                    shape, _fortran, dtype = np.lib.format._read_array_header(fh, version)  # type: ignore[attr-defined]
                except AttributeError:
                    fh.seek(0)
                    np.lib.format.read_magic(fh)
                    if version[0] == 1:
                        header = np.lib.format.read_array_header_1_0(fh)
                    else:
                        header = np.lib.format.read_array_header_2_0(fh)
                    shape, _fortran, dtype = header
            info["format"] = "npy"
            info["shape"] = list(shape)
            info["dtype"] = str(dtype)
            info["n_samples"] = shape[0] if len(shape) >= 1 else 0
        except Exception as e:
            info["error"] = str(e)
    elif ext in (".csv", ".tsv", ".txt"):
        try:
            # Peek at the first two lines, then count the rest by streaming.
            with open(path, encoding="utf-8-sig") as fh:
                first_raw = fh.readline()
                second_raw = fh.readline()
                # readline() returns "" only at EOF, so truthiness tells
                # whether each of the first two lines actually exists
                # (an empty or header-only file must not count as 2 lines).
                line_count = bool(first_raw) + bool(second_raw)
                line_count += sum(1 for _ in fh)
            info["format"] = "csv"
            info["header_line"] = first_raw.strip()
            info["sample_data_line"] = second_raw.strip()
            info["total_lines"] = line_count
            info["estimated_data_rows"] = max(0, line_count - 1)
        except Exception as e:
            info["error"] = str(e)
    else:
        info["format"] = "unknown"

    return info
1
+ """File I/O for loading real‑world motor‑current signals.
2
+
3
+ Supports CSV, WAV, NumPy binary (.npy), and TDMS formats commonly used
4
+ in industrial data‑acquisition systems. Each loader returns a standardised
5
+ dictionary with ``signal``, ``sampling_freq_hz``, ``n_samples``, ``duration_s``
6
+ plus format‑specific metadata.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import csv
12
+ import struct
13
+ import wave
14
+ from pathlib import Path
15
+
16
+ import numpy as np
17
+ from numpy.typing import NDArray
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # CSV loader
21
+ # ---------------------------------------------------------------------------
22
+
23
def load_csv(
    file_path: str,
    signal_column: int | str = 1,
    time_column: int | str | None = 0,
    sampling_freq_hz: float | None = None,
    delimiter: str = ",",
    skip_header: int = 1,
    max_rows: int | None = None,
) -> dict:
    """Load a current signal from a delimited text (CSV / TSV) file.

    The file may contain a time column and one or more data columns. When
    ``sampling_freq_hz`` is not given, it is inferred as the reciprocal of
    the median time step of the time column.

    Rows whose signal cell (or time cell, when a time column is used) is
    missing or non-numeric are skipped as a whole, so ``signal`` and
    ``time_s`` always stay aligned sample-for-sample.

    Args:
        file_path: Absolute or relative path to the file.
        signal_column: Column index (0-based int) or header name containing
            the current signal.
        time_column: Column index or header name for time (seconds). Use
            ``None`` if the file has no time column (``sampling_freq_hz``
            is then required).
        sampling_freq_hz: Explicit sampling frequency. If ``None``, it is
            inferred from the time column.
        delimiter: Column delimiter (default ``","``).
        skip_header: Number of leading rows to skip. The last skipped row
            is used as the header when columns are addressed by name.
        max_rows: Maximum number of data rows to read (``None`` means all).

    Returns:
        ``dict`` with keys ``signal``, ``time_s`` (or ``None``),
        ``sampling_freq_hz``, ``n_samples``, ``duration_s``, ``file_path``
        and ``format``.

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the file has no data rows, a named column is not in
            the header, ``sampling_freq_hz`` is not positive, the time
            column is not increasing, or the sampling frequency cannot be
            determined.
    """
    path = Path(file_path).resolve()
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")
    if sampling_freq_hz is not None and sampling_freq_hz <= 0:
        # Fail early instead of a ZeroDivisionError / negative duration below.
        raise ValueError(f"sampling_freq_hz must be positive, got {sampling_freq_hz}")

    # ---- Read raw rows ----
    rows: list[list[str]] = []
    header_row: list[str] | None = None
    with open(path, newline="", encoding="utf-8-sig") as fh:
        for i, row in enumerate(csv.reader(fh, delimiter=delimiter)):
            if i < skip_header:
                header_row = row
                continue
            if max_rows is not None and len(rows) >= max_rows:
                break
            rows.append(row)

    if not rows:
        raise ValueError(f"No data rows found in {path}")

    # ---- Resolve column indices ----
    def _resolve_col(col: int | str | None, header: list[str] | None) -> int | None:
        """Map a column spec (index, header name or ``None``) to an index."""
        if col is None or isinstance(col, int):
            return col
        if header is not None:
            stripped = [h.strip() for h in header]
            if col in stripped:
                return stripped.index(col)
        raise ValueError(f"Column '{col}' not found in header: {header}")

    sig_idx = _resolve_col(signal_column, header_row)
    time_idx = _resolve_col(time_column, header_row)

    # ---- Parse numeric data ----
    signal_vals: list[float] = []
    time_vals: list[float] | None = [] if time_idx is not None else None

    for row in rows:
        try:
            # Parse every needed cell BEFORE appending anything, so that a
            # bad time cell cannot leave an orphaned signal sample behind.
            sig_val = float(row[sig_idx])  # type: ignore[index]
            t_val = float(row[time_idx]) if time_idx is not None else None
        except (IndexError, ValueError):
            continue  # skip malformed rows
        signal_vals.append(sig_val)
        if time_vals is not None and t_val is not None:
            time_vals.append(t_val)

    signal = np.array(signal_vals, dtype=np.float64)

    # ---- Infer sampling frequency ----
    time_arr: NDArray[np.floating] | None = None
    if time_vals is not None and len(time_vals) > 1:
        time_arr = np.array(time_vals, dtype=np.float64)
        if sampling_freq_hz is None:
            dt = np.median(np.diff(time_arr))
            if dt <= 0:
                raise ValueError("Time column is not monotonically increasing")
            sampling_freq_hz = float(1.0 / dt)

    if sampling_freq_hz is None:
        raise ValueError(
            "Cannot determine sampling frequency — provide sampling_freq_hz "
            "or include a time column in the CSV."
        )

    n = len(signal)
    duration = n / sampling_freq_hz

    return {
        "signal": signal.tolist(),
        "time_s": time_arr.tolist() if time_arr is not None else None,
        "sampling_freq_hz": float(sampling_freq_hz),
        "n_samples": n,
        "duration_s": round(duration, 6),
        "file_path": str(path),
        "format": "csv",
    }
137
+
138
+
139
+ # ---------------------------------------------------------------------------
140
+ # WAV loader
141
+ # ---------------------------------------------------------------------------
142
+
143
def load_wav(
    file_path: str,
    channel: int = 0,
) -> dict:
    """Load a current signal from a WAV audio file.

    Samples are decoded according to the file's sample width (1, 2, 3 or
    4 bytes) and scaled by the full-scale value of that width, so the
    returned signal lies in roughly the range -1 to +1. 8-bit data is
    treated as unsigned with a mid-point of 128; wider data as signed.

    Args:
        file_path: Path to the WAV file.
        channel: Channel index for multi-channel files (default 0).

    Returns:
        ``dict`` with keys ``signal``, ``time_s`` (always ``None``),
        ``sampling_freq_hz``, ``n_samples``, ``duration_s``, ``file_path``,
        ``format`` and ``metadata`` (channel count, selected channel,
        sample width in bytes and bit depth).

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If the sample width is unsupported or ``channel`` is
            not below the number of channels in the file.
    """
    path = Path(file_path).resolve()
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    with wave.open(str(path), "rb") as wf:
        n_channels = wf.getnchannels()
        sampwidth = wf.getsampwidth()
        fs = wf.getframerate()
        raw = wf.readframes(wf.getnframes())

    # ---- Decode raw bytes to float64 samples (still interleaved) ----
    if sampwidth == 1:
        # 8-bit WAV is unsigned; recentre around zero.
        data = np.frombuffer(raw, dtype=np.uint8).astype(np.float64) - 128
        max_val = 128.0
    elif sampwidth == 2:
        data = np.frombuffer(raw, dtype=np.int16).astype(np.float64)
        max_val = 32768.0
    elif sampwidth == 3:
        # 24-bit has no native NumPy dtype: assemble little-endian byte
        # triplets into int32 and sign-extend, all vectorised.
        n_samples_total = len(raw) // 3
        triplets = (
            np.frombuffer(memoryview(raw)[: 3 * n_samples_total], dtype=np.uint8)
            .reshape(-1, 3)
            .astype(np.int32)
        )
        packed = triplets[:, 0] | (triplets[:, 1] << 8) | (triplets[:, 2] << 16)
        # Two's-complement sign extension from 24 to 32 bits.
        data = ((packed ^ 0x800000) - 0x800000).astype(np.float64)
        max_val = 8388608.0
    elif sampwidth == 4:
        data = np.frombuffer(raw, dtype=np.int32).astype(np.float64)
        max_val = 2147483648.0
    else:
        raise ValueError(f"Unsupported WAV sample width: {sampwidth}")

    # Validate the channel for mono files too; otherwise a request for a
    # non-existent channel would silently return channel 0.
    if channel >= n_channels:
        raise ValueError(f"Channel {channel} out of range (file has {n_channels})")

    # ---- De-interleave channels ----
    if n_channels > 1:
        data = data.reshape(-1, n_channels)[:, channel]

    # ---- Normalise to full scale ----
    signal_arr = data / max_val

    n = len(signal_arr)
    duration = n / fs

    return {
        "signal": signal_arr.tolist(),
        "time_s": None,
        "sampling_freq_hz": float(fs),
        "n_samples": n,
        "duration_s": round(duration, 6),
        "file_path": str(path),
        "format": "wav",
        "metadata": {
            "channels": n_channels,
            "selected_channel": channel,
            "sample_width_bytes": sampwidth,
            "bit_depth": sampwidth * 8,
        },
    }
232
+
233
+
234
+ # ---------------------------------------------------------------------------
235
+ # NumPy binary loader
236
+ # ---------------------------------------------------------------------------
237
+
238
def load_npy(
    file_path: str,
    sampling_freq_hz: float,
    column: int = 0,
) -> dict:
    """Load a current signal from a NumPy ``.npy`` binary file.

    The file must hold a 1-D array (used as-is) or a 2-D array (one column
    is selected). The samples are converted to ``float64``.

    Args:
        file_path: Path to the ``.npy`` file.
        sampling_freq_hz: Sampling frequency in Hz. Must be positive; it
            cannot be inferred from a raw array.
        column: If the array is 2-D, select this column.

    Returns:
        ``dict`` with keys ``signal``, ``time_s`` (always ``None``),
        ``sampling_freq_hz``, ``n_samples``, ``duration_s``, ``file_path``,
        ``format`` and ``metadata`` (original shape and selected column).

    Raises:
        FileNotFoundError: If the file does not exist.
        ValueError: If ``sampling_freq_hz`` is not positive, ``column`` is
            beyond the last column, or the array is not 1-D or 2-D.
    """
    path = Path(file_path).resolve()
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")
    if sampling_freq_hz <= 0:
        # Fail early instead of a ZeroDivisionError / negative duration below.
        raise ValueError(f"sampling_freq_hz must be positive, got {sampling_freq_hz}")

    arr = np.load(str(path))

    if arr.ndim == 1:
        signal = arr.astype(np.float64)
    elif arr.ndim == 2:
        if column >= arr.shape[1]:
            raise ValueError(f"Column {column} out of range (array has {arr.shape[1]} cols)")
        signal = arr[:, column].astype(np.float64)
    else:
        raise ValueError(f"Expected 1‑D or 2‑D array, got {arr.ndim}‑D")

    n = len(signal)
    duration = n / sampling_freq_hz

    return {
        "signal": signal.tolist(),
        "time_s": None,
        "sampling_freq_hz": float(sampling_freq_hz),
        "n_samples": n,
        "duration_s": round(duration, 6),
        "file_path": str(path),
        "format": "npy",
        "metadata": {
            "original_shape": list(arr.shape),
            "selected_column": column if arr.ndim == 2 else None,
        },
    }
284
+
285
+
286
+ # ---------------------------------------------------------------------------
287
+ # Unified loader (auto‑detect by extension)
288
+ # ---------------------------------------------------------------------------
289
+
290
+ SUPPORTED_EXTENSIONS = {".csv", ".tsv", ".txt", ".wav", ".npy"}
291
+
292
+
293
def load_signal(
    file_path: str,
    sampling_freq_hz: float | None = None,
    signal_column: int | str = 1,
    time_column: int | str | None = 0,
    delimiter: str | None = None,
    channel: int = 0,
    skip_header: int = 1,
    max_rows: int | None = None,
) -> dict:
    """Load a motor-current signal, picking the loader from the extension.

    The lower-cased file suffix decides which specialised loader is
    called: ``.csv`` / ``.tsv`` / ``.txt`` go to ``load_csv``, ``.wav`` to
    ``load_wav`` and ``.npy`` to ``load_npy``.

    Args:
        file_path: Path to the signal file.
        sampling_freq_hz: Sampling frequency. Required for ``.npy``;
            forwarded to the CSV loader; not used for WAV.
        signal_column: CSV column (index or name) containing the signal.
        time_column: CSV column for time. ``None`` means no time column.
        delimiter: CSV delimiter. ``None`` means choose by extension
            (``,`` for .csv, ``\\t`` for .tsv/.txt).
        channel: WAV channel index.
        skip_header: CSV header rows to skip.
        max_rows: Maximum rows to read from CSV.

    Returns:
        The dictionary produced by the selected loader.

    Raises:
        ValueError: If the extension is not supported, or the file is
            ``.npy`` and ``sampling_freq_hz`` is ``None``.
    """
    resolved = Path(file_path).resolve()
    suffix = resolved.suffix.lower()
    target = str(resolved)

    if suffix == ".wav":
        return load_wav(target, channel=channel)

    if suffix == ".npy":
        if sampling_freq_hz is None:
            raise ValueError("sampling_freq_hz is required for .npy files")
        return load_npy(target, sampling_freq_hz)

    if suffix in (".csv", ".tsv", ".txt"):
        # Only fall back to an extension-based separator when none was given.
        separator = delimiter
        if separator is None:
            separator = "," if suffix == ".csv" else "\t"
        return load_csv(
            target,
            signal_column=signal_column,
            time_column=time_column,
            sampling_freq_hz=sampling_freq_hz,
            delimiter=separator,
            skip_header=skip_header,
            max_rows=max_rows,
        )

    raise ValueError(
        f"Unsupported file format: '{suffix}'. "
        f"Supported: {', '.join(sorted(SUPPORTED_EXTENSIONS))}"
    )
349
+
350
+
351
def get_signal_file_info(file_path: str) -> dict:
    """Return file metadata without loading the signal samples.

    Only cheap inspection is done: the WAV header, the ``.npy`` array
    header, or a line count plus the first two lines of a text file.
    Any error raised while inspecting the content is reported under the
    ``"error"`` key instead of being propagated (best-effort).

    Args:
        file_path: Path to the signal file.

    Returns:
        Dictionary that always contains ``file_path``, ``file_name``,
        ``extension``, ``size_bytes`` and ``size_mb``, plus
        format-specific keys:

        * WAV: ``format``, ``channels``, ``sampling_freq_hz``,
          ``n_samples``, ``duration_s``, ``sample_width_bytes``,
          ``bit_depth``.
        * NPY: ``format``, ``shape``, ``dtype``, ``n_samples``.
        * CSV / TSV / TXT: ``format``, ``header_line``,
          ``sample_data_line``, ``total_lines``, ``estimated_data_rows``.
        * Anything else: ``format`` set to ``"unknown"``.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    path = Path(file_path).resolve()
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    ext = path.suffix.lower()
    size_bytes = path.stat().st_size
    info: dict = {
        "file_path": str(path),
        "file_name": path.name,
        "extension": ext,
        "size_bytes": size_bytes,
        "size_mb": round(size_bytes / (1024 * 1024), 2),
    }

    if ext == ".wav":
        try:
            with wave.open(str(path), "rb") as wf:
                info["format"] = "wav"
                info["channels"] = wf.getnchannels()
                info["sampling_freq_hz"] = wf.getframerate()
                info["n_samples"] = wf.getnframes()
                info["duration_s"] = round(wf.getnframes() / wf.getframerate(), 3)
                info["sample_width_bytes"] = wf.getsampwidth()
                info["bit_depth"] = wf.getsampwidth() * 8
        except Exception as e:
            info["error"] = str(e)
    elif ext == ".npy":
        try:
            # Read only the array header, never the data. The private
            # version-aware helper is tried first; if this NumPy build
            # does not expose it, use the public per-version readers.
            with open(path, "rb") as fh:
                version = np.lib.format.read_magic(fh)
                try:
                    shape, _fortran, dtype = np.lib.format._read_array_header(fh, version)  # type: ignore[attr-defined]
                except AttributeError:
                    fh.seek(0)
                    np.lib.format.read_magic(fh)
                    if version[0] == 1:
                        header = np.lib.format.read_array_header_1_0(fh)
                    else:
                        header = np.lib.format.read_array_header_2_0(fh)
                    shape, _fortran, dtype = header
            info["format"] = "npy"
            info["shape"] = list(shape)
            info["dtype"] = str(dtype)
            info["n_samples"] = shape[0] if len(shape) >= 1 else 0
        except Exception as e:
            info["error"] = str(e)
    elif ext in (".csv", ".tsv", ".txt"):
        try:
            # Peek at the first two lines, then count the rest by streaming.
            with open(path, encoding="utf-8-sig") as fh:
                first_raw = fh.readline()
                second_raw = fh.readline()
                # readline() returns "" only at EOF, so truthiness tells
                # whether each of the first two lines actually exists
                # (an empty or header-only file must not count as 2 lines).
                line_count = bool(first_raw) + bool(second_raw)
                line_count += sum(1 for _ in fh)
            info["format"] = "csv"
            info["header_line"] = first_raw.strip()
            info["sample_data_line"] = second_raw.strip()
            info["total_lines"] = line_count
            info["estimated_data_rows"] = max(0, line_count - 1)
        except Exception as e:
            info["error"] = str(e)
    else:
        info["format"] = "unknown"

    return info