batplot-1.8.0-py3-none-any.whl → batplot-1.8.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batplot might be problematic.

Files changed (42)
  1. batplot/__init__.py +1 -1
  2. batplot/args.py +5 -3
  3. batplot/batplot.py +44 -4
  4. batplot/cpc_interactive.py +96 -3
  5. batplot/electrochem_interactive.py +28 -0
  6. batplot/interactive.py +18 -2
  7. batplot/modes.py +12 -12
  8. batplot/operando.py +2 -0
  9. batplot/operando_ec_interactive.py +112 -11
  10. batplot/session.py +35 -1
  11. batplot/utils.py +40 -0
  12. batplot/version_check.py +85 -6
  13. {batplot-1.8.0.dist-info → batplot-1.8.2.dist-info}/METADATA +1 -1
  14. batplot-1.8.2.dist-info/RECORD +75 -0
  15. {batplot-1.8.0.dist-info → batplot-1.8.2.dist-info}/top_level.txt +1 -0
  16. batplot_backup_20251221_101150/__init__.py +5 -0
  17. batplot_backup_20251221_101150/args.py +625 -0
  18. batplot_backup_20251221_101150/batch.py +1176 -0
  19. batplot_backup_20251221_101150/batplot.py +3589 -0
  20. batplot_backup_20251221_101150/cif.py +823 -0
  21. batplot_backup_20251221_101150/cli.py +149 -0
  22. batplot_backup_20251221_101150/color_utils.py +547 -0
  23. batplot_backup_20251221_101150/config.py +198 -0
  24. batplot_backup_20251221_101150/converters.py +204 -0
  25. batplot_backup_20251221_101150/cpc_interactive.py +4409 -0
  26. batplot_backup_20251221_101150/electrochem_interactive.py +4520 -0
  27. batplot_backup_20251221_101150/interactive.py +3894 -0
  28. batplot_backup_20251221_101150/manual.py +323 -0
  29. batplot_backup_20251221_101150/modes.py +799 -0
  30. batplot_backup_20251221_101150/operando.py +603 -0
  31. batplot_backup_20251221_101150/operando_ec_interactive.py +5487 -0
  32. batplot_backup_20251221_101150/plotting.py +228 -0
  33. batplot_backup_20251221_101150/readers.py +2607 -0
  34. batplot_backup_20251221_101150/session.py +2951 -0
  35. batplot_backup_20251221_101150/style.py +1441 -0
  36. batplot_backup_20251221_101150/ui.py +790 -0
  37. batplot_backup_20251221_101150/utils.py +1046 -0
  38. batplot_backup_20251221_101150/version_check.py +253 -0
  39. batplot-1.8.0.dist-info/RECORD +0 -52
  40. {batplot-1.8.0.dist-info → batplot-1.8.2.dist-info}/WHEEL +0 -0
  41. {batplot-1.8.0.dist-info → batplot-1.8.2.dist-info}/entry_points.txt +0 -0
  42. {batplot-1.8.0.dist-info → batplot-1.8.2.dist-info}/licenses/LICENSE +0 -0
batplot_backup_20251221_101150/readers.py (new file)
@@ -0,0 +1,2607 @@
1
+ """Readers for various battery cycler data formats.
2
+
3
+ This module provides parsers for different battery testing equipment file formats:
4
+
5
+ Supported Formats:
6
+ - BioLogic .mpt: ASCII text format exported by BioLogic potentiostats (EC-Lab)
7
+ - BioLogic .txt: Exported text format from EC-Lab software
8
+ - Neware .csv: CSV export from Neware battery testers
9
+ - Landt/Lanhe .xlsx: Excel files with Chinese column headers
10
+ - Generic .csv: Generic CSV with standard battery cycling columns
11
+
12
+ Key Functions:
13
+ - read_mpt_file(): Parse BioLogic .mpt files for GC, CV, CPC, and time modes
14
+ - read_biologic_txt_file(): Parse BioLogic .txt exports
15
+ - read_ec_csv_file(): Parse Neware CSV and Excel files, handles half-cycles
16
+ - read_ec_csv_dqdv_file(): Parse CSV for differential capacity analysis
17
+
18
+ Data Return Formats:
19
+ CV mode: (voltage, current, cycles)
20
+ GC mode: (capacity, voltage, cycles, charge_mask, discharge_mask)
21
+ CPC mode: (cycle_nums, cap_charge, cap_discharge, efficiency)
22
+ dQ/dV mode: (voltage, dqdv, cycles)
23
+
24
+ Special Features:
25
+ - Half-cycle detection and merging (Neware compatibility)
26
+ - Automatic column detection with fuzzy matching
27
+ - Chinese column name support (Landt/Lanhe cyclers)
28
+ - Specific capacity calculation for .mpt files
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import csv
34
+ import numpy as np
35
+ from typing import Tuple, List, Dict, Any, Optional
36
+
37
+
38
+ def _infer_cycles_from_masks(charge_mask: np.ndarray, discharge_mask: np.ndarray, n_points: int) -> np.ndarray:
39
+ """Infer full-cycle numbers by pairing alternating charge/discharge segments.
40
+
41
+ HOW IT WORKS:
42
+ ------------
43
+ Battery cycling data often comes with charge and discharge segments as separate runs.
44
+ This function intelligently pairs them into complete cycles:
45
+
46
+ Example data structure:
47
+ Charge mask: [T T T F F F T T T F F F] (True = charging point)
48
+ Discharge mask: [F F F T T T F F F T T T] (True = discharging point)
49
+ Result cycles: [1 1 1 1 1 1 2 2 2 2 2 2] (Cycle 1 = charge+discharge, Cycle 2 = charge+discharge)
50
+
51
+ Algorithm Steps:
52
+ 1. Find all contiguous charge segments (runs of True in charge_mask)
53
+ 2. Find all contiguous discharge segments (runs of True in discharge_mask)
54
+ 3. Sort all segments by their starting position (chronological order)
55
+ 4. Pair segments sequentially: segment 0+1 = Cycle 1, segment 2+3 = Cycle 2, etc.
56
+ 5. Fill in gaps (rest periods, CV steps) with the cycle number of the previous segment
57
+
58
+ WHY THIS IS NEEDED:
59
+ ------------------
60
+ Many battery cyclers export data where each charge and discharge is numbered separately
61
+ (e.g., "Charge 1", "Discharge 1", "Charge 2", "Discharge 2"). But for plotting, we want
62
+ "Cycle 1" to mean the first complete charge+discharge pair. This function ensures consistent
63
+ cycle numbering regardless of how the cycler software numbered the segments.
64
+
65
+ Args:
66
+ charge_mask: Boolean array, True where data point is during charging
67
+ discharge_mask: Boolean array, True where data point is during discharging
68
+ n_points: Total number of data points in the dataset
69
+
70
+ Returns:
71
+ cycles: Integer array of cycle numbers (1-indexed), same length as n_points
72
+ Example: [1, 1, 1, 1, 2, 2, 2, 2] means first 4 points are Cycle 1, next 4 are Cycle 2
73
+ """
74
+
75
+ # STEP 1: Find all contiguous segments (runs) of charge and discharge
76
+ # A segment is a continuous block of True values in the mask
77
+ # We store each segment as (start_index, end_index, is_charge_flag)
78
+ segments: List[Tuple[int, int, bool]] = [] # (start, end_exclusive, is_charge)
79
+
80
+ def _append_segments(mask: np.ndarray, is_charge_segment: bool):
81
+ """
82
+ Helper function to find all contiguous segments in a boolean mask.
83
+
84
+ HOW IT WORKS:
85
+ - np.where(mask)[0] gives us all indices where mask is True
86
+ - We scan through these indices looking for gaps (non-consecutive numbers)
87
+ - Each continuous block becomes one segment
88
+
89
+ Example:
90
+ mask = [F, T, T, T, F, F, T, T, F]
91
+ indices = [1, 2, 3, 6, 7]
92
+ Segments found: (1, 4) and (6, 8)
93
+ """
94
+ # Get all indices where mask is True
95
+ idx = np.where(mask)[0]
96
+ if idx.size == 0:
97
+ return # No True values found, nothing to do
98
+
99
+ # Start tracking the first segment
100
+ start = int(idx[0]) # Beginning of current segment
101
+ prev = int(idx[0]) # Previous index we saw
102
+
103
+ # Scan through remaining indices looking for gaps
104
+ for cur in idx[1:]:
105
+ # If current index is not consecutive with previous, we found a gap
106
+ # This means the previous segment ended, and a new one starts here
107
+ if cur != prev + 1:
108
+ # Save the segment we just finished: from start to prev+1 (exclusive end)
109
+ segments.append((start, prev + 1, is_charge_segment))
110
+ # Start tracking a new segment
111
+ start = int(cur)
112
+ prev = int(cur)
113
+
114
+ # Don't forget the last segment (after the loop ends)
115
+ segments.append((start, prev + 1, is_charge_segment))
116
+
117
+ # Find all charge segments (continuous blocks where charge_mask is True)
118
+ _append_segments(charge_mask, True)
119
+
120
+ # Find all discharge segments (continuous blocks where discharge_mask is True)
121
+ _append_segments(discharge_mask, False)
122
+
123
+ # STEP 2: Sort all segments by their starting position
124
+ # This puts them in chronological order (first segment that appears in data, then second, etc.)
125
+ segments.sort(key=lambda seg: seg[0])
126
+
127
+ # STEP 3: Initialize the cycles array (all zeros means "not assigned yet")
128
+ cycles = np.zeros(n_points, dtype=int)
129
+
130
+ # Edge case: if no segments found, assign everything to Cycle 1
131
+ if not segments:
132
+ cycles.fill(1)
133
+ return cycles
134
+
135
+ # STEP 4: Assign cycle numbers by pairing segments
136
+ # Strategy: Every two segments form one complete cycle
137
+ # - Segment 0 + Segment 1 = Cycle 1
138
+ # - Segment 2 + Segment 3 = Cycle 2
139
+ # - etc.
140
+ current_cycle = 1 # Start counting cycles from 1 (not 0, for user-friendly display)
141
+ half_index = 0 # Track which half of the cycle we're on (0 = first half, 1 = second half)
142
+
143
+ for start, end, _flag in segments:
144
+ # Assign all points in this segment to the current cycle number
145
+ cycles[start:end] = current_cycle
146
+
147
+ # Move to next half of cycle
148
+ half_index += 1
149
+
150
+ # If we've completed both halves (charge + discharge), move to next cycle
151
+ if half_index == 2:
152
+ current_cycle += 1
153
+ half_index = 0 # Reset for next cycle
154
+
155
+ # STEP 5: Fill in gaps (points that weren't in charge or discharge masks)
156
+ # These are typically rest periods, CV steps, or other non-active intervals
157
+ # We assign them to the same cycle as the previous segment (so they're included in cycle filters)
158
+ last_cycle = 1
159
+ for i in range(n_points):
160
+ if cycles[i] == 0:
161
+ # This point wasn't assigned (it's a gap/rest period)
162
+ # Give it the cycle number of the last assigned point
163
+ cycles[i] = last_cycle
164
+ else:
165
+ # This point was assigned, remember its cycle number for future gaps
166
+ last_cycle = cycles[i]
167
+
168
+ return cycles
169
+
170
+
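A minimal sketch of the pairing behaviour described in the docstring above, assuming _infer_cycles_from_masks is in scope; the masks mirror the docstring's example of alternating charge/discharge runs.

import numpy as np

# Two alternating charge/discharge runs should collapse into two full cycles.
charge_mask = np.array([True, True, False, False, True, True, False, False])
discharge_mask = ~charge_mask
cycles = _infer_cycles_from_masks(charge_mask, discharge_mask, n_points=8)
print(cycles)  # expected: [1 1 1 1 2 2 2 2]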
171
+ def read_excel_to_csv_like(fname: str, header_row: int = 2, data_start_row: int = 3) -> Tuple[list, list]:
172
+ """Read Excel file and convert to CSV-like structure for batplot.
173
+
174
+ This is designed for Chinese cycler data Excel files where:
175
+ - Row 1: File/sample name
176
+ - Row 2: Column headers
177
+ - Row 3+: Data
178
+
179
+ Args:
180
+ fname: Path to Excel file (.xlsx)
181
+ header_row: Row number containing headers (1-indexed, default=2)
182
+ data_start_row: First row containing data (1-indexed, default=3)
183
+
184
+ Returns:
185
+ Tuple of (header_list, rows_list) compatible with CSV processing
186
+ """
187
+ try:
188
+ import openpyxl
189
+ except ImportError:
190
+ raise ImportError("openpyxl is required to read Excel files. Install with: pip install openpyxl")
191
+
192
+ wb = openpyxl.load_workbook(fname, read_only=True, data_only=True)
193
+ ws = wb.active
194
+
195
+ # Read header row
196
+ header = []
197
+ for cell in ws[header_row]:
198
+ header.append(str(cell.value) if cell.value is not None else '')
199
+
200
+ # Read data rows
201
+ rows = []
202
+ for row in ws.iter_rows(min_row=data_start_row, values_only=True):
203
+ # Convert row to list of strings (handle None, datetime, etc.)
204
+ row_data = []
205
+ for val in row:
206
+ if val is None:
207
+ row_data.append('')
208
+ elif isinstance(val, (int, float)):
209
+ row_data.append(str(val))
210
+ else:
211
+ row_data.append(str(val))
212
+ rows.append(row_data)
213
+
214
+ wb.close()
215
+ return header, rows
216
+
217
+
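A short usage sketch for the Excel helper above, assuming a Landt/Lanhe-style workbook laid out as described (the file name is hypothetical and openpyxl must be installed).

# read_excel_to_csv_like is assumed to be in scope (defined above).
header, rows = read_excel_to_csv_like("cycling_data.xlsx", header_row=2, data_start_row=3)
print(header)     # column names taken from row 2 of the workbook
print(len(rows))  # number of data rows read from row 3 onward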
218
+ def _normalize_header_value(cell: Any) -> str:
219
+ """
220
+ Normalize header cell text by removing BOMs, tabs, and trimming whitespace.
221
+
222
+ HOW IT WORKS:
223
+ ------------
224
+ Excel/CSV files sometimes have formatting issues:
225
+ - BOM (Byte Order Mark): Invisible character '\ufeff' at start of file
226
+ - Tabs: Sometimes headers use tabs instead of spaces
227
+ - Extra whitespace: Leading/trailing spaces that cause matching issues
228
+
229
+ This function cleans all of these to ensure consistent header matching.
230
+
231
+ WHAT IS BOM?
232
+ -----------
233
+ BOM (Byte Order Mark) is a special Unicode character sometimes added at the
234
+ start of files to indicate encoding. It's invisible but can break string
235
+ matching. Removing it ensures headers match correctly.
236
+
237
+ Args:
238
+ cell: Header cell value (can be string, number, None, etc.)
239
+
240
+ Returns:
241
+ Cleaned string (no BOM, tabs converted to spaces, trimmed)
242
+ """
243
+ if cell is None:
244
+ return ''
245
+ # Convert to string, remove BOM, replace tabs with spaces, trim whitespace
246
+ return str(cell).replace('\ufeff', '').replace('\t', ' ').strip()
247
+
248
+
249
+ def _normalize_data_value(cell: Any) -> str:
250
+ """Normalize data cell text by removing BOMs and trimming whitespace."""
251
+ if cell is None:
252
+ return ''
253
+ return str(cell).replace('\ufeff', '').strip()
254
+
255
+
256
+ def _looks_like_neware_multilevel(rows: List[List[str]]) -> bool:
257
+ """
258
+ Detect Neware multi-section CSV with cycle/step/record headers.
259
+
260
+ HOW IT WORKS:
261
+ ------------
262
+ Neware battery testers export CSV files with a hierarchical structure:
263
+
264
+ Row 1: Cycle ID header
265
+ Row 2: (empty) Step ID header
266
+ Row 3: (empty) (empty) Record ID header
267
+
268
+ Example structure:
269
+ Cycle ID | Cycle Data...
270
+ (empty) | Step ID | Step Data...
271
+ (empty) | (empty) | Record ID | Record Data...
272
+
273
+ This function checks if the first 3 rows match this pattern. If they do,
274
+ we know it's a Neware multi-level format and need special parsing.
275
+
276
+ WHY DETECT THIS?
277
+ --------------
278
+ Multi-level format requires different parsing logic than simple CSV.
279
+ We need to:
280
+ 1. Identify which level each row belongs to (cycle, step, or record)
281
+ 2. Associate records with their parent step and cycle
282
+ 3. Build a hierarchical data structure
283
+
284
+ Args:
285
+ rows: List of rows (each row is a list of cell values)
286
+
287
+ Returns:
288
+ True if file matches Neware multi-level format pattern, False otherwise
289
+ """
290
+ # Need at least 3 rows to check the pattern
291
+ if len(rows) < 3:
292
+ return False
293
+
294
+ # Extract first 3 rows
295
+ r1 = rows[0] # First row (should have "Cycle ID")
296
+ r2 = rows[1] # Second row (should have empty first cell, "Step ID" in second)
297
+ r3 = rows[2] # Third row (should have empty first two cells, "Record ID" in third)
298
+
299
+ # Normalize and extract key cells (with safe indexing)
300
+ c1 = _normalize_header_value(r1[0]) if r1 else '' # Row 1, column 1
301
+ c2_first = _normalize_header_value(r2[0]) if r2 else '' # Row 2, column 1
302
+ c2_second = _normalize_header_value(r2[1]) if len(r2) > 1 else '' # Row 2, column 2
303
+ c3_first = _normalize_header_value(r3[0]) if r3 else '' # Row 3, column 1
304
+ c3_second = _normalize_header_value(r3[1]) if len(r3) > 1 else '' # Row 3, column 2
305
+ c3_third = _normalize_header_value(r3[2]) if len(r3) > 2 else '' # Row 3, column 3
306
+
307
+ # Check if pattern matches Neware multi-level format
308
+ return (
309
+ c1.lower() == 'cycle id' # Row 1 starts with "Cycle ID"
310
+ and c2_first == '' # Row 2, column 1 is empty
311
+ and c2_second.lower() == 'step id' # Row 2, column 2 is "Step ID"
312
+ and c3_first == '' # Row 3, column 1 is empty
313
+ and c3_second == '' # Row 3, column 2 is empty
314
+ and c3_third.lower() == 'record id' # Row 3, column 3 is "Record ID"
315
+ )
316
+
317
+
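A sketch of the detection logic on the staircase header pattern described above, assuming _looks_like_neware_multilevel is in scope; column names beyond the three IDs are illustrative.

neware_like = [
    ["Cycle ID", "Chg. Cap.(mAh)", "DChg. Cap.(mAh)"],
    ["", "Step ID", "Step Type"],
    ["", "", "Record ID", "Voltage(V)", "Current(mA)"],
]
flat_csv = [["Voltage(V)", "Current(mA)"], ["3.2", "1.5"]]
print(_looks_like_neware_multilevel(neware_like))  # True
print(_looks_like_neware_multilevel(flat_csv))     # False (no Cycle/Step/Record staircase)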
318
+ def _parse_neware_multilevel_rows(rows: List[List[str]]) -> Optional[Dict[str, Any]]:
319
+ """Parse multi-level Neware CSV into normalized headers/rows."""
320
+ record_header: Optional[List[str]] = None
321
+ record_rows: List[List[str]] = []
322
+ cycle_header: Optional[List[str]] = None
323
+ cycle_rows: List[List[str]] = []
324
+ step_header: Optional[List[str]] = None
325
+ step_rows: List[List[str]] = []
326
+
327
+ current_cycle_id: Optional[str] = None
328
+ current_step_id: Optional[str] = None
329
+ current_step_name: Optional[str] = None
330
+
331
+ for raw_row in rows:
332
+ normalized = [_normalize_data_value(cell) for cell in raw_row]
333
+ if not any(normalized):
334
+ continue
335
+
336
+ first = normalized[0] if len(normalized) > 0 else ''
337
+ second = normalized[1] if len(normalized) > 1 else ''
338
+
339
+ # Header rows
340
+ if first.lower() == 'cycle id':
341
+ cycle_header = [_normalize_header_value(cell) for cell in raw_row]
342
+ continue
343
+ if first == '' and second.lower() == 'step id':
344
+ step_header = ['Cycle ID'] + [_normalize_header_value(cell) for cell in raw_row[1:]]
345
+ continue
346
+ if first == '' and second == '' and len(normalized) > 2 and normalized[2].lower() == 'record id':
347
+ record_header = ['Cycle ID', 'Step ID', 'Step Type'] + [
348
+ _normalize_header_value(cell) for cell in raw_row[2:]
349
+ ]
350
+ continue
351
+
352
+ # Cycle summary row
353
+ if first != '':
354
+ current_cycle_id = first
355
+ cycle_rows.append(normalized)
356
+ continue
357
+
358
+ # Step summary row (belongs to current cycle)
359
+ if first == '' and second != '':
360
+ current_step_id = second
361
+ current_step_name = normalized[2] if len(normalized) > 2 else ''
362
+ step_rows.append([current_cycle_id or '', second] + normalized[2:])
363
+ continue
364
+
365
+ # Record row
366
+ if record_header is None:
367
+ continue
368
+ record_payload = normalized[2:]
369
+ required_len = max(len(record_header) - 3, 0)
370
+ if len(record_payload) < required_len:
371
+ record_payload.extend([''] * (required_len - len(record_payload)))
372
+ elif len(record_payload) > required_len:
373
+ record_payload = record_payload[:required_len]
374
+ record_rows.append([
375
+ current_cycle_id or '',
376
+ current_step_id or '',
377
+ current_step_name or '',
378
+ ] + record_payload)
379
+
380
+ if record_header is None or not record_rows:
381
+ return None
382
+
383
+ return {
384
+ 'record_header': record_header,
385
+ 'record_rows': record_rows,
386
+ 'cycle_header': cycle_header,
387
+ 'cycle_rows': cycle_rows,
388
+ 'step_header': step_header,
389
+ 'step_rows': step_rows,
390
+ }
391
+
392
+
393
+ def _load_csv_header_and_rows(fname: str) -> Tuple[List[str], List[List[str]], Optional[Dict[str, Any]]]:
394
+ """Load CSV file and return header/rows with Neware multi-level fallback."""
395
+ with open(fname, newline='', encoding='utf-8', errors='ignore') as f:
396
+ reader = csv.reader(f)
397
+ all_rows = list(reader)
398
+
399
+ if len(all_rows) < 2:
400
+ raise ValueError(f"CSV '{fname}' is empty or missing header rows")
401
+
402
+ if _looks_like_neware_multilevel(all_rows):
403
+ parsed = _parse_neware_multilevel_rows(all_rows)
404
+ if parsed is None:
405
+ raise ValueError("Detected Neware multi-section CSV but failed to parse record rows.")
406
+ return parsed['record_header'], parsed['record_rows'], parsed
407
+
408
+ r1 = all_rows[0]
409
+ r2 = all_rows[1]
410
+ if len(r2) > 0 and (_normalize_header_value(r2[0]) == ''):
411
+ header = [_normalize_header_value(c) for c in r1] + [_normalize_header_value(c) for c in r2[1:]]
412
+ rows = all_rows[2:]
413
+ else:
414
+ header = [_normalize_header_value(c) for c in r1]
415
+ rows = all_rows[1:]
416
+
417
+ return header, rows, None
418
+
419
+
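A sketch of the two-row header merge path in the loader above, assuming _load_csv_header_and_rows is in scope; the CSV content is hypothetical.

import os, tempfile

csv_text = "Cycle Index,Voltage(V)\n,Current(mA)\n1,3.7,0.5\n1,3.8,0.5\n"
with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as tmp:
    tmp.write(csv_text)
    path = tmp.name
header, rows, extra = _load_csv_header_and_rows(path)
print(header)   # ['Cycle Index', 'Voltage(V)', 'Current(mA)']; row 2 extends row 1
print(rows[0])  # ['1', '3.7', '0.5']
print(extra)    # None (not a Neware multi-section file)
os.remove(path)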
420
+ def read_csv_file(fname: str):
421
+ for delim in [",", ";", "\t"]:
422
+ try:
423
+ data = np.genfromtxt(fname, delimiter=delim, comments="#")
424
+ if data.ndim == 1:
425
+ data = data.reshape(1, -1)
426
+ if data.shape[1] >= 2:
427
+ return data
428
+ except Exception:
429
+ continue
430
+ raise ValueError(f"Invalid CSV format in {fname}, need at least 2 columns (x,y).")
431
+
432
+
433
+ def read_gr_file(fname: str):
434
+ """Read a PDF .gr file (r, G(r))."""
435
+ r_vals = []
436
+ g_vals = []
437
+ with open(fname, "r") as f:
438
+ for line in f:
439
+ ls = line.strip()
440
+ if not ls or ls.startswith("#"):
441
+ continue
442
+ parts = ls.replace(",", " ").split()
443
+ floats = []
444
+ for p in parts:
445
+ try:
446
+ floats.append(float(p))
447
+ except ValueError:
448
+ break
449
+ if len(floats) >= 2:
450
+ r_vals.append(floats[0])
451
+ g_vals.append(floats[1])
452
+ if not r_vals:
453
+ raise ValueError(f"No numeric data found in {fname}")
454
+ return np.array(r_vals, dtype=float), np.array(g_vals, dtype=float)
455
+
456
+
457
+ def read_fullprof_rowwise(fname: str):
458
+ with open(fname, "r") as f:
459
+ lines = f.readlines()[1:]
460
+ y_rows = []
461
+ for line in lines:
462
+ line = line.strip()
463
+ if not line or line.startswith("#"):
464
+ continue
465
+ y_rows.extend([float(val) for val in line.split()])
466
+ y = np.array(y_rows)
467
+ return y, len(lines)
468
+
469
+
470
+ def robust_loadtxt_skipheader(fname: str):
471
+ """Skip comments/non-numeric lines and load at least 2-column numeric data.
472
+
473
+ Flexibly handles comma, space, tab, or mixed delimiters.
474
+ """
475
+ data_lines = []
476
+ with open(fname, "r") as f:
477
+ for line in f:
478
+ ls = line.strip()
479
+ if not ls or ls.startswith("#"):
480
+ continue
481
+ # Normalize delimiters: replace commas and tabs with spaces
482
+ # This handles CSV (comma), TSV (tab), space-separated, and mixed formats
483
+ ls_normalized = ls.replace(",", " ").replace("\t", " ")
484
+ floats = []
485
+ for p in ls_normalized.split():
486
+ try:
487
+ floats.append(float(p))
488
+ except ValueError:
489
+ break
490
+ if len(floats) >= 2:
491
+ # Store the normalized line (with all delimiters converted to spaces)
492
+ data_lines.append(ls_normalized)
493
+ if not data_lines:
494
+ raise ValueError(f"No numeric data found in {fname}")
495
+ from io import StringIO
496
+ return np.loadtxt(StringIO("\n".join(data_lines)))
497
+
498
+
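A sketch of the mixed-delimiter handling described above, assuming robust_loadtxt_skipheader is in scope; the file content is hypothetical.

import os, tempfile

text = "# measured pattern\n2theta intensity\n10.0, 120\n10.1\t135\n10.2 150\n"
with tempfile.NamedTemporaryFile("w", suffix=".xy", delete=False) as tmp:
    tmp.write(text)
    path = tmp.name
data = robust_loadtxt_skipheader(path)
print(data.shape)  # (3, 2): the comment line and the text header line are skipped
os.remove(path)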
499
+ def read_mpt_file(fname: str, mode: str = 'gc', mass_mg: float = None):
500
+ """Read BioLogic .mpt file in various modes.
501
+
502
+ BioLogic .mpt files come in two formats:
503
+ 1. Full EC-Lab format: Complete header with metadata and column names
504
+ 2. Simple export: Just 2-3 columns with minimal/no header
505
+
506
+ This function automatically detects the format and parses accordingly.
507
+
508
+ Modes Explained:
509
+ - 'gc' (Galvanostatic Cycling): Returns capacity vs voltage curves
510
+ Calculates specific capacity from Q(discharge) and active material mass
511
+ Identifies charge/discharge segments from current sign
512
+
513
+ - 'cv' (Cyclic Voltammetry): Returns voltage vs current curves
514
+ Used for electrochemical characterization
515
+
516
+ - 'cpc' (Capacity Per Cycle): Returns cycle statistics
517
+ Extracts max charge/discharge capacity for each cycle
518
+ Calculates coulombic efficiency = Q_discharge / Q_charge * 100
519
+
520
+ - 'time': Returns time-series data (for operando plots)
521
+ Simple x-y format without cycle processing
522
+
523
+ Args:
524
+ fname: Path to .mpt file
525
+ mode: Operating mode - 'gc', 'cv', 'cpc', or 'time'
526
+ mass_mg: Active material mass in milligrams
527
+ Required for 'gc' and 'cpc' modes to calculate specific capacity
528
+ Units: mAh/g = (mAh) / (mg / 1000)
529
+
530
+ Returns:
531
+ Depends on mode:
532
+
533
+ 'gc' mode: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]
534
+ (specific_capacity, voltage, cycles, charge_mask, discharge_mask)
535
+ - specific_capacity: Specific capacity in mAh/g
536
+ - voltage: Voltage in V
537
+ - cycles: Cycle number for each data point
538
+ - charge_mask: Boolean array, True for charging points
539
+ - discharge_mask: Boolean array, True for discharging points
540
+
541
+ 'cv' mode: Tuple[np.ndarray, np.ndarray, np.ndarray]
542
+ (voltage, current, cycles)
543
+
544
+ 'cpc' mode: Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
545
+ (cycle_nums, cap_charge, cap_discharge, efficiency)
546
+ - cycle_nums: Array of cycle numbers [1, 2, 3, ...]
547
+ - cap_charge: Max specific capacity for each cycle (charge)
548
+ - cap_discharge: Max specific capacity for each cycle (discharge)
549
+ - efficiency: Coulombic efficiency % for each cycle
550
+
551
+ 'time' mode: Tuple[np.ndarray, np.ndarray, np.ndarray, str, str]
552
+ (time, voltage, current, x_label, y_label)
553
+ For simple 2-column files: returns (x, y, zeros array, x_label, y_label); labels default to 'x', 'y' if no header line is found
554
+
555
+ Raises:
556
+ ValueError: If mass_mg not provided for 'gc' or 'cpc' mode
557
+ FileNotFoundError: If file doesn't exist
558
+ Exception: If file format is invalid or columns not found
559
+
560
+ File Format Notes:
561
+ - EC-Lab format starts with "EC-Lab ASCII FILE"
562
+ - Header contains "Nb header lines" to skip
563
+ - Column names may be in French or English
564
+ - Simple exports are just tab/space-separated numbers
565
+ """
566
+ import re
567
+
568
+ # === STEP 1: Detect file format ===
569
+ # EC-Lab files start with specific marker, simple exports don't
570
+ is_eclab_format = False
571
+ with open(fname, 'r', encoding='utf-8', errors='ignore') as f:
572
+ first_line = f.readline().strip()
573
+ if first_line.startswith('EC-Lab ASCII FILE'):
574
+ is_eclab_format = True
575
+
576
+ # Handle simple 2-column time/voltage export format (for operando time mode)
577
+ if not is_eclab_format and mode == 'time':
578
+ try:
579
+ # Read with flexible delimiter (tab or whitespace) and handle European comma decimal separator
580
+ with open(fname, 'r', encoding='utf-8', errors='ignore') as f:
581
+ x_vals = []
582
+ y_vals = []
583
+ x_label = 'x'
584
+ y_label = 'y'
585
+ first_line_processed = False
586
+
587
+ for line in f:
588
+ line = line.strip()
589
+ if not line:
590
+ continue
591
+
592
+ # Try to parse as numeric data
593
+ # First try tab-separated, then space-separated
594
+ # Files can use either delimiter, so we check for tabs first
595
+ parts = line.split('\t') if '\t' in line else line.split()
596
+
597
+ if len(parts) >= 2:
598
+ try:
599
+ # Try to parse as numbers
600
+ # Replace comma with period for European locale (e.g., "3,14" → "3.14")
601
+ # European countries use comma as decimal separator, Python expects period
602
+ x_val = float(parts[0].replace(',', '.'))
603
+ y_val = float(parts[1].replace(',', '.'))
604
+ x_vals.append(x_val)
605
+ y_vals.append(y_val)
606
+ first_line_processed = True # Mark that we've seen data
607
+ except ValueError:
608
+ # Parsing failed - this line contains non-numeric data
609
+ if not first_line_processed:
610
+ # This is likely a header line (before any data)
611
+ # Extract column names for labels
612
+ x_label = parts[0].strip()
613
+ y_label = parts[1].strip() if len(parts) > 1 else 'y'
614
+ continue # Skip header line, continue to next
615
+ else:
616
+ # We've already seen data, so this non-numeric line means end of data
617
+ # Stop reading (might be footer or different section)
618
+ break
619
+
620
+ if not x_vals:
621
+ raise ValueError("No data found in file")
622
+
623
+ x_data = np.array(x_vals)
624
+ y_data = np.array(y_vals)
625
+ current_mA = np.zeros_like(x_data) # No current data in simple format
626
+
627
+ # Return raw data without conversion, and include column labels
628
+ return x_data, y_data, current_mA, x_label, y_label
629
+ except Exception as e:
630
+ raise ValueError(f"Failed to read simple .mpt format: {e}")
631
+
632
+ # For non-time modes or EC-Lab format, require full EC-Lab format
633
+ if not is_eclab_format:
634
+ raise ValueError(f"Not a valid EC-Lab .mpt file: {fname}")
635
+
636
+ # Read header to find number of header lines
637
+ header_lines = 0
638
+ with open(fname, 'r', encoding='utf-8', errors='ignore') as f:
639
+ first_line = f.readline().strip()
640
+
641
+ # Find header lines count
642
+ for line in f:
643
+ if line.startswith('Nb header lines'):
644
+ match = re.search(r'Nb header lines\s*:\s*(\d+)', line)
645
+ if match:
646
+ header_lines = int(match.group(1))
647
+ break
648
+ if header_lines == 0:
649
+ raise ValueError(f"Could not find header line count in {fname}")
650
+
651
+ # Read the data
652
+ data_lines = []
653
+ column_names = []
654
+
655
+ with open(fname, 'r', encoding='utf-8', errors='ignore') as f:
656
+ # Skip header lines
657
+ for i in range(header_lines - 1):
658
+ f.readline()
659
+
660
+ # Read column names (should be at header_lines - 1)
661
+ header_line = f.readline().strip()
662
+ column_names = [col.strip() for col in header_line.split('\t')]
663
+
664
+ # Read data lines
665
+ for line in f:
666
+ line = line.strip()
667
+ if not line:
668
+ continue
669
+ try:
670
+ # Replace comma decimal separator with period (European locale support)
671
+ values = [float(val.replace(',', '.')) for val in line.split('\t')]
672
+ if len(values) == len(column_names):
673
+ data_lines.append(values)
674
+ except ValueError:
675
+ continue
676
+
677
+ if not data_lines:
678
+ raise ValueError(f"No valid data found in {fname}")
679
+
680
+ # Convert to numpy array
681
+ data = np.array(data_lines)
682
+
683
+ # Create column index mapping
684
+ col_map = {name: i for i, name in enumerate(column_names)}
685
+ col_map_lower = {name.lower(): i for name, i in col_map.items()}
686
+
687
+ def _find_column_index(candidates):
688
+ """Return the index of the first matching column"""
689
+ for cand in candidates:
690
+ if cand in col_map:
691
+ return col_map[cand]
692
+ for cand in candidates:
693
+ idx = col_map_lower.get(cand.lower())
694
+ if idx is not None:
695
+ return idx
696
+ return None
697
+
698
+ def _split_combined_q_arrays():
699
+ """Build Q charge/discharge arrays from combined columns."""
700
+ combined_idx = _find_column_index([
701
+ 'Q charge/discharge/mA.h',
702
+ 'Q charge/discharge/mAh',
703
+ 'Capacity/mA.h',
704
+ 'Capacity/mAh',
705
+ ])
706
+ half_cycle_idx = _find_column_index(['half cycle', 'Half cycle', 'Half-cycle'])
707
+
708
+ if combined_idx is None or half_cycle_idx is None:
709
+ missing = []
710
+ if combined_idx is None:
711
+ missing.append("'Q charge/discharge/mA.h'")
712
+ if half_cycle_idx is None:
713
+ missing.append("'half cycle'")
714
+ missing_str = " and ".join(missing)
715
+ available = ', '.join(f"'{c}'" for c in column_names)
716
+ raise ValueError(
717
+ f"Could not find {missing_str} columns required to parse combined capacity format.\n"
718
+ f"Available columns: {available}"
719
+ )
720
+
721
+ combined = data[:, combined_idx]
722
+ half_cycle = data[:, half_cycle_idx]
723
+ half_cycle_int = half_cycle.astype(int)
724
+ current_idx = _find_column_index(['<I>/mA', '<I>/A', 'I/mA'])
725
+ current_data = data[:, current_idx] if current_idx is not None else None
726
+
727
+ n = len(combined)
728
+ q_charge = np.zeros(n, dtype=float)
729
+ q_discharge = np.zeros(n, dtype=float)
730
+
731
+ # Determine charge/discharge roles for each half-cycle block
732
+ unique_states = list(dict.fromkeys(half_cycle_int.tolist()))
733
+ if not unique_states:
734
+ unique_states = [0]
735
+
736
+ state_roles = {}
737
+ if current_data is not None:
738
+ for state in unique_states:
739
+ mask = (half_cycle_int == state)
740
+ if not np.any(mask):
741
+ continue
742
+ mean_current = np.nanmean(current_data[mask])
743
+ if np.isnan(mean_current):
744
+ continue
745
+ if mean_current > 0:
746
+ state_roles[state] = 'charge'
747
+ elif mean_current < 0:
748
+ state_roles[state] = 'discharge'
749
+
750
+ # Ensure both roles exist; fall back to alternating assignment if needed
751
+ if 'charge' not in state_roles.values() or 'discharge' not in state_roles.values():
752
+ for idx, state in enumerate(unique_states):
753
+ if state not in state_roles:
754
+ state_roles[state] = 'charge' if idx % 2 == 1 else 'discharge'
755
+ if 'charge' not in state_roles.values():
756
+ state_roles[unique_states[-1]] = 'charge'
757
+ if 'discharge' not in state_roles.values():
758
+ state_roles[unique_states[0]] = 'discharge'
759
+
760
+ i = 0
761
+ segment_counter = 0
762
+ while i < n:
763
+ state = half_cycle_int[i]
764
+ start = i
765
+ start_val = combined[i]
766
+ i += 1
767
+ while i < n and half_cycle_int[i] == state:
768
+ i += 1
769
+ segment = np.abs(combined[start:i] - start_val)
770
+ if segment.size:
771
+ segment = np.maximum.accumulate(segment)
772
+ role = state_roles.get(state)
773
+ if role is None:
774
+ role = 'charge' if segment_counter % 2 == 1 else 'discharge'
775
+ if role == 'charge':
776
+ q_charge[start:i] = segment
777
+ else:
778
+ q_discharge[start:i] = segment
779
+ segment_counter += 1
780
+
781
+ return q_charge, q_discharge
782
+
783
+ def _get_q_columns_or_fallback():
784
+ """Return Q charge and Q discharge arrays, building them if necessary."""
785
+ q_charge_idx = _find_column_index(['Q charge/mA.h', 'Q charge/mAh'])
786
+ q_discharge_idx = _find_column_index(['Q discharge/mA.h', 'Q discharge/mAh'])
787
+ q_charge = data[:, q_charge_idx] if q_charge_idx is not None else None
788
+ q_discharge = data[:, q_discharge_idx] if q_discharge_idx is not None else None
789
+
790
+ if q_charge is not None and q_discharge is not None:
791
+ return q_charge, q_discharge
792
+
793
+ # Fall back to combined column format (newer EC-Lab exports)
794
+ return _split_combined_q_arrays()
795
+
796
+ if mode == 'gc':
797
+ # Galvanostatic cycling: use BioLogic's Q charge and Q discharge columns
798
+ if mass_mg is None or mass_mg <= 0:
799
+ raise ValueError("Mass loading (in mg) is required and must be positive for GC mode. Use --mass parameter.")
800
+
801
+ mass_g = float(mass_mg) / 1000.0
802
+
803
+ # Skip first line of data as requested
804
+ data = data[1:]
805
+
806
+ # Required columns - try common variations
807
+ voltage_col = col_map.get('Ewe/V', None)
808
+ if voltage_col is None:
809
+ voltage_col = col_map.get('Ewe', None)
810
+
811
+ q_charge, q_discharge = _get_q_columns_or_fallback()
812
+
813
+ if voltage_col is None:
814
+ available = ', '.join(f"'{c}'" for c in column_names)
815
+ raise ValueError(f"Could not find 'Ewe/V' or 'Ewe' column for voltage.\nAvailable columns: {available}")
816
+
817
+ voltage = data[:, voltage_col]
818
+
819
+ n = len(voltage)
820
+
821
+ # Determine if experiment starts with charge or discharge
822
+ # by checking which Q column increases first
823
+ starts_with_charge = None
824
+ for i in range(min(100, n - 1)):
825
+ if q_charge[i+1] > q_charge[i] + 1e-6:
826
+ starts_with_charge = True
827
+ break
828
+ elif q_discharge[i+1] > q_discharge[i] + 1e-6:
829
+ starts_with_charge = False
830
+ break
831
+
832
+ if starts_with_charge is None:
833
+ # Default to charge if no clear increase detected
834
+ starts_with_charge = True
835
+
836
+ # Detect charge/discharge segments based on when Q values drop to 0
837
+ # The end of charge is when Q charge drops to ~0
838
+ # The end of discharge is when Q discharge drops to ~0
839
+ is_charge = np.zeros(n, dtype=bool)
840
+
841
+ # Set initial state
842
+ current_is_charge = starts_with_charge
843
+ is_charge[0] = current_is_charge
844
+
845
+ # Detect segment boundaries by finding where Q values reset to ~0
846
+ for i in range(1, n):
847
+ if current_is_charge:
848
+ # We're in a charge segment
849
+ # End of charge is when Q charge drops to near 0
850
+ if q_charge[i] < 1e-10 and q_charge[i-1] > 1e-6:
851
+ # Q charge just dropped to 0, switch to discharge
852
+ current_is_charge = False
853
+ else:
854
+ # We're in a discharge segment
855
+ # End of discharge is when Q discharge drops to near 0
856
+ if q_discharge[i] < 1e-10 and q_discharge[i-1] > 1e-6:
857
+ # Q discharge just dropped to 0, switch to charge
858
+ current_is_charge = True
859
+
860
+ is_charge[i] = current_is_charge
861
+
862
+ # Find charge/discharge segment boundaries
863
+ run_starts = [0]
864
+ for k in range(1, n):
865
+ if is_charge[k] != is_charge[k-1]:
866
+ run_starts.append(k)
867
+ run_starts.append(n)
868
+
869
+ # Create masks
870
+ charge_mask = is_charge
871
+ discharge_mask = ~is_charge
872
+
873
+ # Calculate specific capacity for each segment, starting from 0
874
+ specific_capacity = np.zeros(n, dtype=float)
875
+
876
+ for r in range(len(run_starts) - 1):
877
+ start_idx = run_starts[r]
878
+ end_idx = run_starts[r + 1]
879
+
880
+ if is_charge[start_idx]:
881
+ # Use Q charge column
882
+ q_values = q_charge[start_idx:end_idx]
883
+ else:
884
+ # Use Q discharge column
885
+ q_values = q_discharge[start_idx:end_idx]
886
+
887
+ # Reset capacity to start from 0 for this segment
888
+ q_start = q_values[0]
889
+ specific_capacity[start_idx:end_idx] = (q_values - q_start) / mass_g
890
+
891
+ # Assign cycle numbers: each full charge-discharge or discharge-charge pair is one cycle
892
+ cycle_numbers = np.zeros(n, dtype=int)
893
+ current_cycle = 1
894
+ half_cycle = 0 # Track if we're on first or second half of cycle
895
+
896
+ for r in range(len(run_starts) - 1):
897
+ start_idx = run_starts[r]
898
+ end_idx = run_starts[r + 1]
899
+
900
+ cycle_numbers[start_idx:end_idx] = current_cycle
901
+
902
+ half_cycle += 1
903
+ if half_cycle == 2:
904
+ # Completed one full cycle (charge+discharge or discharge+charge)
905
+ current_cycle += 1
906
+ half_cycle = 0
907
+
908
+ return (specific_capacity, voltage, cycle_numbers, charge_mask, discharge_mask)
909
+
910
+ elif mode == 'time':
911
+ # Time series: time vs voltage/current
912
+ time_col = col_map.get('time/s', None)
913
+ voltage_col = col_map.get('Ewe/V', None)
914
+ if voltage_col is None:
915
+ voltage_col = col_map.get('Ewe', None)
916
+ current_col = _find_column_index(['<I>/mA', '<I>/A', 'I/mA'])
917
+
918
+ if time_col is None:
919
+ available = ', '.join(f"'{c}'" for c in column_names)
920
+ raise ValueError(f"Could not find 'time/s' column.\nAvailable columns: {available}")
921
+ if voltage_col is None:
922
+ available = ', '.join(f"'{c}'" for c in column_names)
923
+ raise ValueError(f"Could not find 'Ewe/V' or 'Ewe' column.\nAvailable columns: {available}")
924
+
925
+ # Convert seconds → hours to match operando/EC panel expectations
926
+ time_data = data[:, time_col] / 3600.0
927
+ voltage_data = data[:, voltage_col]
928
+
929
+ # Current column is optional (only needed for advanced features like ion counting)
930
+ current_data = data[:, current_col] if current_col is not None else None
931
+
932
+ # For EC-Lab files, return standard labels
933
+ return (time_data, voltage_data, current_data, 'Time (h)', 'Voltage (V)')
934
+
935
+ elif mode == 'cv':
936
+ # Cyclic voltammetry: voltage vs current, split by cycle
937
+ voltage_col = col_map.get('Ewe/V', None)
938
+ if voltage_col is None:
939
+ voltage_col = col_map.get('Ewe', None)
940
+ current_col = col_map.get('<I>/mA', None)
941
+ cycle_col = col_map.get('cycle number', None)
942
+
943
+ if voltage_col is None:
944
+ available = ', '.join(f"'{c}'" for c in column_names)
945
+ raise ValueError(f"Could not find 'Ewe/V' or 'Ewe' column for voltage.\nAvailable columns: {available}")
946
+ if current_col is None:
947
+ available = ', '.join(f"'{c}'" for c in column_names)
948
+ raise ValueError(f"Could not find '<I>/mA' column for current.\nAvailable columns: {available}")
949
+
950
+ voltage = data[:, voltage_col]
951
+ current = data[:, current_col]
952
+ if cycle_col is not None:
953
+ cycles = data[:, cycle_col].astype(int)
954
+ else:
955
+ cycles = np.ones(len(voltage), dtype=int)
956
+ return voltage, current, cycles
957
+ elif mode == 'cpc':
958
+ # Capacity-per-cycle: extract end-of-segment charge/discharge capacities and efficiency
959
+ if mass_mg is None or mass_mg <= 0:
960
+ raise ValueError("Mass loading (mg) is required and must be positive for CPC mode. Use --mass.")
961
+
962
+ mass_g = float(mass_mg) / 1000.0
963
+
964
+ # Skip first line of data
965
+ data = data[1:]
966
+
967
+ q_charge, q_discharge = _get_q_columns_or_fallback()
968
+
969
+ n = len(q_charge)
970
+
971
+ # Determine if experiment starts with charge or discharge
972
+ starts_with_charge = None
973
+ for i in range(min(100, n - 1)):
974
+ if q_charge[i+1] > q_charge[i] + 1e-6:
975
+ starts_with_charge = True
976
+ break
977
+ elif q_discharge[i+1] > q_discharge[i] + 1e-6:
978
+ starts_with_charge = False
979
+ break
980
+
981
+ if starts_with_charge is None:
982
+ starts_with_charge = True
983
+
984
+ # Detect segment boundaries by finding where Q values reset to ~0
985
+ is_charge = np.zeros(n, dtype=bool)
986
+ current_is_charge = starts_with_charge
987
+ is_charge[0] = current_is_charge
988
+
989
+ for i in range(1, n):
990
+ if current_is_charge:
991
+ if q_charge[i] < 1e-10 and q_charge[i-1] > 1e-6:
992
+ current_is_charge = False
993
+ else:
994
+ if q_discharge[i] < 1e-10 and q_discharge[i-1] > 1e-6:
995
+ current_is_charge = True
996
+ is_charge[i] = current_is_charge
997
+
998
+ # Find segment boundaries
999
+ run_starts = [0]
1000
+ for k in range(1, n):
1001
+ if is_charge[k] != is_charge[k-1]:
1002
+ run_starts.append(k)
1003
+ run_starts.append(n)
1004
+
1005
+ # Extract end-of-segment capacities
1006
+ cyc_nums = []
1007
+ cap_charge_spec = []
1008
+ cap_discharge_spec = []
1009
+ eff_percent = []
1010
+
1011
+ current_cycle = 1
1012
+ half_cycle = 0
1013
+ cycle_charge_cap = np.nan
1014
+ cycle_discharge_cap = np.nan
1015
+
1016
+ for r in range(len(run_starts) - 1):
1017
+ start_idx = run_starts[r]
1018
+ end_idx = run_starts[r + 1]
1019
+
1020
+ if is_charge[start_idx]:
1021
+ # Charge segment: get capacity at end (just before it resets)
1022
+ # Use the last valid value before segment ends
1023
+ end_cap = q_charge[end_idx - 1] if end_idx > start_idx else 0.0
1024
+ cycle_charge_cap = end_cap / mass_g
1025
+ else:
1026
+ # Discharge segment: get capacity at end
1027
+ end_cap = q_discharge[end_idx - 1] if end_idx > start_idx else 0.0
1028
+ cycle_discharge_cap = end_cap / mass_g
1029
+
1030
+ half_cycle += 1
1031
+ if half_cycle == 2:
1032
+ # Completed one full cycle
1033
+ cyc_nums.append(current_cycle)
1034
+ cap_charge_spec.append(cycle_charge_cap)
1035
+ cap_discharge_spec.append(cycle_discharge_cap)
1036
+
1037
+ # Calculate efficiency
1038
+ if np.isfinite(cycle_charge_cap) and cycle_charge_cap > 0 and np.isfinite(cycle_discharge_cap):
1039
+ eff = (cycle_discharge_cap / cycle_charge_cap) * 100.0
1040
+ else:
1041
+ eff = np.nan
1042
+ eff_percent.append(eff)
1043
+
1044
+ # Reset for next cycle
1045
+ current_cycle += 1
1046
+ half_cycle = 0
1047
+ cycle_charge_cap = np.nan
1048
+ cycle_discharge_cap = np.nan
1049
+
1050
+ return (np.array(cyc_nums, dtype=float),
1051
+ np.array(cap_charge_spec, dtype=float),
1052
+ np.array(cap_discharge_spec, dtype=float),
1053
+ np.array(eff_percent, dtype=float))
1054
+
1055
+ else:
1056
+ raise ValueError(f"Unknown mode '{mode}'. Use 'gc', 'time', or 'cpc'.")
1057
+
1058
+
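A hedged usage sketch for the 'gc' branch of read_mpt_file above; the file name and active-material mass are hypothetical, and the function is assumed to be in scope.

import numpy as np

cap, volt, cycles, chg_mask, dchg_mask = read_mpt_file("cell01_GCPL.mpt", mode="gc", mass_mg=3.2)
first = cycles == 1
print("cycle 1 charge points:", int(np.sum(chg_mask & first)))
print("cycle 1 max specific capacity (mAh/g):", float(np.max(cap[first])))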
1059
+ def read_biologic_txt_file(fname: str, mode: str = 'cv') -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
1060
+ """Read BioLogic tab-separated text export (simplified format without EC-Lab header).
1061
+
1062
+ These .txt files have a single header line with tab-separated column names,
1063
+ followed by tab-separated data rows. Common format from BioLogic EC-Lab exports.
1064
+
1065
+ Args:
1066
+ fname: Path to .txt file
1067
+ mode: Currently only 'cv' is supported (cyclic voltammetry)
1068
+
1069
+ Returns:
1070
+ For 'cv' mode: (voltage, current, cycles)
1071
+ """
1072
+ data_lines = []
1073
+ column_names = []
1074
+
1075
+ with open(fname, 'r', encoding='utf-8', errors='ignore') as f:
1076
+ # First line is the header
1077
+ header_line = f.readline().strip()
1078
+ column_names = [col.strip() for col in header_line.split('\t')]
1079
+
1080
+ # Read data lines
1081
+ for line in f:
1082
+ line = line.strip()
1083
+ if not line:
1084
+ continue
1085
+ try:
1086
+ # Replace comma decimal separator with period (European locale support)
1087
+ values = [float(val.replace(',', '.')) for val in line.split('\t')]
1088
+ if len(values) == len(column_names):
1089
+ data_lines.append(values)
1090
+ except ValueError:
1091
+ continue
1092
+
1093
+ if not data_lines:
1094
+ raise ValueError(f"No valid data found in {fname}")
1095
+
1096
+ # Convert to numpy array
1097
+ data = np.array(data_lines)
1098
+
1099
+ # Create column index mapping
1100
+ col_map = {name: i for i, name in enumerate(column_names)}
1101
+
1102
+ if mode == 'cv':
1103
+ # Cyclic voltammetry: voltage vs current, split by cycle
1104
+ voltage_col = col_map.get('Ewe/V', None)
1105
+ if voltage_col is None:
1106
+ voltage_col = col_map.get('Ewe', None)
1107
+ current_col = col_map.get('<I>/mA', None)
1108
+ cycle_col = col_map.get('cycle number', None)
1109
+
1110
+ if voltage_col is None:
1111
+ available = ', '.join(f"'{c}'" for c in column_names)
1112
+ raise ValueError(f"Could not find 'Ewe/V' or 'Ewe' column for voltage.\nAvailable columns: {available}")
1113
+ if current_col is None:
1114
+ available = ', '.join(f"'{c}'" for c in column_names)
1115
+ raise ValueError(f"Could not find '<I>/mA' column for current.\nAvailable columns: {available}")
1116
+
1117
+ voltage = data[:, voltage_col]
1118
+ current = data[:, current_col]
1119
+ if cycle_col is not None:
1120
+ cycles = data[:, cycle_col].astype(int)
1121
+ else:
1122
+ cycles = np.ones(len(voltage), dtype=int)
1123
+ return voltage, current, cycles
1124
+ else:
1125
+ raise ValueError(f"Unknown mode '{mode}' for .txt file. Currently only 'cv' is supported.")
1126
+
1127
+
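A usage sketch for the CV text reader above, plotting each cycle separately; the file name is hypothetical and matplotlib is only needed for the plot itself.

import numpy as np
import matplotlib.pyplot as plt

voltage, current, cycles = read_biologic_txt_file("cv_export.txt", mode="cv")
for cyc in np.unique(cycles):
    sel = cycles == cyc
    plt.plot(voltage[sel], current[sel], label=f"cycle {cyc}")
plt.xlabel("Ewe (V)")
plt.ylabel("<I> (mA)")
plt.legend()
plt.show()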
1128
+ def read_ec_csv_file(fname: str, prefer_specific: bool = True) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
1129
+ """Read battery cycler CSV or Excel file and extract galvanostatic cycling data.
1130
+
1131
+ This function handles data files exported from battery cyclers (primarily Neware format),
1132
+ supporting both CSV and Excel formats with optional Chinese column names. It implements
1133
+ sophisticated half-cycle merging logic to properly pair charge/discharge segments into
1134
+ complete cycles.
1135
+
1136
+ Supported File Formats:
1137
+ -----------------------
1138
+ CSV Files:
1139
+ - Two-line header format (Neware standard):
1140
+ * Line 1: Main column names
1141
+ * Line 2: Continues with additional columns (first cell empty)
1142
+ - Single-line header format (simplified exports)
1143
+ - UTF-8 encoding with error tolerance for malformed characters
1144
+
1145
+ Excel Files (.xlsx, .xls):
1146
+ - Row 1: File/sample metadata (ignored)
1147
+ - Row 2: Column headers (Chinese or English)
1148
+ - Row 3+: Data rows
1149
+ - Automatically converted to CSV-like format internally
1150
+
1151
+ Column Detection (Flexible Fuzzy Matching):
1152
+ -------------------------------------------
1153
+ Required columns (at least one voltage/current variant):
1154
+ - Voltage: 'Voltage(V)' or Chinese equivalents ('充电中压/V', '放电中压/V', etc.)
1155
+ - Current: 'Current(mA)' (optional for summary files)
1156
+ - Cycle Index: 'Cycle Index' or '循环序号' (used for summary exports; per-point data
1157
+ infers cycles from the charge/discharge order)
1158
+
1159
+ Capacity columns (prioritized by prefer_specific flag):
1160
+ Specific capacity (mAh/g):
1161
+ - 'Spec. Cap.(mAh/g)' - combined capacity
1162
+ - 'Chg. Spec. Cap.(mAh/g)' or '充电比容量/mAh/g' - charge only
1163
+ - 'DChg. Spec. Cap.(mAh/g)' or '放电比容量/mAh/g' - discharge only
1164
+
1165
+ Absolute capacity (mAh):
1166
+ - 'Capacity(mAh)' - combined capacity
1167
+ - 'Chg. Cap.(mAh)' or '充电容量/mAh' - charge only
1168
+ - 'DChg. Cap.(mAh)' or '放电容量/mAh' - discharge only
1169
+
1170
+ Optional columns:
1171
+ - 'Step Type': Explicit charge/discharge indicator ('CC Chg', 'CC DChg', etc.)
1172
+ Used to determine charge_mask if present, otherwise inferred from capacity columns
1173
+
1174
+ Half-Cycle Merging Algorithm:
1175
+ ------------------------------
1176
+ Many cyclers export data where each charge and discharge is a separate segment.
1177
+ This function merges them into complete cycles:
1178
+
1179
+ 1. Detect segment boundaries:
1180
+ - Use 'Step Type' column if available
1181
+ - Otherwise, infer from split capacity columns (charge-only vs discharge-only)
1182
+ - Fallback: detect by voltage trend (increasing = charge, decreasing = discharge)
1183
+
1184
+ 2. Merge logic:
1185
+ - Pair consecutive segments as complete cycles regardless of the cycler's own
1186
+ numbering (makes Cycle 1 = first charge+discharge, or discharge+charge)
1187
+ - Handles odd number of segments (incomplete final cycle) by keeping the lone
1188
+ half-cycle with the previous cycle
1189
+
1190
+ 3. Capacity continuity:
1191
+ - Charge segments: capacity increases from 0 → max_charge
1192
+ - Discharge segments: capacity continues from max_charge → max_charge + discharge_capacity
1193
+ - Ensures continuous X-axis for plotting (no gaps between charge/discharge)
1194
+
1195
+ Summary File Detection:
1196
+ -----------------------
1197
+ Automatically detects summary files (cycle statistics without per-point data):
1198
+ - Has capacity columns but missing 'Voltage(V)' or 'Current(mA)'
1199
+ - Returns aggregated cycle data instead of per-point measurements
1200
+ - Used for cycle life plots (capacity retention vs cycle number)
1201
+
1202
+ Args:
1203
+ fname: Path to CSV or Excel file (.csv, .xlsx, .xls extensions supported).
1204
+ prefer_specific: Capacity unit preference flag (default=True).
1205
+ - True: Prioritize specific capacity (mAh/g) if available, fallback to absolute (mAh)
1206
+ - False: Prioritize absolute capacity (mAh) if available, fallback to specific (mAh/g)
1207
+ - Useful when files contain both units and you want to standardize plots
1208
+
1209
+ Returns:
1210
+ Tuple of 5 numpy arrays for galvanostatic cycling plots:
1211
+ capacity_x (np.ndarray): X-axis capacity values for plotting
1212
+ - Units: mAh/g (specific) or mAh (absolute) depending on file and prefer_specific flag
1213
+ - Length: N data points
1214
+ - Continuous across charge/discharge (discharge starts where charge ends)
1215
+ - Example: [0, 50, 100, 150, 100, 50, 0] for one cycle (charge 0→150, discharge 150→0)
1216
+
1217
+ voltage (np.ndarray): Y-axis voltage values in V
1218
+ - Length: N data points (matches capacity_x)
1219
+ - Typical range: 2.5-4.2 V for Li-ion cells
1220
+
1221
+ cycle_numbers (np.ndarray): Cycle index for each data point
1222
+ - Length: N data points
1223
+ - dtype: int
1224
+ - Values: 1, 2, 3, ... (1-indexed)
1225
+ - Always inferred by pairing alternating charge/discharge runs in
1226
+ chronological order (ignoring half-cycle numbering in the file)
1227
+
1228
+ charge_mask (np.ndarray): Boolean mask indicating charging data points
1229
+ - Length: N data points
1230
+ - dtype: bool
1231
+ - True where current > 0 (charging), False otherwise
1232
+ - Determined by 'Step Type' column if present, otherwise inferred from:
1233
+ * Split capacity columns (Chg. Cap. vs DChg. Cap.)
1234
+ * Voltage trend (increasing = charge, decreasing = discharge)
1235
+ - Used to apply different colors/markers to charge vs discharge in plots
1236
+
1237
+ discharge_mask (np.ndarray): Boolean mask indicating discharging data points
1238
+ - Length: N data points
1239
+ - dtype: bool
1240
+ - Exactly inverse of charge_mask (True where charge_mask is False)
1241
+ - Used for styling discharge curves differently in plots
1242
+
1243
+ Raises:
1244
+ ValueError: If CSV/Excel file is malformed or missing required columns:
1245
+ - Empty file or missing header rows
1246
+ - No 'Voltage(V)' or 'Current(mA)' columns (unless summary file)
1247
+ - No valid capacity columns found
1248
+ - File encoding errors (though UTF-8 errors='ignore' provides tolerance)
1249
+ FileNotFoundError: If fname path does not exist
1250
+ openpyxl errors: If Excel file is corrupted or unsupported format
1251
+
1252
+ Examples:
1253
+ >>> # Read Neware CSV with specific capacity preference
1254
+ >>> cap, v, cyc, chg_mask, dchg_mask = read_ec_csv_file('neware_export.csv', prefer_specific=True)
1255
+ >>> print(f"Loaded {len(cap)} points across {cyc.max()} cycles")
1256
+ >>> print(f"Capacity range: {cap.min():.1f} - {cap.max():.1f} mAh/g")
1257
+ >>>
1258
+ >>> # Plot charge and discharge with different colors
1259
+ >>> import matplotlib.pyplot as plt
1260
+ >>> plt.plot(cap[chg_mask], v[chg_mask], 'r-', label='Charge')
1261
+ >>> plt.plot(cap[dchg_mask], v[dchg_mask], 'b-', label='Discharge')
1262
+ >>>
1263
+ >>> # Read Excel file with Chinese headers, prefer absolute capacity
1264
+ >>> cap, v, cyc, _, _ = read_ec_csv_file('循环数据.xlsx', prefer_specific=False)
1265
+ >>> # cap will be in mAh if '充电容量/mAh' columns present
1266
+
1267
+ Notes:
1268
+ - Chinese column name support: Automatically translates common Chinese headers
1269
+ (循环序号, 充电比容量/mAh/g, etc.) to English equivalents for processing
1270
+ - Memory efficiency: Uses numpy arrays for fast vectorized operations on large datasets
1271
+ - Column matching: detection is an exact, case-sensitive match on stripped header text (plus the Chinese-name aliases above)
1272
+ - Half-cycle files: Many cyclers export charge and discharge as separate segments;
1273
+ this function intelligently merges them into complete cycles for proper visualization
1274
+ - Summary vs detail: Automatically detects summary files (one row per cycle) vs
1275
+ detailed files (many points per cycle) and handles appropriately
1276
+ """
1277
+ import csv
1278
+ import os
1279
+
1280
+ # Check if file is Excel
1281
+ _, ext = os.path.splitext(fname)
1282
+ if ext.lower() in ['.xlsx', '.xls']:
1283
+ # Read Excel file
1284
+ header, rows = read_excel_to_csv_like(fname)
1285
+ else:
1286
+ header, rows, _ = _load_csv_header_and_rows(fname)
1287
+
1288
+ # Build fast name->index map (case-insensitive match on exact header text)
1289
+ name_to_idx = {h: i for i, h in enumerate(header)}
1290
+
1291
+ # Chinese to English column name mappings
1292
+ chinese_mappings = {
1293
+ '循环序号': 'Cycle Index',
1294
+ '充电比容量/mAh/g': 'Chg. Spec. Cap.(mAh/g)',
1295
+ '放电比容量/mAh/g': 'DChg. Spec. Cap.(mAh/g)',
1296
+ '充电容量/mAh': 'Chg. Cap.(mAh)',
1297
+ '放电容量/mAh': 'DChg. Cap.(mAh)',
1298
+ '效率/%': 'Efficiency(%)',
1299
+ '充电中压/V': 'Voltage(V)',
1300
+ '放电中压/V': 'Voltage(V)',
1301
+ '充电均压/V': 'Voltage(V)',
1302
+ '放电均压/V': 'Voltage(V)',
1303
+ }
1304
+
1305
+ # Add Chinese mappings to name_to_idx
1306
+ for i, h in enumerate(header):
1307
+ if h in chinese_mappings:
1308
+ eng_name = chinese_mappings[h]
1309
+ if eng_name not in name_to_idx:
1310
+ name_to_idx[eng_name] = i
1311
+
1312
+ def _find(name: str):
1313
+ return name_to_idx.get(name, None)
1314
+
1315
+ # Required columns
1316
+ v_idx = _find('Voltage(V)')
1317
+ i_idx = _find('Current(mA)')
1318
+ cyc_idx = _find('Cycle Index')
1319
+ step_type_idx = _find('Step Type') # Optional: explicitly indicates charge/discharge
1320
+
1321
+ # Capacity columns (absolute preferred unless prefer_specific True)
1322
+ cap_abs_idx = _find('Capacity(mAh)')
1323
+ cap_abs_chg_idx = _find('Chg. Cap.(mAh)')
1324
+ cap_abs_dch_idx = _find('DChg. Cap.(mAh)')
1325
+ cap_spec_idx = _find('Spec. Cap.(mAh/g)')
1326
+ cap_spec_chg_idx = _find('Chg. Spec. Cap.(mAh/g)')
1327
+ cap_spec_dch_idx = _find('DChg. Spec. Cap.(mAh/g)')
1328
+
1329
+ # Check if this is a summary file (has capacity columns but no voltage/current)
1330
+ has_capacity_cols = any(idx is not None for idx in (cap_abs_chg_idx, cap_abs_dch_idx, cap_spec_chg_idx, cap_spec_dch_idx))  # compare to None so a column at index 0 still counts
1331
+ is_summary_file = has_capacity_cols and (v_idx is None or i_idx is None)
1332
+
1333
+ if not is_summary_file and (v_idx is None or i_idx is None):
1334
+ raise ValueError("CSV missing required 'Voltage(V)' or 'Current(mA)' columns")
1335
+
1336
+ use_specific = False
1337
+ # Decide which flavor to use
1338
+ if prefer_specific and (cap_spec_chg_idx is not None or cap_spec_idx is not None):
1339
+ use_specific = True
1340
+ elif not prefer_specific and (cap_abs_chg_idx is not None or cap_abs_idx is not None):
1341
+ use_specific = False
1342
+ elif cap_abs_chg_idx is None and cap_abs_idx is None and (cap_spec_idx is not None or cap_spec_chg_idx is not None):
1343
+ use_specific = True
1344
+ # else: fallback stays False (absolute) if both missing we'll error later
1345
+
1346
+ # Prepare arrays
1347
+ n = len(rows)
1348
+
1349
+ def _to_float(val: str) -> float:
1350
+ try:
1351
+ return float(val.strip()) if isinstance(val, str) else float(val)
1352
+ except Exception:
1353
+ return np.nan
1354
+
1355
+ # Special handling for summary files (charge/discharge capacities per cycle, no point-by-point data)
1356
+ if is_summary_file:
1357
+ # For summary files, create synthetic points: one charge point and one discharge point per cycle
1358
+ voltage = []
1359
+ current = []
1360
+ cycles = []
1361
+ cap_x = []
1362
+ is_charge_list = []
1363
+
1364
+ for k, row in enumerate(rows):
1365
+ if len(row) < len(header):
1366
+ row = row + [''] * (len(header) - len(row))
1367
+
1368
+ # Get cycle number
1369
+ cycle_num = 1
1370
+ if cyc_idx is not None:
1371
+ cval = _to_float(row[cyc_idx])
1372
+ cycle_num = int(cval) if not np.isnan(cval) and cval > 0 else 1
1373
+
1374
+ # Get charge and discharge capacities
1375
+ if use_specific:
1376
+ cap_chg = _to_float(row[cap_spec_chg_idx]) if cap_spec_chg_idx is not None else 0
1377
+ cap_dch = _to_float(row[cap_spec_dch_idx]) if cap_spec_dch_idx is not None else 0
1378
+ else:
1379
+ cap_chg = _to_float(row[cap_abs_chg_idx]) if cap_abs_chg_idx is not None else 0
1380
+ cap_dch = _to_float(row[cap_abs_dch_idx]) if cap_abs_dch_idx is not None else 0
1381
+
1382
+ # Create charge point
1383
+ if cap_chg > 0 and not np.isnan(cap_chg):
1384
+ voltage.append(3.5) # Synthetic voltage
1385
+ current.append(0.1) # Synthetic current
1386
+ cycles.append(cycle_num)
1387
+ cap_x.append(cap_chg)
1388
+ is_charge_list.append(True)
1389
+
1390
+ # Create discharge point
1391
+ if cap_dch > 0 and not np.isnan(cap_dch):
1392
+ voltage.append(2.5) # Synthetic voltage
1393
+ current.append(-0.1) # Synthetic current
1394
+ cycles.append(cycle_num)
1395
+ cap_x.append(cap_dch)
1396
+ is_charge_list.append(False)
1397
+
1398
+ voltage = np.array(voltage, dtype=float)
1399
+ current = np.array(current, dtype=float)
1400
+ cycles = np.array(cycles, dtype=int)
1401
+ cap_x = np.array(cap_x, dtype=float)
1402
+ is_charge = np.array(is_charge_list, dtype=bool)
1403
+
1404
+ charge_mask = is_charge
1405
+ discharge_mask = ~is_charge
1406
+
1407
+ return (cap_x, voltage, cycles, charge_mask, discharge_mask)
1408
+
1409
+ # Normal processing for point-by-point data
1410
+ voltage = np.empty(n, dtype=float)
1411
+ current = np.empty(n, dtype=float)
1412
+ cap_x = np.full(n, np.nan, dtype=float)
1413
+
1414
+ for k, row in enumerate(rows):
1415
+ # Ensure row has enough columns
1416
+ if len(row) < len(header):
1417
+ row = row + [''] * (len(header) - len(row))
1418
+ v = _to_float(row[v_idx])
1419
+ i = _to_float(row[i_idx])
1420
+ voltage[k] = v
1421
+ current[k] = i
1422
+ # Don't decide chg/dchg capacity here; we will assign after deriving direction
1423
+ # Fill combined capacity columns if present (used when split columns missing)
1424
+ if use_specific and cap_spec_idx is not None:
1425
+ cap_x[k] = _to_float(row[cap_spec_idx])
1426
+ elif (not use_specific) and cap_abs_idx is not None:
1427
+ cap_x[k] = _to_float(row[cap_abs_idx])
1428
+
1429
+ # ====================================================================================
1430
+ # CHARGE/DISCHARGE DETECTION ALGORITHM
1431
+ # ====================================================================================
1432
+ # This section determines which data points are during charging vs discharging.
1433
+ #
1434
+ # WHY THIS IS NEEDED:
1435
+ # Battery cycler files don't always clearly mark charge/discharge. Different cyclers
1436
+ # use different formats. We need a robust method that works with many file types.
1437
+ #
1438
+ # THREE-TIER PRIORITY SYSTEM (tries most reliable method first):
1439
+ # 1. Step Type column (highest priority) - explicit labels like "CC Chg", "CC DChg"
1440
+ # 2. Split capacity columns (medium priority) - separate charge/discharge capacity columns
1441
+ # 3. Voltage trend analysis (fallback) - infer from whether voltage is increasing/decreasing
1442
+ #
1443
+ # HOW IT WORKS:
1444
+ # We try each method in order. If method 1 works, we use it. If not, try method 2, etc.
1445
+ # This ensures we always get a result, even if the file format is unusual.
1446
+ # ====================================================================================
1447
+
1448
+ # Initialize arrays to track charge/discharge status for each data point
1449
+ is_charge = np.zeros(n, dtype=bool) # True = charging, False = discharging
1450
+ is_rest_segment = np.zeros(n, dtype=bool) # Track rest/CV periods (excluded from both masks)
1451
+ used_step_type = False # Flag: did we successfully use Step Type method?
1452
+ used_capacity_columns = False # Flag: did we successfully use capacity column method?
1453
+
1454
+ # ====================================================================================
1455
+ # PRIORITY 1: STEP TYPE COLUMN (Most Reliable Method)
1456
+ # ====================================================================================
1457
+ # Many cyclers have a "Step Type" column that explicitly labels each row:
1458
+ # - "CC Chg" = Constant Current Charge
1459
+ # - "CC DChg" = Constant Current Discharge
1460
+ # - "Rest" = Rest period (no current)
1461
+ # - "CV" = Constant Voltage (usually end of charge)
1462
+ #
1463
+ # This is the most reliable method because it's explicit - the cycler software tells
1464
+ # us directly what's happening. We parse the text to find keywords.
1465
+ # ====================================================================================
1466
+ if step_type_idx is not None:
1467
+ # Parse Step Type column to determine charge/discharge for each data point
1468
+ # We'll also track which rows are Rest/CV/other non-active steps (these get excluded)
1469
+ is_rest_or_other = np.zeros(n, dtype=bool)
1470
+
1471
+ # Loop through each row in the data file
1472
+ for k, row in enumerate(rows):
1473
+ # Ensure row has enough columns (some CSV files have inconsistent row lengths)
1474
+ if len(row) < len(header):
1475
+ row = row + [''] * (len(header) - len(row))
1476
+
1477
+ # Get the Step Type value for this row and convert to lowercase for case-insensitive matching
1478
+ step_type = str(row[step_type_idx]).strip().lower()
1479
+
1480
+ # STEP 1: Check if this is a Rest/CV/pause period (non-active step)
1481
+ # These are periods where the battery is not being charged or discharged
1482
+ # Examples: "Rest", "Pause", or a pure "CV" step (Constant Voltage with no Chg/DChg marker)
1483
+ is_cv_only = (
1484
+ ('cv' in step_type)
1485
+ and ('chg' not in step_type)
1486
+ and ('dchg' not in step_type)
1487
+ and ('dis' not in step_type)
1488
+ )
1489
+ is_rest = ('rest' in step_type) or ('pause' in step_type) or is_cv_only
1490
+
1491
+ # STEP 2: Check for discharge indicators
1492
+ # IMPORTANT: Check discharge BEFORE charge because the word "discharge" contains "charge"
1493
+ # If we checked for "charge" first, we'd incorrectly match "discharge" as charge!
1494
+ # Discharge keywords: "dchg", "dischg", "discharge", "cc dchg", etc.
1495
+ is_dchg = 'dchg' in step_type or 'dischg' in step_type or step_type.startswith('dis')
1496
+
1497
+ # STEP 3: Check for charge indicators
1498
+ # Only check if it's NOT discharge and NOT rest (to avoid false matches)
1499
+ # Charge keywords: "chg", "charge", "cc chg", etc.
1500
+ is_chg = (not is_dchg) and (not is_rest) and (('chg' in step_type) or ('charge' in step_type))
1501
+
1502
+ # STEP 4: Assign charge/discharge status based on what we found
1503
+ if is_rest:
1504
+ # This is a rest period - mark it but don't include in charge/discharge masks
1505
+ is_rest_or_other[k] = True
1506
+ is_charge[k] = False # Will be excluded from both masks later
1507
+ elif is_chg:
1508
+ # This row is during charging
1509
+ is_charge[k] = True
1510
+ elif is_dchg:
1511
+ # This row is during discharging
1512
+ is_charge[k] = False
1513
+ else:
1514
+ # Unknown step type - inherit from previous row (assume same state continues)
1515
+ # This handles edge cases where step type might be missing or unrecognized
1516
+ is_charge[k] = is_charge[k-1] if k > 0 else False
1517
+
1518
+ # Mark that we successfully used the Step Type method
1519
+ used_step_type = True
1520
+
1521
+ # ====================================================================================
1522
+ # PRIORITY 2: SPLIT CAPACITY COLUMNS (Medium Reliability Method)
1523
+ # ====================================================================================
1524
+ # Some cyclers have separate columns for charge capacity and discharge capacity:
1525
+ # - "Chg. Spec. Cap.(mAh/g)" or "Chg. Cap.(mAh)" = charge capacity
1526
+ # - "DChg. Spec. Cap.(mAh/g)" or "DChg. Cap.(mAh)" = discharge capacity
1527
+ #
1528
+ # HOW IT WORKS:
1529
+ # During charging, only the charge capacity column has values (discharge column = 0)
1530
+ # During discharging, only the discharge capacity column has values (charge column = 0)
1531
+ # We check which column has a non-zero value to determine the state.
1532
+ #
1533
+ # WHY THIS WORKS:
1534
+ # Battery cyclers track capacity separately for charge and discharge. When you're
1535
+ # charging, the charge capacity increases but discharge capacity stays at 0 (or resets).
1536
+ # When discharging, the opposite happens.
1537
+ # ====================================================================================
1538
+ elif (use_specific and cap_spec_chg_idx is not None and cap_spec_dch_idx is not None) or \
1539
+ (not use_specific and cap_abs_chg_idx is not None and cap_abs_dch_idx is not None):
1540
+
1541
+ # STEP 1: Choose which capacity columns to use (specific vs absolute)
1542
+ # Specific capacity = mAh/g (normalized by active material mass)
1543
+ # Absolute capacity = mAh (total capacity)
1544
+ if use_specific:
1545
+ chg_col_idx = cap_spec_chg_idx # Charge specific capacity column index
1546
+ dch_col_idx = cap_spec_dch_idx # Discharge specific capacity column index
1547
+ else:
1548
+ chg_col_idx = cap_abs_chg_idx # Charge absolute capacity column index
1549
+ dch_col_idx = cap_abs_dch_idx # Discharge absolute capacity column index
1550
+
1551
+ # STEP 2: Read all capacity values from the file
1552
+ cap_chg_vals = np.empty(n, dtype=float) # Array to store charge capacity for each point
1553
+ cap_dch_vals = np.empty(n, dtype=float) # Array to store discharge capacity for each point
1554
+
1555
+ for k, row in enumerate(rows):
1556
+ # Ensure row has enough columns
1557
+ if len(row) < len(header):
1558
+ row = row + [''] * (len(header) - len(row))
1559
+ # Parse capacity values (convert string to float, handle errors)
1560
+ cap_chg_vals[k] = _to_float(row[chg_col_idx])
1561
+ cap_dch_vals[k] = _to_float(row[dch_col_idx])
1562
+
1563
+ # STEP 3: Determine charge/discharge based on which capacity column has values
1564
+ # Logic:
1565
+ # - If charge capacity > threshold AND discharge capacity ≈ 0 → CHARGING
1566
+ # - If discharge capacity > threshold AND charge capacity ≈ 0 → DISCHARGING
1567
+ # - If both are zero or both are non-zero → inherit from previous point (transition period)
1568
+ threshold = 1e-6 # Small threshold to avoid floating-point precision issues
1569
+
1570
+ for k in range(n):
1571
+ # Get capacity values, treating NaN as 0 (missing data)
1572
+ chg_val = cap_chg_vals[k] if not np.isnan(cap_chg_vals[k]) else 0.0
1573
+ dch_val = cap_dch_vals[k] if not np.isnan(cap_dch_vals[k]) else 0.0
1574
+
1575
+ # Decision logic:
1576
+ if chg_val > threshold and dch_val <= threshold:
1577
+ # Charge capacity is non-zero, discharge is zero → this is a charging point
1578
+ is_charge[k] = True
1579
+ elif dch_val > threshold and chg_val <= threshold:
1580
+ # Discharge capacity is non-zero, charge is zero → this is a discharging point
1581
+ is_charge[k] = False
1582
+ else:
1583
+ # Both zero or both non-zero (unusual case, might be transition or data error)
1584
+ # Inherit state from previous point (assume state continues)
1585
+ is_charge[k] = is_charge[k-1] if k > 0 else True # Default to charge if first point
1586
+
1587
+ # Mark that we successfully used the capacity column method
1588
+ used_capacity_columns = True
1589
+
1590
+ # ====================================================================================
1591
+ # PRIORITY 3: VOLTAGE TREND ANALYSIS (Fallback Method)
1592
+ # ====================================================================================
1593
+ # If neither Step Type nor split capacity columns are available, we infer charge/discharge
1594
+ # from the voltage trend:
1595
+ # - Voltage INCREASING (dV > 0) → CHARGING (battery voltage goes up as it charges)
1596
+ # - Voltage DECREASING (dV < 0) → DISCHARGING (battery voltage goes down as it discharges)
1597
+ #
1598
+ # WHY THIS WORKS:
1599
+ # During charging, the battery voltage increases (e.g., 3.0V → 4.2V for Li-ion)
1600
+ # During discharging, the battery voltage decreases (e.g., 4.2V → 3.0V for Li-ion)
1601
+ # This is a fundamental property of batteries.
1602
+ #
1603
+ # CHALLENGES:
1604
+ # - Voltage can have noise (small fluctuations)
1605
+ # - Voltage can have plateaus (flat regions where dV ≈ 0)
1606
+ # - We need to be robust to these issues
1607
+ # ====================================================================================
1608
+ else:
1609
+ # STEP 1: Prepare voltage data and calculate voltage differences
1610
+ v_clean = np.array(voltage, dtype=float) # Clean copy of voltage array
1611
+
1612
+ # Calculate voltage range to set a noise threshold
1613
+ # We need to distinguish real voltage changes from measurement noise
1614
+ v_min = np.nanmin(v_clean) if np.isfinite(v_clean).any() else 0.0
1615
+ v_max = np.nanmax(v_clean) if np.isfinite(v_clean).any() else 1.0
1616
+ v_span = max(1e-6, float(v_max - v_min)) # Total voltage range
1617
+
1618
+ # Set noise threshold: 0.01% of voltage range
1619
+ # Changes smaller than this are considered noise, not real voltage changes
1620
+ eps = max(1e-6, 1e-4 * v_span)
1621
+
1622
+ # Calculate voltage differences: dv[i] = voltage[i+1] - voltage[i]
1623
+ # This tells us if voltage is increasing (positive) or decreasing (negative)
1624
+ dv = np.diff(v_clean)
1625
+ dv = np.nan_to_num(dv, nan=0.0, posinf=0.0, neginf=0.0) # Handle NaN/inf values
1626
+
1627
+ # STEP 2: Determine initial direction (is the experiment starting with charge or discharge?)
1628
+ # We look at the first 500 points to find the first significant voltage change
1629
+ init_dir = None
1630
+ for d in dv[: min(500, dv.size)]:
1631
+ if abs(d) > eps: # Found a significant change (not noise)
1632
+ init_dir = (d > 0) # True if increasing (charge), False if decreasing (discharge)
1633
+ break
1634
+
1635
+ # If we couldn't determine initial direction from voltage, use current sign as fallback
1636
+ if init_dir is None:
1637
+ # Look for first non-zero current value
1638
+ # Positive current usually means charge, negative means discharge
1639
+ nz = None
1640
+ for i_val in current:
1641
+ if abs(i_val) > 1e-12 and np.isfinite(i_val):
1642
+ nz = (i_val >= 0) # True if positive current (charge)
1643
+ break
1644
+ # Default to charge if we still can't determine
1645
+ init_dir = True if nz is None else bool(nz)
1646
+
1647
+ # STEP 3: Assign charge/discharge status to each point based on voltage trend
1648
+ prev_dir = init_dir # Track previous direction (for handling plateaus)
1649
+
1650
+ for k in range(n):
1651
+ dir_set = None # Will be True for charge, False for discharge
1652
+
1653
+ # Strategy: Look backward first (prefer recent trend)
1654
+ # This keeps the last point of a segment with its segment
1655
+ if k > 0:
1656
+ db = dv[k-1] # Voltage difference from previous point
1657
+ if abs(db) > eps: # Significant change (not noise)
1658
+ dir_set = (db > 0) # True if voltage increased (charge)
1659
+
1660
+ # Fallback: If backward look didn't work, look forward
1661
+ # This handles the first point of a new segment
1662
+ if dir_set is None:
1663
+ j = k
1664
+ while j < n-1:
1665
+ d = dv[j] # Look at voltage difference ahead
1666
+ if abs(d) > eps: # Found significant change
1667
+ dir_set = (d > 0) # True if voltage will increase (charge)
1668
+ break
1669
+ j += 1
1670
+
1671
+ # If still couldn't determine (flat voltage plateau), inherit from previous point
1672
+ if dir_set is None:
1673
+ dir_set = prev_dir # Assume state continues
1674
+
1675
+ # Assign charge/discharge status
1676
+ is_charge[k] = dir_set
1677
+ prev_dir = dir_set # Remember for next iteration
1678
+
1679
+ # Build run-length encoding and optionally merge very short flicker runs
1680
+ # (Only apply smoothing when using voltage trend detection, not when using explicit methods)
1681
+ if not used_step_type and not used_capacity_columns:
1682
+ # Smoothing logic for voltage-trend-based detection
1683
+ run_starts = [0]
1684
+ for k in range(1, n):
1685
+ if is_charge[k] != is_charge[k-1]:
1686
+ run_starts.append(k)
1687
+ run_starts.append(n)
1688
+ # Merge runs shorter than 3 samples (or 0.2% of data length, whichever larger)
1689
+ min_len = max(3, int(0.002 * n))
1690
+ if len(run_starts) >= 3:
1691
+ new_is_charge = is_charge.copy()
1693
+ for r in range(len(run_starts)-1):
1694
+ a = run_starts[r]
1695
+ b = run_starts[r+1]
1696
+ if (b - a) < min_len:
1697
+ # Prefer to merge into previous run if exists; else next
1698
+ if r > 0:
1699
+ new_is_charge[a:b] = new_is_charge[a-1]
1700
+ elif r+1 < len(run_starts)-1:
1701
+ new_is_charge[a:b] = new_is_charge[b]
1702
+ is_charge = new_is_charge
1703
+
1704
+ # Compute final run starts for cycle inference
1705
+ run_starts = [0]
1706
+ for k in range(1, n):
1707
+ if is_charge[k] != is_charge[k-1]:
1708
+ run_starts.append(k)
1709
+ run_starts.append(n)
1710
+
1711
+ # Build the final charge/discharge masks (from whichever detection method was used)
1712
+ # Exclude Rest/CV steps if they were identified
1713
+ if used_step_type and 'is_rest_or_other' in locals():
1714
+ charge_mask = is_charge & ~is_rest_or_other
1715
+ discharge_mask = ~is_charge & ~is_rest_or_other
1716
+ else:
1717
+ charge_mask = is_charge
1718
+ discharge_mask = ~is_charge
1719
+
1720
+ # Assign capacity per-point when split chg/dchg columns exist, using derived direction
1721
+ if use_specific and (cap_spec_chg_idx is not None and cap_spec_dch_idx is not None):
1722
+ for k, row in enumerate(rows):
1723
+ # Ensure row length
1724
+ if len(row) < len(header):
1725
+ row = row + [''] * (len(header) - len(row))
1726
+ cap_chg = _to_float(row[cap_spec_chg_idx])
1727
+ cap_dch = _to_float(row[cap_spec_dch_idx])
1728
+ cap_x[k] = cap_chg if is_charge[k] else cap_dch
1729
+ elif (not use_specific) and (cap_abs_chg_idx is not None and cap_abs_dch_idx is not None):
1730
+ for k, row in enumerate(rows):
1731
+ if len(row) < len(header):
1732
+ row = row + [''] * (len(header) - len(row))
1733
+ cap_chg = _to_float(row[cap_abs_chg_idx])
1734
+ cap_dch = _to_float(row[cap_abs_dch_idx])
1735
+ cap_x[k] = cap_chg if is_charge[k] else cap_dch
1736
+
1737
+ # If capacity column was missing entirely, raise
1738
+ if np.all(np.isnan(cap_x)):
1739
+ raise ValueError("No usable capacity columns found in CSV (looked for combined 'Capacity(mAh)'/'Spec. Cap.(mAh/g)' and split 'Chg./DChg.' capacity columns)")
1740
+
1741
+ # Replace NaNs in capacity by 0 to avoid plotting gaps within valid segments
1742
+ # but keep masks to split charge/discharge and cycles (NaN voltage gets dropped later by plotting logic)
1743
+ cap_x = np.nan_to_num(cap_x, nan=0.0)
1744
+
1745
+ cycles = _infer_cycles_from_masks(charge_mask, discharge_mask, n)
1746
+
1747
+ return cap_x, voltage, cycles, charge_mask, discharge_mask
1748
+
1749
+
1750
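+ # Minimal usage sketch for the galvanostatic reader above (illustrative only: the
+ # file name 'cell01.csv' is a placeholder and matplotlib is assumed to be
+ # installed; batplot's own plotting lives elsewhere in the package).
+ def _sketch_plot_gc_profiles(fname: str = 'cell01.csv') -> None:
+     import matplotlib.pyplot as plt
+     cap, volt, cyc, chg, dchg = read_ec_csv_file(fname, prefer_specific=True)
+     for c in np.unique(cyc):
+         sel = cyc == c
+         plt.plot(cap[sel & chg], volt[sel & chg], label=f'Cycle {c} charge')
+         plt.plot(cap[sel & dchg], volt[sel & dchg], '--', label=f'Cycle {c} discharge')
+     plt.xlabel('Specific capacity (mAh/g)')
+     plt.ylabel('Voltage (V)')
+     plt.legend()
+     plt.show()
+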
+ def read_ec_csv_dqdv_file(fname: str, prefer_specific: bool = True) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, str]:
1751
+ """Read differential capacity (dQ/dV) data from battery cycler CSV for phase transition analysis.
1752
+
1753
+ Differential capacity analysis (dQ/dV vs V plots) is used to identify electrochemical phase
1754
+ transitions and reaction mechanisms in battery materials. Peaks in dQ/dV correspond to flat
1755
+ voltage plateaus in the galvanostatic profile, revealing redox reactions and structural changes.
1756
+
1757
+ This function extracts pre-calculated dQ/dV data from cycler software exports (e.g., Neware),
1758
+ supporting both absolute (dQ/dV in mAh/V) and specific (dQm/dV in mAh g⁻¹ V⁻¹) units.
1759
+
1760
+ Differential Capacity Theory:
1761
+ -----------------------------
1762
+ dQ/dV = dQ/dt × dt/dV = I / (dV/dt)
1763
+
1764
+ Where:
1765
+ - Q: Capacity (charge passed, mAh or mAh/g)
1766
+ - V: Voltage (V)
1767
+ - I: Current (mA)
1768
+ - t: Time (hours)
1769
+
1770
+ Physical interpretation:
1771
+ - High dQ/dV: Voltage changes slowly with capacity (flat plateau) → sharp peak in plot
1772
+ - Low dQ/dV: Voltage changes rapidly with capacity (sloped region) → baseline in plot
1773
+ - Peaks identify specific phase transitions, intercalation stages, or side reactions
1774
+
1775
+ Common applications:
1776
+ - Identifying Li+ intercalation stages in graphite anodes (peaks around 0.1-0.2 V)
1777
+ - Detecting phase transitions in cathode materials (NMC, LFP, etc.)
1778
+ - Monitoring electrode degradation (peak shift/broadening over cycles)
1779
+ - Comparing electrode materials (peak positions reveal thermodynamics)
1780
+
1781
+ Expected CSV Format:
1782
+ --------------------
1783
+ Uses same two-line header format as read_ec_csv_file():
1784
+ - Line 1: Main column names
1785
+ - Line 2: Additional columns (first cell empty, merged with Line 1)
1786
+
1787
+ Required columns:
1788
+ - 'Voltage(V)': X-axis for dQ/dV plot (typical range 2.5-4.2 V for Li-ion)
1789
+ - At least one dQ/dV column:
1790
+ * 'dQm/dV(mAh/V.g)': Specific differential capacity (preferred for comparing materials)
1791
+ * 'dQ/dV(mAh/V)': Absolute differential capacity (for single cell analysis)
1792
+
1793
+ Optional columns (for charge/discharge classification):
1794
+ - 'Step Type': Explicit step identifier ('CC Chg', 'CC DChg', etc.)
1795
+ - 'Chg. Spec. Cap.(mAh/g)', 'DChg. Spec. Cap.(mAh/g)': Used to infer direction
1796
+ - 'Chg. Cap.(mAh)', 'DChg. Cap.(mAh)': Alternative capacity columns
1797
+ - 'Current(mA)': Used as fallback for voltage trend analysis
1798
+
1799
+ Charge/Discharge Detection Logic (Priority Order):
1800
+ ---------------------------------------------------
1801
+ Uses same robust detection algorithm as read_ec_csv_file():
1802
+
1803
+ 1. **Step Type column** (highest priority, most reliable):
1804
+ - Looks for keywords: 'Chg' or 'DChg' in 'Step Type' column
1805
+ - Example: 'CC Chg' → charge, 'CC DChg' → discharge
1806
+
1807
+ 2. **Split capacity columns** (medium priority, reliable for well-formatted exports):
1808
+ - If 'Chg. Spec. Cap.(mAh/g)' and 'DChg. Spec. Cap.(mAh/g)' both exist:
1809
+ * Non-zero Chg. Cap. → charge segment
1810
+ * Non-zero DChg. Cap. → discharge segment
1811
+ - Same logic for absolute capacity columns ('Chg. Cap.(mAh)', 'DChg. Cap.(mAh)')
1812
+
1813
+ 3. **Voltage trend analysis** (fallback, less reliable):
1814
+ - Calculate voltage differences: dV[i] = V[i+1] - V[i]
1815
+ - dV > 0 (voltage increasing) → charge
1816
+ - dV < 0 (voltage decreasing) → discharge
1817
+ - Includes smoothing to remove flicker from measurement noise
1818
+ - Merges runs shorter than 3 samples (or 0.2% of data) to avoid false transitions
1819
+
1820
+ Cycle Inference:
1821
+ ----------------
1822
+ Cycles are always inferred by pairing alternating charge/discharge segments in
1823
+ chronological order:
1824
+ - Segment 1 + Segment 2 = Cycle 1 (charge→discharge or discharge→charge)
1825
+ - Segment 3 + Segment 4 = Cycle 2
1826
+ - etc.
1827
+
1828
+ This guarantees Cycle 1 always contains the first two electrochemical halves regardless
1829
+ of how the cycler numbered its rows.
1830
+
1831
+ Args:
1832
+ fname: Path to CSV file containing differential capacity data.
1833
+ prefer_specific: Unit preference flag (default=True).
1834
+ - True: Use specific differential capacity (dQm/dV in mAh g⁻¹ V⁻¹) if available
1835
+ - False: Use absolute differential capacity (dQ/dV in mAh V⁻¹) if available
1836
+ - Fallback: Use whichever unit is present if preferred unit missing
1837
+ - Note: Specific capacity allows fair comparison between different active mass loadings
1838
+
1839
+ Returns:
1840
+ Tuple of 6 elements for differential capacity plotting:
1841
+ voltage (np.ndarray): X-axis voltage values in V
1842
+ - Length: N data points
1843
+ - Typical range: 2.5-4.2 V for Li-ion, 1.0-2.5 V for Li-S, 0-3.5 V for supercapacitors
1844
+ - Used as X-axis for dQ/dV plots (dQ/dV vs V)
1845
+
1846
+ dqdv (np.ndarray): Y-axis differential capacity values
1847
+ - Length: N data points
1848
+ - Units: mAh g⁻¹ V⁻¹ (specific) or mAh V⁻¹ (absolute) depending on prefer_specific
1849
+ - Typical values: 0-1000 mAh g⁻¹ V⁻¹ for graphite, 0-5000 for some cathodes
1850
+ - Peaks correspond to electrochemical reactions / phase transitions
1851
+ - May contain NaN values if cycler software couldn't calculate derivative
1852
+
1853
+ cycles (np.ndarray): Cycle number for each data point
1854
+ - Length: N data points
1855
+ - dtype: int
1856
+ - Values: 1, 2, 3, ... (1-indexed)
1857
+ - Used to separate and color-code different cycles in overlay plots
1858
+ - Always inferred by pairing alternating charge/discharge segments so Cycle 1
1859
+ contains the first two electrochemical halves
1860
+
1861
+ charge_mask (np.ndarray): Boolean mask indicating charging data points
1862
+ - Length: N data points
1863
+ - dtype: bool
1864
+ - True during charge (positive current), False during discharge
1865
+ - Determined by Step Type → split capacity columns → voltage trend (priority order)
1866
+ - Used to plot charge/discharge with different styles (e.g., solid vs dashed lines)
1867
+
1868
+ discharge_mask (np.ndarray): Boolean mask indicating discharging data points
1869
+ - Length: N data points
1870
+ - dtype: bool
1871
+ - Complement of charge_mask over active points; Rest/CV rows are excluded from both masks
1872
+ - Used for discharge-specific styling in plots
1873
+
1874
+ y_label (str): Formatted axis label for Y-axis with proper LaTeX notation
1875
+ - Value: 'dQm/dV (mAh g$^{-1}$ V$^{-1}$)' for specific capacity
1876
+ - Value: 'dQ/dV (mAh V$^{-1}$)' for absolute capacity
1877
+ - Includes proper superscript formatting for matplotlib rendering
1878
+ - Can be used directly as plt.ylabel() argument
1879
+
1880
+ Raises:
1881
+ ValueError: If CSV file is malformed or missing required data:
1882
+ - Empty file or missing header rows
1883
+ - No 'Voltage(V)' column found
1884
+ - No dQ/dV columns ('dQ/dV(mAh/V)' or 'dQm/dV(mAh/V.g)') found
1885
+ - All dQ/dV values are NaN (calculation failed in cycler software)
1886
+ FileNotFoundError: If fname path does not exist
1887
+ UnicodeDecodeError: Rarely raised due to errors='ignore' flag in file reading
1888
+
1889
+ Examples:
1890
+ >>> # Read specific differential capacity for material comparison
1891
+ >>> v, dqdv, cyc, chg, dchg, ylabel = read_ec_csv_dqdv_file('neware_dqdv.csv', prefer_specific=True)
1892
+ >>> print(f"Loaded {len(v)} points, {cyc.max()} cycles")
1893
+ >>> print(f"Y-axis label: {ylabel}") # 'dQm/dV (mAh g$^{-1}$ V$^{-1}$)'
1894
+ >>>
1895
+ >>> # Plot charge and discharge dQ/dV curves separately
1896
+ >>> import matplotlib.pyplot as plt
1897
+ >>> plt.figure()
1898
+ >>> plt.plot(v[chg], dqdv[chg], 'r-', label='Charge')
1899
+ >>> plt.plot(v[dchg], dqdv[dchg], 'b-', label='Discharge')
1900
+ >>> plt.xlabel('Voltage (V)')
1901
+ >>> plt.ylabel(ylabel)
1902
+ >>> plt.legend()
1903
+ >>>
1904
+ >>> # Overlay multiple cycles to show degradation
1905
+ >>> for cycle_num in range(1, 6): # First 5 cycles
1906
+ >>> mask = (cyc == cycle_num) & chg # Charge only
1907
+ >>> plt.plot(v[mask], dqdv[mask], label=f'Cycle {cycle_num}')
1908
+ >>> # Peak shifts/broadening indicate structural changes
1909
+ >>>
1910
+ >>> # Identify peaks (phase transitions)
1911
+ >>> from scipy.signal import find_peaks
1912
+ >>> chg_data = dqdv[chg & (cyc == 1)] # First charge cycle
1913
+ >>> chg_v = v[chg & (cyc == 1)]
1914
+ >>> peaks, _ = find_peaks(chg_data, height=100, distance=10)
1915
+ >>> print(f"Phase transition voltages: {chg_v[peaks]}")
1916
+
1917
+ Notes:
1918
+ - dQ/dV calculation quality depends on cycler settings:
1919
+ * Smaller voltage steps → better dV resolution → smoother dQ/dV
1920
+ * Typical recommendation: 5 mV voltage steps for high-quality dQ/dV
1921
+ * GITT or PITT techniques provide best dQ/dV resolution (equilibrium data)
1922
+ - Peaks in dQ/dV correspond to inflection points in Q vs V curve (second derivative = 0)
1923
+ - Charge and discharge dQ/dV often show hysteresis due to kinetic limitations
1924
+ - NaN handling: This function preserves NaN values from cycler software (bad derivatives)
1925
+ Plotting code should mask them out first, e.g. `ok = ~np.isnan(dqdv); plt.plot(v[ok], dqdv[ok], 'o-')`
1926
+ - Memory: Uses numpy arrays for efficient handling of large datasets (100k+ points)
1927
+ - Cycle inference: Assumes alternating charge/discharge; unusual protocols may need manual cycle assignment
1928
+ """
1929
+ header, rows, _ = _load_csv_header_and_rows(fname)
1930
+
1931
+ name_to_idx = {h: i for i, h in enumerate(header)}
1932
+ def _find(name: str):
1933
+ return name_to_idx.get(name, None)
1934
+
1935
+ v_idx = _find('Voltage(V)')
1936
+ i_idx = _find('Current(mA)')
1937
+ dq_abs_idx = _find('dQ/dV(mAh/V)')
1938
+ dq_spec_idx = _find('dQm/dV(mAh/V.g)')
1939
+ step_type_idx = _find('Step Type') # Optional: explicitly indicates charge/discharge
1940
+
1941
+ # Also look for capacity columns to help determine charge/discharge
1942
+ cap_spec_chg_idx = _find('Chg. Spec. Cap.(mAh/g)')
1943
+ cap_spec_dch_idx = _find('DChg. Spec. Cap.(mAh/g)')
1944
+ cap_abs_chg_idx = _find('Chg. Cap.(mAh)')
1945
+ cap_abs_dch_idx = _find('DChg. Cap.(mAh)')
1946
+
1947
+ if v_idx is None:
1948
+ raise ValueError("CSV missing required 'Voltage(V)' column for dQ/dV plot")
1949
+ if dq_abs_idx is None and dq_spec_idx is None:
1950
+ raise ValueError("CSV missing dQ/dV columns: need 'dQ/dV(mAh/V)' or 'dQm/dV(mAh/V.g)'")
1951
+
1952
+ use_spec = False
1953
+ if prefer_specific and dq_spec_idx is not None:
1954
+ use_spec = True
1955
+ elif dq_abs_idx is not None:
1956
+ use_spec = False
1957
+ elif dq_spec_idx is not None:
1958
+ use_spec = True
1959
+
1960
+ y_label = r'dQm/dV (mAh g$^{-1}$ V$^{-1}$)' if use_spec else r'dQ/dV (mAh V$^{-1}$)'
1961
+ n = len(rows)
1962
+ voltage = np.empty(n, dtype=float)
1963
+ dqdv = np.empty(n, dtype=float)
1964
+ current = np.zeros(n, dtype=float)
1965
+ def _to_float(val: str) -> float:
1966
+ try:
1967
+ return float(val.strip()) if isinstance(val, str) else float(val)
1968
+ except Exception:
1969
+ return np.nan
1970
+
1971
+ for k, row in enumerate(rows):
1972
+ if len(row) < len(header):
1973
+ row = row + [''] * (len(header) - len(row))
1974
+ voltage[k] = _to_float(row[v_idx])
1975
+ if use_spec:
1976
+ dqdv[k] = _to_float(row[dq_spec_idx])
1977
+ else:
1978
+ dqdv[k] = _to_float(row[dq_abs_idx])
1979
+ if i_idx is not None:
1980
+ current[k] = _to_float(row[i_idx])
1981
+
1982
+ # --- Derive charge/discharge using same logic as GC mode ---
1983
+ # Priority 1: Use explicit Step Type column
1984
+ is_charge = np.zeros(n, dtype=bool)
1985
+ is_rest_segment = np.zeros(n, dtype=bool)
1986
+ used_step_type = False
1987
+ used_capacity_columns = False
1988
+
1989
+ if step_type_idx is not None:
1990
+ for k, row in enumerate(rows):
1991
+ if len(row) < len(header):
1992
+ row = row + [''] * (len(header) - len(row))
1993
+ step_type = str(row[step_type_idx]).strip().lower()
1994
+ is_cv_only = (
1995
+ ('cv' in step_type)
1996
+ and ('chg' not in step_type)
1997
+ and ('dchg' not in step_type)
1998
+ and ('dis' not in step_type)
1999
+ )
2000
+ is_rest = (
2001
+ ('rest' in step_type)
2002
+ or ('pause' in step_type)
2003
+ or ('wait' in step_type)
2004
+ or (step_type in {'idle'})
2005
+ or is_cv_only
2006
+ )
2007
+ if is_rest:
2008
+ is_rest_segment[k] = True
2009
+ is_charge[k] = is_charge[k-1] if k > 0 else True
2010
+ continue
2011
+ is_dchg = 'dchg' in step_type or 'dischg' in step_type or step_type.startswith('dis')
2012
+ is_chg = (not is_dchg) and (('chg' in step_type) or ('charge' in step_type))
2013
+ if is_chg:
2014
+ is_charge[k] = True
2015
+ elif is_dchg:
2016
+ is_charge[k] = False
2017
+ else:
2018
+ is_charge[k] = is_charge[k-1] if k > 0 else True
2019
+ used_step_type = True
2020
+
2021
+ # Priority 2: Use split charge/discharge capacity columns if available
2022
+ elif (cap_spec_chg_idx is not None and cap_spec_dch_idx is not None) or \
2023
+ (cap_abs_chg_idx is not None and cap_abs_dch_idx is not None):
2024
+ # Prefer specific capacity columns if they exist
2025
+ if cap_spec_chg_idx is not None and cap_spec_dch_idx is not None:
2026
+ chg_col_idx = cap_spec_chg_idx
2027
+ dch_col_idx = cap_spec_dch_idx
2028
+ else:
2029
+ chg_col_idx = cap_abs_chg_idx
2030
+ dch_col_idx = cap_abs_dch_idx
2031
+
2032
+ cap_chg_vals = np.empty(n, dtype=float)
2033
+ cap_dch_vals = np.empty(n, dtype=float)
2034
+
2035
+ for k, row in enumerate(rows):
2036
+ if len(row) < len(header):
2037
+ row = row + [''] * (len(header) - len(row))
2038
+ cap_chg_vals[k] = _to_float(row[chg_col_idx])
2039
+ cap_dch_vals[k] = _to_float(row[dch_col_idx])
2040
+
2041
+ # Determine charge/discharge based on which capacity is non-zero
2042
+ threshold = 1e-6
2043
+ for k in range(n):
2044
+ chg_val = cap_chg_vals[k] if not np.isnan(cap_chg_vals[k]) else 0.0
2045
+ dch_val = cap_dch_vals[k] if not np.isnan(cap_dch_vals[k]) else 0.0
2046
+
2047
+ if chg_val > threshold and dch_val <= threshold:
2048
+ is_charge[k] = True
2049
+ elif dch_val > threshold and chg_val <= threshold:
2050
+ is_charge[k] = False
2051
+ else:
2052
+ is_charge[k] = is_charge[k-1] if k > 0 else True
2053
+ used_capacity_columns = True
2054
+
2055
+ # Priority 3: Fallback to voltage trend
2056
+ else:
2057
+ v_clean = np.array(voltage, dtype=float)
2058
+ v_min = np.nanmin(v_clean) if np.isfinite(v_clean).any() else 0.0
2059
+ v_max = np.nanmax(v_clean) if np.isfinite(v_clean).any() else 1.0
2060
+ v_span = max(1e-6, float(v_max - v_min))
2061
+ eps = max(1e-6, 1e-4 * v_span)
2062
+ dv = np.diff(v_clean)
2063
+ dv = np.nan_to_num(dv, nan=0.0, posinf=0.0, neginf=0.0)
2064
+
2065
+ init_dir = None
2066
+ for d in dv[: min(500, dv.size)]:
2067
+ if abs(d) > eps:
2068
+ init_dir = (d > 0)
2069
+ break
2070
+ if init_dir is None:
2071
+ nz = None
2072
+ for i_val in current:
2073
+ if abs(i_val) > 1e-12 and np.isfinite(i_val):
2074
+ nz = (i_val >= 0)
2075
+ break
2076
+ init_dir = True if nz is None else bool(nz)
2077
+
2078
+ prev_dir = init_dir
2079
+ for k in range(n):
2080
+ dir_set = None
2081
+ # Prefer backward-looking difference to keep the last sample of a run with its run
2082
+ if k > 0:
2083
+ db = dv[k-1]
2084
+ if abs(db) > eps:
2085
+ dir_set = (db > 0)
2086
+ # Fallback: look forward to the next informative change
2087
+ if dir_set is None:
2088
+ j = k
2089
+ while j < n-1:
2090
+ d = dv[j]
2091
+ if abs(d) > eps:
2092
+ dir_set = (d > 0)
2093
+ break
2094
+ j += 1
2095
+ if dir_set is None:
2096
+ dir_set = prev_dir
2097
+ is_charge[k] = dir_set
2098
+ prev_dir = dir_set
2099
+
2100
+ charge_mask = is_charge & ~is_rest_segment
2101
+ discharge_mask = (~is_charge) & ~is_rest_segment
2102
+ inferred_cycles = _infer_cycles_from_masks(charge_mask, discharge_mask, n)
2103
+
2104
+ return voltage, dqdv, inferred_cycles, charge_mask, discharge_mask, y_label
2105
+
2106
+
2107
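+ # Simplified, self-contained sketch of the Priority-3 voltage-trend fallback used
+ # by the readers above (illustrative only; the real code additionally merges very
+ # short runs and can look forward across plateaus). Synthetic numbers throughout.
+ def _sketch_voltage_trend_demo() -> None:
+     v = np.concatenate([np.linspace(3.0, 4.2, 50),    # rising half  -> charging
+                         np.linspace(4.2, 3.0, 50)])   # falling half -> discharging
+     eps = 1e-4 * (v.max() - v.min())                  # relative noise floor, as above
+     dv = np.diff(v)
+     direction = np.empty(v.size, dtype=bool)
+     direction[0] = dv[0] > 0
+     for k in range(1, v.size):
+         if dv[k - 1] > eps:
+             direction[k] = True               # voltage rising -> charge
+         elif dv[k - 1] < -eps:
+             direction[k] = False              # voltage falling -> discharge
+         else:
+             direction[k] = direction[k - 1]   # plateau/noise -> keep previous state
+     print('charge points:', int(direction.sum()), '| discharge points:', int((~direction).sum()))
+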
+ def _compute_dqdv_from_capacity(capacity: np.ndarray,
2108
+ voltage: np.ndarray,
2109
+ charge_mask: np.ndarray) -> np.ndarray:
2110
+ """Compute dQ/dV for contiguous segments without mixing charge/discharge transitions."""
2111
+ n = len(voltage)
2112
+ dqdv = np.full(n, np.nan, dtype=float)
2113
+ if n == 0:
2114
+ return dqdv
2115
+
2116
+ mask_int = charge_mask.astype(np.int8)
2117
+ boundaries = np.where(np.diff(mask_int) != 0)[0] + 1
2118
+ boundaries = np.concatenate(([0], boundaries, [n]))
2119
+
2120
+ for start, end in zip(boundaries[:-1], boundaries[1:]):
2121
+ seg_len = end - start
2122
+ if seg_len <= 1:
2123
+ dqdv[start:end] = 0.0
2124
+ continue
2125
+ v_seg = voltage[start:end]
2126
+ cap_seg = capacity[start:end]
2127
+ if np.allclose(v_seg, v_seg[0]):
2128
+ dqdv[start:end] = np.nan
2129
+ continue
2130
+ with np.errstate(divide='ignore', invalid='ignore'):
2131
+ grad = np.gradient(cap_seg, v_seg, edge_order=1)
2132
+ grad = np.asarray(grad, dtype=float)
2133
+ grad[~np.isfinite(grad)] = np.nan
2134
+ dqdv[start:end] = grad
2135
+ return dqdv
2136
+
2137
+
2138
+ def read_mpt_dqdv_file(fname: str,
2139
+ mass_mg: float,
2140
+ prefer_specific: bool = True) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, str]:
2141
+ """Compute dQ/dV curves from BioLogic .mpt galvanostatic data."""
2142
+ if mass_mg is None or mass_mg <= 0:
2143
+ raise ValueError("Mass loading (mg) is required and must be positive for dQ/dV from .mpt files. Use --mass.")
2144
+
2145
+ specific_capacity, voltage, cycles, charge_mask, discharge_mask = read_mpt_file(
2146
+ fname, mode='gc', mass_mg=mass_mg
2147
+ )
2148
+
2149
+ mass_g = float(mass_mg) / 1000.0
2150
+ absolute_capacity = specific_capacity * mass_g
2151
+
2152
+ dqdv_specific = _compute_dqdv_from_capacity(specific_capacity, voltage, charge_mask)
2153
+ dqdv_absolute = _compute_dqdv_from_capacity(absolute_capacity, voltage, charge_mask)
2154
+
2155
+ if prefer_specific:
2156
+ y_data = dqdv_specific
2157
+ y_label = r'dQm/dV (mAh g$^{-1}$ V$^{-1}$)'
2158
+ else:
2159
+ y_data = dqdv_absolute
2160
+ y_label = r'dQ/dV (mAh V$^{-1}$)'
2161
+
2162
+ return voltage, y_data, cycles, charge_mask, discharge_mask, y_label
2163
+
2164
+
2165
+ def is_cs_b_format(header: List[str]) -> bool:
2166
+ """Check if CSV has CS-B-001 format (has 'Capacity Density(mAh/g)' and 'dQ/dV(mAh/V)' columns)."""
2167
+ header_stripped = [h.strip().replace('\t', '') for h in header]
2168
+ has_cap_density = any('Capacity Density(mAh/g)' in h for h in header_stripped)
2169
+ has_dqdv = any('dQ/dV(mAh/V)' in h for h in header_stripped)
2170
+ return has_cap_density and has_dqdv
2171
+
2172
+
2173
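+ # Sketch of how a caller might dispatch on the format check above (illustrative
+ # only; _load_csv_header_and_rows is the same internal helper the readers in this
+ # module already use).
+ def _sketch_read_any_gc_csv(fname: str):
+     header, _rows, _ = _load_csv_header_and_rows(fname)
+     if is_cs_b_format(header):
+         return read_cs_b_csv_file(fname, mode='gc')
+     return read_ec_csv_file(fname, prefer_specific=True)
+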
+ def read_cs_b_csv_file(fname: str, mode: str = 'gc') -> Tuple:
2174
+ """Read CS-B-001.csv format with support for GC, CPC, and DQDV modes.
2175
+
2176
+ This function handles a specific CSV format with 3-line headers:
2177
+ - Line 1: Cycle-level headers
2178
+ - Line 2: Step-level headers
2179
+ - Line 3: Record-level headers (actual data columns)
2180
+
2181
+ Column mapping (in record header after prefix):
2182
+ - Current(mA) (column F in raw CSV)
2183
+ - Capacity Density(mAh/g) (column I in raw CSV)
2184
+ - dQ/dV(mAh/V) (column U in raw CSV)
2185
+
2186
+ For GC mode:
2187
+ - Skips resting points (current == 0)
2188
+ - Uses capacity from Capacity Density column
2189
+ - Resets capacity to 0 at start of each charge/discharge segment
2190
+ - Determines charge/discharge from current sign (positive = charge, negative = discharge)
2191
+ - If the test starts with discharge, the first cycle is discharge then charge; if it starts with charge, the first cycle is charge then discharge
2192
+
2193
+ Args:
2194
+ fname: Path to CSV file
2195
+ mode: 'gc', 'cpc', or 'dqdv'
2196
+
2197
+ Returns:
2198
+ For GC mode: (capacity, voltage, cycles, charge_mask, discharge_mask)
2199
+ For CPC mode: (cycle_nums, cap_charge, cap_discharge, efficiency)
2200
+ For DQDV mode: (voltage, dqdv, cycles, charge_mask, discharge_mask, y_label)
2201
+ """
2202
+ header, rows, parsed = _load_csv_header_and_rows(fname)
2203
+
2204
+ # Build column index map (strip tabs and whitespace)
2205
+ name_to_idx = {}
2206
+ for i, h in enumerate(header):
2207
+ h_clean = h.strip().replace('\t', '')
2208
+ name_to_idx[h_clean] = i
2209
+ # Also try without cleaning for exact match
2210
+ name_to_idx[h] = i
2211
+
2212
+ def _find(name: str):
2213
+ # Try exact match first
2214
+ if name in name_to_idx:
2215
+ return name_to_idx[name]
2216
+ # Try cleaned version
2217
+ name_clean = name.strip().replace('\t', '')
2218
+ return name_to_idx.get(name_clean, None)
2219
+
2220
+ # Find required columns
2221
+ v_idx = _find('Voltage(V)')
2222
+ i_idx = _find('Current(mA)')
2223
+ cap_density_idx = _find('Capacity Density(mAh/g)')
2224
+ cap_abs_idx = _find('Capacity(mAh)')
2225
+ dqdv_idx = _find('dQ/dV(mAh/V)')
2226
+
2227
+ if v_idx is None:
2228
+ raise ValueError("CSV missing required 'Voltage(V)' column")
2229
+ if i_idx is None:
2230
+ raise ValueError("CSV missing required 'Current(mA)' column")
2231
+
2232
+ def _to_float(val: str) -> float:
2233
+ try:
2234
+ val_str = str(val).strip().replace('\t', '')
2235
+ return float(val_str) if val_str else np.nan
2236
+ except Exception:
2237
+ return np.nan
2238
+
2239
+ # Read all data
2240
+ n = len(rows)
2241
+ voltage = np.empty(n, dtype=float)
2242
+ current = np.empty(n, dtype=float)
2243
+ capacity_values = np.full(n, np.nan, dtype=float)
2244
+ dqdv = np.full(n, np.nan, dtype=float)
2245
+
2246
+ for k, row in enumerate(rows):
2247
+ if len(row) < len(header):
2248
+ row = row + [''] * (len(header) - len(row))
2249
+ voltage[k] = _to_float(row[v_idx])
2250
+ current[k] = _to_float(row[i_idx])
2251
+ if cap_density_idx is not None:
2252
+ capacity_values[k] = _to_float(row[cap_density_idx])
2253
+ elif cap_abs_idx is not None:
2254
+ capacity_values[k] = _to_float(row[cap_abs_idx])
2255
+ if dqdv_idx is not None:
2256
+ dqdv[k] = _to_float(row[dqdv_idx])
2257
+
2258
+ # Skip resting points (current == 0)
2259
+ non_rest_mask = np.abs(current) > 1e-10
2260
+ if not np.any(non_rest_mask):
2261
+ raise ValueError("No non-zero current data found (all points are resting)")
2262
+
2263
+ # Filter out resting points
2264
+ voltage = voltage[non_rest_mask]
2265
+ current = current[non_rest_mask]
2266
+ capacity_values = capacity_values[non_rest_mask]
2267
+ dqdv = dqdv[non_rest_mask] if dqdv_idx is not None else np.full(np.sum(non_rest_mask), np.nan)
2268
+ n_active = len(voltage)
2269
+
2270
+ # Determine charge/discharge from current sign
2271
+ # Positive current = charge, negative current = discharge
2272
+ is_charge = current > 0
2273
+ charge_mask = is_charge
2274
+ discharge_mask = ~is_charge
2275
+
2276
+ # Find segment boundaries (where charge/discharge changes)
2277
+ run_starts = [0]
2278
+ for k in range(1, n_active):
2279
+ if is_charge[k] != is_charge[k-1]:
2280
+ run_starts.append(k)
2281
+ run_starts.append(n_active)
2282
+
2283
+ # Determine if experiment starts with charge or discharge
2284
+ starts_with_charge = is_charge[0] if n_active > 0 else True
2285
+
2286
+ if mode == 'gc':
2287
+ # GC mode: capacity from column I, reset to 0 for each segment
2288
+ if cap_density_idx is None and cap_abs_idx is None:
2289
+ raise ValueError("CSV missing required capacity column for GC mode (need 'Capacity Density(mAh/g)' or 'Capacity(mAh)')")
2290
+
2291
+ capacity = np.zeros(n_active, dtype=float)
2292
+
2293
+ # Process each segment, resetting capacity to 0 at start
2294
+ for seg_idx in range(len(run_starts) - 1):
2295
+ start = run_starts[seg_idx]
2296
+ end = run_starts[seg_idx + 1]
2297
+
2298
+ # Get capacity values for this segment
2299
+ seg_cap_density = capacity_values[start:end]
2300
+
2301
+ # Find first valid (non-NaN) capacity value in segment
2302
+ first_valid_idx = None
2303
+ first_valid_val = None
2304
+ for i in range(len(seg_cap_density)):
2305
+ val = seg_cap_density[i]
2306
+ if not np.isnan(val) and np.isfinite(val):
2307
+ first_valid_idx = i
2308
+ first_valid_val = val
2309
+ break
2310
+
2311
+ if first_valid_val is not None:
2312
+ # Reset capacity: subtract the first value so segment starts at 0
2313
+ for i in range(start, end):
2314
+ idx_in_seg = i - start
2315
+ val = capacity_values[i]
2316
+ if not np.isnan(val) and np.isfinite(val):
2317
+ capacity[i] = val - first_valid_val
2318
+ else:
2319
+ # Use previous value or 0
2320
+ if idx_in_seg > 0:
2321
+ capacity[i] = capacity[i-1]
2322
+ else:
2323
+ capacity[i] = 0.0
2324
+
2325
+ # Infer cycles by pairing alternating charge/discharge segments
2326
+ # If starts with discharge, first cycle is discharge then charge
2327
+ # If starts with charge, first cycle is charge then discharge
2328
+ cycles = np.zeros(n_active, dtype=int)
2329
+ current_cycle = 1
2330
+ half_cycle = 0
2331
+
2332
+ for seg_idx in range(len(run_starts) - 1):
2333
+ start = run_starts[seg_idx]
2334
+ end = run_starts[seg_idx + 1]
2335
+ cycles[start:end] = current_cycle
2336
+ half_cycle += 1
2337
+
2338
+ # Complete cycle when we have both charge and discharge
2339
+ if half_cycle == 2:
2340
+ current_cycle += 1
2341
+ half_cycle = 0
2342
+
2343
+ return (capacity, voltage, cycles, charge_mask, discharge_mask)
2344
+
2345
+ elif mode == 'cpc':
2346
+ # CPC mode: extract end-of-segment capacities
2347
+ if cap_density_idx is None and cap_abs_idx is None:
2348
+ raise ValueError("CSV missing required capacity column for CPC mode (need 'Capacity Density(mAh/g)' or 'Capacity(mAh)')")
2349
+
2350
+ cyc_nums = []
2351
+ cap_charge = []
2352
+ cap_discharge = []
2353
+ eff_percent = []
2354
+
2355
+ current_cycle = 1
2356
+ half_cycle = 0
2357
+ cycle_charge_cap = np.nan
2358
+ cycle_discharge_cap = np.nan
2359
+
2360
+ for seg_idx in range(len(run_starts) - 1):
2361
+ start = run_starts[seg_idx]
2362
+ end = run_starts[seg_idx + 1]
2363
+
2364
+ # Get capacity values for this segment
2365
+ seg_cap = capacity_values[start:end]
2366
+
2367
+ # Find first and last valid capacity values
2368
+ first_valid = None
2369
+ last_valid = None
2370
+ for val in seg_cap:
2371
+ if not np.isnan(val) and np.isfinite(val):
2372
+ if first_valid is None:
2373
+ first_valid = val
2374
+ last_valid = val
2375
+
2376
+ # Reset capacity relative to segment start
2377
+ end_cap = 0.0
2378
+ if first_valid is not None and last_valid is not None:
2379
+ end_cap = last_valid - first_valid
2380
+
2381
+ if is_charge[start]:
2382
+ cycle_charge_cap = end_cap
2383
+ else:
2384
+ cycle_discharge_cap = end_cap
2385
+
2386
+ half_cycle += 1
2387
+ if half_cycle == 2:
2388
+ # Completed one full cycle
2389
+ cyc_nums.append(current_cycle)
2390
+ cap_charge.append(cycle_charge_cap)
2391
+ cap_discharge.append(cycle_discharge_cap)
2392
+
2393
+ # Calculate efficiency
2394
+ if np.isfinite(cycle_charge_cap) and cycle_charge_cap > 0 and np.isfinite(cycle_discharge_cap):
2395
+ eff = (cycle_discharge_cap / cycle_charge_cap) * 100.0
2396
+ else:
2397
+ eff = np.nan
2398
+ eff_percent.append(eff)
2399
+
2400
+ # Reset for next cycle
2401
+ current_cycle += 1
2402
+ half_cycle = 0
2403
+ cycle_charge_cap = np.nan
2404
+ cycle_discharge_cap = np.nan
2405
+
2406
+ return (np.array(cyc_nums, dtype=float),
2407
+ np.array(cap_charge, dtype=float),
2408
+ np.array(cap_discharge, dtype=float),
2409
+ np.array(eff_percent, dtype=float))
2410
+
2411
+ elif mode == 'dqdv':
2412
+ # DQDV mode: use dQ/dV from column U
2413
+ if dqdv_idx is None:
2414
+ raise ValueError("CSV missing required 'dQ/dV(mAh/V)' column for DQDV mode")
2415
+
2416
+ # Infer cycles by pairing alternating charge/discharge segments
2417
+ cycles = np.zeros(n_active, dtype=int)
2418
+ current_cycle = 1
2419
+ half_cycle = 0
2420
+
2421
+ for seg_idx in range(len(run_starts) - 1):
2422
+ start = run_starts[seg_idx]
2423
+ end = run_starts[seg_idx + 1]
2424
+ cycles[start:end] = current_cycle
2425
+ half_cycle += 1
2426
+
2427
+ # Complete cycle when we have both charge and discharge
2428
+ if half_cycle == 2:
2429
+ current_cycle += 1
2430
+ half_cycle = 0
2431
+
2432
+ y_label = r'dQ/dV (mAh V$^{-1}$)'
2433
+
2434
+ return (voltage, dqdv, cycles, charge_mask, discharge_mask, y_label)
2435
+
2436
+ else:
2437
+ raise ValueError(f"Unknown mode '{mode}'. Use 'gc', 'cpc', or 'dqdv'.")
2438
+
2439
+
2440
+ def read_csv_time_voltage(fname: str) -> Tuple[np.ndarray, np.ndarray]:
2441
+ """Read time (in hours) and voltage from a cycler CSV file.
2442
+
2443
+ Args:
2444
+ fname: Path to CSV file with columns like 'Total Time' and 'Voltage(V)'
2445
+
2446
+ Returns:
2447
+ (time_h, voltage) where time_h is in hours and voltage in volts
2448
+ """
2449
+ header, rows, _ = _load_csv_header_and_rows(fname)
2450
+
2451
+ # Build column index map
2452
+ name_to_idx = {h: i for i, h in enumerate(header)}
2453
+
2454
+ # Look for time and voltage columns (try multiple common names)
2455
+ time_idx = None
2456
+ for name in ['Total Time', 'Time', 'time/s', 'Time(s)', 'Test Time(s)']:
2457
+ if name in name_to_idx:
2458
+ time_idx = name_to_idx[name]
2459
+ break
2460
+
2461
+ voltage_idx = None
2462
+ for name in ['Voltage(V)', 'Voltage', 'Ewe/V', 'Voltage/V']:
2463
+ if name in name_to_idx:
2464
+ voltage_idx = name_to_idx[name]
2465
+ break
2466
+
2467
+ if time_idx is None:
2468
+ raise ValueError(f"CSV '{fname}' missing time column. Expected 'Total Time', 'Time', 'time/s', etc.")
2469
+ if voltage_idx is None:
2470
+ raise ValueError(f"CSV '{fname}' missing voltage column. Expected 'Voltage(V)', 'Voltage', 'Ewe/V', etc.")
2471
+
2472
+ # Parse data
2473
+ n = len(rows)
2474
+ time_data = np.empty(n, dtype=float)
2475
+ voltage_data = np.empty(n, dtype=float)
2476
+
2477
+ def _parse_time(val: str) -> float:
2478
+ """Parse time from string, handling HH:MM:SS format and numeric seconds."""
2479
+ if isinstance(val, (int, float)):
2480
+ return float(val)
2481
+ val = str(val).strip()
2482
+ # Try HH:MM:SS format
2483
+ if ':' in val:
2484
+ parts = val.split(':')
2485
+ try:
2486
+ if len(parts) == 3: # HH:MM:SS
2487
+ h, m, s = float(parts[0]), float(parts[1]), float(parts[2])
2488
+ return h * 3600 + m * 60 + s
2489
+ elif len(parts) == 2: # MM:SS
2490
+ m, s = float(parts[0]), float(parts[1])
2491
+ return m * 60 + s
2492
+ except Exception:
2493
+ pass
2494
+ # Try as plain number (seconds)
2495
+ try:
2496
+ return float(val)
2497
+ except Exception:
2498
+ return np.nan
2499
+
2500
+ def _to_float(val: str) -> float:
2501
+ try:
2502
+ return float(str(val).strip()) if val else np.nan
2503
+ except Exception:
2504
+ return np.nan
2505
+
2506
+ for k, row in enumerate(rows):
2507
+ if len(row) < len(header):
2508
+ row = row + [''] * (len(header) - len(row))
2509
+ time_data[k] = _parse_time(row[time_idx])
2510
+ voltage_data[k] = _to_float(row[voltage_idx])
2511
+
2512
+ # Convert time from seconds to hours
2513
+ time_h = time_data / 3600.0
2514
+
2515
+ # Remove NaN values
2516
+ mask = ~(np.isnan(time_h) | np.isnan(voltage_data))
2517
+ return time_h[mask], voltage_data[mask]
2518
+
2519
+
2520
+ def read_mpt_time_voltage(fname: str) -> Tuple[np.ndarray, np.ndarray]:
2521
+ """Read time (in hours) and voltage from a BioLogic .mpt file.
2522
+
2523
+ Args:
2524
+ fname: Path to .mpt file
2525
+
2526
+ Returns:
2527
+ (time_h, voltage) where time_h is in hours and voltage in volts
2528
+ """
2529
+ import re
2530
+
2531
+ # Read header to find number of header lines
2532
+ header_lines = 0
2533
+ with open(fname, 'r', encoding='utf-8', errors='ignore') as f:
2534
+ first_line = f.readline().strip()
2535
+ if not first_line.startswith('EC-Lab ASCII FILE'):
2536
+ raise ValueError(f"Not a valid EC-Lab .mpt file: {fname}")
2537
+
2538
+ for line in f:
2539
+ if line.startswith('Nb header lines'):
2540
+ match = re.search(r'Nb header lines\s*:\s*(\d+)', line)
2541
+ if match:
2542
+ header_lines = int(match.group(1))
2543
+ break
2544
+ if header_lines == 0:
2545
+ raise ValueError(f"Could not find header line count in {fname}")
2546
+
2547
+ # Read data
2548
+ data_lines = []
2549
+ column_names = []
2550
+
2551
+ with open(fname, 'r', encoding='utf-8', errors='ignore') as f:
2552
+ # Skip header lines
2553
+ for i in range(header_lines - 1):
2554
+ f.readline()
2555
+
2556
+ # Read column names
2557
+ header_line = f.readline().strip()
2558
+ column_names = [col.strip() for col in header_line.split('\t')]
2559
+
2560
+ # Read data lines
2561
+ for line in f:
2562
+ line = line.strip()
2563
+ if not line:
2564
+ continue
2565
+ try:
2566
+ # Replace comma decimal separator with period
2567
+ values = [float(val.replace(',', '.')) for val in line.split('\t')]
2568
+ if len(values) == len(column_names):
2569
+ data_lines.append(values)
2570
+ except ValueError:
2571
+ continue
2572
+
2573
+ if not data_lines:
2574
+ raise ValueError(f"No valid data found in {fname}")
2575
+
2576
+ # Convert to numpy array
2577
+ data = np.array(data_lines)
2578
+ col_map = {name: i for i, name in enumerate(column_names)}
2579
+
2580
+ # Look for time column (try multiple names)
2581
+ time_idx = None
2582
+ for name in ['time/s', 'Time/s', 'time', 'Time']:
2583
+ if name in col_map:
2584
+ time_idx = col_map[name]
2585
+ break
2586
+
2587
+ # Look for voltage column
2588
+ voltage_idx = None
2589
+ for name in ['Ewe/V', 'Voltage/V', 'Voltage', 'Ewe']:
2590
+ if name in col_map:
2591
+ voltage_idx = col_map[name]
2592
+ break
2593
+
2594
+ if time_idx is None:
2595
+ raise ValueError(f"MPT file '{fname}' missing time column")
2596
+ if voltage_idx is None:
2597
+ raise ValueError(f"MPT file '{fname}' missing voltage column")
2598
+
2599
+ time_s = data[:, time_idx]
2600
+ voltage = data[:, voltage_idx]
2601
+
2602
+ # Convert time from seconds to hours
2603
+ time_h = time_s / 3600.0
2604
+
2605
+ # Remove NaN values
2606
+ mask = ~(np.isnan(time_h) | np.isnan(voltage))
2607
+ return time_h[mask], voltage[mask]
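+
+
+ # Closing sketch (illustrative only; both file names are placeholders): the two
+ # time/voltage readers return the same (hours, volts) pair, so traces from a
+ # BioLogic .mpt file and a cycler CSV can be overlaid directly.
+ def _sketch_overlay_time_voltage(mpt_name: str = 'cell01.mpt', csv_name: str = 'cell01.csv') -> None:
+     import matplotlib.pyplot as plt
+     t1, v1 = read_mpt_time_voltage(mpt_name)
+     t2, v2 = read_csv_time_voltage(csv_name)
+     plt.plot(t1, v1, label='.mpt')
+     plt.plot(t2, v2, label='.csv')
+     plt.xlabel('Time (h)')
+     plt.ylabel('Voltage (V)')
+     plt.legend()
+     plt.show()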