ekgtools 1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ekgtools-1.0/PKG-INFO ADDED
@@ -0,0 +1,156 @@
1
+ Metadata-Version: 2.4
2
+ Name: ekgtools
3
+ Version: 1.0
4
+ Summary: ECG XML parsing, fast loading, and plotting utilities for research pipelines
5
+ Author: WYeung, Mat00, FigLing
6
+ License: MIT
7
+ Keywords: ECG,cardiology,parsing,PyTorch,signal-processing
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: numpy>=1.24
16
+ Requires-Dist: pandas>=2.2
17
+ Requires-Dist: scipy>=1.11
18
+ Requires-Dist: scikit-learn>=1.3
19
+ Requires-Dist: xmltodict>=0.13
20
+ Requires-Dist: wfdb>=4.1
21
+ Requires-Dist: tqdm>=4.65
22
+ Requires-Dist: matplotlib>=3.7
23
+ Requires-Dist: torch>=2.1
24
+ Requires-Dist: torchvision>=0.16
25
+ Requires-Dist: fsspec>=2023.12
26
+ Requires-Dist: s3fs>=2023.12
27
+ Requires-Dist: Pillow>=10.2
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.4; extra == "dev"
30
+ Requires-Dist: pytest-cov>=4.1; extra == "dev"
31
+ Requires-Dist: ruff>=0.5; extra == "dev"
32
+ Requires-Dist: black>=24.3; extra == "dev"
33
+ Requires-Dist: mypy>=1.8; extra == "dev"
34
+ Requires-Dist: ipykernel>=6.29; extra == "dev"
35
+
36
+ # ekgtools
37
+
38
+ Utilities for working with multi-format ECG waveforms in research and production settings.
39
+
40
+ ## Features
41
+ - Unified parser interface for Philips iECG, GE MUSE, ELI, WFDB, and MIMIC records.
42
+ - Centralised configuration with per-call overrides for filter settings and signal scaling.
43
+ - `ECGDataset` that reads from local storage or S3/MinIO buckets with transparent caching.
44
+ - Plotting helpers for rapid visual inspection of 12-lead rhythms.
45
+
46
+ ## Installation
47
+
48
+ ```bash
49
+ pip install ekgtools
50
+ ```
51
+
52
+ Python 3.10+ is required. Install the appropriate PyTorch/TorchVision wheels for your platform separately if needed.
53
+
54
+ ## Quick Start
55
+
56
+ ### Parse an ECG file
57
+
58
+ ```python
59
+ from ekgtools.parser import ECGParser
60
+
61
+ parser = ECGParser(
62
+ path="/data/ecg/philips_001.xml",
63
+ config_overrides={
64
+ "bandpass_lower": 1.0,
65
+ "bandpass_higher": 40.0,
66
+ "median_filter_size": 3,
67
+ },
68
+ )
69
+
70
+ ecg = parser.float_array # (12, n_samples)
71
+ metadata = parser.text_data # demographics & machine measurements
72
+ ```
73
+
74
+ ### Local datasets for PyTorch
75
+
76
+ ```python
77
+ import pandas as pd
78
+ from torch.utils.data import DataLoader
79
+
80
+ from ekgtools.dataset import ECGDataset
81
+
82
+ df = pd.read_csv("/data/labels.csv")
83
+
84
+ dataset = ECGDataset(
85
+ directory="/data/ecg",
86
+ df=df,
87
+ filename_column="filename",
88
+ label_column="label",
89
+ specify_leads=["I", "II", "V1", "V2", "V3", "V4", "V5", "V6"],
90
+ fast=True,
91
+ )
92
+
93
+ loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
94
+
95
+ signals, labels, complete = next(iter(loader))
96
+ ```
97
+
98
+ ### Stream from S3 with caching
99
+
100
+ ```python
101
+ from ekgtools.dataset import ECGDataset
102
+ from ekgtools.s3 import S3Setup
103
+
104
+ s3 = S3Setup(
105
+ bucket="ecg-data",
106
+ prefix="records",
107
+ endpoint_url="https://minio.example.com",
108
+ access_key_id="ACCESS",
109
+ secret_key="SECRET",
110
+ region_name="us-east-1",
111
+ use_ssl=True,
112
+ )
113
+
114
+ dataset = ECGDataset(
115
+ directory="public/12lead", # prefix under the bucket
116
+ df=df,
117
+ filename_column="filename",
118
+ label_column="label",
119
+ storage="s3",
120
+ s3_setup=s3,
121
+ cache_dir="~/.cache/ekgtools",
122
+ )
123
+ ```
124
+
125
+ ### Plotting helpers
126
+
127
+ ```python
128
+ import numpy as np
129
+ from ekgtools.plot import plot
130
+
131
+ signals = np.random.randn(12, 5000)
132
+ fig = plot(signals, sample_rate=500)
133
+ fig.savefig("preview.png", dpi=200)
134
+ ```
135
+
136
+ ## Configuration
137
+
138
+ Default values live in `ekgtools/config.py` and can be overridden per parser invocation via `config_overrides`. To reuse overrides across multiple parsers:
139
+
140
+ ```python
141
+ from ekgtools.config import resolve_config
142
+
143
+ custom_config = resolve_config(overrides={"bandpass_lower": 0.67})
144
+ parser = ECGParser(path, config=custom_config)
145
+ ```
146
+
147
+ ## Development
148
+
149
+ ```bash
150
+ pip install -e .[dev]
151
+ pytest
152
+ ```
153
+
154
+ ## License
155
+
156
+ MIT
ekgtools-1.0/README.md ADDED
@@ -0,0 +1,121 @@
1
+ # ekgtools
2
+
3
+ Utilities for working with multi-format ECG waveforms in research and production settings.
4
+
5
+ ## Features
6
+ - Unified parser interface for Philips iECG, GE MUSE, ELI, WFDB, and MIMIC records.
7
+ - Centralised configuration with per-call overrides for filter settings and signal scaling.
8
+ - `ECGDataset` that reads from local storage or S3/MinIO buckets with transparent caching.
9
+ - Plotting helpers for rapid visual inspection of 12-lead rhythms.
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ pip install ekgtools
15
+ ```
16
+
17
+ Python 3.10+ is required. Install the appropriate PyTorch/TorchVision wheels for your platform separately if needed.
18
+
19
+ ## Quick Start
20
+
21
+ ### Parse an ECG file
22
+
23
+ ```python
24
+ from ekgtools.parser import ECGParser
25
+
26
+ parser = ECGParser(
27
+ path="/data/ecg/philips_001.xml",
28
+ config_overrides={
29
+ "bandpass_lower": 1.0,
30
+ "bandpass_higher": 40.0,
31
+ "median_filter_size": 3,
32
+ },
33
+ )
34
+
35
+ ecg = parser.float_array # (12, n_samples)
36
+ metadata = parser.text_data # demographics & machine measurements
37
+ ```
38
+
39
+ ### Local datasets for PyTorch
40
+
41
+ ```python
42
+ import pandas as pd
43
+ from torch.utils.data import DataLoader
44
+
45
+ from ekgtools.dataset import ECGDataset
46
+
47
+ df = pd.read_csv("/data/labels.csv")
48
+
49
+ dataset = ECGDataset(
50
+ directory="/data/ecg",
51
+ df=df,
52
+ filename_column="filename",
53
+ label_column="label",
54
+ specify_leads=["I", "II", "V1", "V2", "V3", "V4", "V5", "V6"],
55
+ fast=True,
56
+ )
57
+
58
+ loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
59
+
60
+ signals, labels, complete = next(iter(loader))
61
+ ```
62
+
63
+ ### Stream from S3 with caching
64
+
65
+ ```python
66
+ from ekgtools.dataset import ECGDataset
67
+ from ekgtools.s3 import S3Setup
68
+
69
+ s3 = S3Setup(
70
+ bucket="ecg-data",
71
+ prefix="records",
72
+ endpoint_url="https://minio.example.com",
73
+ access_key_id="ACCESS",
74
+ secret_key="SECRET",
75
+ region_name="us-east-1",
76
+ use_ssl=True,
77
+ )
78
+
79
+ dataset = ECGDataset(
80
+ directory="public/12lead", # prefix under the bucket
81
+ df=df,
82
+ filename_column="filename",
83
+ label_column="label",
84
+ storage="s3",
85
+ s3_setup=s3,
86
+ cache_dir="~/.cache/ekgtools",
87
+ )
88
+ ```
89
+
90
+ ### Plotting helpers
91
+
92
+ ```python
93
+ import numpy as np
94
+ from ekgtools.plot import plot
95
+
96
+ signals = np.random.randn(12, 5000)
97
+ fig = plot(signals, sample_rate=500)
98
+ fig.savefig("preview.png", dpi=200)
99
+ ```
100
+
101
+ ## Configuration
102
+
103
+ Default values live in `ekgtools/config.py` and can be overridden per parser invocation via `config_overrides`. To reuse overrides across multiple parsers:
104
+
105
+ ```python
106
+ from ekgtools.config import resolve_config
107
+
108
+ custom_config = resolve_config(overrides={"bandpass_lower": 0.67})
109
+ parser = ECGParser(path, config=custom_config)
110
+ ```
111
+
112
+ ## Development
113
+
114
+ ```bash
115
+ pip install -e .[dev]
116
+ pytest
117
+ ```
118
+
119
+ ## License
120
+
121
+ MIT
@@ -0,0 +1,19 @@
1
+ from importlib.metadata import PackageNotFoundError, version
2
+
3
+ try:
4
+ __version__ = version("ekgtools")
5
+ except PackageNotFoundError:
6
+ __version__ = "0.0.0"
7
+
8
+ # Bring top-level API into the package namespace
9
+ from .dataset import ECGDataset
10
+ from .parser import ECGParser
11
+ from .plot import plot, plot_one
12
+
13
+ __all__ = [
14
+ "ECGDataset",
15
+ "ECGParser",
16
+ "plot",
17
+ "plot_one",
18
+ "__version__",
19
+ ]
@@ -0,0 +1,21 @@
1
+ DEFAULT_CONFIG = {
2
+ 'sig_arr_length': 5000,
3
+ 'bandpass_lower': 0.5,
4
+ 'bandpass_higher': 100,
5
+ 'median_filter_size': 5,
6
+ 'default_8_leads': ["I", "II", "V1", "V2", "V3", "V4", "V5", "V6"],
7
+ 'standard_12_leads': ["I", "II", "III", "V1", "V2", "V3", "V4", "V5", "V6", "aVR", "aVL", "aVF"]
8
+ }
9
+
10
+
11
+ def resolve_config(user_config: dict | None = None, overrides: dict | None = None) -> dict:
12
+ """Merge defaults with optional user-provided config and overrides."""
13
+ merged = DEFAULT_CONFIG.copy()
14
+ if user_config:
15
+ merged.update(user_config)
16
+ if overrides:
17
+ merged.update({k: v for k, v in overrides.items() if v is not None})
18
+ return merged
19
+
20
+
21
+ config = resolve_config()
@@ -0,0 +1,271 @@
1
+ import os
2
+ import posixpath
3
+ import tempfile
4
+ import warnings
5
+ from io import BytesIO
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import scipy
10
+ import torch
11
+ import torch.nn.functional as F
12
+ from matplotlib import pyplot as plt
13
+ from torch.utils.data import Dataset
14
+ import torchvision.transforms as T
15
+ from PIL import Image
16
+
17
+ from ekgtools.config import config
18
+ from ekgtools.parser import ECGParser
19
+ from ekgtools.plot import plot, plot_one
20
+ from ekgtools.s3 import S3Setup
21
+
22
+
23
class ECGDataset(Dataset):
    """PyTorch dataset that parses one ECG file per dataframe row.

    Files are read either from a local directory or from an S3 bucket (with a
    local download cache). Each item is parsed with ``ECGParser`` and returned
    as a lead tensor or, when ``output_image`` is set, as a rendered plot.
    """

    # Derived leads: requested name -> (minuend lead, subtrahend lead).
    # Checked in this order; the first one present in ``specify_leads`` wins.
    _DERIVED_LEADS = {
        "V3-V2": ("V3", "V2"),
        "V4-V3": ("V4", "V3"),
    }

    def __init__(self,
                 directory: str,
                 df: pd.DataFrame,
                 filename_column: str,
                 label_column: str,
                 output_image: bool = False,
                 metadata_fields: list = ['age','gender'],
                 add_meta: bool = False,
                 specify_leads: list = config['standard_12_leads'],
                 sig_arr_length: int = 5000,
                 scale_method: str = 'standard',
                 order: list = ['channels','length'],
                 fast: bool = True,
                 storage: str = 'local',
                 s3_setup: S3Setup | None = None,
                 cache_dir: str | None = None):
        """
        Args:
            directory: local folder containing the ECG files, or (when
                storage='s3') the key prefix under the configured bucket/prefix
            df: dataframe containing filenames, labels, and optional metadata;
                rows with a missing label are dropped (with a warning)
            filename_column: name of column in df with file names
            label_column: name of column in df with labels
            output_image: return a rendered plot tensor instead of the signal
                array (only 1 or 12 leads are supported in this mode)
            metadata_fields: metadata fields to use; only 'age' and 'gender'
                are accepted
            add_meta: whether to return metadata alongside the signal
            specify_leads: list of leads to extract (e.g., ['I', 'II', 'V1']);
                'V3-V2' or 'V4-V3' requests a derived difference lead
            sig_arr_length: target length to resample ECG signals
            scale_method: scaling method forwarded to ECGParser
            order: ['channels','length'] (default) or ['length','channels']
            fast: use the parser's fast path (incompatible with add_meta)
            storage: 'local' or 's3' data source
            s3_setup: configuration helper when storage='s3'
            cache_dir: local cache root for S3 downloads (defaults to a
                folder under the system temp directory)

        Raises:
            NotImplementedError: a metadata field other than age/gender is given
            ValueError: fast=True together with add_meta=True, an unknown
                storage mode, or storage='s3' without s3_setup
        """
        self.base_directory = directory

        before = len(df)
        self.df = df.dropna(subset=[label_column])
        after = len(self.df)
        if before - after:
            warnings.warn(f"Dropped {before - after} rows due to missing labels!")

        self.filename_column_idx = self.df.columns.get_loc(filename_column)
        self.label_column = label_column

        # Number of classes is inferred from the distinct labels present.
        self.num_class = len(self.df[label_column].unique())

        self.add_meta = metadata_fields if add_meta else []
        for meta in metadata_fields:
            if meta not in ['age','gender']:
                raise NotImplementedError('Only age and gender can be added as metadata!')
        self.output_image = output_image
        self.order = order
        # Copy list inputs so later changes to the shared default (the global
        # config list) or to the caller's list cannot leak into this dataset.
        self.specify_leads = list(specify_leads) if isinstance(specify_leads, list) else specify_leads
        self.sig_arr_length = sig_arr_length
        self.scale_method = scale_method
        self.fast = fast
        if self.fast and add_meta:
            raise ValueError('Fast parsing does not allow for metadata!')

        storage_mode = storage.lower()
        if storage_mode not in {'local', 's3'}:
            raise ValueError("storage must be either 'local' or 's3'")
        self.storage_mode = storage_mode

        if self.storage_mode == 'local':
            self.local_root = os.path.abspath(os.path.expanduser(directory))
            self.cache_dir = None
            self.s3_setup = None
            self.s3_prefix = None
        else:
            if s3_setup is None:
                raise ValueError("s3_setup must be provided when storage='s3'")
            self.s3_setup = s3_setup
            # Normalise the directory into a clean POSIX-style key prefix.
            prefix = directory.strip('/\\')
            prefix = posixpath.normpath(prefix) if prefix else ''
            self.s3_prefix = '' if prefix in {'.', ''} else prefix
            cache_root = cache_dir or os.path.join(tempfile.gettempdir(), 'ekgtools_cache')
            self.cache_dir = os.path.abspath(os.path.expanduser(cache_root))
            os.makedirs(self.cache_dir, exist_ok=True)
            self.local_root = None

    @staticmethod
    def create_1d_array(ecg_dict,
                        sig_arr_length,
                        specify_leads):
        """Stack the requested leads into one array of fixed length.

        Leads absent from ``ecg_dict`` are zero-filled; leads whose length
        differs from ``sig_arr_length`` are resampled.

        Returns:
            ``(array, complete_leads, None)`` where ``array`` has one row per
            requested lead and ``complete_leads`` is True only when every
            requested lead was present.
        """
        leads = []
        complete_leads = True
        for lead in specify_leads:
            if lead in ecg_dict:
                signal = ecg_dict[lead]
                if len(signal) != sig_arr_length:
                    signal = scipy.signal.resample(signal, sig_arr_length)  # resample using Fourier method
            else:
                print(f"Lead {lead} missing — filling with zeros")
                signal = np.zeros(sig_arr_length)  # zero-filled array
                complete_leads = False
            leads.append(signal)
        return np.array(leads), complete_leads, None

    def _build_s3_relative_key(self, filename: str) -> str:
        """Join ``filename`` onto the dataset's S3 prefix as a normalised key.

        Raises:
            RuntimeError: the dataset is not in S3 mode.
            ValueError: the normalised key leaves the configured prefix.
        """
        if self.storage_mode != 's3':
            raise RuntimeError("_build_s3_relative_key called while storage_mode is not 's3'")

        clean_name = str(filename).lstrip('/\\').replace('\\', '/')
        if self.s3_prefix:
            relative_key = posixpath.normpath(posixpath.join(self.s3_prefix, clean_name))
        else:
            relative_key = posixpath.normpath(clean_name)

        # Compare whole path components: a bare startswith('..') would wrongly
        # reject names such as '..foo', and would miss 'prefix/../other', which
        # normalises to a sibling of the prefix rather than to '../...'.
        escapes_root = relative_key == '..' or relative_key.startswith('../')
        inside_prefix = (
            not self.s3_prefix
            or relative_key == self.s3_prefix
            or relative_key.startswith(self.s3_prefix + '/')
        )
        if escapes_root or not inside_prefix:
            raise ValueError(f"Filename '{filename}' resolves outside the configured S3 prefix")
        return '' if relative_key == '.' else relative_key

    def _download_s3_key(self, relative_key: str, ensure_companion: bool = True) -> str:
        """Return the cached local path for ``relative_key``, downloading if needed.

        For ``.hea``/``.dat`` keys the sibling file with the other extension is
        fetched as well when ``ensure_companion`` is True.

        Raises:
            RuntimeError: the dataset is not in S3 mode.
            ValueError: ``relative_key`` is empty.
            FileNotFoundError: the object does not exist in the bucket.
        """
        if self.storage_mode != 's3':
            raise RuntimeError("_download_s3_key called while storage_mode is not 's3'")
        if not relative_key:
            raise ValueError("Relative key must not be empty for S3 downloads")

        local_path = os.path.join(self.cache_dir, relative_key.replace('/', os.sep))

        if not os.path.exists(local_path):
            fs = self.s3_setup._get_fs()
            directory = os.path.dirname(local_path)
            if directory:
                os.makedirs(directory, exist_ok=True)
            key_parts = []
            if self.s3_setup.prefix:
                key_parts.append(self.s3_setup.prefix.strip('/'))
            key_parts.append(relative_key)
            s3_key = posixpath.join(*key_parts)
            source = f"{self.s3_setup.bucket}/{s3_key}" if s3_key else self.s3_setup.bucket

            # Download to a per-process temporary name and rename into place so
            # an interrupted or concurrent download (e.g. several DataLoader
            # workers) never leaves a truncated file that looks like a cache hit.
            tmp_path = f"{local_path}.part-{os.getpid()}"
            try:
                fs.get(source, tmp_path)
                os.replace(tmp_path, local_path)
            except FileNotFoundError as exc:
                raise FileNotFoundError(f"S3 object not found: s3://{source}") from exc
            finally:
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

        if ensure_companion:
            base, ext = os.path.splitext(relative_key)
            ext = ext.lower()
            if ext in {'.hea', '.dat'}:
                companion_key = base + ('.dat' if ext == '.hea' else '.hea')
                companion_local = os.path.join(self.cache_dir, companion_key.replace('/', os.sep))
                if not os.path.exists(companion_local):
                    # ensure_companion=False stops the two files fetching each other forever.
                    self._download_s3_key(companion_key, ensure_companion=False)

        return local_path

    def _resolve_path(self, filename: str) -> str:
        """Map a dataframe filename to a readable local path.

        In local mode the path is resolved under the dataset directory; in S3
        mode the object is downloaded into the cache first.

        Raises:
            ValueError: the filename resolves outside the dataset root/prefix.
        """
        if self.storage_mode == 'local':
            candidate = os.path.abspath(os.path.join(self.local_root, filename))
            if os.path.commonpath([self.local_root, candidate]) != self.local_root:
                raise ValueError(f"Filename '{filename}' resolves outside the dataset directory")
            return candidate

        relative_key = self._build_s3_relative_key(filename)
        return self._download_s3_key(relative_key)

    def __len__(self):
        """Number of rows that have a label."""
        return len(self.df)

    def __getitem__(self, idx):
        """Parse row ``idx`` and return its tensors.

        Returns:
            ``(output, label, complete_leads)``, or
            ``(output, meta, label, complete_leads)`` when metadata is enabled.
            ``output`` is an image tensor in image mode, otherwise the lead
            tensor. ``label`` is one-hot encoded (float32) when more than one
            class is present. ``complete_leads`` is always True in image mode.
        """
        filename = str(self.df.iloc[idx, self.filename_column_idx])
        path = self._resolve_path(filename)

        try:
            # Forward scale_method: it was previously accepted by the dataset
            # but silently ignored, so the parser always used its own default.
            ep = ECGParser(path, fast=self.fast, scale_method=self.scale_method)
            ecg_dict = ep.lead
        except Exception as e:
            print(f"[ERROR] Failed to parse ECG file {path}: {e}")
            raise

        # Output
        if self.output_image:
            complete_leads = True
            if len(self.specify_leads) == 1:
                fig = plot_one(ep.lead[self.specify_leads[0]])
            elif len(self.specify_leads) == 12:
                fig = plot(ep.float_array, sample_rate=500, lead_index=self.specify_leads, columns=4)
            else:
                raise NotImplementedError('ECGDataset is only configured for 12 lead and single lead plots!')

            # Save plot to memory buffer. Use the figure we just created rather
            # than whatever pyplot currently considers the active figure.
            buf = BytesIO()
            fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
            buf.seek(0)
            plt.close(fig)

            # Convert to image tensor
            img = Image.open(buf).convert('RGB')
            output = T.ToTensor()(img)  # shape: [3, H, W]
        else:
            # Lead derivation
            derived = next((name for name in self._DERIVED_LEADS if name in self.specify_leads), None)
            if derived is not None:
                minuend, subtrahend = self._DERIVED_LEADS[derived]
                first, complete_first, _ = self.create_1d_array(ecg_dict, self.sig_arr_length, [minuend])
                second, complete_second, _ = self.create_1d_array(ecg_dict, self.sig_arr_length, [subtrahend])
                output = first - second
                complete_leads = complete_first and complete_second
            else:
                output, complete_leads, _ = self.create_1d_array(ecg_dict, self.sig_arr_length, self.specify_leads)

            if output is None or output.size == 0:
                raise ValueError(f"[ERROR] Output is empty for file: {path}; ecg_dict: {ecg_dict}")

            if len(self.specify_leads) == 1:
                output = torch.tensor(output, dtype=torch.float32).unsqueeze(0)
            else:
                output = torch.tensor(output, dtype=torch.float32)
                if self.order == ['length','channels']:
                    output = output.permute(1, 0).contiguous()

        # Label
        label = self.df[self.label_column].iloc[idx]
        label = torch.tensor([label], dtype=torch.int64)
        if self.num_class > 1:
            label = F.one_hot(label, num_classes=self.num_class).squeeze().to(torch.float32)

        # Add Metadata and return
        if self.add_meta:
            age = ep.text_data.get('age',0)
            gender = ep.text_data.get('gender',0)
            age_onehot = self.bucketize_age_np(age).reshape(-1, 1)
            gender = np.array(gender).reshape(-1, 1)
            # Metadata vector: gender first, then the six age-bucket indicators.
            meta = np.concatenate([gender, age_onehot], axis=0).squeeze()
            meta = torch.tensor(meta, dtype=torch.float32)
            return output, meta, label, complete_leads
        return output, label, complete_leads

    def bucketize_age_np(self, age_array):
        """
        age_array: (B,) numpy array of float, age values
        return: (B, 6) one-hot encoded age buckets
        """
        # Define the cut:[40, 50, 60, 70, 80]
        buckets = np.digitize(age_array, bins=[40, 50, 60, 70, 80])  # returns 0~5
        one_hot = np.eye(6)[buckets]  # (B, 6) one-hot encoding
        return one_hot
+ return one_hot
@@ -0,0 +1,85 @@
1
+ import os
2
+ import xmltodict
3
+
4
+ from ekgtools.config import resolve_config
5
+ from ekgtools.parser.eli import ELIXMLParser
6
+ from ekgtools.parser.iecg import IECGXMLParser
7
+ from ekgtools.parser.mimic import MIMICParser
8
+ from ekgtools.parser.muse import MUSEXMLParser
9
+ from ekgtools.parser.wfdb import WFDBParser
10
+
11
+ class ECGParser:
12
+
13
+ def __init__(self, path, config: dict | None = None, config_overrides: dict | None = None, waveform_type: str = 'full', normalize: bool = False, scale_method: str | None = 'standard', fast: bool = True):
14
+ effective_config = resolve_config(config, config_overrides)
15
+ self.infer_filetype(path)
16
+ if self.filetype == 'iecg':
17
+ self.obj = IECGXMLParser(config=effective_config, waveform_type=waveform_type, scale_method=scale_method)
18
+ with open(path, 'rb') as fd:
19
+ self.obj.xml_dict = xmltodict.parse(fd.read().decode('utf8'))
20
+ if fast:
21
+ self.obj.parse_fast()
22
+ else:
23
+ self.obj.parse()
24
+ elif self.filetype == 'muse':
25
+ self.obj = MUSEXMLParser(config=effective_config, waveform_type=waveform_type, scale_method=scale_method)
26
+ with open(path, 'rb') as fd:
27
+ self.obj.xml_dict = xmltodict.parse(fd.read().decode('utf8'))
28
+ if fast:
29
+ self.obj.parse_fast()
30
+ else:
31
+ self.obj.parse()
32
+ elif self.filetype == 'eli':
33
+ self.obj = ELIXMLParser(config=effective_config, waveform_type=waveform_type, scale_method=scale_method)
34
+ with open(path, 'rb') as fd:
35
+ self.obj.xml_dict = xmltodict.parse(fd.read().decode('utf8'))
36
+ if fast:
37
+ self.obj.parse_fast()
38
+ else:
39
+ self.obj.parse()
40
+ elif self.filetype == 'mimic':
41
+ self.obj = MIMICParser(config=effective_config, waveform_type=waveform_type, normalize=normalize)
42
+ with open(path, 'r') as fd:
43
+ self.obj.load_from_json(fd.read())
44
+ elif self.filetype == 'wfdb':
45
+ self.obj = WFDBParser(config=effective_config, waveform_type=waveform_type, scale_method=scale_method)
46
+ self.obj.parse(path)
47
+ else:
48
+ raise ValueError(f"Unsupported ECG file type for path: {path}")
49
+
50
+ def __getattr__(self, name):
51
+ return getattr(self.obj, name)
52
+
53
+ def __dir__(self):
54
+ return list(set(dir(type(self)) + dir(self.obj)))
55
+
56
+ def infer_filetype(self, path):
57
+ _, ext = os.path.splitext(path)
58
+ ext = ext.lower()
59
+
60
+ if ext in {'.hea', '.dat'}:
61
+ self.filetype = 'wfdb'
62
+ return
63
+
64
+ wfdb_base_exists = os.path.exists(f"{path}.hea") or os.path.exists(f"{path}.dat")
65
+ if wfdb_base_exists:
66
+ self.filetype = 'wfdb'
67
+ return
68
+ if ext == '.json':
69
+ self.filetype = 'mimic'
70
+ return
71
+
72
+ if not os.path.exists(path):
73
+ self.filetype = 'unknown'
74
+ return
75
+
76
+ with open(path, 'r', encoding='utf8', errors='ignore') as f:
77
+ file_str = f.read()
78
+ if 'MuseInfo' in file_str:
79
+ self.filetype = 'muse'
80
+ elif 'PhilipsECG' in file_str:
81
+ self.filetype = 'iecg'
82
+ elif 'EliEcg' in file_str:
83
+ self.filetype = 'eli'
84
+ else:
85
+ self.filetype = 'unknown'