ekgtools 1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ekgtools-1.0/PKG-INFO +156 -0
- ekgtools-1.0/README.md +121 -0
- ekgtools-1.0/ekgtools/__init__.py +19 -0
- ekgtools-1.0/ekgtools/config.py +21 -0
- ekgtools-1.0/ekgtools/dataset.py +271 -0
- ekgtools-1.0/ekgtools/parser/__init__.py +85 -0
- ekgtools-1.0/ekgtools/parser/base.py +156 -0
- ekgtools-1.0/ekgtools/parser/eli.py +153 -0
- ekgtools-1.0/ekgtools/parser/iecg.py +142 -0
- ekgtools-1.0/ekgtools/parser/mimic.py +355 -0
- ekgtools-1.0/ekgtools/parser/muse.py +135 -0
- ekgtools-1.0/ekgtools/parser/wfdb.py +121 -0
- ekgtools-1.0/ekgtools/plot.py +192 -0
- ekgtools-1.0/ekgtools/s3.py +64 -0
- ekgtools-1.0/ekgtools/utils/__init__.py +3 -0
- ekgtools-1.0/ekgtools/utils/eli.py +367 -0
- ekgtools-1.0/ekgtools/utils/iecg.py +464 -0
- ekgtools-1.0/ekgtools/utils/iecg14bit.py +123 -0
- ekgtools-1.0/ekgtools/utils/wfdb_func.py +158 -0
- ekgtools-1.0/ekgtools.egg-info/PKG-INFO +156 -0
- ekgtools-1.0/ekgtools.egg-info/SOURCES.txt +24 -0
- ekgtools-1.0/ekgtools.egg-info/dependency_links.txt +1 -0
- ekgtools-1.0/ekgtools.egg-info/requires.txt +21 -0
- ekgtools-1.0/ekgtools.egg-info/top_level.txt +1 -0
- ekgtools-1.0/pyproject.toml +57 -0
- ekgtools-1.0/setup.cfg +4 -0
ekgtools-1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ekgtools
|
|
3
|
+
Version: 1.0
|
|
4
|
+
Summary: ECG XML parsing, fast loading, and plotting utilities for research pipelines
|
|
5
|
+
Author: WYeung, Mat00, FigLing
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: ECG,cardiology,parsing,PyTorch,signal-processing
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
|
|
13
|
+
Requires-Python: >=3.10
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: numpy>=1.24
|
|
16
|
+
Requires-Dist: pandas>=2.2
|
|
17
|
+
Requires-Dist: scipy>=1.11
|
|
18
|
+
Requires-Dist: scikit-learn>=1.3
|
|
19
|
+
Requires-Dist: xmltodict>=0.13
|
|
20
|
+
Requires-Dist: wfdb>=4.1
|
|
21
|
+
Requires-Dist: tqdm>=4.65
|
|
22
|
+
Requires-Dist: matplotlib>=3.7
|
|
23
|
+
Requires-Dist: torch>=2.1
|
|
24
|
+
Requires-Dist: torchvision>=0.16
|
|
25
|
+
Requires-Dist: fsspec>=2023.12
|
|
26
|
+
Requires-Dist: s3fs>=2023.12
|
|
27
|
+
Requires-Dist: Pillow>=10.2
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=7.4; extra == "dev"
|
|
30
|
+
Requires-Dist: pytest-cov>=4.1; extra == "dev"
|
|
31
|
+
Requires-Dist: ruff>=0.5; extra == "dev"
|
|
32
|
+
Requires-Dist: black>=24.3; extra == "dev"
|
|
33
|
+
Requires-Dist: mypy>=1.8; extra == "dev"
|
|
34
|
+
Requires-Dist: ipykernel>=6.29; extra == "dev"
|
|
35
|
+
|
|
36
|
+
# ekgtools
|
|
37
|
+
|
|
38
|
+
Utilities for working with multi-format ECG waveforms in research and production settings.
|
|
39
|
+
|
|
40
|
+
## Features
|
|
41
|
+
- Unified parser interface for Philips iECG, GE MUSE, ELI, WFDB, and MIMIC records.
|
|
42
|
+
- Centralised configuration with per-call overrides for filter settings and signal scaling.
|
|
43
|
+
- `ECGDataset` that reads from local storage or S3/MinIO buckets with transparent caching.
|
|
44
|
+
- Plotting helpers for rapid visual inspection of 12-lead rhythms.
|
|
45
|
+
|
|
46
|
+
## Installation
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install ekgtools
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Python 3.10+ is required. Install the appropriate PyTorch/TorchVision wheels for your platform separately if needed.
|
|
53
|
+
|
|
54
|
+
## Quick Start
|
|
55
|
+
|
|
56
|
+
### Parse an ECG file
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from ekgtools.parser import ECGParser
|
|
60
|
+
|
|
61
|
+
parser = ECGParser(
|
|
62
|
+
path="/data/ecg/philips_001.xml",
|
|
63
|
+
config_overrides={
|
|
64
|
+
"bandpass_lower": 1.0,
|
|
65
|
+
"bandpass_higher": 40.0,
|
|
66
|
+
"median_filter_size": 3,
|
|
67
|
+
},
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
ecg = parser.float_array # (12, n_samples)
|
|
71
|
+
metadata = parser.text_data # demographics & machine measurements
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Local datasets for PyTorch
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
import pandas as pd
|
|
78
|
+
from torch.utils.data import DataLoader
|
|
79
|
+
|
|
80
|
+
from ekgtools.dataset import ECGDataset
|
|
81
|
+
|
|
82
|
+
df = pd.read_csv("/data/labels.csv")
|
|
83
|
+
|
|
84
|
+
dataset = ECGDataset(
|
|
85
|
+
directory="/data/ecg",
|
|
86
|
+
df=df,
|
|
87
|
+
filename_column="filename",
|
|
88
|
+
label_column="label",
|
|
89
|
+
specify_leads=["I", "II", "V1", "V2", "V3", "V4", "V5", "V6"],
|
|
90
|
+
fast=True,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
|
|
94
|
+
|
|
95
|
+
signals, labels, complete = next(iter(loader))
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Stream from S3 with caching
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from ekgtools.dataset import ECGDataset
|
|
102
|
+
from ekgtools.s3 import S3Setup
|
|
103
|
+
|
|
104
|
+
s3 = S3Setup(
|
|
105
|
+
bucket="ecg-data",
|
|
106
|
+
prefix="records",
|
|
107
|
+
endpoint_url="https://minio.example.com",
|
|
108
|
+
access_key_id="ACCESS",
|
|
109
|
+
secret_key="SECRET",
|
|
110
|
+
region_name="us-east-1",
|
|
111
|
+
use_ssl=True,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
dataset = ECGDataset(
|
|
115
|
+
directory="public/12lead", # path under the S3Setup prefix, i.e. records/public/12lead
|
|
116
|
+
df=df,
|
|
117
|
+
filename_column="filename",
|
|
118
|
+
label_column="label",
|
|
119
|
+
storage="s3",
|
|
120
|
+
s3_setup=s3,
|
|
121
|
+
cache_dir="~/.cache/ekgtools",
|
|
122
|
+
)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Plotting helpers
|
|
126
|
+
|
|
127
|
+
```python
|
|
128
|
+
import numpy as np
|
|
129
|
+
from ekgtools.plot import plot
|
|
130
|
+
|
|
131
|
+
signals = np.random.randn(12, 5000)
|
|
132
|
+
fig = plot(signals, sample_rate=500)
|
|
133
|
+
fig.savefig("preview.png", dpi=200)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Configuration
|
|
137
|
+
|
|
138
|
+
Default values live in `ekgtools/config.py` and can be overridden per parser invocation via `config_overrides`. To reuse overrides across multiple parsers:
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from ekgtools.config import resolve_config
|
|
142
|
+
|
|
143
|
+
custom_config = resolve_config(overrides={"bandpass_lower": 0.67})
|
|
144
|
+
parser = ECGParser(path, config=custom_config)
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Development
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
pip install -e .[dev]
|
|
151
|
+
pytest
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## License
|
|
155
|
+
|
|
156
|
+
MIT
|
ekgtools-1.0/README.md
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# ekgtools
|
|
2
|
+
|
|
3
|
+
Utilities for working with multi-format ECG waveforms in research and production settings.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
- Unified parser interface for Philips iECG, GE MUSE, ELI, WFDB, and MIMIC records.
|
|
7
|
+
- Centralised configuration with per-call overrides for filter settings and signal scaling.
|
|
8
|
+
- `ECGDataset` that reads from local storage or S3/MinIO buckets with transparent caching.
|
|
9
|
+
- Plotting helpers for rapid visual inspection of 12-lead rhythms.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install ekgtools
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Python 3.10+ is required. Install the appropriate PyTorch/TorchVision wheels for your platform separately if needed.
|
|
18
|
+
|
|
19
|
+
## Quick Start
|
|
20
|
+
|
|
21
|
+
### Parse an ECG file
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from ekgtools.parser import ECGParser
|
|
25
|
+
|
|
26
|
+
parser = ECGParser(
|
|
27
|
+
path="/data/ecg/philips_001.xml",
|
|
28
|
+
config_overrides={
|
|
29
|
+
"bandpass_lower": 1.0,
|
|
30
|
+
"bandpass_higher": 40.0,
|
|
31
|
+
"median_filter_size": 3,
|
|
32
|
+
},
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
ecg = parser.float_array # (12, n_samples)
|
|
36
|
+
metadata = parser.text_data # demographics & machine measurements
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Local datasets for PyTorch
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import pandas as pd
|
|
43
|
+
from torch.utils.data import DataLoader
|
|
44
|
+
|
|
45
|
+
from ekgtools.dataset import ECGDataset
|
|
46
|
+
|
|
47
|
+
df = pd.read_csv("/data/labels.csv")
|
|
48
|
+
|
|
49
|
+
dataset = ECGDataset(
|
|
50
|
+
directory="/data/ecg",
|
|
51
|
+
df=df,
|
|
52
|
+
filename_column="filename",
|
|
53
|
+
label_column="label",
|
|
54
|
+
specify_leads=["I", "II", "V1", "V2", "V3", "V4", "V5", "V6"],
|
|
55
|
+
fast=True,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
|
|
59
|
+
|
|
60
|
+
signals, labels, complete = next(iter(loader))
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Stream from S3 with caching
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
from ekgtools.dataset import ECGDataset
|
|
67
|
+
from ekgtools.s3 import S3Setup
|
|
68
|
+
|
|
69
|
+
s3 = S3Setup(
|
|
70
|
+
bucket="ecg-data",
|
|
71
|
+
prefix="records",
|
|
72
|
+
endpoint_url="https://minio.example.com",
|
|
73
|
+
access_key_id="ACCESS",
|
|
74
|
+
secret_key="SECRET",
|
|
75
|
+
region_name="us-east-1",
|
|
76
|
+
use_ssl=True,
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
dataset = ECGDataset(
|
|
80
|
+
directory="public/12lead", # path under the S3Setup prefix, i.e. records/public/12lead
|
|
81
|
+
df=df,
|
|
82
|
+
filename_column="filename",
|
|
83
|
+
label_column="label",
|
|
84
|
+
storage="s3",
|
|
85
|
+
s3_setup=s3,
|
|
86
|
+
cache_dir="~/.cache/ekgtools",
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Plotting helpers
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
import numpy as np
|
|
94
|
+
from ekgtools.plot import plot
|
|
95
|
+
|
|
96
|
+
signals = np.random.randn(12, 5000)
|
|
97
|
+
fig = plot(signals, sample_rate=500)
|
|
98
|
+
fig.savefig("preview.png", dpi=200)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Configuration
|
|
102
|
+
|
|
103
|
+
Default values live in `ekgtools/config.py` and can be overridden per parser invocation via `config_overrides`. To reuse overrides across multiple parsers:
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from ekgtools.config import resolve_config
|
|
107
|
+
|
|
108
|
+
custom_config = resolve_config(overrides={"bandpass_lower": 0.67})
|
|
109
|
+
parser = ECGParser(path, config=custom_config)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Development
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
pip install -e .[dev]
|
|
116
|
+
pytest
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## License
|
|
120
|
+
|
|
121
|
+
MIT
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
__version__ = version("ekgtools")
|
|
5
|
+
except PackageNotFoundError:
|
|
6
|
+
__version__ = "0.0.0"
|
|
7
|
+
|
|
8
|
+
# Bring top-level API into the package namespace
|
|
9
|
+
from .dataset import ECGDataset
|
|
10
|
+
from .parser import ECGParser
|
|
11
|
+
from .plot import plot, plot_one
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"ECGDataset",
|
|
15
|
+
"ECGParser",
|
|
16
|
+
"plot",
|
|
17
|
+
"plot_one",
|
|
18
|
+
"__version__",
|
|
19
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
DEFAULT_CONFIG = {
|
|
2
|
+
'sig_arr_length': 5000,
|
|
3
|
+
'bandpass_lower': 0.5,
|
|
4
|
+
'bandpass_higher': 100,
|
|
5
|
+
'median_filter_size': 5,
|
|
6
|
+
'default_8_leads': ["I", "II", "V1", "V2", "V3", "V4", "V5", "V6"],
|
|
7
|
+
'standard_12_leads': ["I", "II", "III", "V1", "V2", "V3", "V4", "V5", "V6", "aVR", "aVL", "aVF"]
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def resolve_config(user_config: dict | None = None, overrides: dict | None = None) -> dict:
|
|
12
|
+
"""Merge defaults with optional user-provided config and overrides."""
|
|
13
|
+
merged = DEFAULT_CONFIG.copy()
|
|
14
|
+
if user_config:
|
|
15
|
+
merged.update(user_config)
|
|
16
|
+
if overrides:
|
|
17
|
+
merged.update({k: v for k, v in overrides.items() if v is not None})
|
|
18
|
+
return merged
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
config = resolve_config()
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import posixpath
|
|
3
|
+
import tempfile
|
|
4
|
+
import warnings
|
|
5
|
+
from io import BytesIO
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import scipy
|
|
10
|
+
import torch
|
|
11
|
+
import torch.nn.functional as F
|
|
12
|
+
from matplotlib import pyplot as plt
|
|
13
|
+
from torch.utils.data import Dataset
|
|
14
|
+
import torchvision.transforms as T
|
|
15
|
+
from PIL import Image
|
|
16
|
+
|
|
17
|
+
from ekgtools.config import config
|
|
18
|
+
from ekgtools.parser import ECGParser
|
|
19
|
+
from ekgtools.plot import plot, plot_one
|
|
20
|
+
from ekgtools.s3 import S3Setup
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class ECGDataset(Dataset):
|
|
24
|
+
|
|
25
|
+
def __init__(self,
|
|
26
|
+
directory: str,
|
|
27
|
+
df: pd.DataFrame,
|
|
28
|
+
filename_column: str,
|
|
29
|
+
label_column: str,
|
|
30
|
+
output_image: bool = False,
|
|
31
|
+
metadata_fields: list = ['age','gender'],
|
|
32
|
+
add_meta: bool = False,
|
|
33
|
+
specify_leads: list = config['standard_12_leads'],
|
|
34
|
+
sig_arr_length: int = 5000,
|
|
35
|
+
scale_method: str = 'standard',
|
|
36
|
+
order: list = ['channels','length'],
|
|
37
|
+
fast: bool = True,
|
|
38
|
+
storage: str = 'local',
|
|
39
|
+
s3_setup: S3Setup | None = None,
|
|
40
|
+
cache_dir: str | None = None):
|
|
41
|
+
"""
|
|
42
|
+
Args:
|
|
43
|
+
directory: path to folder containing ECG XML files
|
|
44
|
+
df: dataframe containing filenames, labels, and optional metadata
|
|
45
|
+
filename_column: name of column in df with file names
|
|
46
|
+
label_column: name of column in df with labels
|
|
47
|
+
metadata_fields: optional list of metadata fields (e.g., ['age', 'gender'])
|
|
48
|
+
specify_leads: list of leads to extract (e.g., ['I', 'II', 'V1'])
|
|
49
|
+
sig_arr_length: target length to resample ECG signals
|
|
50
|
+
bandpass_lower: lower frequency for bandpass filtering
|
|
51
|
+
bandpass_higher: upper frequency for bandpass filtering
|
|
52
|
+
median_filter_size: window size for median filter
|
|
53
|
+
denoise_steps: list of denoising steps (default: ['bandpass', 'median'])
|
|
54
|
+
num_class: Number of output classes (if None, inferred)
|
|
55
|
+
add_meta: Whether to use metadata
|
|
56
|
+
storage: 'local' or 's3' data source
|
|
57
|
+
s3_setup: configuration helper when storage='s3'
|
|
58
|
+
cache_dir: local cache root for S3 downloads
|
|
59
|
+
"""
|
|
60
|
+
self.base_directory = directory
|
|
61
|
+
|
|
62
|
+
before = len(df)
|
|
63
|
+
self.df = df.dropna(subset=[label_column])
|
|
64
|
+
after = len(self.df)
|
|
65
|
+
if before - after:
|
|
66
|
+
warnings.warn(f"Dropped {before - after} rows due to missing labels!")
|
|
67
|
+
|
|
68
|
+
self.filename_column_idx = self.df.columns.get_loc(filename_column)
|
|
69
|
+
self.label_column = label_column
|
|
70
|
+
|
|
71
|
+
self.num_class = len(self.df[label_column].unique())
|
|
72
|
+
|
|
73
|
+
self.add_meta = metadata_fields if add_meta else []
|
|
74
|
+
for meta in metadata_fields:
|
|
75
|
+
if meta not in ['age','gender']:
|
|
76
|
+
raise NotImplementedError('Only age and gender can be added as metadata!')
|
|
77
|
+
self.output_image = output_image
|
|
78
|
+
self.order = order
|
|
79
|
+
self.specify_leads = specify_leads
|
|
80
|
+
self.sig_arr_length = sig_arr_length
|
|
81
|
+
self.scale_method = scale_method
|
|
82
|
+
self.fast = fast
|
|
83
|
+
if self.fast and add_meta:
|
|
84
|
+
raise ValueError('Fast parsing does not allow for metadata!')
|
|
85
|
+
|
|
86
|
+
storage_mode = storage.lower()
|
|
87
|
+
if storage_mode not in {'local', 's3'}:
|
|
88
|
+
raise ValueError("storage must be either 'local' or 's3'")
|
|
89
|
+
self.storage_mode = storage_mode
|
|
90
|
+
|
|
91
|
+
if self.storage_mode == 'local':
|
|
92
|
+
self.local_root = os.path.abspath(os.path.expanduser(directory))
|
|
93
|
+
self.cache_dir = None
|
|
94
|
+
self.s3_setup = None
|
|
95
|
+
self.s3_prefix = None
|
|
96
|
+
else:
|
|
97
|
+
if s3_setup is None:
|
|
98
|
+
raise ValueError("s3_setup must be provided when storage='s3'")
|
|
99
|
+
self.s3_setup = s3_setup
|
|
100
|
+
prefix = directory.strip('/\\')
|
|
101
|
+
prefix = posixpath.normpath(prefix) if prefix else ''
|
|
102
|
+
self.s3_prefix = '' if prefix in {'.', ''} else prefix
|
|
103
|
+
cache_root = cache_dir or os.path.join(tempfile.gettempdir(), 'ekgtools_cache')
|
|
104
|
+
self.cache_dir = os.path.abspath(os.path.expanduser(cache_root))
|
|
105
|
+
os.makedirs(self.cache_dir, exist_ok=True)
|
|
106
|
+
self.local_root = None
|
|
107
|
+
|
|
108
|
+
@staticmethod
|
|
109
|
+
def create_1d_array(ecg_dict,
|
|
110
|
+
sig_arr_length,
|
|
111
|
+
specify_leads):
|
|
112
|
+
leads = []
|
|
113
|
+
for lead in specify_leads:
|
|
114
|
+
if lead not in ecg_dict.keys():
|
|
115
|
+
print(f"Lead {lead} missing — filling with zeros")
|
|
116
|
+
signal = np.zeros(sig_arr_length) # zero-filled array
|
|
117
|
+
else:
|
|
118
|
+
signal = ecg_dict[lead]
|
|
119
|
+
if len(signal) != sig_arr_length:
|
|
120
|
+
signal = scipy.signal.resample(signal,sig_arr_length) #resample using Fourier method
|
|
121
|
+
leads.append(signal)
|
|
122
|
+
output = np.array(leads)
|
|
123
|
+
complete_leads = all(lead in ecg_dict for lead in specify_leads)
|
|
124
|
+
return output, complete_leads, None
|
|
125
|
+
|
|
126
|
+
def _build_s3_relative_key(self, filename: str) -> str:
|
|
127
|
+
if self.storage_mode != 's3':
|
|
128
|
+
raise RuntimeError("_build_s3_relative_key called while storage_mode is not 's3'")
|
|
129
|
+
|
|
130
|
+
clean_name = str(filename).lstrip('/\\')
|
|
131
|
+
clean_name = clean_name.replace('\\', '/')
|
|
132
|
+
parts = []
|
|
133
|
+
if self.s3_prefix:
|
|
134
|
+
parts.append(self.s3_prefix)
|
|
135
|
+
parts.append(clean_name)
|
|
136
|
+
relative_key = posixpath.normpath(posixpath.join(*parts)) if parts else posixpath.normpath(clean_name)
|
|
137
|
+
if relative_key.startswith('..'):
|
|
138
|
+
raise ValueError(f"Filename '{filename}' resolves outside the configured S3 prefix")
|
|
139
|
+
return '' if relative_key == '.' else relative_key
|
|
140
|
+
|
|
141
|
+
def _download_s3_key(self, relative_key: str, ensure_companion: bool = True) -> str:
|
|
142
|
+
if self.storage_mode != 's3':
|
|
143
|
+
raise RuntimeError("_download_s3_key called while storage_mode is not 's3'")
|
|
144
|
+
|
|
145
|
+
fs = self.s3_setup._get_fs()
|
|
146
|
+
local_path = os.path.join(self.cache_dir, relative_key.replace('/', os.sep)) if relative_key else self.cache_dir
|
|
147
|
+
if not relative_key:
|
|
148
|
+
raise ValueError("Relative key must not be empty for S3 downloads")
|
|
149
|
+
|
|
150
|
+
if not os.path.exists(local_path):
|
|
151
|
+
directory = os.path.dirname(local_path)
|
|
152
|
+
if directory:
|
|
153
|
+
os.makedirs(directory, exist_ok=True)
|
|
154
|
+
key_parts = []
|
|
155
|
+
if self.s3_setup.prefix:
|
|
156
|
+
key_parts.append(self.s3_setup.prefix.strip('/'))
|
|
157
|
+
key_parts.append(relative_key)
|
|
158
|
+
s3_key = posixpath.join(*key_parts) if key_parts else ''
|
|
159
|
+
source = f"{self.s3_setup.bucket}/{s3_key}" if s3_key else self.s3_setup.bucket
|
|
160
|
+
try:
|
|
161
|
+
fs.get(source, local_path)
|
|
162
|
+
except FileNotFoundError as exc:
|
|
163
|
+
raise FileNotFoundError(f"S3 object not found: s3://{source}") from exc
|
|
164
|
+
|
|
165
|
+
if ensure_companion:
|
|
166
|
+
base, ext = os.path.splitext(relative_key)
|
|
167
|
+
ext = ext.lower()
|
|
168
|
+
if ext in {'.hea', '.dat'}:
|
|
169
|
+
companion_key = base + ('.dat' if ext == '.hea' else '.hea')
|
|
170
|
+
companion_local = os.path.join(self.cache_dir, companion_key.replace('/', os.sep))
|
|
171
|
+
if not os.path.exists(companion_local):
|
|
172
|
+
self._download_s3_key(companion_key, ensure_companion=False)
|
|
173
|
+
|
|
174
|
+
return local_path
|
|
175
|
+
|
|
176
|
+
def _resolve_path(self, filename: str) -> str:
|
|
177
|
+
if self.storage_mode == 'local':
|
|
178
|
+
candidate = os.path.abspath(os.path.join(self.local_root, filename))
|
|
179
|
+
if os.path.commonpath([self.local_root, candidate]) != self.local_root:
|
|
180
|
+
raise ValueError(f"Filename '{filename}' resolves outside the dataset directory")
|
|
181
|
+
return candidate
|
|
182
|
+
|
|
183
|
+
relative_key = self._build_s3_relative_key(filename)
|
|
184
|
+
return self._download_s3_key(relative_key)
|
|
185
|
+
|
|
186
|
+
def __len__(self):
|
|
187
|
+
return len(self.df)
|
|
188
|
+
|
|
189
|
+
def __getitem__(self, idx):
|
|
190
|
+
filename = str(self.df.iloc[idx, self.filename_column_idx])
|
|
191
|
+
path = self._resolve_path(filename)
|
|
192
|
+
|
|
193
|
+
ep = None
|
|
194
|
+
try:
|
|
195
|
+
ep = ECGParser(path,fast=self.fast)
|
|
196
|
+
ecg_dict = ep.lead
|
|
197
|
+
except Exception as e:
|
|
198
|
+
print(f"[ERROR] Failed to parse ECG file {path}: {e}")
|
|
199
|
+
raise
|
|
200
|
+
|
|
201
|
+
# Output
|
|
202
|
+
if self.output_image:
|
|
203
|
+
complete_leads = True
|
|
204
|
+
if len(self.specify_leads) == 1:
|
|
205
|
+
fig = plot_one(ep.lead[self.specify_leads[0]])
|
|
206
|
+
elif len(self.specify_leads) == 12:
|
|
207
|
+
fig = plot(ep.float_array, sample_rate=500, lead_index=self.specify_leads, columns=4)
|
|
208
|
+
else:
|
|
209
|
+
raise NotImplementedError('ECGDataset is only configured for 12 lead and single lead plots!')
|
|
210
|
+
|
|
211
|
+
# Save plot to memory buffer
|
|
212
|
+
buf = BytesIO()
|
|
213
|
+
plt.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
|
|
214
|
+
buf.seek(0)
|
|
215
|
+
plt.close(fig)
|
|
216
|
+
|
|
217
|
+
# Convert to image tensor
|
|
218
|
+
img = Image.open(buf).convert('RGB')
|
|
219
|
+
output = T.ToTensor()(img) # shape: [3, H, W]
|
|
220
|
+
else:
|
|
221
|
+
# Lead derivation
|
|
222
|
+
if "V3-V2" in self.specify_leads:
|
|
223
|
+
output_v3, complete_v3, _ = self.create_1d_array(ecg_dict, self.sig_arr_length, ["V3"])
|
|
224
|
+
output_v2, complete_v2, _ = self.create_1d_array(ecg_dict, self.sig_arr_length, ["V2"])
|
|
225
|
+
output = output_v3 - output_v2
|
|
226
|
+
complete_leads = complete_v3 and complete_v2
|
|
227
|
+
elif "V4-V3" in self.specify_leads:
|
|
228
|
+
output_v3, complete_v3, _ = self.create_1d_array(ecg_dict, self.sig_arr_length, ["V3"])
|
|
229
|
+
output_v4, complete_v4, _ = self.create_1d_array(ecg_dict, self.sig_arr_length, ["V4"])
|
|
230
|
+
output = output_v4 - output_v3
|
|
231
|
+
complete_leads = complete_v3 and complete_v4
|
|
232
|
+
else:
|
|
233
|
+
output, complete_leads, _ = self.create_1d_array(ecg_dict, self.sig_arr_length, self.specify_leads)
|
|
234
|
+
|
|
235
|
+
if output is None or output.size == 0:
|
|
236
|
+
raise ValueError(f"[ERROR] Output is empty for file: {path}; ecg_dict: {ecg_dict}")
|
|
237
|
+
|
|
238
|
+
if len(self.specify_leads) == 1:
|
|
239
|
+
output = torch.tensor(output, dtype=torch.float32).unsqueeze(0)
|
|
240
|
+
else:
|
|
241
|
+
output = torch.tensor(output, dtype=torch.float32)
|
|
242
|
+
if self.order == ['length','channels']:
|
|
243
|
+
output = output.permute(1, 0).contiguous()
|
|
244
|
+
|
|
245
|
+
# Label
|
|
246
|
+
label = self.df[self.label_column].iloc[idx]
|
|
247
|
+
label = torch.tensor([label], dtype=torch.int64)
|
|
248
|
+
if self.num_class > 1:
|
|
249
|
+
label = F.one_hot(label, num_classes=self.num_class).squeeze().to(torch.float32)
|
|
250
|
+
|
|
251
|
+
# Add Metadata and return
|
|
252
|
+
if self.add_meta:
|
|
253
|
+
age = ep.text_data.get('age',0)
|
|
254
|
+
gender = ep.text_data.get('gender',0)
|
|
255
|
+
age_onehot = self.bucketize_age_np(age).reshape(-1, 1)
|
|
256
|
+
gender = np.array(gender).reshape(-1, 1)
|
|
257
|
+
meta = np.concatenate([gender, age_onehot], axis=0).squeeze()
|
|
258
|
+
meta = torch.tensor(meta, dtype=torch.float32)
|
|
259
|
+
return output, meta, label, complete_leads
|
|
260
|
+
else:
|
|
261
|
+
return output, label, complete_leads
|
|
262
|
+
|
|
263
|
+
def bucketize_age_np(self, age_array):
|
|
264
|
+
"""
|
|
265
|
+
age_array: (B,) numpy array of float, age values
|
|
266
|
+
return: (B, 6) one-hot encoded age buckets
|
|
267
|
+
"""
|
|
268
|
+
# Define the cut:[40, 50, 60, 70, 80]
|
|
269
|
+
buckets = np.digitize(age_array, bins=[40, 50, 60, 70, 80]) # returns 0~5
|
|
270
|
+
one_hot = np.eye(6)[buckets] # (B, 6) one-hot encoding
|
|
271
|
+
return one_hot
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import xmltodict
|
|
3
|
+
|
|
4
|
+
from ekgtools.config import resolve_config
|
|
5
|
+
from ekgtools.parser.eli import ELIXMLParser
|
|
6
|
+
from ekgtools.parser.iecg import IECGXMLParser
|
|
7
|
+
from ekgtools.parser.mimic import MIMICParser
|
|
8
|
+
from ekgtools.parser.muse import MUSEXMLParser
|
|
9
|
+
from ekgtools.parser.wfdb import WFDBParser
|
|
10
|
+
|
|
11
|
+
class ECGParser:
|
|
12
|
+
|
|
13
|
+
def __init__(self, path, config: dict | None = None, config_overrides: dict | None = None, waveform_type: str = 'full', normalize: bool = False, scale_method: str | None = 'standard', fast: bool = True):
|
|
14
|
+
effective_config = resolve_config(config, config_overrides)
|
|
15
|
+
self.infer_filetype(path)
|
|
16
|
+
if self.filetype == 'iecg':
|
|
17
|
+
self.obj = IECGXMLParser(config=effective_config, waveform_type=waveform_type, scale_method=scale_method)
|
|
18
|
+
with open(path, 'rb') as fd:
|
|
19
|
+
self.obj.xml_dict = xmltodict.parse(fd.read().decode('utf8'))
|
|
20
|
+
if fast:
|
|
21
|
+
self.obj.parse_fast()
|
|
22
|
+
else:
|
|
23
|
+
self.obj.parse()
|
|
24
|
+
elif self.filetype == 'muse':
|
|
25
|
+
self.obj = MUSEXMLParser(config=effective_config, waveform_type=waveform_type, scale_method=scale_method)
|
|
26
|
+
with open(path, 'rb') as fd:
|
|
27
|
+
self.obj.xml_dict = xmltodict.parse(fd.read().decode('utf8'))
|
|
28
|
+
if fast:
|
|
29
|
+
self.obj.parse_fast()
|
|
30
|
+
else:
|
|
31
|
+
self.obj.parse()
|
|
32
|
+
elif self.filetype == 'eli':
|
|
33
|
+
self.obj = ELIXMLParser(config=effective_config, waveform_type=waveform_type, scale_method=scale_method)
|
|
34
|
+
with open(path, 'rb') as fd:
|
|
35
|
+
self.obj.xml_dict = xmltodict.parse(fd.read().decode('utf8'))
|
|
36
|
+
if fast:
|
|
37
|
+
self.obj.parse_fast()
|
|
38
|
+
else:
|
|
39
|
+
self.obj.parse()
|
|
40
|
+
elif self.filetype == 'mimic':
|
|
41
|
+
self.obj = MIMICParser(config=effective_config, waveform_type=waveform_type, normalize=normalize)
|
|
42
|
+
with open(path, 'r') as fd:
|
|
43
|
+
self.obj.load_from_json(fd.read())
|
|
44
|
+
elif self.filetype == 'wfdb':
|
|
45
|
+
self.obj = WFDBParser(config=effective_config, waveform_type=waveform_type, scale_method=scale_method)
|
|
46
|
+
self.obj.parse(path)
|
|
47
|
+
else:
|
|
48
|
+
raise ValueError(f"Unsupported ECG file type for path: {path}")
|
|
49
|
+
|
|
50
|
+
def __getattr__(self, name):
|
|
51
|
+
return getattr(self.obj, name)
|
|
52
|
+
|
|
53
|
+
def __dir__(self):
|
|
54
|
+
return list(set(dir(type(self)) + dir(self.obj)))
|
|
55
|
+
|
|
56
|
+
def infer_filetype(self, path):
|
|
57
|
+
_, ext = os.path.splitext(path)
|
|
58
|
+
ext = ext.lower()
|
|
59
|
+
|
|
60
|
+
if ext in {'.hea', '.dat'}:
|
|
61
|
+
self.filetype = 'wfdb'
|
|
62
|
+
return
|
|
63
|
+
|
|
64
|
+
wfdb_base_exists = os.path.exists(f"{path}.hea") or os.path.exists(f"{path}.dat")
|
|
65
|
+
if wfdb_base_exists:
|
|
66
|
+
self.filetype = 'wfdb'
|
|
67
|
+
return
|
|
68
|
+
if ext == '.json':
|
|
69
|
+
self.filetype = 'mimic'
|
|
70
|
+
return
|
|
71
|
+
|
|
72
|
+
if not os.path.exists(path):
|
|
73
|
+
self.filetype = 'unknown'
|
|
74
|
+
return
|
|
75
|
+
|
|
76
|
+
with open(path, 'r', encoding='utf8', errors='ignore') as f:
|
|
77
|
+
file_str = f.read()
|
|
78
|
+
if 'MuseInfo' in file_str:
|
|
79
|
+
self.filetype = 'muse'
|
|
80
|
+
elif 'PhilipsECG' in file_str:
|
|
81
|
+
self.filetype = 'iecg'
|
|
82
|
+
elif 'EliEcg' in file_str:
|
|
83
|
+
self.filetype = 'eli'
|
|
84
|
+
else:
|
|
85
|
+
self.filetype = 'unknown'
|