nmrdenoise 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nmrdenoise-0.1.0/LICENSE +1 -0
- nmrdenoise-0.1.0/PKG-INFO +14 -0
- nmrdenoise-0.1.0/nmrdenoise.egg-info/PKG-INFO +14 -0
- nmrdenoise-0.1.0/nmrdenoise.egg-info/SOURCES.txt +9 -0
- nmrdenoise-0.1.0/nmrdenoise.egg-info/dependency_links.txt +1 -0
- nmrdenoise-0.1.0/nmrdenoise.egg-info/requires.txt +2 -0
- nmrdenoise-0.1.0/nmrdenoise.egg-info/top_level.txt +2 -0
- nmrdenoise-0.1.0/pyproject.toml +27 -0
- nmrdenoise-0.1.0/setup.cfg +4 -0
- nmrdenoise-0.1.0/src/nmrdenoise/__init__.py +4 -0
- nmrdenoise-0.1.0/src/nmrdenoise/core.py +185 -0
nmrdenoise-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nmrdenoise
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python API for denoising NMR spectra
|
|
5
|
+
Author-email: Your Name <your.email@example.com>
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: numpy>=1.20.0
|
|
13
|
+
Requires-Dist: scipy>=1.7.0
|
|
14
|
+
Dynamic: license-file
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nmrdenoise
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python API for denoising NMR spectra
|
|
5
|
+
Author-email: Your Name <your.email@example.com>
|
|
6
|
+
Classifier: Programming Language :: Python :: 3
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.8
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: numpy>=1.20.0
|
|
13
|
+
Requires-Dist: scipy>=1.7.0
|
|
14
|
+
Dynamic: license-file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "nmrdenoise"  # This is what people will `pip install`
version = "0.1.0"
authors = [
    { name = "Your Name", email = "your.email@example.com" },
]
description = "A Python API for denoising NMR spectra"
readme = "README.md"
requires-python = ">=3.8"
classifiers = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]
dependencies = [
    "numpy>=1.20.0",
    "scipy>=1.7.0",
    # Both are imported at module level by src/nmrdenoise/core.py, so the
    # package cannot be imported without them.
    "nmrglue",
    "torch",
]

# Packages live under src/ (src-layout); `where` must be a key of the
# packages.find table, not of package-data.
[tool.setuptools.packages.find]
where = ["src"]

# Ship the bundled model checkpoints with the nmrdenoise package.
[tool.setuptools.package-data]
"nmrdenoise" = ["checkpoints/*"]
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import io
|
|
3
|
+
import nmrglue as ng
|
|
4
|
+
from nmrglue.fileio.bruker import scale_pdata
|
|
5
|
+
import numpy as np
|
|
6
|
+
import torch
|
|
7
|
+
import torch.nn as nn
|
|
8
|
+
import torch.nn.functional as F
|
|
9
|
+
from importlib import resources
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Define the Autoencoder model
|
|
14
|
+
class Autoencoder(nn.Module):
    """1-D convolutional autoencoder operating on single-channel sequences.

    The encoder applies one stride-1 convolution followed by three stride-2
    convolutions (1 -> 64 -> 128 -> 256 -> 512 channels).  The decoder applies
    three stride-2 transposed convolutions followed by two stride-1
    convolutions and a final ``Tanh``, so outputs lie in [-1, 1].

    The position of every layer inside ``encoder`` / ``decoder`` defines the
    ``state_dict`` keys (``encoder.0.weight``, ``decoder.8.bias``, ...), so
    the layer order must stay as it is for existing checkpoints to load.
    """

    # Combined down-sampling factor of the three stride-2 encoder convolutions.
    _STRIDE_PRODUCT = 8
    # The last decoder convolution has kernel_size=3 and padding=0, so it
    # returns this many samples fewer than it receives.
    _TAIL_LOSS = 2

    def __init__(self):
        super().__init__()

        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=3, padding=1),  # 1 input channel
            nn.LeakyReLU(negative_slope=0.2),
            nn.Conv1d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Conv1d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Conv1d(256, 512, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(negative_slope=0.2),
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(512, 512, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.LeakyReLU(negative_slope=0.2),
            nn.ConvTranspose1d(512, 256, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.LeakyReLU(negative_slope=0.2),
            nn.ConvTranspose1d(256, 128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Conv1d(128, 64, kernel_size=3, padding=1),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Conv1d(64, 1, kernel_size=3, padding=0),
            nn.Tanh(),  # Output layer with Tanh to keep values between -1 and 1
        )

    def forward(self, x):
        """Encode and decode ``x`` and return a tensor of the same length.

        Parameters
        ----------
        x : torch.Tensor
            Input of shape ``(batch, 1, length)``.

        Returns
        -------
        torch.Tensor
            Reconstruction of shape ``(batch, 1, length)``.
        """
        orig_len = x.shape[-1]

        # Right-pad with zeros up to a multiple of the encoder stride product
        # so the transposed convolutions restore the padded length exactly.
        pad_len = (-orig_len) % self._STRIDE_PRODUCT
        # The final padding-free convolution removes _TAIL_LOSS samples.  If
        # the padding is smaller than that, the crop below could not return
        # orig_len samples, so add one more full stride block.
        if pad_len < self._TAIL_LOSS:
            pad_len += self._STRIDE_PRODUCT

        # Pad only on the right
        x = F.pad(x, (0, pad_len), mode='constant', value=0)

        decoded = self.decoder(self.encoder(x))

        # Crop to original length
        return decoded[:, :, :orig_len]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def load_denoising_checkpoint(model_name="autoencoder_20000_bruker_ver_135_percent_noise_with_noshuffle_v4_rm_spikelets_with_lb_5.pt"):
    """Locate a checkpoint bundled in ``nmrdenoise.checkpoints``.

    Parameters
    ----------
    model_name : str
        File name of the checkpoint inside the ``nmrdenoise.checkpoints``
        package.

    Returns
    -------
    str
        Filesystem path of the checkpoint, as reported by
        ``importlib.resources.path``.
    """
    checkpoint_package = "nmrdenoise.checkpoints"
    # NOTE(review): the path is handed back after the resource context has
    # exited; if the package is ever loaded from a non-filesystem source the
    # path may refer to a temporary file that no longer exists - confirm.
    with resources.path(checkpoint_package, model_name) as resolved:
        print(f"Loading checkpoint from: {resolved}")
        location = str(resolved)
    return location
|
|
71
|
+
|
|
72
|
+
def read_bruker(dir_path):
    """Read a Bruker FID and return its Fourier-transformed spectrum.

    The FID is read with ``ng.bruker.read``, the digital filter is removed on
    a best-effort basis when ``GRPDLY`` is present and non-negative, and the
    result is Fourier transformed.  No intensity scaling is applied to the
    returned data.

    Parameters
    ----------
    dir_path : str
        Bruker experiment directory, passed unchanged to ``ng.bruker.read``.

    Returns
    -------
    frq : numpy.ndarray
        ppm axis built from the ``SW_h``, ``SFO1`` and ``O1`` acqus entries.
    real_data : numpy.ndarray
        Real part of the transformed FID.
    imag_data : numpy.ndarray
        Imaginary part of the transformed FID.

    Raises
    ------
    KeyError
        If the dictionary has no ``acqus`` section or it lacks ``SW_h``,
        ``SFO1`` or ``O1``.
    """
    dic, fid = ng.bruker.read(dir_path)
    acqus = dic['acqus']

    group_delay = acqus.get('GRPDLY')
    if group_delay is None:
        # Previously this case was only handled by accident, through a
        # TypeError raised by comparing None with 0.
        print("Skipping filter removal: GRPDLY not found in acqus")
    else:
        # Deliberately best-effort: any failure while removing the filter is
        # reported and the unmodified FID is used instead.
        try:
            if group_delay >= 0:
                fid = ng.bruker.remove_digital_filter(dic, fid)
                acqus['GRPDLY'] = 0  # Mark as corrected
                print(" Digital filter removed and GRPDLY set to 0.")
        except Exception as e:
            print(f"Skipping filter removal: {e}")

    print("1d from fid")
    data = ng.process.proc_base.fft(fid)

    sw = acqus['SW_h']    # Sweep width in Hz
    sfo1 = acqus['SFO1']  # Transmitter frequency in MHz
    o1 = acqus['O1']      # Frequency offset in Hz

    # Complex data (second argument True); used only to derive the ppm axis.
    uc = ng.fileiobase.unit_conversion(data.size, True, sw, sfo1, o1)
    frq = uc.ppm_scale()

    return frq, np.real(data), np.imag(data)
|
|
121
|
+
|
|
122
|
+
def denoise_component(component, autoencoder_path):
    """Denoise one real-valued 1-D signal with the autoencoder checkpoint.

    The signal is divided by its peak absolute value, passed through the
    autoencoder on the CPU, and the model output is multiplied by the same
    peak value again.  A per-point confidence curve is derived from the L2
    norm of the encoder features, linearly interpolated to the input length
    and min-max normalised.

    Parameters
    ----------
    component : array-like
        One-dimensional signal (for example the real or imaginary part of a
        spectrum).
    autoencoder_path : str
        Path of the ``state_dict`` checkpoint to load into ``Autoencoder``.

    Returns
    -------
    recovered_data : numpy.ndarray
        Flattened model output rescaled by the peak absolute intensity.
    confidence : numpy.ndarray
        1-D array with one value per input point, normalised to [0, 1).

    Raises
    ------
    ValueError
        If ``component`` is empty or is not effectively one-dimensional.
    RuntimeError
        If the checkpoint cannot be loaded.
    """
    signal = np.asarray(component)
    if signal.ndim == 0 or signal.size == 0 or signal.size != signal.shape[0]:
        raise ValueError("component must be a non-empty one-dimensional array")
    signal = signal.reshape(-1)
    input_length = signal.shape[0]

    max_intensity = max(abs(np.max(signal)), abs(np.min(signal)))
    # An all-zero signal would otherwise be divided by zero and turn into NaN.
    scale = max_intensity if max_intensity != 0 else 1.0
    norm_data = signal / scale

    device = torch.device("cpu")
    autoencoder = Autoencoder().to(device)
    try:
        # NOTE(security): torch.load unpickles the file; only load
        # checkpoints that come from a trusted source.
        state = torch.load(autoencoder_path, map_location=device)
        autoencoder.load_state_dict(state)
        autoencoder.eval()  # Set model to evaluation mode
        print("Autoencoder model loaded successfully.")
    except Exception as e:
        raise RuntimeError(f"Error loading autoencoder model: {e}") from e

    # Conv1d expects (batch_size, channels, input_length).
    model_input = torch.tensor(
        norm_data.reshape(1, 1, input_length), dtype=torch.float32
    ).to(device)

    with torch.no_grad():  # No need to calculate gradients during evaluation
        encoded = autoencoder.encoder(model_input)
        reconstructed_signal = autoencoder(model_input)

        # Feature-wise norm over the channel axis -> (1, 1, latent_length).
        confidence_latent = encoded.norm(p=2, dim=1, keepdim=True)
        confidence_upsampled = F.interpolate(
            confidence_latent,
            size=model_input.shape[-1],
            mode='linear',
            align_corners=True,
        )

    # reshape(-1) keeps the result 1-D even for a single-point input.
    confidence = confidence_upsampled.reshape(-1).cpu().numpy()
    confidence = (confidence - confidence.min()) / (confidence.max() - confidence.min() + 1e-6)

    reconstructed_signal_flat = reconstructed_signal.cpu().numpy().reshape(-1)
    recovered_data = reconstructed_signal_flat * max_intensity
    return recovered_data, confidence
|