metbit 7.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metbit/STOCSY.py +126 -0
- metbit/__init__.py +21 -0
- metbit/base.py +68 -0
- metbit/boxplot.py +562 -0
- metbit/cross_validation.py +670 -0
- metbit/denoise_spec.py +40 -0
- metbit/dev.py +1176 -0
- metbit/genpage.py +177 -0
- metbit/lazy_opls_da.py +447 -0
- metbit/metbit.py +2118 -0
- metbit/nmr_preprocess.py +398 -0
- metbit/opls.py +271 -0
- metbit/pca_ellipse.py +62 -0
- metbit/peak_processe.py +74 -0
- metbit/plotting.py +240 -0
- metbit/pls.py +129 -0
- metbit/pretreatment.py +93 -0
- metbit/scaler.py +236 -0
- metbit/spec_norm.py +352 -0
- metbit/take_intensity.py +220 -0
- metbit/ui_picky_peak.py +217 -0
- metbit/ui_stocsy.py +224 -0
- metbit/utility.py +944 -0
- metbit/vip.py +101 -0
- metbit-7.3.4.dist-info/METADATA +40 -0
- metbit-7.3.4.dist-info/RECORD +29 -0
- metbit-7.3.4.dist-info/WHEEL +5 -0
- metbit-7.3.4.dist-info/licenses/LICENSE +21 -0
- metbit-7.3.4.dist-info/top_level.txt +1 -0
metbit/STOCSY.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# Module metadata.
__author__ = 'aeiwz'
# Backward-compatible alias: earlier releases exposed the misspelt name.
__auther__ = __author__
author_email = 'theerayut_aeiw_123@hotmail.com'
__copyright__ = "Copyright 2024, Theerayut"

__license__ = "MIT"
__maintainer__ = "aeiwz"
__email__ = "theerayut_aeiw_123@hotmail.com"
__status__ = "Develop"
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
import plotly.graph_objects as go
|
|
15
|
+
from scipy.stats import pearsonr
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def STOCSY(spectra: pd.DataFrame, anchor_ppm_value, p_value_threshold=0.0001):
    """
    Perform a STOCSY (Statistical Total Correlation Spectroscopy) analysis on NMR spectra.

    The column whose ppm label is closest to ``anchor_ppm_value`` is taken as the
    anchor signal. The Pearson correlation p-value between the anchor and every
    column of ``spectra`` is computed, and the per-column median intensity is
    drawn as a scatter plot in which columns with a p-value below
    ``p_value_threshold`` are red and all remaining columns are gray.

    Parameters
    ----------
    spectra : pd.DataFrame
        NMR spectra; each column label must be convertible to float (chemical
        shift in ppm) and each row is one sample.
    anchor_ppm_value : float
        Chemical shift of the anchor signal. The nearest available column is used.
    p_value_threshold : float, optional
        Columns whose p-value is strictly below this value are marked as
        significant. Default is 0.0001.

    Returns
    -------
    fig : go.Figure
        Plotly figure with a "Non-significant" and a "Significant" trace and a
        reversed x-axis.

    Raises
    ------
    ValueError
        If ``spectra`` has no columns.

    Example
    -------
    >>> fig = STOCSY(spectra=spectra, anchor_ppm_value=1.29275, p_value_threshold=0.0000001)
    >>> fig.show()
    """
    if spectra.shape[1] == 0:
        raise ValueError("spectra has no columns; cannot locate an anchor signal")

    # Column labels are the chemical shifts; convert them once to a float array.
    ppm = spectra.columns.astype(float).to_numpy()

    # The anchor is the column closest to the requested chemical shift.
    anchor_index = int(np.argmin(np.abs(ppm - anchor_ppm_value)))
    anchor_signal = spectra.iloc[:, anchor_index]

    # Iterate columns by position (items()) rather than by label so that
    # duplicated ppm labels still yield one 1-D column per step.
    p_values = []
    for _, column in spectra.items():
        _, p_val = pearsonr(anchor_signal, column)
        p_values.append(p_val)
    p_values = np.array(p_values)

    # A NaN p-value (e.g. a constant column) compares False against the
    # threshold, so it is treated as non-significant instead of being dropped
    # from both traces.
    significant_mask = p_values < p_value_threshold
    non_significant_mask = ~significant_mask

    # The plotted y value is the median intensity of each column.
    median_intensity = spectra.median().to_numpy()

    fig = go.Figure()

    # Non-significant points first, then significant ones drawn on top.
    trace_specs = (
        (non_significant_mask, 'gray', 'Non-significant'),
        (significant_mask, 'red', f'Significant (<i>p</i> < {p_value_threshold})'),
    )
    for mask, color, label in trace_specs:
        fig.add_trace(go.Scatter(
            x=ppm[mask],
            y=median_intensity[mask],
            mode='markers',
            marker=dict(
                size=3,
                color=color,
            ),
            name=label
        ))

    anchor_label = np.round(anchor_ppm_value, decimals=4)

    # NOTE(review): the y-axis title mentions r^2, but the plotted y values are
    # median intensities; the title text is kept unchanged here - confirm the
    # intended label with the maintainer.
    fig.update_layout(
        title={'text':f'<b>STOCSY: δ {anchor_label}</b>',
               'y':0.9,
               'x':0.5,
               'xanchor':'center',
               'yanchor':'top'},
        xaxis_title='<b>δ<sup>1</sup>H</b>',
        yaxis_title=f'Correlation (r<sup>2</sup>) δ = {anchor_label}',
        showlegend=True
    )

    # NMR convention: chemical shift decreases from left to right.
    fig.update_xaxes(autorange="reversed")
    return fig
|
|
124
|
+
|
|
125
|
+
# Example usage
|
|
126
|
+
# fig = STOCSY(spectra=spectra, anchor_ppm_value=1.29275, p_value_threshold=0.0000001)
|
metbit/__init__.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
from .metbit import opls_da, pca
|
|
4
|
+
from .utility import univar_stats, Normalise
|
|
5
|
+
from .lazy_opls_da import lazy_opls_da
|
|
6
|
+
from .spec_norm import *
|
|
7
|
+
from .peak_processe import peak_chops
|
|
8
|
+
from .STOCSY import STOCSY
|
|
9
|
+
from .ui_stocsy import STOCSY_app
|
|
10
|
+
from .ui_picky_peak import pickie_peak
|
|
11
|
+
from .take_intensity import *
|
|
12
|
+
from .nmr_preprocess import *
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# -*- coding: utf-8 -*-
|
|
16
|
+
|
|
17
|
+
__author__ = "aeiwz"
|
|
18
|
+
__email__ = "theerayut_aeiw_123@hotmail.com"
|
|
19
|
+
__maintainer__ = "aeiwz"
|
|
20
|
+
__status__ = "Development"
|
|
21
|
+
__copyright__ = "Copyright 2024, Theerayut"
|
metbit/base.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
# Module metadata.
__author__ = 'aeiwz'
# Backward-compatible alias: earlier releases exposed the misspelt name.
__auther__ = __author__
author_email = 'theerayut_aeiw_123@hotmail.com'
__copyright__ = "Copyright 2024, Theerayut"

__license__ = "MIT"
__maintainer__ = "aeiwz"
__email__ = "theerayut_aeiw_123@hotmail.com"
__status__ = "Develop"
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import numpy.linalg as la
|
|
14
|
+
import typing
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def nipals(x: np.ndarray, y: np.ndarray,
           tol: float = 1e-10,
           max_iter: int = 1000,
           dot=np.dot) -> typing.Tuple:
    """
    Non-linear Iterative Partial Least Squares

    Parameters
    ----------
    x: np.ndarray
        Variable matrix with size n by p, where n number of samples,
        p number of variables.
    y: np.ndarray
        Dependent variable with size n by 1.
    tol: float
        Tolerance for the convergence. Iteration stops once the relative
        change of the y-scores is not greater than this value.
    max_iter: int
        Maximal number of iterations. At least one iteration is always
        performed so that the returned values are defined.
    dot: callable
        Function used for all inner and matrix products; defaults to
        ``np.dot``.

    Returns
    -------
    w: np.ndarray
        Weights with size p by 1.
    u: np.ndarray
        Y-scores with size n by 1.
    c: float
        Y-weight
    t: np.ndarray
        Scores with size n by 1

    References
    ----------
    [1] Wold S, et al. PLS-regression: a basic tool of chemometrics.
        Chemometr Intell Lab Sys 2001, 58, 109–130.
    [2] Bylesjo M, et al. Model Based Preprocessing and Background
        Elimination: OSC, OPLS, and O2PLS. in Comprehensive Chemometrics.

    """
    u = y
    i = 0
    # The body runs before the stopping test so that w, c and t are always
    # bound, even when tol <= 0 or max_iter < 0 (the pre-test loop skipped the
    # body entirely in those cases and the return raised UnboundLocalError).
    while True:
        # X-weights: project x onto the current y-scores, then normalise.
        # The division creates a new array, so the in-place normalisation
        # below does not modify x or y.
        w = dot(u, x) / dot(u, u)
        w /= la.norm(w)
        # X-scores.
        t = dot(x, w)
        # Y-weight: regression of y on the X-scores.
        c = dot(t, y) / dot(t, t)
        # Updated y-scores.
        u_new = y * c / (c * c)
        # Relative change of the y-scores, used as convergence measure.
        d = la.norm(u_new - u) / la.norm(u_new)
        u = u_new
        i += 1
        # Same continuation condition as the original loop header.
        if not (d > tol and i <= max_iter):
            break

    return w, u, c, t
|