chemotools 0.0.10__py3-none-any.whl → 0.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,4 +2,5 @@ from .air_pls import AirPls
2
2
  from .cubic_spline_correction import CubicSplineCorrection
3
3
  from .linear_correction import LinearCorrection
4
4
  from .non_negative import NonNegative
5
- from .polynomial_correction import PolynomialCorrection
5
+ from .polynomial_correction import PolynomialCorrection
6
+ from .subtract_reference import SubtractReference
@@ -6,10 +6,10 @@ from sklearn.utils.validation import check_is_fitted
6
6
  from chemotools.utils.check_inputs import check_input
7
7
 
8
8
  class CubicSplineCorrection(BaseEstimator, TransformerMixin):
9
- def __init__(self, indices: tuple = None) -> None:
9
+ def __init__(self, indices: np.ndarray = None) -> None:
10
10
  self.indices = indices
11
11
 
12
- def fit(self, X: np.ndarray, y=None) -> "CubicSplineCorrection":
12
+ def fit(self, X: list, y=None) -> "CubicSplineCorrection":
13
13
  # Check that X is a 2D array and has only finite values
14
14
  X = check_input(X)
15
15
 
@@ -19,6 +19,11 @@ class CubicSplineCorrection(BaseEstimator, TransformerMixin):
19
19
  # Set the fitted attribute to True
20
20
  self._is_fitted = True
21
21
 
22
+ if self.indices is None:
23
+ self.indices_ = [0, len(X[0]) - 1]
24
+ else:
25
+ self.indices_ = self.indices
26
+
22
27
  return self
23
28
 
24
29
  def transform(self, X: np.ndarray, y=None, copy=True):
@@ -39,11 +44,7 @@ class CubicSplineCorrection(BaseEstimator, TransformerMixin):
39
44
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
40
45
 
41
46
  def _spline_baseline_correct(self, x: np.ndarray) -> np.ndarray:
42
- if self.indices is None:
43
- indices = [0, len(x) - 1]
44
- else:
45
- indices = list(self.indices)
46
-
47
+ indices = self.indices_
47
48
  intensity = x[indices]
48
49
  spl = CubicSpline(indices, intensity)
49
50
  baseline = spl(range(len(x)))
@@ -5,7 +5,7 @@ from sklearn.utils.validation import check_is_fitted
5
5
  from chemotools.utils.check_inputs import check_input
6
6
 
7
7
  class PolynomialCorrection(BaseEstimator, TransformerMixin):
8
- def __init__(self, order: int = 1, indices: tuple = (0, -1)) -> None:
8
+ def __init__(self, order: int = 1, indices: list = None) -> None:
9
9
  self.order = order
10
10
  self.indices = indices
11
11
 
@@ -19,6 +19,11 @@ class PolynomialCorrection(BaseEstimator, TransformerMixin):
19
19
  # Set the fitted attribute to True
20
20
  self._is_fitted = True
21
21
 
22
+ if self.indices is None:
23
+ self.indices_ = range(0, len(X[0]))
24
+ else:
25
+ self.indices_ = self.indices
26
+
22
27
  return self
23
28
 
24
29
  def transform(self, X: np.ndarray, y=0, copy=True) -> np.ndarray:
@@ -39,7 +44,7 @@ class PolynomialCorrection(BaseEstimator, TransformerMixin):
39
44
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
40
45
 
41
46
  def _baseline_correct_spectrum(self, x: np.ndarray) -> np.ndarray:
42
- intensity = x[list(self.indices)]
43
- poly = np.polyfit(self.indices, intensity, self.order)
47
+ intensity = x[self.indices_]
48
+ poly = np.polyfit(self.indices_, intensity, self.order)
44
49
  baseline = [np.polyval(poly, i) for i in range(0, len(x))]
45
50
  return x - baseline
@@ -0,0 +1,54 @@
1
+ import numpy as np
2
+ from sklearn.base import BaseEstimator, TransformerMixin
3
+ from sklearn.utils.validation import check_is_fitted
4
+
5
+ from chemotools.utils.check_inputs import check_input
6
+
7
+
8
class SubtractReference(BaseEstimator, TransformerMixin):
    """Subtract a fixed reference spectrum from every sample (row) of X.

    Parameters
    ----------
    reference : array-like or None, default=None
        Spectrum subtracted from each row. When ``None`` the transformer
        returns a copy of its input unchanged.

    Attributes
    ----------
    n_features_in_ : int
        Number of columns of the data passed to ``fit``.
    reference_ : np.ndarray or None
        Array copy of ``reference`` captured by ``fit``; ``None`` when no
        reference was supplied.
    """

    def __init__(
        self,
        reference: np.ndarray = None,
    ):
        # Stored untouched; the working copy is only created in fit().
        self.reference = reference

    def fit(self, X: np.ndarray, y=None) -> "SubtractReference":
        """Record the number of features and snapshot the reference.

        Parameters
        ----------
        X : np.ndarray
            Input data, validated by ``check_input`` (2D array with only
            finite values).
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        SubtractReference
            The fitted transformer (``self``).
        """
        # Check that X is a 2D array and has only finite values
        X = check_input(X)

        # Set the number of features
        self.n_features_in_ = X.shape[1]

        # Set the fitted attribute to True
        self._is_fitted = True

        # Always (re)define reference_ so a refit without a reference cannot
        # leave a stale copy from an earlier fit behind. np.array() makes an
        # independent copy and also accepts plain sequences.
        if self.reference is None:
            self.reference_ = None
        else:
            self.reference_ = np.array(self.reference)

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """Return a copy of X with the fitted reference subtracted from each row.

        Parameters
        ----------
        X : np.ndarray
            Data with the same number of columns as seen in ``fit``.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        np.ndarray
            Corrected copy of ``X``; an unmodified copy when the transformer
            was fitted without a reference.

        Raises
        ------
        ValueError
            If the number of columns differs from ``n_features_in_``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "_is_fitted")

        # Check that X is a 2D array and has only finite values
        X = check_input(X)
        X_ = X.copy()

        # Check that the number of features is the same as the fitted data
        if X_.shape[1] != self.n_features_in_:
            raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")

        # Decide on the state captured by fit(), not on the constructor
        # parameter: changing `reference` after fitting must not make
        # transform() fail or silently ignore the fitted reference.
        # getattr() keeps instances that were fitted without a reference by
        # an earlier version of this class (no reference_ attribute) working.
        if getattr(self, "reference_", None) is not None:
            # Subtract the reference
            for i, x in enumerate(X_):
                X_[i] = self._subtract_reference(x)

        return X_.reshape(-1, 1) if X_.ndim == 1 else X_

    def _subtract_reference(self, x) -> np.ndarray:
        """Return a single spectrum ``x`` minus the fitted reference."""
        return x - self.reference_
@@ -50,7 +50,7 @@ class NorrisWilliams(BaseEstimator, TransformerMixin):
50
50
  X_[i] = derivative
51
51
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
52
52
 
53
- if self.derivative_order == 1:
53
+ if self.derivative_order == 2:
54
54
  for i, x in enumerate(X_):
55
55
  derivative = self._spectrum_second_derivative(x)
56
56
  X_[i] = derivative
@@ -0,0 +1,121 @@
1
+ Metadata-Version: 2.1
2
+ Name: chemotools
3
+ Version: 0.0.12
4
+ Summary: Package to integrate chemometrics in scikit-learn pipelines
5
+ Home-page: https://github.com/paucablop/chemotools
6
+ Author: Pau Cabaneros Lopez
7
+ Author-email: pau.cabaneros@gmail.com
8
+ Project-URL: Bug Tracker, https://github.com/paucablop/chemotools/issues/
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Requires-Dist: numpy
16
+ Requires-Dist: scipy
17
+ Requires-Dist: scikit-learn
18
+
19
+ [![pypi](https://img.shields.io/pypi/v/chemotools)](https://pypi.org/project/chemotools)
20
+ [![pypi](https://img.shields.io/pypi/pyversions/chemotools)](https://pypi.org/project/chemotools)
21
+ [![pypi](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
22
+ [![codecov](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
23
+
24
+ # __chemotools__
25
+
26
+ Welcome to Chemotools, a Python package that integrates chemometrics with Scikit-learn.
27
+
28
+ 👉 Check the [documentation](https://paucablop.github.io/chemotools/) for a full description on how to use chemotools.
29
+
30
+ ## Description
31
+
32
+ Chemotools is a Python package that provides a collection of preprocessing tools and utilities for working with spectral data. It is built on top of popular scientific libraries and is designed to be highly modular, easy to use, and compatible with Scikit-learn transformers.
33
+
34
+ If you are interested in learning more about chemotools, please visit the [documentation](https://paucablop.github.io/chemotools/) page.
35
+
36
+ Benefits:
37
+ - Provides a collection of preprocessing tools and utilities for working with spectral data
38
+ - Highly modular and compatible with Scikit-learn transformers
39
+ - Can perform popular preprocessing tasks such as baseline correction, smoothing, scaling, derivatization, and scattering correction
40
+ - Open source and available on PyPI
41
+
42
+ Applications:
43
+ - Analyzing and processing spectral data in chemistry, biology, and other fields
44
+ - Developing machine learning models for predicting properties or classifying samples based on spectral data
45
+ - Teaching and learning about chemometrics and data preprocessing in Python
46
+
47
+ ## Installation
48
+
49
+ Chemotools is distributed via PyPI and can be easily installed using pip:
50
+
51
+ ```bash
52
+ pip install chemotools
53
+ ```
54
+
55
+ ## Usage
56
+
57
+ Chemotools is designed to be used in conjunction with Scikit-learn. It follows the same API as other Scikit-learn transformers, so you can easily integrate it into your existing workflow. For example, you can use chemotools to build pipelines that include transformers from chemotools and Scikit-learn:
58
+
59
+ ```python
60
+ from sklearn.preprocessing import StandardScaler
61
+ from sklearn.pipeline import make_pipeline
62
+
63
+ from chemotools.baseline import AirPls
64
+ from chemotools.scatter import MultiplicativeScatterCorrection
65
+
66
+ preprocessing = make_pipeline(AirPls(), MultiplicativeScatterCorrection(), StandardScaler(with_std=False))
67
+ spectra_transformed = preprocessing.fit_transform(spectra)
68
+ ```
69
+
70
+ Check the [documentation](https://paucablop.github.io/chemotools/) for more information on how to use chemotools.
71
+
72
+
73
+ ## Contributing
74
+
75
+ We welcome contributions to Chemotools from anyone interested in improving the package. Whether you have ideas for new features, bug reports, or just want to help improve the code, we appreciate your contributions! You are also welcome to check the [Project Board](https://github.com/users/paucablop/projects/4) to see what we are currently working on.
76
+
77
+ To contribute to Chemotools, please follow these guidelines:
78
+
79
+ #### Reporting Bugs
80
+
81
+ If you encounter a bug or unexpected behavior in Chemotools, please open an issue on the GitHub repository with a detailed description of the problem, including any error messages and steps to reproduce the issue. If possible, include sample code or data that demonstrates the problem.
82
+
83
+ #### Suggesting Enhancements
84
+
85
+ If you have an idea for a new feature or enhancement for Chemotools, please open an issue on the GitHub repository with a detailed description of the proposed feature and its benefits. If possible, include example code or use cases that illustrate how the feature would be used.
86
+
87
+ #### Submitting Changes
88
+
89
+ If you'd like to contribute code changes to Chemotools, please follow these steps:
90
+
91
+ - Create a new branch for your changes. We follow trunk-based development, so all changes should be made on a new branch and branches should be short-lived and merged into main.
92
+
93
+ - Write your code and tests, making sure to follow the Chemotools coding style and conventions. It is essential to include tests for both the Scikit-learn API and the functionality of the transformers.
94
+
95
+ - Run the tests using the provided testing framework to ensure that your changes do not introduce any new errors or regressions.
96
+
97
+ - Submit a pull request to the main Chemotools repository with a detailed description of your changes and the problem they solve.
98
+
99
+ We will review your changes and provide feedback as soon as possible. If we request changes, please make them as quickly as possible to keep the review process moving.
100
+
101
+ #### Code Style
102
+
103
+ Please follow the Chemotools code style and conventions when contributing code changes. Specifically:
104
+
105
+ - Use four spaces for indentation
106
+ - Use descriptive variable names
107
+ - Avoid using magic numbers or hard-coded strings
108
+ - Format your code using Black
109
+
110
+ #### Codecov
111
+
112
+ We use Codecov to track the test coverage of Chemotools. Please make sure that your changes do not reduce the test coverage of the package.
113
+
114
+
115
+ ## License
116
+
117
+ This package is distributed under the MIT license. See the [LICENSE](LICENSE) file for more information. When contributing code to Chemotools, you are agreeing to release your code under the MIT license.
118
+
119
+ ## Credits
120
+
121
+ AirPLS baseline correction is based on the implementation by [Zhang et al.](https://pubs.rsc.org/is/content/articlelanding/2010/an/b922045c). The current implementation is based on the Python implementation by [zmzhang](https://github.com/zmzhang/airPLS).
@@ -1,12 +1,13 @@
1
1
  chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- chemotools/baseline/__init__.py,sha256=DVdmkXqKfrDUcwNOO6xXELmyPwHQza8_lzAIGDKKZdg,228
2
+ chemotools/baseline/__init__.py,sha256=8KVAXB4OwGPH-lan5NOwqdqyr08kJW9qDegJlzd9cO8,278
3
3
  chemotools/baseline/air_pls.py,sha256=34t-APOEPpDTSdkAYq41xJ9Qt_OBDlKSPgnItoqTB6g,2568
4
- chemotools/baseline/cubic_spline_correction.py,sha256=oFnPhbCQT9y5SJMSOeUOQ2vnBlkgQJ1AFwHDvAjKrUU,1732
4
+ chemotools/baseline/cubic_spline_correction.py,sha256=QkPvjoivA3O61zBUrt7Ro98_HidgtQMvxSe6GYCfJ7U,1772
5
5
  chemotools/baseline/linear_correction.py,sha256=2NXW_tiXNK9ZL-At2opTLmDhAxjFF0J9bXnWDqDjNYY,1900
6
6
  chemotools/baseline/non_negative.py,sha256=1L8wE0JUsQWmdku-Sm73ubDiIZ72CtImSvInQMPC-rU,1427
7
- chemotools/baseline/polynomial_correction.py,sha256=cLOhc7J0ERVi7wcXLLvjDaoCpZs-ADzF47iDpqhbgQc,1689
7
+ chemotools/baseline/polynomial_correction.py,sha256=PtoR6T_rdk7ygJt4JsbR1qNOI4cP8Ntmq3ocKUet6Qo,1818
8
+ chemotools/baseline/subtract_reference.py,sha256=jDLKpYQqGFEPcwkNTwzAOzw2tNSkRdubdmz6yJOkT7U,1684
8
9
  chemotools/derivative/__init__.py,sha256=x2F0IJ-uCbEYFoXFbZl_RTPCbSq82vqGOwlM9R_2Klo,84
9
- chemotools/derivative/norris_william.py,sha256=CqqCL41xs0mZ5OlJ06iYPzpnb8bJKAfmh81PAe43vmw,3134
10
+ chemotools/derivative/norris_william.py,sha256=kREo1JYBZrM9pZ9HXVoU4nTndZKP2MbjGomX7KIxFTI,3134
10
11
  chemotools/derivative/savitzky_golay.py,sha256=FO65XpgbY1QNC7nPsLJVmmrx5gurGKpqIAH2NnjSWrk,1801
11
12
  chemotools/scale/__init__.py,sha256=Lr8fVyaBjFMf1cjDAI-a_gCrXivKCpyTOYnuJZHRh98,79
12
13
  chemotools/scale/l_normalize.py,sha256=Z_CtUfCJoVsn8CasVzZUjIzs02QzxLwFg2zqmXN3cWQ,1349
@@ -24,10 +25,10 @@ chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
24
25
  chemotools/utils/check_inputs.py,sha256=fRAV4HIaGamdj_PNXSNnl7LurXytACNTGO51rhPpMUY,512
25
26
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
27
  tests/fixtures.py,sha256=TxsGOMszSMnttUO4iWniNKeiXEkjRzhlPU9aLeyT89s,1505
27
- tests/test_functionality.py,sha256=KLeRTJrgDqmp4f1bQohJSqFCgy_WqTPNENZal6qN7dY,6803
28
- tests/test_sklearn_compliance.py,sha256=VIsCkPtL2U9EisiwhpMAnZsUXDDlfkCxPuhx14JjDi0,2907
29
- chemotools-0.0.10.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
30
- chemotools-0.0.10.dist-info/METADATA,sha256=NntaneHjPpPxaRpEU0YPCmJNYDkwyaY6fnafqbBHavA,18720
31
- chemotools-0.0.10.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
32
- chemotools-0.0.10.dist-info/top_level.txt,sha256=eNcNcKSdo-1H_2gwSDrS__dr7BM3R73Cnn-pBiW5FEw,17
33
- chemotools-0.0.10.dist-info/RECORD,,
28
+ tests/test_functionality.py,sha256=J5yqA29E7WU0AvieFP6wFLCUH783udVZ-XrnErrBT5Y,7351
29
+ tests/test_sklearn_compliance.py,sha256=_a8gazkjJtj9STrt6fwvC37wFviZhpz-KCt3_LnmG_w,3309
30
+ chemotools-0.0.12.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
31
+ chemotools-0.0.12.dist-info/METADATA,sha256=NO-f_BJeOX4l2rMXu_Nu8ZI3my-HlWSpeMboiB5FFM4,6165
32
+ chemotools-0.0.12.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
33
+ chemotools-0.0.12.dist-info/top_level.txt,sha256=eNcNcKSdo-1H_2gwSDrS__dr7BM3R73Cnn-pBiW5FEw,17
34
+ chemotools-0.0.12.dist-info/RECORD,,
@@ -1,6 +1,6 @@
1
1
  import numpy as np
2
2
 
3
- from chemotools.baseline import AirPls, LinearCorrection, NonNegative
3
+ from chemotools.baseline import AirPls, LinearCorrection, NonNegative, SubtractReference
4
4
  from chemotools.derivative import NorrisWilliams, SavitzkyGolay
5
5
  from chemotools.scale import LNormalize, MinMaxScaler
6
6
  from chemotools.scatter import MultiplicativeScatterCorrection, StandardNormalVariate
@@ -238,6 +238,7 @@ def test_saviszky_golay_filter_3():
238
238
  # Assert
239
239
  assert np.allclose(spectrum_corrected[0], np.ones((1, 10)), atol=1e-2)
240
240
 
241
+
241
242
  def test_standard_normal_variate(spectrum, reference_snv):
242
243
  # Arrange
243
244
  snv = StandardNormalVariate()
@@ -248,6 +249,25 @@ def test_standard_normal_variate(spectrum, reference_snv):
248
249
  # Assert
249
250
  assert np.allclose(spectrum_corrected[0], reference_snv[0], atol=1e-2)
250
251
 
252
def test_subtract_reference(spectrum):
    """Subtracting a spectrum from itself must leave only zeros."""
    # Arrange
    baseline = SubtractReference(reference=spectrum)

    # Act
    spectrum_corrected = baseline.fit_transform(spectrum)

    # Assert
    # The expected zeros take their shape from the corrected output, so the
    # comparison covers every value and does not depend on broadcasting an
    # array sized by the number of samples.
    assert np.allclose(spectrum_corrected, np.zeros_like(spectrum_corrected), atol=1e-8)
261
+
262
def test_subtract_reference_without_reference(spectrum):
    """Without a reference the transformer must return the input unchanged."""
    # Arrange
    baseline = SubtractReference()

    # Act
    spectrum_corrected = baseline.fit_transform(spectrum)

    # Assert
    # Compare the full output with the full input instead of broadcasting a
    # single output row against the whole 2D fixture.
    assert np.allclose(spectrum_corrected, spectrum, atol=1e-8)
251
271
 
252
272
  def test_whitakker_smooth(spectrum, reference_whitakker):
253
273
  # Arrange
@@ -1,11 +1,13 @@
1
1
  from sklearn.utils.estimator_checks import check_estimator
2
2
 
3
- from chemotools.baseline import AirPls, CubicSplineCorrection, LinearCorrection, NonNegative, PolynomialCorrection
3
+ from chemotools.baseline import AirPls, CubicSplineCorrection, LinearCorrection, NonNegative, PolynomialCorrection, SubtractReference
4
4
  from chemotools.derivative import NorrisWilliams, SavitzkyGolay
5
5
  from chemotools.scale import MinMaxScaler, LNormalize
6
6
  from chemotools.scatter import MultiplicativeScatterCorrection, StandardNormalVariate
7
7
  from chemotools.smooth import MeanFilter, MedianFilter, SavitzkyGolayFilter, WhittakerSmooth
8
8
 
9
+ from tests.fixtures import spectrum
10
+
9
11
 
10
12
  # AirPls
11
13
  def test_compliance_air_pls():
@@ -77,6 +79,14 @@ def test_compliance_norris_williams():
77
79
  # Act & Assert
78
80
  check_estimator(transformer)
79
81
 
82
+ # NorrisWilliams
83
def test_compliance_norris_williams_2():
    """Run scikit-learn's estimator checks on NorrisWilliams with derivative_order=2."""
    # Arrange, Act & Assert
    check_estimator(NorrisWilliams(derivative_order=2))
88
+
89
+
80
90
  # PolynomialCorrection
81
91
  def test_compliance_polynomial_correction():
82
92
  # Arrange
@@ -105,6 +115,13 @@ def test_compliance_standard_normal_variate():
105
115
  # Act & Assert
106
116
  check_estimator(transformer)
107
117
 
118
+ # SubtractReference
119
def test_compliance_subtract_reference():
    """Run scikit-learn's estimator checks on a default SubtractReference."""
    # Arrange, Act & Assert
    check_estimator(SubtractReference())
124
+
108
125
  # WhittakerSmooth
109
126
  def test_compliance_whittaker_smooth():
110
127
  # Arrange
@@ -1,411 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: chemotools
3
- Version: 0.0.10
4
- Summary: Package to integrate chemometrics in scikit-learn pipelines
5
- Home-page: https://github.com/paucablop/chemotools
6
- Author: Pau Cabaneros Lopez
7
- Author-email: pau.cabaneros@gmail.com
8
- Project-URL: Bug Tracker, https://github.com/paucablop/chemotools/issues/
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: License :: OSI Approved :: MIT License
11
- Classifier: Operating System :: OS Independent
12
- Requires-Python: >=3.9
13
- Description-Content-Type: text/markdown
14
- License-File: LICENSE
15
- Requires-Dist: numpy
16
- Requires-Dist: scipy
17
- Requires-Dist: scikit-learn
18
-
19
- ## Welcome to chemotools! 🖖
20
-
21
- This project is an implementation of spectral preprocessing techniques integrated with the widely popular sklearn API, providing you with an easy-to-use toolkit for analyzing and manipulating your data. With this integration, you can easily apply techniques such as spectral derivative, scatter correction or baseline removal to your datasets.
22
-
23
- The goal of this project is to provide a comprehensive and user-friendly package for spectral preprocessing, with a focus on making it accessible to a wide range of users, from data scientists to researchers to curious learners. By integrating these techniques with the powerful sklearn API, I enable users to easily build machine learning and chemometric models on top of preprocessed data, making it possible to identify patterns and make predictions with greater accuracy.
24
-
25
- In this repository, you will find a range of tools and resources for using spectral preprocessing techniques with the sklearn API, including code samples, documentation, and examples. I encourage contributions from the community, whether through bug reports, feature requests, or pull requests, to help us make this project even better.
26
-
27
- In addition, I take great care to ensure that all functions are thoroughly unit tested for both API compatibility and functionality. I believe that unit testing is an essential part of any software development process, and it is particularly important for a project like this, which aims to provide reliable data analysis tools. I hope that this project will serve as a useful resource for anyone interested in learning more about spectral preprocessing and machine learning.
28
-
29
- Thank you for your interest in this project, and I hope you find it useful for your data analysis and machine learning needs.
30
-
31
- Table of contents
32
- =================
33
-
34
- <!--ts-->
35
- * [Installation](#installation)
36
- * [Integration with scikit-learn](#integration-with-scikit-learn)
37
- * [Scatter](#scatter)
38
- * [Multiplicative scatter correction](#multiplicative-scatter-correction)
39
- * [Standard normal variate](#standard-normal-variate)
40
- * [Extended multiplicative scatter correction (COMING SOON)](#extended-multiplicative-scatter-correction)
41
- * [Derivative](#derivatives)
42
- * [Savitzky-Golay derivative](#savitzky-golay-derivative)
43
- * [William Norris derivative](#william-norris-derivative)
44
- * [Baseline](#baseline)
45
- * [Linear baseline correction](#linear-baseline-correction)
46
- * [Polynomial baseline correction](#polynomial-baseline-correction)
47
- * [Cubic spline baseline correction](#cubic-spline-baseline-correction)
48
- * [AirPls](#alternate-iterative-reweighed-penalized-least-squares-(AIRPLS)-baseline-correction)
49
- * [Non-negative](#non-negative)
50
- * [Scale](#scale)
51
- * [Min-max scaler](#minmax-scaler)
52
- * [L-Norm scaler](#l-norm-scaler)
53
- * [Smooth](#smooth)
54
- * [Savitzky-Golay smoothing](#savitzky-golay-smoothing)
55
- * [Whittaker smoother](#whittaker-smoother)
56
- * [Mean filter](#mean-filter)
57
- * [Median filter](#median-filter)
58
-
59
- <!--te-->
60
-
61
-
62
- ## __Installation 🚀__
63
- This package is available on PyPI and can be installed using pip:
64
-
65
- ```bash
66
- pip install chemotools
67
- ```
68
-
69
- ## __Integration with scikit-learn ✨__
70
- All preprocessing techniques in this package are compatible with ```scikit-learn``` and can be used in pipelines. For example, the following code creates a pipeline that performs AirPLS baseline correction, followed by multiplicative scatter correction and mean centering (```StandardScaler(with_std=False)```):
71
-
72
- ```python
73
- from sklearn.preprocessing import StandardScaler
74
- from sklearn.pipeline import make_pipeline
75
-
76
- pipeline = make_pipeline(AirPls(), MultiplicativeScatterCorrection(), StandardScaler(with_std=False))
77
- spectra_transformed = pipeline.fit_transform(spectra)
78
- ```
79
-
80
-
81
- ## __Scatter__
82
-
83
- This package contains three common algorithms for scatter correction in spectroscopy:
84
-
85
- - Multiplicative scatter correction (MSC)
86
- - Standard normal variate (SNV)
87
- - Extended multiplicative scatter correction (EMSC)
88
-
89
- ### __Multiplicative scatter correction__
90
- Multiplicative scatter correction (MSC) is a preprocessing technique in spectroscopy that corrects for the influence of light scattering on spectral measurements by dividing each spectrum by a scatter reference spectrum. The current implementation accepts three types of reference spectra:
91
-
92
- - The mean spectrum of the dataset (_default_).
93
- - The median spectrum of the dataset.
94
- - A single spectrum that is used to correct all spectra in the dataset.
95
-
96
- Usage example for a single reference spectrum:
97
-
98
- Usage example for the mean spectrum:
99
-
100
- ```python
101
- from chemotools.scatter import MultiplicativeScatterCorrection
102
-
103
- msc = MultiplicativeScatterCorrection()
104
- spectra_msc = msc.fit_transform(spectra)
105
- ```
106
-
107
- Usage example for the median spectrum:
108
-
109
- ```python
110
- from chemotools.scatter import MultiplicativeScatterCorrection
111
-
112
- msc = MultiplicativeScatterCorrection(use_median=True)
113
- spectra_msc = msc.fit_transform(spectra)
114
- ```
115
-
116
- Usage example for a single reference spectrum:
117
-
118
- ```python
119
- from chemotools.scatter import MultiplicativeScatterCorrection
120
-
121
- msc = MultiplicativeScatterCorrection(reference=reference_spectrum)
122
- spectra_msc = msc.fit_transform(spectra)
123
- ```
124
-
125
- ![msc](figures/msc.png)
126
-
127
-
128
- ### __Standard normal variate__
129
- Standard normal variate (SNV) is a preprocessing technique in spectroscopy that adjusts for baseline shifts and variations in signal intensity by subtracting the mean and dividing by the standard deviation of each spectrum.
130
-
131
- Usage example:
132
-
133
- ```python
134
- from chemotools.scatter import StandardNormalVariate
135
-
136
- snv = StandardNormalVariate()
137
- spectra_snv = snv.fit_transform(spectra)
138
- ```
139
- ![snv](figures/snv.png)
140
-
141
-
142
- ### __Extended multiplicative scatter correction__
143
- Extended multiplicative scatter correction (EMSC) is a preprocessing technique in spectroscopy that corrects for the influence of light scattering and instrumental drift by fitting a mathematical model to a reference spectrum and using it to normalize all spectra in the dataset.
144
-
145
- An implementation of the EMSC will be available soon 🤓.
146
-
147
- ## __Derivatives__
148
-
149
- This package contains two common algorithms for calculating derivatives in spectroscopy:
150
-
151
- - Savitzky-Golay derivative
152
- - William Norris derivative
153
-
154
- ### __Savitzky-Golay derivative__
155
- Savitzky-Golay derivative is a preprocessing technique in spectroscopy that calculates the derivative of a spectrum by fitting a polynomial to a window of adjacent points and calculating the derivative of the polynomial.
156
-
157
- The following arguments can be set:
158
-
159
- - ```window_size: int```: The length of the window. Must be an odd integer number. _Default: 5_.
160
- - ```polynomial_order: int```: The order of the polynomial used to fit the samples. Must be less than ```window_size```. _Default: 2_.
161
- - ```derivative_order: int```: The order of the derivative to compute. _Default: 1_.
162
- - ```mode: str```: The mode of the boundary. _Default: 'nearest'_, available options: ```'nearest'```, ```'constant'```, ```'reflect'```, ```'wrap'```, ```'mirror'```, ```'interp'```. See the [official documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html) for more information.
163
-
164
- Usage example:
165
-
166
- ```python
167
- from chemotools.derivative import SavitzkyGolay
168
-
169
- sg = SavitzkyGolay(window_size=15, polynomial_order=2, derivative_order=1)
170
- spectra_derivative = sg.fit_transform(spectra)
171
- ```
172
-
173
- ![sgd](figures/sgd.png)
174
-
175
- ### __William Norris derivative__
176
- William Norris derivative is a preprocessing technique in spectroscopy that calculates the derivative of a spectrum using finite differences.
177
-
178
- The following arguments can be set:
179
-
180
- - ```window_size: int```: The length of the window. Must be an odd integer number. _Default: 5_.
181
- - ```gap_size: int```: The number of points between the first and second points of the window. _Default: 3_.
182
- - ```derivative_order: int```: The order of the derivative to compute. _Default: 1_.
183
- - ```mode: str```: The mode of the boundary. _Default: 'nearest'_, available options: ```‘reflect’```, ```‘constant’```, ```‘nearest’```, ```‘mirror’```, ```‘wrap’```. See the [official documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.convolve.html) for more information.
184
-
185
- Usage example:
186
-
187
- ```python
188
- from chemotools.derivative import NorrisWilliams
189
-
190
- nw = NorrisWilliams(window_size=15, gap_size=3, derivative_order=1)
191
- spectra_derivative = nw.fit_transform(spectra)
192
- ```
193
- ![wn](figures/wn.png)
194
-
195
- ## __Baseline__
196
- Baseline correction is a preprocessing technique in spectroscopy that corrects for baseline shifts and variations in signal intensity by subtracting a baseline from a spectrum. The following algorithms are available:
197
-
198
- - Linear baseline correction
199
- - Polynomial baseline correction
200
- - Cubic spline baseline correction
201
- - Alternate iterative reweighed penalized least squares (AIRPLS) baseline correction
202
- - Non-negative
203
-
204
- ### __Linear baseline correction__
205
- Linear baseline correction is a preprocessing technique in spectroscopy that corrects for baseline shifts and variations in signal intensity by subtracting a linear baseline from a spectrum. The current implementation subtracts a linear baseline between the first and last point of the spectrum.
206
-
207
- Usage example:
208
-
209
- ```python
210
- from chemotools.baseline import LinearCorrection
211
-
212
- lc = LinearCorrection()
213
- spectra_baseline = lc.fit_transform(spectra)
214
- ```
215
- ![lb](figures/lb.png)
216
-
217
-
218
- ### __Polynomial baseline correction__
219
- Polynomial baseline correction is a preprocessing technique in spectroscopy that approximates a baseline by fitting a polynomial to selected points of the spectrum. The selected points often correspond to minima in the spectra, and are selected by their index (not by the wavenumber). If no points are selected, the algorithm will select the first and last point of the spectrum.
220
-
221
- The following arguments can be set:
222
-
223
- - ```order: int``` The order of the polynomial used to fit the samples. _Default: 1_.
224
- - ```indices: tuple``` The indices of the points to use for fitting the polynomial. _Default: (0, -1)_. At the moment the indices need to be specified manually as a tuple because ```scikit-learn``` does not support mutable attributes in ```BaseEstimator```. This tuple is transformed to a list when the ```transform``` method is called.
225
-
226
- Usage example:
227
-
228
- ```python
229
- from chemotools.baseline import PolynomialCorrection
230
-
231
- pc = PolynomialCorrection(order=2, indices=(0, 75, 150, 200, 337))
232
- spectra_baseline = pc.fit_transform(spectra)
233
- ```
234
- ![pb](figures/pb.png)
235
-
236
- ### __Cubic spline baseline correction__
237
- Cubic spline baseline correction is a preprocessing technique in spectroscopy that approximates a baseline by fitting a cubic spline to selected points of the spectrum. Similar to the ```PolynomialCorrection```, the selected points often correspond to minima in the spectra, and are selected by their index (not by the wavenumber). If no points are selected, the algorithm will select the first and last point of the spectrum.
238
-
239
- The following arguments can be set:
240
- - ```indices: tuple``` The indices of the points to use for fitting the polynomial. _Default: None_. At the moment the indices need to be specified manually as a tuple because ```scikit-learn``` does not support mutable attributes in ```BaseEstimator```. This tuple is transformed to a list when the ```transform``` method is called.
241
-
242
- Usage example:
243
-
244
- ```python
245
- from chemotools.baseline import CubicSplineCorrection
246
-
247
- cspl = CubicSplineCorrection(indices=(0, 75, 150, 200, 337))
248
- spectra_baseline = cspl.fit_transform(spectra)
249
- ```
250
-
251
- ![splines](figures/splines.png)
252
-
253
- ### __Adaptive iteratively reweighted penalized least squares (airPLS) baseline correction__
254
- AirPLS is an automated baseline correction algorithm that uses a penalized least squares approach to fit a baseline to a spectrum. The original algorithm is based on the paper by [Zhang et al.](https://pubs.rsc.org/is/content/articlelanding/2010/an/b922045c). The current implementation is based on the Python implementation by [zmzhang](https://github.com/zmzhang/airPLS).
255
-
256
- The following arguments can be set:
257
- - ```nr_iterations: int``` The number of iterations before exiting the algorithm. _Default: 15_.
258
- - ```lam: float``` smoothing factor. _Default: 1e2_.
259
- - ```polynomial_order: int``` The order of the polynomial used to fit the samples. _Default: 1_.
260
-
261
- Usage example:
262
-
263
- ```python
264
- from chemotools.baseline import AirPls
265
-
266
- airpls = AirPls()
267
- spectra_baseline = airpls.fit_transform(spectra)
268
- ```
269
-
270
- ![airpls](figures/airpls.png)
271
-
272
- ### __Non-negative__
273
- Non-negative baseline correction is a preprocessing technique in spectroscopy that corrects for baseline by removing negative values from a spectrum. Negative values are either replaced by 0, or set to their absolute value.
274
-
275
- The following arguments can be set:
276
- - ```mode: str``` If ```'zero'```, negative values are replaced by 0. If ```'abs'```, negative values are set to their absolute value. _Default: ```'zero'```_.
277
-
278
- Usage example:
279
-
280
- ```python
281
- from chemotools.baseline import NonNegative
282
-
283
- nnz = NonNegative(mode='zero')
284
- nna = NonNegative(mode='abs')
285
- spectra_nnz = nnz.fit_transform(spectra_baseline)
286
- spectra_nna = nna.fit_transform(spectra_baseline)
287
- ```
288
-
289
- ![nnz](figures/nnz.png)
290
- ![nna](figures/nna.png)
291
-
292
- ## __Scale__
293
- Scale is a preprocessing technique in spectroscopy that scales the spectra. The following algorithms are available:
294
- - MinMaxScaler: scales each spectrum by its minimum or maximum value.
295
- - L-normalization: scales each spectrum by its L-norm.
296
-
297
- ### __MinMax scaler__
298
- MinMaxScaler is a preprocessing technique in spectroscopy that scales each spectrum by its minimum or maximum value.
299
-
300
- The following arguments can be set:
301
- - ```norm: str``` If ```'min'```, the spectrum is scaled by its minimum value. If ```'max'```, the spectrum is scaled by its maximum value. _Default: ```'max'```_.
302
-
303
- Usage example:
304
-
305
- ```python
306
- from chemotools.scale import MinMaxScaler
307
-
308
- minmax = MinMaxScaler(norm='max')
309
- spectra_norm = minmax.fit_transform(spectra)
310
- ```
311
-
312
- ![minmax](figures/minmax.png)
313
-
314
-
315
- ### __L-normalization__
316
- L-normalization is a preprocessing technique in spectroscopy that scales each spectrum by its L-norm.
317
-
318
- The following arguments can be set:
319
- - ```l_norm: int``` The L-norm to use. _Default: ```2```_.
320
-
321
- Usage example:
322
-
323
- ```python
324
- from chemotools.scale import LNormalize
325
-
326
- lnorm = LNormalize(l_norm=2)
327
- spectra_norm = lnorm.fit_transform(spectra)
328
- ```
329
-
330
- ![lnorm](figures/lnorm.png)
331
-
332
- ## __Smooth__
333
- Smooth is a preprocessing technique in spectroscopy that smooths the spectra. The following algorithms are available:
334
- - Savitzky-Golay filter
335
- - Whittaker smoother
336
- - Mean filter
337
- - Median filter
338
-
339
- ### __Savitzky-Golay filter__
340
- Savitzky-Golay filter is a preprocessing technique in spectroscopy that smooths the spectra by fitting a polynomial to the data. The current implementation is based on the ```scipy.signal.savgol_filter``` function.
341
-
342
- The following arguments can be set:
343
-
344
- - ```window_size: int```: The length of the window. Must be an odd integer number. _Default: 3_.
345
- - ```polynomial_order: int```: The order of the polynomial used to fit the samples. Must be less than ```window_size```. _Default: 1_.
346
- - ```derivative_order: int```: The order of the derivative to compute. _Default: 1_.
- - ```mode: str```: Determines how the array borders are handled. One of ```'mirror'```, ```'constant'```, ```'nearest'```, ```'wrap'```, or ```'interp'```. See the [official documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.savgol_filter.html) for more information.
347
-
348
- Usage example:
349
-
350
- ```python
351
- from chemotools.smooth import SavitzkyGolayFilter
352
-
353
- sgf = SavitzkyGolayFilter(window_size=15, polynomial_order=2)
354
- spectra_norm = sgf.fit_transform(spectra)
355
- ```
356
-
357
- ![sgf](figures/sgf.png)
358
-
359
- ### __Whittaker smoother__
360
- The Whittaker smoother is an automated smoothing algorithm that uses a penalized least squares approach to iteratively apply a smoothing operation to the data by minimizing a penalty function that balances the degree of smoothness and the fidelity to the original data.
361
-
362
- The following arguments can be set:
363
- - ```lam: float``` smoothing factor. _Default: 1e2_.
364
- - ```differences: int``` The number of differences to use. _Default: 1_.
365
-
366
- Usage example:
367
-
368
- ```python
369
- from chemotools.smooth import WhittakerSmooth
370
-
371
- wtk = WhittakerSmooth(lam=10)
372
- spectra_norm = wtk.fit_transform(spectra)
373
- ```
374
-
375
- ![wtk](figures/wtk.png)
376
-
377
- ### __Mean filter__
378
- Mean filter is a preprocessing technique in spectroscopy that smooths the spectra by applying a mean filter. The current implementation is based on the ```scipy.ndimage.uniform_filter1d``` function.
379
-
380
- The following arguments can be set:
381
- - ```window_size: int```: The length of the window. Must be an odd integer number. _Default: 3_.
382
- - ```mode: str```: Determines how the array borders are handled. One of ```'reflect'```, ```'constant'```, ```'nearest'```, ```'mirror'```, or ```'wrap'```. See the [official documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.uniform_filter1d.html) for more information. _Default: ```'nearest'```_.
383
-
384
- Usage example:
385
-
386
- ```python
387
- from chemotools.smooth import MeanFilter
388
-
389
- mean_filter = MeanFilter()
390
- spectra_norm = mean_filter.fit_transform(spectra)
391
- ```
392
-
393
- ![mean_filter](figures/mean_filter.png)
394
-
395
- ### __Median filter__
396
- Median filter is a preprocessing technique in spectroscopy that smooths the spectra by applying a median filter. The current implementation is based on the ```scipy.ndimage.median_filter``` function.
397
-
398
- The following arguments can be set:
399
- - ```window_size: int```: The length of the window. Must be an odd integer number. _Default: 3_.
400
- - ```mode: str```: Determines how the array borders are handled. One of ```'reflect'```, ```'constant'```, ```'nearest'```, ```'mirror'```, or ```'wrap'```. See the [official documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.ndimage.median_filter.html) for more information. _Default: ```'nearest'```_.
401
-
402
- Usage example:
403
-
404
- ```python
405
- from chemotools.smooth import MedianFilter
406
-
407
- median_filter = MedianFilter()
408
- spectra_norm = median_filter.fit_transform(spectra)
409
- ```
410
-
411
- ![median_filter](figures/median_filter.png)