chemotools 0.0.10__tar.gz → 0.0.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. chemotools-0.0.12/PKG-INFO +118 -0
  2. chemotools-0.0.12/README.md +103 -0
  3. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/baseline/__init__.py +2 -1
  4. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/baseline/cubic_spline_correction.py +8 -7
  5. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/baseline/polynomial_correction.py +8 -3
  6. chemotools-0.0.12/chemotools/baseline/subtract_reference.py +54 -0
  7. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/derivative/norris_william.py +1 -1
  8. chemotools-0.0.12/chemotools.egg-info/PKG-INFO +118 -0
  9. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools.egg-info/SOURCES.txt +1 -0
  10. {chemotools-0.0.10 → chemotools-0.0.12}/tests/test_functionality.py +21 -1
  11. {chemotools-0.0.10 → chemotools-0.0.12}/tests/test_sklearn_compliance.py +18 -1
  12. chemotools-0.0.10/PKG-INFO +0 -408
  13. chemotools-0.0.10/README.md +0 -393
  14. chemotools-0.0.10/chemotools.egg-info/PKG-INFO +0 -408
  15. {chemotools-0.0.10 → chemotools-0.0.12}/LICENSE +0 -0
  16. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/__init__.py +0 -0
  17. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/baseline/air_pls.py +0 -0
  18. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/baseline/linear_correction.py +0 -0
  19. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/baseline/non_negative.py +0 -0
  20. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/derivative/__init__.py +0 -0
  21. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/derivative/savitzky_golay.py +0 -0
  22. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/scale/__init__.py +0 -0
  23. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/scale/l_normalize.py +0 -0
  24. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/scale/min_max_normalize.py +0 -0
  25. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/scatter/__init__.py +0 -0
  26. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/scatter/extended_multiplicative_scatter_correction.py +0 -0
  27. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/scatter/multiplicative_scatter_correction.py +0 -0
  28. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/scatter/standard_normal_variate.py +0 -0
  29. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/smooth/__init__.py +0 -0
  30. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/smooth/mean_filter.py +0 -0
  31. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/smooth/median_filter.py +0 -0
  32. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/smooth/savitzky_golay_filter.py +0 -0
  33. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/smooth/whittaker_smooth.py +0 -0
  34. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/utils/__init__.py +0 -0
  35. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools/utils/check_inputs.py +0 -0
  36. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools.egg-info/dependency_links.txt +0 -0
  37. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools.egg-info/requires.txt +0 -0
  38. {chemotools-0.0.10 → chemotools-0.0.12}/chemotools.egg-info/top_level.txt +0 -0
  39. {chemotools-0.0.10 → chemotools-0.0.12}/pyproject.toml +0 -0
  40. {chemotools-0.0.10 → chemotools-0.0.12}/setup.cfg +0 -0
  41. {chemotools-0.0.10 → chemotools-0.0.12}/setup.py +0 -0
  42. {chemotools-0.0.10 → chemotools-0.0.12}/tests/__init__.py +0 -0
  43. {chemotools-0.0.10 → chemotools-0.0.12}/tests/fixtures.py +0 -0
@@ -0,0 +1,118 @@
1
+ Metadata-Version: 2.1
2
+ Name: chemotools
3
+ Version: 0.0.12
4
+ Summary: Package to integrate chemometrics in scikit-learn pipelines
5
+ Home-page: https://github.com/paucablop/chemotools
6
+ Author: Pau Cabaneros Lopez
7
+ Author-email: pau.cabaneros@gmail.com
8
+ Project-URL: Bug Tracker, https://github.com/paucablop/chemotools/issues/
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+
16
+ [![pypi](https://img.shields.io/pypi/v/chemotools)](https://pypi.org/project/chemotools)
17
+ [![pypi](https://img.shields.io/pypi/pyversions/chemotools)](https://pypi.org/project/chemotools)
18
+ [![pypi](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
19
+ [![codecov](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
20
+
21
+ # __chemotools__
22
+
23
+ Welcome to Chemotools, a Python package that integrates chemometrics with Scikit-learn.
24
+
25
+ 👉 Check the [documentation](https://paucablop.github.io/chemotools/) for a full description of how to use chemotools.
26
+
27
+ ## Description
28
+
29
+ Chemotools is a Python package that provides a collection of preprocessing tools and utilities for working with spectral data. It is built on top of popular scientific libraries and is designed to be highly modular, easy to use, and compatible with Scikit-learn transformers.
30
+
31
+ If you are interested in learning more about chemotools, please visit the [documentation](https://paucablop.github.io/chemotools/) page.
32
+
33
+ Benefits:
34
+ - Provides a collection of preprocessing tools and utilities for working with spectral data
35
+ - Highly modular and compatible with Scikit-learn transformers
36
+ - Can perform popular preprocessing tasks such as baseline correction, smoothing, scaling, derivatization, and scattering correction
37
+ - Open source and available on PyPI
38
+
39
+ Applications:
40
+ - Analyzing and processing spectral data in chemistry, biology, and other fields
41
+ - Developing machine learning models for predicting properties or classifying samples based on spectral data
42
+ - Teaching and learning about chemometrics and data preprocessing in Python
43
+
44
+ ## Installation
45
+
46
+ Chemotools is distributed via PyPI and can be easily installed using pip:
47
+
48
+ ```bash
49
+ pip install chemotools
50
+ ```
51
+
52
+ ## Usage
53
+
54
+ Chemotools is designed to be used in conjunction with Scikit-learn. It follows the same API as other Scikit-learn transformers, so you can easily integrate it into your existing workflow. For example, you can use chemotools to build pipelines that include transformers from chemotools and Scikit-learn:
55
+
56
+ ```python
57
+ from sklearn.preprocessing import StandardScaler
58
+ from sklearn.pipeline import make_pipeline
59
+
60
+ from chemotools.baseline import AirPls
61
+ from chemotools.scatter import MultiplicativeScatterCorrection
62
+
63
+ preprocessing = make_pipeline(AirPls(), MultiplicativeScatterCorrection(), StandardScaler(with_std=False))
64
+ spectra_transformed = preprocessing.fit_transform(spectra)
65
+ ```
66
+
67
+ Check the [documentation](https://paucablop.github.io/chemotools/) for more information on how to use chemotools.
68
+
69
+
70
+ ## Contributing
71
+
72
+ We welcome contributions to Chemotools from anyone interested in improving the package. Whether you have ideas for new features, bug reports, or just want to help improve the code, we appreciate your contributions! You are also welcome to check the [Project Board](https://github.com/users/paucablop/projects/4) to see what we are currently working on.
73
+
74
+ To contribute to Chemotools, please follow these guidelines:
75
+
76
+ #### Reporting Bugs
77
+
78
+ If you encounter a bug or unexpected behavior in Chemotools, please open an issue on the GitHub repository with a detailed description of the problem, including any error messages and steps to reproduce the issue. If possible, include sample code or data that demonstrates the problem.
79
+
80
+ #### Suggesting Enhancements
81
+
82
+ If you have an idea for a new feature or enhancement for Chemotools, please open an issue on the GitHub repository with a detailed description of the proposed feature and its benefits. If possible, include example code or use cases that illustrate how the feature would be used.
83
+
84
+ #### Submitting Changes
85
+
86
+ If you'd like to contribute code changes to Chemotools, please follow these steps:
87
+
88
+ - Create a new branch for your changes. We follow trunk-based development, so all changes should be made on a new branch and branches should be short-lived and merged into main.
89
+
90
+ - Write your code and tests, making sure to follow the Chemotools coding style and conventions. It is essential to include tests for both the Scikit-learn API and the functionality of the transformers.
91
+
92
+ - Run the tests using the provided testing framework to ensure that your changes do not introduce any new errors or regressions.
93
+
94
+ - Submit a pull request to the main Chemotools repository with a detailed description of your changes and the problem they solve.
95
+
96
+ We will review your changes and provide feedback as soon as possible. If we request changes, please make them as quickly as possible to keep the review process moving.
97
+
98
+ #### Code Style
99
+
100
+ Please follow the Chemotools code style and conventions when contributing code changes. Specifically:
101
+
102
+ - Use four spaces for indentation
103
+ - Use descriptive variable names
104
+ - Avoid using magic numbers or hard-coded strings
105
+ - Format your code using Black
106
+
107
+ #### Codecov
108
+
109
+ We use Codecov to track the test coverage of Chemotools. Please make sure that your changes do not reduce the test coverage of the package.
110
+
111
+
112
+ ## License
113
+
114
+ This package is distributed under the MIT license. See the [LICENSE](LICENSE) file for more information. When contributing code to Chemotools, you are agreeing to release your code under the MIT license.
115
+
116
+ ## Credits
117
+
118
+ AirPLS baseline correction is based on the implementation by [Zhang et al.](https://pubs.rsc.org/is/content/articlelanding/2010/an/b922045c). The current implementation is based on the Python implementation by [zmzhang](https://github.com/zmzhang/airPLS).
@@ -0,0 +1,103 @@
1
+ [![pypi](https://img.shields.io/pypi/v/chemotools)](https://pypi.org/project/chemotools)
2
+ [![pypi](https://img.shields.io/pypi/pyversions/chemotools)](https://pypi.org/project/chemotools)
3
+ [![pypi](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
4
+ [![codecov](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
5
+
6
+ # __chemotools__
7
+
8
+ Welcome to Chemotools, a Python package that integrates chemometrics with Scikit-learn.
9
+
10
+ 👉 Check the [documentation](https://paucablop.github.io/chemotools/) for a full description of how to use chemotools.
11
+
12
+ ## Description
13
+
14
+ Chemotools is a Python package that provides a collection of preprocessing tools and utilities for working with spectral data. It is built on top of popular scientific libraries and is designed to be highly modular, easy to use, and compatible with Scikit-learn transformers.
15
+
16
+ If you are interested in learning more about chemotools, please visit the [documentation](https://paucablop.github.io/chemotools/) page.
17
+
18
+ Benefits:
19
+ - Provides a collection of preprocessing tools and utilities for working with spectral data
20
+ - Highly modular and compatible with Scikit-learn transformers
21
+ - Can perform popular preprocessing tasks such as baseline correction, smoothing, scaling, derivatization, and scattering correction
22
+ - Open source and available on PyPI
23
+
24
+ Applications:
25
+ - Analyzing and processing spectral data in chemistry, biology, and other fields
26
+ - Developing machine learning models for predicting properties or classifying samples based on spectral data
27
+ - Teaching and learning about chemometrics and data preprocessing in Python
28
+
29
+ ## Installation
30
+
31
+ Chemotools is distributed via PyPI and can be easily installed using pip:
32
+
33
+ ```bash
34
+ pip install chemotools
35
+ ```
36
+
37
+ ## Usage
38
+
39
+ Chemotools is designed to be used in conjunction with Scikit-learn. It follows the same API as other Scikit-learn transformers, so you can easily integrate it into your existing workflow. For example, you can use chemotools to build pipelines that include transformers from chemotools and Scikit-learn:
40
+
41
+ ```python
42
+ from sklearn.preprocessing import StandardScaler
43
+ from sklearn.pipeline import make_pipeline
44
+
45
+ from chemotools.baseline import AirPls
46
+ from chemotools.scatter import MultiplicativeScatterCorrection
47
+
48
+ preprocessing = make_pipeline(AirPls(), MultiplicativeScatterCorrection(), StandardScaler(with_std=False))
49
+ spectra_transformed = preprocessing.fit_transform(spectra)
50
+ ```
51
+
52
+ Check the [documentation](https://paucablop.github.io/chemotools/) for more information on how to use chemotools.
53
+
54
+
55
+ ## Contributing
56
+
57
+ We welcome contributions to Chemotools from anyone interested in improving the package. Whether you have ideas for new features, bug reports, or just want to help improve the code, we appreciate your contributions! You are also welcome to check the [Project Board](https://github.com/users/paucablop/projects/4) to see what we are currently working on.
58
+
59
+ To contribute to Chemotools, please follow these guidelines:
60
+
61
+ #### Reporting Bugs
62
+
63
+ If you encounter a bug or unexpected behavior in Chemotools, please open an issue on the GitHub repository with a detailed description of the problem, including any error messages and steps to reproduce the issue. If possible, include sample code or data that demonstrates the problem.
64
+
65
+ #### Suggesting Enhancements
66
+
67
+ If you have an idea for a new feature or enhancement for Chemotools, please open an issue on the GitHub repository with a detailed description of the proposed feature and its benefits. If possible, include example code or use cases that illustrate how the feature would be used.
68
+
69
+ #### Submitting Changes
70
+
71
+ If you'd like to contribute code changes to Chemotools, please follow these steps:
72
+
73
+ - Create a new branch for your changes. We follow trunk-based development, so all changes should be made on a new branch and branches should be short-lived and merged into main.
74
+
75
+ - Write your code and tests, making sure to follow the Chemotools coding style and conventions. It is essential to include tests for both the Scikit-learn API and the functionality of the transformers.
76
+
77
+ - Run the tests using the provided testing framework to ensure that your changes do not introduce any new errors or regressions.
78
+
79
+ - Submit a pull request to the main Chemotools repository with a detailed description of your changes and the problem they solve.
80
+
81
+ We will review your changes and provide feedback as soon as possible. If we request changes, please make them as quickly as possible to keep the review process moving.
82
+
83
+ #### Code Style
84
+
85
+ Please follow the Chemotools code style and conventions when contributing code changes. Specifically:
86
+
87
+ - Use four spaces for indentation
88
+ - Use descriptive variable names
89
+ - Avoid using magic numbers or hard-coded strings
90
+ - Format your code using Black
91
+
92
+ #### Codecov
93
+
94
+ We use Codecov to track the test coverage of Chemotools. Please make sure that your changes do not reduce the test coverage of the package.
95
+
96
+
97
+ ## License
98
+
99
+ This package is distributed under the MIT license. See the [LICENSE](LICENSE) file for more information. When contributing code to Chemotools, you are agreeing to release your code under the MIT license.
100
+
101
+ ## Credits
102
+
103
+ AirPLS baseline correction is based on the implementation by [Zhang et al.](https://pubs.rsc.org/is/content/articlelanding/2010/an/b922045c). The current implementation is based on the Python implementation by [zmzhang](https://github.com/zmzhang/airPLS).
@@ -2,4 +2,5 @@ from .air_pls import AirPls
2
2
  from .cubic_spline_correction import CubicSplineCorrection
3
3
  from .linear_correction import LinearCorrection
4
4
  from .non_negative import NonNegative
5
- from .polynomial_correction import PolynomialCorrection
5
+ from .polynomial_correction import PolynomialCorrection
6
+ from .subtract_reference import SubtractReference
@@ -6,10 +6,10 @@ from sklearn.utils.validation import check_is_fitted
6
6
  from chemotools.utils.check_inputs import check_input
7
7
 
8
8
  class CubicSplineCorrection(BaseEstimator, TransformerMixin):
9
- def __init__(self, indices: tuple = None) -> None:
9
+ def __init__(self, indices: np.ndarray = None) -> None:
10
10
  self.indices = indices
11
11
 
12
- def fit(self, X: np.ndarray, y=None) -> "CubicSplineCorrection":
12
+ def fit(self, X: list, y=None) -> "CubicSplineCorrection":
13
13
  # Check that X is a 2D array and has only finite values
14
14
  X = check_input(X)
15
15
 
@@ -19,6 +19,11 @@ class CubicSplineCorrection(BaseEstimator, TransformerMixin):
19
19
  # Set the fitted attribute to True
20
20
  self._is_fitted = True
21
21
 
22
+ if self.indices is None:
23
+ self.indices_ = [0, len(X[0]) - 1]
24
+ else:
25
+ self.indices_ = self.indices
26
+
22
27
  return self
23
28
 
24
29
  def transform(self, X: np.ndarray, y=None, copy=True):
@@ -39,11 +44,7 @@ class CubicSplineCorrection(BaseEstimator, TransformerMixin):
39
44
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
40
45
 
41
46
  def _spline_baseline_correct(self, x: np.ndarray) -> np.ndarray:
42
- if self.indices is None:
43
- indices = [0, len(x) - 1]
44
- else:
45
- indices = list(self.indices)
46
-
47
+ indices = self.indices_
47
48
  intensity = x[indices]
48
49
  spl = CubicSpline(indices, intensity)
49
50
  baseline = spl(range(len(x)))
@@ -5,7 +5,7 @@ from sklearn.utils.validation import check_is_fitted
5
5
  from chemotools.utils.check_inputs import check_input
6
6
 
7
7
  class PolynomialCorrection(BaseEstimator, TransformerMixin):
8
- def __init__(self, order: int = 1, indices: tuple = (0, -1)) -> None:
8
+ def __init__(self, order: int = 1, indices: list = None) -> None:
9
9
  self.order = order
10
10
  self.indices = indices
11
11
 
@@ -19,6 +19,11 @@ class PolynomialCorrection(BaseEstimator, TransformerMixin):
19
19
  # Set the fitted attribute to True
20
20
  self._is_fitted = True
21
21
 
22
+ if self.indices is None:
23
+ self.indices_ = range(0, len(X[0]))
24
+ else:
25
+ self.indices_ = self.indices
26
+
22
27
  return self
23
28
 
24
29
  def transform(self, X: np.ndarray, y=0, copy=True) -> np.ndarray:
@@ -39,7 +44,7 @@ class PolynomialCorrection(BaseEstimator, TransformerMixin):
39
44
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
40
45
 
41
46
  def _baseline_correct_spectrum(self, x: np.ndarray) -> np.ndarray:
42
- intensity = x[list(self.indices)]
43
- poly = np.polyfit(self.indices, intensity, self.order)
47
+ intensity = x[self.indices_]
48
+ poly = np.polyfit(self.indices_, intensity, self.order)
44
49
  baseline = [np.polyval(poly, i) for i in range(0, len(x))]
45
50
  return x - baseline
@@ -0,0 +1,54 @@
1
+ import numpy as np
2
+ from sklearn.base import BaseEstimator, TransformerMixin
3
+ from sklearn.utils.validation import check_is_fitted
4
+
5
+ from chemotools.utils.check_inputs import check_input
6
+
7
+
8
+ class SubtractReference(BaseEstimator, TransformerMixin):
9
+ def __init__(
10
+ self,
11
+ reference: np.ndarray = None,
12
+ ):
13
+ self.reference = reference
14
+
15
+ def fit(self, X: np.ndarray, y=None) -> "SubtractReference":
16
+ # Check that X is a 2D array and has only finite values
17
+ X = check_input(X)
18
+
19
+ # Set the number of features
20
+ self.n_features_in_ = X.shape[1]
21
+
22
+ # Set the fitted attribute to True
23
+ self._is_fitted = True
24
+
25
+ # Set the reference
26
+
27
+ if self.reference is not None:
28
+ self.reference_ = self.reference.copy()
29
+ return self
30
+
31
+ return self
32
+
33
+ def transform(self, X: np.ndarray, y=None) -> np.ndarray:
34
+ # Check that the estimator is fitted
35
+ check_is_fitted(self, "_is_fitted")
36
+
37
+ # Check that X is a 2D array and has only finite values
38
+ X = check_input(X)
39
+ X_ = X.copy()
40
+
41
+ # Check that the number of features is the same as the fitted data
42
+ if X_.shape[1] != self.n_features_in_:
43
+ raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
44
+
45
+ if self.reference is None:
46
+ return X_.reshape(-1, 1) if X_.ndim == 1 else X_
47
+
48
+ # Subtract the reference
49
+ for i, x in enumerate(X_):
50
+ X_[i] = self._subtract_reference(x)
51
+ return X_.reshape(-1, 1) if X_.ndim == 1 else X_
52
+
53
+ def _subtract_reference(self, x) -> np.ndarray:
54
+ return x - self.reference_
@@ -50,7 +50,7 @@ class NorrisWilliams(BaseEstimator, TransformerMixin):
50
50
  X_[i] = derivative
51
51
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_
52
52
 
53
- if self.derivative_order == 1:
53
+ if self.derivative_order == 2:
54
54
  for i, x in enumerate(X_):
55
55
  derivative = self._spectrum_second_derivative(x)
56
56
  X_[i] = derivative
@@ -0,0 +1,118 @@
1
+ Metadata-Version: 2.1
2
+ Name: chemotools
3
+ Version: 0.0.12
4
+ Summary: Package to integrate chemometrics in scikit-learn pipelines
5
+ Home-page: https://github.com/paucablop/chemotools
6
+ Author: Pau Cabaneros Lopez
7
+ Author-email: pau.cabaneros@gmail.com
8
+ Project-URL: Bug Tracker, https://github.com/paucablop/chemotools/issues/
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+
16
+ [![pypi](https://img.shields.io/pypi/v/chemotools)](https://pypi.org/project/chemotools)
17
+ [![pypi](https://img.shields.io/pypi/pyversions/chemotools)](https://pypi.org/project/chemotools)
18
+ [![pypi](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
19
+ [![codecov](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
20
+
21
+ # __chemotools__
22
+
23
+ Welcome to Chemotools, a Python package that integrates chemometrics with Scikit-learn.
24
+
25
+ 👉 Check the [documentation](https://paucablop.github.io/chemotools/) for a full description of how to use chemotools.
26
+
27
+ ## Description
28
+
29
+ Chemotools is a Python package that provides a collection of preprocessing tools and utilities for working with spectral data. It is built on top of popular scientific libraries and is designed to be highly modular, easy to use, and compatible with Scikit-learn transformers.
30
+
31
+ If you are interested in learning more about chemotools, please visit the [documentation](https://paucablop.github.io/chemotools/) page.
32
+
33
+ Benefits:
34
+ - Provides a collection of preprocessing tools and utilities for working with spectral data
35
+ - Highly modular and compatible with Scikit-learn transformers
36
+ - Can perform popular preprocessing tasks such as baseline correction, smoothing, scaling, derivatization, and scattering correction
37
+ - Open source and available on PyPI
38
+
39
+ Applications:
40
+ - Analyzing and processing spectral data in chemistry, biology, and other fields
41
+ - Developing machine learning models for predicting properties or classifying samples based on spectral data
42
+ - Teaching and learning about chemometrics and data preprocessing in Python
43
+
44
+ ## Installation
45
+
46
+ Chemotools is distributed via PyPI and can be easily installed using pip:
47
+
48
+ ```bash
49
+ pip install chemotools
50
+ ```
51
+
52
+ ## Usage
53
+
54
+ Chemotools is designed to be used in conjunction with Scikit-learn. It follows the same API as other Scikit-learn transformers, so you can easily integrate it into your existing workflow. For example, you can use chemotools to build pipelines that include transformers from chemotools and Scikit-learn:
55
+
56
+ ```python
57
+ from sklearn.preprocessing import StandardScaler
58
+ from sklearn.pipeline import make_pipeline
59
+
60
+ from chemotools.baseline import AirPls
61
+ from chemotools.scatter import MultiplicativeScatterCorrection
62
+
63
+ preprocessing = make_pipeline(AirPls(), MultiplicativeScatterCorrection(), StandardScaler(with_std=False))
64
+ spectra_transformed = preprocessing.fit_transform(spectra)
65
+ ```
66
+
67
+ Check the [documentation](https://paucablop.github.io/chemotools/) for more information on how to use chemotools.
68
+
69
+
70
+ ## Contributing
71
+
72
+ We welcome contributions to Chemotools from anyone interested in improving the package. Whether you have ideas for new features, bug reports, or just want to help improve the code, we appreciate your contributions! You are also welcome to check the [Project Board](https://github.com/users/paucablop/projects/4) to see what we are currently working on.
73
+
74
+ To contribute to Chemotools, please follow these guidelines:
75
+
76
+ #### Reporting Bugs
77
+
78
+ If you encounter a bug or unexpected behavior in Chemotools, please open an issue on the GitHub repository with a detailed description of the problem, including any error messages and steps to reproduce the issue. If possible, include sample code or data that demonstrates the problem.
79
+
80
+ #### Suggesting Enhancements
81
+
82
+ If you have an idea for a new feature or enhancement for Chemotools, please open an issue on the GitHub repository with a detailed description of the proposed feature and its benefits. If possible, include example code or use cases that illustrate how the feature would be used.
83
+
84
+ #### Submitting Changes
85
+
86
+ If you'd like to contribute code changes to Chemotools, please follow these steps:
87
+
88
+ - Create a new branch for your changes. We follow trunk-based development, so all changes should be made on a new branch and branches should be short-lived and merged into main.
89
+
90
+ - Write your code and tests, making sure to follow the Chemotools coding style and conventions. It is essential to include tests for both the Scikit-learn API and the functionality of the transformers.
91
+
92
+ - Run the tests using the provided testing framework to ensure that your changes do not introduce any new errors or regressions.
93
+
94
+ - Submit a pull request to the main Chemotools repository with a detailed description of your changes and the problem they solve.
95
+
96
+ We will review your changes and provide feedback as soon as possible. If we request changes, please make them as quickly as possible to keep the review process moving.
97
+
98
+ #### Code Style
99
+
100
+ Please follow the Chemotools code style and conventions when contributing code changes. Specifically:
101
+
102
+ - Use four spaces for indentation
103
+ - Use descriptive variable names
104
+ - Avoid using magic numbers or hard-coded strings
105
+ - Format your code using Black
106
+
107
+ #### Codecov
108
+
109
+ We use Codecov to track the test coverage of Chemotools. Please make sure that your changes do not reduce the test coverage of the package.
110
+
111
+
112
+ ## License
113
+
114
+ This package is distributed under the MIT license. See the [LICENSE](LICENSE) file for more information. When contributing code to Chemotools, you are agreeing to release your code under the MIT license.
115
+
116
+ ## Credits
117
+
118
+ AirPLS baseline correction is based on the implementation by [Zhang et al.](https://pubs.rsc.org/is/content/articlelanding/2010/an/b922045c). The current implementation is based on the Python implementation by [zmzhang](https://github.com/zmzhang/airPLS).
@@ -9,6 +9,7 @@ setup.py
9
9
  ./chemotools/baseline/linear_correction.py
10
10
  ./chemotools/baseline/non_negative.py
11
11
  ./chemotools/baseline/polynomial_correction.py
12
+ ./chemotools/baseline/subtract_reference.py
12
13
  ./chemotools/derivative/__init__.py
13
14
  ./chemotools/derivative/norris_william.py
14
15
  ./chemotools/derivative/savitzky_golay.py
@@ -1,6 +1,6 @@
1
1
  import numpy as np
2
2
 
3
- from chemotools.baseline import AirPls, LinearCorrection, NonNegative
3
+ from chemotools.baseline import AirPls, LinearCorrection, NonNegative, SubtractReference
4
4
  from chemotools.derivative import NorrisWilliams, SavitzkyGolay
5
5
  from chemotools.scale import LNormalize, MinMaxScaler
6
6
  from chemotools.scatter import MultiplicativeScatterCorrection, StandardNormalVariate
@@ -238,6 +238,7 @@ def test_saviszky_golay_filter_3():
238
238
  # Assert
239
239
  assert np.allclose(spectrum_corrected[0], np.ones((1, 10)), atol=1e-2)
240
240
 
241
+
241
242
  def test_standard_normal_variate(spectrum, reference_snv):
242
243
  # Arrange
243
244
  snv = StandardNormalVariate()
@@ -248,6 +249,25 @@ def test_standard_normal_variate(spectrum, reference_snv):
248
249
  # Assert
249
250
  assert np.allclose(spectrum_corrected[0], reference_snv[0], atol=1e-2)
250
251
 
252
+ def test_subtract_reference(spectrum):
253
+ # Arrange
254
+ baseline = SubtractReference(reference=spectrum)
255
+
256
+ # Act
257
+ spectrum_corrected = baseline.fit_transform(spectrum)
258
+
259
+ # Assert
260
+ assert np.allclose(spectrum_corrected[0], np.zeros(len(spectrum)), atol=1e-8)
261
+
262
+ def test_subtract_reference_without_reference(spectrum):
263
+ # Arrange
264
+ baseline = SubtractReference()
265
+
266
+ # Act
267
+ spectrum_corrected = baseline.fit_transform(spectrum)
268
+
269
+ # Assert
270
+ assert np.allclose(spectrum_corrected[0], spectrum, atol=1e-8)
251
271
 
252
272
  def test_whitakker_smooth(spectrum, reference_whitakker):
253
273
  # Arrange
@@ -1,11 +1,13 @@
1
1
  from sklearn.utils.estimator_checks import check_estimator
2
2
 
3
- from chemotools.baseline import AirPls, CubicSplineCorrection, LinearCorrection, NonNegative, PolynomialCorrection
3
+ from chemotools.baseline import AirPls, CubicSplineCorrection, LinearCorrection, NonNegative, PolynomialCorrection, SubtractReference
4
4
  from chemotools.derivative import NorrisWilliams, SavitzkyGolay
5
5
  from chemotools.scale import MinMaxScaler, LNormalize
6
6
  from chemotools.scatter import MultiplicativeScatterCorrection, StandardNormalVariate
7
7
  from chemotools.smooth import MeanFilter, MedianFilter, SavitzkyGolayFilter, WhittakerSmooth
8
8
 
9
+ from tests.fixtures import spectrum
10
+
9
11
 
10
12
  # AirPls
11
13
  def test_compliance_air_pls():
@@ -77,6 +79,14 @@ def test_compliance_norris_williams():
77
79
  # Act & Assert
78
80
  check_estimator(transformer)
79
81
 
82
+ # NorrisWilliams
83
+ def test_compliance_norris_williams_2():
84
+ # Arrange
85
+ transformer = NorrisWilliams(derivative_order=2)
86
+ # Act & Assert
87
+ check_estimator(transformer)
88
+
89
+
80
90
  # PolynomialCorrection
81
91
  def test_compliance_polynomial_correction():
82
92
  # Arrange
@@ -105,6 +115,13 @@ def test_compliance_standard_normal_variate():
105
115
  # Act & Assert
106
116
  check_estimator(transformer)
107
117
 
118
+ # SubtractReference
119
+ def test_compliance_subtract_reference():
120
+ # Arrange
121
+ transformer = SubtractReference()
122
+ # Act & Assert
123
+ check_estimator(transformer)
124
+
108
125
  # WhittakerSmooth
109
126
  def test_compliance_whittaker_smooth():
110
127
  # Arrange