chemotools 0.1.11rc0__py3-none-any.whl → 0.1.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -193,7 +193,7 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
193
193
  if pad_left:
194
194
  points = x[: pad_length + 1] # Take first pad_length+1 points
195
195
  x_coords = np.arange(len(points))
196
- slope, intercept, _, _, _ = stats.linregress(x_coords, points)
196
+ slope, intercept, *_ = stats.linregress(x_coords, points)
197
197
 
198
198
  # Generate new points using linear regression
199
199
  new_x = np.arange(-pad_length, 0)
@@ -202,7 +202,7 @@ class IndexShift(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
202
202
  else:
203
203
  points = x[-pad_length - 1 :] # Take last pad_length+1 points
204
204
  x_coords = np.arange(len(points))
205
- slope, intercept, _, _, _ = stats.linregress(x_coords, points)
205
+ slope, intercept, *_ = stats.linregress(x_coords, points)
206
206
 
207
207
  # Generate new points using linear regression
208
208
  new_x = np.arange(len(points), len(points) + pad_length)
@@ -1,4 +1,6 @@
1
1
  from ._index_selector import IndexSelector
2
2
  from ._range_cut import RangeCut
3
+ from ._sr_selector import SRSelector
4
+ from ._vip_selector import VIPSelector
3
5
 
4
- __all__ = ["IndexSelector", "RangeCut"]
6
+ __all__ = ["IndexSelector", "RangeCut", "SRSelector", "VIPSelector"]
@@ -0,0 +1,88 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Union
3
+
4
+ import numpy as np
5
+
6
+ from sklearn.base import BaseEstimator
7
+ from sklearn.cross_decomposition._pls import _PLS
8
+ from sklearn.feature_selection._base import SelectorMixin
9
+ from sklearn.pipeline import Pipeline
10
+ from sklearn.utils.validation import check_is_fitted
11
+
12
+ ModelTypes = Union[_PLS, Pipeline]
13
+
14
+
15
class _PLSFeatureSelectorBase(ABC, BaseEstimator, SelectorMixin):
    """Abstract base class for feature selectors built on a fitted _PLS model.

    The constructor validates ``model`` and keeps the extracted estimator.
    Concrete subclasses implement ``_calculate_features`` to turn that
    estimator (and, optionally, the data) into feature scores.

    Parameters
    ----------
    model : Union[_PLS, Pipeline]
        A fitted _PLS model or a Pipeline ending with such a model.

    Attributes
    ----------
    estimator_ : _PLS
        The estimator returned by ``_validate_and_extract_model(model)``.

    Notes
    -----
    This base class does not itself accept a ``threshold`` nor set
    ``feature_scores_`` / ``support_mask_``. The subclasses in this package
    (``SRSelector`` and ``VIPSelector``) take ``threshold`` in their own
    ``__init__`` and set those two attributes in ``fit``.
    """

    def __init__(
        self,
        model: Union[_PLS, Pipeline],
    ) -> None:
        # Validation runs at construction time, so an invalid or unfitted
        # model makes the constructor raise instead of failing later in fit.
        self.estimator_ = _validate_and_extract_model(model)

    @abstractmethod
    def _calculate_features(self, X: np.ndarray) -> np.ndarray:
        """Calculate the feature scores used for selection.

        Parameters
        ----------
        X : np.ndarray
            The input data passed on by the subclass.

        Returns
        -------
        np.ndarray
            The feature scores. The implementations in this package return
            one score per feature.
        """
55
+
56
+
57
def _validate_and_extract_model(
    model: Union[_PLS, Pipeline],
) -> _PLS:
    """Validate a model and return the _PLS estimator it contains.

    Parameters
    ----------
    model : Union[_PLS, Pipeline]
        A fitted _PLS model or a Pipeline ending with such a model.

    Returns
    -------
    _PLS
        ``model`` itself, or its last step when ``model`` is a Pipeline.

    Raises
    ------
    TypeError
        If the model (or the last step of the Pipeline) is not a _PLS
        instance.
    NotFittedError
        Raised by ``check_is_fitted`` if the model is not fitted.
    """
    # For a Pipeline only the final step is inspected; earlier steps are
    # not type-checked here.
    estimator = model[-1] if isinstance(model, Pipeline) else model

    if not isinstance(estimator, _PLS):
        # Only _PLS subclasses pass the check above, so the message must not
        # advertise other model families as acceptable.
        raise TypeError(
            "Model not a valid model. Must be of base type _PLS or a "
            "Pipeline ending with a model of this type."
        )

    # The fitted check is applied to the object that was passed in (the whole
    # Pipeline when one is given), exactly as before.
    check_is_fitted(model)
    return estimator
@@ -0,0 +1,137 @@
1
+ import numpy as np
2
+ from sklearn.utils.validation import validate_data
3
+
4
+ from ._base import _PLSFeatureSelectorBase
5
+
6
+
7
class SRSelector(_PLSFeatureSelectorBase):
    """Feature selector based on the Selectivity Ratio (SR) of a PLS model.

    Every feature is scored by the ratio between the sum of squares that is
    explained when ``X`` is projected onto the normalised regression vector
    and the sum of squares that is left over. Features whose score exceeds
    ``threshold`` are selected.

    Parameters
    ----------
    model : Union[_PLS, Pipeline]
        The PLS regression model or a pipeline with a PLS regression model
        as last step.

    threshold : float, default=1.0
        The threshold for feature selection. Features with a score strictly
        above this value are selected.

    Attributes
    ----------
    estimator_ : _PLS
        The fitted estimator extracted from ``model`` by the base class.

    feature_scores_ : np.ndarray
        The selectivity ratio of each feature, set by ``fit``.

    support_mask_ : np.ndarray
        The boolean mask indicating which features are selected, set by
        ``fit``.
    """

    def __init__(
        self,
        model,
        threshold: float = 1.0,
    ):
        self.model = model
        self.threshold = threshold
        # The base class validates the model and stores ``estimator_``.
        super().__init__(model)

    def fit(self, X: np.ndarray, y=None) -> "SRSelector":
        """Compute the selectivity ratios and the resulting support mask.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data the scores are calculated from.

        y : None
            Ignored.

        Returns
        -------
        self : SRSelector
            The fitted selector.
        """
        # Validate X as a 2D float64 array; y is deliberately not validated.
        X = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            reset=True,
            dtype=np.float64,
        )

        # The scores must be stored first: the mask is derived from them.
        self.feature_scores_ = self._calculate_features(X)
        self.support_mask_ = self._get_support_mask()
        return self

    def _get_support_mask(self) -> np.ndarray:
        """Return the boolean mask of features scoring above the threshold.

        Returns
        -------
        np.ndarray
            ``True`` where ``feature_scores_`` is strictly greater than
            ``threshold``.
        """
        is_selected = self.feature_scores_ > self.threshold
        return is_selected

    def _calculate_features(self, X: np.ndarray) -> np.ndarray:
        """Calculate the Selectivity Ratio of every feature (vectorized).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input training data to calculate the feature scores from.

        Returns
        -------
        np.ndarray
            One selectivity ratio per column of ``X``.
        """
        coef = self.estimator_.coef_

        # Regression coefficients scaled by their overall norm.
        direction = coef.T / np.linalg.norm(coef)
        if coef.ndim == 1:
            # A 1-D coefficient vector becomes a single column.
            direction = direction.reshape(-1, 1)

        # Target-projected scores and the matching loadings.
        target_scores = X @ direction
        target_loadings = X.T @ np.linalg.pinv(target_scores).T

        # Part of X reproduced by the target projection.
        X_explained = target_scores @ target_loadings.T

        # Column-wise sums of squares.
        ss_total = np.linalg.norm(X, axis=0) ** 2
        ss_explained = np.linalg.norm(X_explained, axis=0) ** 2
        ss_residual = ss_total - ss_explained

        # A tiny offset keeps the ratio finite when the residual is zero.
        tiny = 1e-12
        return ss_explained / (ss_residual + tiny)
@@ -0,0 +1,129 @@
1
+ import numpy as np
2
+ from sklearn.utils.validation import validate_data
3
+
4
+ from ._base import _PLSFeatureSelectorBase
5
+
6
+
7
class VIPSelector(_PLSFeatureSelectorBase):
    """Feature selector based on Variable Importance in Projection (VIP).

    The scores are derived from the y-loadings, x-scores and x-weights of
    the fitted PLS model. Features whose score exceeds ``threshold`` are
    selected.

    Parameters
    ----------
    model : Union[_PLS, Pipeline]
        The PLS regression model or a pipeline with a PLS regression model
        as last step.

    threshold : float, default=1.0
        The threshold for feature selection. Features with a score strictly
        above this value are selected.

    Attributes
    ----------
    estimator_ : _PLS
        The fitted estimator extracted from ``model`` by the base class.

    feature_scores_ : np.ndarray
        The VIP score of each feature, set by ``fit``.

    support_mask_ : np.ndarray
        The boolean mask indicating which features are selected, set by
        ``fit``.
    """

    def __init__(
        self,
        model,
        threshold: float = 1.0,
    ):
        self.model = model
        self.threshold = threshold
        # The base class validates the model and stores ``estimator_``.
        super().__init__(model)

    def fit(self, X: np.ndarray, y=None) -> "VIPSelector":
        """Compute the VIP scores and the resulting support mask.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data to fit the selector to.

        y : None
            Ignored.

        Returns
        -------
        self : VIPSelector
            The fitted selector.
        """
        # Validate X as a 2D float64 array; y is deliberately not validated.
        X = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            reset=True,
            dtype=np.float64,
        )

        # The scores must be stored first: the mask is derived from them.
        self.feature_scores_ = self._calculate_features(X)
        self.support_mask_ = self._get_support_mask()
        return self

    def _get_support_mask(self) -> np.ndarray:
        """Return the boolean mask of features scoring above the threshold.

        Returns
        -------
        np.ndarray
            ``True`` where ``feature_scores_`` is strictly greater than
            ``threshold``.
        """
        is_selected = self.feature_scores_ > self.threshold
        return is_selected

    def _calculate_features(self, X: np.ndarray) -> np.ndarray:
        """Calculate the VIP scores from the fitted model.

        Parameters
        ----------
        X : np.ndarray
            Not used by this computation; the scores come entirely from the
            attributes of ``estimator_``.

        Returns
        -------
        np.ndarray
            The VIP score of each row of ``estimator_.x_weights_``.
        """
        pls = self.estimator_

        # Column-wise squared 2-norms of the y-loadings and the x-scores.
        ss_y_loadings = np.linalg.norm(pls.y_loadings_, ord=2, axis=0) ** 2
        ss_x_scores = np.linalg.norm(pls.x_scores_, ord=2, axis=0) ** 2

        # Their product acts as the weight of each component.
        component_ss = ss_y_loadings * ss_x_scores

        # Squared x-weights, weighted per component, summed per feature and
        # scaled by the number of features.
        weighted_sum = np.sum(
            component_ss * pls.x_weights_**2,
            axis=1,
        )
        numerator = pls.n_features_in_ * weighted_sum

        # Total of the component weights.
        denominator = np.sum(component_ss, axis=0)

        return np.sqrt(numerator / denominator)
@@ -0,0 +1,105 @@
1
+ Metadata-Version: 2.4
2
+ Name: chemotools
3
+ Version: 0.1.17
4
+ Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
5
+ Author: Pau Cabaneros
6
+ License-Expression: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: >=3.10
9
+ Requires-Dist: numpy<3,>=2.0.0
10
+ Requires-Dist: pandas<3,>=2.0.0
11
+ Requires-Dist: polars<2,>=1.17.0
12
+ Requires-Dist: pyarrow<21,>=18
13
+ Requires-Dist: scikit-learn<2,>=1.4.0
14
+ Provides-Extra: dev
15
+ Requires-Dist: cyclonedx-bom>=7.1.0; extra == 'dev'
16
+ Requires-Dist: mypy<2,>=1.13.0; extra == 'dev'
17
+ Requires-Dist: pandas-stubs<3,>=2.2.3.241126; extra == 'dev'
18
+ Requires-Dist: pytest-cov>=6.3.0; extra == 'dev'
19
+ Requires-Dist: pytest<9,>=8.3.0; extra == 'dev'
20
+ Requires-Dist: ruff<0.9,>=0.8.0; extra == 'dev'
21
+ Requires-Dist: scipy-stubs<2,>=1.15.1.0; extra == 'dev'
22
+ Description-Content-Type: text/markdown
23
+
24
+ ![chemotools](assets/images/banner_dark.png)
25
+
26
+ # chemotools
27
+
28
+
29
+ [![PyPI](https://img.shields.io/pypi/v/chemotools)](https://pypi.org/project/chemotools)
30
+ [![Python Versions](https://img.shields.io/pypi/pyversions/chemotools)](https://pypi.org/project/chemotools)
31
+ [![License](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
32
+ [![Coverage](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
33
+ [![Downloads](https://static.pepy.tech/badge/chemotools)](https://pepy.tech/project/chemotools)
34
+ [![DOI](https://joss.theoj.org/papers/10.21105/joss.06802/status.svg)](https://doi.org/10.21105/joss.06802)
35
+
36
+ ---
37
+
38
+ `chemotools` is a Python library that brings **chemometric preprocessing tools** into the [`scikit-learn`](https://scikit-learn.org/) ecosystem.
39
+
40
+ It provides modular transformers for spectral data, designed to plug seamlessly into your ML workflows.
41
+
42
+ ## Features
43
+
44
+ - Preprocessing for spectral data (baseline correction, smoothing, scaling, derivatization, scatter correction).
45
+ - Fully compatible with `scikit-learn` pipelines and transformers.
46
+ - Simple, modular API for flexible workflows.
47
+ - Open-source, actively maintained, and published on [PyPI](https://pypi.org/project/chemotools/) and [Conda](https://anaconda.org/conda-forge/chemotools).
48
+
49
+ ## Installation
50
+
51
+ Install from PyPI:
52
+
53
+ ```bash
54
+ pip install chemotools
55
+ ```
56
+
57
+ Install from Conda:
58
+
59
+ ```bash
60
+ conda install -c conda-forge chemotools
61
+ ```
62
+
63
+ ## Usage
64
+
65
+ Example: preprocessing pipeline with scikit-learn:
66
+
67
+ ```python
68
+ from sklearn.preprocessing import StandardScaler
69
+ from sklearn.pipeline import make_pipeline
70
+
71
+ from chemotools.baseline import AirPls
72
+ from chemotools.scatter import MultiplicativeScatterCorrection
73
+
74
+ preprocessing = make_pipeline(
75
+ AirPls(),
76
+ MultiplicativeScatterCorrection(),
77
+ StandardScaler(with_std=False),
78
+ )
79
+
80
+ spectra_transformed = preprocessing.fit_transform(spectra)
81
+ ```
82
+
83
+ ➡️ See the [documentation](https://paucablop.github.io/chemotools/) for full details.
84
+
85
+ ## Development
86
+
87
+ This project uses [uv](https://github.com/astral-sh/uv) for dependency management and [Task](https://taskfile.dev) to simplify common development workflows.
88
+ You can get started quickly by using the predefined [Taskfile](./Taskfile.yml), which provides handy shortcuts such as:
89
+
90
+ ```bash
91
+ task install # install all dependencies
92
+ task check # run formatting, linting, typing, and tests
93
+ task coverage # run tests with coverage reporting
94
+ task build # build the package for distribution
95
+ ```
96
+
97
+ ## Contributing
98
+
99
+ Contributions are welcome!
100
+ Check out the [contributing guide](CONTRIBUTING.md) and the [project board](https://github.com/users/paucablop/projects/4).
101
+
102
+ ## License
103
+
104
+ Released under the [MIT License](LICENSE).
105
+
@@ -4,7 +4,7 @@ chemotools/augmentation/_add_noise.py,sha256=fkTJfIYtZXezcjy6Vz8asIhpBoVp4oaIifp
4
4
  chemotools/augmentation/_baseline_shift.py,sha256=kIlYvmKS9pu9vh_-eZ7PSHPuH_58V9mgYbSJt6Gq3BA,3476
5
5
  chemotools/augmentation/_fractional_shift.py,sha256=dJ0Vuc-U02HhjKkOwc48qnOksZYgbHwL2ko7tWCZTQU,6916
6
6
  chemotools/augmentation/_gaussian_broadening.py,sha256=dJsPlTKqpecKaCDU3vOvedIb-t_HyCkQprxNv0DmYZQ,4236
7
- chemotools/augmentation/_index_shift.py,sha256=BTtadweDvvMtiF8t7ldwsE6Kl6FmKLCkVJjSzSWyIDs,6904
7
+ chemotools/augmentation/_index_shift.py,sha256=NeN9Nc212wIF4R1dSoWFIrSuP3OWO1GPTPJ_Ql_SKzw,6894
8
8
  chemotools/augmentation/_spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
9
9
  chemotools/baseline/__init__.py,sha256=VzoblGg8Hx_FkTc_n7a-ZjGvtKP8JE_NwJKWenGFQkM,584
10
10
  chemotools/baseline/_air_pls.py,sha256=eotXuIEsus7Z-c17oLx8UbiwOHM7DzQJ6rruHnwCGPQ,5067
@@ -27,9 +27,12 @@ chemotools/datasets/data/train_spectra.csv,sha256=iVF19W52NHlbqq8BbLomn8n47kSPT0
27
27
  chemotools/derivative/__init__.py,sha256=FkckdzO30jrRWPGpIU3cfnaTtxPtNT5Tb2G9F9PmVTw,134
28
28
  chemotools/derivative/_norris_william.py,sha256=rMY_yntpiB5fbSM1tPph4AaGmF1k-HqJp7o48ijePBs,4958
29
29
  chemotools/derivative/_savitzky_golay.py,sha256=CuCrKoLmrB1YmJ4ihIykgkL3tO3frqkStMogtsVhO3A,3632
30
- chemotools/feature_selection/__init__.py,sha256=1_i28hIxijjwhMypTy1w2fLbzXXVkKD5IYzzY8ZSuHw,117
30
+ chemotools/feature_selection/__init__.py,sha256=e_GFVawlDNEQv3EqrGSXUr5cvDN1jckoxe2C2jRwVl8,222
31
+ chemotools/feature_selection/_base.py,sha256=SIH6kl9AePVWTByL0OvJFfc2j3idqs7lm_7Zi1YMp4Y,2311
31
32
  chemotools/feature_selection/_index_selector.py,sha256=lNTP2b7P3doWl30KiAr3Xd2HOMxeUmj24MuqoXl4Voc,3556
32
33
  chemotools/feature_selection/_range_cut.py,sha256=lVVVC30ZsK2z9jsDGb_z6l8Ty2I89yM05_dIDbMP73Q,3564
34
+ chemotools/feature_selection/_sr_selector.py,sha256=OaXkt3t_NvymgDy6R15ig87jhcb-vM7i63LgtsNdfZo,3969
35
+ chemotools/feature_selection/_vip_selector.py,sha256=ZK3bhdpl3nBYt6xmuHq2IvWtpgJ8ZdElH06xnCFA-Xs,3835
33
36
  chemotools/outliers/__init__.py,sha256=wpdlyqU34n1Pb9kGCM4idhcok35WAakxEhzP0xeKaZw,272
34
37
  chemotools/outliers/_base.py,sha256=zl0LhRKjpvj5IbYc3su6zEZ7YZ0pDSR3yqNWt2qBjNA,5374
35
38
  chemotools/outliers/dmodx.py,sha256=sgizal_BDlqWTZNT8y2D_ImcKAJejXt6vqvFYk4Vqi0,5152
@@ -53,7 +56,7 @@ chemotools/smooth/_median_filter.py,sha256=9ndTJCwrZirWlvDNldiigMddy79KIGq9OwwYN
53
56
  chemotools/smooth/_savitzky_golay_filter.py,sha256=27iFUWxdL9_7oZabR0R5L0ZTpBmYfVUjx2XCTukihBE,3509
54
57
  chemotools/smooth/_whittaker_smooth.py,sha256=lpLAyf4GdyDW4ulT1nyEoK6xQEl2cVUKquawQdGWbHU,3571
55
58
  chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- chemotools-0.1.11rc0.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
57
- chemotools-0.1.11rc0.dist-info/METADATA,sha256=lhDYugMDLS5dHu86xGNho2mAj25vfFucPMEQAHmhfpA,5243
58
- chemotools-0.1.11rc0.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
59
- chemotools-0.1.11rc0.dist-info/RECORD,,
59
+ chemotools-0.1.17.dist-info/METADATA,sha256=hxxeamc9RNFVB7ukM7Srm8PGZzwJi1gI3jMghUD00hc,3652
60
+ chemotools-0.1.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
61
+ chemotools-0.1.17.dist-info/licenses/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
62
+ chemotools-0.1.17.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.2
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,105 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: chemotools
3
- Version: 0.1.11rc0
4
- Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
5
- License: MIT
6
- Author: Pau Cabaneros
7
- Requires-Python: >=3.10,<4.0
8
- Classifier: License :: OSI Approved :: MIT License
9
- Classifier: Programming Language :: Python :: 3
10
- Classifier: Programming Language :: Python :: 3.10
11
- Classifier: Programming Language :: Python :: 3.11
12
- Classifier: Programming Language :: Python :: 3.12
13
- Classifier: Programming Language :: Python :: 3.13
14
- Requires-Dist: numpy (>=2.0.0,<3.0.0)
15
- Requires-Dist: pandas (>=2.0.0,<3.0.0)
16
- Requires-Dist: polars (>=1.17.0,<2.0.0)
17
- Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
18
- Requires-Dist: scikit-learn (>=1.4.0,<2.0.0)
19
- Description-Content-Type: text/markdown
20
-
21
- ![chemotools](assets/images/logo_pixel.png)
22
-
23
-
24
- [![pypi](https://img.shields.io/pypi/v/chemotools)](https://pypi.org/project/chemotools)
25
- [![pypi](https://img.shields.io/pypi/pyversions/chemotools)](https://pypi.org/project/chemotools)
26
- [![pypi](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
27
- [![codecov](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
28
- [![Downloads](https://static.pepy.tech/badge/chemotools)](https://pepy.tech/project/chemotools)
29
- [![DOI](https://joss.theoj.org/papers/10.21105/joss.06802/status.svg)](https://doi.org/10.21105/joss.06802)
30
-
31
-
32
- # __chemotools__
33
-
34
- Welcome to Chemotools, a Python package that integrates chemometrics with Scikit-learn.
35
-
36
- ## Note
37
-
38
- Since I released Chemotools, I have received a fantastic response from the community. I am really happy for the interest in the project 🤗. This also means that I have received a lot of good feedback and suggestions for improvements. I have been intensively working on releasing new versions of Chemotools to address the feedback and suggestions. If you use Chemotools, __make sure you are using the latest version__ (see installation), which will be aligned with the documentation.
39
-
40
- 👉👉 Check the [latest version](https://pypi.org/project/chemotools/) and make sure you don't miss out on cool new features.
41
-
42
- 👉👉 Check the [documentation](https://paucablop.github.io/chemotools/) for a full description on how to use chemotools.
43
-
44
- ## Description
45
-
46
- Chemotools is a Python package that provides a collection of preprocessing tools and utilities for working with spectral data. It is built on top of popular scientific libraries and is designed to be highly modular, easy to use, and compatible with Scikit-learn transformers.
47
-
48
- If you are interested in learning more about chemotools, please visit the [documentation](https://paucablop.github.io/chemotools/) page.
49
-
50
- Benefits:
51
- - Provides a collection of preprocessing tools and utilities for working with spectral data
52
- - Highly modular and compatible with Scikit-learn transformers
53
- - Can perform popular preprocessing tasks such as baseline correction, smoothing, scaling, derivatization, and scattering correction
54
- - Open source and available on PyPI
55
-
56
- Applications:
57
- - Analyzing and processing spectral data in chemistry, biology, and other fields
58
- - Developing machine learning models for predicting properties or classifying samples based on spectral data
59
- - Teaching and learning about chemometrics and data preprocessing in Python
60
-
61
- ## Installation
62
-
63
- Chemotools is distributed via PyPI and can be easily installed using pip:
64
-
65
- ```bash
66
- pip install chemotools
67
- ```
68
-
69
- Upgrading to the latest version is as simple as:
70
-
71
- ```bash
72
- pip install chemotools --upgrade
73
- ```
74
-
75
- ## Usage
76
-
77
- Chemotools is designed to be used in conjunction with Scikit-learn. It follows the same API as other Scikit-learn transformers, so you can easily integrate it into your existing workflow. For example, you can use chemotools to build pipelines that include transformers from chemotools and Scikit-learn:
78
-
79
- ```python
80
- from sklearn.preprocessing import StandardScaler
81
- from sklearn.pipeline import make_pipeline
82
-
83
- from chemotools.baseline import AirPls
84
- from chemotools.scatter import MultiplicativeScatterCorrection
85
-
86
- preprocessing = make_pipeline(AirPls(), MultiplicativeScatterCorrection(), StandardScaler(with_std=False))
87
- spectra_transformed = preprocessing.fit_transform(spectra)
88
- ```
89
-
90
- Check the [documentation](https://paucablop.github.io/chemotools/) for more information on how to use chemotools.
91
-
92
-
93
- ## Contributing
94
-
95
- We welcome contributions to Chemotools from anyone interested in improving the package. Whether you have ideas for new features, bug reports, or just want to help improve the code, we appreciate your contributions! You are also welcome to see the [Project Board](https://github.com/users/paucablop/projects/4) to see what we are currently working on.
96
-
97
- To contribute to Chemotools, please follow the [contributing guidelines](CONTRIBUTING.md).
98
-
99
- ## License
100
-
101
- This package is distributed under the MIT license. See the [LICENSE](LICENSE) file for more information.
102
-
103
- ## Credits
104
-
105
- AirPLS baseline correction is based on the implementation by [Zhang et al.](https://pubs.rsc.org/is/content/articlelanding/2010/an/b922045c). The current implementation is based on the Python implementation by [zmzhang](https://github.com/zmzhang/airPLS).