chemotools 0.0.22__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. chemotools/augmentation/__init__.py +16 -0
  2. chemotools/augmentation/baseline_shift.py +119 -0
  3. chemotools/augmentation/exponential_noise.py +117 -0
  4. chemotools/augmentation/index_shift.py +120 -0
  5. chemotools/augmentation/normal_noise.py +118 -0
  6. chemotools/augmentation/spectrum_scale.py +120 -0
  7. chemotools/augmentation/uniform_noise.py +124 -0
  8. chemotools/baseline/__init__.py +20 -8
  9. chemotools/baseline/{air_pls.py → _air_pls.py} +20 -32
  10. chemotools/baseline/{ar_pls.py → _ar_pls.py} +18 -31
  11. chemotools/baseline/{constant_baseline_correction.py → _constant_baseline_correction.py} +37 -31
  12. chemotools/baseline/{cubic_spline_correction.py → _cubic_spline_correction.py} +26 -19
  13. chemotools/baseline/{linear_correction.py → _linear_correction.py} +19 -28
  14. chemotools/baseline/{non_negative.py → _non_negative.py} +15 -23
  15. chemotools/baseline/{polynomial_correction.py → _polynomial_correction.py} +29 -31
  16. chemotools/baseline/{subtract_reference.py → _subtract_reference.py} +23 -27
  17. chemotools/datasets/__init__.py +5 -0
  18. chemotools/datasets/_base.py +122 -0
  19. chemotools/datasets/data/coffee_labels.csv +61 -0
  20. chemotools/datasets/data/coffee_spectra.csv +61 -0
  21. chemotools/datasets/data/fermentation_hplc.csv +35 -0
  22. chemotools/datasets/data/fermentation_spectra.csv +1630 -0
  23. chemotools/datasets/data/train_hplc.csv +22 -0
  24. chemotools/datasets/data/train_spectra.csv +22 -0
  25. chemotools/derivative/__init__.py +4 -2
  26. chemotools/derivative/{norris_william.py → _norris_william.py} +20 -25
  27. chemotools/derivative/{savitzky_golay.py → _savitzky_golay.py} +26 -36
  28. chemotools/feature_selection/__init__.py +4 -0
  29. chemotools/feature_selection/_index_selector.py +113 -0
  30. chemotools/feature_selection/_range_cut.py +111 -0
  31. chemotools/scale/__init__.py +5 -3
  32. chemotools/scale/{min_max_scaler.py → _min_max_scaler.py} +36 -39
  33. chemotools/scale/{norm_scaler.py → _norm_scaler.py} +18 -25
  34. chemotools/scale/_point_scaler.py +115 -0
  35. chemotools/scatter/__init__.py +13 -2
  36. chemotools/scatter/_extended_multiplicative_scatter_correction.py +183 -0
  37. chemotools/scatter/_multiplicative_scatter_correction.py +169 -0
  38. chemotools/scatter/_robust_normal_variate.py +101 -0
  39. chemotools/scatter/{standard_normal_variate.py → _standard_normal_variate.py} +21 -26
  40. chemotools/smooth/__init__.py +6 -4
  41. chemotools/smooth/{mean_filter.py → _mean_filter.py} +18 -25
  42. chemotools/smooth/{median_filter.py → _median_filter.py} +32 -24
  43. chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py} +22 -24
  44. chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py} +24 -29
  45. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/METADATA +19 -15
  46. chemotools-0.1.6.dist-info/RECORD +51 -0
  47. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/WHEEL +1 -2
  48. chemotools/scale/index_scaler.py +0 -97
  49. chemotools/scatter/extended_multiplicative_scatter_correction.py +0 -33
  50. chemotools/scatter/multiplicative_scatter_correction.py +0 -123
  51. chemotools/utils/check_inputs.py +0 -14
  52. chemotools/variable_selection/__init__.py +0 -1
  53. chemotools/variable_selection/range_cut.py +0 -121
  54. chemotools-0.0.22.dist-info/RECORD +0 -39
  55. chemotools-0.0.22.dist-info/top_level.txt +0 -2
  56. tests/fixtures.py +0 -89
  57. tests/test_functionality.py +0 -397
  58. tests/test_sklearn_compliance.py +0 -192
  59. {tests → chemotools/datasets/data}/__init__.py +0 -0
  60. {chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/LICENSE +0 -0
chemotools/smooth/{median_filter.py → _median_filter.py}
@@ -1,12 +1,12 @@
+ from typing import Literal
+
  import numpy as np
  from scipy.ndimage import median_filter
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
- from sklearn.utils.validation import check_is_fitted
-
- from chemotools.utils.check_inputs import check_input
+ from sklearn.utils.validation import check_is_fitted, validate_data


- class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+ class MedianFilter(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
  """
  A transformer that calculates the median filter of the input data.

@@ -19,14 +19,6 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
  The mode to use for the median filter. Can be "nearest", "constant", "reflect",
  "wrap", "mirror" or "interp". Default is "nearest".

- Attributes
- ----------
- n_features_in_ : int
- The number of features in the input data.
-
- _is_fitted : bool
- Whether the transformer has been fitted to data.
-
  Methods
  -------
  fit(X, y=None)
@@ -35,7 +27,21 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
  transform(X, y=0, copy=True)
  Transform the input data by calculating the median filter.
  """
- def __init__(self, window_size: int = 3, mode: str = 'nearest') -> None:
+
+ def __init__(
+ self,
+ window_size: int = 3,
+ mode: Literal[
+ "reflect",
+ "constant",
+ "nearest",
+ "mirror",
+ "wrap",
+ "grid-constant",
+ "grid-mirror",
+ "grid-wrap",
+ ] = "nearest",
+ ) -> None:
  self.window_size = window_size
  self.mode = mode

@@ -57,14 +63,9 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
  The fitted transformer.
  """
  # Check that X is a 2D array and has only finite values
- X = check_input(X)
-
- # Set the number of features
- self.n_features_in_ = X.shape[1]
-
- # Set the fitted attribute to True
- self._is_fitted = True
-
+ X = validate_data(
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+ )
  return self

  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -85,11 +86,18 @@ class MedianFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
  The transformed data.
  """
  # Check that the estimator is fitted
- check_is_fitted(self, "_is_fitted")
+ check_is_fitted(self, "n_features_in_")

  # Check that X is a 2D array and has only finite values
- X = check_input(X)
- X_ = X.copy()
+ X_ = validate_data(
+ self,
+ X,
+ y="no_validation",
+ ensure_2d=True,
+ copy=True,
+ reset=False,
+ dtype=np.float64,
+ )

  if X_.shape[1] != self.n_features_in_:
  raise ValueError(
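The 0.1.6 smoothers keep the same transformer interface, but input validation now runs through scikit-learn's `validate_data` (casting to float64 and recording `n_features_in_` during `fit`), and `mode` is typed as a `Literal`. A minimal usage sketch, assuming `MedianFilter` is re-exported from `chemotools.smooth` as the file layout above suggests:

```python
# Hedged sketch: the public import path chemotools.smooth is assumed from the file list above.
import numpy as np
from chemotools.smooth import MedianFilter

rng = np.random.default_rng(0)
X = rng.normal(size=(5, 200))      # 5 spectra x 200 wavelengths

filt = MedianFilter(window_size=5, mode="nearest")
X_smooth = filt.fit_transform(X)   # fit records n_features_in_; transform validates against it
print(X_smooth.shape)              # (5, 200)
```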
chemotools/smooth/{savitzky_golay_filter.py → _savitzky_golay_filter.py}
@@ -1,12 +1,12 @@
+ from typing import Literal
+
  import numpy as np
  from scipy.signal import savgol_filter
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
- from sklearn.utils.validation import check_is_fitted
-
- from chemotools.utils.check_inputs import check_input
+ from sklearn.utils.validation import check_is_fitted, validate_data


- class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+ class SavitzkyGolayFilter(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
  """
  A transformer that calculates the Savitzky-Golay filter of the input data.

@@ -24,14 +24,6 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
  The mode to use for the Savitzky-Golay filter. Can be "nearest", "constant",
  "reflect", "wrap", "mirror" or "interp". Default is "nearest".

- Attributes
- ----------
- n_features_in_ : int
- The number of features in the input data.
-
- _is_fitted : bool
- Whether the transformer has been fitted to data.
-
  Methods
  -------
  fit(X, y=None)
@@ -40,8 +32,12 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
  transform(X, y=0, copy=True)
  Transform the input data by calculating the Savitzky-Golay filter.
  """
+
  def __init__(
- self, window_size: int = 3, polynomial_order: int = 1, mode: str = "nearest"
+ self,
+ window_size: int = 3,
+ polynomial_order: int = 1,
+ mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "nearest",
  ) -> None:
  self.window_size = window_size
  self.polynomial_order = polynomial_order
@@ -65,14 +61,9 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
  The fitted transformer.
  """
  # Check that X is a 2D array and has only finite values
- X = check_input(X)
-
- # Set the number of features
- self.n_features_in_ = X.shape[1]
-
- # Set the fitted attribute to True
- self._is_fitted = True
-
+ X = validate_data(
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+ )
  return self

  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -93,11 +84,18 @@ class SavitzkyGolayFilter(OneToOneFeatureMixin, BaseEstimator, TransformerMixin)
  The transformed data.
  """
  # Check that the estimator is fitted
- check_is_fitted(self, "_is_fitted")
+ check_is_fitted(self, "n_features_in_")

  # Check that X is a 2D array and has only finite values
- X = check_input(X)
- X_ = X.copy()
+ X_ = validate_data(
+ self,
+ X,
+ y="no_validation",
+ ensure_2d=True,
+ copy=True,
+ reset=False,
+ dtype=np.float64,
+ )

  if X_.shape[1] != self.n_features_in_:
  raise ValueError(
chemotools/smooth/{whittaker_smooth.py → _whittaker_smooth.py}
@@ -2,17 +2,17 @@ import numpy as np
  from scipy.sparse import csc_matrix, eye, diags
  from scipy.sparse.linalg import spsolve
  from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
- from sklearn.utils.validation import check_is_fitted
+ from sklearn.utils.validation import (
+ check_is_fitted,
+ validate_data,
+ ) # This code is adapted from the following source:

- from chemotools.utils.check_inputs import check_input
-
- # This code is adapted from the following source:
- # Z.-M. Zhang, S. Chen, and Y.-Z. Liang,
- # Baseline correction using adaptive iteratively reweighted penalized least squares.
+ # Z.-M. Zhang, S. Chen, and Y.-Z. Liang,
+ # Baseline correction using adaptive iteratively reweighted penalized least squares.
  # Analyst 135 (5), 1138-1146 (2010).


- class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
+ class WhittakerSmooth(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
  """
  A transformer that calculates the Whittaker smooth of the input data.

@@ -24,14 +24,6 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
  differences : int, optional
  The number of differences to use for the Whittaker smooth. Default is 1.

- Attributes
- ----------
- n_features_in_ : int
- The number of features in the input data.
-
- _is_fitted : bool
- Whether the transformer has been fitted to data.
-
  Methods
  -------
  fit(X, y=None)
@@ -40,6 +32,7 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
  transform(X, y=0, copy=True)
  Transform the input data by calculating the Whittaker smooth.
  """
+
  def __init__(
  self,
  lam: float = 1e2,
@@ -66,14 +59,9 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
  The fitted transformer.
  """
  # Check that X is a 2D array and has only finite values
- X = check_input(X)
-
- # Set the number of features
- self.n_features_in_ = X.shape[1]
-
- # Set the fitted attribute to True
- self._is_fitted = True
-
+ X = validate_data(
+ self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
+ )
  return self

  def transform(self, X: np.ndarray, y=None) -> np.ndarray:
@@ -94,11 +82,18 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
  The transformed data.
  """
  # Check that the estimator is fitted
- check_is_fitted(self, "_is_fitted")
+ check_is_fitted(self, "n_features_in_")

  # Check that X is a 2D array and has only finite values
- X = check_input(X)
- X_ = X.copy()
+ X_ = validate_data(
+ self,
+ X,
+ y="no_validation",
+ ensure_2d=True,
+ copy=True,
+ reset=False,
+ dtype=np.float64,
+ )

  # Check that the number of features is the same as the fitted data
  if X_.shape[1] != self.n_features_in_:
@@ -113,14 +108,14 @@ class WhittakerSmooth(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
  return X_.reshape(-1, 1) if X_.ndim == 1 else X_

  def _calculate_whittaker_smooth(self, x):
- X = np.matrix(x)
+ X = np.array(x)
  m = X.size
  E = eye(m, format="csc")
  w = np.ones(m)
  for i in range(self.differences):
  E = E[1:] - E[:-1]
  W = diags(w, 0, shape=(m, m))
- A = csc_matrix(W + (self.lam * E.T * E))
- B = csc_matrix(W * X.T)
+ A = csc_matrix(W + (self.lam * E.T @ E))
+ B = csc_matrix(W @ X.T).toarray().ravel()
  background = spsolve(A, B)
  return np.array(background)
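The substantive change in `_calculate_whittaker_smooth` is the move from the deprecated `np.matrix` type and `*` products to plain arrays with `@`; the algorithm still solves the penalized least-squares system (W + λ DᵀD) z = W x. A standalone sketch of that solve, mirroring the updated hunk above but not copied from the released module:

```python
import numpy as np
from scipy.sparse import csc_matrix, diags, eye
from scipy.sparse.linalg import spsolve


def whittaker_smooth(x: np.ndarray, lam: float = 1e2, differences: int = 1) -> np.ndarray:
    """Solve (W + lam * D^T D) z = W x for one spectrum x, with unit weights."""
    x = np.asarray(x, dtype=np.float64)
    m = x.size
    D = eye(m, format="csc")
    for _ in range(differences):
        D = D[1:] - D[:-1]                  # forward-difference operator
    W = diags(np.ones(m), 0, shape=(m, m))  # weight matrix (all ones here)
    A = csc_matrix(W + lam * (D.T @ D))     # left-hand side, kept sparse
    b = W @ x                               # right-hand side W x as a 1-D array
    return spsolve(A, b)


# Quick check on a noisy sine
signal = np.sin(np.linspace(0, 3, 200)) + 0.1 * np.random.default_rng(0).normal(size=200)
smoothed = whittaker_smooth(signal, lam=1e2, differences=1)
```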
{chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/METADATA
@@ -1,22 +1,24 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.3
  Name: chemotools
- Version: 0.0.22
- Summary: Package to integrate chemometrics in scikit-learn pipelines
- Home-page: https://github.com/paucablop/chemotools
- Author: Pau Cabaneros Lopez
- Author-email: pau.cabaneros@gmail.com
- Project-URL: Bug Tracker, https://github.com/paucablop/chemotools/issues/
- Classifier: Programming Language :: Python :: 3
+ Version: 0.1.6
+ Summary: chemotools: A Python Package that Integrates Chemometrics and scikit-learn
+ License: MIT
+ Author: Pau Cabaneros
+ Requires-Python: >=3.10,<4.0
  Classifier: License :: OSI Approved :: MIT License
- Classifier: Operating System :: OS Independent
- Requires-Python: >=3.9
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Requires-Dist: numpy (>=2.0.0,<3.0.0)
+ Requires-Dist: pandas (>=2.0.0,<3.0.0)
+ Requires-Dist: polars (>=1.17.0,<2.0.0)
+ Requires-Dist: pyarrow (>=18.0.0,<19.0.0)
+ Requires-Dist: scikit-learn (>=1.4.0,<2.0.0)
  Description-Content-Type: text/markdown
- License-File: LICENSE
- Requires-Dist: numpy
- Requires-Dist: scipy
- Requires-Dist: scikit-learn

- ![chemotools](assets/images/logo_5.png)
+ ![chemotools](assets/images/logo_pixel.png)


  [![pypi](https://img.shields.io/pypi/v/chemotools)](https://pypi.org/project/chemotools)
@@ -24,6 +26,8 @@ Requires-Dist: scikit-learn
  [![pypi](https://img.shields.io/pypi/l/chemotools)](https://github.com/paucablop/chemotools/blob/main/LICENSE)
  [![codecov](https://codecov.io/github/paucablop/chemotools/branch/main/graph/badge.svg?token=D7JUJM89LN)](https://codecov.io/github/paucablop/chemotools)
  [![Downloads](https://static.pepy.tech/badge/chemotools)](https://pepy.tech/project/chemotools)
+ [![DOI](https://joss.theoj.org/papers/10.21105/joss.06802/status.svg)](https://doi.org/10.21105/joss.06802)
+

  # __chemotools__

chemotools-0.1.6.dist-info/RECORD (new file)
@@ -0,0 +1,51 @@
+ chemotools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ chemotools/augmentation/__init__.py,sha256=xIUoWov_aluoW5L3zpVAahyPdkWA5erApW-duzdE_9A,385
+ chemotools/augmentation/baseline_shift.py,sha256=kIlYvmKS9pu9vh_-eZ7PSHPuH_58V9mgYbSJt6Gq3BA,3476
+ chemotools/augmentation/exponential_noise.py,sha256=fhZ4zQGGqmW-OiSu388th6IhgXrFj1xOguqKYAgj8Y4,3348
+ chemotools/augmentation/index_shift.py,sha256=DWVfnxCUgm2NNQfASTpqNoMkfhlW1WZT8EoWVsSSF4c,3459
+ chemotools/augmentation/normal_noise.py,sha256=-se2Xv1pAWt9HY7H5yC4XlxRArPKZWGeTy2MdyN4lBE,3318
+ chemotools/augmentation/spectrum_scale.py,sha256=hMsmzXpssbI7tGm_YnQn9wjbByso3CgVxd3Hs8kfLS8,3442
+ chemotools/augmentation/uniform_noise.py,sha256=8a-AYzEDIkLckL6FK2i8mr_jXnQGcFaKXh_roGCICaQ,3456
+ chemotools/baseline/__init__.py,sha256=VzoblGg8Hx_FkTc_n7a-ZjGvtKP8JE_NwJKWenGFQkM,584
+ chemotools/baseline/_air_pls.py,sha256=eotXuIEsus7Z-c17oLx8UbiwOHM7DzQJ6rruHnwCGPQ,5067
+ chemotools/baseline/_ar_pls.py,sha256=Cl0tN0DGQA8JpnbIge4cBqT7aGQ7yltppYEDI6tWqiM,4385
+ chemotools/baseline/_constant_baseline_correction.py,sha256=2ARXIma3m_He5KJs0t0Bz3m0Hd7CNHDR4Dd4XfjMWgs,3893
+ chemotools/baseline/_cubic_spline_correction.py,sha256=Qr8jLwAM4JIcD-8G6BBU2vLSLyi44iHiIpJrHyZ6qJE,3432
+ chemotools/baseline/_linear_correction.py,sha256=jYUy1q5hlBIhoQr5yPWbqr65pTK8NCVPdJdjVg1SFtg,3258
+ chemotools/baseline/_non_negative.py,sha256=0Huq4fKAzAoX9nr6Fk-Awx5xBqmah4jTcn0TY31FJQc,2741
+ chemotools/baseline/_polynomial_correction.py,sha256=jzoTyj5a9dHBtefTKVer8CVpCwWqV25Ruj7mq7Ra_PI,4005
+ chemotools/baseline/_subtract_reference.py,sha256=B92DAYJmJR5VtWTM7Q6_orvIl2xaadmvbGr1r_ZJALA,3379
+ chemotools/datasets/__init__.py,sha256=WcchczWPH-A22DmYEnz2-u8A6vfVviJ6tOCBB0zaIAU,196
+ chemotools/datasets/_base.py,sha256=g_-R6c9WI5lt_j40FgA_mvEFzFHM9eGW6hj9d1e29P4,4883
+ chemotools/datasets/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ chemotools/datasets/data/coffee_labels.csv,sha256=ZXQWQIf8faLHjdnHfRoXfxMR56kq9Q1BGPZBkQyhGlY,487
+ chemotools/datasets/data/coffee_spectra.csv,sha256=VA-sN4u0hC5iALlRxxkj-K87Lz3b3mmUHBJPoDXychI,2206147
+ chemotools/datasets/data/fermentation_hplc.csv,sha256=AMmiFQxwaXrH8aN310-3h1YQDiDrT8JNRv1RDvhEvg4,2140
+ chemotools/datasets/data/fermentation_spectra.csv,sha256=MaaNMQP0lygJgFbEoUX0OUqdA-id8mF5Llvf_vj9tJk,15237508
+ chemotools/datasets/data/train_hplc.csv,sha256=DjtmqiePOWB-F6TsOGFngE1pKyXkb7Xmsi-1CLxsTnE,249
+ chemotools/datasets/data/train_spectra.csv,sha256=iVF19W52NHlbqq8BbLomn8n47kSPT0QxJv7wtQX4yjQ,203244
+ chemotools/derivative/__init__.py,sha256=FkckdzO30jrRWPGpIU3cfnaTtxPtNT5Tb2G9F9PmVTw,134
+ chemotools/derivative/_norris_william.py,sha256=rMY_yntpiB5fbSM1tPph4AaGmF1k-HqJp7o48ijePBs,4958
+ chemotools/derivative/_savitzky_golay.py,sha256=CuCrKoLmrB1YmJ4ihIykgkL3tO3frqkStMogtsVhO3A,3632
+ chemotools/feature_selection/__init__.py,sha256=1_i28hIxijjwhMypTy1w2fLbzXXVkKD5IYzzY8ZSuHw,117
+ chemotools/feature_selection/_index_selector.py,sha256=lNTP2b7P3doWl30KiAr3Xd2HOMxeUmj24MuqoXl4Voc,3556
+ chemotools/feature_selection/_range_cut.py,sha256=lVVVC30ZsK2z9jsDGb_z6l8Ty2I89yM05_dIDbMP73Q,3564
+ chemotools/scale/__init__.py,sha256=eztqcHg-TKE1Rr0N9ArfytHk8teuqVfi4SZi2DS96vc,175
+ chemotools/scale/_min_max_scaler.py,sha256=YvqRkV2pXu-viQrpjzWcp9KmSSCYSoubSnrZHRLqgKQ,3011
+ chemotools/scale/_norm_scaler.py,sha256=CHWSir2q-pL1hxzw_ZB45yi4mw-SkJ4YOa1CUL4nm2I,2568
+ chemotools/scale/_point_scaler.py,sha256=je-vomAk7g3Q7yxmisQK4-3ndKEKI2wDwLrUiNuwzzA,3505
+ chemotools/scatter/__init__.py,sha256=ftyC_MGurzxpWMie8WlFDGh5ylalK2K3aCSN4qUzQAw,459
+ chemotools/scatter/_extended_multiplicative_scatter_correction.py,sha256=7OpOcvWX1hlMUR18tC29pkSiADLZViDrTh-wro738E4,6560
+ chemotools/scatter/_multiplicative_scatter_correction.py,sha256=nPMPYKHl6-U--GAuQdZL8KVNPlr3V52teUAoJ0iRs3g,5801
+ chemotools/scatter/_robust_normal_variate.py,sha256=nPfcvjHEpwkcSCjdvD86WN9q2wVMCeZ2Z8wMzcBpM3Y,3110
+ chemotools/scatter/_standard_normal_variate.py,sha256=22mJzbbZoXQY-_hHAhGO0vzfYwr3oMqaR6xPjJryHtk,2582
+ chemotools/smooth/__init__.py,sha256=G8JvAoBK9d18-k6XgukqN6dbJP-dsEgeDdbKbZdCIkA,265
+ chemotools/smooth/_mean_filter.py,sha256=KVAqOzYWv-SnDX2HD3zLWSSDNePi2Zy3EV9NwIX2H38,2827
+ chemotools/smooth/_median_filter.py,sha256=9ndTJCwrZirWlvDNldiigMddy79KIGq9OwwYNSXaw14,3111
+ chemotools/smooth/_savitzky_golay_filter.py,sha256=27iFUWxdL9_7oZabR0R5L0ZTpBmYfVUjx2XCTukihBE,3509
+ chemotools/smooth/_whittaker_smooth.py,sha256=lpLAyf4GdyDW4ulT1nyEoK6xQEl2cVUKquawQdGWbHU,3571
+ chemotools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ chemotools-0.1.6.dist-info/LICENSE,sha256=qtyOy2wDQVX9hxp58h3T-6Lmfv-mSCHoSRkcLUdM9bg,1070
+ chemotools-0.1.6.dist-info/METADATA,sha256=79TZ--QC_SOHj3ou6bDaRYsJsQoFS0sx2Rfe2BUOrG4,5239
+ chemotools-0.1.6.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+ chemotools-0.1.6.dist-info/RECORD,,
{chemotools-0.0.22.dist-info → chemotools-0.1.6.dist-info}/WHEEL
@@ -1,5 +1,4 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.40.0)
+ Generator: poetry-core 2.0.1
  Root-Is-Purelib: true
  Tag: py3-none-any
-
chemotools/scale/index_scaler.py (deleted)
@@ -1,97 +0,0 @@
- import numpy as np
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
- from sklearn.utils.validation import check_is_fitted
-
- from chemotools.utils.check_inputs import check_input
-
-
- class IndexScaler(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
- """
- A transformer that scales the input data by the value at a given index.
-
- Parameters
- ----------
- index : int, optional
- The index to scale the data by.
-
- Attributes
- ----------
- n_features_in_ : int
- The number of features in the input data.
-
- _is_fitted : bool
- Whether the transformer has been fitted to data.
-
- Methods
- -------
- fit(X, y=None)
- Fit the transformer to the input data.
-
- transform(X, y=0, copy=True)
- Transform the input data by scaling by the value at a given index.
- """
- def __init__(self, index: int = 0):
- self.index = index
-
-
- def fit(self, X: np.ndarray, y=None) -> "IndexScaler":
- """
- Fit the transformer to the input data.
-
- Parameters
- ----------
- X : np.ndarray of shape (n_samples, n_features)
- The input data to fit the transformer to.
-
- y : None
- Ignored.
-
- Returns
- -------
- self : IndexScaler
- The fitted transformer.
- """
- # Check that X is a 2D array and has only finite values
- X = check_input(X)
-
- # Set the number of features
- self.n_features_in_ = X.shape[1]
-
- # Set the fitted attribute to True
- self._is_fitted = True
-
- return self
-
- def transform(self, X: np.ndarray, y=None) -> np.ndarray:
- """
- Transform the input data by scaling by the value at a given index.
-
- Parameters
- ----------
- X : np.ndarray of shape (n_samples, n_features)
- The input data to transform.
-
- y : None
- Ignored.
-
- Returns
- -------
- X_ : np.ndarray of shape (n_samples, n_features)
- The transformed data.
- """
- # Check that the estimator is fitted
- check_is_fitted(self, "_is_fitted")
-
- # Check that X is a 2D array and has only finite values
- X = check_input(X)
- X_ = X.copy()
-
- # Check that the number of features is the same as the fitted data
- if X_.shape[1] != self.n_features_in_:
- raise ValueError(f"Expected {self.n_features_in_} features but got {X_.shape[1]}")
-
- # Scale the data by index
- for i, x in enumerate(X_):
- X_[i] = x / x[self.index]
-
- return X_.reshape(-1, 1) if X_.ndim == 1 else X_
chemotools/scatter/extended_multiplicative_scatter_correction.py (deleted)
@@ -1,33 +0,0 @@
- import numpy as np
- from sklearn.base import BaseEstimator, TransformerMixin
- from sklearn.utils.validation import check_is_fitted
-
- from chemotools.utils.check_inputs import check_input
-
- class ExtendedMultiplicativeScatterCorrection(BaseEstimator, TransformerMixin):
- def __init__(self):
- self.ref_spec = None
- self.coeffs = None
-
- def fit(self, X, ref_spec=None):
- if ref_spec is None:
- # Use mean spectrum as reference if none provided
- ref_spec = np.mean(X, axis=0)
- self.ref_spec = ref_spec
-
- # Calculate the mean spectrum
- mean_spec = np.mean(X, axis=0)
-
- # Fit a linear model to the reference spectrum
- coeffs = np.polyfit(mean_spec, ref_spec, deg=1)
- self.coeffs = coeffs
-
- def transform(self, X):
- # Divide the spectra by the linear model
- X_emsc = X / np.polyval(self.coeffs, X.mean(axis=1))
- return X_emsc
-
- def fit_transform(self, X, ref_spec=None):
- self.fit(X, ref_spec=ref_spec)
- X_emsc = self.transform(X)
- return X_emsc
chemotools/scatter/multiplicative_scatter_correction.py (deleted)
@@ -1,123 +0,0 @@
- import numpy as np
- from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
- from sklearn.utils.validation import check_is_fitted
-
- from chemotools.utils.check_inputs import check_input
-
-
- class MultiplicativeScatterCorrection(OneToOneFeatureMixin, BaseEstimator, TransformerMixin):
- """Multiplicative scatter correction (MSC) is a preprocessing technique for
- removing scatter effects from spectra. It is based on fitting a linear
- regression model to the spectrum using a reference spectrum. The reference
- spectrum is usually a mean or median spectrum of a set of spectra.
-
- Parameters
- ----------
- reference : np.ndarray, optional
- The reference spectrum to use for the correction. If None, the mean
- spectrum will be used. The default is None.
- use_mean : bool, optional
- Whether to use the mean spectrum as the reference. The default is True.
- use_median : bool, optional
- Whether to use the median spectrum as the reference. The default is False.
-
- Attributes
- ----------
- reference_ : np.ndarray
- The reference spectrum used for the correction.
- n_features_in_ : int
- The number of features in the training data.
-
- Raises
- ------
- ValueError
- If no reference is provided.
-
- """
- def __init__(
- self,
- reference: np.ndarray = None,
- use_mean: bool = True,
- use_median: bool = False,
- ):
- self.reference = reference
- self.use_mean = use_mean
- self.use_median = use_median
-
- def fit(self, X: np.ndarray, y=None) -> "MultiplicativeScatterCorrection":
- """
- Fit the transformer to the input data. If no reference is provided, the
- mean or median spectrum will be calculated from the input data.
-
- Parameters
- ----------
- X : np.ndarray of shape (n_samples, n_features)
- The input data to fit the transformer to.
-
- y : None
- Ignored.
-
- Returns
- -------
- self : MultiplicativeScatterCorrection
- The fitted transformer.
- """
- # Check that X is a 2D array and has only finite values
- X = check_input(X)
-
- # Set the number of features
- self.n_features_in_ = X.shape[1]
-
- # Set the fitted attribute to True
- self._is_fitted = True
-
- # Set the reference
- if self.reference is None and self.use_mean:
- self.reference_ = X.mean(axis=0)
- return self
-
- if self.reference is None and self.use_median:
- self.reference_ = np.median(X, axis=0)
- return self
-
- if self.reference is not None:
- self.reference_ = self.reference.copy()
- return self
-
- raise ValueError("No reference was provided")
-
- def transform(self, X: np.ndarray, y=None) -> np.ndarray:
- """
- Transform the input data by applying the multiplicative scatter
- correction.
-
- Parameters
- ----------
- X : np.ndarray of shape (n_samples, n_features)
- The input data to transform.
-
- y : None
- Ignored.
-
- Returns
- -------
- X_ : np.ndarray of shape (n_samples, n_features)
- The transformed data.
- """
- # Check that the estimator is fitted
- check_is_fitted(self, "_is_fitted")
-
- # Check that X is a 2D array and has only finite values
- X = check_input(X)
- X_ = X.copy()
-
- # Calculate the multiplicative signal correction
- ones = np.ones(X.shape[1])
- for i, x in enumerate(X_):
- X_[i] = self._calculate_multiplicative_correction(x, ones)
- return X_.reshape(-1, 1) if X_.ndim == 1 else X_
-
- def _calculate_multiplicative_correction(self, x, ones) -> np.ndarray:
- A = np.vstack([self.reference_, ones]).T
- m, c = np.linalg.lstsq(A, x, rcond=None)[0]
- return (x - c) / m
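The deleted public module is superseded by the new `chemotools/scatter/_multiplicative_scatter_correction.py` listed in the file table, with the class presumably re-exported from `chemotools.scatter`. A hedged usage sketch; the 0.1.6 constructor signature is not shown in this diff, so only the default constructor is used:

```python
# Assumed import path for 0.1.6; verify against chemotools/scatter/__init__.py.
import numpy as np
from chemotools.scatter import MultiplicativeScatterCorrection

rng = np.random.default_rng(1)
spectra = rng.random((10, 300))            # 10 spectra x 300 wavelengths

msc = MultiplicativeScatterCorrection()    # the removed 0.0.22 version defaulted to the mean spectrum as reference
spectra_corrected = msc.fit_transform(spectra)
```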
chemotools/utils/check_inputs.py (deleted)
@@ -1,14 +0,0 @@
- from sklearn.utils.validation import check_array
-
-
- def check_input(X, y=None):
- # Check that X is a 2D array and has only finite values
- X = check_array(X, ensure_2d=True, force_all_finite=True)
-
- # Check that y is None or a 1D array of the same length as X
- if y is not None:
- y = y.reshape(-1, 1) if y.ndim == 1 else y
- y = check_array(y, force_all_finite=True)
- if len(y) != X.shape[0]:
- raise ValueError("y must have the same number of samples as X")
- return X
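With `chemotools.utils.check_inputs.check_input` removed, every transformer validates through scikit-learn's `validate_data`, as the smoothing diffs above show. A minimal sketch of that pattern on a hypothetical no-op transformer (not part of chemotools), assuming scikit-learn ≥ 1.6 where `validate_data` is available:

```python
import numpy as np
from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin
from sklearn.utils.validation import check_is_fitted, validate_data


class IdentityTransformer(TransformerMixin, OneToOneFeatureMixin, BaseEstimator):
    """Hypothetical transformer illustrating the validation calls used in 0.1.6."""

    def fit(self, X, y=None):
        # validate_data records n_features_in_ on self, so no manual bookkeeping is needed
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )
        return self

    def transform(self, X, y=None):
        # the fitted check now keys on n_features_in_ instead of a private _is_fitted flag
        check_is_fitted(self, "n_features_in_")
        return validate_data(
            self, X, y="no_validation", ensure_2d=True, copy=True, reset=False, dtype=np.float64
        )
```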
chemotools/variable_selection/__init__.py (deleted)
@@ -1 +0,0 @@
- from .range_cut import RangeCut
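This last hunk removes the only export of the old `chemotools.variable_selection` package; per the file list, `RangeCut` now lives under `chemotools/feature_selection/_range_cut.py` alongside the new `_index_selector.py`. A hedged sketch of the resulting import change (the exact 0.1.6 re-exports are assumed from the `feature_selection/__init__.py` entry above):

```python
# chemotools 0.0.22
# from chemotools.variable_selection import RangeCut

# chemotools 0.1.6 (assumed public path)
from chemotools.feature_selection import RangeCut

range_cut = RangeCut()  # constructor defaults assumed unchanged; not verified from this diff
```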