ftir-prep 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ftir_framework/__init__.py +87 -0
- ftir_framework/config/__init__.py +25 -0
- ftir_framework/config/settings.py +97 -0
- ftir_framework/core/__init__.py +15 -0
- ftir_framework/core/evaluator.py +692 -0
- ftir_framework/core/explainer.py +329 -0
- ftir_framework/core/pipeline.py +262 -0
- ftir_framework/optimization/__init__.py +9 -0
- ftir_framework/optimization/optuna_optimizer.py +1103 -0
- ftir_framework/preprocessing/__init__.py +67 -0
- ftir_framework/preprocessing/baseline.py +656 -0
- ftir_framework/preprocessing/derivatives.py +98 -0
- ftir_framework/preprocessing/normalization.py +333 -0
- ftir_framework/preprocessing/smoothing.py +602 -0
- ftir_framework/preprocessing/truncation.py +158 -0
- ftir_framework/utils/__init__.py +9 -0
- ftir_framework/utils/data_loader.py +160 -0
- ftir_prep-0.1.0.dist-info/METADATA +316 -0
- ftir_prep-0.1.0.dist-info/RECORD +22 -0
- ftir_prep-0.1.0.dist-info/WHEEL +5 -0
- ftir_prep-0.1.0.dist-info/licenses/LICENSE +21 -0
- ftir_prep-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FTIR Preprocessing Framework
|
|
3
|
+
|
|
4
|
+
A modular and extensible framework for optimizing preprocessing pipelines
|
|
5
|
+
of FTIR spectra for disease diagnosis.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__version__ = "0.1.0"
|
|
9
|
+
__author__ = "Lucas Mendonça"
|
|
10
|
+
|
|
11
|
+
# Main imports
|
|
12
|
+
from .core.pipeline import FTIRPipeline, PipelineBuilder, create_pipeline_from_order
|
|
13
|
+
from .core.evaluator import PipelineEvaluator
|
|
14
|
+
from .core.explainer import FTIRExplainer
|
|
15
|
+
from .optimization.optuna_optimizer import OptunaPipelineOptimizer
|
|
16
|
+
from .utils.data_loader import FTIRDataLoader
|
|
17
|
+
|
|
18
|
+
# Cross-validation imports
|
|
19
|
+
from sklearn.model_selection import StratifiedGroupKFold
|
|
20
|
+
|
|
21
|
+
# Preprocessing imports
|
|
22
|
+
from .preprocessing.baseline import (
|
|
23
|
+
BaselineCorrector, RubberbandBaselineCorrector, PolynomialBaselineCorrector,
|
|
24
|
+
BaselineCorrectorFactory, baseline_rubberband, baseline_polynomial
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
from .preprocessing.normalization import (
|
|
28
|
+
MinMaxNormalizer, VectorNormalizer, AmidaINormalizer, NormalizerFactory,
|
|
29
|
+
normalize_minmax, normalize_vector, normalize_amida_i
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
from .preprocessing.smoothing import (
|
|
33
|
+
SavitzkyGolaySmoother, WaveletSmoother, LocalPolynomialSmoother, SmootherFactory,
|
|
34
|
+
sg_filter, wavelet_denoising, local_polynomial
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
from .preprocessing.derivatives import (
|
|
38
|
+
DerivativeCalculator, apply_derivative
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
__all__ = [
|
|
42
|
+
# Core
|
|
43
|
+
'FTIRPipeline',
|
|
44
|
+
'PipelineBuilder',
|
|
45
|
+
'create_pipeline_from_order',
|
|
46
|
+
'PipelineEvaluator',
|
|
47
|
+
'FTIRExplainer',
|
|
48
|
+
|
|
49
|
+
# Optimization
|
|
50
|
+
'OptunaPipelineOptimizer',
|
|
51
|
+
|
|
52
|
+
# Data loading
|
|
53
|
+
'FTIRDataLoader',
|
|
54
|
+
|
|
55
|
+
# Cross-validation
|
|
56
|
+
'StratifiedGroupKFold',
|
|
57
|
+
|
|
58
|
+
# Baseline
|
|
59
|
+
'BaselineCorrector',
|
|
60
|
+
'RubberbandBaselineCorrector',
|
|
61
|
+
'PolynomialBaselineCorrector',
|
|
62
|
+
'BaselineCorrectorFactory',
|
|
63
|
+
'baseline_rubberband',
|
|
64
|
+
'baseline_polynomial',
|
|
65
|
+
|
|
66
|
+
# Normalization
|
|
67
|
+
'MinMaxNormalizer',
|
|
68
|
+
'VectorNormalizer',
|
|
69
|
+
'AmidaINormalizer',
|
|
70
|
+
'NormalizerFactory',
|
|
71
|
+
'normalize_minmax',
|
|
72
|
+
'normalize_vector',
|
|
73
|
+
'normalize_amida_i',
|
|
74
|
+
|
|
75
|
+
# Smoothing
|
|
76
|
+
'SavitzkyGolaySmoother',
|
|
77
|
+
'WaveletSmoother',
|
|
78
|
+
'LocalPolynomialSmoother',
|
|
79
|
+
'SmootherFactory',
|
|
80
|
+
'sg_filter',
|
|
81
|
+
'wavelet_denoising',
|
|
82
|
+
'local_polynomial',
|
|
83
|
+
|
|
84
|
+
# Derivatives
|
|
85
|
+
'DerivativeCalculator',
|
|
86
|
+
'apply_derivative'
|
|
87
|
+
]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration module of the FTIR framework
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .settings import (
|
|
6
|
+
WAVELENGTH_RANGES,
|
|
7
|
+
BASELINE_CONFIG,
|
|
8
|
+
NORMALIZATION_CONFIG,
|
|
9
|
+
SMOOTHING_CONFIG,
|
|
10
|
+
CV_CONFIG,
|
|
11
|
+
CLASSIFIER_CONFIG,
|
|
12
|
+
OPTIMIZATION_CONFIG,
|
|
13
|
+
DEFAULT_PIPELINE_ORDER
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
'WAVELENGTH_RANGES',
|
|
18
|
+
'BASELINE_CONFIG',
|
|
19
|
+
'NORMALIZATION_CONFIG',
|
|
20
|
+
'SMOOTHING_CONFIG',
|
|
21
|
+
'CV_CONFIG',
|
|
22
|
+
'CLASSIFIER_CONFIG',
|
|
23
|
+
'OPTIMIZATION_CONFIG',
|
|
24
|
+
'DEFAULT_PIPELINE_ORDER'
|
|
25
|
+
]
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Default Settings for the FTIR Preprocessing Framework
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
# Spectral truncation ranges (values look like wavenumbers in cm^-1 despite the "wavelength" name -- TODO confirm)
|
|
6
|
+
WAVELENGTH_RANGES = {
|
|
7
|
+
'fingerprint': (900, 1800), # Fingerprint region
|
|
8
|
+
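'amide': (2800, 3050), # Amide region (NOTE(review): 2800-3050 cm^-1 is usually assigned to C-H stretching, and amide I is set to 1600-1700 in NORMALIZATION_CONFIG -- confirm this label)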
|
|
9
|
+
'custom': None # For custom ranges
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
# Baseline settings
|
|
13
|
+
BASELINE_CONFIG = {
|
|
14
|
+
'rubberband': {
|
|
15
|
+
'method': 'rubberband'
|
|
16
|
+
},
|
|
17
|
+
'polynomial': {
|
|
18
|
+
'method': 'poly',
|
|
19
|
+
'polynomial_order_range': (1, 6)
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
# Normalization settings
|
|
24
|
+
NORMALIZATION_CONFIG = {
|
|
25
|
+
'minmax': {},
|
|
26
|
+
'vector': {'norm': 'l2'},
|
|
27
|
+
'amide_i': {
|
|
28
|
+
'range': (1600, 1700)
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
# Smoothing settings
|
|
33
|
+
SMOOTHING_CONFIG = {
|
|
34
|
+
'savgol': {
|
|
35
|
+
'window_length': 11,
|
|
36
|
+
'polyorder_range': (1, 6),
|
|
37
|
+
'deriv_range': (0, 2)
|
|
38
|
+
},
|
|
39
|
+
'wavelet': {
|
|
40
|
+
'wavelets': ['db2', 'db3', 'db4'],
|
|
41
|
+
'level_range': (1, 3),
|
|
42
|
+
'mode': 'soft'
|
|
43
|
+
},
|
|
44
|
+
'local_polynomial': {
|
|
45
|
+
'bandwidth_range': (1, 6),
|
|
46
|
+
'iterations': 0
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
# Cross-validation settings
|
|
51
|
+
CV_CONFIG = {
|
|
52
|
+
'method': 'StratifiedGroupKFold', # Stratified splitting that also respects groups
|
|
53
|
+
'n_splits': 5,
|
|
54
|
+
'shuffle': False,
|
|
55
|
+
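'random_state': 42, # NOTE(review): scikit-learn K-fold splitters raise ValueError for a non-None random_state when shuffle=False -- confirm how this value is passed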
|
|
56
|
+
'description': 'Stratified cross-validation that respects groups (patients)',
|
|
57
|
+
'shuffle_warning': '''
|
|
58
|
+
⚠️ WARNING: Shuffling in FTIR data may cause issues:
|
|
59
|
+
|
|
60
|
+
✅ RECOMMENDED (shuffle=False):
|
|
61
|
+
- Preserves the natural temporal order of the data
|
|
62
|
+
- Prevents information leakage between correlated samples
|
|
63
|
+
- More realistic for clinical applications
|
|
64
|
+
|
|
65
|
+
❌ NOT RECOMMENDED (shuffle=True):
|
|
66
|
+
- May create artificial dependencies between train and test
|
|
67
|
+
- May hide generalization issues
|
|
68
|
+
- May artificially inflate accuracy
|
|
69
|
+
|
|
70
|
+
💡 For FTIR data, keep shuffle=False unless:
|
|
71
|
+
- You are certain there is no temporal correlation
|
|
72
|
+
- You are specifically testing robustness to shuffling
|
|
73
|
+
- You have independent external validation
|
|
74
|
+
'''
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
# Classifier settings
|
|
78
|
+
CLASSIFIER_CONFIG = {
|
|
79
|
+
'RandomForest': {
|
|
80
|
+
'n_estimators': 100,
|
|
81
|
+
'random_state': 42,
|
|
82
|
+
'class_weight': 'balanced', # Handles class imbalance
|
|
83
|
+
'max_depth': None,
|
|
84
|
+
'min_samples_split': 2,
|
|
85
|
+
'min_samples_leaf': 1
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
# Optimization settings
|
|
90
|
+
OPTIMIZATION_CONFIG = {
|
|
91
|
+
'n_trials': 30,
|
|
92
|
+
'direction': 'maximize',
|
|
93
|
+
'timeout': None
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
# Default order of preprocessing techniques
|
|
97
|
+
DEFAULT_PIPELINE_ORDER = ['smoothing', 'normalization', 'baseline', 'derivative']
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core module of the FTIR framework
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from .pipeline import FTIRPipeline, PipelineBuilder, create_pipeline_from_order
|
|
6
|
+
from .evaluator import PipelineEvaluator
|
|
7
|
+
from .explainer import FTIRExplainer
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
'FTIRPipeline',
|
|
11
|
+
'PipelineBuilder',
|
|
12
|
+
'create_pipeline_from_order',
|
|
13
|
+
'PipelineEvaluator',
|
|
14
|
+
'FTIRExplainer'
|
|
15
|
+
]
|