ftir-prep 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
"""FTIR preprocessing framework.

A modular and extensible framework for optimizing preprocessing pipelines
for FTIR spectra used in disease diagnosis.
"""

# Package metadata.
__author__ = "Lucas Mendonça"
__version__ = "0.1.0"
10
+
11
+ # Main imports
12
+ from .core.pipeline import FTIRPipeline, PipelineBuilder, create_pipeline_from_order
13
+ from .core.evaluator import PipelineEvaluator
14
+ from .core.explainer import FTIRExplainer
15
+ from .optimization.optuna_optimizer import OptunaPipelineOptimizer
16
+ from .utils.data_loader import FTIRDataLoader
17
+
18
+ # Cross-validation imports
19
+ from sklearn.model_selection import StratifiedGroupKFold
20
+
21
+ # Preprocessing imports
22
+ from .preprocessing.baseline import (
23
+ BaselineCorrector, RubberbandBaselineCorrector, PolynomialBaselineCorrector,
24
+ BaselineCorrectorFactory, baseline_rubberband, baseline_polynomial
25
+ )
26
+
27
+ from .preprocessing.normalization import (
28
+ MinMaxNormalizer, VectorNormalizer, AmidaINormalizer, NormalizerFactory,
29
+ normalize_minmax, normalize_vector, normalize_amida_i
30
+ )
31
+
32
+ from .preprocessing.smoothing import (
33
+ SavitzkyGolaySmoother, WaveletSmoother, LocalPolynomialSmoother, SmootherFactory,
34
+ sg_filter, wavelet_denoising, local_polynomial
35
+ )
36
+
37
+ from .preprocessing.derivatives import (
38
+ DerivativeCalculator, apply_derivative
39
+ )
40
+
# Names exported by a star-import of the package, grouped by topic.
__all__ = [
    # Core: pipeline construction, evaluation and explanation
    "FTIRPipeline", "PipelineBuilder", "create_pipeline_from_order",
    "PipelineEvaluator", "FTIRExplainer",
    # Optimization
    "OptunaPipelineOptimizer",
    # Data loading
    "FTIRDataLoader",
    # Cross-validation splitter (re-exported from scikit-learn)
    "StratifiedGroupKFold",
    # Baseline correction: classes, factory, then function helpers
    "BaselineCorrector", "RubberbandBaselineCorrector",
    "PolynomialBaselineCorrector", "BaselineCorrectorFactory",
    "baseline_rubberband", "baseline_polynomial",
    # Normalization: classes, factory, then function helpers
    "MinMaxNormalizer", "VectorNormalizer", "AmidaINormalizer",
    "NormalizerFactory",
    "normalize_minmax", "normalize_vector", "normalize_amida_i",
    # Smoothing: classes, factory, then function helpers
    "SavitzkyGolaySmoother", "WaveletSmoother", "LocalPolynomialSmoother",
    "SmootherFactory",
    "sg_filter", "wavelet_denoising", "local_polynomial",
    # Derivatives
    "DerivativeCalculator", "apply_derivative",
]
@@ -0,0 +1,25 @@
1
+ """
2
+ Configuration module for the FTIR framework
3
+ """
4
+
5
+ from .settings import (
6
+ WAVELENGTH_RANGES,
7
+ BASELINE_CONFIG,
8
+ NORMALIZATION_CONFIG,
9
+ SMOOTHING_CONFIG,
10
+ CV_CONFIG,
11
+ CLASSIFIER_CONFIG,
12
+ OPTIMIZATION_CONFIG,
13
+ DEFAULT_PIPELINE_ORDER
14
+ )
15
+
# Configuration constants re-exported from the settings module.
__all__ = [
    "WAVELENGTH_RANGES",
    "BASELINE_CONFIG",
    "NORMALIZATION_CONFIG",
    "SMOOTHING_CONFIG",
    "CV_CONFIG",
    "CLASSIFIER_CONFIG",
    "OPTIMIZATION_CONFIG",
    "DEFAULT_PIPELINE_ORDER",
]
@@ -0,0 +1,97 @@
1
+ """
2
+ Default Settings for the FTIR Preprocessing Framework
3
+ """
4
+
# Named spectral windows available for truncating spectra.
# Each entry is a two-number tuple; ``None`` is the placeholder for a
# caller-supplied custom window.
# NOTE(review): values are presumably (low, high) wavenumbers in cm^-1 - confirm.
# NOTE(review): the 'amide' window (2800-3050) does not contain the amide I
# range (1600, 1700) used by NORMALIZATION_CONFIG - confirm the label is intended.
WAVELENGTH_RANGES = {
    "fingerprint": (900, 1800),
    "amide": (2800, 3050),
    "custom": None,
}
11
+
# Baseline-correction settings, one sub-dictionary per technique.
# NOTE(review): 'polynomial_order_range' is presumably the search interval for
# the polynomial order - confirm whether the upper bound is inclusive.
BASELINE_CONFIG = {
    "rubberband": {"method": "rubberband"},
    "polynomial": {
        "method": "poly",
        "polynomial_order_range": (1, 6),
    },
}
22
+
# Normalization settings, one sub-dictionary per technique.
# 'minmax' takes no extra options; 'vector' selects the L2 norm; 'amide_i'
# carries the two-number range associated with the amide I band.
NORMALIZATION_CONFIG = {
    "minmax": {},
    "vector": {"norm": "l2"},
    "amide_i": {"range": (1600, 1700)},
}
31
+
# Smoothing settings, one sub-dictionary per technique.
# NOTE(review): the '*_range' tuples are presumably search bounds explored by
# the optimizer - confirm whether the upper bounds are inclusive.
SMOOTHING_CONFIG = {
    # Savitzky-Golay filter
    "savgol": {
        "window_length": 11,
        "polyorder_range": (1, 6),
        "deriv_range": (0, 2),
    },
    # Wavelet denoising
    "wavelet": {
        "wavelets": ["db2", "db3", "db4"],
        "level_range": (1, 3),
        "mode": "soft",
    },
    # Local polynomial smoothing
    "local_polynomial": {
        "bandwidth_range": (1, 6),
        "iterations": 0,
    },
}
49
+
# Cross-validation settings.
# 'method' names the splitter: stratified folds that also keep each group
# (patient) within a single fold.
# NOTE(review): scikit-learn's k-fold splitters expect random_state to be left
# as None when shuffle is False - confirm the consumer only forwards
# 'random_state' when shuffling is enabled.
# NOTE(review): the warning text below is reproduced flush-left, as it appears
# in this listing; confirm against the packaged file if leading whitespace
# inside the message matters.
CV_CONFIG = {
    "method": "StratifiedGroupKFold",
    "n_splits": 5,
    "shuffle": False,
    "random_state": 42,
    "description": "Stratified cross-validation that respects groups (patients)",
    "shuffle_warning": """
⚠️ WARNING: Shuffling in FTIR data may cause issues:

✅ RECOMMENDED (shuffle=False):
- Preserves the natural temporal order of the data
- Prevents information leakage between correlated samples
- More realistic for clinical applications

❌ NOT RECOMMENDED (shuffle=True):
- May create artificial dependencies between train and test
- May hide generalization issues
- May artificially inflate accuracy

💡 For FTIR data, keep shuffle=False unless:
- You are certain there is no temporal correlation
- You are specifically testing robustness to shuffling
- You have independent external validation
""",
}
76
+
# Classifier settings, keyed by classifier name.
# NOTE(review): the inner keys look like scikit-learn RandomForestClassifier
# keyword arguments - confirm how the consumer applies them.
CLASSIFIER_CONFIG = {
    "RandomForest": {
        "n_estimators": 100,
        "random_state": 42,
        "class_weight": "balanced",  # compensates for class imbalance
        "max_depth": None,
        "min_samples_split": 2,
        "min_samples_leaf": 1,
    },
}
88
+
# Optimization settings: trial budget, objective direction and optional timeout
# (``None`` means no timeout value is set).
# NOTE(review): presumably forwarded to the Optuna-based optimizer - confirm.
OPTIMIZATION_CONFIG = {
    "n_trials": 30,
    "direction": "maximize",
    "timeout": None,
}
95
+
# Default sequence in which the preprocessing techniques are applied.
DEFAULT_PIPELINE_ORDER = [
    "smoothing",
    "normalization",
    "baseline",
    "derivative",
]
@@ -0,0 +1,15 @@
1
+ """
2
+ Core module of the FTIR framework
3
+ """
4
+
5
+ from .pipeline import FTIRPipeline, PipelineBuilder, create_pipeline_from_order
6
+ from .evaluator import PipelineEvaluator
7
+ from .explainer import FTIRExplainer
8
+
# Public names of the core subpackage: pipeline construction helpers, the
# pipeline evaluator and the explainer.
__all__ = [
    "FTIRPipeline",
    "PipelineBuilder",
    "create_pipeline_from_order",
    "PipelineEvaluator",
    "FTIRExplainer",
]