pkboost 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. pkboost-2.0.2/.gitignore +69 -0
  2. pkboost-2.0.2/CHANGELOG_V2.0.2.md +87 -0
  3. pkboost-2.0.2/CHANGELOG_V2.md +184 -0
  4. pkboost-2.0.2/Cargo.lock +562 -0
  5. pkboost-2.0.2/Cargo.toml +68 -0
  6. pkboost-2.0.2/DRYBEAN_DRIFT_RESULTS.md +120 -0
  7. pkboost-2.0.2/DryBeanDataset/Dry_Bean_Dataset.arff +13636 -0
  8. pkboost-2.0.2/DryBeanDataset/Dry_Bean_Dataset.txt +78 -0
  9. pkboost-2.0.2/DryBeanDataset/Dry_Bean_Dataset.xlsx +0 -0
  10. pkboost-2.0.2/FEATURES.md +641 -0
  11. pkboost-2.0.2/HAB_CONCLUSION.md +24 -0
  12. pkboost-2.0.2/HAB_FINAL_CONCLUSION.md +83 -0
  13. pkboost-2.0.2/MULTICLASS.md +168 -0
  14. pkboost-2.0.2/Math.pdf +0 -0
  15. pkboost-2.0.2/PKG-INFO +468 -0
  16. pkboost-2.0.2/POISSON_LOSS.md +206 -0
  17. pkboost-2.0.2/PUSH_TO_GITHUB.txt +57 -0
  18. pkboost-2.0.2/README.md +449 -0
  19. pkboost-2.0.2/SHANNON_ANALYSIS.md +108 -0
  20. pkboost-2.0.2/V2_READY.md +133 -0
  21. pkboost-2.0.2/adaptive_comparison.py +235 -0
  22. pkboost-2.0.2/adaptive_comparison_results.csv +8 -0
  23. pkboost-2.0.2/adaptive_regression_metrics.csv +7 -0
  24. pkboost-2.0.2/alb_metrics.csv +13 -0
  25. pkboost-2.0.2/all_rust_code.txt +10467 -0
  26. pkboost-2.0.2/benchmark results/1.png +0 -0
  27. pkboost-2.0.2/benchmark results/all_benchmarks_results.csv +5 -0
  28. pkboost-2.0.2/benchmark results/wosconsin.png +0 -0
  29. pkboost-2.0.2/benchmark_drybean_comparison.py +96 -0
  30. pkboost-2.0.2/benchmark_multiclass_comparison.py +141 -0
  31. pkboost-2.0.2/compare_drift_performance.py +278 -0
  32. pkboost-2.0.2/create_bigger_test.py +28 -0
  33. pkboost-2.0.2/create_extreme_imbalance.py +38 -0
  34. pkboost-2.0.2/create_small_test.py +30 -0
  35. pkboost-2.0.2/data/README.md +0 -0
  36. pkboost-2.0.2/data/confussion_matrix.png +0 -0
  37. pkboost-2.0.2/data/customer_predictor_app.png +0 -0
  38. pkboost-2.0.2/data/e-commerce_churn.png +0 -0
  39. pkboost-2.0.2/data/final_model.sav +0 -0
  40. pkboost-2.0.2/data/logo_shopwise.png +0 -0
  41. pkboost-2.0.2/diagrams/pkboost_system_architecture.drawio +244 -0
  42. pkboost-2.0.2/diagrams/state_machine.drawio +204 -0
  43. pkboost-2.0.2/docs/BENCHMARK_REPRODUCTION.md +607 -0
  44. pkboost-2.0.2/docs/DRIFT_BENCHMARK_REPORT.md +155 -0
  45. pkboost-2.0.2/docs/PYTHON_BINDINGS.md +255 -0
  46. pkboost-2.0.2/docs/SCRIPTS_GUIDE.md +279 -0
  47. pkboost-2.0.2/download_creditcard.py +21 -0
  48. pkboost-2.0.2/download_dataset.py +100 -0
  49. pkboost-2.0.2/drift_comparison_all.py +744 -0
  50. pkboost-2.0.2/drift_comparison_complete.png +0 -0
  51. pkboost-2.0.2/drift_comparison_results.csv +10 -0
  52. pkboost-2.0.2/drift_detailed_results.csv +17 -0
  53. pkboost-2.0.2/pkboost_sklearn/README.md +115 -0
  54. pkboost-2.0.2/pkboost_sklearn/__init__.py +89 -0
  55. pkboost-2.0.2/pkboost_sklearn/classifier.py +190 -0
  56. pkboost-2.0.2/pkboost_sklearn/multiclass.py +136 -0
  57. pkboost-2.0.2/pkboost_sklearn/regressor.py +107 -0
  58. pkboost-2.0.2/pkboost_sklearn/sklearn_interface.py +399 -0
  59. pkboost-2.0.2/pkboost_sklearn/test_sklearn_compat.py +200 -0
  60. pkboost-2.0.2/plot_adaptive_results.py +74 -0
  61. pkboost-2.0.2/prepare_data.py +155 -0
  62. pkboost-2.0.2/pyproject.toml +30 -0
  63. pkboost-2.0.2/python/README.md +250 -0
  64. pkboost-2.0.2/python/example.py +49 -0
  65. pkboost-2.0.2/python/example_creditcard.py +75 -0
  66. pkboost-2.0.2/python/example_creditcard_drift.py +124 -0
  67. pkboost-2.0.2/python/example_drift.py +114 -0
  68. pkboost-2.0.2/raw_data/README.md +0 -0
  69. pkboost-2.0.2/regression_drift_benchmark.py +171 -0
  70. pkboost-2.0.2/resources/comprehensive_benchmark.py +240 -0
  71. pkboost-2.0.2/resources/pybenchmark.py +378 -0
  72. pkboost-2.0.2/resources/test_drift_comparison.py +238 -0
  73. pkboost-2.0.2/resources/text.py +110 -0
  74. pkboost-2.0.2/results/16_drift_scenarios_comparison.csv +49 -0
  75. pkboost-2.0.2/results/threeway_final.csv +4 -0
  76. pkboost-2.0.2/run_all_benchmarks.py +400 -0
  77. pkboost-2.0.2/rust_code_with_structure.txt +10471 -0
  78. pkboost-2.0.2/scripts/prepare_churn_data.py +106 -0
  79. pkboost-2.0.2/src/adaptive_parallel.rs +161 -0
  80. pkboost-2.0.2/src/adversarial.rs +95 -0
  81. pkboost-2.0.2/src/auto_params.rs +75 -0
  82. pkboost-2.0.2/src/auto_tuner.rs +130 -0
  83. pkboost-2.0.2/src/bin/benchmark.rs +180 -0
  84. pkboost-2.0.2/src/bin/benchmark_drybean.rs +133 -0
  85. pkboost-2.0.2/src/bin/benchmark_progressive_precision.rs +88 -0
  86. pkboost-2.0.2/src/bin/hab_vs_baseline_benchmark.rs +100 -0
  87. pkboost-2.0.2/src/bin/multiclass_benchmark.rs +146 -0
  88. pkboost-2.0.2/src/bin/pkboost_drift_benchmark.rs +74 -0
  89. pkboost-2.0.2/src/bin/profile_core.rs +75 -0
  90. pkboost-2.0.2/src/bin/test_16_drift_scenarios.rs +350 -0
  91. pkboost-2.0.2/src/bin/test_16_drift_scenarios_verbose.rs +306 -0
  92. pkboost-2.0.2/src/bin/test_adaptive_regression.rs +161 -0
  93. pkboost-2.0.2/src/bin/test_churn_hab.rs +84 -0
  94. pkboost-2.0.2/src/bin/test_combined_scoring.rs +98 -0
  95. pkboost-2.0.2/src/bin/test_drift.rs +324 -0
  96. pkboost-2.0.2/src/bin/test_drift_sensitivity.rs +106 -0
  97. pkboost-2.0.2/src/bin/test_drybean_drift.rs +121 -0
  98. pkboost-2.0.2/src/bin/test_hab_binary.rs +99 -0
  99. pkboost-2.0.2/src/bin/test_hab_creditcard.rs +95 -0
  100. pkboost-2.0.2/src/bin/test_hab_drift.rs +169 -0
  101. pkboost-2.0.2/src/bin/test_hab_streaming.rs +114 -0
  102. pkboost-2.0.2/src/bin/test_living.rs +67 -0
  103. pkboost-2.0.2/src/bin/test_loss_selection.rs +72 -0
  104. pkboost-2.0.2/src/bin/test_massive_drift.rs +176 -0
  105. pkboost-2.0.2/src/bin/test_multiclass.rs +109 -0
  106. pkboost-2.0.2/src/bin/test_poisson.rs +104 -0
  107. pkboost-2.0.2/src/bin/test_precision.rs +122 -0
  108. pkboost-2.0.2/src/bin/test_regression.rs +63 -0
  109. pkboost-2.0.2/src/bin/test_retrain.rs +171 -0
  110. pkboost-2.0.2/src/bin/test_shannon_multiclass.rs +178 -0
  111. pkboost-2.0.2/src/bin/test_simple_regression.rs +54 -0
  112. pkboost-2.0.2/src/bin/test_static.rs +129 -0
  113. pkboost-2.0.2/src/bin/test_uncertainty.rs +94 -0
  114. pkboost-2.0.2/src/bin/threeway_comparison.rs +355 -0
  115. pkboost-2.0.2/src/constants.rs +80 -0
  116. pkboost-2.0.2/src/histogram_builder.rs +185 -0
  117. pkboost-2.0.2/src/huber_loss.rs +67 -0
  118. pkboost-2.0.2/src/lib.rs +43 -0
  119. pkboost-2.0.2/src/living_booster.rs +585 -0
  120. pkboost-2.0.2/src/living_regressor.rs +768 -0
  121. pkboost-2.0.2/src/loss.rs +150 -0
  122. pkboost-2.0.2/src/metabolism.rs +115 -0
  123. pkboost-2.0.2/src/metrics.rs +137 -0
  124. pkboost-2.0.2/src/model.rs +619 -0
  125. pkboost-2.0.2/src/multiclass.rs +91 -0
  126. pkboost-2.0.2/src/optimized_data.rs +187 -0
  127. pkboost-2.0.2/src/partitioned_classifier.rs +506 -0
  128. pkboost-2.0.2/src/precision.rs +209 -0
  129. pkboost-2.0.2/src/python_bindings.rs +717 -0
  130. pkboost-2.0.2/src/regression.rs +400 -0
  131. pkboost-2.0.2/src/tree.rs +531 -0
  132. pkboost-2.0.2/src/tree_regression.rs +109 -0
  133. pkboost-2.0.2/temp/compare_baseline.py +68 -0
  134. pkboost-2.0.2/temp/compare_drift.py +74 -0
  135. pkboost-2.0.2/temp/compare_models.py +101 -0
  136. pkboost-2.0.2/test_drybean_drift_comparison.py +130 -0
  137. pkboost-2.0.2/test_multiple_runs.py +63 -0
  138. pkboost-2.0.2/three_way_comparison.py +253 -0
  139. pkboost-2.0.2/threeway_benchmark.py +310 -0
  140. pkboost-2.0.2/threeway_final.py +178 -0
  141. pkboost-2.0.2/visualize_multiclass_results.py +81 -0
@@ -0,0 +1,69 @@
1
+ # Rust
2
+ /target/
3
+ **/*.rs.bk
4
+ *.pdb
5
+ Cargo.lock
6
+
7
+ # IDE
8
+ .vscode/
9
+ .idea/
10
+ *.swp
11
+ *.swo
12
+ *~
13
+
14
+ # OS
15
+ .DS_Store
16
+ Thumbs.db
17
+
18
+ # Data files - exclude all datasets
19
+ data/*.csv
20
+ raw_data/*.csv
21
+ *.pkl
22
+ *.zip
23
+ *.xlsx
24
+
25
+ # Python
26
+ __pycache__/
27
+ *.py[cod]
28
+ *$py.class
29
+ .Python
30
+ *.so
31
+ .pytest_cache/
32
+ .ipynb_checkpoints/
33
+
34
+ # Logs
35
+ *.log
36
+
37
+ # Temporary files
38
+ *.tmp
39
+ *.temp
40
+
41
+ # Build artifacts
42
+ *.exe
43
+ *.dll
44
+ *.dylib
45
+
46
+ # Documentation build
47
+ /docs/_build/
48
+ /site/
49
+ FEATURE_LIST_PROGRESSIVE_PRECISION.md
50
+ .gitignore
51
+ NEXT_STEPS_COMPLETED.md
52
+ .gitignore
53
+ PROGRESSIVE_PRECISION_SUMMARY.md
54
+ PROGRESSIVE_PRECISION_RESULTS.md
55
+ three_way_comparison.csv
56
+ THREEWAY_COMPARISON_RESULTS.md
57
+ docs/PROGRESSIVE_PRECISION.md
58
+ temp/val.csv
59
+ temp/test_drift.csv
60
+ .gitignore
61
+ temp/train.csv
62
+ temp/test.csv
63
+ compare_16_drift_scenarios.py
64
+ rust_code_with_structure.txt
65
+ .gitignore
66
+ all_rust_code.txt
67
+ alb_metrics.csv
68
+
69
+ all_rust_code.txt
@@ -0,0 +1,87 @@
1
+ # PKBoost v2.0.2 Changelog
2
+
3
+ ## Release Date: November 2025
4
+
5
+ ## 🎯 Major Feature: Poisson Loss for Count Regression
6
+
7
+ ### New Capabilities
8
+ - **Poisson Regression**: Full support for count-based targets (Y ∈ {0, 1, 2, ...})
9
+ - **Log-Link Function**: Automatic exp() transformation for non-negative predictions
10
+ - **Newton-Raphson Integration**: Fits seamlessly into the existing optimization framework
11
+
12
+ ### Performance
13
+ - **6.4% lower RMSE** than MSE loss on synthetic Poisson data
14
+ - Optimized for insurance claims, purchase counts, event frequency modeling
15
+
16
+ ### API
17
+ ```rust
18
+ let mut model = PKBoostRegressor::auto(&x_train, &y_train)
19
+ .with_loss(RegressionLossType::Poisson);
20
+ model.fit(&x_train, &y_train, None, true)?;
21
+ let predictions = model.predict(&x_test)?;
22
+ ```
23
+
24
+ ### Files Added
25
+ - `src/loss.rs` - Unified loss module with Poisson, MSE, Huber
26
+ - `src/bin/test_poisson.rs` - Benchmark test for Poisson regression
27
+ - `POISSON_LOSS.md` - Complete documentation and usage guide
28
+
29
+ ### Technical Details
30
+ - Gradient: `exp(f) - y`
31
+ - Hessian: `exp(f)`
32
+ - Overflow prevention: Cap at 10^15
33
+ - Hessian stability: Min 1e-6
34
+
35
+ ## 🔧 Improvements
36
+
37
+ ### Loss Module Refactoring
38
+ - Consolidated loss functions into single module
39
+ - Added `OptimizedShannonLoss` for backward compatibility
40
+ - Unified gradient/hessian interface
41
+
42
+ ### Regression Enhancements
43
+ - `RegressionLossType` enum now includes Poisson
44
+ - `.with_loss()` builder method for easy loss selection
45
+ - Automatic prediction transformation based on loss type
46
+
47
+ ## 📊 Benchmark Results
48
+
49
+ **Synthetic Poisson Data** (5000 train, 1000 test):
50
+ ```
51
+ True model: λ = exp(0.5 + 0.3·x₁ + 0.7·x₂)
52
+
53
+ MSE Loss: RMSE 1.653, MAE 1.202
54
+ Poisson Loss: RMSE 1.548, MAE 1.143 (6.4% lower RMSE than MSE loss)
55
+ ```
56
+
57
+ ## 🐛 Bug Fixes
58
+ - None (new feature release)
59
+
60
+ ## 📚 Documentation
61
+ - Added comprehensive Poisson loss guide
62
+ - Mathematical foundation and derivations
63
+ - Usage examples and best practices
64
+ - When to use Poisson vs MSE vs Huber
65
+
66
+ ## 🔮 Future Roadmap
67
+ - Gamma Loss (continuous skewed data)
68
+ - Tweedie Loss (insurance pricing)
69
+ - Negative Binomial (overdispersed counts)
70
+
71
+ ## Breaking Changes
72
+ - None (fully backward compatible)
73
+
74
+ ## Migration Guide
75
+ No migration needed. Existing code continues to work. To use Poisson:
76
+ ```rust
77
+ // Old (still works)
78
+ let model = PKBoostRegressor::auto(&x, &y);
79
+
80
+ // New (Poisson)
81
+ let model = PKBoostRegressor::auto(&x, &y)
82
+ .with_loss(RegressionLossType::Poisson);
83
+ ```
84
+
85
+ ---
86
+
87
+ **Full Changelog**: v2.0.1...v2.0.2
@@ -0,0 +1,184 @@
1
+ # PKBoost v2.0 - Changelog
2
+
3
+ ## 🚀 Major Features Added
4
+
5
+ ### Multi-Class Classification
6
+ - **One-vs-Rest (OvR) Strategy**: Parallel training of N binary classifiers
7
+ - **Softmax Normalization**: Calibrated probability outputs
8
+ - **Per-Class Auto-Tuning**: Each binary task optimized independently
9
+ - **Real-World Validation**: 92.36% accuracy on Dry Bean dataset (7 classes)
10
+
11
+ ### Hierarchical Adaptive Boosting (HAB)
12
+ - **Partition-Based Ensemble**: K-means clustering for specialized regions
13
+ - **165x Faster Adaptation**: Selective retraining vs full model
14
+ - **SimSIMD Integration**: SIMD-accelerated distance calculations
15
+ - **Drift Detection**: Per-partition error monitoring with EMA
16
+ - **Selective Metamorphosis**: Retrain only drifted partitions
17
+
18
+ ### Advanced Drift Features
19
+ - **Drift Diagnostics**: Error entropy, temporal patterns, variance changes
20
+ - **Metamorphosis Strategies**: Conservative, DataAware, FeatureAware
21
+ - **Prediction Uncertainty**: Ensemble variance and confidence intervals
22
+ - **2-17x Better Resilience**: vs XGBoost/LightGBM under drift
23
+
24
+ ## 📊 Benchmark Results
25
+
26
+ ### Dry Bean Dataset (Real-World, 7 Classes)
27
+ | Model | Accuracy | Macro-F1 | Drift Resilience |
28
+ |-------|----------|----------|------------------|
29
+ | **PKBoost** | **92.36%** | **0.9360** | **-0.43%** degradation |
30
+ | LightGBM | 92.36% | 0.9352 | -0.55% degradation |
31
+ | XGBoost | 92.25% | 0.9347 | -0.91% degradation |
32
+
33
+ **Key Achievement**: PKBoost wins on Macro-F1 (best minority class detection) and is 2.1x more drift-resilient than XGBoost.
34
+
35
+ ### Credit Card Fraud (Binary, 0.17% positive)
36
+ | Model | PR-AUC | Drift Resilience |
37
+ |-------|--------|------------------|
38
+ | **PKBoost** | **0.878** | **-1.8%** degradation |
39
+ | LightGBM | 0.793 | -42.5% degradation |
40
+ | XGBoost | 0.745 | -31.8% degradation |
41
+
42
+ **Key Achievement**: 17.7x better drift resilience than XGBoost on extreme imbalance.
43
+
44
+ ## 🔧 Performance Optimizations
45
+
46
+ ### Core Model (32-46% Speedup)
47
+ - **Loop Unrolling**: 4x unroll in histogram building
48
+ - **Conditional Entropy**: Skip calculation at depth > 4
49
+ - **Smart Parallelism**: Only when n_features > 20 or n_samples > 5000
50
+ - **Result**: Per-tree time reduced from 19.4ms to 13.2ms
51
+
52
+ ### HAB Architecture
53
+ - **Parallel Specialist Training**: All classifiers train simultaneously
54
+ - **SIMD Distance Calculations**: 18% faster with SimSIMD
55
+ - **Batched Processing**: Memory-efficient for large datasets
56
+
57
+ ## 📚 Documentation
58
+
59
+ ### New Documents
60
+ - **FEATURES.md**: Complete feature list (45 features)
61
+ - **MULTICLASS.md**: Multi-class usage guide
62
+ - **MULTICLASS_BENCHMARK_RESULTS.md**: Detailed comparison
63
+ - **SHANNON_ANALYSIS.md**: Entropy impact analysis
64
+ - **DRYBEAN_DRIFT_RESULTS.md**: Drift resilience study
65
+ - **MULTICLASS_REALISTIC_RESULTS.md**: Honest assessment
66
+
67
+ ### Enhanced README
68
+ - Multi-class usage examples
69
+ - Decision guide flowchart
70
+ - Performance benchmarks
71
+ - Troubleshooting guide
72
+ - API quick reference
73
+
74
+ ## 🐛 Bug Fixes
75
+ - Fixed data leakage in synthetic multi-class dataset
76
+ - Removed unused imports and dead code warnings
77
+ - Fixed gradient explosion handling in Living Regressor
78
+ - Improved error handling in HAB metamorphosis
79
+
80
+ ## 🔄 API Changes
81
+
82
+ ### New Classes
83
+ ```rust
84
+ // Multi-class classification
85
+ MultiClassPKBoost::new(n_classes)
86
+
87
+ // Hierarchical Adaptive Boosting
88
+ PartitionedClassifier::new(config)
89
+ PartitionedClassifierBuilder::new()
90
+ ```
91
+
92
+ ### New Methods
93
+ ```rust
94
+ // Batched prediction for large datasets
95
+ model.predict_proba_batch(&x, batch_size)
96
+
97
+ // Uncertainty quantification
98
+ regressor.predict_with_uncertainty(&x)
99
+
100
+ // Drift detection
101
+ hab.observe_batch(&x, &y) // Returns drifted partitions
102
+ hab.metamorph_partitions(&partition_ids, &buffer_x, &buffer_y, verbose)
103
+ ```
104
+
105
+ ## 📈 Performance Summary
106
+
107
+ | Metric | v1.0 | v2.0 | Improvement |
108
+ |--------|------|------|-------------|
109
+ | Multi-Class Support | ❌ | ✅ | New feature |
110
+ | Drift Adaptation Speed | N/A | 165x faster | New feature |
111
+ | Core Model Speed | Baseline | +32-46% | Optimized |
112
+ | Macro-F1 (Imbalanced) | Good | **Best** | +5-7% vs competitors |
113
+ | Drift Resilience | Good | **2-17x better** | vs XGBoost/LightGBM |
114
+
115
+ ## 🎯 Use Cases
116
+
117
+ ### Perfect For:
118
+ - **Multi-class imbalanced problems** (fraud types, disease categories)
119
+ - **Production systems with drift** (real-time fraud detection)
120
+ - **Minority-class-critical applications** (medical diagnosis, anomaly detection)
121
+ - **Zero-tuning deployment** (auto-configuration)
122
+
123
+ ### New Capabilities:
124
+ - **7-class classification** with natural imbalance (Dry Bean: 26% to 3.8%)
125
+ - **Real-time adaptation** with 165x faster retraining (HAB)
126
+ - **Drift monitoring** with automatic detection and recovery
127
+ - **Uncertainty quantification** for confidence-aware predictions
128
+
129
+ ## 🔮 Future Roadmap
130
+
131
+ ### Planned for v2.1:
132
+ - [ ] SHAP-like values for interpretability
133
+ - [ ] Kolmogorov-Smirnov test for drift detection
134
+ - [ ] Platt scaling for probability calibration
135
+ - [ ] Comprehensive error types (PKBoostError enum)
136
+ - [ ] Serde support for model serialization
137
+
138
+ ### Under Consideration:
139
+ - [ ] GPU acceleration for histogram building
140
+ - [ ] Distributed training for massive datasets
141
+ - [ ] AutoML integration for hyperparameter search
142
+ - [ ] Python package (PyPI distribution)
143
+
144
+ ## 📝 Migration Guide (v1.0 → v2.0)
145
+
146
+ ### No Breaking Changes!
147
+ All v1.0 code continues to work. New features are additive.
148
+
149
+ ### To Use New Features:
150
+ ```rust
151
+ // Multi-class (new in v2.0)
152
+ use pkboost::MultiClassPKBoost;
153
+ let mut model = MultiClassPKBoost::new(n_classes);
154
+
155
+ // HAB (new in v2.0)
156
+ use pkboost::{PartitionedClassifier, PartitionConfig};
157
+ let mut hab = PartitionedClassifier::new(PartitionConfig::default());
158
+
159
+ // Batched prediction (new in v2.0)
160
+ let probs = model.predict_proba_batch(&x_test, 1000)?;
161
+ ```
162
+
163
+ ## 🙏 Acknowledgments
164
+
165
+ - **UCI Machine Learning Repository**: Dry Bean dataset
166
+ - **Kaggle**: Credit Card fraud dataset
167
+ - **SimSIMD**: SIMD-accelerated distance calculations
168
+ - **Rayon**: Parallel processing framework
169
+
170
+ ## 📊 Statistics
171
+
172
+ - **Total Features**: 45 (up from 30 in v1.0)
173
+ - **Lines of Code**: ~6,500+ (up from ~5,000)
174
+ - **Datasets Tested**: 12+ (including real-world)
175
+ - **Benchmark Scripts**: 20+
176
+ - **Documentation Pages**: 15+
177
+
178
+ ---
179
+
180
+ **PKBoost v2.0**: The most comprehensive gradient boosting library for imbalanced multi-class problems under drift.
181
+
182
+ **Release Date**: January 2025
183
+ **License**: MIT
184
+ **Author**: Pushp Kharat