dscience-tools 2.3.2__tar.gz → 2.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. dscience_tools-2.3.3/LICENSE +203 -0
  2. {dscience_tools-2.3.2/dscience_tools.egg-info → dscience_tools-2.3.3}/PKG-INFO +106 -47
  3. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/README.md +98 -42
  4. {dscience_tools-2.3.2 → dscience_tools-2.3.3/dscience_tools.egg-info}/PKG-INFO +106 -47
  5. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/dscience_tools.egg-info/SOURCES.txt +0 -1
  6. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/pyproject.toml +34 -4
  7. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/setup.cfg +7 -3
  8. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/src/distance.py +60 -50
  9. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/src/metrics.py +69 -55
  10. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_distance.py +65 -8
  11. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_metrics.py +36 -7
  12. dscience_tools-2.3.2/LICENSE +0 -71
  13. dscience_tools-2.3.2/LICENSE-NC.txt +0 -131
  14. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/MANIFEST.in +0 -0
  15. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/dscience_tools.egg-info/dependency_links.txt +0 -0
  16. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/dscience_tools.egg-info/requires.txt +0 -0
  17. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/dscience_tools.egg-info/top_level.txt +0 -0
  18. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/src/__init__.py +0 -0
  19. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/src/ds_tool.py +0 -0
  20. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/src/models.py +0 -0
  21. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_add_missing.py +0 -0
  22. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_alphanum.py +0 -0
  23. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_category_stats.py +0 -0
  24. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_chatterjee.py +0 -0
  25. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_check_ninf.py +0 -0
  26. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_compute_metrics.py +0 -0
  27. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_corr_matrix.py +0 -0
  28. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_describe_cat.py +0 -0
  29. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_describe_num.py +0 -0
  30. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_df_stats.py +0 -0
  31. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_distance_additional.py +0 -0
  32. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_distance_numba_cupy.py +0 -0
  33. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_entropy.py +0 -0
  34. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_evaluate_cls.py +0 -0
  35. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_function_list.py +0 -0
  36. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_generate_dist.py +0 -0
  37. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_generate_from_metrics.py +0 -0
  38. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_grubbs.py +0 -0
  39. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_kl_divergence.py +0 -0
  40. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_labeling.py +0 -0
  41. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_min_max.py +0 -0
  42. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_models.py +0 -0
  43. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_normality.py +0 -0
  44. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_outliers.py +0 -0
  45. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_plot_cm.py +0 -0
  46. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_sparse_calc.py +0 -0
  47. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_stationarity.py +0 -0
  48. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_trials_res_df.py +0 -0
  49. {dscience_tools-2.3.2 → dscience_tools-2.3.3}/tests/test_zip_io.py +0 -0
@@ -0,0 +1,203 @@
1
+ Copyright 2025
2
+ Sergii Kavun
3
+ Apache License
4
+ Version 2.0, January 2004
5
+ http://www.apache.org/licenses/
6
+
7
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
8
+
9
+ 1. Definitions.
10
+
11
+ "License" shall mean the terms and conditions for use, reproduction,
12
+ and distribution as defined by Sections 1 through 9 of this document.
13
+
14
+ "Licensor" shall mean the copyright owner or entity authorized by
15
+ the copyright owner that is granting the License.
16
+
17
+ "Legal Entity" shall mean the union of the acting entity and all
18
+ other entities that control, are controlled by, or are under common
19
+ control with that entity. For the purposes of this definition,
20
+ "control" means (i) the power, direct or indirect, to cause the
21
+ direction or management of such entity, whether by contract or
22
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
23
+ outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ "You" (or "Your") shall mean an individual or Legal Entity
26
+ exercising permissions granted by this License.
27
+
28
+ "Source" form shall mean the preferred form for making modifications,
29
+ including but not limited to software source code, documentation
30
+ source, and configuration files.
31
+
32
+ "Object" form shall mean any form resulting from mechanical
33
+ transformation or translation of a Source form, including but
34
+ not limited to compiled object code, generated documentation,
35
+ and conversions to other media types.
36
+
37
+ "Work" shall mean the work of authorship, whether in Source or
38
+ Object form, made available under the License, as indicated by a
39
+ copyright notice that is included in or attached to the work
40
+ (an example is provided in the Appendix below).
41
+
42
+ "Derivative Works" shall mean any work, whether in Source or Object
43
+ form, that is based on (or derived from) the Work and for which the
44
+ editorial revisions, annotations, elaborations, or other modifications
45
+ represent, as a whole, an original work of authorship. For the purposes
46
+ of this License, Derivative Works shall not include works that remain
47
+ separable from, or merely link (or bind by name) to the interfaces of,
48
+ the Work and Derivative Works thereof.
49
+
50
+ "Contribution" shall mean any work of authorship, including
51
+ the original version of the Work and any modifications or additions
52
+ to that Work or Derivative Works thereof, that is intentionally
53
+ submitted to Licensor for inclusion in the Work by the copyright owner
54
+ or by an individual or Legal Entity authorized to submit on behalf of
55
+ the copyright owner. For the purposes of this definition, "submitted"
56
+ means any form of electronic, verbal, or written communication sent
57
+ to the Licensor or its representatives, including but not limited to
58
+ communication on electronic mailing lists, source code control systems,
59
+ and issue tracking systems that are managed by, or on behalf of, the
60
+ Licensor for the purpose of discussing and improving the Work, but
61
+ excluding communication that is conspicuously marked or otherwise
62
+ designated in writing by the copyright owner as "Not a Contribution."
63
+
64
+ "Contributor" shall mean Licensor and any individual or Legal Entity
65
+ on behalf of whom a Contribution has been received by Licensor and
66
+ subsequently incorporated within the Work.
67
+
68
+ 2. Grant of Copyright License. Subject to the terms and conditions of
69
+ this License, each Contributor hereby grants to You a perpetual,
70
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
71
+ copyright license to reproduce, prepare Derivative Works of,
72
+ publicly display, publicly perform, sublicense, and distribute the
73
+ Work and such Derivative Works in Source or Object form.
74
+
75
+ 3. Grant of Patent License. Subject to the terms and conditions of
76
+ this License, each Contributor hereby grants to You a perpetual,
77
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
78
+ (except as stated in this section) patent license to make, have made,
79
+ use, offer to sell, sell, import, and otherwise transfer the Work,
80
+ where such license applies only to those patent claims licensable
81
+ by such Contributor that are necessarily infringed by their
82
+ Contribution(s) alone or by combination of their Contribution(s)
83
+ with the Work to which such Contribution(s) was submitted. If You
84
+ institute patent litigation against any entity (including a
85
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
86
+ or a Contribution incorporated within the Work constitutes direct
87
+ or contributory patent infringement, then any patent licenses
88
+ granted to You under this License for that Work shall terminate
89
+ as of the date such litigation is filed.
90
+
91
+ 4. Redistribution. You may reproduce and distribute copies of the
92
+ Work or Derivative Works thereof in any medium, with or without
93
+ modifications, and in Source or Object form, provided that You
94
+ meet the following conditions:
95
+
96
+ (a) You must give any other recipients of the Work or
97
+ Derivative Works a copy of this License; and
98
+
99
+ (b) You must cause any modified files to carry prominent notices
100
+ stating that You changed the files; and
101
+
102
+ (c) You must retain, in the Source form of any Derivative Works
103
+ that You distribute, all copyright, patent, trademark, and
104
+ attribution notices from the Source form of the Work,
105
+ excluding those notices that do not pertain to any part of
106
+ the Derivative Works; and
107
+
108
+ (d) If the Work includes a "NOTICE" text file as part of its
109
+ distribution, then any Derivative Works that You distribute must
110
+ include a readable copy of the attribution notices contained
111
+ within such NOTICE file, excluding those notices that do not
112
+ pertain to any part of the Derivative Works, in at least one
113
+ of the following places: within a NOTICE text file distributed
114
+ as part of the Derivative Works; within the Source form or
115
+ documentation, if provided along with the Derivative Works; or,
116
+ within a display generated by the Derivative Works, if and
117
+ wherever such third-party notices normally appear. The contents
118
+ of the NOTICE file are for informational purposes only and
119
+ do not modify the License. You may add Your own attribution
120
+ notices within Derivative Works that You distribute, alongside
121
+ or as an addendum to the NOTICE text from the Work, provided
122
+ that such additional attribution notices cannot be construed
123
+ as modifying the License.
124
+
125
+ You may add Your own copyright statement to Your modifications and
126
+ may provide additional or different license terms and conditions
127
+ for use, reproduction, or distribution of Your modifications, or
128
+ for any such Derivative Works as a whole, provided Your use,
129
+ reproduction, and distribution of the Work otherwise complies with
130
+ the conditions stated in this License.
131
+
132
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
133
+ any Contribution intentionally submitted for inclusion in the Work
134
+ by You to the Licensor shall be under the terms and conditions of
135
+ this License, without any additional terms or conditions.
136
+ Notwithstanding the above, nothing herein shall supersede or modify
137
+ the terms of any separate license agreement you may have executed
138
+ with Licensor regarding such Contributions.
139
+
140
+ 6. Trademarks. This License does not grant permission to use the trade
141
+ names, trademarks, service marks, or product names of the Licensor,
142
+ except as required for reasonable and customary use in describing the
143
+ origin of the Work and reproducing the content of the NOTICE file.
144
+
145
+ 7. Disclaimer of Warranty. Unless required by applicable law or
146
+ agreed to in writing, Licensor provides the Work (and each
147
+ Contributor provides its Contributions) on an "AS IS" BASIS,
148
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
149
+ implied, including, without limitation, any warranties or conditions
150
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
151
+ PARTICULAR PURPOSE. You are solely responsible for determining the
152
+ appropriateness of using or redistributing the Work and assume any
153
+ risks associated with Your exercise of permissions under this License.
154
+
155
+ 8. Limitation of Liability. In no event and under no legal theory,
156
+ whether in tort (including negligence), contract, or otherwise,
157
+ unless required by applicable law (such as deliberate and grossly
158
+ negligent acts) or agreed to in writing, shall any Contributor be
159
+ liable to You for damages, including any direct, indirect, special,
160
+ incidental, or consequential damages of any character arising as a
161
+ result of this License or out of the use or inability to use the
162
+ Work (including but not limited to damages for loss of goodwill,
163
+ work stoppage, computer failure or malfunction, or any and all
164
+ other commercial damages or losses), even if such Contributor
165
+ has been advised of the possibility of such damages.
166
+
167
+ 9. Accepting Warranty or Additional Liability. While redistributing
168
+ the Work or Derivative Works thereof, You may choose to offer,
169
+ and charge a fee for, acceptance of support, warranty, indemnity,
170
+ or other liability obligations and/or rights consistent with this
171
+ License. However, in accepting such obligations, You may act only
172
+ on Your own behalf and on Your sole responsibility, not on behalf
173
+ of any other Contributor, and only if You agree to indemnify,
174
+ defend, and hold each Contributor harmless for any liability
175
+ incurred by, or claims asserted against, such Contributor by reason
176
+ of your accepting any such warranty or additional liability.
177
+
178
+ END OF TERMS AND CONDITIONS
179
+
180
+ APPENDIX: How to apply the Apache License to your work.
181
+
182
+ To apply the Apache License to your work, attach the following
183
+ boilerplate notice, with the fields enclosed by brackets "[]"
184
+ replaced with your own identifying information. (Don't include
185
+ the brackets!) The text should be enclosed in the appropriate
186
+ comment syntax for the file format. We also recommend that a
187
+ file or class name and description of purpose be included on the
188
+ same "printed page" as the copyright notice for easier
189
+ identification within third-party archives.
190
+
191
+ Copyright [yyyy] [name of copyright owner]
192
+
193
+ Licensed under the Apache License, Version 2.0 (the "License");
194
+ you may not use this file except in compliance with the License.
195
+ You may obtain a copy of the License at
196
+
197
+ http://www.apache.org/licenses/LICENSE-2.0
198
+
199
+ Unless required by applicable law or agreed to in writing, software
200
+ distributed under the License is distributed on an "AS IS" BASIS,
201
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202
+ See the License for the specific language governing permissions and
203
+ limitations under the License.
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dscience_tools
3
- Version: 2.3.2
3
+ Version: 2.3.3
4
4
  Summary: DSTools: Data Science Tools Library
5
5
  Home-page: https://github.com/s-kav/ds_tools
6
6
  Author: Sergii Kavun
7
7
  Author-email: Sergii Kavun <kavserg@gmail.com>
8
8
  Maintainer-email: Sergii Kavun <kavserg@gmail.com>
9
- License: PolyForm-Noncommercial-1.0.0 OR Commercial
9
+ License: Apache License 2.0
10
10
  Project-URL: Homepage, https://github.com/s-kav/ds_tools
11
11
  Project-URL: Documentation, https://s-kav.github.io/ds_tools/
12
12
  Project-URL: Repository, https://github.com/s-kav/ds_tools.git
@@ -20,14 +20,17 @@ Classifier: Operating System :: OS Independent
20
20
  Classifier: Programming Language :: Python
21
21
  Classifier: Programming Language :: Python :: 3
22
22
  Classifier: Programming Language :: Python :: 3 :: Only
23
- Classifier: Programming Language :: Python :: 3.6
23
+ Classifier: Programming Language :: Python :: 3.9
24
+ Classifier: Programming Language :: Python :: 3.10
25
+ Classifier: Programming Language :: Python :: 3.11
26
+ Classifier: Programming Language :: Python :: 3.12
24
27
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
28
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
26
29
  Classifier: Topic :: Scientific/Engineering :: Mathematics
27
- Requires-Python: >=3.6.0
30
+ Classifier: License :: OSI Approved :: Apache Software License
31
+ Requires-Python: >=3.9.0
28
32
  Description-Content-Type: text/markdown
29
33
  License-File: LICENSE
30
- License-File: LICENSE-NC.txt
31
34
  Requires-Dist: pandas<=2.2.3,>=0.25.0
32
35
  Requires-Dist: numpy<=2.2.0,>=1.22.0
33
36
  Provides-Extra: test
@@ -47,7 +50,7 @@ Dynamic: license-file
47
50
  [![Tests](https://img.shields.io/github/actions/workflow/status/s-kav/ds_tools/python-publish.yml?label=Tests&color=darkgreen&style=flat)](https://github.com/s-kav/ds_tools/actions)
48
51
  [![PyPI version](https://img.shields.io/pypi/v/dscience-tools.svg)](https://pypi.org/project/dscience-tools/)
49
52
  [![codecov](https://codecov.io/gh/s-kav/ds_tools/branch/main/graph/badge.svg)](https://codecov.io/gh/s-kav/ds_tools)
50
- [![License: PolyForm Non-Commercial 1.0.0](https://img.shields.io/badge/License-PolyForm%20Non--Commercial-blue.svg)](https://polyformproject.org/licenses/noncommercial/1.0.0/)
53
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
51
54
  [![PyPI Downloads](https://static.pepy.tech/badge/dscience-tools)](https://pepy.tech/projects/dscience-tools)
52
55
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
53
56
  [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-darkgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
@@ -73,17 +76,24 @@ Dynamic: license-file
73
76
  * [License](#license)
74
77
 
75
78
 
76
- # Short intro
79
+ # Short intro: DSTools — drop-in replacement for scattered scipy/sklearn utils
77
80
 
78
- **DSTools** is a Python library designed to assist data scientists and researchers by providing a collection of helpful functions for various stages of a data science project, from data exploration and preprocessing to model evaluation and synthetic data generation.
81
+ **10x faster** distance calculations via Numba/GPU | **Unified API** for metrics + distances | Cohen's d, FFT, Mahalanobis in one import
79
82
 
80
- The library is built upon the author's extensive multi-decade experience (30+ years) in data science, statistical modeling, and enterprise software development. Drawing from real-world challenges encountered across diverse industries including finance, banking, healthcare, insurance, and e-commerce, this toolkit addresses common pain points that practitioners face daily in their analytical workflows.
81
-
82
- The development philosophy emphasizes practical utility over theoretical complexity, incorporating battle-tested patterns and methodologies that have proven effective in production environments. Each function and module reflects lessons learned from managing large-scale data projects, optimizing computational performance, and ensuring code maintainability in collaborative team settings.
83
+ ```bash
84
+ pip install dscience-tools
83
85
 
84
- The library encapsulates best practices developed through years of consulting work, academic research collaborations, and hands-on problem-solving in high-stakes business environments. It represents a distillation of proven techniques, streamlined workflows, and robust error-handling approaches that have evolved through countless iterations and real-world applications.
86
+ ```
+
+ ```python
+ from ds_tools import DSTools
87
+ tools = DSTools()
88
+ tools.metrics.cohens_d(group_a, group_b) # numpy/numba/cupy auto-dispatch
89
+ ```
85
90
 
86
- This comprehensive toolkit serves as a bridge between theoretical data science concepts and practical implementation needs, offering developers and researchers a reliable foundation built on decades of field-tested expertise and continuous refinement based on community feedback and emerging industry requirements. This library with helper functions to accelerate and simplify various stages of the data science research cycle.
91
+ | Feature | scipy | sklearn | DSTools |
92
+ | :--- | :---: | :---: | :---: |
93
+ | GPU acceleration | ❌ | ❌ | ✅ |
94
+ | Auto backend dispatch | ❌ | ❌ | ✅ |
95
+ | Cohen's d | ❌ | ❌ | ✅ |
96
+ | FFT + metrics in one API | ❌ | ❌ | ✅ |
87
97
 
88
98
  This toolkit is built on top of popular libraries like Pandas, Polars, Scikit-learn, Optuna, and Matplotlib, providing a higher-level API for common tasks in Exploratory Data Analysis (EDA), feature preprocessing, model evaluation, and synthetic data generation. It is designed for data scientists, analysts, and researchers who want to write cleaner, more efficient, and more reproducible code.
89
99
 
@@ -103,14 +113,9 @@ This toolkit is built on top of popular libraries like Pandas, Polars, Scikit-le
103
113
  - **Advanced Statistics:** Calculate non-parametric correlation (`chatterjee_correlation`), entropy, and KL-divergence.
104
114
  - **Utilities:** Save/load DataFrames to/from ZIP archives, generate random alphanumeric codes, and more.
105
115
 
106
- ## What's New in Version 2.0.0
107
-
108
- This version marks a major architectural refactoring of the library, focusing on modularity, performance, and advanced ML features.
116
+ ## What's New in Version X.X.X
109
117
 
110
- - **✨ Modular Design:** The toolkit is now re-organized into logical namespaces. Instead of a single flat API, you now access functionality through `tools.metrics`, `tools.distance`, etc.
111
- - **🚀 High-Performance Backends:** Major functions in `metrics` and `distance` now automatically leverage **GPU acceleration (CuPy)** and **parallel CPU execution (Numba)** for significant speedups on large datasets.
112
- - **🤖 Gradient Calculation:** Key loss functions (like `mse`, `mae`, `huber_loss`) can now return their gradients (`return_grad=True`), making them suitable for custom training loops in ML frameworks.
113
- - **📈 Training Monitoring:** A new real-time monitoring system has been added to the `metrics` module to track and plot metrics during model training.
118
+ See [CHANGELOG.md](/CHANGELOG.md).
114
119
 
115
120
  ## TODO & Future Plans
116
121
 
@@ -118,17 +123,17 @@ This library is actively maintained and will be expanded to cover more aspects o
118
123
 
119
124
  Here is the development roadmap:
120
125
 
121
- - [ ] **Expand Core Modules:**
122
- - [ ] Add more loss and another functions and metrics to `tools.metrics` (e.g. for classification, clusterization, etc.).
123
- - [ ] Implement more distance measures in `tools.distance` (e.g., Levenshtein for strings, Silhouette, etc.).
126
+ - [X] **Expand Core Modules:**
127
+ - [X] Add more loss functions and other metrics to `tools.metrics` (e.g., for classification, clustering, etc.).
128
+ - [X] Implement more distance measures in `tools.distance` (e.g., Levenshtein for strings, Silhouette, etc.).
124
129
  - [ ] **New `Preprocessing` Module:**
125
130
  - [ ] Develop high-performance feature scaling and encoding functions.
126
131
  - [ ] Add utilities for handling time-series data.
127
132
  - [ ] **New `Visualization` Module:**
128
133
  - [ ] Create simple wrappers around Matplotlib/Seaborn for common plots (e.g., feature distribution, ROC curves).
129
- - [ ] **Community & Contributions:**
130
- - [ ] Improve documentation with more examples.
131
- - [ ] Create contribution guidelines (`CONTRIBUTING.md`).
134
+ - [X] **Community & Contributions:**
135
+ - [X] Improve documentation with more examples.
136
+ - [X] Create contribution guidelines ([CONTRIBUTING.md](/CONTRIBUTING.md)).
132
137
 
133
138
  Your feature requests and contributions are highly encouraged! Please open an issue to suggest a new function.
134
139
 
@@ -157,7 +162,7 @@ cd ds_tools
157
162
 
158
163
  ## Install Dependencies
159
164
 
160
- Ensure you have Python version 3.8 or higher and install the required packages:
165
+ Ensure you have Python version 3.9 or higher and install the required packages:
161
166
 
162
167
  ```bash
163
168
  pip install -r requirements.txt
@@ -166,14 +171,22 @@ pip install -r requirements.txt
166
171
 
167
172
  # Function Overview
168
173
 
169
- The library provides a wide range of functions. To see a full, formatted list of available tools, you can use the function_list method:
174
+ The library provides a wide range of functions. To see a full, formatted list of available tools, metrics and distances, you can use the appropriate list methods:
170
175
 
171
176
  ```python
172
177
 
173
178
  from ds_tools import DSTools
179
+ import pandas as pd
174
180
 
175
181
  tools = DSTools()
176
- tools.function_list()
182
+ print('\n', '='*30, ' Functions ', '='*30, '\n')
183
+ display(pd.DataFrame(tools.function_list()))
184
+
185
+ print('\n', '='*30, ' Distances ', '='*30, '\n')
186
+ display(tools.distance.list_distances().iloc[:, 0:2])
187
+
188
+ print('\n', '='*30, ' Metrics ', '='*30, '\n')
189
+ display(tools.metrics.list_metrics().iloc[:, 0:2])
177
190
 
178
191
  ```
179
192
 
@@ -189,7 +202,7 @@ Calculate Mean Absolute Error and its gradient. The best backend (GPU/Numba/NumP
189
202
 
190
203
  import numpy as np
191
204
 
192
- y_true = np.array()
205
+ y_true = np.array([1.0, 2.0, 3.0, 4.0])
193
206
  y_pred = np.array([1.1, 2.2, 2.8, 4.3])
194
207
 
195
208
  # Calculate only the loss value
@@ -204,8 +217,8 @@ print(f"Gradient: {grad}")
204
217
  ## Using the Distance Module
205
218
 
206
219
  ```python
207
- u = np.array()
208
- v = np.array()
220
+ u = np.random.rand(4)
221
+ v = np.random.rand(4)
209
222
 
210
223
  euc_dist = tools.distance.euclidean(u, v)
211
224
  print(f"Euclidean Distance: {euc_dist:.4f}")
@@ -218,7 +231,7 @@ print(f"Euclidean Distance: {euc_dist:.4f}")
218
231
  tools.metrics.start_monitoring()
219
232
 
220
233
  # 2. Simulate training loop
221
- for epoch in range(10):
234
+ for epoch in range(100):
222
235
  # Dummy loss values
223
236
  loss = 1 / (epoch + 1)
224
237
  val_loss = 1.2 / (epoch + 1) + np.random.rand() * 0.1
@@ -247,7 +260,7 @@ tools = DSTools()
247
260
 
248
261
  # 2. Generate some dummy data
249
262
  y_true = np.array([0, 1, 1, 0, 1, 0, 0, 1])
250
- y_probs = np.array([0.1, 0.8, 0.6, 0.3, 0.9, 0.2, 0.4, 0.7])
263
+ y_probs = np.array([0.1, 0.8, 0.6, 0.7, 0.4, 0.2, 0.4, 0.7])
251
264
 
252
265
  # 3. Get a comprehensive evaluation report
253
266
  # This will print metrics and show plots for ROC and Precision-Recall curves.
@@ -258,6 +271,30 @@ print(f"\nROC AUC Score: {results['roc_auc']:.4f}")
258
271
 
259
272
  ```
260
273
 
274
+ ## Calculate Cohen's d (effect size) and compute a fast FFT
275
+
276
+ ```python
277
+
278
+ import numpy as np
279
+ from ds_tools import DSTools
280
+
281
+ tools = DSTools()
282
+
283
+ # 1. Calculate Cohen's d (effect size)
284
+ group_a = np.random.normal(10, 2, 5000)
285
+ group_b = np.random.normal(12, 2, 5000)
286
+ effect_size = tools.metrics.cohens_d(group_a, group_b)
287
+ print(f"Effect Size: {effect_size}")
288
+
289
+ # 2. Compute FFT using Numba engine
290
+ signal = np.random.random(1000)
291
+
292
+ # The input is automatically padded to the next power of two (1000 -> 1024 here; a 100-sample input would be padded to 128)
293
+ spectrum = tools.metrics.fft(signal, engine='numba')
294
+ print(f"Spectrum shape: {len(spectrum)}")
295
+
296
+ ```
297
+
261
298
  This will produce:
262
299
  - A detailed printout of key metrics (Accuracy, ROC AUC, Average Precision, etc.).
263
300
  - A full classification report.
@@ -315,12 +352,18 @@ print(f"Generated Std: {np.std(generated_data):.2f}")
315
352
  Visualize the relationships in your data with a highly customizable correlation matrix.
316
353
 
317
354
  ```python
318
- # --- Sample Data ---
355
+ import numpy as np
356
+ import pandas as pd
357
+ from ds_tools import DSTools, CorrelationConfig
358
+
359
+ tools = DSTools()
360
+
319
361
  data = {
320
362
  'feature_a': np.random.rand(100) * 100,
321
363
  'feature_b': np.random.rand(100) * 50 + 25,
322
364
  'feature_c': np.random.rand(100) * -80,
323
365
  }
366
+
324
367
  df = pd.DataFrame(data)
325
368
  df['feature_d'] = df['feature_a'] * 1.5 + np.random.normal(0, 10, 100)
326
369
 
@@ -343,7 +386,11 @@ This will display a publication-quality heatmap, masked to show only the lower t
343
386
  Quickly understand the distribution of your categorical features.
344
387
 
345
388
  ```python
346
- # --- Sample Data ---
389
+ import pandas as pd
390
+ from ds_tools import DSTools, CorrelationConfig
391
+
392
+ tools = DSTools()
393
+
347
394
  data = {
348
395
  'city': ['London', 'Paris', 'London', 'New York', 'Paris', 'London'],
349
396
  'status': ['Active', 'Inactive', 'Active', 'Active', 'Inactive', 'Active']
@@ -368,14 +415,21 @@ Helps to plot confusion matrix in graphical kind, especially for calssification
368
415
 
369
416
 
370
417
  ```python
418
+ import numpy as np
419
+ from ds_tools import DSTools, CorrelationConfig
420
+
421
+ tools = DSTools()
422
+
371
423
  np.random.seed(42)
372
424
  N_SAMPLES = 1500
425
+ N_CLASSES = 3
373
426
 
374
- y_true_multi = np.random.randint(0, 3, size=N_SAMPLES)
427
+ random_errors = np.random.randint(1, N_CLASSES, size=N_SAMPLES)
428
+ y_true_multi = np.random.randint(0, N_CLASSES, size=N_SAMPLES)
375
429
  correct_preds = np.random.rand(N_SAMPLES) < 0.75
376
- y_pred_multi = np.where(correct_preds, y_true_multi, (y_true_multi + random_errors) % 3)
430
+ y_pred_multi = np.where(correct_preds, y_true_multi, (y_true_multi + random_errors) % N_CLASSES)
377
431
 
378
- plot_confusion_matrix(
432
+ tools.plot_confusion_matrix(
379
433
  y_true_multi,
380
434
  y_pred_multi,
381
435
  class_labels=['Cat', 'Dog', 'Bird'],
@@ -438,7 +492,7 @@ A high-performance toolkit for calculating distances and similarities.
438
492
 
439
493
  # Contributing
440
494
 
441
- See [CONTRIBUTING](/CONTRIBUTING.md)
495
+ See [CONTRIBUTING](/CONTRIBUTING.md) and [CONTRIBUTOR_LICENSE_AGREEMENT](/CONTRIBUTOR_LICENSE_AGREEMENT.md).
442
496
 
443
497
  # TODO
444
498
  1. Add some important kind of plots:
@@ -461,24 +515,29 @@ See [CONTRIBUTING](/CONTRIBUTING.md)
461
515
 
462
516
  📌 Elbow Curve – Helps choose the right number of clusters in K-Means.
463
517
 
464
- 2. Implement Fast Fourier Transform (FFT) algorithm and Shannon’s interpolation formula: done, 2.3.0.
518
+ 2. Implement Fast Fourier Transform (FFT) algorithm and Shannon’s interpolation formula: done, version 2.3.2.
465
519
 
466
- 3. Add some fast distance metrics (expand of existed): done, 2.3.0.
520
+ 3. Add some fast distance metrics (extending the existing ones): done, version 2.3.2.
467
521
 
468
522
 
469
523
  # References
470
524
 
471
525
  For citing you should use:
472
526
 
473
- Sergii Kavun. (2025). s-kav/ds_tools: Version 2.0.0 (v.2.0.0). Zenodo. https://doi.org/10.5281/zenodo.17080822
527
+ Sergii Kavun. (2025). s-kav/ds_tools: Version 2.3.2 (v.2.3.2). Zenodo. https://doi.org/10.5281/zenodo.17080822
474
528
 
475
529
  [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.17080822.svg)](https://doi.org/10.5281/zenodo.17080822)
476
530
 
477
531
 
478
532
  # License
479
- This project uses **dual licensing**:
480
533
 
481
- - 🎓 **Free for Academic & Research**: PolyForm Noncommercial 1.0.0
482
- - 💼 **Commercial License Available**: Contact us for business use [License](https://github.com/s-kav/ds_tools/blob/main/CLA.md)
534
+ This project is licensed under the **Apache License 2.0**.
535
+
536
+ You are free to use, modify, and distribute this software in both commercial
537
+ and non-commercial projects.
538
+
539
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
540
+
541
+ ✨ Thanks for visiting DSTools: Data Science Research Toolkit!
483
542
 
484
- [📋 Full License Details](https://github.com/s-kav/ds_tools/blob/main/LICENSE) | [💰 Get Commercial License](mailto:kavserg@gmail.com)
543
+ ![visitors](https://visitor-badge.laobi.icu/badge?page_id=s-kav.ds_tools)