dscience-tools 2.0.1__tar.gz → 2.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. {dscience_tools-2.0.1/dscience_tools.egg-info → dscience_tools-2.3.0}/PKG-INFO +26 -5
  2. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/README.md +24 -3
  3. {dscience_tools-2.0.1 → dscience_tools-2.3.0/dscience_tools.egg-info}/PKG-INFO +26 -5
  4. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/dscience_tools.egg-info/SOURCES.txt +2 -0
  5. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/dscience_tools.egg-info/requires.txt +1 -1
  6. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/pyproject.toml +2 -2
  7. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/setup.cfg +9 -5
  8. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/src/__init__.py +1 -1
  9. dscience_tools-2.3.0/src/distance.py +1018 -0
  10. dscience_tools-2.3.0/src/metrics.py +1499 -0
  11. dscience_tools-2.3.0/tests/test_distance.py +949 -0
  12. dscience_tools-2.3.0/tests/test_distance_additional.py +118 -0
  13. dscience_tools-2.3.0/tests/test_distance_numba_cupy.py +181 -0
  14. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_metrics.py +258 -13
  15. dscience_tools-2.0.1/src/distance.py +0 -500
  16. dscience_tools-2.0.1/src/metrics.py +0 -649
  17. dscience_tools-2.0.1/tests/test_distance.py +0 -322
  18. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/LICENSE +0 -0
  19. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/LICENSE-NC.txt +0 -0
  20. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/MANIFEST.in +0 -0
  21. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/dscience_tools.egg-info/dependency_links.txt +0 -0
  22. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/dscience_tools.egg-info/top_level.txt +0 -0
  23. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/src/ds_tool.py +0 -0
  24. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/src/models.py +0 -0
  25. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_add_missing.py +0 -0
  26. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_alphanum.py +0 -0
  27. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_category_stats.py +0 -0
  28. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_chatterjee.py +0 -0
  29. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_check_ninf.py +0 -0
  30. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_compute_metrics.py +0 -0
  31. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_corr_matrix.py +0 -0
  32. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_describe_cat.py +0 -0
  33. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_describe_num.py +0 -0
  34. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_df_stats.py +0 -0
  35. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_entropy.py +0 -0
  36. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_evaluate_cls.py +0 -0
  37. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_function_list.py +0 -0
  38. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_generate_dist.py +0 -0
  39. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_generate_from_metrics.py +0 -0
  40. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_grubbs.py +0 -0
  41. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_kl_divergence.py +0 -0
  42. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_labeling.py +0 -0
  43. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_min_max.py +0 -0
  44. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_models.py +0 -0
  45. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_normality.py +0 -0
  46. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_outliers.py +0 -0
  47. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_plot_cm.py +0 -0
  48. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_sparse_calc.py +0 -0
  49. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_stationarity.py +0 -0
  50. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_trials_res_df.py +0 -0
  51. {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_zip_io.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dscience_tools
3
- Version: 2.0.1
3
+ Version: 2.3.0
4
4
  Summary: DSTools: Data Science Tools Library
5
5
  Home-page: https://github.com/s-kav/ds_tools
6
6
  Author: Sergii Kavun
@@ -35,7 +35,7 @@ Requires-Dist: pytest; extra == "test"
35
35
  Requires-Dist: pytest-cov; extra == "test"
36
36
  Requires-Dist: pytest-mock; extra == "test"
37
37
  Provides-Extra: performance
38
- Requires-Dist: cupy-cuda12x; extra == "performance"
38
+ Requires-Dist: cupy-cuda13x; extra == "performance"
39
39
  Requires-Dist: numba; extra == "performance"
40
40
  Provides-Extra: lint
41
41
  Requires-Dist: black==25.1.0; extra == "lint"
@@ -44,13 +44,18 @@ Dynamic: license-file
44
44
 
45
45
  # DSTools: Data Science Research Toolkit
46
46
 
47
- [![Tests](https://github.com/s-kav/ds_tools/actions/workflows/python-publish.yml/badge.svg)](https://github.com/s-kav/ds_tools/actions)
47
+ [![Tests](https://img.shields.io/github/actions/workflow/status/s-kav/ds_tools/python-publish.yml?label=Tests&color=darkgreen&style=flat)](https://github.com/s-kav/ds_tools/actions)
48
48
  [![PyPI version](https://img.shields.io/pypi/v/dscience-tools.svg)](https://pypi.org/project/dscience-tools/)
49
49
  [![codecov](https://codecov.io/gh/s-kav/ds_tools/branch/main/graph/badge.svg)](https://codecov.io/gh/s-kav/ds_tools)
50
50
  [![License: PolyForm Non-Commercial 1.0.0](https://img.shields.io/badge/License-PolyForm%20Non--Commercial-blue.svg)](https://polyformproject.org/licenses/noncommercial/1.0.0/)
51
51
  [![PyPI Downloads](https://static.pepy.tech/badge/dscience-tools)](https://pepy.tech/projects/dscience-tools)
52
52
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
53
- [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
53
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-darkgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
54
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
55
+ [![PEP8](https://img.shields.io/badge/code%20style-pep8-orange.svg)](https://www.python.org/dev/peps/pep-0008/)
56
+ ![PyPI - Implementation](https://img.shields.io/pypi/implementation/dscience-tools)
57
+ [![Supported Python Versions](https://img.shields.io/badge/Python-3.9%20|%203.10%20|%203.11%20|%203.12-3776AB?style=flat&logo=python&logoColor=white)](https://www.python.org)
58
+ [![Supports GPU and CPU](https://img.shields.io/badge/GPU%2FCPU-Supported-darkgreen?style=flat&logo=nvidia&logoColor=white)](https://pytorch.org/)
54
59
 
55
60
 
56
61
  # Table of Contents
@@ -63,6 +68,7 @@ Dynamic: license-file
63
68
  * [Available Tools](#available-tools)
64
69
  * [Authors](#authors)
65
70
  * [Contributing](#contributing)
71
+ * [TODO](#todo)
66
72
  * [References](#references)
67
73
  * [License](#license)
68
74
 
@@ -432,6 +438,22 @@ A high-performance toolkit for calculating distances and similarities.
432
438
 
433
439
  See [CONTRIBUTING](/CONTRIBUTING.md)
434
440
 
441
+ # TODO
442
+ 1. Add some important kinds of plots:
443
+ 📌 KS Plot – Measures how well your model separates positive and negative classes.
444
+ 📌 SHAP Plot – Explains feature impact and model interpretability.
445
+ 📌 QQ Plot – Checks if your data follows a theoretical distribution.
446
+ 📌 Cumulative Explained Variance – Helps decide the optimal number of PCA components.
447
+ 📌 Gini vs Entropy – Key metrics for understanding decision tree impurity.
448
+ 📌 Bias–Variance Tradeoff – Shows the balance between underfitting and overfitting.
449
+ 📌 ROC Curve – Evaluates classification performance across thresholds.
450
+ 📌 Precision–Recall Curve – Crucial for imbalanced datasets.
451
+ 📌 Elbow Curve – Helps choose the right number of clusters in K-Means.
452
+
453
+ 2. Implement the Fast Fourier Transform (FFT) algorithm and Shannon’s interpolation formula.
454
+
455
+ 3. Add more fast distance metrics (expanding the existing set).
456
+
435
457
 
436
458
  # References
437
459
 
@@ -449,4 +471,3 @@ This project uses **dual licensing**:
449
471
  - 💼 **Commercial License Available**: Contact us for business use [License](https://github.com/s-kav/ds_tools/blob/main/CLA.md)
450
472
 
451
473
  [📋 Full License Details](https://github.com/s-kav/ds_tools/blob/main/LICENSE) | [💰 Get Commercial License](mailto:kavserg@gmail.com)
452
-
@@ -1,12 +1,17 @@
1
1
  # DSTools: Data Science Research Toolkit
2
2
 
3
- [![Tests](https://github.com/s-kav/ds_tools/actions/workflows/python-publish.yml/badge.svg)](https://github.com/s-kav/ds_tools/actions)
3
+ [![Tests](https://img.shields.io/github/actions/workflow/status/s-kav/ds_tools/python-publish.yml?label=Tests&color=darkgreen&style=flat)](https://github.com/s-kav/ds_tools/actions)
4
4
  [![PyPI version](https://img.shields.io/pypi/v/dscience-tools.svg)](https://pypi.org/project/dscience-tools/)
5
5
  [![codecov](https://codecov.io/gh/s-kav/ds_tools/branch/main/graph/badge.svg)](https://codecov.io/gh/s-kav/ds_tools)
6
6
  [![License: PolyForm Non-Commercial 1.0.0](https://img.shields.io/badge/License-PolyForm%20Non--Commercial-blue.svg)](https://polyformproject.org/licenses/noncommercial/1.0.0/)
7
7
  [![PyPI Downloads](https://static.pepy.tech/badge/dscience-tools)](https://pepy.tech/projects/dscience-tools)
8
8
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
9
- [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
9
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-darkgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
10
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
11
+ [![PEP8](https://img.shields.io/badge/code%20style-pep8-orange.svg)](https://www.python.org/dev/peps/pep-0008/)
12
+ ![PyPI - Implementation](https://img.shields.io/pypi/implementation/dscience-tools)
13
+ [![Supported Python Versions](https://img.shields.io/badge/Python-3.9%20|%203.10%20|%203.11%20|%203.12-3776AB?style=flat&logo=python&logoColor=white)](https://www.python.org)
14
+ [![Supports GPU and CPU](https://img.shields.io/badge/GPU%2FCPU-Supported-darkgreen?style=flat&logo=nvidia&logoColor=white)](https://pytorch.org/)
10
15
 
11
16
 
12
17
  # Table of Contents
@@ -19,6 +24,7 @@
19
24
  * [Available Tools](#available-tools)
20
25
  * [Authors](#authors)
21
26
  * [Contributing](#contributing)
27
+ * [TODO](#todo)
22
28
  * [References](#references)
23
29
  * [License](#license)
24
30
 
@@ -388,6 +394,22 @@ A high-performance toolkit for calculating distances and similarities.
388
394
 
389
395
  See [CONTRIBUTING](/CONTRIBUTING.md)
390
396
 
397
+ # TODO
398
+ 1. Add some important kinds of plots:
399
+ 📌 KS Plot – Measures how well your model separates positive and negative classes.
400
+ 📌 SHAP Plot – Explains feature impact and model interpretability.
401
+ 📌 QQ Plot – Checks if your data follows a theoretical distribution.
402
+ 📌 Cumulative Explained Variance – Helps decide the optimal number of PCA components.
403
+ 📌 Gini vs Entropy – Key metrics for understanding decision tree impurity.
404
+ 📌 Bias–Variance Tradeoff – Shows the balance between underfitting and overfitting.
405
+ 📌 ROC Curve – Evaluates classification performance across thresholds.
406
+ 📌 Precision–Recall Curve – Crucial for imbalanced datasets.
407
+ 📌 Elbow Curve – Helps choose the right number of clusters in K-Means.
408
+
409
+ 2. Implement the Fast Fourier Transform (FFT) algorithm and Shannon’s interpolation formula.
410
+
411
+ 3. Add more fast distance metrics (expanding the existing set).
412
+
391
413
 
392
414
  # References
393
415
 
@@ -405,4 +427,3 @@ This project uses **dual licensing**:
405
427
  - 💼 **Commercial License Available**: Contact us for business use [License](https://github.com/s-kav/ds_tools/blob/main/CLA.md)
406
428
 
407
429
  [📋 Full License Details](https://github.com/s-kav/ds_tools/blob/main/LICENSE) | [💰 Get Commercial License](mailto:kavserg@gmail.com)
408
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dscience_tools
3
- Version: 2.0.1
3
+ Version: 2.3.0
4
4
  Summary: DSTools: Data Science Tools Library
5
5
  Home-page: https://github.com/s-kav/ds_tools
6
6
  Author: Sergii Kavun
@@ -35,7 +35,7 @@ Requires-Dist: pytest; extra == "test"
35
35
  Requires-Dist: pytest-cov; extra == "test"
36
36
  Requires-Dist: pytest-mock; extra == "test"
37
37
  Provides-Extra: performance
38
- Requires-Dist: cupy-cuda12x; extra == "performance"
38
+ Requires-Dist: cupy-cuda13x; extra == "performance"
39
39
  Requires-Dist: numba; extra == "performance"
40
40
  Provides-Extra: lint
41
41
  Requires-Dist: black==25.1.0; extra == "lint"
@@ -44,13 +44,18 @@ Dynamic: license-file
44
44
 
45
45
  # DSTools: Data Science Research Toolkit
46
46
 
47
- [![Tests](https://github.com/s-kav/ds_tools/actions/workflows/python-publish.yml/badge.svg)](https://github.com/s-kav/ds_tools/actions)
47
+ [![Tests](https://img.shields.io/github/actions/workflow/status/s-kav/ds_tools/python-publish.yml?label=Tests&color=darkgreen&style=flat)](https://github.com/s-kav/ds_tools/actions)
48
48
  [![PyPI version](https://img.shields.io/pypi/v/dscience-tools.svg)](https://pypi.org/project/dscience-tools/)
49
49
  [![codecov](https://codecov.io/gh/s-kav/ds_tools/branch/main/graph/badge.svg)](https://codecov.io/gh/s-kav/ds_tools)
50
50
  [![License: PolyForm Non-Commercial 1.0.0](https://img.shields.io/badge/License-PolyForm%20Non--Commercial-blue.svg)](https://polyformproject.org/licenses/noncommercial/1.0.0/)
51
51
  [![PyPI Downloads](https://static.pepy.tech/badge/dscience-tools)](https://pepy.tech/projects/dscience-tools)
52
52
  [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
53
- [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
53
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-darkgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
54
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
55
+ [![PEP8](https://img.shields.io/badge/code%20style-pep8-orange.svg)](https://www.python.org/dev/peps/pep-0008/)
56
+ ![PyPI - Implementation](https://img.shields.io/pypi/implementation/dscience-tools)
57
+ [![Supported Python Versions](https://img.shields.io/badge/Python-3.9%20|%203.10%20|%203.11%20|%203.12-3776AB?style=flat&logo=python&logoColor=white)](https://www.python.org)
58
+ [![Supports GPU and CPU](https://img.shields.io/badge/GPU%2FCPU-Supported-darkgreen?style=flat&logo=nvidia&logoColor=white)](https://pytorch.org/)
54
59
 
55
60
 
56
61
  # Table of Contents
@@ -63,6 +68,7 @@ Dynamic: license-file
63
68
  * [Available Tools](#available-tools)
64
69
  * [Authors](#authors)
65
70
  * [Contributing](#contributing)
71
+ * [TODO](#todo)
66
72
  * [References](#references)
67
73
  * [License](#license)
68
74
 
@@ -432,6 +438,22 @@ A high-performance toolkit for calculating distances and similarities.
432
438
 
433
439
  See [CONTRIBUTING](/CONTRIBUTING.md)
434
440
 
441
+ # TODO
442
+ 1. Add some important kinds of plots:
443
+ 📌 KS Plot – Measures how well your model separates positive and negative classes.
444
+ 📌 SHAP Plot – Explains feature impact and model interpretability.
445
+ 📌 QQ Plot – Checks if your data follows a theoretical distribution.
446
+ 📌 Cumulative Explained Variance – Helps decide the optimal number of PCA components.
447
+ 📌 Gini vs Entropy – Key metrics for understanding decision tree impurity.
448
+ 📌 Bias–Variance Tradeoff – Shows the balance between underfitting and overfitting.
449
+ 📌 ROC Curve – Evaluates classification performance across thresholds.
450
+ 📌 Precision–Recall Curve – Crucial for imbalanced datasets.
451
+ 📌 Elbow Curve – Helps choose the right number of clusters in K-Means.
452
+
453
+ 2. Implement the Fast Fourier Transform (FFT) algorithm and Shannon’s interpolation formula.
454
+
455
+ 3. Add more fast distance metrics (expanding the existing set).
456
+
435
457
 
436
458
  # References
437
459
 
@@ -449,4 +471,3 @@ This project uses **dual licensing**:
449
471
  - 💼 **Commercial License Available**: Contact us for business use [License](https://github.com/s-kav/ds_tools/blob/main/CLA.md)
450
472
 
451
473
  [📋 Full License Details](https://github.com/s-kav/ds_tools/blob/main/LICENSE) | [💰 Get Commercial License](mailto:kavserg@gmail.com)
452
-
@@ -25,6 +25,8 @@ tests/test_describe_cat.py
25
25
  tests/test_describe_num.py
26
26
  tests/test_df_stats.py
27
27
  tests/test_distance.py
28
+ tests/test_distance_additional.py
29
+ tests/test_distance_numba_cupy.py
28
30
  tests/test_entropy.py
29
31
  tests/test_evaluate_cls.py
30
32
  tests/test_function_list.py
@@ -6,7 +6,7 @@ black==25.1.0
6
6
  ruff==0.12.12
7
7
 
8
8
  [performance]
9
- cupy-cuda12x
9
+ cupy-cuda13x
10
10
  numba
11
11
 
12
12
  [test]
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dscience_tools"
3
- version = "2.0.1"
3
+ version = "2.3.0"
4
4
  description = "DSTools: Data Science Tools Library"
5
5
  readme = "README.md"
6
6
 
@@ -41,7 +41,7 @@ test = [
41
41
  ]
42
42
 
43
43
  performance = [
44
- "cupy-cuda12x",
44
+ "cupy-cuda13x",
45
45
  "numba",
46
46
  ]
47
47
 
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = dscience_tools
3
- version = 2.0.1
3
+ version = 2.3.0
4
4
  author = Sergii Kavun
5
5
  author_email = kavserg@gmail.com
6
6
  description = A library of helpful functions for various data science research stages.
@@ -10,10 +10,10 @@ url = https://github.com/s-kav/ds_tools
10
10
  license = PolyForm-Noncommercial-1.0.0 OR Commercial
11
11
  classifiers =
12
12
  Programming Language :: Python :: 3
13
- Programming Language :: Python :: 3.8
14
13
  Programming Language :: Python :: 3.9
15
14
  Programming Language :: Python :: 3.10
16
15
  Programming Language :: Python :: 3.11
16
+ Programming Language :: Python :: 3.12
17
17
  License :: OSI Approved :: PolyForm-Noncommercial-1.0.0 OR Commercial
18
18
  Operating System :: OS Independent
19
19
  Intended Audience :: Developers
@@ -25,7 +25,7 @@ classifiers =
25
25
  package_dir =
26
26
  ds_tools = src
27
27
  packages = ds_tools
28
- python_requires = >=3.8
28
+ python_requires = >=3.9
29
29
  install_requires =
30
30
  numpy
31
31
  pandas
@@ -39,9 +39,13 @@ install_requires =
39
39
  pydantic>=2.0
40
40
 
41
41
  [options.extras_require]
42
- performance =
43
- cupy-cuda13x
42
+ performance-numba =
44
43
  numba
44
+ performance-cupy =
45
+ cupy-cuda13x
46
+ performance =
47
+ %(performance-numba)s
48
+ %(performance-cupy)s
45
49
  test =
46
50
  pytest
47
51
  pytest-cov
@@ -13,7 +13,7 @@
13
13
  DSTools: A library of helpful functions for various data science research stages.
14
14
  """
15
15
 
16
- __version__ = "2.0.0"
16
+ __version__ = "2.3.0"
17
17
 
18
18
  from ds_tool import DSTools
19
19
  from models import (