dscience-tools 2.0.1__tar.gz → 2.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dscience_tools-2.0.1/dscience_tools.egg-info → dscience_tools-2.3.0}/PKG-INFO +26 -5
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/README.md +24 -3
- {dscience_tools-2.0.1 → dscience_tools-2.3.0/dscience_tools.egg-info}/PKG-INFO +26 -5
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/dscience_tools.egg-info/SOURCES.txt +2 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/dscience_tools.egg-info/requires.txt +1 -1
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/pyproject.toml +2 -2
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/setup.cfg +9 -5
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/src/__init__.py +1 -1
- dscience_tools-2.3.0/src/distance.py +1018 -0
- dscience_tools-2.3.0/src/metrics.py +1499 -0
- dscience_tools-2.3.0/tests/test_distance.py +949 -0
- dscience_tools-2.3.0/tests/test_distance_additional.py +118 -0
- dscience_tools-2.3.0/tests/test_distance_numba_cupy.py +181 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_metrics.py +258 -13
- dscience_tools-2.0.1/src/distance.py +0 -500
- dscience_tools-2.0.1/src/metrics.py +0 -649
- dscience_tools-2.0.1/tests/test_distance.py +0 -322
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/LICENSE +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/LICENSE-NC.txt +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/MANIFEST.in +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/dscience_tools.egg-info/dependency_links.txt +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/dscience_tools.egg-info/top_level.txt +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/src/ds_tool.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/src/models.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_add_missing.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_alphanum.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_category_stats.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_chatterjee.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_check_ninf.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_compute_metrics.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_corr_matrix.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_describe_cat.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_describe_num.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_df_stats.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_entropy.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_evaluate_cls.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_function_list.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_generate_dist.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_generate_from_metrics.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_grubbs.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_kl_divergence.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_labeling.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_min_max.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_models.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_normality.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_outliers.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_plot_cm.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_sparse_calc.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_stationarity.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_trials_res_df.py +0 -0
- {dscience_tools-2.0.1 → dscience_tools-2.3.0}/tests/test_zip_io.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dscience_tools
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: DSTools: Data Science Tools Library
|
|
5
5
|
Home-page: https://github.com/s-kav/ds_tools
|
|
6
6
|
Author: Sergii Kavun
|
|
@@ -35,7 +35,7 @@ Requires-Dist: pytest; extra == "test"
|
|
|
35
35
|
Requires-Dist: pytest-cov; extra == "test"
|
|
36
36
|
Requires-Dist: pytest-mock; extra == "test"
|
|
37
37
|
Provides-Extra: performance
|
|
38
|
-
Requires-Dist: cupy-
|
|
38
|
+
Requires-Dist: cupy-cuda13x; extra == "performance"
|
|
39
39
|
Requires-Dist: numba; extra == "performance"
|
|
40
40
|
Provides-Extra: lint
|
|
41
41
|
Requires-Dist: black==25.1.0; extra == "lint"
|
|
@@ -44,13 +44,18 @@ Dynamic: license-file
|
|
|
44
44
|
|
|
45
45
|
# DSTools: Data Science Research Toolkit
|
|
46
46
|
|
|
47
|
-
[](https://github.com/s-kav/ds_tools/actions)
|
|
48
48
|
[](https://pypi.org/project/dscience-tools/)
|
|
49
49
|
[](https://codecov.io/gh/s-kav/ds_tools)
|
|
50
50
|
[](https://polyformproject.org/licenses/noncommercial/1.0.0/)
|
|
51
51
|
[](https://pepy.tech/projects/dscience-tools)
|
|
52
52
|
[](https://github.com/psf/black)
|
|
53
|
-
[](https://github.com/pre-commit/pre-commit)
|
|
54
|
+
[](https://github.com/astral-sh/ruff)
|
|
55
|
+
[](https://www.python.org/dev/peps/pep-0008/)
|
|
56
|
+

|
|
57
|
+
[](https://www.python.org)
|
|
58
|
+
[](https://pytorch.org/)
|
|
54
59
|
|
|
55
60
|
|
|
56
61
|
# Table of Contents
|
|
@@ -63,6 +68,7 @@ Dynamic: license-file
|
|
|
63
68
|
* [Available Tools](#available-tools)
|
|
64
69
|
* [Authors](#authors)
|
|
65
70
|
* [Contributing](#contributing)
|
|
71
|
+
* [TODO](#todo)
|
|
66
72
|
* [References](#references)
|
|
67
73
|
* [License](#license)
|
|
68
74
|
|
|
@@ -432,6 +438,22 @@ A high-performance toolkit for calculating distances and similarities.
|
|
|
432
438
|
|
|
433
439
|
See [CONTRIBUTING](/CONTRIBUTING.md)
|
|
434
440
|
|
|
441
|
+
# TODO
|
|
442
|
+
1. Add some important kinds of plots:
|
|
443
|
+
📌 KS Plot – Measures how well your model separates positive and negative classes.
|
|
444
|
+
📌 SHAP Plot – Explains feature impact and model interpretability.
|
|
445
|
+
📌 QQ Plot – Checks if your data follows a theoretical distribution.
|
|
446
|
+
📌 Cumulative Explained Variance – Helps decide the optimal number of PCA components.
|
|
447
|
+
📌 Gini vs Entropy – Key metrics for understanding decision tree impurity.
|
|
448
|
+
📌 Bias–Variance Tradeoff – Shows the balance between underfitting and overfitting.
|
|
449
|
+
📌 ROC Curve – Evaluates classification performance across thresholds.
|
|
450
|
+
📌 Precision–Recall Curve – Crucial for imbalanced datasets.
|
|
451
|
+
📌 Elbow Curve – Helps choose the right number of clusters in K-Means.
|
|
452
|
+
|
|
453
|
+
2. Implement Fast Fourier Transform (FFT) algorithm and Shannon’s interpolation formula
|
|
454
|
+
|
|
455
|
+
3. Add more fast distance metrics (expanding the existing set).
|
|
456
|
+
|
|
435
457
|
|
|
436
458
|
# References
|
|
437
459
|
|
|
@@ -449,4 +471,3 @@ This project uses **dual licensing**:
|
|
|
449
471
|
- 💼 **Commercial License Available**: Contact us for business use [License](https://github.com/s-kav/ds_tools/blob/main/CLA.md)
|
|
450
472
|
|
|
451
473
|
[📋 Full License Details](https://github.com/s-kav/ds_tools/blob/main/LICENSE) | [💰 Get Commercial License](mailto:kavserg@gmail.com)
|
|
452
|
-
|
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
# DSTools: Data Science Research Toolkit
|
|
2
2
|
|
|
3
|
-
[](https://github.com/s-kav/ds_tools/actions)
|
|
4
4
|
[](https://pypi.org/project/dscience-tools/)
|
|
5
5
|
[](https://codecov.io/gh/s-kav/ds_tools)
|
|
6
6
|
[](https://polyformproject.org/licenses/noncommercial/1.0.0/)
|
|
7
7
|
[](https://pepy.tech/projects/dscience-tools)
|
|
8
8
|
[](https://github.com/psf/black)
|
|
9
|
-
[](https://github.com/pre-commit/pre-commit)
|
|
10
|
+
[](https://github.com/astral-sh/ruff)
|
|
11
|
+
[](https://www.python.org/dev/peps/pep-0008/)
|
|
12
|
+

|
|
13
|
+
[](https://www.python.org)
|
|
14
|
+
[](https://pytorch.org/)
|
|
10
15
|
|
|
11
16
|
|
|
12
17
|
# Table of Contents
|
|
@@ -19,6 +24,7 @@
|
|
|
19
24
|
* [Available Tools](#available-tools)
|
|
20
25
|
* [Authors](#authors)
|
|
21
26
|
* [Contributing](#contributing)
|
|
27
|
+
* [TODO](#todo)
|
|
22
28
|
* [References](#references)
|
|
23
29
|
* [License](#license)
|
|
24
30
|
|
|
@@ -388,6 +394,22 @@ A high-performance toolkit for calculating distances and similarities.
|
|
|
388
394
|
|
|
389
395
|
See [CONTRIBUTING](/CONTRIBUTING.md)
|
|
390
396
|
|
|
397
|
+
# TODO
|
|
398
|
+
1. Add some important kinds of plots:
|
|
399
|
+
📌 KS Plot – Measures how well your model separates positive and negative classes.
|
|
400
|
+
📌 SHAP Plot – Explains feature impact and model interpretability.
|
|
401
|
+
📌 QQ Plot – Checks if your data follows a theoretical distribution.
|
|
402
|
+
📌 Cumulative Explained Variance – Helps decide the optimal number of PCA components.
|
|
403
|
+
📌 Gini vs Entropy – Key metrics for understanding decision tree impurity.
|
|
404
|
+
📌 Bias–Variance Tradeoff – Shows the balance between underfitting and overfitting.
|
|
405
|
+
📌 ROC Curve – Evaluates classification performance across thresholds.
|
|
406
|
+
📌 Precision–Recall Curve – Crucial for imbalanced datasets.
|
|
407
|
+
📌 Elbow Curve – Helps choose the right number of clusters in K-Means.
|
|
408
|
+
|
|
409
|
+
2. Implement Fast Fourier Transform (FFT) algorithm and Shannon’s interpolation formula
|
|
410
|
+
|
|
411
|
+
3. Add more fast distance metrics (expanding the existing set).
|
|
412
|
+
|
|
391
413
|
|
|
392
414
|
# References
|
|
393
415
|
|
|
@@ -405,4 +427,3 @@ This project uses **dual licensing**:
|
|
|
405
427
|
- 💼 **Commercial License Available**: Contact us for business use [License](https://github.com/s-kav/ds_tools/blob/main/CLA.md)
|
|
406
428
|
|
|
407
429
|
[📋 Full License Details](https://github.com/s-kav/ds_tools/blob/main/LICENSE) | [💰 Get Commercial License](mailto:kavserg@gmail.com)
|
|
408
|
-
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dscience_tools
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.3.0
|
|
4
4
|
Summary: DSTools: Data Science Tools Library
|
|
5
5
|
Home-page: https://github.com/s-kav/ds_tools
|
|
6
6
|
Author: Sergii Kavun
|
|
@@ -35,7 +35,7 @@ Requires-Dist: pytest; extra == "test"
|
|
|
35
35
|
Requires-Dist: pytest-cov; extra == "test"
|
|
36
36
|
Requires-Dist: pytest-mock; extra == "test"
|
|
37
37
|
Provides-Extra: performance
|
|
38
|
-
Requires-Dist: cupy-
|
|
38
|
+
Requires-Dist: cupy-cuda13x; extra == "performance"
|
|
39
39
|
Requires-Dist: numba; extra == "performance"
|
|
40
40
|
Provides-Extra: lint
|
|
41
41
|
Requires-Dist: black==25.1.0; extra == "lint"
|
|
@@ -44,13 +44,18 @@ Dynamic: license-file
|
|
|
44
44
|
|
|
45
45
|
# DSTools: Data Science Research Toolkit
|
|
46
46
|
|
|
47
|
-
[](https://github.com/s-kav/ds_tools/actions)
|
|
48
48
|
[](https://pypi.org/project/dscience-tools/)
|
|
49
49
|
[](https://codecov.io/gh/s-kav/ds_tools)
|
|
50
50
|
[](https://polyformproject.org/licenses/noncommercial/1.0.0/)
|
|
51
51
|
[](https://pepy.tech/projects/dscience-tools)
|
|
52
52
|
[](https://github.com/psf/black)
|
|
53
|
-
[](https://github.com/pre-commit/pre-commit)
|
|
54
|
+
[](https://github.com/astral-sh/ruff)
|
|
55
|
+
[](https://www.python.org/dev/peps/pep-0008/)
|
|
56
|
+

|
|
57
|
+
[](https://www.python.org)
|
|
58
|
+
[](https://pytorch.org/)
|
|
54
59
|
|
|
55
60
|
|
|
56
61
|
# Table of Contents
|
|
@@ -63,6 +68,7 @@ Dynamic: license-file
|
|
|
63
68
|
* [Available Tools](#available-tools)
|
|
64
69
|
* [Authors](#authors)
|
|
65
70
|
* [Contributing](#contributing)
|
|
71
|
+
* [TODO](#todo)
|
|
66
72
|
* [References](#references)
|
|
67
73
|
* [License](#license)
|
|
68
74
|
|
|
@@ -432,6 +438,22 @@ A high-performance toolkit for calculating distances and similarities.
|
|
|
432
438
|
|
|
433
439
|
See [CONTRIBUTING](/CONTRIBUTING.md)
|
|
434
440
|
|
|
441
|
+
# TODO
|
|
442
|
+
1. Add some important kinds of plots:
|
|
443
|
+
📌 KS Plot – Measures how well your model separates positive and negative classes.
|
|
444
|
+
📌 SHAP Plot – Explains feature impact and model interpretability.
|
|
445
|
+
📌 QQ Plot – Checks if your data follows a theoretical distribution.
|
|
446
|
+
📌 Cumulative Explained Variance – Helps decide the optimal number of PCA components.
|
|
447
|
+
📌 Gini vs Entropy – Key metrics for understanding decision tree impurity.
|
|
448
|
+
📌 Bias–Variance Tradeoff – Shows the balance between underfitting and overfitting.
|
|
449
|
+
📌 ROC Curve – Evaluates classification performance across thresholds.
|
|
450
|
+
📌 Precision–Recall Curve – Crucial for imbalanced datasets.
|
|
451
|
+
📌 Elbow Curve – Helps choose the right number of clusters in K-Means.
|
|
452
|
+
|
|
453
|
+
2. Implement Fast Fourier Transform (FFT) algorithm and Shannon’s interpolation formula
|
|
454
|
+
|
|
455
|
+
3. Add more fast distance metrics (expanding the existing set).
|
|
456
|
+
|
|
435
457
|
|
|
436
458
|
# References
|
|
437
459
|
|
|
@@ -449,4 +471,3 @@ This project uses **dual licensing**:
|
|
|
449
471
|
- 💼 **Commercial License Available**: Contact us for business use [License](https://github.com/s-kav/ds_tools/blob/main/CLA.md)
|
|
450
472
|
|
|
451
473
|
[📋 Full License Details](https://github.com/s-kav/ds_tools/blob/main/LICENSE) | [💰 Get Commercial License](mailto:kavserg@gmail.com)
|
|
452
|
-
|
|
@@ -25,6 +25,8 @@ tests/test_describe_cat.py
|
|
|
25
25
|
tests/test_describe_num.py
|
|
26
26
|
tests/test_df_stats.py
|
|
27
27
|
tests/test_distance.py
|
|
28
|
+
tests/test_distance_additional.py
|
|
29
|
+
tests/test_distance_numba_cupy.py
|
|
28
30
|
tests/test_entropy.py
|
|
29
31
|
tests/test_evaluate_cls.py
|
|
30
32
|
tests/test_function_list.py
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "dscience_tools"
|
|
3
|
-
version = "2.0.1"
|
|
3
|
+
version = "2.3.0"
|
|
4
4
|
description = "DSTools: Data Science Tools Library"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
|
|
@@ -41,7 +41,7 @@ test = [
|
|
|
41
41
|
]
|
|
42
42
|
|
|
43
43
|
performance = [
|
|
44
|
-
"cupy-
|
|
44
|
+
"cupy-cuda13x",
|
|
45
45
|
"numba",
|
|
46
46
|
]
|
|
47
47
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = dscience_tools
|
|
3
|
-
version = 2.0.1
|
|
3
|
+
version = 2.3.0
|
|
4
4
|
author = Sergii Kavun
|
|
5
5
|
author_email = kavserg@gmail.com
|
|
6
6
|
description = A library of helpful functions for various data science research stages.
|
|
@@ -10,10 +10,10 @@ url = https://github.com/s-kav/ds_tools
|
|
|
10
10
|
license = PolyForm-Noncommercial-1.0.0 OR Commercial
|
|
11
11
|
classifiers =
|
|
12
12
|
Programming Language :: Python :: 3
|
|
13
|
-
Programming Language :: Python :: 3.8
|
|
14
13
|
Programming Language :: Python :: 3.9
|
|
15
14
|
Programming Language :: Python :: 3.10
|
|
16
15
|
Programming Language :: Python :: 3.11
|
|
16
|
+
Programming Language :: Python :: 3.12
|
|
17
17
|
License :: OSI Approved :: PolyForm-Noncommercial-1.0.0 OR Commercial
|
|
18
18
|
Operating System :: OS Independent
|
|
19
19
|
Intended Audience :: Developers
|
|
@@ -25,7 +25,7 @@ classifiers =
|
|
|
25
25
|
package_dir =
|
|
26
26
|
ds_tools = src
|
|
27
27
|
packages = ds_tools
|
|
28
|
-
python_requires = >=3.
|
|
28
|
+
python_requires = >=3.9
|
|
29
29
|
install_requires =
|
|
30
30
|
numpy
|
|
31
31
|
pandas
|
|
@@ -39,9 +39,13 @@ install_requires =
|
|
|
39
39
|
pydantic>=2.0
|
|
40
40
|
|
|
41
41
|
[options.extras_require]
|
|
42
|
-
performance =
|
|
43
|
-
cupy-cuda13x
|
|
42
|
+
performance-numba =
|
|
44
43
|
numba
|
|
44
|
+
performance-cupy =
|
|
45
|
+
cupy-cuda13x
|
|
46
|
+
performance =
|
|
47
|
+
%(performance-numba)s
|
|
48
|
+
%(performance-cupy)s
|
|
45
49
|
test =
|
|
46
50
|
pytest
|
|
47
51
|
pytest-cov
|