cs-ml-tools 2025.12.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cs_ml_tools-2025.12.4/.github/workflows/docs.yml +52 -0
- cs_ml_tools-2025.12.4/.github/workflows/publish-PyPI.yml +65 -0
- cs_ml_tools-2025.12.4/.github/workflows/publish-testPyPI.yml +65 -0
- cs_ml_tools-2025.12.4/.gitignore +7 -0
- cs_ml_tools-2025.12.4/LICENSE +21 -0
- cs_ml_tools-2025.12.4/PKG-INFO +20 -0
- cs_ml_tools-2025.12.4/README.md +1 -0
- cs_ml_tools-2025.12.4/cs_ml_tools.egg-info/PKG-INFO +20 -0
- cs_ml_tools-2025.12.4/cs_ml_tools.egg-info/SOURCES.txt +27 -0
- cs_ml_tools-2025.12.4/cs_ml_tools.egg-info/dependency_links.txt +1 -0
- cs_ml_tools-2025.12.4/cs_ml_tools.egg-info/requires.txt +9 -0
- cs_ml_tools-2025.12.4/cs_ml_tools.egg-info/top_level.txt +1 -0
- cs_ml_tools-2025.12.4/ml_tools/__init__.py +39 -0
- cs_ml_tools-2025.12.4/ml_tools/array_utils.py +237 -0
- cs_ml_tools-2025.12.4/ml_tools/distance_calculation.py +51 -0
- cs_ml_tools-2025.12.4/ml_tools/dl_model_arch_utils.py +84 -0
- cs_ml_tools-2025.12.4/ml_tools/plotting.py +423 -0
- cs_ml_tools-2025.12.4/ml_tools/quarto/__init__.py +1 -0
- cs_ml_tools-2025.12.4/ml_tools/quarto/utils.py +71 -0
- cs_ml_tools-2025.12.4/ml_tools/scripts/nb_utils.py +105 -0
- cs_ml_tools-2025.12.4/ml_tools/streamlit/__init__.py +1 -0
- cs_ml_tools-2025.12.4/ml_tools/streamlit/utils.py +48 -0
- cs_ml_tools-2025.12.4/ml_tools/torch/TabularDataLoader.py +90 -0
- cs_ml_tools-2025.12.4/ml_tools/torch/__init__.py +4 -0
- cs_ml_tools-2025.12.4/ml_tools/torch/utils.py +47 -0
- cs_ml_tools-2025.12.4/ml_tools/utils.py +465 -0
- cs_ml_tools-2025.12.4/pyproject.toml +18 -0
- cs_ml_tools-2025.12.4/requirements.txt +9 -0
- cs_ml_tools-2025.12.4/setup.cfg +4 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
name: Documentation
|
|
2
|
+
run-name: Build and Deploy Documentation for commit ${{ github.sha }}
|
|
3
|
+
|
|
4
|
+
# build the documentation whenever there are new commits on main
|
|
5
|
+
on:
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
push:
|
|
8
|
+
branches:
|
|
9
|
+
- main
|
|
10
|
+
# Alternative: only build for tags.
|
|
11
|
+
# tags:
|
|
12
|
+
# - '*'
|
|
13
|
+
|
|
14
|
+
# security: restrict permissions for CI jobs.
|
|
15
|
+
permissions:
|
|
16
|
+
contents: read
|
|
17
|
+
|
|
18
|
+
jobs:
|
|
19
|
+
# Build the documentation and upload the static HTML files as an artifact.
|
|
20
|
+
build:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
- uses: actions/setup-python@v5
|
|
25
|
+
with:
|
|
26
|
+
python-version: "3.12"
|
|
27
|
+
|
|
28
|
+
# ADJUST THIS: install all dependencies (including pdoc)
|
|
29
|
+
- run: pip install -e .
|
|
30
|
+
- run: pip install pdoc
|
|
31
|
+
# ADJUST THIS: build your documentation into docs/.
|
|
32
|
+
# We use a custom build script for pdoc itself, ideally you just run `pdoc -o docs/ ...` here.
|
|
33
|
+
- run: mkdir docs; pdoc -o docs -d numpy ml_tools
|
|
34
|
+
|
|
35
|
+
- uses: actions/upload-pages-artifact@v3
|
|
36
|
+
with:
|
|
37
|
+
path: docs/
|
|
38
|
+
|
|
39
|
+
# Deploy the artifact to GitHub pages.
|
|
40
|
+
# This is a separate job so that only actions/deploy-pages has the necessary permissions.
|
|
41
|
+
deploy:
|
|
42
|
+
needs: build
|
|
43
|
+
runs-on: ubuntu-latest
|
|
44
|
+
permissions:
|
|
45
|
+
pages: write
|
|
46
|
+
id-token: write
|
|
47
|
+
environment:
|
|
48
|
+
name: github-pages
|
|
49
|
+
url: ${{ steps.deployment.outputs.page_url }}
|
|
50
|
+
steps:
|
|
51
|
+
- id: deployment
|
|
52
|
+
uses: actions/deploy-pages@v4
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
run-name: Publish Python Package to PyPI for release ${{ github.event.release.tag_name }}
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
release:
|
|
6
|
+
types: [published]
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
inputs:
|
|
9
|
+
tag_name:
|
|
10
|
+
description: 'Git tag to checkout and publish'
|
|
11
|
+
required: false
|
|
12
|
+
type: string
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
build:
|
|
16
|
+
name: Build distribution 📦
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- name: Checkout code
|
|
21
|
+
uses: actions/checkout@v4
|
|
22
|
+
with:
|
|
23
|
+
ref: ${{ inputs.tag_name || github.ref }}
|
|
24
|
+
|
|
25
|
+
- name: Set up Python
|
|
26
|
+
uses: actions/setup-python@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: "3.x"
|
|
29
|
+
|
|
30
|
+
- name: Install pypa/build
|
|
31
|
+
run: >-
|
|
32
|
+
python3 -m
|
|
33
|
+
pip install
|
|
34
|
+
build
|
|
35
|
+
--user
|
|
36
|
+
- name: Build a binary wheel and a source tarball
|
|
37
|
+
run: python3 -m build
|
|
38
|
+
- name: Store the distribution packages
|
|
39
|
+
uses: actions/upload-artifact@v4
|
|
40
|
+
with:
|
|
41
|
+
name: python-package-distributions
|
|
42
|
+
path: dist/
|
|
43
|
+
publish-to-pypi:
|
|
44
|
+
name: Publish to PyPI
|
|
45
|
+
needs:
|
|
46
|
+
- build
|
|
47
|
+
runs-on: ubuntu-latest
|
|
48
|
+
|
|
49
|
+
environment:
|
|
50
|
+
name: pypi
|
|
51
|
+
url: https://pypi.org/p/cs-ml-tools
|
|
52
|
+
|
|
53
|
+
permissions:
|
|
54
|
+
id-token: write # IMPORTANT: mandatory for trusted publishing
|
|
55
|
+
|
|
56
|
+
steps:
|
|
57
|
+
- name: Download all the dists
|
|
58
|
+
uses: actions/download-artifact@v4
|
|
59
|
+
with:
|
|
60
|
+
name: python-package-distributions
|
|
61
|
+
path: dist/
|
|
62
|
+
- name: Publish distribution 📦 to PyPI
  # No `repository-url` override here: this job targets the `pypi`
  # environment (https://pypi.org/p/cs-ml-tools), so the action's default
  # upload endpoint (the real PyPI) must be used. The TestPyPI endpoint
  # belongs only in the separate publish-testPyPI workflow.
  uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
name: Publish to TestPyPI
|
|
2
|
+
run-name: Publish Python Package to TestPyPI for release ${{ github.event.release.tag_name }}
|
|
3
|
+
|
|
4
|
+
on:
|
|
5
|
+
release:
|
|
6
|
+
types: [published]
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
inputs:
|
|
9
|
+
tag_name:
|
|
10
|
+
description: 'Git tag to checkout and publish'
|
|
11
|
+
required: false
|
|
12
|
+
type: string
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
build:
|
|
16
|
+
name: Build distribution 📦
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- name: Checkout code
|
|
21
|
+
uses: actions/checkout@v4
|
|
22
|
+
with:
|
|
23
|
+
ref: ${{ inputs.tag_name || github.ref }}
|
|
24
|
+
|
|
25
|
+
- name: Set up Python
|
|
26
|
+
uses: actions/setup-python@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: "3.x"
|
|
29
|
+
|
|
30
|
+
- name: Install pypa/build
|
|
31
|
+
run: >-
|
|
32
|
+
python3 -m
|
|
33
|
+
pip install
|
|
34
|
+
build
|
|
35
|
+
--user
|
|
36
|
+
- name: Build a binary wheel and a source tarball
|
|
37
|
+
run: python3 -m build
|
|
38
|
+
- name: Store the distribution packages
|
|
39
|
+
uses: actions/upload-artifact@v4
|
|
40
|
+
with:
|
|
41
|
+
name: python-package-distributions
|
|
42
|
+
path: dist/
|
|
43
|
+
publish-to-testpypi:
|
|
44
|
+
name: Publish to TestPyPI
|
|
45
|
+
needs:
|
|
46
|
+
- build
|
|
47
|
+
runs-on: ubuntu-latest
|
|
48
|
+
|
|
49
|
+
environment:
|
|
50
|
+
name: testpypi
|
|
51
|
+
url: https://test.pypi.org/p/cs-ml-tools
|
|
52
|
+
|
|
53
|
+
permissions:
|
|
54
|
+
id-token: write # IMPORTANT: mandatory for trusted publishing
|
|
55
|
+
|
|
56
|
+
steps:
|
|
57
|
+
- name: Download all the dists
|
|
58
|
+
uses: actions/download-artifact@v4
|
|
59
|
+
with:
|
|
60
|
+
name: python-package-distributions
|
|
61
|
+
path: dist/
|
|
62
|
+
- name: Publish distribution 📦 to TestPyPI
|
|
63
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
64
|
+
with:
|
|
65
|
+
repository-url: https://test.pypi.org/legacy/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2020 claudio525
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cs-ml-tools
|
|
3
|
+
Version: 2025.12.4
|
|
4
|
+
Summary: A collection of tools for machine learning
|
|
5
|
+
Author: Claudio Schill
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: streamlit
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Requires-Dist: tslearn
|
|
13
|
+
Requires-Dist: matplotlib
|
|
14
|
+
Requires-Dist: seaborn
|
|
15
|
+
Requires-Dist: pyyaml
|
|
16
|
+
Requires-Dist: torch
|
|
17
|
+
Requires-Dist: statsmodels
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
[Documentation](https://claudio525.github.io/ml_tools/ml_tools.html)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
[Documentation](https://claudio525.github.io/ml_tools/ml_tools.html)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cs-ml-tools
|
|
3
|
+
Version: 2025.12.4
|
|
4
|
+
Summary: A collection of tools for machine learning
|
|
5
|
+
Author: Claudio Schill
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: streamlit
|
|
10
|
+
Requires-Dist: numpy
|
|
11
|
+
Requires-Dist: pandas
|
|
12
|
+
Requires-Dist: tslearn
|
|
13
|
+
Requires-Dist: matplotlib
|
|
14
|
+
Requires-Dist: seaborn
|
|
15
|
+
Requires-Dist: pyyaml
|
|
16
|
+
Requires-Dist: torch
|
|
17
|
+
Requires-Dist: statsmodels
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
[Documentation](https://claudio525.github.io/ml_tools/ml_tools.html)
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
.gitignore
|
|
2
|
+
LICENSE
|
|
3
|
+
README.md
|
|
4
|
+
pyproject.toml
|
|
5
|
+
requirements.txt
|
|
6
|
+
.github/workflows/docs.yml
|
|
7
|
+
.github/workflows/publish-PyPI.yml
|
|
8
|
+
.github/workflows/publish-testPyPI.yml
|
|
9
|
+
cs_ml_tools.egg-info/PKG-INFO
|
|
10
|
+
cs_ml_tools.egg-info/SOURCES.txt
|
|
11
|
+
cs_ml_tools.egg-info/dependency_links.txt
|
|
12
|
+
cs_ml_tools.egg-info/requires.txt
|
|
13
|
+
cs_ml_tools.egg-info/top_level.txt
|
|
14
|
+
ml_tools/__init__.py
|
|
15
|
+
ml_tools/array_utils.py
|
|
16
|
+
ml_tools/distance_calculation.py
|
|
17
|
+
ml_tools/dl_model_arch_utils.py
|
|
18
|
+
ml_tools/plotting.py
|
|
19
|
+
ml_tools/utils.py
|
|
20
|
+
ml_tools/quarto/__init__.py
|
|
21
|
+
ml_tools/quarto/utils.py
|
|
22
|
+
ml_tools/scripts/nb_utils.py
|
|
23
|
+
ml_tools/streamlit/__init__.py
|
|
24
|
+
ml_tools/streamlit/utils.py
|
|
25
|
+
ml_tools/torch/TabularDataLoader.py
|
|
26
|
+
ml_tools/torch/__init__.py
|
|
27
|
+
ml_tools/torch/utils.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ml_tools
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
r"""
|
|
2
|
+
Library for machine learning tools.
|
|
3
|
+
|
|
4
|
+
Modules:
|
|
5
|
+
- utils: General utility functions.
|
|
6
|
+
- array_utils: Utility functions for arrays.
|
|
7
|
+
- plotting: Plotting functions.
|
|
8
|
+
- distance_calculation: Functions for calculating distances.
|
|
9
|
+
- streamlit: Functions for working with streamlit.
|
|
10
|
+
- dl_model_arch_utils: Functions for working with deep learning model architectures.
|
|
11
|
+
- torch: Functions for working with PyTorch.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from . import utils
|
|
15
|
+
from . import array_utils
|
|
16
|
+
from . import plotting
|
|
17
|
+
from . import distance_calculation
|
|
18
|
+
from . import dl_model_arch_utils
|
|
19
|
+
from . import streamlit
|
|
20
|
+
from . import quarto
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Names of sub-packages that are only exported when their import succeeds.
optional_imports = []
try:
    from . import torch
except ImportError:
    # The torch sub-package could not be imported; leave it out of __all__.
    pass
else:
    optional_imports.append("torch")


# Public API: the always-available modules followed by any optional ones.
__all__ = [
    "utils",
    "array_utils",
    "plotting",
    "distance_calculation",
    "streamlit",
    "dl_model_arch_utils",
    "quarto",
    *optional_imports,
]
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
from typing import Sequence
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def find_nearest(array: np.ndarray[float], value: float) -> int:
    """
    Locates the entry of the array that lies closest
    to the given value and returns its index.

    The array is documented as being sorted. If several
    entries are equally close, the first one is reported.

    Parameters
    ----------
    array: array of floats
        The array to search
    value: float
        The value to search for

    Returns
    -------
    ix: int
        The index of the nearest value

    Examples
    --------
    >>> import numpy as np
    >>> arr = np.array([1.0, 2.5, 3.8, 4.2, 5.9, 7.1])
    >>> find_nearest(arr, 3.9)
    2
    >>> find_nearest(arr, 5.0)
    3
    >>> find_nearest(arr, 7.1)
    5
    """
    # Absolute distance of every entry to the requested value
    distances = np.abs(array - value)
    nearest_ix = distances.argmin()
    return nearest_ix
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def find_nearest_larger(array: np.ndarray[float], value: float) -> int:
    """
    Returns the index of the entry that matches the
    specified value. If no exact match is found, the index
    of the nearest larger entry is returned.

    This assumes that the array is 1D and sorted in ascending order!

    Parameters
    ----------
    array: array of floats
        The sorted array to search
    value: float
        The value to search for

    Returns
    -------
    ix: int
        The index of the first entry that is equal to
        or larger than the value

    Raises
    ------
    ValueError
        If the value is larger than the largest value in the array
        (which is also the case for an empty array)

    Examples
    --------
    >>> import numpy as np
    >>> arr = np.array([1.0, 2.5, 3.8, 4.2, 5.9, 7.1])
    >>> find_nearest_larger(arr, 4.0)
    3
    >>> find_nearest_larger(arr, 5.0)
    4
    >>> find_nearest_larger(arr, 7.1)
    5

    Repeated entries are handled correctly:

    >>> find_nearest_larger(np.array([1.0, 1.0, 3.0]), 2.0)
    2
    """
    array = np.asarray(array)

    # Binary search for the first position whose entry is >= value.
    # Stepping one past the closest entry instead would land on a
    # smaller entry whenever that closest entry is repeated.
    ix = int(np.searchsorted(array, value, side="left"))

    # An insertion point past the end means no entry is >= value
    if ix == array.size:
        raise ValueError("Value is larger than the largest value in the array")
    return ix
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def find_nearest_smaller(array: np.ndarray[float], value: float) -> int:
    """
    Returns the index of the entry that matches the
    specified value. If no exact match is found, the index
    of the nearest smaller entry is returned.

    This assumes that the array is 1D and sorted in ascending order!

    Parameters
    ----------
    array: array of floats
        The sorted array to search
    value: float
        The value to search for

    Returns
    -------
    ix: int
        The index of the last entry that is equal to
        or smaller than the value

    Raises
    ------
    ValueError
        If the value is smaller than the smallest value in the array
        (which is also the case for an empty array)

    Examples
    --------
    >>> import numpy as np
    >>> arr = np.array([1.0, 2.5, 3.8, 4.2, 5.9, 7.1])
    >>> find_nearest_smaller(arr, 4.0)
    2
    >>> find_nearest_smaller(arr, 5.0)
    3
    >>> find_nearest_smaller(arr, 1.0)
    0
    """
    array = np.asarray(array)

    # side="right" yields the insertion point after all entries that are
    # <= value, so the position just before it holds the last such entry.
    ix = int(np.searchsorted(array, value, side="right")) - 1

    # A negative position means no entry is <= value
    if ix < 0:
        raise ValueError("Value is smaller than the smallest value in the array")
    return ix
|
|
131
|
+
|
|
132
|
+
def find_nearest_smaller_vec(
    array: np.ndarray[float], values: np.ndarray[float]
) -> np.ndarray[int]:
    """
    Returns the indices of the entries that match the
    specified values. If no exact match is found for a value,
    the index of the nearest smaller entry is returned.

    This assumes that the array is sorted!

    Parameters
    ----------
    array: array of floats
        The 1D array to search
    values: array of floats
        The 1D array of values to search for

    Returns
    -------
    ix: np.ndarray[int]
        For each value, the largest index whose entry
        is equal to or smaller than that value

    Raises
    ------
    ValueError
        If array or values is not 1D, or if any value is
        smaller than the smallest value in the array

    Examples
    --------
    >>> import numpy as np
    >>> arr = np.array([1.0, 2.5, 3.8, 4.2, 5.9, 7.1])
    >>> values = np.array([4.0, 5.0, 1.0])
    >>> find_nearest_smaller_vec(arr, values)
    array([2, 3, 0])
    """
    if len(array.shape) != 1:
        raise ValueError("Input array must be 1D")
    if len(values.shape) != 1:
        raise ValueError("Input values must be 1D")

    # Row i flags the entries of `array` that are <= values[i]
    is_candidate = array <= values[:, None]

    # Without this check a value with no candidate would silently map to
    # index 0, because argmax over a row of all -1 returns 0.
    if not is_candidate.any(axis=1).all():
        raise ValueError(
            "At least one value is smaller than the smallest value in the array"
        )

    # Replace flagged entries by their position (others by -1); the
    # argmax per row is then the largest flagged position.
    positions = np.arange(array.size)
    return np.argmax(np.where(is_candidate, positions, -1), axis=1)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def pandas_isin(array_1: np.ndarray, array_2: np.ndarray) -> np.ndarray:
    """
    Element-wise membership test: flags, for every entry
    of array_1, whether it occurs in array_2.

    Gives the same answer as np.isin, but is implemented via a
    pandas index lookup, which is intended to be significantly
    faster for large arrays (see References).

    Parameters
    ----------
    array_1: array of any type
        The entries to test
    array_2: array of any type
        The entries to test against

    Returns
    -------
    np.ndarray
        The boolean array, one flag per entry of array_1

    References
    ----------
    [1] Stackoverflow: https://stackoverflow.com/questions/15939748/check-if-each-element-in-a-numpy-array-is-in-another-array

    Examples
    --------
    >>> import numpy as np
    >>> arr = np.array([1, 2, 3, 4, 5])
    >>> arr2 = np.array([1, 3, 5])
    >>> pandas_isin(arr, arr2)
    array([ True, False,  True, False,  True])
    """
    # Index over the distinct entries of array_2
    lookup = pd.Index(pd.unique(array_2))
    # Position of each array_1 entry in the lookup, -1 when absent
    positions = lookup.get_indexer(array_1)
    return positions >= 0
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def numpy_str_join(sep: str, *arrays: str | Sequence[str]) -> np.ndarray[str]:
|
|
205
|
+
"""
|
|
206
|
+
Joins multiple string arrays together using the specified separator.
|
|
207
|
+
Also support joining of string values, or a combination of both.
|
|
208
|
+
|
|
209
|
+
Parameters
|
|
210
|
+
----------
|
|
211
|
+
sep: str
|
|
212
|
+
The separator to use
|
|
213
|
+
arrays: string value or string arrays
|
|
214
|
+
The arrays (or string values) to join together
|
|
215
|
+
|
|
216
|
+
Returns
|
|
217
|
+
-------
|
|
218
|
+
np.ndarray
|
|
219
|
+
The joined string array
|
|
220
|
+
|
|
221
|
+
Examples
|
|
222
|
+
--------
|
|
223
|
+
Example 1: Joining two arrays
|
|
224
|
+
>>> numpy_str_join("_", ["a", "b"], ["c", "d"])
|
|
225
|
+
array(['a_c', 'b_d'], dtype='<U3')
|
|
226
|
+
|
|
227
|
+
Example 2: Combination of string value and arrays
|
|
228
|
+
>>> arr1 = np.array(["a", "b"])
|
|
229
|
+
>>> numpy_str_join("_", arr1, "c", ["d", "e"])
|
|
230
|
+
array(['a_c_d', 'b_c_e'], dtype='<U5')
|
|
231
|
+
"""
|
|
232
|
+
result = arrays[0]
|
|
233
|
+
for cur_array in arrays[1:]:
|
|
234
|
+
result = np.char.add(result, sep)
|
|
235
|
+
result = np.char.add(result, cur_array)
|
|
236
|
+
|
|
237
|
+
return result
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from typing import Dict
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def compute_dtw_dist_matrix(
    series_df: pd.DataFrame, dtw_params: Dict = None
) -> pd.DataFrame:
    """
    Builds the pairwise DTW distance matrix
    between the rows of the given dataframe

    Parameters
    ----------
    series_df: pd.DataFrame
        The dataframe of series (one series per row)
        for which to compute the distance matrix

        Expected shape: [N, K] where
        N = Number of series
        K = Number of steps
    dtw_params: Dictionary
        Keyword arguments that are passed on to
        tslearn.metrics.dtw, defaults to no extra arguments

    Returns
    -------
    pd.DataFrame:
        Distance matrix, with the index of series_df
        used for both rows and columns
        Shape: [N, N]
    """
    from tslearn import metrics

    if dtw_params is None:
        dtw_params = dict()

    n_series = series_df.shape[0]
    dist_matrix = np.full((n_series, n_series), fill_value=np.nan)

    # Only the upper triangle (incl. diagonal) is computed;
    # each distance is mirrored into the lower triangle.
    for row in range(n_series):
        for col in range(row, n_series):
            cur_dist = metrics.dtw(
                series_df.iloc[row], series_df.iloc[col], **dtw_params
            )
            dist_matrix[col, row] = cur_dist
            dist_matrix[row, col] = cur_dist

    # Sanity checks: matrix is symmetric and completely filled
    assert np.allclose(dist_matrix, dist_matrix.T)
    assert not np.any(np.isnan(dist_matrix))

    labels = series_df.index
    return pd.DataFrame(data=dist_matrix, index=labels, columns=labels)
|