edmkit 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edmkit/__init__.py +4 -0
- edmkit/ccm.py +76 -0
- edmkit/embedding.py +56 -0
- edmkit/generate/__init__.py +4 -0
- edmkit/generate/double_pendulum.py +52 -0
- edmkit/generate/lorenz.py +22 -0
- edmkit/generate/mackey_glass.py +17 -0
- edmkit/simplex_projection.py +51 -0
- edmkit/smap.py +72 -0
- edmkit/tensor.py +15 -0
- edmkit/util.py +190 -0
- edmkit-0.0.0.dist-info/METADATA +36 -0
- edmkit-0.0.0.dist-info/RECORD +15 -0
- edmkit-0.0.0.dist-info/WHEEL +4 -0
- edmkit-0.0.0.dist-info/licenses/LICENSE +21 -0
edmkit/__init__.py
ADDED
edmkit/ccm.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from edmkit.embedding import lagged_embed
|
|
4
|
+
from edmkit.tensor import Tensor, dtypes
|
|
5
|
+
from edmkit.util import pad, pairwise_distance, topk
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def calculate_rho(observations: np.ndarray, predictions: np.ndarray):
    """Pearson correlation between observations and predictions.

    Parameters
    ----------
    `observations` : `np.ndarray` of shape `(N,)` or `(N, e)`
    `predictions` : `np.ndarray` of the same shape as `observations`

    Returns
    -------
    `float` : correlation coefficient over all (flattened) elements.

    Raises
    ------
    AssertionError
        - If the two arrays differ in length.
    """
    assert len(observations) == len(predictions), "observations and predictions must have the same length"
    # FIX: flatten before correlating. The original
    # `np.corrcoef(observations.T, predictions.T)[0, 1]` stacked the rows of
    # both arrays, so for 2D inputs of width e > 1 entry [0, 1] was the
    # correlation between the first two *observation* columns, not between
    # observations and predictions. Flattening is identical for 1D inputs and
    # correct for 2D inputs.
    return np.corrcoef(observations.reshape(-1), predictions.reshape(-1))[0, 1]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def search_best_embedding(
    x: np.ndarray, tau_list: list[int], e_list: list[int], Tp: int, max_L: int | None = None, rng: np.random.Generator | None = None
):
    """Grid-search embedding parameters `(tau, e)` by forecast skill.

    For every combination of lag `tau` and embedding dimension `e`, the first
    half of the time-aligned embeddings serves as the library and the second
    half as test points. Each test point is predicted `Tp` steps ahead as the
    mean of its `e + 1` nearest library neighbors (simplex-style), and the
    Pearson correlation between observations and predictions is recorded.

    Parameters
    ----------
    `x` : `np.ndarray` of shape `(N,)`
    `tau_list` : candidate lags (all positive)
    `e_list` : candidate embedding dimensions (all positive)
    `Tp` : prediction horizon in steps
    `max_L` : optional subsample size drawn from `x`
    `rng` : optional generator used for the subsample

    Returns
    -------
    `np.ndarray` of shape `(len(tau_list), len(e_list))` of correlations.

    Raises
    ------
    AssertionError
        - If any `tau`/`e` is non-positive, `max_L > len(x)`, or the largest
          `(tau, e)` pair leaves no embeddable points.
    """
    assert all(tau > 0 for tau in tau_list), f"tau must be positive, got tau_list={tau_list}"
    assert all(e > 0 for e in e_list), f"e must be positive, got e_list={e_list}"
    assert max_L is None or max_L <= len(x), f"max_L must be less than or equal to len(x), got max_L={max_L} and len(x)={len(x)}"

    if rng is None:
        rng = np.random.default_rng()

    if max_L is not None:
        # NOTE(review): this draws an *unordered* random subset of a time
        # series, which scrambles the temporal order before embedding —
        # confirm whether sorted indices (or a contiguous window) were intended.
        x = x[rng.choice(len(x), min(len(x), max_L), replace=False)]

    # lagged_embed(x, tau, e).shape[0] == len(x) - (e - 1) * tau
    min_L = len(x) - (max(e_list) - 1) * max(tau_list)
    assert min_L > 0, (
        f"Not enough data points to embed, got len(x)(={len(x)}) - max(e)(={max(e_list)}) * max(tau)(={max(tau_list)}) = min_L(={min_L})"
    )

    embeddings: list[np.ndarray] = []
    for tau in tau_list:
        for e in e_list:
            # Align time indices: keep only the trailing min_L rows so every
            # (tau, e) embedding refers to the same time points. (The first
            # row of a full embedding sits at time index 1 + (e - 1) * tau.)
            embeddings.append(lagged_embed(x, tau, e)[-min_L:])  # (min_L, e)

    X = pad(embeddings)  # X.shape == (len(tau_list) * len(e_list), min_L, max(e_list))
    D = pairwise_distance(Tensor(X, dtype=dtypes.float32)).numpy()

    L = min_L
    seq = np.arange(L)
    lib_size = L // 2

    rho = np.zeros((len(tau_list), len(e_list)))

    for i, tau in enumerate(tau_list):
        for j, e in enumerate(e_list):
            batch = i * len(e_list) + j

            sample_indices = np.arange(lib_size, L)

            observations = X[batch, sample_indices, :e]
            predictions = np.zeros((len(sample_indices), e), dtype=x.dtype)

            for k, t in enumerate(sample_indices):
                # [0, 1, 2, 3, 4 | 5, 6, 7, 8, 9, 10]; initialize mask, `|` separates lib and test
                mask = np.ones(L, dtype=bool)

                # [0, 1, 2, 3, 4 | 5, F, 7, 8, 9, 10], t = 6; exclude self
                mask[t] = False

                # [0, 1, 2, 3, 4 | 5, F, 7, 8, F, F ], Tp = 2; exclude last Tp points to prevent out-of-bound indexing on predictions
                mask[-Tp:] = False

                # [0, 1, 2, 3, 4 | F, F, F, F, F, F ], lib_size = 5; exclude test points
                mask[lib_size:] = False

                # find k(=e+1) nearest neighbors in phase space for simplex projection
                indices_masked, _ = topk(D[batch, t, mask], e + 1, largest=False)
                indices = seq[mask][indices_masked]
                # FIX: average over the *neighbor* axis only. The original
                # `.mean()` collapsed the (e + 1, e) neighbor block to a single
                # scalar, so every coordinate of the prediction was identical.
                predictions[k] = X[batch, indices + Tp, :e].mean(axis=0)

            rho[i, j] = calculate_rho(observations, predictions)

    return rho
|
edmkit/embedding.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def lagged_embed(x: np.ndarray, tau: int, e: int):
    """Lagged embedding of a time series `x`.

    Parameters
    ----------
    `x` : `np.ndarray` of shape `(N,)`
    `tau` : `int`
    `e` : `int`

    Returns
    -------
    `np.ndarray` of shape `(N - (e - 1) * tau, e)`

    Raises
    ------
    AssertionError
        - If `x` is not a 1D array.
        - If `tau` or `e` is not positive.
        - If `(e - 1) * tau >= len(x)`.

    Notes
    -----
    - While open to interpretation, it's generally more intuitive to consider the embedding as starting from the `(e - 1) * tau`th element of the original time series and ending at the `len(x) - 1`th element (the last value), rather than starting from the 0th element and ending at `len(x) - 1 - (e - 1) * tau`.
    - This distinction reflects whether we think of "attaching past values to the present" or "attaching future values to the present". The information content of the result is the same either way.
    - The use of `reversed` in the implementation emphasizes this perspective.

    Examples
    --------
    ```
    import numpy as np
    from edmkit.embedding import lagged_embed

    x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    tau = 2
    e = 3

    E = lagged_embed(x, tau, e)
    print(E)
    print(E.shape)
    # [[4 2 0]
    #  [5 3 1]
    #  [6 4 2]
    #  [7 5 3]
    #  [8 6 4]
    #  [9 7 5]]
    # (6, 3)
    ```
    """
    assert len(x.shape) == 1, f"X must be a 1D array, got x.shape={x.shape}"
    assert tau > 0 and e > 0, f"tau and e must be positive, got tau={tau}, e={e}"
    # FIX: the bound must be strict. The original `<=` allowed
    # (e - 1) * tau == len(x), which silently produced an empty (0, e)
    # embedding even though the error message already promised strictness.
    assert (e - 1) * tau < x.shape[0], f"e and tau must satisfy `(e - 1) * tau < len(X)`, got e={e}, tau={tau}"

    return np.array([x[tau * (e - 1) :]] + [x[tau * i : -tau * ((e - 1) - i)] for i in reversed(range(e - 1))]).transpose()
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def double_pendulum(
    m1: float,
    m2: float,
    L1: float,
    L2: float,
    g: float,
    X0: np.ndarray,  # (theta1, theta2, omega1, omega2)
    dt: float,
    t_max: int,
):
    """Simulate a planar double pendulum with forward Euler integration.

    Parameters
    ----------
    `m1`, `m2` : bob masses
    `L1`, `L2` : rod lengths
    `g` : gravitational acceleration
    `X0` : initial state `(theta1, theta2, omega1, omega2)`
    `dt` : integration step
    `t_max` : total simulated time

    Returns
    -------
    `(t, X)` : time grid and state trajectory of shape `(len(t), 4)`.
    """

    def derivatives(state: np.ndarray):
        theta1, theta2, omega1, omega2 = state
        delta = theta1 - theta2

        # shared denominator of both angular accelerations
        denom = 2 * m1 + m2 - m2 * np.cos(2 * delta)

        alpha1 = (
            -g * (2 * m1 + m2) * np.sin(theta1)
            - m2 * g * np.sin(theta1 - 2 * theta2)
            - 2 * np.sin(delta) * m2 * (omega2**2 * L2 + omega1**2 * L1 * np.cos(delta))
        ) / (L1 * denom)

        alpha2 = (2 * np.sin(delta) * (omega1**2 * L1 * (m1 + m2) + g * (m1 + m2) * np.cos(theta1) + omega2**2 * L2 * m2 * np.cos(delta))) / (
            L2 * denom
        )

        # d(theta)/dt is just the angular velocity
        return np.array([omega1, omega2, alpha1, alpha2])

    t = np.arange(0, t_max, dt)
    trajectory = np.zeros((len(t), 4))
    trajectory[0] = X0

    for step in range(1, len(t)):
        trajectory[step] = trajectory[step - 1] + dt * derivatives(trajectory[step - 1])

    return t, trajectory
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def to_xy(L1: float, L2: float, theta1: np.ndarray, theta2: np.ndarray):
    """Convert double-pendulum angles to Cartesian bob positions.

    Angles are measured from the downward vertical, so a zero angle places a
    bob directly below its pivot (negative y).

    Returns
    -------
    `(x1, y1, x2, y2)` : coordinates of the first and second bob.
    """
    first_x = L1 * np.sin(theta1)
    first_y = -L1 * np.cos(theta1)

    # the second bob hangs off the first one
    second_x = first_x + L2 * np.sin(theta2)
    second_y = first_y - L2 * np.cos(theta2)

    return first_x, first_y, second_x, second_y
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def lorenz(
    sigma: float,
    rho: float,
    beta: float,
    X0: np.ndarray,
    dt: float,
    t_max: int,
):
    """Simulate the Lorenz system with forward Euler integration.

    The vector field is written as a state-dependent matrix product:
    `dX/dt = M(x) @ X` with `M(x) = [[-sigma, sigma, 0], [rho, -1, -x], [0, x, -beta]]`.

    Parameters
    ----------
    `sigma`, `rho`, `beta` : Lorenz parameters
    `X0` : initial state `(x, y, z)`
    `dt` : integration step
    `t_max` : total simulated time

    Returns
    -------
    `(t, X)` : time grid and trajectory of shape `(len(t), 3)`.
    """

    def velocity(state: np.ndarray):
        coupling = np.array([[-sigma, sigma, 0], [rho, -1, -state[0]], [0, state[0], -beta]])
        return coupling @ state

    t = np.arange(0, t_max, dt)
    trajectory = np.zeros((len(t), 3))
    trajectory[0] = X0

    for step in range(1, len(t)):
        previous = trajectory[step - 1]
        trajectory[step] = previous + dt * velocity(previous)

    return t, trajectory
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def mackey_glass(tau: float, n: int, beta: float, gamma: float, x0: float, dt: float, t_max: int):
    """Integrate the Mackey-Glass delay differential equation with forward Euler.

    dx/dt = beta * x(t - tau) / (1 + x(t - tau)**n) - gamma * x(t)

    Parameters
    ----------
    `tau` : delay in time units (converted to `int(tau / dt)` grid steps)
    `n` : nonlinearity exponent
    `beta`, `gamma` : production / decay coefficients
    `x0` : constant history value used for the first `tau` time units
    `dt` : integration step
    `t_max` : total simulated time

    Returns
    -------
    `(t, x)` : time grid and the simulated series, both of the same length.
    """

    def derivative(current, delayed):
        return beta * delayed / (1 + delayed**n) - gamma * current

    t = np.arange(0, t_max, dt)
    x = np.zeros_like(t)

    delay_steps = int(tau / dt)
    x[:delay_steps] = x0  # constant history before the delay kicks in

    for step in range(delay_steps, len(t)):
        x[step] = x[step - 1] + dt * derivative(x[step - 1], x[step - delay_steps])

    return t, x
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from edmkit.tensor import Tensor, dtypes
|
|
4
|
+
from edmkit.util import pairwise_distance, topk
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def simplex_projection(
    X: np.ndarray,
    Y: np.ndarray,
    query_points: np.ndarray,
):
    """
    Perform simplex projection from `X` to `Y` using the nearest neighbors of the points specified by `query_points`.

    Each query is predicted as the exponentially weighted mean of the `Y`
    values of its `E + 1` nearest neighbors in `X` (with `E = X.shape[1]`),
    using weights `exp(-d / d_min)`.

    Parameters
    ----------
    `X` : `np.ndarray`
        The input data
    `Y` : `np.ndarray`
        The target data
    `query_points` : `np.ndarray`
        The query points for which to find the nearest neighbors in `X`.

    Returns
    -------
    predictions : `np.ndarray`
        The predicted values based on the weighted mean of the nearest neighbors in `Y`.

    Raises
    ------
    AssertionError
        - If the input arrays `X` and `Y` do not have the same number of points.
    """
    assert X.shape[0] == Y.shape[0], f"X and Y must have the same length, got X.shape={X.shape} and Y.shape={Y.shape}"

    # pairwise_distance returns squared distances; take the root for real ones
    dist = pairwise_distance(Tensor(query_points, dtype=dtypes.float32), Tensor(X, dtype=dtypes.float32)).numpy()
    dist = np.sqrt(dist)

    n_neighbors: int = X.shape[1] + 1  # simplex rule: E + 1 vertices
    predictions = np.zeros(len(query_points))

    for row, row_dist in enumerate(dist):
        # find the n_neighbors nearest library points for this query
        neighbor_idx, neighbor_dist = topk(row_dist, n_neighbors, largest=False)

        # clamp the smallest distance so exact matches don't divide by zero
        scale = np.fmax(neighbor_dist[0], 1e-6)
        weights = np.exp(-neighbor_dist / scale)

        predictions[row] = np.sum(weights * Y[neighbor_idx]) / np.sum(weights)

    return predictions
|
edmkit/smap.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from edmkit.tensor import Tensor, dtypes
|
|
4
|
+
from edmkit.util import pairwise_distance
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def smap(
    X: np.ndarray,
    Y: np.ndarray,
    query_points: np.ndarray,
    theta: float,
):
    """
    Perform S-Map (local linear regression) from `X` to `Y`.

    Parameters
    ----------
    `X` : `np.ndarray`
        The input data
    `Y` : `np.ndarray`
        The target data
    `query_points` : `np.ndarray`
        The query points for which to make predictions.
    `theta` : `float`
        Locality parameter. (0: global linear, >0: local linear)

    Returns
    -------
    predictions : `np.ndarray`
        The predicted values based on the weighted linear regression.

    Raises
    ------
    AssertionError
        - If the input arrays `X` and `Y` do not have the same number of points.
    """
    assert X.shape[0] == Y.shape[0], f"X and Y must have the same length, got X.shape={X.shape} and Y.shape={Y.shape}"

    # promote 1D inputs to column vectors
    if X.ndim == 1:
        X = X[:, None]
    if Y.ndim == 1:
        Y = Y[:, None]
    if query_points.ndim == 1:
        query_points = query_points[:, None]

    # squared pairwise distances from tinygrad, rooted to real distances
    dist = pairwise_distance(Tensor(query_points, dtype=dtypes.float32), Tensor(X, dtype=dtypes.float32)).numpy()
    dist = np.sqrt(dist)

    n_pred = len(query_points)
    predictions = np.zeros(n_pred)

    # prepend a constant-1 column as the intercept term
    X = np.insert(X, 0, 1, axis=1)
    query_points = np.insert(query_points, 0, 1, axis=1)

    for idx in range(n_pred):
        d = dist[idx]

        if theta == 0:
            # theta == 0 degenerates to a global (unweighted) linear regression
            w = np.ones(X.shape[0])
        else:
            mean_d = np.fmax(np.mean(d), 1e-6)  # clamp to avoid division by zero
            w = np.exp(-theta * d / mean_d)
        w = w[:, None]

        # weighted least squares: solve (w * X) C = (w * Y)
        lhs = w * X  # (N, E+1)
        rhs = w * Y  # (N, E')

        try:
            C, _, _, _ = np.linalg.lstsq(lhs, rhs)
        except np.linalg.LinAlgError:
            # singular system: fall back to the pseudo-inverse solution
            C = np.linalg.pinv(lhs) @ rhs

        # NOTE(review): `predictions` is 1D, so this assignment assumes `Y`
        # has a single column; a multi-column `Y` would fail here — confirm
        # whether multi-output targets are in scope.
        predictions[idx] = query_points[idx] @ C

    return predictions
|
edmkit/tensor.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# ruff: noqa: F401
"""Single import point that re-exports tinygrad's `Tensor` and `dtypes`."""

import platform

# Workaround for tinygrad's Metal backend on Intel (non-ARM) macOS: force
# Xcode-based Metal compilation and disable the compiler cache.
# NOTE(review): presumably dodges a Metal compiler issue specific to Intel
# Macs — confirm whether this is still required on current tinygrad versions.
if platform.system() == "Darwin" and platform.processor() != "arm":
    import os

    os.environ["METAL_XCODE"] = "1"
    os.environ["DISABLE_COMPILER_CACHE"] = "1"

    print("Running on macOS, setting METAL_XCODE=1 and DISABLE_COMPILER_CACHE=1")


from tinygrad import Tensor
from tinygrad.dtype import dtypes
|
edmkit/util.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from edmkit.tensor import Tensor
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def pad(As: list[np.ndarray]):
    """Stack 2D arrays of differing widths into one zero-padded 3D array.

    Parameters
    ----------
    `As` : `list` of `np.ndarray` of shape `(L, D_i)`

    Returns
    -------
    Single `np.ndarray` of shape `(B, L, max(D))` where `B` is `len(As)`;
    narrower inputs are zero-padded on the right of the last axis.

    Raises
    ------
    AssertionError
        - If any array in `As` is not 2D.
        - If the first dimension of all arrays in `As` are not equal.
    """
    assert all(A.ndim == 2 for A in As), f"All arrays must be 2D, got {[A.ndim for A in As]}"
    assert all(A.shape[0] == As[0].shape[0] for A in As), f"All arrays must have the same length, got {[A.shape[0] for A in As]}"

    length = As[0].shape[0]
    widest = max(arr.shape[-1] for arr in As)

    stacked = np.zeros((len(As), length, widest), dtype=As[0].dtype)
    for batch, arr in enumerate(As):
        # copy each array into the left part of its slot; the rest stays zero
        stacked[batch, :, : arr.shape[-1]] = arr

    return np.ascontiguousarray(stacked)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def pairwise_distance(A: Tensor, B: Tensor | None = None) -> Tensor:
    """Compute the pairwise squared Euclidean distance between points in `A` (or between points in `A` and `B`).

    Parameters
    ----------
    `A` : `Tensor` of shape `(L, D)` or `(B, L, D)`
        - `B`: batch size
        - `L`: number of points
        - `D`: dimension of each point
    `B` : `Tensor` of shape `(L', D)` or `(B, L', D)`
        - `B`: batch size
        - `L'`: number of points
        - `D`: dimension of each point

    Returns
    -------
    When `A` is of shape `(L, D)`:
        `Tensor` of shape `(L, L)` [or `(L, L')`] where the element at position `(i, j)` is the squared Euclidean distance between `A[i]` and `A[j]` [or between `A[i]` and `B[j]`].
    When `A` is of shape `(B, L, D)`:
        `Tensor` of shape `(B, L, L)` [or `(B, L, L')`] where the element at position `(b, i, j)` is the squared Euclidean distance between `A[b, i]` and `A[b, j]`.

    Raises
    ------
    AssertionError
        - If `A` is not a 2D or 3D tensor.
        - If `B` is not `None` and `A` and `B` have different number of dimensions.
    """
    assert A.ndim == 2 or A.ndim == 3, f"A must be a 2D or 3D tensor, got A.ndim={A.ndim}"
    assert B is None or (A.ndim == B.ndim), f"A and B must have the same number of dimensions, got A.ndim={A.ndim}, B.ndim={B.ndim}"

    # self-distance: compare A against itself
    if B is None:
        B = A

    # expansion of ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, batched over the
    # leading dimension(s); A_sq is a column, B_sq a row, so they broadcast
    A_sq = A.pow(2).sum(-1, keepdim=True)
    B_sq = B.pow(2).sum(-1, keepdim=True).transpose(-1, -2)

    D = A_sq + B_sq - 2 * A.matmul(B.transpose(-1, -2))

    # floating-point cancellation can leave tiny negative values; clamp to zero
    return D.clamp(min_=0)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def dtw(A: Tensor, B: Tensor):
    """
    Computes the Dynamic Time Warping (DTW) distance between two sequences `A` and `B`.

    Parameters
    ----------
    `A` : Tensor of shape `(N,D)`
    `B` : Tensor of shape `(M,D)`

    Returns
    -------
    distance : float
        Accumulated cost along the optimal warping path. Note that the
        per-step cost comes from `pairwise_distance`, which returns *squared*
        Euclidean distances, so this is a squared-distance DTW cost.
    """
    N = A.shape[0]
    M = B.shape[0]

    # (N, M) matrix of squared distances between every pair of points
    D: np.ndarray = pairwise_distance(A, B).numpy()

    # dp[i, j] = minimal accumulated cost aligning A[:i] with B[:j]
    dp = np.full((N + 1, M + 1), np.inf)
    dp[0, 0] = 0.0  # left-top corner

    # Process the DP table along anti-diagonals
    # Grouping by anti-diagonals allows for vectorized computation: i + j = k
    # (every cell on diagonal k depends only on diagonals k-1 and k-2)
    for k in range(2, N + M + 1):
        i_start = max(1, k - M)
        i_end = min(N, k - 1)
        if i_start > i_end:
            continue

        # i and j are vectors of indices that satisfy i + j = k
        i = np.arange(i_start, i_end + 1)
        j = k - i

        # Vectorized version of dp[i,j] = D[i-1, j-1] + min(dp[i-1,j], dp[i,j-1], dp[i-1,j-1])
        # top left top-left
        min_prev = np.minimum(np.minimum(dp[i - 1, j], dp[i, j - 1]), dp[i - 1, j - 1])
        dp[i, j] = D[i - 1, j - 1] + min_prev

    return dp[N, M]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def topk(x: np.ndarray, k: int, largest=True):
    """Find the `k` largest or smallest elements in `x`.

    Parameters
    ----------
    `x` : `np.ndarray` of shape (N,)
    `k` : `int`
    `largest` : `bool`
        Select the `k` largest elements when `True`, smallest when `False`.

    Returns
    -------
    `indices` : `np.ndarray` of shape `(k,)`
    `values` : `np.ndarray` of shape `(k,)`

    Raises
    ------
    AssertionError
        - If `x` is not a 1D array.
        - If `k` is not in the range `(0, len(x)]`.

    Notes
    -----
    `values` are sorted in ascending order regardless of `largest`.
    (i.e. `values[0]` is the smallest value in `values`)
    """
    assert x.ndim == 1, f"x must be a 1D array, got x.ndim={x.ndim}"
    assert k > 0 and k <= len(x), f"k must satisfy 0 < k <= len(x), got k={k}, len(x)={len(x)}"

    # argpartition is an O(N) partial select; negate the key to pick the largest
    key = -x if largest else x
    selected = np.argpartition(key, k - 1)[:k]

    # stable ascending sort of the selected values (see Notes)
    order = np.argsort(x[selected], kind="stable")
    selected = selected[order]

    return selected, x[selected]
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def autocorrelation(x: np.ndarray, max_lag: int, step: int = 1):
    """
    FFT-based autocorrelation of a 1D series at lags `0, step, 2*step, ...`.

    Parameters
    ----------
    `x` : `np.ndarray` The input array for which to compute the autocorrelation.
    `max_lag` : `int` Lags are taken from `range(0, min(max_lag, len(x)), step)`.
    `step` : `int`, `optional` The step size for the lag. Default is 1.

    Returns
    -------
    `np.ndarray` with one autocorrelation value per selected lag. Note that
    the lag `max_lag` itself is excluded (half-open `np.arange` semantics).
    """
    centered = x - np.mean(x)
    n = len(centered)

    # zero-pad to the next power of two >= 2n - 1 so the circular FFT
    # convolution reproduces the linear autocorrelation efficiently
    fft_len = int(2 ** np.ceil(np.log2(2 * n - 1)))
    buffer = np.zeros(fft_len)
    buffer[:n] = centered

    spectrum = np.fft.fft(buffer)
    # Wiener-Khinchin theorem: the inverse FFT of the power spectrum is the
    # autocorrelation function
    acf = np.fft.ifft(spectrum * np.conjugate(spectrum)).real[:n]

    # normalize so that lag 0 equals exactly 1
    acf = acf / (n * np.var(centered))

    return acf[np.arange(0, min(max_lag, n), step)]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: edmkit
|
|
3
|
+
Version: 0.0.0
|
|
4
|
+
Summary: Simple EDM (Empirical Dynamic Modeling) library
|
|
5
|
+
Author-email: FUJISHIGE TEMMA <tenma.x0@gmail.com>
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.13
|
|
8
|
+
Requires-Dist: numpy>=2.3.4
|
|
9
|
+
Requires-Dist: tinygrad>=0.11.0
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# edmkit
|
|
13
|
+
|
|
14
|
+
This library is a collection of tools and utilities that are useful for Empirical Dynamic Modeling (EDM) and related tasks. The library is designed to be fast, lightweight, and easy to use.
|
|
15
|
+
|
|
16
|
+
::: warning
|
|
17
|
+
This library is still under intensive development so API may change in the future.
|
|
18
|
+
:::
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
To install the library, you can use pip:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install edmkit
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Or you can also use [uv](https://docs.astral.sh/uv/):
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
uv add edmkit
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Usage
|
|
35
|
+
|
|
36
|
+
Most of the functions accept and return `numpy` arrays or `edmkit.Tensor`(alias to `tinygrad.Tensor`).
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
edmkit/__init__.py,sha256=LmFQcNU-qPKJKy7KO_wNDhv_ce5uL_qvyDIiiq6-p7w,129
|
|
2
|
+
edmkit/ccm.py,sha256=6KL9wvP5NjFYURiUyJ100uUYcQXpp8j-r1utHcFiMjg,3159
|
|
3
|
+
edmkit/embedding.py,sha256=kKxhe5q7EQ7EsjIMCxkStMt_ZZNe2510CHs_pqP_bAQ,1834
|
|
4
|
+
edmkit/simplex_projection.py,sha256=q9RLLZm0_WNkF4LzsY5B8hUwTSry__gRMKDdVDOaNUE,1519
|
|
5
|
+
edmkit/smap.py,sha256=DQuJJyJmQmItzDDAV8kyM75v7M3Zmqzxio-YOiXhbC4,2133
|
|
6
|
+
edmkit/tensor.py,sha256=BL5KIVQQHwBK1DngXlJljRtjYElpRnp_8eKv82T709Q,362
|
|
7
|
+
edmkit/util.py,sha256=xmD2h_cpWIZfVnppLkdFfSZuAsb3MTyKD0i0dZvWVpo,5853
|
|
8
|
+
edmkit/generate/__init__.py,sha256=eltkZ54rCqYo1fFOizcZAPNIebBoQKBRq8KjSD2-VEM,137
|
|
9
|
+
edmkit/generate/double_pendulum.py,sha256=Hj9ZHakGcp1lZjwGFHjg7i6kJcJdoBqSQ4yoqTWI-ok,1310
|
|
10
|
+
edmkit/generate/lorenz.py,sha256=Mu0Dh8NVpSvnE3NtgjF6HCS9FdMSHOQqVhrhXNzE8UM,419
|
|
11
|
+
edmkit/generate/mackey_glass.py,sha256=oDG9qdDKp3Hbm0NcN3e_y9Yw6xBcekEKN5C99ofKz8c,420
|
|
12
|
+
edmkit-0.0.0.dist-info/METADATA,sha256=l5u88ztbuMi1Heu3HhptNJW40QxBuYLsyjmbdkYQhM0,892
|
|
13
|
+
edmkit-0.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
14
|
+
edmkit-0.0.0.dist-info/licenses/LICENSE,sha256=AUUYs0rQU_1YkfukMYuOHPTN33XjUc881t5Xl-u-i1k,1062
|
|
15
|
+
edmkit-0.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 temma
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|