fuzzytool 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fuzzytool/__init__.py +55 -0
- fuzzytool/anfis.py +141 -0
- fuzzytool/cluster.py +231 -0
- fuzzytool/datasets.py +104 -0
- fuzzytool/defuzz.py +84 -0
- fuzzytool/ftransform.py +79 -0
- fuzzytool/inference/__init__.py +6 -0
- fuzzytool/inference/mamdani.py +93 -0
- fuzzytool/inference/tsk.py +77 -0
- fuzzytool/membership.py +149 -0
- fuzzytool/norms.py +105 -0
- fuzzytool/rules.py +34 -0
- fuzzytool/sets.py +210 -0
- fuzzytool/type2/__init__.py +27 -0
- fuzzytool/type2/inference.py +122 -0
- fuzzytool/type2/reduction.py +83 -0
- fuzzytool/type2/sets.py +96 -0
- fuzzytool/viz.py +118 -0
- fuzzytool-0.1.0.dist-info/METADATA +126 -0
- fuzzytool-0.1.0.dist-info/RECORD +22 -0
- fuzzytool-0.1.0.dist-info/WHEEL +4 -0
- fuzzytool-0.1.0.dist-info/licenses/LICENSE +21 -0
fuzzytool/__init__.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""fuzzytool — a clean, extensible fuzzy-logic toolkit in pure Python + NumPy.
|
|
2
|
+
|
|
3
|
+
Quick start (a credit-risk premium system):
|
|
4
|
+
|
|
5
|
+
>>> import fuzzytool as fz
|
|
6
|
+
>>> score = fz.Variable("score", (300, 850), terms=["poor", "fair", "good", "excellent"])
|
|
7
|
+
>>> dti = fz.Variable("dti", (0, 50), terms=["low", "moderate", "high"])
|
|
8
|
+
>>> premium = fz.Variable("premium", (0, 12), terms=["low", "medium", "high"])
|
|
9
|
+
>>> sys = fz.Mamdani()
|
|
10
|
+
>>> _ = sys.rule(score["poor"] | dti["high"], premium["high"])
|
|
11
|
+
>>> _ = sys.rule(score["fair"] & dti["moderate"], premium["medium"])
|
|
12
|
+
>>> _ = sys.rule(score["good"] | score["excellent"], premium["low"])
|
|
13
|
+
>>> sys(score=800, dti=10) < sys(score=520, dti=42)
|
|
14
|
+
True
|
|
15
|
+
|
|
16
|
+
Everything is pluggable behind small Protocols (membership functions,
|
|
17
|
+
t-/s-norms, defuzzifiers): a new variant is a new callable, never a change to
|
|
18
|
+
the inference loop.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from . import cluster, datasets, defuzz, membership, norms, type2
|
|
22
|
+
from .anfis import ANFIS
|
|
23
|
+
from .cluster import fuzzy_cmeans, gustafson_kessel, possibilistic_cmeans
|
|
24
|
+
from .ftransform import FTransform
|
|
25
|
+
from .inference import TSK, Mamdani
|
|
26
|
+
from .membership import gauss, gbell, sigmoid, trap, tri
|
|
27
|
+
from .sets import Variable
|
|
28
|
+
from .type2 import (
|
|
29
|
+
IT2TSK,
|
|
30
|
+
IT2Mamdani,
|
|
31
|
+
it2,
|
|
32
|
+
it2_gauss_uncertain_mean,
|
|
33
|
+
it2_gauss_uncertain_std,
|
|
34
|
+
it2_scale,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
__version__ = "0.1.0"
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
"__version__",
|
|
41
|
+
"Variable",
|
|
42
|
+
"Mamdani",
|
|
43
|
+
"TSK",
|
|
44
|
+
# membership shortcuts
|
|
45
|
+
"tri", "trap", "gauss", "gbell", "sigmoid",
|
|
46
|
+
# interval type-2
|
|
47
|
+
"IT2Mamdani", "IT2TSK",
|
|
48
|
+
"it2", "it2_scale", "it2_gauss_uncertain_mean", "it2_gauss_uncertain_std",
|
|
49
|
+
# fuzzy clustering
|
|
50
|
+
"fuzzy_cmeans", "gustafson_kessel", "possibilistic_cmeans",
|
|
51
|
+
# learning & approximation
|
|
52
|
+
"ANFIS", "FTransform",
|
|
53
|
+
# submodules
|
|
54
|
+
"membership", "norms", "defuzz", "datasets", "type2", "cluster",
|
|
55
|
+
]
|
fuzzytool/anfis.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""ANFIS — an adaptive network-based fuzzy inference system (Jang, 1993).
|
|
2
|
+
|
|
3
|
+
ANFIS is a first-order Takagi-Sugeno system whose parameters are learned from
|
|
4
|
+
data. With a grid partition, each of the ``p`` inputs carries ``n_mf`` Gaussian
|
|
5
|
+
membership functions, giving ``n_mf ** p`` rules; rule ``i`` fires with the
|
|
6
|
+
product of its inputs' memberships and emits an affine function of the inputs.
|
|
7
|
+
|
|
8
|
+
Training uses Jang's **hybrid** scheme, one pass per epoch:
|
|
9
|
+
|
|
10
|
+
1. with the premise (Gaussian) parameters fixed, the consequent (affine)
|
|
11
|
+
parameters are solved in closed form by least squares — the output is linear
|
|
12
|
+
in them;
|
|
13
|
+
2. with the consequents fixed, the premise centers and widths take a
|
|
14
|
+
gradient-descent step on the mean squared error.
|
|
15
|
+
|
|
16
|
+
Pure NumPy; intended for low-dimensional problems (the rule count grows as
|
|
17
|
+
``n_mf ** p``).
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import itertools
|
|
23
|
+
|
|
24
|
+
import numpy as np
|
|
25
|
+
|
|
26
|
+
_EPS = 1e-12  # keeps the firing-strength normalisation and ratios finite


class ANFIS:
    """A trainable first-order Sugeno system over a grid partition.

    Every input carries ``n_mf`` Gaussian membership functions, and every
    combination of one membership function per input forms a rule, giving
    ``n_mf ** n_inputs`` rules.  A rule fires with the product of its selected
    memberships and emits an affine function of the inputs; the prediction is
    the average of the rule outputs weighted by normalised firing strength.

    Args:
        n_inputs: number of input features ``p``.
        n_mf: Gaussian membership functions per input (rules = ``n_mf ** p``).
        learning_rate: step size for the premise gradient updates.

    Attributes:
        p: number of inputs, coerced with ``int``.
        n_mf: membership functions per input, coerced with ``int``.
        lr: learning rate, coerced with ``float``.
        rule_mf: per-rule membership-function index for each input, ``(R, p)``.
        R: number of rules.
        centers_: Gaussian centers ``(p, n_mf)``; ``None`` until :meth:`fit`.
        sigmas_: Gaussian widths ``(p, n_mf)``; ``None`` until :meth:`fit`.
        coeffs_: affine consequents ``(R, p + 1)``; ``None`` until :meth:`fit`.
        history_: training RMSE recorded once per epoch.

    Raises:
        ValueError: if ``n_inputs < 1`` or ``n_mf < 2``.
    """

    def __init__(self, n_inputs: int, n_mf: int = 3,
                 learning_rate: float = 0.05) -> None:
        if n_inputs < 1 or n_mf < 2:
            raise ValueError("need n_inputs >= 1 and n_mf >= 2")
        self.p = int(n_inputs)
        self.n_mf = int(n_mf)
        self.lr = float(learning_rate)
        # Build the grid from the coerced sizes so it always agrees with the
        # stored ``p`` / ``n_mf`` (and integral floats such as 3.0 work).
        # Rule -> per-input MF index, shape (R, p); R = n_mf ** p.
        self.rule_mf = np.array(
            list(itertools.product(range(self.n_mf), repeat=self.p))
        )
        self.R = self.rule_mf.shape[0]
        self.centers_: np.ndarray | None = None   # (p, n_mf)
        self.sigmas_: np.ndarray | None = None    # (p, n_mf)
        self.coeffs_: np.ndarray | None = None    # (R, p + 1)
        self.history_: list[float] = []           # RMSE per epoch

    # --- internals --------------------------------------------------------

    def _init_premise(self, X: np.ndarray) -> None:
        """Spread the Gaussians evenly over each input's observed range."""
        lo, hi = X.min(axis=0), X.max(axis=0)
        # A constant feature has zero range; use a unit span so sigma stays > 0.
        span = np.where(hi > lo, hi - lo, 1.0)
        self.centers_ = np.linspace(lo, hi, self.n_mf).T          # (p, n_mf)
        self.sigmas_ = np.tile((span / (self.n_mf - 1))[:, None], (1, self.n_mf))

    def _firing(self, X: np.ndarray):
        """Return ``(mu, mu_sel, w, wn, W)`` for a batch ``X`` (n, p).

        ``mu`` holds every membership value ``(n, p, n_mf)``, ``mu_sel`` the
        memberships each rule selects ``(n, R, p)``, ``w`` the raw firing
        strengths ``(n, R)``, ``W`` their per-sample sum (plus ``_EPS``) and
        ``wn = w / W`` the normalised strengths.
        """
        c, s = self.centers_, self.sigmas_
        mu = np.exp(-0.5 * ((X[:, :, None] - c[None]) / s[None]) ** 2)  # (n, p, M)
        j = np.broadcast_to(np.arange(self.p), (self.R, self.p))
        mu_sel = mu[:, j, self.rule_mf]                                  # (n, R, p)
        w = mu_sel.prod(axis=2)                                          # (n, R)
        W = w.sum(axis=1, keepdims=True) + _EPS
        return mu, mu_sel, w, w / W, W

    def _lse_consequents(self, X: np.ndarray, y: np.ndarray, wn: np.ndarray) -> None:
        """Solve the affine consequents by least squares for fixed premises."""
        n = X.shape[0]
        xc = np.hstack([X, np.ones((n, 1))])                             # (n, p+1)
        # The output is linear in the consequents: one column per (rule, term).
        phi = (wn[:, :, None] * xc[:, None, :]).reshape(n, self.R * (self.p + 1))
        theta, *_ = np.linalg.lstsq(phi, y, rcond=None)
        self.coeffs_ = theta.reshape(self.R, self.p + 1)

    def _outputs(self, X: np.ndarray, wn: np.ndarray):
        """Return per-rule outputs ``(n, R)`` and the blended prediction ``(n,)``."""
        xc = np.hstack([X, np.ones((X.shape[0], 1))])
        f = xc @ self.coeffs_.T                                          # (n, R)
        return f, (wn * f).sum(axis=1)

    def _rule_groups(self) -> list[tuple[int, int, np.ndarray]]:
        """List ``(input, mf, rule_indices)`` for every membership function in use.

        The grouping depends only on ``rule_mf``, so :meth:`fit` computes it
        once instead of rebuilding boolean masks on every epoch.
        """
        groups = []
        for jj in range(self.p):
            column = self.rule_mf[:, jj]
            for k in range(self.n_mf):
                rules = np.flatnonzero(column == k)
                if rules.size:
                    groups.append((jj, k, rules))
        return groups

    def _premise_gradients(self, X, mu, mu_sel, w, W, f, y_pred, err, groups):
        """Backpropagate the squared error to the Gaussian centers and widths.

        Returns ``(grad_c, grad_s)``, both shaped ``(p, n_mf)``.
        """
        n = X.shape[0]
        # d(prediction)/d(w_r) = (f_r - prediction) / W, chained with the error.
        dL_dw = (err[:, None] * (f - y_pred[:, None]) / W) / n           # (n, R)
        # d(w_r)/d(mu) for a product of memberships is w_r / mu.
        ratio = w[:, :, None] / (mu_sel + _EPS)                          # (n, R, p)
        g = dL_dw[:, :, None] * ratio                                    # (n, R, p)
        grad_mu = np.zeros_like(mu)                                      # (n, p, M)
        for jj, k, rules in groups:
            # A membership function is shared by every rule that selects it.
            grad_mu[:, jj, k] = g[:, rules, jj].sum(axis=1)

        diff = X[:, :, None] - self.centers_[None]                       # (n, p, M)
        grad_c = (grad_mu * mu * diff / self.sigmas_ ** 2).sum(axis=0)
        grad_s = (grad_mu * mu * diff ** 2 / self.sigmas_ ** 3).sum(axis=0)
        return grad_c, grad_s

    # --- public API -------------------------------------------------------

    def fit(self, X, y, epochs: int = 100) -> ANFIS:
        """Train on ``X`` (n, p) and targets ``y`` (n,) for ``epochs`` epochs.

        Each epoch solves the consequents in closed form, records the RMSE in
        ``history_``, then takes one gradient step on the premise parameters.
        After the last epoch the consequents are refitted to the final premises.

        Returns:
            ``self``, to allow chaining.

        Raises:
            ValueError: if ``X`` is not 2-D with ``p`` columns, if ``X`` and
                ``y`` differ in length, or if the training set is empty.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2 or X.shape[1] != self.p:
            raise ValueError(f"X must be 2-D with {self.p} columns")
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y have inconsistent lengths")
        if X.shape[0] == 0:
            # Fail early with a readable message instead of a reduction error.
            raise ValueError("cannot fit ANFIS on an empty training set")
        self._init_premise(X)
        self.history_ = []
        groups = self._rule_groups()

        for _ in range(epochs):
            mu, mu_sel, w, wn, W = self._firing(X)
            self._lse_consequents(X, y, wn)          # closed-form consequents
            f, y_pred = self._outputs(X, wn)
            err = y_pred - y
            self.history_.append(float(np.sqrt(np.mean(err ** 2))))

            grad_c, grad_s = self._premise_gradients(
                X, mu, mu_sel, w, W, f, y_pred, err, groups
            )
            self.centers_ = self.centers_ - self.lr * grad_c
            # Widths are floored so a Gaussian can never collapse to zero.
            self.sigmas_ = np.maximum(self.sigmas_ - self.lr * grad_s, 1e-3)

        # Refit consequents to the final premise parameters.
        wn = self._firing(X)[3]
        self._lse_consequents(X, y, wn)
        return self

    def predict(self, X) -> np.ndarray:
        """Predict outputs for ``X`` (n, p). Requires a prior :meth:`fit`.

        Raises:
            RuntimeError: if the model has not been fitted.
            ValueError: if ``X`` is not 2-D with ``p`` columns.
        """
        if self.coeffs_ is None:
            raise RuntimeError("call fit before predict")
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[1] != self.p:
            raise ValueError(f"X must be 2-D with {self.p} columns")
        wn = self._firing(X)[3]
        return self._outputs(X, wn)[1]

    def __repr__(self) -> str:
        return f"ANFIS(n_inputs={self.p}, n_mf={self.n_mf}, rules={self.R})"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
__all__ = ["ANFIS"]
|
fuzzytool/cluster.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""Fuzzy clustering.
|
|
2
|
+
|
|
3
|
+
Unlike crisp k-means, fuzzy clustering lets each sample belong to several
|
|
4
|
+
clusters with graded membership. This module provides three algorithms and a
|
|
5
|
+
small set of validity metrics, all on plain NumPy arrays of shape
|
|
6
|
+
``(n_samples, n_features)``:
|
|
7
|
+
|
|
8
|
+
* :func:`fuzzy_cmeans` — Bezdek's FCM (spherical clusters, Euclidean norm).
|
|
9
|
+
* :func:`gustafson_kessel` — GK, an adaptive norm per cluster that captures
|
|
10
|
+
ellipsoidal shapes.
|
|
11
|
+
* :func:`possibilistic_cmeans` — PCM, which drops the "memberships sum to 1"
|
|
12
|
+
constraint so outliers get low typicality in every cluster.
|
|
13
|
+
|
|
14
|
+
Each returns a :class:`ClusterResult`. Every algorithm accepts a ``seed`` for
|
|
15
|
+
reproducibility.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
|
|
24
|
+
_EPS = 1e-12
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class ClusterResult:
    """Container for what a fuzzy clustering routine hands back.

    Attributes:
        centers: cluster prototypes, one row per cluster.
        u: membership (or typicality) matrix with one row per cluster and one
            column per sample.
        n_iter: number of iterations the algorithm performed.
        objective: last computed value of the algorithm's objective function.
        labels: for each column of ``u``, the index of the row holding its
            maximum; derived after construction, not passed in.
    """

    centers: np.ndarray
    u: np.ndarray
    n_iter: int
    objective: float
    labels: np.ndarray = field(init=False)

    def __post_init__(self) -> None:
        # Hard assignment: pick the strongest cluster for every sample.
        hardened = self.u.argmax(axis=0)
        self.labels = hardened
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _check(X: np.ndarray, c: int, m: float) -> np.ndarray:
|
|
50
|
+
X = np.asarray(X, dtype=float)
|
|
51
|
+
if X.ndim != 2:
|
|
52
|
+
raise ValueError("X must be 2-D (n_samples, n_features)")
|
|
53
|
+
if not 1 <= c <= X.shape[0]:
|
|
54
|
+
raise ValueError(f"need 1 <= c <= n_samples, got c={c}, n={X.shape[0]}")
|
|
55
|
+
if m <= 1.0:
|
|
56
|
+
raise ValueError(f"fuzziness m must be > 1, got {m}")
|
|
57
|
+
return X
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _init_u(c: int, n: int, rng: np.random.Generator) -> np.ndarray:
|
|
61
|
+
u = rng.random((c, n))
|
|
62
|
+
return u / u.sum(axis=0, keepdims=True)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _centers(X: np.ndarray, u: np.ndarray, m: float) -> np.ndarray:
    """Return the ``u ** m``-weighted mean of the rows of ``X`` per cluster.

    The per-cluster weight total is floored at ``_EPS`` so a cluster with no
    weight does not divide by zero.
    """
    weights = u ** m
    totals = np.fmax(weights.sum(axis=1, keepdims=True), _EPS)
    weighted_sum = weights @ X
    return weighted_sum / totals
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _dist2(X: np.ndarray, centers: np.ndarray) -> np.ndarray:
|
|
71
|
+
"""Squared Euclidean distance, shape ``(c, n)``."""
|
|
72
|
+
return np.sum((centers[:, None, :] - X[None, :, :]) ** 2, axis=2)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _update_u(d2: np.ndarray, m: float) -> np.ndarray:
    """Turn squared distances ``(c, n)`` into memberships whose columns sum to 1.

    Distances are floored at ``_EPS`` first so a sample sitting exactly on a
    center does not produce a division by zero.
    """
    safe = np.fmax(d2, _EPS)
    exponent = -1.0 / (m - 1.0)
    affinity = safe ** exponent
    return affinity / affinity.sum(axis=0, keepdims=True)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def fuzzy_cmeans(X: np.ndarray, c: int, m: float = 2.0, max_iter: int = 150,
                 tol: float = 1e-5, seed: int | None = None) -> ClusterResult:
    """Bezdek's fuzzy c-means.

    Alternates between recomputing the centers from the current memberships
    and recomputing the memberships from the squared Euclidean distances to
    those centers.

    Args:
        X: data, shape ``(n_samples, n_features)``.
        c: number of clusters.
        m: fuzziness exponent (``> 1``; ``2.0`` is standard).
        max_iter: maximum iterations.
        tol: stop once the largest absolute change of any membership between
            two consecutive iterations drops below this value.
        seed: RNG seed for the membership initialization.

    Returns:
        A :class:`ClusterResult` with the centers, the membership matrix, the
        number of iterations performed and the last objective value.
    """
    X = _check(X, c, m)
    u = _init_u(c, X.shape[0], np.random.default_rng(seed))
    # Centers of the random start; these are returned if no iteration runs.
    centers = _centers(X, u, m)
    objective = np.inf
    n_iter = 0
    while n_iter < max_iter:
        n_iter += 1
        centers = _centers(X, u, m)
        sq_dist = _dist2(X, centers)
        u_next = _update_u(sq_dist, m)
        objective = float((u_next ** m * sq_dist).sum())
        shift = np.abs(u_next - u).max()
        u = u_next
        if shift < tol:
            break
    return ClusterResult(centers, u, n_iter, objective)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _gk_norm_matrix(cov: np.ndarray) -> np.ndarray:
    """Return the unit-determinant norm matrix ``det(cov)**(1/p) * inv(cov)``.

    Falls back to the identity (plain Euclidean norm) when ``cov`` is not
    positive-determinant or cannot be inverted to finite values.
    """
    p = cov.shape[0]
    # slogdet works in log space, so the determinant cannot under- or
    # overflow the way ``det`` does for many features or tiny variances.
    sign, logdet = np.linalg.slogdet(cov)
    if sign <= 0 or not np.isfinite(logdet):
        return np.eye(p)
    try:
        inverse = np.linalg.inv(cov)
    except np.linalg.LinAlgError:
        return np.eye(p)
    norm = np.exp(logdet / p) * inverse
    if not np.isfinite(norm).all():
        return np.eye(p)
    return norm


def gustafson_kessel(X: np.ndarray, c: int, m: float = 2.0, max_iter: int = 150,
                     tol: float = 1e-5, seed: int | None = None,
                     reg: float = 1e-10) -> ClusterResult:
    """Gustafson-Kessel clustering (adaptive per-cluster Mahalanobis norm).

    Each cluster learns a covariance-shaped, unit-determinant norm, so GK fits
    ellipsoidal clusters that FCM (fixed spherical norm) cannot.

    Args:
        X: data, shape ``(n_samples, n_features)``.
        c: number of clusters.
        m: fuzziness exponent (``> 1``).
        max_iter: maximum iterations.
        tol: convergence threshold on the membership matrix (max-norm).
        seed: RNG seed for initialization.
        reg: ridge added to each fuzzy covariance for numerical stability.

    Returns:
        A :class:`ClusterResult` with the centers, the membership matrix, the
        number of iterations performed and the last objective value.
    """
    X = _check(X, c, m)
    n, p = X.shape
    rng = np.random.default_rng(seed)
    u = _init_u(c, n, rng)
    # Centers of the random start; these are returned if no iteration runs.
    centers = _centers(X, u, m)
    ridge = reg * np.eye(p)
    j = np.inf
    it = 0
    while it < max_iter:
        it += 1
        centers = _centers(X, u, m)
        um = u ** m
        d2 = np.empty((c, n))
        for i in range(c):
            diff = X - centers[i]                                   # (n, p)
            # Fuzzy covariance: membership-weighted scatter around the center.
            cov = np.einsum("k,kp,kq->pq", um[i], diff, diff) / max(um[i].sum(), _EPS)
            a = _gk_norm_matrix(cov + ridge)
            d2[i] = np.einsum("kp,pq,kq->k", diff, a, diff)
        u_new = _update_u(d2, m)
        j = float((u_new ** m * d2).sum())
        converged = np.abs(u_new - u).max() < tol
        u = u_new
        if converged:
            break
    return ClusterResult(centers, u, it, j)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def possibilistic_cmeans(X: np.ndarray, c: int, m: float = 2.0, max_iter: int = 150,
                         tol: float = 1e-5, seed: int | None = None,
                         k: float = 1.0) -> ClusterResult:
    """Possibilistic c-means (Krishnapuram-Keller).

    PCM drops the constraint that memberships sum to 1: each value is a
    *typicality* in ``[0, 1]``, so noise points can score low everywhere. The
    run starts from the centers and memberships of an FCM run, which are also
    used to fix each cluster's scale ``eta``.

    Args:
        X: data, shape ``(n_samples, n_features)``.
        c: number of clusters.
        m: fuzziness exponent (``> 1``).
        max_iter: maximum iterations (also passed to the initializing FCM).
        tol: stop once no center coordinate moves by this much or more between
            two consecutive iterations (also passed to the initializing FCM).
        seed: RNG seed (passed to the initializing FCM).
        k: scale multiplier for the bandwidth ``eta`` (``1.0`` is standard).

    Returns:
        A :class:`ClusterResult` whose ``u`` holds the typicalities.
    """
    X = _check(X, c, m)
    warm_start = fuzzy_cmeans(X, c, m=m, max_iter=max_iter, tol=tol, seed=seed)
    centers, u = warm_start.centers, warm_start.u

    # Per-cluster bandwidth: membership-weighted mean squared distance.
    sq_dist = _dist2(X, centers)
    weights = u ** m
    eta = k * (weights * sq_dist).sum(axis=1) / np.fmax(weights.sum(axis=1), _EPS)  # (c,)
    eta = np.fmax(eta, _EPS)
    eta_col = eta[:, None]
    exponent = 1.0 / (m - 1.0)

    objective = np.inf
    n_iter = 0
    while n_iter < max_iter:
        n_iter += 1
        sq_dist = _dist2(X, centers)
        typicality = 1.0 / (1.0 + (sq_dist / eta_col) ** exponent)
        moved = _centers(X, typicality, m)
        objective = float(
            (typicality ** m * sq_dist).sum()
            + (eta_col * (1.0 - typicality) ** m).sum()
        )
        shift = np.abs(moved - centers).max()
        centers, u = moved, typicality
        if shift < tol:
            break
    return ClusterResult(centers, u, n_iter, objective)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
# --- validity metrics ------------------------------------------------------
|
|
203
|
+
|
|
204
|
+
def partition_coefficient(u: np.ndarray) -> float:
    """Bezdek's partition coefficient; higher = crisper.

    Computed as the sum of squared memberships divided by the number of
    samples (columns of ``u``). For a partition whose columns sum to 1 the
    value lies in ``[1/c, 1]``.
    """
    memberships = np.asarray(u, dtype=float)
    n_samples = memberships.shape[1]
    return float(np.square(memberships).sum() / n_samples)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def partition_entropy(u: np.ndarray) -> float:
    """Partition entropy; lower = crisper.

    Computed as ``-sum(u * log(u))`` divided by the number of samples
    (columns of ``u``), with ``u`` floored at ``_EPS`` inside the logarithm so
    zero memberships contribute nothing. For a partition whose columns sum to
    1 the value lies in ``[0, log c]``.
    """
    memberships = np.asarray(u, dtype=float)
    log_u = np.log(np.fmax(memberships, _EPS))
    n_samples = memberships.shape[1]
    return float(-(memberships * log_u).sum() / n_samples)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def xie_beni(X, centers: np.ndarray, u: np.ndarray, m: float = 2.0) -> float:
    """Xie-Beni index (compactness / separation); lower is better.

    Args:
        X: data, shape ``(n_samples, n_features)``.
        centers: cluster prototypes, shape ``(c, n_features)`` with ``c >= 2``.
        u: membership matrix, shape ``(c, n_samples)``.
        m: exponent applied to the memberships in the compactness term.

    Returns:
        The membership-weighted sum of squared sample-to-center distances,
        divided by ``n_samples`` times the smallest squared distance between
        two distinct centers (floored at ``_EPS``).

    Raises:
        ValueError: if fewer than two centers are given, since separation is
            undefined for a single cluster.
    """
    X = np.asarray(X, dtype=float)
    centers = np.asarray(centers, dtype=float)
    u = np.asarray(u, dtype=float)
    n_clusters = centers.shape[0]
    if n_clusters < 2:
        # Without this the off-diagonal selection below is empty and ``min``
        # fails with an opaque zero-size reduction error.
        raise ValueError(
            f"xie_beni needs at least 2 cluster centers, got {n_clusters}"
        )
    compactness = (u ** m * _dist2(X, centers)).sum()
    cc = _dist2(centers, centers)
    # Ignore the zero diagonal (each center's distance to itself).
    sep = cc[~np.eye(n_clusters, dtype=bool)].min()
    return float(compactness / (X.shape[0] * np.fmax(sep, _EPS)))
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
__all__ = [
|
|
228
|
+
"ClusterResult",
|
|
229
|
+
"fuzzy_cmeans", "gustafson_kessel", "possibilistic_cmeans",
|
|
230
|
+
"partition_coefficient", "partition_entropy", "xie_beni",
|
|
231
|
+
]
|
fuzzytool/datasets.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Ready-made example systems."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from . import membership as mf
|
|
8
|
+
from .inference import Mamdani
|
|
9
|
+
from .sets import Variable
|
|
10
|
+
from .type2 import IT2Mamdani, it2_gauss_uncertain_mean
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def credit_risk() -> tuple[Mamdani, Variable, Variable, Variable]:
    """Build a credit-risk-premium Mamdani system.

    Inputs are a borrower's credit ``score`` (universe 300-850) and ``dti``
    (debt-to-income ratio, universe 0-50); the output is the ``premium``
    (risk points, universe 0-12) a lender would add on top of its base
    interest rate. Returns ``(system, score, dti, premium)`` so callers can
    inspect the variables (e.g. for plotting).

    >>> system, score, dti, premium = credit_risk()
    >>> safe = system(score=800, dti=10)    # great score, low leverage
    >>> risky = system(score=520, dti=42)   # poor score, high leverage
    >>> safe < risky
    True
    """
    score = Variable("score", (300, 850))
    for term, shape in (
        ("poor", mf.trap(300, 300, 500, 600)),
        ("fair", mf.tri(560, 660, 730)),
        ("good", mf.tri(690, 760, 810)),
        ("excellent", mf.trap(780, 830, 850, 850)),
    ):
        score[term] = shape

    dti = Variable("dti", (0, 50))
    for term, shape in (
        ("low", mf.trap(0, 0, 15, 25)),
        ("moderate", mf.tri(20, 30, 40)),
        ("high", mf.trap(35, 45, 50, 50)),
    ):
        dti[term] = shape

    premium = Variable("premium", (0, 12))
    for term, shape in (
        ("low", mf.tri(0, 1.5, 4)),
        ("medium", mf.tri(3, 6, 9)),
        ("high", mf.tri(8, 10.5, 12)),
    ):
        premium[term] = shape

    system = Mamdani()
    # Weak score or heavy leverage -> high premium; the middle ground ->
    # medium; a strong score -> low.
    system.rule(score["poor"] | dti["high"], premium["high"])
    system.rule(score["fair"] & dti["moderate"], premium["medium"])
    system.rule(score["good"] | score["excellent"], premium["low"])
    return system, score, dti, premium
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def credit_risk_it2() -> tuple[IT2Mamdani, Variable, Variable, Variable]:
    """Build an interval type-2 version of :func:`credit_risk`.

    Every term is created with ``it2_gauss_uncertain_mean``, so the terms
    carry a footprint of uncertainty that models vagueness in how, say, a
    "good" score or a "low" premium is defined. The rule base matches
    :func:`credit_risk`. Returns ``(system, score, dti, premium)``.

    >>> system, score, dti, premium = credit_risk_it2()
    >>> system(score=800, dti=10) < system(score=520, dti=42)
    True
    """
    score = Variable("score", (300, 850))
    for term, args in (
        ("poor", (420, 480, 70)),
        ("fair", (630, 680, 60)),
        ("good", (740, 780, 50)),
        ("excellent", (810, 840, 40)),
    ):
        score[term] = it2_gauss_uncertain_mean(*args)

    dti = Variable("dti", (0, 50))
    for term, args in (
        ("low", (6, 12, 8)),
        ("moderate", (27, 33, 7)),
        ("high", (42, 48, 7)),
    ):
        dti[term] = it2_gauss_uncertain_mean(*args)

    premium = Variable("premium", (0, 12))
    for term, args in (
        ("low", (1.5, 2.5, 1.5)),
        ("medium", (5.5, 6.5, 1.5)),
        ("high", (9.5, 10.5, 1.5)),
    ):
        premium[term] = it2_gauss_uncertain_mean(*args)

    system = IT2Mamdani()
    system.rule(score["poor"] | dti["high"], premium["high"])
    system.rule(score["fair"] & dti["moderate"], premium["medium"])
    system.rule(score["good"] | score["excellent"], premium["low"])
    return system, score, dti, premium
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def make_blobs(
    centers=((0.0, 0.0), (6.0, 6.0), (0.0, 6.0)),
    n_per: int = 60,
    spread: float = 0.7,
    seed: int | None = 0,
) -> np.ndarray:
    """Generate isotropic Gaussian blobs for clustering demos and tests.

    Draws ``n_per`` normally distributed points around every center, with
    standard deviation ``spread`` in each coordinate, and stacks the blobs in
    the order the centers are given.

    Returns the stacked data ``X`` of shape ``(len(centers) * n_per, n_features)``.

    >>> X = make_blobs(seed=0)
    >>> X.shape
    (180, 2)
    """
    rng = np.random.default_rng(seed)
    centers = np.asarray(centers, dtype=float)
    n_features = centers.shape[1]
    blobs = []
    for center in centers:
        # One draw per center, in order, so a fixed seed is reproducible.
        blobs.append(rng.normal(center, spread, size=(n_per, n_features)))
    return np.vstack(blobs)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
__all__ = ["credit_risk", "credit_risk_it2", "make_blobs"]
|
fuzzytool/defuzz.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
"""Defuzzification: collapse an aggregated output set into a crisp value.
|
|
2
|
+
|
|
3
|
+
Each defuzzifier takes the discretized universe ``x`` and the aggregated
|
|
4
|
+
membership ``y`` (same shape) and returns a scalar. They are looked up by name
|
|
5
|
+
through :func:`get_defuzzifier`, so adding a method = registering a function.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Callable
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
|
|
14
|
+
# np.trapz was renamed to np.trapezoid in NumPy 2.0; support both.
|
|
15
|
+
_trapezoid = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def centroid(x: np.ndarray, y: np.ndarray) -> float:
    """Center of gravity of the area under ``y`` (the most common choice).

    When the area is exactly zero (no rule fired) the universe sample at the
    middle index is returned instead.
    """
    area = _trapezoid(y, x)
    if area == 0:
        middle = x[len(x) // 2]
        return float(middle)
    first_moment = _trapezoid(x * y, x)
    return float(first_moment / area)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def bisector(x: np.ndarray, y: np.ndarray) -> float:
    """Abscissa that splits the area under ``y`` into two equal halves.

    The result is the first universe sample at which the cumulative
    trapezoidal area reaches half of the total. When the total area is exactly
    zero, the sample at the middle index is returned instead.
    """
    # Area of every trapezoid between neighbouring samples.
    widths = np.diff(x)
    slices = 0.5 * (y[:-1] + y[1:]) * widths
    area = slices.sum()
    if area == 0:
        return float(x[len(x) // 2])
    # running[i] is the area accumulated from x[0] up to x[i].
    running = np.concatenate(([0.0], np.cumsum(slices)))
    cut = int(np.searchsorted(running, area / 2.0))
    last = len(x) - 1
    return float(x[cut if cut < last else last])
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def mom(x: np.ndarray, y: np.ndarray) -> float:
    """Mean of maxima.

    Averages the universe samples whose membership is (numerically) equal to
    the peak; falls back to the middle sample when the peak is zero.
    """
    peak = y.max()
    if peak == 0:
        return float(x[len(x) // 2])
    at_peak = np.isclose(y, peak)
    plateau = x[at_peak]
    return float(plateau.mean())
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def som(x: np.ndarray, y: np.ndarray) -> float:
    """Smallest of maxima.

    Returns the smallest universe sample whose membership is (numerically)
    equal to the peak; falls back to the middle sample when the peak is zero.
    """
    peak = y.max()
    if peak == 0:
        return float(x[len(x) // 2])
    at_peak = np.isclose(y, peak)
    plateau = x[at_peak]
    return float(plateau.min())
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def lom(x: np.ndarray, y: np.ndarray) -> float:
    """Largest of maxima.

    Returns the largest universe sample whose membership is (numerically)
    equal to the peak; falls back to the middle sample when the peak is zero.
    """
    peak = y.max()
    if peak == 0:
        return float(x[len(x) // 2])
    at_peak = np.isclose(y, peak)
    plateau = x[at_peak]
    return float(plateau.max())
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
_METHODS: dict[str, Callable[[np.ndarray, np.ndarray], float]] = {
|
|
64
|
+
"centroid": centroid,
|
|
65
|
+
"bisector": bisector,
|
|
66
|
+
"mom": mom,
|
|
67
|
+
"som": som,
|
|
68
|
+
"lom": lom,
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_defuzzifier(name: str | Callable) -> Callable:
    """Resolve a defuzzifier by name (or pass a callable through unchanged).

    Raises:
        ValueError: if ``name`` is not callable and is not a registered
            method name; the message lists the available names.
    """
    if callable(name):
        return name
    method = _METHODS.get(name)
    if method is None:
        raise ValueError(
            f"unknown defuzzifier {name!r}; options: {sorted(_METHODS)}"
        )
    return method
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
__all__ = ["centroid", "bisector", "mom", "som", "lom", "get_defuzzifier"]
|