@icyfenix-dmla/cli 2026.5.2-7 → 2026.5.3-821
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -6
- package/scripts/build.js +44 -11
- package/shared_modules/__init__.py +10 -0
- package/shared_modules/bayesian/__init__.py +6 -0
- package/shared_modules/bayesian/bayesian_network.py +105 -0
- package/shared_modules/bayesian/gaussian_mixture_model.py +141 -0
- package/shared_modules/bayesian/gaussian_mixturemodel.py +141 -0
- package/shared_modules/bayesian/multinomial_naive_bayes.py +74 -0
- package/shared_modules/bayesian/simple_bayesian_network.py +99 -0
- package/shared_modules/bayesian/simple_bayesiannetwork.py +99 -0
- package/shared_modules/cnn/__init__.py +5 -0
- package/shared_modules/cnn/alex_net.py +65 -0
- package/shared_modules/cnn/alexnet.py +65 -0
- package/shared_modules/cnn/t_e_r_m1.py +65 -0
- package/shared_modules/cnn/tiny_image_net_dataset.py +67 -0
- package/shared_modules/cnn/tiny_imagenet_dataset.py +67 -0
- package/shared_modules/cnn/tiny_imagenetdataset.py +67 -0
- package/shared_modules/cnn/tinyimagenetdataset.py +67 -0
- package/shared_modules/linear/__init__.py +6 -0
- package/shared_modules/linear/lasso_regression.py +93 -0
- package/shared_modules/linear/logistic_regression.py +78 -0
- package/shared_modules/linear/naive_bayes.py +141 -0
- package/shared_modules/linear/ridge_regression.py +58 -0
- package/shared_modules/neural/__init__.py +4 -0
- package/shared_modules/neural/perceptron.py +80 -0
- package/shared_modules/svm/__init__.py +5 -0
- package/shared_modules/svm/kernel_s_v_m.py +98 -0
- package/shared_modules/svm/kernel_svm.py +98 -0
- package/shared_modules/svm/simple_s_v_m.py +111 -0
- package/shared_modules/svm/simple_svm.py +111 -0
- package/shared_modules/tree/__init__.py +6 -0
- package/shared_modules/tree/ada_boost.py +77 -0
- package/shared_modules/tree/decision_tree_classifier.py +235 -0
- package/shared_modules/tree/decision_treeclassifier.py +235 -0
- package/shared_modules/tree/random_forest_classifier.py +88 -0
- package/shared_modules/tree/random_forestclassifier.py +88 -0
- package/shared_modules/unsupervised/__init__.py +5 -0
- package/shared_modules/unsupervised/k_means.py +127 -0
- package/shared_modules/unsupervised/kmeans.py +127 -0
- package/shared_modules/unsupervised/p_c_a.py +111 -0
- package/shared_modules/unsupervised/pca.py +111 -0
- package/src/commands/data.js +823 -0
- package/src/commands/server.js +209 -4
- package/src/index.js +23 -2
- package/src/server/routes/sandbox.js +70 -3
- package/src/server/sandbox.js +87 -11
- package/version.json +4 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# LassoRegression 类定义
|
|
2
|
+
# 从文档自动提取生成
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
class LassoRegression:
    """Lasso (L1-regularised) linear regression fitted by coordinate descent.

    Features are standardised internally before fitting; the learned
    coefficients and intercept are mapped back to the original feature
    scale, so ``predict`` accepts raw (unscaled) inputs.

    Suitable when:
      1. automatic feature selection is wanted,
      2. there are many features and some may be irrelevant,
      3. a sparse, interpretable model is preferred.
    """

    def __init__(self, alpha=1.0, n_iterations=1000, tol=1e-4):
        self.alpha = alpha  # regularisation strength (lambda)
        self.n_iterations = n_iterations  # maximum number of full sweeps
        self.tol = tol  # stop when the largest coefficient change is below this
        self.coef_ = None
        self.intercept_ = None

    def soft_threshold(self, rho, lambda_):
        """Shrink ``rho`` towards zero by ``lambda_``.

        Returns exactly 0.0 when ``|rho| <= lambda_``; this is what makes
        Lasso coefficients sparse.
        """
        if rho < -lambda_:
            return rho + lambda_
        if rho > lambda_:
            return rho - lambda_
        return 0.0

    def fit(self, X, y):
        """Fit the model with cyclic coordinate descent.

        Parameters:
            X : array-like, shape (n_samples, n_features)
            y : array-like, shape (n_samples,)

        Returns:
            self
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n_samples, n_features = X.shape

        y_mean = np.mean(y)

        # Standardise the features so every coefficient is penalised on the
        # same scale.
        X_mean = np.mean(X, axis=0)
        X_std = np.std(X, axis=0)
        X_std[X_std == 0] = 1  # constant columns: avoid division by zero
        X_normalized = (X - X_mean) / X_std

        coef = np.zeros(n_features)
        # Residual of the current model on centred targets.  It is updated
        # incrementally so each coordinate step costs O(n_samples).
        residual = y - y_mean

        for _ in range(self.n_iterations):
            max_change = 0.0

            for j in range(n_features):
                column = X_normalized[:, j]
                old = coef[j]

                # Correlation of feature j with the partial residual, i.e.
                # the residual with feature j's own contribution added back.
                rho = column @ (residual + old * column) / n_samples
                new = self.soft_threshold(rho, self.alpha)

                if new != old:
                    residual += (old - new) * column
                    coef[j] = new
                    max_change = max(max_change, abs(new - old))

            # Convergence is checked in the standardised space.
            if max_change < self.tol:
                break

        # Map back to the original feature scale (done once, after the loop).
        self.coef_ = coef / X_std
        # The model was fitted on centred features, so the intercept has to
        # absorb the feature means; otherwise predictions on raw X are shifted.
        self.intercept_ = y_mean - X_mean @ self.coef_

        return self

    def predict(self, X):
        """Return predictions ``X @ coef_ + intercept_`` for raw inputs."""
        return X @ self.coef_ + self.intercept_

    def score(self, X, y):
        """Return the R^2 coefficient of determination on ``(X, y)``."""
        y_pred = self.predict(X)
        ss_res = np.sum((y - y_pred) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        return 1 - ss_res / ss_tot

    def get_selected_features(self, threshold=0.01):
        """Return indices of features whose ``|coef_|`` exceeds ``threshold``."""
        return np.where(np.abs(self.coef_) > threshold)[0]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# LogisticRegression 类定义
|
|
2
|
+
# 从文档自动提取生成
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Minimises the mean cross-entropy loss; labels are expected to be 0 or 1.
    """

    def __init__(self, learning_rate=0.1, n_iterations=1000):
        self.lr = learning_rate  # gradient-descent step size
        self.n_iterations = n_iterations  # number of gradient steps
        self.coef_ = None  # feature weights, set by fit()
        self.intercept_ = None  # bias term, set by fit()
        self.loss_history = []  # loss per iteration of the most recent fit()

    def sigmoid(self, z):
        """Return the logistic function of ``z``, clipped to avoid overflow."""
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def cross_entropy_loss(self, y, p):
        """Return the mean binary cross-entropy between labels and probabilities."""
        eps = 1e-15  # keep p away from 0 and 1 so log() stays finite
        p = np.clip(p, eps, 1 - eps)
        return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))

    def fit(self, X, y):
        """Train the model by gradient descent.

        Parameters:
            X : ndarray, shape (n_samples, n_features)
                Feature matrix.
            y : ndarray, shape (n_samples,)
                Label vector (0 or 1).

        Returns:
            self
        """
        n_samples, n_features = X.shape

        self.coef_ = np.zeros(n_features)
        self.intercept_ = 0.0
        # Start a fresh curve so repeated fits do not append to the
        # previous run's history.
        self.loss_history = []

        for _ in range(self.n_iterations):
            p = self.sigmoid(X @ self.coef_ + self.intercept_)

            self.loss_history.append(self.cross_entropy_loss(y, p))

            # Gradient of the mean cross-entropy w.r.t. weights and bias.
            error = p - y
            gradient_coef = (1 / n_samples) * (X.T @ error)
            gradient_intercept = (1 / n_samples) * np.sum(error)

            self.coef_ -= self.lr * gradient_coef
            self.intercept_ -= self.lr * gradient_intercept

        return self

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each row of ``X``."""
        z = X @ self.coef_ + self.intercept_
        return self.sigmoid(z)

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where the probability is >= ``threshold``."""
        proba = self.predict_proba(X)
        return (proba >= threshold).astype(int)

    def score(self, X, y):
        """Return the classification accuracy on ``(X, y)``."""
        y_pred = self.predict(X)
        return np.mean(y_pred == y)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""
|
|
2
|
+
朴素贝叶斯分类器实现
|
|
3
|
+
包含多项式朴素贝叶斯和高斯朴素贝叶斯
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class MultinomialNaiveBayes:
    """Multinomial naive Bayes for count features (e.g. word frequencies)."""

    def __init__(self, alpha=1.0):
        """
        Parameters:
            alpha : float, additive (Laplace) smoothing added to every
                per-class feature count.
        """
        self.alpha = alpha  # smoothing constant
        self.class_prior_ = None  # P(y), one entry per class
        self.feature_prob_ = None  # P(x|y), shape (n_classes, n_features)
        self.classes_ = None

    def fit(self, X, y):
        """
        Estimate class priors and per-class feature probabilities.

        Parameters:
            X : ndarray, shape (n_samples, n_features)
                Feature matrix (counts).
            y : ndarray, shape (n_samples,)
                Class labels.
        """
        n_samples, n_features = X.shape
        self.classes_ = np.unique(y)

        # Prior P(y): relative frequency of each class.
        occurrences = np.array(
            [np.count_nonzero(y == label) for label in self.classes_]
        )
        self.class_prior_ = occurrences / n_samples

        # Likelihood P(x|y): smoothed per-class feature totals, normalised so
        # each row sums to one.
        self.feature_prob_ = np.zeros((len(self.classes_), n_features))
        for row, label in enumerate(self.classes_):
            smoothed = X[y == label].sum(axis=0) + self.alpha
            self.feature_prob_[row] = smoothed / smoothed.sum()

        return self

    def predict_log_proba(self, X):
        """
        Return log P(y) + sum(x * log P(x|y)) per sample and class.

        The result has shape (n_samples, n_classes); it is not normalised
        over classes.
        """
        class_log_likelihood = X @ np.log(self.feature_prob_.T)
        return np.log(self.class_prior_) + class_log_likelihood

    def predict(self, X):
        """
        Return the class with the highest log score for each sample.
        """
        best = np.argmax(self.predict_log_proba(X), axis=1)
        return self.classes_[best]

    def score(self, X, y):
        """Return the classification accuracy on ``(X, y)``."""
        return np.mean(self.predict(X) == y)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class GaussianNaiveBayes:
    """Gaussian naive Bayes for continuous features."""

    def __init__(self):
        self.classes_ = None
        self.class_prior_ = None
        self.mu_ = None  # per-class, per-feature mean
        self.sigma_ = None  # per-class, per-feature standard deviation

    def fit(self, X, y):
        """Estimate class priors and per-class feature means / std-devs."""
        n_samples, n_features = X.shape
        self.classes_ = np.unique(y)
        n_classes = len(self.classes_)

        # Prior P(y): relative frequency of each class.
        occurrences = np.array(
            [np.count_nonzero(y == label) for label in self.classes_]
        )
        self.class_prior_ = occurrences / n_samples

        self.mu_ = np.zeros((n_classes, n_features))
        self.sigma_ = np.zeros((n_classes, n_features))
        for row, label in enumerate(self.classes_):
            members = X[y == label]
            self.mu_[row] = members.mean(axis=0)
            # A small constant keeps the later division and log finite when
            # a feature is constant within a class.
            self.sigma_[row] = members.std(axis=0) + 1e-9

        return self

    def predict_log_proba(self, X):
        """Return log P(y) + sum(log N(x; mu, sigma)) per sample and class.

        The result has shape (n_samples, n_classes); it is not normalised
        over classes.
        """
        scores = np.zeros((X.shape[0], len(self.classes_)))

        for col in range(len(self.classes_)):
            mean = self.mu_[col]
            std = self.sigma_[col]
            standardised = (X - mean) / std
            # Log of the Gaussian density, evaluated per feature.
            per_feature = (
                -0.5 * np.log(2 * np.pi)
                - np.log(std)
                - 0.5 * standardised**2
            )
            scores[:, col] = (
                np.log(self.class_prior_[col]) + np.sum(per_feature, axis=1)
            )

        return scores

    def predict(self, X):
        """Return the class with the highest log score for each sample."""
        best = np.argmax(self.predict_log_proba(X), axis=1)
        return self.classes_[best]

    def score(self, X, y):
        """Return the classification accuracy on ``(X, y)``."""
        return np.mean(self.predict(X) == y)
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# RidgeRegression 类定义
|
|
2
|
+
# 从文档自动提取生成
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
class RidgeRegression:
    """Ridge (L2-regularised) linear regression solved in closed form.

    Suitable when:
      1. features are collinear,
      2. parameter estimates are unstable,
      3. over-fitting needs to be limited.
    """

    def __init__(self, alpha=1.0):
        self.alpha = alpha  # regularisation strength (lambda)
        self.coef_ = None  # feature coefficients
        self.intercept_ = None  # intercept

    def fit(self, X, y):
        """
        Fit the model using the closed-form solution.

        Parameters:
            X : ndarray, shape (n_samples, n_features)
                Feature matrix.
            y : ndarray, shape (n_samples,)
                Target vector.
        """
        # Prepend a column of ones so the intercept is solved jointly.
        design = np.column_stack([np.ones(X.shape[0]), X])
        n_params = design.shape[1]

        # beta = (X^T X + lambda * I)^(-1) X^T y, where the top-left entry of
        # I is zeroed so the intercept is not penalised.
        penalty_mask = np.diag([0.0] + [1.0] * (n_params - 1))

        gram = design.T @ design
        moment = design.T @ y

        self.beta_ = np.linalg.solve(gram + self.alpha * penalty_mask, moment)
        self.intercept_, self.coef_ = self.beta_[0], self.beta_[1:]

        return self

    def predict(self, X):
        """Return predictions ``X @ coef_ + intercept_``."""
        return self.intercept_ + X @ self.coef_

    def score(self, X, y):
        """Return the R^2 coefficient of determination on ``(X, y)``."""
        residual_ss = np.sum((y - self.predict(X)) ** 2)
        total_ss = np.sum((y - np.mean(y)) ** 2)
        return 1 - residual_ss / total_ss
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Perceptron 类定义
|
|
2
|
+
# 从文档自动提取生成
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
class Perceptron:
    """Perceptron classifier for labels in {1, -1}.

    Uses the error-driven update rule
        w = w + eta * y * x   (applied only when a sample is misclassified)
    """

    def __init__(self, learning_rate=1.0, max_iterations=1000):
        self.lr = learning_rate
        self.max_iter = max_iterations
        self.w = None  # weight vector; the last entry is the bias
        self.errors_history = []  # misclassifications per epoch of the last fit()

    def fit(self, X, y):
        """
        Train the perceptron.

        Parameters:
            X : ndarray, shape (n_samples, n_features)
                Input feature matrix.
            y : ndarray, shape (n_samples,)
                Label vector with values in {1, -1}.

        Returns:
            self
        """
        n_samples, n_features = X.shape

        # Augmented form: append a constant-1 column that carries the bias.
        X_aug = np.column_stack([X, np.ones(n_samples)])

        self.w = np.zeros(n_features + 1)
        # Start a fresh record so repeated fits do not append to the
        # previous run's history.
        self.errors_history = []

        for iteration in range(self.max_iter):
            errors = 0
            for sample, target in zip(X_aug, y):
                prediction = np.sign(self.w @ sample)
                if prediction == 0:
                    prediction = -1  # points on the boundary count as -1

                # Update the weights only on a mistake.
                if prediction != target:
                    self.w += self.lr * target * sample
                    errors += 1

            self.errors_history.append(errors)

            # Stop early once an entire epoch is classified correctly.
            if errors == 0:
                print(f"在第 {iteration + 1} 轮迭代后收敛")
                break

        return self

    def predict(self, X):
        """
        Predict labels.

        Parameters:
            X : ndarray, shape (n_samples, n_features)

        Returns:
            predictions : ndarray, shape (n_samples,)
                Predicted labels in {1, -1}.
        """
        n_samples = X.shape[0]
        X_aug = np.column_stack([X, np.ones(n_samples)])
        predictions = np.sign(X_aug @ self.w)
        predictions[predictions == 0] = -1  # same boundary rule as in fit()
        return predictions

    def score(self, X, y):
        """Return the classification accuracy on ``(X, y)``."""
        predictions = self.predict(X)
        return np.mean(predictions == y)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# KernelSVM 类定义
|
|
2
|
+
# 从文档自动提取生成
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
class KernelSVM:
    """Kernel support vector machine for labels in {-1, +1}.

    Supports linear, polynomial and RBF kernels.  Training is a simplified
    projected coordinate ascent on the dual variables, not a full SMO solver.
    """

    def __init__(self, kernel='rbf', C=1.0, gamma=1.0, degree=3, coef0=1):
        self.kernel = kernel
        self.C = C  # upper bound of each dual variable
        self.gamma = gamma  # RBF kernel width
        self.degree = degree  # polynomial kernel degree
        self.coef0 = coef0  # constant term of the polynomial kernel

        self.alpha = None
        self.b = None
        self.X_train = None
        self.y_train = None
        self.support_vectors_ = None
        self.support_vector_labels_ = None
        self.alpha_sv = None

    def _kernel(self, X1, X2):
        """Return the kernel matrix between the rows of ``X1`` and ``X2``.

        Raises:
            ValueError: if ``self.kernel`` is not 'linear', 'poly' or 'rbf'.
        """
        if self.kernel == 'linear':
            return X1 @ X2.T

        if self.kernel == 'poly':
            return (X1 @ X2.T + self.coef0) ** self.degree

        if self.kernel == 'rbf':
            # ||x - x'||^2 = ||x||^2 + ||x'||^2 - 2 * x^T x'
            X1_norm = np.sum(X1 ** 2, axis=1).reshape(-1, 1)
            X2_norm = np.sum(X2 ** 2, axis=1).reshape(1, -1)
            distances = X1_norm + X2_norm - 2 * X1 @ X2.T
            # Rounding can push the expansion slightly below zero, which
            # would make kernel values exceed 1; squared distances cannot
            # be negative.
            distances = np.maximum(distances, 0.0)
            return np.exp(-self.gamma * distances)

        raise ValueError(f"未知核函数: {self.kernel}")

    def fit(self, X, y, lr=0.01, n_iterations=500):
        """Train the model.

        Parameters:
            X : array-like, shape (n_samples, n_features)
            y : array-like, shape (n_samples,), values in {-1, +1}
            lr : float, step size of the dual ascent
            n_iterations : int, number of sweeps over the samples

        Returns:
            self
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        self.X_train = X
        self.y_train = y

        K = self._kernel(X, X)

        self.alpha = np.zeros(n_samples)

        for _ in range(n_iterations):
            for i in range(n_samples):
                # Partial derivative of the dual objective w.r.t. alpha[i].
                gradient = 1 - y[i] * np.sum(self.alpha * y * K[:, i])
                self.alpha[i] += lr * gradient
                self.alpha[i] = np.clip(self.alpha[i], 0, self.C)

            # Push alpha towards the equality constraint sum(alpha * y) = 0,
            # then project back onto the box [0, C].  The clip can break the
            # equality again; the correction is re-applied on every sweep.
            self.alpha = self.alpha - np.mean(self.alpha * y) * y
            self.alpha = np.clip(self.alpha, 0, self.C)

        # Samples with a non-negligible dual variable are the support vectors.
        sv_mask = self.alpha > 1e-5
        self.support_vectors_ = X[sv_mask]
        self.support_vector_labels_ = y[sv_mask]
        self.alpha_sv = self.alpha[sv_mask]

        # Bias: mean gap between the labels and the bias-free decision values
        # on the support vectors.
        if len(self.support_vectors_) > 0:
            K_sv = self._kernel(self.support_vectors_, self.support_vectors_)
            margins = np.sum(
                self.alpha_sv * self.support_vector_labels_ * K_sv, axis=1
            )
            self.b = np.mean(self.support_vector_labels_ - margins)
        else:
            self.b = 0

        return self

    def decision_function(self, X):
        """Return the signed decision value for each row of ``X``."""
        K = self._kernel(X, self.support_vectors_)
        return K @ (self.alpha_sv * self.support_vector_labels_) + self.b

    def predict(self, X):
        """Return labels in {-1, +1}.

        A decision value of exactly zero maps to -1 so the result is always
        a valid label.
        """
        return np.where(self.decision_function(X) > 0, 1, -1)

    def score(self, X, y):
        """Return the classification accuracy on ``(X, y)``."""
        y_pred = self.predict(X)
        return np.mean(y_pred == y)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# KernelSVM 类定义
|
|
2
|
+
# 从文档自动提取生成
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
class KernelSVM:
    """Kernel support vector machine for labels in {-1, +1}.

    Supports linear, polynomial and RBF kernels.  Training is a simplified
    projected coordinate ascent on the dual variables, not a full SMO solver.
    """

    def __init__(self, kernel='rbf', C=1.0, gamma=1.0, degree=3, coef0=1):
        self.kernel = kernel
        self.C = C  # upper bound of each dual variable
        self.gamma = gamma  # RBF kernel width
        self.degree = degree  # polynomial kernel degree
        self.coef0 = coef0  # constant term of the polynomial kernel

        self.alpha = None
        self.b = None
        self.X_train = None
        self.y_train = None
        self.support_vectors_ = None
        self.support_vector_labels_ = None
        self.alpha_sv = None

    def _kernel(self, X1, X2):
        """Return the kernel matrix between the rows of ``X1`` and ``X2``.

        Raises:
            ValueError: if ``self.kernel`` is not 'linear', 'poly' or 'rbf'.
        """
        if self.kernel == 'linear':
            return X1 @ X2.T

        if self.kernel == 'poly':
            return (X1 @ X2.T + self.coef0) ** self.degree

        if self.kernel == 'rbf':
            # ||x - x'||^2 = ||x||^2 + ||x'||^2 - 2 * x^T x'
            X1_norm = np.sum(X1 ** 2, axis=1).reshape(-1, 1)
            X2_norm = np.sum(X2 ** 2, axis=1).reshape(1, -1)
            distances = X1_norm + X2_norm - 2 * X1 @ X2.T
            # Rounding can push the expansion slightly below zero, which
            # would make kernel values exceed 1; squared distances cannot
            # be negative.
            distances = np.maximum(distances, 0.0)
            return np.exp(-self.gamma * distances)

        raise ValueError(f"未知核函数: {self.kernel}")

    def fit(self, X, y, lr=0.01, n_iterations=500):
        """Train the model.

        Parameters:
            X : array-like, shape (n_samples, n_features)
            y : array-like, shape (n_samples,), values in {-1, +1}
            lr : float, step size of the dual ascent
            n_iterations : int, number of sweeps over the samples

        Returns:
            self
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]
        self.X_train = X
        self.y_train = y

        K = self._kernel(X, X)

        self.alpha = np.zeros(n_samples)

        for _ in range(n_iterations):
            for i in range(n_samples):
                # Partial derivative of the dual objective w.r.t. alpha[i].
                gradient = 1 - y[i] * np.sum(self.alpha * y * K[:, i])
                self.alpha[i] += lr * gradient
                self.alpha[i] = np.clip(self.alpha[i], 0, self.C)

            # Push alpha towards the equality constraint sum(alpha * y) = 0,
            # then project back onto the box [0, C].  The clip can break the
            # equality again; the correction is re-applied on every sweep.
            self.alpha = self.alpha - np.mean(self.alpha * y) * y
            self.alpha = np.clip(self.alpha, 0, self.C)

        # Samples with a non-negligible dual variable are the support vectors.
        sv_mask = self.alpha > 1e-5
        self.support_vectors_ = X[sv_mask]
        self.support_vector_labels_ = y[sv_mask]
        self.alpha_sv = self.alpha[sv_mask]

        # Bias: mean gap between the labels and the bias-free decision values
        # on the support vectors.
        if len(self.support_vectors_) > 0:
            K_sv = self._kernel(self.support_vectors_, self.support_vectors_)
            margins = np.sum(
                self.alpha_sv * self.support_vector_labels_ * K_sv, axis=1
            )
            self.b = np.mean(self.support_vector_labels_ - margins)
        else:
            self.b = 0

        return self

    def decision_function(self, X):
        """Return the signed decision value for each row of ``X``."""
        K = self._kernel(X, self.support_vectors_)
        return K @ (self.alpha_sv * self.support_vector_labels_) + self.b

    def predict(self, X):
        """Return labels in {-1, +1}.

        A decision value of exactly zero maps to -1 so the result is always
        a valid label.
        """
        return np.where(self.decision_function(X) > 0, 1, -1)

    def score(self, X, y):
        """Return the classification accuracy on ``(X, y)``."""
        y_pred = self.predict(X)
        return np.mean(y_pred == y)
|