@icyfenix-dmla/cli 2026.5.2-7 → 2026.5.3-821

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47):
  1. package/package.json +9 -6
  2. package/scripts/build.js +44 -11
  3. package/shared_modules/__init__.py +10 -0
  4. package/shared_modules/bayesian/__init__.py +6 -0
  5. package/shared_modules/bayesian/bayesian_network.py +105 -0
  6. package/shared_modules/bayesian/gaussian_mixture_model.py +141 -0
  7. package/shared_modules/bayesian/gaussian_mixturemodel.py +141 -0
  8. package/shared_modules/bayesian/multinomial_naive_bayes.py +74 -0
  9. package/shared_modules/bayesian/simple_bayesian_network.py +99 -0
  10. package/shared_modules/bayesian/simple_bayesiannetwork.py +99 -0
  11. package/shared_modules/cnn/__init__.py +5 -0
  12. package/shared_modules/cnn/alex_net.py +65 -0
  13. package/shared_modules/cnn/alexnet.py +65 -0
  14. package/shared_modules/cnn/t_e_r_m1.py +65 -0
  15. package/shared_modules/cnn/tiny_image_net_dataset.py +67 -0
  16. package/shared_modules/cnn/tiny_imagenet_dataset.py +67 -0
  17. package/shared_modules/cnn/tiny_imagenetdataset.py +67 -0
  18. package/shared_modules/cnn/tinyimagenetdataset.py +67 -0
  19. package/shared_modules/linear/__init__.py +6 -0
  20. package/shared_modules/linear/lasso_regression.py +93 -0
  21. package/shared_modules/linear/logistic_regression.py +78 -0
  22. package/shared_modules/linear/naive_bayes.py +141 -0
  23. package/shared_modules/linear/ridge_regression.py +58 -0
  24. package/shared_modules/neural/__init__.py +4 -0
  25. package/shared_modules/neural/perceptron.py +80 -0
  26. package/shared_modules/svm/__init__.py +5 -0
  27. package/shared_modules/svm/kernel_s_v_m.py +98 -0
  28. package/shared_modules/svm/kernel_svm.py +98 -0
  29. package/shared_modules/svm/simple_s_v_m.py +111 -0
  30. package/shared_modules/svm/simple_svm.py +111 -0
  31. package/shared_modules/tree/__init__.py +6 -0
  32. package/shared_modules/tree/ada_boost.py +77 -0
  33. package/shared_modules/tree/decision_tree_classifier.py +235 -0
  34. package/shared_modules/tree/decision_treeclassifier.py +235 -0
  35. package/shared_modules/tree/random_forest_classifier.py +88 -0
  36. package/shared_modules/tree/random_forestclassifier.py +88 -0
  37. package/shared_modules/unsupervised/__init__.py +5 -0
  38. package/shared_modules/unsupervised/k_means.py +127 -0
  39. package/shared_modules/unsupervised/kmeans.py +127 -0
  40. package/shared_modules/unsupervised/p_c_a.py +111 -0
  41. package/shared_modules/unsupervised/pca.py +111 -0
  42. package/src/commands/data.js +823 -0
  43. package/src/commands/server.js +209 -4
  44. package/src/index.js +23 -2
  45. package/src/server/routes/sandbox.js +70 -3
  46. package/src/server/sandbox.js +87 -11
  47. package/version.json +4 -0
@@ -0,0 +1,111 @@
1
+ # SimpleSVM 类定义
2
+ # 从文档自动提取生成
3
+
4
+ import numpy as np
5
+
6
class SimpleSVM:
    """
    Simplified soft-margin linear SVM.

    The dual problem is optimized with projected, coordinate-wise gradient
    ascent; the soft margin is controlled through ``C``.

    Main steps:
    1. Precompute the linear-kernel Gram matrix K = X @ X.T
    2. Iteratively update the Lagrange multipliers alpha
    3. Select the support vectors from alpha
    4. Recover the hyperplane parameters w and b

    Labels are expected to be encoded as -1 / +1.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000, C=1.0):
        self.lr = learning_rate            # step size of the gradient ascent
        self.n_iterations = n_iterations   # number of full passes over the samples
        self.C = C                         # soft-margin penalty (upper bound of alpha)
        self.alpha = None                  # Lagrange multipliers (set by fit)
        self.w = None                      # hyperplane normal vector
        self.b = None                      # hyperplane intercept
        self.support_vectors_ = None       # samples with alpha above the threshold

    def fit(self, X, y):
        """
        Train the SVM.

        Dual objective:
            max sum(alpha_i) - 0.5 * sum(alpha_i * alpha_j * y_i * y_j * x_i^T x_j)
        subject to 0 <= alpha_i <= C and sum(alpha_i * y_i) = 0.

        Parameters:
            X : array-like, shape (n_samples, n_features)
            y : array-like, shape (n_samples,), labels in {-1, +1}

        Returns:
            self
        """
        # Accept plain Python sequences as well as ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        n_samples, n_features = X.shape

        self.alpha = np.zeros(n_samples)

        # Linear kernel: K[i, j] = x_i^T x_j, computed once up front.
        K = X @ X.T

        for _ in range(self.n_iterations):
            for i in range(n_samples):
                # d(objective)/d(alpha_i) = 1 - y_i * sum_j(alpha_j * y_j * K[j, i])
                gradient = 1 - y[i] * np.sum(self.alpha * y * K[:, i])

                # Ascent step followed by projection onto the box [0, C].
                self.alpha[i] = np.clip(self.alpha[i] + self.lr * gradient, 0, self.C)

            # Approximately enforce sum(alpha * y) = 0 by removing the mean
            # violation along y, then project back onto the box.
            bias = np.mean(self.alpha * y)
            self.alpha = np.clip(self.alpha - bias * y, 0, self.C)

        # Support vectors are the samples whose multiplier is clearly non-zero.
        sv_threshold = 1e-5
        sv_mask = self.alpha > sv_threshold
        self.support_vectors_ = X[sv_mask]
        sv_labels = y[sv_mask]
        sv_alpha = self.alpha[sv_mask]

        # w = sum_i alpha_i * y_i * x_i; only support vectors contribute.
        # With no support vectors this yields a zero vector of length n_features.
        self.w = (sv_alpha * sv_labels) @ self.support_vectors_

        # Intercept: for a support vector y_i (w^T x_i + b) = 1 (or 1 - xi_i in
        # the soft-margin case). As a simplification the average over all
        # support vectors is used.
        if len(self.support_vectors_) > 0:
            self.b = float(np.mean(sv_labels - self.support_vectors_ @ self.w))
        else:
            self.b = 0.0

        return self

    def decision_function(self, X):
        """
        Return w^T x + b for every row of X.

        Positive values fall on the positive side of the hyperplane and
        negative values on the negative side.
        """
        return np.asarray(X, dtype=float) @ self.w + self.b

    def predict(self, X):
        """
        Predict class labels (+1 or -1).

        A sample lying exactly on the hyperplane is assigned to the positive
        class so that the result is always a valid label (never 0).
        """
        return np.where(self.decision_function(X) >= 0, 1, -1)

    def score(self, X, y):
        """Return the classification accuracy on (X, y)."""
        return np.mean(self.predict(X) == np.asarray(y))
@@ -0,0 +1,111 @@
1
+ # SimpleSVM 类定义
2
+ # 从文档自动提取生成
3
+
4
+ import numpy as np
5
+
6
class SimpleSVM:
    """
    Simplified soft-margin linear SVM.

    The dual problem is optimized with projected, coordinate-wise gradient
    ascent; the soft margin is controlled through ``C``.

    Main steps:
    1. Precompute the linear-kernel Gram matrix K = X @ X.T
    2. Iteratively update the Lagrange multipliers alpha
    3. Select the support vectors from alpha
    4. Recover the hyperplane parameters w and b

    Labels are expected to be encoded as -1 / +1.
    """

    def __init__(self, learning_rate=0.01, n_iterations=1000, C=1.0):
        self.lr = learning_rate            # step size of the gradient ascent
        self.n_iterations = n_iterations   # number of full passes over the samples
        self.C = C                         # soft-margin penalty (upper bound of alpha)
        self.alpha = None                  # Lagrange multipliers (set by fit)
        self.w = None                      # hyperplane normal vector
        self.b = None                      # hyperplane intercept
        self.support_vectors_ = None       # samples with alpha above the threshold

    def fit(self, X, y):
        """
        Train the SVM.

        Dual objective:
            max sum(alpha_i) - 0.5 * sum(alpha_i * alpha_j * y_i * y_j * x_i^T x_j)
        subject to 0 <= alpha_i <= C and sum(alpha_i * y_i) = 0.

        Parameters:
            X : array-like, shape (n_samples, n_features)
            y : array-like, shape (n_samples,), labels in {-1, +1}

        Returns:
            self
        """
        # Accept plain Python sequences as well as ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        n_samples, n_features = X.shape

        self.alpha = np.zeros(n_samples)

        # Linear kernel: K[i, j] = x_i^T x_j, computed once up front.
        K = X @ X.T

        for _ in range(self.n_iterations):
            for i in range(n_samples):
                # d(objective)/d(alpha_i) = 1 - y_i * sum_j(alpha_j * y_j * K[j, i])
                gradient = 1 - y[i] * np.sum(self.alpha * y * K[:, i])

                # Ascent step followed by projection onto the box [0, C].
                self.alpha[i] = np.clip(self.alpha[i] + self.lr * gradient, 0, self.C)

            # Approximately enforce sum(alpha * y) = 0 by removing the mean
            # violation along y, then project back onto the box.
            bias = np.mean(self.alpha * y)
            self.alpha = np.clip(self.alpha - bias * y, 0, self.C)

        # Support vectors are the samples whose multiplier is clearly non-zero.
        sv_threshold = 1e-5
        sv_mask = self.alpha > sv_threshold
        self.support_vectors_ = X[sv_mask]
        sv_labels = y[sv_mask]
        sv_alpha = self.alpha[sv_mask]

        # w = sum_i alpha_i * y_i * x_i; only support vectors contribute.
        # With no support vectors this yields a zero vector of length n_features.
        self.w = (sv_alpha * sv_labels) @ self.support_vectors_

        # Intercept: for a support vector y_i (w^T x_i + b) = 1 (or 1 - xi_i in
        # the soft-margin case). As a simplification the average over all
        # support vectors is used.
        if len(self.support_vectors_) > 0:
            self.b = float(np.mean(sv_labels - self.support_vectors_ @ self.w))
        else:
            self.b = 0.0

        return self

    def decision_function(self, X):
        """
        Return w^T x + b for every row of X.

        Positive values fall on the positive side of the hyperplane and
        negative values on the negative side.
        """
        return np.asarray(X, dtype=float) @ self.w + self.b

    def predict(self, X):
        """
        Predict class labels (+1 or -1).

        A sample lying exactly on the hyperplane is assigned to the positive
        class so that the result is always a valid label (never 0).
        """
        return np.where(self.decision_function(X) >= 0, 1, -1)

    def score(self, X, y):
        """Return the classification accuracy on (X, y)."""
        return np.mean(self.predict(X) == np.asarray(y))
@@ -0,0 +1,6 @@
1
+ # Tree 模块
2
+ from .decision_tree_classifier import DecisionTreeClassifier
3
+ from .random_forest_classifier import RandomForestClassifier
4
+ from .ada_boost import AdaBoost
5
+
6
+ __all__ = ['DecisionTreeClassifier', 'RandomForestClassifier', 'AdaBoost']
@@ -0,0 +1,77 @@
1
+ # AdaBoost 类定义
2
+ # 从文档自动提取生成
3
+
4
+ import numpy as np
5
+
6
class DecisionStump:
    """
    One-level decision tree used as the weak learner of AdaBoost.

    The stump thresholds a single feature: with polarity +1 it predicts -1
    for ``x[feature] <= threshold`` and +1 otherwise; polarity -1 flips both.
    """

    def __init__(self):
        self.feature = 0
        self.threshold = 0.0
        self.polarity = 1

    def fit(self, X, y, weights):
        """
        Pick the (feature, threshold, polarity) with the lowest weighted error.

        Every unique value of every feature is tried as a threshold; ties are
        resolved in favour of the first candidate found.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        weights = np.asarray(weights, dtype=float)

        best_error = np.inf
        for feature in range(X.shape[1]):
            column = X[:, feature]
            for threshold in np.unique(column):
                base = np.where(column <= threshold, -1, 1)
                for polarity in (1, -1):
                    error = np.sum(weights[polarity * base != y])
                    if error < best_error:
                        best_error = error
                        self.feature = feature
                        self.threshold = threshold
                        self.polarity = polarity
        return self

    def predict(self, X):
        """Return the stump's -1 / +1 prediction for every row of X."""
        column = np.asarray(X)[:, self.feature]
        return self.polarity * np.where(column <= self.threshold, -1, 1)


class AdaBoost:
    """
    AdaBoost classifier.

    Core idea: train a sequence of weak learners (decision stumps) and
    combine them by weighted vote into a strong learner.

    Labels are expected to be encoded as -1 / +1.
    """

    def __init__(self, n_estimators=50):
        self.n_estimators = n_estimators
        self.stumps = []   # fitted weak learners
        self.alphas = []   # vote weight of each weak learner

    def fit(self, X, y):
        """
        Train the ensemble.

        Steps:
        1. Initialize uniform sample weights
        2. Each round: fit a stump -> weighted error -> learner weight ->
           re-weight the samples
        3. Keep every stump together with its weight

        Returns:
            self
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples = X.shape[0]

        # w_i^(1) = 1 / n
        weights = np.ones(n_samples) / n_samples

        self.stumps = []
        self.alphas = []

        for _ in range(self.n_estimators):
            # Step 1: fit the weak learner on the current sample weights.
            stump = DecisionStump()
            stump.fit(X, y, weights)

            # Step 2: weighted error rate eps_t.
            predictions = stump.predict(X)
            error = np.sum(weights[predictions != y])

            # Keep eps_t strictly inside (0, 1) so the log below stays finite.
            error = min(max(error, 1e-10), 1 - 1e-10)

            # Step 3: learner weight alpha_t = 0.5 * ln((1 - eps_t) / eps_t).
            alpha = 0.5 * np.log((1 - error) / error)

            # Step 4: shrink the weight of correctly classified samples and
            # grow the weight of misclassified ones, then renormalize.
            weights = weights * np.exp(-alpha * y * predictions)
            weights = weights / np.sum(weights)

            self.stumps.append(stump)
            self.alphas.append(alpha)

        return self

    def predict(self, X):
        """
        Weighted-vote prediction H(x) = sign(sum_t alpha_t * h_t(x)).

        A tied (zero) vote is resolved to +1 so that the result is always a
        valid label.
        """
        X = np.asarray(X)
        scores = np.zeros(X.shape[0])

        for stump, alpha in zip(self.stumps, self.alphas):
            scores += alpha * stump.predict(X)

        return np.where(scores >= 0, 1, -1)

    def score(self, X, y):
        """Return the classification accuracy on (X, y)."""
        return np.mean(self.predict(X) == np.asarray(y))
@@ -0,0 +1,235 @@
1
+ # DecisionTreeClassifier 类定义
2
+ # 从文档自动提取生成
3
+
4
+ import numpy as np
5
+
6
class DecisionTreeClassifier:
    """
    CART decision-tree classifier.

    Builds a binary tree using the Gini index as the split criterion and
    supports pre-pruning through a depth limit, a minimum node size and a
    minimum impurity decrease.

    Parameters:
        max_depth : int or None, default 10
            Maximum depth of the tree; ``None`` disables the limit.
        min_samples_split : int, default 2
            Minimum number of samples a node needs to be split further.
        min_gain_threshold : float, default 0.0
            Minimum Gini decrease a split must achieve to be accepted.
    """

    def __init__(self, max_depth=10, min_samples_split=2, min_gain_threshold=0.0):
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_gain_threshold = min_gain_threshold
        self.tree = None

    def _gini(self, y):
        """
        Return the Gini index of the labels ``y`` (0 for an empty array).

        The Gini index measures impurity; smaller means purer.
        """
        if len(y) == 0:
            return 0
        _, counts = np.unique(y, return_counts=True)
        probs = counts / len(y)
        return 1 - np.sum(probs ** 2)

    def _gini_split(self, y_left, y_right):
        """
        Return the Gini index after a split.

        This is the average of both children's Gini indices, weighted by
        their share of the samples.
        """
        n = len(y_left) + len(y_right)
        return (len(y_left) / n) * self._gini(y_left) + \
               (len(y_right) / n) * self._gini(y_right)

    def _best_split(self, X, y):
        """
        Find the feature and threshold with the lowest post-split Gini index.

        Candidate thresholds are the midpoints between adjacent unique values
        of each feature.

        Returns:
            tuple : (feature index, threshold, Gini index); the feature and
            threshold are ``None`` when no valid split exists.
        """
        best_gini = float('inf')
        best_feature = None
        best_threshold = None

        for feature in range(X.shape[1]):
            column = X[:, feature]
            unique_values = np.unique(column)
            thresholds = (unique_values[:-1] + unique_values[1:]) / 2

            for threshold in thresholds:
                left_mask = column <= threshold
                y_left = y[left_mask]
                y_right = y[~left_mask]

                # Skip degenerate splits that leave one branch empty.
                if len(y_left) == 0 or len(y_right) == 0:
                    continue

                gini = self._gini_split(y_left, y_right)
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature
                    best_threshold = threshold

        return best_feature, best_threshold, best_gini

    def _make_leaf(self, y):
        """Return a leaf node predicting the majority class of ``y``."""
        values, counts = np.unique(y, return_counts=True)
        return {'leaf': True, 'class': values[np.argmax(counts)]}

    def _build_tree(self, X, y, depth):
        """
        Recursively build the tree and return its root node (a dict).

        Steps:
        1. Check the stopping rules (depth limit, node size, purity)
        2. If one applies, return a majority-class leaf
        3. Otherwise find the best split and create an internal node
        4. Recurse into the left and right subsets
        """
        depth_exhausted = self.max_depth is not None and depth >= self.max_depth
        if (depth_exhausted or
                len(y) < self.min_samples_split or
                len(np.unique(y)) == 1):
            return self._make_leaf(y)

        feature, threshold, gini = self._best_split(X, y)

        # No valid split, or the impurity decrease is below the threshold.
        if feature is None or gini > self._gini(y) - self.min_gain_threshold:
            return self._make_leaf(y)

        left_mask = X[:, feature] <= threshold
        right_mask = ~left_mask

        return {
            'leaf': False,
            'feature': feature,
            'threshold': threshold,
            'left': self._build_tree(X[left_mask], y[left_mask], depth + 1),
            'right': self._build_tree(X[right_mask], y[right_mask], depth + 1)
        }

    def fit(self, X, y):
        """
        Train the decision tree.

        Parameters:
            X : array-like, shape (n_samples, n_features)
            y : array-like, shape (n_samples,)

        Returns:
            self

        Raises:
            ValueError : if the training set is empty.
        """
        # Accept plain Python sequences as well as ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)
        if y.size == 0:
            raise ValueError("Cannot fit a decision tree on an empty dataset")

        self.tree = self._build_tree(X, y, depth=0)
        return self

    def _predict_one(self, x, node):
        """
        Predict a single sample.

        Starting at ``node``, follow the split conditions down to a leaf and
        return that leaf's class.
        """
        while not node['leaf']:
            go_left = x[node['feature']] <= node['threshold']
            node = node['left'] if go_left else node['right']
        return node['class']

    def predict(self, X):
        """
        Predict the class of every row of X.

        Parameters:
            X : array-like, shape (n_samples, n_features)

        Returns:
            ndarray : predicted classes
        """
        return np.array([self._predict_one(x, self.tree) for x in np.asarray(X)])

    def score(self, X, y):
        """Return the classification accuracy on (X, y)."""
        return np.mean(self.predict(X) == np.asarray(y))