@icyfenix-dmla/cli 2026.5.2-2114 → 2026.5.2-2258

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/package.json +3 -1
  2. package/scripts/build.js +44 -11
  3. package/shared_modules/__init__.py +39 -0
  4. package/shared_modules/bayesian/__init__.py +13 -0
  5. package/shared_modules/bayesian/bayesian_network.py +105 -0
  6. package/shared_modules/bayesian/gaussian_mixture_model.py +141 -0
  7. package/shared_modules/bayesian/gaussian_mixturemodel.py +141 -0
  8. package/shared_modules/bayesian/multinomial_naive_bayes.py +74 -0
  9. package/shared_modules/bayesian/simple_bayesian_network.py +99 -0
  10. package/shared_modules/bayesian/simple_bayesiannetwork.py +99 -0
  11. package/shared_modules/cnn/__init__.py +9 -0
  12. package/shared_modules/cnn/alex_net.py +65 -0
  13. package/shared_modules/cnn/alexnet.py +65 -0
  14. package/shared_modules/cnn/t_e_r_m1.py +65 -0
  15. package/shared_modules/cnn/tiny_image_net_dataset.py +67 -0
  16. package/shared_modules/cnn/tiny_imagenet_dataset.py +67 -0
  17. package/shared_modules/cnn/tiny_imagenetdataset.py +67 -0
  18. package/shared_modules/cnn/tinyimagenetdataset.py +67 -0
  19. package/shared_modules/linear/__init__.py +9 -0
  20. package/shared_modules/linear/lasso_regression.py +93 -0
  21. package/shared_modules/linear/logistic_regression.py +78 -0
  22. package/shared_modules/linear/naive_bayes.py +141 -0
  23. package/shared_modules/linear/ridge_regression.py +58 -0
  24. package/shared_modules/neural/__init__.py +2 -0
  25. package/shared_modules/neural/perceptron.py +80 -0
  26. package/shared_modules/svm/__init__.py +8 -0
  27. package/shared_modules/svm/kernel_s_v_m.py +98 -0
  28. package/shared_modules/svm/kernel_svm.py +98 -0
  29. package/shared_modules/svm/simple_s_v_m.py +111 -0
  30. package/shared_modules/svm/simple_svm.py +111 -0
  31. package/shared_modules/tree/__init__.py +11 -0
  32. package/shared_modules/tree/ada_boost.py +77 -0
  33. package/shared_modules/tree/decision_tree_classifier.py +235 -0
  34. package/shared_modules/tree/decision_treeclassifier.py +235 -0
  35. package/shared_modules/tree/random_forest_classifier.py +88 -0
  36. package/shared_modules/tree/random_forestclassifier.py +88 -0
  37. package/shared_modules/unsupervised/__init__.py +8 -0
  38. package/shared_modules/unsupervised/k_means.py +127 -0
  39. package/shared_modules/unsupervised/kmeans.py +127 -0
  40. package/shared_modules/unsupervised/p_c_a.py +111 -0
  41. package/shared_modules/unsupervised/pca.py +111 -0
  42. package/src/commands/server.js +209 -4
  43. package/src/index.js +7 -2
  44. package/src/server/sandbox.js +5 -3
  45. package/version.json +4 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@icyfenix-dmla/cli",
3
- "version": "2026.5.2-2114",
3
+ "version": "2026.5.2-2258",
4
4
  "description": "DMLA 沙箱服务命令行工具",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -17,6 +17,8 @@
17
17
  "bin/",
18
18
  "src/",
19
19
  "scripts/",
20
+ "shared_modules/",
21
+ "version.json",
20
22
  "README.md"
21
23
  ],
22
24
  "dependencies": {
package/scripts/build.js CHANGED
@@ -1,6 +1,6 @@
1
1
  /**
2
2
  * 构建脚本:将 local-server 代码复制到 CLI 包中
3
- * 用于 npm 发布时包含完整的服务器代码
3
+ * 用于 npm 发布时包含完整的服务器代码和共享模块
4
4
  */
5
5
  import fs from 'fs'
6
6
  import path from 'path'
@@ -11,17 +11,17 @@ const __dirname = path.dirname(__filename)
11
11
 
12
12
  const rootDir = path.resolve(__dirname, '../../..')
13
13
  const localServerSrc = path.resolve(rootDir, 'local-server/src')
14
+ const localServerShared = path.resolve(rootDir, 'local-server/shared_modules')
14
15
  const cliServerDest = path.resolve(__dirname, '../src/server')
16
+ const cliSharedDest = path.resolve(__dirname, '../shared_modules')
15
17
 
16
18
  console.log('📦 构建 CLI 包...')
17
- console.log(` 源目录: ${localServerSrc}`)
18
- console.log(` 目标目录: ${cliServerDest}`)
19
19
 
20
20
  // 递归复制目录
21
- function copyDir(src, dest) {
21
+ function copyDir(src, dest, filter = null) {
22
22
  if (!fs.existsSync(src)) {
23
23
  console.error(`❌ 源目录不存在: ${src}`)
24
- process.exit(1)
24
+ return false
25
25
  }
26
26
 
27
27
  // 创建目标目录
@@ -36,15 +36,48 @@ function copyDir(src, dest) {
36
36
  const destPath = path.join(dest, entry.name)
37
37
 
38
38
  if (entry.isDirectory()) {
39
- copyDir(srcPath, destPath)
40
- } else if (entry.isFile() && entry.name.endsWith('.js')) {
39
+ copyDir(srcPath, destPath, filter)
40
+ } else if (entry.isFile()) {
41
+ // 应用过滤器
42
+ if (filter && !filter(entry.name)) {
43
+ continue
44
+ }
41
45
  fs.copyFileSync(srcPath, destPath)
42
- console.log(` ✓ 复制: ${entry.name}`)
46
+ console.log(` ✓ ${path.relative(rootDir, srcPath)} → ${path.relative(__dirname, destPath)}`)
43
47
  }
44
48
  }
49
+ return true
45
50
  }
46
51
 
47
- // 执行复制
48
- copyDir(localServerSrc, cliServerDest)
52
// Copy the server code (only .js files).
// The server sources are mandatory: copyDir() reports a missing source
// directory by returning false, so abort here rather than publish a CLI
// package that contains no server code.
console.log('\n📋 复制服务器代码...')
console.log(` 源目录: ${localServerSrc}`)
console.log(` 目标目录: ${cliServerDest}`)
const serverCopied = copyDir(localServerSrc, cliServerDest, (name) => name.endsWith('.js'))
if (!serverCopied) {
  process.exit(1)
}

// Copy the shared Python modules. `__init__.py` already ends with `.py`,
// so a single suffix check covers every file that has to be shipped.
console.log('\n📋 复制共享模块...')
console.log(` 源目录: ${localServerShared}`)
console.log(` 目标目录: ${cliSharedDest}`)
const sharedCopied = copyDir(localServerShared, cliSharedDest, (name) => name.endsWith('.py'))

// The shared modules are optional: warn and keep building when they are absent.
if (!sharedCopied) {
  console.log('⚠️ 共享模块目录不存在,跳过')
}

// Write version.json (build timestamp + the CLI version read from package.json);
// it is used for the version comparison in --dev mode.
const packageJsonPath = path.resolve(__dirname, '../package.json')
const versionInfo = {
  buildTime: new Date().toISOString(),
  cliVersion: JSON.parse(fs.readFileSync(packageJsonPath, 'utf8')).version
}
fs.writeFileSync(
  path.resolve(__dirname, '../version.json'),
  JSON.stringify(versionInfo, null, 2)
)
console.log('\n✅ 版本信息已生成')
49
82
 
50
- console.log('✅ 服务器代码已复制到 CLI ')
83
+ console.log('\n✅ CLI 包构建完成')
@@ -0,0 +1,39 @@
1
# NOTE(review): the imports of `.cnn.tiny_t_e_r_m0_t_e_r_m11`,
# `.bayesian.simple_bayesian_t_e_r_m17`, `.bayesian.gaussian_mixture_t_e_r_m18`,
# `.tree.decision_tree_t_e_r_m15` and `.tree.random_forest_t_e_r_m15` were
# removed: no such modules are shipped in the package, so importing this
# package raised ModuleNotFoundError. Every class they were meant to provide
# is still imported below from a module that exists.
from .cnn.tiny_imagenet_dataset import TinyImageNetDataset
from .cnn.tiny_imagenetdataset import TinyImageNetDataset
from .cnn.tinyimagenetdataset import TinyImageNetDataset
from .cnn.alexnet import AlexNet
from .bayesian.simple_bayesiannetwork import SimpleBayesianNetwork
from .bayesian.gaussian_mixturemodel import GaussianMixtureModel
from .tree.decision_treeclassifier import DecisionTreeClassifier
from .tree.random_forestclassifier import RandomForestClassifier
from .svm.kernel_svm import KernelSVM
from .svm.simple_svm import SimpleSVM
from .unsupervised.kmeans import KMeans
from .unsupervised.pca import PCA
from .cnn.t_e_r_m1 import AlexNet
from .cnn.tiny_image_net_dataset import TinyImageNetDataset
from .cnn.alex_net import AlexNet
from .neural.perceptron import Perceptron
from .unsupervised.p_c_a import PCA
from .tree.ada_boost import AdaBoost
from .tree.random_forest_classifier import RandomForestClassifier
from .unsupervised.k_means import KMeans
from .tree.decision_tree_classifier import DecisionTreeClassifier
from .svm.kernel_s_v_m import KernelSVM
from .bayesian.gaussian_mixture_model import GaussianMixtureModel
from .svm.simple_s_v_m import SimpleSVM
from .bayesian.simple_bayesian_network import SimpleBayesianNetwork
from .bayesian.multinomial_naive_bayes import MultinomialNaiveBayes
from .linear.ridge_regression import RidgeRegression
from .linear.lasso_regression import LassoRegression
from .linear.logistic_regression import LogisticRegression
# Initialisation of the shared package.
# Holds the reusable class definitions from the statistical-learning document series.

from .linear import *
@@ -0,0 +1,13 @@
1
# Bayesian methods package.
# Contains the naive Bayes, Bayesian network and EM algorithm implementations.
#
# NOTE(review): the imports of `.simple_bayesian_t_e_r_m17` and
# `.gaussian_mixture_t_e_r_m18` were removed because no such modules are
# shipped in the package (they made the package import fail with
# ModuleNotFoundError). The same class names are provided by the modules below.

from .bayesian_network import SimpleBayesianNetwork

from .simple_bayesian_network import SimpleBayesianNetwork
from .multinomial_naive_bayes import MultinomialNaiveBayes
from .gaussian_mixture_model import GaussianMixtureModel
from .simple_bayesiannetwork import SimpleBayesianNetwork
from .gaussian_mixturemodel import GaussianMixtureModel

# Each public name is listed once.
__all__ = ['SimpleBayesianNetwork', 'MultinomialNaiveBayes', 'GaussianMixtureModel']
@@ -0,0 +1,105 @@
1
+ """
2
+ 贝叶斯网络实现
3
+ 支持离散变量和精确推断(枚举法)
4
+ """
5
+
6
+ import numpy as np
7
+
8
+
9
class SimpleBayesianNetwork:
    """
    Minimal Bayesian network over discrete variables.

    Nodes are declared with ``add_node`` and their conditional probability
    tables with ``set_cpt``. ``enumerate_inference`` performs exact inference
    by summing the joint distribution over every hidden variable.
    """

    def __init__(self):
        # Node metadata: {name: {'parents': [...], 'values': [...]}}
        self.nodes = {}
        # Conditional probability tables: {name: {parent_value_tuple: {value: prob}}}
        self.cpts = {}
        # Node names ordered so that declared parents come before their children
        self.topo_order = []

    def add_node(self, name, values, parents=None):
        """
        Declare (or redeclare) a node.

        name    : hashable node identifier
        values  : iterable of the discrete values the node can take
        parents : optional iterable of parent node names; a parent may be
                  declared after its children
        """
        # Copy both sequences so that later mutation by the caller cannot
        # silently change the network structure.
        self.nodes[name] = {
            'parents': list(parents) if parents is not None else [],
            'values': list(values),
        }
        self._update_topo_order()

    def set_cpt(self, name, cpt):
        """
        Set the conditional probability table of a node.

        cpt format: {parent_value_tuple: {value: prob}}
        For a node without parents: {(): {value: prob}}
        """
        self.cpts[name] = cpt

    def _update_topo_order(self):
        """Recompute ``topo_order`` with a depth-first walk over the declared nodes."""
        visited = set()
        order = []

        def visit(node):
            if node in visited:
                return
            visited.add(node)
            for parent in self.nodes[node]['parents']:
                # A parent that has not been declared yet is skipped instead
                # of raising KeyError; the order is recomputed on every
                # add_node call, so it is picked up once it is declared.
                if parent in self.nodes:
                    visit(parent)
            order.append(node)

        for node in self.nodes:
            visit(node)

        self.topo_order = order

    def get_prob(self, name, value, parent_values):
        """
        Return P(name=value | parent_values).

        Yields 0 when the parent combination or the value is absent from the
        node's CPT. Raises KeyError when no CPT was set for ``name``.
        """
        parent_key = tuple(parent_values) if parent_values else ()
        return self.cpts[name].get(parent_key, {}).get(value, 0)

    def joint_prob(self, assignment):
        """
        Return the joint probability of a full assignment {node: value}.

        The product runs over ``topo_order``; ``assignment`` must contain a
        value for every declared node and for each of their parents.
        """
        prob = 1.0
        for node in self.topo_order:
            parents = self.nodes[node]['parents']
            parent_values = [assignment[p] for p in parents]
            prob *= self.get_prob(node, assignment[node], parent_values)
        return prob

    def enumerate_inference(self, query, evidence):
        """
        Exact inference by enumeration: compute P(query | evidence).

        query    : {node: '?'} with exactly one node; only the key is used
        evidence : {node: value}

        Returns {value: probability} for every value of the query node.

        Raises ValueError when ``query`` does not contain exactly one node,
        and ZeroDivisionError when the evidence has zero probability (the
        conditional distribution is undefined in that case).
        """
        query_nodes = list(query.keys())
        if len(query_nodes) != 1:
            raise ValueError(
                "enumerate_inference supports exactly one query node, got %d"
                % len(query_nodes)
            )
        query_node = query_nodes[0]
        hidden = [n for n in self.nodes if n != query_node and n not in evidence]

        def enumerate_assignments(variables, current):
            # Depth-first enumeration of every value combination of `variables`.
            if not variables:
                yield current.copy()
                return
            var = variables[0]
            for value in self.nodes[var]['values']:
                current[var] = value
                yield from enumerate_assignments(variables[1:], current)
                del current[var]

        query_values = {}
        total = 0.0

        for qv in self.nodes[query_node]['values']:
            prob_sum = 0.0
            for assignment in enumerate_assignments(hidden, {}):
                assignment.update(evidence)
                assignment[query_node] = qv
                prob_sum += self.joint_prob(assignment)
            query_values[qv] = prob_sum
            total += prob_sum

        if total == 0:
            raise ZeroDivisionError(
                "evidence has zero probability; the conditional distribution is undefined"
            )

        # Normalise so that the returned distribution sums to one.
        return {value: p / total for value, p in query_values.items()}
@@ -0,0 +1,141 @@
1
+ # GaussianMixtureModel 类定义
2
+ # 从文档自动提取生成
3
+
4
+ import numpy as np
5
+
6
class GaussianMixtureModel:
    """
    Gaussian mixture model with full covariance matrices, fitted with the
    EM algorithm.
    """

    def __init__(self, n_components=3, max_iter=100, tol=1e-4):
        """
        n_components : number of mixture components K
        max_iter     : maximum number of EM iterations
        tol          : convergence threshold on the change in log-likelihood
        """
        self.n_components = n_components
        self.max_iter = max_iter
        self.tol = tol

        self.weights_ = None       # mixing coefficients, shape (K,)
        self.means_ = None         # component means, shape (K, n_features)
        self.covariances_ = None   # covariances, shape (K, n_features, n_features)
        self.log_likelihood_history_ = []

    def _initialize(self, X):
        """
        Initialise the parameters from the data.

        Means are K distinct random samples, every covariance is the diagonal
        of the data covariance (plus a small ridge) and the weights are uniform.
        Raises ValueError when there are fewer samples than components.
        """
        n_samples, n_features = X.shape
        K = self.n_components
        if K > n_samples:
            raise ValueError(
                "n_components=%d is larger than the number of samples (%d)"
                % (K, n_samples)
            )

        # Pick K distinct data points as the initial means.
        indices = np.random.choice(n_samples, K, replace=False)
        self.means_ = X[indices].copy()

        # np.cov returns a 0-d array for single-feature data, which np.diag
        # rejects; force a 2-d matrix so the 1-feature case works as well.
        data_cov = np.atleast_2d(np.cov(X.T))
        base_cov = np.diag(np.diag(data_cov)) + 1e-6 * np.eye(n_features)
        self.covariances_ = np.array([base_cov for _ in range(K)])

        # Uniform mixing coefficients.
        self.weights_ = np.ones(K) / K

    def _gaussian_pdf(self, X, mean, cov):
        """
        Return the LOG density of a multivariate Gaussian for every row of X.

        Despite the name, the value is a log-probability, shape (n_samples,).
        """
        n_features = X.shape[1]
        diff = X - mean

        # Small ridge for numerical stability.
        cov_reg = cov + 1e-6 * np.eye(n_features)

        try:
            # Cholesky factor gives both the log-determinant and the
            # Mahalanobis distance without forming an explicit inverse.
            L = np.linalg.cholesky(cov_reg)
            log_det = 2 * np.sum(np.log(np.diag(L)))
            diff_L = np.linalg.solve(L, diff.T).T
            mahalanobis = np.sum(diff_L ** 2, axis=1)
        except np.linalg.LinAlgError:
            # Fall back to the generic formulas when Cholesky fails.
            _sign, log_det = np.linalg.slogdet(cov_reg)
            cov_inv = np.linalg.inv(cov_reg)
            mahalanobis = np.sum(diff @ cov_inv * diff, axis=1)

        return -0.5 * (n_features * np.log(2 * np.pi) + log_det + mahalanobis)

    def _e_step(self, X):
        """
        E-step: compute the responsibilities and the total log-likelihood.

        Returns (responsibilities of shape (n_samples, K), log_likelihood).
        """
        X = np.asarray(X, dtype=float)
        n_samples = X.shape[0]
        K = self.n_components

        # Weighted log-density of every sample under every component.
        log_probs = np.zeros((n_samples, K))
        for k in range(K):
            log_probs[:, k] = (np.log(self.weights_[k] + 1e-10) +
                               self._gaussian_pdf(X, self.means_[k], self.covariances_[k]))

        # log-sum-exp over the components. The same stabilised value is used
        # for the log-likelihood, so samples far from every component no
        # longer underflow to exp(...) == 0 and produce -inf.
        log_max = log_probs.max(axis=1, keepdims=True)
        log_sum = np.log(np.sum(np.exp(log_probs - log_max), axis=1, keepdims=True)) + log_max

        log_likelihood = np.sum(log_sum)
        responsibilities = np.exp(log_probs - log_sum)

        return responsibilities, log_likelihood

    def _m_step(self, X, responsibilities):
        """M-step: update weights, means and covariances from the responsibilities."""
        n_samples, n_features = X.shape
        K = self.n_components

        # Effective number of samples per component (epsilon avoids division by zero).
        N_k = responsibilities.sum(axis=0) + 1e-10

        self.weights_ = N_k / n_samples
        self.means_ = (responsibilities.T @ X) / N_k[:, np.newaxis]

        for k in range(K):
            diff = X - self.means_[k]
            weighted_diff = responsibilities[:, k:k+1] * diff
            self.covariances_[k] = (weighted_diff.T @ diff) / N_k[k]
            # Ridge keeps the covariance positive definite.
            self.covariances_[k] += 1e-6 * np.eye(n_features)

    def fit(self, X):
        """
        Fit the model with EM.

        X : array-like of shape (n_samples, n_features)

        Stops when the log-likelihood changes by less than ``tol`` or after
        ``max_iter`` iterations. Returns self.
        """
        X = np.asarray(X, dtype=float)
        self._initialize(X)
        self.log_likelihood_history_ = []

        prev_log_likelihood = -np.inf

        for iteration in range(self.max_iter):
            responsibilities, log_likelihood = self._e_step(X)
            self.log_likelihood_history_.append(log_likelihood)

            # Convergence check on the log-likelihood improvement.
            if abs(log_likelihood - prev_log_likelihood) < self.tol:
                print(f"EM收敛于第{iteration}次迭代")
                break

            self._m_step(X, responsibilities)
            prev_log_likelihood = log_likelihood

        return self

    def predict(self, X):
        """Return the index of the most responsible component for every sample."""
        responsibilities, _ = self._e_step(X)
        return np.argmax(responsibilities, axis=1)

    def predict_proba(self, X):
        """Return the responsibilities, shape (n_samples, K)."""
        responsibilities, _ = self._e_step(X)
        return responsibilities

    def score(self, X):
        """Return the total log-likelihood of X under the fitted model."""
        _, log_likelihood = self._e_step(X)
        return log_likelihood
@@ -0,0 +1,141 @@
1
+ # GaussianMixtureModel 类定义
2
+ # 从文档自动提取生成
3
+
4
+ import numpy as np
5
+
6
class GaussianMixtureModel:
    """
    Gaussian mixture model with full covariance matrices, fitted with the
    EM algorithm.
    """

    def __init__(self, n_components=3, max_iter=100, tol=1e-4):
        """
        n_components : number of mixture components K
        max_iter     : maximum number of EM iterations
        tol          : convergence threshold on the change in log-likelihood
        """
        self.n_components = n_components
        self.max_iter = max_iter
        self.tol = tol

        self.weights_ = None       # mixing coefficients, shape (K,)
        self.means_ = None         # component means, shape (K, n_features)
        self.covariances_ = None   # covariances, shape (K, n_features, n_features)
        self.log_likelihood_history_ = []

    def _initialize(self, X):
        """
        Initialise the parameters from the data.

        Means are K distinct random samples, every covariance is the diagonal
        of the data covariance (plus a small ridge) and the weights are uniform.
        Raises ValueError when there are fewer samples than components.
        """
        n_samples, n_features = X.shape
        K = self.n_components
        if K > n_samples:
            raise ValueError(
                "n_components=%d is larger than the number of samples (%d)"
                % (K, n_samples)
            )

        # Pick K distinct data points as the initial means.
        indices = np.random.choice(n_samples, K, replace=False)
        self.means_ = X[indices].copy()

        # np.cov returns a 0-d array for single-feature data, which np.diag
        # rejects; force a 2-d matrix so the 1-feature case works as well.
        data_cov = np.atleast_2d(np.cov(X.T))
        base_cov = np.diag(np.diag(data_cov)) + 1e-6 * np.eye(n_features)
        self.covariances_ = np.array([base_cov for _ in range(K)])

        # Uniform mixing coefficients.
        self.weights_ = np.ones(K) / K

    def _gaussian_pdf(self, X, mean, cov):
        """
        Return the LOG density of a multivariate Gaussian for every row of X.

        Despite the name, the value is a log-probability, shape (n_samples,).
        """
        n_features = X.shape[1]
        diff = X - mean

        # Small ridge for numerical stability.
        cov_reg = cov + 1e-6 * np.eye(n_features)

        try:
            # Cholesky factor gives both the log-determinant and the
            # Mahalanobis distance without forming an explicit inverse.
            L = np.linalg.cholesky(cov_reg)
            log_det = 2 * np.sum(np.log(np.diag(L)))
            diff_L = np.linalg.solve(L, diff.T).T
            mahalanobis = np.sum(diff_L ** 2, axis=1)
        except np.linalg.LinAlgError:
            # Fall back to the generic formulas when Cholesky fails.
            _sign, log_det = np.linalg.slogdet(cov_reg)
            cov_inv = np.linalg.inv(cov_reg)
            mahalanobis = np.sum(diff @ cov_inv * diff, axis=1)

        return -0.5 * (n_features * np.log(2 * np.pi) + log_det + mahalanobis)

    def _e_step(self, X):
        """
        E-step: compute the responsibilities and the total log-likelihood.

        Returns (responsibilities of shape (n_samples, K), log_likelihood).
        """
        X = np.asarray(X, dtype=float)
        n_samples = X.shape[0]
        K = self.n_components

        # Weighted log-density of every sample under every component.
        log_probs = np.zeros((n_samples, K))
        for k in range(K):
            log_probs[:, k] = (np.log(self.weights_[k] + 1e-10) +
                               self._gaussian_pdf(X, self.means_[k], self.covariances_[k]))

        # log-sum-exp over the components. The same stabilised value is used
        # for the log-likelihood, so samples far from every component no
        # longer underflow to exp(...) == 0 and produce -inf.
        log_max = log_probs.max(axis=1, keepdims=True)
        log_sum = np.log(np.sum(np.exp(log_probs - log_max), axis=1, keepdims=True)) + log_max

        log_likelihood = np.sum(log_sum)
        responsibilities = np.exp(log_probs - log_sum)

        return responsibilities, log_likelihood

    def _m_step(self, X, responsibilities):
        """M-step: update weights, means and covariances from the responsibilities."""
        n_samples, n_features = X.shape
        K = self.n_components

        # Effective number of samples per component (epsilon avoids division by zero).
        N_k = responsibilities.sum(axis=0) + 1e-10

        self.weights_ = N_k / n_samples
        self.means_ = (responsibilities.T @ X) / N_k[:, np.newaxis]

        for k in range(K):
            diff = X - self.means_[k]
            weighted_diff = responsibilities[:, k:k+1] * diff
            self.covariances_[k] = (weighted_diff.T @ diff) / N_k[k]
            # Ridge keeps the covariance positive definite.
            self.covariances_[k] += 1e-6 * np.eye(n_features)

    def fit(self, X):
        """
        Fit the model with EM.

        X : array-like of shape (n_samples, n_features)

        Stops when the log-likelihood changes by less than ``tol`` or after
        ``max_iter`` iterations. Returns self.
        """
        X = np.asarray(X, dtype=float)
        self._initialize(X)
        self.log_likelihood_history_ = []

        prev_log_likelihood = -np.inf

        for iteration in range(self.max_iter):
            responsibilities, log_likelihood = self._e_step(X)
            self.log_likelihood_history_.append(log_likelihood)

            # Convergence check on the log-likelihood improvement.
            if abs(log_likelihood - prev_log_likelihood) < self.tol:
                print(f"EM收敛于第{iteration}次迭代")
                break

            self._m_step(X, responsibilities)
            prev_log_likelihood = log_likelihood

        return self

    def predict(self, X):
        """Return the index of the most responsible component for every sample."""
        responsibilities, _ = self._e_step(X)
        return np.argmax(responsibilities, axis=1)

    def predict_proba(self, X):
        """Return the responsibilities, shape (n_samples, K)."""
        responsibilities, _ = self._e_step(X)
        return responsibilities

    def score(self, X):
        """Return the total log-likelihood of X under the fitted model."""
        _, log_likelihood = self._e_step(X)
        return log_likelihood
@@ -0,0 +1,74 @@
1
+ # MultinomialNaiveBayes 类定义
2
+ # 从文档自动提取生成
3
+
4
+ import numpy as np
5
+
6
class MultinomialNaiveBayes:
    """
    Multinomial naive Bayes classifier.

    Intended for discrete count features (for example word counts of text).
    """

    def __init__(self, alpha=1.0):
        """
        Parameters:
            alpha : float, additive (Laplace) smoothing added to every feature count
        """
        self.alpha = alpha
        self.class_prior_ = None    # P(y), shape (n_classes,)
        self.feature_prob_ = None   # P(x|y), shape (n_classes, n_features)
        self.classes_ = None        # sorted unique labels seen in fit

    def fit(self, X, y):
        """
        Fit the model.

        Parameters:
            X : array-like, shape (n_samples, n_features)
                feature matrix of counts
            y : array-like, shape (n_samples,)
                class labels

        Returns self.
        """
        # Coerce to arrays: with plain lists `X.shape` does not exist and
        # `y == c` compares the whole list instead of element-wise.
        X = np.asarray(X)
        y = np.asarray(y)

        n_samples, n_features = X.shape
        self.classes_ = np.unique(y)
        n_classes = len(self.classes_)

        # Prior P(y): relative frequency of every class.
        class_counts = np.array([np.sum(y == c) for c in self.classes_])
        self.class_prior_ = class_counts / n_samples

        # Likelihood P(x|y): smoothed per-class feature totals, normalised
        # so that every row sums to one.
        self.feature_prob_ = np.zeros((n_classes, n_features))
        for i, c in enumerate(self.classes_):
            feature_counts = X[y == c].sum(axis=0) + self.alpha
            self.feature_prob_[i] = feature_counts / feature_counts.sum()

        return self

    def predict_log_proba(self, X):
        """
        Return the unnormalised joint log-probability
        log P(y) + sum_j x_j * log P(x_j | y), shape (n_samples, n_classes).
        """
        X = np.asarray(X)
        log_prior = np.log(self.class_prior_)
        log_likelihood = X @ np.log(self.feature_prob_.T)  # (n_samples, n_classes)
        return log_prior + log_likelihood

    def predict(self, X):
        """Return the most probable class label for every row of X."""
        log_proba = self.predict_log_proba(X)
        return self.classes_[np.argmax(log_proba, axis=1)]

    def score(self, X, y):
        """Return the accuracy of ``predict(X)`` against the labels ``y``."""
        y_pred = self.predict(X)
        return np.mean(y_pred == np.asarray(y))