npm - @icyfenix-dmla/cli - Versions diffs - 2026.5.13-2349 → 2026.5.13-2356 - Mend

@icyfenix-dmla/cli 2026.5.13-2349 → 2026.5.13-2356

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/package.json +1 -1
package/version.json +2 -2
package/shared/bayesian/gaussian_mixturemodel.py +0 -141
package/shared/bayesian/simple_bayesiannetwork.py +0 -99
package/shared/cnn/alex_net.py +0 -65
package/shared/cnn/t_e_r_m1.py +0 -65
package/shared/cnn/tiny_image_net_dataset.py +0 -67
package/shared/cnn/tiny_imagenetdataset.py +0 -67
package/shared/cnn/tinyimagenetdataset.py +0 -67
package/shared/svm/kernel_s_v_m.py +0 -98
package/shared/svm/simple_s_v_m.py +0 -111
package/shared/tree/decision_treeclassifier.py +0 -235
package/shared/tree/random_forestclassifier.py +0 -88
package/shared/unsupervised/k_means.py +0 -127
package/shared/unsupervised/p_c_a.py +0 -111

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@icyfenix-dmla/cli",
-  "version": "2026.5.13-2349",
+  "version": "2026.5.13-2356",
   "description": "DMLA 沙箱服务命令行工具",
   "type": "module",
   "main": "src/index.js",

package/version.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "buildTime": "2026-05-13T15:50:26.143Z",
-  "cliVersion": "2026.5.13-2349"
+  "buildTime": "2026-05-13T15:56:58.097Z",
+  "cliVersion": "2026.5.13-2356"
 }

package/shared/bayesian/gaussian_mixturemodel.py DELETED Viewed

@@ -1,141 +0,0 @@
-# GaussianMixtureModel 类定义
-# 从文档自动提取生成
-import numpy as np
-class GaussianMixtureModel:
-    """
-    高斯混合模型实现
-    使用EM算法求解
-    """
-    def __init__(self, n_components=3, max_iter=100, tol=1e-4):
-        self.n_components = n_components
-        self.max_iter = max_iter
-        self.tol = tol  # 收敛阈值
-        self.weights_ = None   # 混合系数 (K,)
-        self.means_ = None     # 均值 (K, n_features)
-        self.covariances_ = None  # 协方差矩阵 (K, n_features, n_features)
-        self.log_likelihood_history_ = []
-    def _initialize(self, X):
-        """初始化参数"""
-        n_samples, n_features = X.shape
-        K = self.n_components
-        # 随机初始化均值（从数据中随机选择K个点）
-        indices = np.random.choice(n_samples, K, replace=False)
-        self.means_ = X[indices].copy()
-        # 初始化协方差为数据协方差的对角线
-        data_cov = np.cov(X.T)
-        self.covariances_ = np.array([np.diag(np.diag(data_cov)) + 1e-6 * np.eye(n_features)
-                                       for _ in range(K)])
-        # 初始化混合系数为均匀分布
-        self.weights_ = np.ones(K) / K
-    def _gaussian_pdf(self, X, mean, cov):
-        """计算多元高斯概率密度"""
-        n_features = X.shape[1]
-        diff = X - mean
-        # 加小值保证数值稳定
-        cov_reg = cov + 1e-6 * np.eye(n_features)
-        # 使用Cholesky分解计算行列式和逆
-        try:
-            L = np.linalg.cholesky(cov_reg)
-            log_det = 2 * np.sum(np.log(np.diag(L)))
-            diff_L = np.linalg.solve(L, diff.T).T
-            mahalanobis = np.sum(diff_L ** 2, axis=1)
-        except np.linalg.LinAlgError:
-            # 如果Cholesky失败，使用标准方法
-            sign, log_det = np.linalg.slogdet(cov_reg)
-            cov_inv = np.linalg.inv(cov_reg)
-            mahalanobis = np.sum(diff @ cov_inv * diff, axis=1)
-        log_prob = -0.5 * (n_features * np.log(2 * np.pi) + log_det + mahalanobis)
-        return log_prob
-    def _e_step(self, X):
-        """E步：计算责任度"""
-        n_samples = X.shape[0]
-        K = self.n_components
-        # 计算每个成分的对数概率
-        log_probs = np.zeros((n_samples, K))
-        for k in range(K):
-            log_probs[:, k] = (np.log(self.weights_[k] + 1e-10) +
-                               self._gaussian_pdf(X, self.means_[k], self.covariances_[k]))
-        # 计算对数似然
-        log_likelihood = np.sum(np.log(np.sum(np.exp(log_probs), axis=1)))
-        # 计算责任度（使用log-sum-exp trick避免数值下溢）
-        log_max = log_probs.max(axis=1, keepdims=True)
-        log_sum = np.log(np.sum(np.exp(log_probs - log_max), axis=1, keepdims=True)) + log_max
-        responsibilities = np.exp(log_probs - log_sum)
-        return responsibilities, log_likelihood
-    def _m_step(self, X, responsibilities):
-        """M步：更新参数"""
-        n_samples, n_features = X.shape
-        K = self.n_components
-        # 计算每个成分的有效样本数
-        N_k = responsibilities.sum(axis=0) + 1e-10
-        # 更新混合系数
-        self.weights_ = N_k / n_samples
-        # 更新均值
-        self.means_ = (responsibilities.T @ X) / N_k[:, np.newaxis]
-        # 更新协方差
-        for k in range(K):
-            diff = X - self.means_[k]
-            weighted_diff = responsibilities[:, k:k+1] * diff
-            self.covariances_[k] = (weighted_diff.T @ diff) / N_k[k]
-            # 添加正则化
-            self.covariances_[k] += 1e-6 * np.eye(n_features)
-    def fit(self, X):
-        """训练模型"""
-        self._initialize(X)
-        self.log_likelihood_history_ = []
-        prev_log_likelihood = -np.inf
-        for iteration in range(self.max_iter):
-            # E步
-            responsibilities, log_likelihood = self._e_step(X)
-            self.log_likelihood_history_.append(log_likelihood)
-            # 检查收敛
-            if abs(log_likelihood - prev_log_likelihood) < self.tol:
-                print(f"EM收敛于第{iteration}次迭代")
-                break
-            # M步
-            self._m_step(X, responsibilities)
-            prev_log_likelihood = log_likelihood
-        return self
-    def predict(self, X):
-        """预测聚类标签"""
-        responsibilities, _ = self._e_step(X)
-        return np.argmax(responsibilities, axis=1)
-    def predict_proba(self, X):
-        """预测属于各成分的概率"""
-        responsibilities, _ = self._e_step(X)
-        return responsibilities
-    def score(self, X):
-        """计算对数似然"""
-        _, log_likelihood = self._e_step(X)
-        return log_likelihood

package/shared/bayesian/simple_bayesiannetwork.py DELETED Viewed

@@ -1,99 +0,0 @@
-# SimpleBayesianNetwork 类定义
-# 从文档自动提取生成
-class SimpleBayesianNetwork:
-    """
-    简单贝叶斯网络实现
-    支持离散变量和精确推断（枚举法）
-    """
-    def __init__(self):
-        self.nodes = {}  # 节点信息：{name: {'parents': [], 'values': []}}
-        self.cpts = {}   # 条件概率表：{name: {parent_values: {value: prob}}}
-        self.topo_order = []  # 拓扑排序
-    def add_node(self, name, values, parents=None):
-        """添加节点"""
-        if parents is None:
-            parents = []
-        self.nodes[name] = {'parents': parents, 'values': values}
-        self._update_topo_order()
-    def set_cpt(self, name, cpt):
-        """
-        设置条件概率表
-        cpt格式：{parent_value_tuple: {value: prob}}
-        对于无父节点的变量：{(): {value: prob}}
-        """
-        self.cpts[name] = cpt
-    def _update_topo_order(self):
-        """计算拓扑排序"""
-        visited = set()
-        order = []
-        def visit(node):
-            if node in visited:
-                return
-            visited.add(node)
-            for parent in self.nodes[node]['parents']:
-                visit(parent)
-            order.append(node)
-        for node in self.nodes:
-            visit(node)
-        self.topo_order = order
-    def get_prob(self, name, value, parent_values):
-        """获取条件概率 P(name=value | parent_values)"""
-        parent_key = tuple(parent_values) if parent_values else ()
-        return self.cpts[name].get(parent_key, {}).get(value, 0)
-    def joint_prob(self, assignment):
-        """计算联合概率 P(X1, X2, ...)"""
-        prob = 1.0
-        for node in self.topo_order:
-            parents = self.nodes[node]['parents']
-            parent_values = [assignment[p] for p in parents]
-            value = assignment[node]
-            prob *= self.get_prob(node, value, parent_values)
-        return prob
-    def enumerate_inference(self, query, evidence):
-        """
-        枚举推断：计算 P(query | evidence)
-        query: {node: '?'} 返回分布
-        evidence: {node: value}
-        """
-        query_nodes = list(query.keys())
-        hidden = [n for n in self.nodes if n not in query_nodes and n not in evidence]
-        def enumerate_assignments(variables, current):
-            if not variables:
-                yield current.copy()
-                return
-            var = variables[0]
-            for value in self.nodes[var]['values']:
-                current[var] = value
-                yield from enumerate_assignments(variables[1:], current)
-            del current[var]
-        query_values = {}
-        total = 0.0
-        query_node = query_nodes[0]
-        for qv in self.nodes[query_node]['values']:
-            prob_sum = 0.0
-            for assignment in enumerate_assignments(hidden, {}):
-                assignment.update(evidence)
-                assignment[query_node] = qv
-                prob_sum += self.joint_prob(assignment)
-            query_values[qv] = prob_sum
-            total += prob_sum
-        # 归一化
-        for k in query_values:
-            query_values[k] /= total
-        return query_values

package/shared/cnn/alex_net.py DELETED Viewed

@@ -1,65 +0,0 @@
-# AlexNet 类定义
-# 从文档自动提取生成
-import torch
-import torch.nn as nn
-from PIL import Image
-class AlexNet(nn.Module):
-    """
-    AlexNet 网络结构
-    适配 Tiny ImageNet 200 类分类任务
-    原始 AlexNet 为 1000 类，这里修改最后一层为 200 类
-    使用 AdaptiveAvgPool2d 确保输出尺寸固定为 6x6
-    """
-    def __init__(self, num_classes=200):
-        super(AlexNet, self).__init__()
-        # 特征提取层 (5 个卷积层)
-        self.features = nn.Sequential(
-            # Conv1: 11x11 卷积，步长 4，输出 96 通道
-            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
-            nn.ReLU(inplace=True),
-            nn.MaxPool2d(kernel_size=3, stride=2),
-            # Conv2: 5x5 卷积，输出 256 通道
-            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
-            nn.ReLU(inplace=True),
-            nn.MaxPool2d(kernel_size=3, stride=2),
-            # Conv3: 3x3 卷积，输出 384 通道
-            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
-            nn.ReLU(inplace=True),
-            # Conv4: 3x3 卷积，输出 384 通道
-            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
-            nn.ReLU(inplace=True),
-            # Conv5: 3x3 卷积，输出 256 通道
-            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
-            nn.ReLU(inplace=True),
-            nn.MaxPool2d(kernel_size=3, stride=2),
-            # 自适应池化，确保输出固定为 6x6
-            nn.AdaptiveAvgPool2d((6, 6))
-        )
-        # 分类层 (3 个全连接层)
-        self.classifier = nn.Sequential(
-            nn.Dropout(p=0.5),
-            nn.Linear(256 * 6 * 6, 4096),
-            nn.ReLU(inplace=True),
-            nn.Dropout(p=0.5),
-            nn.Linear(4096, 4096),
-            nn.ReLU(inplace=True),
-            nn.Linear(4096, num_classes)
-        )
-    def forward(self, x):
-        x = self.features(x)
-        x = torch.flatten(x, 1)
-        x = self.classifier(x)
-        return x

package/shared/cnn/t_e_r_m1.py DELETED Viewed

@@ -1,65 +0,0 @@
-# AlexNet 类定义
-# 从文档自动提取生成
-import torch
-import torch.nn as nn
-from PIL import Image
-class AlexNet(nn.Module):
-    """
-    AlexNet 网络结构
-    适配 Tiny ImageNet 200 类分类任务
-    原始 AlexNet 为 1000 类，这里修改最后一层为 200 类
-    使用 AdaptiveAvgPool2d 确保输出尺寸固定为 6x6
-    """
-    def __init__(self, num_classes=200):
-        super(AlexNet, self).__init__()
-        # 特征提取层 (5 个卷积层)
-        self.features = nn.Sequential(
-            # Conv1: 11x11 卷积，步长 4，输出 96 通道
-            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
-            nn.ReLU(inplace=True),
-            nn.MaxPool2d(kernel_size=3, stride=2),
-            # Conv2: 5x5 卷积，输出 256 通道
-            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
-            nn.ReLU(inplace=True),
-            nn.MaxPool2d(kernel_size=3, stride=2),
-            # Conv3: 3x3 卷积，输出 384 通道
-            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
-            nn.ReLU(inplace=True),
-            # Conv4: 3x3 卷积，输出 384 通道
-            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
-            nn.ReLU(inplace=True),
-            # Conv5: 3x3 卷积，输出 256 通道
-            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
-            nn.ReLU(inplace=True),
-            nn.MaxPool2d(kernel_size=3, stride=2),
-            # 自适应池化，确保输出固定为 6x6
-            nn.AdaptiveAvgPool2d((6, 6))
-        )
-        # 分类层 (3 个全连接层)
-        self.classifier = nn.Sequential(
-            nn.Dropout(p=0.5),
-            nn.Linear(256 * 6 * 6, 4096),
-            nn.ReLU(inplace=True),
-            nn.Dropout(p=0.5),
-            nn.Linear(4096, 4096),
-            nn.ReLU(inplace=True),
-            nn.Linear(4096, num_classes)
-        )
-    def forward(self, x):
-        x = self.features(x)
-        x = torch.flatten(x, 1)
-        x = self.classifier(x)
-        return x

package/shared/cnn/tiny_image_net_dataset.py DELETED Viewed

@@ -1,67 +0,0 @@
-# TinyImageNetDataset 类定义
-# 从文档自动提取生成
-import os
-from PIL import Image
-from torch.utils.data import Dataset, DataLoader
-class TinyImageNetDataset(Dataset):
-    """
-    Tiny ImageNet 200 数据集加载器
-    训练集按类别子目录读取，验证集从标注文件解析标签。
-    支持自定义预处理变换，适配 AlexNet 训练需求。
-    """
-    def __init__(self, root_dir, transform=None, is_train=True):
-        self.root_dir = root_dir
-        self.transform = transform
-        self.is_train = is_train
-        self.samples = []
-        self.classes = []
-        if is_train:
-            train_dir = os.path.join(root_dir, 'train')
-            self.classes = sorted(os.listdir(train_dir))
-            self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
-            for cls in self.classes:
-                cls_dir = os.path.join(train_dir, cls)
-                images_dir = os.path.join(cls_dir, 'images')
-                if os.path.exists(images_dir):
-                    for img_name in os.listdir(images_dir):
-                        if img_name.endswith('.JPEG'):
-                            self.samples.append((
-                                os.path.join(images_dir, img_name),
-                                self.class_to_idx[cls]
-                            ))
-        else:
-            val_dir = os.path.join(root_dir, 'val')
-            val_images_dir = os.path.join(val_dir, 'images')
-            val_annotations = os.path.join(val_dir, 'val_annotations.txt')
-            if os.path.exists(val_annotations):
-                with open(val_annotations, 'r') as f:
-                    for line in f:
-                        parts = line.strip().split('\t')
-                        if len(parts) >= 2:
-                            img_name = parts[0]
-                            cls = parts[1]
-                            if cls not in self.classes:
-                                self.classes.append(cls)
-                            self.samples.append((
-                                os.path.join(val_images_dir, img_name),
-                                self.classes.index(cls)
-                            ))
-    def __len__(self):
-        return len(self.samples)
-    def __getitem__(self, idx):
-        img_path, label = self.samples[idx]
-        image = Image.open(img_path).convert('RGB')
-        if self.transform:
-            image = self.transform(image)
-        return image, label

package/shared/cnn/tiny_imagenetdataset.py DELETED Viewed

@@ -1,67 +0,0 @@
-# TinyImageNetDataset 类定义
-# 从文档自动提取生成
-import os
-from PIL import Image
-from torch.utils.data import Dataset, DataLoader
-class TinyImageNetDataset(Dataset):
-    """
-    Tiny ImageNet 200 数据集加载器
-    训练集按类别子目录读取，验证集从标注文件解析标签。
-    支持自定义预处理变换，适配 AlexNet 训练需求。
-    """
-    def __init__(self, root_dir, transform=None, is_train=True):
-        self.root_dir = root_dir
-        self.transform = transform
-        self.is_train = is_train
-        self.samples = []
-        self.classes = []
-        if is_train:
-            train_dir = os.path.join(root_dir, 'train')
-            self.classes = sorted(os.listdir(train_dir))
-            self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
-            for cls in self.classes:
-                cls_dir = os.path.join(train_dir, cls)
-                images_dir = os.path.join(cls_dir, 'images')
-                if os.path.exists(images_dir):
-                    for img_name in os.listdir(images_dir):
-                        if img_name.endswith('.JPEG'):
-                            self.samples.append((
-                                os.path.join(images_dir, img_name),
-                                self.class_to_idx[cls]
-                            ))
-        else:
-            val_dir = os.path.join(root_dir, 'val')
-            val_images_dir = os.path.join(val_dir, 'images')
-            val_annotations = os.path.join(val_dir, 'val_annotations.txt')
-            if os.path.exists(val_annotations):
-                with open(val_annotations, 'r') as f:
-                    for line in f:
-                        parts = line.strip().split('\t')
-                        if len(parts) >= 2:
-                            img_name = parts[0]
-                            cls = parts[1]
-                            if cls not in self.classes:
-                                self.classes.append(cls)
-                            self.samples.append((
-                                os.path.join(val_images_dir, img_name),
-                                self.classes.index(cls)
-                            ))
-    def __len__(self):
-        return len(self.samples)
-    def __getitem__(self, idx):
-        img_path, label = self.samples[idx]
-        image = Image.open(img_path).convert('RGB')
-        if self.transform:
-            image = self.transform(image)
-        return image, label

package/shared/cnn/tinyimagenetdataset.py DELETED Viewed

@@ -1,67 +0,0 @@
-# TinyImageNetDataset 类定义
-# 从文档自动提取生成
-import os
-from PIL import Image
-from torch.utils.data import Dataset, DataLoader
-class TinyImageNetDataset(Dataset):
-    """
-    Tiny ImageNet 200 数据集加载器
-    训练集按类别子目录读取，验证集从标注文件解析标签。
-    支持自定义预处理变换，适配 AlexNet 训练需求。
-    """
-    def __init__(self, root_dir, transform=None, is_train=True):
-        self.root_dir = root_dir
-        self.transform = transform
-        self.is_train = is_train
-        self.samples = []
-        self.classes = []
-        if is_train:
-            train_dir = os.path.join(root_dir, 'train')
-            self.classes = sorted(os.listdir(train_dir))
-            self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}
-            for cls in self.classes:
-                cls_dir = os.path.join(train_dir, cls)
-                images_dir = os.path.join(cls_dir, 'images')
-                if os.path.exists(images_dir):
-                    for img_name in os.listdir(images_dir):
-                        if img_name.endswith('.JPEG'):
-                            self.samples.append((
-                                os.path.join(images_dir, img_name),
-                                self.class_to_idx[cls]
-                            ))
-        else:
-            val_dir = os.path.join(root_dir, 'val')
-            val_images_dir = os.path.join(val_dir, 'images')
-            val_annotations = os.path.join(val_dir, 'val_annotations.txt')
-            if os.path.exists(val_annotations):
-                with open(val_annotations, 'r') as f:
-                    for line in f:
-                        parts = line.strip().split('\t')
-                        if len(parts) >= 2:
-                            img_name = parts[0]
-                            cls = parts[1]
-                            if cls not in self.classes:
-                                self.classes.append(cls)
-                            self.samples.append((
-                                os.path.join(val_images_dir, img_name),
-                                self.classes.index(cls)
-                            ))
-    def __len__(self):
-        return len(self.samples)
-    def __getitem__(self, idx):
-        img_path, label = self.samples[idx]
-        image = Image.open(img_path).convert('RGB')
-        if self.transform:
-            image = self.transform(image)
-        return image, label

package/shared/svm/kernel_s_v_m.py DELETED Viewed

@@ -1,98 +0,0 @@
-# KernelSVM 类定义
-# 从文档自动提取生成
-import numpy as np
-class KernelSVM:
-    """
-    核SVM实现
-    支持线性核、多项式核、RBF核
-    """
-    def __init__(self, kernel='rbf', C=1.0, gamma=1.0, degree=3, coef0=1):
-        self.kernel = kernel
-        self.C = C
-        self.gamma = gamma
-        self.degree = degree
-        self.coef0 = coef0  # 多项式核的常数项
-        self.alpha = None
-        self.b = None
-        self.X_train = None
-        self.y_train = None
-        self.support_vectors_ = None
-        self.support_vector_labels_ = None
-        self.alpha_sv = None
-    def _kernel(self, X1, X2):
-        """计算核矩阵"""
-        if self.kernel == 'linear':
-            return X1 @ X2.T
-        elif self.kernel == 'poly':
-            return (X1 @ X2.T + self.coef0) ** self.degree
-        elif self.kernel == 'rbf':
-            # ||x - x'||^2 = ||x||^2 + ||x'||^2 - 2*x^T*x'
-            X1_norm = np.sum(X1 ** 2, axis=1).reshape(-1, 1)
-            X2_norm = np.sum(X2 ** 2, axis=1).reshape(1, -1)
-            distances = X1_norm + X2_norm - 2 * X1 @ X2.T
-            return np.exp(-self.gamma * distances)
-        else:
-            raise ValueError(f"未知核函数: {self.kernel}")
-    def fit(self, X, y, lr=0.01, n_iterations=500):
-        """训练模型（简化版SMO思想）"""
-        n_samples = X.shape[0]
-        self.X_train = X
-        self.y_train = y
-        # 计算核矩阵
-        K = self._kernel(X, X)
-        # 初始化
-        self.alpha = np.zeros(n_samples)
-        # 梯度上升优化
-        for _ in range(n_iterations):
-            for i in range(n_samples):
-                # 梯度
-                gradient = 1 - y[i] * np.sum(self.alpha * y * K[:, i])
-                self.alpha[i] += lr * gradient
-                self.alpha[i] = np.clip(self.alpha[i], 0, self.C)
-            # 约束修正：满足等式约束 sum(alpha * y) = 0
-            # 减去均值偏差后，需再次投影到边界约束 [0, C]
-            self.alpha = self.alpha - np.mean(self.alpha * y) * y
-            self.alpha = np.clip(self.alpha, 0, self.C)
-            # 注意：投影后等式约束可能不再精确满足，但迭代过程中误差会累积抵消
-        # 支持向量
-        sv_mask = self.alpha > 1e-5
-        self.support_vectors_ = X[sv_mask]
-        self.support_vector_labels_ = y[sv_mask]
-        self.alpha_sv = self.alpha[sv_mask]
-        # 计算b
-        if len(self.support_vectors_) > 0:
-            K_sv = self._kernel(self.support_vectors_, self.support_vectors_)
-            margins = np.sum(self.alpha_sv * self.support_vector_labels_ * K_sv, axis=1)
-            self.b = np.mean(self.support_vector_labels_ - margins)
-        else:
-            self.b = 0
-        return self
-    def decision_function(self, X):
-        """决策函数"""
-        K = self._kernel(X, self.support_vectors_)
-        return K @ (self.alpha_sv * self.support_vector_labels_) + self.b
-    def predict(self, X):
-        """预测类别"""
-        return np.sign(self.decision_function(X)).astype(int)
-    def score(self, X, y):
-        """计算准确率"""
-        y_pred = self.predict(X)
-        return np.mean(y_pred == y)

package/shared/svm/simple_s_v_m.py DELETED Viewed

@@ -1,111 +0,0 @@
-# SimpleSVM 类定义
-# 从文档自动提取生成
-import numpy as np
-class SimpleSVM:
-    """
-    简化版软间隔SVM实现
-    使用梯度上升优化对偶问题，支持软间隔（通过参数C控制）
-    核心步骤：
-    1. 预计算核矩阵 K = X @ X.T（线性核）
-    2. 迭代更新拉格朗日乘子 alpha
-    3. 根据alpha找出支持向量
-    4. 计算超平面参数 w 和 b
-    """
-    def __init__(self, learning_rate=0.01, n_iterations=1000, C=1.0):
-        self.lr = learning_rate       # 梯度上升的学习率
-        self.n_iterations = n_iterations  # 迭代次数
-        self.C = C                    # 软间隔惩罚系数
-        self.alpha = None             # 拉格朗日乘子（训练后获得）
-        self.w = None                 # 超平面法向量
-        self.b = None                 # 超平面截距
-        self.support_vectors_ = None  # 支持向量集合
-    def fit(self, X, y):
-        """
-        训练SVM模型
-        对偶问题的目标函数：
-        max sum(alpha_i) - 0.5 * sum(alpha_i * alpha_j * y_i * y_j * x_i^T x_j)
-        约束：0 <= alpha_i <= C, sum(alpha_i * y_i) = 0
-        使用梯度上升迭代优化，每次更新一个alpha_i
-        """
-        n_samples, n_features = X.shape
-        # 初始化拉格朗日乘子（全零）
-        self.alpha = np.zeros(n_samples)
-        # 预计算核矩阵（线性核：样本内积）
-        # K[i,j] = x_i^T x_j，用于加速目标函数计算
-        K = X @ X.T
-        # 梯度上升优化对偶问题
-        for iteration in range(self.n_iterations):
-            for i in range(n_samples):
-                # 计算alpha_i的梯度
-                # 目标函数对alpha_i的偏导：1 - y_i * sum_j(alpha_j * y_j * K[j,i])
-                gradient = 1 - y[i] * np.sum(self.alpha * y * K[:, i])
-                # 梯度上升更新
-                self.alpha[i] += self.lr * gradient
-                # 投影到约束区间 [0, C]
-                # 对应软间隔的约束：0 <= alpha_i <= C
-                self.alpha[i] = np.clip(self.alpha[i], 0, self.C)
-            # 约束修正：确保 sum(alpha * y) = 0
-            # 通过减去均值偏差来近似满足线性约束
-            bias = np.mean(self.alpha * y)
-            self.alpha = self.alpha - bias * y
-            self.alpha = np.clip(self.alpha, 0, self.C)
-        # 找出支持向量（alpha > 阈值的样本）
-        sv_threshold = 1e-5
-        sv_indices = self.alpha > sv_threshold
-        self.support_vectors_ = X[sv_indices]
-        sv_labels = y[sv_indices]
-        sv_alpha = self.alpha[sv_indices]
-        # 计算超平面参数 w = sum(alpha_i * y_i * x_i)
-        # 只有支持向量参与计算（其他样本alpha=0）
-        self.w = np.zeros(n_features)
-        for i, (sv, label, a) in enumerate(zip(self.support_vectors_, sv_labels, sv_alpha)):
-            self.w += a * label * sv
-        # 计算截距 b
-        # 使用支持向量计算：对于支持向量，y_i(w^T x_i + b) = 1（硬间隔）
-        # 或 y_i(w^T x_i + b) = 1 - xi_i（软间隔）
-        # 这里取所有支持向量的平均值
-        if len(self.support_vectors_) > 0:
-            self.b = np.mean(sv_labels - self.support_vectors_ @ self.w)
-        else:
-            self.b = 0
-        return self
-    def decision_function(self, X):
-        """
-        决策函数值：w^T x + b
-        正值表示预测为正类，负值表示预测为负类
-        绝对值大小反映样本到超平面的距离
-        """
-        return X @ self.w + self.b
-    def predict(self, X):
-        """
-        预测类别标签
-        sign(w^T x + b): +1 表示正类，-1 表示负类
-        """
-        return np.sign(self.decision_function(X)).astype(int)
-    def score(self, X, y):
-        """计算分类准确率"""
-        predictions = self.predict(X)
-        return np.mean(predictions == y)

package/shared/tree/decision_treeclassifier.py DELETED Viewed

@@ -1,235 +0,0 @@
-# DecisionTreeClassifier 类定义
-# 从文档自动提取生成
-import numpy as np
-class DecisionTreeClassifier:
-    """
-    CART 决策树分类器
-    使用 Gini 指数作为分裂准则，构建二叉决策树。
-    支持预剪枝策略：最大深度限制和叶节点最小样本数限制。
-    参数:
-        max_depth : int, 默认值 10
-            树的最大深度，防止过拟合
-        min_samples_split : int, 默认值 2
-            分裂所需的最小样本数，防止学习孤例
-    """
-    def __init__(self, max_depth=10, min_samples_split=2, min_gain_threshold=0.0):
-        self.max_depth = max_depth
-        self.min_samples_split = min_samples_split
-        self.min_gain_threshold = min_gain_threshold
-        self.tree = None
-    def _gini(self, y):
-        """
-        计算数据集的 Gini 指数
-        Gini 指数衡量数据的不纯度，值越小越纯净。
-        参数:
-            y : ndarray
-                目标变量数组
-        返回:
-            float : Gini 指数值
-        """
-        if len(y) == 0:
-            return 0
-        _, counts = np.unique(y, return_counts=True)
-        probs = counts / len(y)
-        return 1 - np.sum(probs ** 2)
-    def _gini_split(self, y_left, y_right):
-        """
-        计算分裂后的加权 Gini 指数
-        加权平均两个子集的 Gini 指数，权重为样本数比例。
-        参数:
-            y_left : ndarray
-                左分支的目标变量
-            y_right : ndarray
-                右分支的目标变量
-        返回:
-            float : 分裂后的加权 Gini 指数
-        """
-        n = len(y_left) + len(y_right)
-        return (len(y_left) / n) * self._gini(y_left) + \
-               (len(y_right) / n) * self._gini(y_right)
-    def _best_split(self, X, y):
-        """
-        寻找最佳分裂特征和分割点
-        遍历所有特征的所有候选分割点，选择 Gini 指数最小的分裂方案。
-        候选分割点是特征的唯一值（CART 的标准策略）。
-        参数:
-            X : ndarray, shape (n_samples, n_features)
-                特征矩阵
-            y : ndarray, shape (n_samples,)
-                目标变量
-        返回:
-            tuple : (最佳特征索引, 最佳分割点, 对应的 Gini 指数)
-        """
-        best_gini = float('inf')
-        best_feature = None
-        best_threshold = None
-        n_features = X.shape[1]
-        for feature in range(n_features):
-            # 获取该特征的所有唯一值作为候选分割点
-            # 使用相邻唯一值的中点作为候选阈值（标准 CART 算法策略）
-            thresholds = np.unique(X[:, feature])
-            thresholds = (thresholds[:-1] + thresholds[1:]) / 2
-            for threshold in thresholds:
-                # 按阈值分裂数据
-                left_mask = X[:, feature] <= threshold
-                right_mask = ~left_mask
-                y_left = y[left_mask]
-                y_right = y[right_mask]
-                # 忽略无效分裂（某分支为空）
-                if len(y_left) == 0 or len(y_right) == 0:
-                    continue
-                gini = self._gini_split(y_left, y_right)
-                # 更新最优分裂
-                if gini < best_gini:
-                    best_gini = gini
-                    best_feature = feature
-                    best_threshold = threshold
-        return best_feature, best_threshold, best_gini
-    def _build_tree(self, X, y, depth):
-        """
-        递归构建决策树
-        核心步骤：
-        1. 检查终止条件（深度限制、样本数限制、纯净度）
-        2. 若满足终止条件，返回叶节点（多数类）
-        3. 否则寻找最优分裂，创建内部节点
-        4. 递归构建左右子树
-        参数:
-            X : ndarray
-                特征矩阵
-            y : ndarray
-                目标变量
-            depth : int
-                当前深度
-        返回:
-            dict : 树节点（字典表示）
-        """
-        n_samples = len(y)
-        # 检查预剪枝终止条件
-        if (depth >= self.max_depth or
-            n_samples < self.min_samples_split or
-            len(np.unique(y)) == 1):
-            # 返回叶节点，预测值为多数类
-            values, counts = np.unique(y, return_counts=True)
-            return {'leaf': True, 'class': values[np.argmax(counts)]}
-        # 寻找最优分裂
-        feature, threshold, gini = self._best_split(X, y)
-        # 若无法分裂或分裂增益不足，返回叶节点
-        if feature is None or gini > self._gini(y) - self.min_gain_threshold:
-            values, counts = np.unique(y, return_counts=True)
-            return {'leaf': True, 'class': values[np.argmax(counts)]}
-        # 分裂数据
-        left_mask = X[:, feature] <= threshold
-        right_mask = ~left_mask
-        # 递归构建子树
-        left_tree = self._build_tree(X[left_mask], y[left_mask], depth + 1)
-        right_tree = self._build_tree(X[right_mask], y[right_mask], depth + 1)
-        return {
-            'leaf': False,
-            'feature': feature,
-            'threshold': threshold,
-            'left': left_tree,
-            'right': right_tree
-        }
-    def fit(self, X, y):
-        """
-        训练决策树
-        参数:
-            X : ndarray, shape (n_samples, n_features)
-                特征矩阵
-            y : ndarray, shape (n_samples,)
-                目标变量
-        返回:
-            self : 训练后的模型实例
-        """
-        self.tree = self._build_tree(X, y, depth=0)
-        return self
-    def _predict_one(self, x, node):
-        """
-        预测单个样本
-        从根节点开始，根据分裂条件选择分支，直到到达叶节点。
-        参数:
-            x : ndarray
-                单个样本的特征向量
-            node : dict
-                当前树节点
-        返回:
-            int : 预测类别
-        """
-        if node['leaf']:
-            return node['class']
-        if x[node['feature']] <= node['threshold']:
-            return self._predict_one(x, node['left'])
-        else:
-            return self._predict_one(x, node['right'])
-    def predict(self, X):
-        """
-        批量预测
-        参数:
-            X : ndarray, shape (n_samples, n_features)
-                特征矩阵
-        返回:
-            ndarray : 预测类别数组
-        """
-        return np.array([self._predict_one(x, self.tree) for x in X])
-    def score(self, X, y):
-        """
-        计算准确率
-        参数:
-            X : ndarray
-                特征矩阵
-            y : ndarray
-                真实类别
-        返回:
-            float : 准确率
-        """
-        y_pred = self.predict(X)
-        return np.mean(y_pred == y)

package/shared/tree/random_forestclassifier.py DELETED Viewed

@@ -1,88 +0,0 @@
-# RandomForestClassifier 类定义
-# 从文档自动提取生成
-import numpy as np
-class RandomForestClassifier:
-    """
-    随机森林分类器
-    实现：
-    1. Bootstrap采样（对应理论：样本随机）
-    2. 多棵决策树训练（每棵树使用不同的Bootstrap样本和特征子集）
-    3. 多数投票预测（对应理论：投票机制）
-    参数:
-        n_estimators : int, 默认值 100
-            树的数量（对应理论中的B）
-        max_depth : int, 默认值 10
-            每棵树的最大深度
-        max_features : str or int, 默认值 'sqrt'
-            每次分裂时考虑的特征数量（对应理论中的m）
-    """
-    def __init__(self, n_estimators=100, max_depth=10, max_features='sqrt'):
-        self.n_estimators = n_estimators
-        self.max_depth = max_depth
-        self.max_features = max_features
-        self.trees = []
-    def _bootstrap_sample(self, X, y):
-        """
-        Bootstrap采样（对应理论：有放回重采样）
-        从原始数据集中有放回地抽取n个样本
-        """
-        n_samples = X.shape[0]
-        indices = np.random.choice(n_samples, n_samples, replace=True)
-        return X[indices], y[indices]
-    def fit(self, X, y):
-        """
-        训练随机森林
-        核心步骤：
-        1. 确定特征子集大小m
-        2. 对每棵树：Bootstrap采样 → 训练决策树
-        """
-        n_features = X.shape[1]
-        # 确定特征子集大小m（对应理论：分类用sqrt(d)，回归用d/3）
-        if self.max_features == 'sqrt':
-            max_features = int(np.sqrt(n_features))
-        elif self.max_features == 'log2':
-            max_features = int(np.log2(n_features))
-        else:
-            max_features = n_features
-        self.trees = []
-        for _ in range(self.n_estimators):
-            # Bootstrap采样
-            X_sample, y_sample = self._bootstrap_sample(X, y)
-            # 训练决策树（带特征随机）
-            tree = DecisionTreeForRF(
-                max_depth=self.max_depth,
-                max_features=max_features
-            )
-            tree.fit(X_sample, y_sample)
-            self.trees.append(tree)
-        return self
-    def predict(self, X):
-        """
-        多数投票预测（对应理论：硬投票）
-        每棵树预测一个类别，选择得票最多的类别
-        """
-        predictions = np.array([tree.predict(X) for tree in self.trees])
-        result = []
-        for i in range(X.shape[0]):
-            values, counts = np.unique(predictions[:, i], return_counts=True)
-            result.append(values[np.argmax(counts)])
-        return np.array(result)
-    def score(self, X, y):
-        """计算准确率"""
-        return np.mean(self.predict(X) == y)

package/shared/unsupervised/k_means.py DELETED Viewed

@@ -1,127 +0,0 @@
-# KMeans 类定义
-# 从文档自动提取生成
-import numpy as np
-class KMeans:
-    """
-    K-means聚类算法实现
-    参数:
-        n_clusters : int, 簇的数量K
-        max_iter : int, 最大迭代次数
-        tol : float, 收敛阈值（中心变化小于此值时停止）
-        n_init : int, 随机初始化的次数（取最优结果）
-    """
-    def __init__(self, n_clusters=3, max_iter=300, tol=1e-4, n_init=10):
-        self.n_clusters = n_clusters
-        self.max_iter = max_iter
-        self.tol = tol
-        self.n_init = n_init
-        self.cluster_centers_ = None  # 簇中心
-        self.labels_ = None           # 每个样本的簇分配
-        self.inertia_ = None          # 目标函数值（距离平方和）
-    def _init_centers(self, X):
-        """
-        随机初始化簇中心
-        从数据中随机选择K个样本作为初始中心
-        """
-        indices = np.random.choice(len(X), self.n_clusters, replace=False)
-        return X[indices].copy()
-    def _assign_clusters(self, X, centers):
-        """
-        分配步骤：将每个样本分配到最近的簇中心
-        计算每个样本到所有中心的距离平方，返回最近的簇编号
-        """
-        distances = np.zeros((len(X), self.n_clusters))
-        for k in range(self.n_clusters):
-            # 计算样本到第k个中心的距离平方（对应目标函数中的||x - μ||²）
-            distances[:, k] = np.sum((X - centers[k]) ** 2, axis=1)
-        return np.argmin(distances, axis=1)
-    def _update_centers(self, X, labels):
-        """
-        更新步骤：重新计算每个簇的中心
-        簇中心 = 簇内样本的均值（这就是"means"的含义）
-        """
-        centers = np.zeros((self.n_clusters, X.shape[1]))
-        for k in range(self.n_clusters):
-            mask = labels == k
-            if np.sum(mask) > 0:
-                # 取簇内样本的均值作为新中心
-                centers[k] = X[mask].mean(axis=0)
-            else:
-                # 空簇的罕见情况：随机重新初始化
-                centers[k] = X[np.random.randint(len(X))]
-        return centers
-    def _compute_inertia(self, X, labels, centers):
-        """
-        计算目标函数值J
-        J = 所有样本到其所属簇中心的距离平方和
-        """
-        inertia = 0
-        for k in range(self.n_clusters):
-            mask = labels == k
-            inertia += np.sum((X[mask] - centers[k]) ** 2)
-        return inertia
-    def fit(self, X):
-        """
-        训练K-means模型
-        执行多次随机初始化，取目标函数最小的结果
-        """
-        best_inertia = float('inf')
-        best_centers = None
-        best_labels = None
-        for init in range(self.n_init):
-            # 初始化簇中心
-            centers = self._init_centers(X)
-            # 迭代直到收敛
-            for i in range(self.max_iter):
-                # 步骤2：分配样本到最近的簇
-                labels = self._assign_clusters(X, centers)
-                # 步骤3：更新簇中心
-                new_centers = self._update_centers(X, labels)
-                # 检查收敛：中心变化是否小于阈值
-                if np.max(np.abs(new_centers - centers)) < self.tol:
-                    break
-                centers = new_centers
-            # 计算本次初始化的目标函数值
-            inertia = self._compute_inertia(X, labels, centers)
-            # 保留最优结果
-            if inertia < best_inertia:
-                best_inertia = inertia
-                best_centers = centers.copy()
-                best_labels = labels.copy()
-        # 存储最优结果
-        self.cluster_centers_ = best_centers
-        self.labels_ = best_labels
-        self.inertia_ = best_inertia
-        return self
-    def predict(self, X):
-        """
-        预测新样本所属的簇
-        根据训练得到的簇中心，将新样本分配到最近的簇
-        """
-        return self._assign_clusters(X, self.cluster_centers_)

package/shared/unsupervised/p_c_a.py DELETED Viewed

@@ -1,111 +0,0 @@
-# PCA 类定义
-# 从文档自动提取生成
-import numpy as np
-class PCA:
-    """
-    主成分分析（Principal Component Analysis）实现
-    核心步骤（对应理论推导）：
-    1. 数据中心化（减去均值）
-    2. 计算协方差矩阵 S = X^T X / (n-1)
-    3. 特征分解 S = V Λ V^T
-    4. 选择前 k 个特征值对应的特征向量作为主成分
-    5. 投影到主成分空间
-    参数说明:
-    n_components : int, 可选
-        要保留的主成分数量。若为 None，保留所有成分
-    """
-    def __init__(self, n_components=None):
-        self.n_components = n_components
-        # 存储 PCA 结果
-        self.components_ = None              # 主成分（特征向量矩阵）
-        self.explained_variance_ = None      # 特征值（各主成分的方差）
-        self.explained_variance_ratio_ = None  # 方差解释比例
-        self.mean_ = None                    # 数据均值向量
-    def fit(self, X):
-        """
-        训练 PCA 模型
-        参数说明:
-        X : ndarray, shape (n_samples, n_features)
-            输入数据矩阵
-        返回:
-        self : PCA 对象实例
-        """
-        n_samples, n_features = X.shape
-        # 步骤1：数据中心化（对应理论中的 x_i - x̄）
-        self.mean_ = X.mean(axis=0)
-        X_centered = X - self.mean_
-        # 步骤2：计算协方差矩阵（对应理论中的 S = 1/n Σ(x_i - x̄)(x_i - x̄)^T）
-        # 使用 n-1 而非 n，得到无偏估计（与 sklearn 一致）
-        cov_matrix = X_centered.T @ X_centered / (n_samples - 1)
-        # 步骤3：特征分解（对应理论中的 S = VΛV^T）
-        # np.linalg.eigh 专门用于对称矩阵，返回实数特征值
-        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
-        # 特征值和特征向量按降序排列（PCA 选择方差最大的方向）
-        indices = np.argsort(eigenvalues)[::-1]
-        eigenvalues = eigenvalues[indices]
-        eigenvectors = eigenvectors[:, indices]
-        # 存储特征值（对应理论中的 λ_j）
-        self.explained_variance_ = eigenvalues
-        # 步骤4：计算方差解释比例（对应理论中的 Σλ_j / Σλ_total）
-        total_variance = eigenvalues.sum()
-        self.explained_variance_ratio_ = eigenvalues / total_variance
-        # 确定主成分数量
-        if self.n_components is None:
-            self.n_components = n_features
-        # 步骤5：选择前 k 个主成分（对应理论中的 V_k）
-        self.components_ = eigenvectors[:, :self.n_components].T
-        return self
-    def transform(self, X):
-        """
-        将数据投影到主成分空间
-        参数说明:
-        X : ndarray, shape (n_samples, n_features)
-            输入数据
-        返回:
-        Z : ndarray, shape (n_samples, n_components)
-            投影后的低维数据
-        """
-        # 中心化后投影（对应理论中的 Z = X̃ V_k）
-        X_centered = X - self.mean_
-        return X_centered @ self.components_.T
-    def fit_transform(self, X):
-        """训练并转换（一步完成）"""
-        self.fit(X)
-        return self.transform(X)
-    def inverse_transform(self, Z):
-        """
-        从低维空间重构原始数据
-        参数说明:
-        Z : ndarray, shape (n_samples, n_components)
-            低维表示
-        返回:
-        X_reconstructed : ndarray, shape (n_samples, n_features)
-            重构的高维数据（加回均值）
-        """
-        # 重构公式（对应理论中的 X̂ = Z V_k^T + x̄）
-        return Z @ self.components_ + self.mean_