@icyfenix-dmla/cli 2026.5.13-2349 → 2026.5.14-2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/server.js +7 -7
- package/version.json +2 -2
- package/shared/bayesian/gaussian_mixturemodel.py +0 -141
- package/shared/bayesian/simple_bayesiannetwork.py +0 -99
- package/shared/cnn/alex_net.py +0 -65
- package/shared/cnn/t_e_r_m1.py +0 -65
- package/shared/cnn/tiny_image_net_dataset.py +0 -67
- package/shared/cnn/tiny_imagenetdataset.py +0 -67
- package/shared/cnn/tinyimagenetdataset.py +0 -67
- package/shared/svm/kernel_s_v_m.py +0 -98
- package/shared/svm/simple_s_v_m.py +0 -111
- package/shared/tree/decision_treeclassifier.py +0 -235
- package/shared/tree/random_forestclassifier.py +0 -88
- package/shared/unsupervised/k_means.py +0 -127
- package/shared/unsupervised/p_c_a.py +0 -111
package/package.json
CHANGED
package/src/commands/server.js
CHANGED
|
@@ -369,12 +369,12 @@ function findProgressReporterPath() {
|
|
|
369
369
|
* --dev 模式下需要挂载此目录
|
|
370
370
|
*/
|
|
371
371
|
function findSharedModulesPath() {
|
|
372
|
-
// 开发环境路径:packages/cli/src/commands -> ../../../local-server/
|
|
373
|
-
const devPath = path.resolve(__dirname, '../../../local-server/
|
|
374
|
-
// npm 包路径:packages/cli/src/commands -> ../../
|
|
375
|
-
const npmPath = path.resolve(__dirname, '../../
|
|
376
|
-
// CLI 包根目录下的
|
|
377
|
-
const cliRootPath = path.resolve(__dirname, '../../
|
|
372
|
+
// 开发环境路径:packages/cli/src/commands -> ../../../local-server/shared
|
|
373
|
+
const devPath = path.resolve(__dirname, '../../../local-server/shared')
|
|
374
|
+
// npm 包路径:packages/cli/src/commands -> ../../shared(构建后)
|
|
375
|
+
const npmPath = path.resolve(__dirname, '../../shared')
|
|
376
|
+
// CLI 包根目录下的 shared(构建后)
|
|
377
|
+
const cliRootPath = path.resolve(__dirname, '../../shared')
|
|
378
378
|
|
|
379
379
|
// 优先使用开发环境路径(如果 local-server 存在)
|
|
380
380
|
if (fs.existsSync(devPath) && fs.readdirSync(devPath).length > 0) {
|
|
@@ -473,7 +473,7 @@ export async function startServerSync(port, useGpu = false, dev = false, shmSize
|
|
|
473
473
|
|
|
474
474
|
if (dev && !sharedModulesPath) {
|
|
475
475
|
console.log(chalk.yellow('⚠️ --dev 模式需要共享模块目录'))
|
|
476
|
-
console.log(chalk.gray(' 未找到
|
|
476
|
+
console.log(chalk.gray(' 未找到 shared 目录,将仅使用镜像内置模块'))
|
|
477
477
|
}
|
|
478
478
|
if (dev && !kernelRunnerPath) {
|
|
479
479
|
console.log(chalk.yellow('⚠️ --dev 模式需要 kernel_runner.py'))
|
package/version.json
CHANGED
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
# GaussianMixtureModel 类定义
|
|
2
|
-
# 从文档自动提取生成
|
|
3
|
-
|
|
4
|
-
import numpy as np
|
|
5
|
-
|
|
6
|
-
class GaussianMixtureModel:
    """Gaussian mixture model fitted with the EM algorithm.

    Attributes populated by ``fit``:
        weights_: mixing coefficients, shape (K,).
        means_: component means, shape (K, n_features).
        covariances_: covariance matrices, shape (K, n_features, n_features).
        log_likelihood_history_: total log-likelihood recorded at each E-step.
    """

    def __init__(self, n_components=3, max_iter=100, tol=1e-4):
        self.n_components = n_components
        self.max_iter = max_iter
        self.tol = tol  # convergence threshold on the log-likelihood change

        self.weights_ = None
        self.means_ = None
        self.covariances_ = None
        self.log_likelihood_history_ = []

    def _initialize(self, X):
        """Initialise means, covariances and mixing weights from the data."""
        n_samples, n_features = X.shape
        K = self.n_components

        # Means: K distinct samples drawn at random from the data.
        indices = np.random.choice(n_samples, K, replace=False)
        self.means_ = X[indices].copy()

        # Covariances: diagonal of the data covariance plus a small ridge.
        # np.cov returns a 0-d array for single-feature data, so promote it
        # to 2-D before extracting the diagonal.
        data_cov = np.atleast_2d(np.cov(X.T))
        base_cov = np.diag(np.diag(data_cov)) + 1e-6 * np.eye(n_features)
        self.covariances_ = np.array([base_cov.copy() for _ in range(K)])

        # Mixing weights: uniform.
        self.weights_ = np.ones(K) / K

    def _gaussian_pdf(self, X, mean, cov):
        """Return the multivariate Gaussian *log*-density of each row of X."""
        n_features = X.shape[1]
        diff = X - mean

        # Small ridge for numerical stability.
        cov_reg = cov + 1e-6 * np.eye(n_features)

        try:
            # Cholesky factor gives both the log-determinant and the
            # Mahalanobis distance without forming an explicit inverse.
            L = np.linalg.cholesky(cov_reg)
            log_det = 2 * np.sum(np.log(np.diag(L)))
            diff_L = np.linalg.solve(L, diff.T).T
            mahalanobis = np.sum(diff_L ** 2, axis=1)
        except np.linalg.LinAlgError:
            # Cholesky failed: fall back to slogdet and an explicit inverse.
            _, log_det = np.linalg.slogdet(cov_reg)
            cov_inv = np.linalg.inv(cov_reg)
            mahalanobis = np.sum(diff @ cov_inv * diff, axis=1)

        return -0.5 * (n_features * np.log(2 * np.pi) + log_det + mahalanobis)

    def _e_step(self, X):
        """E-step: return ``(responsibilities, log_likelihood)`` for X."""
        X = np.asarray(X, dtype=float)
        n_samples = X.shape[0]
        K = self.n_components

        # Weighted log-density of every sample under every component.
        log_probs = np.zeros((n_samples, K))
        for k in range(K):
            log_probs[:, k] = (np.log(self.weights_[k] + 1e-10) +
                               self._gaussian_pdf(X, self.means_[k], self.covariances_[k]))

        # Log-sum-exp over components. The same stable quantity is used for
        # both the log-likelihood and the responsibilities, so samples far
        # from every component no longer underflow to log(0) = -inf.
        log_max = log_probs.max(axis=1, keepdims=True)
        log_sum = np.log(np.sum(np.exp(log_probs - log_max), axis=1, keepdims=True)) + log_max

        log_likelihood = np.sum(log_sum)
        responsibilities = np.exp(log_probs - log_sum)

        return responsibilities, log_likelihood

    def _m_step(self, X, responsibilities):
        """M-step: update weights, means and covariances in place."""
        n_samples, n_features = X.shape
        K = self.n_components

        # Effective number of samples per component (epsilon avoids 0-division).
        N_k = responsibilities.sum(axis=0) + 1e-10

        self.weights_ = N_k / n_samples
        self.means_ = (responsibilities.T @ X) / N_k[:, np.newaxis]

        for k in range(K):
            diff = X - self.means_[k]
            weighted_diff = responsibilities[:, k:k+1] * diff
            self.covariances_[k] = (weighted_diff.T @ diff) / N_k[k]
            # Ridge regularisation.
            self.covariances_[k] += 1e-6 * np.eye(n_features)

    def fit(self, X):
        """Fit the model to X (n_samples, n_features) and return ``self``.

        Iterates E- and M-steps until the log-likelihood changes by less than
        ``tol`` or ``max_iter`` iterations have run. Prints a message when
        convergence is reached.
        """
        X = np.asarray(X, dtype=float)
        self._initialize(X)
        self.log_likelihood_history_ = []

        prev_log_likelihood = -np.inf

        for iteration in range(self.max_iter):
            responsibilities, log_likelihood = self._e_step(X)
            self.log_likelihood_history_.append(log_likelihood)

            if abs(log_likelihood - prev_log_likelihood) < self.tol:
                print(f"EM收敛于第{iteration}次迭代")
                break

            self._m_step(X, responsibilities)
            prev_log_likelihood = log_likelihood

        return self

    def predict(self, X):
        """Return the index of the most responsible component for each row."""
        responsibilities, _ = self._e_step(X)
        return np.argmax(responsibilities, axis=1)

    def predict_proba(self, X):
        """Return the per-component responsibilities for each row of X."""
        responsibilities, _ = self._e_step(X)
        return responsibilities

    def score(self, X):
        """Return the total log-likelihood of X under the fitted model."""
        _, log_likelihood = self._e_step(X)
        return log_likelihood
|
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
# SimpleBayesianNetwork 类定义
|
|
2
|
-
# 从文档自动提取生成
|
|
3
|
-
|
|
4
|
-
class SimpleBayesianNetwork:
    """Discrete Bayesian network with exact inference by enumeration."""

    def __init__(self):
        self.nodes = {}       # {name: {'parents': [...], 'values': [...]}}
        self.cpts = {}        # {name: {parent_value_tuple: {value: prob}}}
        self.topo_order = []  # node names, parents before children

    def add_node(self, name, values, parents=None):
        """Register a node with its possible values and parent node names.

        Nodes may be added in any order; the topological order is recomputed
        after every insertion.
        """
        if parents is None:
            parents = []
        self.nodes[name] = {'parents': parents, 'values': values}
        self._update_topo_order()

    def set_cpt(self, name, cpt):
        """Set the conditional probability table of a node.

        ``cpt`` has the form ``{parent_value_tuple: {value: prob}}``; for a
        node without parents use ``{(): {value: prob}}``.
        """
        self.cpts[name] = cpt

    def _update_topo_order(self):
        """Recompute ``topo_order`` with a depth-first walk over parents."""
        visited = set()
        order = []

        def visit(node):
            if node in visited:
                return
            visited.add(node)
            for parent in self.nodes[node]['parents']:
                # A parent that has not been declared yet is skipped here; it
                # is placed correctly when it is added, because the order is
                # rebuilt on every add_node call.
                if parent in self.nodes:
                    visit(parent)
            order.append(node)

        for node in self.nodes:
            visit(node)

        self.topo_order = order

    def get_prob(self, name, value, parent_values):
        """Return P(name=value | parent_values); 0 if the entry is absent."""
        parent_key = tuple(parent_values) if parent_values else ()
        return self.cpts[name].get(parent_key, {}).get(value, 0)

    def joint_prob(self, assignment):
        """Return the joint probability of a full ``{node: value}`` assignment."""
        prob = 1.0
        for node in self.topo_order:
            parent_values = [assignment[p] for p in self.nodes[node]['parents']]
            prob *= self.get_prob(node, assignment[node], parent_values)
        return prob

    def enumerate_inference(self, query, evidence):
        """Compute P(query | evidence) by summing out the hidden variables.

        Args:
            query: ``{node: '?'}``; only the first key is used as the query
                variable.
            evidence: ``{node: value}`` of observed variables.

        Returns:
            ``{value: probability}`` for the query variable, normalised to
            sum to 1.

        Raises:
            ZeroDivisionError: if the evidence has zero probability under the
                network, so the posterior is undefined.
        """
        from itertools import product

        query_nodes = list(query.keys())
        hidden = [n for n in self.nodes if n not in query_nodes and n not in evidence]
        hidden_domains = [self.nodes[n]['values'] for n in hidden]

        query_values = {}
        total = 0.0

        query_node = query_nodes[0]
        for qv in self.nodes[query_node]['values']:
            prob_sum = 0.0
            # product() of zero domains yields one empty combination, which
            # covers the "no hidden variables" case.
            for combo in product(*hidden_domains):
                assignment = dict(zip(hidden, combo))
                assignment.update(evidence)
                assignment[query_node] = qv
                prob_sum += self.joint_prob(assignment)
            query_values[qv] = prob_sum
            total += prob_sum

        if query_values and total == 0:
            raise ZeroDivisionError(
                "evidence has zero probability under the network; "
                "the posterior is undefined"
            )

        # Normalise.
        for k in query_values:
            query_values[k] /= total
        return query_values
|
package/shared/cnn/alex_net.py
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
# AlexNet 类定义
|
|
2
|
-
# 从文档自动提取生成
|
|
3
|
-
|
|
4
|
-
import torch
|
|
5
|
-
import torch.nn as nn
|
|
6
|
-
from PIL import Image
|
|
7
|
-
|
|
8
|
-
class AlexNet(nn.Module):
    """AlexNet variant for the 200-class Tiny ImageNet task.

    The original AlexNet classifies 1000 classes; here the final layer has
    ``num_classes`` outputs (default 200). An ``AdaptiveAvgPool2d`` at the end
    of the feature extractor fixes the spatial output size to 6x6.
    """

    def __init__(self, num_classes=200):
        super().__init__()

        # Feature extractor: five convolutional layers.
        feature_layers = [
            # Conv1: 11x11 kernel, stride 4, 96 output channels.
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Conv2: 5x5 kernel, 256 output channels.
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Conv3: 3x3 kernel, 384 output channels.
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # Conv4: 3x3 kernel, 384 output channels.
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # Conv5: 3x3 kernel, 256 output channels.
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Adaptive pooling pins the output to 6x6.
            nn.AdaptiveAvgPool2d((6, 6)),
        ]
        self.features = nn.Sequential(*feature_layers)

        # Classifier head: three fully connected layers.
        head_layers = [
            nn.Dropout(p=0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run features, flatten all but the batch dimension, then classify."""
        flat = torch.flatten(self.features(x), 1)
        return self.classifier(flat)
|
package/shared/cnn/t_e_r_m1.py
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
# AlexNet 类定义
|
|
2
|
-
# 从文档自动提取生成
|
|
3
|
-
|
|
4
|
-
import torch
|
|
5
|
-
import torch.nn as nn
|
|
6
|
-
from PIL import Image
|
|
7
|
-
|
|
8
|
-
class AlexNet(nn.Module):
    """AlexNet variant for the 200-class Tiny ImageNet task.

    The original AlexNet classifies 1000 classes; here the final layer has
    ``num_classes`` outputs (default 200). An ``AdaptiveAvgPool2d`` at the end
    of the feature extractor fixes the spatial output size to 6x6.
    """

    def __init__(self, num_classes=200):
        super().__init__()

        # Feature extractor: five convolutional layers.
        feature_layers = [
            # Conv1: 11x11 kernel, stride 4, 96 output channels.
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Conv2: 5x5 kernel, 256 output channels.
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Conv3: 3x3 kernel, 384 output channels.
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # Conv4: 3x3 kernel, 384 output channels.
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            # Conv5: 3x3 kernel, 256 output channels.
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # Adaptive pooling pins the output to 6x6.
            nn.AdaptiveAvgPool2d((6, 6)),
        ]
        self.features = nn.Sequential(*feature_layers)

        # Classifier head: three fully connected layers.
        head_layers = [
            nn.Dropout(p=0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run features, flatten all but the batch dimension, then classify."""
        flat = torch.flatten(self.features(x), 1)
        return self.classifier(flat)
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
# TinyImageNetDataset 类定义
|
|
2
|
-
# 从文档自动提取生成
|
|
3
|
-
|
|
4
|
-
import os
|
|
5
|
-
from PIL import Image
|
|
6
|
-
from torch.utils.data import Dataset, DataLoader
|
|
7
|
-
|
|
8
|
-
class TinyImageNetDataset(Dataset):
    """Loader for the Tiny ImageNet 200 dataset.

    Training samples are read from ``<root>/train/<class>/images/*.JPEG``.
    Validation samples and labels are parsed from the tab-separated file
    ``<root>/val/val_annotations.txt`` (image name, class id, ...).

    Both splits map class ids to label indices through ``class_to_idx``, built
    from the sorted class ids, so a label means the same class in either split.

    Args:
        root_dir: dataset root containing ``train/`` and ``val/``.
        transform: optional callable applied to each loaded RGB image.
        is_train: load the training split if true, otherwise validation.
    """

    def __init__(self, root_dir, transform=None, is_train=True):
        self.root_dir = root_dir
        self.transform = transform
        self.is_train = is_train

        self.samples = []       # list of (image_path, label_index)
        self.classes = []       # class ids; position == label index
        self.class_to_idx = {}  # class id -> label index

        train_dir = os.path.join(root_dir, 'train')

        if is_train:
            self._set_classes(self._list_class_dirs(train_dir))
            for cls in self.classes:
                images_dir = os.path.join(train_dir, cls, 'images')
                if not os.path.isdir(images_dir):
                    continue
                # Sorted so the sample order does not depend on the filesystem.
                for img_name in sorted(os.listdir(images_dir)):
                    if img_name.endswith('.JPEG'):
                        self.samples.append((
                            os.path.join(images_dir, img_name),
                            self.class_to_idx[cls]
                        ))
        else:
            val_dir = os.path.join(root_dir, 'val')
            val_images_dir = os.path.join(val_dir, 'images')
            val_annotations = os.path.join(val_dir, 'val_annotations.txt')

            entries = []  # (image name, class id) in file order
            if os.path.exists(val_annotations):
                with open(val_annotations, 'r', encoding='utf-8') as f:
                    for line in f:
                        parts = line.strip().split('\t')
                        if len(parts) >= 2:
                            entries.append((parts[0], parts[1]))

            # Reuse the training class order when it is available so that
            # validation labels agree with training labels. Class ids seen
            # only in the annotation file are appended in sorted order.
            classes = self._list_class_dirs(train_dir) if os.path.isdir(train_dir) else []
            known = set(classes)
            classes.extend(sorted({cls for _, cls in entries} - known))
            self._set_classes(classes)

            self.samples = [
                (os.path.join(val_images_dir, img_name), self.class_to_idx[cls])
                for img_name, cls in entries
            ]

    @staticmethod
    def _list_class_dirs(train_dir):
        """Return the sorted names of the sub-directories of ``train_dir``."""
        return sorted(
            name for name in os.listdir(train_dir)
            if os.path.isdir(os.path.join(train_dir, name))
        )

    def _set_classes(self, classes):
        """Store the class list and the matching class-id -> index mapping."""
        self.classes = list(classes)
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)``; the image is RGB, transformed if set."""
        img_path, label = self.samples[idx]
        # Context manager releases the underlying file as soon as the pixel
        # data has been converted.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
# TinyImageNetDataset 类定义
|
|
2
|
-
# 从文档自动提取生成
|
|
3
|
-
|
|
4
|
-
import os
|
|
5
|
-
from PIL import Image
|
|
6
|
-
from torch.utils.data import Dataset, DataLoader
|
|
7
|
-
|
|
8
|
-
class TinyImageNetDataset(Dataset):
    """Loader for the Tiny ImageNet 200 dataset.

    Training samples are read from ``<root>/train/<class>/images/*.JPEG``.
    Validation samples and labels are parsed from the tab-separated file
    ``<root>/val/val_annotations.txt`` (image name, class id, ...).

    Both splits map class ids to label indices through ``class_to_idx``, built
    from the sorted class ids, so a label means the same class in either split.

    Args:
        root_dir: dataset root containing ``train/`` and ``val/``.
        transform: optional callable applied to each loaded RGB image.
        is_train: load the training split if true, otherwise validation.
    """

    def __init__(self, root_dir, transform=None, is_train=True):
        self.root_dir = root_dir
        self.transform = transform
        self.is_train = is_train

        self.samples = []       # list of (image_path, label_index)
        self.classes = []       # class ids; position == label index
        self.class_to_idx = {}  # class id -> label index

        train_dir = os.path.join(root_dir, 'train')

        if is_train:
            self._set_classes(self._list_class_dirs(train_dir))
            for cls in self.classes:
                images_dir = os.path.join(train_dir, cls, 'images')
                if not os.path.isdir(images_dir):
                    continue
                # Sorted so the sample order does not depend on the filesystem.
                for img_name in sorted(os.listdir(images_dir)):
                    if img_name.endswith('.JPEG'):
                        self.samples.append((
                            os.path.join(images_dir, img_name),
                            self.class_to_idx[cls]
                        ))
        else:
            val_dir = os.path.join(root_dir, 'val')
            val_images_dir = os.path.join(val_dir, 'images')
            val_annotations = os.path.join(val_dir, 'val_annotations.txt')

            entries = []  # (image name, class id) in file order
            if os.path.exists(val_annotations):
                with open(val_annotations, 'r', encoding='utf-8') as f:
                    for line in f:
                        parts = line.strip().split('\t')
                        if len(parts) >= 2:
                            entries.append((parts[0], parts[1]))

            # Reuse the training class order when it is available so that
            # validation labels agree with training labels. Class ids seen
            # only in the annotation file are appended in sorted order.
            classes = self._list_class_dirs(train_dir) if os.path.isdir(train_dir) else []
            known = set(classes)
            classes.extend(sorted({cls for _, cls in entries} - known))
            self._set_classes(classes)

            self.samples = [
                (os.path.join(val_images_dir, img_name), self.class_to_idx[cls])
                for img_name, cls in entries
            ]

    @staticmethod
    def _list_class_dirs(train_dir):
        """Return the sorted names of the sub-directories of ``train_dir``."""
        return sorted(
            name for name in os.listdir(train_dir)
            if os.path.isdir(os.path.join(train_dir, name))
        )

    def _set_classes(self, classes):
        """Store the class list and the matching class-id -> index mapping."""
        self.classes = list(classes)
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)``; the image is RGB, transformed if set."""
        img_path, label = self.samples[idx]
        # Context manager releases the underlying file as soon as the pixel
        # data has been converted.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
# TinyImageNetDataset 类定义
|
|
2
|
-
# 从文档自动提取生成
|
|
3
|
-
|
|
4
|
-
import os
|
|
5
|
-
from PIL import Image
|
|
6
|
-
from torch.utils.data import Dataset, DataLoader
|
|
7
|
-
|
|
8
|
-
class TinyImageNetDataset(Dataset):
    """Loader for the Tiny ImageNet 200 dataset.

    Training samples are read from ``<root>/train/<class>/images/*.JPEG``.
    Validation samples and labels are parsed from the tab-separated file
    ``<root>/val/val_annotations.txt`` (image name, class id, ...).

    Both splits map class ids to label indices through ``class_to_idx``, built
    from the sorted class ids, so a label means the same class in either split.

    Args:
        root_dir: dataset root containing ``train/`` and ``val/``.
        transform: optional callable applied to each loaded RGB image.
        is_train: load the training split if true, otherwise validation.
    """

    def __init__(self, root_dir, transform=None, is_train=True):
        self.root_dir = root_dir
        self.transform = transform
        self.is_train = is_train

        self.samples = []       # list of (image_path, label_index)
        self.classes = []       # class ids; position == label index
        self.class_to_idx = {}  # class id -> label index

        train_dir = os.path.join(root_dir, 'train')

        if is_train:
            self._set_classes(self._list_class_dirs(train_dir))
            for cls in self.classes:
                images_dir = os.path.join(train_dir, cls, 'images')
                if not os.path.isdir(images_dir):
                    continue
                # Sorted so the sample order does not depend on the filesystem.
                for img_name in sorted(os.listdir(images_dir)):
                    if img_name.endswith('.JPEG'):
                        self.samples.append((
                            os.path.join(images_dir, img_name),
                            self.class_to_idx[cls]
                        ))
        else:
            val_dir = os.path.join(root_dir, 'val')
            val_images_dir = os.path.join(val_dir, 'images')
            val_annotations = os.path.join(val_dir, 'val_annotations.txt')

            entries = []  # (image name, class id) in file order
            if os.path.exists(val_annotations):
                with open(val_annotations, 'r', encoding='utf-8') as f:
                    for line in f:
                        parts = line.strip().split('\t')
                        if len(parts) >= 2:
                            entries.append((parts[0], parts[1]))

            # Reuse the training class order when it is available so that
            # validation labels agree with training labels. Class ids seen
            # only in the annotation file are appended in sorted order.
            classes = self._list_class_dirs(train_dir) if os.path.isdir(train_dir) else []
            known = set(classes)
            classes.extend(sorted({cls for _, cls in entries} - known))
            self._set_classes(classes)

            self.samples = [
                (os.path.join(val_images_dir, img_name), self.class_to_idx[cls])
                for img_name, cls in entries
            ]

    @staticmethod
    def _list_class_dirs(train_dir):
        """Return the sorted names of the sub-directories of ``train_dir``."""
        return sorted(
            name for name in os.listdir(train_dir)
            if os.path.isdir(os.path.join(train_dir, name))
        )

    def _set_classes(self, classes):
        """Store the class list and the matching class-id -> index mapping."""
        self.classes = list(classes)
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, label)``; the image is RGB, transformed if set."""
        img_path, label = self.samples[idx]
        # Context manager releases the underlying file as soon as the pixel
        # data has been converted.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        return image, label
|