@icyfenix-dmla/cli 2026.5.10-1505 → 2026.5.13-1007
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/shared/__init__.py +1 -0
- package/shared/gan/__init__.py +3 -2
- package/shared/gan/dcgan_generator.py +12 -10
- package/shared/gan/image_vae.py +67 -0
- package/shared/sequence_models/__init__.py +4 -0
- package/shared/sequence_models/poetry_lstm.py +63 -0
- package/src/commands/data.js +9 -0
- package/version.json +2 -2
package/package.json
CHANGED
package/shared/__init__.py
CHANGED
package/shared/gan/__init__.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# GAN 模块
|
|
2
|
-
from .dcgan_generator import DCGANGenerator
|
|
3
2
|
from .dcgan_discriminator import DCGANDiscriminator
|
|
3
|
+
from .dcgan_generator import DCGANGenerator
|
|
4
|
+
from .image_vae import ImageVAE
|
|
4
5
|
|
|
5
|
-
__all__ = ['DCGANGenerator', 'DCGANDiscriminator']
|
|
6
|
+
__all__ = ['DCGANDiscriminator', 'DCGANGenerator', 'ImageVAE']
|
|
@@ -1,47 +1,49 @@
|
|
|
1
|
+
# DCGANGenerator 类定义
|
|
2
|
+
# 从文档自动提取生成
|
|
3
|
+
|
|
1
4
|
import torch
|
|
2
5
|
import torch.nn as nn
|
|
3
6
|
|
|
4
|
-
|
|
5
7
|
class DCGANGenerator(nn.Module):
|
|
6
8
|
"""
|
|
7
9
|
DCGAN 生成器
|
|
8
|
-
|
|
10
|
+
|
|
9
11
|
输入: 噪声向量 z (latent_dim 维)
|
|
10
12
|
输出: 64×64×3 RGB 图像 (值域 [-1, 1])
|
|
11
|
-
|
|
13
|
+
|
|
12
14
|
架构: 转置卷积逐步上采样
|
|
13
15
|
1×1 → 4×4 → 8×8 → 16×16 → 32×32 → 64×64
|
|
14
16
|
"""
|
|
15
17
|
def __init__(self, latent_dim=100, img_channels=3):
|
|
16
18
|
super(DCGANGenerator, self).__init__()
|
|
17
19
|
self.latent_dim = latent_dim
|
|
18
|
-
|
|
20
|
+
|
|
19
21
|
self.main = nn.Sequential(
|
|
20
22
|
# 输入: latent_dim × 1 × 1 → 512 × 4 × 4
|
|
21
23
|
nn.ConvTranspose2d(latent_dim, 512, kernel_size=4, stride=1, padding=0, bias=False),
|
|
22
24
|
nn.BatchNorm2d(512),
|
|
23
25
|
nn.ReLU(True),
|
|
24
|
-
|
|
26
|
+
|
|
25
27
|
# 512 × 4 × 4 → 256 × 8 × 8
|
|
26
28
|
nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1, bias=False),
|
|
27
29
|
nn.BatchNorm2d(256),
|
|
28
30
|
nn.ReLU(True),
|
|
29
|
-
|
|
31
|
+
|
|
30
32
|
# 256 × 8 × 8 → 128 × 16 × 16
|
|
31
33
|
nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=False),
|
|
32
34
|
nn.BatchNorm2d(128),
|
|
33
35
|
nn.ReLU(True),
|
|
34
|
-
|
|
36
|
+
|
|
35
37
|
# 128 × 16 × 16 → 64 × 32 × 32
|
|
36
38
|
nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1, bias=False),
|
|
37
39
|
nn.BatchNorm2d(64),
|
|
38
40
|
nn.ReLU(True),
|
|
39
|
-
|
|
41
|
+
|
|
40
42
|
# 64 × 32 × 32 → 3 × 64 × 64
|
|
41
43
|
nn.ConvTranspose2d(64, img_channels, kernel_size=4, stride=2, padding=1, bias=False),
|
|
42
44
|
nn.Tanh()
|
|
43
45
|
)
|
|
44
|
-
|
|
46
|
+
|
|
45
47
|
def forward(self, z):
|
|
46
48
|
# 将噪声向量 reshape 为 4D 张量: (batch, latent_dim, 1, 1)
|
|
47
|
-
return self.main(z.view(z.size(0), z.size(1), 1, 1))
|
|
49
|
+
return self.main(z.view(z.size(0), z.size(1), 1, 1))
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# ImageVAE 类定义
|
|
2
|
+
# 从文档自动提取生成
|
|
3
|
+
|
|
4
|
+
import torch
|
|
5
|
+
import torch.nn as nn
|
|
6
|
+
from PIL import Image
|
|
7
|
+
|
|
8
|
+
class ImageVAE(nn.Module):
    """Variational autoencoder (VAE) for MNIST image generation.

    Network structure:
    - Encoder: 784 -> 512 -> 256 -> (mu, log-variance)
    - Decoder: z -> 256 -> 512 -> 784

    The first encoder layer is ``Linear(784, 512)``, so inputs must already be
    flattened to 784 features in their last dimension.

    Default latent dimension: 20
    """

    def __init__(self, latent_dim=20):
        super().__init__()

        # Encoder trunk shared by the mean and log-variance heads
        self.encoder = nn.Sequential(
            nn.Linear(784, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU()
        )
        self.fc_mu = nn.Linear(256, latent_dim)
        self.fc_logvar = nn.Linear(256, latent_dim)

        # Decoder (mirror of the encoder)
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, 784),
            nn.Sigmoid()  # outputs per-pixel values in [0, 1]
        )

        self.latent_dim = latent_dim

    def encode(self, x):
        """Encode ``x`` and return the pair ``(mu, logvar)``."""
        h = self.encoder(x)
        return self.fc_mu(h), self.fc_logvar(h)

    def reparameterize(self, mu, logvar):
        """Sample ``z = mu + std * eps`` with ``eps ~ N(0, I)``.

        ``std`` is recovered from the log-variance as ``exp(logvar / 2)``.
        """
        std = torch.exp(logvar / 2)
        eps = torch.randn_like(std)
        return mu + std * eps

    def decode(self, z):
        """Decode latent vectors ``z`` through the decoder network."""
        return self.decoder(z)

    def forward(self, x):
        """Run encode -> reparameterize -> decode.

        Returns:
            A tuple ``(reconstruction, mu, logvar)``.
        """
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar

    def generate(self, num_samples):
        """Generate ``num_samples`` new samples from standard-normal latents.

        The latent noise is created on the same device and with the same dtype
        as the model parameters; sampling on the default device/dtype would
        fail after the model has been moved with ``.to(device)`` or cast with
        ``.double()``/``.half()``.
        """
        reference = next(self.parameters())
        z = torch.randn(
            num_samples,
            self.latent_dim,
            device=reference.device,
            dtype=reference.dtype,
        )
        return self.decode(z)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# PoetryLSTM 类定义
|
|
2
|
+
# 从文档自动提取生成
|
|
3
|
+
|
|
4
|
+
import torch
|
|
5
|
+
import torch.nn as nn
|
|
6
|
+
|
|
7
|
+
class PoetryLSTM(nn.Module):
    """LSTM language model (used for classical Chinese poetry generation).

    Pipeline: Embedding -> Dropout -> LSTM -> Linear.
    ``forward`` returns raw logits over the vocabulary; no softmax is applied
    inside this module.
    """

    def __init__(self, vocab_size, embedding_dim=256, hidden_dim=256, num_layers=2, dropout=0.3):
        super().__init__()

        self.vocab_size = vocab_size
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Inter-layer LSTM dropout is only enabled for stacked LSTMs.
        lstm_dropout = dropout if num_layers > 1 else 0

        # Token index -> dense vector; index 0 is the padding index.
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)

        # Recurrent core operating on (batch, seq, feature) tensors.
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=lstm_dropout,
        )

        # Hidden state -> one score per vocabulary entry.
        self.fc = nn.Linear(hidden_dim, vocab_size)

        # Dropout applied to the embedded input sequence.
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, hidden=None):
        """Compute next-token logits for every position of ``x``.

        Args:
            x: input index sequence, (batch_size, seq_len).
            hidden: optional initial LSTM state passed straight to the LSTM.

        Returns:
            output: logits, (batch_size, seq_len, vocab_size).
            hidden: final LSTM state as returned by ``nn.LSTM``.
        """
        # (batch_size, seq_len) -> (batch_size, seq_len, embedding_dim)
        token_vectors = self.dropout(self.embedding(x))

        # (batch_size, seq_len, embedding_dim) -> (batch_size, seq_len, hidden_dim)
        seq_features, hidden = self.lstm(token_vectors, hidden)

        # (batch_size, seq_len, hidden_dim) -> (batch_size, seq_len, vocab_size)
        return self.fc(seq_features), hidden

    def init_hidden(self, batch_size, device):
        """Return an all-zero ``(h0, c0)`` state pair on ``device``."""
        state_shape = (self.num_layers, batch_size, self.hidden_dim)
        return tuple(torch.zeros(state_shape, device=device) for _ in range(2))
|
package/src/commands/data.js
CHANGED
|
@@ -61,6 +61,15 @@ const DATASETS = [
|
|
|
61
61
|
targetDir: 'datasets/cartoon-face',
|
|
62
62
|
source: 'ModelScope (icyfenix)',
|
|
63
63
|
zipFile: 'faces.zip'
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
id: 'chinese-poetry',
|
|
67
|
+
name: 'Chinese Poetry (古诗词)',
|
|
68
|
+
url: 'https://www.modelscope.cn/datasets/icyfenix/Chinese-Poetry.git',
|
|
69
|
+
size: '~50MB',
|
|
70
|
+
format: 'git',
|
|
71
|
+
targetDir: 'datasets/chinese-poetry',
|
|
72
|
+
source: 'ModelScope (icyfenix)'
|
|
64
73
|
}
|
|
65
74
|
]
|
|
66
75
|
|
package/version.json
CHANGED