nodev 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- __init__.py +0 -0
- nodev/__init__.py +18 -0
- nodev/core.py +297 -0
- nodev/cuda.py +27 -0
- nodev/dataloaders.py +65 -0
- nodev/dataset.py +186 -0
- nodev/layers.py +179 -0
- nodev/model.py +119 -0
- nodev/operations.py +389 -0
- nodev/operations_conv.py +474 -0
- nodev/optimizer.py +72 -0
- nodev/transforms.py +65 -0
- nodev/utils.py +172 -0
- nodev-1.0.0.dist-info/METADATA +14 -0
- nodev-1.0.0.dist-info/RECORD +17 -0
- nodev-1.0.0.dist-info/WHEEL +5 -0
- nodev-1.0.0.dist-info/top_level.txt +2 -0
nodev/layers.py
ADDED
@@ -0,0 +1,179 @@

import os
import weakref

import numpy as np

from nodev.utils import pair
from nodev import cuda
from nodev import operations as F
from nodev.core import Formpara


class Layer:
    def __init__(self):
        self._params = set()

    def __call__(self, *inputs):
        outputs = self.forward(*inputs)
        if not isinstance(outputs, tuple):
            outputs = (outputs,)
        self.inputs = [weakref.ref(x) for x in inputs]
        self.outputs = [weakref.ref(y) for y in outputs]
        return outputs if len(outputs) > 1 else outputs[0]

    def _flatten_params(self, param_dict, parent_key=''):
        for name in self._params:
            obj = self.__dict__[name]
            key = parent_key + '/' + name if parent_key else name
            if isinstance(obj, Layer):
                obj._flatten_params(param_dict, key)
            else:
                param_dict[key] = obj

    def save_weights(self, path):
        self.to_cpu()
        params_dict = {}
        self._flatten_params(params_dict)
        array_dict = {key: param.data for key, param in params_dict.items()
                      if param is not None}
        try:
            np.savez_compressed(path, **array_dict)
        except (Exception, KeyboardInterrupt):
            # Clean up a partially written file, then re-raise.
            if os.path.exists(path):
                os.remove(path)
            raise

    def load_weights(self, path):
        npz = np.load(path, allow_pickle=True)
        params_dict = {}
        self._flatten_params(params_dict)
        for key, param in params_dict.items():
            param.data = npz[key]

    def __setattr__(self, name, value):  # _params records the names of this instance's parameters
        if isinstance(value, (Formpara, Layer)):
            self._params.add(name)
        super().__setattr__(name, value)

    def forward(self, inputs):
        raise NotImplementedError()

    def params(self):
        for name in self._params:
            obj = self.__dict__[name]
            if isinstance(obj, Layer):
                yield from obj.params()
            else:
                yield obj

    def cleargrads(self):
        for param in self.params():
            param.clear_grad()

    def to_cpu(self):
        for param in self.params():
            param.to_cpu()

    def to_gpu(self):
        for param in self.params():
            param.to_gpu()


class Linear(Layer):
    def __init__(self, out_size, nobias=False, dtype=np.float32, in_size=None):
        super().__init__()
        self.in_size = in_size
        self.out_size = out_size
        self.dtype = dtype

        self.W = Formpara(None, name='W')
        if self.in_size is not None:  # otherwise defer until forward sees the input
            self._init_W()
        if nobias:
            self.b = None
        else:
            self.b = Formpara(np.zeros(out_size, dtype=dtype), name='b')

    def _init_W(self):  # create a weight matrix shaped (in_size, out_size)
        I, O = self.in_size, self.out_size
        W_data = np.random.randn(I, O).astype(self.dtype) * np.sqrt(1 / I)
        self.W.data = W_data

    def forward(self, x):
        # Initialize the weight lazily, on the first forward pass.
        if self.W.data is None:
            self.in_size = x.shape[1]
            self._init_W()
        y = F.linear(x, self.W, self.b)
        return y


"""Earlier eager-initialization version, kept for reference:

class Linear(Layer):
    def __init__(self, out_size, nobias=False, dtype=np.float32, in_size=None):
        super().__init__()
        self.in_size = in_size
        self.out_size = out_size
        self.dtype = dtype

        I, O = self.in_size, self.out_size
        W_data = np.random.randn(I, O).astype(self.dtype) * np.sqrt(1 / I)
        self.W = Formpara(W_data, name="W")

        if nobias:
            self.b = None
        else:
            self.b = Formpara(np.zeros(out_size, dtype=dtype), name="b")

    def forward(self, x: Datafield) -> Datafield:
        y = F.linear(x, self.W, self.b)
        return y
"""


class Conv2d(Layer):
    def __init__(self, out_channels, kernel_size, stride=1, pad=0,
                 nobias=False, dtype=np.float32, in_channels=None):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.pad = pad
        self.dtype = dtype
        self.W = Formpara(None, name='W')
        if in_channels is not None:
            self._init_W()
        if nobias:
            self.b = None
        else:
            self.b = Formpara(np.zeros(out_channels, dtype=dtype), name='b')

    def _init_W(self, xp=np):
        C, OC = self.in_channels, self.out_channels
        KH, KW = pair(self.kernel_size)
        scale = np.sqrt(1 / (C * KH * KW))
        W_data = xp.random.randn(OC, C, KH, KW).astype(self.dtype) * scale
        self.W.data = W_data

    def forward(self, x):
        if self.W.data is None:
            self.in_channels = x.shape[1]
            xp = cuda.get_array_module(x)
            self._init_W(xp)
        y = F.conv2d(x, self.W, self.b, self.stride, self.pad)
        return y


class RNN(Layer):
    def __init__(self, hidden_size, in_size=None):
        super().__init__()
        self.x2h = Linear(hidden_size, in_size=in_size)
        # The hidden-to-hidden weight is always (hidden_size, hidden_size).
        self.h2h = Linear(hidden_size, in_size=hidden_size, nobias=True)
        self.h = None

    def reset_state(self):
        self.h = None

    def forward(self, x):
        if self.h is None:
            h_new = F.tanh(self.x2h(x))
        else:
            h_new = F.tanh(self.x2h(x) + self.h2h(self.h))
        self.h = h_new
        return h_new
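The detail worth calling out in layers.py is the lazy weight initialization: Linear and Conv2d create W as Formpara(None) and only materialize it on the first forward pass, from the observed input shape. A minimal sketch of the caller's view, assuming (as the code suggests) that a layer can be called on a raw NumPy array and that the returned value exposes .shape:

    import numpy as np
    from nodev.layers import Linear

    layer = Linear(10)                    # only out_size given; in_size still unknown
    x = np.random.randn(4, 3).astype(np.float32)
    y = layer(x)                          # first call infers in_size=3 and builds W
    assert layer.W.data.shape == (3, 10)  # (in_size, out_size)
    assert y.shape == (4, 10)

save_weights then persists every such parameter under a flattened 'outer/inner' key in a single .npz archive, which is why nested layers need no extra serialization code.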
nodev/model.py
ADDED
@@ -0,0 +1,119 @@

import numpy as np

from nodev import Layer
from nodev import utils
import nodev.operations as F
import nodev.layers as L
from nodev.operations import dropout


class Model(Layer):
    def plot(self, *inputs, to_file='model.png'):
        y = self.forward(*inputs)
        return utils.plot_dot_graph(y, verbose=True, to_file=to_file)


class MLP(Model):
    def __init__(self, fc_output_sizes, activation=F.sigmoid):
        super().__init__()  # run the base-class constructor
        self.activation = activation
        self.layers = []
        for i, out_size in enumerate(fc_output_sizes):
            layer = L.Linear(out_size)
            setattr(self, 'l' + str(i), layer)
            self.layers.append(layer)

    def forward(self, x):
        for l in self.layers[:-1]:
            x = self.activation(l(x))
        return self.layers[-1](x)


class VGG16(Model):
    WEIGHTS_PATH = 'models/vgg16.npz'

    def __init__(self, pretrained=False):
        super().__init__()
        self.conv1_1 = L.Conv2d(64, kernel_size=3, stride=1, pad=1)
        self.conv1_2 = L.Conv2d(64, kernel_size=3, stride=1, pad=1)
        self.conv2_1 = L.Conv2d(128, kernel_size=3, stride=1, pad=1)
        self.conv2_2 = L.Conv2d(128, kernel_size=3, stride=1, pad=1)
        self.conv3_1 = L.Conv2d(256, kernel_size=3, stride=1, pad=1)
        self.conv3_2 = L.Conv2d(256, kernel_size=3, stride=1, pad=1)
        self.conv3_3 = L.Conv2d(256, kernel_size=3, stride=1, pad=1)
        self.conv4_1 = L.Conv2d(512, kernel_size=3, stride=1, pad=1)
        self.conv4_2 = L.Conv2d(512, kernel_size=3, stride=1, pad=1)
        self.conv4_3 = L.Conv2d(512, kernel_size=3, stride=1, pad=1)
        self.conv5_1 = L.Conv2d(512, kernel_size=3, stride=1, pad=1)
        self.conv5_2 = L.Conv2d(512, kernel_size=3, stride=1, pad=1)
        self.conv5_3 = L.Conv2d(512, kernel_size=3, stride=1, pad=1)
        self.fc6 = L.Linear(4096)
        self.fc7 = L.Linear(4096)
        self.fc8 = L.Linear(1000)  # 1000-way classification head
        if pretrained:
            self.load_weights(VGG16.WEIGHTS_PATH)

    def forward(self, x):
        x = F.relu(self.conv1_1(x))
        x = F.relu(self.conv1_2(x))
        x = F.pooling(x, 2, 2)
        x = F.relu(self.conv2_1(x))
        x = F.relu(self.conv2_2(x))
        x = F.pooling(x, 2, 2)
        x = F.relu(self.conv3_1(x))
        x = F.relu(self.conv3_2(x))
        x = F.relu(self.conv3_3(x))
        x = F.pooling(x, 2, 2)
        x = F.relu(self.conv4_1(x))
        x = F.relu(self.conv4_2(x))
        x = F.relu(self.conv4_3(x))
        x = F.pooling(x, 2, 2)
        x = F.relu(self.conv5_1(x))
        x = F.relu(self.conv5_2(x))
        x = F.relu(self.conv5_3(x))
        x = F.pooling(x, 2, 2)
        x = F.reshape(x, (x.shape[0], -1))
        x = dropout(F.relu(self.fc6(x)))
        x = dropout(F.relu(self.fc7(x)))
        x = self.fc8(x)
        return x

    @staticmethod
    def preprocess(image, size=(224, 224), dtype=np.float32):
        image = image.convert('RGB')
        if size:
            image = image.resize(size)
        image = np.asarray(image, dtype=dtype)
        image = image[:, :, ::-1]  # RGB -> BGR
        image -= np.array([103.939, 116.779, 123.68], dtype=dtype)  # subtract the BGR channel means
        image = image.transpose((2, 0, 1))  # HWC -> CHW
        return image


class ImprovedLeNet5(Model):
    def __init__(self):
        super().__init__()
        # Tuned for MNIST (28x28 inputs).
        self.conv1 = L.Conv2d(32, 5, stride=1, pad=2)  # 28 -> 28
        self.conv2 = L.Conv2d(64, 5, stride=1, pad=2)  # 14 -> 14
        self.fc1 = L.Linear(512)  # in_size (64 * 7 * 7 after two poolings) inferred on first call
        self.fc2 = L.Linear(10)
        self.dropout = 0.5

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.pooling(x, 2, 2)  # 28 -> 14
        x = F.relu(self.conv2(x))
        x = F.pooling(x, 2, 2)  # 14 -> 7
        x = x.reshape(x.shape[0], -1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, self.dropout)
        x = self.fc2(x)
        return x


class SimpleRNN(Model):
    def __init__(self, hidden_size, out_size):
        super().__init__()
        self.rnn = L.RNN(hidden_size)
        self.fc = L.Linear(out_size)

    def reset_state(self):
        self.rnn.reset_state()

    def forward(self, x):
        h = self.rnn(x)
        y = self.fc(h)
        return y
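Because every layer initializes lazily, an MLP is declared from output sizes alone; the input width is bound on the first batch. A hypothetical training step is sketched below, assuming the DeZero-style autodiff API the rest of the package implies (a backward() method on the loss that fills each Formpara's grad, with grad wrapped like data); the update rule is plain SGD written by hand, not an API of nodev.optimizer:

    import numpy as np
    import nodev.operations as F
    from nodev.model import MLP

    model = MLP((100, 10), activation=F.relu)  # one hidden layer, 10 output classes
    x = np.random.randn(32, 784).astype(np.float32)
    t = np.random.randint(0, 10, size=(32,))

    y = model(x)                               # in_sizes inferred: 784 -> 100 -> 10
    loss = F.softmax_cross_entropy(y, t)

    model.cleargrads()
    loss.backward()                            # assumed DeZero-style entry point
    for p in model.params():
        p.data -= 0.01 * p.grad.data           # assumes grad is Datafield-wrapped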
nodev/operations.py
ADDED
@@ -0,0 +1,389 @@

import numpy as np
from typing import Tuple

from nodev import Datafield
import nodev
from nodev import as_array
from nodev import cuda
from nodev.core import Operation
from nodev import utils
from nodev import as_datafield


class Reshape(Operation):  # reshape a tensor, remembering the input shape
    def __init__(self, shape):
        self.shape = shape

    def forward(self, x):
        self.x_shape = x.shape
        y = x.reshape(self.shape)
        return y

    def backward(self, gy):
        return reshape(gy, self.x_shape)


class Sin(Operation):
    def forward(self, x):
        xp = cuda.get_array_module(x)
        y = xp.sin(x)
        return y

    def backward(self, gy):
        x, = self.inputs
        gx = gy * cos(x)
        return gx


class Cos(Operation):
    def forward(self, x):
        xp = cuda.get_array_module(x)
        y = xp.cos(x)
        return y

    def backward(self, gy):
        x, = self.inputs
        gx = -gy * sin(x)
        return gx


class Tanh(Operation):
    def forward(self, x):
        xp = cuda.get_array_module(x)
        y = xp.tanh(x)
        return y

    def backward(self, gy):
        y = self.outputs[0]()
        gx = gy * (1 - y ** 2)
        return gx


class Transpose(Operation):
    def forward(self, x):
        xp = cuda.get_array_module(x)
        y = xp.transpose(x)
        return y

    def backward(self, gy):
        gx = transpose(gy)
        return gx


class Sum(Operation):
    def __init__(self, axis, keepdims):  # keepdims=True preserves the reduced axes
        self.axis = axis
        self.keepdims = keepdims

    def forward(self, x):
        self.x_shape = x.shape
        y = x.sum(axis=self.axis, keepdims=self.keepdims)
        return y

    def backward(self, gy):
        gy = utils.reshape_sum_backward(gy, self.x_shape, self.axis, self.keepdims)
        gx = broadcast_to(gy, self.x_shape)
        return gx


class BroadcastTo(Operation):
    def __init__(self, shape):
        self.shape = shape

    def forward(self, x):
        xp = cuda.get_array_module(x)
        self.x_shape = x.shape
        y = xp.broadcast_to(x, self.shape)
        return y

    def backward(self, gy):
        gx = sum_to(gy, self.x_shape)
        return gx


class MeanSquaredError(Operation):  # mean squared error
    def forward(self, x0, x1):
        diff = x0 - x1
        y = (diff ** 2).sum() / len(diff)
        return y

    def backward(self, gy):
        x0, x1 = self.inputs
        diff = x0 - x1
        gx0 = gy * diff * (2. / len(diff))
        gx1 = -gx0
        return gx0, gx1


class Log(Operation):
    def forward(self, x):
        xp = cuda.get_array_module(x)
        y = xp.log(x)
        return y

    def backward(self, gy):
        x, = self.inputs
        gx = gy / x
        return gx


class Exp(Operation):
    def forward(self, x):
        xp = cuda.get_array_module(x)
        y = xp.exp(x)
        return y

    def backward(self, gy):
        x, = self.inputs
        xp = cuda.get_array_module(x)
        y = xp.exp(x.data)
        gx = gy * y
        return gx


class Clip(Operation):
    def __init__(self, x_min, x_max):
        self.x_min = x_min
        self.x_max = x_max

    def forward(self, x):
        xp = cuda.get_array_module(x)
        y = xp.clip(x, self.x_min, self.x_max)
        return y

    def backward(self, gy):
        x, = self.inputs
        mask = (x.data >= self.x_min) * (x.data <= self.x_max)
        gx = gy * mask
        return gx


class ReLU(Operation):
    def forward(self, x):
        xp = cuda.get_array_module(x)
        y = xp.maximum(x, 0.0)
        return y

    def backward(self, gy):
        x, = self.inputs
        mask = x.data > 0
        gx = gy * mask
        return gx


class SumTo(Operation):
    def __init__(self, shape):
        self.shape = shape

    def forward(self, x):
        self.x_shape = x.shape
        y = utils.sum_to(x, self.shape)
        return y

    def backward(self, gy):
        gx = broadcast_to(gy, self.x_shape)
        return gx


class MatMul(Operation):
    def forward(self, x, w):
        y = x.dot(w)
        return y

    def backward(self, gy):
        x, w = self.inputs
        gx = matmul(gy, w.T)
        gW = matmul(x.T, gy)
        return gx, gW


class Max(Operation):
    def __init__(self, axis, keepdims):
        self.keepdims = keepdims
        self.axis = axis

    def forward(self, x):
        y = x.max(axis=self.axis, keepdims=self.keepdims)
        return y

    def backward(self, gy):
        x = self.inputs[0]
        y = self.outputs[0]()

        shape = utils.max_backward_shape(x.shape, self.axis)
        gy = reshape(gy, shape)
        y = reshape(y, shape)

        # Route the gradient only to the positions that produced the extremum.
        cond = (x.data == y.data)
        gy = broadcast_to(gy, cond.shape)
        gx = gy * cond
        return gx


class Min(Max):
    # The equality mask in Max.backward selects the argmin positions just as
    # well, so only the forward pass needs to change.
    def forward(self, x):
        y = x.min(axis=self.axis, keepdims=self.keepdims)
        return y


class GetItem(Operation):
    def __init__(self, slices):
        self.slices = slices

    def forward(self, x):
        y = x[self.slices]
        return y

    def backward(self, gy):
        x, = self.inputs
        f = GetItemGrad(self.slices, x.shape)
        return f(gy)


class Linear(Operation):
    def forward(
        self, x: np.ndarray, W: np.ndarray, b=None
    ) -> Datafield:
        if W is None or W.data is None:
            raise ValueError("W cannot be None")
        y = x.dot(W)
        if b is not None:
            y += b
        return y

    def backward(self, gy: Datafield) -> Tuple[Datafield, Datafield, Datafield]:
        x, W, b = self.inputs
        gb = None if b.data is None else sum_to(gy, b.shape)
        gx = matmul(gy, W.T)
        gW = matmul(x.T, gy)
        return gx, gW, gb


class GetItemGrad(Operation):
    def __init__(self, slices, in_shape):
        self.slices = slices
        self.in_shape = in_shape

    def forward(self, gy):
        gx = np.zeros(self.in_shape, dtype=gy.dtype)
        np.add.at(gx, self.slices, gy)  # scatter-add: repeated indices accumulate
        return gx

    def backward(self, ggx):
        return get_item(ggx, self.slices)


def sum(x, axis=None, keepdims=False):
    return Sum(axis, keepdims)(x)

def sin(x):
    return Sin()(x)

def cos(x):
    return Cos()(x)

def tanh(x):
    return Tanh()(x)

def reshape(x, shape):
    if x.shape == shape:
        return as_datafield(x)
    return Reshape(shape)(x)

def transpose(x):
    return Transpose()(x)

def broadcast_to(x, shape):
    if x.shape == shape:
        return as_datafield(x)
    else:
        return BroadcastTo(shape)(x)

def sum_to(x, shape):
    if x.shape == shape:
        return as_datafield(x)
    else:
        return SumTo(shape)(x)

def softmax_simple(x, axis=1):
    x = as_datafield(x)
    x = x - max(x, axis=axis, keepdims=True)  # subtract the max for stability
    y = exp(x)
    sum_y = sum(y, axis=axis, keepdims=True)
    return y / sum_y

"""Earlier version, kept for reference:

def softmax_cross_entropy_simple(x, t):
    x, t = as_variable(x), as_variable(t)
    N = x.shape[0]
    p = softmax_simple(x)
    p = clip(p, 1e-15, 1.0)  # avoid log(0)
    log_p = log(p)
    tlog_p = get_item(log_p, (np.arange(N), t.data))
    # tlog_p = log_p[np.arange(N), t.data]
    y = -1 * sum(tlog_p) / N
    return y
"""


def softmax_cross_entropy(x, t):
    x = as_datafield(x)
    t = as_datafield(t)

    N = x.shape[0]

    # Numerically stable softmax:
    # 1. subtract the row max so exp cannot overflow
    max_x = x.max(axis=1, keepdims=True)
    x = x - max_x

    # 2. exponentiate
    exp_x = exp(x)

    # 3. log of the normalizer
    sum_exp_x = sum(exp_x, axis=1, keepdims=True)
    log_sum_exp_x = log(sum_exp_x)

    # 4. log(softmax) = x - log(sum(exp(x))), avoiding large intermediates
    log_p = x - log_sum_exp_x

    # 5. log-probabilities of the target classes
    #    (flatten handles both (N,) and (N, 1) integer labels)
    batch_indices = np.arange(N)
    class_indices = t.data.flatten()

    # 6. cross entropy
    tlog_p = get_item(log_p, (batch_indices, class_indices))
    loss = -sum(tlog_p) / N

    return loss
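
# Why the steps above are numerically safe: log-softmax is shift invariant,
# so with m = max_j x_j,
#
#     log p_i = x_i - log(sum_j exp(x_j))
#             = (x_i - m) - log(sum_j exp(x_j - m))
#
# every exponent is <= 0, exp cannot overflow, and the loss
# -(1/N) * sum_n log p[n, t_n] is computed without ever forming the
# softmax probabilities explicitly.
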
def relu(x):
    return ReLU()(x)

def clip(x, x_min, x_max):
    x = as_datafield(x)
    return Clip(x_min, x_max)(x)

def exp(x):
    return Exp()(x)

def matmul(x, W):
    return MatMul()(x, W)

def log(x):
    x = as_datafield(x)
    return Log()(x)

def mean_squared_error(x0, x1):
    return MeanSquaredError()(x0, x1)

def linear(x: Datafield, W: Datafield, b=None) -> Datafield:
    return Linear()(x, W, b)

def get_item(x, slices):
    return GetItem(slices)(x)

def sigmoid(x):  # logistic activation
    x = as_datafield(x)
    y = 1 / (1 + exp(-x))
    return y

def max(x, axis=None, keepdims=False):
    return Max(axis, keepdims)(x)

def min(x, axis=None, keepdims=False):
    return Min(axis, keepdims)(x)


def accuracy(y, t):
    y, t = as_datafield(y), as_datafield(t)
    pred = y.data.argmax(axis=1).reshape(t.shape)
    result = (pred == t.data)
    acc = result.mean()
    return Datafield(as_array(acc))

def dropout(x, dropout_ratio=0.5):
    x = as_datafield(x)
    if nodev.Config.train is True:
        xp = cuda.get_array_module(x)
        mask = xp.random.rand(*x.shape) > dropout_ratio
        scale = xp.array(1.0 - dropout_ratio).astype(x.dtype)
        y = x * mask / scale  # inverted dropout: rescale at train time
        return y
    else:
        return x


class Argmax(Operation):
    """Index of the maximum value along an axis."""

    def __init__(self, axis=None):
        self.axis = axis

    def forward(self, x):
        xp = cuda.get_array_module(x)
        self.x_shape = x.shape
        y = xp.argmax(x, axis=self.axis)
        return y

    def backward(self, gy):
        # argmax is not differentiable, so no gradient flows back
        return None


def argmax(x, axis=None):
    """DeZero-style argmax function."""
    return Argmax(axis)(x)


from nodev.operations_conv import conv2d
from nodev.operations_conv import pooling
|