nlptoolkit-computationalgraph 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/ComputationalGraph.py +373 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/CrossEntropyLoss.py +30 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/DELU.py +71 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/Dropout.py +60 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/ELU.py +65 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/Function.py +44 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/Logarithm.py +43 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/MeanSquaredErrorLoss.py +36 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/Negation.py +44 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/Power.py +59 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/ReLU.py +47 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/SiLU.py +34 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/Sigmoid.py +49 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/Softmax.py +80 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/Tanh.py +48 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/TanhShrink.py +34 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Function/__init__.py +0 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Initialization/HeUniformInitialization.py +31 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Initialization/Initialization.py +20 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Initialization/RandomInitialization.py +25 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Initialization/UniformXavierInitialization.py +29 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Initialization/__init__.py +0 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/NeuralNetworkParameter.py +39 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Node/ComputationalNode.py +89 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Node/ConcatenatedNode.py +27 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Node/FunctionNode.py +37 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Node/MultiplicationNode.py +52 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Node/__init__.py +0 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Optimizer/Adam.py +89 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Optimizer/AdamW.py +44 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Optimizer/Optimizer.py +97 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Optimizer/SGDMomentum.py +47 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Optimizer/StochasticGradientDescent.py +31 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/Optimizer/__init__.py +0 -0
- nlptoolkit_computationalgraph-1.0.1/ComputationalGraph/__init__.py +0 -0
- nlptoolkit_computationalgraph-1.0.1/PKG-INFO +57 -0
- nlptoolkit_computationalgraph-1.0.1/README.md +46 -0
- nlptoolkit_computationalgraph-1.0.1/nlptoolkit_computationalgraph.egg-info/PKG-INFO +57 -0
- nlptoolkit_computationalgraph-1.0.1/nlptoolkit_computationalgraph.egg-info/SOURCES.txt +46 -0
- nlptoolkit_computationalgraph-1.0.1/nlptoolkit_computationalgraph.egg-info/dependency_links.txt +1 -0
- nlptoolkit_computationalgraph-1.0.1/nlptoolkit_computationalgraph.egg-info/requires.txt +2 -0
- nlptoolkit_computationalgraph-1.0.1/nlptoolkit_computationalgraph.egg-info/top_level.txt +2 -0
- nlptoolkit_computationalgraph-1.0.1/setup.cfg +4 -0
- nlptoolkit_computationalgraph-1.0.1/setup.py +22 -0
- nlptoolkit_computationalgraph-1.0.1/test/ComputationalGraphTest.py +101 -0
- nlptoolkit_computationalgraph-1.0.1/test/LinearPerceptronSingleInput.py +38 -0
- nlptoolkit_computationalgraph-1.0.1/test/NeuralNet.py +115 -0
- nlptoolkit_computationalgraph-1.0.1/test/__init__.py +0 -0
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from collections import deque
|
|
5
|
+
from typing import Optional
|
|
6
|
+
import pickle
|
|
7
|
+
|
|
8
|
+
from ComputationalGraph.Node.ComputationalNode import ComputationalNode
|
|
9
|
+
from ComputationalGraph.Node.MultiplicationNode import MultiplicationNode
|
|
10
|
+
from ComputationalGraph.Node.FunctionNode import FunctionNode
|
|
11
|
+
from ComputationalGraph.Node.ConcatenatedNode import ConcatenatedNode
|
|
12
|
+
from ComputationalGraph.Function.Function import Function
|
|
13
|
+
from ComputationalGraph.Function.Dropout import Dropout
|
|
14
|
+
from ComputationalGraph.NeuralNetworkParameter import NeuralNetworkParameter
|
|
15
|
+
from Math.Tensor import Tensor
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ComputationalGraph(ABC):
    """Abstract base class for a feed-forward computational graph.

    Subclasses wire ComputationalNode instances together with the add*Edge
    helpers, then drive learning with forwardCalculation() followed by
    backpropagation(), and inference with predict().  The training loop
    itself (train/test) and the mapping from the output node's tensor to
    per-instance predictions (getOutputValue) are left to subclasses.
    """

    def __init__(self, parameters: NeuralNetworkParameter):
        """Create an empty graph driven by the given hyper-parameters.

        :param parameters: Network hyper-parameters (batch size, optimizer, ...).
        """
        self.input_nodes: list[ComputationalNode] = []
        self.output_node: Optional[ComputationalNode] = None
        self.parameters = parameters
        # Parentless (leaf) nodes of the graph.  Discovered lazily via
        # _ensureLeafNodes() because the graph is still being wired when
        # __init__ runs in a subclass.
        self._leaf_nodes: Optional[list[ComputationalNode]] = None

    # ------------------------------------------------------------------
    # Abstract interface
    # ------------------------------------------------------------------

    @abstractmethod
    def train(self, train_set: list[Tensor]):
        """Train the network on the given data set."""
        pass

    @abstractmethod
    def test(self, test_set: list[Tensor]):
        """Evaluate the network on the given data set."""
        pass

    @abstractmethod
    def getOutputValue(self, output_node: ComputationalNode) -> list[float]:
        """Convert the output node's tensor into per-instance predictions."""
        pass

    # ------------------------------------------------------------------
    # Edge construction helpers
    # ------------------------------------------------------------------

    def addEdge(self, first: ComputationalNode, second, is_biased: bool = False) -> ComputationalNode:
        """Connect ``first`` through a Function, or multiply it with ``second``.

        :param first: Source node of the new edge.
        :param second: Either a Function (which performs its own wiring) or a
                       MultiplicationNode (e.g. a weight node).
        :param is_biased: Whether the new connection carries a bias column.
        :return: The newly created node.
        :raises ValueError: If ``second`` is neither a Function nor a
                            MultiplicationNode.
        """
        if isinstance(second, Function):
            return second.addEdge([first], is_biased)
        elif isinstance(second, MultiplicationNode):
            new_node = MultiplicationNode(
                learnable=False,
                is_biased=is_biased,
                is_hadamard=second.isHadamard(),
                priority_node=first
            )
            first.addChild(new_node)
            new_node.addParent(first)
            second.addChild(new_node)
            new_node.addParent(second)
            return new_node
        else:
            raise ValueError("Illegal type for argument 'second'")

    def addFunctionEdge(self, input_nodes: list[ComputationalNode], second: Function, is_biased: bool = False) -> ComputationalNode:
        """Wire several input nodes into the given Function and return its node."""
        return second.addEdge(input_nodes, is_biased)

    def addEdgeHadamard(self, first: ComputationalNode, second: ComputationalNode,
                        is_biased: bool, is_hadamard: bool) -> ComputationalNode:
        """Create a multiplication node combining ``first`` and ``second``.

        :param is_hadamard: True for an elementwise (Hadamard) product,
                            False for a matrix product with ``first`` taken
                            as the left operand (the priority node).
        :return: The new multiplication node.
        """
        new_node = MultiplicationNode(learnable=False, is_biased=is_biased, is_hadamard=is_hadamard, priority_node=first)
        first.addChild(new_node)
        new_node.addParent(first)
        second.addChild(new_node)
        new_node.addParent(second)
        return new_node

    def addAdditionEdge(self, first: ComputationalNode, second: ComputationalNode,
                        is_biased: bool = False) -> ComputationalNode:
        """Create a plain (addition) node summing ``first`` and ``second``."""
        new_node = ComputationalNode(learnable=False, is_biased=is_biased)
        first.addChild(new_node)
        new_node.addParent(first)
        second.addChild(new_node)
        new_node.addParent(second)
        return new_node

    def concatEdges(self, nodes: list[ComputationalNode], dimension: int) -> ComputationalNode:
        """Create a node concatenating all ``nodes`` along ``dimension``."""
        new_node = ConcatenatedNode(dimension)
        for node in nodes:
            node.addChild(new_node)
            new_node.addParent(node)
            new_node.addNode(node)
        return new_node

    # ------------------------------------------------------------------
    # Topological sort
    # ------------------------------------------------------------------

    def _sortRecursive(self, node: ComputationalNode, visited: set) -> deque:
        """Depth-first post-order traversal from ``node``.

        Children are emitted before the node itself, so the deepest node
        (the output) ends up at the front of the resulting deque.
        """
        queue = deque()
        visited.add(node)
        for i in range(node.childrenSize()):
            child = node.getChild(i)
            if child not in visited:
                queue.extend(self._sortRecursive(child, visited))
        queue.append(node)
        return queue

    def _topologicalSort(self) -> deque:
        """Return all graph nodes in reverse topological order (output first)."""
        sorted_list = deque()
        visited = set()
        for node in self._leaf_nodes:
            if node not in visited:
                q = self._sortRecursive(node, visited)
                sorted_list.extend(q)
        return sorted_list

    # ------------------------------------------------------------------
    # Clear
    # ------------------------------------------------------------------

    def _clearRecursive(self, visited: set, node: ComputationalNode):
        """Reset the value/backward of every non-learnable node reachable from ``node``."""
        visited.add(node)
        if not node.isLearnable():
            node.setValue(None)
        # Gradients are always reset, including for learnable nodes.
        node.setBackward(None)
        for i in range(node.childrenSize()):
            child = node.getChild(i)
            if child not in visited:
                self._clearRecursive(visited, child)

    def _clear(self):
        """Reset transient state of the whole graph between passes."""
        visited = set()
        for node in self._leaf_nodes:
            if node not in visited:
                self._clearRecursive(visited, node)

    # ------------------------------------------------------------------
    # Bias helpers
    # ------------------------------------------------------------------

    def _transposeAxes(self, length: int) -> tuple[int, ...]:
        """Return the axis permutation that swaps the last two of ``length`` axes."""
        axes = list(range(length))
        axes[-1], axes[-2] = axes[-2], axes[-1]
        return tuple(axes)

    def _getBiasedPartial(self, tensor: Tensor) -> Tensor:
        """Return ``tensor`` with the trailing bias column stripped off."""
        shape = tensor.getShape()
        end_indices = list(shape)
        end_indices[-1] -= 1
        start_indices = (0,) * len(shape)
        return tensor.partial(start_indices, tuple(end_indices))

    def _getBiased(self, node: ComputationalNode):
        """Append a constant 1.0 bias column to ``node``'s value (in place)."""
        shape = node.getValue().getShape()
        last_dim = shape[-1]
        old_values = list(node.getValue().getData())
        values = []
        for i, v in enumerate(old_values):
            values.append(v)
            # After every complete row along the last dimension, insert the bias.
            if (i + 1) % last_dim == 0:
                values.append(1.0)
        new_shape = list(shape)
        new_shape[-1] += 1
        node.setValue(Tensor(values, tuple(new_shape)))

    # ------------------------------------------------------------------
    # Derivative calculation
    # ------------------------------------------------------------------

    def _calculateDerivative(self, node: ComputationalNode, child: ComputationalNode) -> Optional[Tensor]:
        """Compute the gradient contribution flowing from ``child`` back into ``node``.

        :return: The gradient tensor, or None when ``child`` has no parents.
        :raises ValueError: If a required value is missing in the matrix-product case.
        """
        if child.parentsSize() == 0:
            return None

        # Drop the bias column from the incoming gradient when present.
        backward = self._getBiasedPartial(child.getBackward()) if child.isBiased() else child.getBackward()

        if isinstance(child, FunctionNode):
            function = child.getFunction()
            child_value = self._getBiasedPartial(child.getValue()) if child.isBiased() else child.getValue()
            return function.derivative(child_value, backward)

        if isinstance(child, ConcatenatedNode):
            # Slice out the sub-block of the gradient that belongs to ``node``.
            index = child.getIndex(node)
            block_size = backward.getShape()[child.getDimension()] // child.parentsSize()
            dimensions = block_size
            shape = list(backward.getShape())
            for i in range(len(shape)):
                dim = child.getDimension()
                if dim > i:
                    pass
                elif dim < i:
                    dimensions *= shape[i]
                else:
                    shape[i] = block_size
            child_values = list(backward.getData())
            new_values = []
            i = index * dimensions
            while i < len(child_values):
                for k in range(dimensions):
                    new_values.append(child_values[i + k])
                i += child.parentsSize() * dimensions
            return Tensor(new_values, tuple(shape))

        if isinstance(child, MultiplicationNode):
            left = child.getParent(0)
            right = child.getParent(1)
            if left is node:
                right_value = right.getValue()
                if child.isHadamard():
                    return right_value.hadamardProduct(backward)
                # d(L @ R)/dL = backward @ R^T
                return backward.multiply(right_value.transpose(self._transposeAxes(len(right_value.getShape()))))
            left_value = left.getValue()
            if child.isHadamard():
                return left_value.hadamardProduct(backward)
            if left_value is not None and backward is not None:
                # d(L @ R)/dR = L^T @ backward
                return left_value.transpose(self._transposeAxes(len(left_value.getShape()))).multiply(backward)
            raise ValueError("Backward and/or left child values are None")

        # Plain addition node: the gradient passes through unchanged.
        return backward

    # ------------------------------------------------------------------
    # Backpropagation
    # ------------------------------------------------------------------

    def backpropagation(self):
        """Run a full backward pass, update parameters, and clear the graph."""
        self._ensureLeafNodes()
        sorted_nodes = self._topologicalSort()
        if not sorted_nodes:
            return

        # Seed the output gradient with 1/batch_size for every element.
        output_node = sorted_nodes.popleft()
        batch_size = self.parameters.getBatchSize()
        backward = [1.0 / batch_size] * len(list(output_node.getValue().getData()))
        output_node.setBackward(Tensor(backward, output_node.getValue().getShape()))

        while sorted_nodes:
            node = sorted_nodes.popleft()
            if node.childrenSize() > 0:
                # Accumulate gradient contributions from every child.
                for i in range(node.childrenSize()):
                    child = node.getChild(i)
                    derivative = self._calculateDerivative(node, child)
                    if derivative is not None:
                        if node.getBackward() is None:
                            node.setBackward(derivative)
                        else:
                            node.setBackward(node.getBackward().add(derivative))

        self.parameters.getOptimizer().updateValues(self._leaf_nodes)
        self._clear()

    # ------------------------------------------------------------------
    # Forward pass
    # ------------------------------------------------------------------

    def _findOutputNode(self, node: ComputationalNode) -> ComputationalNode:
        """Follow first-child links from ``node`` down to the childless output node."""
        if node.childrenSize() == 0:
            return node
        return self._findOutputNode(node.getChild(0))

    def _findLeafNodes(self) -> list[ComputationalNode]:
        """Breadth-first search upward from the output to collect parentless nodes."""
        leaf_nodes = []
        output_node = self._findOutputNode(self.input_nodes[0])
        queue = [output_node]
        visited = set()
        while queue:
            current = queue.pop(0)
            if current.parentsSize() == 0:
                leaf_nodes.append(current)
            for i in range(current.parentsSize()):
                parent = current.getParent(i)
                if parent not in visited:
                    visited.add(parent)
                    queue.append(parent)
        return leaf_nodes

    def _ensureLeafNodes(self):
        """Discover and cache the graph's leaf nodes on first use."""
        if self._leaf_nodes is None:
            self._leaf_nodes = self._findLeafNodes()

    def predict(self) -> list[float]:
        """Run an inference forward pass (dropout disabled) and return predictions.

        Leaf nodes are initialized lazily here as well, so predict() no
        longer raises TypeError when called before forwardCalculation()
        (e.g. on a freshly constructed or loaded model).
        """
        self._ensureLeafNodes()
        class_labels = self._forwardCalculation(enable_dropout=False)
        self._clear()
        return class_labels

    def forwardCalculation(self) -> list[float]:
        """Run a training-mode forward pass (dropout enabled)."""
        self._ensureLeafNodes()
        return self._forwardCalculation(enable_dropout=True)

    def _forwardCalculation(self, enable_dropout: bool) -> list[float]:
        """Propagate values from the leaves to the output node.

        :param enable_dropout: True during training; when False, Dropout
                               nodes copy their input through unchanged.
        :return: The predictions produced by getOutputValue().
        :raises ValueError: If a node is reached before its value is set, or
                            a concatenation is missing a parent value.
        """
        sorted_nodes = self._topologicalSort()
        if not sorted_nodes:
            return []

        # Bookkeeping for ConcatenatedNode: parents seen so far, in order.
        concatenated_node_map: dict[ConcatenatedNode, list[Optional[ComputationalNode]]] = {}
        counter_map: dict[ComputationalNode, int] = {}

        while len(sorted_nodes) > 1:
            current_node = sorted_nodes.pop()  # removeLast: leaves first

            if current_node.isBiased():
                self._getBiased(current_node)

            if current_node.getValue() is None:
                raise ValueError("Current node's value is None")

            if current_node.childrenSize() > 0:
                # During inference there is no need to go past the output node.
                if current_node is self.output_node and not enable_dropout:
                    break

                for t in range(current_node.childrenSize()):
                    child = current_node.getChild(t)

                    if child.getValue() is None:
                        if isinstance(child, FunctionNode):
                            function = child.getFunction()
                            current_value = current_node.getValue()
                            if isinstance(function, Dropout):
                                if enable_dropout:
                                    child.setValue(function.calculate(current_value))
                                else:
                                    # Inference: pass the value through as a copy.
                                    child.setValue(Tensor(current_value.getData(), current_value.getShape()))
                            else:
                                child.setValue(function.calculate(current_value))

                        elif isinstance(child, ConcatenatedNode):
                            if child not in concatenated_node_map:
                                concatenated_node_map[child] = [None] * child.parentsSize()
                            concatenated_node_map[child][child.getIndex(current_node)] = current_node
                            counter_map[child] = counter_map.get(child, 0) + 1
                            # Only concatenate once every parent has arrived.
                            if child.parentsSize() == counter_map[child]:
                                nodes_arr = concatenated_node_map[child]
                                if any(n is None for n in nodes_arr):
                                    raise ValueError("Concatenation nodes missing parents")
                                nodes_arr = [n for n in nodes_arr if n is not None]
                                child.setValue(nodes_arr[0].getValue())
                                for i in range(1, len(nodes_arr)):
                                    child.setValue(
                                        child.getValue().concat(nodes_arr[i].getValue(), child.getDimension())
                                    )
                        else:
                            # First operand of an addition node: just copy.
                            child.setValue(current_node.getValue())

                    else:
                        if isinstance(child, MultiplicationNode):
                            child_value = child.getValue()
                            current_value = current_node.getValue()
                            if child.isHadamard():
                                child.setValue(child_value.hadamardProduct(current_value))
                            elif child.getPriorityNode() is not current_node:
                                # Priority node already arrived: it is the left operand.
                                child.setValue(child_value.multiply(current_value))
                            else:
                                child.setValue(current_value.multiply(child_value))
                        else:
                            child.setValue(child.getValue().add(current_node.getValue()))

        return self.getOutputValue(self.output_node)

    # ------------------------------------------------------------------
    # Serialization
    # ------------------------------------------------------------------

    def save(self, file_name: str):
        """Pickle the whole graph to ``file_name``; prints a message on IOError."""
        try:
            with open(file_name, 'wb') as f:
                pickle.dump(self, f)
        except IOError:
            print("Object could not be saved.")

    @staticmethod
    def loadModel(file_name: str) -> Optional[ComputationalGraph]:
        """Unpickle a graph saved with save(); returns None on failure.

        NOTE(security): pickle.load executes arbitrary code embedded in the
        file — only load model files from trusted sources.
        """
        try:
            with open(file_name, 'rb') as f:
                return pickle.load(f)
        except (IOError, pickle.UnpicklingError):
            return None
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from ComputationalGraph.Node.ComputationalNode import ComputationalNode
|
|
4
|
+
from ComputationalGraph.Node.FunctionNode import FunctionNode
|
|
5
|
+
from ComputationalGraph.Node.MultiplicationNode import MultiplicationNode
|
|
6
|
+
from ComputationalGraph.Function.Logarithm import Logarithm
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CrossEntropyLoss(Logarithm):
    """
    Cross Entropy Loss function representation for the computational graph.

    Reuses the elementwise logarithm math inherited from Logarithm and only
    overrides the graph wiring: the loss subgraph computes y * log(y_hat)
    via a Hadamard product.
    """

    def addEdge(self, input_nodes: List[ComputationalNode], is_biased: bool) -> ComputationalNode:
        """
        Constructs the subgraph for the cross-entropy loss operation.
        """
        # 'self' is a Logarithm, so this node applies log to the predictions.
        log_node = FunctionNode(function=self, is_biased=False)
        input_nodes[0].add(log_node)

        # Elementwise (Hadamard) product node computing y * log(y_hat).
        product_node = MultiplicationNode(learnable=False, is_biased=is_biased, is_hadamard=True)
        input_nodes[1].add(product_node)

        # Feed the log node into the product node.
        log_node.add(product_node)

        return product_node
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from ComputationalGraph.Function.Function import Function
|
|
5
|
+
from ComputationalGraph.Node.ComputationalNode import ComputationalNode
|
|
6
|
+
from ComputationalGraph.Node.FunctionNode import FunctionNode
|
|
7
|
+
from Math.Tensor import Tensor
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DELU(Function):
    """
    DELU activation function representation for the computational graph.

    DELU(x) = x when x > xc, and (e^(a*x) - 1) / b otherwise.  The default
    xc = 1.25643 appears chosen so the two pieces meet for the default
    a = 1, b = 2.
    """

    def __init__(self, a: float = 1.0, b: float = 2.0, xc: float = 1.25643):
        """
        Initializes the DELU activation function.

        :param a: Parameter 'a' for the DELU function.
        :param b: Parameter 'b' for the DELU function.
        :param xc: Threshold parameter 'xc' for the DELU function.
        """
        self.a: float = a
        self.b: float = b
        self.xc: float = xc

    def calculate(self, value: Tensor) -> Tensor:
        """
        Computes the DELU activation for the given value tensor.

        :param value: The tensor whose values are to be computed.
        :return: A new Tensor containing DELU(x).
        """
        transformed = [
            x if x > self.xc else (math.exp(self.a * x) - 1) / self.b
            for x in value.getData()
        ]
        return Tensor(transformed, value.getShape())

    def derivative(self, value: Tensor, backward: Tensor) -> Tensor:
        """
        Computes the derivative of the DELU activation function.

        Since ``value`` holds y = (e^(a*x) - 1) / b in the exponential
        region, e^(a*x) = y*b + 1 and dy/dx = (y*b + 1) * (a/b); the linear
        region has slope 1.

        :param value: The output tensor of the DELU(x) operation.
        :param backward: The backward gradient tensor.
        :return: A new Tensor containing the gradient values.
        """
        gradients = [
            g if y > self.xc else g * ((y * self.b + 1) * (self.a / self.b))
            for y, g in zip(value.getData(), backward.getData())
        ]
        return Tensor(gradients, value.getShape())

    def addEdge(self, input_nodes: List[ComputationalNode], is_biased: bool) -> ComputationalNode:
        """
        Adds a DELU node to the computational graph.
        """
        delu_node = FunctionNode(function=self, is_biased=is_biased)
        input_nodes[0].add(delu_node)
        return delu_node
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import random
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from ComputationalGraph.Function.Function import Function
|
|
5
|
+
from ComputationalGraph.Node.ComputationalNode import ComputationalNode
|
|
6
|
+
from ComputationalGraph.Node.FunctionNode import FunctionNode
|
|
7
|
+
from Math.Tensor import Tensor
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Dropout(Function):
    """
    Implements the Dropout function for regularization in the computational graph.

    Uses "inverted" dropout: each element is zeroed with probability ``p``
    and the survivors are scaled by 1/(1 - p), so the expected activation is
    unchanged.  The mask built during the forward pass is stored and reused
    by derivative().
    """

    def __init__(self, p: float, rng: Optional[random.Random] = None):
        """
        Initializes the Dropout function.

        :param p: The probability of dropping an element; must be in [0, 1).
        :param rng: A random.Random instance (equivalent to Java's Random);
                    defaults to a fresh generator.
        :raises ValueError: If p is not in [0, 1).
        """
        # Validate eagerly: p == 1 would make the 1/(1-p) scale divide by
        # zero inside calculate(), and p outside [0, 1] is not a probability.
        if not 0.0 <= p < 1.0:
            raise ValueError(f"Dropout probability must be in [0, 1), got {p}")
        self.p: float = p
        # Use the provided RNG or a private generator.
        self.random: random.Random = rng if rng is not None else random.Random()
        # Per-element multipliers from the most recent forward pass.
        self.mask: List[float] = []

    def calculate(self, value: Tensor) -> Tensor:
        """
        Computes the dropout values and stores the mask for the backward pass.

        :param value: The input tensor.
        :return: A new Tensor with elements dropped or scaled.
        """
        self.mask.clear()
        multiplier = 1.0 / (1.0 - self.p)
        new_data = []

        old_values = value.getData()
        for old_value in old_values:
            r = self.random.random()
            if r > self.p:
                # Kept: scale up so the expected value is preserved.
                self.mask.append(multiplier)
                new_data.append(old_value * multiplier)
            else:
                # Dropped.
                self.mask.append(0.0)
                new_data.append(0.0)

        return Tensor(new_data, value.getShape())

    def derivative(self, value: Tensor, backward: Tensor) -> Tensor:
        """
        Calculates the derivative of the dropout using the stored mask.

        :param value: The output tensor of the dropout operation (used for its shape).
        :param backward: The backward gradient tensor.
        :return: The gradient with the same elements dropped/scaled as the forward pass.
        """
        mask_tensor = Tensor(self.mask, value.getShape())
        return backward.hadamardProduct(mask_tensor)

    def addEdge(self, input_nodes: List[ComputationalNode], is_biased: bool) -> ComputationalNode:
        """
        Adds a Dropout node to the computational graph.
        """
        new_node = FunctionNode(function=self, is_biased=is_biased)
        input_nodes[0].add(new_node)
        return new_node
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from ComputationalGraph.Function.Function import Function
|
|
5
|
+
from ComputationalGraph.Node.ComputationalNode import ComputationalNode
|
|
6
|
+
from ComputationalGraph.Node.FunctionNode import FunctionNode
|
|
7
|
+
from Math.Tensor import Tensor
|
|
8
|
+
|
|
9
|
+
class ELU(Function):
    """
    Exponential Linear Unit (ELU) activation function representation for the
    computational graph.

    ELU(x) = x for x >= 0 and a * (e^x - 1) for x < 0.
    """

    def __init__(self, a: float = 1.0):
        """
        Initializes the ELU activation function.

        :param a: The alpha parameter for the ELU function (defaults to 1.0).
        """
        self.a: float = a

    def calculate(self, value: Tensor) -> Tensor:
        """
        Computes the ELU activation for the given tensor.

        :param value: The tensor whose values are to be computed.
        :return: A new Tensor containing ELU(x).
        """
        transformed = [
            x if x >= 0 else self.a * (math.exp(x) - 1.0)
            for x in value.getData()
        ]
        return Tensor(transformed, value.getShape())

    def derivative(self, value: Tensor, backward: Tensor) -> Tensor:
        """
        Computes the derivative of the ELU activation function.

        ``value`` holds the forward output y = ELU(x); for x < 0 we have
        y < 0 and dy/dx = y + a, otherwise dy/dx = 1.

        :param value: The output tensor of the ELU(x) operation.
        :param backward: The backward gradient tensor.
        :return: A new Tensor containing the gradient values.
        """
        gradients = [
            (y + self.a) * g if y < 0 else g
            for y, g in zip(value.getData(), backward.getData())
        ]
        return Tensor(gradients, value.getShape())

    def addEdge(self, input_nodes: List[ComputationalNode], is_biased: bool) -> ComputationalNode:
        """
        Adds an ELU node to the computational graph.
        """
        elu_node = FunctionNode(function=self, is_biased=is_biased)
        input_nodes[0].add(elu_node)
        return elu_node
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import List, TYPE_CHECKING
|
|
3
|
+
|
|
4
|
+
if TYPE_CHECKING:
|
|
5
|
+
from ComputationalGraph.Node.ComputationalNode import ComputationalNode
|
|
6
|
+
from Math.Tensor import Tensor
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Function(ABC):
    """
    An interface representing a mathematical function in the computational graph.

    Concrete subclasses (activations, losses, dropout) supply the forward
    math (calculate), the backward math (derivative), and the graph wiring
    (addEdge).
    """

    @abstractmethod
    def calculate(self, matrix: 'Tensor') -> 'Tensor':
        """
        Calculates the forward pass of the function.

        :param matrix: The input tensor.
        :return: The resulting tensor after applying the function.
        """
        pass

    @abstractmethod
    def derivative(self, value: 'Tensor', backward: 'Tensor') -> 'Tensor':
        """
        Calculates the derivative (backward pass) of the function.

        Callers pass the function node's forward OUTPUT as ``value`` (not the
        input), so implementations express dy/dx in terms of y.

        :param value: The current tensor value (the function's forward output).
        :param backward: The backward gradient tensor.
        :return: The resulting gradient tensor.
        """
        pass

    @abstractmethod
    def addEdge(self, input_nodes: List['ComputationalNode'], is_biased: bool) -> 'ComputationalNode':
        """
        Adds an edge to the computational graph.

        :param input_nodes: A list of input computational nodes.
        :param is_biased: Indicates whether the connection is biased.
        :return: The resulting computational node.
        """
        pass
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import math
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from ComputationalGraph.Function.Function import Function
|
|
5
|
+
from ComputationalGraph.Node.ComputationalNode import ComputationalNode
|
|
6
|
+
from ComputationalGraph.Node.FunctionNode import FunctionNode
|
|
7
|
+
from Math.Tensor import Tensor
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Logarithm(Function):
    """
    Applies the natural logarithm function to a tensor.
    """

    def calculate(self, value: Tensor) -> Tensor:
        """
        Applies the natural logarithm to each element of the input tensor.
        """
        logged = [math.log(x) for x in value.getData()]
        return Tensor(logged, value.getShape())

    def derivative(self, value: Tensor, backward: Tensor) -> Tensor:
        """
        Computes the derivative of the Logarithm function.

        ``value`` holds the forward output y = ln(x), so x = e^y and
        d(ln x)/dx = 1/x = 1/e^y.
        """
        gradients = [
            (1.0 / math.exp(y)) * g
            for y, g in zip(value.getData(), backward.getData())
        ]
        return Tensor(gradients, value.getShape())

    def addEdge(self, input_nodes: List[ComputationalNode], is_biased: bool) -> ComputationalNode:
        """
        Adds a Logarithm node to the graph.
        """
        log_node = FunctionNode(function=self, is_biased=is_biased)
        input_nodes[0].add(log_node)
        return log_node
|