quizgenerator-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuizGenerator/README.md +5 -0
- QuizGenerator/__init__.py +27 -0
- QuizGenerator/__main__.py +7 -0
- QuizGenerator/canvas/__init__.py +13 -0
- QuizGenerator/canvas/canvas_interface.py +622 -0
- QuizGenerator/canvas/classes.py +235 -0
- QuizGenerator/constants.py +149 -0
- QuizGenerator/contentast.py +1809 -0
- QuizGenerator/generate.py +362 -0
- QuizGenerator/logging.yaml +55 -0
- QuizGenerator/misc.py +480 -0
- QuizGenerator/mixins.py +539 -0
- QuizGenerator/performance.py +202 -0
- QuizGenerator/premade_questions/__init__.py +0 -0
- QuizGenerator/premade_questions/basic.py +103 -0
- QuizGenerator/premade_questions/cst334/__init__.py +1 -0
- QuizGenerator/premade_questions/cst334/languages.py +395 -0
- QuizGenerator/premade_questions/cst334/math_questions.py +297 -0
- QuizGenerator/premade_questions/cst334/memory_questions.py +1398 -0
- QuizGenerator/premade_questions/cst334/ostep13_vsfs.py +572 -0
- QuizGenerator/premade_questions/cst334/persistence_questions.py +396 -0
- QuizGenerator/premade_questions/cst334/process.py +649 -0
- QuizGenerator/premade_questions/cst463/__init__.py +0 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/__init__.py +3 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_calculation.py +369 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_descent_questions.py +305 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py +650 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/misc.py +73 -0
- QuizGenerator/premade_questions/cst463/math_and_data/__init__.py +2 -0
- QuizGenerator/premade_questions/cst463/math_and_data/matrix_questions.py +631 -0
- QuizGenerator/premade_questions/cst463/math_and_data/vector_questions.py +534 -0
- QuizGenerator/premade_questions/cst463/neural-network-basics/__init__.py +6 -0
- QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py +1264 -0
- QuizGenerator/premade_questions/cst463/tensorflow-intro/__init__.py +6 -0
- QuizGenerator/premade_questions/cst463/tensorflow-intro/tensorflow_questions.py +936 -0
- QuizGenerator/qrcode_generator.py +293 -0
- QuizGenerator/question.py +657 -0
- QuizGenerator/quiz.py +468 -0
- QuizGenerator/typst_utils.py +113 -0
- quizgenerator-0.1.0.dist-info/METADATA +263 -0
- quizgenerator-0.1.0.dist-info/RECORD +44 -0
- quizgenerator-0.1.0.dist-info/WHEEL +4 -0
- quizgenerator-0.1.0.dist-info/entry_points.txt +2 -0
- quizgenerator-0.1.0.dist-info/licenses/LICENSE +674 -0
@@ -0,0 +1,1264 @@
+from __future__ import annotations
+
+import abc
+import io
+import logging
+import math
+import numpy as np
+import uuid
+import os
+from typing import List, Tuple, Dict, Any
+
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+
+from QuizGenerator.contentast import ContentAST
+from QuizGenerator.question import Question, Answer, QuestionRegistry
+from QuizGenerator.mixins import TableQuestionMixin, BodyTemplatesMixin
+
+log = logging.getLogger(__name__)
+
+
+class SimpleNeuralNetworkBase(Question, abc.ABC):
+    """
+    Base class for simple neural network questions.
+
+    Generates a small feedforward network:
+    - 2-3 input neurons
+    - 2 hidden neurons (single hidden layer)
+    - 1 output neuron
+    - Random weights and biases
+    - Runs forward pass and stores all activations
+    """
+
+    # Activation function types
+    ACTIVATION_SIGMOID = "sigmoid"
+    ACTIVATION_RELU = "relu"
+    ACTIVATION_LINEAR = "linear"
+
+    def __init__(self, *args, **kwargs):
+        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
+        super().__init__(*args, **kwargs)
+
+        # Network architecture parameters
+        self.num_inputs = kwargs.get("num_inputs", 2)
+        self.num_hidden = kwargs.get("num_hidden", 2)
+        self.num_outputs = kwargs.get("num_outputs", 1)
+
+        # Configuration
+        self.activation_function = None
+        self.use_bias = kwargs.get("use_bias", True)
+
+        # Network parameters (weights and biases)
+        self.W1 = None  # Input to hidden weights (num_hidden x num_inputs)
+        self.b1 = None  # Hidden layer biases (num_hidden,)
+        self.W2 = None  # Hidden to output weights (num_outputs x num_hidden)
+        self.b2 = None  # Output layer biases (num_outputs,)
+
+        # Input data and forward pass results
+        self.X = None   # Input values (num_inputs,)
+        self.z1 = None  # Hidden layer pre-activation (num_hidden,)
+        self.a1 = None  # Hidden layer activations (num_hidden,)
+        self.z2 = None  # Output layer pre-activation (num_outputs,)
+        self.a2 = None  # Output layer activation (prediction)
+
+        # Target and loss (for backprop questions)
+        self.y_target = None
+        self.loss = None
+
+        # Gradients (for backprop questions)
+        self.dL_da2 = None   # Gradient of loss w.r.t. output
+        self.da2_dz2 = None  # Gradient of activation w.r.t. pre-activation
+        self.dL_dz2 = None   # Gradient of loss w.r.t. output pre-activation
+
+    def _generate_network(self, weight_range=(-2, 2), input_range=(-3, 3)):
+        """Generate random network parameters and input."""
+        # Generate weights (using small values for numerical stability)
+        self.W1 = np.array([
+            [self.rng.uniform(weight_range[0], weight_range[1])
+             for _ in range(self.num_inputs)]
+            for _ in range(self.num_hidden)
+        ])
+
+        self.W2 = np.array([
+            [self.rng.uniform(weight_range[0], weight_range[1])
+             for _ in range(self.num_hidden)]
+            for _ in range(self.num_outputs)
+        ])
+
+        # Generate biases
+        if self.use_bias:
+            self.b1 = np.array([
+                self.rng.uniform(weight_range[0], weight_range[1])
+                for _ in range(self.num_hidden)
+            ])
+            self.b2 = np.array([
+                self.rng.uniform(weight_range[0], weight_range[1])
+                for _ in range(self.num_outputs)
+            ])
+        else:
+            self.b1 = np.zeros(self.num_hidden)
+            self.b2 = np.zeros(self.num_outputs)
+
+        # Round weights to make calculations cleaner
+        self.W1 = np.round(self.W1 * 2) / 2  # Round to nearest 0.5
+        self.W2 = np.round(self.W2 * 2) / 2
+        self.b1 = np.round(self.b1 * 2) / 2
+        self.b2 = np.round(self.b2 * 2) / 2
+
+        # Generate input values
+        self.X = np.array([
+            self.rng.uniform(input_range[0], input_range[1])
+            for _ in range(self.num_inputs)
+        ])
+        self.X = np.round(self.X)  # Use integer inputs for simplicity
+
+    def _select_activation_function(self):
+        """Randomly select an activation function."""
+        activations = [
+            self.ACTIVATION_SIGMOID,
+            self.ACTIVATION_RELU
+        ]
+        self.activation_function = self.rng.choice(activations)
+
+    def _apply_activation(self, z, function_type=None):
+        """Apply activation function to pre-activation values."""
+        if function_type is None:
+            function_type = self.activation_function
+
+        if function_type == self.ACTIVATION_SIGMOID:
+            return 1 / (1 + np.exp(-z))
+        elif function_type == self.ACTIVATION_RELU:
+            return np.maximum(0, z)
+        elif function_type == self.ACTIVATION_LINEAR:
+            return z
+        else:
+            raise ValueError(f"Unknown activation function: {function_type}")
+
+    def _activation_derivative(self, z, function_type=None):
+        """Compute derivative of activation function."""
+        if function_type is None:
+            function_type = self.activation_function
+
+        if function_type == self.ACTIVATION_SIGMOID:
+            a = self._apply_activation(z, function_type)
+            return a * (1 - a)
+        elif function_type == self.ACTIVATION_RELU:
+            return np.where(z > 0, 1, 0)
+        elif function_type == self.ACTIVATION_LINEAR:
+            return np.ones_like(z)
+        else:
+            raise ValueError(f"Unknown activation function: {function_type}")
+
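
The sigmoid branch above relies on the identity σ'(z) = σ(z)(1 − σ(z)). A minimal standalone check of that identity against a finite difference (illustrative values, not package code):

import numpy as np

z = np.array([-1.0, 0.0, 2.0])
sigma = 1 / (1 + np.exp(-z))                        # mirrors the ACTIVATION_SIGMOID branch
deriv = sigma * (1 - sigma)                         # mirrors _activation_derivative
eps = 1e-6
fd = (1 / (1 + np.exp(-(z + eps))) - sigma) / eps   # numerical derivative
print(np.allclose(deriv, fd, atol=1e-4))            # True
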
+    def _forward_pass(self):
+        """Run forward pass through the network."""
+        # Hidden layer
+        self.z1 = self.W1 @ self.X + self.b1
+        self.a1 = self._apply_activation(self.z1)
+
+        # Output layer
+        self.z2 = self.W2 @ self.a1 + self.b2
+        self.a2 = self._apply_activation(self.z2, self.ACTIVATION_LINEAR)  # Linear output
+
+        # Round all computed values to display precision to ensure students can reproduce calculations
+        # We display z and a values with 4 decimal places
+        self.z1 = np.round(self.z1, 4)
+        self.a1 = np.round(self.a1, 4)
+        self.z2 = np.round(self.z2, 4)
+        self.a2 = np.round(self.a2, 4)
+
+        return self.a2
+
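
A concrete trace of `_forward_pass` with rounded weights of the kind `_generate_network` produces (the values are hypothetical, chosen for easy arithmetic; ReLU hidden layer):

import numpy as np

W1 = np.array([[0.5, -1.0], [1.5, 0.5]]); b1 = np.array([0.5, -0.5])
W2 = np.array([[1.0, -0.5]]);             b2 = np.array([0.5])
X = np.array([2.0, -1.0])

z1 = W1 @ X + b1           # hidden pre-activations: [2.5, 2.0]
a1 = np.maximum(0, z1)     # ReLU activations:       [2.5, 2.0]
y_hat = (W2 @ a1 + b2)[0]  # linear output:          2.0
print(z1, a1, y_hat)
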
+    def _compute_loss(self, y_target):
+        """Compute MSE loss."""
+        self.y_target = y_target
+        self.loss = 0.5 * (y_target - self.a2[0]) ** 2
+        return self.loss
+
+    def _compute_output_gradient(self):
+        """Compute gradient of loss w.r.t. output."""
+        # For MSE loss: dL/da2 = -(y - a2)
+        self.dL_da2 = -(self.y_target - self.a2[0])
+
+        # For linear output activation: da2/dz2 = 1
+        self.da2_dz2 = 1.0
+
+        # Chain rule: dL/dz2 = dL/da2 * da2/dz2
+        self.dL_dz2 = self.dL_da2 * self.da2_dz2
+
+        return self.dL_dz2
+
+    def _compute_gradient_W2(self, hidden_idx):
+        """Compute gradient ∂L/∂W2[0, hidden_idx]."""
+        # ∂L/∂w = dL/dz2 * ∂z2/∂w = dL/dz2 * a1[hidden_idx]
+        return float(self.dL_dz2 * self.a1[hidden_idx])
+
+    def _compute_gradient_W1(self, hidden_idx, input_idx):
+        """Compute gradient ∂L/∂W1[hidden_idx, input_idx]."""
+        # dL/dz1[hidden_idx] = dL/dz2 * ∂z2/∂a1[hidden_idx] * ∂a1/∂z1[hidden_idx]
+        #                    = dL/dz2 * W2[0, hidden_idx] * activation'(z1[hidden_idx])
+        dz2_da1 = self.W2[0, hidden_idx]
+        da1_dz1 = self._activation_derivative(self.z1[hidden_idx])
+
+        dL_dz1 = self.dL_dz2 * dz2_da1 * da1_dz1
+
+        # ∂L/∂w = dL/dz1 * ∂z1/∂w = dL/dz1 * X[input_idx]
+        return float(dL_dz1 * self.X[input_idx])
+
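
The closed-form gradients above can be sanity-checked with a finite difference. A self-contained sketch (not package code) using the same illustrative 2-2-1 ReLU network as the trace above:

import numpy as np

W1 = np.array([[0.5, -1.0], [1.5, 0.5]]); b1 = np.array([0.5, -0.5])
W2 = np.array([[1.0, -0.5]]);             b2 = np.array([0.5])
X, y = np.array([2.0, -1.0]), 3.0

def loss(W1):
    a1 = np.maximum(0, W1 @ X + b1)      # forward pass
    y_hat = (W2 @ a1 + b2)[0]
    return 0.5 * (y - y_hat) ** 2        # MSE as defined in _compute_loss

# Analytic gradient for W1[0, 0], mirroring _compute_gradient_W1(0, 0)
y_hat = (W2 @ np.maximum(0, W1 @ X + b1) + b2)[0]
dL_dz2 = -(y - y_hat)                    # linear output, so dL/dz2 = dL/dŷ
relu_deriv = 1.0 if (W1 @ X + b1)[0] > 0 else 0.0
analytic = dL_dz2 * W2[0, 0] * relu_deriv * X[0]

# Finite-difference comparison
eps = 1e-6
W1p = W1.copy(); W1p[0, 0] += eps
numeric = (loss(W1p) - loss(W1)) / eps
print(analytic, numeric)                 # both ≈ -2.0
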
+    def _get_activation_name(self):
+        """Get human-readable activation function name."""
+        if self.activation_function == self.ACTIVATION_SIGMOID:
+            return "sigmoid"
+        elif self.activation_function == self.ACTIVATION_RELU:
+            return "ReLU"
+        elif self.activation_function == self.ACTIVATION_LINEAR:
+            return "linear"
+        return "unknown"
+
+    def _get_activation_formula(self):
+        """Get LaTeX formula for activation function."""
+        if self.activation_function == self.ACTIVATION_SIGMOID:
+            return r"\sigma(z) = \frac{1}{1 + e^{-z}}"
+        elif self.activation_function == self.ACTIVATION_RELU:
+            return r"\text{ReLU}(z) = \max(0, z)"
+        elif self.activation_function == self.ACTIVATION_LINEAR:
+            return r"f(z) = z"
+        return ""
+
+    def _generate_parameter_table(self, include_activations=False, include_training_context=False):
+        """
+        Generate side-by-side tables showing all network parameters.
+
+        Args:
+            include_activations: If True, include computed activation values
+            include_training_context: If True, include target, loss, etc. (for backprop questions)
+
+        Returns:
+            ContentAST.TableGroup with network parameters in two side-by-side tables
+        """
+        # Left table: Inputs & Weights
+        left_data = []
+        left_data.append(["Symbol", "Value"])
+
+        # Input values
+        for i in range(self.num_inputs):
+            left_data.append([
+                ContentAST.Equation(f"x_{i+1}", inline=True),
+                f"{self.X[i]:.1f}"
+            ])
+
+        # Weights from input to hidden
+        for j in range(self.num_hidden):
+            for i in range(self.num_inputs):
+                left_data.append([
+                    ContentAST.Equation(f"w_{{{j+1}{i+1}}}", inline=True),
+                    f"{self.W1[j, i]:.1f}"
+                ])
+
+        # Weights from hidden to output
+        for i in range(self.num_hidden):
+            left_data.append([
+                ContentAST.Equation(f"w_{i+3}", inline=True),
+                f"{self.W2[0, i]:.1f}"
+            ])
+
+        # Right table: Biases, Activations, Training context
+        right_data = []
+        right_data.append(["Symbol", "Value"])
+
+        # Hidden layer biases
+        if self.use_bias:
+            for j in range(self.num_hidden):
+                right_data.append([
+                    ContentAST.Equation(f"b_{j+1}", inline=True),
+                    f"{self.b1[j]:.1f}"
+                ])
+
+        # Output bias
+        if self.use_bias:
+            right_data.append([
+                ContentAST.Equation(r"b_{out}", inline=True),
+                f"{self.b2[0]:.1f}"
+            ])
+
+        # Hidden layer activations (if computed and requested)
+        if include_activations and self.a1 is not None:
+            for i in range(self.num_hidden):
+                right_data.append([
+                    ContentAST.Equation(f"h_{i+1}", inline=True),
+                    f"{self.a1[i]:.4f}"
+                ])
+
+        # Output activation (if computed and requested)
+        if include_activations and self.a2 is not None:
+            right_data.append([
+                ContentAST.Equation(r"\hat{y}", inline=True),
+                f"{self.a2[0]:.4f}"
+            ])
+
+        # Training context (target, loss - for backprop questions)
+        if include_training_context:
+            if self.y_target is not None:
+                right_data.append([
+                    ContentAST.Equation("y", inline=True),
+                    f"{self.y_target:.2f}"
+                ])
+
+            if self.loss is not None:
+                right_data.append([
+                    ContentAST.Equation("L", inline=True),
+                    f"{self.loss:.4f}"
+                ])
+
+        # Create table group
+        table_group = ContentAST.TableGroup()
+        table_group.add_table(ContentAST.Table(data=left_data))
+        table_group.add_table(ContentAST.Table(data=right_data))
+
+        return table_group
+
+    def _generate_network_diagram(self, show_weights=True, show_activations=False):
+        """
+        Generate a simple, clean network diagram.
+
+        Args:
+            show_weights: If True, display weights on edges
+            show_activations: If True, display activation values on nodes
+
+        Returns:
+            BytesIO buffer containing PNG image
+        """
+        # Create figure with tight layout and equal aspect ratio
+        fig = plt.figure(figsize=(8, 2.5))
+        ax = fig.add_subplot(111)
+        ax.set_aspect('equal', adjustable='box')  # Keep circles circular
+        ax.axis('off')
+
+        # Node radius
+        r = 0.15
+
+        # Layer x-positions
+        input_x = 0.5
+        hidden_x = 2.0
+        output_x = 3.5
+
+        # Calculate y-positions for nodes (top to bottom order)
+        def get_y_positions(n, include_bias=False):
+            # If including bias, need one more position at the top
+            total_nodes = n + 1 if include_bias else n
+            if total_nodes == 1:
+                return [1.0]
+            spacing = min(2.0 / (total_nodes - 1), 0.6)
+            # Start from top
+            start = 1.0 + (total_nodes - 1) * spacing / 2
+            positions = [start - i * spacing for i in range(total_nodes)]
+            return positions
+
+        # Input layer: bias (if present) at top, then x_1, x_2, ... going down
+        input_positions = get_y_positions(self.num_inputs, include_bias=self.use_bias)
+        if self.use_bias:
+            bias1_y = input_positions[0]
+            input_y = input_positions[1:]  # x_1 is second (below bias), x_2 is third, etc.
+        else:
+            bias1_y = None
+            input_y = input_positions
+
+        # Hidden layer: bias (if present) at top, then h_1, h_2, ... going down
+        hidden_positions = get_y_positions(self.num_hidden, include_bias=self.use_bias)
+        if self.use_bias:
+            bias2_y = hidden_positions[0]
+            hidden_y = hidden_positions[1:]
+        else:
+            bias2_y = None
+            hidden_y = hidden_positions
+
+        # Output layer: centered
+        output_y = [1.0]
+
+        # Draw edges first (so they're behind nodes)
+        # Input to hidden
+        for i in range(self.num_inputs):
+            for j in range(self.num_hidden):
+                ax.plot([input_x, hidden_x], [input_y[i], hidden_y[j]],
+                        'k-', linewidth=1, alpha=0.7, zorder=1)
+                if show_weights:
+                    label_x = input_x + 0.3
+                    label_y = input_y[i] + (hidden_y[j] - input_y[i]) * 0.2
+                    # Use LaTeX math mode for proper subscript rendering
+                    weight_label = f'$w_{{{j+1}{i+1}}}$'
+                    ax.text(label_x, label_y, weight_label, fontsize=8,
+                            bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))
+
+        # Bias to hidden
+        if self.use_bias:
+            for j in range(self.num_hidden):
+                ax.plot([input_x, hidden_x], [bias1_y, hidden_y[j]],
+                        'k-', linewidth=1, alpha=0.7, zorder=1)
+                if show_weights:
+                    label_x = input_x + 0.3
+                    label_y = bias1_y + (hidden_y[j] - bias1_y) * 0.2
+                    bias_label = f'$b_{{{j+1}}}$'
+                    ax.text(label_x, label_y, bias_label, fontsize=8,
+                            bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))
+
+        # Hidden to output
+        for i in range(self.num_hidden):
+            ax.plot([hidden_x, output_x], [hidden_y[i], output_y[0]],
+                    'k-', linewidth=1, alpha=0.7, zorder=1)
+            if show_weights:
+                label_x = hidden_x + 0.3
+                label_y = hidden_y[i] + (output_y[0] - hidden_y[i]) * 0.2
+                weight_label = f'$w_{{{i+3}}}$'
+                ax.text(label_x, label_y, weight_label, fontsize=8,
+                        bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))
+
+        # Bias to output
+        if self.use_bias:
+            ax.plot([hidden_x, output_x], [bias2_y, output_y[0]],
+                    'k-', linewidth=1, alpha=0.7, zorder=1)
+            if show_weights:
+                label_x = hidden_x + 0.3
+                label_y = bias2_y + (output_y[0] - bias2_y) * 0.2
+                bias_label = r'$b_{out}$'
+                ax.text(label_x, label_y, bias_label, fontsize=8,
+                        bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))
+
+        # Draw nodes
+        # Input nodes
+        for i, y in enumerate(input_y):
+            circle = plt.Circle((input_x, y), r, facecolor='lightgray',
+                                edgecolor='black', linewidth=1.5, zorder=10)
+            ax.add_patch(circle)
+            label = f'$x_{{{i+1}}}$' if not show_activations else f'$x_{{{i+1}}}$={self.X[i]:.1f}'
+            ax.text(input_x - r - 0.15, y, label, fontsize=10, ha='right', va='center')
+
+        # Bias nodes
+        if self.use_bias:
+            circle = plt.Circle((input_x, bias1_y), r, facecolor='lightgray',
+                                edgecolor='black', linewidth=1.5, zorder=10)
+            ax.add_patch(circle)
+            ax.text(input_x, bias1_y, '1', fontsize=10, ha='center', va='center', weight='bold')
+
+            circle = plt.Circle((hidden_x, bias2_y), r, facecolor='lightgray',
+                                edgecolor='black', linewidth=1.5, zorder=10)
+            ax.add_patch(circle)
+            ax.text(hidden_x, bias2_y, '1', fontsize=10, ha='center', va='center', weight='bold')
+
+        # Hidden nodes
+        for i, y in enumerate(hidden_y):
+            circle = plt.Circle((hidden_x, y), r, facecolor='lightblue',
+                                edgecolor='black', linewidth=1.5, zorder=10)
+            ax.add_patch(circle)
+            ax.plot([hidden_x, hidden_x], [y - r*0.7, y + r*0.7], 'k-', linewidth=1.2, zorder=11)
+            ax.text(hidden_x - r*0.35, y, r'$\Sigma$', fontsize=11, ha='center', va='center', zorder=12)
+            ax.text(hidden_x + r*0.35, y, r'$f$', fontsize=10, ha='center', va='center', zorder=12, style='italic')
+            if show_activations and self.a1 is not None:
+                ax.text(hidden_x, y - r - 0.15, f'{self.a1[i]:.2f}', fontsize=8, ha='center', va='top')
+
+        # Output node
+        y = output_y[0]
+        circle = plt.Circle((output_x, y), r, facecolor='lightblue',
+                            edgecolor='black', linewidth=1.5, zorder=10)
+        ax.add_patch(circle)
+        ax.plot([output_x, output_x], [y - r*0.7, y + r*0.7], 'k-', linewidth=1.2, zorder=11)
+        ax.text(output_x - r*0.35, y, r'$\Sigma$', fontsize=11, ha='center', va='center', zorder=12)
+        ax.text(output_x + r*0.35, y, r'$f$', fontsize=10, ha='center', va='center', zorder=12, style='italic')
+        label = r'$\hat{y}$' if not show_activations else f'$\\hat{{y}}$={self.a2[0]:.2f}'
+        ax.text(output_x + r + 0.15, y, label, fontsize=10, ha='left', va='center')
+
+        # Save to buffer with minimal padding
+        buffer = io.BytesIO()
+        plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight',
+                    facecolor='white', edgecolor='none', pad_inches=0.0)
+        plt.close(fig)
+        buffer.seek(0)
+
+        return buffer
+
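
Since `_generate_network_diagram` returns an in-memory PNG, the figure can be previewed outside the quiz pipeline. A small sketch (the instance `q` and the file name are illustrative; `q` is assumed to be a refreshed question instance):

buf = q._generate_network_diagram(show_weights=True)
with open("network_preview.png", "wb") as fh:
    fh.write(buf.getvalue())
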
+    def _generate_ascii_network(self):
+        """Generate ASCII art representation of the network for alt-text."""
+        lines = []
+        lines.append("Network Architecture:")
+        lines.append("")
+        lines.append("Input Layer:      Hidden Layer:      Output Layer:")
+
+        # For 2 inputs, 2 hidden, 1 output
+        if self.num_inputs == 2 and self.num_hidden == 2:
+            lines.append(f" x₁ ----[w₁₁]---→ h₁ ----[w₃]----→")
+            lines.append(f"      \\        /       \\         /")
+            lines.append(f"       \\      /         \\       /")
+            lines.append(f"        \\    /           \\     /     ŷ")
+            lines.append(f"         \\  /             \\   /")
+            lines.append(f"          \\/               \\ /")
+            lines.append(f"          /\\                \\/")
+            lines.append(f"         /  \\               /\\")
+            lines.append(f"        /    \\             /  \\")
+            lines.append(f" x₂ ----[w₂₂]---→ h₂ ----[w₄]----→")
+        else:
+            # Generic representation
+            for i in range(max(self.num_inputs, self.num_hidden)):
+                parts = []
+                if i < self.num_inputs:
+                    parts.append(f" x{i+1}")
+                else:
+                    parts.append("   ")
+                parts.append(" ---→ ")
+                if i < self.num_hidden:
+                    parts.append(f"h{i+1}")
+                else:
+                    parts.append("  ")
+                parts.append(" ---→ ")
+                if i == self.num_hidden // 2:
+                    parts.append("ŷ")
+                lines.append("".join(parts))
+
+        lines.append("")
+        lines.append(f"Activation function: {self._get_activation_name()}")
+
+        return "\n".join(lines)
+
+
+@QuestionRegistry.register()
+class ForwardPassQuestion(SimpleNeuralNetworkBase):
+    """
+    Question asking students to calculate the forward pass through a simple network.
+
+    Students calculate:
+    - Hidden layer activations (h₁, h₂)
+    - Final output (ŷ)
+    """
+
+    def refresh(self, rng_seed=None, *args, **kwargs):
+        super().refresh(rng_seed=rng_seed, *args, **kwargs)
+
+        # Generate network
+        self._generate_network()
+        self._select_activation_function()
+
+        # Run forward pass to get correct answers
+        self._forward_pass()
+
+        # Create answer fields
+        self._create_answers()
+
+    def _create_answers(self):
+        """Create answer fields for forward pass values."""
+        self.answers = {}
+
+        # Hidden layer activations
+        for i in range(self.num_hidden):
+            key = f"h{i+1}"
+            self.answers[key] = Answer.float_value(key, float(self.a1[i]))
+
+        # Output
+        self.answers["y_pred"] = Answer.float_value("y_pred", float(self.a2[0]))
+
+    def get_body(self, **kwargs) -> ContentAST.Section:
+        body = ContentAST.Section()
+
+        # Question description
+        body.add_element(ContentAST.Paragraph([
+            f"Given the neural network below with {self._get_activation_name()} activation "
+            f"in the hidden layer and linear activation (f(z) = z) in the output layer, "
+            f"calculate the forward pass for the given input values."
+        ]))
+
+        # Network diagram
+        body.add_element(
+            ContentAST.Picture(
+                img_data=self._generate_network_diagram(show_weights=True, show_activations=False),
+                caption=f"Neural network architecture"
+            )
+        )
+
+        # Network parameters table
+        body.add_element(self._generate_parameter_table(include_activations=False))
+
+        # Activation function
+        body.add_element(ContentAST.Paragraph([
+            f"**Activation function:** {self._get_activation_name()}"
+        ]))
+
+        # Create answer block
+        answers = []
+        for i in range(self.num_hidden):
+            answers.append(
+                ContentAST.Answer(
+                    answer=self.answers[f"h{i+1}"],
+                    label=f"h_{i+1} (hidden neuron {i+1} output)"
+                )
+            )
+
+        answers.append(
+            ContentAST.Answer(
+                answer=self.answers["y_pred"],
+                label="ŷ (network output)"
+            )
+        )
+
+        body.add_element(ContentAST.AnswerBlock(answers))
+
+        return body
+
+    def get_explanation(self, **kwargs) -> ContentAST.Section:
+        explanation = ContentAST.Section()
+
+        explanation.add_element(ContentAST.Paragraph([
+            "To solve this problem, we need to compute the forward pass through the network."
+        ]))
+
+        # Hidden layer calculations
+        explanation.add_element(ContentAST.Paragraph([
+            "**Step 1: Calculate hidden layer pre-activations**"
+        ]))
+
+        for i in range(self.num_hidden):
+            # Build equation for z_i
+            terms = []
+            for j in range(self.num_inputs):
+                terms.append(f"({self.W1[i,j]:.1f})({self.X[j]:.1f})")
+
+            z_calc = " + ".join(terms)
+            if self.use_bias:
+                z_calc += f" + {self.b1[i]:.1f}"
+
+            explanation.add_element(ContentAST.Equation(
+                f"z_{i+1} = {z_calc} = {self.z1[i]:.4f}",
+                inline=False
+            ))
+
+        # Hidden layer activations
+        explanation.add_element(ContentAST.Paragraph([
+            f"**Step 2: Apply {self._get_activation_name()} activation**"
+        ]))
+
+        for i in range(self.num_hidden):
+            if self.activation_function == self.ACTIVATION_SIGMOID:
+                explanation.add_element(ContentAST.Equation(
+                    f"h_{i+1} = \\sigma(z_{i+1}) = \\frac{{1}}{{1 + e^{{-{self.z1[i]:.4f}}}}} = {self.a1[i]:.4f}",
+                    inline=False
+                ))
+            elif self.activation_function == self.ACTIVATION_RELU:
+                explanation.add_element(ContentAST.Equation(
+                    f"h_{i+1} = \\text{{ReLU}}(z_{i+1}) = \\max(0, {self.z1[i]:.4f}) = {self.a1[i]:.4f}",
+                    inline=False
+                ))
+            else:
+                explanation.add_element(ContentAST.Equation(
+                    f"h_{i+1} = z_{i+1} = {self.a1[i]:.4f}",
+                    inline=False
+                ))
+
+        # Output layer
+        explanation.add_element(ContentAST.Paragraph([
+            "**Step 3: Calculate output (with linear activation)**"
+        ]))
+
+        terms = []
+        for j in range(self.num_hidden):
+            terms.append(f"({self.W2[0,j]:.1f})({self.a1[j]:.4f})")
+
+        z_out_calc = " + ".join(terms)
+        if self.use_bias:
+            z_out_calc += f" + {self.b2[0]:.1f}"
+
+        explanation.add_element(ContentAST.Equation(
+            f"z_{{out}} = {z_out_calc} = {self.z2[0]:.4f}",
+            inline=False
+        ))
+
+        explanation.add_element(ContentAST.Equation(
+            f"\\hat{{y}} = f(z_{{out}}) = z_{{out}} = {self.a2[0]:.4f}",
+            inline=False
+        ))
+
+        explanation.add_element(ContentAST.Paragraph([
+            "(Note: The output layer uses linear activation, so the output can be any real number)"
+        ]))
+
+        return explanation
+
+
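
A sketch of how this question type might be exercised directly; whether the `Question` base class accepts a no-argument constructor, and whether `refresh` seeds `self.rng` as the calls above imply, are assumptions this diff does not show:

q = ForwardPassQuestion(num_inputs=2, num_hidden=2)
q.refresh(rng_seed=42)     # deterministic network for a fixed seed
print(q.X, q.a1, q.a2[0])  # inputs, hidden activations, expected ŷ
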
+@QuestionRegistry.register()
+class BackpropGradientQuestion(SimpleNeuralNetworkBase):
+    """
+    Question asking students to calculate gradients using backpropagation.
+
+    Given a completed forward pass, students calculate:
+    - Gradients for multiple specific weights (∂L/∂w)
+    """
+
+    def refresh(self, rng_seed=None, *args, **kwargs):
+        super().refresh(rng_seed=rng_seed, *args, **kwargs)
+
+        # Generate network
+        self._generate_network()
+        self._select_activation_function()
+
+        # Run forward pass
+        self._forward_pass()
+
+        # Generate target and compute loss
+        # Target should be different from output to create meaningful gradients
+        self.y_target = float(self.a2[0] + self.rng.uniform(1, 3) * self.rng.choice([-1, 1]))
+        # Round target to display precision (2 decimal places)
+        self.y_target = round(self.y_target, 2)
+        self._compute_loss(self.y_target)
+        # Round loss to display precision (4 decimal places)
+        self.loss = round(self.loss, 4)
+        self._compute_output_gradient()
+
+        # Create answer fields for specific weight gradients
+        self._create_answers()
+
+    def _create_answers(self):
+        """Create answer fields for weight gradients."""
+        self.answers = {}
+
+        # Ask for gradients of 2-3 weights
+        # Include at least one from each layer
+
+        # Gradient for W2 (hidden to output)
+        for i in range(self.num_hidden):
+            key = f"dL_dw2_{i}"
+            self.answers[key] = Answer.auto_float(key, self._compute_gradient_W2(i))
+
+        # Gradient for W1 (input to hidden) - pick first hidden neuron
+        for j in range(self.num_inputs):
+            key = f"dL_dw1_0{j}"
+            self.answers[key] = Answer.auto_float(key, self._compute_gradient_W1(0, j))
+
+    def get_body(self, **kwargs) -> ContentAST.Section:
+        body = ContentAST.Section()
+
+        # Question description
+        body.add_element(ContentAST.Paragraph([
+            f"Given the neural network below with {self._get_activation_name()} activation "
+            f"in the hidden layer, a forward pass has been completed with the values shown. "
+            f"Calculate the gradients (∂L/∂w) for the specified weights using backpropagation."
+        ]))
+
+        # Network diagram
+        body.add_element(
+            ContentAST.Picture(
+                img_data=self._generate_network_diagram(show_weights=True, show_activations=False),
+                caption=f"Neural network architecture"
+            )
+        )
+
+        # Network parameters and forward pass results table
+        body.add_element(self._generate_parameter_table(include_activations=True, include_training_context=True))
+
+        # Activation function
+        body.add_element(ContentAST.Paragraph([
+            f"**Activation function:** {self._get_activation_name()}"
+        ]))
+
+        body.add_element(ContentAST.Paragraph([
+            "**Calculate the following gradients:**"
+        ]))
+
+        # Create answer block
+        answers = []
+
+        # W2 gradients
+        for i in range(self.num_hidden):
+            answers.append(
+                ContentAST.Answer(
+                    answer=self.answers[f"dL_dw2_{i}"],
+                    label=f"∂L/∂w_{i+3} (weight from h_{i+1} to output)"
+                )
+            )
+
+        # W1 gradients (first hidden neuron)
+        for j in range(self.num_inputs):
+            answers.append(
+                ContentAST.Answer(
+                    answer=self.answers[f"dL_dw1_0{j}"],
+                    label=f"∂L/∂w_1{j+1} (weight from x_{j+1} to h_1)"
+                )
+            )
+
+        body.add_element(ContentAST.AnswerBlock(answers))
+
+        return body
+
+    def get_explanation(self, **kwargs) -> ContentAST.Section:
+        explanation = ContentAST.Section()
+
+        explanation.add_element(ContentAST.Paragraph([
+            "To solve this problem, we use the chain rule to compute gradients via backpropagation."
+        ]))
+
+        # Output layer gradient
+        explanation.add_element(ContentAST.Paragraph([
+            "**Step 1: Compute output layer gradient**"
+        ]))
+
+        explanation.add_element(ContentAST.Paragraph([
+            "For MSE loss with linear output activation:"
+        ]))
+
+        explanation.add_element(ContentAST.Equation(
+            f"\\frac{{\\partial L}}{{\\partial \\hat{{y}}}} = -(y - \\hat{{y}}) = -({self.y_target:.2f} - {self.a2[0]:.4f}) = {self.dL_da2:.4f}",
+            inline=False
+        ))
+
+        # W2 gradients
+        explanation.add_element(ContentAST.Paragraph([
+            "**Step 2: Gradients for hidden-to-output weights**"
+        ]))
+
+        explanation.add_element(ContentAST.Paragraph([
+            "Using the chain rule:"
+        ]))
+
+        for i in range(self.num_hidden):
+            grad = self._compute_gradient_W2(i)
+            explanation.add_element(ContentAST.Equation(
+                f"\\frac{{\\partial L}}{{\\partial w_{i+3}}} = \\frac{{\\partial L}}{{\\partial \\hat{{y}}}} \\cdot \\frac{{\\partial \\hat{{y}}}}{{\\partial w_{i+3}}} = {self.dL_da2:.4f} \\cdot {self.a1[i]:.4f} = {grad:.4f}",
+                inline=False
+            ))
+
+        # W1 gradients
+        explanation.add_element(ContentAST.Paragraph([
+            "**Step 3: Gradients for input-to-hidden weights**"
+        ]))
+
+        explanation.add_element(ContentAST.Paragraph([
+            "First, compute the gradient flowing back to the hidden layer:"
+        ]))
+
+        for j in range(self.num_inputs):
+            # Compute intermediate values
+            dz2_da1 = self.W2[0, 0]
+            da1_dz1 = self._activation_derivative(self.z1[0])
+            dL_dz1 = self.dL_dz2 * dz2_da1 * da1_dz1
+
+            grad = self._compute_gradient_W1(0, j)
+
+            if self.activation_function == self.ACTIVATION_SIGMOID:
+                act_deriv_str = f"\\sigma(z_1)(1-\\sigma(z_1)) = {self.a1[0]:.4f}(1-{self.a1[0]:.4f}) = {da1_dz1:.4f}"
+            elif self.activation_function == self.ACTIVATION_RELU:
+                act_deriv_str = f"\\mathbb{{1}}(z_1 > 0) = {da1_dz1:.4f}"
+            else:
+                act_deriv_str = f"1"
+
+            explanation.add_element(ContentAST.Equation(
+                f"\\frac{{\\partial L}}{{\\partial w_{{1{j+1}}}}} = \\frac{{\\partial L}}{{\\partial \\hat{{y}}}} \\cdot w_{3} \\cdot {act_deriv_str} \\cdot x_{j+1} = {self.dL_da2:.4f} \\cdot {dz2_da1:.4f} \\cdot {da1_dz1:.4f} \\cdot {self.X[j]:.1f} = {grad:.4f}",
+                inline=False
+            ))
+
+        return explanation
+
+
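
Steps 1-3 above compose into a single product. A numeric restatement for the sigmoid case (all values illustrative, not drawn from the package):

import numpy as np

y, y_hat = 3.00, 1.8385      # target and network output
w3, z1, x1 = 1.0, 0.6, 2.0   # weight h₁→output, hidden pre-activation, input

dL_dyhat = -(y - y_hat)                        # Step 1: -(y - ŷ) = -1.1615
h1 = 1 / (1 + np.exp(-z1))                     # sigmoid(z₁)
dL_dw3 = dL_dyhat * h1                         # Step 2
dL_dw11 = dL_dyhat * w3 * h1 * (1 - h1) * x1   # Step 3 (chain rule)
print(round(dL_dw3, 4), round(dL_dw11, 4))
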
+@QuestionRegistry.register()
+class EnsembleAveragingQuestion(Question):
+    """
+    Question asking students to combine predictions from multiple models (ensemble).
+
+    Students calculate:
+    - Mean prediction (for regression)
+    - Optionally: variance or other statistics
+    """
+
+    def __init__(self, *args, **kwargs):
+        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
+        super().__init__(*args, **kwargs)
+
+        self.num_models = kwargs.get("num_models", 5)
+        self.predictions = None
+
+    def refresh(self, rng_seed=None, *args, **kwargs):
+        super().refresh(rng_seed=rng_seed, *args, **kwargs)
+
+        # Generate predictions from multiple models
+        # Use a range that makes sense for typical regression problems
+        base_value = self.rng.uniform(0, 10)
+        self.predictions = [
+            base_value + self.rng.uniform(-2, 2)
+            for _ in range(self.num_models)
+        ]
+
+        # Round to make calculations easier
+        self.predictions = [round(p, 1) for p in self.predictions]
+
+        # Create answers
+        self._create_answers()
+
+    def _create_answers(self):
+        """Create answer fields for ensemble statistics."""
+        self.answers = {}
+
+        # Mean prediction
+        mean_pred = np.mean(self.predictions)
+        self.answers["mean"] = Answer.float_value("mean", float(mean_pred))
+
+        # Median (optional, but useful)
+        median_pred = np.median(self.predictions)
+        self.answers["median"] = Answer.float_value("median", float(median_pred))
+
+    def get_body(self, **kwargs) -> ContentAST.Section:
+        body = ContentAST.Section()
+
+        # Question description
+        body.add_element(ContentAST.Paragraph([
+            f"You have trained {self.num_models} different regression models on the same dataset. "
+            f"For a particular test input, each model produces the following predictions:"
+        ]))
+
+        # Show predictions
+        pred_list = ", ".join([f"{p:.1f}" for p in self.predictions])
+        body.add_element(ContentAST.Paragraph([
+            f"Model predictions: {pred_list}"
+        ]))
+
+        # Question
+        body.add_element(ContentAST.Paragraph([
+            "To create an ensemble, calculate the combined prediction using the following methods:"
+        ]))
+
+        # Create answer block
+        answers = []
+        answers.append(
+            ContentAST.Answer(
+                answer=self.answers["mean"],
+                label="Mean (average)"
+            )
+        )
+        answers.append(
+            ContentAST.Answer(
+                answer=self.answers["median"],
+                label="Median"
+            )
+        )
+
+        body.add_element(ContentAST.AnswerBlock(answers))
+
+        return body
+
+    def get_explanation(self, **kwargs) -> ContentAST.Section:
+        explanation = ContentAST.Section()
+
+        explanation.add_element(ContentAST.Paragraph([
+            "Ensemble methods combine predictions from multiple models to create a more robust prediction."
+        ]))
+
+        # Mean calculation
+        explanation.add_element(ContentAST.Paragraph([
+            "**Mean (Bagging approach):**"
+        ]))
+
+        pred_sum = " + ".join([f"{p:.1f}" for p in self.predictions])
+        mean_val = np.mean(self.predictions)
+
+        explanation.add_element(ContentAST.Equation(
+            f"\\text{{mean}} = \\frac{{{pred_sum}}}{{{self.num_models}}} = \\frac{{{sum(self.predictions):.1f}}}{{{self.num_models}}} = {mean_val:.4f}",
+            inline=False
+        ))
+
+        # Median calculation
+        explanation.add_element(ContentAST.Paragraph([
+            "**Median:**"
+        ]))
+
+        sorted_preds = sorted(self.predictions)
+        sorted_str = ", ".join([f"{p:.1f}" for p in sorted_preds])
+        median_val = np.median(self.predictions)
+
+        explanation.add_element(ContentAST.Paragraph([
+            f"Sorted predictions: {sorted_str}"
+        ]))
+
+        if self.num_models % 2 == 1:
+            mid_idx = self.num_models // 2
+            explanation.add_element(ContentAST.Paragraph([
+                f"Middle value (position {mid_idx + 1}): {median_val:.1f}"
+            ]))
+        else:
+            mid_idx1 = self.num_models // 2 - 1
+            mid_idx2 = self.num_models // 2
+            explanation.add_element(ContentAST.Paragraph([
+                f"Average of middle two values (positions {mid_idx1 + 1} and {mid_idx2 + 1}): "
+                f"({sorted_preds[mid_idx1]:.1f} + {sorted_preds[mid_idx2]:.1f}) / 2 = {median_val:.1f}"
+            ]))
+
+        return explanation
+
+
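
The combination rules here are plain `numpy` reductions; for example, with five illustrative predictions:

import numpy as np

preds = [6.2, 5.1, 7.0, 5.8, 6.4]  # illustrative model outputs
print(np.mean(preds))              # 6.1 -> the "Mean (average)" answer
print(np.median(preds))            # 6.2 -> the "Median" answer (middle of the sorted values)
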
+@QuestionRegistry.register()
+class EndToEndTrainingQuestion(SimpleNeuralNetworkBase):
+    """
+    End-to-end training step question.
+
+    Students perform a complete training iteration:
+    1. Forward pass → prediction
+    2. Loss calculation (MSE)
+    3. Backpropagation → gradients for specific weights
+    4. Weight update → new weight values
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.learning_rate = None
+        self.new_W1 = None
+        self.new_W2 = None
+
+    def refresh(self, rng_seed=None, *args, **kwargs):
+        super().refresh(rng_seed=rng_seed, *args, **kwargs)
+
+        # Generate network
+        self._generate_network()
+        self._select_activation_function()
+
+        # Run forward pass
+        self._forward_pass()
+
+        # Generate target and compute loss
+        self.y_target = float(self.a2[0] + self.rng.uniform(1, 3) * self.rng.choice([-1, 1]))
+        # Round target to display precision (2 decimal places)
+        self.y_target = round(self.y_target, 2)
+        self._compute_loss(self.y_target)
+        # Round loss to display precision (4 decimal places)
+        self.loss = round(self.loss, 4)
+        self._compute_output_gradient()
+
+        # Set learning rate (use small value for stability)
+        self.learning_rate = round(self.rng.uniform(0.05, 0.2), 2)
+
+        # Compute updated weights
+        self._compute_weight_updates()
+
+        # Create answers
+        self._create_answers()
+
+    def _compute_weight_updates(self):
+        """Compute new weights after gradient descent step."""
+        # Update W2
+        self.new_W2 = np.copy(self.W2)
+        for i in range(self.num_hidden):
+            grad = self._compute_gradient_W2(i)
+            self.new_W2[0, i] = self.W2[0, i] - self.learning_rate * grad
+
+        # Update W1 (first hidden neuron only for simplicity)
+        self.new_W1 = np.copy(self.W1)
+        for j in range(self.num_inputs):
+            grad = self._compute_gradient_W1(0, j)
+            self.new_W1[0, j] = self.W1[0, j] - self.learning_rate * grad
+
+    def _create_answers(self):
+        """Create answer fields for all steps."""
+        self.answers = {}
+
+        # Forward pass answers
+        self.answers["y_pred"] = Answer.float_value("y_pred", float(self.a2[0]))
+
+        # Loss answer
+        self.answers["loss"] = Answer.float_value("loss", float(self.loss))
+
+        # Gradient answers (for key weights)
+        self.answers["grad_w3"] = Answer.auto_float("grad_w3", self._compute_gradient_W2(0))
+        self.answers["grad_w11"] = Answer.auto_float("grad_w11", self._compute_gradient_W1(0, 0))
+
+        # Updated weight answers
+        self.answers["new_w3"] = Answer.float_value("new_w3", float(self.new_W2[0, 0]))
+        self.answers["new_w11"] = Answer.float_value("new_w11", float(self.new_W1[0, 0]))
+
+    def get_body(self, **kwargs) -> ContentAST.Section:
+        body = ContentAST.Section()
+
+        # Question description
+        body.add_element(ContentAST.Paragraph([
+            f"Given the neural network below, perform one complete training step (forward pass, "
+            f"loss calculation, backpropagation, and weight update) for the given input and target."
+        ]))
+
+        # Network diagram
+        body.add_element(
+            ContentAST.Picture(
+                img_data=self._generate_network_diagram(show_weights=True, show_activations=False),
+                caption=f"Neural network (before training)"
+            )
+        )
+
+        # Training parameters
+        body.add_element(ContentAST.Paragraph([
+            "**Training parameters:**"
+        ]))
+
+        body.add_element(ContentAST.Paragraph([
+            "Input: ",
+            ContentAST.Equation(f"x_1 = {self.X[0]:.1f}", inline=True),
+            ", ",
+            ContentAST.Equation(f"x_2 = {self.X[1]:.1f}", inline=True)
+        ]))
+
+        body.add_element(ContentAST.Paragraph([
+            "Target: ",
+            ContentAST.Equation(f"y = {self.y_target:.2f}", inline=True)
+        ]))
+
+        body.add_element(ContentAST.Paragraph([
+            "Learning rate: ",
+            ContentAST.Equation(f"\\alpha = {self.learning_rate}", inline=True)
+        ]))
+
+        body.add_element(ContentAST.Paragraph([
+            f"**Activation function:** {self._get_activation_name()}"
+        ]))
+
+        body.add_element(ContentAST.Paragraph([
+            "**Complete the following training steps:**"
+        ]))
+
+        # Network parameters table
+        body.add_element(self._generate_parameter_table(include_activations=False))
+
+        # Create answer block
+        answers = []
+
+        answers.append(
+            ContentAST.Answer(
+                answer=self.answers["y_pred"],
+                label="1. Forward Pass - Network output ŷ"
+            )
+        )
+
+        answers.append(
+            ContentAST.Answer(
+                answer=self.answers["loss"],
+                label="2. Loss - MSE: L = (1/2)(y - ŷ)²"
+            )
+        )
+
+        answers.append(
+            ContentAST.Answer(
+                answer=self.answers["grad_w3"],
+                label="3. Gradient ∂L/∂w₃ (weight h₁ → ŷ)"
+            )
+        )
+
+        answers.append(
+            ContentAST.Answer(
+                answer=self.answers["grad_w11"],
+                label="4. Gradient ∂L/∂w₁₁ (weight x₁ → h₁)"
+            )
+        )
+
+        answers.append(
+            ContentAST.Answer(
+                answer=self.answers["new_w3"],
+                label="5. Updated w₃: w₃' = w₃ - α(∂L/∂w₃)"
+            )
+        )
+
+        answers.append(
+            ContentAST.Answer(
+                answer=self.answers["new_w11"],
+                label="6. Updated w₁₁: w₁₁' = w₁₁ - α(∂L/∂w₁₁)"
+            )
+        )
+
+        body.add_element(ContentAST.AnswerBlock(answers))
+
+        return body
+
+    def get_explanation(self, **kwargs) -> ContentAST.Section:
+        explanation = ContentAST.Section()
+
+        explanation.add_element(ContentAST.Paragraph([
+            "This problem requires performing one complete training iteration. Let's go through each step."
+        ]))
+
+        # Step 1: Forward pass
+        explanation.add_element(ContentAST.Paragraph([
+            "**Step 1: Forward Pass**"
+        ]))
+
+        # Hidden layer
+        z1_0 = self.W1[0, 0] * self.X[0] + self.W1[0, 1] * self.X[1] + self.b1[0]
+        explanation.add_element(ContentAST.Equation(
+            f"z_1 = w_{{11}} x_1 + w_{{12}} x_2 + b_1 = {self.W1[0,0]:.1f} \\cdot {self.X[0]:.1f} + {self.W1[0,1]:.1f} \\cdot {self.X[1]:.1f} + {self.b1[0]:.1f} = {self.z1[0]:.4f}",
+            inline=False
+        ))
+
+        explanation.add_element(ContentAST.Equation(
+            f"h_1 = {self._get_activation_name()}(z_1) = {self.a1[0]:.4f}",
+            inline=False
+        ))
+
+        # Similarly for h2 (abbreviated)
+        explanation.add_element(ContentAST.Equation(
+            f"h_2 = {self.a1[1]:.4f} \\text{{ (calculated similarly)}}",
+            inline=False
+        ))
+
+        # Output
+        z2 = self.W2[0, 0] * self.a1[0] + self.W2[0, 1] * self.a1[1] + self.b2[0]
+        explanation.add_element(ContentAST.Equation(
+            f"\\hat{{y}} = w_3 h_1 + w_4 h_2 + b_2 = {self.W2[0,0]:.1f} \\cdot {self.a1[0]:.4f} + {self.W2[0,1]:.1f} \\cdot {self.a1[1]:.4f} + {self.b2[0]:.1f} = {self.a2[0]:.4f}",
+            inline=False
+        ))
+
+        # Step 2: Loss
+        explanation.add_element(ContentAST.Paragraph([
+            "**Step 2: Calculate Loss**"
+        ]))
+
+        explanation.add_element(ContentAST.Equation(
+            f"L = \\frac{{1}}{{2}}(y - \\hat{{y}})^2 = \\frac{{1}}{{2}}({self.y_target:.2f} - {self.a2[0]:.4f})^2 = {self.loss:.4f}",
+            inline=False
+        ))
+
+        # Step 3: Gradients
+        explanation.add_element(ContentAST.Paragraph([
+            "**Step 3: Compute Gradients**"
+        ]))
+
+        explanation.add_element(ContentAST.Paragraph([
+            "Loss gradient:"
+        ]))
+
+        explanation.add_element(ContentAST.Equation(
+            f"\\frac{{\\partial L}}{{\\partial \\hat{{y}}}} = -(y - \\hat{{y}}) = {self.dL_da2:.4f}",
+            inline=False
+        ))
+
+        grad_w3 = self._compute_gradient_W2(0)
+        explanation.add_element(ContentAST.Equation(
+            f"\\frac{{\\partial L}}{{\\partial w_3}} = \\frac{{\\partial L}}{{\\partial \\hat{{y}}}} \\cdot h_1 = {self.dL_da2:.4f} \\cdot {self.a1[0]:.4f} = {grad_w3:.4f}",
+            inline=False
+        ))
+
+        grad_w11 = self._compute_gradient_W1(0, 0)
+        dz2_da1 = self.W2[0, 0]
+        da1_dz1 = self._activation_derivative(self.z1[0])
+
+        explanation.add_element(ContentAST.Equation(
+            f"\\frac{{\\partial L}}{{\\partial w_{{11}}}} = \\frac{{\\partial L}}{{\\partial \\hat{{y}}}} \\cdot w_3 \\cdot \\sigma'(z_1) \\cdot x_1 = {self.dL_da2:.4f} \\cdot {dz2_da1:.4f} \\cdot {da1_dz1:.4f} \\cdot {self.X[0]:.1f} = {grad_w11:.4f}",
+            inline=False
+        ))
+
+        # Step 4: Weight updates
+        explanation.add_element(ContentAST.Paragraph([
+            "**Step 4: Update Weights**"
+        ]))
+
+        new_w3 = self.new_W2[0, 0]
+        explanation.add_element(ContentAST.Equation(
+            f"w_3^{{new}} = w_3 - \\alpha \\frac{{\\partial L}}{{\\partial w_3}} = {self.W2[0,0]:.1f} - {self.learning_rate} \\cdot {grad_w3:.4f} = {new_w3:.4f}",
+            inline=False
+        ))
+
+        new_w11 = self.new_W1[0, 0]
+        explanation.add_element(ContentAST.Equation(
+            f"w_{{11}}^{{new}} = w_{{11}} - \\alpha \\frac{{\\partial L}}{{\\partial w_{{11}}}} = {self.W1[0,0]:.1f} - {self.learning_rate} \\cdot {grad_w11:.4f} = {new_w11:.4f}",
+            inline=False
+        ))
+
+        explanation.add_element(ContentAST.Paragraph([
+            "These updated weights would be used in the next training iteration."
+        ]))
+
+        return explanation
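
The update rule in Step 4 is plain gradient descent, w ← w − α ∂L/∂w. A minimal sketch of one such update (all values illustrative, sigmoid hidden layer assumed):

alpha = 0.1
w3, h1, y, y_hat = 1.0, 0.6457, 3.00, 1.8385  # illustrative forward-pass values

dL_dw3 = -(y - y_hat) * h1      # Step 3 gradient: dL/dŷ · ∂ŷ/∂w₃
w3_new = w3 - alpha * dL_dw3    # Step 4 update
print(round(w3_new, 4))         # the weight moves in the direction that reduces the loss
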