quizgenerator-0.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuizGenerator/README.md +5 -0
- QuizGenerator/__init__.py +27 -0
- QuizGenerator/__main__.py +7 -0
- QuizGenerator/canvas/__init__.py +13 -0
- QuizGenerator/canvas/canvas_interface.py +627 -0
- QuizGenerator/canvas/classes.py +235 -0
- QuizGenerator/constants.py +149 -0
- QuizGenerator/contentast.py +1955 -0
- QuizGenerator/generate.py +253 -0
- QuizGenerator/logging.yaml +55 -0
- QuizGenerator/misc.py +579 -0
- QuizGenerator/mixins.py +548 -0
- QuizGenerator/performance.py +202 -0
- QuizGenerator/premade_questions/__init__.py +0 -0
- QuizGenerator/premade_questions/basic.py +103 -0
- QuizGenerator/premade_questions/cst334/__init__.py +1 -0
- QuizGenerator/premade_questions/cst334/languages.py +391 -0
- QuizGenerator/premade_questions/cst334/math_questions.py +297 -0
- QuizGenerator/premade_questions/cst334/memory_questions.py +1400 -0
- QuizGenerator/premade_questions/cst334/ostep13_vsfs.py +572 -0
- QuizGenerator/premade_questions/cst334/persistence_questions.py +451 -0
- QuizGenerator/premade_questions/cst334/process.py +648 -0
- QuizGenerator/premade_questions/cst463/__init__.py +0 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/__init__.py +3 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_calculation.py +369 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_descent_questions.py +305 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py +650 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/misc.py +73 -0
- QuizGenerator/premade_questions/cst463/math_and_data/__init__.py +2 -0
- QuizGenerator/premade_questions/cst463/math_and_data/matrix_questions.py +631 -0
- QuizGenerator/premade_questions/cst463/math_and_data/vector_questions.py +534 -0
- QuizGenerator/premade_questions/cst463/models/__init__.py +0 -0
- QuizGenerator/premade_questions/cst463/models/attention.py +192 -0
- QuizGenerator/premade_questions/cst463/models/cnns.py +186 -0
- QuizGenerator/premade_questions/cst463/models/matrices.py +24 -0
- QuizGenerator/premade_questions/cst463/models/rnns.py +202 -0
- QuizGenerator/premade_questions/cst463/models/text.py +203 -0
- QuizGenerator/premade_questions/cst463/models/weight_counting.py +227 -0
- QuizGenerator/premade_questions/cst463/neural-network-basics/__init__.py +6 -0
- QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py +1314 -0
- QuizGenerator/premade_questions/cst463/tensorflow-intro/__init__.py +6 -0
- QuizGenerator/premade_questions/cst463/tensorflow-intro/tensorflow_questions.py +936 -0
- QuizGenerator/qrcode_generator.py +293 -0
- QuizGenerator/question.py +715 -0
- QuizGenerator/quiz.py +467 -0
- QuizGenerator/regenerate.py +472 -0
- QuizGenerator/typst_utils.py +113 -0
- quizgenerator-0.4.2.dist-info/METADATA +265 -0
- quizgenerator-0.4.2.dist-info/RECORD +52 -0
- quizgenerator-0.4.2.dist-info/WHEEL +4 -0
- quizgenerator-0.4.2.dist-info/entry_points.txt +3 -0
- quizgenerator-0.4.2.dist-info/licenses/LICENSE +674 -0
@@ -0,0 +1,1314 @@
from __future__ import annotations

import abc
import io
import logging
import math
import numpy as np
import uuid
import os
from typing import List, Tuple, Dict, Any

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from QuizGenerator.contentast import ContentAST
from QuizGenerator.question import Question, Answer, QuestionRegistry
from QuizGenerator.mixins import TableQuestionMixin, BodyTemplatesMixin
from ..models.matrices import MatrixQuestion

log = logging.getLogger(__name__)


class SimpleNeuralNetworkBase(MatrixQuestion, abc.ABC):
    """
    Base class for simple neural network questions.

    Generates a small feedforward network:
    - 2-3 input neurons
    - 2 hidden neurons (single hidden layer)
    - 1 output neuron
    - Random weights and biases
    - Runs forward pass and stores all activations
    """

    # Activation function types
    ACTIVATION_SIGMOID = "sigmoid"
    ACTIVATION_RELU = "relu"
    ACTIVATION_LINEAR = "linear"

    def __init__(self, *args, **kwargs):
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        # Network architecture parameters
        self.num_inputs = kwargs.get("num_inputs", 2)
        self.num_hidden = kwargs.get("num_hidden", 2)
        self.num_outputs = kwargs.get("num_outputs", 1)

        # Configuration
        self.activation_function = None
        self.use_bias = kwargs.get("use_bias", True)
        self.param_digits = kwargs.get("param_digits", 1)  # Precision for weights/biases

        # Network parameters (weights and biases)
        self.W1 = None  # Input to hidden weights (num_hidden x num_inputs)
        self.b1 = None  # Hidden layer biases (num_hidden,)
        self.W2 = None  # Hidden to output weights (num_outputs x num_hidden)
        self.b2 = None  # Output layer biases (num_outputs,)

        # Input data and forward pass results
        self.X = None   # Input values (num_inputs,)
        self.z1 = None  # Hidden layer pre-activation (num_hidden,)
        self.a1 = None  # Hidden layer activations (num_hidden,)
        self.z2 = None  # Output layer pre-activation (num_outputs,)
        self.a2 = None  # Output layer activation (prediction)

        # Target and loss (for backprop questions)
        self.y_target = None
        self.loss = None

        # Gradients (for backprop questions)
        self.dL_da2 = None   # Gradient of loss w.r.t. output
        self.da2_dz2 = None  # Gradient of activation w.r.t. pre-activation
        self.dL_dz2 = None   # Gradient of loss w.r.t. output pre-activation

    def _generate_network(self, weight_range=(-2, 2), input_range=(-3, 3)):
        """Generate random network parameters and input."""
        # Generate weights using MatrixQuestion's rounded matrix method.
        # Use param_digits to match display precision in tables and explanations.
        self.W1 = self.get_rounded_matrix(
            (self.num_hidden, self.num_inputs),
            low=weight_range[0],
            high=weight_range[1],
            digits_to_round=self.param_digits
        )

        self.W2 = self.get_rounded_matrix(
            (self.num_outputs, self.num_hidden),
            low=weight_range[0],
            high=weight_range[1],
            digits_to_round=self.param_digits
        )

        # Generate biases
        if self.use_bias:
            self.b1 = self.get_rounded_matrix(
                (self.num_hidden,),
                low=weight_range[0],
                high=weight_range[1],
                digits_to_round=self.param_digits
            )
            self.b2 = self.get_rounded_matrix(
                (self.num_outputs,),
                low=weight_range[0],
                high=weight_range[1],
                digits_to_round=self.param_digits
            )
        else:
            self.b1 = np.zeros(self.num_hidden)
            self.b2 = np.zeros(self.num_outputs)

        # Generate input values (keep as integers for simplicity)
        self.X = self.get_rounded_matrix(
            (self.num_inputs,),
            low=input_range[0],
            high=input_range[1],
            digits_to_round=0  # Round to integers
        )

    def _select_activation_function(self):
        """Randomly select an activation function."""
        activations = [
            self.ACTIVATION_SIGMOID,
            self.ACTIVATION_RELU
        ]
        self.activation_function = self.rng.choice(activations)

    def _apply_activation(self, z, function_type=None):
        """Apply activation function to pre-activation values."""
        if function_type is None:
            function_type = self.activation_function

        if function_type == self.ACTIVATION_SIGMOID:
            return 1 / (1 + np.exp(-z))
        elif function_type == self.ACTIVATION_RELU:
            return np.maximum(0, z)
        elif function_type == self.ACTIVATION_LINEAR:
            return z
        else:
            raise ValueError(f"Unknown activation function: {function_type}")

    def _activation_derivative(self, z, function_type=None):
        """Compute derivative of activation function."""
        if function_type is None:
            function_type = self.activation_function

        if function_type == self.ACTIVATION_SIGMOID:
            a = self._apply_activation(z, function_type)
            return a * (1 - a)
        elif function_type == self.ACTIVATION_RELU:
            return np.where(z > 0, 1, 0)
        elif function_type == self.ACTIVATION_LINEAR:
            return np.ones_like(z)
        else:
            raise ValueError(f"Unknown activation function: {function_type}")
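
    # --- Editor's sketch (not part of the released wheel): a quick numeric
    # check of the activation/derivative pairs above. For sigmoid, the
    # derivative can be computed from the activation itself, which is exactly
    # what _activation_derivative exploits. Plain numpy, no QuizGenerator API.
    @staticmethod
    def _demo_activation_derivatives():
        z = np.array([-2.0, 0.5, 3.0])
        eps = 1e-6

        def sigmoid(t):
            return 1 / (1 + np.exp(-t))

        # Analytic derivative sigma'(z) = sigma(z) * (1 - sigma(z)) ...
        analytic = sigmoid(z) * (1 - sigmoid(z))
        # ... should agree with a central finite difference
        numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
        assert np.allclose(analytic, numeric, atol=1e-8)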

    def _forward_pass(self):
        """Run forward pass through the network."""
        # Hidden layer
        self.z1 = self.W1 @ self.X + self.b1
        self.a1 = self._apply_activation(self.z1)

        # Output layer
        self.z2 = self.W2 @ self.a1 + self.b2
        self.a2 = self._apply_activation(self.z2, self.ACTIVATION_SIGMOID)  # Sigmoid output for binary classification

        # Round all computed values to display precision to ensure students
        # can reproduce calculations; z and a values are shown with 4 decimal places.
        self.z1 = np.round(self.z1, 4)
        self.a1 = np.round(self.a1, 4)
        self.z2 = np.round(self.z2, 4)
        self.a2 = np.round(self.a2, 4)

        return self.a2
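
    # --- Editor's sketch (not part of the released wheel): the same two-layer
    # forward pass as _forward_pass above, written out with concrete numbers.
    # The weights and inputs are made-up illustrations, not values the
    # generator would necessarily produce.
    @staticmethod
    def _demo_forward_pass():
        W1 = np.array([[0.5, -1.0],
                       [1.5, 0.2]])     # (num_hidden x num_inputs)
        b1 = np.array([0.1, -0.3])
        W2 = np.array([[0.7, -0.6]])    # (num_outputs x num_hidden)
        b2 = np.array([0.2])
        X = np.array([1.0, 2.0])

        z1 = W1 @ X + b1                # hidden pre-activations
        a1 = np.maximum(0, z1)          # ReLU hidden layer
        z2 = W2 @ a1 + b2               # output pre-activation
        y_hat = 1 / (1 + np.exp(-z2))   # sigmoid output, in (0, 1)
        return y_hat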

    def _compute_loss(self, y_target):
        """Compute binary cross-entropy loss."""
        self.y_target = y_target
        # BCE: L = -[y log(ŷ) + (1-y) log(1-ŷ)]
        # Add small epsilon to prevent log(0)
        epsilon = 1e-15
        y_pred = np.clip(self.a2[0], epsilon, 1 - epsilon)
        self.loss = -(y_target * np.log(y_pred) + (1 - y_target) * np.log(1 - y_pred))
        return self.loss
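
    # --- Editor's sketch (not part of the released wheel): BCE from the
    # formula above, checked against the two special cases the explanations
    # rely on: y = 1 gives -log(y_hat) and y = 0 gives -log(1 - y_hat).
    @staticmethod
    def _demo_bce():
        y_hat = 0.7
        assert math.isclose(
            -(1 * math.log(y_hat) + 0 * math.log(1 - y_hat)), -math.log(y_hat))
        assert math.isclose(
            -(0 * math.log(y_hat) + 1 * math.log(1 - y_hat)), -math.log(1 - y_hat))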

    def _compute_output_gradient(self):
        """Compute gradient of loss w.r.t. output."""
        # For BCE loss with sigmoid activation, the gradient simplifies beautifully:
        # dL/dz2 = ŷ - y (this is the combined derivative of BCE loss and sigmoid activation)
        #
        # Derivation:
        #   BCE: L = -[y log(ŷ) + (1-y) log(1-ŷ)]
        #   dL/dŷ = -[y/ŷ - (1-y)/(1-ŷ)]
        #   Sigmoid: ŷ = σ(z), dŷ/dz = ŷ(1-ŷ)
        #   Chain rule: dL/dz = dL/dŷ * dŷ/dz = ŷ - y

        self.dL_dz2 = self.a2[0] - self.y_target

        # Store intermediate values for explanation purposes.
        # Clip to prevent division by zero (same epsilon as in loss calculation).
        epsilon = 1e-15
        y_pred_clipped = np.clip(self.a2[0], epsilon, 1 - epsilon)
        self.dL_da2 = -(self.y_target / y_pred_clipped - (1 - self.y_target) / (1 - y_pred_clipped))
        self.da2_dz2 = self.a2[0] * (1 - self.a2[0])

        return self.dL_dz2
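
    # --- Editor's sketch (not part of the released wheel): numeric check that
    # the shortcut dL/dz = y_hat - y matches the explicit chain-rule product
    # (dL/dy_hat) * (dy_hat/dz) stored above for the explanations.
    @staticmethod
    def _demo_output_gradient():
        y, y_hat = 1, 0.7
        dL_dyhat = -(y / y_hat - (1 - y) / (1 - y_hat))
        dyhat_dz = y_hat * (1 - y_hat)
        assert math.isclose(dL_dyhat * dyhat_dz, y_hat - y)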

    def _compute_gradient_W2(self, hidden_idx):
        """Compute gradient ∂L/∂W2[0, hidden_idx]."""
        # ∂L/∂w = dL/dz2 * ∂z2/∂w = dL/dz2 * a1[hidden_idx]
        return float(self.dL_dz2 * self.a1[hidden_idx])

    def _compute_gradient_W1(self, hidden_idx, input_idx):
        """Compute gradient ∂L/∂W1[hidden_idx, input_idx]."""
        # dL/dz1[hidden_idx] = dL/dz2 * ∂z2/∂a1[hidden_idx] * ∂a1/∂z1[hidden_idx]
        #                    = dL/dz2 * W2[0, hidden_idx] * activation'(z1[hidden_idx])

        dz2_da1 = self.W2[0, hidden_idx]
        da1_dz1 = self._activation_derivative(self.z1[hidden_idx])

        dL_dz1 = self.dL_dz2 * dz2_da1 * da1_dz1

        # ∂L/∂w = dL/dz1 * ∂z1/∂w = dL/dz1 * X[input_idx]
        return float(dL_dz1 * self.X[input_idx])
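
    # --- Editor's sketch (not part of the released wheel): a finite-difference
    # check of the hand-derived W1 gradient for a tiny ReLU-hidden /
    # sigmoid-output network with BCE loss. All numbers are illustrative.
    @staticmethod
    def _demo_gradient_check():
        W1 = np.array([[0.5, -1.0], [1.5, 0.2]])
        b1 = np.array([0.1, -0.3])
        W2 = np.array([[0.7, -0.6]])
        b2 = np.array([0.2])
        X = np.array([2.0, 1.0])
        y = 1.0

        def loss(W1_local):
            z1 = W1_local @ X + b1
            a1 = np.maximum(0, z1)
            y_hat = 1 / (1 + np.exp(-(W2 @ a1 + b2)[0]))
            return -(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))

        # Analytic gradient for W1[0, 0], exactly as in _compute_gradient_W1
        z1 = W1 @ X + b1
        a1 = np.maximum(0, z1)
        y_hat = 1 / (1 + np.exp(-(W2 @ a1 + b2)[0]))
        dL_dz2 = y_hat - y
        analytic = dL_dz2 * W2[0, 0] * (1.0 if z1[0] > 0 else 0.0) * X[0]

        # Central finite difference on the same weight entry
        eps = 1e-6
        W1_plus, W1_minus = W1.copy(), W1.copy()
        W1_plus[0, 0] += eps
        W1_minus[0, 0] -= eps
        numeric = (loss(W1_plus) - loss(W1_minus)) / (2 * eps)
        assert np.isclose(analytic, numeric, atol=1e-6)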

    def _get_activation_name(self):
        """Get human-readable activation function name."""
        if self.activation_function == self.ACTIVATION_SIGMOID:
            return "sigmoid"
        elif self.activation_function == self.ACTIVATION_RELU:
            return "ReLU"
        elif self.activation_function == self.ACTIVATION_LINEAR:
            return "linear"
        return "unknown"

    def _get_activation_formula(self):
        """Get LaTeX formula for activation function."""
        if self.activation_function == self.ACTIVATION_SIGMOID:
            return r"\sigma(z) = \frac{1}{1 + e^{-z}}"
        elif self.activation_function == self.ACTIVATION_RELU:
            return r"\text{ReLU}(z) = \max(0, z)"
        elif self.activation_function == self.ACTIVATION_LINEAR:
            return r"f(z) = z"
        return ""

    def _generate_parameter_table(self, include_activations=False, include_training_context=False):
        """
        Generate side-by-side tables showing all network parameters.

        Args:
            include_activations: If True, include computed activation values
            include_training_context: If True, include target, loss, etc. (for backprop questions)

        Returns:
            ContentAST.TableGroup with network parameters in two side-by-side tables
        """
        # Left table: Inputs & Weights
        left_data = []
        left_data.append(["Symbol", "Value"])

        # Input values
        for i in range(self.num_inputs):
            left_data.append([
                ContentAST.Equation(f"x_{i+1}", inline=True),
                f"{self.X[i]:.1f}"  # Inputs are always integers or 1 decimal
            ])

        # Weights from input to hidden
        for j in range(self.num_hidden):
            for i in range(self.num_inputs):
                left_data.append([
                    ContentAST.Equation(f"w_{{{j+1}{i+1}}}", inline=True),
                    f"{self.W1[j, i]:.{self.param_digits}f}"
                ])

        # Weights from hidden to output (labeled w_3, w_4, ..., matching the
        # diagram; the i+3 numbering assumes the default two hidden units)
        for i in range(self.num_hidden):
            left_data.append([
                ContentAST.Equation(f"w_{i+3}", inline=True),
                f"{self.W2[0, i]:.{self.param_digits}f}"
            ])

        # Right table: Biases, Activations, Training context
        right_data = []
        right_data.append(["Symbol", "Value"])

        # Hidden layer biases
        if self.use_bias:
            for j in range(self.num_hidden):
                right_data.append([
                    ContentAST.Equation(f"b_{j+1}", inline=True),
                    f"{self.b1[j]:.{self.param_digits}f}"
                ])

        # Output bias
        if self.use_bias:
            right_data.append([
                ContentAST.Equation(r"b_{out}", inline=True),
                f"{self.b2[0]:.{self.param_digits}f}"
            ])

        # Hidden layer activations (if computed and requested)
        if include_activations and self.a1 is not None:
            for i in range(self.num_hidden):
                right_data.append([
                    ContentAST.Equation(f"h_{i+1}", inline=True),
                    f"{self.a1[i]:.4f}"
                ])

        # Output activation (if computed and requested)
        if include_activations and self.a2 is not None:
            right_data.append([
                ContentAST.Equation(r"\hat{y}", inline=True),
                f"{self.a2[0]:.4f}"
            ])

        # Training context (target, loss - for backprop questions)
        if include_training_context:
            if self.y_target is not None:
                right_data.append([
                    ContentAST.Equation("y", inline=True),
                    f"{int(self.y_target)}"  # Binary target (0 or 1)
                ])

            if self.loss is not None:
                right_data.append([
                    ContentAST.Equation("L", inline=True),
                    f"{self.loss:.4f}"
                ])

        # Create table group
        table_group = ContentAST.TableGroup()
        table_group.add_table(ContentAST.Table(data=left_data))
        table_group.add_table(ContentAST.Table(data=right_data))

        return table_group

    def _generate_network_diagram(self, show_weights=True, show_activations=False):
        """
        Generate a simple, clean network diagram.

        Args:
            show_weights: If True, display weights on edges
            show_activations: If True, display activation values on nodes

        Returns:
            BytesIO buffer containing PNG image
        """
        # Create figure with tight layout and equal aspect ratio
        fig = plt.figure(figsize=(8, 2.5))
        ax = fig.add_subplot(111)
        ax.set_aspect('equal', adjustable='box')  # Keep circles circular
        ax.axis('off')

        # Node radius
        r = 0.15

        # Layer x-positions
        input_x = 0.5
        hidden_x = 2.0
        output_x = 3.5

        # Calculate y-positions for nodes (top to bottom order)
        def get_y_positions(n, include_bias=False):
            # If including bias, need one more position at the top
            total_nodes = n + 1 if include_bias else n
            if total_nodes == 1:
                return [1.0]
            spacing = min(2.0 / (total_nodes - 1), 0.6)
            # Start from top
            start = 1.0 + (total_nodes - 1) * spacing / 2
            positions = [start - i * spacing for i in range(total_nodes)]
            return positions

        # Input layer: bias (if present) at top, then x_1, x_2, ... going down
        input_positions = get_y_positions(self.num_inputs, include_bias=self.use_bias)
        if self.use_bias:
            bias1_y = input_positions[0]
            input_y = input_positions[1:]  # x_1 is second (below bias), x_2 is third, etc.
        else:
            bias1_y = None
            input_y = input_positions

        # Hidden layer: bias (if present) at top, then h_1, h_2, ... going down
        hidden_positions = get_y_positions(self.num_hidden, include_bias=self.use_bias)
        if self.use_bias:
            bias2_y = hidden_positions[0]
            hidden_y = hidden_positions[1:]
        else:
            bias2_y = None
            hidden_y = hidden_positions

        # Output layer: centered
        output_y = [1.0]

        # Draw edges first (so they're behind nodes)
        # Input to hidden
        for i in range(self.num_inputs):
            for j in range(self.num_hidden):
                ax.plot([input_x, hidden_x], [input_y[i], hidden_y[j]],
                        'k-', linewidth=1, alpha=0.7, zorder=1)
                if show_weights:
                    label_x = input_x + 0.3
                    label_y = input_y[i] + (hidden_y[j] - input_y[i]) * 0.2
                    # Use LaTeX math mode for proper subscript rendering
                    weight_label = f'$w_{{{j+1}{i+1}}}$'
                    ax.text(label_x, label_y, weight_label, fontsize=8,
                            bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))

        # Bias to hidden
        if self.use_bias:
            for j in range(self.num_hidden):
                ax.plot([input_x, hidden_x], [bias1_y, hidden_y[j]],
                        'k-', linewidth=1, alpha=0.7, zorder=1)
                if show_weights:
                    label_x = input_x + 0.3
                    label_y = bias1_y + (hidden_y[j] - bias1_y) * 0.2
                    bias_label = f'$b_{{{j+1}}}$'
                    ax.text(label_x, label_y, bias_label, fontsize=8,
                            bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))

        # Hidden to output
        for i in range(self.num_hidden):
            ax.plot([hidden_x, output_x], [hidden_y[i], output_y[0]],
                    'k-', linewidth=1, alpha=0.7, zorder=1)
            if show_weights:
                label_x = hidden_x + 0.3
                label_y = hidden_y[i] + (output_y[0] - hidden_y[i]) * 0.2
                weight_label = f'$w_{{{i+3}}}$'
                ax.text(label_x, label_y, weight_label, fontsize=8,
                        bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))

        # Bias to output
        if self.use_bias:
            ax.plot([hidden_x, output_x], [bias2_y, output_y[0]],
                    'k-', linewidth=1, alpha=0.7, zorder=1)
            if show_weights:
                label_x = hidden_x + 0.3
                label_y = bias2_y + (output_y[0] - bias2_y) * 0.2
                bias_label = r'$b_{out}$'
                ax.text(label_x, label_y, bias_label, fontsize=8,
                        bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))

        # Draw nodes
        # Input nodes
        for i, y in enumerate(input_y):
            circle = plt.Circle((input_x, y), r, facecolor='lightgray',
                                edgecolor='black', linewidth=1.5, zorder=10)
            ax.add_patch(circle)
            label = f'$x_{{{i+1}}}$' if not show_activations else f'$x_{{{i+1}}}$={self.X[i]:.1f}'
            ax.text(input_x - r - 0.15, y, label, fontsize=10, ha='right', va='center')

        # Bias nodes
        if self.use_bias:
            circle = plt.Circle((input_x, bias1_y), r, facecolor='lightgray',
                                edgecolor='black', linewidth=1.5, zorder=10)
            ax.add_patch(circle)
            ax.text(input_x, bias1_y, '1', fontsize=10, ha='center', va='center', weight='bold')

            circle = plt.Circle((hidden_x, bias2_y), r, facecolor='lightgray',
                                edgecolor='black', linewidth=1.5, zorder=10)
            ax.add_patch(circle)
            ax.text(hidden_x, bias2_y, '1', fontsize=10, ha='center', va='center', weight='bold')

        # Hidden nodes
        for i, y in enumerate(hidden_y):
            circle = plt.Circle((hidden_x, y), r, facecolor='lightblue',
                                edgecolor='black', linewidth=1.5, zorder=10)
            ax.add_patch(circle)
            ax.plot([hidden_x, hidden_x], [y - r*0.7, y + r*0.7], 'k-', linewidth=1.2, zorder=11)
            ax.text(hidden_x - r*0.35, y, r'$\Sigma$', fontsize=11, ha='center', va='center', zorder=12)
            ax.text(hidden_x + r*0.35, y, r'$f$', fontsize=10, ha='center', va='center', zorder=12, style='italic')
            if show_activations and self.a1 is not None:
                ax.text(hidden_x, y - r - 0.15, f'{self.a1[i]:.2f}', fontsize=8, ha='center', va='top')

        # Output node
        y = output_y[0]
        circle = plt.Circle((output_x, y), r, facecolor='lightblue',
                            edgecolor='black', linewidth=1.5, zorder=10)
        ax.add_patch(circle)
        ax.plot([output_x, output_x], [y - r*0.7, y + r*0.7], 'k-', linewidth=1.2, zorder=11)
        ax.text(output_x - r*0.35, y, r'$\Sigma$', fontsize=11, ha='center', va='center', zorder=12)
        ax.text(output_x + r*0.35, y, r'$f$', fontsize=10, ha='center', va='center', zorder=12, style='italic')
        label = r'$\hat{y}$' if not show_activations else f'$\\hat{{y}}$={self.a2[0]:.2f}'
        ax.text(output_x + r + 0.15, y, label, fontsize=10, ha='left', va='center')

        # Save to buffer with minimal padding
        buffer = io.BytesIO()
        plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight',
                    facecolor='white', edgecolor='none', pad_inches=0.0)
        plt.close(fig)
        buffer.seek(0)

        return buffer
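
    # --- Editor's note (not part of the released wheel): hypothetical usage of
    # the diagram helper: dump the PNG buffer to disk to eyeball the layout.
    # Assumes a concrete subclass, since SimpleNeuralNetworkBase is abstract.
    #
    #     q = ForwardPassQuestion()
    #     q.refresh(rng_seed=42)
    #     with open("network.png", "wb") as f:
    #         f.write(q._generate_network_diagram(show_weights=True).read())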

    def _generate_ascii_network(self):
        """Generate ASCII art representation of the network for alt-text."""
        lines = []
        lines.append("Network Architecture:")
        lines.append("")
        lines.append("Input Layer:      Hidden Layer:      Output Layer:")

        # For 2 inputs, 2 hidden, 1 output
        if self.num_inputs == 2 and self.num_hidden == 2:
            lines.append(" x₁ ----[w₁₁]---→ h₁ ----[w₃]----→")
            lines.append("     \\          /     \\          /")
            lines.append("      \\        /       \\        /")
            lines.append("       \\      /         \\      /   ŷ")
            lines.append("        \\    /           \\    /")
            lines.append("         \\  /             \\  /")
            lines.append("          \\/               \\/")
            lines.append("          /\\               /\\")
            lines.append("         /  \\             /  \\")
            lines.append(" x₂ ----[w₂₁]---→ h₂ ----[w₄]----→")
        else:
            # Generic representation
            for i in range(max(self.num_inputs, self.num_hidden)):
                parts = []
                if i < self.num_inputs:
                    parts.append(f" x_{i+1}")
                else:
                    parts.append("    ")
                parts.append(" ---→ ")
                if i < self.num_hidden:
                    parts.append(f"h_{i+1}")
                else:
                    parts.append("   ")
                parts.append(" ---→ ")
                if i == self.num_hidden // 2:
                    parts.append("ŷ")
                lines.append("".join(parts))

        lines.append("")
        lines.append(f"Activation function: {self._get_activation_name()}")

        return "\n".join(lines)


@QuestionRegistry.register()
class ForwardPassQuestion(SimpleNeuralNetworkBase):
    """
    Question asking students to calculate a forward pass through a simple network.

    Students calculate:
    - Hidden layer activations (h₁, h₂)
    - Final output (ŷ)
    """

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate network
        self._generate_network()
        self._select_activation_function()

        # Run forward pass to get correct answers
        self._forward_pass()

        # Create answer fields
        self._create_answers()

    def _create_answers(self):
        """Create answer fields for forward pass values."""
        self.answers = {}

        # Hidden layer activations
        for i in range(self.num_hidden):
            key = f"h{i+1}"
            self.answers[key] = Answer.float_value(key, float(self.a1[i]))

        # Output
        self.answers["y_pred"] = Answer.float_value("y_pred", float(self.a2[0]))

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            f"Given the neural network below with {self._get_activation_name()} activation "
            f"in the hidden layer and sigmoid activation in the output layer (for binary classification), "
            f"calculate the forward pass for the given input values."
        ]))

        # Network diagram
        body.add_element(
            ContentAST.Picture(
                img_data=self._generate_network_diagram(show_weights=True, show_activations=False),
                caption="Neural network architecture"
            )
        )

        # Network parameters table
        body.add_element(self._generate_parameter_table(include_activations=False))

        # Activation function
        body.add_element(ContentAST.Paragraph([
            f"**Hidden layer activation:** {self._get_activation_name()}"
        ]))

        # Create answer block
        answers = []
        for i in range(self.num_hidden):
            answers.append(
                ContentAST.Answer(
                    answer=self.answers[f"h{i+1}"],
                    label=f"h_{i+1}"
                )
            )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["y_pred"],
                label="ŷ"
            )
        )

        body.add_element(ContentAST.AnswerBlock(answers))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "To solve this problem, we need to compute the forward pass through the network."
        ]))

        # Hidden layer calculations
        explanation.add_element(ContentAST.Paragraph([
            "**Step 1: Calculate hidden layer pre-activations**"
        ]))

        for i in range(self.num_hidden):
            # Build equation for z_i
            terms = []
            for j in range(self.num_inputs):
                terms.append(f"({self.W1[i,j]:.{self.param_digits}f})({self.X[j]:.1f})")

            z_calc = " + ".join(terms)
            if self.use_bias:
                z_calc += f" + {self.b1[i]:.{self.param_digits}f}"

            explanation.add_element(ContentAST.Equation(
                f"z_{i+1} = {z_calc} = {self.z1[i]:.4f}",
                inline=False
            ))

        # Hidden layer activations
        explanation.add_element(ContentAST.Paragraph([
            f"**Step 2: Apply {self._get_activation_name()} activation**"
        ]))

        for i in range(self.num_hidden):
            if self.activation_function == self.ACTIVATION_SIGMOID:
                explanation.add_element(ContentAST.Equation(
                    f"h_{i+1} = \\sigma(z_{i+1}) = \\frac{{1}}{{1 + e^{{-{self.z1[i]:.4f}}}}} = {self.a1[i]:.4f}",
                    inline=False
                ))
            elif self.activation_function == self.ACTIVATION_RELU:
                explanation.add_element(ContentAST.Equation(
                    f"h_{i+1} = \\text{{ReLU}}(z_{i+1}) = \\max(0, {self.z1[i]:.4f}) = {self.a1[i]:.4f}",
                    inline=False
                ))
            else:
                explanation.add_element(ContentAST.Equation(
                    f"h_{i+1} = z_{i+1} = {self.a1[i]:.4f}",
                    inline=False
                ))

        # Output layer
        explanation.add_element(ContentAST.Paragraph([
            "**Step 3: Calculate output (with sigmoid activation)**"
        ]))

        terms = []
        for j in range(self.num_hidden):
            terms.append(f"({self.W2[0,j]:.{self.param_digits}f})({self.a1[j]:.4f})")

        z_out_calc = " + ".join(terms)
        if self.use_bias:
            z_out_calc += f" + {self.b2[0]:.{self.param_digits}f}"

        explanation.add_element(ContentAST.Equation(
            f"z_{{out}} = {z_out_calc} = {self.z2[0]:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Equation(
            f"\\hat{{y}} = \\sigma(z_{{out}}) = \\frac{{1}}{{1 + e^{{-{self.z2[0]:.4f}}}}} = {self.a2[0]:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Paragraph([
            "(Note: The output layer uses sigmoid activation for binary classification, "
            "so the output is between 0 and 1, representing the probability of class 1.)"
        ]))

        return explanation


@QuestionRegistry.register()
class BackpropGradientQuestion(SimpleNeuralNetworkBase):
    """
    Question asking students to calculate gradients using backpropagation.

    Given a completed forward pass, students calculate:
    - Gradients for multiple specific weights (∂L/∂w)
    """

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate network
        self._generate_network()
        self._select_activation_function()

        # Run forward pass
        self._forward_pass()

        # Generate binary target (0 or 1).
        # Choose the opposite of what the network predicts to create meaningful gradients.
        if self.a2[0] > 0.5:
            self.y_target = 0
        else:
            self.y_target = 1
        self._compute_loss(self.y_target)
        # Round loss to display precision (4 decimal places)
        self.loss = round(self.loss, 4)
        self._compute_output_gradient()

        # Create answer fields for specific weight gradients
        self._create_answers()

    def _create_answers(self):
        """Create answer fields for weight gradients."""
        self.answers = {}

        # Ask for gradients of 2-3 weights,
        # including at least one from each layer.

        # Gradients for W2 (hidden to output)
        for i in range(self.num_hidden):
            key = f"dL_dw2_{i}"
            self.answers[key] = Answer.auto_float(key, self._compute_gradient_W2(i))

        # Gradients for W1 (input to hidden) - pick first hidden neuron
        for j in range(self.num_inputs):
            key = f"dL_dw1_0{j}"
            self.answers[key] = Answer.auto_float(key, self._compute_gradient_W1(0, j))

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            f"Given the neural network below with {self._get_activation_name()} activation "
            f"in the hidden layer and sigmoid activation in the output layer (for binary classification), "
            f"a forward pass has been completed with the values shown. "
            f"Calculate the gradients (∂L/∂w) for the specified weights using backpropagation."
        ]))

        # Network diagram
        body.add_element(
            ContentAST.Picture(
                img_data=self._generate_network_diagram(show_weights=True, show_activations=False),
                caption="Neural network architecture"
            )
        )

        # Network parameters and forward pass results table
        body.add_element(self._generate_parameter_table(include_activations=True, include_training_context=True))

        # Activation function
        body.add_element(ContentAST.Paragraph([
            f"**Hidden layer activation:** {self._get_activation_name()}"
        ]))

        body.add_element(ContentAST.Paragraph([
            "**Calculate the following gradients:**"
        ]))

        # Create answer block
        answers = []

        # W2 gradients
        for i in range(self.num_hidden):
            answers.append(
                ContentAST.Answer(
                    answer=self.answers[f"dL_dw2_{i}"],
                    label=f"∂L/∂w_{i+3}"
                )
            )

        # W1 gradients (first hidden neuron)
        for j in range(self.num_inputs):
            answers.append(
                ContentAST.Answer(
                    answer=self.answers[f"dL_dw1_0{j}"],
                    label=f"∂L/∂w_1{j+1}"
                )
            )

        body.add_element(ContentAST.AnswerBlock(answers))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "To solve this problem, we use the chain rule to compute gradients via backpropagation."
        ]))

        # Output layer gradient
        explanation.add_element(ContentAST.Paragraph([
            "**Step 1: Compute output layer gradient**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "For binary cross-entropy loss with sigmoid output activation, "
            "the gradient with respect to the pre-activation simplifies beautifully:"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L}}{{\\partial z_{{out}}}} = \\hat{{y}} - y = {self.a2[0]:.4f} - {int(self.y_target)} = {self.dL_dz2:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Paragraph([
            "(This elegant result comes from combining the BCE loss derivative and the sigmoid activation derivative.)"
        ]))

        # W2 gradients
        explanation.add_element(ContentAST.Paragraph([
            "**Step 2: Gradients for hidden-to-output weights**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "Using the chain rule:"
        ]))

        for i in range(self.num_hidden):
            grad = self._compute_gradient_W2(i)
            explanation.add_element(ContentAST.Equation(
                f"\\frac{{\\partial L}}{{\\partial w_{i+3}}} = \\frac{{\\partial L}}{{\\partial z_{{out}}}} \\cdot \\frac{{\\partial z_{{out}}}}{{\\partial w_{i+3}}} = {self.dL_dz2:.4f} \\cdot {self.a1[i]:.4f} = {grad:.4f}",
                inline=False
            ))

        # W1 gradients
        explanation.add_element(ContentAST.Paragraph([
            "**Step 3: Gradients for input-to-hidden weights**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "First, compute the gradient flowing back to the hidden layer:"
        ]))

        for j in range(self.num_inputs):
            # Compute intermediate values (first hidden neuron)
            dz2_da1 = self.W2[0, 0]
            da1_dz1 = self._activation_derivative(self.z1[0])
            dL_dz1 = self.dL_dz2 * dz2_da1 * da1_dz1

            grad = self._compute_gradient_W1(0, j)

            if self.activation_function == self.ACTIVATION_SIGMOID:
                act_deriv_str = f"\\sigma'(z_1) = h_1(1-h_1) = {self.a1[0]:.4f}(1-{self.a1[0]:.4f}) = {da1_dz1:.4f}"
            elif self.activation_function == self.ACTIVATION_RELU:
                act_deriv_str = f"\\text{{ReLU}}'(z_1) = \\mathbb{{1}}(z_1 > 0) = {da1_dz1:.4f}"
            else:
                act_deriv_str = "1"

            explanation.add_element(ContentAST.Equation(
                f"\\frac{{\\partial L}}{{\\partial w_{{1{j+1}}}}} = \\frac{{\\partial L}}{{\\partial z_{{out}}}} \\cdot w_{3} \\cdot {act_deriv_str} \\cdot x_{j+1} = {self.dL_dz2:.4f} \\cdot {dz2_da1:.4f} \\cdot {da1_dz1:.4f} \\cdot {self.X[j]:.1f} = {grad:.4f}",
                inline=False
            ))

        return explanation
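

# --- Editor's sketch (not part of the released wheel): the three explanation
# steps above with concrete numbers: y_hat = 0.7, y = 1, a sigmoid hidden
# unit h_1 = 0.5, w_3 = 0.7, and x_1 = 2.0. All values are illustrative.
def _demo_backprop_steps():
    y_hat, y = 0.7, 1
    dL_dz_out = y_hat - y                            # Step 1: -0.3
    h1 = 0.5
    dL_dw3 = dL_dz_out * h1                          # Step 2: -0.15
    w3, x1 = 0.7, 2.0
    sigmoid_deriv = h1 * (1 - h1)                    # sigma'(z_1) = h_1(1 - h_1) = 0.25
    dL_dw11 = dL_dz_out * w3 * sigmoid_deriv * x1    # Step 3: -0.105
    return dL_dw3, dL_dw11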


@QuestionRegistry.register()
class EnsembleAveragingQuestion(Question):
    """
    Question asking students to combine predictions from multiple models (ensemble).

    Students calculate:
    - Mean prediction (for regression)
    - Optionally: variance or other statistics
    """

    def __init__(self, *args, **kwargs):
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        self.num_models = kwargs.get("num_models", 5)
        self.predictions = None

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate predictions from multiple models.
        # Use a range that makes sense for typical regression problems.
        base_value = self.rng.uniform(0, 10)
        self.predictions = [
            base_value + self.rng.uniform(-2, 2)
            for _ in range(self.num_models)
        ]

        # Round to make calculations easier
        self.predictions = [round(p, 1) for p in self.predictions]

        # Create answers
        self._create_answers()

    def _create_answers(self):
        """Create answer fields for ensemble statistics."""
        self.answers = {}

        # Mean prediction
        mean_pred = np.mean(self.predictions)
        self.answers["mean"] = Answer.float_value("mean", float(mean_pred))

        # Median (optional, but useful)
        median_pred = np.median(self.predictions)
        self.answers["median"] = Answer.float_value("median", float(median_pred))

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            f"You have trained {self.num_models} different regression models on the same dataset. "
            f"For a particular test input, each model produces the following predictions:"
        ]))

        # Show predictions
        pred_list = ", ".join([f"{p:.1f}" for p in self.predictions])
        body.add_element(ContentAST.Paragraph([
            f"Model predictions: {pred_list}"
        ]))

        # Question
        body.add_element(ContentAST.Paragraph([
            "To create an ensemble, calculate the combined prediction using the following methods:"
        ]))

        # Create answer block
        answers = []
        answers.append(
            ContentAST.Answer(
                answer=self.answers["mean"],
                label="Mean (average)"
            )
        )
        answers.append(
            ContentAST.Answer(
                answer=self.answers["median"],
                label="Median"
            )
        )

        body.add_element(ContentAST.AnswerBlock(answers))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "Ensemble methods combine predictions from multiple models to create a more robust prediction."
        ]))

        # Mean calculation
        explanation.add_element(ContentAST.Paragraph([
            "**Mean (Bagging approach):**"
        ]))

        pred_sum = " + ".join([f"{p:.1f}" for p in self.predictions])
        mean_val = np.mean(self.predictions)

        explanation.add_element(ContentAST.Equation(
            f"\\text{{mean}} = \\frac{{{pred_sum}}}{{{self.num_models}}} = \\frac{{{sum(self.predictions):.1f}}}{{{self.num_models}}} = {mean_val:.4f}",
            inline=False
        ))

        # Median calculation
        explanation.add_element(ContentAST.Paragraph([
            "**Median:**"
        ]))

        sorted_preds = sorted(self.predictions)
        sorted_str = ", ".join([f"{p:.1f}" for p in sorted_preds])
        median_val = np.median(self.predictions)

        explanation.add_element(ContentAST.Paragraph([
            f"Sorted predictions: {sorted_str}"
        ]))

        if self.num_models % 2 == 1:
            mid_idx = self.num_models // 2
            explanation.add_element(ContentAST.Paragraph([
                f"Middle value (position {mid_idx + 1}): {median_val:.1f}"
            ]))
        else:
            mid_idx1 = self.num_models // 2 - 1
            mid_idx2 = self.num_models // 2
            explanation.add_element(ContentAST.Paragraph([
                f"Average of the middle two values (positions {mid_idx1 + 1} and {mid_idx2 + 1}): "
                f"({sorted_preds[mid_idx1]:.1f} + {sorted_preds[mid_idx2]:.1f}) / 2 = {median_val:.1f}"
            ]))

        return explanation
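

# --- Editor's sketch (not part of the released wheel): the two ensemble
# combinations above on a made-up set of five predictions.
def _demo_ensemble():
    preds = [4.2, 5.1, 3.8, 5.0, 4.6]
    assert math.isclose(np.mean(preds), sum(preds) / len(preds))
    # Odd count: the median is the middle element of the sorted list
    assert np.median(preds) == sorted(preds)[len(preds) // 2]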


@QuestionRegistry.register()
class EndToEndTrainingQuestion(SimpleNeuralNetworkBase):
    """
    End-to-end training step question.

    Students perform a complete training iteration:
    1. Forward pass → prediction
    2. Loss calculation (binary cross-entropy)
    3. Backpropagation → gradients for specific weights
    4. Weight update → new weight values
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.learning_rate = None
        self.new_W1 = None
        self.new_W2 = None

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate network
        self._generate_network()
        self._select_activation_function()

        # Run forward pass
        self._forward_pass()

        # Generate binary target (0 or 1).
        # Choose the opposite of what the network predicts to create meaningful gradients.
        if self.a2[0] > 0.5:
            self.y_target = 0
        else:
            self.y_target = 1
        self._compute_loss(self.y_target)
        # Round loss to display precision (4 decimal places)
        self.loss = round(self.loss, 4)
        self._compute_output_gradient()

        # Set learning rate (use a small value for stability)
        self.learning_rate = round(self.rng.uniform(0.05, 0.2), 2)

        # Compute updated weights
        self._compute_weight_updates()

        # Create answers
        self._create_answers()

    def _compute_weight_updates(self):
        """Compute new weights after a gradient descent step."""
        # Update W2
        self.new_W2 = np.copy(self.W2)
        for i in range(self.num_hidden):
            grad = self._compute_gradient_W2(i)
            self.new_W2[0, i] = self.W2[0, i] - self.learning_rate * grad

        # Update W1 (first hidden neuron only, for simplicity)
        self.new_W1 = np.copy(self.W1)
        for j in range(self.num_inputs):
            grad = self._compute_gradient_W1(0, j)
            self.new_W1[0, j] = self.W1[0, j] - self.learning_rate * grad
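
    # --- Editor's sketch (not part of the released wheel): the update rule
    # used above, w_new = w - alpha * dL/dw, with illustrative numbers in the
    # question's typical learning-rate range.
    @staticmethod
    def _demo_weight_update():
        w, alpha, grad = 0.7, 0.1, -0.15
        w_new = w - alpha * grad      # 0.7 - 0.1 * (-0.15) = 0.715
        assert math.isclose(w_new, 0.715)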

    def _create_answers(self):
        """Create answer fields for all steps."""
        self.answers = {}

        # Forward pass answer
        self.answers["y_pred"] = Answer.float_value("y_pred", float(self.a2[0]))

        # Loss answer
        self.answers["loss"] = Answer.float_value("loss", float(self.loss))

        # Gradient answers (for key weights)
        self.answers["grad_w3"] = Answer.auto_float("grad_w3", self._compute_gradient_W2(0))
        self.answers["grad_w11"] = Answer.auto_float("grad_w11", self._compute_gradient_W1(0, 0))

        # Updated weight answers
        self.answers["new_w3"] = Answer.float_value("new_w3", float(self.new_W2[0, 0]))
        self.answers["new_w11"] = Answer.float_value("new_w11", float(self.new_W1[0, 0]))

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            f"Given the neural network below with {self._get_activation_name()} activation "
            f"in the hidden layer and sigmoid activation in the output layer (for binary classification), "
            f"perform one complete training step (forward pass, loss calculation, "
            f"backpropagation, and weight update) for the given input and target."
        ]))

        # Network diagram
        body.add_element(
            ContentAST.Picture(
                img_data=self._generate_network_diagram(show_weights=True, show_activations=False)
            )
        )

        # Training parameters
        body.add_element(ContentAST.Paragraph([
            "**Training parameters:**"
        ]))

        body.add_element(ContentAST.Paragraph([
            "Input: ",
            ContentAST.Equation(f"x_1 = {self.X[0]:.1f}", inline=True),
            ", ",
            ContentAST.Equation(f"x_2 = {self.X[1]:.1f}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            "Target: ",
            ContentAST.Equation(f"y = {int(self.y_target)}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            "Learning rate: ",
            ContentAST.Equation(f"\\alpha = {self.learning_rate}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            f"**Hidden layer activation:** {self._get_activation_name()}"
        ]))

        # Network parameters table
        body.add_element(self._generate_parameter_table(include_activations=False))

        # Create answer block
        answers = []

        answers.append(
            ContentAST.Answer(
                answer=self.answers["y_pred"],
                label="1. Forward pass - network output ŷ"
            )
        )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["loss"],
                label="2. Loss"
            )
        )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["grad_w3"],
                label="3. Gradient ∂L/∂w₃"
            )
        )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["grad_w11"],
                label="4. Gradient ∂L/∂w₁₁"
            )
        )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["new_w3"],
                label="5. Updated w₃"
            )
        )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["new_w11"],
                label="6. Updated w₁₁"
            )
        )

        body.add_element(ContentAST.AnswerBlock(answers))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "This problem requires performing one complete training iteration. Let's go through each step."
        ]))

        # Step 1: Forward pass
        explanation.add_element(ContentAST.Paragraph([
            "**Step 1: Forward Pass**"
        ]))

        # Hidden layer
        explanation.add_element(ContentAST.Equation(
            f"z_1 = w_{{11}} x_1 + w_{{12}} x_2 + b_1 = {self.W1[0,0]:.{self.param_digits}f} \\cdot {self.X[0]:.1f} + {self.W1[0,1]:.{self.param_digits}f} \\cdot {self.X[1]:.1f} + {self.b1[0]:.{self.param_digits}f} = {self.z1[0]:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Equation(
            f"h_1 = {self._get_activation_name()}(z_1) = {self.a1[0]:.4f}",
            inline=False
        ))

        # Similarly for h2 (abbreviated)
        explanation.add_element(ContentAST.Equation(
            f"h_2 = {self.a1[1]:.4f} \\text{{ (calculated similarly)}}",
            inline=False
        ))

        # Output (pre-activation)
        explanation.add_element(ContentAST.Equation(
            f"z_{{out}} = w_3 h_1 + w_4 h_2 + b_2 = {self.W2[0,0]:.{self.param_digits}f} \\cdot {self.a1[0]:.4f} + {self.W2[0,1]:.{self.param_digits}f} \\cdot {self.a1[1]:.4f} + {self.b2[0]:.{self.param_digits}f} = {self.z2[0]:.4f}",
            inline=False
        ))

        # Output (sigmoid activation)
        explanation.add_element(ContentAST.Equation(
            f"\\hat{{y}} = \\sigma(z_{{out}}) = \\frac{{1}}{{1 + e^{{-{self.z2[0]:.4f}}}}} = {self.a2[0]:.4f}",
            inline=False
        ))

        # Step 2: Loss
        explanation.add_element(ContentAST.Paragraph([
            "**Step 2: Calculate Loss (Binary Cross-Entropy)**"
        ]))

        # Show the full BCE formula first
        explanation.add_element(ContentAST.Equation(
            "L = -[y \\log(\\hat{y}) + (1-y) \\log(1-\\hat{y})]",
            inline=False
        ))

        # Then evaluate it
        if self.y_target == 1:
            explanation.add_element(ContentAST.Equation(
                f"L = -[1 \\cdot \\log({self.a2[0]:.4f}) + 0 \\cdot \\log(1-{self.a2[0]:.4f})] = -\\log({self.a2[0]:.4f}) = {self.loss:.4f}",
                inline=False
            ))
        else:
            explanation.add_element(ContentAST.Equation(
                f"L = -[0 \\cdot \\log({self.a2[0]:.4f}) + 1 \\cdot \\log(1-{self.a2[0]:.4f})] = -\\log({1-self.a2[0]:.4f}) = {self.loss:.4f}",
                inline=False
            ))

        # Step 3: Gradients
        explanation.add_element(ContentAST.Paragraph([
            "**Step 3: Compute Gradients**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "For BCE with sigmoid, the output layer gradient simplifies to:"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L}}{{\\partial z_{{out}}}} = \\hat{{y}} - y = {self.a2[0]:.4f} - {int(self.y_target)} = {self.dL_dz2:.4f}",
            inline=False
        ))

        grad_w3 = self._compute_gradient_W2(0)
        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L}}{{\\partial w_3}} = \\frac{{\\partial L}}{{\\partial z_{{out}}}} \\cdot h_1 = {self.dL_dz2:.4f} \\cdot {self.a1[0]:.4f} = {grad_w3:.4f}",
            inline=False
        ))

        grad_w11 = self._compute_gradient_W1(0, 0)
        dz2_da1 = self.W2[0, 0]
        da1_dz1 = self._activation_derivative(self.z1[0])

        if self.activation_function == self.ACTIVATION_SIGMOID:
            act_deriv_str = "h_1(1-h_1)"
        elif self.activation_function == self.ACTIVATION_RELU:
            act_deriv_str = "\\text{ReLU}'(z_1)"
        else:
            act_deriv_str = "1"

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L}}{{\\partial w_{{11}}}} = \\frac{{\\partial L}}{{\\partial z_{{out}}}} \\cdot w_3 \\cdot {act_deriv_str} \\cdot x_1 = {self.dL_dz2:.4f} \\cdot {dz2_da1:.4f} \\cdot {da1_dz1:.4f} \\cdot {self.X[0]:.1f} = {grad_w11:.4f}",
            inline=False
        ))

        # Step 4: Weight updates
        explanation.add_element(ContentAST.Paragraph([
            "**Step 4: Update Weights**"
        ]))

        new_w3 = self.new_W2[0, 0]
        explanation.add_element(ContentAST.Equation(
            f"w_3^{{new}} = w_3 - \\alpha \\frac{{\\partial L}}{{\\partial w_3}} = {self.W2[0,0]:.{self.param_digits}f} - {self.learning_rate} \\cdot {grad_w3:.4f} = {new_w3:.4f}",
            inline=False
        ))

        new_w11 = self.new_W1[0, 0]
        explanation.add_element(ContentAST.Equation(
            f"w_{{11}}^{{new}} = w_{{11}} - \\alpha \\frac{{\\partial L}}{{\\partial w_{{11}}}} = {self.W1[0,0]:.{self.param_digits}f} - {self.learning_rate} \\cdot {grad_w11:.4f} = {new_w11:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Paragraph([
            "These updated weights would be used in the next training iteration."
        ]))

        return explanation
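

# --- Editor's sketch (not part of the released wheel): one full training step,
# end to end, mirroring the four steps above on a tiny fixed network.
# Standalone numpy; no QuizGenerator API involved.
def _demo_training_step():
    W1 = np.array([[0.5, -1.0], [1.5, 0.2]])
    b1 = np.array([0.1, -0.3])
    W2 = np.array([[0.7, -0.6]])
    b2 = np.array([0.2])
    X = np.array([2.0, 1.0])
    y, alpha = 1.0, 0.1

    def sigmoid(t):
        return 1 / (1 + np.exp(-t))

    # Step 1: forward pass (sigmoid hidden and output layers)
    z1 = W1 @ X + b1
    a1 = sigmoid(z1)
    y_hat = sigmoid((W2 @ a1 + b2)[0])

    # Step 2: binary cross-entropy loss
    loss = -(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))

    # Step 3: gradients via the chain rule
    dL_dz_out = y_hat - y
    dL_dw3 = dL_dz_out * a1[0]
    dL_dw11 = dL_dz_out * W2[0, 0] * a1[0] * (1 - a1[0]) * X[0]

    # Step 4: gradient descent update
    new_w3 = W2[0, 0] - alpha * dL_dw3
    new_w11 = W1[0, 0] - alpha * dL_dw11
    return loss, new_w3, new_w11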