quizgenerator-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. QuizGenerator/README.md +5 -0
  2. QuizGenerator/__init__.py +27 -0
  3. QuizGenerator/__main__.py +7 -0
  4. QuizGenerator/canvas/__init__.py +13 -0
  5. QuizGenerator/canvas/canvas_interface.py +622 -0
  6. QuizGenerator/canvas/classes.py +235 -0
  7. QuizGenerator/constants.py +149 -0
  8. QuizGenerator/contentast.py +1809 -0
  9. QuizGenerator/generate.py +362 -0
  10. QuizGenerator/logging.yaml +55 -0
  11. QuizGenerator/misc.py +480 -0
  12. QuizGenerator/mixins.py +539 -0
  13. QuizGenerator/performance.py +202 -0
  14. QuizGenerator/premade_questions/__init__.py +0 -0
  15. QuizGenerator/premade_questions/basic.py +103 -0
  16. QuizGenerator/premade_questions/cst334/__init__.py +1 -0
  17. QuizGenerator/premade_questions/cst334/languages.py +395 -0
  18. QuizGenerator/premade_questions/cst334/math_questions.py +297 -0
  19. QuizGenerator/premade_questions/cst334/memory_questions.py +1398 -0
  20. QuizGenerator/premade_questions/cst334/ostep13_vsfs.py +572 -0
  21. QuizGenerator/premade_questions/cst334/persistence_questions.py +396 -0
  22. QuizGenerator/premade_questions/cst334/process.py +649 -0
  23. QuizGenerator/premade_questions/cst463/__init__.py +0 -0
  24. QuizGenerator/premade_questions/cst463/gradient_descent/__init__.py +3 -0
  25. QuizGenerator/premade_questions/cst463/gradient_descent/gradient_calculation.py +369 -0
  26. QuizGenerator/premade_questions/cst463/gradient_descent/gradient_descent_questions.py +305 -0
  27. QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py +650 -0
  28. QuizGenerator/premade_questions/cst463/gradient_descent/misc.py +73 -0
  29. QuizGenerator/premade_questions/cst463/math_and_data/__init__.py +2 -0
  30. QuizGenerator/premade_questions/cst463/math_and_data/matrix_questions.py +631 -0
  31. QuizGenerator/premade_questions/cst463/math_and_data/vector_questions.py +534 -0
  32. QuizGenerator/premade_questions/cst463/neural-network-basics/__init__.py +6 -0
  33. QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py +1264 -0
  34. QuizGenerator/premade_questions/cst463/tensorflow-intro/__init__.py +6 -0
  35. QuizGenerator/premade_questions/cst463/tensorflow-intro/tensorflow_questions.py +936 -0
  36. QuizGenerator/qrcode_generator.py +293 -0
  37. QuizGenerator/question.py +657 -0
  38. QuizGenerator/quiz.py +468 -0
  39. QuizGenerator/typst_utils.py +113 -0
  40. quizgenerator-0.1.0.dist-info/METADATA +263 -0
  41. quizgenerator-0.1.0.dist-info/RECORD +44 -0
  42. quizgenerator-0.1.0.dist-info/WHEEL +4 -0
  43. quizgenerator-0.1.0.dist-info/entry_points.txt +2 -0
  44. quizgenerator-0.1.0.dist-info/licenses/LICENSE +674 -0
QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py
@@ -0,0 +1,1264 @@
+ from __future__ import annotations
+
+ import abc
+ import io
+ import logging
+ import math
+ import numpy as np
+ import uuid
+ import os
+ from typing import List, Tuple, Dict, Any
+
+ import matplotlib.pyplot as plt
+ import matplotlib.patches as mpatches
+
+ from QuizGenerator.contentast import ContentAST
+ from QuizGenerator.question import Question, Answer, QuestionRegistry
+ from QuizGenerator.mixins import TableQuestionMixin, BodyTemplatesMixin
+
+ log = logging.getLogger(__name__)
+
+
+ class SimpleNeuralNetworkBase(Question, abc.ABC):
+     """
+     Base class for simple neural network questions.
+
+     Generates a small feedforward network:
+     - 2-3 input neurons
+     - 2 hidden neurons (single hidden layer)
+     - 1 output neuron
+     - Random weights and biases
+     - Runs forward pass and stores all activations
+     """
+
+     # Activation function types
+     ACTIVATION_SIGMOID = "sigmoid"
+     ACTIVATION_RELU = "relu"
+     ACTIVATION_LINEAR = "linear"
+
+     def __init__(self, *args, **kwargs):
+         kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
+         super().__init__(*args, **kwargs)
+
+         # Network architecture parameters
+         self.num_inputs = kwargs.get("num_inputs", 2)
+         self.num_hidden = kwargs.get("num_hidden", 2)
+         self.num_outputs = kwargs.get("num_outputs", 1)
+
+         # Configuration
+         self.activation_function = None
+         self.use_bias = kwargs.get("use_bias", True)
+
+         # Network parameters (weights and biases)
+         self.W1 = None  # Input to hidden weights (num_hidden x num_inputs)
+         self.b1 = None  # Hidden layer biases (num_hidden,)
+         self.W2 = None  # Hidden to output weights (num_outputs x num_hidden)
+         self.b2 = None  # Output layer biases (num_outputs,)
+
+         # Input data and forward pass results
+         self.X = None   # Input values (num_inputs,)
+         self.z1 = None  # Hidden layer pre-activation (num_hidden,)
+         self.a1 = None  # Hidden layer activations (num_hidden,)
+         self.z2 = None  # Output layer pre-activation (num_outputs,)
+         self.a2 = None  # Output layer activation (prediction)
+
+         # Target and loss (for backprop questions)
+         self.y_target = None
+         self.loss = None
+
+         # Gradients (for backprop questions)
+         self.dL_da2 = None   # Gradient of loss w.r.t. output
+         self.da2_dz2 = None  # Gradient of activation w.r.t. pre-activation
+         self.dL_dz2 = None   # Gradient of loss w.r.t. output pre-activation
+
+     def _generate_network(self, weight_range=(-2, 2), input_range=(-3, 3)):
+         """Generate random network parameters and input."""
+         # Generate weights (using small values for numerical stability)
+         self.W1 = np.array([
+             [self.rng.uniform(weight_range[0], weight_range[1])
+              for _ in range(self.num_inputs)]
+             for _ in range(self.num_hidden)
+         ])
+
+         self.W2 = np.array([
+             [self.rng.uniform(weight_range[0], weight_range[1])
+              for _ in range(self.num_hidden)]
+             for _ in range(self.num_outputs)
+         ])
+
+         # Generate biases
+         if self.use_bias:
+             self.b1 = np.array([
+                 self.rng.uniform(weight_range[0], weight_range[1])
+                 for _ in range(self.num_hidden)
+             ])
+             self.b2 = np.array([
+                 self.rng.uniform(weight_range[0], weight_range[1])
+                 for _ in range(self.num_outputs)
+             ])
+         else:
+             self.b1 = np.zeros(self.num_hidden)
+             self.b2 = np.zeros(self.num_outputs)
+
+         # Round weights to make calculations cleaner
+         self.W1 = np.round(self.W1 * 2) / 2  # Round to nearest 0.5
+         self.W2 = np.round(self.W2 * 2) / 2
+         self.b1 = np.round(self.b1 * 2) / 2
+         self.b2 = np.round(self.b2 * 2) / 2
+
+         # Generate input values
+         self.X = np.array([
+             self.rng.uniform(input_range[0], input_range[1])
+             for _ in range(self.num_inputs)
+         ])
+         self.X = np.round(self.X)  # Use integer inputs for simplicity
+
+     def _select_activation_function(self):
+         """Randomly select an activation function."""
+         activations = [
+             self.ACTIVATION_SIGMOID,
+             self.ACTIVATION_RELU
+         ]
+         self.activation_function = self.rng.choice(activations)
+
+     def _apply_activation(self, z, function_type=None):
+         """Apply activation function to pre-activation values."""
+         if function_type is None:
+             function_type = self.activation_function
+
+         if function_type == self.ACTIVATION_SIGMOID:
+             return 1 / (1 + np.exp(-z))
+         elif function_type == self.ACTIVATION_RELU:
+             return np.maximum(0, z)
+         elif function_type == self.ACTIVATION_LINEAR:
+             return z
+         else:
+             raise ValueError(f"Unknown activation function: {function_type}")
+
+     def _activation_derivative(self, z, function_type=None):
+         """Compute derivative of activation function."""
+         if function_type is None:
+             function_type = self.activation_function
+
+         if function_type == self.ACTIVATION_SIGMOID:
+             a = self._apply_activation(z, function_type)
+             return a * (1 - a)
+         elif function_type == self.ACTIVATION_RELU:
+             return np.where(z > 0, 1, 0)
+         elif function_type == self.ACTIVATION_LINEAR:
+             return np.ones_like(z)
+         else:
+             raise ValueError(f"Unknown activation function: {function_type}")
+
+     def _forward_pass(self):
+         """Run forward pass through the network."""
+         # Hidden layer
+         self.z1 = self.W1 @ self.X + self.b1
+         self.a1 = self._apply_activation(self.z1)
+
+         # Output layer
+         self.z2 = self.W2 @ self.a1 + self.b2
+         self.a2 = self._apply_activation(self.z2, self.ACTIVATION_LINEAR)  # Linear output
+
+         # Round all computed values to display precision to ensure students can reproduce calculations
+         # We display z and a values with 4 decimal places
+         self.z1 = np.round(self.z1, 4)
+         self.a1 = np.round(self.a1, 4)
+         self.z2 = np.round(self.z2, 4)
+         self.a2 = np.round(self.a2, 4)
+
+         return self.a2
+
+     def _compute_loss(self, y_target):
+         """Compute MSE loss."""
+         self.y_target = y_target
+         self.loss = 0.5 * (y_target - self.a2[0]) ** 2
+         return self.loss
+
+     def _compute_output_gradient(self):
+         """Compute gradient of loss w.r.t. output."""
+         # For MSE loss: dL/da2 = -(y - a2)
+         self.dL_da2 = -(self.y_target - self.a2[0])
+
+         # For linear output activation: da2/dz2 = 1
+         self.da2_dz2 = 1.0
+
+         # Chain rule: dL/dz2 = dL/da2 * da2/dz2
+         self.dL_dz2 = self.dL_da2 * self.da2_dz2
+
+         return self.dL_dz2
+
+     def _compute_gradient_W2(self, hidden_idx):
+         """Compute gradient ∂L/∂W2[0, hidden_idx]."""
+         # ∂L/∂w = dL/dz2 * ∂z2/∂w = dL/dz2 * a1[hidden_idx]
+         return float(self.dL_dz2 * self.a1[hidden_idx])
+
+     def _compute_gradient_W1(self, hidden_idx, input_idx):
+         """Compute gradient ∂L/∂W1[hidden_idx, input_idx]."""
+         # dL/dz1[hidden_idx] = dL/dz2 * ∂z2/∂a1[hidden_idx] * ∂a1/∂z1[hidden_idx]
+         #                    = dL/dz2 * W2[0, hidden_idx] * activation'(z1[hidden_idx])
+
+         dz2_da1 = self.W2[0, hidden_idx]
+         da1_dz1 = self._activation_derivative(self.z1[hidden_idx])
+
+         dL_dz1 = self.dL_dz2 * dz2_da1 * da1_dz1
+
+         # ∂L/∂w = dL/dz1 * ∂z1/∂w = dL/dz1 * X[input_idx]
+         return float(dL_dz1 * self.X[input_idx])
+
+     def _get_activation_name(self):
+         """Get human-readable activation function name."""
+         if self.activation_function == self.ACTIVATION_SIGMOID:
+             return "sigmoid"
+         elif self.activation_function == self.ACTIVATION_RELU:
+             return "ReLU"
+         elif self.activation_function == self.ACTIVATION_LINEAR:
+             return "linear"
+         return "unknown"
+
+     def _get_activation_formula(self):
+         """Get LaTeX formula for activation function."""
+         if self.activation_function == self.ACTIVATION_SIGMOID:
+             return r"\sigma(z) = \frac{1}{1 + e^{-z}}"
+         elif self.activation_function == self.ACTIVATION_RELU:
+             return r"\text{ReLU}(z) = \max(0, z)"
+         elif self.activation_function == self.ACTIVATION_LINEAR:
+             return r"f(z) = z"
+         return ""
+
+     def _generate_parameter_table(self, include_activations=False, include_training_context=False):
+         """
+         Generate side-by-side tables showing all network parameters.
+
+         Args:
+             include_activations: If True, include computed activation values
+             include_training_context: If True, include target, loss, etc. (for backprop questions)
+
+         Returns:
+             ContentAST.TableGroup with network parameters in two side-by-side tables
+         """
+         # Left table: Inputs & Weights
+         left_data = []
+         left_data.append(["Symbol", "Value"])
+
+         # Input values
+         for i in range(self.num_inputs):
+             left_data.append([
+                 ContentAST.Equation(f"x_{i+1}", inline=True),
+                 f"{self.X[i]:.1f}"
+             ])
+
+         # Weights from input to hidden
+         for j in range(self.num_hidden):
+             for i in range(self.num_inputs):
+                 left_data.append([
+                     ContentAST.Equation(f"w_{{{j+1}{i+1}}}", inline=True),
+                     f"{self.W1[j, i]:.1f}"
+                 ])
+
+         # Weights from hidden to output
+         for i in range(self.num_hidden):
+             left_data.append([
+                 ContentAST.Equation(f"w_{i+3}", inline=True),
+                 f"{self.W2[0, i]:.1f}"
+             ])
+
+         # Right table: Biases, Activations, Training context
+         right_data = []
+         right_data.append(["Symbol", "Value"])
+
+         # Hidden layer biases
+         if self.use_bias:
+             for j in range(self.num_hidden):
+                 right_data.append([
+                     ContentAST.Equation(f"b_{j+1}", inline=True),
+                     f"{self.b1[j]:.1f}"
+                 ])
+
+         # Output bias
+         if self.use_bias:
+             right_data.append([
+                 ContentAST.Equation(r"b_{out}", inline=True),
+                 f"{self.b2[0]:.1f}"
+             ])
+
+         # Hidden layer activations (if computed and requested)
+         if include_activations and self.a1 is not None:
+             for i in range(self.num_hidden):
+                 right_data.append([
+                     ContentAST.Equation(f"h_{i+1}", inline=True),
+                     f"{self.a1[i]:.4f}"
+                 ])
+
+         # Output activation (if computed and requested)
+         if include_activations and self.a2 is not None:
+             right_data.append([
+                 ContentAST.Equation(r"\hat{y}", inline=True),
+                 f"{self.a2[0]:.4f}"
+             ])
+
+         # Training context (target, loss - for backprop questions)
+         if include_training_context:
+             if self.y_target is not None:
+                 right_data.append([
+                     ContentAST.Equation("y", inline=True),
+                     f"{self.y_target:.2f}"
+                 ])
+
+             if self.loss is not None:
+                 right_data.append([
+                     ContentAST.Equation("L", inline=True),
+                     f"{self.loss:.4f}"
+                 ])
+
+         # Create table group
+         table_group = ContentAST.TableGroup()
+         table_group.add_table(ContentAST.Table(data=left_data))
+         table_group.add_table(ContentAST.Table(data=right_data))
+
+         return table_group
+
+     def _generate_network_diagram(self, show_weights=True, show_activations=False):
+         """
+         Generate a simple, clean network diagram.
+
+         Args:
+             show_weights: If True, display weights on edges
+             show_activations: If True, display activation values on nodes
+
+         Returns:
+             BytesIO buffer containing PNG image
+         """
+         # Create figure with tight layout and equal aspect ratio
+         fig = plt.figure(figsize=(8, 2.5))
+         ax = fig.add_subplot(111)
+         ax.set_aspect('equal', adjustable='box')  # Keep circles circular
+         ax.axis('off')
+
+         # Node radius
+         r = 0.15
+
+         # Layer x-positions
+         input_x = 0.5
+         hidden_x = 2.0
+         output_x = 3.5
+
+         # Calculate y-positions for nodes (top to bottom order)
+         def get_y_positions(n, include_bias=False):
+             # If including bias, need one more position at the top
+             total_nodes = n + 1 if include_bias else n
+             if total_nodes == 1:
+                 return [1.0]
+             spacing = min(2.0 / (total_nodes - 1), 0.6)
+             # Start from top
+             start = 1.0 + (total_nodes - 1) * spacing / 2
+             positions = [start - i * spacing for i in range(total_nodes)]
+             return positions
+
+         # Input layer: bias (if present) at top, then x_1, x_2, ... going down
+         input_positions = get_y_positions(self.num_inputs, include_bias=self.use_bias)
+         if self.use_bias:
+             bias1_y = input_positions[0]
+             input_y = input_positions[1:]  # x_1 is second (below bias), x_2 is third, etc.
+         else:
+             bias1_y = None
+             input_y = input_positions
+
+         # Hidden layer: bias (if present) at top, then h_1, h_2, ... going down
+         hidden_positions = get_y_positions(self.num_hidden, include_bias=self.use_bias)
+         if self.use_bias:
+             bias2_y = hidden_positions[0]
+             hidden_y = hidden_positions[1:]
+         else:
+             bias2_y = None
+             hidden_y = hidden_positions
+
+         # Output layer: centered
+         output_y = [1.0]
+
+         # Draw edges first (so they're behind nodes)
+         # Input to hidden
+         for i in range(self.num_inputs):
+             for j in range(self.num_hidden):
+                 ax.plot([input_x, hidden_x], [input_y[i], hidden_y[j]],
+                         'k-', linewidth=1, alpha=0.7, zorder=1)
+                 if show_weights:
+                     label_x = input_x + 0.3
+                     label_y = input_y[i] + (hidden_y[j] - input_y[i]) * 0.2
+                     # Use LaTeX math mode for proper subscript rendering
+                     weight_label = f'$w_{{{j+1}{i+1}}}$'
+                     ax.text(label_x, label_y, weight_label, fontsize=8,
+                             bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))
+
+         # Bias to hidden
+         if self.use_bias:
+             for j in range(self.num_hidden):
+                 ax.plot([input_x, hidden_x], [bias1_y, hidden_y[j]],
+                         'k-', linewidth=1, alpha=0.7, zorder=1)
+                 if show_weights:
+                     label_x = input_x + 0.3
+                     label_y = bias1_y + (hidden_y[j] - bias1_y) * 0.2
+                     bias_label = f'$b_{{{j+1}}}$'
+                     ax.text(label_x, label_y, bias_label, fontsize=8,
+                             bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))
+
+         # Hidden to output
+         for i in range(self.num_hidden):
+             ax.plot([hidden_x, output_x], [hidden_y[i], output_y[0]],
+                     'k-', linewidth=1, alpha=0.7, zorder=1)
+             if show_weights:
+                 label_x = hidden_x + 0.3
+                 label_y = hidden_y[i] + (output_y[0] - hidden_y[i]) * 0.2
+                 weight_label = f'$w_{{{i+3}}}$'
+                 ax.text(label_x, label_y, weight_label, fontsize=8,
+                         bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))
+
+         # Bias to output
+         if self.use_bias:
+             ax.plot([hidden_x, output_x], [bias2_y, output_y[0]],
+                     'k-', linewidth=1, alpha=0.7, zorder=1)
+             if show_weights:
+                 label_x = hidden_x + 0.3
+                 label_y = bias2_y + (output_y[0] - bias2_y) * 0.2
+                 bias_label = r'$b_{out}$'
+                 ax.text(label_x, label_y, bias_label, fontsize=8,
+                         bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))
+
+         # Draw nodes
+         # Input nodes
+         for i, y in enumerate(input_y):
+             circle = plt.Circle((input_x, y), r, facecolor='lightgray',
+                                 edgecolor='black', linewidth=1.5, zorder=10)
+             ax.add_patch(circle)
+             label = f'$x_{{{i+1}}}$' if not show_activations else f'$x_{{{i+1}}}$={self.X[i]:.1f}'
+             ax.text(input_x - r - 0.15, y, label, fontsize=10, ha='right', va='center')
+
+         # Bias nodes
+         if self.use_bias:
+             circle = plt.Circle((input_x, bias1_y), r, facecolor='lightgray',
+                                 edgecolor='black', linewidth=1.5, zorder=10)
+             ax.add_patch(circle)
+             ax.text(input_x, bias1_y, '1', fontsize=10, ha='center', va='center', weight='bold')
+
+             circle = plt.Circle((hidden_x, bias2_y), r, facecolor='lightgray',
+                                 edgecolor='black', linewidth=1.5, zorder=10)
+             ax.add_patch(circle)
+             ax.text(hidden_x, bias2_y, '1', fontsize=10, ha='center', va='center', weight='bold')
+
+         # Hidden nodes
+         for i, y in enumerate(hidden_y):
+             circle = plt.Circle((hidden_x, y), r, facecolor='lightblue',
+                                 edgecolor='black', linewidth=1.5, zorder=10)
+             ax.add_patch(circle)
+             ax.plot([hidden_x, hidden_x], [y - r*0.7, y + r*0.7], 'k-', linewidth=1.2, zorder=11)
+             ax.text(hidden_x - r*0.35, y, r'$\Sigma$', fontsize=11, ha='center', va='center', zorder=12)
+             ax.text(hidden_x + r*0.35, y, r'$f$', fontsize=10, ha='center', va='center', zorder=12, style='italic')
+             if show_activations and self.a1 is not None:
+                 ax.text(hidden_x, y - r - 0.15, f'{self.a1[i]:.2f}', fontsize=8, ha='center', va='top')
+
+         # Output node
+         y = output_y[0]
+         circle = plt.Circle((output_x, y), r, facecolor='lightblue',
+                             edgecolor='black', linewidth=1.5, zorder=10)
+         ax.add_patch(circle)
+         ax.plot([output_x, output_x], [y - r*0.7, y + r*0.7], 'k-', linewidth=1.2, zorder=11)
+         ax.text(output_x - r*0.35, y, r'$\Sigma$', fontsize=11, ha='center', va='center', zorder=12)
+         ax.text(output_x + r*0.35, y, r'$f$', fontsize=10, ha='center', va='center', zorder=12, style='italic')
+         label = r'$\hat{y}$' if not show_activations else f'$\\hat{{y}}$={self.a2[0]:.2f}'
+         ax.text(output_x + r + 0.15, y, label, fontsize=10, ha='left', va='center')
+
+         # Save to buffer with minimal padding
+         buffer = io.BytesIO()
+         plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight',
+                     facecolor='white', edgecolor='none', pad_inches=0.0)
+         plt.close(fig)
+         buffer.seek(0)
+
+         return buffer
+
+     def _generate_ascii_network(self):
+         """Generate ASCII art representation of the network for alt-text."""
+         lines = []
+         lines.append("Network Architecture:")
+         lines.append("")
+         lines.append("Input Layer:          Hidden Layer:         Output Layer:")
+
+         # For 2 inputs, 2 hidden, 1 output
+         if self.num_inputs == 2 and self.num_hidden == 2:
+             lines.append(" x₁ ----[w₁₁]---→ h₁ ----[w₃]----→")
+             lines.append("     \\           /     \\          /")
+             lines.append("      \\         /       \\        /")
+             lines.append("       \\       /         \\      /       ŷ")
+             lines.append("        \\     /           \\    /")
+             lines.append("         \\   /             \\  /")
+             lines.append("          \\ /               \\/")
+             lines.append("          / \\               /\\")
+             lines.append("         /   \\             /  \\")
+             lines.append(" x₂ ----[w₂₁]---→ h₂ ----[w₄]----→")
+         else:
+             # Generic representation
+             for i in range(max(self.num_inputs, self.num_hidden)):
+                 parts = []
+                 if i < self.num_inputs:
+                     parts.append(f" x{i+1}")
+                 else:
+                     parts.append("   ")
+                 parts.append(" ---→ ")
+                 if i < self.num_hidden:
+                     parts.append(f"h{i+1}")
+                 else:
+                     parts.append("  ")
+                 parts.append(" ---→ ")
+                 if i == self.num_hidden // 2:
+                     parts.append("ŷ")
+                 lines.append("".join(parts))
+
+         lines.append("")
+         lines.append(f"Activation function: {self._get_activation_name()}")
+
+         return "\n".join(lines)
+
+
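+ # The forward-pass arithmetic implemented above can be sanity-checked with
+ # plain NumPy. This is a minimal sketch, assuming the default 2-2-1 shape with
+ # ReLU hidden units; the concrete weight and input values are illustrative
+ # only, and this helper is not used anywhere else in the package.
+ def _sketch_forward_pass_check():
+     """Minimal sketch: one forward pass through a 2-2-1 network with ReLU."""
+     W1 = np.array([[0.5, -1.0],
+                    [1.5, 0.5]])    # hidden x inputs, as in _generate_network
+     b1 = np.array([0.5, -0.5])
+     W2 = np.array([[1.0, -0.5]])   # outputs x hidden
+     b2 = np.array([0.5])
+     X = np.array([1.0, 2.0])
+
+     z1 = W1 @ X + b1               # hidden pre-activations
+     a1 = np.maximum(0, z1)         # ReLU, as in _apply_activation
+     z2 = W2 @ a1 + b2              # output pre-activation
+     y_hat = z2[0]                  # linear output, as in _forward_pass
+     return z1, a1, y_hat           # here: z1 = [-1., 2.], a1 = [0., 2.], ŷ = -0.5
+
+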
+ @QuestionRegistry.register()
+ class ForwardPassQuestion(SimpleNeuralNetworkBase):
+     """
+     Question asking students to calculate the forward pass through a simple network.
+
+     Students calculate:
+     - Hidden layer activations (h₁, h₂)
+     - Final output (ŷ)
+     """
+
+     def refresh(self, rng_seed=None, *args, **kwargs):
+         super().refresh(rng_seed=rng_seed, *args, **kwargs)
+
+         # Generate network
+         self._generate_network()
+         self._select_activation_function()
+
+         # Run forward pass to get correct answers
+         self._forward_pass()
+
+         # Create answer fields
+         self._create_answers()
+
+     def _create_answers(self):
+         """Create answer fields for forward pass values."""
+         self.answers = {}
+
+         # Hidden layer activations
+         for i in range(self.num_hidden):
+             key = f"h{i+1}"
+             self.answers[key] = Answer.float_value(key, float(self.a1[i]))
+
+         # Output
+         self.answers["y_pred"] = Answer.float_value("y_pred", float(self.a2[0]))
+
+     def get_body(self, **kwargs) -> ContentAST.Section:
+         body = ContentAST.Section()
+
+         # Question description
+         body.add_element(ContentAST.Paragraph([
+             f"Given the neural network below with {self._get_activation_name()} activation "
+             f"in the hidden layer and linear activation (f(z) = z) in the output layer, "
+             f"calculate the forward pass for the given input values."
+         ]))
+
+         # Network diagram
+         body.add_element(
+             ContentAST.Picture(
+                 img_data=self._generate_network_diagram(show_weights=True, show_activations=False),
+                 caption="Neural network architecture"
+             )
+         )
+
+         # Network parameters table
+         body.add_element(self._generate_parameter_table(include_activations=False))
+
+         # Activation function
+         body.add_element(ContentAST.Paragraph([
+             f"**Activation function:** {self._get_activation_name()}"
+         ]))
+
+         # Create answer block
+         answers = []
+         for i in range(self.num_hidden):
+             answers.append(
+                 ContentAST.Answer(
+                     answer=self.answers[f"h{i+1}"],
+                     label=f"h_{i+1} (hidden neuron {i+1} output)"
+                 )
+             )
+
+         answers.append(
+             ContentAST.Answer(
+                 answer=self.answers["y_pred"],
+                 label="ŷ (network output)"
+             )
+         )
+
+         body.add_element(ContentAST.AnswerBlock(answers))
+
+         return body
+
+     def get_explanation(self, **kwargs) -> ContentAST.Section:
+         explanation = ContentAST.Section()
+
+         explanation.add_element(ContentAST.Paragraph([
+             "To solve this problem, we need to compute the forward pass through the network."
+         ]))
+
+         # Hidden layer calculations
+         explanation.add_element(ContentAST.Paragraph([
+             "**Step 1: Calculate hidden layer pre-activations**"
+         ]))
+
+         for i in range(self.num_hidden):
+             # Build equation for z_i
+             terms = []
+             for j in range(self.num_inputs):
+                 terms.append(f"({self.W1[i,j]:.1f})({self.X[j]:.1f})")
+
+             z_calc = " + ".join(terms)
+             if self.use_bias:
+                 z_calc += f" + {self.b1[i]:.1f}"
+
+             explanation.add_element(ContentAST.Equation(
+                 f"z_{i+1} = {z_calc} = {self.z1[i]:.4f}",
+                 inline=False
+             ))
+
+         # Hidden layer activations
+         explanation.add_element(ContentAST.Paragraph([
+             f"**Step 2: Apply {self._get_activation_name()} activation**"
+         ]))
+
+         for i in range(self.num_hidden):
+             if self.activation_function == self.ACTIVATION_SIGMOID:
+                 explanation.add_element(ContentAST.Equation(
+                     f"h_{i+1} = \\sigma(z_{i+1}) = \\frac{{1}}{{1 + e^{{-{self.z1[i]:.4f}}}}} = {self.a1[i]:.4f}",
+                     inline=False
+                 ))
+             elif self.activation_function == self.ACTIVATION_RELU:
+                 explanation.add_element(ContentAST.Equation(
+                     f"h_{i+1} = \\text{{ReLU}}(z_{i+1}) = \\max(0, {self.z1[i]:.4f}) = {self.a1[i]:.4f}",
+                     inline=False
+                 ))
+             else:
+                 explanation.add_element(ContentAST.Equation(
+                     f"h_{i+1} = z_{i+1} = {self.a1[i]:.4f}",
+                     inline=False
+                 ))
+
+         # Output layer
+         explanation.add_element(ContentAST.Paragraph([
+             "**Step 3: Calculate output (with linear activation)**"
+         ]))
+
+         terms = []
+         for j in range(self.num_hidden):
+             terms.append(f"({self.W2[0,j]:.1f})({self.a1[j]:.4f})")
+
+         z_out_calc = " + ".join(terms)
+         if self.use_bias:
+             z_out_calc += f" + {self.b2[0]:.1f}"
+
+         explanation.add_element(ContentAST.Equation(
+             f"z_{{out}} = {z_out_calc} = {self.z2[0]:.4f}",
+             inline=False
+         ))
+
+         explanation.add_element(ContentAST.Equation(
+             f"\\hat{{y}} = f(z_{{out}}) = z_{{out}} = {self.a2[0]:.4f}",
+             inline=False
+         ))
+
+         explanation.add_element(ContentAST.Paragraph([
+             "(Note: the output layer uses linear activation, so the output can be any real number.)"
+         ]))
+
+         return explanation
+
+
+ @QuestionRegistry.register()
+ class BackpropGradientQuestion(SimpleNeuralNetworkBase):
+     """
+     Question asking students to calculate gradients using backpropagation.
+
+     Given a completed forward pass, students calculate:
+     - Gradients for multiple specific weights (∂L/∂w)
+     """
+
+     def refresh(self, rng_seed=None, *args, **kwargs):
+         super().refresh(rng_seed=rng_seed, *args, **kwargs)
+
+         # Generate network
+         self._generate_network()
+         self._select_activation_function()
+
+         # Run forward pass
+         self._forward_pass()
+
+         # Generate target and compute loss
+         # Target should be different from output to create meaningful gradients
+         self.y_target = float(self.a2[0] + self.rng.uniform(1, 3) * self.rng.choice([-1, 1]))
+         # Round target to display precision (2 decimal places)
+         self.y_target = round(self.y_target, 2)
+         self._compute_loss(self.y_target)
+         # Round loss to display precision (4 decimal places)
+         self.loss = round(self.loss, 4)
+         self._compute_output_gradient()
+
+         # Create answer fields for specific weight gradients
+         self._create_answers()
+
+     def _create_answers(self):
+         """Create answer fields for weight gradients."""
+         self.answers = {}
+
+         # Ask for gradients of 2-3 weights
+         # Include at least one from each layer
+
+         # Gradient for W2 (hidden to output)
+         for i in range(self.num_hidden):
+             key = f"dL_dw2_{i}"
+             self.answers[key] = Answer.auto_float(key, self._compute_gradient_W2(i))
+
+         # Gradient for W1 (input to hidden) - pick first hidden neuron
+         for j in range(self.num_inputs):
+             key = f"dL_dw1_0{j}"
+             self.answers[key] = Answer.auto_float(key, self._compute_gradient_W1(0, j))
+
+     def get_body(self, **kwargs) -> ContentAST.Section:
+         body = ContentAST.Section()
+
+         # Question description
+         body.add_element(ContentAST.Paragraph([
+             f"Given the neural network below with {self._get_activation_name()} activation "
+             f"in the hidden layer, a forward pass has been completed with the values shown. "
+             f"Calculate the gradients (∂L/∂w) for the specified weights using backpropagation."
+         ]))
+
+         # Network diagram
+         body.add_element(
+             ContentAST.Picture(
+                 img_data=self._generate_network_diagram(show_weights=True, show_activations=False),
+                 caption="Neural network architecture"
+             )
+         )
+
+         # Network parameters and forward pass results table
+         body.add_element(self._generate_parameter_table(include_activations=True, include_training_context=True))
+
+         # Activation function
+         body.add_element(ContentAST.Paragraph([
+             f"**Activation function:** {self._get_activation_name()}"
+         ]))
+
+         body.add_element(ContentAST.Paragraph([
+             "**Calculate the following gradients:**"
+         ]))
+
+         # Create answer block
+         answers = []
+
+         # W2 gradients
+         for i in range(self.num_hidden):
+             answers.append(
+                 ContentAST.Answer(
+                     answer=self.answers[f"dL_dw2_{i}"],
+                     label=f"∂L/∂w_{i+3} (weight from h_{i+1} to output)"
+                 )
+             )
+
+         # W1 gradients (first hidden neuron)
+         for j in range(self.num_inputs):
+             answers.append(
+                 ContentAST.Answer(
+                     answer=self.answers[f"dL_dw1_0{j}"],
+                     label=f"∂L/∂w_1{j+1} (weight from x_{j+1} to h_1)"
+                 )
+             )
+
+         body.add_element(ContentAST.AnswerBlock(answers))
+
+         return body
+
+     def get_explanation(self, **kwargs) -> ContentAST.Section:
+         explanation = ContentAST.Section()
+
+         explanation.add_element(ContentAST.Paragraph([
+             "To solve this problem, we use the chain rule to compute gradients via backpropagation."
+         ]))
+
+         # Output layer gradient
+         explanation.add_element(ContentAST.Paragraph([
+             "**Step 1: Compute output layer gradient**"
+         ]))
+
+         explanation.add_element(ContentAST.Paragraph([
+             "For MSE loss with linear output activation:"
+         ]))
+
+         explanation.add_element(ContentAST.Equation(
+             f"\\frac{{\\partial L}}{{\\partial \\hat{{y}}}} = -(y - \\hat{{y}}) = -({self.y_target:.2f} - {self.a2[0]:.4f}) = {self.dL_da2:.4f}",
+             inline=False
+         ))
+
+         # W2 gradients
+         explanation.add_element(ContentAST.Paragraph([
+             "**Step 2: Gradients for hidden-to-output weights**"
+         ]))
+
+         explanation.add_element(ContentAST.Paragraph([
+             "Using the chain rule:"
+         ]))
+
+         for i in range(self.num_hidden):
+             grad = self._compute_gradient_W2(i)
+             explanation.add_element(ContentAST.Equation(
+                 f"\\frac{{\\partial L}}{{\\partial w_{i+3}}} = \\frac{{\\partial L}}{{\\partial \\hat{{y}}}} \\cdot \\frac{{\\partial \\hat{{y}}}}{{\\partial w_{i+3}}} = {self.dL_da2:.4f} \\cdot {self.a1[i]:.4f} = {grad:.4f}",
+                 inline=False
+             ))
+
+         # W1 gradients
+         explanation.add_element(ContentAST.Paragraph([
+             "**Step 3: Gradients for input-to-hidden weights**"
+         ]))
+
+         explanation.add_element(ContentAST.Paragraph([
+             "First, compute the gradient flowing back to the hidden layer:"
+         ]))
+
+         for j in range(self.num_inputs):
+             # Compute intermediate values
+             dz2_da1 = self.W2[0, 0]
+             da1_dz1 = self._activation_derivative(self.z1[0])
+             dL_dz1 = self.dL_dz2 * dz2_da1 * da1_dz1
+
+             grad = self._compute_gradient_W1(0, j)
+
+             if self.activation_function == self.ACTIVATION_SIGMOID:
+                 act_deriv_str = f"\\sigma(z_1)(1-\\sigma(z_1)) = {self.a1[0]:.4f}(1-{self.a1[0]:.4f}) = {da1_dz1:.4f}"
+             elif self.activation_function == self.ACTIVATION_RELU:
+                 act_deriv_str = f"\\mathbb{{1}}(z_1 > 0) = {da1_dz1:.4f}"
+             else:
+                 act_deriv_str = "1"
+
+             explanation.add_element(ContentAST.Equation(
+                 f"\\frac{{\\partial L}}{{\\partial w_{{1{j+1}}}}} = \\frac{{\\partial L}}{{\\partial \\hat{{y}}}} \\cdot w_{3} \\cdot {act_deriv_str} \\cdot x_{j+1} = {self.dL_da2:.4f} \\cdot {dz2_da1:.4f} \\cdot {da1_dz1:.4f} \\cdot {self.X[j]:.1f} = {grad:.4f}",
+                 inline=False
+             ))
+
+         return explanation
+
+
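+ # The chain-rule formulas in _compute_gradient_W1/_compute_gradient_W2 can be
+ # cross-checked numerically with a central finite difference. A minimal
+ # sketch, assuming sigmoid hidden units and the same 0.5·(y - ŷ)² loss; the
+ # helper and its concrete values are illustrative, not part of the package API.
+ def _sketch_gradient_check(eps=1e-6):
+     """Compare the analytic dL/dw3 with a finite-difference estimate."""
+     W1 = np.array([[0.5, -1.0], [1.5, 0.5]])
+     b1 = np.array([0.5, -0.5])
+     W2 = np.array([[1.0, -0.5]])
+     b2 = np.array([0.5])
+     X = np.array([1.0, 2.0])
+     y = 2.0
+
+     def loss(W2_val):
+         a1 = 1 / (1 + np.exp(-(W1 @ X + b1)))   # sigmoid hidden layer
+         y_hat = (W2_val @ a1 + b2)[0]           # linear output
+         return 0.5 * (y - y_hat) ** 2
+
+     # Analytic gradient for w3 = W2[0, 0]: dL/dw3 = -(y - ŷ) · h1
+     a1 = 1 / (1 + np.exp(-(W1 @ X + b1)))
+     y_hat = (W2 @ a1 + b2)[0]
+     analytic = -(y - y_hat) * a1[0]
+
+     # Central-difference approximation of the same derivative
+     W2_plus, W2_minus = W2.copy(), W2.copy()
+     W2_plus[0, 0] += eps
+     W2_minus[0, 0] -= eps
+     numeric = (loss(W2_plus) - loss(W2_minus)) / (2 * eps)
+     return analytic, numeric                    # should agree to ~1e-9
+
+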
+ @QuestionRegistry.register()
+ class EnsembleAveragingQuestion(Question):
+     """
+     Question asking students to combine predictions from multiple models (an ensemble).
+
+     Students calculate:
+     - Mean prediction (for regression)
+     - Optionally: variance or other statistics
+     """
+
+     def __init__(self, *args, **kwargs):
+         kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
+         super().__init__(*args, **kwargs)
+
+         self.num_models = kwargs.get("num_models", 5)
+         self.predictions = None
+
+     def refresh(self, rng_seed=None, *args, **kwargs):
+         super().refresh(rng_seed=rng_seed, *args, **kwargs)
+
+         # Generate predictions from multiple models
+         # Use a range that makes sense for typical regression problems
+         base_value = self.rng.uniform(0, 10)
+         self.predictions = [
+             base_value + self.rng.uniform(-2, 2)
+             for _ in range(self.num_models)
+         ]
+
+         # Round to make calculations easier
+         self.predictions = [round(p, 1) for p in self.predictions]
+
+         # Create answers
+         self._create_answers()
+
+     def _create_answers(self):
+         """Create answer fields for ensemble statistics."""
+         self.answers = {}
+
+         # Mean prediction
+         mean_pred = np.mean(self.predictions)
+         self.answers["mean"] = Answer.float_value("mean", float(mean_pred))
+
+         # Median (optional, but useful)
+         median_pred = np.median(self.predictions)
+         self.answers["median"] = Answer.float_value("median", float(median_pred))
+
+     def get_body(self, **kwargs) -> ContentAST.Section:
+         body = ContentAST.Section()
+
+         # Question description
+         body.add_element(ContentAST.Paragraph([
+             f"You have trained {self.num_models} different regression models on the same dataset. "
+             f"For a particular test input, each model produces the following predictions:"
+         ]))
+
+         # Show predictions
+         pred_list = ", ".join([f"{p:.1f}" for p in self.predictions])
+         body.add_element(ContentAST.Paragraph([
+             f"Model predictions: {pred_list}"
+         ]))
+
+         # Question
+         body.add_element(ContentAST.Paragraph([
+             "To create an ensemble, calculate the combined prediction using the following methods:"
+         ]))
+
+         # Create answer block
+         answers = []
+         answers.append(
+             ContentAST.Answer(
+                 answer=self.answers["mean"],
+                 label="Mean (average)"
+             )
+         )
+         answers.append(
+             ContentAST.Answer(
+                 answer=self.answers["median"],
+                 label="Median"
+             )
+         )
+
+         body.add_element(ContentAST.AnswerBlock(answers))
+
+         return body
+
+     def get_explanation(self, **kwargs) -> ContentAST.Section:
+         explanation = ContentAST.Section()
+
+         explanation.add_element(ContentAST.Paragraph([
+             "Ensemble methods combine predictions from multiple models to create a more robust prediction."
+         ]))
+
+         # Mean calculation
+         explanation.add_element(ContentAST.Paragraph([
+             "**Mean (bagging approach):**"
+         ]))
+
+         pred_sum = " + ".join([f"{p:.1f}" for p in self.predictions])
+         mean_val = np.mean(self.predictions)
+
+         explanation.add_element(ContentAST.Equation(
+             f"\\text{{mean}} = \\frac{{{pred_sum}}}{{{self.num_models}}} = \\frac{{{sum(self.predictions):.1f}}}{{{self.num_models}}} = {mean_val:.4f}",
+             inline=False
+         ))
+
+         # Median calculation
+         explanation.add_element(ContentAST.Paragraph([
+             "**Median:**"
+         ]))
+
+         sorted_preds = sorted(self.predictions)
+         sorted_str = ", ".join([f"{p:.1f}" for p in sorted_preds])
+         median_val = np.median(self.predictions)
+
+         explanation.add_element(ContentAST.Paragraph([
+             f"Sorted predictions: {sorted_str}"
+         ]))
+
+         if self.num_models % 2 == 1:
+             mid_idx = self.num_models // 2
+             explanation.add_element(ContentAST.Paragraph([
+                 f"Middle value (position {mid_idx + 1}): {median_val:.1f}"
+             ]))
+         else:
+             mid_idx1 = self.num_models // 2 - 1
+             mid_idx2 = self.num_models // 2
+             explanation.add_element(ContentAST.Paragraph([
+                 f"Average of the middle two values (positions {mid_idx1 + 1} and {mid_idx2 + 1}): "
+                 f"({sorted_preds[mid_idx1]:.1f} + {sorted_preds[mid_idx2]:.1f}) / 2 = {median_val:.1f}"
+             ]))
+
+         return explanation
+
+
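+ # The ensemble statistics above reduce to np.mean/np.median over the model
+ # predictions. A minimal sketch with illustrative values (not package code):
+ def _sketch_ensemble_combine():
+     """Combine five hypothetical model predictions by mean and median."""
+     predictions = [4.2, 5.1, 3.8, 4.9, 4.6]                 # one per model
+     return np.mean(predictions), np.median(predictions)     # (4.52, 4.6)
+
+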
+ @QuestionRegistry.register()
+ class EndToEndTrainingQuestion(SimpleNeuralNetworkBase):
+     """
+     End-to-end training step question.
+
+     Students perform a complete training iteration:
+     1. Forward pass → prediction
+     2. Loss calculation (MSE)
+     3. Backpropagation → gradients for specific weights
+     4. Weight update → new weight values
+     """
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         self.learning_rate = None
+         self.new_W1 = None
+         self.new_W2 = None
+
+     def refresh(self, rng_seed=None, *args, **kwargs):
+         super().refresh(rng_seed=rng_seed, *args, **kwargs)
+
+         # Generate network
+         self._generate_network()
+         self._select_activation_function()
+
+         # Run forward pass
+         self._forward_pass()
+
+         # Generate target and compute loss
+         self.y_target = float(self.a2[0] + self.rng.uniform(1, 3) * self.rng.choice([-1, 1]))
+         # Round target to display precision (2 decimal places)
+         self.y_target = round(self.y_target, 2)
+         self._compute_loss(self.y_target)
+         # Round loss to display precision (4 decimal places)
+         self.loss = round(self.loss, 4)
+         self._compute_output_gradient()
+
+         # Set learning rate (use a small value for stability)
+         self.learning_rate = round(self.rng.uniform(0.05, 0.2), 2)
+
+         # Compute updated weights
+         self._compute_weight_updates()
+
+         # Create answers
+         self._create_answers()
+
+     def _compute_weight_updates(self):
+         """Compute new weights after a gradient descent step."""
+         # Update W2
+         self.new_W2 = np.copy(self.W2)
+         for i in range(self.num_hidden):
+             grad = self._compute_gradient_W2(i)
+             self.new_W2[0, i] = self.W2[0, i] - self.learning_rate * grad
+
+         # Update W1 (first hidden neuron only for simplicity)
+         self.new_W1 = np.copy(self.W1)
+         for j in range(self.num_inputs):
+             grad = self._compute_gradient_W1(0, j)
+             self.new_W1[0, j] = self.W1[0, j] - self.learning_rate * grad
+
+     def _create_answers(self):
+         """Create answer fields for all steps."""
+         self.answers = {}
+
+         # Forward pass answers
+         self.answers["y_pred"] = Answer.float_value("y_pred", float(self.a2[0]))
+
+         # Loss answer
+         self.answers["loss"] = Answer.float_value("loss", float(self.loss))
+
+         # Gradient answers (for key weights)
+         self.answers["grad_w3"] = Answer.auto_float("grad_w3", self._compute_gradient_W2(0))
+         self.answers["grad_w11"] = Answer.auto_float("grad_w11", self._compute_gradient_W1(0, 0))
+
+         # Updated weight answers
+         self.answers["new_w3"] = Answer.float_value("new_w3", float(self.new_W2[0, 0]))
+         self.answers["new_w11"] = Answer.float_value("new_w11", float(self.new_W1[0, 0]))
+
+     def get_body(self, **kwargs) -> ContentAST.Section:
+         body = ContentAST.Section()
+
+         # Question description
+         body.add_element(ContentAST.Paragraph([
+             "Given the neural network below, perform one complete training step (forward pass, "
+             "loss calculation, backpropagation, and weight update) for the given input and target."
+         ]))
+
+         # Network diagram
+         body.add_element(
+             ContentAST.Picture(
+                 img_data=self._generate_network_diagram(show_weights=True, show_activations=False),
+                 caption="Neural network (before training)"
+             )
+         )
+
+         # Training parameters
+         body.add_element(ContentAST.Paragraph([
+             "**Training parameters:**"
+         ]))
+
+         body.add_element(ContentAST.Paragraph([
+             "Input: ",
+             ContentAST.Equation(f"x_1 = {self.X[0]:.1f}", inline=True),
+             ", ",
+             ContentAST.Equation(f"x_2 = {self.X[1]:.1f}", inline=True)
+         ]))
+
+         body.add_element(ContentAST.Paragraph([
+             "Target: ",
+             ContentAST.Equation(f"y = {self.y_target:.2f}", inline=True)
+         ]))
+
+         body.add_element(ContentAST.Paragraph([
+             "Learning rate: ",
+             ContentAST.Equation(f"\\alpha = {self.learning_rate}", inline=True)
+         ]))
+
+         body.add_element(ContentAST.Paragraph([
+             f"**Activation function:** {self._get_activation_name()}"
+         ]))
+
+         body.add_element(ContentAST.Paragraph([
+             "**Complete the following training steps:**"
+         ]))
+
+         # Network parameters table
+         body.add_element(self._generate_parameter_table(include_activations=False))
+
+         # Create answer block
+         answers = []
+
+         answers.append(
+             ContentAST.Answer(
+                 answer=self.answers["y_pred"],
+                 label="1. Forward Pass - Network output ŷ"
+             )
+         )
+
+         answers.append(
+             ContentAST.Answer(
+                 answer=self.answers["loss"],
+                 label="2. Loss - MSE: L = (1/2)(y - ŷ)²"
+             )
+         )
+
+         answers.append(
+             ContentAST.Answer(
+                 answer=self.answers["grad_w3"],
+                 label="3. Gradient ∂L/∂w₃ (weight h₁ → ŷ)"
+             )
+         )
+
+         answers.append(
+             ContentAST.Answer(
+                 answer=self.answers["grad_w11"],
+                 label="4. Gradient ∂L/∂w₁₁ (weight x₁ → h₁)"
+             )
+         )
+
+         answers.append(
+             ContentAST.Answer(
+                 answer=self.answers["new_w3"],
+                 label="5. Updated w₃: w₃' = w₃ - α(∂L/∂w₃)"
+             )
+         )
+
+         answers.append(
+             ContentAST.Answer(
+                 answer=self.answers["new_w11"],
+                 label="6. Updated w₁₁: w₁₁' = w₁₁ - α(∂L/∂w₁₁)"
+             )
+         )
+
+         body.add_element(ContentAST.AnswerBlock(answers))
+
+         return body
+
+     def get_explanation(self, **kwargs) -> ContentAST.Section:
+         explanation = ContentAST.Section()
+
+         explanation.add_element(ContentAST.Paragraph([
+             "This problem requires performing one complete training iteration. Let's go through each step."
+         ]))
+
+         # Step 1: Forward pass
+         explanation.add_element(ContentAST.Paragraph([
+             "**Step 1: Forward Pass**"
+         ]))
+
+         # Hidden layer
+         z1_0 = self.W1[0, 0] * self.X[0] + self.W1[0, 1] * self.X[1] + self.b1[0]
+         explanation.add_element(ContentAST.Equation(
+             f"z_1 = w_{{11}} x_1 + w_{{12}} x_2 + b_1 = {self.W1[0,0]:.1f} \\cdot {self.X[0]:.1f} + {self.W1[0,1]:.1f} \\cdot {self.X[1]:.1f} + {self.b1[0]:.1f} = {self.z1[0]:.4f}",
+             inline=False
+         ))
+
+         explanation.add_element(ContentAST.Equation(
+             f"h_1 = {self._get_activation_name()}(z_1) = {self.a1[0]:.4f}",
+             inline=False
+         ))
+
+         # Similarly for h2 (abbreviated)
+         explanation.add_element(ContentAST.Equation(
+             f"h_2 = {self.a1[1]:.4f} \\text{{ (calculated similarly)}}",
+             inline=False
+         ))
+
+         # Output
+         z2 = self.W2[0, 0] * self.a1[0] + self.W2[0, 1] * self.a1[1] + self.b2[0]
+         explanation.add_element(ContentAST.Equation(
+             f"\\hat{{y}} = w_3 h_1 + w_4 h_2 + b_2 = {self.W2[0,0]:.1f} \\cdot {self.a1[0]:.4f} + {self.W2[0,1]:.1f} \\cdot {self.a1[1]:.4f} + {self.b2[0]:.1f} = {self.a2[0]:.4f}",
+             inline=False
+         ))
+
+         # Step 2: Loss
+         explanation.add_element(ContentAST.Paragraph([
+             "**Step 2: Calculate Loss**"
+         ]))
+
+         explanation.add_element(ContentAST.Equation(
+             f"L = \\frac{{1}}{{2}}(y - \\hat{{y}})^2 = \\frac{{1}}{{2}}({self.y_target:.2f} - {self.a2[0]:.4f})^2 = {self.loss:.4f}",
+             inline=False
+         ))
+
+         # Step 3: Gradients
+         explanation.add_element(ContentAST.Paragraph([
+             "**Step 3: Compute Gradients**"
+         ]))
+
+         explanation.add_element(ContentAST.Paragraph([
+             "Loss gradient:"
+         ]))
+
+         explanation.add_element(ContentAST.Equation(
+             f"\\frac{{\\partial L}}{{\\partial \\hat{{y}}}} = -(y - \\hat{{y}}) = {self.dL_da2:.4f}",
+             inline=False
+         ))
+
+         grad_w3 = self._compute_gradient_W2(0)
+         explanation.add_element(ContentAST.Equation(
+             f"\\frac{{\\partial L}}{{\\partial w_3}} = \\frac{{\\partial L}}{{\\partial \\hat{{y}}}} \\cdot h_1 = {self.dL_da2:.4f} \\cdot {self.a1[0]:.4f} = {grad_w3:.4f}",
+             inline=False
+         ))
+
+         grad_w11 = self._compute_gradient_W1(0, 0)
+         dz2_da1 = self.W2[0, 0]
+         da1_dz1 = self._activation_derivative(self.z1[0])
+
+         explanation.add_element(ContentAST.Equation(
+             f"\\frac{{\\partial L}}{{\\partial w_{{11}}}} = \\frac{{\\partial L}}{{\\partial \\hat{{y}}}} \\cdot w_3 \\cdot f'(z_1) \\cdot x_1 = {self.dL_da2:.4f} \\cdot {dz2_da1:.4f} \\cdot {da1_dz1:.4f} \\cdot {self.X[0]:.1f} = {grad_w11:.4f}",
+             inline=False
+         ))
+
+         # Step 4: Weight updates
+         explanation.add_element(ContentAST.Paragraph([
+             "**Step 4: Update Weights**"
+         ]))
+
+         new_w3 = self.new_W2[0, 0]
+         explanation.add_element(ContentAST.Equation(
+             f"w_3^{{new}} = w_3 - \\alpha \\frac{{\\partial L}}{{\\partial w_3}} = {self.W2[0,0]:.1f} - {self.learning_rate} \\cdot {grad_w3:.4f} = {new_w3:.4f}",
+             inline=False
+         ))
+
+         new_w11 = self.new_W1[0, 0]
+         explanation.add_element(ContentAST.Equation(
+             f"w_{{11}}^{{new}} = w_{{11}} - \\alpha \\frac{{\\partial L}}{{\\partial w_{{11}}}} = {self.W1[0,0]:.1f} - {self.learning_rate} \\cdot {grad_w11:.4f} = {new_w11:.4f}",
+             inline=False
+         ))
+
+         explanation.add_element(ContentAST.Paragraph([
+             "These updated weights would be used in the next training iteration."
+         ]))
+
+         return explanation
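+
+
+ # The weight update performed in _compute_weight_updates is one step of plain
+ # gradient descent: w' = w - α · ∂L/∂w. A minimal numeric sketch, assuming
+ # α = 0.1 and an illustrative gradient value (not taken from any generated
+ # question):
+ def _sketch_weight_update(w=1.0, grad=-0.25, learning_rate=0.1):
+     """One gradient-descent step on a single weight."""
+     # Moving against the gradient decreases the loss; here the gradient is
+     # negative, so the weight increases: 1.0 - 0.1 * (-0.25) = 1.025.
+     return w - learning_rate * grad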