quizgenerator-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. QuizGenerator/README.md +5 -0
  2. QuizGenerator/__init__.py +27 -0
  3. QuizGenerator/__main__.py +7 -0
  4. QuizGenerator/canvas/__init__.py +13 -0
  5. QuizGenerator/canvas/canvas_interface.py +627 -0
  6. QuizGenerator/canvas/classes.py +235 -0
  7. QuizGenerator/constants.py +149 -0
  8. QuizGenerator/contentast.py +1955 -0
  9. QuizGenerator/generate.py +253 -0
  10. QuizGenerator/logging.yaml +55 -0
  11. QuizGenerator/misc.py +579 -0
  12. QuizGenerator/mixins.py +548 -0
  13. QuizGenerator/performance.py +202 -0
  14. QuizGenerator/premade_questions/__init__.py +0 -0
  15. QuizGenerator/premade_questions/basic.py +103 -0
  16. QuizGenerator/premade_questions/cst334/__init__.py +1 -0
  17. QuizGenerator/premade_questions/cst334/languages.py +391 -0
  18. QuizGenerator/premade_questions/cst334/math_questions.py +297 -0
  19. QuizGenerator/premade_questions/cst334/memory_questions.py +1400 -0
  20. QuizGenerator/premade_questions/cst334/ostep13_vsfs.py +572 -0
  21. QuizGenerator/premade_questions/cst334/persistence_questions.py +451 -0
  22. QuizGenerator/premade_questions/cst334/process.py +648 -0
  23. QuizGenerator/premade_questions/cst463/__init__.py +0 -0
  24. QuizGenerator/premade_questions/cst463/gradient_descent/__init__.py +3 -0
  25. QuizGenerator/premade_questions/cst463/gradient_descent/gradient_calculation.py +369 -0
  26. QuizGenerator/premade_questions/cst463/gradient_descent/gradient_descent_questions.py +305 -0
  27. QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py +650 -0
  28. QuizGenerator/premade_questions/cst463/gradient_descent/misc.py +73 -0
  29. QuizGenerator/premade_questions/cst463/math_and_data/__init__.py +2 -0
  30. QuizGenerator/premade_questions/cst463/math_and_data/matrix_questions.py +631 -0
  31. QuizGenerator/premade_questions/cst463/math_and_data/vector_questions.py +534 -0
  32. QuizGenerator/premade_questions/cst463/models/__init__.py +0 -0
  33. QuizGenerator/premade_questions/cst463/models/attention.py +192 -0
  34. QuizGenerator/premade_questions/cst463/models/cnns.py +186 -0
  35. QuizGenerator/premade_questions/cst463/models/matrices.py +24 -0
  36. QuizGenerator/premade_questions/cst463/models/rnns.py +202 -0
  37. QuizGenerator/premade_questions/cst463/models/text.py +203 -0
  38. QuizGenerator/premade_questions/cst463/models/weight_counting.py +227 -0
  39. QuizGenerator/premade_questions/cst463/neural-network-basics/__init__.py +6 -0
  40. QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py +1314 -0
  41. QuizGenerator/premade_questions/cst463/tensorflow-intro/__init__.py +6 -0
  42. QuizGenerator/premade_questions/cst463/tensorflow-intro/tensorflow_questions.py +936 -0
  43. QuizGenerator/qrcode_generator.py +293 -0
  44. QuizGenerator/question.py +715 -0
  45. QuizGenerator/quiz.py +467 -0
  46. QuizGenerator/regenerate.py +472 -0
  47. QuizGenerator/typst_utils.py +113 -0
  48. quizgenerator-0.4.2.dist-info/METADATA +265 -0
  49. quizgenerator-0.4.2.dist-info/RECORD +52 -0
  50. quizgenerator-0.4.2.dist-info/WHEEL +4 -0
  51. quizgenerator-0.4.2.dist-info/entry_points.txt +3 -0
  52. quizgenerator-0.4.2.dist-info/licenses/LICENSE +674 -0
QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py
@@ -0,0 +1,1314 @@
from __future__ import annotations

import abc
import io
import logging
import math
import numpy as np
import uuid
import os
from typing import List, Tuple, Dict, Any

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from QuizGenerator.contentast import ContentAST
from QuizGenerator.question import Question, Answer, QuestionRegistry
from QuizGenerator.mixins import TableQuestionMixin, BodyTemplatesMixin
from ..models.matrices import MatrixQuestion

log = logging.getLogger(__name__)


class SimpleNeuralNetworkBase(MatrixQuestion, abc.ABC):
    """
    Base class for simple neural network questions.

    Generates a small feedforward network:
    - 2-3 input neurons
    - 2 hidden neurons (single hidden layer)
    - 1 output neuron
    - Random weights and biases
    - Runs forward pass and stores all activations
    """

    # Activation function types
    ACTIVATION_SIGMOID = "sigmoid"
    ACTIVATION_RELU = "relu"
    ACTIVATION_LINEAR = "linear"

    def __init__(self, *args, **kwargs):
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        # Network architecture parameters
        self.num_inputs = kwargs.get("num_inputs", 2)
        self.num_hidden = kwargs.get("num_hidden", 2)
        self.num_outputs = kwargs.get("num_outputs", 1)

        # Configuration
        self.activation_function = None
        self.use_bias = kwargs.get("use_bias", True)
        self.param_digits = kwargs.get("param_digits", 1)  # Precision for weights/biases

        # Network parameters (weights and biases)
        self.W1 = None  # Input to hidden weights (num_hidden x num_inputs)
        self.b1 = None  # Hidden layer biases (num_hidden,)
        self.W2 = None  # Hidden to output weights (num_outputs x num_hidden)
        self.b2 = None  # Output layer biases (num_outputs,)

        # Input data and forward pass results
        self.X = None   # Input values (num_inputs,)
        self.z1 = None  # Hidden layer pre-activation (num_hidden,)
        self.a1 = None  # Hidden layer activations (num_hidden,)
        self.z2 = None  # Output layer pre-activation (num_outputs,)
        self.a2 = None  # Output layer activation (prediction)

        # Target and loss (for backprop questions)
        self.y_target = None
        self.loss = None

        # Gradients (for backprop questions)
        self.dL_da2 = None   # Gradient of loss w.r.t. output
        self.da2_dz2 = None  # Gradient of activation w.r.t. pre-activation
        self.dL_dz2 = None   # Gradient of loss w.r.t. output pre-activation

    def _generate_network(self, weight_range=(-2, 2), input_range=(-3, 3)):
        """Generate random network parameters and input."""
        # Generate weights using MatrixQuestion's rounded matrix method.
        # Use param_digits to match display precision in tables and explanations.
        self.W1 = self.get_rounded_matrix(
            (self.num_hidden, self.num_inputs),
            low=weight_range[0],
            high=weight_range[1],
            digits_to_round=self.param_digits
        )

        self.W2 = self.get_rounded_matrix(
            (self.num_outputs, self.num_hidden),
            low=weight_range[0],
            high=weight_range[1],
            digits_to_round=self.param_digits
        )

        # Generate biases
        if self.use_bias:
            self.b1 = self.get_rounded_matrix(
                (self.num_hidden,),
                low=weight_range[0],
                high=weight_range[1],
                digits_to_round=self.param_digits
            )
            self.b2 = self.get_rounded_matrix(
                (self.num_outputs,),
                low=weight_range[0],
                high=weight_range[1],
                digits_to_round=self.param_digits
            )
        else:
            self.b1 = np.zeros(self.num_hidden)
            self.b2 = np.zeros(self.num_outputs)

        # Generate input values (keep as integers for simplicity)
        self.X = self.get_rounded_matrix(
            (self.num_inputs,),
            low=input_range[0],
            high=input_range[1],
            digits_to_round=0  # Round to integers
        )

    def _select_activation_function(self):
        """Randomly select an activation function."""
        activations = [
            self.ACTIVATION_SIGMOID,
            self.ACTIVATION_RELU
        ]
        self.activation_function = self.rng.choice(activations)

    def _apply_activation(self, z, function_type=None):
        """Apply activation function to pre-activation values."""
        if function_type is None:
            function_type = self.activation_function

        if function_type == self.ACTIVATION_SIGMOID:
            return 1 / (1 + np.exp(-z))
        elif function_type == self.ACTIVATION_RELU:
            return np.maximum(0, z)
        elif function_type == self.ACTIVATION_LINEAR:
            return z
        else:
            raise ValueError(f"Unknown activation function: {function_type}")
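
    # Quick numeric anchors (illustrative values, not produced by this class):
    # sigmoid(0.0) = 0.5 and sigmoid(2.0) ≈ 0.8808; ReLU(-1.5) = 0 and ReLU(1.5) = 1.5.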

    def _activation_derivative(self, z, function_type=None):
        """Compute derivative of activation function."""
        if function_type is None:
            function_type = self.activation_function

        if function_type == self.ACTIVATION_SIGMOID:
            a = self._apply_activation(z, function_type)
            return a * (1 - a)
        elif function_type == self.ACTIVATION_RELU:
            return np.where(z > 0, 1, 0)
        elif function_type == self.ACTIVATION_LINEAR:
            return np.ones_like(z)
        else:
            raise ValueError(f"Unknown activation function: {function_type}")
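
    # Note: σ'(z) = σ(z)(1 - σ(z)) peaks at 0.25 when z = 0, and ReLU'(z) is
    # taken to be 0 at z = 0 here (np.where(z > 0, 1, 0)).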

    def _forward_pass(self):
        """Run forward pass through the network."""
        # Hidden layer
        self.z1 = self.W1 @ self.X + self.b1
        self.a1 = self._apply_activation(self.z1)

        # Output layer
        self.z2 = self.W2 @ self.a1 + self.b2
        self.a2 = self._apply_activation(self.z2, self.ACTIVATION_SIGMOID)  # Sigmoid output for binary classification

        # Round all computed values to display precision to ensure students can
        # reproduce calculations. We display z and a values with 4 decimal places.
        self.z1 = np.round(self.z1, 4)
        self.a1 = np.round(self.a1, 4)
        self.z2 = np.round(self.z2, 4)
        self.a2 = np.round(self.a2, 4)

        return self.a2
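
    # Shape check for the default 2-2-1 network: W1 is (2, 2) and X is (2,),
    # so z1 = W1 @ X + b1 is (2,); W2 is (1, 2), so z2 = W2 @ a1 + b2 is (1,).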

    def _compute_loss(self, y_target):
        """Compute binary cross-entropy loss."""
        self.y_target = y_target
        # BCE: L = -[y log(ŷ) + (1-y) log(1-ŷ)]
        # Add small epsilon to prevent log(0)
        epsilon = 1e-15
        y_pred = np.clip(self.a2[0], epsilon, 1 - epsilon)
        self.loss = -(y_target * np.log(y_pred) + (1 - y_target) * np.log(1 - y_pred))
        return self.loss
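
    # Worked example (illustrative values, not generated here): with y = 1 and
    # ŷ = 0.8, L = -ln(0.8) ≈ 0.2231; with y = 0 the same ŷ gives
    # L = -ln(0.2) ≈ 1.6094, penalizing the confident wrong answer more heavily.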

    def _compute_output_gradient(self):
        """Compute gradient of loss w.r.t. output."""
        # For BCE loss with sigmoid activation, the combined derivative of the
        # loss and the activation simplifies to:
        #   dL/dz2 = ŷ - y
        #
        # Derivation:
        #   BCE: L = -[y log(ŷ) + (1-y) log(1-ŷ)]
        #   dL/dŷ = -[y/ŷ - (1-y)/(1-ŷ)]
        #   Sigmoid: ŷ = σ(z), dŷ/dz = ŷ(1-ŷ)
        #   Chain rule: dL/dz = dL/dŷ · dŷ/dz
        #             = -[y/ŷ - (1-y)/(1-ŷ)] · ŷ(1-ŷ)
        #             = -[y(1-ŷ) - (1-y)ŷ]
        #             = ŷ - y

        self.dL_dz2 = self.a2[0] - self.y_target

        # Store intermediate values for explanation purposes.
        # Clip to prevent division by zero (same epsilon as in the loss calculation).
        epsilon = 1e-15
        y_pred_clipped = np.clip(self.a2[0], epsilon, 1 - epsilon)
        self.dL_da2 = -(self.y_target / y_pred_clipped - (1 - self.y_target) / (1 - y_pred_clipped))
        self.da2_dz2 = self.a2[0] * (1 - self.a2[0])

        return self.dL_dz2

    def _compute_gradient_W2(self, hidden_idx):
        """Compute gradient ∂L/∂W2[0, hidden_idx]."""
        # ∂L/∂w = dL/dz2 · ∂z2/∂w = dL/dz2 · a1[hidden_idx]
        return float(self.dL_dz2 * self.a1[hidden_idx])

    def _compute_gradient_W1(self, hidden_idx, input_idx):
        """Compute gradient ∂L/∂W1[hidden_idx, input_idx]."""
        # dL/dz1[hidden_idx] = dL/dz2 · ∂z2/∂a1[hidden_idx] · ∂a1/∂z1[hidden_idx]
        #                    = dL/dz2 · W2[0, hidden_idx] · activation'(z1[hidden_idx])
        dz2_da1 = self.W2[0, hidden_idx]
        da1_dz1 = self._activation_derivative(self.z1[hidden_idx])

        dL_dz1 = self.dL_dz2 * dz2_da1 * da1_dz1

        # ∂L/∂w = dL/dz1 · ∂z1/∂w = dL/dz1 · X[input_idx]
        return float(dL_dz1 * self.X[input_idx])
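
    # Worked example (illustrative numbers): with dL/dz2 = 0.7, W2[0, 0] = -0.5,
    # activation'(z1[0]) = 0.2, and X[0] = 2.0, the chain gives
    # ∂L/∂w11 = 0.7 · (-0.5) · 0.2 · 2.0 = -0.14.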

    def _get_activation_name(self):
        """Get human-readable activation function name."""
        if self.activation_function == self.ACTIVATION_SIGMOID:
            return "sigmoid"
        elif self.activation_function == self.ACTIVATION_RELU:
            return "ReLU"
        elif self.activation_function == self.ACTIVATION_LINEAR:
            return "linear"
        return "unknown"

    def _get_activation_formula(self):
        """Get LaTeX formula for activation function."""
        if self.activation_function == self.ACTIVATION_SIGMOID:
            return r"\sigma(z) = \frac{1}{1 + e^{-z}}"
        elif self.activation_function == self.ACTIVATION_RELU:
            return r"\text{ReLU}(z) = \max(0, z)"
        elif self.activation_function == self.ACTIVATION_LINEAR:
            return r"f(z) = z"
        return ""

    def _generate_parameter_table(self, include_activations=False, include_training_context=False):
        """
        Generate side-by-side tables showing all network parameters.

        Args:
            include_activations: If True, include computed activation values
            include_training_context: If True, include target, loss, etc. (for backprop questions)

        Returns:
            ContentAST.TableGroup with network parameters in two side-by-side tables
        """
        # Left table: Inputs & Weights
        left_data = []
        left_data.append(["Symbol", "Value"])

        # Input values
        for i in range(self.num_inputs):
            left_data.append([
                ContentAST.Equation(f"x_{i+1}", inline=True),
                f"{self.X[i]:.1f}"  # Inputs are always integers or 1 decimal
            ])

        # Weights from input to hidden
        for j in range(self.num_hidden):
            for i in range(self.num_inputs):
                left_data.append([
                    ContentAST.Equation(f"w_{{{j+1}{i+1}}}", inline=True),
                    f"{self.W1[j, i]:.{self.param_digits}f}"
                ])

        # Weights from hidden to output
        for i in range(self.num_hidden):
            left_data.append([
                ContentAST.Equation(f"w_{i+3}", inline=True),
                f"{self.W2[0, i]:.{self.param_digits}f}"
            ])

        # Right table: Biases, Activations, Training context
        right_data = []
        right_data.append(["Symbol", "Value"])

        # Hidden layer biases
        if self.use_bias:
            for j in range(self.num_hidden):
                right_data.append([
                    ContentAST.Equation(f"b_{j+1}", inline=True),
                    f"{self.b1[j]:.{self.param_digits}f}"
                ])

        # Output bias
        if self.use_bias:
            right_data.append([
                ContentAST.Equation(r"b_{out}", inline=True),
                f"{self.b2[0]:.{self.param_digits}f}"
            ])

        # Hidden layer activations (if computed and requested)
        if include_activations and self.a1 is not None:
            for i in range(self.num_hidden):
                right_data.append([
                    ContentAST.Equation(f"h_{i+1}", inline=True),
                    f"{self.a1[i]:.4f}"
                ])

        # Output activation (if computed and requested)
        if include_activations and self.a2 is not None:
            right_data.append([
                ContentAST.Equation(r"\hat{y}", inline=True),
                f"{self.a2[0]:.4f}"
            ])

        # Training context (target, loss - for backprop questions)
        if include_training_context:
            if self.y_target is not None:
                right_data.append([
                    ContentAST.Equation("y", inline=True),
                    f"{int(self.y_target)}"  # Binary target (0 or 1)
                ])

            if self.loss is not None:
                right_data.append([
                    ContentAST.Equation("L", inline=True),
                    f"{self.loss:.4f}"
                ])

        # Create table group
        table_group = ContentAST.TableGroup()
        table_group.add_table(ContentAST.Table(data=left_data))
        table_group.add_table(ContentAST.Table(data=right_data))

        return table_group
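
    # Naming convention used in the tables, diagrams, and explanations: the
    # input-to-hidden weights are w_{ji} (w_11, w_12, w_21, w_22 for the default
    # 2x2 layer), and the hidden-to-output weights continue the numbering as
    # w_3, w_4, ...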

    def _generate_network_diagram(self, show_weights=True, show_activations=False):
        """
        Generate a simple, clean network diagram.

        Args:
            show_weights: If True, display weights on edges
            show_activations: If True, display activation values on nodes

        Returns:
            BytesIO buffer containing PNG image
        """
        # Create figure with tight layout and equal aspect ratio
        fig = plt.figure(figsize=(8, 2.5))
        ax = fig.add_subplot(111)
        ax.set_aspect('equal', adjustable='box')  # Keep circles circular
        ax.axis('off')

        # Node radius
        r = 0.15

        # Layer x-positions
        input_x = 0.5
        hidden_x = 2.0
        output_x = 3.5

        # Calculate y-positions for nodes (top to bottom order)
        def get_y_positions(n, include_bias=False):
            # If including bias, need one more position at the top
            total_nodes = n + 1 if include_bias else n
            if total_nodes == 1:
                return [1.0]
            spacing = min(2.0 / (total_nodes - 1), 0.6)
            # Start from top
            start = 1.0 + (total_nodes - 1) * spacing / 2
            positions = [start - i * spacing for i in range(total_nodes)]
            return positions

        # Input layer: bias (if present) at top, then x_1, x_2, ... going down
        input_positions = get_y_positions(self.num_inputs, include_bias=self.use_bias)
        if self.use_bias:
            bias1_y = input_positions[0]
            input_y = input_positions[1:]  # x_1 is second (below bias), x_2 is third, etc.
        else:
            bias1_y = None
            input_y = input_positions

        # Hidden layer: bias (if present) at top, then h_1, h_2, ... going down
        hidden_positions = get_y_positions(self.num_hidden, include_bias=self.use_bias)
        if self.use_bias:
            bias2_y = hidden_positions[0]
            hidden_y = hidden_positions[1:]
        else:
            bias2_y = None
            hidden_y = hidden_positions

        # Output layer: centered
        output_y = [1.0]

        # Draw edges first (so they're behind nodes)
        # Input to hidden
        for i in range(self.num_inputs):
            for j in range(self.num_hidden):
                ax.plot([input_x, hidden_x], [input_y[i], hidden_y[j]],
                        'k-', linewidth=1, alpha=0.7, zorder=1)
                if show_weights:
                    label_x = input_x + 0.3
                    label_y = input_y[i] + (hidden_y[j] - input_y[i]) * 0.2
                    # Use LaTeX math mode for proper subscript rendering
                    weight_label = f'$w_{{{j+1}{i+1}}}$'
                    ax.text(label_x, label_y, weight_label, fontsize=8,
                            bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))

        # Bias to hidden
        if self.use_bias:
            for j in range(self.num_hidden):
                ax.plot([input_x, hidden_x], [bias1_y, hidden_y[j]],
                        'k-', linewidth=1, alpha=0.7, zorder=1)
                if show_weights:
                    label_x = input_x + 0.3
                    label_y = bias1_y + (hidden_y[j] - bias1_y) * 0.2
                    bias_label = f'$b_{{{j+1}}}$'
                    ax.text(label_x, label_y, bias_label, fontsize=8,
                            bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))

        # Hidden to output
        for i in range(self.num_hidden):
            ax.plot([hidden_x, output_x], [hidden_y[i], output_y[0]],
                    'k-', linewidth=1, alpha=0.7, zorder=1)
            if show_weights:
                label_x = hidden_x + 0.3
                label_y = hidden_y[i] + (output_y[0] - hidden_y[i]) * 0.2
                weight_label = f'$w_{{{i+3}}}$'
                ax.text(label_x, label_y, weight_label, fontsize=8,
                        bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))

        # Bias to output
        if self.use_bias:
            ax.plot([hidden_x, output_x], [bias2_y, output_y[0]],
                    'k-', linewidth=1, alpha=0.7, zorder=1)
            if show_weights:
                label_x = hidden_x + 0.3
                label_y = bias2_y + (output_y[0] - bias2_y) * 0.2
                bias_label = r'$b_{out}$'
                ax.text(label_x, label_y, bias_label, fontsize=8,
                        bbox=dict(boxstyle='round,pad=0.2', facecolor='white', edgecolor='none'))

        # Draw nodes
        # Input nodes
        for i, y in enumerate(input_y):
            circle = plt.Circle((input_x, y), r, facecolor='lightgray',
                                edgecolor='black', linewidth=1.5, zorder=10)
            ax.add_patch(circle)
            label = f'$x_{{{i+1}}}$' if not show_activations else f'$x_{{{i+1}}}$={self.X[i]:.1f}'
            ax.text(input_x - r - 0.15, y, label, fontsize=10, ha='right', va='center')

        # Bias nodes
        if self.use_bias:
            circle = plt.Circle((input_x, bias1_y), r, facecolor='lightgray',
                                edgecolor='black', linewidth=1.5, zorder=10)
            ax.add_patch(circle)
            ax.text(input_x, bias1_y, '1', fontsize=10, ha='center', va='center', weight='bold')

            circle = plt.Circle((hidden_x, bias2_y), r, facecolor='lightgray',
                                edgecolor='black', linewidth=1.5, zorder=10)
            ax.add_patch(circle)
            ax.text(hidden_x, bias2_y, '1', fontsize=10, ha='center', va='center', weight='bold')

        # Hidden nodes
        for i, y in enumerate(hidden_y):
            circle = plt.Circle((hidden_x, y), r, facecolor='lightblue',
                                edgecolor='black', linewidth=1.5, zorder=10)
            ax.add_patch(circle)
            ax.plot([hidden_x, hidden_x], [y - r*0.7, y + r*0.7], 'k-', linewidth=1.2, zorder=11)
            ax.text(hidden_x - r*0.35, y, r'$\Sigma$', fontsize=11, ha='center', va='center', zorder=12)
            ax.text(hidden_x + r*0.35, y, r'$f$', fontsize=10, ha='center', va='center', zorder=12, style='italic')
            if show_activations and self.a1 is not None:
                ax.text(hidden_x, y - r - 0.15, f'{self.a1[i]:.2f}', fontsize=8, ha='center', va='top')

        # Output node
        y = output_y[0]
        circle = plt.Circle((output_x, y), r, facecolor='lightblue',
                            edgecolor='black', linewidth=1.5, zorder=10)
        ax.add_patch(circle)
        ax.plot([output_x, output_x], [y - r*0.7, y + r*0.7], 'k-', linewidth=1.2, zorder=11)
        ax.text(output_x - r*0.35, y, r'$\Sigma$', fontsize=11, ha='center', va='center', zorder=12)
        ax.text(output_x + r*0.35, y, r'$f$', fontsize=10, ha='center', va='center', zorder=12, style='italic')
        label = r'$\hat{y}$' if not show_activations else f'$\\hat{{y}}$={self.a2[0]:.2f}'
        ax.text(output_x + r + 0.15, y, label, fontsize=10, ha='left', va='center')

        # Save to buffer with minimal padding
        buffer = io.BytesIO()
        plt.savefig(buffer, format='png', dpi=150, bbox_inches='tight',
                    facecolor='white', edgecolor='none', pad_inches=0.0)
        plt.close(fig)
        buffer.seek(0)

        return buffer

    def _generate_ascii_network(self):
        """Generate ASCII art representation of the network for alt-text."""
        lines = []
        lines.append("Network Architecture:")
        lines.append("")
        lines.append("Input Layer:        Hidden Layer:       Output Layer:")

        # For 2 inputs, 2 hidden, 1 output
        if self.num_inputs == 2 and self.num_hidden == 2:
            lines.append(" x₁ ----[w₁₁]---→ h₁ ----[w₃]----→")
            lines.append("   \\            /     \\           /")
            lines.append("    \\          /       \\         /")
            lines.append("     \\        /         \\       /    ŷ")
            lines.append("      \\      /           \\     /")
            lines.append("       \\    /             \\   /")
            lines.append("        \\  /               \\ /")
            lines.append("        /  \\               / \\")
            lines.append("       /    \\             /   \\")
            lines.append(" x₂ ----[w₂₁]---→ h₂ ----[w₄]----→")
        else:
            # Generic representation
            for i in range(max(self.num_inputs, self.num_hidden)):
                parts = []
                if i < self.num_inputs:
                    parts.append(f" x_{i+1}")
                else:
                    parts.append("    ")
                parts.append(" ---→ ")
                if i < self.num_hidden:
                    parts.append(f"h_{i+1}")
                else:
                    parts.append("   ")
                parts.append(" ---→ ")
                if i == self.num_hidden // 2:
                    parts.append("ŷ")
                lines.append("".join(parts))

        lines.append("")
        lines.append(f"Activation function: {self._get_activation_name()}")

        return "\n".join(lines)


@QuestionRegistry.register()
class ForwardPassQuestion(SimpleNeuralNetworkBase):
    """
    Question asking students to calculate forward pass through a simple network.

    Students calculate:
    - Hidden layer activations (h₁, h₂)
    - Final output (ŷ)
    """

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate network
        self._generate_network()
        self._select_activation_function()

        # Run forward pass to get correct answers
        self._forward_pass()

        # Create answer fields
        self._create_answers()

    def _create_answers(self):
        """Create answer fields for forward pass values."""
        self.answers = {}

        # Hidden layer activations
        for i in range(self.num_hidden):
            key = f"h{i+1}"
            self.answers[key] = Answer.float_value(key, float(self.a1[i]))

        # Output
        self.answers["y_pred"] = Answer.float_value("y_pred", float(self.a2[0]))

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            f"Given the neural network below with {self._get_activation_name()} activation "
            f"in the hidden layer and sigmoid activation in the output layer (for binary classification), "
            f"calculate the forward pass for the given input values."
        ]))

        # Network diagram
        body.add_element(
            ContentAST.Picture(
                img_data=self._generate_network_diagram(show_weights=True, show_activations=False),
                caption="Neural network architecture"
            )
        )

        # Network parameters table
        body.add_element(self._generate_parameter_table(include_activations=False))

        # Activation function
        body.add_element(ContentAST.Paragraph([
            f"**Hidden layer activation:** {self._get_activation_name()}"
        ]))

        # Create answer block
        answers = []
        for i in range(self.num_hidden):
            answers.append(
                ContentAST.Answer(
                    answer=self.answers[f"h{i+1}"],
                    label=f"h_{i+1}"
                )
            )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["y_pred"],
                label="ŷ"
            )
        )

        body.add_element(ContentAST.AnswerBlock(answers))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "To solve this problem, we need to compute the forward pass through the network."
        ]))

        # Hidden layer calculations
        explanation.add_element(ContentAST.Paragraph([
            "**Step 1: Calculate hidden layer pre-activations**"
        ]))

        for i in range(self.num_hidden):
            # Build equation for z_i
            terms = []
            for j in range(self.num_inputs):
                terms.append(f"({self.W1[i,j]:.{self.param_digits}f})({self.X[j]:.1f})")

            z_calc = " + ".join(terms)
            if self.use_bias:
                z_calc += f" + {self.b1[i]:.{self.param_digits}f}"

            explanation.add_element(ContentAST.Equation(
                f"z_{i+1} = {z_calc} = {self.z1[i]:.4f}",
                inline=False
            ))

        # Hidden layer activations
        explanation.add_element(ContentAST.Paragraph([
            f"**Step 2: Apply {self._get_activation_name()} activation**"
        ]))

        for i in range(self.num_hidden):
            if self.activation_function == self.ACTIVATION_SIGMOID:
                explanation.add_element(ContentAST.Equation(
                    f"h_{i+1} = \\sigma(z_{i+1}) = \\frac{{1}}{{1 + e^{{-{self.z1[i]:.4f}}}}} = {self.a1[i]:.4f}",
                    inline=False
                ))
            elif self.activation_function == self.ACTIVATION_RELU:
                explanation.add_element(ContentAST.Equation(
                    f"h_{i+1} = \\text{{ReLU}}(z_{i+1}) = \\max(0, {self.z1[i]:.4f}) = {self.a1[i]:.4f}",
                    inline=False
                ))
            else:
                explanation.add_element(ContentAST.Equation(
                    f"h_{i+1} = z_{i+1} = {self.a1[i]:.4f}",
                    inline=False
                ))

        # Output layer
        explanation.add_element(ContentAST.Paragraph([
            "**Step 3: Calculate output (with sigmoid activation)**"
        ]))

        terms = []
        for j in range(self.num_hidden):
            terms.append(f"({self.W2[0,j]:.{self.param_digits}f})({self.a1[j]:.4f})")

        z_out_calc = " + ".join(terms)
        if self.use_bias:
            z_out_calc += f" + {self.b2[0]:.{self.param_digits}f}"

        explanation.add_element(ContentAST.Equation(
            f"z_{{out}} = {z_out_calc} = {self.z2[0]:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Equation(
            f"\\hat{{y}} = \\sigma(z_{{out}}) = \\frac{{1}}{{1 + e^{{-{self.z2[0]:.4f}}}}} = {self.a2[0]:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Paragraph([
            "(Note: The output layer uses sigmoid activation for binary classification, "
            "so the output is between 0 and 1, representing the probability of class 1.)"
        ]))

        return explanation
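
    # Example of a rendered Step 2 line (illustrative numbers): with z_1 = 0.37
    # and sigmoid activation, h_1 = σ(0.37) = 1/(1 + e^{-0.37}) ≈ 0.5915.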


@QuestionRegistry.register()
class BackpropGradientQuestion(SimpleNeuralNetworkBase):
    """
    Question asking students to calculate gradients using backpropagation.

    Given a completed forward pass, students calculate:
    - Gradients for multiple specific weights (∂L/∂w)
    """

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate network
        self._generate_network()
        self._select_activation_function()

        # Run forward pass
        self._forward_pass()

        # Generate binary target (0 or 1)
        # Choose the opposite of what the network predicts to create meaningful gradients
        if self.a2[0] > 0.5:
            self.y_target = 0
        else:
            self.y_target = 1
        self._compute_loss(self.y_target)
        # Round loss to display precision (4 decimal places)
        self.loss = round(self.loss, 4)
        self._compute_output_gradient()

        # Create answer fields for specific weight gradients
        self._create_answers()

    def _create_answers(self):
        """Create answer fields for weight gradients."""
        self.answers = {}

        # Ask for gradients of several weights,
        # including at least one from each layer.

        # Gradients for W2 (hidden to output)
        for i in range(self.num_hidden):
            key = f"dL_dw2_{i}"
            self.answers[key] = Answer.auto_float(key, self._compute_gradient_W2(i))

        # Gradients for W1 (input to hidden) - pick the first hidden neuron
        for j in range(self.num_inputs):
            key = f"dL_dw1_0{j}"
            self.answers[key] = Answer.auto_float(key, self._compute_gradient_W1(0, j))

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            f"Given the neural network below with {self._get_activation_name()} activation "
            f"in the hidden layer and sigmoid activation in the output layer (for binary classification), "
            f"a forward pass has been completed with the values shown. "
            f"Calculate the gradients (∂L/∂w) for the specified weights using backpropagation."
        ]))

        # Network diagram
        body.add_element(
            ContentAST.Picture(
                img_data=self._generate_network_diagram(show_weights=True, show_activations=False),
                caption="Neural network architecture"
            )
        )

        # Network parameters and forward pass results table
        body.add_element(self._generate_parameter_table(include_activations=True, include_training_context=True))

        # Activation function
        body.add_element(ContentAST.Paragraph([
            f"**Hidden layer activation:** {self._get_activation_name()}"
        ]))

        body.add_element(ContentAST.Paragraph([
            "**Calculate the following gradients:**"
        ]))

        # Create answer block
        answers = []

        # W2 gradients
        for i in range(self.num_hidden):
            answers.append(
                ContentAST.Answer(
                    answer=self.answers[f"dL_dw2_{i}"],
                    label=f"∂L/∂w_{i+3}"
                )
            )

        # W1 gradients (first hidden neuron)
        for j in range(self.num_inputs):
            answers.append(
                ContentAST.Answer(
                    answer=self.answers[f"dL_dw1_0{j}"],
                    label=f"∂L/∂w_1{j+1}"
                )
            )

        body.add_element(ContentAST.AnswerBlock(answers))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "To solve this problem, we use the chain rule to compute gradients via backpropagation."
        ]))

        # Output layer gradient
        explanation.add_element(ContentAST.Paragraph([
            "**Step 1: Compute the output layer gradient**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "For binary cross-entropy loss with sigmoid output activation, "
            "the gradient with respect to the pre-activation simplifies neatly:"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L}}{{\\partial z_{{out}}}} = \\hat{{y}} - y = {self.a2[0]:.4f} - {int(self.y_target)} = {self.dL_dz2:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Paragraph([
            "(This result comes from combining the BCE loss derivative with the sigmoid activation derivative.)"
        ]))

        # W2 gradients
        explanation.add_element(ContentAST.Paragraph([
            "**Step 2: Gradients for hidden-to-output weights**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "Using the chain rule:"
        ]))

        for i in range(self.num_hidden):
            grad = self._compute_gradient_W2(i)
            explanation.add_element(ContentAST.Equation(
                f"\\frac{{\\partial L}}{{\\partial w_{i+3}}} = \\frac{{\\partial L}}{{\\partial z_{{out}}}} \\cdot \\frac{{\\partial z_{{out}}}}{{\\partial w_{i+3}}} = {self.dL_dz2:.4f} \\cdot {self.a1[i]:.4f} = {grad:.4f}",
                inline=False
            ))

        # W1 gradients
        explanation.add_element(ContentAST.Paragraph([
            "**Step 3: Gradients for input-to-hidden weights**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "First, compute the gradient flowing back through the first hidden neuron:"
        ]))

        for j in range(self.num_inputs):
            # Intermediate values for the first hidden neuron (the one asked about)
            dz2_da1 = self.W2[0, 0]
            da1_dz1 = self._activation_derivative(self.z1[0])

            grad = self._compute_gradient_W1(0, j)

            if self.activation_function == self.ACTIVATION_SIGMOID:
                act_deriv_str = f"\\sigma'(z_1) = h_1(1-h_1) = {self.a1[0]:.4f}(1-{self.a1[0]:.4f}) = {da1_dz1:.4f}"
            elif self.activation_function == self.ACTIVATION_RELU:
                act_deriv_str = f"\\text{{ReLU}}'(z_1) = \\mathbb{{1}}(z_1 > 0) = {da1_dz1:.4f}"
            else:
                act_deriv_str = "1"

            explanation.add_element(ContentAST.Equation(
                f"\\frac{{\\partial L}}{{\\partial w_{{1{j+1}}}}} = \\frac{{\\partial L}}{{\\partial z_{{out}}}} \\cdot w_{3} \\cdot {act_deriv_str} \\cdot x_{j+1} = {self.dL_dz2:.4f} \\cdot {dz2_da1:.4f} \\cdot {da1_dz1:.4f} \\cdot {self.X[j]:.1f} = {grad:.4f}",
                inline=False
            ))

        return explanation


@QuestionRegistry.register()
class EnsembleAveragingQuestion(Question):
    """
    Question asking students to combine predictions from multiple models (ensemble).

    Students calculate:
    - Mean prediction (for regression)
    - Optionally: variance or other statistics
    """

    def __init__(self, *args, **kwargs):
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        self.num_models = kwargs.get("num_models", 5)
        self.predictions = None

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate predictions from multiple models.
        # Use a range that makes sense for typical regression problems.
        base_value = self.rng.uniform(0, 10)
        self.predictions = [
            base_value + self.rng.uniform(-2, 2)
            for _ in range(self.num_models)
        ]

        # Round to make calculations easier
        self.predictions = [round(p, 1) for p in self.predictions]

        # Create answers
        self._create_answers()

    def _create_answers(self):
        """Create answer fields for ensemble statistics."""
        self.answers = {}

        # Mean prediction
        mean_pred = np.mean(self.predictions)
        self.answers["mean"] = Answer.float_value("mean", float(mean_pred))

        # Median (optional, but useful)
        median_pred = np.median(self.predictions)
        self.answers["median"] = Answer.float_value("median", float(median_pred))
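
    # Worked example (illustrative predictions, not generated here): for
    # [4.2, 5.1, 3.8, 6.0, 4.9] the mean is 24.0 / 5 = 4.8, and the median of
    # the sorted list (3.8, 4.2, 4.9, 5.1, 6.0) is 4.9.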

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            f"You have trained {self.num_models} different regression models on the same dataset. "
            f"For a particular test input, each model produces the following predictions:"
        ]))

        # Show predictions
        pred_list = ", ".join([f"{p:.1f}" for p in self.predictions])
        body.add_element(ContentAST.Paragraph([
            f"Model predictions: {pred_list}"
        ]))

        # Question
        body.add_element(ContentAST.Paragraph([
            "To create an ensemble, calculate the combined prediction using the following methods:"
        ]))

        # Create answer block
        answers = []
        answers.append(
            ContentAST.Answer(
                answer=self.answers["mean"],
                label="Mean (average)"
            )
        )
        answers.append(
            ContentAST.Answer(
                answer=self.answers["median"],
                label="Median"
            )
        )

        body.add_element(ContentAST.AnswerBlock(answers))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "Ensemble methods combine predictions from multiple models to create a more robust prediction."
        ]))

        # Mean calculation
        explanation.add_element(ContentAST.Paragraph([
            "**Mean (Bagging approach):**"
        ]))

        pred_sum = " + ".join([f"{p:.1f}" for p in self.predictions])
        mean_val = np.mean(self.predictions)

        explanation.add_element(ContentAST.Equation(
            f"\\text{{mean}} = \\frac{{{pred_sum}}}{{{self.num_models}}} = \\frac{{{sum(self.predictions):.1f}}}{{{self.num_models}}} = {mean_val:.4f}",
            inline=False
        ))

        # Median calculation
        explanation.add_element(ContentAST.Paragraph([
            "**Median:**"
        ]))

        sorted_preds = sorted(self.predictions)
        sorted_str = ", ".join([f"{p:.1f}" for p in sorted_preds])
        median_val = np.median(self.predictions)

        explanation.add_element(ContentAST.Paragraph([
            f"Sorted predictions: {sorted_str}"
        ]))

        if self.num_models % 2 == 1:
            mid_idx = self.num_models // 2
            explanation.add_element(ContentAST.Paragraph([
                f"Middle value (position {mid_idx + 1}): {median_val:.1f}"
            ]))
        else:
            mid_idx1 = self.num_models // 2 - 1
            mid_idx2 = self.num_models // 2
            explanation.add_element(ContentAST.Paragraph([
                f"Average of the middle two values (positions {mid_idx1 + 1} and {mid_idx2 + 1}): "
                f"({sorted_preds[mid_idx1]:.1f} + {sorted_preds[mid_idx2]:.1f}) / 2 = {median_val:.1f}"
            ]))

        return explanation


@QuestionRegistry.register()
class EndToEndTrainingQuestion(SimpleNeuralNetworkBase):
    """
    End-to-end training step question.

    Students perform a complete training iteration:
    1. Forward pass → prediction
    2. Loss calculation (binary cross-entropy)
    3. Backpropagation → gradients for specific weights
    4. Weight update → new weight values
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.learning_rate = None
        self.new_W1 = None
        self.new_W2 = None

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate network
        self._generate_network()
        self._select_activation_function()

        # Run forward pass
        self._forward_pass()

        # Generate binary target (0 or 1)
        # Choose the opposite of what the network predicts to create meaningful gradients
        if self.a2[0] > 0.5:
            self.y_target = 0
        else:
            self.y_target = 1
        self._compute_loss(self.y_target)
        # Round loss to display precision (4 decimal places)
        self.loss = round(self.loss, 4)
        self._compute_output_gradient()

        # Set learning rate (use small value for stability)
        self.learning_rate = round(self.rng.uniform(0.05, 0.2), 2)

        # Compute updated weights
        self._compute_weight_updates()

        # Create answers
        self._create_answers()

    def _compute_weight_updates(self):
        """Compute new weights after gradient descent step."""
        # Update W2
        self.new_W2 = np.copy(self.W2)
        for i in range(self.num_hidden):
            grad = self._compute_gradient_W2(i)
            self.new_W2[0, i] = self.W2[0, i] - self.learning_rate * grad

        # Update W1 (first hidden neuron only for simplicity)
        self.new_W1 = np.copy(self.W1)
        for j in range(self.num_inputs):
            grad = self._compute_gradient_W1(0, j)
            self.new_W1[0, j] = self.W1[0, j] - self.learning_rate * grad

    def _create_answers(self):
        """Create answer fields for all steps."""
        self.answers = {}

        # Forward pass answer
        self.answers["y_pred"] = Answer.float_value("y_pred", float(self.a2[0]))

        # Loss answer
        self.answers["loss"] = Answer.float_value("loss", float(self.loss))

        # Gradient answers (for key weights)
        self.answers["grad_w3"] = Answer.auto_float("grad_w3", self._compute_gradient_W2(0))
        self.answers["grad_w11"] = Answer.auto_float("grad_w11", self._compute_gradient_W1(0, 0))

        # Updated weight answers
        self.answers["new_w3"] = Answer.float_value("new_w3", float(self.new_W2[0, 0]))
        self.answers["new_w11"] = Answer.float_value("new_w11", float(self.new_W1[0, 0]))
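
    # Worked example (illustrative numbers): with w_3 = 0.5, α = 0.1, and
    # ∂L/∂w_3 = 0.25, the update gives w_3_new = 0.5 - 0.1 · 0.25 = 0.475.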

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            f"Given the neural network below with {self._get_activation_name()} activation "
            f"in the hidden layer and sigmoid activation in the output layer (for binary classification), "
            f"perform one complete training step (forward pass, loss calculation, "
            f"backpropagation, and weight update) for the given input and target."
        ]))

        # Network diagram
        body.add_element(
            ContentAST.Picture(
                img_data=self._generate_network_diagram(show_weights=True, show_activations=False)
            )
        )

        # Training parameters
        body.add_element(ContentAST.Paragraph([
            "**Training parameters:**"
        ]))

        body.add_element(ContentAST.Paragraph([
            "Input: ",
            ContentAST.Equation(f"x_1 = {self.X[0]:.1f}", inline=True),
            ", ",
            ContentAST.Equation(f"x_2 = {self.X[1]:.1f}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            "Target: ",
            ContentAST.Equation(f"y = {int(self.y_target)}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            "Learning rate: ",
            ContentAST.Equation(f"\\alpha = {self.learning_rate}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            f"**Hidden layer activation:** {self._get_activation_name()}"
        ]))

        # Network parameters table
        body.add_element(self._generate_parameter_table(include_activations=False))

        # Create answer block
        answers = []

        answers.append(
            ContentAST.Answer(
                answer=self.answers["y_pred"],
                label="1. Forward Pass - Network output ŷ"
            )
        )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["loss"],
                label="2. Loss"
            )
        )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["grad_w3"],
                label="3. Gradient ∂L/∂w₃"
            )
        )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["grad_w11"],
                label="4. Gradient ∂L/∂w₁₁"
            )
        )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["new_w3"],
                label="5. Updated w₃"
            )
        )

        answers.append(
            ContentAST.Answer(
                answer=self.answers["new_w11"],
                label="6. Updated w₁₁"
            )
        )

        body.add_element(ContentAST.AnswerBlock(answers))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "This problem requires performing one complete training iteration. Let's go through each step."
        ]))

        # Step 1: Forward pass
        explanation.add_element(ContentAST.Paragraph([
            "**Step 1: Forward Pass**"
        ]))

        # Hidden layer
        explanation.add_element(ContentAST.Equation(
            f"z_1 = w_{{11}} x_1 + w_{{12}} x_2 + b_1 = {self.W1[0,0]:.{self.param_digits}f} \\cdot {self.X[0]:.1f} + {self.W1[0,1]:.{self.param_digits}f} \\cdot {self.X[1]:.1f} + {self.b1[0]:.{self.param_digits}f} = {self.z1[0]:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Equation(
            f"h_1 = {self._get_activation_name()}(z_1) = {self.a1[0]:.4f}",
            inline=False
        ))

        # Similarly for h2 (abbreviated)
        explanation.add_element(ContentAST.Equation(
            f"h_2 = {self.a1[1]:.4f} \\text{{ (calculated similarly)}}",
            inline=False
        ))

        # Output (pre-activation)
        explanation.add_element(ContentAST.Equation(
            f"z_{{out}} = w_3 h_1 + w_4 h_2 + b_2 = {self.W2[0,0]:.{self.param_digits}f} \\cdot {self.a1[0]:.4f} + {self.W2[0,1]:.{self.param_digits}f} \\cdot {self.a1[1]:.4f} + {self.b2[0]:.{self.param_digits}f} = {self.z2[0]:.4f}",
            inline=False
        ))

        # Output (sigmoid activation)
        explanation.add_element(ContentAST.Equation(
            f"\\hat{{y}} = \\sigma(z_{{out}}) = \\frac{{1}}{{1 + e^{{-{self.z2[0]:.4f}}}}} = {self.a2[0]:.4f}",
            inline=False
        ))

        # Step 2: Loss
        explanation.add_element(ContentAST.Paragraph([
            "**Step 2: Calculate Loss (Binary Cross-Entropy)**"
        ]))

        # Show the full BCE formula first
        explanation.add_element(ContentAST.Equation(
            "L = -[y \\log(\\hat{y}) + (1-y) \\log(1-\\hat{y})]",
            inline=False
        ))

        # Then evaluate it
        if self.y_target == 1:
            explanation.add_element(ContentAST.Equation(
                f"L = -[1 \\cdot \\log({self.a2[0]:.4f}) + 0 \\cdot \\log(1-{self.a2[0]:.4f})] = -\\log({self.a2[0]:.4f}) = {self.loss:.4f}",
                inline=False
            ))
        else:
            explanation.add_element(ContentAST.Equation(
                f"L = -[0 \\cdot \\log({self.a2[0]:.4f}) + 1 \\cdot \\log(1-{self.a2[0]:.4f})] = -\\log({1-self.a2[0]:.4f}) = {self.loss:.4f}",
                inline=False
            ))

        # Step 3: Gradients
        explanation.add_element(ContentAST.Paragraph([
            "**Step 3: Compute Gradients**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "For BCE with sigmoid, the output layer gradient simplifies to:"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L}}{{\\partial z_{{out}}}} = \\hat{{y}} - y = {self.a2[0]:.4f} - {int(self.y_target)} = {self.dL_dz2:.4f}",
            inline=False
        ))

        grad_w3 = self._compute_gradient_W2(0)
        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L}}{{\\partial w_3}} = \\frac{{\\partial L}}{{\\partial z_{{out}}}} \\cdot h_1 = {self.dL_dz2:.4f} \\cdot {self.a1[0]:.4f} = {grad_w3:.4f}",
            inline=False
        ))

        grad_w11 = self._compute_gradient_W1(0, 0)
        dz2_da1 = self.W2[0, 0]
        da1_dz1 = self._activation_derivative(self.z1[0])

        if self.activation_function == self.ACTIVATION_SIGMOID:
            act_deriv_str = "h_1(1-h_1)"
        elif self.activation_function == self.ACTIVATION_RELU:
            act_deriv_str = "\\text{ReLU}'(z_1)"
        else:
            act_deriv_str = "1"

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L}}{{\\partial w_{{11}}}} = \\frac{{\\partial L}}{{\\partial z_{{out}}}} \\cdot w_3 \\cdot {act_deriv_str} \\cdot x_1 = {self.dL_dz2:.4f} \\cdot {dz2_da1:.4f} \\cdot {da1_dz1:.4f} \\cdot {self.X[0]:.1f} = {grad_w11:.4f}",
            inline=False
        ))

        # Step 4: Weight updates
        explanation.add_element(ContentAST.Paragraph([
            "**Step 4: Update Weights**"
        ]))

        new_w3 = self.new_W2[0, 0]
        explanation.add_element(ContentAST.Equation(
            f"w_3^{{new}} = w_3 - \\alpha \\frac{{\\partial L}}{{\\partial w_3}} = {self.W2[0,0]:.{self.param_digits}f} - {self.learning_rate} \\cdot {grad_w3:.4f} = {new_w3:.4f}",
            inline=False
        ))

        new_w11 = self.new_W1[0, 0]
        explanation.add_element(ContentAST.Equation(
            f"w_{{11}}^{{new}} = w_{{11}} - \\alpha \\frac{{\\partial L}}{{\\partial w_{{11}}}} = {self.W1[0,0]:.{self.param_digits}f} - {self.learning_rate} \\cdot {grad_w11:.4f} = {new_w11:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Paragraph([
            "These updated weights would be used in the next training iteration."
        ]))

        return explanation
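

if __name__ == "__main__":
    # Standalone sanity-check sketch (not part of the question classes): it
    # re-derives the forward/backward math above with plain NumPy. The weights,
    # inputs, and target below are illustrative assumptions, not values
    # generated by the classes in this module.
    W1 = np.array([[0.5, -1.0], [1.0, 0.5]])  # input-to-hidden (2x2)
    b1 = np.array([0.1, -0.2])
    W2 = np.array([[0.7, -0.3]])              # hidden-to-output (1x2)
    b2 = np.array([0.2])
    X = np.array([1.0, 2.0])
    y = 1.0

    z1 = W1 @ X + b1                          # hidden pre-activations
    a1 = 1 / (1 + np.exp(-z1))                # sigmoid hidden activations
    z2 = W2 @ a1 + b2                         # output pre-activation
    y_hat = 1 / (1 + np.exp(-z2))             # sigmoid output

    loss = -(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
    dL_dz2 = y_hat[0] - y                     # BCE + sigmoid shortcut: ŷ - y
    dL_dw3 = dL_dz2 * a1[0]                   # ∂L/∂w_3
    dL_dw11 = dL_dz2 * W2[0, 0] * a1[0] * (1 - a1[0]) * X[0]  # ∂L/∂w_11

    print(f"y_hat={y_hat[0]:.4f}  loss={loss[0]:.4f}  "
          f"dL/dw3={dL_dw3:.4f}  dL/dw11={dL_dw11:.4f}")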