QuizGenerator 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. QuizGenerator/README.md +5 -0
  2. QuizGenerator/__init__.py +27 -0
  3. QuizGenerator/__main__.py +7 -0
  4. QuizGenerator/canvas/__init__.py +13 -0
  5. QuizGenerator/canvas/canvas_interface.py +627 -0
  6. QuizGenerator/canvas/classes.py +235 -0
  7. QuizGenerator/constants.py +149 -0
  8. QuizGenerator/contentast.py +1955 -0
  9. QuizGenerator/generate.py +253 -0
  10. QuizGenerator/logging.yaml +55 -0
  11. QuizGenerator/misc.py +579 -0
  12. QuizGenerator/mixins.py +548 -0
  13. QuizGenerator/performance.py +202 -0
  14. QuizGenerator/premade_questions/__init__.py +0 -0
  15. QuizGenerator/premade_questions/basic.py +103 -0
  16. QuizGenerator/premade_questions/cst334/__init__.py +1 -0
  17. QuizGenerator/premade_questions/cst334/languages.py +391 -0
  18. QuizGenerator/premade_questions/cst334/math_questions.py +297 -0
  19. QuizGenerator/premade_questions/cst334/memory_questions.py +1400 -0
  20. QuizGenerator/premade_questions/cst334/ostep13_vsfs.py +572 -0
  21. QuizGenerator/premade_questions/cst334/persistence_questions.py +451 -0
  22. QuizGenerator/premade_questions/cst334/process.py +648 -0
  23. QuizGenerator/premade_questions/cst463/__init__.py +0 -0
  24. QuizGenerator/premade_questions/cst463/gradient_descent/__init__.py +3 -0
  25. QuizGenerator/premade_questions/cst463/gradient_descent/gradient_calculation.py +369 -0
  26. QuizGenerator/premade_questions/cst463/gradient_descent/gradient_descent_questions.py +305 -0
  27. QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py +650 -0
  28. QuizGenerator/premade_questions/cst463/gradient_descent/misc.py +73 -0
  29. QuizGenerator/premade_questions/cst463/math_and_data/__init__.py +2 -0
  30. QuizGenerator/premade_questions/cst463/math_and_data/matrix_questions.py +631 -0
  31. QuizGenerator/premade_questions/cst463/math_and_data/vector_questions.py +534 -0
  32. QuizGenerator/premade_questions/cst463/models/__init__.py +0 -0
  33. QuizGenerator/premade_questions/cst463/models/attention.py +192 -0
  34. QuizGenerator/premade_questions/cst463/models/cnns.py +186 -0
  35. QuizGenerator/premade_questions/cst463/models/matrices.py +24 -0
  36. QuizGenerator/premade_questions/cst463/models/rnns.py +202 -0
  37. QuizGenerator/premade_questions/cst463/models/text.py +203 -0
  38. QuizGenerator/premade_questions/cst463/models/weight_counting.py +227 -0
  39. QuizGenerator/premade_questions/cst463/neural-network-basics/__init__.py +6 -0
  40. QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py +1314 -0
  41. QuizGenerator/premade_questions/cst463/tensorflow-intro/__init__.py +6 -0
  42. QuizGenerator/premade_questions/cst463/tensorflow-intro/tensorflow_questions.py +936 -0
  43. QuizGenerator/qrcode_generator.py +293 -0
  44. QuizGenerator/question.py +715 -0
  45. QuizGenerator/quiz.py +467 -0
  46. QuizGenerator/regenerate.py +472 -0
  47. QuizGenerator/typst_utils.py +113 -0
  48. quizgenerator-0.4.2.dist-info/METADATA +265 -0
  49. quizgenerator-0.4.2.dist-info/RECORD +52 -0
  50. quizgenerator-0.4.2.dist-info/WHEEL +4 -0
  51. quizgenerator-0.4.2.dist-info/entry_points.txt +3 -0
  52. quizgenerator-0.4.2.dist-info/licenses/LICENSE +674 -0
@@ -0,0 +1,936 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ import io
5
+ import logging
6
+ import re
7
+ import numpy as np
8
+ import sympy as sp
9
+ from typing import List, Tuple, Dict, Any
10
+
11
+ from QuizGenerator.contentast import ContentAST
12
+ from QuizGenerator.question import Question, Answer, QuestionRegistry
13
+ from QuizGenerator.mixins import TableQuestionMixin, BodyTemplatesMixin
14
+
15
+ # Import gradient descent utilities
16
+ import sys
17
+ import os
18
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'gradient_descent'))
19
+ from misc import generate_function, format_vector
20
+
21
+ log = logging.getLogger(__name__)
22
+
23
+
24
@QuestionRegistry.register()
class ParameterCountingQuestion(Question):
    """
    Question asking students to count parameters in a neural network.

    Given a dense network architecture, students calculate:
    - Total number of weights
    - Total number of biases
    - Total trainable parameters
    """

    def __init__(self, *args, **kwargs):
        # Default the topic to ML optimization unless the caller overrides it.
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        # None means "choose a random depth on first refresh".
        self.num_layers = kwargs.get("num_layers", None)
        self.include_biases = kwargs.get("include_biases", True)

    def refresh(self, rng_seed=None, *args, **kwargs):
        """Regenerate a random architecture and recompute the answer key."""
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate random architecture.
        # NOTE(review): once chosen, num_layers persists across later
        # refreshes, so the depth stays fixed after the first draw —
        # presumably intentional; confirm against callers.
        if self.num_layers is None:
            self.num_layers = self.rng.choice([3, 4])

        # Generate layer sizes
        # Input layer: common sizes for typical problems.
        # NOTE(review): 28*28 == 784, so 784 appears twice and is twice as
        # likely to be drawn; left unchanged to keep seeded output stable.
        input_sizes = [28*28, 32*32, 784, 1024, 64, 128]
        self.layer_sizes = [self.rng.choice(input_sizes)]

        # Hidden layers: reasonable sizes
        for i in range(self.num_layers - 2):
            hidden_size = self.rng.choice([32, 64, 128, 256, 512])
            self.layer_sizes.append(hidden_size)

        # Output layer: typical classification sizes
        output_size = self.rng.choice([2, 10, 100, 1000])
        self.layer_sizes.append(output_size)

        # Calculate correct answers: each layer transition contributes
        # fan_in * fan_out weights and fan_out biases (when enabled).
        self.total_weights = 0
        self.total_biases = 0
        self.weights_per_layer = []
        self.biases_per_layer = []

        for i in range(len(self.layer_sizes) - 1):
            weights = self.layer_sizes[i] * self.layer_sizes[i+1]
            biases = self.layer_sizes[i+1] if self.include_biases else 0

            self.weights_per_layer.append(weights)
            self.biases_per_layer.append(biases)

            self.total_weights += weights
            self.total_biases += biases

        self.total_params = self.total_weights + self.total_biases

        # Create answers
        self._create_answers()

    def _create_answers(self):
        """Create answer fields; total_biases is only asked when biases are enabled."""
        self.answers = {}

        self.answers["total_weights"] = Answer.integer("total_weights", self.total_weights)

        # Fix: the original if/else duplicated the total_params assignment in
        # both branches; only the biases answer is actually conditional.
        if self.include_biases:
            self.answers["total_biases"] = Answer.integer("total_biases", self.total_biases)
        self.answers["total_params"] = Answer.integer("total_params", self.total_params)

    def get_body(self, **kwargs) -> ContentAST.Section:
        """Build the question body: architecture description plus answer table."""
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            "Consider a fully-connected (dense) neural network with the following architecture:"
        ]))

        # Display architecture as "a → b → c"
        arch_parts = []
        for i, size in enumerate(self.layer_sizes):
            if i > 0:
                arch_parts.append(" → ")
            arch_parts.append(str(size))

        body.add_element(ContentAST.Paragraph([
            "Architecture: " + "".join(arch_parts)
        ]))

        if self.include_biases:
            body.add_element(ContentAST.Paragraph([
                "Each layer includes bias terms."
            ]))

        # Answer table
        table_data = []
        table_data.append(["Parameter Type", "Count"])

        table_data.append([
            "Total weights (connections between layers)",
            ContentAST.Answer(self.answers["total_weights"])
        ])

        if self.include_biases:
            table_data.append([
                "Total biases",
                ContentAST.Answer(self.answers["total_biases"])
            ])

        table_data.append([
            "Total trainable parameters",
            ContentAST.Answer(self.answers["total_params"])
        ])

        body.add_element(ContentAST.Table(data=table_data))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        """Build the worked solution: per-layer weight/bias counts and totals."""
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "To count parameters in a dense neural network, we calculate weights and biases for each layer."
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "**Weights calculation:**"
        ]))

        for i in range(len(self.layer_sizes) - 1):
            input_size = self.layer_sizes[i]
            output_size = self.layer_sizes[i+1]
            weights = self.weights_per_layer[i]

            explanation.add_element(ContentAST.Paragraph([
                f"Layer {i+1} → {i+2}: ",
                ContentAST.Equation(f"{input_size} \\times {output_size} = {weights:,}", inline=True),
                " weights"
            ]))

        explanation.add_element(ContentAST.Paragraph([
            "Total weights: ",
            ContentAST.Equation(
                f"{' + '.join([f'{w:,}' for w in self.weights_per_layer])} = {self.total_weights:,}",
                inline=True
            )
        ]))

        if self.include_biases:
            explanation.add_element(ContentAST.Paragraph([
                "**Biases calculation:**"
            ]))

            # Fix: removed unused `output_size` local from this loop.
            for i in range(len(self.layer_sizes) - 1):
                biases = self.biases_per_layer[i]

                explanation.add_element(ContentAST.Paragraph([
                    f"Layer {i+2}: {biases:,} biases (one per neuron)"
                ]))

            explanation.add_element(ContentAST.Paragraph([
                "Total biases: ",
                ContentAST.Equation(
                    f"{' + '.join([f'{b:,}' for b in self.biases_per_layer])} = {self.total_biases:,}",
                    inline=True
                )
            ]))

        explanation.add_element(ContentAST.Paragraph([
            "**Total trainable parameters:**"
        ]))

        if self.include_biases:
            explanation.add_element(ContentAST.Equation(
                f"\\text{{Total}} = {self.total_weights:,} + {self.total_biases:,} = {self.total_params:,}",
                inline=False
            ))
        else:
            explanation.add_element(ContentAST.Equation(
                f"\\text{{Total}} = {self.total_weights:,}",
                inline=False
            ))

        return explanation
212
+
213
+
214
@QuestionRegistry.register()
class ActivationFunctionComputationQuestion(Question):
    """
    Question asking students to compute activation function outputs.

    Given a vector of inputs and an activation function, students calculate
    the output for each element (or entire vector for softmax).
    """

    # Internal identifiers for the supported activation functions.
    ACTIVATION_RELU = "relu"
    ACTIVATION_SIGMOID = "sigmoid"
    ACTIVATION_TANH = "tanh"
    ACTIVATION_SOFTMAX = "softmax"

    def __init__(self, *args, **kwargs):
        # Default the topic to ML optimization unless the caller overrides it.
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        # None means "randomize on first refresh" for both settings.
        self.vector_size = kwargs.get("vector_size", None)
        self.activation = kwargs.get("activation", None)

    def refresh(self, rng_seed=None, *args, **kwargs):
        """Regenerate the input vector, pick an activation, and recompute answers."""
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate random input vector
        # NOTE(review): once chosen, vector_size/activation persist across
        # later refreshes — presumably intentional; confirm against callers.
        if self.vector_size is None:
            self.vector_size = self.rng.choice([3, 4, 5])

        # One-decimal inputs in [-3, 3] keep hand computation tractable.
        self.input_vector = [
            round(self.rng.uniform(-3, 3), 1)
            for _ in range(self.vector_size)
        ]

        # Select activation function
        if self.activation is None:
            activations = [
                self.ACTIVATION_RELU,
                self.ACTIVATION_SIGMOID,
                self.ACTIVATION_TANH,
                self.ACTIVATION_SOFTMAX,
            ]
            self.activation = self.rng.choice(activations)

        # For leaky ReLU, set alpha
        # NOTE(review): leaky_alpha is never read anywhere in this class and
        # no leaky-ReLU branch exists — appears to be dead code.
        self.leaky_alpha = 0.01

        # Compute outputs
        self.output_vector = self._compute_activation(self.input_vector)

        # Create answers
        self._create_answers()

    def _compute_activation(self, inputs):
        """Compute activation function output."""
        if self.activation == self.ACTIVATION_RELU:
            return [max(0, x) for x in inputs]

        elif self.activation == self.ACTIVATION_SIGMOID:
            return [1 / (1 + np.exp(-x)) for x in inputs]

        elif self.activation == self.ACTIVATION_TANH:
            return [np.tanh(x) for x in inputs]

        elif self.activation == self.ACTIVATION_SOFTMAX:
            # Subtract max for numerical stability
            exp_vals = [np.exp(x - max(inputs)) for x in inputs]
            sum_exp = sum(exp_vals)
            return [e / sum_exp for e in exp_vals]

        else:
            raise ValueError(f"Unknown activation: {self.activation}")

    def _get_activation_name(self):
        """Get human-readable activation name."""
        names = {
            self.ACTIVATION_RELU: "ReLU",
            self.ACTIVATION_SIGMOID: "Sigmoid",
            self.ACTIVATION_TANH: "Tanh",
            self.ACTIVATION_SOFTMAX: "Softmax",
        }
        return names.get(self.activation, "Unknown")

    def _get_activation_formula(self):
        """Get LaTeX formula for activation function."""
        if self.activation == self.ACTIVATION_RELU:
            return r"\text{ReLU}(x) = \max(0, x)"

        elif self.activation == self.ACTIVATION_SIGMOID:
            return r"\sigma(x) = \frac{1}{1 + e^{-x}}"

        elif self.activation == self.ACTIVATION_TANH:
            return r"\tanh(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}"

        elif self.activation == self.ACTIVATION_SOFTMAX:
            return r"\text{softmax}(x_i) = \frac{e^{x_i}}{\sum_j e^{x_j}}"

        # Unknown activation: no formula to show.
        return ""

    def _create_answers(self):
        """Create answer fields."""
        self.answers = {}

        if self.activation == self.ACTIVATION_SOFTMAX:
            # Softmax: single vector answer (the outputs are coupled by the sum).
            self.answers["output"] = Answer.vector_value("output", self.output_vector)
        else:
            # Element-wise: individual answers, one per input component.
            for i, output in enumerate(self.output_vector):
                key = f"output_{i}"
                self.answers[key] = Answer.float_value(key, float(output))

    def get_body(self, **kwargs) -> ContentAST.Section:
        """Build the question body: formula, input vector, and answer table."""
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            f"Given the input vector below, compute the output after applying the {self._get_activation_name()} activation function."
        ]))

        # Display formula
        body.add_element(ContentAST.Paragraph([
            "Activation function: ",
            ContentAST.Equation(self._get_activation_formula(), inline=True)
        ]))

        # Input vector
        input_str = ", ".join([f"{x:.1f}" for x in self.input_vector])
        body.add_element(ContentAST.Paragraph([
            "Input: ",
            ContentAST.Equation(f"[{input_str}]", inline=True)
        ]))

        # Answer table: one vector cell for softmax, one row per element otherwise.
        if self.activation == self.ACTIVATION_SOFTMAX:
            body.add_element(ContentAST.Paragraph([
                "Compute the output vector:"
            ]))

            table_data = []
            table_data.append(["Output Vector"])
            table_data.append([ContentAST.Answer(self.answers["output"])])

            body.add_element(ContentAST.Table(data=table_data))

        else:
            body.add_element(ContentAST.Paragraph([
                "Compute the output for each element:"
            ]))

            table_data = []
            table_data.append(["Input", "Output"])

            for i, x in enumerate(self.input_vector):
                table_data.append([
                    ContentAST.Equation(f"{x:.1f}", inline=True),
                    ContentAST.Answer(self.answers[f"output_{i}"])
                ])

            body.add_element(ContentAST.Table(data=table_data))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        """Build the worked solution for the chosen activation function."""
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            f"To compute the {self._get_activation_name()} activation, we apply the formula to each input."
        ]))

        if self.activation == self.ACTIVATION_SOFTMAX:
            explanation.add_element(ContentAST.Paragraph([
                "**Softmax computation:**"
            ]))

            # Show exponentials symbolically first.
            exp_strs = [f"e^{{{x:.1f}}}" for x in self.input_vector]
            explanation.add_element(ContentAST.Paragraph([
                "First, compute exponentials: ",
                ContentAST.Equation(", ".join(exp_strs), inline=True)
            ]))

            # Numerical values. Uses raw exp (no max-shift, unlike
            # _compute_activation); the final ratios are the same.
            exp_vals = [np.exp(x) for x in self.input_vector]
            exp_vals_str = ", ".join([f"{e:.4f}" for e in exp_vals])
            explanation.add_element(ContentAST.Paragraph([
                ContentAST.Equation(f"\\approx [{exp_vals_str}]", inline=True)
            ]))

            # Sum of exponentials (the softmax denominator).
            sum_exp = sum(exp_vals)
            explanation.add_element(ContentAST.Paragraph([
                "Sum: ",
                ContentAST.Equation(f"{sum_exp:.4f}", inline=True)
            ]))

            # Final outputs
            explanation.add_element(ContentAST.Paragraph([
                "Divide each by the sum:"
            ]))

            for i, (exp_val, output) in enumerate(zip(exp_vals, self.output_vector)):
                explanation.add_element(ContentAST.Equation(
                    f"\\text{{softmax}}({self.input_vector[i]:.1f}) = \\frac{{{exp_val:.4f}}}{{{sum_exp:.4f}}} = {output:.4f}",
                    inline=False
                ))

        else:
            explanation.add_element(ContentAST.Paragraph([
                "**Element-wise computation:**"
            ]))

            # One displayed equation per input element.
            for i, (x, y) in enumerate(zip(self.input_vector, self.output_vector)):
                if self.activation == self.ACTIVATION_RELU:
                    explanation.add_element(ContentAST.Equation(
                        f"\\text{{ReLU}}({x:.1f}) = \\max(0, {x:.1f}) = {y:.4f}",
                        inline=False
                    ))

                elif self.activation == self.ACTIVATION_SIGMOID:
                    explanation.add_element(ContentAST.Equation(
                        f"\\sigma({x:.1f}) = \\frac{{1}}{{1 + e^{{-{x:.1f}}}}} = {y:.4f}",
                        inline=False
                    ))

                elif self.activation == self.ACTIVATION_TANH:
                    explanation.add_element(ContentAST.Equation(
                        f"\\tanh({x:.1f}) = {y:.4f}",
                        inline=False
                    ))

        return explanation
445
+
446
+
447
@QuestionRegistry.register()
class RegularizationCalculationQuestion(Question):
    """
    Question asking students to calculate loss with L2 regularization.

    Given a small network (2-4 weights), students calculate:
    - Forward pass
    - Base MSE loss
    - L2 regularization penalty
    - Total loss
    - Gradient with regularization for one weight
    """

    def __init__(self, *args, **kwargs):
        # Default the topic to ML optimization unless the caller overrides it.
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        # None means "pick a random weight count on first refresh".
        self.num_weights = kwargs.get("num_weights", None)

    def refresh(self, rng_seed=None, *args, **kwargs):
        """Regenerate weights/data/lambda and recompute all losses and the gradient."""
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate small network (2-4 weights for simplicity)
        # NOTE(review): once chosen, num_weights persists across later
        # refreshes — presumably intentional; confirm against callers.
        if self.num_weights is None:
            self.num_weights = self.rng.choice([2, 3, 4])

        # Generate weights (small values)
        self.weights = [
            round(self.rng.uniform(-2, 2), 1)
            for _ in range(self.num_weights)
        ]

        # Generate input and target
        self.input_val = round(self.rng.uniform(-3, 3), 1)
        self.target = round(self.rng.uniform(-5, 5), 1)

        # Regularization coefficient
        self.lambda_reg = self.rng.choice([0.01, 0.05, 0.1, 0.5])

        # Forward pass (simple linear combination for simplicity)
        # prediction = sum(w_i * input^i) for i in 0..n
        # This gives us a polynomial: w0 + w1*x + w2*x^2 + ...
        self.prediction = sum(
            w * (self.input_val ** i)
            for i, w in enumerate(self.weights)
        )

        # Calculate losses (1/2 factor makes the gradient algebra clean).
        self.base_loss = 0.5 * (self.target - self.prediction) ** 2
        self.l2_penalty = (self.lambda_reg / 2) * sum(w**2 for w in self.weights)
        self.total_loss = self.base_loss + self.l2_penalty

        # Calculate gradient for first weight (w0, the bias term)
        # dL_base/dw0 = -(target - prediction) * dPrediction/dw0
        # dPrediction/dw0 = input^0 = 1
        # dL_reg/dw0 = lambda * w0
        # dL_total/dw0 = dL_base/dw0 + dL_reg/dw0

        self.grad_base_w0 = -(self.target - self.prediction) * 1  # derivative of w0*x^0
        self.grad_reg_w0 = self.lambda_reg * self.weights[0]
        self.grad_total_w0 = self.grad_base_w0 + self.grad_reg_w0

        # Create answers
        self._create_answers()

    def _create_answers(self):
        """Create answer fields."""
        self.answers = {}

        self.answers["prediction"] = Answer.float_value("prediction", float(self.prediction))
        self.answers["base_loss"] = Answer.float_value("base_loss", float(self.base_loss))
        self.answers["l2_penalty"] = Answer.float_value("l2_penalty", float(self.l2_penalty))
        self.answers["total_loss"] = Answer.float_value("total_loss", float(self.total_loss))
        # NOTE(review): the gradient uses Answer.auto_float while the others
        # use Answer.float_value — confirm whether the different tolerance /
        # grading behavior is intentional.
        self.answers["grad_total_w0"] = Answer.auto_float("grad_total_w0", float(self.grad_total_w0))

    def get_body(self, **kwargs) -> ContentAST.Section:
        """Build the question body: model description, data point, and answer table."""
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            "Consider a simple model with the following parameters:"
        ]))

        # Display weights
        weight_strs = [f"w_{i} = {w:.1f}" for i, w in enumerate(self.weights)]
        body.add_element(ContentAST.Paragraph([
            "Weights: ",
            ContentAST.Equation(", ".join(weight_strs), inline=True)
        ]))

        # Model equation: polynomial w0 + w1 x + w2 x^2 + ...
        terms = []
        for i, w in enumerate(self.weights):
            if i == 0:
                terms.append(f"w_0")
            elif i == 1:
                terms.append(f"w_1 x")
            else:
                terms.append(f"w_{i} x^{i}")

        model_eq = " + ".join(terms)
        body.add_element(ContentAST.Paragraph([
            "Model: ",
            ContentAST.Equation(f"\\hat{{y}} = {model_eq}", inline=True)
        ]))

        # Data point
        body.add_element(ContentAST.Paragraph([
            "Data point: ",
            ContentAST.Equation(f"x = {self.input_val:.1f}, y = {self.target:.1f}", inline=True)
        ]))

        # Regularization
        body.add_element(ContentAST.Paragraph([
            "L2 regularization coefficient: ",
            ContentAST.Equation(f"\\lambda = {self.lambda_reg}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            "Calculate the following:"
        ]))

        # Answer table: one row per requested quantity.
        table_data = []
        table_data.append(["Calculation", "Value"])

        table_data.append([
            ContentAST.Paragraph(["Prediction ", ContentAST.Equation(r"\hat{y}", inline=True)]),
            ContentAST.Answer(self.answers["prediction"])
        ])

        table_data.append([
            ContentAST.Paragraph(["Base MSE loss: ", ContentAST.Equation(r"L_{base} = (1/2)(y - \hat{y})^2", inline=True)]),
            ContentAST.Answer(self.answers["base_loss"])
        ])

        table_data.append([
            ContentAST.Paragraph(["L2 penalty: ", ContentAST.Equation(r"L_{reg} = (\lambda/2)\sum w_i^2", inline=True)]),
            ContentAST.Answer(self.answers["l2_penalty"])
        ])

        table_data.append([
            ContentAST.Paragraph(["Total loss: ", ContentAST.Equation(r"L_{total} = L_{base} + L_{reg}", inline=True)]),
            ContentAST.Answer(self.answers["total_loss"])
        ])

        table_data.append([
            ContentAST.Paragraph(["Gradient: ", ContentAST.Equation(r"\frac{\partial L_{total}}{\partial w_0}", inline=True)]),
            ContentAST.Answer(self.answers["grad_total_w0"])
        ])

        body.add_element(ContentAST.Table(data=table_data))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        """Build the worked solution: forward pass, losses, and gradient, step by step."""
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "L2 regularization adds a penalty term to the loss function to prevent overfitting by keeping weights small."
        ]))

        # Step 1: Forward pass
        explanation.add_element(ContentAST.Paragraph([
            "**Step 1: Compute prediction**"
        ]))

        # Build the substituted polynomial term-by-term.
        terms = []
        for i, w in enumerate(self.weights):
            if i == 0:
                terms.append(f"{w:.1f}")
            else:
                x_term = f"{self.input_val:.1f}^{i}" if i > 1 else f"{self.input_val:.1f}"
                terms.append(f"{w:.1f} \\times {x_term}")

        explanation.add_element(ContentAST.Equation(
            f"\\hat{{y}} = {' + '.join(terms)} = {self.prediction:.4f}",
            inline=False
        ))

        # Step 2: Base loss
        explanation.add_element(ContentAST.Paragraph([
            "**Step 2: Compute base MSE loss**"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"L_{{base}} = \\frac{{1}}{{2}}(y - \\hat{{y}})^2 = \\frac{{1}}{{2}}({self.target:.1f} - {self.prediction:.4f})^2 = {self.base_loss:.4f}",
            inline=False
        ))

        # Step 3: L2 penalty
        explanation.add_element(ContentAST.Paragraph([
            "**Step 3: Compute L2 penalty**"
        ]))

        weight_squares = [f"{w:.1f}^2" for w in self.weights]
        sum_squares = sum(w**2 for w in self.weights)

        explanation.add_element(ContentAST.Equation(
            f"L_{{reg}} = \\frac{{\\lambda}}{{2}} \\sum w_i^2 = \\frac{{{self.lambda_reg}}}{{2}}({' + '.join(weight_squares)}) = \\frac{{{self.lambda_reg}}}{{2}} \\times {sum_squares:.4f} = {self.l2_penalty:.4f}",
            inline=False
        ))

        # Step 4: Total loss
        explanation.add_element(ContentAST.Paragraph([
            "**Step 4: Compute total loss**"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"L_{{total}} = L_{{base}} + L_{{reg}} = {self.base_loss:.4f} + {self.l2_penalty:.4f} = {self.total_loss:.4f}",
            inline=False
        ))

        # Step 5: Gradient with regularization
        explanation.add_element(ContentAST.Paragraph([
            "**Step 5: Compute gradient with regularization**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            ContentAST.Equation(r"w_0", inline=True),
            " (the bias term):"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L_{{base}}}}{{\\partial w_0}} = -(y - \\hat{{y}}) \\times 1 = -({self.target:.1f} - {self.prediction:.4f}) = {self.grad_base_w0:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L_{{reg}}}}{{\\partial w_0}} = \\lambda w_0 = {self.lambda_reg} \\times {self.weights[0]:.1f} = {self.grad_reg_w0:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L_{{total}}}}{{\\partial w_0}} = {self.grad_base_w0:.4f} + {self.grad_reg_w0:.4f} = {self.grad_total_w0:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Paragraph([
            "The regularization term adds ",
            ContentAST.Equation(f"\\lambda w_0 = {self.grad_reg_w0:.4f}", inline=True),
            " to the gradient, pushing the weight toward zero."
        ]))

        return explanation
692
+
693
+
694
@QuestionRegistry.register()
class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin):
    """
    Question asking students to perform gradient descent with momentum.

    Given a function, current weights, gradients, learning rate, and momentum coefficient,
    students calculate:
    - Velocity update using momentum
    - Weight update using the new velocity
    - Comparison to vanilla SGD (optional)
    """
    # NOTE(review): TableQuestionMixin / BodyTemplatesMixin are inherited but
    # no methods from them are visibly used in this class — confirm intent.

    def __init__(self, *args, **kwargs):
        # Default the topic to ML optimization unless the caller overrides it.
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        self.num_variables = kwargs.get("num_variables", 2)
        self.show_vanilla_sgd = kwargs.get("show_vanilla_sgd", True)

    def refresh(self, rng_seed=None, *args, **kwargs):
        """Regenerate the objective, current state, and both update rules' results."""
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate well-conditioned quadratic function
        self.variables, self.function, self.gradient_function, self.equation = \
            generate_function(self.rng, self.num_variables, max_degree=2, use_quadratic=True)

        # Generate current weights (small integers)
        self.current_weights = [
            self.rng.choice([-2, -1, 0, 1, 2])
            for _ in range(self.num_variables)
        ]

        # Calculate gradient at current position by substituting the weights
        # into the symbolic gradient.
        subs_map = dict(zip(self.variables, self.current_weights))
        g_syms = self.gradient_function.subs(subs_map)
        self.gradients = [float(val) for val in g_syms]

        # Generate previous velocity (for momentum)
        # Start with small or zero velocity
        self.prev_velocity = [
            round(self.rng.uniform(-0.5, 0.5), 2)
            for _ in range(self.num_variables)
        ]

        # Hyperparameters
        self.learning_rate = self.rng.choice([0.01, 0.05, 0.1])
        self.momentum_beta = self.rng.choice([0.8, 0.9])

        # Calculate momentum updates (EMA form, matching the formula shown
        # in the question body):
        # v_new = beta * v_old + (1 - beta) * gradient
        self.new_velocity = [
            self.momentum_beta * v_old + (1 - self.momentum_beta) * grad
            for v_old, grad in zip(self.prev_velocity, self.gradients)
        ]

        # w_new = w_old - alpha * v_new
        self.new_weights = [
            w - self.learning_rate * v
            for w, v in zip(self.current_weights, self.new_velocity)
        ]

        # Calculate vanilla SGD for comparison
        if self.show_vanilla_sgd:
            self.sgd_weights = [
                w - self.learning_rate * grad
                for w, grad in zip(self.current_weights, self.gradients)
            ]

        # Create answers
        self._create_answers()

    def _create_answers(self):
        """Create answer fields."""
        self.answers = {}

        # New velocity
        self.answers["velocity"] = Answer.vector_value("velocity", self.new_velocity)

        # New weights with momentum
        self.answers["weights_momentum"] = Answer.vector_value("weights_momentum", self.new_weights)

        # Vanilla SGD weights for comparison
        if self.show_vanilla_sgd:
            self.answers["weights_sgd"] = Answer.vector_value("weights_sgd", self.sgd_weights)

    def get_body(self, **kwargs) -> ContentAST.Section:
        """Build the question body: objective, gradient, current state, and answer table."""
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            "Consider the optimization problem of minimizing the function:"
        ]))

        body.add_element(ContentAST.Equation(
            sp.latex(self.function),
            inline=False
        ))

        body.add_element(ContentAST.Paragraph([
            "The gradient is:"
        ]))

        body.add_element(ContentAST.Equation(
            f"\\nabla f = {sp.latex(self.gradient_function)}",
            inline=False
        ))

        # Current state
        body.add_element(ContentAST.Paragraph([
            "**Current optimization state:**"
        ]))

        body.add_element(ContentAST.Paragraph([
            "Current weights: ",
            ContentAST.Equation(f"{format_vector(self.current_weights)}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            "Previous velocity: ",
            ContentAST.Equation(f"{format_vector(self.prev_velocity)}", inline=True)
        ]))

        # Hyperparameters
        body.add_element(ContentAST.Paragraph([
            "**Hyperparameters:**"
        ]))

        body.add_element(ContentAST.Paragraph([
            "Learning rate: ",
            ContentAST.Equation(f"\\alpha = {self.learning_rate}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            "Momentum coefficient: ",
            ContentAST.Equation(f"\\beta = {self.momentum_beta}", inline=True)
        ]))

        # Questions
        body.add_element(ContentAST.Paragraph([
            "Calculate the following updates:"
        ]))

        # Answer table: formula shown alongside each requested result.
        table_data = []
        table_data.append(["Update Type", "Formula", "Result"])

        table_data.append([
            "New velocity",
            ContentAST.Equation(r"v' = \beta v + (1-\beta)\nabla f", inline=True),
            ContentAST.Answer(self.answers["velocity"])
        ])

        table_data.append([
            "Weights (momentum)",
            ContentAST.Equation(r"w' = w - \alpha v'", inline=True),
            ContentAST.Answer(self.answers["weights_momentum"])
        ])

        if self.show_vanilla_sgd:
            table_data.append([
                "Weights (vanilla SGD)",
                ContentAST.Equation(r"w' = w - \alpha \nabla f", inline=True),
                ContentAST.Answer(self.answers["weights_sgd"])
            ])

        body.add_element(ContentAST.Table(data=table_data))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        """Build the worked solution: velocity update, weight update, and SGD comparison."""
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "Momentum helps gradient descent by accumulating a velocity vector in directions of "
            "consistent gradient, allowing faster convergence and reduced oscillation."
        ]))

        # Step 1: Calculate new velocity
        explanation.add_element(ContentAST.Paragraph([
            "**Step 1: Update velocity using momentum**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "The momentum update formula is:"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"v' = \\beta v + (1 - \\beta) \\nabla f",
            inline=False
        ))

        # Show calculation for each component
        for i in range(self.num_variables):
            var_name = f"x_{i}"  # NOTE(review): assigned but unused
            explanation.add_element(ContentAST.Equation(
                f"v'[{i}] = {self.momentum_beta} \\times {self.prev_velocity[i]:.2f} + "
                f"{1 - self.momentum_beta} \\times {self.gradients[i]:.4f} = {self.new_velocity[i]:.4f}",
                inline=False
            ))

        # Step 2: Update weights with momentum
        explanation.add_element(ContentAST.Paragraph([
            "**Step 2: Update weights using new velocity**"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"w' = w - \\alpha v'",
            inline=False
        ))

        for i in range(self.num_variables):
            explanation.add_element(ContentAST.Equation(
                f"w[{i}] = {self.current_weights[i]} - {self.learning_rate} \\times {self.new_velocity[i]:.4f} = {self.new_weights[i]:.4f}",
                inline=False
            ))

        # Comparison with vanilla SGD
        if self.show_vanilla_sgd:
            explanation.add_element(ContentAST.Paragraph([
                "**Comparison with vanilla SGD:**"
            ]))

            explanation.add_element(ContentAST.Paragraph([
                "Vanilla SGD (no momentum) would update directly using the gradient:"
            ]))

            explanation.add_element(ContentAST.Equation(
                f"w' = w - \\alpha \\nabla f",
                inline=False
            ))

            for i in range(self.num_variables):
                explanation.add_element(ContentAST.Equation(
                    f"w[{i}] = {self.current_weights[i]} - {self.learning_rate} \\times {self.gradients[i]:.4f} = {self.sgd_weights[i]:.4f}",
                    inline=False
                ))

            explanation.add_element(ContentAST.Paragraph([
                "The momentum update differs because it incorporates the previous velocity, "
                "which can help accelerate learning and smooth out noisy gradients."
            ]))

        return explanation