QuizGenerator 0.6.3__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuizGenerator/contentast.py +2191 -2193
- QuizGenerator/misc.py +1 -1
- QuizGenerator/mixins.py +64 -64
- QuizGenerator/premade_questions/basic.py +16 -16
- QuizGenerator/premade_questions/cst334/languages.py +26 -26
- QuizGenerator/premade_questions/cst334/math_questions.py +42 -42
- QuizGenerator/premade_questions/cst334/memory_questions.py +124 -124
- QuizGenerator/premade_questions/cst334/persistence_questions.py +48 -48
- QuizGenerator/premade_questions/cst334/process.py +38 -38
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_calculation.py +45 -45
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_descent_questions.py +34 -34
- QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py +53 -53
- QuizGenerator/premade_questions/cst463/gradient_descent/misc.py +2 -2
- QuizGenerator/premade_questions/cst463/math_and_data/matrix_questions.py +65 -65
- QuizGenerator/premade_questions/cst463/math_and_data/vector_questions.py +39 -39
- QuizGenerator/premade_questions/cst463/models/attention.py +36 -36
- QuizGenerator/premade_questions/cst463/models/cnns.py +26 -26
- QuizGenerator/premade_questions/cst463/models/rnns.py +36 -36
- QuizGenerator/premade_questions/cst463/models/text.py +32 -32
- QuizGenerator/premade_questions/cst463/models/weight_counting.py +15 -15
- QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py +124 -124
- QuizGenerator/premade_questions/cst463/tensorflow-intro/tensorflow_questions.py +161 -161
- QuizGenerator/question.py +41 -41
- QuizGenerator/quiz.py +7 -7
- QuizGenerator/regenerate.py +114 -13
- QuizGenerator/typst_utils.py +2 -2
- {quizgenerator-0.6.3.dist-info → quizgenerator-0.7.1.dist-info}/METADATA +1 -1
- {quizgenerator-0.6.3.dist-info → quizgenerator-0.7.1.dist-info}/RECORD +31 -31
- {quizgenerator-0.6.3.dist-info → quizgenerator-0.7.1.dist-info}/WHEEL +0 -0
- {quizgenerator-0.6.3.dist-info → quizgenerator-0.7.1.dist-info}/entry_points.txt +0 -0
- {quizgenerator-0.6.3.dist-info → quizgenerator-0.7.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -8,7 +8,7 @@ import numpy as np
|
|
|
8
8
|
import sympy as sp
|
|
9
9
|
from typing import List, Tuple, Dict, Any
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
import QuizGenerator.contentast as ca
|
|
12
12
|
from QuizGenerator.question import Question, QuestionRegistry
|
|
13
13
|
from QuizGenerator.mixins import TableQuestionMixin, BodyTemplatesMixin
|
|
14
14
|
|
|
@@ -85,21 +85,21 @@ class ParameterCountingQuestion(Question):
|
|
|
85
85
|
"""Create answer fields."""
|
|
86
86
|
self.answers = {}
|
|
87
87
|
|
|
88
|
-
self.answers["total_weights"] = AnswerTypes.Int(self.total_weights, label="Total weights")
|
|
88
|
+
self.answers["total_weights"] = ca.AnswerTypes.Int(self.total_weights, label="Total weights")
|
|
89
89
|
|
|
90
90
|
if self.include_biases:
|
|
91
|
-
self.answers["total_biases"] = AnswerTypes.Int(self.total_biases, label="Total biases")
|
|
92
|
-
self.answers["total_params"] = AnswerTypes.Int(self.total_params, label="Total trainable parameters")
|
|
91
|
+
self.answers["total_biases"] = ca.AnswerTypes.Int(self.total_biases, label="Total biases")
|
|
92
|
+
self.answers["total_params"] = ca.AnswerTypes.Int(self.total_params, label="Total trainable parameters")
|
|
93
93
|
else:
|
|
94
|
-
self.answers["total_params"] = AnswerTypes.Int(self.total_params, label="Total trainable parameters")
|
|
94
|
+
self.answers["total_params"] = ca.AnswerTypes.Int(self.total_params, label="Total trainable parameters")
|
|
95
95
|
|
|
96
|
-
def _get_body(self, **kwargs) -> Tuple[
|
|
96
|
+
def _get_body(self, **kwargs) -> Tuple[ca.Section, List[ca.Answer]]:
|
|
97
97
|
"""Build question body and collect answers."""
|
|
98
|
-
body =
|
|
98
|
+
body = ca.Section()
|
|
99
99
|
answers = []
|
|
100
100
|
|
|
101
101
|
# Question description
|
|
102
|
-
body.add_element(
|
|
102
|
+
body.add_element(ca.Paragraph([
|
|
103
103
|
"Consider a fully-connected (dense) neural network with the following architecture:"
|
|
104
104
|
]))
|
|
105
105
|
|
|
@@ -110,12 +110,12 @@ class ParameterCountingQuestion(Question):
|
|
|
110
110
|
arch_parts.append(" → ")
|
|
111
111
|
arch_parts.append(str(size))
|
|
112
112
|
|
|
113
|
-
body.add_element(
|
|
113
|
+
body.add_element(ca.Paragraph([
|
|
114
114
|
"Architecture: " + "".join(arch_parts)
|
|
115
115
|
]))
|
|
116
116
|
|
|
117
117
|
if self.include_biases:
|
|
118
|
-
body.add_element(
|
|
118
|
+
body.add_element(ca.Paragraph([
|
|
119
119
|
"Each layer includes bias terms."
|
|
120
120
|
]))
|
|
121
121
|
|
|
@@ -143,25 +143,25 @@ class ParameterCountingQuestion(Question):
|
|
|
143
143
|
self.answers["total_params"]
|
|
144
144
|
])
|
|
145
145
|
|
|
146
|
-
body.add_element(
|
|
146
|
+
body.add_element(ca.Table(data=table_data))
|
|
147
147
|
|
|
148
148
|
return body, answers
|
|
149
149
|
|
|
150
|
-
def get_body(self, **kwargs) ->
|
|
150
|
+
def get_body(self, **kwargs) -> ca.Section:
|
|
151
151
|
"""Build question body (backward compatible interface)."""
|
|
152
152
|
body, _ = self._get_body(**kwargs)
|
|
153
153
|
return body
|
|
154
154
|
|
|
155
|
-
def _get_explanation(self, **kwargs) -> Tuple[
|
|
155
|
+
def _get_explanation(self, **kwargs) -> Tuple[ca.Section, List[ca.Answer]]:
|
|
156
156
|
"""Build question explanation."""
|
|
157
|
-
explanation =
|
|
157
|
+
explanation = ca.Section()
|
|
158
158
|
|
|
159
|
-
explanation.add_element(
|
|
159
|
+
explanation.add_element(ca.Paragraph([
|
|
160
160
|
"To count parameters in a dense neural network, we calculate weights and biases for each layer."
|
|
161
161
|
]))
|
|
162
162
|
|
|
163
|
-
explanation.add_element(
|
|
164
|
-
|
|
163
|
+
explanation.add_element(ca.Paragraph([
|
|
164
|
+
ca.Text("Weights calculation:", emphasis=True)
|
|
165
165
|
]))
|
|
166
166
|
|
|
167
167
|
for i in range(len(self.layer_sizes) - 1):
|
|
@@ -169,59 +169,59 @@ class ParameterCountingQuestion(Question):
|
|
|
169
169
|
output_size = self.layer_sizes[i+1]
|
|
170
170
|
weights = self.weights_per_layer[i]
|
|
171
171
|
|
|
172
|
-
explanation.add_element(
|
|
172
|
+
explanation.add_element(ca.Paragraph([
|
|
173
173
|
f"Layer {i+1} → {i+2}: ",
|
|
174
|
-
|
|
174
|
+
ca.Equation(f"{input_size} \\times {output_size} = {weights:,}", inline=True),
|
|
175
175
|
" weights"
|
|
176
176
|
]))
|
|
177
177
|
|
|
178
|
-
explanation.add_element(
|
|
178
|
+
explanation.add_element(ca.Paragraph([
|
|
179
179
|
"Total weights: ",
|
|
180
|
-
|
|
180
|
+
ca.Equation(
|
|
181
181
|
f"{' + '.join([f'{w:,}' for w in self.weights_per_layer])} = {self.total_weights:,}",
|
|
182
182
|
inline=True
|
|
183
183
|
)
|
|
184
184
|
]))
|
|
185
185
|
|
|
186
186
|
if self.include_biases:
|
|
187
|
-
explanation.add_element(
|
|
188
|
-
|
|
187
|
+
explanation.add_element(ca.Paragraph([
|
|
188
|
+
ca.Text("Biases calculation:", emphasis=True)
|
|
189
189
|
]))
|
|
190
190
|
|
|
191
191
|
for i in range(len(self.layer_sizes) - 1):
|
|
192
192
|
output_size = self.layer_sizes[i+1]
|
|
193
193
|
biases = self.biases_per_layer[i]
|
|
194
194
|
|
|
195
|
-
explanation.add_element(
|
|
195
|
+
explanation.add_element(ca.Paragraph([
|
|
196
196
|
f"Layer {i+2}: {biases:,} biases (one per neuron)"
|
|
197
197
|
]))
|
|
198
198
|
|
|
199
|
-
explanation.add_element(
|
|
199
|
+
explanation.add_element(ca.Paragraph([
|
|
200
200
|
"Total biases: ",
|
|
201
|
-
|
|
201
|
+
ca.Equation(
|
|
202
202
|
f"{' + '.join([f'{b:,}' for b in self.biases_per_layer])} = {self.total_biases:,}",
|
|
203
203
|
inline=True
|
|
204
204
|
)
|
|
205
205
|
]))
|
|
206
206
|
|
|
207
|
-
explanation.add_element(
|
|
208
|
-
|
|
207
|
+
explanation.add_element(ca.Paragraph([
|
|
208
|
+
ca.Text("Total trainable parameters:", emphasis=True)
|
|
209
209
|
]))
|
|
210
210
|
|
|
211
211
|
if self.include_biases:
|
|
212
|
-
explanation.add_element(
|
|
212
|
+
explanation.add_element(ca.Equation(
|
|
213
213
|
f"\\text{{Total}} = {self.total_weights:,} + {self.total_biases:,} = {self.total_params:,}",
|
|
214
214
|
inline=False
|
|
215
215
|
))
|
|
216
216
|
else:
|
|
217
|
-
explanation.add_element(
|
|
217
|
+
explanation.add_element(ca.Equation(
|
|
218
218
|
f"\\text{{Total}} = {self.total_weights:,}",
|
|
219
219
|
inline=False
|
|
220
220
|
))
|
|
221
221
|
|
|
222
222
|
return explanation, []
|
|
223
223
|
|
|
224
|
-
def get_explanation(self, **kwargs) ->
|
|
224
|
+
def get_explanation(self, **kwargs) -> ca.Section:
|
|
225
225
|
"""Build question explanation (backward compatible interface)."""
|
|
226
226
|
explanation, _ = self._get_explanation(**kwargs)
|
|
227
227
|
return explanation
|
|
@@ -331,39 +331,39 @@ class ActivationFunctionComputationQuestion(Question):
|
|
|
331
331
|
|
|
332
332
|
if self.activation == self.ACTIVATION_SOFTMAX:
|
|
333
333
|
# Softmax: single vector answer
|
|
334
|
-
self.answers["output"] = AnswerTypes.Vector(self.output_vector, label="Output vector")
|
|
334
|
+
self.answers["output"] = ca.AnswerTypes.Vector(self.output_vector, label="Output vector")
|
|
335
335
|
else:
|
|
336
336
|
# Element-wise: individual answers
|
|
337
337
|
for i, output in enumerate(self.output_vector):
|
|
338
338
|
key = f"output_{i}"
|
|
339
|
-
self.answers[key] = AnswerTypes.Float(float(output), label=f"Output for input {self.input_vector[i]:.1f}")
|
|
339
|
+
self.answers[key] = ca.AnswerTypes.Float(float(output), label=f"Output for input {self.input_vector[i]:.1f}")
|
|
340
340
|
|
|
341
|
-
def _get_body(self, **kwargs) -> Tuple[
|
|
341
|
+
def _get_body(self, **kwargs) -> Tuple[ca.Section, List[ca.Answer]]:
|
|
342
342
|
"""Build question body and collect answers."""
|
|
343
|
-
body =
|
|
343
|
+
body = ca.Section()
|
|
344
344
|
answers = []
|
|
345
345
|
|
|
346
346
|
# Question description
|
|
347
|
-
body.add_element(
|
|
347
|
+
body.add_element(ca.Paragraph([
|
|
348
348
|
f"Given the input vector below, compute the output after applying the {self._get_activation_name()} activation function."
|
|
349
349
|
]))
|
|
350
350
|
|
|
351
351
|
# Display formula
|
|
352
|
-
body.add_element(
|
|
352
|
+
body.add_element(ca.Paragraph([
|
|
353
353
|
"Activation function: ",
|
|
354
|
-
|
|
354
|
+
ca.Equation(self._get_activation_formula(), inline=True)
|
|
355
355
|
]))
|
|
356
356
|
|
|
357
357
|
# Input vector
|
|
358
358
|
input_str = ", ".join([f"{x:.1f}" for x in self.input_vector])
|
|
359
|
-
body.add_element(
|
|
359
|
+
body.add_element(ca.Paragraph([
|
|
360
360
|
"Input: ",
|
|
361
|
-
|
|
361
|
+
ca.Equation(f"[{input_str}]", inline=True)
|
|
362
362
|
]))
|
|
363
363
|
|
|
364
364
|
# Answer table
|
|
365
365
|
if self.activation == self.ACTIVATION_SOFTMAX:
|
|
366
|
-
body.add_element(
|
|
366
|
+
body.add_element(ca.Paragraph([
|
|
367
367
|
"Compute the output vector:"
|
|
368
368
|
]))
|
|
369
369
|
|
|
@@ -372,10 +372,10 @@ class ActivationFunctionComputationQuestion(Question):
|
|
|
372
372
|
table_data.append(["Output Vector"])
|
|
373
373
|
table_data.append([self.answers["output"]])
|
|
374
374
|
|
|
375
|
-
body.add_element(
|
|
375
|
+
body.add_element(ca.Table(data=table_data))
|
|
376
376
|
|
|
377
377
|
else:
|
|
378
|
-
body.add_element(
|
|
378
|
+
body.add_element(ca.Paragraph([
|
|
379
379
|
"Compute the output for each element:"
|
|
380
380
|
]))
|
|
381
381
|
|
|
@@ -386,91 +386,91 @@ class ActivationFunctionComputationQuestion(Question):
|
|
|
386
386
|
answer = self.answers[f"output_{i}"]
|
|
387
387
|
answers.append(answer)
|
|
388
388
|
table_data.append([
|
|
389
|
-
|
|
389
|
+
ca.Equation(f"{x:.1f}", inline=True),
|
|
390
390
|
answer
|
|
391
391
|
])
|
|
392
392
|
|
|
393
|
-
body.add_element(
|
|
393
|
+
body.add_element(ca.Table(data=table_data))
|
|
394
394
|
|
|
395
395
|
return body, answers
|
|
396
396
|
|
|
397
|
-
def get_body(self, **kwargs) ->
|
|
397
|
+
def get_body(self, **kwargs) -> ca.Section:
|
|
398
398
|
"""Build question body (backward compatible interface)."""
|
|
399
399
|
body, _ = self._get_body(**kwargs)
|
|
400
400
|
return body
|
|
401
401
|
|
|
402
|
-
def _get_explanation(self, **kwargs) -> Tuple[
|
|
402
|
+
def _get_explanation(self, **kwargs) -> Tuple[ca.Section, List[ca.Answer]]:
|
|
403
403
|
"""Build question explanation."""
|
|
404
|
-
explanation =
|
|
404
|
+
explanation = ca.Section()
|
|
405
405
|
|
|
406
|
-
explanation.add_element(
|
|
406
|
+
explanation.add_element(ca.Paragraph([
|
|
407
407
|
f"To compute the {self._get_activation_name()} activation, we apply the formula to each input."
|
|
408
408
|
]))
|
|
409
409
|
|
|
410
410
|
if self.activation == self.ACTIVATION_SOFTMAX:
|
|
411
|
-
explanation.add_element(
|
|
412
|
-
|
|
411
|
+
explanation.add_element(ca.Paragraph([
|
|
412
|
+
ca.Text("Softmax computation:", emphasis=True)
|
|
413
413
|
]))
|
|
414
414
|
|
|
415
415
|
# Show exponentials
|
|
416
416
|
exp_strs = [f"e^{{{x:.1f}}}" for x in self.input_vector]
|
|
417
|
-
explanation.add_element(
|
|
417
|
+
explanation.add_element(ca.Paragraph([
|
|
418
418
|
"First, compute exponentials: ",
|
|
419
|
-
|
|
419
|
+
ca.Equation(", ".join(exp_strs), inline=True)
|
|
420
420
|
]))
|
|
421
421
|
|
|
422
422
|
# Numerical values
|
|
423
423
|
exp_vals = [np.exp(x) for x in self.input_vector]
|
|
424
424
|
exp_vals_str = ", ".join([f"{e:.4f}" for e in exp_vals])
|
|
425
|
-
explanation.add_element(
|
|
426
|
-
|
|
425
|
+
explanation.add_element(ca.Paragraph([
|
|
426
|
+
ca.Equation(f"\\approx [{exp_vals_str}]", inline=True)
|
|
427
427
|
]))
|
|
428
428
|
|
|
429
429
|
# Sum
|
|
430
430
|
sum_exp = sum(exp_vals)
|
|
431
|
-
explanation.add_element(
|
|
431
|
+
explanation.add_element(ca.Paragraph([
|
|
432
432
|
"Sum: ",
|
|
433
|
-
|
|
433
|
+
ca.Equation(f"{sum_exp:.4f}", inline=True)
|
|
434
434
|
]))
|
|
435
435
|
|
|
436
436
|
# Final outputs
|
|
437
|
-
explanation.add_element(
|
|
437
|
+
explanation.add_element(ca.Paragraph([
|
|
438
438
|
"Divide each by the sum:"
|
|
439
439
|
]))
|
|
440
440
|
|
|
441
441
|
for i, (exp_val, output) in enumerate(zip(exp_vals, self.output_vector)):
|
|
442
|
-
explanation.add_element(
|
|
442
|
+
explanation.add_element(ca.Equation(
|
|
443
443
|
f"\\text{{softmax}}({self.input_vector[i]:.1f}) = \\frac{{{exp_val:.4f}}}{{{sum_exp:.4f}}} = {output:.4f}",
|
|
444
444
|
inline=False
|
|
445
445
|
))
|
|
446
446
|
|
|
447
447
|
else:
|
|
448
|
-
explanation.add_element(
|
|
449
|
-
|
|
448
|
+
explanation.add_element(ca.Paragraph([
|
|
449
|
+
ca.Text("Element-wise computation:", emphasis=True)
|
|
450
450
|
]))
|
|
451
451
|
|
|
452
452
|
for i, (x, y) in enumerate(zip(self.input_vector, self.output_vector)):
|
|
453
453
|
if self.activation == self.ACTIVATION_RELU:
|
|
454
|
-
explanation.add_element(
|
|
454
|
+
explanation.add_element(ca.Equation(
|
|
455
455
|
f"\\text{{ReLU}}({x:.1f}) = \\max(0, {x:.1f}) = {y:.4f}",
|
|
456
456
|
inline=False
|
|
457
457
|
))
|
|
458
458
|
|
|
459
459
|
elif self.activation == self.ACTIVATION_SIGMOID:
|
|
460
|
-
explanation.add_element(
|
|
460
|
+
explanation.add_element(ca.Equation(
|
|
461
461
|
f"\\sigma({x:.1f}) = \\frac{{1}}{{1 + e^{{-{x:.1f}}}}} = {y:.4f}",
|
|
462
462
|
inline=False
|
|
463
463
|
))
|
|
464
464
|
|
|
465
465
|
elif self.activation == self.ACTIVATION_TANH:
|
|
466
|
-
explanation.add_element(
|
|
466
|
+
explanation.add_element(ca.Equation(
|
|
467
467
|
f"\\tanh({x:.1f}) = {y:.4f}",
|
|
468
468
|
inline=False
|
|
469
469
|
))
|
|
470
470
|
|
|
471
471
|
return explanation, []
|
|
472
472
|
|
|
473
|
-
def get_explanation(self, **kwargs) ->
|
|
473
|
+
def get_explanation(self, **kwargs) -> ca.Section:
|
|
474
474
|
"""Build question explanation (backward compatible interface)."""
|
|
475
475
|
explanation, _ = self._get_explanation(**kwargs)
|
|
476
476
|
return explanation
|
|
@@ -545,27 +545,27 @@ class RegularizationCalculationQuestion(Question):
|
|
|
545
545
|
"""Create answer fields."""
|
|
546
546
|
self.answers = {}
|
|
547
547
|
|
|
548
|
-
self.answers["prediction"] = AnswerTypes.Float(float(self.prediction), label="Prediction ŷ")
|
|
549
|
-
self.answers["base_loss"] = AnswerTypes.Float(float(self.base_loss), label="Base MSE loss")
|
|
550
|
-
self.answers["l2_penalty"] = AnswerTypes.Float(float(self.l2_penalty), label="L2 penalty")
|
|
551
|
-
self.answers["total_loss"] = AnswerTypes.Float(float(self.total_loss), label="Total loss")
|
|
552
|
-
self.answers["grad_total_w0"] = AnswerTypes.Float(float(self.grad_total_w0), label="Gradient ∂L/∂w₀")
|
|
548
|
+
self.answers["prediction"] = ca.AnswerTypes.Float(float(self.prediction), label="Prediction ŷ")
|
|
549
|
+
self.answers["base_loss"] = ca.AnswerTypes.Float(float(self.base_loss), label="Base MSE loss")
|
|
550
|
+
self.answers["l2_penalty"] = ca.AnswerTypes.Float(float(self.l2_penalty), label="L2 penalty")
|
|
551
|
+
self.answers["total_loss"] = ca.AnswerTypes.Float(float(self.total_loss), label="Total loss")
|
|
552
|
+
self.answers["grad_total_w0"] = ca.AnswerTypes.Float(float(self.grad_total_w0), label="Gradient ∂L/∂w₀")
|
|
553
553
|
|
|
554
|
-
def _get_body(self, **kwargs) -> Tuple[
|
|
554
|
+
def _get_body(self, **kwargs) -> Tuple[ca.Section, List[ca.Answer]]:
|
|
555
555
|
"""Build question body and collect answers."""
|
|
556
|
-
body =
|
|
556
|
+
body = ca.Section()
|
|
557
557
|
answers = []
|
|
558
558
|
|
|
559
559
|
# Question description
|
|
560
|
-
body.add_element(
|
|
560
|
+
body.add_element(ca.Paragraph([
|
|
561
561
|
"Consider a simple model with the following parameters:"
|
|
562
562
|
]))
|
|
563
563
|
|
|
564
564
|
# Display weights
|
|
565
565
|
weight_strs = [f"w_{i} = {w:.1f}" for i, w in enumerate(self.weights)]
|
|
566
|
-
body.add_element(
|
|
566
|
+
body.add_element(ca.Paragraph([
|
|
567
567
|
"Weights: ",
|
|
568
|
-
|
|
568
|
+
ca.Equation(", ".join(weight_strs), inline=True)
|
|
569
569
|
]))
|
|
570
570
|
|
|
571
571
|
# Model equation
|
|
@@ -579,24 +579,24 @@ class RegularizationCalculationQuestion(Question):
|
|
|
579
579
|
terms.append(f"w_{i} x^{i}")
|
|
580
580
|
|
|
581
581
|
model_eq = " + ".join(terms)
|
|
582
|
-
body.add_element(
|
|
582
|
+
body.add_element(ca.Paragraph([
|
|
583
583
|
"Model: ",
|
|
584
|
-
|
|
584
|
+
ca.Equation(f"\\hat{{y}} = {model_eq}", inline=True)
|
|
585
585
|
]))
|
|
586
586
|
|
|
587
587
|
# Data point
|
|
588
|
-
body.add_element(
|
|
588
|
+
body.add_element(ca.Paragraph([
|
|
589
589
|
"Data point: ",
|
|
590
|
-
|
|
590
|
+
ca.Equation(f"x = {self.input_val:.1f}, y = {self.target:.1f}", inline=True)
|
|
591
591
|
]))
|
|
592
592
|
|
|
593
593
|
# Regularization
|
|
594
|
-
body.add_element(
|
|
594
|
+
body.add_element(ca.Paragraph([
|
|
595
595
|
"L2 regularization coefficient: ",
|
|
596
|
-
|
|
596
|
+
ca.Equation(f"\\lambda = {self.lambda_reg}", inline=True)
|
|
597
597
|
]))
|
|
598
598
|
|
|
599
|
-
body.add_element(
|
|
599
|
+
body.add_element(ca.Paragraph([
|
|
600
600
|
"Calculate the following:"
|
|
601
601
|
]))
|
|
602
602
|
|
|
@@ -606,54 +606,54 @@ class RegularizationCalculationQuestion(Question):
|
|
|
606
606
|
|
|
607
607
|
answers.append(self.answers["prediction"])
|
|
608
608
|
table_data.append([
|
|
609
|
-
|
|
609
|
+
ca.Paragraph(["Prediction ", ca.Equation(r"\hat{y}", inline=True)]),
|
|
610
610
|
self.answers["prediction"]
|
|
611
611
|
])
|
|
612
612
|
|
|
613
613
|
answers.append(self.answers["base_loss"])
|
|
614
614
|
table_data.append([
|
|
615
|
-
|
|
615
|
+
ca.Paragraph(["Base MSE loss: ", ca.Equation(r"L_{base} = (1/2)(y - \hat{y})^2", inline=True)]),
|
|
616
616
|
self.answers["base_loss"]
|
|
617
617
|
])
|
|
618
618
|
|
|
619
619
|
answers.append(self.answers["l2_penalty"])
|
|
620
620
|
table_data.append([
|
|
621
|
-
|
|
621
|
+
ca.Paragraph(["L2 penalty: ", ca.Equation(r"L_{reg} = (\lambda/2)\sum w_i^2", inline=True)]),
|
|
622
622
|
self.answers["l2_penalty"]
|
|
623
623
|
])
|
|
624
624
|
|
|
625
625
|
answers.append(self.answers["total_loss"])
|
|
626
626
|
table_data.append([
|
|
627
|
-
|
|
627
|
+
ca.Paragraph(["Total loss: ", ca.Equation(r"L_{total} = L_{base} + L_{reg}", inline=True)]),
|
|
628
628
|
self.answers["total_loss"]
|
|
629
629
|
])
|
|
630
630
|
|
|
631
631
|
answers.append(self.answers["grad_total_w0"])
|
|
632
632
|
table_data.append([
|
|
633
|
-
|
|
633
|
+
ca.Paragraph(["Gradient: ", ca.Equation(r"\frac{\partial L_{total}}{\partial w_0}", inline=True)]),
|
|
634
634
|
self.answers["grad_total_w0"]
|
|
635
635
|
])
|
|
636
636
|
|
|
637
|
-
body.add_element(
|
|
637
|
+
body.add_element(ca.Table(data=table_data))
|
|
638
638
|
|
|
639
639
|
return body, answers
|
|
640
640
|
|
|
641
|
-
def get_body(self, **kwargs) ->
|
|
641
|
+
def get_body(self, **kwargs) -> ca.Section:
|
|
642
642
|
"""Build question body (backward compatible interface)."""
|
|
643
643
|
body, _ = self._get_body(**kwargs)
|
|
644
644
|
return body
|
|
645
645
|
|
|
646
|
-
def _get_explanation(self, **kwargs) -> Tuple[
|
|
646
|
+
def _get_explanation(self, **kwargs) -> Tuple[ca.Section, List[ca.Answer]]:
|
|
647
647
|
"""Build question explanation."""
|
|
648
|
-
explanation =
|
|
648
|
+
explanation = ca.Section()
|
|
649
649
|
|
|
650
|
-
explanation.add_element(
|
|
650
|
+
explanation.add_element(ca.Paragraph([
|
|
651
651
|
"L2 regularization adds a penalty term to the loss function to prevent overfitting by keeping weights small."
|
|
652
652
|
]))
|
|
653
653
|
|
|
654
654
|
# Step 1: Forward pass
|
|
655
|
-
explanation.add_element(
|
|
656
|
-
|
|
655
|
+
explanation.add_element(ca.Paragraph([
|
|
656
|
+
ca.Text("Step 1: Compute prediction", emphasis=True)
|
|
657
657
|
]))
|
|
658
658
|
|
|
659
659
|
terms = []
|
|
@@ -664,78 +664,78 @@ class RegularizationCalculationQuestion(Question):
|
|
|
664
664
|
x_term = f"{self.input_val:.1f}^{i}" if i > 1 else f"{self.input_val:.1f}"
|
|
665
665
|
terms.append(f"{w:.1f} \\times {x_term}")
|
|
666
666
|
|
|
667
|
-
explanation.add_element(
|
|
667
|
+
explanation.add_element(ca.Equation(
|
|
668
668
|
f"\\hat{{y}} = {' + '.join(terms)} = {self.prediction:.4f}",
|
|
669
669
|
inline=False
|
|
670
670
|
))
|
|
671
671
|
|
|
672
672
|
# Step 2: Base loss
|
|
673
|
-
explanation.add_element(
|
|
674
|
-
|
|
673
|
+
explanation.add_element(ca.Paragraph([
|
|
674
|
+
ca.Text("Step 2: Compute base MSE loss", emphasis=True)
|
|
675
675
|
]))
|
|
676
676
|
|
|
677
|
-
explanation.add_element(
|
|
677
|
+
explanation.add_element(ca.Equation(
|
|
678
678
|
f"L_{{base}} = \\frac{{1}}{{2}}(y - \\hat{{y}})^2 = \\frac{{1}}{{2}}({self.target:.1f} - {self.prediction:.4f})^2 = {self.base_loss:.4f}",
|
|
679
679
|
inline=False
|
|
680
680
|
))
|
|
681
681
|
|
|
682
682
|
# Step 3: L2 penalty
|
|
683
|
-
explanation.add_element(
|
|
684
|
-
|
|
683
|
+
explanation.add_element(ca.Paragraph([
|
|
684
|
+
ca.Text("Step 3: Compute L2 penalty", emphasis=True)
|
|
685
685
|
]))
|
|
686
686
|
|
|
687
687
|
weight_squares = [f"{w:.1f}^2" for w in self.weights]
|
|
688
688
|
sum_squares = sum(w**2 for w in self.weights)
|
|
689
689
|
|
|
690
|
-
explanation.add_element(
|
|
690
|
+
explanation.add_element(ca.Equation(
|
|
691
691
|
f"L_{{reg}} = \\frac{{\\lambda}}{{2}} \\sum w_i^2 = \\frac{{{self.lambda_reg}}}{{2}}({' + '.join(weight_squares)}) = \\frac{{{self.lambda_reg}}}{{2}} \\times {sum_squares:.4f} = {self.l2_penalty:.4f}",
|
|
692
692
|
inline=False
|
|
693
693
|
))
|
|
694
694
|
|
|
695
695
|
# Step 4: Total loss
|
|
696
|
-
explanation.add_element(
|
|
697
|
-
|
|
696
|
+
explanation.add_element(ca.Paragraph([
|
|
697
|
+
ca.Text("Step 4: Compute total loss", emphasis=True)
|
|
698
698
|
]))
|
|
699
699
|
|
|
700
|
-
explanation.add_element(
|
|
700
|
+
explanation.add_element(ca.Equation(
|
|
701
701
|
f"L_{{total}} = L_{{base}} + L_{{reg}} = {self.base_loss:.4f} + {self.l2_penalty:.4f} = {self.total_loss:.4f}",
|
|
702
702
|
inline=False
|
|
703
703
|
))
|
|
704
704
|
|
|
705
705
|
# Step 5: Gradient with regularization
|
|
706
|
-
explanation.add_element(
|
|
707
|
-
|
|
706
|
+
explanation.add_element(ca.Paragraph([
|
|
707
|
+
ca.Text("Step 5: Compute gradient with regularization", emphasis=True)
|
|
708
708
|
]))
|
|
709
709
|
|
|
710
|
-
explanation.add_element(
|
|
711
|
-
|
|
710
|
+
explanation.add_element(ca.Paragraph([
|
|
711
|
+
ca.Equation(r"w_0", inline=True),
|
|
712
712
|
" (the bias term):"
|
|
713
713
|
]))
|
|
714
714
|
|
|
715
|
-
explanation.add_element(
|
|
715
|
+
explanation.add_element(ca.Equation(
|
|
716
716
|
f"\\frac{{\\partial L_{{base}}}}{{\\partial w_0}} = -(y - \\hat{{y}}) \\times 1 = -({self.target:.1f} - {self.prediction:.4f}) = {self.grad_base_w0:.4f}",
|
|
717
717
|
inline=False
|
|
718
718
|
))
|
|
719
719
|
|
|
720
|
-
explanation.add_element(
|
|
720
|
+
explanation.add_element(ca.Equation(
|
|
721
721
|
f"\\frac{{\\partial L_{{reg}}}}{{\\partial w_0}} = \\lambda w_0 = {self.lambda_reg} \\times {self.weights[0]:.1f} = {self.grad_reg_w0:.4f}",
|
|
722
722
|
inline=False
|
|
723
723
|
))
|
|
724
724
|
|
|
725
|
-
explanation.add_element(
|
|
725
|
+
explanation.add_element(ca.Equation(
|
|
726
726
|
f"\\frac{{\\partial L_{{total}}}}{{\\partial w_0}} = {self.grad_base_w0:.4f} + {self.grad_reg_w0:.4f} = {self.grad_total_w0:.4f}",
|
|
727
727
|
inline=False
|
|
728
728
|
))
|
|
729
729
|
|
|
730
|
-
explanation.add_element(
|
|
730
|
+
explanation.add_element(ca.Paragraph([
|
|
731
731
|
"The regularization term adds ",
|
|
732
|
-
|
|
732
|
+
ca.Equation(f"\\lambda w_0 = {self.grad_reg_w0:.4f}", inline=True),
|
|
733
733
|
" to the gradient, pushing the weight toward zero."
|
|
734
734
|
]))
|
|
735
735
|
|
|
736
736
|
return explanation, []
|
|
737
737
|
|
|
738
|
-
def get_explanation(self, **kwargs) ->
|
|
738
|
+
def get_explanation(self, **kwargs) -> ca.Section:
|
|
739
739
|
"""Build question explanation (backward compatible interface)."""
|
|
740
740
|
explanation, _ = self._get_explanation(**kwargs)
|
|
741
741
|
return explanation
|
|
@@ -817,71 +817,71 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
|
|
|
817
817
|
self.answers = {}
|
|
818
818
|
|
|
819
819
|
# New velocity
|
|
820
|
-
self.answers["velocity"] = AnswerTypes.Vector(self.new_velocity, label="New velocity")
|
|
820
|
+
self.answers["velocity"] = ca.AnswerTypes.Vector(self.new_velocity, label="New velocity")
|
|
821
821
|
|
|
822
822
|
# New weights with momentum
|
|
823
|
-
self.answers["weights_momentum"] = AnswerTypes.Vector(self.new_weights, label="Weights (momentum)")
|
|
823
|
+
self.answers["weights_momentum"] = ca.AnswerTypes.Vector(self.new_weights, label="Weights (momentum)")
|
|
824
824
|
|
|
825
825
|
# Vanilla SGD weights for comparison
|
|
826
826
|
if self.show_vanilla_sgd:
|
|
827
|
-
self.answers["weights_sgd"] = AnswerTypes.Vector(self.sgd_weights, label="Weights (vanilla SGD)")
|
|
827
|
+
self.answers["weights_sgd"] = ca.AnswerTypes.Vector(self.sgd_weights, label="Weights (vanilla SGD)")
|
|
828
828
|
|
|
829
|
-
def _get_body(self, **kwargs) -> Tuple[
|
|
829
|
+
def _get_body(self, **kwargs) -> Tuple[ca.Section, List[ca.Answer]]:
|
|
830
830
|
"""Build question body and collect answers."""
|
|
831
|
-
body =
|
|
831
|
+
body = ca.Section()
|
|
832
832
|
answers = []
|
|
833
833
|
|
|
834
834
|
# Question description
|
|
835
|
-
body.add_element(
|
|
835
|
+
body.add_element(ca.Paragraph([
|
|
836
836
|
"Consider the optimization problem of minimizing the function:"
|
|
837
837
|
]))
|
|
838
838
|
|
|
839
|
-
body.add_element(
|
|
839
|
+
body.add_element(ca.Equation(
|
|
840
840
|
sp.latex(self.function),
|
|
841
841
|
inline=False
|
|
842
842
|
))
|
|
843
843
|
|
|
844
|
-
body.add_element(
|
|
844
|
+
body.add_element(ca.Paragraph([
|
|
845
845
|
"The gradient is:"
|
|
846
846
|
]))
|
|
847
847
|
|
|
848
|
-
body.add_element(
|
|
848
|
+
body.add_element(ca.Equation(
|
|
849
849
|
f"\\nabla f = {sp.latex(self.gradient_function)}",
|
|
850
850
|
inline=False
|
|
851
851
|
))
|
|
852
852
|
|
|
853
853
|
# Current state
|
|
854
|
-
body.add_element(
|
|
855
|
-
|
|
854
|
+
body.add_element(ca.Paragraph([
|
|
855
|
+
ca.Text("Current optimization state:", emphasis=True)
|
|
856
856
|
]))
|
|
857
857
|
|
|
858
|
-
body.add_element(
|
|
858
|
+
body.add_element(ca.Paragraph([
|
|
859
859
|
"Current weights: ",
|
|
860
|
-
|
|
860
|
+
ca.Equation(f"{format_vector(self.current_weights)}", inline=True)
|
|
861
861
|
]))
|
|
862
862
|
|
|
863
|
-
body.add_element(
|
|
863
|
+
body.add_element(ca.Paragraph([
|
|
864
864
|
"Previous velocity: ",
|
|
865
|
-
|
|
865
|
+
ca.Equation(f"{format_vector(self.prev_velocity)}", inline=True)
|
|
866
866
|
]))
|
|
867
867
|
|
|
868
868
|
# Hyperparameters
|
|
869
|
-
body.add_element(
|
|
870
|
-
|
|
869
|
+
body.add_element(ca.Paragraph([
|
|
870
|
+
ca.Text("Hyperparameters:", emphasis=True)
|
|
871
871
|
]))
|
|
872
872
|
|
|
873
|
-
body.add_element(
|
|
873
|
+
body.add_element(ca.Paragraph([
|
|
874
874
|
"Learning rate: ",
|
|
875
|
-
|
|
875
|
+
ca.Equation(f"\\alpha = {self.learning_rate}", inline=True)
|
|
876
876
|
]))
|
|
877
877
|
|
|
878
|
-
body.add_element(
|
|
878
|
+
body.add_element(ca.Paragraph([
|
|
879
879
|
"Momentum coefficient: ",
|
|
880
|
-
|
|
880
|
+
ca.Equation(f"\\beta = {self.momentum_beta}", inline=True)
|
|
881
881
|
]))
|
|
882
882
|
|
|
883
883
|
# Questions
|
|
884
|
-
body.add_element(
|
|
884
|
+
body.add_element(ca.Paragraph([
|
|
885
885
|
"Calculate the following updates:"
|
|
886
886
|
]))
|
|
887
887
|
|
|
@@ -892,14 +892,14 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
|
|
|
892
892
|
answers.append(self.answers["velocity"])
|
|
893
893
|
table_data.append([
|
|
894
894
|
"New velocity",
|
|
895
|
-
|
|
895
|
+
ca.Equation(r"v' = \beta v + (1-\beta)\nabla f", inline=True),
|
|
896
896
|
self.answers["velocity"]
|
|
897
897
|
])
|
|
898
898
|
|
|
899
899
|
answers.append(self.answers["weights_momentum"])
|
|
900
900
|
table_data.append([
|
|
901
901
|
"Weights (momentum)",
|
|
902
|
-
|
|
902
|
+
ca.Equation(r"w' = w - \alpha v'", inline=True),
|
|
903
903
|
self.answers["weights_momentum"]
|
|
904
904
|
])
|
|
905
905
|
|
|
@@ -907,44 +907,44 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
|
|
|
907
907
|
answers.append(self.answers["weights_sgd"])
|
|
908
908
|
table_data.append([
|
|
909
909
|
"Weights (vanilla SGD)",
|
|
910
|
-
|
|
910
|
+
ca.Equation(r"w' = w - \alpha \nabla f", inline=True),
|
|
911
911
|
self.answers["weights_sgd"]
|
|
912
912
|
])
|
|
913
913
|
|
|
914
|
-
body.add_element(
|
|
914
|
+
body.add_element(ca.Table(data=table_data))
|
|
915
915
|
|
|
916
916
|
return body, answers
|
|
917
917
|
|
|
918
|
-
def get_body(self, **kwargs) ->
|
|
918
|
+
def get_body(self, **kwargs) -> ca.Section:
|
|
919
919
|
"""Build question body (backward compatible interface)."""
|
|
920
920
|
body, _ = self._get_body(**kwargs)
|
|
921
921
|
return body
|
|
922
922
|
|
|
923
|
-
def _get_explanation(self, **kwargs) -> Tuple[
|
|
923
|
+
def _get_explanation(self, **kwargs) -> Tuple[ca.Section, List[ca.Answer]]:
|
|
924
924
|
"""Build question explanation."""
|
|
925
|
-
explanation =
|
|
925
|
+
explanation = ca.Section()
|
|
926
926
|
|
|
927
|
-
explanation.add_element(
|
|
927
|
+
explanation.add_element(ca.Paragraph([
|
|
928
928
|
"Momentum helps gradient descent by accumulating a velocity vector in directions of "
|
|
929
929
|
"consistent gradient, allowing faster convergence and reduced oscillation."
|
|
930
930
|
]))
|
|
931
931
|
|
|
932
932
|
# Step 1: Calculate new velocity
|
|
933
|
-
explanation.add_element(
|
|
934
|
-
|
|
933
|
+
explanation.add_element(ca.Paragraph([
|
|
934
|
+
ca.Text("Step 1: Update velocity using momentum", emphasis=True)
|
|
935
935
|
]))
|
|
936
936
|
|
|
937
|
-
explanation.add_element(
|
|
937
|
+
explanation.add_element(ca.Paragraph([
|
|
938
938
|
"The momentum update formula is:"
|
|
939
939
|
]))
|
|
940
940
|
|
|
941
|
-
explanation.add_element(
|
|
941
|
+
explanation.add_element(ca.Equation(
|
|
942
942
|
f"v' = \\beta v + (1 - \\beta) \\nabla f",
|
|
943
943
|
inline=False
|
|
944
944
|
))
|
|
945
945
|
|
|
946
946
|
# Show calculation for each component
|
|
947
|
-
digits =
|
|
947
|
+
digits = ca.Answer.DEFAULT_ROUNDING_DIGITS
|
|
948
948
|
for i in range(self.num_variables):
|
|
949
949
|
var_name = f"x_{i}"
|
|
950
950
|
# Round all intermediate values to avoid floating point precision issues
|
|
@@ -952,7 +952,7 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
|
|
|
952
952
|
one_minus_beta = round(1 - self.momentum_beta, digits)
|
|
953
953
|
one_minus_beta_times_grad = round((1 - self.momentum_beta) * self.gradients[i], digits)
|
|
954
954
|
|
|
955
|
-
explanation.add_element(
|
|
955
|
+
explanation.add_element(ca.Equation(
|
|
956
956
|
f"v'[{i}] = {self.momentum_beta} \\times {self.prev_velocity[i]:.{digits}f} + "
|
|
957
957
|
f"{one_minus_beta:.{digits}f} \\times {self.gradients[i]:.{digits}f} = "
|
|
958
958
|
f"{beta_times_v:.{digits}f} + {one_minus_beta_times_grad:.{digits}f} = {self.new_velocity[i]:.{digits}f}",
|
|
@@ -960,50 +960,50 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
|
|
|
960
960
|
))
|
|
961
961
|
|
|
962
962
|
# Step 2: Update weights with momentum
|
|
963
|
-
explanation.add_element(
|
|
964
|
-
|
|
963
|
+
explanation.add_element(ca.Paragraph([
|
|
964
|
+
ca.Text("Step 2: Update weights using new velocity", emphasis=True)
|
|
965
965
|
]))
|
|
966
966
|
|
|
967
|
-
explanation.add_element(
|
|
967
|
+
explanation.add_element(ca.Equation(
|
|
968
968
|
f"w' = w - \\alpha v'",
|
|
969
969
|
inline=False
|
|
970
970
|
))
|
|
971
971
|
|
|
972
972
|
for i in range(self.num_variables):
|
|
973
|
-
explanation.add_element(
|
|
973
|
+
explanation.add_element(ca.Equation(
|
|
974
974
|
f"w[{i}] = {self.current_weights[i]} - {self.learning_rate} \\times {self.new_velocity[i]:.4f} = {self.new_weights[i]:.4f}",
|
|
975
975
|
inline=False
|
|
976
976
|
))
|
|
977
977
|
|
|
978
978
|
# Comparison with vanilla SGD
|
|
979
979
|
if self.show_vanilla_sgd:
|
|
980
|
-
explanation.add_element(
|
|
981
|
-
|
|
980
|
+
explanation.add_element(ca.Paragraph([
|
|
981
|
+
ca.Text("Comparison with vanilla SGD:", emphasis=True)
|
|
982
982
|
]))
|
|
983
983
|
|
|
984
|
-
explanation.add_element(
|
|
984
|
+
explanation.add_element(ca.Paragraph([
|
|
985
985
|
"Vanilla SGD (no momentum) would update directly using the gradient:"
|
|
986
986
|
]))
|
|
987
987
|
|
|
988
|
-
explanation.add_element(
|
|
988
|
+
explanation.add_element(ca.Equation(
|
|
989
989
|
f"w' = w - \\alpha \\nabla f",
|
|
990
990
|
inline=False
|
|
991
991
|
))
|
|
992
992
|
|
|
993
993
|
for i in range(self.num_variables):
|
|
994
|
-
explanation.add_element(
|
|
994
|
+
explanation.add_element(ca.Equation(
|
|
995
995
|
f"w[{i}] = {self.current_weights[i]} - {self.learning_rate} \\times {self.gradients[i]:.4f} = {self.sgd_weights[i]:.4f}",
|
|
996
996
|
inline=False
|
|
997
997
|
))
|
|
998
998
|
|
|
999
|
-
explanation.add_element(
|
|
999
|
+
explanation.add_element(ca.Paragraph([
|
|
1000
1000
|
"The momentum update differs because it incorporates the previous velocity, "
|
|
1001
1001
|
"which can help accelerate learning and smooth out noisy gradients."
|
|
1002
1002
|
]))
|
|
1003
1003
|
|
|
1004
1004
|
return explanation, []
|
|
1005
1005
|
|
|
1006
|
-
def get_explanation(self, **kwargs) ->
|
|
1006
|
+
def get_explanation(self, **kwargs) -> ca.Section:
|
|
1007
1007
|
"""Build question explanation (backward compatible interface)."""
|
|
1008
1008
|
explanation, _ = self._get_explanation(**kwargs)
|
|
1009
1009
|
return explanation
|