quizgenerator-0.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuizGenerator/README.md +5 -0
- QuizGenerator/__init__.py +27 -0
- QuizGenerator/__main__.py +7 -0
- QuizGenerator/canvas/__init__.py +13 -0
- QuizGenerator/canvas/canvas_interface.py +627 -0
- QuizGenerator/canvas/classes.py +235 -0
- QuizGenerator/constants.py +149 -0
- QuizGenerator/contentast.py +1955 -0
- QuizGenerator/generate.py +253 -0
- QuizGenerator/logging.yaml +55 -0
- QuizGenerator/misc.py +579 -0
- QuizGenerator/mixins.py +548 -0
- QuizGenerator/performance.py +202 -0
- QuizGenerator/premade_questions/__init__.py +0 -0
- QuizGenerator/premade_questions/basic.py +103 -0
- QuizGenerator/premade_questions/cst334/__init__.py +1 -0
- QuizGenerator/premade_questions/cst334/languages.py +391 -0
- QuizGenerator/premade_questions/cst334/math_questions.py +297 -0
- QuizGenerator/premade_questions/cst334/memory_questions.py +1400 -0
- QuizGenerator/premade_questions/cst334/ostep13_vsfs.py +572 -0
- QuizGenerator/premade_questions/cst334/persistence_questions.py +451 -0
- QuizGenerator/premade_questions/cst334/process.py +648 -0
- QuizGenerator/premade_questions/cst463/__init__.py +0 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/__init__.py +3 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_calculation.py +369 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_descent_questions.py +305 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py +650 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/misc.py +73 -0
- QuizGenerator/premade_questions/cst463/math_and_data/__init__.py +2 -0
- QuizGenerator/premade_questions/cst463/math_and_data/matrix_questions.py +631 -0
- QuizGenerator/premade_questions/cst463/math_and_data/vector_questions.py +534 -0
- QuizGenerator/premade_questions/cst463/models/__init__.py +0 -0
- QuizGenerator/premade_questions/cst463/models/attention.py +192 -0
- QuizGenerator/premade_questions/cst463/models/cnns.py +186 -0
- QuizGenerator/premade_questions/cst463/models/matrices.py +24 -0
- QuizGenerator/premade_questions/cst463/models/rnns.py +202 -0
- QuizGenerator/premade_questions/cst463/models/text.py +203 -0
- QuizGenerator/premade_questions/cst463/models/weight_counting.py +227 -0
- QuizGenerator/premade_questions/cst463/neural-network-basics/__init__.py +6 -0
- QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py +1314 -0
- QuizGenerator/premade_questions/cst463/tensorflow-intro/__init__.py +6 -0
- QuizGenerator/premade_questions/cst463/tensorflow-intro/tensorflow_questions.py +936 -0
- QuizGenerator/qrcode_generator.py +293 -0
- QuizGenerator/question.py +715 -0
- QuizGenerator/quiz.py +467 -0
- QuizGenerator/regenerate.py +472 -0
- QuizGenerator/typst_utils.py +113 -0
- quizgenerator-0.4.2.dist-info/METADATA +265 -0
- quizgenerator-0.4.2.dist-info/RECORD +52 -0
- quizgenerator-0.4.2.dist-info/WHEEL +4 -0
- quizgenerator-0.4.2.dist-info/entry_points.txt +3 -0
- quizgenerator-0.4.2.dist-info/licenses/LICENSE +674 -0
@@ -0,0 +1,936 @@
from __future__ import annotations

import logging

import numpy as np
import sympy as sp

from QuizGenerator.contentast import ContentAST
from QuizGenerator.question import Question, Answer, QuestionRegistry
from QuizGenerator.mixins import TableQuestionMixin, BodyTemplatesMixin

# Gradient descent utilities are shipped in the sibling gradient_descent
# package (see the file listing above), so import them by package path
# instead of mutating sys.path.
from QuizGenerator.premade_questions.cst463.gradient_descent.misc import (
    generate_function,
    format_vector,
)

log = logging.getLogger(__name__)


@QuestionRegistry.register()
class ParameterCountingQuestion(Question):
    """
    Question asking students to count parameters in a neural network.

    Given a dense network architecture, students calculate:
    - Total number of weights
    - Total number of biases
    - Total trainable parameters
    """

    def __init__(self, *args, **kwargs):
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        self.num_layers = kwargs.get("num_layers", None)
        self.include_biases = kwargs.get("include_biases", True)

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate random architecture
        if self.num_layers is None:
            self.num_layers = self.rng.choice([3, 4])

        # Generate layer sizes
        # Input layer: common sizes for typical problems
        input_sizes = [28*28, 32*32, 784, 1024, 64, 128]
        self.layer_sizes = [self.rng.choice(input_sizes)]

        # Hidden layers: reasonable sizes
        for i in range(self.num_layers - 2):
            hidden_size = self.rng.choice([32, 64, 128, 256, 512])
            self.layer_sizes.append(hidden_size)

        # Output layer: typical classification sizes
        output_size = self.rng.choice([2, 10, 100, 1000])
        self.layer_sizes.append(output_size)

        # Calculate correct answers
        self.total_weights = 0
        self.total_biases = 0
        self.weights_per_layer = []
        self.biases_per_layer = []

        for i in range(len(self.layer_sizes) - 1):
            weights = self.layer_sizes[i] * self.layer_sizes[i+1]
            biases = self.layer_sizes[i+1] if self.include_biases else 0

            self.weights_per_layer.append(weights)
            self.biases_per_layer.append(biases)

            self.total_weights += weights
            self.total_biases += biases

        self.total_params = self.total_weights + self.total_biases

        # Create answers
        self._create_answers()

    def _create_answers(self):
        """Create answer fields."""
        self.answers = {}

        self.answers["total_weights"] = Answer.integer("total_weights", self.total_weights)

        if self.include_biases:
            self.answers["total_biases"] = Answer.integer("total_biases", self.total_biases)
        self.answers["total_params"] = Answer.integer("total_params", self.total_params)

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            "Consider a fully-connected (dense) neural network with the following architecture:"
        ]))

        # Display architecture
        arch_parts = []
        for i, size in enumerate(self.layer_sizes):
            if i > 0:
                arch_parts.append(" → ")
            arch_parts.append(str(size))

        body.add_element(ContentAST.Paragraph([
            "Architecture: " + "".join(arch_parts)
        ]))

        if self.include_biases:
            body.add_element(ContentAST.Paragraph([
                "Each layer includes bias terms."
            ]))

        # Answer table
        table_data = []
        table_data.append(["Parameter Type", "Count"])

        table_data.append([
            "Total weights (connections between layers)",
            ContentAST.Answer(self.answers["total_weights"])
        ])

        if self.include_biases:
            table_data.append([
                "Total biases",
                ContentAST.Answer(self.answers["total_biases"])
            ])

        table_data.append([
            "Total trainable parameters",
            ContentAST.Answer(self.answers["total_params"])
        ])

        body.add_element(ContentAST.Table(data=table_data))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "To count parameters in a dense neural network, we calculate weights and biases for each layer."
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "**Weights calculation:**"
        ]))

        for i in range(len(self.layer_sizes) - 1):
            input_size = self.layer_sizes[i]
            output_size = self.layer_sizes[i+1]
            weights = self.weights_per_layer[i]

            explanation.add_element(ContentAST.Paragraph([
                f"Layer {i+1} → {i+2}: ",
                ContentAST.Equation(f"{input_size} \\times {output_size} = {weights:,}", inline=True),
                " weights"
            ]))

        explanation.add_element(ContentAST.Paragraph([
            "Total weights: ",
            ContentAST.Equation(
                f"{' + '.join([f'{w:,}' for w in self.weights_per_layer])} = {self.total_weights:,}",
                inline=True
            )
        ]))

        if self.include_biases:
            explanation.add_element(ContentAST.Paragraph([
                "**Biases calculation:**"
            ]))

            for i in range(len(self.layer_sizes) - 1):
                biases = self.biases_per_layer[i]

                explanation.add_element(ContentAST.Paragraph([
                    f"Layer {i+2}: {biases:,} biases (one per neuron)"
                ]))

            explanation.add_element(ContentAST.Paragraph([
                "Total biases: ",
                ContentAST.Equation(
                    f"{' + '.join([f'{b:,}' for b in self.biases_per_layer])} = {self.total_biases:,}",
                    inline=True
                )
            ]))

        explanation.add_element(ContentAST.Paragraph([
            "**Total trainable parameters:**"
        ]))

        if self.include_biases:
            explanation.add_element(ContentAST.Equation(
                f"\\text{{Total}} = {self.total_weights:,} + {self.total_biases:,} = {self.total_params:,}",
                inline=False
            ))
        else:
            explanation.add_element(ContentAST.Equation(
                f"\\text{{Total}} = {self.total_weights:,}",
                inline=False
            ))

        return explanation
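
# Worked example (illustrative; not produced by the class itself): for a
# hypothetical architecture 784 -> 128 -> 10 with biases,
#   weights = 784*128 + 128*10 = 100_352 + 1_280 = 101_632
#   biases  = 128 + 10 = 138
#   total trainable parameters = 101_632 + 138 = 101_770
# matching the per-layer formula used in refresh() above:
#   weights_i = layer_sizes[i] * layer_sizes[i+1], biases_i = layer_sizes[i+1].
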
@QuestionRegistry.register()
class ActivationFunctionComputationQuestion(Question):
    """
    Question asking students to compute activation function outputs.

    Given a vector of inputs and an activation function, students calculate
    the output for each element (or the entire vector for softmax).
    """

    ACTIVATION_RELU = "relu"
    ACTIVATION_SIGMOID = "sigmoid"
    ACTIVATION_TANH = "tanh"
    ACTIVATION_SOFTMAX = "softmax"

    def __init__(self, *args, **kwargs):
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        self.vector_size = kwargs.get("vector_size", None)
        self.activation = kwargs.get("activation", None)

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate random input vector
        if self.vector_size is None:
            self.vector_size = self.rng.choice([3, 4, 5])

        self.input_vector = [
            round(self.rng.uniform(-3, 3), 1)
            for _ in range(self.vector_size)
        ]

        # Select activation function
        if self.activation is None:
            activations = [
                self.ACTIVATION_RELU,
                self.ACTIVATION_SIGMOID,
                self.ACTIVATION_TANH,
                self.ACTIVATION_SOFTMAX,
            ]
            self.activation = self.rng.choice(activations)

        # Alpha for a leaky-ReLU variant (currently unused: leaky ReLU is not
        # in the activation list above)
        self.leaky_alpha = 0.01

        # Compute outputs
        self.output_vector = self._compute_activation(self.input_vector)

        # Create answers
        self._create_answers()

    def _compute_activation(self, inputs):
        """Compute activation function output."""
        if self.activation == self.ACTIVATION_RELU:
            return [max(0, x) for x in inputs]

        elif self.activation == self.ACTIVATION_SIGMOID:
            return [1 / (1 + np.exp(-x)) for x in inputs]

        elif self.activation == self.ACTIVATION_TANH:
            return [np.tanh(x) for x in inputs]

        elif self.activation == self.ACTIVATION_SOFTMAX:
            # Subtract max for numerical stability
            exp_vals = [np.exp(x - max(inputs)) for x in inputs]
            sum_exp = sum(exp_vals)
            return [e / sum_exp for e in exp_vals]

        else:
            raise ValueError(f"Unknown activation: {self.activation}")

    def _get_activation_name(self):
        """Get human-readable activation name."""
        names = {
            self.ACTIVATION_RELU: "ReLU",
            self.ACTIVATION_SIGMOID: "Sigmoid",
            self.ACTIVATION_TANH: "Tanh",
            self.ACTIVATION_SOFTMAX: "Softmax",
        }
        return names.get(self.activation, "Unknown")

    def _get_activation_formula(self):
        """Get LaTeX formula for activation function."""
        if self.activation == self.ACTIVATION_RELU:
            return r"\text{ReLU}(x) = \max(0, x)"

        elif self.activation == self.ACTIVATION_SIGMOID:
            return r"\sigma(x) = \frac{1}{1 + e^{-x}}"

        elif self.activation == self.ACTIVATION_TANH:
            return r"\tanh(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}"

        elif self.activation == self.ACTIVATION_SOFTMAX:
            return r"\text{softmax}(x_i) = \frac{e^{x_i}}{\sum_j e^{x_j}}"

        return ""

    def _create_answers(self):
        """Create answer fields."""
        self.answers = {}

        if self.activation == self.ACTIVATION_SOFTMAX:
            # Softmax: single vector answer
            self.answers["output"] = Answer.vector_value("output", self.output_vector)
        else:
            # Element-wise: individual answers
            for i, output in enumerate(self.output_vector):
                key = f"output_{i}"
                self.answers[key] = Answer.float_value(key, float(output))

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            f"Given the input vector below, compute the output after applying the {self._get_activation_name()} activation function."
        ]))

        # Display formula
        body.add_element(ContentAST.Paragraph([
            "Activation function: ",
            ContentAST.Equation(self._get_activation_formula(), inline=True)
        ]))

        # Input vector
        input_str = ", ".join([f"{x:.1f}" for x in self.input_vector])
        body.add_element(ContentAST.Paragraph([
            "Input: ",
            ContentAST.Equation(f"[{input_str}]", inline=True)
        ]))

        # Answer table
        if self.activation == self.ACTIVATION_SOFTMAX:
            body.add_element(ContentAST.Paragraph([
                "Compute the output vector:"
            ]))

            table_data = []
            table_data.append(["Output Vector"])
            table_data.append([ContentAST.Answer(self.answers["output"])])

            body.add_element(ContentAST.Table(data=table_data))

        else:
            body.add_element(ContentAST.Paragraph([
                "Compute the output for each element:"
            ]))

            table_data = []
            table_data.append(["Input", "Output"])

            for i, x in enumerate(self.input_vector):
                table_data.append([
                    ContentAST.Equation(f"{x:.1f}", inline=True),
                    ContentAST.Answer(self.answers[f"output_{i}"])
                ])

            body.add_element(ContentAST.Table(data=table_data))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            f"To compute the {self._get_activation_name()} activation, we apply the formula to each input."
        ]))

        if self.activation == self.ACTIVATION_SOFTMAX:
            explanation.add_element(ContentAST.Paragraph([
                "**Softmax computation:**"
            ]))

            # Show exponentials
            exp_strs = [f"e^{{{x:.1f}}}" for x in self.input_vector]
            explanation.add_element(ContentAST.Paragraph([
                "First, compute exponentials: ",
                ContentAST.Equation(", ".join(exp_strs), inline=True)
            ]))

            # Numerical values
            exp_vals = [np.exp(x) for x in self.input_vector]
            exp_vals_str = ", ".join([f"{e:.4f}" for e in exp_vals])
            explanation.add_element(ContentAST.Paragraph([
                ContentAST.Equation(f"\\approx [{exp_vals_str}]", inline=True)
            ]))

            # Sum
            sum_exp = sum(exp_vals)
            explanation.add_element(ContentAST.Paragraph([
                "Sum: ",
                ContentAST.Equation(f"{sum_exp:.4f}", inline=True)
            ]))

            # Final outputs
            explanation.add_element(ContentAST.Paragraph([
                "Divide each by the sum:"
            ]))

            for i, (exp_val, output) in enumerate(zip(exp_vals, self.output_vector)):
                explanation.add_element(ContentAST.Equation(
                    f"\\text{{softmax}}({self.input_vector[i]:.1f}) = \\frac{{{exp_val:.4f}}}{{{sum_exp:.4f}}} = {output:.4f}",
                    inline=False
                ))

        else:
            explanation.add_element(ContentAST.Paragraph([
                "**Element-wise computation:**"
            ]))

            for x, y in zip(self.input_vector, self.output_vector):
                if self.activation == self.ACTIVATION_RELU:
                    explanation.add_element(ContentAST.Equation(
                        f"\\text{{ReLU}}({x:.1f}) = \\max(0, {x:.1f}) = {y:.4f}",
                        inline=False
                    ))

                elif self.activation == self.ACTIVATION_SIGMOID:
                    # Parenthesize x so negative inputs render as e^{-(-2.0)}
                    # rather than the garbled e^{--2.0}
                    explanation.add_element(ContentAST.Equation(
                        f"\\sigma({x:.1f}) = \\frac{{1}}{{1 + e^{{-({x:.1f})}}}} = {y:.4f}",
                        inline=False
                    ))

                elif self.activation == self.ACTIVATION_TANH:
                    explanation.add_element(ContentAST.Equation(
                        f"\\tanh({x:.1f}) = {y:.4f}",
                        inline=False
                    ))

        return explanation
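
# Worked check (illustrative): softmax([1.0, 2.0]) gives
#   e^1.0 ≈ 2.7183, e^2.0 ≈ 7.3891, sum ≈ 10.1073
#   outputs ≈ [0.2689, 0.7311]
# _compute_activation() subtracts max(inputs) before exponentiating; this is
# mathematically a no-op, since
#   e^(x_i - m) / sum_j e^(x_j - m) == e^(x_i) / sum_j e^(x_j),
# but it avoids overflow for large inputs. The explanation above shows the
# unshifted exponentials, and the displayed ratios still match for this reason.
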
@QuestionRegistry.register()
class RegularizationCalculationQuestion(Question):
    """
    Question asking students to calculate loss with L2 regularization.

    Given a small network (2-4 weights), students calculate:
    - Forward pass
    - Base MSE loss
    - L2 regularization penalty
    - Total loss
    - Gradient with regularization for one weight
    """

    def __init__(self, *args, **kwargs):
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        self.num_weights = kwargs.get("num_weights", None)

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate small network (2-4 weights for simplicity)
        if self.num_weights is None:
            self.num_weights = self.rng.choice([2, 3, 4])

        # Generate weights (small values)
        self.weights = [
            round(self.rng.uniform(-2, 2), 1)
            for _ in range(self.num_weights)
        ]

        # Generate input and target
        self.input_val = round(self.rng.uniform(-3, 3), 1)
        self.target = round(self.rng.uniform(-5, 5), 1)

        # Regularization coefficient
        self.lambda_reg = self.rng.choice([0.01, 0.05, 0.1, 0.5])

        # Forward pass: a simple polynomial model
        #   prediction = sum(w_i * input**i) = w0 + w1*x + w2*x^2 + ...
        self.prediction = sum(
            w * (self.input_val ** i)
            for i, w in enumerate(self.weights)
        )

        # Calculate losses
        self.base_loss = 0.5 * (self.target - self.prediction) ** 2
        self.l2_penalty = (self.lambda_reg / 2) * sum(w**2 for w in self.weights)
        self.total_loss = self.base_loss + self.l2_penalty

        # Gradient for the first weight (w0, the bias term):
        #   dL_base/dw0  = -(target - prediction) * dPrediction/dw0, where dPrediction/dw0 = x^0 = 1
        #   dL_reg/dw0   = lambda * w0
        #   dL_total/dw0 = dL_base/dw0 + dL_reg/dw0
        self.grad_base_w0 = -(self.target - self.prediction) * 1  # derivative of w0 * x^0
        self.grad_reg_w0 = self.lambda_reg * self.weights[0]
        self.grad_total_w0 = self.grad_base_w0 + self.grad_reg_w0

        # Create answers
        self._create_answers()

    def _create_answers(self):
        """Create answer fields."""
        self.answers = {}

        self.answers["prediction"] = Answer.float_value("prediction", float(self.prediction))
        self.answers["base_loss"] = Answer.float_value("base_loss", float(self.base_loss))
        self.answers["l2_penalty"] = Answer.float_value("l2_penalty", float(self.l2_penalty))
        self.answers["total_loss"] = Answer.float_value("total_loss", float(self.total_loss))
        self.answers["grad_total_w0"] = Answer.auto_float("grad_total_w0", float(self.grad_total_w0))

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            "Consider a simple model with the following parameters:"
        ]))

        # Display weights
        weight_strs = [f"w_{i} = {w:.1f}" for i, w in enumerate(self.weights)]
        body.add_element(ContentAST.Paragraph([
            "Weights: ",
            ContentAST.Equation(", ".join(weight_strs), inline=True)
        ]))

        # Model equation
        terms = []
        for i, w in enumerate(self.weights):
            if i == 0:
                terms.append("w_0")
            elif i == 1:
                terms.append("w_1 x")
            else:
                terms.append(f"w_{i} x^{i}")

        model_eq = " + ".join(terms)
        body.add_element(ContentAST.Paragraph([
            "Model: ",
            ContentAST.Equation(f"\\hat{{y}} = {model_eq}", inline=True)
        ]))

        # Data point
        body.add_element(ContentAST.Paragraph([
            "Data point: ",
            ContentAST.Equation(f"x = {self.input_val:.1f}, y = {self.target:.1f}", inline=True)
        ]))

        # Regularization
        body.add_element(ContentAST.Paragraph([
            "L2 regularization coefficient: ",
            ContentAST.Equation(f"\\lambda = {self.lambda_reg}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            "Calculate the following:"
        ]))

        # Answer table
        table_data = []
        table_data.append(["Calculation", "Value"])

        table_data.append([
            ContentAST.Paragraph(["Prediction: ", ContentAST.Equation(r"\hat{y}", inline=True)]),
            ContentAST.Answer(self.answers["prediction"])
        ])

        table_data.append([
            ContentAST.Paragraph(["Base MSE loss: ", ContentAST.Equation(r"L_{base} = (1/2)(y - \hat{y})^2", inline=True)]),
            ContentAST.Answer(self.answers["base_loss"])
        ])

        table_data.append([
            ContentAST.Paragraph(["L2 penalty: ", ContentAST.Equation(r"L_{reg} = (\lambda/2)\sum w_i^2", inline=True)]),
            ContentAST.Answer(self.answers["l2_penalty"])
        ])

        table_data.append([
            ContentAST.Paragraph(["Total loss: ", ContentAST.Equation(r"L_{total} = L_{base} + L_{reg}", inline=True)]),
            ContentAST.Answer(self.answers["total_loss"])
        ])

        table_data.append([
            ContentAST.Paragraph(["Gradient: ", ContentAST.Equation(r"\frac{\partial L_{total}}{\partial w_0}", inline=True)]),
            ContentAST.Answer(self.answers["grad_total_w0"])
        ])

        body.add_element(ContentAST.Table(data=table_data))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "L2 regularization adds a penalty term to the loss function to prevent overfitting by keeping weights small."
        ]))

        # Step 1: Forward pass
        explanation.add_element(ContentAST.Paragraph([
            "**Step 1: Compute prediction**"
        ]))

        # Parenthesize substituted values so negative weights and inputs
        # render unambiguously (e.g. (-2.0)^2, not -2.0^2)
        terms = []
        for i, w in enumerate(self.weights):
            if i == 0:
                terms.append(f"{w:.1f}")
            else:
                x_term = f"({self.input_val:.1f})^{i}" if i > 1 else f"({self.input_val:.1f})"
                terms.append(f"({w:.1f}) \\times {x_term}")

        explanation.add_element(ContentAST.Equation(
            f"\\hat{{y}} = {' + '.join(terms)} = {self.prediction:.4f}",
            inline=False
        ))

        # Step 2: Base loss
        explanation.add_element(ContentAST.Paragraph([
            "**Step 2: Compute base MSE loss**"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"L_{{base}} = \\frac{{1}}{{2}}(y - \\hat{{y}})^2 = \\frac{{1}}{{2}}({self.target:.1f} - {self.prediction:.4f})^2 = {self.base_loss:.4f}",
            inline=False
        ))

        # Step 3: L2 penalty
        explanation.add_element(ContentAST.Paragraph([
            "**Step 3: Compute L2 penalty**"
        ]))

        weight_squares = [f"({w:.1f})^2" for w in self.weights]
        sum_squares = sum(w**2 for w in self.weights)

        explanation.add_element(ContentAST.Equation(
            f"L_{{reg}} = \\frac{{\\lambda}}{{2}} \\sum w_i^2 = \\frac{{{self.lambda_reg}}}{{2}}({' + '.join(weight_squares)}) = \\frac{{{self.lambda_reg}}}{{2}} \\times {sum_squares:.4f} = {self.l2_penalty:.4f}",
            inline=False
        ))

        # Step 4: Total loss
        explanation.add_element(ContentAST.Paragraph([
            "**Step 4: Compute total loss**"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"L_{{total}} = L_{{base}} + L_{{reg}} = {self.base_loss:.4f} + {self.l2_penalty:.4f} = {self.total_loss:.4f}",
            inline=False
        ))

        # Step 5: Gradient with regularization
        explanation.add_element(ContentAST.Paragraph([
            "**Step 5: Compute gradient with regularization**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            ContentAST.Equation(r"w_0", inline=True),
            " (the bias term):"
        ]))

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L_{{base}}}}{{\\partial w_0}} = -(y - \\hat{{y}}) \\times 1 = -({self.target:.1f} - {self.prediction:.4f}) = {self.grad_base_w0:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L_{{reg}}}}{{\\partial w_0}} = \\lambda w_0 = {self.lambda_reg} \\times ({self.weights[0]:.1f}) = {self.grad_reg_w0:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Equation(
            f"\\frac{{\\partial L_{{total}}}}{{\\partial w_0}} = {self.grad_base_w0:.4f} + ({self.grad_reg_w0:.4f}) = {self.grad_total_w0:.4f}",
            inline=False
        ))

        explanation.add_element(ContentAST.Paragraph([
            "The regularization term adds ",
            ContentAST.Equation(f"\\lambda w_0 = {self.grad_reg_w0:.4f}", inline=True),
            " to the gradient, pushing the weight toward zero."
        ]))

        return explanation
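
# Worked example (illustrative): weights w0 = 1.0, w1 = -2.0, data point
# x = 2.0, y = 1.0, lambda = 0.1:
#   prediction = 1.0 + (-2.0)(2.0)            = -3.0
#   base loss  = 0.5 * (1.0 - (-3.0))^2       = 8.0
#   L2 penalty = (0.1/2) * ((1.0)^2 + (-2.0)^2) = 0.25
#   total loss = 8.0 + 0.25                   = 8.25
#   dL/dw0     = -(1.0 - (-3.0)) + 0.1 * 1.0  = -3.9
# Note the penalty includes w0; many frameworks exclude bias terms from L2
# regularization, but this question regularizes all weights uniformly.
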
@QuestionRegistry.register()
class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin):
    """
    Question asking students to perform gradient descent with momentum.

    Given a function, current weights, gradients, learning rate, and momentum coefficient,
    students calculate:
    - Velocity update using momentum
    - Weight update using the new velocity
    - Comparison to vanilla SGD (optional)
    """

    def __init__(self, *args, **kwargs):
        kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        self.num_variables = kwargs.get("num_variables", 2)
        self.show_vanilla_sgd = kwargs.get("show_vanilla_sgd", True)

    def refresh(self, rng_seed=None, *args, **kwargs):
        super().refresh(rng_seed=rng_seed, *args, **kwargs)

        # Generate well-conditioned quadratic function
        self.variables, self.function, self.gradient_function, self.equation = \
            generate_function(self.rng, self.num_variables, max_degree=2, use_quadratic=True)

        # Generate current weights (small integers)
        self.current_weights = [
            self.rng.choice([-2, -1, 0, 1, 2])
            for _ in range(self.num_variables)
        ]

        # Calculate gradient at current position
        subs_map = dict(zip(self.variables, self.current_weights))
        g_syms = self.gradient_function.subs(subs_map)
        self.gradients = [float(val) for val in g_syms]

        # Generate previous velocity (for momentum); start small or zero
        self.prev_velocity = [
            round(self.rng.uniform(-0.5, 0.5), 2)
            for _ in range(self.num_variables)
        ]

        # Hyperparameters
        self.learning_rate = self.rng.choice([0.01, 0.05, 0.1])
        self.momentum_beta = self.rng.choice([0.8, 0.9])

        # Calculate momentum updates (EMA form):
        #   v_new = beta * v_old + (1 - beta) * gradient
        self.new_velocity = [
            self.momentum_beta * v_old + (1 - self.momentum_beta) * grad
            for v_old, grad in zip(self.prev_velocity, self.gradients)
        ]

        #   w_new = w_old - alpha * v_new
        self.new_weights = [
            w - self.learning_rate * v
            for w, v in zip(self.current_weights, self.new_velocity)
        ]

        # Calculate vanilla SGD for comparison
        if self.show_vanilla_sgd:
            self.sgd_weights = [
                w - self.learning_rate * grad
                for w, grad in zip(self.current_weights, self.gradients)
            ]

        # Create answers
        self._create_answers()

    def _create_answers(self):
        """Create answer fields."""
        self.answers = {}

        # New velocity
        self.answers["velocity"] = Answer.vector_value("velocity", self.new_velocity)

        # New weights with momentum
        self.answers["weights_momentum"] = Answer.vector_value("weights_momentum", self.new_weights)

        # Vanilla SGD weights for comparison
        if self.show_vanilla_sgd:
            self.answers["weights_sgd"] = Answer.vector_value("weights_sgd", self.sgd_weights)

    def get_body(self, **kwargs) -> ContentAST.Section:
        body = ContentAST.Section()

        # Question description
        body.add_element(ContentAST.Paragraph([
            "Consider the optimization problem of minimizing the function:"
        ]))

        body.add_element(ContentAST.Equation(
            sp.latex(self.function),
            inline=False
        ))

        body.add_element(ContentAST.Paragraph([
            "The gradient is:"
        ]))

        body.add_element(ContentAST.Equation(
            f"\\nabla f = {sp.latex(self.gradient_function)}",
            inline=False
        ))

        # Current state
        body.add_element(ContentAST.Paragraph([
            "**Current optimization state:**"
        ]))

        body.add_element(ContentAST.Paragraph([
            "Current weights: ",
            ContentAST.Equation(format_vector(self.current_weights), inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            "Previous velocity: ",
            ContentAST.Equation(format_vector(self.prev_velocity), inline=True)
        ]))

        # Hyperparameters
        body.add_element(ContentAST.Paragraph([
            "**Hyperparameters:**"
        ]))

        body.add_element(ContentAST.Paragraph([
            "Learning rate: ",
            ContentAST.Equation(f"\\alpha = {self.learning_rate}", inline=True)
        ]))

        body.add_element(ContentAST.Paragraph([
            "Momentum coefficient: ",
            ContentAST.Equation(f"\\beta = {self.momentum_beta}", inline=True)
        ]))

        # Questions
        body.add_element(ContentAST.Paragraph([
            "Calculate the following updates:"
        ]))

        # Answer table
        table_data = []
        table_data.append(["Update Type", "Formula", "Result"])

        table_data.append([
            "New velocity",
            ContentAST.Equation(r"v' = \beta v + (1-\beta)\nabla f", inline=True),
            ContentAST.Answer(self.answers["velocity"])
        ])

        table_data.append([
            "Weights (momentum)",
            ContentAST.Equation(r"w' = w - \alpha v'", inline=True),
            ContentAST.Answer(self.answers["weights_momentum"])
        ])

        if self.show_vanilla_sgd:
            table_data.append([
                "Weights (vanilla SGD)",
                ContentAST.Equation(r"w' = w - \alpha \nabla f", inline=True),
                ContentAST.Answer(self.answers["weights_sgd"])
            ])

        body.add_element(ContentAST.Table(data=table_data))

        return body

    def get_explanation(self, **kwargs) -> ContentAST.Section:
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "Momentum helps gradient descent by accumulating a velocity vector in directions of "
            "consistent gradient, allowing faster convergence and reduced oscillation."
        ]))

        # Step 1: Calculate new velocity
        explanation.add_element(ContentAST.Paragraph([
            "**Step 1: Update velocity using momentum**"
        ]))

        explanation.add_element(ContentAST.Paragraph([
            "The momentum update formula is:"
        ]))

        explanation.add_element(ContentAST.Equation(
            r"v' = \beta v + (1 - \beta) \nabla f",
            inline=False
        ))

        # Show calculation for each component; format 1 - beta explicitly so
        # the display shows 0.10 rather than float noise like 0.09999999999999998,
        # and parenthesize values that may be negative
        for i in range(self.num_variables):
            explanation.add_element(ContentAST.Equation(
                f"v'[{i}] = {self.momentum_beta} \\times ({self.prev_velocity[i]:.2f}) + "
                f"{1 - self.momentum_beta:.2f} \\times ({self.gradients[i]:.4f}) = {self.new_velocity[i]:.4f}",
                inline=False
            ))

        # Step 2: Update weights with momentum
        explanation.add_element(ContentAST.Paragraph([
            "**Step 2: Update weights using new velocity**"
        ]))

        explanation.add_element(ContentAST.Equation(
            r"w' = w - \alpha v'",
            inline=False
        ))

        for i in range(self.num_variables):
            explanation.add_element(ContentAST.Equation(
                f"w[{i}] = {self.current_weights[i]} - {self.learning_rate} \\times ({self.new_velocity[i]:.4f}) = {self.new_weights[i]:.4f}",
                inline=False
            ))

        # Comparison with vanilla SGD
        if self.show_vanilla_sgd:
            explanation.add_element(ContentAST.Paragraph([
                "**Comparison with vanilla SGD:**"
            ]))

            explanation.add_element(ContentAST.Paragraph([
                "Vanilla SGD (no momentum) would update directly using the gradient:"
            ]))

            explanation.add_element(ContentAST.Equation(
                r"w' = w - \alpha \nabla f",
                inline=False
            ))

            for i in range(self.num_variables):
                explanation.add_element(ContentAST.Equation(
                    f"w[{i}] = {self.current_weights[i]} - {self.learning_rate} \\times ({self.gradients[i]:.4f}) = {self.sgd_weights[i]:.4f}",
                    inline=False
                ))

            explanation.add_element(ContentAST.Paragraph([
                "The momentum update differs because it incorporates the previous velocity, "
                "which can help accelerate learning and smooth out noisy gradients."
            ]))

        return explanation
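
# Worked example (illustrative): with beta = 0.9, alpha = 0.1, previous
# velocity v = 0.50, and gradient 2.0 at w = 1:
#   v' = 0.9*0.50 + 0.1*2.0 = 0.45 + 0.20 = 0.65
#   w' = 1 - 0.1*0.65 = 0.935    (momentum)
#   w' = 1 - 0.1*2.0  = 0.80     (vanilla SGD)
# Note this class uses the EMA (exponentially weighted average) form of
# momentum, v' = beta*v + (1 - beta)*grad; classical momentum,
# v' = beta*v + grad, would take a proportionally larger step for the same
# hyperparameters.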