QuizGenerator 0.8.1-py3-none-any.whl → 0.9.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuizGenerator/contentast.py +1 -1
- QuizGenerator/generate.py +1 -1
- QuizGenerator/mixins.py +6 -2
- QuizGenerator/premade_questions/basic.py +49 -7
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_calculation.py +92 -82
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_descent_questions.py +68 -45
- QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py +235 -162
- QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py +51 -45
- QuizGenerator/premade_questions/cst463/tensorflow-intro/tensorflow_questions.py +212 -215
- QuizGenerator/question.py +139 -18
- {quizgenerator-0.8.1.dist-info → quizgenerator-0.9.0.dist-info}/METADATA +9 -6
- {quizgenerator-0.8.1.dist-info → quizgenerator-0.9.0.dist-info}/RECORD +15 -15
- {quizgenerator-0.8.1.dist-info → quizgenerator-0.9.0.dist-info}/WHEEL +0 -0
- {quizgenerator-0.8.1.dist-info → quizgenerator-0.9.0.dist-info}/entry_points.txt +0 -0
- {quizgenerator-0.8.1.dist-info → quizgenerator-0.9.0.dist-info}/licenses/LICENSE +0 -0
@@ -39,55 +39,51 @@ class ParameterCountingQuestion(Question):
         self.num_layers = kwargs.get("num_layers", None)
         self.include_biases = kwargs.get("include_biases", True)
 
-        …
-        self.rng.seed(rng_seed)
+    @classmethod
+    def _build_context(cls, *, rng_seed=None, **kwargs):
+        context = super()._build_context(rng_seed=rng_seed, **kwargs)
+        context.num_layers = kwargs.get("num_layers")
+        context.include_biases = kwargs.get("include_biases", True)
 
         # Generate random architecture
-        if self.num_layers is None:
-            self.num_layers = self.rng.choice([3, 4])
+        if context.num_layers is None:
+            context.num_layers = context.rng.choice([3, 4])
 
         # Generate layer sizes
         # Input layer: common sizes for typical problems
         input_sizes = [28*28, 32*32, 784, 1024, 64, 128]
-        self.layer_sizes = [self.rng.choice(input_sizes)]
+        context.layer_sizes = [context.rng.choice(input_sizes)]
 
         # Hidden layers: reasonable sizes
-        for _ in range(self.num_layers - 2):
-            hidden_size = self.rng.choice([32, 64, 128, 256, 512])
-            self.layer_sizes.append(hidden_size)
+        for _ in range(context.num_layers - 2):
+            hidden_size = context.rng.choice([32, 64, 128, 256, 512])
+            context.layer_sizes.append(hidden_size)
 
         # Output layer: typical classification sizes
-        output_size = self.rng.choice([2, 10, 100, 1000])
-        self.layer_sizes.append(output_size)
+        output_size = context.rng.choice([2, 10, 100, 1000])
+        context.layer_sizes.append(output_size)
 
         # Calculate correct answers
-        self.total_weights = 0
-        self.total_biases = 0
-        self.weights_per_layer = []
-        self.biases_per_layer = []
-
-        for i in range(len(self.layer_sizes) - 1):
-            weights = self.layer_sizes[i] * self.layer_sizes[i+1]
-            biases = self.layer_sizes[i+1] if self.include_biases else 0
+        context.total_weights = 0
+        context.total_biases = 0
+        context.weights_per_layer = []
+        context.biases_per_layer = []
 
-            self.weights_per_layer.append(weights)
-            self.biases_per_layer.append(biases)
+        for i in range(len(context.layer_sizes) - 1):
+            weights = context.layer_sizes[i] * context.layer_sizes[i+1]
+            biases = context.layer_sizes[i+1] if context.include_biases else 0
 
-            self.total_weights += weights
-            self.total_biases += biases
+            context.weights_per_layer.append(weights)
+            context.biases_per_layer.append(biases)
 
-        context = dict(kwargs)
-        context["rng_seed"] = rng_seed
+            context.total_weights += weights
+            context.total_biases += biases
+
+        context.total_params = context.total_weights + context.total_biases
         return context
 
-    def _build_body(self) -> Tuple[ca.Section, List[ca.Answer]]:
+    @classmethod
+    def _build_body(cls, context) -> Tuple[ca.Section, List[ca.Answer]]:
         """Build question body and collect answers."""
         body = ca.Section()
         answers = []
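
The hunk above sets the pattern for every class in this release: random generation moves out of instance state (`self.*`, seeded via `self.rng.seed(rng_seed)`) into a classmethod `_build_context` that builds and returns a context object, which the `_build_body`/`_build_explanation` classmethods below consume. A minimal sketch of the shape, illustrative only — `SimpleNamespace` here is an assumption, since the diff does not show the package's actual context type:

    import random
    from types import SimpleNamespace

    class QuestionSketch:
        """Stand-in for the Question base class; not package code."""

        @classmethod
        def _build_context(cls, *, rng_seed=None, **kwargs):
            # Seed once, then hang every generated value off a context
            # object instead of mutating the instance as 0.8.1 did.
            context = SimpleNamespace(rng=random.Random(rng_seed), **kwargs)
            context.num_layers = kwargs.get("num_layers")
            if context.num_layers is None:
                context.num_layers = context.rng.choice([3, 4])
            return context

    # Same seed, same question; no instance state involved.
    assert (QuestionSketch._build_context(rng_seed=1).num_layers
            == QuestionSketch._build_context(rng_seed=1).num_layers)

Keeping generation a pure function of (rng_seed, kwargs) makes regeneration reproducible from the seed alone, which is presumably why the old `context = dict(kwargs)` tail disappears in favor of the `super()._build_context(...)` call.
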
@@ -99,7 +95,7 @@ class ParameterCountingQuestion(Question):
 
         # Display architecture
         arch_parts = []
-        for i, size in enumerate(self.layer_sizes):
+        for i, size in enumerate(context.layer_sizes):
             if i > 0:
                 arch_parts.append(" → ")
             arch_parts.append(str(size))
@@ -108,7 +104,7 @@ class ParameterCountingQuestion(Question):
             "Architecture: " + "".join(arch_parts)
         ]))
 
-        if self.include_biases:
+        if context.include_biases:
             body.add_element(ca.Paragraph([
                 "Each layer includes bias terms."
             ]))
@@ -118,9 +114,9 @@ class ParameterCountingQuestion(Question):
         table_data = []
         table_data.append(["Parameter Type", "Count"])
 
-        total_weights_answer = ca.AnswerTypes.Int(self.total_weights, label="Total weights")
+        total_weights_answer = ca.AnswerTypes.Int(context.total_weights, label="Total weights")
         total_biases_answer = None
-        total_params_answer = ca.AnswerTypes.Int(self.total_params, label="Total trainable parameters")
+        total_params_answer = ca.AnswerTypes.Int(context.total_params, label="Total trainable parameters")
 
         answers.append(total_weights_answer)
         table_data.append([
@@ -128,8 +124,8 @@ class ParameterCountingQuestion(Question):
             total_weights_answer
         ])
 
-        if self.include_biases:
-            total_biases_answer = ca.AnswerTypes.Int(self.total_biases, label="Total biases")
+        if context.include_biases:
+            total_biases_answer = ca.AnswerTypes.Int(context.total_biases, label="Total biases")
             answers.append(total_biases_answer)
             table_data.append([
                 "Total biases",
@@ -146,7 +142,8 @@ class ParameterCountingQuestion(Question):
 
         return body, answers
 
-    def _build_explanation(self) -> Tuple[ca.Section, List[ca.Answer]]:
+    @classmethod
+    def _build_explanation(cls, context) -> Tuple[ca.Section, List[ca.Answer]]:
         """Build question explanation."""
         explanation = ca.Section()
 
@@ -158,10 +155,10 @@ class ParameterCountingQuestion(Question):
             ca.Text("Weights calculation:", emphasis=True)
         ]))
 
-        for i in range(len(self.layer_sizes) - 1):
-            input_size = self.layer_sizes[i]
-            output_size = self.layer_sizes[i+1]
-            weights = self.weights_per_layer[i]
+        for i in range(len(context.layer_sizes) - 1):
+            input_size = context.layer_sizes[i]
+            output_size = context.layer_sizes[i+1]
+            weights = context.weights_per_layer[i]
 
             explanation.add_element(ca.Paragraph([
                 f"Layer {i+1} → {i+2}: ",
@@ -172,19 +169,19 @@ class ParameterCountingQuestion(Question):
         explanation.add_element(ca.Paragraph([
             "Total weights: ",
             ca.Equation(
-                f"{' + '.join([f'{w:,}' for w in self.weights_per_layer])} = {self.total_weights:,}",
+                f"{' + '.join([f'{w:,}' for w in context.weights_per_layer])} = {context.total_weights:,}",
                 inline=True
             )
         ]))
 
-        if self.include_biases:
+        if context.include_biases:
             explanation.add_element(ca.Paragraph([
                 ca.Text("Biases calculation:", emphasis=True)
             ]))
 
-            for i in range(len(self.layer_sizes) - 1):
-                output_size = self.layer_sizes[i+1]
-                biases = self.biases_per_layer[i]
+            for i in range(len(context.layer_sizes) - 1):
+                output_size = context.layer_sizes[i+1]
+                biases = context.biases_per_layer[i]
 
                 explanation.add_element(ca.Paragraph([
                     f"Layer {i+2}: {biases:,} biases (one per neuron)"
@@ -193,7 +190,7 @@ class ParameterCountingQuestion(Question):
             explanation.add_element(ca.Paragraph([
                 "Total biases: ",
                 ca.Equation(
-                    f"{' + '.join([f'{b:,}' for b in self.biases_per_layer])} = {self.total_biases:,}",
+                    f"{' + '.join([f'{b:,}' for b in context.biases_per_layer])} = {context.total_biases:,}",
                     inline=True
                 )
             ]))
@@ -202,14 +199,14 @@ class ParameterCountingQuestion(Question):
             ca.Text("Total trainable parameters:", emphasis=True)
         ]))
 
-        if self.include_biases:
+        if context.include_biases:
             explanation.add_element(ca.Equation(
-                f"\\text{{Total}} = {self.total_weights:,} + {self.total_biases:,} = {self.total_params:,}",
+                f"\\text{{Total}} = {context.total_weights:,} + {context.total_biases:,} = {context.total_params:,}",
                 inline=False
             ))
         else:
             explanation.add_element(ca.Equation(
-                f"\\text{{Total}} = {self.total_weights:,}",
+                f"\\text{{Total}} = {context.total_weights:,}",
                 inline=False
             ))
 
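
The counting rule these hunks encode is: weights between layers i and i+1 are `layer_sizes[i] * layer_sizes[i+1]`, and biases are one per destination neuron. A standalone check with an assumed 784 → 128 → 10 architecture (not code from the package):

    layer_sizes = [784, 128, 10]
    include_biases = True

    weights_per_layer = [layer_sizes[i] * layer_sizes[i + 1]
                         for i in range(len(layer_sizes) - 1)]
    biases_per_layer = [layer_sizes[i + 1] if include_biases else 0
                        for i in range(len(layer_sizes) - 1)]

    total_weights = sum(weights_per_layer)  # 100352 + 1280 = 101632
    total_biases = sum(biases_per_layer)    # 128 + 10 = 138
    print(total_weights + total_biases)     # 101770 trainable parameters
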
@@ -237,119 +234,118 @@ class ActivationFunctionComputationQuestion(Question):
         self.vector_size = kwargs.get("vector_size", None)
         self.activation = kwargs.get("activation", None)
 
-        …
-        self.rng.seed(rng_seed)
+    @classmethod
+    def _build_context(cls, *, rng_seed=None, **kwargs):
+        context = super()._build_context(rng_seed=rng_seed, **kwargs)
+        context.vector_size = kwargs.get("vector_size")
+        context.activation = kwargs.get("activation")
 
         # Generate random input vector
-        if self.vector_size is None:
-            self.vector_size = self.rng.choice([3, 4, 5])
+        if context.vector_size is None:
+            context.vector_size = context.rng.choice([3, 4, 5])
 
-        self.input_vector = [
-            round(self.rng.uniform(-3, 3), 1)
-            for _ in range(self.vector_size)
+        context.input_vector = [
+            round(context.rng.uniform(-3, 3), 1)
+            for _ in range(context.vector_size)
         ]
 
         # Select activation function
-        if self.activation is None:
+        if context.activation is None:
             activations = [
-                self.ACTIVATION_RELU,
-                self.ACTIVATION_SIGMOID,
-                self.ACTIVATION_TANH,
-                self.ACTIVATION_SOFTMAX,
+                cls.ACTIVATION_RELU,
+                cls.ACTIVATION_SIGMOID,
+                cls.ACTIVATION_TANH,
+                cls.ACTIVATION_SOFTMAX,
             ]
-            self.activation = self.rng.choice(activations)
+            context.activation = context.rng.choice(activations)
 
         # For leaky ReLU, set alpha
-        self.leaky_alpha = 0.01
+        context.leaky_alpha = 0.01
 
         # Compute outputs
-        self.output_vector = self._compute_activation(self.input_vector)
-
-        context = dict(kwargs)
-        context["rng_seed"] = rng_seed
+        context.output_vector = cls._compute_activation(context.activation, context.input_vector)
         return context
 
-    def _compute_activation(self, inputs):
+    @staticmethod
+    def _compute_activation(activation, inputs):
         """Compute activation function output."""
-        if self.activation == self.ACTIVATION_RELU:
+        if activation == ActivationFunctionComputationQuestion.ACTIVATION_RELU:
             return [max(0, x) for x in inputs]
 
-        elif self.activation == self.ACTIVATION_SIGMOID:
+        elif activation == ActivationFunctionComputationQuestion.ACTIVATION_SIGMOID:
             return [1 / (1 + np.exp(-x)) for x in inputs]
 
-        elif self.activation == self.ACTIVATION_TANH:
+        elif activation == ActivationFunctionComputationQuestion.ACTIVATION_TANH:
             return [np.tanh(x) for x in inputs]
 
-        elif self.activation == self.ACTIVATION_SOFTMAX:
+        elif activation == ActivationFunctionComputationQuestion.ACTIVATION_SOFTMAX:
             # Subtract max for numerical stability
             exp_vals = [np.exp(x - max(inputs)) for x in inputs]
             sum_exp = sum(exp_vals)
             return [e / sum_exp for e in exp_vals]
 
         else:
-            raise ValueError(f"Unknown activation: {self.activation}")
+            raise ValueError(f"Unknown activation: {activation}")
 
-    def _get_activation_name(self):
+    @staticmethod
+    def _get_activation_name(activation):
         """Get human-readable activation name."""
         names = {
-            self.ACTIVATION_RELU: "ReLU",
-            self.ACTIVATION_SIGMOID: "Sigmoid",
-            self.ACTIVATION_TANH: "Tanh",
-            self.ACTIVATION_SOFTMAX: "Softmax",
+            ActivationFunctionComputationQuestion.ACTIVATION_RELU: "ReLU",
+            ActivationFunctionComputationQuestion.ACTIVATION_SIGMOID: "Sigmoid",
+            ActivationFunctionComputationQuestion.ACTIVATION_TANH: "Tanh",
+            ActivationFunctionComputationQuestion.ACTIVATION_SOFTMAX: "Softmax",
         }
-        return names.get(self.activation, "Unknown")
+        return names.get(activation, "Unknown")
 
-    def _get_activation_formula(self):
+    @staticmethod
+    def _get_activation_formula(activation):
         """Get LaTeX formula for activation function."""
-        if self.activation == self.ACTIVATION_RELU:
+        if activation == ActivationFunctionComputationQuestion.ACTIVATION_RELU:
            return r"\text{ReLU}(x) = \max(0, x)"
 
-        elif self.activation == self.ACTIVATION_SIGMOID:
+        elif activation == ActivationFunctionComputationQuestion.ACTIVATION_SIGMOID:
            return r"\sigma(x) = \frac{1}{1 + e^{-x}}"
 
-        elif self.activation == self.ACTIVATION_TANH:
+        elif activation == ActivationFunctionComputationQuestion.ACTIVATION_TANH:
            return r"\tanh(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}"
 
-        elif self.activation == self.ACTIVATION_SOFTMAX:
+        elif activation == ActivationFunctionComputationQuestion.ACTIVATION_SOFTMAX:
            return r"\text{softmax}(x_i) = \frac{e^{x_i}}{\sum_j e^{x_j}}"
 
         return ""
 
-    def _build_body(self) -> Tuple[ca.Section, List[ca.Answer]]:
+    @classmethod
+    def _build_body(cls, context) -> Tuple[ca.Section, List[ca.Answer]]:
         """Build question body and collect answers."""
         body = ca.Section()
         answers = []
 
         # Question description
         body.add_element(ca.Paragraph([
-            f"Given the input vector below, compute the output after applying the {self._get_activation_name()} activation function."
+            f"Given the input vector below, compute the output after applying the {cls._get_activation_name(context.activation)} activation function."
         ]))
 
         # Display formula
         body.add_element(ca.Paragraph([
             "Activation function: ",
-            ca.Equation(self._get_activation_formula(), inline=True)
+            ca.Equation(cls._get_activation_formula(context.activation), inline=True)
         ]))
 
         # Input vector
-        input_str = ", ".join([f"{x:.1f}" for x in self.input_vector])
+        input_str = ", ".join([f"{x:.1f}" for x in context.input_vector])
         body.add_element(ca.Paragraph([
             "Input: ",
             ca.Equation(f"[{input_str}]", inline=True)
         ]))
 
         # Answer table
-        if self.activation == self.ACTIVATION_SOFTMAX:
+        if context.activation == cls.ACTIVATION_SOFTMAX:
             body.add_element(ca.Paragraph([
                 "Compute the output vector:"
             ]))
 
-            output_answer = ca.AnswerTypes.Vector(self.output_vector, label="Output vector")
+            output_answer = ca.AnswerTypes.Vector(context.output_vector, label="Output vector")
             answers.append(output_answer)
             table_data = []
             table_data.append(["Output Vector"])
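
The softmax branch of `_compute_activation` subtracts `max(inputs)` before exponentiating. Softmax is shift-invariant, so this changes nothing mathematically while preventing overflow in `np.exp` for large inputs. A quick standalone check with an assumed input (not package code):

    import numpy as np

    x = np.array([2.1, -0.5, 0.8])

    naive = np.exp(x) / np.exp(x).sum()
    stable = np.exp(x - x.max()) / np.exp(x - x.max()).sum()

    # Shifted and unshifted forms agree; only the shifted one is
    # safe when entries are large (e.g. x = 1000 overflows np.exp).
    assert np.allclose(naive, stable)
    print(stable.round(3))  # [0.742 0.055 0.202]
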
@@ -365,10 +361,10 @@ class ActivationFunctionComputationQuestion(Question):
             table_data = []
             table_data.append(["Input", "Output"])
 
-            for i, x in enumerate(self.input_vector):
+            for i, x in enumerate(context.input_vector):
                 answer = ca.AnswerTypes.Float(
-                    float(self.output_vector[i]),
-                    label=f"Output for input {self.input_vector[i]:.1f}"
+                    float(context.output_vector[i]),
+                    label=f"Output for input {context.input_vector[i]:.1f}"
                 )
                 answers.append(answer)
                 table_data.append([
@@ -380,28 +376,29 @@ class ActivationFunctionComputationQuestion(Question):
 
         return body, answers
 
-    def _build_explanation(self) -> Tuple[ca.Section, List[ca.Answer]]:
+    @classmethod
+    def _build_explanation(cls, context) -> Tuple[ca.Section, List[ca.Answer]]:
         """Build question explanation."""
         explanation = ca.Section()
 
         explanation.add_element(ca.Paragraph([
-            f"To compute the {self._get_activation_name()} activation, we apply the formula to each input."
+            f"To compute the {cls._get_activation_name(context.activation)} activation, we apply the formula to each input."
         ]))
 
-        if self.activation == self.ACTIVATION_SOFTMAX:
+        if context.activation == cls.ACTIVATION_SOFTMAX:
             explanation.add_element(ca.Paragraph([
                 ca.Text("Softmax computation:", emphasis=True)
             ]))
 
             # Show exponentials
-            exp_strs = [f"e^{{{x:.1f}}}" for x in self.input_vector]
+            exp_strs = [f"e^{{{x:.1f}}}" for x in context.input_vector]
             explanation.add_element(ca.Paragraph([
                 "First, compute exponentials: ",
                 ca.Equation(", ".join(exp_strs), inline=True)
             ]))
 
             # Numerical values
-            exp_vals = [np.exp(x) for x in self.input_vector]
+            exp_vals = [np.exp(x) for x in context.input_vector]
             exp_vals_str = ", ".join([f"{e:.4f}" for e in exp_vals])
             explanation.add_element(ca.Paragraph([
                 ca.Equation(f"\\approx [{exp_vals_str}]", inline=True)
@@ -419,9 +416,9 @@ class ActivationFunctionComputationQuestion(Question):
                 "Divide each by the sum:"
             ]))
 
-            for i, (exp_val, output) in enumerate(zip(exp_vals, self.output_vector)):
+            for i, (exp_val, output) in enumerate(zip(exp_vals, context.output_vector)):
                 explanation.add_element(ca.Equation(
-                    f"\\text{{softmax}}({self.input_vector[i]:.1f}) = \\frac{{{exp_val:.4f}}}{{{sum_exp:.4f}}} = {output:.4f}",
+                    f"\\text{{softmax}}({context.input_vector[i]:.1f}) = \\frac{{{exp_val:.4f}}}{{{sum_exp:.4f}}} = {output:.4f}",
                     inline=False
                 ))
 
@@ -430,20 +427,20 @@ class ActivationFunctionComputationQuestion(Question):
             ca.Text("Element-wise computation:", emphasis=True)
         ]))
 
-        for i, (x, y) in enumerate(zip(self.input_vector, self.output_vector)):
-            if self.activation == self.ACTIVATION_RELU:
+        for i, (x, y) in enumerate(zip(context.input_vector, context.output_vector)):
+            if context.activation == cls.ACTIVATION_RELU:
                 explanation.add_element(ca.Equation(
                     f"\\text{{ReLU}}({x:.1f}) = \\max(0, {x:.1f}) = {y:.4f}",
                     inline=False
                 ))
 
-            elif self.activation == self.ACTIVATION_SIGMOID:
+            elif context.activation == cls.ACTIVATION_SIGMOID:
                 explanation.add_element(ca.Equation(
                     f"\\sigma({x:.1f}) = \\frac{{1}}{{1 + e^{{-{x:.1f}}}}} = {y:.4f}",
                     inline=False
                 ))
 
-            elif self.activation == self.ACTIVATION_TANH:
+            elif context.activation == cls.ACTIVATION_TANH:
                 explanation.add_element(ca.Equation(
                     f"\\tanh({x:.1f}) = {y:.4f}",
                     inline=False
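
The element-wise explanations these hunks render reduce to NumPy one-liners; a standalone sketch with an assumed input vector (not package code):

    import numpy as np

    x = np.array([-1.2, 0.0, 2.3])

    print(np.maximum(0, x))      # ReLU    -> [0.      0.      2.3   ]
    print(1 / (1 + np.exp(-x)))  # sigmoid -> [0.2315  0.5     0.9089]
    print(np.tanh(x))            # tanh    -> [-0.8337 0.      0.9801]
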
@@ -471,41 +468,40 @@ class RegularizationCalculationQuestion(Question):
 
         self.num_weights = kwargs.get("num_weights", None)
 
-        …
-        self.rng.seed(rng_seed)
+    @classmethod
+    def _build_context(cls, *, rng_seed=None, **kwargs):
+        context = super()._build_context(rng_seed=rng_seed, **kwargs)
+        context.num_weights = kwargs.get("num_weights")
 
         # Generate small network (2-4 weights for simplicity)
-        if self.num_weights is None:
-            self.num_weights = self.rng.choice([2, 3, 4])
+        if context.num_weights is None:
+            context.num_weights = context.rng.choice([2, 3, 4])
 
         # Generate weights (small values)
-        self.weights = [
-            round(self.rng.uniform(-2, 2), 1)
-            for _ in range(self.num_weights)
+        context.weights = [
+            round(context.rng.uniform(-2, 2), 1)
+            for _ in range(context.num_weights)
         ]
 
         # Generate input and target
-        self.input_val = round(self.rng.uniform(-3, 3), 1)
-        self.target = round(self.rng.uniform(-5, 5), 1)
+        context.input_val = round(context.rng.uniform(-3, 3), 1)
+        context.target = round(context.rng.uniform(-5, 5), 1)
 
         # Regularization coefficient
-        self.lambda_reg = self.rng.choice([0.01, 0.05, 0.1, 0.5])
+        context.lambda_reg = context.rng.choice([0.01, 0.05, 0.1, 0.5])
 
         # Forward pass (simple linear combination for simplicity)
         # prediction = sum(w_i * input^i) for i in 0..n
         # This gives us a polynomial: w0 + w1*x + w2*x^2 + ...
-        self.prediction = sum(
-            w * (self.input_val ** i)
-            for i, w in enumerate(self.weights)
+        context.prediction = sum(
+            w * (context.input_val ** i)
+            for i, w in enumerate(context.weights)
         )
 
         # Calculate losses
-        self.base_loss = 0.5 * (self.target - self.prediction) ** 2
-        self.l2_penalty = (self.lambda_reg / 2) * sum(w**2 for w in self.weights)
-        self.total_loss = self.base_loss + self.l2_penalty
+        context.base_loss = 0.5 * (context.target - context.prediction) ** 2
+        context.l2_penalty = (context.lambda_reg / 2) * sum(w**2 for w in context.weights)
+        context.total_loss = context.base_loss + context.l2_penalty
 
         # Calculate gradient for first weight (w0, the bias term)
         # dL_base/dw0 = -(target - prediction) * dPrediction/dw0
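
The loss bookkeeping in this hunk (and the w0 gradient completed in the next one) is easy to verify by hand. A standalone sketch with assumed values — two weights, so the model is w0 + w1·x (not package code):

    weights = [0.5, -1.0]
    x, y, lam = 2.0, 1.5, 0.1

    prediction = sum(w * x**i for i, w in enumerate(weights))  # 0.5 - 2.0 = -1.5
    base_loss = 0.5 * (y - prediction) ** 2                    # 0.5 * 3.0**2 = 4.5
    l2_penalty = (lam / 2) * sum(w**2 for w in weights)        # 0.05 * 1.25 = 0.0625
    total_loss = base_loss + l2_penalty                        # 4.5625

    grad_base_w0 = -(y - prediction)   # -3.0   (dPrediction/dw0 = x**0 = 1)
    grad_reg_w0 = lam * weights[0]     # 0.05
    print(grad_base_w0 + grad_reg_w0)  # -2.95
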
@@ -513,15 +509,13 @@ class RegularizationCalculationQuestion(Question):
         # dL_reg/dw0 = lambda * w0
         # dL_total/dw0 = dL_base/dw0 + dL_reg/dw0
 
-        self.grad_base_w0 = -(self.target - self.prediction) * 1  # derivative of w0*x^0
-        self.grad_reg_w0 = self.lambda_reg * self.weights[0]
-        self.grad_total_w0 = self.grad_base_w0 + self.grad_reg_w0
-
-        context = dict(kwargs)
-        context["rng_seed"] = rng_seed
+        context.grad_base_w0 = -(context.target - context.prediction) * 1  # derivative of w0*x^0
+        context.grad_reg_w0 = context.lambda_reg * context.weights[0]
+        context.grad_total_w0 = context.grad_base_w0 + context.grad_reg_w0
         return context
 
-    def _build_body(self) -> Tuple[ca.Section, List[ca.Answer]]:
+    @classmethod
+    def _build_body(cls, context) -> Tuple[ca.Section, List[ca.Answer]]:
         """Build question body and collect answers."""
         body = ca.Section()
         answers = []
@@ -532,7 +526,7 @@ class RegularizationCalculationQuestion(Question):
         ]))
 
         # Display weights
-        weight_strs = [f"w_{i} = {w:.1f}" for i, w in enumerate(self.weights)]
+        weight_strs = [f"w_{i} = {w:.1f}" for i, w in enumerate(context.weights)]
         body.add_element(ca.Paragraph([
             "Weights: ",
             ca.Equation(", ".join(weight_strs), inline=True)
@@ -540,7 +534,7 @@ class RegularizationCalculationQuestion(Question):
 
         # Model equation
         terms = []
-        for i, w in enumerate(self.weights):
+        for i, w in enumerate(context.weights):
             if i == 0:
                 terms.append(f"w_0")
             elif i == 1:
@@ -557,13 +551,13 @@ class RegularizationCalculationQuestion(Question):
         # Data point
         body.add_element(ca.Paragraph([
             "Data point: ",
-            ca.Equation(f"x = {self.input_val:.1f}, y = {self.target:.1f}", inline=True)
+            ca.Equation(f"x = {context.input_val:.1f}, y = {context.target:.1f}", inline=True)
         ]))
 
         # Regularization
         body.add_element(ca.Paragraph([
             "L2 regularization coefficient: ",
-            ca.Equation(f"\\lambda = {self.lambda_reg}", inline=True)
+            ca.Equation(f"\\lambda = {context.lambda_reg}", inline=True)
         ]))
 
         body.add_element(ca.Paragraph([
@@ -574,11 +568,11 @@ class RegularizationCalculationQuestion(Question):
         table_data = []
         table_data.append(["Calculation", "Value"])
 
-        prediction_answer = ca.AnswerTypes.Float(float(self.prediction), label="Prediction ŷ")
-        base_loss_answer = ca.AnswerTypes.Float(float(self.base_loss), label="Base MSE loss")
-        l2_penalty_answer = ca.AnswerTypes.Float(float(self.l2_penalty), label="L2 penalty")
-        total_loss_answer = ca.AnswerTypes.Float(float(self.total_loss), label="Total loss")
-        grad_total_w0_answer = ca.AnswerTypes.Float(float(self.grad_total_w0), label="Gradient ∂L/∂w₀")
+        prediction_answer = ca.AnswerTypes.Float(float(context.prediction), label="Prediction ŷ")
+        base_loss_answer = ca.AnswerTypes.Float(float(context.base_loss), label="Base MSE loss")
+        l2_penalty_answer = ca.AnswerTypes.Float(float(context.l2_penalty), label="L2 penalty")
+        total_loss_answer = ca.AnswerTypes.Float(float(context.total_loss), label="Total loss")
+        grad_total_w0_answer = ca.AnswerTypes.Float(float(context.grad_total_w0), label="Gradient ∂L/∂w₀")
 
         answers.append(prediction_answer)
         table_data.append([
@@ -614,7 +608,8 @@ class RegularizationCalculationQuestion(Question):
 
         return body, answers
 
-    def _build_explanation(self) -> Tuple[ca.Section, List[ca.Answer]]:
+    @classmethod
+    def _build_explanation(cls, context) -> Tuple[ca.Section, List[ca.Answer]]:
         """Build question explanation."""
         explanation = ca.Section()
 
@@ -628,15 +623,15 @@ class RegularizationCalculationQuestion(Question):
         ]))
 
         terms = []
-        for i, w in enumerate(self.weights):
+        for i, w in enumerate(context.weights):
             if i == 0:
                 terms.append(f"{w:.1f}")
             else:
-                x_term = f"{self.input_val:.1f}^{i}" if i > 1 else f"{self.input_val:.1f}"
+                x_term = f"{context.input_val:.1f}^{i}" if i > 1 else f"{context.input_val:.1f}"
                 terms.append(f"{w:.1f} \\times {x_term}")
 
         explanation.add_element(ca.Equation(
-            f"\\hat{{y}} = {' + '.join(terms)} = {self.prediction:.4f}",
+            f"\\hat{{y}} = {' + '.join(terms)} = {context.prediction:.4f}",
             inline=False
         ))
 
@@ -646,7 +641,7 @@ class RegularizationCalculationQuestion(Question):
         ]))
 
         explanation.add_element(ca.Equation(
-            f"L_{{base}} = \\frac{{1}}{{2}}(y - \\hat{{y}})^2 = \\frac{{1}}{{2}}({self.target:.1f} - {self.prediction:.4f})^2 = {self.base_loss:.4f}",
+            f"L_{{base}} = \\frac{{1}}{{2}}(y - \\hat{{y}})^2 = \\frac{{1}}{{2}}({context.target:.1f} - {context.prediction:.4f})^2 = {context.base_loss:.4f}",
             inline=False
         ))
 
@@ -655,11 +650,11 @@ class RegularizationCalculationQuestion(Question):
             ca.Text("Step 3: Compute L2 penalty", emphasis=True)
         ]))
 
-        weight_squares = [f"{w:.1f}^2" for w in self.weights]
-        sum_squares = sum(w**2 for w in self.weights)
+        weight_squares = [f"{w:.1f}^2" for w in context.weights]
+        sum_squares = sum(w**2 for w in context.weights)
 
         explanation.add_element(ca.Equation(
-            f"L_{{reg}} = \\frac{{\\lambda}}{{2}} \\sum w_i^2 = \\frac{{{self.lambda_reg}}}{{2}}({' + '.join(weight_squares)}) = \\frac{{{self.lambda_reg}}}{{2}} \\times {sum_squares:.4f} = {self.l2_penalty:.4f}",
+            f"L_{{reg}} = \\frac{{\\lambda}}{{2}} \\sum w_i^2 = \\frac{{{context.lambda_reg}}}{{2}}({' + '.join(weight_squares)}) = \\frac{{{context.lambda_reg}}}{{2}} \\times {sum_squares:.4f} = {context.l2_penalty:.4f}",
             inline=False
         ))
 
@@ -669,7 +664,7 @@ class RegularizationCalculationQuestion(Question):
         ]))
 
         explanation.add_element(ca.Equation(
-            f"L_{{total}} = L_{{base}} + L_{{reg}} = {self.base_loss:.4f} + {self.l2_penalty:.4f} = {self.total_loss:.4f}",
+            f"L_{{total}} = L_{{base}} + L_{{reg}} = {context.base_loss:.4f} + {context.l2_penalty:.4f} = {context.total_loss:.4f}",
             inline=False
         ))
 
@@ -684,23 +679,23 @@ class RegularizationCalculationQuestion(Question):
         ]))
 
         explanation.add_element(ca.Equation(
-            f"\\frac{{\\partial L_{{base}}}}{{\\partial w_0}} = -(y - \\hat{{y}}) \\times 1 = -({self.target:.1f} - {self.prediction:.4f}) = {self.grad_base_w0:.4f}",
+            f"\\frac{{\\partial L_{{base}}}}{{\\partial w_0}} = -(y - \\hat{{y}}) \\times 1 = -({context.target:.1f} - {context.prediction:.4f}) = {context.grad_base_w0:.4f}",
             inline=False
         ))
 
         explanation.add_element(ca.Equation(
-            f"\\frac{{\\partial L_{{reg}}}}{{\\partial w_0}} = \\lambda w_0 = {self.lambda_reg} \\times {self.weights[0]:.1f} = {self.grad_reg_w0:.4f}",
+            f"\\frac{{\\partial L_{{reg}}}}{{\\partial w_0}} = \\lambda w_0 = {context.lambda_reg} \\times {context.weights[0]:.1f} = {context.grad_reg_w0:.4f}",
             inline=False
         ))
 
         explanation.add_element(ca.Equation(
-            f"\\frac{{\\partial L_{{total}}}}{{\\partial w_0}} = {self.grad_base_w0:.4f} + {self.grad_reg_w0:.4f} = {self.grad_total_w0:.4f}",
+            f"\\frac{{\\partial L_{{total}}}}{{\\partial w_0}} = {context.grad_base_w0:.4f} + {context.grad_reg_w0:.4f} = {context.grad_total_w0:.4f}",
             inline=False
         ))
 
         explanation.add_element(ca.Paragraph([
             "The regularization term adds ",
-            ca.Equation(f"\\lambda w_0 = {self.grad_reg_w0:.4f}", inline=True),
+            ca.Equation(f"\\lambda w_0 = {context.grad_reg_w0:.4f}", inline=True),
             " to the gradient, pushing the weight toward zero."
         ]))
 
@@ -726,64 +721,65 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
         self.num_variables = kwargs.get("num_variables", 2)
         self.show_vanilla_sgd = kwargs.get("show_vanilla_sgd", True)
 
-        …
-        self.rng.seed(rng_seed)
+    @classmethod
+    def _build_context(cls, *, rng_seed=None, **kwargs):
+        context = super()._build_context(rng_seed=rng_seed, **kwargs)
+        context.num_variables = kwargs.get("num_variables", 2)
+        context.show_vanilla_sgd = kwargs.get("show_vanilla_sgd", True)
 
         # Generate well-conditioned quadratic function
-        self.variables, self.function, self.gradient_function, self.equation = generate_function(self.rng, self.num_variables, max_degree=2, use_quadratic=True)
+        context.variables, context.function, context.gradient_function, context.equation = generate_function(
+            context.rng,
+            context.num_variables,
+            max_degree=2,
+            use_quadratic=True
+        )
 
         # Generate current weights (small integers)
-        self.current_weights = [
-            self.rng.choice([-2, -1, 0, 1, 2])
-            for _ in range(self.num_variables)
+        context.current_weights = [
+            context.rng.choice([-2, -1, 0, 1, 2])
+            for _ in range(context.num_variables)
         ]
 
         # Calculate gradient at current position
-        subs_map = dict(zip(self.variables, self.current_weights))
-        g_syms = self.gradient_function.subs(subs_map)
-        self.gradients = [float(val) for val in g_syms]
+        subs_map = dict(zip(context.variables, context.current_weights))
+        g_syms = context.gradient_function.subs(subs_map)
+        context.gradients = [float(val) for val in g_syms]
 
         # Generate previous velocity (for momentum)
         # Start with small or zero velocity
-        self.prev_velocity = [
-            round(self.rng.uniform(-0.5, 0.5), 2)
-            for _ in range(self.num_variables)
+        context.prev_velocity = [
+            round(context.rng.uniform(-0.5, 0.5), 2)
+            for _ in range(context.num_variables)
         ]
 
         # Hyperparameters
-        self.learning_rate = self.rng.choice([0.01, 0.05, 0.1])
-        self.momentum_beta = self.rng.choice([0.8, 0.9])
+        context.learning_rate = context.rng.choice([0.01, 0.05, 0.1])
+        context.momentum_beta = context.rng.choice([0.8, 0.9])
 
         # Calculate momentum updates
         # v_new = beta * v_old + (1 - beta) * gradient
-        self.new_velocity = [
-            self.momentum_beta * v_old + (1 - self.momentum_beta) * grad
-            for v_old, grad in zip(self.prev_velocity, self.gradients)
+        context.new_velocity = [
+            context.momentum_beta * v_old + (1 - context.momentum_beta) * grad
+            for v_old, grad in zip(context.prev_velocity, context.gradients)
         ]
 
         # w_new = w_old - alpha * v_new
-        self.new_weights = [
-            w - self.learning_rate * v
-            for w, v in zip(self.current_weights, self.new_velocity)
+        context.new_weights = [
+            w - context.learning_rate * v
+            for w, v in zip(context.current_weights, context.new_velocity)
        ]
 
         # Calculate vanilla SGD for comparison
-        if self.show_vanilla_sgd:
-            self.sgd_weights = [
-                w - self.learning_rate * grad
-                for w, grad in zip(self.current_weights, self.gradients)
+        if context.show_vanilla_sgd:
+            context.sgd_weights = [
+                w - context.learning_rate * grad
+                for w, grad in zip(context.current_weights, context.gradients)
             ]
-
-        context = dict(kwargs)
-        context["rng_seed"] = rng_seed
         return context
 
-    def _build_body(self) -> Tuple[ca.Section, List[ca.Answer]]:
+    @classmethod
+    def _build_body(cls, context) -> Tuple[ca.Section, List[ca.Answer]]:
         """Build question body and collect answers."""
         body = ca.Section()
         answers = []
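
Note that the velocity update here is the EMA form, v' = βv + (1 − β)g, rather than the classic heavy-ball v' = βv + g: the (1 − β) factor scales the gradient before the step w' = w − αv'. A standalone check of the arithmetic with assumed values for a single variable (not package code):

    beta, alpha = 0.9, 0.1
    w, v_old, grad = 1.0, 0.2, 4.0

    v_new = beta * v_old + (1 - beta) * grad  # 0.18 + 0.40 = 0.58
    w_momentum = w - alpha * v_new            # 1.0 - 0.058 = 0.942
    w_sgd = w - alpha * grad                  # vanilla SGD: 1.0 - 0.40 = 0.60

With the EMA form, a large gradient moves the weight far less than vanilla SGD on the first step; the velocity accumulates over iterations instead.
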
@@ -794,7 +790,7 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
         ]))
 
         body.add_element(ca.Equation(
-            sp.latex(self.function),
+            sp.latex(context.function),
             inline=False
         ))
 
@@ -803,7 +799,7 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
         ]))
 
         body.add_element(ca.Equation(
-            f"\\nabla f = {sp.latex(self.gradient_function)}",
+            f"\\nabla f = {sp.latex(context.gradient_function)}",
             inline=False
         ))
 
@@ -814,12 +810,12 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
 
         body.add_element(ca.Paragraph([
             "Current weights: ",
-            ca.Equation(f"{format_vector(self.current_weights)}", inline=True)
+            ca.Equation(f"{format_vector(context.current_weights)}", inline=True)
         ]))
 
         body.add_element(ca.Paragraph([
             "Previous velocity: ",
-            ca.Equation(f"{format_vector(self.prev_velocity)}", inline=True)
+            ca.Equation(f"{format_vector(context.prev_velocity)}", inline=True)
         ]))
 
         # Hyperparameters
@@ -829,12 +825,12 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
 
         body.add_element(ca.Paragraph([
             "Learning rate: ",
-            ca.Equation(f"\\alpha = {self.learning_rate}", inline=True)
+            ca.Equation(f"\\alpha = {context.learning_rate}", inline=True)
         ]))
 
         body.add_element(ca.Paragraph([
             "Momentum coefficient: ",
-            ca.Equation(f"\\beta = {self.momentum_beta}", inline=True)
+            ca.Equation(f"\\beta = {context.momentum_beta}", inline=True)
         ]))
 
         # Questions
@@ -846,8 +842,8 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
         table_data = []
         table_data.append(["Update Type", "Formula", "Result"])
 
-        velocity_answer = ca.AnswerTypes.Vector(self.new_velocity, label="New velocity")
-        weights_momentum_answer = ca.AnswerTypes.Vector(self.new_weights, label="Weights (momentum)")
+        velocity_answer = ca.AnswerTypes.Vector(context.new_velocity, label="New velocity")
+        weights_momentum_answer = ca.AnswerTypes.Vector(context.new_weights, label="Weights (momentum)")
         weights_sgd_answer = None
 
         answers.append(velocity_answer)
@@ -864,8 +860,8 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
             weights_momentum_answer
         ])
 
-        if self.show_vanilla_sgd:
-            weights_sgd_answer = ca.AnswerTypes.Vector(self.sgd_weights, label="Weights (vanilla SGD)")
+        if context.show_vanilla_sgd:
+            weights_sgd_answer = ca.AnswerTypes.Vector(context.sgd_weights, label="Weights (vanilla SGD)")
             answers.append(weights_sgd_answer)
             table_data.append([
                 "Weights (vanilla SGD)",
@@ -877,7 +873,8 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
 
         return body, answers
 
-    def _build_explanation(self) -> Tuple[ca.Section, List[ca.Answer]]:
+    @classmethod
+    def _build_explanation(cls, context) -> Tuple[ca.Section, List[ca.Answer]]:
         """Build question explanation."""
         explanation = ca.Section()
 
@@ -902,17 +899,17 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
 
         # Show calculation for each component
         digits = ca.Answer.DEFAULT_ROUNDING_DIGITS
-        for i in range(self.num_variables):
+        for i in range(context.num_variables):
             var_name = f"x_{i}"
             # Round all intermediate values to avoid floating point precision issues
-            beta_times_v = round(self.momentum_beta * self.prev_velocity[i], digits)
-            one_minus_beta = round(1 - self.momentum_beta, digits)
-            one_minus_beta_times_grad = round((1 - self.momentum_beta) * self.gradients[i], digits)
+            beta_times_v = round(context.momentum_beta * context.prev_velocity[i], digits)
+            one_minus_beta = round(1 - context.momentum_beta, digits)
+            one_minus_beta_times_grad = round((1 - context.momentum_beta) * context.gradients[i], digits)
 
             explanation.add_element(ca.Equation(
-                f"v'[{i}] = {self.momentum_beta} \\times {self.prev_velocity[i]:.{digits}f} + "
-                f"{one_minus_beta:.{digits}f} \\times {self.gradients[i]:.{digits}f} = "
-                f"{beta_times_v:.{digits}f} + {one_minus_beta_times_grad:.{digits}f} = {self.new_velocity[i]:.{digits}f}",
+                f"v'[{i}] = {context.momentum_beta} \\times {context.prev_velocity[i]:.{digits}f} + "
+                f"{one_minus_beta:.{digits}f} \\times {context.gradients[i]:.{digits}f} = "
+                f"{beta_times_v:.{digits}f} + {one_minus_beta_times_grad:.{digits}f} = {context.new_velocity[i]:.{digits}f}",
                 inline=False
             ))
 
@@ -926,14 +923,14 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
             inline=False
         ))
 
-        for i in range(self.num_variables):
+        for i in range(context.num_variables):
             explanation.add_element(ca.Equation(
-                f"w[{i}] = {self.current_weights[i]} - {self.learning_rate} \\times {self.new_velocity[i]:.4f} = {self.new_weights[i]:.4f}",
+                f"w[{i}] = {context.current_weights[i]} - {context.learning_rate} \\times {context.new_velocity[i]:.4f} = {context.new_weights[i]:.4f}",
                 inline=False
             ))
 
         # Comparison with vanilla SGD
-        if self.show_vanilla_sgd:
+        if context.show_vanilla_sgd:
             explanation.add_element(ca.Paragraph([
                 ca.Text("Comparison with vanilla SGD:", emphasis=True)
             ]))
@@ -947,9 +944,9 @@ class MomentumOptimizerQuestion(Question, TableQuestionMixin, BodyTemplatesMixin
                 inline=False
             ))
 
-            for i in range(self.num_variables):
+            for i in range(context.num_variables):
                 explanation.add_element(ca.Equation(
-                    f"w[{i}] = {self.current_weights[i]} - {self.learning_rate} \\times {self.gradients[i]:.4f} = {self.sgd_weights[i]:.4f}",
+                    f"w[{i}] = {context.current_weights[i]} - {context.learning_rate} \\times {context.gradients[i]:.4f} = {context.sgd_weights[i]:.4f}",
                     inline=False
                 ))
 