QuizGenerator 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuizGenerator/README.md +5 -0
- QuizGenerator/__init__.py +27 -0
- QuizGenerator/__main__.py +7 -0
- QuizGenerator/canvas/__init__.py +13 -0
- QuizGenerator/canvas/canvas_interface.py +627 -0
- QuizGenerator/canvas/classes.py +235 -0
- QuizGenerator/constants.py +149 -0
- QuizGenerator/contentast.py +1955 -0
- QuizGenerator/generate.py +253 -0
- QuizGenerator/logging.yaml +55 -0
- QuizGenerator/misc.py +579 -0
- QuizGenerator/mixins.py +548 -0
- QuizGenerator/performance.py +202 -0
- QuizGenerator/premade_questions/__init__.py +0 -0
- QuizGenerator/premade_questions/basic.py +103 -0
- QuizGenerator/premade_questions/cst334/__init__.py +1 -0
- QuizGenerator/premade_questions/cst334/languages.py +391 -0
- QuizGenerator/premade_questions/cst334/math_questions.py +297 -0
- QuizGenerator/premade_questions/cst334/memory_questions.py +1400 -0
- QuizGenerator/premade_questions/cst334/ostep13_vsfs.py +572 -0
- QuizGenerator/premade_questions/cst334/persistence_questions.py +451 -0
- QuizGenerator/premade_questions/cst334/process.py +648 -0
- QuizGenerator/premade_questions/cst463/__init__.py +0 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/__init__.py +3 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_calculation.py +369 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_descent_questions.py +305 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py +650 -0
- QuizGenerator/premade_questions/cst463/gradient_descent/misc.py +73 -0
- QuizGenerator/premade_questions/cst463/math_and_data/__init__.py +2 -0
- QuizGenerator/premade_questions/cst463/math_and_data/matrix_questions.py +631 -0
- QuizGenerator/premade_questions/cst463/math_and_data/vector_questions.py +534 -0
- QuizGenerator/premade_questions/cst463/models/__init__.py +0 -0
- QuizGenerator/premade_questions/cst463/models/attention.py +192 -0
- QuizGenerator/premade_questions/cst463/models/cnns.py +186 -0
- QuizGenerator/premade_questions/cst463/models/matrices.py +24 -0
- QuizGenerator/premade_questions/cst463/models/rnns.py +202 -0
- QuizGenerator/premade_questions/cst463/models/text.py +203 -0
- QuizGenerator/premade_questions/cst463/models/weight_counting.py +227 -0
- QuizGenerator/premade_questions/cst463/neural-network-basics/__init__.py +6 -0
- QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py +1314 -0
- QuizGenerator/premade_questions/cst463/tensorflow-intro/__init__.py +6 -0
- QuizGenerator/premade_questions/cst463/tensorflow-intro/tensorflow_questions.py +936 -0
- QuizGenerator/qrcode_generator.py +293 -0
- QuizGenerator/question.py +715 -0
- QuizGenerator/quiz.py +467 -0
- QuizGenerator/regenerate.py +472 -0
- QuizGenerator/typst_utils.py +113 -0
- quizgenerator-0.4.2.dist-info/METADATA +265 -0
- quizgenerator-0.4.2.dist-info/RECORD +52 -0
- quizgenerator-0.4.2.dist-info/WHEEL +4 -0
- quizgenerator-0.4.2.dist-info/entry_points.txt +3 -0
- quizgenerator-0.4.2.dist-info/licenses/LICENSE +674 -0
|
@@ -0,0 +1,650 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import abc
|
|
4
|
+
import logging
|
|
5
|
+
import math
|
|
6
|
+
import numpy as np
|
|
7
|
+
from typing import List, Tuple, Dict, Any
|
|
8
|
+
|
|
9
|
+
from QuizGenerator.contentast import ContentAST
|
|
10
|
+
from QuizGenerator.question import Question, Answer, QuestionRegistry
|
|
11
|
+
from QuizGenerator.mixins import TableQuestionMixin, BodyTemplatesMixin
|
|
12
|
+
|
|
13
|
+
log = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class LossQuestion(Question, TableQuestionMixin, BodyTemplatesMixin, abc.ABC):
    """Abstract base for questions that ask students to compute a loss function.

    Subclasses supply the data generation, the per-sample/overall loss math,
    and the table/explanation rendering via the abstract hooks below.  This
    base class owns the shared configuration (sample and feature counts), the
    answer bookkeeping, and the overall question body/explanation skeleton.
    """

    def __init__(self, *args, **kwargs):
        # Default this question family to the ML optimization topic unless the
        # caller explicitly chose another one.
        kwargs.setdefault("topic", Question.Topic.ML_OPTIMIZATION)
        super().__init__(*args, **kwargs)

        # Clamp the number of samples to 3-10 so the table stays a usable size.
        self.num_samples = min(10, max(3, kwargs.get("num_samples", 5)))

        # Clamp the number of input features to 1-5 for table readability.
        self.num_input_features = min(5, max(1, kwargs.get("num_input_features", 2)))
        self.vector_inputs = kwargs.get("vector_inputs", False)  # Whether to show inputs as vectors

        # Filled in by refresh() through the _generate_data/_calculate_losses hooks.
        self.data = []
        self.individual_losses = []
        self.overall_loss = 0.0

    def refresh(self, rng_seed=None, *args, **kwargs):
        """Generate new random data and calculate losses."""
        super().refresh(*args, rng_seed=rng_seed, **kwargs)
        self._generate_data()
        self._calculate_losses()
        self._create_answers()

    @abc.abstractmethod
    def _generate_data(self):
        """Generate sample data appropriate for this loss function type."""

    @abc.abstractmethod
    def _calculate_losses(self):
        """Calculate individual and overall losses."""

    @abc.abstractmethod
    def _get_loss_function_name(self) -> str:
        """Return the name of the loss function."""

    @abc.abstractmethod
    def _get_loss_function_formula(self) -> str:
        """Return the LaTeX formula for the loss function."""

    @abc.abstractmethod
    def _get_loss_function_short_name(self) -> str:
        """Return the short name of the loss function (used in question body)."""

    def _create_answers(self):
        """Create answer objects for individual losses and overall loss."""
        self.answers = {}

        # One answer field per sample's individual loss.
        for idx in range(self.num_samples):
            key = f"loss_{idx}"
            self.answers[key] = Answer.float_value(key, self.individual_losses[idx])

        # Plus one field for the aggregate loss over all samples.
        self.answers["overall_loss"] = Answer.float_value("overall_loss", self.overall_loss)

    def get_body(self) -> ContentAST.Element:
        """Generate the question body with data table."""
        short_name = self._get_loss_function_short_name()
        body = ContentAST.Section()

        # Task description.
        body.add_element(ContentAST.Paragraph([
            f"Given the dataset below, calculate the {short_name} for each sample "
            f"and the overall {short_name}."
        ]))

        # The data table containing the per-sample answer fields.
        body.add_element(self._create_data_table())

        # Prompt for the aggregate loss.
        body.add_element(ContentAST.Paragraph([
            f"Overall {short_name}: "
        ]))
        body.add_element(ContentAST.Answer(self.answers["overall_loss"]))

        return body

    @abc.abstractmethod
    def _create_data_table(self) -> ContentAST.Element:
        """Create the data table with answer fields."""

    def get_explanation(self) -> ContentAST.Element:
        """Generate detailed explanation of the loss calculations."""
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            f"To calculate the {self._get_loss_function_name()}, we apply the formula to each sample:"
        ]))
        explanation.add_element(ContentAST.Equation(self._get_loss_function_formula(), inline=False))

        # Per-sample worked calculations, then the filled-in table, then the
        # aggregate-loss derivation.
        explanation.add_element(self._create_calculation_steps())
        explanation.add_element(ContentAST.Paragraph(["Completed table:"]))
        explanation.add_element(self._create_completed_table())
        explanation.add_element(self._create_overall_loss_explanation())

        return explanation

    @abc.abstractmethod
    def _create_calculation_steps(self) -> ContentAST.Element:
        """Create step-by-step calculation explanations."""

    @abc.abstractmethod
    def _create_completed_table(self) -> ContentAST.Element:
        """Create the completed table with all values filled in."""

    @abc.abstractmethod
    def _create_overall_loss_explanation(self) -> ContentAST.Element:
        """Create explanation for overall loss calculation."""
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
@QuestionRegistry.register("LossQuestion_Linear")
class LossQuestion_Linear(LossQuestion):
    """Linear regression with Mean Squared Error (MSE) loss.

    Generates random regression samples (inputs, true values, predictions),
    asks for the squared-error loss of each sample and the mean over all
    samples.  Supports 1 to 5 output variables per sample.
    """

    def __init__(self, *args, **kwargs):
        # Number of regression outputs per sample, clamped to 1-5.
        self.num_output_vars = kwargs.get("num_output_vars", 1)
        self.num_output_vars = max(1, min(5, self.num_output_vars))
        super().__init__(*args, **kwargs)

    def _generate_data(self):
        """Generate regression data with continuous target values."""
        self.data = []

        for i in range(self.num_samples):
            sample = {}

            # Input features (rounded to 2 decimal places for display).
            sample['inputs'] = [round(self.rng.uniform(-100, 100), 2) for _ in range(self.num_input_features)]

            # True values (y): scalar for a single output, list otherwise.
            if self.num_output_vars == 1:
                sample['true_values'] = round(self.rng.uniform(-100, 100), 2)
            else:
                sample['true_values'] = [round(self.rng.uniform(-100, 100), 2) for _ in range(self.num_output_vars)]

            # Predictions (p): same scalar/list shape as true_values.
            if self.num_output_vars == 1:
                sample['predictions'] = round(self.rng.uniform(-100, 100), 2)
            else:
                sample['predictions'] = [round(self.rng.uniform(-100, 100), 2) for _ in range(self.num_output_vars)]

            self.data.append(sample)

    def _calculate_losses(self):
        """Calculate MSE for each sample and overall."""
        self.individual_losses = []
        total_loss = 0.0

        for sample in self.data:
            if self.num_output_vars == 1:
                # Single output squared error: (y - p)^2
                loss = (sample['true_values'] - sample['predictions']) ** 2
            else:
                # Multi-output squared error: sum of (y_i - p_i)^2
                loss = sum(
                    (y - p) ** 2
                    for y, p in zip(sample['true_values'], sample['predictions'])
                )

            self.individual_losses.append(loss)
            total_loss += loss

        # Overall MSE is the average of the individual losses.
        self.overall_loss = total_loss / self.num_samples

    def _get_loss_function_name(self) -> str:
        return "Mean Squared Error (MSE)"

    def _get_loss_function_short_name(self) -> str:
        return "MSE"

    def _get_loss_function_formula(self) -> str:
        if self.num_output_vars == 1:
            return r"L(y, p) = (y - p)^2"
        else:
            return r"L(\mathbf{y}, \mathbf{p}) = \sum_{i=1}^{k} (y_i - p_i)^2"

    def _create_data_table(self) -> ContentAST.Element:
        """Create table with input features, true values, predictions, and loss fields."""
        headers = ["x"]

        if self.num_output_vars == 1:
            headers.extend(["y", "p", "loss"])
        else:
            # One y_i and one p_i column per output variable.
            for i in range(self.num_output_vars):
                headers.append(f"y_{i}")
            for i in range(self.num_output_vars):
                headers.append(f"p_{i}")
            headers.append("loss")

        rows = []
        for i, sample in enumerate(self.data):
            row = {}

            # Input features rendered as a single vector cell.
            x_vector = "[" + ", ".join([f"{x:.2f}" for x in sample['inputs']]) + "]"
            row["x"] = x_vector

            # True values.
            if self.num_output_vars == 1:
                row["y"] = f"{sample['true_values']:.2f}"
            else:
                for j in range(self.num_output_vars):
                    row[f"y_{j}"] = f"{sample['true_values'][j]:.2f}"

            # Predictions.
            if self.num_output_vars == 1:
                row["p"] = f"{sample['predictions']:.2f}"
            else:
                for j in range(self.num_output_vars):
                    row[f"p_{j}"] = f"{sample['predictions'][j]:.2f}"

            # Per-sample loss answer field.
            row["loss"] = self.answers[f"loss_{i}"]

            rows.append(row)

        return self.create_answer_table(headers, rows, answer_columns=["loss"])

    def _create_calculation_steps(self) -> ContentAST.Element:
        """Show step-by-step MSE calculations."""
        steps = ContentAST.Section()

        for i, sample in enumerate(self.data):
            steps.add_element(ContentAST.Paragraph([f"Sample {i+1}:"]))

            if self.num_output_vars == 1:
                y = sample['true_values']
                p = sample['predictions']
                loss = self.individual_losses[i]
                diff = y - p

                # Wrap a negative p in parentheses to avoid "- -" in the LaTeX.
                if p >= 0:
                    calculation = f"L = ({y:.2f} - {p:.2f})^2 = ({diff:.2f})^2 = {loss:.4f}"
                else:
                    calculation = f"L = ({y:.2f} - ({p:.2f}))^2 = ({diff:.2f})^2 = {loss:.4f}"
                steps.add_element(ContentAST.Equation(calculation, inline=False))
            else:
                # Multi-output calculation: one squared term per output.
                y_vals = sample['true_values']
                p_vals = sample['predictions']
                loss = self.individual_losses[i]

                terms = []
                for j, (y, p) in enumerate(zip(y_vals, p_vals)):
                    # Wrap a negative p in parentheses to avoid "- -".
                    if p >= 0:
                        terms.append(f"({y:.2f} - {p:.2f})^2")
                    else:
                        terms.append(f"({y:.2f} - ({p:.2f}))^2")

                calculation = f"L = {' + '.join(terms)} = {loss:.4f}"
                steps.add_element(ContentAST.Equation(calculation, inline=False))

        return steps

    def _create_completed_table(self) -> ContentAST.Element:
        """Create table with all values including calculated losses."""
        # Bug fix: headers were hard-coded to ["x_0", "x_1"], which mismatched
        # the row width whenever num_input_features != 2.  Build one header per
        # actual input feature instead.
        headers = [f"x_{i}" for i in range(self.num_input_features)]

        if self.num_output_vars == 1:
            headers.extend(["y", "p", "loss"])
        else:
            for i in range(self.num_output_vars):
                headers.append(f"y_{i}")
            for i in range(self.num_output_vars):
                headers.append(f"p_{i}")
            headers.append("loss")

        rows = []
        for i, sample in enumerate(self.data):
            row = []

            # Input features, one cell each.
            for x in sample['inputs']:
                row.append(f"{x:.2f}")

            # True values.
            if self.num_output_vars == 1:
                row.append(f"{sample['true_values']:.2f}")
            else:
                for y in sample['true_values']:
                    row.append(f"{y:.2f}")

            # Predictions.
            if self.num_output_vars == 1:
                row.append(f"{sample['predictions']:.2f}")
            else:
                for p in sample['predictions']:
                    row.append(f"{p:.2f}")

            # Calculated loss.
            row.append(f"{self.individual_losses[i]:.4f}")

            rows.append(row)

        return ContentAST.Table(headers=headers, data=rows)

    def _create_overall_loss_explanation(self) -> ContentAST.Element:
        """Explain overall MSE calculation."""
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "The overall MSE is the average of individual losses:"
        ]))

        losses_str = " + ".join([f"{loss:.4f}" for loss in self.individual_losses])
        calculation = f"MSE = \\frac{{{losses_str}}}{{{self.num_samples}}} = {self.overall_loss:.4f}"

        explanation.add_element(ContentAST.Equation(calculation, inline=False))

        return explanation
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
@QuestionRegistry.register("LossQuestion_Logistic")
class LossQuestion_Logistic(LossQuestion):
    """Binary logistic regression with log-loss.

    Generates binary-classification samples (inputs, true label in {0, 1},
    predicted probability), asks for the binary cross-entropy of each sample
    and the mean over all samples.
    """

    def _generate_data(self):
        """Generate binary classification data."""
        self.data = []

        for i in range(self.num_samples):
            sample = {}

            # Input features (rounded to 2 decimal places for display).
            sample['inputs'] = [round(self.rng.uniform(-100, 100), 2) for _ in range(self.num_input_features)]

            # Binary true label (0 or 1).
            sample['true_values'] = self.rng.choice([0, 1])

            # Predicted probability; kept away from 0/1 so ln() stays tame.
            sample['predictions'] = round(self.rng.uniform(0.1, 0.9), 3)

            self.data.append(sample)

    def _calculate_losses(self):
        """Calculate log-loss for each sample and overall."""
        self.individual_losses = []
        total_loss = 0.0

        for sample in self.data:
            y = sample['true_values']
            p = sample['predictions']

            # Log-loss: -[y * ln(p) + (1-y) * ln(1-p)]; since y is 0 or 1,
            # exactly one of the two terms survives.
            if y == 1:
                loss = -math.log(p)
            else:
                loss = -math.log(1 - p)

            self.individual_losses.append(loss)
            total_loss += loss

        # Overall log-loss is the average of the individual losses.
        self.overall_loss = total_loss / self.num_samples

    def _get_loss_function_name(self) -> str:
        return "Log-Loss (Binary Cross-Entropy)"

    def _get_loss_function_short_name(self) -> str:
        return "log-loss"

    def _get_loss_function_formula(self) -> str:
        return r"L(y, p) = -[y \ln(p) + (1-y) \ln(1-p)]"

    def _create_data_table(self) -> ContentAST.Element:
        """Create table with features, true labels, predicted probabilities, and loss fields."""
        headers = ["x", "y", "p", "loss"]

        rows = []
        for i, sample in enumerate(self.data):
            row = {}

            # Input features rendered as a single vector cell.
            x_vector = "[" + ", ".join([f"{x:.2f}" for x in sample['inputs']]) + "]"
            row["x"] = x_vector

            # True label.
            row["y"] = str(sample['true_values'])

            # Predicted probability.
            row["p"] = f"{sample['predictions']:.3f}"

            # Per-sample loss answer field.
            row["loss"] = self.answers[f"loss_{i}"]

            rows.append(row)

        return self.create_answer_table(headers, rows, answer_columns=["loss"])

    def _create_calculation_steps(self) -> ContentAST.Element:
        """Show step-by-step log-loss calculations."""
        steps = ContentAST.Section()

        for i, sample in enumerate(self.data):
            y = sample['true_values']
            p = sample['predictions']
            loss = self.individual_losses[i]

            steps.add_element(ContentAST.Paragraph([f"Sample {i+1}:"]))

            # Show both formula terms, with the zero term written out so the
            # student sees which part drops away.
            if y == 1:
                calculation = f"L = -[1 \\cdot \\ln({p:.3f}) + 0 \\cdot \\ln(1-{p:.3f})] = -\\ln({p:.3f}) = {loss:.4f}"
            else:
                calculation = f"L = -[0 \\cdot \\ln({p:.3f}) + 1 \\cdot \\ln(1-{p:.3f})] = -\\ln({1-p:.3f}) = {loss:.4f}"

            steps.add_element(ContentAST.Equation(calculation, inline=False))

        return steps

    def _create_completed_table(self) -> ContentAST.Element:
        """Create table with all values including calculated losses."""
        # Bug fix: headers were hard-coded to ["x_0", "x_1", ...], which
        # mismatched the row width whenever num_input_features != 2.  Build one
        # header per actual input feature instead.
        headers = [f"x_{i}" for i in range(self.num_input_features)] + ["y", "p", "loss"]

        rows = []
        for i, sample in enumerate(self.data):
            row = []

            # Input features, one cell each.
            for x in sample['inputs']:
                row.append(f"{x:.2f}")

            # True label.
            row.append(str(sample['true_values']))

            # Predicted probability.
            row.append(f"{sample['predictions']:.3f}")

            # Calculated loss.
            row.append(f"{self.individual_losses[i]:.4f}")

            rows.append(row)

        return ContentAST.Table(headers=headers, data=rows)

    def _create_overall_loss_explanation(self) -> ContentAST.Element:
        """Explain overall log-loss calculation."""
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "The overall log-loss is the average of individual losses:"
        ]))

        losses_str = " + ".join([f"{loss:.4f}" for loss in self.individual_losses])
        calculation = f"\\text{{Log-Loss}} = \\frac{{{losses_str}}}{{{self.num_samples}}} = {self.overall_loss:.4f}"

        explanation.add_element(ContentAST.Equation(calculation, inline=False))

        return explanation
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
@QuestionRegistry.register("LossQuestion_MulticlassLogistic")
class LossQuestion_MulticlassLogistic(LossQuestion):
    """Multi-class logistic regression with cross-entropy loss.

    Generates multi-class samples (inputs, one-hot true-class vector,
    normalized predicted-probability vector), asks for the cross-entropy of
    each sample and the mean over all samples.  Supports 3 to 5 classes.
    """

    def __init__(self, *args, **kwargs):
        # Number of classes, clamped to 3-5.
        self.num_classes = kwargs.get("num_classes", 3)
        self.num_classes = max(3, min(5, self.num_classes))
        super().__init__(*args, **kwargs)

    def _generate_data(self):
        """Generate multi-class classification data."""
        self.data = []

        for i in range(self.num_samples):
            sample = {}

            # Input features (rounded to 2 decimal places for display).
            sample['inputs'] = [round(self.rng.uniform(-100, 100), 2) for _ in range(self.num_input_features)]

            # One-hot true-class vector: exactly one entry is 1.
            # NOTE(review): assumes self.rng.randint has random.Random semantics
            # (both endpoints inclusive) — verify against the Question base class.
            true_class_idx = self.rng.randint(0, self.num_classes - 1)
            sample['true_values'] = [0] * self.num_classes
            sample['true_values'][true_class_idx] = 1

            # Predicted probabilities: normalized to sum to ~1 (softmax-like),
            # rounded to 3 decimal places.
            raw_probs = [self.rng.uniform(0.1, 2.0) for _ in range(self.num_classes)]
            prob_sum = sum(raw_probs)
            sample['predictions'] = [round(p / prob_sum, 3) for p in raw_probs]

            self.data.append(sample)

    def _calculate_losses(self):
        """Calculate cross-entropy loss for each sample and overall."""
        self.individual_losses = []
        total_loss = 0.0

        for sample in self.data:
            y_vec = sample['true_values']
            p_vec = sample['predictions']

            # Cross-entropy: -sum(y_i * ln(p_i)).  Only the true class (y > 0)
            # contributes; clamp p away from 0 to avoid math domain errors.
            loss = -sum(y * math.log(max(p, 1e-15)) for y, p in zip(y_vec, p_vec) if y > 0)

            self.individual_losses.append(loss)
            total_loss += loss

        # Overall cross-entropy is the average of the individual losses.
        self.overall_loss = total_loss / self.num_samples

    def _get_loss_function_name(self) -> str:
        return "Cross-Entropy Loss"

    def _get_loss_function_short_name(self) -> str:
        return "cross-entropy loss"

    def _get_loss_function_formula(self) -> str:
        return r"L(\mathbf{y}, \mathbf{p}) = -\sum_{i=1}^{K} y_i \ln(p_i)"

    def _create_data_table(self) -> ContentAST.Element:
        """Create table with features, true class vectors, predicted probabilities, and loss fields."""
        headers = ["x", "y", "p", "loss"]

        rows = []
        for i, sample in enumerate(self.data):
            row = {}

            # Input features rendered as a single vector cell.
            x_vector = "[" + ", ".join([f"{x:.2f}" for x in sample['inputs']]) + "]"
            row["x"] = x_vector

            # One-hot true-class vector.
            y_vector = "[" + ", ".join([str(y) for y in sample['true_values']]) + "]"
            row["y"] = y_vector

            # Predicted-probability vector.
            p_vector = "[" + ", ".join([f"{p:.3f}" for p in sample['predictions']]) + "]"
            row["p"] = p_vector

            # Per-sample loss answer field.
            row["loss"] = self.answers[f"loss_{i}"]

            rows.append(row)

        return self.create_answer_table(headers, rows, answer_columns=["loss"])

    def _create_calculation_steps(self) -> ContentAST.Element:
        """Show step-by-step cross-entropy calculations."""
        steps = ContentAST.Section()

        for i, sample in enumerate(self.data):
            y_vec = sample['true_values']
            p_vec = sample['predictions']
            loss = self.individual_losses[i]

            steps.add_element(ContentAST.Paragraph([f"Sample {i+1}:"]))

            # Display the two vectors involved in the dot product.
            # NOTE(review): this LaTeX goes into a Paragraph, not an Equation —
            # confirm Paragraph renders raw LaTeX as intended.
            y_str = "[" + ", ".join([str(y) for y in y_vec]) + "]"
            p_str = "[" + ", ".join([f"{p:.3f}" for p in p_vec]) + "]"

            steps.add_element(ContentAST.Paragraph([f"\\mathbf{{y}} = {y_str}, \\mathbf{{p}} = {p_str}"]))

            # Find the true class (where y_i = 1) to show which term survives.
            try:
                true_class_idx = y_vec.index(1)
                p_true = p_vec[true_class_idx]

                # One term per class; the zero-valued y entries make their
                # terms vanish.  (The original code had identical if/else
                # branches here — collapsed into a single append.)
                terms = [f"{y} \\cdot \\ln({p:.3f})" for y, p in zip(y_vec, p_vec)]

                calculation = f"L = -\\mathbf{{y}} \\cdot \\ln(\\mathbf{{p}}) = -({' + '.join(terms)}) = -{y_vec[true_class_idx]} \\cdot \\ln({p_true:.3f}) = {loss:.4f}"
            except ValueError:
                # Fallback in case no class is set to 1 (shouldn't happen, but safety check).
                calculation = f"L = -\\mathbf{{y}} \\cdot \\ln(\\mathbf{{p}}) = {loss:.4f}"

            steps.add_element(ContentAST.Equation(calculation, inline=False))

        return steps

    def _create_completed_table(self) -> ContentAST.Element:
        """Create table with all values including calculated losses."""
        # Bug fix: headers were hard-coded to ["x_0", "x_1", ...], which
        # mismatched the row width whenever num_input_features != 2.  Build one
        # header per actual input feature instead.
        headers = [f"x_{i}" for i in range(self.num_input_features)] + ["y", "p", "loss"]

        rows = []
        for i, sample in enumerate(self.data):
            row = []

            # Input features, one cell each.
            for x in sample['inputs']:
                row.append(f"{x:.2f}")

            # One-hot true-class vector.
            y_vector = "[" + ", ".join([str(y) for y in sample['true_values']]) + "]"
            row.append(y_vector)

            # Predicted-probability vector.
            p_vector = "[" + ", ".join([f"{p:.3f}" for p in sample['predictions']]) + "]"
            row.append(p_vector)

            # Calculated loss.
            row.append(f"{self.individual_losses[i]:.4f}")

            rows.append(row)

        return ContentAST.Table(headers=headers, data=rows)

    def _create_overall_loss_explanation(self) -> ContentAST.Element:
        """Explain overall cross-entropy loss calculation."""
        explanation = ContentAST.Section()

        explanation.add_element(ContentAST.Paragraph([
            "The overall cross-entropy loss is the average of individual losses:"
        ]))

        losses_str = " + ".join([f"{loss:.4f}" for loss in self.individual_losses])
        calculation = f"\\text{{Cross-Entropy}} = \\frac{{{losses_str}}}{{{self.num_samples}}} = {self.overall_loss:.4f}"

        explanation.add_element(ContentAST.Equation(calculation, inline=False))

        return explanation
|