QuizGenerator 0.8.1-py3-none-any.whl → 0.10.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- QuizGenerator/README.md +5 -0
- QuizGenerator/canvas/canvas_interface.py +6 -2
- QuizGenerator/contentast.py +33 -11
- QuizGenerator/generate.py +51 -10
- QuizGenerator/logging.yaml +55 -0
- QuizGenerator/mixins.py +6 -2
- QuizGenerator/premade_questions/basic.py +49 -7
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_calculation.py +92 -82
- QuizGenerator/premade_questions/cst463/gradient_descent/gradient_descent_questions.py +68 -45
- QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py +238 -162
- QuizGenerator/premade_questions/cst463/models/attention.py +0 -1
- QuizGenerator/premade_questions/cst463/models/cnns.py +0 -1
- QuizGenerator/premade_questions/cst463/models/rnns.py +0 -1
- QuizGenerator/premade_questions/cst463/models/text.py +0 -1
- QuizGenerator/premade_questions/cst463/models/weight_counting.py +20 -1
- QuizGenerator/premade_questions/cst463/neural-network-basics/neural_network_questions.py +51 -45
- QuizGenerator/premade_questions/cst463/tensorflow-intro/tensorflow_questions.py +212 -215
- QuizGenerator/qrcode_generator.py +116 -54
- QuizGenerator/question.py +168 -23
- QuizGenerator/regenerate.py +23 -9
- {quizgenerator-0.8.1.dist-info → quizgenerator-0.10.0.dist-info}/METADATA +34 -22
- {quizgenerator-0.8.1.dist-info → quizgenerator-0.10.0.dist-info}/RECORD +25 -23
- {quizgenerator-0.8.1.dist-info → quizgenerator-0.10.0.dist-info}/WHEEL +0 -0
- {quizgenerator-0.8.1.dist-info → quizgenerator-0.10.0.dist-info}/entry_points.txt +0 -0
- {quizgenerator-0.8.1.dist-info → quizgenerator-0.10.0.dist-info}/licenses/LICENSE +0 -0
QuizGenerator/premade_questions/cst463/gradient_descent/loss_calculations.py

@@ -19,138 +19,155 @@ log = logging.getLogger(__name__)
 class LossQuestion(Question, TableQuestionMixin, BodyTemplatesMixin, abc.ABC):
     """Base class for loss function calculation questions."""

+    DEFAULT_NUM_SAMPLES = 5
+    DEFAULT_NUM_INPUT_FEATURES = 2
+    DEFAULT_VECTOR_INPUTS = False
+
     def __init__(self, *args, **kwargs):
         kwargs["topic"] = kwargs.get("topic", Question.Topic.ML_OPTIMIZATION)
         super().__init__(*args, **kwargs)

-        self.num_samples = kwargs.get("num_samples", 5)
+        self.num_samples = kwargs.get("num_samples", self.DEFAULT_NUM_SAMPLES)
         self.num_samples = max(3, min(10, self.num_samples))  # Constrain to 3-10 range

-        self.num_input_features = kwargs.get("num_input_features", 2)
+        self.num_input_features = kwargs.get("num_input_features", self.DEFAULT_NUM_INPUT_FEATURES)
         self.num_input_features = max(1, min(5, self.num_input_features))  # Constrain to 1-5 features
-        self.vector_inputs = kwargs.get("vector_inputs", False)
+        self.vector_inputs = kwargs.get("vector_inputs", self.DEFAULT_VECTOR_INPUTS)  # Whether to show inputs as vectors

         # Generate sample data
         self.data = []
         self.individual_losses = []
         self.overall_loss = 0.0

-    def _build_context(self, *, rng_seed=None, **kwargs):
+    @classmethod
+    def _build_context(cls, *, rng_seed=None, **kwargs):
         """Generate new random data and calculate losses."""
+        context = super()._build_context(rng_seed=rng_seed, **kwargs)
+        cls._populate_context(context, **kwargs)
         # Update configurable parameters if provided
-
-
-
-
-
-
-
-        # Seed RNG and generate data
-        self.rng.seed(rng_seed)
-        self._generate_data()
-        self._calculate_losses()
-
-        context = dict(kwargs)
-        context["rng_seed"] = rng_seed
+        context.num_samples = max(3, min(10, kwargs.get("num_samples", cls.DEFAULT_NUM_SAMPLES)))
+        context.num_input_features = max(1, min(5, kwargs.get("num_input_features", cls.DEFAULT_NUM_INPUT_FEATURES)))
+        context.vector_inputs = kwargs.get("vector_inputs", cls.DEFAULT_VECTOR_INPUTS)
+
+        # Generate data + losses
+        cls._generate_data(context)
+        cls._calculate_losses(context)
         return context

+    @classmethod
+    def _populate_context(cls, context, **kwargs):
+        """Hook for subclasses to add required context before data generation."""
+        return context
+
+    @classmethod
     @abc.abstractmethod
-    def _generate_data(self):
+    def _generate_data(cls, context):
         """Generate sample data appropriate for this loss function type."""
         pass

+    @classmethod
     @abc.abstractmethod
-    def _calculate_losses(self):
+    def _calculate_losses(cls, context):
         """Calculate individual and overall losses."""
         pass

+    @classmethod
     @abc.abstractmethod
-    def _get_loss_function_name(self) -> str:
+    def _get_loss_function_name(cls, context) -> str:
         """Return the name of the loss function."""
         pass

+    @classmethod
     @abc.abstractmethod
-    def _get_loss_function_formula(self) -> str:
+    def _get_loss_function_formula(cls, context) -> str:
         """Return the LaTeX formula for the loss function."""
         pass

+    @classmethod
     @abc.abstractmethod
-    def _get_loss_function_short_name(self) -> str:
+    def _get_loss_function_short_name(cls, context) -> str:
         """Return the short name of the loss function (used in question body)."""
         pass

-    def _build_loss_answers(self) -> Tuple[List[ca.Answer], ca.Answer]:
+    @classmethod
+    def _build_loss_answers(cls, context) -> Tuple[List[ca.Answer], ca.Answer]:
         answers = [
-            ca.AnswerTypes.Float(self.individual_losses[i], label=f"Sample {i + 1} loss")
-            for i in range(self.num_samples)
+            ca.AnswerTypes.Float(context.individual_losses[i], label=f"Sample {i + 1} loss")
+            for i in range(context.num_samples)
         ]
-        overall = ca.AnswerTypes.Float(self.overall_loss, label="Overall loss")
+        overall = ca.AnswerTypes.Float(context.overall_loss, label="Overall loss")
         return answers, overall

-    def _build_body(self) -> Tuple[ca.Element, List[ca.Answer]]:
+    @classmethod
+    def _build_body(cls, context) -> Tuple[ca.Element, List[ca.Answer]]:
         """Build question body and collect answers."""
         body = ca.Section()
         answers = []

         # Question description
         body.add_element(ca.Paragraph([
-            f"Given the dataset below, calculate the {self._get_loss_function_short_name()} for each sample "
-            f"and the overall {self._get_loss_function_short_name()}."
+            f"Given the dataset below, calculate the {cls._get_loss_function_short_name(context)} for each sample "
+            f"and the overall {cls._get_loss_function_short_name(context)}."
         ]))

         # Data table (contains individual loss answers)
-        loss_answers, overall_answer = self._build_loss_answers()
-        body.add_element(self._create_data_table(loss_answers))
+        loss_answers, overall_answer = cls._build_loss_answers(context)
+        body.add_element(cls._create_data_table(context, loss_answers))
         answers.extend(loss_answers)

         # Overall loss question
         body.add_element(ca.Paragraph([
-            f"Overall {self._get_loss_function_short_name()}: "
+            f"Overall {cls._get_loss_function_short_name(context)}: "
         ]))
         answers.append(overall_answer)
         body.add_element(overall_answer)

         return body, answers

+    @classmethod
     @abc.abstractmethod
-    def _create_data_table(self, loss_answers: List[ca.Answer]) -> ca.Element:
+    def _create_data_table(cls, context, loss_answers: List[ca.Answer]) -> ca.Element:
         """Create the data table with answer fields."""
         pass

-    def _build_explanation(self) -> Tuple[ca.Element, List[ca.Answer]]:
+    @classmethod
+    def _build_explanation(cls, context) -> Tuple[ca.Element, List[ca.Answer]]:
         """Build question explanation."""
         explanation = ca.Section()

         explanation.add_element(ca.Paragraph([
-            f"To calculate the {self._get_loss_function_name()}, we apply the formula to each sample:"
+            f"To calculate the {cls._get_loss_function_name(context)}, we apply the formula to each sample:"
         ]))

-        explanation.add_element(ca.Equation(self._get_loss_function_formula(), inline=False))
+        explanation.add_element(ca.Equation(cls._get_loss_function_formula(context), inline=False))

         # Step-by-step calculations
-        explanation.add_element(self._create_calculation_steps())
+        explanation.add_element(cls._create_calculation_steps(context))

         # Completed table
         explanation.add_element(ca.Paragraph(["Completed table:"]))
-        explanation.add_element(self._create_completed_table())
+        explanation.add_element(cls._create_completed_table(context))

         # Overall loss calculation
-        explanation.add_element(self._create_overall_loss_explanation())
+        explanation.add_element(cls._create_overall_loss_explanation(context))

         return explanation, []

+    @classmethod
     @abc.abstractmethod
-    def _create_calculation_steps(self) -> ca.Element:
+    def _create_calculation_steps(cls, context) -> ca.Element:
         """Create step-by-step calculation explanations."""
         pass

+    @classmethod
     @abc.abstractmethod
-    def _create_completed_table(self) -> ca.Element:
+    def _create_completed_table(cls, context) -> ca.Element:
         """Create the completed table with all values filled in."""
         pass

+    @classmethod
     @abc.abstractmethod
-    def _create_overall_loss_explanation(self) -> ca.Element:
+    def _create_overall_loss_explanation(cls, context) -> ca.Element:
         """Create explanation for overall loss calculation."""
         pass

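The shape of this hunk: question construction no longer happens in __init__ and mutable instance state; a classmethod pipeline builds everything into a single context object, with a _populate_context hook that subclasses can override before data generation runs. Note that the new code mixes attribute access (context.num_samples) with item access (context["data"]) on the same object. Below is a minimal sketch of a context type that would support both; this is an assumption for illustration only, since the real context class is defined elsewhere in the package (question.py and contentast.py appear in the file list, but their hunks are not shown here):

    import random

    class Context(dict):
        # Hypothetical: a dict with attribute access, so context.num_samples
        # and context["num_samples"] name the same entry, matching the mixed
        # access style in the new _generate_data implementations.
        __getattr__ = dict.__getitem__
        __setattr__ = dict.__setitem__

    context = Context(rng=random.Random(42))
    context.num_samples = max(3, min(10, 4))   # clamped as in _build_context
    context["data"] = []
    assert context.data == []                  # both spellings agree

Read under that assumption, `context.data = {}` immediately followed by `context["data"] = []` in the _generate_data bodies below would be a harmless redundancy: the second assignment overwrites the first.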
@@ -159,47 +176,68 @@ class LossQuestion_Linear(LossQuestion):
 class LossQuestion_Linear(LossQuestion):
     """Linear regression with Mean Squared Error (MSE) loss."""

+    DEFAULT_NUM_OUTPUT_VARS = 1
+
     def __init__(self, *args, **kwargs):
-        self.num_output_vars = kwargs.get("num_output_vars", 1)
+        self.num_output_vars = kwargs.get("num_output_vars", self.DEFAULT_NUM_OUTPUT_VARS)
         self.num_output_vars = max(1, min(5, self.num_output_vars))  # Constrain to 1-5 range
         super().__init__(*args, **kwargs)

-    def _build_context(self, *, rng_seed=None, **kwargs):
-
-        self.num_output_vars = max(1, min(5, kwargs.get("num_output_vars", self.num_output_vars)))
+    @classmethod
+    def _build_context(cls, *, rng_seed=None, **kwargs):
         return super()._build_context(rng_seed=rng_seed, **kwargs)

-    def _generate_data(self):
+    @classmethod
+    def _populate_context(cls, context, **kwargs):
+        context.num_output_vars = max(
+            1,
+            min(5, kwargs.get("num_output_vars", cls.DEFAULT_NUM_OUTPUT_VARS))
+        )
+        return context
+
+    @classmethod
+    def _generate_data(cls, context):
         """Generate regression data with continuous target values."""
-        self.data = []
+        context.data = {}
+        context["data"] = []

-        for _ in range(self.num_samples):
+        for _ in range(context.num_samples):
             sample = {}

             # Generate input features (rounded to 2 decimal places)
-            sample['inputs'] = [round(self.rng.uniform(-100, 100), 2) for _ in range(self.num_input_features)]
+            sample['inputs'] = [
+                round(context.rng.uniform(-100, 100), 2)
+                for _ in range(context.num_input_features)
+            ]

             # Generate true values (y) - multiple outputs if specified (rounded to 2 decimal places)
-            if self.num_output_vars == 1:
-                sample['true_values'] = round(self.rng.uniform(-100, 100), 2)
+            if context.num_output_vars == 1:
+                sample['true_values'] = round(context.rng.uniform(-100, 100), 2)
             else:
-                sample['true_values'] = [round(self.rng.uniform(-100, 100), 2) for _ in range(self.num_output_vars)]
+                sample['true_values'] = [
+                    round(context.rng.uniform(-100, 100), 2)
+                    for _ in range(context.num_output_vars)
+                ]

             # Generate predictions (p) - multiple outputs if specified (rounded to 2 decimal places)
-            if self.num_output_vars == 1:
-                sample['predictions'] = round(self.rng.uniform(-100, 100), 2)
+            if context.num_output_vars == 1:
+                sample['predictions'] = round(context.rng.uniform(-100, 100), 2)
             else:
-                sample['predictions'] = [round(self.rng.uniform(-100, 100), 2) for _ in range(self.num_output_vars)]
+                sample['predictions'] = [
+                    round(context.rng.uniform(-100, 100), 2)
+                    for _ in range(context.num_output_vars)
+                ]

-            self.data.append(sample)
+            context["data"].append(sample)

-    def _calculate_losses(self):
+    @classmethod
+    def _calculate_losses(cls, context):
         """Calculate MSE for each sample and overall."""
-        self.individual_losses = []
+        context.individual_losses = []
         total_loss = 0.0

-        for sample in self.data:
-            if self.num_output_vars == 1:
+        for sample in context["data"]:
+            if context.num_output_vars == 1:
                 # Single output MSE: (y - p)^2
                 loss = (sample['true_values'] - sample['predictions']) ** 2
             else:
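The payoff of the hook split is that a question can be rebuilt deterministically from a seed alone, with no instance in the loop. A hypothetical driver follows, assuming the hooks are invoked in the order _build_context implies (the real entry points are in generate.py and regenerate.py, whose hunks are not shown here); calling the underscored classmethods directly is purely illustrative:

    from QuizGenerator.premade_questions.cst463.gradient_descent.loss_calculations import (
        LossQuestion_Linear,
    )

    # Build the same question twice from the same seed.
    ctx_a = LossQuestion_Linear._build_context(rng_seed=42, num_samples=4)
    ctx_b = LossQuestion_Linear._build_context(rng_seed=42, num_samples=4)
    assert ctx_a["data"] == ctx_b["data"]  # identical samples, hence identical answers

    body, answers = LossQuestion_Linear._build_body(ctx_a)
    explanation, _ = LossQuestion_Linear._build_explanation(ctx_a)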
@@ -209,40 +247,43 @@ class LossQuestion_Linear(LossQuestion):
                 for y, p in zip(sample['true_values'], sample['predictions'])
             )

-            self.individual_losses.append(loss)
+            context.individual_losses.append(loss)
             total_loss += loss

         # Overall MSE is average of individual losses
-        self.overall_loss = total_loss / self.num_samples
+        context.overall_loss = total_loss / context.num_samples

-    def _get_loss_function_name(self) -> str:
+    @classmethod
+    def _get_loss_function_name(cls, context) -> str:
         return "Mean Squared Error (MSE)"

-    def _get_loss_function_short_name(self) -> str:
+    @classmethod
+    def _get_loss_function_short_name(cls, context) -> str:
         return "MSE"

-    def _get_loss_function_formula(self) -> str:
-        if self.num_output_vars == 1:
+    @classmethod
+    def _get_loss_function_formula(cls, context) -> str:
+        if context.num_output_vars == 1:
             return r"L(y, p) = (y - p)^2"
-        else:
-            return r"L(\mathbf{y}, \mathbf{p}) = \sum_{i=1}^{k} (y_i - p_i)^2"
+        return r"L(\mathbf{y}, \mathbf{p}) = \sum_{i=1}^{k} (y_i - p_i)^2"

-    def _create_data_table(self, loss_answers: List[ca.Answer]) -> ca.Element:
+    @classmethod
+    def _create_data_table(cls, context, loss_answers: List[ca.Answer]) -> ca.Element:
         """Create table with input features, true values, predictions, and loss fields."""
         headers = ["x"]

-        if self.num_output_vars == 1:
+        if context.num_output_vars == 1:
             headers.extend(["y", "p", "loss"])
         else:
             # Multiple outputs
-            for i in range(self.num_output_vars):
+            for i in range(context.num_output_vars):
                 headers.append(f"y_{i}")
-            for i in range(self.num_output_vars):
+            for i in range(context.num_output_vars):
                 headers.append(f"p_{i}")
             headers.append("loss")

         rows = []
-        for i, sample in enumerate(self.data):
+        for i, sample in enumerate(context["data"]):
             row = {}

             # Input features as vector
@@ -250,17 +291,17 @@ class LossQuestion_Linear(LossQuestion):
             row["x"] = x_vector

             # True values
-            if self.num_output_vars == 1:
+            if context.num_output_vars == 1:
                 row["y"] = f"{sample['true_values']:.2f}"
             else:
-                for j in range(self.num_output_vars):
+                for j in range(context.num_output_vars):
                     row[f"y_{j}"] = f"{sample['true_values'][j]:.2f}"

             # Predictions
-            if self.num_output_vars == 1:
+            if context.num_output_vars == 1:
                 row["p"] = f"{sample['predictions']:.2f}"
             else:
-                for j in range(self.num_output_vars):
+                for j in range(context.num_output_vars):
                     row[f"p_{j}"] = f"{sample['predictions'][j]:.2f}"

             # Loss answer field
@@ -268,19 +309,20 @@ class LossQuestion_Linear(LossQuestion):

             rows.append(row)

-        return self.create_answer_table(headers, rows, answer_columns=["loss"])
+        return cls.create_answer_table(headers, rows, answer_columns=["loss"])

-    def _create_calculation_steps(self) -> ca.Element:
+    @classmethod
+    def _create_calculation_steps(cls, context) -> ca.Element:
         """Show step-by-step MSE calculations."""
         steps = ca.Section()

-        for i, sample in enumerate(self.data):
+        for i, sample in enumerate(context["data"]):
             steps.add_element(ca.Paragraph([f"Sample {i+1}:"]))

-            if self.num_output_vars == 1:
+            if context.num_output_vars == 1:
                 y = sample['true_values']
                 p = sample['predictions']
-                loss = self.individual_losses[i]
+                loss = context.individual_losses[i]
                 diff = y - p

                 # Format the subtraction nicely to avoid double negatives
@@ -293,10 +335,10 @@ class LossQuestion_Linear(LossQuestion):
                 # Multi-output calculation
                 y_vals = sample['true_values']
                 p_vals = sample['predictions']
-                loss = self.individual_losses[i]
+                loss = context.individual_losses[i]

                 terms = []
-                for
+                for y, p in zip(y_vals, p_vals):
                     # Format the subtraction nicely to avoid double negatives
                     if p >= 0:
                         terms.append(f"({y:.2f} - {p:.2f})^2")
@@ -308,21 +350,22 @@ class LossQuestion_Linear(LossQuestion):

         return steps

-    def _create_completed_table(self) -> ca.Element:
+    @classmethod
+    def _create_completed_table(cls, context) -> ca.Element:
         """Create table with all values including calculated losses."""
         headers = ["x_0", "x_1"]

-        if self.num_output_vars == 1:
+        if context.num_output_vars == 1:
             headers.extend(["y", "p", "loss"])
         else:
-            for i in range(self.num_output_vars):
+            for i in range(context.num_output_vars):
                 headers.append(f"y_{i}")
-            for i in range(self.num_output_vars):
+            for i in range(context.num_output_vars):
                 headers.append(f"p_{i}")
             headers.append("loss")

         rows = []
-        for i, sample in enumerate(self.data):
+        for i, sample in enumerate(context["data"]):
             row = []

             # Input features
@@ -330,27 +373,28 @@ class LossQuestion_Linear(LossQuestion):
                 row.append(f"{x:.2f}")

             # True values
-            if self.num_output_vars == 1:
+            if context.num_output_vars == 1:
                 row.append(f"{sample['true_values']:.2f}")
             else:
                 for y in sample['true_values']:
                     row.append(f"{y:.2f}")

             # Predictions
-            if self.num_output_vars == 1:
+            if context.num_output_vars == 1:
                 row.append(f"{sample['predictions']:.2f}")
             else:
                 for p in sample['predictions']:
                     row.append(f"{p:.2f}")

             # Calculated loss
-            row.append(f"{self.individual_losses[i]:.4f}")
+            row.append(f"{context.individual_losses[i]:.4f}")

             rows.append(row)

         return ca.Table(headers=headers, data=rows)

-    def _create_overall_loss_explanation(self) -> ca.Element:
+    @classmethod
+    def _create_overall_loss_explanation(cls, context) -> ca.Element:
         """Explain overall MSE calculation."""
         explanation = ca.Section()

@@ -358,8 +402,8 @@ class LossQuestion_Linear(LossQuestion):
             "The overall MSE is the average of individual losses:"
         ]))

-        losses_str = " + ".join([f"{loss:.4f}" for loss in self.individual_losses])
-        calculation = f"MSE = \\frac{{{losses_str}}}{{{self.num_samples}}} = {self.overall_loss:.4f}"
+        losses_str = " + ".join([f"{loss:.4f}" for loss in context.individual_losses])
+        calculation = f"MSE = \\frac{{{losses_str}}}{{{context.num_samples}}} = {context.overall_loss:.4f}"

         explanation.add_element(ca.Equation(calculation, inline=False))

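As a concrete check of the arithmetic this explanation renders (numbers invented): with a single output, each per-sample loss is (y - p)^2, and the overall MSE divides the sum of those losses by num_samples:

    # Single-output MSE, mirroring the formulas above; (y, p) pairs invented.
    samples = [(3.0, 2.5), (-1.0, 0.5), (4.0, 4.0)]

    individual = [(y - p) ** 2 for y, p in samples]   # [0.25, 2.25, 0.0]
    overall = sum(individual) / len(samples)          # 2.5 / 3 = 0.8333...

    print([f"{loss:.4f}" for loss in individual], f"{overall:.4f}")
    # ['0.2500', '2.2500', '0.0000'] 0.8333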
@@ -370,30 +414,36 @@ class LossQuestion_Logistic(LossQuestion):
 class LossQuestion_Logistic(LossQuestion):
     """Binary logistic regression with log-loss."""

-    def _generate_data(self):
+    @classmethod
+    def _generate_data(cls, context):
         """Generate binary classification data."""
-        self.data = []
+        context.data = {}
+        context["data"] = []

-        for _ in range(self.num_samples):
+        for _ in range(context.num_samples):
             sample = {}

             # Generate input features (rounded to 2 decimal places)
-            sample['inputs'] = [round(self.rng.uniform(-100, 100), 2) for _ in range(self.num_input_features)]
+            sample['inputs'] = [
+                round(context.rng.uniform(-100, 100), 2)
+                for _ in range(context.num_input_features)
+            ]

             # Generate binary true values (0 or 1)
-            sample['true_values'] = self.rng.choice([0, 1])
+            sample['true_values'] = context.rng.choice([0, 1])

             # Generate predicted probabilities (between 0 and 1, rounded to 3 decimal places)
-            sample['predictions'] = round(self.rng.uniform(0.1, 0.9), 3)  # Avoid extreme values
+            sample['predictions'] = round(context.rng.uniform(0.1, 0.9), 3)  # Avoid extreme values

-            self.data.append(sample)
+            context["data"].append(sample)

-    def _calculate_losses(self):
+    @classmethod
+    def _calculate_losses(cls, context):
         """Calculate log-loss for each sample and overall."""
-        self.individual_losses = []
+        context.individual_losses = []
         total_loss = 0.0

-        for sample in self.data:
+        for sample in context["data"]:
             y = sample['true_values']
             p = sample['predictions']

@@ -403,27 +453,31 @@ class LossQuestion_Logistic(LossQuestion):
             else:
                 loss = -math.log(1 - p)

-            self.individual_losses.append(loss)
+            context.individual_losses.append(loss)
             total_loss += loss

         # Overall log-loss is average of individual losses
-        self.overall_loss = total_loss / self.num_samples
+        context.overall_loss = total_loss / context.num_samples

-    def _get_loss_function_name(self) -> str:
+    @classmethod
+    def _get_loss_function_name(cls, context) -> str:
         return "Log-Loss (Binary Cross-Entropy)"

-    def _get_loss_function_short_name(self) -> str:
+    @classmethod
+    def _get_loss_function_short_name(cls, context) -> str:
         return "log-loss"

-    def _get_loss_function_formula(self) -> str:
+    @classmethod
+    def _get_loss_function_formula(cls, context) -> str:
         return r"L(y, p) = -[y \ln(p) + (1-y) \ln(1-p)]"

-    def _create_data_table(self, loss_answers: List[ca.Answer]) -> ca.Element:
+    @classmethod
+    def _create_data_table(cls, context, loss_answers: List[ca.Answer]) -> ca.Element:
         """Create table with features, true labels, predicted probabilities, and loss fields."""
         headers = ["x", "y", "p", "loss"]

         rows = []
-        for i, sample in enumerate(self.data):
+        for i, sample in enumerate(context["data"]):
             row = {}

             # Input features as vector
@@ -441,16 +495,17 @@ class LossQuestion_Logistic(LossQuestion):

             rows.append(row)

-        return self.create_answer_table(headers, rows, answer_columns=["loss"])
+        return cls.create_answer_table(headers, rows, answer_columns=["loss"])

-    def _create_calculation_steps(self) -> ca.Element:
+    @classmethod
+    def _create_calculation_steps(cls, context) -> ca.Element:
         """Show step-by-step log-loss calculations."""
         steps = ca.Section()

-        for i, sample in enumerate(self.data):
+        for i, sample in enumerate(context["data"]):
             y = sample['true_values']
             p = sample['predictions']
-            loss = self.individual_losses[i]
+            loss = context.individual_losses[i]

             steps.add_element(ca.Paragraph([f"Sample {i+1}:"]))

@@ -463,12 +518,13 @@ class LossQuestion_Logistic(LossQuestion):

         return steps

-    def _create_completed_table(self) -> ca.Element:
+    @classmethod
+    def _create_completed_table(cls, context) -> ca.Element:
         """Create table with all values including calculated losses."""
         headers = ["x_0", "x_1", "y", "p", "loss"]

         rows = []
-        for i, sample in enumerate(self.data):
+        for i, sample in enumerate(context["data"]):
             row = []

             # Input features
@@ -482,13 +538,14 @@ class LossQuestion_Logistic(LossQuestion):
             row.append(f"{sample['predictions']:.3f}")

             # Calculated loss
-            row.append(f"{self.individual_losses[i]:.4f}")
+            row.append(f"{context.individual_losses[i]:.4f}")

             rows.append(row)

         return ca.Table(headers=headers, data=rows)

-    def _create_overall_loss_explanation(self) -> ca.Element:
+    @classmethod
+    def _create_overall_loss_explanation(cls, context) -> ca.Element:
         """Explain overall log-loss calculation."""
         explanation = ca.Section()

@@ -496,8 +553,8 @@ class LossQuestion_Logistic(LossQuestion):
             "The overall log-loss is the average of individual losses:"
         ]))

-        losses_str = " + ".join([f"{loss:.4f}" for loss in self.individual_losses])
-        calculation = f"\\text{{Log-Loss}} = \\frac{{{losses_str}}}{{{self.num_samples}}} = {self.overall_loss:.4f}"
+        losses_str = " + ".join([f"{loss:.4f}" for loss in context.individual_losses])
+        calculation = f"\\text{{Log-Loss}} = \\frac{{{losses_str}}}{{{context.num_samples}}} = {context.overall_loss:.4f}"

         explanation.add_element(ca.Equation(calculation, inline=False))

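The if/else in _calculate_losses is just the two cases of the displayed formula L(y, p) = -[y ln(p) + (1-y) ln(1-p)] for y in {0, 1}: the y = 1 case leaves -ln(p), the y = 0 case leaves -ln(1-p). A quick check with invented numbers:

    import math

    samples = [(1, 0.8), (0, 0.3), (1, 0.25)]  # (y, p) pairs, invented

    individual = [-math.log(p) if y == 1 else -math.log(1 - p) for y, p in samples]
    overall = sum(individual) / len(samples)

    print([f"{loss:.4f}" for loss in individual], f"{overall:.4f}")
    # ['0.2231', '0.3567', '1.3863'] 0.6554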
@@ -508,71 +565,90 @@ class LossQuestion_MulticlassLogistic(LossQuestion):
 class LossQuestion_MulticlassLogistic(LossQuestion):
     """Multi-class logistic regression with cross-entropy loss."""

+    DEFAULT_NUM_CLASSES = 3
+
     def __init__(self, *args, **kwargs):
-        self.num_classes = kwargs.get("num_classes", 3)
+        self.num_classes = kwargs.get("num_classes", self.DEFAULT_NUM_CLASSES)
         self.num_classes = max(3, min(5, self.num_classes))  # Constrain to 3-5 classes
         super().__init__(*args, **kwargs)

-    def _build_context(self, *, rng_seed=None, **kwargs):
-
-        self.num_classes = max(3, min(5, kwargs.get("num_classes", self.num_classes)))
+    @classmethod
+    def _build_context(cls, *, rng_seed=None, **kwargs):
         return super()._build_context(rng_seed=rng_seed, **kwargs)

-    def _generate_data(self):
+    @classmethod
+    def _populate_context(cls, context, **kwargs):
+        context.num_classes = max(
+            3,
+            min(5, kwargs.get("num_classes", cls.DEFAULT_NUM_CLASSES))
+        )
+        return context
+
+    @classmethod
+    def _generate_data(cls, context):
         """Generate multi-class classification data."""
-        self.data = []
+        context.data = {}
+        context["data"] = []

-        for _ in range(self.num_samples):
+        for _ in range(context.num_samples):
             sample = {}

             # Generate input features (rounded to 2 decimal places)
-            sample['inputs'] = [round(self.rng.uniform(-100, 100), 2) for _ in range(self.num_input_features)]
+            sample['inputs'] = [
+                round(context.rng.uniform(-100, 100), 2)
+                for _ in range(context.num_input_features)
+            ]

             # Generate true class (one-hot encoded) - ensure exactly one class is 1
-            true_class_idx = self.rng.randint(0, self.num_classes - 1)
-            sample['true_values'] = [0] * self.num_classes
-            sample['true_values'][true_class_idx] = 1
+            true_class_idx = context.rng.randint(0, context.num_classes - 1)
+            sample['true_values'] = [0] * context.num_classes  # Start with all zeros
+            sample['true_values'][true_class_idx] = 1  # Set exactly one to 1

             # Generate predicted probabilities (softmax-like, sum to 1, rounded to 3 decimal places)
-            raw_probs = [self.rng.uniform(0.1, 2.0) for _ in range(self.num_classes)]
+            raw_probs = [context.rng.uniform(0.1, 2.0) for _ in range(context.num_classes)]
             prob_sum = sum(raw_probs)
             sample['predictions'] = [round(p / prob_sum, 3) for p in raw_probs]

-            self.data.append(sample)
+            context["data"].append(sample)

-    def _calculate_losses(self):
+    @classmethod
+    def _calculate_losses(cls, context):
         """Calculate cross-entropy loss for each sample and overall."""
-        self.individual_losses = []
+        context.individual_losses = []
         total_loss = 0.0

-        for sample in self.data:
+        for sample in context["data"]:
             y_vec = sample['true_values']
             p_vec = sample['predictions']

             # Cross-entropy: -sum(y_i * log(p_i))
             loss = -sum(y * math.log(max(p, 1e-15)) for y, p in zip(y_vec, p_vec) if y > 0)

-            self.individual_losses.append(loss)
+            context.individual_losses.append(loss)
             total_loss += loss

         # Overall cross-entropy is average of individual losses
-        self.overall_loss = total_loss / self.num_samples
+        context.overall_loss = total_loss / context.num_samples

-    def _get_loss_function_name(self) -> str:
+    @classmethod
+    def _get_loss_function_name(cls, context) -> str:
         return "Cross-Entropy Loss"

-    def _get_loss_function_short_name(self) -> str:
+    @classmethod
+    def _get_loss_function_short_name(cls, context) -> str:
         return "cross-entropy loss"

-    def _get_loss_function_formula(self) -> str:
+    @classmethod
+    def _get_loss_function_formula(cls, context) -> str:
         return r"L(\mathbf{y}, \mathbf{p}) = -\sum_{i=1}^{K} y_i \ln(p_i)"

-    def _create_data_table(self, loss_answers: List[ca.Answer]) -> ca.Element:
+    @classmethod
+    def _create_data_table(cls, context, loss_answers: List[ca.Answer]) -> ca.Element:
         """Create table with features, true class vectors, predicted probabilities, and loss fields."""
         headers = ["x", "y", "p", "loss"]

         rows = []
-        for i, sample in enumerate(self.data):
+        for i, sample in enumerate(context["data"]):
             row = {}

             # Input features as vector
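Two details of the generation above are worth making explicit. The predictions are "softmax-like" only in the sense that positive raw scores are normalized to sum to 1 (up to per-class rounding), and because y is one-hot, the cross-entropy -sum(y_i * ln(p_i)) collapses to -ln(p_true). A small check mirroring the generation code (seed and class count invented):

    import math
    import random

    rng = random.Random(0)

    raw_probs = [rng.uniform(0.1, 2.0) for _ in range(3)]    # positive raw scores
    prob_sum = sum(raw_probs)
    p_vec = [round(p / prob_sum, 3) for p in raw_probs]      # sums to ~1.0 after rounding

    y_vec = [0, 1, 0]                                        # one-hot: true class is index 1
    loss = -sum(y * math.log(max(p, 1e-15)) for y, p in zip(y_vec, p_vec) if y > 0)
    assert math.isclose(loss, -math.log(p_vec[1]))           # equals -ln(p_true)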
@@ -592,16 +668,17 @@ class LossQuestion_MulticlassLogistic(LossQuestion):

             rows.append(row)

-        return self.create_answer_table(headers, rows, answer_columns=["loss"])
+        return cls.create_answer_table(headers, rows, answer_columns=["loss"])

-    def _create_calculation_steps(self) -> ca.Element:
+    @classmethod
+    def _create_calculation_steps(cls, context) -> ca.Element:
         """Show step-by-step cross-entropy calculations."""
         steps = ca.Section()

-        for i, sample in enumerate(self.data):
+        for i, sample in enumerate(context["data"]):
             y_vec = sample['true_values']
             p_vec = sample['predictions']
-            loss = self.individual_losses[i]
+            loss = context.individual_losses[i]

             steps.add_element(ca.Paragraph([f"Sample {i+1}:"]))

@@ -618,11 +695,8 @@ class LossQuestion_MulticlassLogistic(LossQuestion):

             # Show the vector multiplication more explicitly
             terms = []
-            for y, p in zip(y_vec, p_vec):
-
-                    terms.append(f"{y} \\cdot \\ln({p:.3f})")
-                else:
-                    terms.append(f"{y} \\cdot \\ln({p:.3f})")
+            for y, p in zip(y_vec, p_vec):
+                terms.append(f"{y} \\cdot \\ln({p:.3f})")

                 calculation = f"L = -\\mathbf{{y}} \\cdot \\ln(\\mathbf{{p}}) = -({' + '.join(terms)}) = -{y_vec[true_class_idx]} \\cdot \\ln({p_true:.3f}) = {loss:.4f}"
             except ValueError:
@@ -633,12 +707,13 @@ class LossQuestion_MulticlassLogistic(LossQuestion):

         return steps

-    def _create_completed_table(self) -> ca.Element:
+    @classmethod
+    def _create_completed_table(cls, context) -> ca.Element:
         """Create table with all values including calculated losses."""
         headers = ["x_0", "x_1", "y", "p", "loss"]

         rows = []
-        for i, sample in enumerate(self.data):
+        for i, sample in enumerate(context["data"]):
             row = []

             # Input features
@@ -654,13 +729,14 @@ class LossQuestion_MulticlassLogistic(LossQuestion):
             row.append(p_vector)

             # Calculated loss
-            row.append(f"{self.individual_losses[i]:.4f}")
+            row.append(f"{context.individual_losses[i]:.4f}")

             rows.append(row)

         return ca.Table(headers=headers, data=rows)

-    def _create_overall_loss_explanation(self) -> ca.Element:
+    @classmethod
+    def _create_overall_loss_explanation(cls, context) -> ca.Element:
         """Explain overall cross-entropy loss calculation."""
         explanation = ca.Section()

@@ -668,8 +744,8 @@ class LossQuestion_MulticlassLogistic(LossQuestion):
             "The overall cross-entropy loss is the average of individual losses:"
         ]))

-        losses_str = " + ".join([f"{loss:.4f}" for loss in self.individual_losses])
-        calculation = f"\\text{{Cross-Entropy}} = \\frac{{{losses_str}}}{{{self.num_samples}}} = {self.overall_loss:.4f}"
+        losses_str = " + ".join([f"{loss:.4f}" for loss in context.individual_losses])
+        calculation = f"\\text{{Cross-Entropy}} = \\frac{{{losses_str}}}{{{context.num_samples}}} = {context.overall_loss:.4f}"

         explanation.add_element(ca.Equation(calculation, inline=False))
