inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. inspect_ai/__init__.py +1 -0
  2. inspect_ai/_cli/common.py +1 -1
  3. inspect_ai/_cli/trace.py +33 -20
  4. inspect_ai/_display/core/active.py +1 -1
  5. inspect_ai/_display/core/display.py +1 -1
  6. inspect_ai/_display/core/footer.py +1 -1
  7. inspect_ai/_display/core/panel.py +1 -1
  8. inspect_ai/_display/core/progress.py +0 -6
  9. inspect_ai/_display/core/rich.py +1 -1
  10. inspect_ai/_display/rich/display.py +2 -2
  11. inspect_ai/_display/textual/app.py +15 -17
  12. inspect_ai/_display/textual/widgets/clock.py +3 -3
  13. inspect_ai/_display/textual/widgets/samples.py +6 -13
  14. inspect_ai/_eval/context.py +9 -1
  15. inspect_ai/_eval/run.py +16 -11
  16. inspect_ai/_eval/score.py +4 -10
  17. inspect_ai/_eval/task/results.py +5 -4
  18. inspect_ai/_eval/task/run.py +6 -12
  19. inspect_ai/_eval/task/task.py +10 -0
  20. inspect_ai/_util/ansi.py +31 -0
  21. inspect_ai/_util/datetime.py +1 -1
  22. inspect_ai/_util/deprecation.py +1 -1
  23. inspect_ai/_util/format.py +7 -0
  24. inspect_ai/_util/json.py +11 -1
  25. inspect_ai/_util/logger.py +14 -13
  26. inspect_ai/_util/throttle.py +10 -1
  27. inspect_ai/_util/trace.py +79 -47
  28. inspect_ai/_util/transcript.py +37 -4
  29. inspect_ai/_util/vscode.py +51 -0
  30. inspect_ai/_view/notify.py +2 -1
  31. inspect_ai/_view/www/.prettierrc.js +12 -0
  32. inspect_ai/_view/www/App.css +22 -1
  33. inspect_ai/_view/www/dist/assets/index.css +2374 -2
  34. inspect_ai/_view/www/dist/assets/index.js +29752 -24492
  35. inspect_ai/_view/www/log-schema.json +262 -215
  36. inspect_ai/_view/www/package.json +1 -0
  37. inspect_ai/_view/www/src/App.mjs +19 -9
  38. inspect_ai/_view/www/src/Types.mjs +0 -1
  39. inspect_ai/_view/www/src/api/Types.mjs +15 -4
  40. inspect_ai/_view/www/src/api/api-http.mjs +2 -0
  41. inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
  42. inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
  43. inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
  44. inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
  45. inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
  46. inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
  47. inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
  48. inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
  49. inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
  50. inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
  51. inspect_ai/_view/www/src/components/Tools.mjs +28 -5
  52. inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
  53. inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
  54. inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
  55. inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
  56. inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
  57. inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
  58. inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
  59. inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
  60. inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
  61. inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
  62. inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
  63. inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
  64. inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
  65. inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
  66. inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
  67. inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
  68. inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
  69. inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
  70. inspect_ai/_view/www/src/types/log.d.ts +28 -20
  71. inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
  72. inspect_ai/_view/www/yarn.lock +44 -0
  73. inspect_ai/approval/_apply.py +4 -0
  74. inspect_ai/approval/_human/panel.py +5 -8
  75. inspect_ai/dataset/_dataset.py +51 -10
  76. inspect_ai/dataset/_util.py +31 -3
  77. inspect_ai/log/__init__.py +2 -0
  78. inspect_ai/log/_log.py +30 -2
  79. inspect_ai/log/_recorders/eval.py +2 -0
  80. inspect_ai/model/_call_tools.py +31 -7
  81. inspect_ai/model/_chat_message.py +3 -0
  82. inspect_ai/model/_model.py +42 -1
  83. inspect_ai/model/_providers/anthropic.py +4 -0
  84. inspect_ai/model/_providers/google.py +24 -6
  85. inspect_ai/model/_providers/openai.py +17 -3
  86. inspect_ai/model/_providers/openai_o1.py +10 -12
  87. inspect_ai/model/_render.py +9 -2
  88. inspect_ai/scorer/_metric.py +12 -1
  89. inspect_ai/solver/__init__.py +2 -0
  90. inspect_ai/solver/_human_agent/agent.py +83 -0
  91. inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
  92. inspect_ai/solver/_human_agent/commands/clock.py +70 -0
  93. inspect_ai/solver/_human_agent/commands/command.py +59 -0
  94. inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
  95. inspect_ai/solver/_human_agent/commands/note.py +42 -0
  96. inspect_ai/solver/_human_agent/commands/score.py +80 -0
  97. inspect_ai/solver/_human_agent/commands/status.py +62 -0
  98. inspect_ai/solver/_human_agent/commands/submit.py +151 -0
  99. inspect_ai/solver/_human_agent/install.py +222 -0
  100. inspect_ai/solver/_human_agent/panel.py +252 -0
  101. inspect_ai/solver/_human_agent/service.py +45 -0
  102. inspect_ai/solver/_human_agent/state.py +55 -0
  103. inspect_ai/solver/_human_agent/view.py +24 -0
  104. inspect_ai/solver/_task_state.py +28 -2
  105. inspect_ai/tool/_tool.py +10 -2
  106. inspect_ai/tool/_tool_info.py +2 -1
  107. inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
  108. inspect_ai/tool/_tools/_web_browser/_web_browser.py +16 -13
  109. inspect_ai/util/__init__.py +12 -4
  110. inspect_ai/{_util/display.py → util/_display.py} +6 -0
  111. inspect_ai/util/_panel.py +31 -9
  112. inspect_ai/util/_sandbox/__init__.py +0 -3
  113. inspect_ai/util/_sandbox/context.py +5 -1
  114. inspect_ai/util/_sandbox/docker/compose.py +17 -13
  115. inspect_ai/util/_sandbox/docker/docker.py +9 -6
  116. inspect_ai/util/_sandbox/docker/internal.py +1 -1
  117. inspect_ai/util/_sandbox/docker/util.py +3 -2
  118. inspect_ai/util/_sandbox/environment.py +6 -5
  119. inspect_ai/util/_sandbox/local.py +1 -1
  120. inspect_ai/util/_sandbox/self_check.py +18 -18
  121. inspect_ai/util/_sandbox/service.py +22 -7
  122. inspect_ai/util/_store.py +7 -8
  123. inspect_ai/util/_store_model.py +110 -0
  124. inspect_ai/util/_subprocess.py +3 -3
  125. inspect_ai/util/_throttle.py +32 -0
  126. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
  127. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +131 -108
  128. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
  129. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
  130. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
  131. {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0
@@ -17,15 +17,26 @@ import {
17
17
  kScoreTypePassFail,
18
18
  } from "../constants.mjs";
19
19
 
20
+ /**
21
+ * Represents a utility summary of the samples that doesn't change with the selected score.
22
+ * @typedef {Object} EvalDescriptor
23
+ * @property {number} epochs - The number of epochs.
24
+ * @property {import("../api/Types.mjs").SampleSummary[]} samples - The list of sample summaries.
25
+ * @property {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
26
+ * @property {(sample: import("../api/Types.mjs").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => ScorerDescriptor} scorerDescriptor - Returns the scorer descriptor for a sample and a specified scorer.
27
+ * @property {(scoreLabel: import("../Types.mjs").ScoreLabel) => ScoreDescriptor} scoreDescriptor - Provides information about the score types and how to render them.
28
+ * @property {(sample: import("../api/Types.mjs").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => SelectedScore} score - Returns information about a score for a sample.
29
+ * @property {(sample: import("../api/Types.mjs").BasicSampleData, scorer: string) => string} scoreAnswer - Returns the answer for a sample and a specified scorer.
30
+ */
31
+
20
32
  /**
21
33
  * Represents a utility summary of the samples.
22
34
  * @typedef {Object} SamplesDescriptor
23
- * @property {ScoreDescriptor} scoreDescriptor - Provides information about the score types and how to render them.
24
- * @property {number} epochs - The number of epochs.
35
+ * @property {EvalDescriptor} evalDescriptor - The EvalDescriptor.
25
36
  * @property {MessageShape} messageShape - The normalized sizes of input, target, and answer messages.
26
- * @property {(sample: import("../api/Types.mjs").SampleSummary) => SelectedScore} selectedScore - Returns the selected score for a sample.
27
- * @property {(sample: import("../api/Types.mjs").SampleSummary, scorer: string) => ScorerDescriptor} scorer - Returns the scorer descriptor for a sample and a specified scorer.
28
- * @property {(sample: import("../api/Types.mjs").SampleSummary) => ScorerDescriptor} selectedScorer - Returns the scorer descriptor for a sample using the selected scorer.
37
+ * @property {ScoreDescriptor} selectedScoreDescriptor - Provides information about the score types and how to render them.
38
+ * @property {(sample: import("../api/Types.mjs").BasicSampleData) => SelectedScore} selectedScore - Returns the selected score for a sample.
39
+ * @property {(sample: import("../api/Types.mjs").BasicSampleData) => ScorerDescriptor} selectedScorerDescriptor - Returns the scorer descriptor for a sample using the selected scorer.
29
40
  */
30
41
 
31
42
  /**
@@ -42,13 +53,14 @@ import {
42
53
  /**
43
54
  * Provides descriptor functions for a scorer.
44
55
  * @typedef {Object} ScorerDescriptor
56
+ * @property {() => string} metadata - Function to retrieve the metadata of the score.
45
57
  * @property {() => string} explanation - Function to retrieve the explanation of the score.
46
58
  * @property {() => string} answer - Function to retrieve the answer associated with the score.
47
59
  * @property {function(): Array<{name: string, rendered: function(): any}>} scores - Function to retrieve scores with their render functions.
48
60
  */
49
61
 
50
62
  /**
51
- * Represents the selected score for a sample, including its value and render function.
63
+ * Represents a score for a sample, including its value and render function.
52
64
  * @typedef {Object} SelectedScore
53
65
  * @property {import("../types/log").Value2} value - The value of the selected score.
54
66
  * @property {function(): any} render - Function to render the selected score.
@@ -72,69 +84,48 @@ import {
72
84
  */
73
85
 
74
86
  /**
75
- * Provides a utility summary of the samples
76
- *
77
- * @param {import("../Types.mjs").ScoreLabel[]} scorers - the list of available scores
87
+ * @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
78
88
  * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
79
89
  * @param {number} epochs - The number of epochs
80
- * @param {import("../Types.mjs").ScoreLabel} [selectedScore] - the currently selected score
81
- * @returns {SamplesDescriptor} The SamplesDescriptor
90
+ * @returns {EvalDescriptor} The EvalDescriptor
82
91
  */
83
- export const createsSamplesDescriptor = (
84
- scorers,
85
- samples,
86
- epochs,
87
- selectedScore,
88
- ) => {
92
+ export const createEvalDescriptor = (scores, samples, epochs) => {
89
93
  if (!samples) {
90
94
  return undefined;
91
95
  }
92
96
 
93
97
  /**
94
- * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score
95
- * @param {string} scorer - the scorer name
96
- * @returns {import("../types/log").Score} The Score
97
- */
98
- const score = (sample, scorer = selectedScore?.scorer) => {
99
- if (sample.scores[scorer]) {
100
- return sample.scores[scorer];
101
- } else {
102
- return undefined;
103
- }
104
- };
105
-
106
- /**
107
- * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score
98
+ * @param {import("../api/Types.mjs").BasicSampleData} sample - the sample whose score value is read
99
+ * @param {import("../Types.mjs").ScoreLabel} scoreLabel - the score label
108
100
  * @returns {import("../types/log").Value2} The Score
109
101
  */
110
- const scoreValue = (sample) => {
102
+ const scoreValue = (sample, scoreLabel) => {
111
103
  // no scores, no value
112
- if (Object.keys(sample.scores).length === 0 || !selectedScore) {
104
+ if (Object.keys(sample.scores).length === 0 || !scoreLabel) {
113
105
  return undefined;
114
106
  }
115
107
 
116
108
  if (
117
- selectedScore.scorer !== selectedScore.name &&
118
- sample.scores[selectedScore.scorer] &&
119
- sample.scores[selectedScore.scorer].value
109
+ scoreLabel.scorer !== scoreLabel.name &&
110
+ sample.scores[scoreLabel.scorer] &&
111
+ sample.scores[scoreLabel.scorer].value
120
112
  ) {
121
- return sample.scores[selectedScore.scorer].value[selectedScore.name];
122
- } else if (sample.scores[selectedScore.name]) {
123
- return sample.scores[selectedScore.name].value;
113
+ return sample.scores[scoreLabel.scorer].value[scoreLabel.name];
114
+ } else if (sample.scores[scoreLabel.name]) {
115
+ return sample.scores[scoreLabel.name].value;
124
116
  } else {
125
117
  return undefined;
126
118
  }
127
119
  };
128
120
 
129
- // Retrieve the answer for a sample
130
121
  /**
131
- * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score
122
+ * @param {import("../api/Types.mjs").BasicSampleData} sample - the sample whose answer is read
132
123
  * @param {string} scorer - the scorer name
133
124
  * @returns {string} The answer
134
125
  */
135
126
  const scoreAnswer = (sample, scorer) => {
136
127
  if (sample) {
137
- const sampleScore = score(sample, scorer);
128
+ const sampleScore = sample.scores[scorer];
138
129
  if (sampleScore && sampleScore.answer) {
139
130
  return sampleScore.answer;
140
131
  }
@@ -143,15 +134,14 @@ export const createsSamplesDescriptor = (
143
134
  }
144
135
  };
145
136
 
146
- // Retrieve the answer for a sample
147
137
  /**
148
- * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score
138
+ * @param {import("../api/Types.mjs").BasicSampleData} sample - the sample whose explanation is read
149
139
  * @param {string} scorer - the scorer name
150
140
  * @returns {string} The explanation
151
141
  */
152
142
  const scoreExplanation = (sample, scorer) => {
153
143
  if (sample) {
154
- const sampleScore = score(sample, scorer);
144
+ const sampleScore = sample.scores[scorer];
155
145
  if (sampleScore && sampleScore.explanation) {
156
146
  return sampleScore.explanation;
157
147
  }
@@ -161,13 +151,13 @@ export const createsSamplesDescriptor = (
161
151
 
162
152
  // Retrieve the metadata for a sample
163
153
  /**
164
- * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score
154
+ * @param {import("../api/Types.mjs").BasicSampleData} sample - the sample whose score metadata is read
165
155
  * @param {string} scorer - the scorer name
166
156
  * @returns {Object} The metadata
167
157
  */
168
158
  const scoreMetadata = (sample, scorer) => {
169
159
  if (sample) {
170
- const sampleScore = score(sample, scorer);
160
+ const sampleScore = sample.scores[scorer];
171
161
  if (sampleScore && sampleScore.metadata) {
172
162
  return sampleScore.metadata;
173
163
  }
@@ -175,150 +165,126 @@ export const createsSamplesDescriptor = (
175
165
  return undefined;
176
166
  };
177
167
 
178
- const uniqScoreValues = [
179
- ...new Set(
180
- samples
181
- .filter((sample) => !!sample.scores)
182
- .filter((sample) => {
183
- // There is no selected scorer, so include this value
184
- if (!selectedScore) {
185
- return true;
186
- }
187
-
188
- if (selectedScore.scorer !== selectedScore.name) {
189
- return (
190
- Object.keys(sample.scores).includes(selectedScore.scorer) &&
191
- Object.keys(sample.scores[selectedScore.scorer].value).includes(
192
- selectedScore.name,
193
- )
194
- );
195
- } else {
196
- return Object.keys(sample.scores).includes(selectedScore.name);
197
- }
198
- })
199
- .map((sample) => {
200
- return scoreValue(sample);
201
- })
202
- .filter((value) => {
203
- return value !== null;
204
- }),
205
- ),
206
- ];
207
- const uniqScoreTypes = [
208
- ...new Set(uniqScoreValues.map((scoreValue) => typeof scoreValue)),
209
- ];
210
-
211
- /** @type {ScoreDescriptor} */
212
- let scoreDescriptor;
213
- for (const categorizer of scoreCategorizers) {
214
- scoreDescriptor = categorizer.describe(uniqScoreValues, uniqScoreTypes);
215
- if (scoreDescriptor) {
216
- break;
168
+ /**
169
+ * @param {import("../Types.mjs").ScoreLabel} [scoreLabel]
170
+ * @returns {string}
171
+ */
172
+ const scoreLabelKey = (scoreLabel) => {
173
+ if (!scoreLabel) {
174
+ return "No score key";
217
175
  }
218
- }
176
+ return `${scoreLabel.scorer}.${scoreLabel.name}`;
177
+ };
219
178
 
220
- // Find the total length of the value so we can compute an average
221
- const sizes = samples.reduce(
222
- (previous, current) => {
223
- const text = inputString(current.input).join(" ");
224
- const scoreText = scoreValue(current) ? String(scoreValue(current)) : "";
225
- previous[0] = Math.min(Math.max(previous[0], text.length), 300);
226
- previous[1] = Math.min(
227
- Math.max(previous[1], arrayToString(current.target).length),
228
- 300,
229
- );
230
- previous[2] = Math.min(
231
- Math.max(
232
- previous[2],
233
- scoreAnswer(current, selectedScore?.name)?.length || 0,
234
- ),
235
- 300,
236
- );
237
- previous[3] = Math.min(
238
- Math.max(previous[3], current.limit ? current.limit.length : 0),
239
- 50,
240
- );
241
- previous[4] = Math.min(
242
- Math.max(previous[4], String(current.id).length),
243
- 10,
244
- );
245
- previous[5] = Math.min(Math.max(previous[5], scoreText.length), 30);
179
+ /**
180
+ * The EvalDescriptor is memoized. Compute all descriptors now to avoid duplicate work.
181
+ * @type {Map<string, ScoreDescriptor>}
182
+ */
183
+ const scoreDescriptorMap = new Map();
184
+ for (const scoreLabel of scores) {
185
+ const uniqScoreValues = [
186
+ ...new Set(
187
+ samples
188
+ .filter((sample) => !!sample.scores)
189
+ .filter((sample) => {
190
+ // There is no selected scorer, so include this value
191
+ if (!scoreLabel) {
192
+ return true;
193
+ }
246
194
 
247
- return previous;
248
- },
249
- [0, 0, 0, 0, 0, 0],
250
- );
195
+ if (scoreLabel.scorer !== scoreLabel.name) {
196
+ return (
197
+ Object.keys(sample.scores).includes(scoreLabel.scorer) &&
198
+ Object.keys(sample.scores[scoreLabel.scorer].value).includes(
199
+ scoreLabel.name,
200
+ )
201
+ );
202
+ } else {
203
+ return Object.keys(sample.scores).includes(scoreLabel.name);
204
+ }
205
+ })
206
+ .map((sample) => {
207
+ return scoreValue(sample, scoreLabel);
208
+ })
209
+ .filter((value) => {
210
+ return value !== null;
211
+ }),
212
+ ),
213
+ ];
214
+ const uniqScoreTypes = [
215
+ ...new Set(uniqScoreValues.map((scoreValue) => typeof scoreValue)),
216
+ ];
217
+
218
+ for (const categorizer of scoreCategorizers) {
219
+ const scoreDescriptor = categorizer.describe(
220
+ uniqScoreValues,
221
+ uniqScoreTypes,
222
+ );
223
+ if (scoreDescriptor) {
224
+ scoreDescriptorMap.set(scoreLabelKey(scoreLabel), scoreDescriptor);
225
+ break;
226
+ }
227
+ }
228
+ }
251
229
 
252
- // normalize to base 1
253
- const maxSizes = {
254
- input: Math.min(sizes[0], 300),
255
- target: Math.min(sizes[1], 300),
256
- answer: Math.min(sizes[2], 300),
257
- limit: Math.min(sizes[3], 50),
258
- id: Math.min(sizes[4], 10),
259
- score: Math.min(sizes[4], 30),
260
- };
261
- const base =
262
- maxSizes.input +
263
- maxSizes.target +
264
- maxSizes.answer +
265
- maxSizes.limit +
266
- maxSizes.id +
267
- maxSizes.score || 1;
268
- const messageShape = {
269
- raw: {
270
- input: sizes[0],
271
- target: sizes[1],
272
- answer: sizes[2],
273
- limit: sizes[3],
274
- id: sizes[4],
275
- score: sizes[5],
276
- },
277
- normalized: {
278
- input: maxSizes.input / base,
279
- target: maxSizes.target / base,
280
- answer: maxSizes.answer / base,
281
- limit: maxSizes.limit / base,
282
- id: maxSizes.id / base,
283
- score: maxSizes.score / base,
284
- },
230
+ /**
231
+ * @param {import("../Types.mjs").ScoreLabel} scoreLabel
232
+ * @returns {ScoreDescriptor | undefined}
233
+ */
234
+ const scoreDescriptor = (scoreLabel) => {
235
+ return scoreDescriptorMap.get(scoreLabelKey(scoreLabel));
285
236
  };
286
237
 
287
- const scoreRendered = (sample) => {
288
- const score = scoreValue(sample);
238
+ /**
239
+ * @param {import("../api/Types.mjs").BasicSampleData} sample
240
+ * @param {import("../Types.mjs").ScoreLabel} scoreLabel
241
+ * @returns {any}
242
+ */
243
+ const scoreRendered = (sample, scoreLabel) => {
244
+ const descriptor = scoreDescriptor(scoreLabel);
245
+ const score = scoreValue(sample, scoreLabel);
289
246
  if (score === null || score === "undefined") {
290
247
  return "null";
291
- } else if (scoreDescriptor.render) {
292
- return scoreDescriptor.render(score);
248
+ } else if (descriptor && descriptor.render) {
249
+ return descriptor.render(score);
293
250
  } else {
294
251
  return score;
295
252
  }
296
253
  };
297
254
 
298
- const scorerDescriptor = (sample, scorer) => {
255
+ /**
256
+ * @param {import("../api/Types.mjs").BasicSampleData} sample
257
+ * @param {import("../Types.mjs").ScoreLabel} scoreLabel
258
+ * @returns {ScorerDescriptor}
259
+ */
260
+ const scorerDescriptor = (sample, scoreLabel) => {
299
261
  return {
300
262
  metadata: () => {
301
- return scoreMetadata(sample, scorer);
263
+ return scoreMetadata(sample, scoreLabel.scorer);
302
264
  },
303
265
  explanation: () => {
304
- return scoreExplanation(sample, scorer);
266
+ return scoreExplanation(sample, scoreLabel.scorer);
305
267
  },
306
268
  answer: () => {
307
- return scoreAnswer(sample, scorer);
269
+ return scoreAnswer(sample, scoreLabel.scorer);
308
270
  },
309
271
  scores: () => {
310
272
  if (!sample || !sample.scores) {
311
273
  return [];
312
274
  }
275
+ const myScoreDescriptor = scoreDescriptor(scoreLabel);
276
+ if (!myScoreDescriptor) {
277
+ return [];
278
+ }
313
279
 
314
280
  // Make a list of all the valid score names (this is
315
281
  // used to distinguish between dictionaries that contain
316
282
  // scores that should be treated as standalone scores and
317
283
  // dictionaries that just contain random values, which is allowed)
318
- const scoreNames = scorers.map((score) => {
284
+ const scoreNames = scores.map((score) => {
319
285
  return score.name;
320
286
  });
321
- const sampleScorer = sample.scores[scorer];
287
+ const sampleScorer = sample.scores[scoreLabel.scorer];
322
288
  const scoreVal = sampleScorer.value;
323
289
 
324
290
  if (typeof scoreVal === "object") {
@@ -338,7 +304,7 @@ export const createsSamplesDescriptor = (
338
304
  return {
339
305
  name,
340
306
  rendered: () => {
341
- return scoreDescriptor.render(scoreVal[name]);
307
+ return myScoreDescriptor.render(scoreVal[name]);
342
308
  },
343
309
  };
344
310
  });
@@ -348,9 +314,9 @@ export const createsSamplesDescriptor = (
348
314
  // we just treat it like an opaque dictionary
349
315
  return [
350
316
  {
351
- name: scorer,
317
+ name: scoreLabel.scorer,
352
318
  rendered: () => {
353
- return scoreDescriptor.render(scoreVal);
319
+ return myScoreDescriptor.render(scoreVal);
354
320
  },
355
321
  },
356
322
  ];
@@ -358,9 +324,9 @@ export const createsSamplesDescriptor = (
358
324
  } else {
359
325
  return [
360
326
  {
361
- name: scorer,
327
+ name: scoreLabel.scorer,
362
328
  rendered: () => {
363
- return scoreDescriptor.render(scoreVal);
329
+ return myScoreDescriptor.render(scoreVal);
364
330
  },
365
331
  },
366
332
  ];
@@ -369,25 +335,119 @@ export const createsSamplesDescriptor = (
369
335
  };
370
336
  };
371
337
 
338
+ /**
339
+ * @param {import("../api/Types.mjs").BasicSampleData} sample
340
+ * @param {import("../Types.mjs").ScoreLabel} scoreLabel
341
+ * @returns {SelectedScore}
342
+ */
343
+ const score = (sample, scoreLabel) => {
344
+ return {
345
+ value: scoreValue(sample, scoreLabel),
346
+ render: () => {
347
+ return scoreRendered(sample, scoreLabel);
348
+ },
349
+ };
350
+ };
351
+
372
352
  return {
373
- scoreDescriptor,
374
353
  epochs,
375
- messageShape,
376
- selectedScore: (sample) => {
377
- return {
378
- value: scoreValue(sample),
379
- render: () => {
380
- return scoreRendered(sample);
381
- },
382
- };
354
+ samples,
355
+ scores,
356
+ scorerDescriptor,
357
+ scoreDescriptor,
358
+ score,
359
+ scoreAnswer,
360
+ };
361
+ };
362
+
363
+ /**
364
+ * Provides a utility summary of the samples
365
+ *
366
+ * @param {EvalDescriptor} evalDescriptor - The EvalDescriptor.
367
+ * @param {import("../Types.mjs").ScoreLabel} selectedScore - Selected score.
368
+ * @returns {SamplesDescriptor} - The SamplesDescriptor.
369
+ */
370
+ export const createSamplesDescriptor = (evalDescriptor, selectedScore) => {
371
+ if (!evalDescriptor) {
372
+ return undefined;
373
+ }
374
+
375
+ // Find the maximum (capped) length of each column so we can compute relative sizes
376
+ const sizes = evalDescriptor.samples.reduce(
377
+ (previous, current) => {
378
+ const text = inputString(current.input).join(" ");
379
+ const scoreValue = evalDescriptor.score(current, selectedScore).value;
380
+ const scoreText = scoreValue ? String(scoreValue) : "";
381
+ previous[0] = Math.min(Math.max(previous[0], text.length), 300);
382
+ previous[1] = Math.min(
383
+ Math.max(previous[1], arrayToString(current.target).length),
384
+ 300,
385
+ );
386
+ previous[2] = Math.min(
387
+ Math.max(
388
+ previous[2],
389
+ evalDescriptor.scoreAnswer(current, selectedScore?.name)?.length || 0,
390
+ ),
391
+ 300,
392
+ );
393
+ previous[3] = Math.min(
394
+ Math.max(previous[3], current.limit ? current.limit.length : 0),
395
+ 50,
396
+ );
397
+ previous[4] = Math.min(
398
+ Math.max(previous[4], String(current.id).length),
399
+ 10,
400
+ );
401
+ previous[5] = Math.min(Math.max(previous[5], scoreText.length), 30);
402
+
403
+ return previous;
383
404
  },
384
- scorer: (sample, scorer) => {
385
- return scorerDescriptor(sample, scorer);
405
+ [0, 0, 0, 0, 0, 0],
406
+ );
407
+
408
+ // normalize to base 1
409
+ const maxSizes = {
410
+ input: Math.min(sizes[0], 300),
411
+ target: Math.min(sizes[1], 300),
412
+ answer: Math.min(sizes[2], 300),
413
+ limit: Math.min(sizes[3], 50),
414
+ id: Math.min(sizes[4], 10),
415
+ score: Math.min(sizes[4], 30),
416
+ };
417
+ const base =
418
+ maxSizes.input +
419
+ maxSizes.target +
420
+ maxSizes.answer +
421
+ maxSizes.limit +
422
+ maxSizes.id +
423
+ maxSizes.score || 1;
424
+ const messageShape = {
425
+ raw: {
426
+ input: sizes[0],
427
+ target: sizes[1],
428
+ answer: sizes[2],
429
+ limit: sizes[3],
430
+ id: sizes[4],
431
+ score: sizes[5],
386
432
  },
387
- selectedScorer: (sample) => {
388
- return scorerDescriptor(sample, selectedScore?.scorer);
433
+ normalized: {
434
+ input: maxSizes.input / base,
435
+ target: maxSizes.target / base,
436
+ answer: maxSizes.answer / base,
437
+ limit: maxSizes.limit / base,
438
+ id: maxSizes.id / base,
439
+ score: maxSizes.score / base,
389
440
  },
390
441
  };
442
+
443
+ return {
444
+ evalDescriptor,
445
+ messageShape,
446
+ selectedScoreDescriptor: evalDescriptor.scoreDescriptor(selectedScore),
447
+ selectedScore: (sample) => evalDescriptor.score(sample, selectedScore),
448
+ selectedScorerDescriptor: (sample) =>
449
+ evalDescriptor.scorerDescriptor(sample, selectedScore),
450
+ };
391
451
  };
392
452
 
393
453
  /**
@@ -55,7 +55,9 @@ export const SamplesTab = ({
55
55
  sampleScrollPositionRef,
56
56
  setSampleScrollPosition,
57
57
  }) => {
58
+ /** @type {[ListItem[], function(ListItem[]): void]} */
58
59
  const [items, setItems] = useState([]);
60
+ /** @type {[ListItem[], function(ListItem[]): void]} */
59
61
  const [sampleItems, setSampleItems] = useState([]);
60
62
 
61
63
  const sampleListRef = useRef(/** @type {HTMLElement|null} */ (null));
@@ -287,7 +289,7 @@ const groupBySample = (samples, sampleDescriptor, order) => {
287
289
  }
288
290
  }
289
291
  });
290
- const groupCount = samples.length / sampleDescriptor.epochs;
292
+ const groupCount = samples.length / sampleDescriptor.evalDescriptor.epochs;
291
293
  const itemCount = samples.length / groupCount;
292
294
  const counter = getCounter(itemCount, groupCount, order);
293
295
  return (sample, index, previousSample) => {
@@ -328,7 +330,7 @@ const groupBySample = (samples, sampleDescriptor, order) => {
328
330
  * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list
329
331
  */
330
332
  const groupByEpoch = (samples, sampleDescriptor, order) => {
331
- const groupCount = sampleDescriptor.epochs;
333
+ const groupCount = sampleDescriptor.evalDescriptor.epochs;
332
334
  const itemCount = samples.length / groupCount;
333
335
  const counter = getCounter(itemCount, groupCount, order);
334
336
 
@@ -30,11 +30,11 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => {
30
30
  }
31
31
  };
32
32
 
33
- switch (descriptor?.scoreDescriptor?.scoreType) {
33
+ switch (descriptor?.selectedScoreDescriptor?.scoreType) {
34
34
  case kScoreTypePassFail: {
35
35
  const options = [{ text: "All", value: "all" }];
36
36
  options.push(
37
- ...descriptor.scoreDescriptor.categories.map((cat) => {
37
+ ...descriptor.selectedScoreDescriptor.categories.map((cat) => {
38
38
  return { text: cat.text, value: cat.val };
39
39
  }),
40
40
  );
@@ -48,7 +48,7 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => {
48
48
  case kScoreTypeCategorical: {
49
49
  const options = [{ text: "All", value: "all" }];
50
50
  options.push(
51
- ...descriptor.scoreDescriptor.categories.map((cat) => {
51
+ ...descriptor.selectedScoreDescriptor.categories.map((cat) => {
52
52
  return { text: cat, value: cat };
53
53
  }),
54
54
  );
@@ -79,12 +79,12 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => {
79
79
  }
80
80
 
81
81
  case kScoreTypeObject: {
82
- if (!descriptor.scoreDescriptor.categories) {
82
+ if (!descriptor.selectedScoreDescriptor.categories) {
83
83
  return "";
84
84
  }
85
85
  const options = [{ text: "All", value: "all" }];
86
86
  options.push(
87
- ...descriptor.scoreDescriptor.categories.map((cat) => {
87
+ ...descriptor.selectedScoreDescriptor.categories.map((cat) => {
88
88
  return { text: cat.text, value: cat.value };
89
89
  }),
90
90
  );
@@ -1,6 +1,13 @@
1
1
  import { html } from "htm/preact";
2
2
  import { FontSize, TextStyle } from "../../appearance/Fonts.mjs";
3
3
 
4
+ /**
5
+ * @param {Object} props
6
+ * @param {import("../../Types.mjs").ScoreLabel[]} props.scores
7
+ * @param {import("../../Types.mjs").ScoreLabel} props.score
8
+ * @param {(score: import("../../Types.mjs").ScoreLabel) => void} props.setScore
9
+ * @returns {import("preact").JSX.Element}
10
+ */
4
11
  export const SelectScorer = ({ scores, score, setScore }) => {
5
12
  const scorers = scores.reduce((accum, scorer) => {
6
13
  if (
@@ -25,7 +25,7 @@ export const SortFilter = ({ sampleDescriptor, sort, setSort, epochs }) => {
25
25
  val: kEpochDescVal,
26
26
  });
27
27
  }
28
- if (sampleDescriptor?.scoreDescriptor?.compare) {
28
+ if (sampleDescriptor?.selectedScoreDescriptor?.compare) {
29
29
  options.push({
30
30
  label: "score asc",
31
31
  val: kScoreAscVal,
@@ -130,12 +130,12 @@ export const sortSamples = (sort, samples, samplesDescriptor) => {
130
130
  }
131
131
 
132
132
  case kScoreAscVal:
133
- return samplesDescriptor.scoreDescriptor.compare(
133
+ return samplesDescriptor.selectedScoreDescriptor.compare(
134
134
  samplesDescriptor.selectedScore(a).value,
135
135
  samplesDescriptor.selectedScore(b).value,
136
136
  );
137
137
  case kScoreDescVal:
138
- return samplesDescriptor.scoreDescriptor.compare(
138
+ return samplesDescriptor.selectedScoreDescriptor.compare(
139
139
  samplesDescriptor.selectedScore(b).value,
140
140
  samplesDescriptor.selectedScore(a).value,
141
141
  );