inspect-ai 0.3.55__py3-none-any.whl → 0.3.57__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inspect_ai/__init__.py +1 -0
- inspect_ai/_cli/common.py +1 -1
- inspect_ai/_cli/trace.py +33 -20
- inspect_ai/_display/core/active.py +1 -1
- inspect_ai/_display/core/display.py +1 -1
- inspect_ai/_display/core/footer.py +1 -1
- inspect_ai/_display/core/panel.py +1 -1
- inspect_ai/_display/core/progress.py +0 -6
- inspect_ai/_display/core/rich.py +1 -1
- inspect_ai/_display/rich/display.py +2 -2
- inspect_ai/_display/textual/app.py +15 -17
- inspect_ai/_display/textual/widgets/clock.py +3 -3
- inspect_ai/_display/textual/widgets/samples.py +6 -13
- inspect_ai/_eval/context.py +9 -1
- inspect_ai/_eval/run.py +16 -11
- inspect_ai/_eval/score.py +4 -10
- inspect_ai/_eval/task/results.py +5 -4
- inspect_ai/_eval/task/run.py +6 -12
- inspect_ai/_eval/task/task.py +10 -0
- inspect_ai/_util/ansi.py +31 -0
- inspect_ai/_util/datetime.py +1 -1
- inspect_ai/_util/deprecation.py +1 -1
- inspect_ai/_util/format.py +7 -0
- inspect_ai/_util/json.py +11 -1
- inspect_ai/_util/logger.py +14 -13
- inspect_ai/_util/throttle.py +10 -1
- inspect_ai/_util/trace.py +79 -47
- inspect_ai/_util/transcript.py +37 -4
- inspect_ai/_util/vscode.py +51 -0
- inspect_ai/_view/notify.py +2 -1
- inspect_ai/_view/www/.prettierrc.js +12 -0
- inspect_ai/_view/www/App.css +22 -1
- inspect_ai/_view/www/dist/assets/index.css +2374 -2
- inspect_ai/_view/www/dist/assets/index.js +29752 -24492
- inspect_ai/_view/www/log-schema.json +262 -215
- inspect_ai/_view/www/package.json +1 -0
- inspect_ai/_view/www/src/App.mjs +19 -9
- inspect_ai/_view/www/src/Types.mjs +0 -1
- inspect_ai/_view/www/src/api/Types.mjs +15 -4
- inspect_ai/_view/www/src/api/api-http.mjs +2 -0
- inspect_ai/_view/www/src/appearance/Icons.mjs +2 -0
- inspect_ai/_view/www/src/components/AsciiCinemaPlayer.mjs +74 -0
- inspect_ai/_view/www/src/components/CopyButton.mjs +0 -1
- inspect_ai/_view/www/src/components/ExpandablePanel.mjs +2 -2
- inspect_ai/_view/www/src/components/FindBand.mjs +5 -4
- inspect_ai/_view/www/src/components/HumanBaselineView.mjs +168 -0
- inspect_ai/_view/www/src/components/LargeModal.mjs +1 -1
- inspect_ai/_view/www/src/components/LightboxCarousel.mjs +217 -0
- inspect_ai/_view/www/src/components/MessageContent.mjs +1 -1
- inspect_ai/_view/www/src/components/TabSet.mjs +1 -1
- inspect_ai/_view/www/src/components/Tools.mjs +28 -5
- inspect_ai/_view/www/src/components/VirtualList.mjs +15 -17
- inspect_ai/_view/www/src/log/remoteLogFile.mjs +2 -1
- inspect_ai/_view/www/src/navbar/Navbar.mjs +44 -32
- inspect_ai/_view/www/src/samples/SampleDisplay.mjs +1 -2
- inspect_ai/_view/www/src/samples/SampleList.mjs +35 -4
- inspect_ai/_view/www/src/samples/SampleScoreView.mjs +13 -2
- inspect_ai/_view/www/src/samples/SampleScores.mjs +11 -2
- inspect_ai/_view/www/src/samples/SamplesDescriptor.mjs +238 -178
- inspect_ai/_view/www/src/samples/SamplesTab.mjs +4 -2
- inspect_ai/_view/www/src/samples/tools/SampleFilter.mjs +5 -5
- inspect_ai/_view/www/src/samples/tools/SelectScorer.mjs +7 -0
- inspect_ai/_view/www/src/samples/tools/SortFilter.mjs +3 -3
- inspect_ai/_view/www/src/samples/transcript/ModelEventView.mjs +3 -2
- inspect_ai/_view/www/src/samples/transcript/ToolEventView.mjs +1 -1
- inspect_ai/_view/www/src/samples/transcript/TranscriptView.mjs +1 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventRenderers.mjs +56 -0
- inspect_ai/_view/www/src/samples/transcript/state/StateEventView.mjs +17 -5
- inspect_ai/_view/www/src/types/asciicinema-player.d.ts +26 -0
- inspect_ai/_view/www/src/types/log.d.ts +28 -20
- inspect_ai/_view/www/src/workspace/WorkSpace.mjs +1 -1
- inspect_ai/_view/www/yarn.lock +44 -0
- inspect_ai/approval/_apply.py +4 -0
- inspect_ai/approval/_human/panel.py +5 -8
- inspect_ai/dataset/_dataset.py +51 -10
- inspect_ai/dataset/_util.py +31 -3
- inspect_ai/log/__init__.py +2 -0
- inspect_ai/log/_log.py +30 -2
- inspect_ai/log/_recorders/eval.py +2 -0
- inspect_ai/model/_call_tools.py +31 -7
- inspect_ai/model/_chat_message.py +3 -0
- inspect_ai/model/_model.py +42 -1
- inspect_ai/model/_providers/anthropic.py +4 -0
- inspect_ai/model/_providers/google.py +24 -6
- inspect_ai/model/_providers/openai.py +17 -3
- inspect_ai/model/_providers/openai_o1.py +10 -12
- inspect_ai/model/_render.py +9 -2
- inspect_ai/scorer/_metric.py +12 -1
- inspect_ai/solver/__init__.py +2 -0
- inspect_ai/solver/_human_agent/agent.py +83 -0
- inspect_ai/solver/_human_agent/commands/__init__.py +36 -0
- inspect_ai/solver/_human_agent/commands/clock.py +70 -0
- inspect_ai/solver/_human_agent/commands/command.py +59 -0
- inspect_ai/solver/_human_agent/commands/instructions.py +74 -0
- inspect_ai/solver/_human_agent/commands/note.py +42 -0
- inspect_ai/solver/_human_agent/commands/score.py +80 -0
- inspect_ai/solver/_human_agent/commands/status.py +62 -0
- inspect_ai/solver/_human_agent/commands/submit.py +151 -0
- inspect_ai/solver/_human_agent/install.py +222 -0
- inspect_ai/solver/_human_agent/panel.py +252 -0
- inspect_ai/solver/_human_agent/service.py +45 -0
- inspect_ai/solver/_human_agent/state.py +55 -0
- inspect_ai/solver/_human_agent/view.py +24 -0
- inspect_ai/solver/_task_state.py +28 -2
- inspect_ai/tool/_tool.py +10 -2
- inspect_ai/tool/_tool_info.py +2 -1
- inspect_ai/tool/_tools/_web_browser/_resources/dm_env_servicer.py +9 -9
- inspect_ai/tool/_tools/_web_browser/_web_browser.py +16 -13
- inspect_ai/util/__init__.py +12 -4
- inspect_ai/{_util/display.py → util/_display.py} +6 -0
- inspect_ai/util/_panel.py +31 -9
- inspect_ai/util/_sandbox/__init__.py +0 -3
- inspect_ai/util/_sandbox/context.py +5 -1
- inspect_ai/util/_sandbox/docker/compose.py +17 -13
- inspect_ai/util/_sandbox/docker/docker.py +9 -6
- inspect_ai/util/_sandbox/docker/internal.py +1 -1
- inspect_ai/util/_sandbox/docker/util.py +3 -2
- inspect_ai/util/_sandbox/environment.py +6 -5
- inspect_ai/util/_sandbox/local.py +1 -1
- inspect_ai/util/_sandbox/self_check.py +18 -18
- inspect_ai/util/_sandbox/service.py +22 -7
- inspect_ai/util/_store.py +7 -8
- inspect_ai/util/_store_model.py +110 -0
- inspect_ai/util/_subprocess.py +3 -3
- inspect_ai/util/_throttle.py +32 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/METADATA +3 -3
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/RECORD +131 -108
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/WHEEL +1 -1
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/LICENSE +0 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/entry_points.txt +0 -0
- {inspect_ai-0.3.55.dist-info → inspect_ai-0.3.57.dist-info}/top_level.txt +0 -0
@@ -17,15 +17,26 @@ import {
   kScoreTypePassFail,
 } from "../constants.mjs";

+/**
+ * Represents a utility summary of the samples that doesn't change with the selected score.
+ * @typedef {Object} EvalDescriptor
+ * @property {number} epochs - The number of epochs.
+ * @property {import("../api/Types.mjs").SampleSummary[]} samples - The list of sample summaries.
+ * @property {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
+ * @property {(sample: import("../api/Types.mjs").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => ScorerDescriptor} scorerDescriptor - Returns the scorer descriptor for a sample and a specified scorer.
+ * @property {(scoreLabel: import("../Types.mjs").ScoreLabel) => ScoreDescriptor} scoreDescriptor - Provides information about the score types and how to render them.
+ * @property {(sample: import("../api/Types.mjs").BasicSampleData, scoreLabel: import("../Types.mjs").ScoreLabel) => SelectedScore} score - Returns information about a score for a sample.
+ * @property {(sample: import("../api/Types.mjs").BasicSampleData, scorer: string) => string} scoreAnswer - Returns the answer for a sample and a specified scorer.
+ */
+
 /**
  * Represents a utility summary of the samples.
  * @typedef {Object} SamplesDescriptor
- * @property {
- * @property {number} epochs - The number of epochs.
+ * @property {EvalDescriptor} evalDescriptor - The EvalDescriptor.
  * @property {MessageShape} messageShape - The normalized sizes of input, target, and answer messages.
- * @property {
- * @property {(sample: import("../api/Types.mjs").
- * @property {(sample: import("../api/Types.mjs").
+ * @property {ScoreDescriptor} selectedScoreDescriptor - Provides information about the score types and how to render them.
+ * @property {(sample: import("../api/Types.mjs").BasicSampleData) => SelectedScore} selectedScore - Returns the selected score for a sample.
+ * @property {(sample: import("../api/Types.mjs").BasicSampleData) => ScorerDescriptor} selectedScorerDescriptor - Returns the scorer descriptor for a sample using the selected scorer.
  */

 /**
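The hunks in this section come from the log viewer's SamplesDescriptor.mjs and split the old score-aware SamplesDescriptor into a score-independent EvalDescriptor plus a thin SamplesDescriptor layered on the currently selected score. A minimal sketch of how a caller might wire the two together, using the factory signatures shown later in this diff; the sample summary and score label below are invented for illustration:

```js
// Illustrative only: fixture data is made up; the factories follow the
// signatures shown in this diff (createEvalDescriptor / createSamplesDescriptor).
import {
  createEvalDescriptor,
  createSamplesDescriptor,
} from "./SamplesDescriptor.mjs";

const samples = [
  {
    id: 1,
    epoch: 1,
    input: "What is 2 + 2?",
    target: "4",
    scores: { match: { value: "C", answer: "4" } },
  },
];
const scores = [{ name: "match", scorer: "match" }];

// Score-independent summary: computed once per eval log
const evalDescriptor = createEvalDescriptor(scores, samples, 1);

// Score-dependent view: recomputed when the user selects a different score
const samplesDescriptor = createSamplesDescriptor(evalDescriptor, scores[0]);
console.log(samplesDescriptor.selectedScore(samples[0]).value); // "C"
```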
@@ -42,13 +53,14 @@ import {
 /**
  * Provides descriptor functions for a scorer.
  * @typedef {Object} ScorerDescriptor
+ * @property {() => string} metadata - Function to retrieve the metadata of the score.
  * @property {() => string} explanation - Function to retrieve the explanation of the score.
  * @property {() => string} answer - Function to retrieve the answer associated with the score.
  * @property {function(): Array<{name: string, rendered: function(): any}>} scores - Function to retrieve scores with their render functions.
  */

 /**
- * Represents
+ * Represents a score for a sample, including its value and render function.
  * @typedef {Object} SelectedScore
  * @property {import("../types/log").Value2} value - The value of the selected score.
  * @property {function(): any} render - Function to render the selected score.
@@ -72,69 +84,48 @@ import {
  */

 /**
- *
- *
- * @param {import("../Types.mjs").ScoreLabel[]} scorers - the list of available scores
+ * @param {import("../Types.mjs").ScoreLabel[]} scores - the list of available scores
  * @param {import("../api/Types.mjs").SampleSummary[]} samples - the list of sample summaries
  * @param {number} epochs - The number of epochs
- * @
- * @returns {SamplesDescriptor} The SamplesDescriptor
+ * @returns {EvalDescriptor} The EvalDescriptor
  */
-export const
-  scorers,
-  samples,
-  epochs,
-  selectedScore,
-) => {
+export const createEvalDescriptor = (scores, samples, epochs) => {
   if (!samples) {
     return undefined;
   }

   /**
-   * @param {import("../api/Types.mjs").
-   * @param {
-   * @returns {import("../types/log").Score} The Score
-   */
-  const score = (sample, scorer = selectedScore?.scorer) => {
-    if (sample.scores[scorer]) {
-      return sample.scores[scorer];
-    } else {
-      return undefined;
-    }
-  };
-
-  /**
-   * @param {import("../api/Types.mjs").SampleSummary} sample - the currently selected score
+   * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
+   * @param {import("../Types.mjs").ScoreLabel} scoreLabel - the score label
    * @returns {import("../types/log").Value2} The Score
    */
-  const scoreValue = (sample) => {
+  const scoreValue = (sample, scoreLabel) => {
     // no scores, no value
-    if (Object.keys(sample.scores).length === 0 || !
+    if (Object.keys(sample.scores).length === 0 || !scoreLabel) {
       return undefined;
     }

     if (
-
-      sample.scores[
-      sample.scores[
+      scoreLabel.scorer !== scoreLabel.name &&
+      sample.scores[scoreLabel.scorer] &&
+      sample.scores[scoreLabel.scorer].value
     ) {
-      return sample.scores[
-    } else if (sample.scores[
-      return sample.scores[
+      return sample.scores[scoreLabel.scorer].value[scoreLabel.name];
+    } else if (sample.scores[scoreLabel.name]) {
+      return sample.scores[scoreLabel.name].value;
     } else {
       return undefined;
     }
   };

-  // Retrieve the answer for a sample
   /**
-   * @param {import("../api/Types.mjs").
+   * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
    * @param {string} scorer - the scorer name
    * @returns {string} The answer
    */
   const scoreAnswer = (sample, scorer) => {
     if (sample) {
-      const sampleScore =
+      const sampleScore = sample.scores[scorer];
       if (sampleScore && sampleScore.answer) {
         return sampleScore.answer;
       }
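The rewritten scoreValue takes an explicit ScoreLabel rather than closing over the selected score, and it distinguishes a named score nested inside a dict-valued scorer (scorer !== name) from a plain top-level scorer. A standalone restatement of that lookup, with invented fixtures:

```js
// Standalone restatement of the lookup shown in the hunk above (not the module
// itself); the sample fixture is invented for illustration.
const scoreValue = (sample, scoreLabel) => {
  if (Object.keys(sample.scores).length === 0 || !scoreLabel) {
    return undefined;
  }
  if (
    scoreLabel.scorer !== scoreLabel.name &&
    sample.scores[scoreLabel.scorer] &&
    sample.scores[scoreLabel.scorer].value
  ) {
    // e.g. a dict-valued scorer: scores.rubric.value = { clarity: 1, accuracy: 0 }
    return sample.scores[scoreLabel.scorer].value[scoreLabel.name];
  } else if (sample.scores[scoreLabel.name]) {
    return sample.scores[scoreLabel.name].value;
  } else {
    return undefined;
  }
};

const sample = {
  scores: {
    match: { value: "C" },
    rubric: { value: { clarity: 1, accuracy: 0 } },
  },
};
console.log(scoreValue(sample, { scorer: "match", name: "match" }));    // "C"
console.log(scoreValue(sample, { scorer: "rubric", name: "clarity" })); // 1
```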
@@ -143,15 +134,14 @@ export const createsSamplesDescriptor = (
     }
   };

-  // Retrieve the answer for a sample
   /**
-   * @param {import("../api/Types.mjs").
+   * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
    * @param {string} scorer - the scorer name
    * @returns {string} The explanation
    */
   const scoreExplanation = (sample, scorer) => {
     if (sample) {
-      const sampleScore =
+      const sampleScore = sample.scores[scorer];
       if (sampleScore && sampleScore.explanation) {
         return sampleScore.explanation;
       }
@@ -161,13 +151,13 @@ export const createsSamplesDescriptor = (

   // Retrieve the metadata for a sample
   /**
-   * @param {import("../api/Types.mjs").
+   * @param {import("../api/Types.mjs").BasicSampleData} sample - the currently selected score
    * @param {string} scorer - the scorer name
    * @returns {Object} The explanation
    */
   const scoreMetadata = (sample, scorer) => {
     if (sample) {
-      const sampleScore =
+      const sampleScore = sample.scores[scorer];
       if (sampleScore && sampleScore.metadata) {
         return sampleScore.metadata;
       }
@@ -175,150 +165,126 @@ export const createsSamplesDescriptor = (
     return undefined;
   };

-
-
-
-
-
-
-
-            return true;
-          }
-
-          if (selectedScore.scorer !== selectedScore.name) {
-            return (
-              Object.keys(sample.scores).includes(selectedScore.scorer) &&
-              Object.keys(sample.scores[selectedScore.scorer].value).includes(
-                selectedScore.name,
-              )
-            );
-          } else {
-            return Object.keys(sample.scores).includes(selectedScore.name);
-          }
-        })
-        .map((sample) => {
-          return scoreValue(sample);
-        })
-        .filter((value) => {
-          return value !== null;
-        }),
-    ),
-  ];
-  const uniqScoreTypes = [
-    ...new Set(uniqScoreValues.map((scoreValue) => typeof scoreValue)),
-  ];
-
-  /** @type {ScoreDescriptor} */
-  let scoreDescriptor;
-  for (const categorizer of scoreCategorizers) {
-    scoreDescriptor = categorizer.describe(uniqScoreValues, uniqScoreTypes);
-    if (scoreDescriptor) {
-      break;
+  /**
+   * @param {import("../Types.mjs").ScoreLabel} [scoreLabel]
+   * @returns {string}
+   */
+  const scoreLabelKey = (scoreLabel) => {
+    if (!scoreLabel) {
+      return "No score key";
     }
-
+    return `${scoreLabel.scorer}.${scoreLabel.name}`;
+  };

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        300,
-      );
-      previous[3] = Math.min(
-        Math.max(previous[3], current.limit ? current.limit.length : 0),
-        50,
-      );
-      previous[4] = Math.min(
-        Math.max(previous[4], String(current.id).length),
-        10,
-      );
-      previous[5] = Math.min(Math.max(previous[5], scoreText.length), 30);
+  /**
+   * The EvalDescriptor is memoized. Compute all descriptors now to avoid duplicate work.
+   * @type {Map<string, ScoreDescriptor>}
+   */
+  const scoreDescriptorMap = new Map();
+  for (const scoreLabel of scores) {
+    const uniqScoreValues = [
+      ...new Set(
+        samples
+          .filter((sample) => !!sample.scores)
+          .filter((sample) => {
+            // There is no selected scorer, so include this value
+            if (!scoreLabel) {
+              return true;
+            }

-
-
-
-
+            if (scoreLabel.scorer !== scoreLabel.name) {
+              return (
+                Object.keys(sample.scores).includes(scoreLabel.scorer) &&
+                Object.keys(sample.scores[scoreLabel.scorer].value).includes(
+                  scoreLabel.name,
+                )
+              );
+            } else {
+              return Object.keys(sample.scores).includes(scoreLabel.name);
+            }
+          })
+          .map((sample) => {
+            return scoreValue(sample, scoreLabel);
+          })
+          .filter((value) => {
+            return value !== null;
+          }),
+      ),
+    ];
+    const uniqScoreTypes = [
+      ...new Set(uniqScoreValues.map((scoreValue) => typeof scoreValue)),
+    ];
+
+    for (const categorizer of scoreCategorizers) {
+      const scoreDescriptor = categorizer.describe(
+        uniqScoreValues,
+        uniqScoreTypes,
+      );
+      if (scoreDescriptor) {
+        scoreDescriptorMap.set(scoreLabelKey(scoreLabel), scoreDescriptor);
+        break;
+      }
+    }
+  }

-
-
-
-
-
-
-      id: Math.min(sizes[4], 10),
-      score: Math.min(sizes[4], 30),
-    };
-    const base =
-      maxSizes.input +
-      maxSizes.target +
-      maxSizes.answer +
-      maxSizes.limit +
-      maxSizes.id +
-      maxSizes.score || 1;
-    const messageShape = {
-      raw: {
-        input: sizes[0],
-        target: sizes[1],
-        answer: sizes[2],
-        limit: sizes[3],
-        id: sizes[4],
-        score: sizes[5],
-      },
-      normalized: {
-        input: maxSizes.input / base,
-        target: maxSizes.target / base,
-        answer: maxSizes.answer / base,
-        limit: maxSizes.limit / base,
-        id: maxSizes.id / base,
-        score: maxSizes.score / base,
-      },
+  /**
+   * @param {import("../Types.mjs").ScoreLabel} scoreLabel
+   * @returns {ScoreDescriptor | undefined}
+   */
+  const scoreDescriptor = (scoreLabel) => {
+    return scoreDescriptorMap.get(scoreLabelKey(scoreLabel));
   };

-
-
+  /**
+   * @param {import("../api/Types.mjs").BasicSampleData} sample
+   * @param {import("../Types.mjs").ScoreLabel} scoreLabel
+   * @returns {any}
+   */
+  const scoreRendered = (sample, scoreLabel) => {
+    const descriptor = scoreDescriptor(scoreLabel);
+    const score = scoreValue(sample, scoreLabel);
     if (score === null || score === "undefined") {
       return "null";
-    } else if (
-      return
+    } else if (descriptor && descriptor.render) {
+      return descriptor.render(score);
     } else {
       return score;
     }
   };

-
+  /**
+   * @param {import("../api/Types.mjs").BasicSampleData} sample
+   * @param {import("../Types.mjs").ScoreLabel} scoreLabel
+   * @returns {ScorerDescriptor}
+   */
+  const scorerDescriptor = (sample, scoreLabel) => {
     return {
       metadata: () => {
-        return scoreMetadata(sample, scorer);
+        return scoreMetadata(sample, scoreLabel.scorer);
       },
       explanation: () => {
-        return scoreExplanation(sample, scorer);
+        return scoreExplanation(sample, scoreLabel.scorer);
       },
       answer: () => {
-        return scoreAnswer(sample, scorer);
+        return scoreAnswer(sample, scoreLabel.scorer);
       },
       scores: () => {
         if (!sample || !sample.scores) {
           return [];
         }
+        const myScoreDescriptor = scoreDescriptor(scoreLabel);
+        if (!myScoreDescriptor) {
+          return [];
+        }

         // Make a list of all the valid score names (this is
         // used to distinguish between dictionaries that contain
         // scores that should be treated as standlone scores and
         // dictionaries that just contain random values, which is allowed)
-        const scoreNames =
+        const scoreNames = scores.map((score) => {
           return score.name;
         });
-        const sampleScorer = sample.scores[scorer];
+        const sampleScorer = sample.scores[scoreLabel.scorer];
         const scoreVal = sampleScorer.value;

         if (typeof scoreVal === "object") {
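Because the EvalDescriptor is memoized upstream, this hunk precomputes one ScoreDescriptor per score label and caches it in a Map keyed by `scorer.name`. A small sketch of the same key-and-cache pattern; `describeValues` is an invented stand-in for the module's scoreCategorizers:

```js
// Sketch of the memoization pattern from the hunk above; `describeValues` is a
// hypothetical stand-in for the module's scoreCategorizers.
const scoreLabelKey = (scoreLabel) =>
  scoreLabel ? `${scoreLabel.scorer}.${scoreLabel.name}` : "No score key";

const describeValues = (values, types) =>
  types.length === 1 && types[0] === "number"
    ? { scoreType: "numeric", render: (v) => v.toFixed(2) }
    : { scoreType: "other", render: (v) => String(v) };

const scoreDescriptorMap = new Map();
for (const scoreLabel of [{ scorer: "match", name: "match" }]) {
  const values = [0.5, 1]; // unique score values observed for this label
  const types = [...new Set(values.map((v) => typeof v))];
  scoreDescriptorMap.set(scoreLabelKey(scoreLabel), describeValues(values, types));
}

// Later lookups are O(1) and never re-scan the samples:
console.log(scoreDescriptorMap.get("match.match").render(0.5)); // "0.50"
```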
@@ -338,7 +304,7 @@ export const createsSamplesDescriptor = (
             return {
               name,
               rendered: () => {
-                return
+                return myScoreDescriptor.render(scoreVal[name]);
               },
             };
           });
@@ -348,9 +314,9 @@ export const createsSamplesDescriptor = (
           // we just treat it like an opaque dictionary
           return [
             {
-              name: scorer,
+              name: scoreLabel.scorer,
               rendered: () => {
-                return
+                return myScoreDescriptor.render(scoreVal);
               },
             },
           ];
@@ -358,9 +324,9 @@ export const createsSamplesDescriptor = (
         } else {
           return [
             {
-              name: scorer,
+              name: scoreLabel.scorer,
               rendered: () => {
-                return
+                return myScoreDescriptor.render(scoreVal);
               },
             },
           ];
@@ -369,25 +335,119 @@ export const createsSamplesDescriptor = (
     };
   };

+  /**
+   * @param {import("../api/Types.mjs").BasicSampleData} sample
+   * @param {import("../Types.mjs").ScoreLabel} scoreLabel
+   * @returns {SelectedScore}
+   */
+  const score = (sample, scoreLabel) => {
+    return {
+      value: scoreValue(sample, scoreLabel),
+      render: () => {
+        return scoreRendered(sample, scoreLabel);
+      },
+    };
+  };
+
   return {
-    scoreDescriptor,
     epochs,
-
-
-
-
-
-
-
-
+    samples,
+    scores,
+    scorerDescriptor,
+    scoreDescriptor,
+    score,
+    scoreAnswer,
+  };
+};
+
+/**
+ * Provides a utility summary of the samples
+ *
+ * @param {EvalDescriptor} evalDescriptor - The EvalDescriptor.
+ * @param {import("../Types.mjs").ScoreLabel} selectedScore - Selected score.
+ * @returns {SamplesDescriptor} - The SamplesDescriptor.
+ */
+export const createSamplesDescriptor = (evalDescriptor, selectedScore) => {
+  if (!evalDescriptor) {
+    return undefined;
+  }
+
+  // Find the total length of the value so we can compute an average
+  const sizes = evalDescriptor.samples.reduce(
+    (previous, current) => {
+      const text = inputString(current.input).join(" ");
+      const scoreValue = evalDescriptor.score(current, selectedScore).value;
+      const scoreText = scoreValue ? String(scoreValue) : "";
+      previous[0] = Math.min(Math.max(previous[0], text.length), 300);
+      previous[1] = Math.min(
+        Math.max(previous[1], arrayToString(current.target).length),
+        300,
+      );
+      previous[2] = Math.min(
+        Math.max(
+          previous[2],
+          evalDescriptor.scoreAnswer(current, selectedScore?.name)?.length || 0,
+        ),
+        300,
+      );
+      previous[3] = Math.min(
+        Math.max(previous[3], current.limit ? current.limit.length : 0),
+        50,
+      );
+      previous[4] = Math.min(
+        Math.max(previous[4], String(current.id).length),
+        10,
+      );
+      previous[5] = Math.min(Math.max(previous[5], scoreText.length), 30);
+
+      return previous;
     },
-
-
+    [0, 0, 0, 0, 0, 0],
+  );
+
+  // normalize to base 1
+  const maxSizes = {
+    input: Math.min(sizes[0], 300),
+    target: Math.min(sizes[1], 300),
+    answer: Math.min(sizes[2], 300),
+    limit: Math.min(sizes[3], 50),
+    id: Math.min(sizes[4], 10),
+    score: Math.min(sizes[4], 30),
+  };
+  const base =
+    maxSizes.input +
+    maxSizes.target +
+    maxSizes.answer +
+    maxSizes.limit +
+    maxSizes.id +
+    maxSizes.score || 1;
+  const messageShape = {
+    raw: {
+      input: sizes[0],
+      target: sizes[1],
+      answer: sizes[2],
+      limit: sizes[3],
+      id: sizes[4],
+      score: sizes[5],
     },
-
-
+    normalized: {
+      input: maxSizes.input / base,
+      target: maxSizes.target / base,
+      answer: maxSizes.answer / base,
+      limit: maxSizes.limit / base,
+      id: maxSizes.id / base,
+      score: maxSizes.score / base,
     },
   };
+
+  return {
+    evalDescriptor,
+    messageShape,
+    selectedScoreDescriptor: evalDescriptor.scoreDescriptor(selectedScore),
+    selectedScore: (sample) => evalDescriptor.score(sample, selectedScore),
+    selectedScorerDescriptor: (sample) =>
+      evalDescriptor.scorerDescriptor(sample, selectedScore),
+  };
 };

 /**
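createSamplesDescriptor now only layers the selected score over the EvalDescriptor and computes messageShape: each column's longest observed length is clamped (input/target/answer at 300, limit at 50, id at 10, score at 30) and divided by the clamped sum, so the sample list can size its columns as fractions. A worked example of that normalization with invented lengths:

```js
// Worked example of the column-width normalization in createSamplesDescriptor.
// The observed lengths below are invented; the caps follow the hunk above.
const observed = { input: 180, target: 12, answer: 40, limit: 0, id: 6, score: 4 };
const caps = { input: 300, target: 300, answer: 300, limit: 50, id: 10, score: 30 };

const maxSizes = Object.fromEntries(
  Object.entries(observed).map(([k, v]) => [k, Math.min(v, caps[k])]),
);
const base = Object.values(maxSizes).reduce((a, b) => a + b, 0) || 1;
const normalized = Object.fromEntries(
  Object.entries(maxSizes).map(([k, v]) => [k, v / base]),
);

// Fractions sum to 1 and drive relative column widths in the sample list.
console.log(normalized.input.toFixed(2)); // "0.74" (180 / 242)
```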
@@ -55,7 +55,9 @@ export const SamplesTab = ({
   sampleScrollPositionRef,
   setSampleScrollPosition,
 }) => {
+  /** @type {[ListItem[], function(ListItem[]): void]} */
   const [items, setItems] = useState([]);
+  /** @type {[ListItem[], function(ListItem[]): void]} */
   const [sampleItems, setSampleItems] = useState([]);

   const sampleListRef = useRef(/** @type {HTMLElement|null} */ (null));
@@ -287,7 +289,7 @@ const groupBySample = (samples, sampleDescriptor, order) => {
       }
     }
   });
-  const groupCount = samples.length / sampleDescriptor.epochs;
+  const groupCount = samples.length / sampleDescriptor.evalDescriptor.epochs;
   const itemCount = samples.length / groupCount;
   const counter = getCounter(itemCount, groupCount, order);
   return (sample, index, previousSample) => {
@@ -328,7 +330,7 @@ const groupBySample = (samples, sampleDescriptor, order) => {
  * @returns {(sample: import("../api/Types.mjs").SampleSummary, index: number, previousSample: import("../api/Types.mjs").SampleSummary) => ListItem[]} The list
  */
 const groupByEpoch = (samples, sampleDescriptor, order) => {
-  const groupCount = sampleDescriptor.epochs;
+  const groupCount = sampleDescriptor.evalDescriptor.epochs;
   const itemCount = samples.length / groupCount;
   const counter = getCounter(itemCount, groupCount, order);

@@ -30,11 +30,11 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => {
     }
   };

-  switch (descriptor?.
+  switch (descriptor?.selectedScoreDescriptor?.scoreType) {
     case kScoreTypePassFail: {
       const options = [{ text: "All", value: "all" }];
       options.push(
-        ...descriptor.
+        ...descriptor.selectedScoreDescriptor.categories.map((cat) => {
           return { text: cat.text, value: cat.val };
         }),
       );
@@ -48,7 +48,7 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => {
     case kScoreTypeCategorical: {
       const options = [{ text: "All", value: "all" }];
       options.push(
-        ...descriptor.
+        ...descriptor.selectedScoreDescriptor.categories.map((cat) => {
           return { text: cat, value: cat };
         }),
       );
@@ -79,12 +79,12 @@ export const SampleFilter = ({ descriptor, filter, filterChanged }) => {
     }

     case kScoreTypeObject: {
-      if (!descriptor.
+      if (!descriptor.selectedScoreDescriptor.categories) {
        return "";
       }
       const options = [{ text: "All", value: "all" }];
       options.push(
-        ...descriptor.
+        ...descriptor.selectedScoreDescriptor.categories.map((cat) => {
           return { text: cat.text, value: cat.value };
         }),
       );
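In SampleFilter.mjs the filter dropdown is now built from descriptor.selectedScoreDescriptor.categories, with a slightly different category shape per score type (pass/fail categories carry text/val, object categories carry text/value, categorical scores are plain strings). A sketch of the pass/fail branch with an invented descriptor:

```js
// Sketch of how the pass/fail branch derives dropdown options; the descriptor
// object here is invented to mirror the shapes used in the hunks above.
const descriptor = {
  selectedScoreDescriptor: {
    categories: [
      { text: "Correct", val: "C" },
      { text: "Incorrect", val: "I" },
    ],
  },
};

const options = [{ text: "All", value: "all" }];
options.push(
  ...descriptor.selectedScoreDescriptor.categories.map((cat) => {
    return { text: cat.text, value: cat.val };
  }),
);
console.log(options.map((o) => o.value)); // [ "all", "C", "I" ]
```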
@@ -1,6 +1,13 @@
 import { html } from "htm/preact";
 import { FontSize, TextStyle } from "../../appearance/Fonts.mjs";

+/**
+ * @param {Object} props
+ * @param {import("../../Types.mjs").ScoreLabel[]} props.scores
+ * @param {import("../../Types.mjs").ScoreLabel} props.score
+ * @param {(score: import("../../Types.mjs").ScoreLabel) => void} props.setScore
+ * @returns {import("preact").JSX.Element}
+ */
 export const SelectScorer = ({ scores, score, setScore }) => {
   const scorers = scores.reduce((accum, scorer) => {
     if (
@@ -25,7 +25,7 @@ export const SortFilter = ({ sampleDescriptor, sort, setSort, epochs }) => {
       val: kEpochDescVal,
     });
   }
-  if (sampleDescriptor?.
+  if (sampleDescriptor?.selectedScoreDescriptor?.compare) {
     options.push({
       label: "score asc",
       val: kScoreAscVal,
@@ -130,12 +130,12 @@ export const sortSamples = (sort, samples, samplesDescriptor) => {
       }

       case kScoreAscVal:
-        return samplesDescriptor.
+        return samplesDescriptor.selectedScoreDescriptor.compare(
           samplesDescriptor.selectedScore(a).value,
           samplesDescriptor.selectedScore(b).value,
         );
       case kScoreDescVal:
-        return samplesDescriptor.
+        return samplesDescriptor.selectedScoreDescriptor.compare(
           samplesDescriptor.selectedScore(b).value,
           samplesDescriptor.selectedScore(a).value,
         );