crfm-helm 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/METADATA +81 -112
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/RECORD +165 -155
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
- helm/benchmark/adaptation/common_adapter_specs.py +2 -0
- helm/benchmark/annotation/anthropic_red_team_annotator.py +57 -0
- helm/benchmark/annotation/call_center_annotator.py +258 -0
- helm/benchmark/annotation/financebench_annotator.py +79 -0
- helm/benchmark/annotation/harm_bench_annotator.py +55 -0
- helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
- helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
- helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
- helm/benchmark/annotation/live_qa_annotator.py +37 -45
- helm/benchmark/annotation/medication_qa_annotator.py +36 -44
- helm/benchmark/annotation/model_as_judge.py +96 -0
- helm/benchmark/annotation/simple_safety_tests_annotator.py +50 -0
- helm/benchmark/annotation/xstest_annotator.py +100 -0
- helm/benchmark/metrics/annotation_metrics.py +108 -0
- helm/benchmark/metrics/bhasa_metrics.py +188 -0
- helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
- helm/benchmark/metrics/code_metrics_helper.py +11 -1
- helm/benchmark/metrics/safety_metrics.py +79 -0
- helm/benchmark/metrics/summac/model_summac.py +3 -3
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
- helm/benchmark/metrics/unitxt_metrics.py +17 -3
- helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
- helm/benchmark/metrics/vision_language/image_utils.py +1 -1
- helm/benchmark/model_metadata_registry.py +3 -3
- helm/benchmark/presentation/create_plots.py +1 -1
- helm/benchmark/presentation/schema.py +3 -0
- helm/benchmark/presentation/summarize.py +106 -256
- helm/benchmark/presentation/test_run_entry.py +1 -0
- helm/benchmark/presentation/test_summarize.py +145 -3
- helm/benchmark/run.py +15 -0
- helm/benchmark/run_expander.py +83 -30
- helm/benchmark/run_specs/bhasa_run_specs.py +652 -0
- helm/benchmark/run_specs/call_center_run_specs.py +152 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
- helm/benchmark/run_specs/experimental_run_specs.py +52 -0
- helm/benchmark/run_specs/finance_run_specs.py +82 -1
- helm/benchmark/run_specs/safety_run_specs.py +154 -0
- helm/benchmark/run_specs/vlm_run_specs.py +100 -24
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
- helm/benchmark/scenarios/banking77_scenario.py +51 -0
- helm/benchmark/scenarios/bhasa_scenario.py +1942 -0
- helm/benchmark/scenarios/call_center_scenario.py +84 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
- helm/benchmark/scenarios/ewok_scenario.py +116 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
- helm/benchmark/scenarios/financebench_scenario.py +53 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
- helm/benchmark/scenarios/raft_scenario.py +1 -1
- helm/benchmark/scenarios/scenario.py +1 -1
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
- helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
- helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
- helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
- helm/benchmark/scenarios/test_math_scenario.py +2 -8
- helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
- helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
- helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
- helm/benchmark/scenarios/xstest_scenario.py +35 -0
- helm/benchmark/server.py +1 -6
- helm/benchmark/static/schema_air_bench.yaml +750 -750
- helm/benchmark/static/schema_bhasa.yaml +709 -0
- helm/benchmark/static/schema_call_center.yaml +232 -0
- helm/benchmark/static/schema_cleva.yaml +768 -0
- helm/benchmark/static/schema_decodingtrust.yaml +444 -0
- helm/benchmark/static/schema_ewok.yaml +367 -0
- helm/benchmark/static/schema_finance.yaml +55 -9
- helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
- helm/benchmark/static/schema_legal.yaml +566 -0
- helm/benchmark/static/schema_safety.yaml +266 -0
- helm/benchmark/static/schema_tables.yaml +149 -8
- helm/benchmark/static/schema_thai.yaml +21 -0
- helm/benchmark/static/schema_vhelm.yaml +137 -101
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
- helm/benchmark/static_build/assets/index-3ee38b3d.js +10 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/benchmark/window_services/test_openai_window_service.py +8 -8
- helm/benchmark/window_services/tokenizer_service.py +0 -5
- helm/clients/ai21_client.py +71 -1
- helm/clients/anthropic_client.py +7 -19
- helm/clients/huggingface_client.py +38 -37
- helm/clients/nvidia_nim_client.py +35 -0
- helm/clients/openai_client.py +18 -4
- helm/clients/palmyra_client.py +24 -0
- helm/clients/perspective_api_client.py +11 -6
- helm/clients/test_client.py +4 -6
- helm/clients/together_client.py +22 -0
- helm/clients/vision_language/open_flamingo_client.py +1 -2
- helm/clients/vision_language/palmyra_vision_client.py +28 -13
- helm/common/cache.py +8 -30
- helm/common/images_utils.py +6 -0
- helm/common/key_value_store.py +9 -9
- helm/common/mongo_key_value_store.py +5 -4
- helm/common/request.py +16 -0
- helm/common/test_cache.py +1 -48
- helm/common/tokenization_request.py +0 -9
- helm/config/model_deployments.yaml +444 -329
- helm/config/model_metadata.yaml +513 -111
- helm/config/tokenizer_configs.yaml +140 -11
- helm/proxy/example_queries.py +14 -21
- helm/proxy/server.py +0 -9
- helm/proxy/services/remote_service.py +0 -6
- helm/proxy/services/server_service.py +6 -20
- helm/proxy/services/service.py +0 -6
- helm/proxy/token_counters/test_auto_token_counter.py +2 -2
- helm/tokenizers/ai21_tokenizer.py +51 -59
- helm/tokenizers/cohere_tokenizer.py +0 -75
- helm/tokenizers/huggingface_tokenizer.py +0 -1
- helm/tokenizers/test_ai21_tokenizer.py +48 -0
- helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
- helm/benchmark/data_overlap/export_scenario_text.py +0 -119
- helm/benchmark/data_overlap/light_scenario.py +0 -60
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/static/benchmarking.css +0 -156
- helm/benchmark/static/benchmarking.js +0 -1705
- helm/benchmark/static/config.js +0 -3
- helm/benchmark/static/general.js +0 -122
- helm/benchmark/static/images/crfm-logo.png +0 -0
- helm/benchmark/static/images/helm-logo-simple.png +0 -0
- helm/benchmark/static/images/helm-logo.png +0 -0
- helm/benchmark/static/images/language-model-helm.png +0 -0
- helm/benchmark/static/images/organizations/ai21.png +0 -0
- helm/benchmark/static/images/organizations/anthropic.png +0 -0
- helm/benchmark/static/images/organizations/bigscience.png +0 -0
- helm/benchmark/static/images/organizations/cohere.png +0 -0
- helm/benchmark/static/images/organizations/eleutherai.png +0 -0
- helm/benchmark/static/images/organizations/google.png +0 -0
- helm/benchmark/static/images/organizations/meta.png +0 -0
- helm/benchmark/static/images/organizations/microsoft.png +0 -0
- helm/benchmark/static/images/organizations/nvidia.png +0 -0
- helm/benchmark/static/images/organizations/openai.png +0 -0
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
- helm/benchmark/static/images/organizations/yandex.png +0 -0
- helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
- helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
- helm/benchmark/static/index.html +0 -68
- helm/benchmark/static/info-icon.png +0 -0
- helm/benchmark/static/json-urls.js +0 -69
- helm/benchmark/static/plot-captions.js +0 -27
- helm/benchmark/static/utils.js +0 -285
- helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
- helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/window_services/ai21_window_service.py +0 -247
- helm/benchmark/window_services/cohere_window_service.py +0 -101
- helm/benchmark/window_services/test_ai21_window_service.py +0 -163
- helm/benchmark/window_services/test_cohere_window_service.py +0 -75
- helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
- helm/benchmark/window_services/test_ice_window_service.py +0 -327
- helm/tokenizers/ice_tokenizer.py +0 -30
- helm/tokenizers/test_ice_tokenizer.py +0 -57
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/top_level.txt +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
- /helm/benchmark/{data_overlap → scenarios/vision_language/image2struct}/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct/webpage}/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
|
@@ -1,1705 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* A very simple static way to visualize the scenarios, runs, and metrics from the benchmarking project.
|
|
3
|
-
* This code doesn't really belong in `proxy`, but is there for convenience.
|
|
4
|
-
*/
|
|
5
|
-
|
|
6
|
-
// Specifies all the information to help us render and understand the fields
|
|
7
|
-
// for adapters and metrics.
|
|
8
|
-
// Look at `schema.py` for the actual schema.
|
|
9
|
-
class Schema {
|
|
10
|
-
constructor(raw) {
|
|
11
|
-
this.models = raw.models;
|
|
12
|
-
this.adapter = raw.adapter;
|
|
13
|
-
this.metrics = raw.metrics;
|
|
14
|
-
this.perturbations = raw.perturbations;
|
|
15
|
-
this.run_groups = raw.run_groups;
|
|
16
|
-
this.metric_groups = raw.metric_groups;
|
|
17
|
-
|
|
18
|
-
// Allow for quick lookup
|
|
19
|
-
this.adapterFieldNames = this.adapter.map((field) => field.name);
|
|
20
|
-
this.metricsFieldNames = this.metrics.map((field) => field.name);
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
adapterField(name) {
|
|
24
|
-
const field = this.adapter.find((field) => field.name === name);
|
|
25
|
-
if (!field) {
|
|
26
|
-
console.error(`Adapter field ${name} not found`);
|
|
27
|
-
return {};
|
|
28
|
-
}
|
|
29
|
-
return field;
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
metricsField(name) {
|
|
33
|
-
const field = this.metrics.find((field) => field.name === name);
|
|
34
|
-
if (!field) {
|
|
35
|
-
console.error(`Metrics field ${name} not found`);
|
|
36
|
-
return {};
|
|
37
|
-
}
|
|
38
|
-
return field;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
metricGroup(name) {
|
|
42
|
-
return this.metric_groups.find((group) => group.name === name);
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
$(function () {
|
|
47
|
-
// Array of String containing RunSpec names for which
|
|
48
|
-
// the JSON for displaying requests has been loaded.
|
|
49
|
-
const runSpecsNamesWithLoadedRequests = [];
|
|
50
|
-
const urlParams = decodeUrlParams(window.location.search);
|
|
51
|
-
|
|
52
|
-
/////////////////////////////////// Pages ////////////////////////////////////
|
|
53
|
-
|
|
54
|
-
function renderModels() {
|
|
55
|
-
const $table = $("<table>", { class: "query-table results-table" });
|
|
56
|
-
const $header = $("<tr>").append([
|
|
57
|
-
$("<td>").append("Creator"),
|
|
58
|
-
$("<td>").append("Model"),
|
|
59
|
-
$("<td>").append("Description"),
|
|
60
|
-
$("<td>").append("Access"),
|
|
61
|
-
]);
|
|
62
|
-
$table.append($header);
|
|
63
|
-
|
|
64
|
-
schema.models.forEach((model) => {
|
|
65
|
-
const $name = $("<div>").append([
|
|
66
|
-
$("<div>").append(model.display_name),
|
|
67
|
-
$("<div>", { class: "technical-details" }).append(model.name),
|
|
68
|
-
]);
|
|
69
|
-
const $row = $("<tr>").append([
|
|
70
|
-
$("<td>").append(model.creator_organization),
|
|
71
|
-
$("<td>").append($name),
|
|
72
|
-
$("<td>").append(renderMarkdown(model.description)),
|
|
73
|
-
$("<td>").append(renderAccess(model.access)),
|
|
74
|
-
]);
|
|
75
|
-
$table.append($row);
|
|
76
|
-
});
|
|
77
|
-
return $table;
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
function renderScenarios() {
|
|
81
|
-
const $table = $("<table>", { class: "query-table results-table" });
|
|
82
|
-
|
|
83
|
-
const $header = $("<tr>").append([
|
|
84
|
-
$("<td>").append("Scenario"),
|
|
85
|
-
$("<td>").append("Task"),
|
|
86
|
-
$("<td>").append("What"),
|
|
87
|
-
$("<td>").append("Who"),
|
|
88
|
-
$("<td>").append("When"),
|
|
89
|
-
$("<td>").append("Language"),
|
|
90
|
-
$("<td>").append("Description"),
|
|
91
|
-
]);
|
|
92
|
-
$table.append($header);
|
|
93
|
-
|
|
94
|
-
schema.run_groups.forEach((group) => {
|
|
95
|
-
if (group.category && group.category !== "Scenarios") {
|
|
96
|
-
return;
|
|
97
|
-
}
|
|
98
|
-
const href = groupUrl(group.name);
|
|
99
|
-
const $name = $("<div>").append([
|
|
100
|
-
$("<div>").append($("<a>", { href }).append(group.display_name)),
|
|
101
|
-
$("<div>", { class: "technical-details" }).append(group.name),
|
|
102
|
-
]);
|
|
103
|
-
const task = group.taxonomy && group.taxonomy.task;
|
|
104
|
-
const what = group.taxonomy && group.taxonomy.what;
|
|
105
|
-
const who = group.taxonomy && group.taxonomy.who;
|
|
106
|
-
const when = group.taxonomy && group.taxonomy.when;
|
|
107
|
-
const language = group.taxonomy && group.taxonomy.language;
|
|
108
|
-
const $row = $("<tr>").append([
|
|
109
|
-
$("<td>").append($name),
|
|
110
|
-
$("<td>").append(task),
|
|
111
|
-
$("<td>").append(what),
|
|
112
|
-
$("<td>").append(who),
|
|
113
|
-
$("<td>").append(when),
|
|
114
|
-
$("<td>").append(language),
|
|
115
|
-
$("<td>").append(renderMarkdown(group.description)),
|
|
116
|
-
]);
|
|
117
|
-
$table.append($row);
|
|
118
|
-
});
|
|
119
|
-
return $table;
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
function renderPlots() {
|
|
123
|
-
const container = $("<div>", { class: "container" });
|
|
124
|
-
const links = $("<div>");
|
|
125
|
-
container.append(links);
|
|
126
|
-
const tableLinks = [];
|
|
127
|
-
|
|
128
|
-
function renderPlot(name, title) {
|
|
129
|
-
const plot = $("<div>", { class: "plot" });
|
|
130
|
-
const caption = $("<div>", { class: "plot-caption" }).append(
|
|
131
|
-
plotCaptions[name],
|
|
132
|
-
);
|
|
133
|
-
|
|
134
|
-
plot.append($("<h3>").append($("<a>", { id: title }).append(title)));
|
|
135
|
-
plot.append(caption);
|
|
136
|
-
plot.append(
|
|
137
|
-
$("<img>", { src: plotUrl(release, name), class: "img-fluid" }),
|
|
138
|
-
);
|
|
139
|
-
container.append(plot);
|
|
140
|
-
tableLinks.push($("<a>", { href: "#" + title }).append(title));
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
renderPlot("generic_summary", "Metric spread for core scenarios");
|
|
144
|
-
renderPlot("model_ranking_all", "Head-to-head win rate per each model");
|
|
145
|
-
|
|
146
|
-
renderPlot("accuracy_v_x", "Accuracy as a function of other metrics");
|
|
147
|
-
renderPlot("metric_correlation", "Correlation between metrics");
|
|
148
|
-
|
|
149
|
-
renderPlot("accuracy_v_access", "Accuracy as a function of model access");
|
|
150
|
-
renderPlot("accuracy_over_num_parameters", "Accuracy across model sizes");
|
|
151
|
-
renderPlot("accuracy_over_release_date", "Accuracy over time");
|
|
152
|
-
renderPlot(
|
|
153
|
-
"accuracy_over_the_pile_perplexity",
|
|
154
|
-
"Accuracy as a function of The Pile perplexity",
|
|
155
|
-
);
|
|
156
|
-
|
|
157
|
-
renderPlot("targeted_evals", "Targeted evaluations");
|
|
158
|
-
|
|
159
|
-
renderPlot(
|
|
160
|
-
"in_context_ablations",
|
|
161
|
-
"Number of in-context examples ablation",
|
|
162
|
-
);
|
|
163
|
-
renderPlot("mc_ablations", "Multiple-choice adaptation ablation");
|
|
164
|
-
|
|
165
|
-
links.append(renderItems(tableLinks));
|
|
166
|
-
|
|
167
|
-
return container;
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
function renderRunsOverview(runSpecs) {
|
|
171
|
-
let query = "";
|
|
172
|
-
const $search = $("<input>", {
|
|
173
|
-
type: "text",
|
|
174
|
-
size: 40,
|
|
175
|
-
placeholder: "Enter regex query (enter to open all)",
|
|
176
|
-
});
|
|
177
|
-
console.log(urlParams);
|
|
178
|
-
$search.keyup((e) => {
|
|
179
|
-
// Open up all match specs
|
|
180
|
-
if (e.keyCode === 13) {
|
|
181
|
-
const href = encodeUrlParams(
|
|
182
|
-
Object.assign({}, urlParams, { runSpecRegex: ".*" + query + ".*" }),
|
|
183
|
-
);
|
|
184
|
-
console.log(urlParams, href);
|
|
185
|
-
window.open(href);
|
|
186
|
-
}
|
|
187
|
-
query = $search.val();
|
|
188
|
-
renderRunsTable();
|
|
189
|
-
});
|
|
190
|
-
|
|
191
|
-
const $table = $("<table>", { class: "query-table" });
|
|
192
|
-
|
|
193
|
-
function renderRunsTable() {
|
|
194
|
-
$table.empty();
|
|
195
|
-
const $header = $("<tr>")
|
|
196
|
-
.append($("<td>").append($("<b>").append("Run")))
|
|
197
|
-
.append($("<td>").append($("<b>").append("Adaptation method")));
|
|
198
|
-
$table.append($header);
|
|
199
|
-
|
|
200
|
-
runSpecs.forEach((runSpec) => {
|
|
201
|
-
if (!new RegExp(query).test(runSpec.name)) {
|
|
202
|
-
return;
|
|
203
|
-
}
|
|
204
|
-
const href = encodeUrlParams(
|
|
205
|
-
Object.assign({}, urlParams, { runSpec: runSpec.name }),
|
|
206
|
-
);
|
|
207
|
-
const $row = $("<tr>")
|
|
208
|
-
.append($("<td>").append($("<a>", { href }).append(runSpec.name)))
|
|
209
|
-
.append($("<td>").append(runSpec.adapter_spec.method));
|
|
210
|
-
$table.append($row);
|
|
211
|
-
});
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
renderRunsTable();
|
|
215
|
-
|
|
216
|
-
return $("<div>").append([$search, $table]);
|
|
217
|
-
}
|
|
218
|
-
|
|
219
|
-
// Look at logic in `summarize.py`.
|
|
220
|
-
function getMetricNames(scenarioGroup) {
|
|
221
|
-
// A (scenario/run) group defines a list of metric groups, each of which defines the metrics.
|
|
222
|
-
// Just pull the names from those metrics.
|
|
223
|
-
const names = [];
|
|
224
|
-
scenarioGroup.metric_groups.forEach((metricGroupName) => {
|
|
225
|
-
const metricGroup = schema.metricGroup(metricGroupName);
|
|
226
|
-
metricGroup.metrics.forEach((metric) => {
|
|
227
|
-
// This function is supposed to return per-instance metrics, so exclude
|
|
228
|
-
// metrics that mentions perturbations.
|
|
229
|
-
if (metric.perturbation_name) {
|
|
230
|
-
return;
|
|
231
|
-
}
|
|
232
|
-
names.push(substitute(metric.name, scenarioGroup.environment));
|
|
233
|
-
});
|
|
234
|
-
});
|
|
235
|
-
return names;
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
let metricJudgements = null;
|
|
239
|
-
function getMetricJudgements() {
|
|
240
|
-
// Provide information
|
|
241
|
-
// Return dictionary {metric name: {wrongThreshold, correctThreshold, lowerIsBetter}}
|
|
242
|
-
// Example: {exact_match: {wrongThreshold: 0, correctThreshold: 1, lowerIsBetter: true}}
|
|
243
|
-
// TODO: move the hard-coding into schema.yaml
|
|
244
|
-
if (metricJudgements) {
|
|
245
|
-
return metricJudgements;
|
|
246
|
-
}
|
|
247
|
-
metricJudgements = {};
|
|
248
|
-
schema.run_groups.forEach((runGroup) => {
|
|
249
|
-
const name = runGroup.environment && runGroup.environment.main_name;
|
|
250
|
-
if (!["bits_per_byte"].includes(name)) {
|
|
251
|
-
metricJudgements[name] = {
|
|
252
|
-
wrongThreshold: 0,
|
|
253
|
-
correctThreshold: 1,
|
|
254
|
-
lowerIsBetter: false,
|
|
255
|
-
};
|
|
256
|
-
}
|
|
257
|
-
});
|
|
258
|
-
return metricJudgements;
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
function getStatClass(name, value) {
|
|
262
|
-
// Return the CSS class to use if a stat has `value`.
|
|
263
|
-
const judgements = getMetricJudgements();
|
|
264
|
-
const judgement = judgements[name];
|
|
265
|
-
if (!judgement) {
|
|
266
|
-
return "";
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
// Based on `name` determine whether smaller or larger is better.
|
|
270
|
-
if (judgement.lowerIsBetter === false) {
|
|
271
|
-
if (value >= judgement.correctThreshold) {
|
|
272
|
-
return "correct";
|
|
273
|
-
}
|
|
274
|
-
if (value <= judgement.wrongThreshold) {
|
|
275
|
-
return "wrong";
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
|
|
279
|
-
if (judgement.lowerIsBetter === true) {
|
|
280
|
-
if (value <= judgement.correctThreshold) {
|
|
281
|
-
return "correct";
|
|
282
|
-
}
|
|
283
|
-
if (value >= judgement.wrongThreshold) {
|
|
284
|
-
return "wrong";
|
|
285
|
-
}
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
return "";
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
function metricNameCompare(k1, k2) {
|
|
292
|
-
const splitCompare = (k1.split || "").localeCompare(k2.split || "");
|
|
293
|
-
if (splitCompare !== 0) {
|
|
294
|
-
return splitCompare;
|
|
295
|
-
}
|
|
296
|
-
const nameCompare = k1.name.localeCompare(k2.name);
|
|
297
|
-
if (nameCompare !== 0) {
|
|
298
|
-
return nameCompare;
|
|
299
|
-
}
|
|
300
|
-
const perturbationCompare = (
|
|
301
|
-
k1.perturbation ? k1.perturbation.name : ""
|
|
302
|
-
).localeCompare(k2.perturbation ? k2.perturbation.name : "");
|
|
303
|
-
if (perturbationCompare !== 0) {
|
|
304
|
-
return perturbationCompare;
|
|
305
|
-
}
|
|
306
|
-
return 0;
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
function renderGlobalStats(query, keys, statsList, statsPaths) {
|
|
310
|
-
// Render the scenario-level metrics.
|
|
311
|
-
// keys: list of metric names to render (these are the rows of table)
|
|
312
|
-
// statsList: for each run, list of stats
|
|
313
|
-
// statsPath: for each run, list of paths to the stats files
|
|
314
|
-
const $output = $("<div>");
|
|
315
|
-
keys.forEach((key) => {
|
|
316
|
-
// For each key (MetricName - e.g., {name: 'exact_match', ...})
|
|
317
|
-
|
|
318
|
-
if (key.perturbation && key.perturbation.computed_on !== "worst") {
|
|
319
|
-
// Only pay attention to worst (match `summarize.py`)
|
|
320
|
-
return;
|
|
321
|
-
}
|
|
322
|
-
|
|
323
|
-
const displayKey = renderMetricName(key);
|
|
324
|
-
if (
|
|
325
|
-
query !== "" &&
|
|
326
|
-
!query.split(" ").every((q) => displayKey.includes(q))
|
|
327
|
-
) {
|
|
328
|
-
return;
|
|
329
|
-
}
|
|
330
|
-
|
|
331
|
-
const field = schema.metricsField(key.name);
|
|
332
|
-
const helpText = describeMetricName(field, key);
|
|
333
|
-
const $key = $("<td>").append(
|
|
334
|
-
$("<span>").append(helpIcon(helpText)).append(" ").append(displayKey),
|
|
335
|
-
);
|
|
336
|
-
const $row = $("<tr>").append($("<td>").append($key));
|
|
337
|
-
statsList.forEach((stats) => {
|
|
338
|
-
// stats: list of statistics corresponding to one run (column)
|
|
339
|
-
const stat = stats.find((stat) => metricNameEquals(stat.name, key));
|
|
340
|
-
$row.append(
|
|
341
|
-
$("<td>").append(
|
|
342
|
-
stat ? renderFieldValue(field, round(stat.mean, 3)) : "?",
|
|
343
|
-
),
|
|
344
|
-
);
|
|
345
|
-
});
|
|
346
|
-
$output.append($row);
|
|
347
|
-
});
|
|
348
|
-
|
|
349
|
-
// Link to the JSON file
|
|
350
|
-
$output.append(
|
|
351
|
-
$("<tr>")
|
|
352
|
-
.append($("<td>"))
|
|
353
|
-
.append(
|
|
354
|
-
statsPaths.map((statsPath) =>
|
|
355
|
-
$("<td>").append($("<a>", { href: statsPath }).append("JSON")),
|
|
356
|
-
),
|
|
357
|
-
),
|
|
358
|
-
);
|
|
359
|
-
return $output;
|
|
360
|
-
}
|
|
361
|
-
|
|
362
|
-
function highlightNewWords(text, origText) {
|
|
363
|
-
// Render `text`, highlighting any words that don't occur in `origText`
|
|
364
|
-
// Ideally, we would form an alignment between `text` and `origText` and
|
|
365
|
-
// show the full diff, but that's too expensive.
|
|
366
|
-
const origWords = {};
|
|
367
|
-
origText.split(" ").forEach((word) => {
|
|
368
|
-
origWords[word] = true;
|
|
369
|
-
});
|
|
370
|
-
return text
|
|
371
|
-
.split(" ")
|
|
372
|
-
.map((word) =>
|
|
373
|
-
!word.trim() || origWords[word] ? word : "<u>" + word + "</u>",
|
|
374
|
-
)
|
|
375
|
-
.join(" ");
|
|
376
|
-
}
|
|
377
|
-
|
|
378
|
-
function renderRunsHeader(scenario, scenarioPath, scenarioSpec) {
|
|
379
|
-
const $output = $("<div>");
|
|
380
|
-
|
|
381
|
-
$output.append(renderScenarioHeader(scenario, scenarioSpec));
|
|
382
|
-
|
|
383
|
-
// Links
|
|
384
|
-
const links = [];
|
|
385
|
-
if (scenario) {
|
|
386
|
-
links.push($("<a>", { href: scenario.definition_path }).append("Code"));
|
|
387
|
-
}
|
|
388
|
-
if (scenarioPath) {
|
|
389
|
-
links.push($("<a>", { href: scenarioPath }).append("Scenario JSON"));
|
|
390
|
-
}
|
|
391
|
-
links.push($("<a>", { href: "#adapter" }).append("Adapter specification"));
|
|
392
|
-
links.push(
|
|
393
|
-
$("<a>", { href: "#instances" }).append("Instances + predictions"),
|
|
394
|
-
);
|
|
395
|
-
links.push($("<a>", { href: "#metrics" }).append("All metrics"));
|
|
396
|
-
$output.append(renderItems(links));
|
|
397
|
-
|
|
398
|
-
return $output;
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
function renderGroupHeader() {
|
|
402
|
-
const $output = $("<div>");
|
|
403
|
-
$.getJSON(groupsMetadataJsonUrl(), {}, (response) => {
|
|
404
|
-
const group = response[urlParams.group];
|
|
405
|
-
if (group) {
|
|
406
|
-
let groupName = group.display_name;
|
|
407
|
-
if (urlParams.subgroup) {
|
|
408
|
-
groupName += " / " + urlParams.subgroup;
|
|
409
|
-
}
|
|
410
|
-
$output.append($("<h3>").append(groupName));
|
|
411
|
-
$output.append(
|
|
412
|
-
$("<div>").append($("<i>").append(renderMarkdown(group.description))),
|
|
413
|
-
);
|
|
414
|
-
if (group.taxonomy) {
|
|
415
|
-
const $rows = Object.entries(group.taxonomy).map(([k, v]) => {
|
|
416
|
-
return $("<tr>").append([
|
|
417
|
-
$("<td>").append(`<b>${k}</b>`),
|
|
418
|
-
$("<td>").append(v),
|
|
419
|
-
]);
|
|
420
|
-
});
|
|
421
|
-
$output.append(
|
|
422
|
-
$("<table>", { class: "taxonomy-table" }).append($rows),
|
|
423
|
-
);
|
|
424
|
-
}
|
|
425
|
-
}
|
|
426
|
-
});
|
|
427
|
-
return $output;
|
|
428
|
-
}
|
|
429
|
-
|
|
430
|
-
function renderScenarioHeader(scenario, scenarioSpec) {
|
|
431
|
-
const $output = $("<div>");
|
|
432
|
-
$output.append(
|
|
433
|
-
$("<h3>").append(renderScenarioDisplayName(scenario, scenarioSpec)),
|
|
434
|
-
);
|
|
435
|
-
$output.append(
|
|
436
|
-
$("<div>").append($("<i>").append(renderMarkdown(scenario.description))),
|
|
437
|
-
);
|
|
438
|
-
return $output;
|
|
439
|
-
}
|
|
440
|
-
|
|
441
|
-
function instanceKey(instance) {
|
|
442
|
-
// The (instance id, perturbation) should be enough to uniquely identify the instance.
|
|
443
|
-
return JSON.stringify([instance.id, instance.perturbation || "original"]);
|
|
444
|
-
}
|
|
445
|
-
|
|
446
|
-
function predictionInstanceKey(prediction) {
|
|
447
|
-
return JSON.stringify([
|
|
448
|
-
prediction.instance_id,
|
|
449
|
-
prediction.perturbation || "original",
|
|
450
|
-
]);
|
|
451
|
-
}
|
|
452
|
-
|
|
453
|
-
function displayRequestInstanceKey(displayRequest) {
|
|
454
|
-
return JSON.stringify([
|
|
455
|
-
displayRequest.instance_id,
|
|
456
|
-
displayRequest.perturbation || "original",
|
|
457
|
-
]);
|
|
458
|
-
}
|
|
459
|
-
|
|
460
|
-
Handlebars.registerHelper("highlightNewWords", (perturbed, unperturbed) => {
|
|
461
|
-
return new Handlebars.SafeString(highlightNewWords(perturbed, unperturbed));
|
|
462
|
-
});
|
|
463
|
-
|
|
464
|
-
Handlebars.registerHelper("join", (strings, separator) => {
|
|
465
|
-
return strings.join(separator);
|
|
466
|
-
});
|
|
467
|
-
|
|
468
|
-
Handlebars.registerHelper("pluralize", (quantity, singular, plural) => {
|
|
469
|
-
return quantity === 1 ? singular : plural;
|
|
470
|
-
});
|
|
471
|
-
|
|
472
|
-
const instanceTemplate = Handlebars.compile(`
|
|
473
|
-
{{#with instance}}
|
|
474
|
-
{{#if perturbation}}
|
|
475
|
-
<br>
|
|
476
|
-
{{else}}
|
|
477
|
-
<hr>
|
|
478
|
-
{{/if}}
|
|
479
|
-
<div class="instance">
|
|
480
|
-
<div>
|
|
481
|
-
<strong>
|
|
482
|
-
Instance {{id}} [split: {{split}}]
|
|
483
|
-
{{#if perturbation}}
|
|
484
|
-
...with perturbation: {{perturbation.name}}
|
|
485
|
-
{{/if}}
|
|
486
|
-
</strong>
|
|
487
|
-
</div>
|
|
488
|
-
{{#unless ../hideInputOutput}}
|
|
489
|
-
{{#if input}}
|
|
490
|
-
<div>Input:</div>
|
|
491
|
-
<div class="instance-input">
|
|
492
|
-
{{~#if perturbation~}}
|
|
493
|
-
{{highlightNewWords input.text ../unperturbedInstance.input.text}}
|
|
494
|
-
{{~else~}}
|
|
495
|
-
{{{input.text}}}
|
|
496
|
-
{{~/if~}}
|
|
497
|
-
</div>
|
|
498
|
-
{{/if}}
|
|
499
|
-
{{#if references}}
|
|
500
|
-
<div>{{pluralize references.length "Reference" "References"}}:</div>
|
|
501
|
-
<ul>
|
|
502
|
-
{{#each references}}
|
|
503
|
-
<li>
|
|
504
|
-
<span class="instance-reference">
|
|
505
|
-
{{~#if ../perturbation~}}
|
|
506
|
-
{{highlightNewWords output.text (lookup (lookup (lookup ../../unperturbedInstance.references @index) "output") "text")}}
|
|
507
|
-
{{~else~}}
|
|
508
|
-
{{output.text}}
|
|
509
|
-
{{~/if~}}
|
|
510
|
-
</span>
|
|
511
|
-
{{#if tags}} <strong>[{{join tags ","}}]</strong>{{/if}}
|
|
512
|
-
</li>
|
|
513
|
-
{{/each}}
|
|
514
|
-
</ul>
|
|
515
|
-
{{/if}}
|
|
516
|
-
{{/unless}}
|
|
517
|
-
<div class="predictions"></div>
|
|
518
|
-
</div>
|
|
519
|
-
{{/with}}
|
|
520
|
-
`);
|
|
521
|
-
|
|
522
|
-
// Render every instance of a scenario into $instances.
// Returns a mapping from instance key to the ".instance" div rendered for it,
// so that predictions can later be placed in the right spot. If the number of
// rendered divs does not match the number of instances, an error is logged and
// the returned mapping is empty.
function renderScenarioInstances(instances, $instances) {
  // Index the original (unperturbed) instances by id.
  const originalsById = {};
  for (const instance of instances) {
    if (!instance.perturbation) {
      originalsById[instance.id] = instance;
    }
  }

  // Render each instance and remember its key in rendering order.
  const keys = [];
  const htmlParts = [];
  for (const instance of instances) {
    keys.push(instanceKey(instance));
    htmlParts.push(
      instanceTemplate({
        instance,
        unperturbedInstance: originalsById[instance.id],
        hideInputOutput: urlParams.hideInputOutput,
      }),
    );
  }
  $instances.html(htmlParts.join(""));

  const keyToDiv = {};
  const $divs = $instances.find(".instance");
  if (keys.length !== $divs.length) {
    console.error(
      `Could not map instance keys to divs because keys length (${keys.length}) !== divs length (${$divs.length})`,
    );
    return keyToDiv;
  }
  // The i-th key belongs to the i-th rendered div.
  keys.forEach((key, position) => {
    keyToDiv[key] = $divs.eq(position);
  });
  return keyToDiv;
}
|
|
565
|
-
|
|
566
|
-
// Fetch the display requests of a run (at most once per run name) and render
// each one into the matching ".request" element of its instance div.
//
// Parameters:
//   runSpec          - run spec; only `name` is read here
//   suite            - suite passed to requestsJsonUrl()
//   instanceKeyToDiv - mapping from instance key to the instance's jQuery div
//   predictedIndex   - not used by this function (kept for call compatibility)
//
// Run names that were already requested are recorded in
// `runSpecsNamesWithLoadedRequests`; later calls for the same run are no-ops.
function loadAndRenderRequests(
  runSpec,
  suite,
  instanceKeyToDiv,
  predictedIndex,
) {
  if (runSpecsNamesWithLoadedRequests.includes(runSpec.name)) {
    return;
  }
  runSpecsNamesWithLoadedRequests.push(runSpec.name);
  $.getJSON(requestsJsonUrl(suite, runSpec.name), {}, (displayRequests) => {
    displayRequests.forEach((displayRequest) => {
      const key = displayRequestInstanceKey(displayRequest);
      const $instanceDiv = instanceKeyToDiv[key];
      if (!$instanceDiv) {
        // Skip requests whose instance was not rendered instead of throwing
        // and aborting the rendering of all remaining requests.
        console.error("Not found: " + key);
        return;
      }
      // Declared locally: assigning an undeclared `$request` would create an
      // implicit global (and throws in strict mode).
      const $request = $instanceDiv
        .find(".request")
        .eq(displayRequest.train_trial_index);
      $request.empty().append(renderRequest(displayRequest.request));
    });
  });
}
|
|
585
|
-
|
|
586
|
-
// Render the request made to the API as a table wrapped in a <div>.
// The first row shows the prompt; every other property of `request` gets its
// own row, with non-string values shown as JSON.
//
// Keys and values are inserted with .text() rather than .append(): jQuery
// parses appended strings as HTML, which would mangle values containing
// markup-like text (for example a stop sequence such as "</s>").
function renderRequest(request) {
  const $requestTable = $("<table>");

  const $requestTableHeader = $("<h6>").append("Request");
  $requestTable.append($requestTableHeader);

  const $promptRow = $("<tr>").append([
    $("<td>").append("prompt"),
    $('<td class="prompt">').text(request.prompt),
  ]);
  $requestTable.append($promptRow);

  Object.keys(request).forEach((requestKey) => {
    if (requestKey === "prompt") {
      // Already rendered as the first row.
      return;
    }
    const value = request[requestKey];
    const displayValue =
      typeof value === "string" ? value : JSON.stringify(value);
    const $requestRow = $("<tr>").append([
      $("<td>").text(requestKey),
      // JSON.stringify yields undefined for unserializable values, and
      // .text(undefined) would act as a getter, so fall back to "".
      $("<td>").text(displayValue === undefined ? "" : displayValue),
    ]);
    $requestTable.append($requestRow);
  });
  return $("<div>").append($requestTable);
}
|
|
615
|
-
|
|
616
|
-
// Block helper: render the block once per element of `context` and join the
// rendered pieces with `separator`.
// The block function is called with the element only; passing `options.fn`
// straight to Array#map would also hand it the index and the array, and the
// index would land in the block function's `options` parameter.
Handlebars.registerHelper("joinEach", (context, separator, options) => {
  return context.map((item) => options.fn(item)).join(separator);
});
|
|
619
|
-
|
|
620
|
-
// Handlebars template for one prediction.
// Context fields read by the template:
//   - metrics: list of {name, value, class}; rendered as "[a: 1 | b: 2]"
//   - runDisplayName: optional label shown in brackets after "Prediction"
//   - prediction: reference_index and train_trial_index are displayed
//   - numTrainTrials: when truthy, the trial index is displayed
//   - predictedText: inserted WITHOUT escaping (callers embed HTML spans in it)
// The output ends with a hidden <div class="request"> placeholder that initially
// contains the text "Loading...".
// `includeZero=true` makes the "[ref N]" label appear for reference index 0
// too; a plain `#if` treats 0 as falsy and would drop it.
const predictionTemplate = Handlebars.compile(`
{{#if metrics}}
<div>
[
{{#joinEach metrics " | " }}
<span{{#if class}} class="{{class}}"{{/if}}>
{{~name}}: {{value~}}
</span>
{{/joinEach}}
]
</div>
{{/if}}
<div class="prediction">
<strong><a href="#" class="load-requests">
Prediction
{{~#if runDisplayName~}}
[{{runDisplayName}}]
{{~/if~}}
{{~#if prediction.reference_index includeZero=true~}}
[ref {{prediction.reference_index}}]
{{~/if~}}
{{~#if numTrainTrials~}}
\{trial {{prediction.train_trial_index~}} \}
{{~/if~}}
</a></strong>:
<span class="prediction-text">{{{predictedText}}}</span>
</div>
<div class="request" style="display: none">Loading...</div>
`);
|
|
649
|
-
|
|
650
|
-
// Add the predictions (and their per-instance metrics) of one run to the
// ".predictions" div of each matching instance.
//
// Parameters:
//   runSpec          - run spec; adapter_spec.method and groups are read
//   runSuite         - suite forwarded to loadAndRenderRequests()
//   runDisplayName   - label shown next to each prediction
//   predictions      - list of predictions for this run
//   instanceKeyToDiv - mapping from instance key to the instance's jQuery div
//   $instances       - container holding all instance divs
//
// Each instance gives rise to multiple requests:
//
// Identity of the instance (instanceKey):
// - instance_id
// - perturbation
// Replication:
// - train_trial_index
// Instance-level decompositions:
// - for adapter method = language_modeling, a long instance is broken up into multiple requests
// - for adapter method = multiple_choice_separate_original, have one request per reference
// - for adapter method = multiple_choice_separate_calibrated, have two requests per reference
function renderPredictions(
  runSpec,
  runSuite,
  runDisplayName,
  predictions,
  instanceKeyToDiv,
  $instances,
) {
  // Links rendered by earlier calls (other runs). They already have a click
  // handler and must not receive another one below.
  const $existingLinks = $instances.find("a.load-requests");

  const method = runSpec.adapter_spec.method;
  const numTrainTrials =
    predictions.reduce(
      (m, prediction) => Math.max(m, prediction.train_trial_index),
      -1,
    ) + 1;

  // Look for the default metrics for the group
  const metricNames = [];
  schema.run_groups.forEach((runGroup) => {
    if (!runSpec.groups.includes(runGroup.name)) {
      return;
    }
    getMetricNames(runGroup).forEach((name) => {
      if (!metricNames.includes(name)) {
        metricNames.push(name);
      }
    });
  });

  // For each request state (across all instances)...
  predictions.forEach((prediction) => {
    const $instanceDiv = instanceKeyToDiv[predictionInstanceKey(prediction)];
    if (!$instanceDiv) {
      console.error("Not found: " + predictionInstanceKey(prediction));
      return;
    }

    // Traverse into the prediction div within the instance div
    const $instance = $instanceDiv.find(".predictions");

    // For multiple_choice_separate_*, only render the request state for the predicted index
    const predictedIndex = prediction.stats["predicted_index"];
    if (
      prediction.reference_index !== undefined &&
      predictedIndex !== undefined &&
      prediction.reference_index !== predictedIndex
    ) {
      return;
    }

    // Render the prediction
    // TODO: Escape the HTML in predictedText properly
    let predictedText = prediction.predicted_text.trim();
    if (method === "multiple_choice_joint") {
      if (prediction.mapped_output !== undefined) {
        predictedText = truncateMiddle(prediction.mapped_output.trim(), 30);
      } else {
        predictedText =
          truncateMiddle(predictedText, 30) +
          '<span style="color: gray"> (unmapped)</span>';
      }
    } else if (method.startsWith("multiple_choice_separate_")) {
      // For adapter method = separate, prediction starts with the prompt, strip it out
      if (prediction.truncated_predicted_text !== undefined) {
        predictedText =
          '<span style="color: lightgray">...</span> ' +
          truncateMiddle(prediction.truncated_predicted_text.trim(), 30);
      } else {
        console.warn(
          "Prompt was not stripped from predicted text",
          predictedText,
        );
        predictedText = truncateMiddle(predictedText, 30);
      }
    } else if (method === "language_modeling") {
      // For language modeling, first token is just padding, so strip it out
      if (prediction.truncated_predicted_text !== undefined) {
        predictedText = truncateMiddle(
          prediction.truncated_predicted_text.trim(),
          30,
        );
      } else {
        console.warn(
          "First token was not stripped from predicted text",
          predictedText,
        );
        predictedText = truncateMiddle(predictedText, 30);
      }
    }

    // Collect the group's default metrics that this prediction has a value for.
    const metrics = [];
    metricNames.forEach((metricName) => {
      const metricValue = prediction.stats[metricName];
      if (metricValue !== undefined) {
        const displayName = schema.metricsField(metricName).display_name;
        const statClass = getStatClass(metricName, metricValue);
        metrics.push({
          name: displayName,
          value: metricValue,
          class: statClass,
        });
      }
    });
    $instance.append(
      predictionTemplate({
        runDisplayName,
        prediction,
        predictedText,
        numTrainTrials,
        metrics,
      }),
    );
  });

  // Bind only the links added by this call. Binding every link under
  // $instances would stack one extra handler per run on the links of earlier
  // runs, so a single click would toggle their request panel several times.
  $instances
    .find("a.load-requests")
    .not($existingLinks)
    .click((event) => {
      $(event.target).closest(".prediction").next(".request").slideToggle();
      loadAndRenderRequests(runSpec, runSuite, instanceKeyToDiv);
      return false;
    });
}
|
|
779
|
-
|
|
780
|
-
// Render all the `runSpecs`:
// 1. Adapter specification
// 2. Instances + predictions
// 3. Stats
// For each block, we show a table and each `runSpec` is a column.
//
// Parameters:
//   runSpecs       - run specs to show side by side; the scenario header and
//                    the instances are loaded from the first one
//   runNameToSuite - passed to getSuiteForRun() to resolve each run's suite
//
// Returns the root <div> immediately; the scenario info, instances and metrics
// sections start with "Loading ..." placeholders that are replaced when the
// corresponding requests complete.
function renderRunsDetailed(runSpecs, runNameToSuite) {
  // Paths (parallel arrays corresponding to `runSpecs`)
  const statsPaths = runSpecs.map((runSpec) => {
    return statsJsonUrl(
      getSuiteForRun(runNameToSuite, runSpec.name),
      runSpec.name,
    );
  });
  const scenarioStatePaths = runSpecs.map((runSpec) => {
    return scenarioStateJsonUrl(
      getSuiteForRun(runNameToSuite, runSpec.name),
      runSpec.name,
    );
  });
  const runSpecPaths = runSpecs.map((runSpec) => {
    return runSpecJsonUrl(
      getSuiteForRun(runNameToSuite, runSpec.name),
      runSpec.name,
    );
  });
  const predictionsPaths = runSpecs.map((runSpec) => {
    return predictionsJsonUrl(
      getSuiteForRun(runNameToSuite, runSpec.name),
      runSpec.name,
    );
  });

  // Figure out short names for the runs based on where they differ
  const runDisplayNames = findDiff(
    runSpecs.map((runSpec) => runSpec.adapter_spec),
  ).map(renderDict);

  // Setup the basic HTML elements
  const $root = $("<div>");
  const $scenarioInfo = $("<div>", { class: "scenario-info" });
  $scenarioInfo.text("Loading scenario info...");
  $root.append($scenarioInfo);

  // Adapter
  $root.append(
    $("<a>", { name: "adapter" }).append(
      $("<h5>").append("Adapter specification"),
    ),
  );
  const $adapterSpec = $("<table>");
  if (runSpecs.length > 1) {
    // Header row naming each run; only needed when comparing several runs.
    $adapterSpec.append(
      $("<tr>")
        .append($("<td>"))
        .append(runDisplayNames.map((name) => $("<td>").append(name))),
    );
  }
  $root.append($("<div>", { class: "table-container" }).append($adapterSpec));

  // Instances
  $root.append(
    $("<a>", { name: "instances" }).append(
      $("<h5>").append("Instances + predictions"),
    ),
  );
  const $instancesContainer = $("<div>");
  $instancesContainer
    .addClass("table-container")
    .text("Loading instances...");
  $root.append($instancesContainer);

  // Metrics
  $root.append(
    $("<a>", { name: "metrics" }).append($("<h5>").append("All metrics")),
  );
  const $statsContainer = $("<div>");
  $statsContainer.addClass("table-container").text("Loading metrics...");
  $root.append($statsContainer);

  // Render adapter specs
  $adapterSpec.append(
    $("<tr>")
      .append($("<td>"))
      .append(
        scenarioStatePaths.map((scenarioStatePath, index) => {
          return $("<td>")
            .append(
              $("<a>", { href: runSpecPaths[index] }).append("Spec JSON"),
            )
            .append(" | ")
            .append(
              $("<a>", { href: scenarioStatePath }).append("Full JSON"),
            );
        }),
      ),
  );
  const keys = canonicalizeList(
    runSpecs.map((runSpec) => Object.keys(runSpec.adapter_spec)),
  );
  sortListWithReferenceOrder(keys, schema.adapterFieldNames);
  keys.forEach((key) => {
    const field = schema.adapterField(key);
    const helpText = describeField(field);
    const $key = $("<td>").append(
      $("<span>").append(helpIcon(helpText)).append(" ").append(key),
    );
    const $row = $("<tr>").append($key);
    runSpecs.forEach((runSpec) => {
      $row.append(
        $("<td>").append(renderFieldValue(field, runSpec.adapter_spec[key])),
      );
    });
    $adapterSpec.append($row);
  });

  // Render metrics/stats
  getJSONList(
    statsPaths,
    (statsList) => {
      console.log("metrics", statsList);
      if (statsList.length && statsList.every((stat) => stat.length === 0)) {
        $statsContainer
          .empty()
          .text("Metrics are currently unavailable. Please try again later.");
        return;
      }
      const $stats = $("<table>");
      const $statsSearch = $("<input>", {
        type: "text",
        size: 40,
        placeholder: "Enter keywords to filter metrics",
      });
      if (runSpecs.length > 1) {
        $stats.append(
          $("<tr>")
            .append($("<td>"))
            .append(runDisplayNames.map((name) => $("<td>").append(name))),
        );
      }
      const keys = canonicalizeList(
        statsList.map((stats) => stats.map((stat) => stat.name)),
        metricNameCompare,
      );
      keys.sort(metricNameCompare);

      // Re-render the stats table for the current `query`.
      function update() {
        $stats
          .empty()
          .append(renderGlobalStats(query, keys, statsList, statsPaths));
      }

      // Filter
      let query = "";
      $statsSearch.keyup(() => {
        query = $statsSearch.val();
        update();
      });

      update();
      $statsContainer.empty().append($statsSearch).append($stats);
    },
    [],
  );

  // Render scenario header
  const scenarioPath = scenarioJsonUrl(
    getSuiteForRun(runNameToSuite, runSpecs[0].name),
    runSpecs[0].name,
  );
  $.get(scenarioPath, {}, (scenario) => {
    console.log("scenario", scenario);
    $scenarioInfo
      .empty()
      .append(
        renderRunsHeader(scenario, scenarioPath, runSpecs[0].scenario_spec),
      );
  });

  // Render scenario instances and predictions
  const instancesPath = instancesJsonUrl(
    getSuiteForRun(runNameToSuite, runSpecs[0].name),
    runSpecs[0].name,
  );
  const instancesPromise = $.getJSON(instancesPath, {});
  const predictionsPromise = getJSONList(predictionsPaths);
  $.when(instancesPromise, predictionsPromise).then(
    (instancesResult, predictions) => {
      // The instances payload is read from the first element of the result.
      const instances = instancesResult[0];
      console.log("instances", instances);
      console.log("predictions", predictions);
      const $instances = $("<div>");
      const instanceKeyToDiv = renderScenarioInstances(instances, $instances);
      // For each run / model...
      runSpecs.forEach((runSpec, index) => {
        renderPredictions(
          runSpec,
          getSuiteForRun(runNameToSuite, runSpec.name),
          runDisplayNames[index],
          predictions[index],
          instanceKeyToDiv,
          $instances,
        );
      });
      $instancesContainer.empty().append($instances);
    },
  );
  return $root;
}
|
|
997
|
-
|
|
998
|
-
// URL of the landing page: the encoding of an empty parameter set.
// The `model` argument is accepted but not used.
function rootUrl(model) {
  const noParams = {};
  return encodeUrlParams(noParams);
}
|
|
1001
|
-
|
|
1002
|
-
// URL built from the current URL parameters with `scenarios` cleared and
// `models` set to 1. The `model` argument is accepted but not used.
function modelUrl(model) {
  const params = Object.assign({}, urlParams, { scenarios: null, models: 1 });
  return encodeUrlParams(params);
}
|
|
1007
|
-
|
|
1008
|
-
// URL built from the current URL parameters with `scenarios` cleared and
// `group` set to the given value.
function groupUrl(group) {
  const params = Object.assign({}, urlParams, { scenarios: null, group });
  return encodeUrlParams(params);
}
|
|
1013
|
-
|
|
1014
|
-
// URL of the "core_scenarios" group page with the metric group's display name
// as the fragment, e.g. "...#Calibration".
function metricUrl(group) {
  const params = Object.assign({}, urlParams, { group: "core_scenarios" });
  const pageUrl = encodeUrlParams(params);
  return pageUrl + "#" + group.display_name;
}
|
|
1024
|
-
|
|
1025
|
-
// Shortest available label for a group: the first truthy value among
// short_display_name, display_name and name.
function groupShortDisplayName(group) {
  if (group.short_display_name) {
    return group.short_display_name;
  }
  if (group.display_name) {
    return group.display_name;
  }
  return group.name;
}
|
|
1028
|
-
|
|
1029
|
-
// Label for a metric: its display name (or name when no display name is set),
// followed by " (perturbation: <name>)" when the metric has a perturbation name.
function metricDisplayName(metric) {
  const baseName = metric.display_name || metric.name;
  let suffix = "";
  if (metric.perturbation_name) {
    suffix = " (perturbation: " + metric.perturbation_name + ")";
  }
  return baseName + suffix;
}
|
|
1037
|
-
|
|
1038
|
-
// Render the column listing every model in the schema.
// The header shows the number of models not flagged `todo`; each model becomes
// a link labelled "<creator organization> / <display name>", with its
// description as the tooltip. Models flagged `todo` get the extra CSS class
// "list-item-todo".
function renderModelList() {
  const $column = $("<div>", { class: "col-sm-3" });
  const allModels = schema.models;
  const numModels = allModels.filter((model) => !model.todo).length;
  const $header = $("<div>", { class: "list-header" }).append(
    `${numModels} models`,
  );
  $column.append($header);

  for (const model of allModels) {
    let cssClass = "list-item";
    if (model.todo) {
      cssClass += " list-item-todo";
    }
    const label = model.creator_organization + " / " + model.display_name;
    const $link = $("<a>", {
      href: modelUrl(model.name),
      class: cssClass,
      title: model.description,
    }).append(label);
    $column.append($("<div>").append($link));
  }
  return $column;
}
|
|
1058
|
-
|
|
1059
|
-
// Render the column listing the scenarios, grouped by top-level run group.
//
// There are two types of groups we care about:
// 1) Top-level groups (e.g., question_answering): run groups that have
//    subgroups and whose category is "Core scenarios" or "Targeted evaluations"
// 2) Scenario-level groups (e.g., mmlu): the subgroups of the top-level groups
//
// The header counts the distinct subgroups that are not flagged `todo`.
function renderScenarioList() {
  const $column = $("<div>", { class: "col-sm-3" });

  // Index every run group by name so subgroup names can be resolved.
  const groupByName = {};
  for (const runGroup of schema.run_groups) {
    groupByName[runGroup.name] = runGroup;
  }

  const shownCategories = ["Core scenarios", "Targeted evaluations"];
  const topGroups = schema.run_groups.filter(
    (runGroup) =>
      runGroup.subgroups && shownCategories.includes(runGroup.category),
  );

  // Collect distinct, non-todo scenario names across all top-level groups.
  const countedScenarioNames = {};
  for (const topGroup of topGroups) {
    for (const subgroupName of topGroup.subgroups) {
      if (!groupByName[subgroupName].todo) {
        countedScenarioNames[subgroupName] = true;
      }
    }
  }
  const numScenarios = Object.keys(countedScenarioNames).length;

  $column.append(
    $("<div>", { class: "list-header" }).append(`${numScenarios} scenarios`),
  );

  for (const topGroup of topGroups) {
    const $topLink = $("<a>", {
      href: groupUrl(topGroup.name),
      class: "list-item",
      title: topGroup.description,
    }).append(groupShortDisplayName(topGroup));

    const $subgroupItems = topGroup.subgroups.map((subgroupName) => {
      const subgroup = groupByName[subgroupName];
      let cssClass = "list-item";
      if (subgroup.todo) {
        cssClass += " list-item-todo";
      }
      const $subgroupLink = $("<a>", {
        href: groupUrl(subgroup.name),
        class: cssClass,
        title: subgroup.description,
      }).append(groupShortDisplayName(subgroup));
      return $("<li>").append($subgroupLink);
    });

    const $group = $("<div>");
    $group.append($topLink);
    $group.append($("<ul>").append($subgroupItems));
    $column.append($group);
  }
  return $column;
}
|
|
1118
|
-
|
|
1119
|
-
// Render the column listing the metric groups and the metrics in each of them.
//
// Some metric groups contain the placeholder metric "${main_name}". It is
// expanded into one entry per distinct `environment.main_name` used by the run
// groups that reference the metric group.
// Example: accuracy => [quasi_exact_match, f1_score, ...]
// A metric group is skipped when a group named "<name>_detailed" also exists.
// The header counts the distinct metric names after expansion.
function renderMetricsList() {
  const $column = $("<div>", { class: "col-sm-3" });

  // Information about individual metrics, indexed by name.
  const metricByName = {};
  for (const metric of schema.metrics) {
    metricByName[metric.name] = metric;
  }

  // For each metric group, the deduped list of main_names used across run groups.
  const mainNamesByMetricGroup = {};
  for (const runGroup of schema.run_groups) {
    if (!runGroup.metric_groups) {
      continue;
    }
    for (const metricGroupName of runGroup.metric_groups) {
      const mainName = runGroup.environment.main_name;
      if (!mainName) {
        continue;
      }
      const known = mainNamesByMetricGroup[metricGroupName] || [];
      if (!known.includes(mainName)) {
        mainNamesByMetricGroup[metricGroupName] = known.concat([mainName]);
      }
    }
  }

  const hasDetailedVariant = (metricGroup) =>
    schema.metric_groups.some(
      (other) => other.name === metricGroup.name + "_detailed",
    );

  // Copy of the metric group with the "${main_name}" placeholder expanded.
  const expandMetricGroup = (metricGroup) => {
    const expanded = [];
    for (const metric of metricGroup.metrics) {
      if (metric.name !== "${main_name}") {
        expanded.push(metric);
        continue;
      }
      const mainNames =
        mainNamesByMetricGroup[metricGroup.name.replace("_detailed", "")] ||
        [];
      for (const name of mainNames) {
        expanded.push(Object.assign({}, metric, { name }));
      }
    }
    return Object.assign({}, metricGroup, { metrics: expanded });
  };

  const metricGroups = schema.metric_groups
    .filter((metricGroup) => !hasDetailedVariant(metricGroup))
    .map((metricGroup) => expandMetricGroup(metricGroup));

  // Count the number of distinct metric names.
  const countedMetricNames = {};
  for (const metricGroup of metricGroups) {
    for (const metric of metricGroup.metrics) {
      countedMetricNames[metric.name] = true;
    }
  }
  const numMetrics = Object.keys(countedMetricNames).length;

  $column.append(
    $("<div>", { class: "list-header" }).append(`${numMetrics} metrics`),
  );

  for (const metricGroup of metricGroups) {
    const $groupLink = $("<a>", {
      href: metricUrl(metricGroup),
      class: "list-item",
      title: metricGroup.description,
    }).append(groupShortDisplayName(metricGroup));

    const $metricItems = metricGroup.metrics.map((metricRef) => {
      // Get the information from the metric (name, display_name, description);
      // schema-level metric info overrides the fields of the reference.
      const metric = Object.assign(
        {},
        metricRef,
        metricByName[metricRef.name] || metricRef,
      );
      const $metricLink = $("<a>", {
        class: "list-item",
        title: metric.description,
      }).append(metricDisplayName(metric));
      return $("<li>").append($metricLink);
    });

    const $group = $("<div>");
    $group.append($groupLink);
    $group.append($("<ul>").append($metricItems));
    $column.append($group);
  }
  return $column;
}
|
|
1214
|
-
|
|
1215
|
-
// The HELM logo image wrapped in a link to the root URL.
function helmLogo() {
  const $link = $("<a>", { href: rootUrl() });
  const $image = $("<img>", {
    src: "images/helm-logo.png",
    width: "500px",
    class: "mx-auto d-block",
  });
  return $link.append($image);
}
|
|
1224
|
-
|
|
1225
|
-
// A large link styled as a button, showing `text` and pointing at `href`.
function button(text, href) {
  const $link = $("<a>", { href, class: "main-link btn btn-lg m-5 px-5" });
  $link.append(text);
  return $link;
}
|
|
1230
|
-
|
|
1231
|
-
// Render the landing page and return it as a <div class="row">:
// the logo, the Blog post / Paper / GitHub buttons, the description with the
// four numbered principles (the organization logos sit inside the
// "Standardization" item), and the model / scenario / metric lists.
function renderMainPage() {
  const $result = $("<div>", { class: "row" });

  $result.append($("<div>", { class: "col-sm-12" }).append(helmLogo()));

  const $blog = button(
    "Blog post",
    "https://crfm.stanford.edu/2022/11/17/helm.html",
  );
  const $paper = button("Paper", "https://arxiv.org/pdf/2211.09110.pdf");
  const $code = button("GitHub", "https://github.com/stanford-crfm/helm");
  $result.append(
    $("<div>", { class: "col-sm-12" }).append(
      $("<div>", { class: "text-center" }).append([$blog, $paper, $code]),
    ),
  );

  const $description = $("<div>", { class: "col-sm-8" }).append([
    "A language model takes in text and produces text:",
    $("<div>", { class: "text-center" }).append(
      $("<img>", {
        src: "images/language-model-helm.png",
        width: "600px",
        style: "width: 600px; margin-left: 130px",
      }),
    ),
    "Despite their simplicity, language models are increasingly functioning as the foundation for almost all language technologies from question answering to summarization.",
    " ",
    "But their immense capabilities and risks are not well understood.",
    " ",
    "Holistic Evaluation of Language Models (HELM) is a living benchmark that aims to improve the transparency of language models.",
  ]);

  // One linked logo; `height` is the image height passed to the <img>.
  function organization(src, href, height) {
    return $("<div>", { class: "logo-item" }).append(
      $("<a>", { href }).append($("<img>", { src, height })),
    );
  }
  const defaultSize = 36;
  const largerSize = 50;
  // [logo path, organization URL, logo height], in display order.
  const organizationLogos = [
    ["images/organizations/ai21.png", "https://www.ai21.com/", defaultSize],
    [
      "images/organizations/anthropic.png",
      "https://www.anthropic.com/",
      defaultSize,
    ],
    [
      "images/organizations/bigscience.png",
      "https://bigscience.huggingface.co/",
      largerSize,
    ],
    ["images/organizations/cohere.png", "https://cohere.ai/", defaultSize],
    [
      "images/organizations/eleutherai.png",
      "https://www.eleuther.ai/",
      largerSize,
    ],
    ["images/organizations/google.png", "https://ai.google/", defaultSize],
    ["images/organizations/meta.png", "https://ai.facebook.com/", largerSize],
    [
      "images/organizations/microsoft.png",
      "https://turing.microsoft.com/",
      defaultSize,
    ],
    [
      "images/organizations/nvidia.png",
      "https://www.nvidia.com/en-us/research/machine-learning-artificial-intelligence/",
      largerSize,
    ],
    ["images/organizations/openai.png", "https://openai.com/", defaultSize],
    [
      "images/organizations/tsinghua-keg.png",
      "https://keg.cs.tsinghua.edu.cn/",
      largerSize,
    ],
    ["images/organizations/yandex.png", "https://yandex.com/", defaultSize],
    ["images/organizations/together.png", "https://together.xyz/", defaultSize],
  ];
  // Not appended to $result directly: the container is placed inside the
  // "Standardization" item below, and a jQuery node appended a second time is
  // moved rather than cloned, so an earlier append would simply be undone.
  const $organizations = $("<div>", { class: "logo-container" }).append(
    organizationLogos.map(([src, href, height]) =>
      organization(src, href, height),
    ),
  );

  $description.append(
    $("<ol>").append([
      $("<li>")
        .append(
          "<b>Broad coverage and recognition of incompleteness</b>. We define a taxonomy over the scenarios we would ideally like to evaluate, select scenarios and metrics to cover the space and make explicit what is missing.",
        )
        .append(
          $("<div>", { class: "text-center" }).append(
            $("<img>", {
              src: "images/taxonomy-scenarios.png",
              width: "300px",
            }),
          ),
        ),
      $("<li>")
        .append(
          "<b>Multi-metric measurement</b>. Rather than focus on isolated metrics such as accuracy, we simultaneously measure multiple metrics (e.g., accuracy, robustness, calibration, efficiency) for each scenario, allowing analysis of tradeoffs.",
        )
        .append(
          $("<div>", { class: "text-center" }).append(
            $("<img>", {
              src: "images/scenarios-by-metrics.png",
              width: "300px",
            }),
          ),
        ),
      $("<li>")
        .append(
          '<b>Standardization</b>. We evaluate all the models that we have access to on the same scenarios with the same adaptation strategy (e.g., prompting), allowing for controlled comparisons. Thanks to all the companies for providing API access to the limited-access and closed models and <a href="https://together.xyz">Together</a> for providing the infrastructure to run the open models.',
        )
        .append($organizations),
      $("<li>").append(
        "<b>Transparency</b>. All the scenarios, predictions, prompts, code are available for further analysis on this website. We invite you to click below to explore!",
      ),
    ]),
  );

  // Description centered between two spacer columns.
  $result.append([
    $("<div>", { class: "col-sm-2" }),
    $description,
    $("<div>", { class: "col-sm-2" }),
  ]);

  const $models = renderModelList();
  const $scenarios = renderScenarioList();
  const $metrics = renderMetricsList();
  $result.append([
    $("<div>", { class: "col-sm-2" }),
    $models,
    $scenarios,
    $metrics,
    $("<div>", { class: "col-sm-1" }),
  ]);

  return $result;
}
|
|
1396
|
-
|
|
1397
|
-
/**
 * Builds the jQuery <td> element for a single table cell.
 *
 * The displayed content is `cell.display_value` when truthy, otherwise
 * `cell.value`; a null/undefined result is shown as "-". Numbers are rounded
 * to three decimal places. When `cell.lower_is_better` is exactly `true` or
 * `false`, a down or up arrow is appended to the content.
 *
 * @param {Object} cell - Cell description. Fields read here: `display_value`,
 *   `value`, `lower_is_better`, `markdown`, `style`, `description`, `href`.
 * @returns {jQuery} A <td> wrapping a <span> (itself wrapped in an <a> when
 *   `cell.href` is set).
 */
function renderCell(cell) {
  const DOWN_ARROW = " \u2193";
  const UP_ARROW = " \u2191";

  let content = cell.display_value || cell.value;
  if (content == null) {
    content = "-";
  } else if (typeof content === "number") {
    content = Math.round(content * 1000) / 1000;
  }

  // Only the strict booleans add an arrow; an unset flag adds nothing.
  if (cell.lower_is_better === true) {
    content += DOWN_ARROW;
  } else if (cell.lower_is_better === false) {
    content += UP_ARROW;
  }

  const $span = $("<span>");
  if (cell.markdown && content) {
    // Markdown output is appended as-is rather than set as text.
    $span.append(renderMarkdown("" + content));
  } else {
    $span.text(content);
  }

  if (cell.style) {
    $span.css(cell.style);
  }
  if (cell.description) {
    $span.attr("title", cell.description);
  }

  let $content = $span;
  if (cell.href) {
    $content = $("<a>", { href: cell.href }).append($span);
  }
  return $("<td>").append($content);
}
|
|
1429
|
-
|
|
1430
|
-
/**
 * Builds the <thead> element for a table.
 *
 * Every header cell whose `lower_is_better` is a strict boolean gets a
 * "[ sort ]" link. Clicking it replaces the table's <tbody> with one sorted by
 * that column ("desc" when higher is better, "asc" when lower is better) and
 * moves the "table-sort-column" class to the clicked header cell.
 *
 * @param {Object} table - Table whose `header` array is rendered.
 * @param {number} [sortColumnIndex] - Index of the header cell that starts out
 *   marked with the "table-sort-column" class.
 * @returns {jQuery} The <thead> element containing one header row.
 */
function renderTableHeader(table, sortColumnIndex) {
  const $thead = $("<thead>");

  const buildHeaderCell = (headerCell, columnIndex) => {
    const $headerCell = renderCell(headerCell);

    let columnSortOrder = "";
    if (headerCell.lower_is_better === false) {
      columnSortOrder = "desc";
    } else if (headerCell.lower_is_better === true) {
      columnSortOrder = "asc";
    }

    if (columnSortOrder) {
      const $sortLink = $("<a>", { href: "#" })
        .append("sort")
        .click(() => {
          // Swap the current body for a freshly sorted one.
          const $table = $thead.parent("table");
          $table.find("tbody").remove();
          $table.append(renderTableBody(table, columnIndex, columnSortOrder));
          // Move the highlight from whichever header cell had it to this one.
          $headerCell.parent().find("td").removeClass("table-sort-column");
          $headerCell.addClass("table-sort-column");
          // Returning false keeps the "#" link from being followed.
          return false;
        });
      $headerCell.append(" [ ").append($sortLink).append(" ]");
    }

    if (sortColumnIndex === columnIndex) {
      $headerCell.addClass("table-sort-column");
    }
    return $headerCell;
  };

  const $headerRow = $("<tr>").append(table.header.map(buildHeaderCell));
  return $thead.append($headerRow);
}
|
|
1463
|
-
|
|
1464
|
-
/**
 * Returns a jQuery <tbody> element that contains the rows in the given table.
 *
 * The rows are copied before sorting, so `table.rows` is never reordered.
 * Cells whose `value` is missing (undefined or null) always sort last,
 * regardless of the sort direction.
 *
 * @param {Object} table - The table to render. Should conform to the Python Table
 *     dataclass schema.
 * @param {number} [sortColumnIndex] - If set and >= 0, the index of the column to
 *     sort by. The cells of that column get the "table-sort-column" class.
 * @param {string} [sortOrder] - If set, determines whether to sort in ascending or
 *     descending order. Should be either "asc" or "desc". If unset, defaults to
 *     "asc".
 * @returns {jQuery} The <tbody> element with one <tr> per row.
 */
function renderTableBody(table, sortColumnIndex, sortOrder) {
  // Declared locally: the previous bare assignment created an implicit global
  // (and is a ReferenceError in strict mode).
  const $tableBody = $("<tbody>");

  // One guard shared by sorting and highlighting, so that an unset, null or
  // negative index consistently means "no sort column".
  const hasSortColumn = sortColumnIndex != null && sortColumnIndex >= 0;
  const descending = sortOrder === "desc";

  const rows = table.rows.slice();
  if (hasSortColumn) {
    // Missing values are always last in the sort order.
    const missingKey = descending ? -Infinity : Infinity;
    const sortKey = (row) => {
      const cell = row[sortColumnIndex];
      const cellValue = cell == null ? undefined : cell.value;
      return cellValue == null ? missingKey : cellValue;
    };
    rows.sort((row0, row1) => {
      const key0 = sortKey(row0);
      const key1 = sortKey(row1);
      // Handle Infinity === Infinity or -Infinity === -Infinity, whose
      // difference would otherwise be NaN.
      if (key0 === key1) {
        return 0;
      }
      return descending ? key1 - key0 : key0 - key1;
    });
  }

  rows.forEach((row) => {
    const $cells = row.map((cell) => renderCell(cell));
    if (hasSortColumn && $cells[sortColumnIndex]) {
      $cells[sortColumnIndex].addClass("table-sort-column");
    }
    $tableBody.append($("<tr>").append($cells));
  });
  return $tableBody;
}
|
|
1506
|
-
|
|
1507
|
-
/**
 * Renders one table: a title heading (with a named anchor), the table itself,
 * and, when present, a list of links below it.
 *
 * The table is initially sorted by the first header column whose
 * `lower_is_better` is defined: descending when it is `false`, ascending when
 * it is `true`.
 *
 * Link hrefs that contain "benchmark_output" are re-rooted: everything before
 * the first occurrence is replaced by BENCHMARK_OUTPUT_BASE_URL and everything
 * from that occurrence onwards is kept. Other hrefs are logged and used as-is.
 *
 * @param {Object} table - Table to render. Fields read here: `title`,
 *   `header`, `links` (optional); the whole object is passed on to
 *   renderTableHeader and renderTableBody.
 * @returns {jQuery} A <div> containing the heading, the table and the links.
 */
function renderTable(table) {
  const $output = $("<div>");
  $output.append(
    $("<h3>").append($("<a>", { name: table.title }).append(table.title)),
  );
  const $table = $("<table>", { class: "query-table results-table" });

  // Default sort: the first column that declares a preferred direction.
  let sortColumnIndex = undefined;
  let sortOrder = undefined;
  for (let i = 0; i < table.header.length; i++) {
    const lowerIsBetter = table.header[i].lower_is_better;
    if (lowerIsBetter === undefined) {
      continue;
    }
    sortColumnIndex = i;
    if (lowerIsBetter === false) {
      sortOrder = "desc";
    } else if (lowerIsBetter === true) {
      sortOrder = "asc";
    }
    break;
  }
  $table.append(renderTableHeader(table, sortColumnIndex));
  $table.append(renderTableBody(table, sortColumnIndex, sortOrder));
  $output.append($table);

  // Links
  const links = table.links || [];
  if (links.length > 0) {
    const delimiter = "benchmark_output";
    $output.append(
      renderItems(
        links.map((link) => {
          // Replace everything before benchmark output, as it is incorrect.
          // Slicing from the first occurrence keeps the rest of the path
          // intact even if the delimiter text appears in it again.
          const position = link.href.indexOf(delimiter);
          if (position < 0) {
            console.log(
              "benchmark_output not found, invalid link for ",
              table,
            );
            return $("<a>", { href: link.href }).append(link.text);
          }
          const href = BENCHMARK_OUTPUT_BASE_URL + link.href.slice(position);
          return $("<a>", { href: href }).append(link.text);
        }),
      ),
    );
  }
  return $output;
}
|
|
1556
|
-
|
|
1557
|
-
/**
 * Renders a list of tables preceded by a list of jump links.
 *
 * The jump list holds one "#<title>" link per table followed by a "JSON" link
 * to `path`. Each table is then rendered inside a "table-container" <div>
 * whose id is the table title, which is what the jump links target.
 *
 * @param {Object[]} tables - Tables to render; each must have a `title`.
 * @param {string} path - URL used as the href of the "JSON" link.
 * @returns {jQuery} A <div> containing the jump list and all tables.
 */
function renderTables(tables, path) {
  const $container = $("<div>");

  // Links to tables
  const $jsonLink = $("<a>", { href: path }).append("JSON");
  const $jumpLinks = tables.map(({ title }) =>
    $("<a>", { href: "#" + title }).append(title),
  );
  $container.append(renderItems([...$jumpLinks, $jsonLink]));

  const $sections = tables.map((table) => {
    const $section = $("<div>", { class: "table-container", id: table.title });
    return $section.append(renderTable(table));
  });
  $container.append($sections);

  return $container;
}
|
|
1582
|
-
|
|
1583
|
-
//////////////////////////////////////////////////////////////////////////////
|
|
1584
|
-
// Main //
|
|
1585
|
-
//////////////////////////////////////////////////////////////////////////////
|
|
1586
|
-
// Page containers that the rendering code below fills in.
const $main = $("#main");
const $summary = $("#summary");

// Allow overriding release or suite with URL params for debugging.
// Setting one clears the other, and `release` wins when both are given.
const { release: releaseOverride, suite: suiteOverride } = urlParams;
if (releaseOverride) {
  window.RELEASE = releaseOverride;
  window.SUITE = null;
} else if (suiteOverride) {
  window.RELEASE = null;
  window.SUITE = suiteOverride;
}

// Fetch the schema and store it, wrapped in a Schema, in `schema`.
const schemaPromise = $.getJSON(schemaJsonUrl(), {}, (rawSchema) => {
  console.log("schema", rawSchema);
  schema = new Schema(rawSchema);
});

// Fetch the summary, store it in `summary`, and show a one-line description
// of the release or suite in the summary container.
const summaryPromise = $.get(summaryJsonUrl(), {}, (loadedSummary) => {
  console.log("summary", loadedSummary);
  summary = loadedSummary;
  const label = window.RELEASE
    ? `Release ${summary.release}`
    : `Suite ${summary.suite}`;
  $summary.append(`${label} (last updated ${summary.date})`);
});
|
|
1614
|
-
|
|
1615
|
-
// Once both the schema and the summary have loaded, pick the page to render
// from the URL parameters. The first matching parameter wins, in this order:
// models, scenarios, plots, runSpec / runSpecs / runSpecRegex, runs, groups,
// group; with none of them set the landing page is shown.
$.when(schemaPromise, summaryPromise).then(() => {
  if (urlParams.models) {
    // Models
    $main.empty();
    $main.append(renderHeader("Models", renderModels()));
    refreshHashLocation();
  } else if (urlParams.scenarios) {
    // Scenarios
    $main.empty();
    $main.append(renderHeader("Scenarios", renderScenarios()));
    refreshHashLocation();
  } else if (urlParams.plots) {
    // Plots
    $main.empty();
    $main.append(renderHeader("Plots", renderPlots()));
    refreshHashLocation();
  } else if (
    urlParams.runSpec ||
    urlParams.runSpecs ||
    urlParams.runSpecRegex
  ) {
    // Predictions for a set of run specs (matching a regular expression)
    $main.text("Loading runs...");
    $.getJSON(runSpecsJsonUrl(), {}, (response) => {
      $main.empty();
      const runSpecs = response;
      console.log("runSpecs", runSpecs);
      let matcher;
      if (urlParams.runSpec) {
        // Exactly one
        matcher = (runSpec) => runSpec.name === urlParams.runSpec;
      } else if (urlParams.runSpecs) {
        // List (JSON-encoded array of run spec names)
        const selectedRunSpecs = JSON.parse(urlParams.runSpecs);
        matcher = (runSpec) => selectedRunSpecs.includes(runSpec.name);
      } else if (urlParams.runSpecRegex) {
        // Regular expression, matched against the whole name. The pattern is
        // wrapped in a non-capturing group so that the anchors also apply to
        // top-level alternatives: "^a|b$" would otherwise mean "starts with a
        // OR ends with b".
        const regex = new RegExp("^(?:" + urlParams.runSpecRegex + ")$");
        matcher = (runSpec) => regex.test(runSpec.name);
      } else {
        // Unreachable given the enclosing condition.
        throw new Error("Internal error");
      }
      const matchedRunSpecs = runSpecs.filter(matcher);
      if (matchedRunSpecs.length === 0) {
        $main.append(renderError("No matching runs"));
      } else {
        // The run-to-suite mapping is only fetched when a release is set;
        // otherwise an empty mapping is used.
        const getRunToRunSuitesPromise = window.RELEASE
          ? $.get(runsToRunSuitesJsonUrl(), {})
          : $.Deferred().resolve({});
        getRunToRunSuitesPromise.then((runNameToSuite) => {
          $main.append(renderRunsDetailed(matchedRunSpecs, runNameToSuite));
        });
      }
      refreshHashLocation();
    });
  } else if (urlParams.runs) {
    // All runs (with search)
    $main.text("Loading runs...");
    $.getJSON(runSpecsJsonUrl(), {}, (runSpecs) => {
      $main.empty();
      console.log("runSpecs", runSpecs);
      $main.append(renderHeader("Runs", renderRunsOverview(runSpecs)));
    });
  } else if (urlParams.groups) {
    // All groups
    $main.text("Loading groups...");
    const path = groupsJsonUrl();
    $.getJSON(path, {}, (tables) => {
      $main.empty();
      console.log("groups", tables);
      $main.append(renderTables(tables, path));
      refreshHashLocation();
    });
  } else if (urlParams.group) {
    // Specific group
    $main.text("Loading group...");
    const path = groupJsonUrl(urlParams.group);
    $.getJSON(path, {}, (tables) => {
      $main.empty();
      console.log("group", tables);
      $main.append(renderGroupHeader());
      $main.append(renderTables(tables, path));
      refreshHashLocation();
    });
  } else {
    // Main landing page
    $main.empty();
    $main.append(renderMainPage());
  }
});
|
|
1705
|
-
});
|