crfm-helm 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (184)
  1. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/METADATA +29 -55
  2. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/RECORD +146 -134
  3. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  5. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  6. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  7. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  8. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  9. helm/benchmark/annotation/anthropic_red_team_annotator.py +70 -0
  10. helm/benchmark/annotation/call_center_annotator.py +247 -0
  11. helm/benchmark/annotation/financebench_annotator.py +79 -0
  12. helm/benchmark/annotation/harm_bench_annotator.py +68 -0
  13. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  14. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  15. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  16. helm/benchmark/annotation/live_qa_annotator.py +32 -45
  17. helm/benchmark/annotation/medication_qa_annotator.py +31 -44
  18. helm/benchmark/annotation/model_as_judge.py +45 -0
  19. helm/benchmark/annotation/simple_safety_tests_annotator.py +64 -0
  20. helm/benchmark/annotation/xstest_annotator.py +110 -0
  21. helm/benchmark/metrics/annotation_metrics.py +108 -0
  22. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  23. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  24. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  25. helm/benchmark/metrics/safety_metrics.py +57 -0
  26. helm/benchmark/metrics/summac/model_summac.py +3 -3
  27. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  28. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  29. helm/benchmark/metrics/vision_language/image_metrics.py +1 -1
  30. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  31. helm/benchmark/model_metadata_registry.py +3 -3
  32. helm/benchmark/presentation/test_run_entry.py +1 -0
  33. helm/benchmark/run.py +15 -0
  34. helm/benchmark/run_expander.py +56 -30
  35. helm/benchmark/run_specs/bhasa_run_specs.py +638 -0
  36. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  37. helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
  38. helm/benchmark/run_specs/experimental_run_specs.py +52 -0
  39. helm/benchmark/run_specs/finance_run_specs.py +78 -1
  40. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  41. helm/benchmark/run_specs/vlm_run_specs.py +92 -21
  42. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  43. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  44. helm/benchmark/scenarios/bhasa_scenario.py +1798 -0
  45. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  46. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  47. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  48. helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
  49. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  50. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  51. helm/benchmark/scenarios/scenario.py +1 -1
  52. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  53. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  54. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  55. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  56. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  57. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  58. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  59. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  60. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  61. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  62. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  63. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  64. helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
  65. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  66. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  67. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  68. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  69. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  70. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  71. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  72. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
  73. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
  74. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  75. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  76. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  77. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
  78. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  79. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  80. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  81. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  82. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  83. helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
  84. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  85. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  86. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  87. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
  88. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
  89. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  90. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  91. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  92. helm/benchmark/server.py +1 -6
  93. helm/benchmark/static/schema_air_bench.yaml +750 -750
  94. helm/benchmark/static/schema_bhasa.yaml +709 -0
  95. helm/benchmark/static/schema_call_center.yaml +232 -0
  96. helm/benchmark/static/schema_cleva.yaml +768 -0
  97. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  98. helm/benchmark/static/schema_ewok.yaml +367 -0
  99. helm/benchmark/static/schema_finance.yaml +55 -9
  100. helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
  101. helm/benchmark/static/schema_safety.yaml +247 -0
  102. helm/benchmark/static/schema_tables.yaml +124 -7
  103. helm/benchmark/static/schema_thai.yaml +21 -0
  104. helm/benchmark/static/schema_vhelm.yaml +96 -91
  105. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  106. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  107. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  108. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  109. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  110. helm/benchmark/static_build/assets/index-58f97dcd.js +10 -0
  111. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  112. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  113. helm/benchmark/static_build/index.html +2 -2
  114. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  115. helm/clients/ai21_client.py +71 -1
  116. helm/clients/anthropic_client.py +7 -19
  117. helm/clients/huggingface_client.py +38 -37
  118. helm/clients/nvidia_nim_client.py +35 -0
  119. helm/clients/openai_client.py +2 -3
  120. helm/clients/palmyra_client.py +25 -0
  121. helm/clients/perspective_api_client.py +11 -6
  122. helm/clients/test_client.py +4 -6
  123. helm/clients/vision_language/open_flamingo_client.py +1 -2
  124. helm/clients/vision_language/palmyra_vision_client.py +28 -13
  125. helm/common/images_utils.py +6 -0
  126. helm/common/mongo_key_value_store.py +2 -1
  127. helm/common/request.py +16 -0
  128. helm/config/model_deployments.yaml +315 -332
  129. helm/config/model_metadata.yaml +384 -110
  130. helm/config/tokenizer_configs.yaml +116 -11
  131. helm/proxy/example_queries.py +14 -21
  132. helm/proxy/services/server_service.py +1 -2
  133. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  134. helm/tokenizers/ai21_tokenizer.py +51 -59
  135. helm/tokenizers/cohere_tokenizer.py +0 -75
  136. helm/tokenizers/huggingface_tokenizer.py +0 -1
  137. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  138. helm/benchmark/static/benchmarking.css +0 -156
  139. helm/benchmark/static/benchmarking.js +0 -1705
  140. helm/benchmark/static/config.js +0 -3
  141. helm/benchmark/static/general.js +0 -122
  142. helm/benchmark/static/images/crfm-logo.png +0 -0
  143. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  144. helm/benchmark/static/images/helm-logo.png +0 -0
  145. helm/benchmark/static/images/language-model-helm.png +0 -0
  146. helm/benchmark/static/images/organizations/ai21.png +0 -0
  147. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  148. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  149. helm/benchmark/static/images/organizations/cohere.png +0 -0
  150. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  151. helm/benchmark/static/images/organizations/google.png +0 -0
  152. helm/benchmark/static/images/organizations/meta.png +0 -0
  153. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  154. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  155. helm/benchmark/static/images/organizations/openai.png +0 -0
  156. helm/benchmark/static/images/organizations/together.png +0 -0
  157. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  158. helm/benchmark/static/images/organizations/yandex.png +0 -0
  159. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  160. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  161. helm/benchmark/static/index.html +0 -68
  162. helm/benchmark/static/info-icon.png +0 -0
  163. helm/benchmark/static/json-urls.js +0 -69
  164. helm/benchmark/static/plot-captions.js +0 -27
  165. helm/benchmark/static/utils.js +0 -285
  166. helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
  167. helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
  168. helm/benchmark/window_services/ai21_window_service.py +0 -247
  169. helm/benchmark/window_services/cohere_window_service.py +0 -101
  170. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  171. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  172. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  173. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  174. helm/tokenizers/ice_tokenizer.py +0 -30
  175. helm/tokenizers/test_ice_tokenizer.py +0 -57
  176. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/LICENSE +0 -0
  177. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/entry_points.txt +0 -0
  178. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.3.dist-info}/top_level.txt +0 -0
  179. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  180. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  181. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/__init__.py +0 -0
  182. /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
  183. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/__init__.py +0 -0
  184. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
@@ -1,1705 +0,0 @@
1
- /**
2
- * A very simple static way to visualize the scenarios, runs, and metrics from the benchmarking project.
3
- * This code doesn't really belong in `proxy`, but is there for convenience.
4
- */
5
-
6
- // Specifies all the information to help us render and understand the fields
7
- // for adapters and metrics.
8
- // Look at `schema.py` for the actual schema.
9
- class Schema {
10
- constructor(raw) {
11
- this.models = raw.models;
12
- this.adapter = raw.adapter;
13
- this.metrics = raw.metrics;
14
- this.perturbations = raw.perturbations;
15
- this.run_groups = raw.run_groups;
16
- this.metric_groups = raw.metric_groups;
17
-
18
- // Allow for quick lookup
19
- this.adapterFieldNames = this.adapter.map((field) => field.name);
20
- this.metricsFieldNames = this.metrics.map((field) => field.name);
21
- }
22
-
23
- adapterField(name) {
24
- const field = this.adapter.find((field) => field.name === name);
25
- if (!field) {
26
- console.error(`Adapter field ${name} not found`);
27
- return {};
28
- }
29
- return field;
30
- }
31
-
32
- metricsField(name) {
33
- const field = this.metrics.find((field) => field.name === name);
34
- if (!field) {
35
- console.error(`Metrics field ${name} not found`);
36
- return {};
37
- }
38
- return field;
39
- }
40
-
41
- metricGroup(name) {
42
- return this.metric_groups.find((group) => group.name === name);
43
- }
44
- }
45
-
46
- $(function () {
47
- // Array of String containing RunSpec names for which
48
- // the JSON for displaying requests has been loaded.
49
- const runSpecsNamesWithLoadedRequests = [];
50
- const urlParams = decodeUrlParams(window.location.search);
51
-
52
- /////////////////////////////////// Pages ////////////////////////////////////
53
-
54
- function renderModels() {
55
- const $table = $("<table>", { class: "query-table results-table" });
56
- const $header = $("<tr>").append([
57
- $("<td>").append("Creator"),
58
- $("<td>").append("Model"),
59
- $("<td>").append("Description"),
60
- $("<td>").append("Access"),
61
- ]);
62
- $table.append($header);
63
-
64
- schema.models.forEach((model) => {
65
- const $name = $("<div>").append([
66
- $("<div>").append(model.display_name),
67
- $("<div>", { class: "technical-details" }).append(model.name),
68
- ]);
69
- const $row = $("<tr>").append([
70
- $("<td>").append(model.creator_organization),
71
- $("<td>").append($name),
72
- $("<td>").append(renderMarkdown(model.description)),
73
- $("<td>").append(renderAccess(model.access)),
74
- ]);
75
- $table.append($row);
76
- });
77
- return $table;
78
- }
79
-
80
- function renderScenarios() {
81
- const $table = $("<table>", { class: "query-table results-table" });
82
-
83
- const $header = $("<tr>").append([
84
- $("<td>").append("Scenario"),
85
- $("<td>").append("Task"),
86
- $("<td>").append("What"),
87
- $("<td>").append("Who"),
88
- $("<td>").append("When"),
89
- $("<td>").append("Language"),
90
- $("<td>").append("Description"),
91
- ]);
92
- $table.append($header);
93
-
94
- schema.run_groups.forEach((group) => {
95
- if (group.category && group.category !== "Scenarios") {
96
- return;
97
- }
98
- const href = groupUrl(group.name);
99
- const $name = $("<div>").append([
100
- $("<div>").append($("<a>", { href }).append(group.display_name)),
101
- $("<div>", { class: "technical-details" }).append(group.name),
102
- ]);
103
- const task = group.taxonomy && group.taxonomy.task;
104
- const what = group.taxonomy && group.taxonomy.what;
105
- const who = group.taxonomy && group.taxonomy.who;
106
- const when = group.taxonomy && group.taxonomy.when;
107
- const language = group.taxonomy && group.taxonomy.language;
108
- const $row = $("<tr>").append([
109
- $("<td>").append($name),
110
- $("<td>").append(task),
111
- $("<td>").append(what),
112
- $("<td>").append(who),
113
- $("<td>").append(when),
114
- $("<td>").append(language),
115
- $("<td>").append(renderMarkdown(group.description)),
116
- ]);
117
- $table.append($row);
118
- });
119
- return $table;
120
- }
121
-
122
- function renderPlots() {
123
- const container = $("<div>", { class: "container" });
124
- const links = $("<div>");
125
- container.append(links);
126
- const tableLinks = [];
127
-
128
- function renderPlot(name, title) {
129
- const plot = $("<div>", { class: "plot" });
130
- const caption = $("<div>", { class: "plot-caption" }).append(
131
- plotCaptions[name],
132
- );
133
-
134
- plot.append($("<h3>").append($("<a>", { id: title }).append(title)));
135
- plot.append(caption);
136
- plot.append(
137
- $("<img>", { src: plotUrl(release, name), class: "img-fluid" }),
138
- );
139
- container.append(plot);
140
- tableLinks.push($("<a>", { href: "#" + title }).append(title));
141
- }
142
-
143
- renderPlot("generic_summary", "Metric spread for core scenarios");
144
- renderPlot("model_ranking_all", "Head-to-head win rate per each model");
145
-
146
- renderPlot("accuracy_v_x", "Accuracy as a function of other metrics");
147
- renderPlot("metric_correlation", "Correlation between metrics");
148
-
149
- renderPlot("accuracy_v_access", "Accuracy as a function of model access");
150
- renderPlot("accuracy_over_num_parameters", "Accuracy across model sizes");
151
- renderPlot("accuracy_over_release_date", "Accuracy over time");
152
- renderPlot(
153
- "accuracy_over_the_pile_perplexity",
154
- "Accuracy as a function of The Pile perplexity",
155
- );
156
-
157
- renderPlot("targeted_evals", "Targeted evaluations");
158
-
159
- renderPlot(
160
- "in_context_ablations",
161
- "Number of in-context examples ablation",
162
- );
163
- renderPlot("mc_ablations", "Multiple-choice adaptation ablation");
164
-
165
- links.append(renderItems(tableLinks));
166
-
167
- return container;
168
- }
169
-
170
- function renderRunsOverview(runSpecs) {
171
- let query = "";
172
- const $search = $("<input>", {
173
- type: "text",
174
- size: 40,
175
- placeholder: "Enter regex query (enter to open all)",
176
- });
177
- console.log(urlParams);
178
- $search.keyup((e) => {
179
- // Open up all match specs
180
- if (e.keyCode === 13) {
181
- const href = encodeUrlParams(
182
- Object.assign({}, urlParams, { runSpecRegex: ".*" + query + ".*" }),
183
- );
184
- console.log(urlParams, href);
185
- window.open(href);
186
- }
187
- query = $search.val();
188
- renderRunsTable();
189
- });
190
-
191
- const $table = $("<table>", { class: "query-table" });
192
-
193
- function renderRunsTable() {
194
- $table.empty();
195
- const $header = $("<tr>")
196
- .append($("<td>").append($("<b>").append("Run")))
197
- .append($("<td>").append($("<b>").append("Adaptation method")));
198
- $table.append($header);
199
-
200
- runSpecs.forEach((runSpec) => {
201
- if (!new RegExp(query).test(runSpec.name)) {
202
- return;
203
- }
204
- const href = encodeUrlParams(
205
- Object.assign({}, urlParams, { runSpec: runSpec.name }),
206
- );
207
- const $row = $("<tr>")
208
- .append($("<td>").append($("<a>", { href }).append(runSpec.name)))
209
- .append($("<td>").append(runSpec.adapter_spec.method));
210
- $table.append($row);
211
- });
212
- }
213
-
214
- renderRunsTable();
215
-
216
- return $("<div>").append([$search, $table]);
217
- }
218
-
219
- // Look at logic in `summarize.py`.
220
- function getMetricNames(scenarioGroup) {
221
- // A (scenario/run) group defines a list of metric groups, each of which defines the metrics.
222
- // Just pull the names from those metrics.
223
- const names = [];
224
- scenarioGroup.metric_groups.forEach((metricGroupName) => {
225
- const metricGroup = schema.metricGroup(metricGroupName);
226
- metricGroup.metrics.forEach((metric) => {
227
- // This function is supposed to return per-instance metrics, so exclude
228
- // metrics that mentions perturbations.
229
- if (metric.perturbation_name) {
230
- return;
231
- }
232
- names.push(substitute(metric.name, scenarioGroup.environment));
233
- });
234
- });
235
- return names;
236
- }
237
-
238
- let metricJudgements = null;
239
- function getMetricJudgements() {
240
- // Provide information
241
- // Return dictionary {metric name: {wrongThreshold, correctThreshold, lowerIsBetter}}
242
- // Example: {exact_match: {wrongThreshold: 0, correctThreshold: 1, lowerIsBetter: true}}
243
- // TODO: move the hard-coding into schema.yaml
244
- if (metricJudgements) {
245
- return metricJudgements;
246
- }
247
- metricJudgements = {};
248
- schema.run_groups.forEach((runGroup) => {
249
- const name = runGroup.environment && runGroup.environment.main_name;
250
- if (!["bits_per_byte"].includes(name)) {
251
- metricJudgements[name] = {
252
- wrongThreshold: 0,
253
- correctThreshold: 1,
254
- lowerIsBetter: false,
255
- };
256
- }
257
- });
258
- return metricJudgements;
259
- }
260
-
261
- function getStatClass(name, value) {
262
- // Return the CSS class to use if a stat has `value`.
263
- const judgements = getMetricJudgements();
264
- const judgement = judgements[name];
265
- if (!judgement) {
266
- return "";
267
- }
268
-
269
- // Based on `name` determine whether smaller or larger is better.
270
- if (judgement.lowerIsBetter === false) {
271
- if (value >= judgement.correctThreshold) {
272
- return "correct";
273
- }
274
- if (value <= judgement.wrongThreshold) {
275
- return "wrong";
276
- }
277
- }
278
-
279
- if (judgement.lowerIsBetter === true) {
280
- if (value <= judgement.correctThreshold) {
281
- return "correct";
282
- }
283
- if (value >= judgement.wrongThreshold) {
284
- return "wrong";
285
- }
286
- }
287
-
288
- return "";
289
- }
290
-
291
- function metricNameCompare(k1, k2) {
292
- const splitCompare = (k1.split || "").localeCompare(k2.split || "");
293
- if (splitCompare !== 0) {
294
- return splitCompare;
295
- }
296
- const nameCompare = k1.name.localeCompare(k2.name);
297
- if (nameCompare !== 0) {
298
- return nameCompare;
299
- }
300
- const perturbationCompare = (
301
- k1.perturbation ? k1.perturbation.name : ""
302
- ).localeCompare(k2.perturbation ? k2.perturbation.name : "");
303
- if (perturbationCompare !== 0) {
304
- return perturbationCompare;
305
- }
306
- return 0;
307
- }
308
-
309
- function renderGlobalStats(query, keys, statsList, statsPaths) {
310
- // Render the scenario-level metrics.
311
- // keys: list of metric names to render (these are the rows of table)
312
- // statsList: for each run, list of stats
313
- // statsPath: for each run, list of paths to the stats files
314
- const $output = $("<div>");
315
- keys.forEach((key) => {
316
- // For each key (MetricName - e.g., {name: 'exact_match', ...})
317
-
318
- if (key.perturbation && key.perturbation.computed_on !== "worst") {
319
- // Only pay attention to worst (match `summarize.py`)
320
- return;
321
- }
322
-
323
- const displayKey = renderMetricName(key);
324
- if (
325
- query !== "" &&
326
- !query.split(" ").every((q) => displayKey.includes(q))
327
- ) {
328
- return;
329
- }
330
-
331
- const field = schema.metricsField(key.name);
332
- const helpText = describeMetricName(field, key);
333
- const $key = $("<td>").append(
334
- $("<span>").append(helpIcon(helpText)).append(" ").append(displayKey),
335
- );
336
- const $row = $("<tr>").append($("<td>").append($key));
337
- statsList.forEach((stats) => {
338
- // stats: list of statistics corresponding to one run (column)
339
- const stat = stats.find((stat) => metricNameEquals(stat.name, key));
340
- $row.append(
341
- $("<td>").append(
342
- stat ? renderFieldValue(field, round(stat.mean, 3)) : "?",
343
- ),
344
- );
345
- });
346
- $output.append($row);
347
- });
348
-
349
- // Link to the JSON file
350
- $output.append(
351
- $("<tr>")
352
- .append($("<td>"))
353
- .append(
354
- statsPaths.map((statsPath) =>
355
- $("<td>").append($("<a>", { href: statsPath }).append("JSON")),
356
- ),
357
- ),
358
- );
359
- return $output;
360
- }
361
-
362
- function highlightNewWords(text, origText) {
363
- // Render `text`, highlighting any words that don't occur in `origText`
364
- // Ideally, we would form an alignment between `text` and `origText` and
365
- // show the full diff, but that's too expensive.
366
- const origWords = {};
367
- origText.split(" ").forEach((word) => {
368
- origWords[word] = true;
369
- });
370
- return text
371
- .split(" ")
372
- .map((word) =>
373
- !word.trim() || origWords[word] ? word : "<u>" + word + "</u>",
374
- )
375
- .join(" ");
376
- }
377
-
378
- function renderRunsHeader(scenario, scenarioPath, scenarioSpec) {
379
- const $output = $("<div>");
380
-
381
- $output.append(renderScenarioHeader(scenario, scenarioSpec));
382
-
383
- // Links
384
- const links = [];
385
- if (scenario) {
386
- links.push($("<a>", { href: scenario.definition_path }).append("Code"));
387
- }
388
- if (scenarioPath) {
389
- links.push($("<a>", { href: scenarioPath }).append("Scenario JSON"));
390
- }
391
- links.push($("<a>", { href: "#adapter" }).append("Adapter specification"));
392
- links.push(
393
- $("<a>", { href: "#instances" }).append("Instances + predictions"),
394
- );
395
- links.push($("<a>", { href: "#metrics" }).append("All metrics"));
396
- $output.append(renderItems(links));
397
-
398
- return $output;
399
- }
400
-
401
- function renderGroupHeader() {
402
- const $output = $("<div>");
403
- $.getJSON(groupsMetadataJsonUrl(), {}, (response) => {
404
- const group = response[urlParams.group];
405
- if (group) {
406
- let groupName = group.display_name;
407
- if (urlParams.subgroup) {
408
- groupName += " / " + urlParams.subgroup;
409
- }
410
- $output.append($("<h3>").append(groupName));
411
- $output.append(
412
- $("<div>").append($("<i>").append(renderMarkdown(group.description))),
413
- );
414
- if (group.taxonomy) {
415
- const $rows = Object.entries(group.taxonomy).map(([k, v]) => {
416
- return $("<tr>").append([
417
- $("<td>").append(`<b>${k}</b>`),
418
- $("<td>").append(v),
419
- ]);
420
- });
421
- $output.append(
422
- $("<table>", { class: "taxonomy-table" }).append($rows),
423
- );
424
- }
425
- }
426
- });
427
- return $output;
428
- }
429
-
430
- function renderScenarioHeader(scenario, scenarioSpec) {
431
- const $output = $("<div>");
432
- $output.append(
433
- $("<h3>").append(renderScenarioDisplayName(scenario, scenarioSpec)),
434
- );
435
- $output.append(
436
- $("<div>").append($("<i>").append(renderMarkdown(scenario.description))),
437
- );
438
- return $output;
439
- }
440
-
441
- function instanceKey(instance) {
442
- // The (instance id, perturbation) should be enough to uniquely identify the instance.
443
- return JSON.stringify([instance.id, instance.perturbation || "original"]);
444
- }
445
-
446
- function predictionInstanceKey(prediction) {
447
- return JSON.stringify([
448
- prediction.instance_id,
449
- prediction.perturbation || "original",
450
- ]);
451
- }
452
-
453
- function displayRequestInstanceKey(displayRequest) {
454
- return JSON.stringify([
455
- displayRequest.instance_id,
456
- displayRequest.perturbation || "original",
457
- ]);
458
- }
459
-
460
- Handlebars.registerHelper("highlightNewWords", (perturbed, unperturbed) => {
461
- return new Handlebars.SafeString(highlightNewWords(perturbed, unperturbed));
462
- });
463
-
464
- Handlebars.registerHelper("join", (strings, separator) => {
465
- return strings.join(separator);
466
- });
467
-
468
- Handlebars.registerHelper("pluralize", (quantity, singular, plural) => {
469
- return quantity === 1 ? singular : plural;
470
- });
471
-
472
- const instanceTemplate = Handlebars.compile(`
473
- {{#with instance}}
474
- {{#if perturbation}}
475
- <br>
476
- {{else}}
477
- <hr>
478
- {{/if}}
479
- <div class="instance">
480
- <div>
481
- <strong>
482
- Instance {{id}} [split: {{split}}]
483
- {{#if perturbation}}
484
- ...with perturbation: {{perturbation.name}}
485
- {{/if}}
486
- </strong>
487
- </div>
488
- {{#unless ../hideInputOutput}}
489
- {{#if input}}
490
- <div>Input:</div>
491
- <div class="instance-input">
492
- {{~#if perturbation~}}
493
- {{highlightNewWords input.text ../unperturbedInstance.input.text}}
494
- {{~else~}}
495
- {{{input.text}}}
496
- {{~/if~}}
497
- </div>
498
- {{/if}}
499
- {{#if references}}
500
- <div>{{pluralize references.length "Reference" "References"}}:</div>
501
- <ul>
502
- {{#each references}}
503
- <li>
504
- <span class="instance-reference">
505
- {{~#if ../perturbation~}}
506
- {{highlightNewWords output.text (lookup (lookup (lookup ../../unperturbedInstance.references @index) "output") "text")}}
507
- {{~else~}}
508
- {{output.text}}
509
- {{~/if~}}
510
- </span>
511
- {{#if tags}} <strong>[{{join tags ","}}]</strong>{{/if}}
512
- </li>
513
- {{/each}}
514
- </ul>
515
- {{/if}}
516
- {{/unless}}
517
- <div class="predictions"></div>
518
- </div>
519
- {{/with}}
520
- `);
521
-
522
- function renderScenarioInstances(instances, $instances) {
523
- // Render all the instances in a scenario, outputting to $instances.
524
- // Return a mapping from instance key to the div where
525
- // we're rendering the instance, so that we can put the predictions in the
526
- // right spot.
527
- const instanceKeyToDiv = {};
528
-
529
- // Keep track of the original (unperturbed) instances
530
- const id2originalInstance = {};
531
- instances.forEach((instance) => {
532
- if (!instance.perturbation) {
533
- id2originalInstance[instance.id] = instance;
534
- }
535
- });
536
- let instancesHtml = "";
537
- const keys = [];
538
- instances.forEach((instance) => {
539
- const key = instanceKey(instance);
540
- instancesHtml += instanceTemplate({
541
- instance,
542
- unperturbedInstance: id2originalInstance[instance.id],
543
- hideInputOutput: urlParams.hideInputOutput,
544
- });
545
- keys.push(key);
546
- });
547
- $instances.html(instancesHtml);
548
- const $divs = $instances.find(".instance");
549
- if (keys.length !== $divs.length) {
550
- console.error(
551
- "Could not map instance keys to divs because " +
552
- "keys length (" +
553
- keys.length +
554
- ") !== divs length (" +
555
- $divs.length +
556
- ")",
557
- );
558
- } else {
559
- keys.forEach((key, index) => {
560
- instanceKeyToDiv[key] = $divs.eq(index);
561
- });
562
- }
563
- return instanceKeyToDiv;
564
- }
565
-
566
- function loadAndRenderRequests(
567
- runSpec,
568
- suite,
569
- instanceKeyToDiv,
570
- predictedIndex,
571
- ) {
572
- if (runSpecsNamesWithLoadedRequests.includes(runSpec.name)) {
573
- return;
574
- }
575
- runSpecsNamesWithLoadedRequests.push(runSpec.name);
576
- $.getJSON(requestsJsonUrl(suite, runSpec.name), {}, (displayRequests) => {
577
- displayRequests.forEach((displayRequest) => {
578
- $request = instanceKeyToDiv[displayRequestInstanceKey(displayRequest)]
579
- .find(".request")
580
- .eq(displayRequest.train_trial_index);
581
- $request.empty().append(renderRequest(displayRequest.request));
582
- });
583
- });
584
- }
585
-
586
/**
 * Renders the request made to the API as a table wrapped in a <div>.
 *
 * The prompt is always the first row; every other field of `request` follows
 * in enumeration order. Non-string values are shown as JSON.
 *
 * All keys and values are inserted as text, never as HTML: request fields
 * routinely contain angle brackets (e.g. stop sequences or prompts with
 * markup) which would otherwise be parsed as tags and either disappear or
 * inject markup into the page.
 *
 * @param {Object} request - The request object to display.
 * @returns {jQuery} A <div> containing the request table.
 */
function renderRequest(request) {
  // jQuery's .text(undefined) is a getter, so normalize missing values to "".
  const asText = (value) => (value === undefined || value === null ? "" : value);

  const $requestTable = $("<table>");
  // The heading is kept as a direct child of the table to preserve the
  // existing layout.
  $requestTable.append($("<h6>").text("Request"));

  $requestTable.append(
    $("<tr>").append([
      $("<td>").text("prompt"),
      $('<td class="prompt">').text(asText(request.prompt)),
    ]),
  );

  Object.keys(request).forEach((requestKey) => {
    if (requestKey === "prompt") {
      return; // Already rendered as the first row.
    }
    const rawValue = request[requestKey];
    const displayValue =
      typeof rawValue === "string" ? rawValue : JSON.stringify(rawValue);
    $requestTable.append(
      $("<tr>").append([
        $("<td>").text(requestKey),
        $("<td>").text(asText(displayValue)),
      ]),
    );
  });

  return $("<div>").append($requestTable);
}
615
-
616
// Block helper: renders the block once per element of `context` and joins the
// rendered pieces with `separator`.
Handlebars.registerHelper("joinEach", function (context, separator, options) {
  const renderedItems = context.map(options.fn);
  return renderedItems.join(separator);
});
619
-
620
// Handlebars source for a single prediction: an optional metrics line, the
// "Prediction" link (annotated with run name, reference index and trial
// index when present) followed by the predicted text, and a hidden
// placeholder that is filled in when the requests are loaded.
// `predictedText` is emitted unescaped (triple-stash).
const predictionTemplateSource = `
  {{#if metrics}}
  <div>
  [
  {{#joinEach metrics " | " }}
  <span{{#if class}} class="{{class}}"{{/if}}>
  {{~name}}: {{value~}}
  </span>
  {{/joinEach}}
  ]
  </div>
  {{/if}}
  <div class="prediction">
  <strong><a href="#" class="load-requests">
  Prediction
  {{~#if runDisplayName~}}
  [{{runDisplayName}}]
  {{~/if~}}
  {{~#if prediction.reference_index~}}
  [ref {{prediction.reference_index}}]
  {{~/if~}}
  {{~#if numTrainTrials~}}
  \{trial {{prediction.train_trial_index~}} \}
  {{~/if~}}
  </a></strong>:
  <span class="prediction-text">{{{predictedText}}}</span>
  </div>
  <div class="request" style="display: none">Loading...</div>
`;
const predictionTemplate = Handlebars.compile(predictionTemplateSource);
649
-
650
/**
 * Adds the predictions of one run (and their per-instance metrics) to the
 * instance divs, and wires the "Prediction" links so that clicking one
 * toggles the request panel and lazily loads the requests of this run.
 *
 * Each instance gives rise to multiple requests:
 *
 * Identity of the instance (instanceKey):
 * - instance_id
 * - perturbation
 * Replication:
 * - train_trial_index
 * Instance-level decompositions:
 * - for adapter method = language_modeling, a long instance is broken up
 *   into multiple requests
 * - for adapter method = multiple_choice_separate_original, have one request
 *   per reference
 * - for adapter method = multiple_choice_separate_calibrated, have two
 *   requests per reference
 *
 * @param {Object} runSpec - Run spec of the run being rendered.
 * @param {string} runSuite - Suite forwarded to `loadAndRenderRequests`.
 * @param {string} runDisplayName - Short label shown next to each prediction.
 * @param {Object[]} predictions - Display predictions of the run.
 * @param {Object} instanceKeyToDiv - Map from instance key to instance div.
 * @param {jQuery} $instances - Container holding all instance divs.
 */
function renderPredictions(
  runSpec,
  runSuite,
  runDisplayName,
  predictions,
  instanceKeyToDiv,
  $instances,
) {
  const method = runSpec.adapter_spec.method;
  const numTrainTrials =
    predictions.reduce(
      (m, prediction) => Math.max(m, prediction.train_trial_index),
      -1,
    ) + 1;

  // Look for the default metrics for the group
  const metricNames = [];
  schema.run_groups.forEach((runGroup) => {
    if (!runSpec.groups.includes(runGroup.name)) {
      return;
    }
    getMetricNames(runGroup).forEach((name) => {
      if (!metricNames.includes(name)) {
        metricNames.push(name);
      }
    });
  });

  // The template emits `predictedText` unescaped so that the decorations
  // added below (gray "(unmapped)" / "..." spans) render as HTML. The model
  // text itself is therefore escaped here, after truncation, so that output
  // containing "<", ">" or "&" is shown literally instead of being parsed.
  const escapeText = (text) => Handlebars.escapeExpression(text);

  // For each request state (across all instances)...
  predictions.forEach((prediction) => {
    const $instanceDiv = instanceKeyToDiv[predictionInstanceKey(prediction)];
    if (!$instanceDiv) {
      console.error("Not found: " + predictionInstanceKey(prediction));
      return;
    }

    // Traverse into the prediction div within the instance div
    const $instance = $instanceDiv.find(".predictions");

    // For multiple_choice_separate_*, only render the request state for the predicted index
    const predictedIndex = prediction.stats["predicted_index"];
    if (
      prediction.reference_index !== undefined &&
      predictedIndex !== undefined &&
      prediction.reference_index !== predictedIndex
    ) {
      return;
    }

    // Render the prediction
    const rawText = prediction.predicted_text.trim();
    let predictedText;
    if (method === "multiple_choice_joint") {
      if (prediction.mapped_output !== undefined) {
        predictedText = escapeText(
          truncateMiddle(prediction.mapped_output.trim(), 30),
        );
      } else {
        predictedText =
          escapeText(truncateMiddle(rawText, 30)) +
          '<span style="color: gray"> (unmapped)</span>';
      }
    } else if (method.startsWith("multiple_choice_separate_")) {
      // For adapter method = separate, prediction starts with the prompt, strip it out
      if (prediction.truncated_predicted_text !== undefined) {
        predictedText =
          '<span style="color: lightgray">...</span> ' +
          escapeText(
            truncateMiddle(prediction.truncated_predicted_text.trim(), 30),
          );
      } else {
        console.warn("Prompt was not stripped from predicted text", rawText);
        predictedText = escapeText(truncateMiddle(rawText, 30));
      }
    } else if (method === "language_modeling") {
      // For language modeling, first token is just padding, so strip it out
      if (prediction.truncated_predicted_text !== undefined) {
        predictedText = escapeText(
          truncateMiddle(prediction.truncated_predicted_text.trim(), 30),
        );
      } else {
        console.warn(
          "First token was not stripped from predicted text",
          rawText,
        );
        predictedText = escapeText(truncateMiddle(rawText, 30));
      }
    } else {
      predictedText = escapeText(rawText);
    }

    const metrics = [];
    metricNames.forEach((metricName) => {
      const metricValue = prediction.stats[metricName];
      if (metricValue !== undefined) {
        const displayName = schema.metricsField(metricName).display_name;
        const statClass = getStatClass(metricName, metricValue);
        metrics.push({
          name: displayName,
          value: metricValue,
          class: statClass,
        });
      }
    });
    $instance.append(
      predictionTemplate({
        runDisplayName,
        prediction,
        predictedText,
        numTrainTrials,
        metrics,
      }),
    );
  });

  // This function is called once per run with the same `$instances`, so only
  // bind the links added by this call. Binding every link again would stack
  // handlers on earlier runs' links (toggling the panel several times per
  // click and loading requests for the wrong run).
  const BOUND_FLAG = "loadRequestsBound";
  $instances
    .find("a.load-requests")
    .filter((_, link) => !$(link).data(BOUND_FLAG))
    .data(BOUND_FLAG, true)
    .click((event) => {
      $(event.target).closest(".prediction").next(".request").slideToggle();
      loadAndRenderRequests(runSpec, runSuite, instanceKeyToDiv);
      return false;
    });
}
779
-
780
/**
 * Renders all the `runSpecs` side by side:
 *   1. Adapter specification
 *   2. Instances + predictions
 *   3. Stats
 * For each block, we show a table and each `runSpec` is a column.
 *
 * The returned element is populated asynchronously: the sections first show
 * a "Loading..." placeholder which is replaced once the corresponding JSON
 * has been fetched.
 *
 * @param {Object[]} runSpecs - Run specs to display.
 * @param {Object} runNameToSuite - Lookup forwarded to `getSuiteForRun`.
 * @returns {jQuery} Root <div> of the detailed view.
 */
function renderRunsDetailed(runSpecs, runNameToSuite) {
  // The scenario header and the instances are taken from the first run, so
  // there is nothing meaningful to render without at least one run.
  if (runSpecs.length === 0) {
    return $("<div>").text("No runs to display.");
  }

  const suiteOf = (runSpec) => getSuiteForRun(runNameToSuite, runSpec.name);
  const pathsFor = (urlFn) =>
    runSpecs.map((runSpec) => urlFn(suiteOf(runSpec), runSpec.name));

  // Paths (parallel arrays corresponding to `runSpecs`)
  const statsPaths = pathsFor(statsJsonUrl);
  const scenarioStatePaths = pathsFor(scenarioStateJsonUrl);
  const runSpecPaths = pathsFor(runSpecJsonUrl);
  const predictionsPaths = pathsFor(predictionsJsonUrl);

  // Figure out short names for the runs based on where they differ
  const runDisplayNames = findDiff(
    runSpecs.map((runSpec) => runSpec.adapter_spec),
  ).map(renderDict);

  // Setup the basic HTML elements
  const $root = $("<div>");
  const $scenarioInfo = $("<div>", { class: "scenario-info" });
  $scenarioInfo.text("Loading scenario info...");
  $root.append($scenarioInfo);

  // Adapter
  $root.append(
    $("<a>", { name: "adapter" }).append(
      $("<h5>").append("Adapter specification"),
    ),
  );
  const $adapterSpec = $("<table>");
  if (runSpecs.length > 1) {
    $adapterSpec.append(
      $("<tr>")
        .append($("<td>"))
        .append(runDisplayNames.map((name) => $("<td>").append(name))),
    );
  }
  $root.append($("<div>", { class: "table-container" }).append($adapterSpec));

  // Instances
  $root.append(
    $("<a>", { name: "instances" }).append(
      $("<h5>").append("Instances + predictions"),
    ),
  );
  const $instancesContainer = $("<div>");
  $instancesContainer
    .addClass("table-container")
    .text("Loading instances...");
  $root.append($instancesContainer);

  // Metrics
  $root.append(
    $("<a>", { name: "metrics" }).append($("<h5>").append("All metrics")),
  );
  const $statsContainer = $("<div>");
  $statsContainer.addClass("table-container").text("Loading metrics...");
  $root.append($statsContainer);

  // Render adapter specs
  $adapterSpec.append(
    $("<tr>")
      .append($("<td>"))
      .append(
        scenarioStatePaths.map((scenarioStatePath, index) => {
          return $("<td>")
            .append(
              $("<a>", { href: runSpecPaths[index] }).append("Spec JSON"),
            )
            .append(" | ")
            .append($("<a>", { href: scenarioStatePath }).append("Full JSON"));
        }),
      ),
  );
  const keys = canonicalizeList(
    runSpecs.map((runSpec) => Object.keys(runSpec.adapter_spec)),
  );
  sortListWithReferenceOrder(keys, schema.adapterFieldNames);
  keys.forEach((key) => {
    const field = schema.adapterField(key);
    const helpText = describeField(field);
    const $key = $("<td>").append(
      $("<span>").append(helpIcon(helpText)).append(" ").append(key),
    );
    const $row = $("<tr>").append($key);
    runSpecs.forEach((runSpec) => {
      $row.append(
        $("<td>").append(renderFieldValue(field, runSpec.adapter_spec[key])),
      );
    });
    $adapterSpec.append($row);
  });

  // Render metrics/stats
  getJSONList(
    statsPaths,
    (statsList) => {
      console.log("metrics", statsList);
      if (statsList.length && statsList.every((stat) => stat.length === 0)) {
        $statsContainer
          .empty()
          .text("Metrics are currently unavailable. Please try again later.");
        return;
      }
      const $stats = $("<table>");
      const $statsSearch = $("<input>", {
        type: "text",
        size: 40,
        placeholder: "Enter keywords to filter metrics",
      });
      if (runSpecs.length > 1) {
        $stats.append(
          $("<tr>")
            .append($("<td>"))
            .append(runDisplayNames.map((name) => $("<td>").append(name))),
        );
      }
      const statNames = canonicalizeList(
        statsList.map((stats) => stats.map((stat) => stat.name)),
        metricNameCompare,
      );
      statNames.sort(metricNameCompare);

      // Current filter text; re-rendering replaces the whole stats table.
      let query = "";
      function update() {
        $stats
          .empty()
          .append(renderGlobalStats(query, statNames, statsList, statsPaths));
      }

      // Filter
      $statsSearch.keyup(() => {
        query = $statsSearch.val();
        update();
      });

      update();
      $statsContainer.empty().append($statsSearch).append($stats);
    },
    [],
  );

  // Render scenario header
  const scenarioPath = scenarioJsonUrl(suiteOf(runSpecs[0]), runSpecs[0].name);
  $.get(scenarioPath, {}, (scenario) => {
    console.log("scenario", scenario);
    $scenarioInfo
      .empty()
      .append(
        renderRunsHeader(scenario, scenarioPath, runSpecs[0].scenario_spec),
      );
  });

  // Render scenario instances and predictions
  const instancesPath = instancesJsonUrl(
    suiteOf(runSpecs[0]),
    runSpecs[0].name,
  );
  const instancesPromise = $.getJSON(instancesPath, {});
  const predictionsPromise = getJSONList(predictionsPaths);
  $.when(instancesPromise, predictionsPromise).then(
    (instancesResult, predictions) => {
      const instances = instancesResult[0];
      console.log("instances", instances);
      console.log("predictions", predictions);
      const $instances = $("<div>");
      const instanceKeyToDiv = renderScenarioInstances(instances, $instances);
      // For each run / model...
      runSpecs.forEach((runSpec, index) => {
        renderPredictions(
          runSpec,
          suiteOf(runSpec),
          runDisplayNames[index],
          predictions[index],
          instanceKeyToDiv,
          $instances,
        );
      });
      $instancesContainer.empty().append($instances);
    },
    () => {
      // Without this the section would show "Loading instances..." forever.
      $instancesContainer
        .empty()
        .text("Instances are currently unavailable. Please try again later.");
    },
  );
  return $root;
}
997
-
998
/**
 * Returns the URL of the landing page, i.e. the encoding of an empty set of
 * URL parameters. The `model` argument is accepted but not used.
 */
function rootUrl(model) {
  const noParams = {};
  return encodeUrlParams(noParams);
}
1001
-
1002
/**
 * Returns the URL of the models page: the current URL parameters with
 * `scenarios` cleared and `models` set to 1. The `model` argument is not
 * used.
 */
function modelUrl(model) {
  const params = Object.assign({}, urlParams);
  params.scenarios = null;
  params.models = 1;
  return encodeUrlParams(params);
}
1007
-
1008
/**
 * Returns the URL of a group page: the current URL parameters with
 * `scenarios` cleared and `group` set to the given value.
 */
function groupUrl(group) {
  const params = Object.assign({}, urlParams);
  params.scenarios = null;
  params.group = group;
  return encodeUrlParams(params);
}
1013
-
1014
/**
 * Returns the URL for a metric group (e.g., Calibration): the
 * "core_scenarios" group page followed by "#" and the group's display name.
 */
function metricUrl(group) {
  const params = Object.assign({}, urlParams);
  params.group = "core_scenarios";
  const pageUrl = encodeUrlParams(params);
  return pageUrl + "#" + group.display_name;
}
1024
-
1025
/**
 * Returns the shortest available label for a group: the first truthy value
 * among `short_display_name`, `display_name` and `name` (falling back to
 * `name` whatever its value).
 */
function groupShortDisplayName(group) {
  if (group.short_display_name) {
    return group.short_display_name;
  }
  if (group.display_name) {
    return group.display_name;
  }
  return group.name;
}
1028
-
1029
/**
 * Returns the label of a metric: its `display_name` (or `name` when there is
 * no display name), followed by " (perturbation: ...)" when the metric has a
 * `perturbation_name`.
 */
function metricDisplayName(metric) {
  const baseName = metric.display_name || metric.name;
  let suffix = "";
  if (metric.perturbation_name) {
    suffix = " (perturbation: " + metric.perturbation_name + ")";
  }
  return baseName + suffix;
}
1037
-
1038
/**
 * Builds the "models" column of the main page: a header with the number of
 * models not flagged `todo`, followed by one link per model in
 * `schema.models` (todo models get the extra "list-item-todo" class).
 *
 * @returns {jQuery} The column element.
 */
function renderModelList() {
  const models = schema.models;
  const $column = $("<div>", { class: "col-sm-3" });

  const numModels = models.reduce(
    (count, model) => (model.todo ? count : count + 1),
    0,
  );
  const $header = $("<div>", { class: "list-header" });
  $header.append(`${numModels} models`);
  $column.append($header);

  models.forEach((model) => {
    const itemClass = model.todo ? "list-item list-item-todo" : "list-item";
    const label = model.creator_organization + " / " + model.display_name;
    const $link = $("<a>", {
      href: modelUrl(model.name),
      class: itemClass,
      title: model.description,
    });
    $link.append(label);
    $column.append($("<div>").append($link));
  });
  return $column;
}
1058
-
1059
/**
 * Builds the "scenarios" column of the main page: a header with the number
 * of distinct non-todo scenario groups, then every top-level group with its
 * subgroups listed underneath.
 *
 * There are two types of groups we care about:
 * 1) Top-level groups (e.g., question_answering)
 * 2) Scenario-level groups (e.g., mmlu)
 *
 * A subgroup name that does not match any run group in the schema is logged
 * and skipped instead of aborting the rendering of the whole page.
 *
 * @returns {jQuery} The column element.
 */
function renderScenarioList() {
  const $result = $("<div>", { class: "col-sm-3" });

  // A Map (rather than a plain object) so that group names such as
  // "constructor" cannot resolve to inherited properties.
  const nameToGroup = new Map();
  schema.run_groups.forEach((group) => {
    nameToGroup.set(group.name, group);
  });

  const topGroups = schema.run_groups.filter((group) => {
    // Must have subgroups
    return (
      group.subgroups &&
      ["Core scenarios", "Targeted evaluations"].includes(group.category)
    );
  });

  // Resolve the subgroups of every top-level group once (parallel to
  // `topGroups`), dropping names the schema does not define.
  const resolvedSubgroups = topGroups.map((group) => {
    const subgroups = [];
    group.subgroups.forEach((subgroupName) => {
      const subgroup = nameToGroup.get(subgroupName);
      if (!subgroup) {
        console.warn(
          "Unknown subgroup " + subgroupName + " in group " + group.name,
        );
        return;
      }
      subgroups.push(subgroup);
    });
    return subgroups;
  });

  // Count each non-todo scenario group once, even if several top-level
  // groups list it.
  const scenarioGroupNames = new Set();
  resolvedSubgroups.forEach((subgroups) => {
    subgroups.forEach((subgroup) => {
      if (!subgroup.todo) {
        scenarioGroupNames.add(subgroup.name);
      }
    });
  });
  const numScenarios = scenarioGroupNames.size;

  $result.append(
    $("<div>", { class: "list-header" }).append(`${numScenarios} scenarios`),
  );
  topGroups.forEach((group, groupIndex) => {
    const $group = $("<div>");
    $group.append(
      $("<a>", {
        href: groupUrl(group.name),
        class: "list-item",
        title: group.description,
      }).append(groupShortDisplayName(group)),
    );
    $group.append(
      $("<ul>").append(
        resolvedSubgroups[groupIndex].map((subgroup) => {
          const extra = subgroup.todo ? " list-item-todo" : "";
          const $item = $("<a>", {
            href: groupUrl(subgroup.name),
            class: "list-item" + extra,
            title: subgroup.description,
          }).append(groupShortDisplayName(subgroup));
          return $("<li>").append($item);
        }),
      ),
    );
    $result.append($group);
  });
  return $result;
}
1118
-
1119
/**
 * Builds the "metrics" column of the main page: a header with the number of
 * distinct metric names, then every metric group with its metrics listed
 * underneath.
 *
 * Metric groups for which a "<name>_detailed" group exists are skipped, and
 * "${main_name}" placeholders are expanded into the main metric names used
 * by the run groups that reference the metric group.
 *
 * @returns {jQuery} The column element.
 */
function renderMetricsList() {
  const $column = $("<div>", { class: "col-sm-3" });

  // Information about individual metrics
  const nameToMetric = {};
  for (const metric of schema.metrics) {
    nameToMetric[metric.name] = metric;
  }

  // For each metric group, the deduped list of main_names of the run groups
  // using it. Example: accuracy => [quasi_exact_match, f1_score, ...]
  const metricGroupToMainNames = {};
  schema.run_groups.forEach((runGroup) => {
    if (!runGroup.metric_groups) {
      return;
    }
    runGroup.metric_groups.forEach((metricGroupName) => {
      const mainName = runGroup.environment.main_name;
      if (!mainName) {
        return;
      }
      const known = metricGroupToMainNames[metricGroupName] || [];
      if (known.includes(mainName)) {
        return;
      }
      metricGroupToMainNames[metricGroupName] = known.concat([mainName]);
    });
  });

  // True when a "<name>_detailed" sibling exists for the group.
  const hasDetailedSibling = (group) =>
    schema.metric_groups.some(
      (other) => other.name === group.name + "_detailed",
    );

  // Returns a copy of the group whose "${main_name}" placeholders are
  // replaced by one metric per known main name.
  const expandGroup = (group) => {
    const expanded = [];
    for (const metric of group.metrics) {
      if (metric.name !== "${main_name}") {
        expanded.push(metric);
        continue;
      }
      const mainNames =
        metricGroupToMainNames[group.name.replace("_detailed", "")] || [];
      for (const name of mainNames) {
        expanded.push(Object.assign({}, metric, { name }));
      }
    }
    return Object.assign({}, group, { metrics: expanded });
  };

  const metricGroups = schema.metric_groups
    .filter((group) => !hasDetailedSibling(group))
    .map(expandGroup);

  // Count the number of metrics
  const distinctNames = new Set();
  for (const group of metricGroups) {
    for (const metric of group.metrics) {
      distinctNames.add(metric.name);
    }
  }
  const numMetrics = distinctNames.size;

  const $header = $("<div>", { class: "list-header" });
  $header.append(`${numMetrics} metrics`);
  $column.append($header);

  metricGroups.forEach((group) => {
    const $groupLink = $("<a>", {
      href: metricUrl(group),
      class: "list-item",
      title: group.description,
    }).append(groupShortDisplayName(group));

    const $metricItems = group.metrics.map((metricRef) => {
      // Get the information from the metric (name, display_name, description)
      const metric = Object.assign(
        {},
        metricRef,
        nameToMetric[metricRef.name] || metricRef,
      );
      const $item = $("<a>", {
        class: "list-item",
        title: metric.description,
      }).append(metricDisplayName(metric));
      return $("<li>").append($item);
    });

    const $group = $("<div>");
    $group.append($groupLink);
    $group.append($("<ul>").append($metricItems));
    $column.append($group);
  });
  return $column;
}
1214
-
1215
/**
 * Returns the HELM logo image wrapped in a link to the root page.
 */
function helmLogo() {
  const $image = $("<img>", {
    src: "images/helm-logo.png",
    width: "500px",
    class: "mx-auto d-block",
  });
  const $link = $("<a>", { href: rootUrl() });
  return $link.append($image);
}
1224
-
1225
/**
 * Returns a large button-styled link showing `text` and pointing to `href`.
 */
function button(text, href) {
  const $link = $("<a>", { href: href, class: "main-link btn btn-lg m-5 px-5" });
  $link.append(text);
  return $link;
}
1230
-
1231
/**
 * Builds the landing page: logo, external link buttons, the project
 * description (including the organization logos inside the
 * "Standardization" item) and the model / scenario / metric lists.
 *
 * @returns {jQuery} The row element holding the whole page.
 */
function renderMainPage() {
  const $result = $("<div>", { class: "row" });

  $result.append($("<div>", { class: "col-sm-12" }).append(helmLogo()));

  const $blog = button(
    "Blog post",
    "https://crfm.stanford.edu/2022/11/17/helm.html",
  );
  const $paper = button("Paper", "https://arxiv.org/pdf/2211.09110.pdf");
  const $code = button("GitHub", "https://github.com/stanford-crfm/helm");
  $result.append(
    $("<div>", { class: "col-sm-12" }).append(
      $("<div>", { class: "text-center" }).append([$blog, $paper, $code]),
    ),
  );

  const $description = $("<div>", { class: "col-sm-8" }).append([
    "A language model takes in text and produces text:",
    $("<div>", { class: "text-center" }).append(
      $("<img>", {
        src: "images/language-model-helm.png",
        width: "600px",
        style: "width: 600px; margin-left: 130px",
      }),
    ),
    "Despite their simplicity, language models are increasingly functioning as the foundation for almost all language technologies from question answering to summarization.",
    " ",
    "But their immense capabilities and risks are not well understood.",
    " ",
    "Holistic Evaluation of Language Models (HELM) is a living benchmark that aims to improve the transparency of language models.",
  ]);

  // Returns one linked organization logo of the given pixel height.
  function organization(src, href, height) {
    return $("<div>", { class: "logo-item" }).append(
      $("<a>", { href }).append($("<img>", { src, height })),
    );
  }
  const defaultSize = 36;
  const largerSize = 50;
  // [logo file name, organization URL, logo height], in display order.
  const organizationLogos = [
    ["ai21", "https://www.ai21.com/", defaultSize],
    ["anthropic", "https://www.anthropic.com/", defaultSize],
    ["bigscience", "https://bigscience.huggingface.co/", largerSize],
    ["cohere", "https://cohere.ai/", defaultSize],
    ["eleutherai", "https://www.eleuther.ai/", largerSize],
    ["google", "https://ai.google/", defaultSize],
    ["meta", "https://ai.facebook.com/", largerSize],
    ["microsoft", "https://turing.microsoft.com/", defaultSize],
    [
      "nvidia",
      "https://www.nvidia.com/en-us/research/machine-learning-artificial-intelligence/",
      largerSize,
    ],
    ["openai", "https://openai.com/", defaultSize],
    ["tsinghua-keg", "https://keg.cs.tsinghua.edu.cn/", largerSize],
    ["yandex", "https://yandex.com/", defaultSize],
    ["together", "https://together.xyz/", defaultSize],
  ];
  const $organizations = $("<div>", { class: "logo-container" }).append(
    organizationLogos.map(([logoName, href, height]) =>
      organization("images/organizations/" + logoName + ".png", href, height),
    ),
  );
  // The logos are shown only inside the "Standardization" item below. The
  // original also appended this node to `$result` first, but appending a
  // node a second time moves it, so that earlier append had no effect.

  $description.append(
    $("<ol>").append([
      $("<li>")
        .append(
          "<b>Broad coverage and recognition of incompleteness</b>. We define a taxonomy over the scenarios we would ideally like to evaluate, select scenarios and metrics to cover the space and make explicit what is missing.",
        )
        .append(
          $("<div>", { class: "text-center" }).append(
            $("<img>", {
              src: "images/taxonomy-scenarios.png",
              width: "300px",
            }),
          ),
        ),
      $("<li>")
        .append(
          "<b>Multi-metric measurement</b>. Rather than focus on isolated metrics such as accuracy, we simultaneously measure multiple metrics (e.g., accuracy, robustness, calibration, efficiency) for each scenario, allowing analysis of tradeoffs.",
        )
        .append(
          $("<div>", { class: "text-center" }).append(
            $("<img>", {
              src: "images/scenarios-by-metrics.png",
              width: "300px",
            }),
          ),
        ),
      $("<li>")
        .append(
          '<b>Standardization</b>. We evaluate all the models that we have access to on the same scenarios with the same adaptation strategy (e.g., prompting), allowing for controlled comparisons. Thanks to all the companies for providing API access to the limited-access and closed models and <a href="https://together.xyz">Together</a> for providing the infrastructure to run the open models.',
        )
        .append($organizations),
      $("<li>").append(
        "<b>Transparency</b>. All the scenarios, predictions, prompts, code are available for further analysis on this website. We invite you to click below to explore!",
      ),
    ]),
  );

  $result.append([
    $("<div>", { class: "col-sm-2" }),
    $description,
    $("<div>", { class: "col-sm-2" }),
  ]);

  const $models = renderModelList();
  const $scenarios = renderScenarioList();
  const $metrics = renderMetricsList();
  $result.append([
    $("<div>", { class: "col-sm-2" }),
    $models,
    $scenarios,
    $metrics,
    $("<div>", { class: "col-sm-1" }),
  ]);

  return $result;
}
1396
-
1397
/**
 * Renders one table cell as a <td>.
 *
 * The shown value is `display_value` (or `value` when that is falsy), "-"
 * when neither is set, with numbers rounded to three decimals and an arrow
 * appended when `lower_is_better` is a boolean. Markdown cells are rendered
 * through `renderMarkdown`; other values are inserted as text. `style`,
 * `description` (as tooltip) and `href` (as link) are applied when present.
 *
 * @param {Object} cell - The cell to render.
 * @returns {jQuery} The <td> element.
 */
function renderCell(cell) {
  let shown = cell.display_value || cell.value;
  if (shown == null) {
    shown = "-";
  }
  if (typeof shown === "number") {
    shown = Math.round(shown * 1000) / 1000;
  }
  if (cell.lower_is_better === true) {
    shown += " \u2193"; // DOWN_ARROW
  } else if (cell.lower_is_better === false) {
    shown += " \u2191"; // UP_ARROW
  }

  const $span = $("<span>");
  if (cell.markdown && shown) {
    $span.append(renderMarkdown("" + shown));
  } else {
    $span.text(shown);
  }
  if (cell.style) {
    $span.css(cell.style);
  }
  if (cell.description) {
    $span.attr("title", cell.description);
  }

  const $td = $("<td>");
  if (cell.href) {
    return $td.append($("<a>", { href: cell.href }).append($span));
  }
  return $td.append($span);
}
1429
-
1430
/**
 * Returns a jQuery <thead> element for the given table.
 *
 * Header cells whose `lower_is_better` is a boolean get a "sort" link which
 * replaces the <tbody> of the enclosing table with one sorted by that column
 * (descending when higher is better, ascending otherwise) and highlights the
 * header cell.
 *
 * @param {Object} table - The table whose `header` cells are rendered.
 * @param {number} [sortColumnIndex] - Index of the header cell to highlight
 *   initially.
 */
function renderTableHeader(table, sortColumnIndex) {
  const $thead = $("<thead>");

  // "" means the column is not sortable.
  const sortOrderOf = (cell) => {
    if (cell.lower_is_better === false) {
      return "desc";
    }
    if (cell.lower_is_better === true) {
      return "asc";
    }
    return "";
  };

  const $headerCells = table.header.map((cell, index) => {
    const $cell = renderCell(cell);
    const sortOrder = sortOrderOf(cell);
    if (sortOrder) {
      const $sortLink = $("<a>", { href: "#" }).append("sort");
      $sortLink.click(() => {
        const $table = $thead.parent("table");
        $table.find("tbody").remove();
        $table.append(renderTableBody(table, index, sortOrder));
        $cell.parent().find("td").removeClass("table-sort-column");
        $cell.addClass("table-sort-column");
        return false;
      });
      $cell.append(" [&nbsp;").append($sortLink).append("&nbsp;]");
    }
    if (sortColumnIndex === index) {
      $cell.addClass("table-sort-column");
    }
    return $cell;
  });

  $thead.append($("<tr>").append($headerCells));
  return $thead;
}
1463
-
1464
/**
 * Returns a jQuery <tbody> element that contains the rows in the given table.
 * @param {Object} table - The table to render. Should conform to the Python Table
 *   dataclass schema.
 * @param {number} [sortColumnIndex] - If set and >= 0, the index of the column to
 *   sort by. Any other value leaves the rows in their original order and
 *   highlights no column.
 * @param {string} [sortOrder] - If set, determines whether to sort in ascending or
 *   descending order. Should be either "asc" or "desc". If unset, defaults to
 *   "asc".
 */
function renderTableBody(table, sortColumnIndex, sortOrder) {
  // Declared locally: the original assigned to an undeclared `$tableBody`,
  // which leaks a global (and throws in strict mode).
  const $tableBody = $("<tbody>");
  const rows = table.rows.slice();
  // One guard for both sorting and highlighting, matching the documented
  // "set and >= 0" contract (`null >= 0` is true, hence the explicit check).
  const hasSortColumn =
    sortColumnIndex !== undefined &&
    sortColumnIndex !== null &&
    sortColumnIndex >= 0;

  if (hasSortColumn) {
    // Missing values are always last in the sort order
    const missingValue = sortOrder === "desc" ? -Infinity : Infinity;
    const sortKey = (row) => {
      const cell = row[sortColumnIndex];
      const cellValue = cell ? cell.value : undefined;
      // JSON carries missing values as null as well as absent keys.
      return cellValue === undefined || cellValue === null
        ? missingValue
        : cellValue;
    };
    rows.sort((row0, row1) => {
      const value0 = sortKey(row0);
      const value1 = sortKey(row1);
      // Handle Infinity === Infinity or -Infinity === -Infinity
      if (value0 === value1) {
        return 0;
      }
      return sortOrder === "desc" ? value1 - value0 : value0 - value1;
    });
  }

  rows.forEach((row) => {
    const $cells = row.map(renderCell);
    if (hasSortColumn && $cells[sortColumnIndex]) {
      $cells[sortColumnIndex].addClass("table-sort-column");
    }
    const $row = $("<tr>").append($cells);
    $tableBody.append($row);
  });
  return $tableBody;
}
1506
-
1507
/**
 * Renders one table: a titled anchor, the results table (initially sorted by
 * the first header column that defines `lower_is_better`) and, when the
 * table has links, the list of those links.
 *
 * Link targets containing "benchmark_output" are rewritten to start at
 * `BENCHMARK_OUTPUT_BASE_URL`; other links are logged and kept as they are.
 *
 * @param {Object} table - The table to render.
 * @returns {jQuery} A <div> containing the rendered table.
 */
function renderTable(table) {
  const $output = $("<div>");
  const $titleAnchor = $("<a>", { name: table.title }).append(table.title);
  $output.append($("<h3>").append($titleAnchor));

  // Initial sort: the first column that declares a direction.
  let sortColumnIndex = undefined;
  let sortOrder = undefined;
  const firstSortable = table.header.findIndex(
    (headerCell) => headerCell.lower_is_better !== undefined,
  );
  if (firstSortable >= 0) {
    sortColumnIndex = firstSortable;
    const lowerIsBetter = table.header[firstSortable].lower_is_better;
    if (lowerIsBetter === false) {
      sortOrder = "desc";
    } else if (lowerIsBetter === true) {
      sortOrder = "asc";
    }
  }

  const $table = $("<table>", { class: "query-table results-table" });
  $table.append(renderTableHeader(table, sortColumnIndex));
  $table.append(renderTableBody(table, sortColumnIndex, sortOrder));
  $output.append($table);

  // Links
  if (table.links.length > 0) {
    const delimiter = "benchmark_output";
    const $links = table.links.map((link) => {
      // replace everything before benchmark output, as it is incorrect
      const parts = link.href.split(delimiter);
      if (parts.length > 1) {
        const rewritten = BENCHMARK_OUTPUT_BASE_URL + delimiter + parts[1];
        return $("<a>", { href: rewritten }).append(link.text);
      }
      console.log("benchmark_output not found, invalid link for ", table);
      return $("<a>", { href: link.href }).append(link.text);
    });
    $output.append(renderItems($links));
  }
  return $output;
}
1556
-
1557
/**
 * Renders a list of tables: first a row of links (one "#title" link per
 * table plus a "JSON" link to `path`), then each table inside its own
 * "table-container" div whose id is the table title.
 *
 * @param {Object[]} tables - The tables to render.
 * @param {string} path - URL used for the "JSON" link.
 * @returns {jQuery} A <div> containing everything.
 */
function renderTables(tables, path) {
  const $output = $("<div>");

  // Links to tables
  const $navLinks = tables.map((table) =>
    $("<a>", { href: "#" + table.title }).append(table.title),
  );
  $navLinks.push($("<a>", { href: path }).append("JSON"));
  $output.append(renderItems($navLinks));

  const $containers = tables.map((table) => {
    const $container = $("<div>", {
      class: "table-container",
      id: table.title,
    });
    return $container.append(renderTable(table));
  });
  $output.append($containers);

  return $output;
}
1582
-
1583
- //////////////////////////////////////////////////////////////////////////////
1584
- // Main //
1585
- //////////////////////////////////////////////////////////////////////////////
1586
const $main = $("#main");
const $summary = $("#summary");

// Allow overriding release or suite with URL params for debugging.
// A release override takes precedence over a suite override.
if (urlParams.release || urlParams.suite) {
  const overrideRelease = Boolean(urlParams.release);
  window.RELEASE = overrideRelease ? urlParams.release : null;
  window.SUITE = overrideRelease ? null : urlParams.suite;
}

// Fetch the schema and keep it in `schema`.
const schemaPromise = $.getJSON(schemaJsonUrl(), {}, function (raw) {
  console.log("schema", raw);
  schema = new Schema(raw);
});

// Fetch the summary, keep it in `summary` and show which release or suite is
// being displayed.
const summaryPromise = $.get(summaryJsonUrl(), {}, function (response) {
  console.log("summary", response);
  summary = response;
  const label = window.RELEASE
    ? `Release ${summary.release}`
    : `Suite ${summary.suite}`;
  $summary.append(`${label} (last updated ${summary.date})`);
});
1614
-
1615
- $.when(schemaPromise, summaryPromise).then(() => {
1616
- if (urlParams.models) {
1617
- // Models
1618
- $main.empty();
1619
- $main.append(renderHeader("Models", renderModels()));
1620
- refreshHashLocation();
1621
- } else if (urlParams.scenarios) {
1622
- // Models
1623
- $main.empty();
1624
- $main.append(renderHeader("Scenarios", renderScenarios()));
1625
- refreshHashLocation();
1626
- } else if (urlParams.plots) {
1627
- // Plots
1628
- $main.empty();
1629
- $main.append(renderHeader("Plots", renderPlots()));
1630
- refreshHashLocation();
1631
- } else if (
1632
- urlParams.runSpec ||
1633
- urlParams.runSpecs ||
1634
- urlParams.runSpecRegex
1635
- ) {
1636
- // Predictions for a set of run specs (matching a regular expression)
1637
- $main.text("Loading runs...");
1638
- $.getJSON(runSpecsJsonUrl(), {}, (response) => {
1639
- $main.empty();
1640
- const runSpecs = response;
1641
- console.log("runSpecs", runSpecs);
1642
- let matcher;
1643
- if (urlParams.runSpec) {
1644
- // Exactly one
1645
- matcher = (runSpec) => runSpec.name === urlParams.runSpec;
1646
- } else if (urlParams.runSpecs) {
1647
- // List
1648
- const selectedRunSpecs = JSON.parse(urlParams.runSpecs);
1649
- matcher = (runSpec) => selectedRunSpecs.includes(runSpec.name);
1650
- } else if (urlParams.runSpecRegex) {
1651
- // Regular expression
1652
- const regex = new RegExp("^" + urlParams.runSpecRegex + "$");
1653
- matcher = (runSpec) => regex.test(runSpec.name);
1654
- } else {
1655
- throw "Internal error";
1656
- }
1657
- const matchedRunSpecs = runSpecs.filter(matcher);
1658
- if (matchedRunSpecs.length === 0) {
1659
- $main.append(renderError("No matching runs"));
1660
- } else {
1661
- const getRunToRunSuitesPromise = window.RELEASE
1662
- ? $.get(runsToRunSuitesJsonUrl(), {})
1663
- : $.Deferred().resolve({});
1664
- getRunToRunSuitesPromise.then((runNameToSuite) => {
1665
- $main.append(renderRunsDetailed(matchedRunSpecs, runNameToSuite));
1666
- });
1667
- }
1668
- refreshHashLocation();
1669
- });
1670
- } else if (urlParams.runs) {
1671
- // All runs (with search)
1672
- $main.text("Loading runs...");
1673
- $.getJSON(runSpecsJsonUrl(), {}, (runSpecs) => {
1674
- $main.empty();
1675
- console.log("runSpecs", runSpecs);
1676
- $main.append(renderHeader("Runs", renderRunsOverview(runSpecs)));
1677
- });
1678
- } else if (urlParams.groups) {
1679
- // All groups
1680
- $main.text("Loading groups...");
1681
- const path = groupsJsonUrl();
1682
- $.getJSON(path, {}, (tables) => {
1683
- $main.empty();
1684
- console.log("groups", tables);
1685
- $main.append(renderTables(tables, path));
1686
- refreshHashLocation();
1687
- });
1688
- } else if (urlParams.group) {
1689
- // Specific group
1690
- $main.text("Loading group...");
1691
- const path = groupJsonUrl(urlParams.group);
1692
- $.getJSON(path, {}, (tables) => {
1693
- $main.empty();
1694
- console.log("group", tables);
1695
- $main.append(renderGroupHeader());
1696
- $main.append(renderTables(tables, path));
1697
- refreshHashLocation();
1698
- });
1699
- } else {
1700
- // Main landing page
1701
- $main.empty();
1702
- $main.append(renderMainPage());
1703
- }
1704
- });
1705
- });