crfm-helm 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crfm-helm might be problematic. Click here for more details.

Files changed (209)
  1. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/METADATA +81 -112
  2. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/RECORD +165 -155
  3. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/WHEEL +1 -1
  4. helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
  5. helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
  6. helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
  7. helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
  8. helm/benchmark/adaptation/common_adapter_specs.py +2 -0
  9. helm/benchmark/annotation/anthropic_red_team_annotator.py +57 -0
  10. helm/benchmark/annotation/call_center_annotator.py +258 -0
  11. helm/benchmark/annotation/financebench_annotator.py +79 -0
  12. helm/benchmark/annotation/harm_bench_annotator.py +55 -0
  13. helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
  14. helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
  15. helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
  16. helm/benchmark/annotation/live_qa_annotator.py +37 -45
  17. helm/benchmark/annotation/medication_qa_annotator.py +36 -44
  18. helm/benchmark/annotation/model_as_judge.py +96 -0
  19. helm/benchmark/annotation/simple_safety_tests_annotator.py +50 -0
  20. helm/benchmark/annotation/xstest_annotator.py +100 -0
  21. helm/benchmark/metrics/annotation_metrics.py +108 -0
  22. helm/benchmark/metrics/bhasa_metrics.py +188 -0
  23. helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
  24. helm/benchmark/metrics/code_metrics_helper.py +11 -1
  25. helm/benchmark/metrics/safety_metrics.py +79 -0
  26. helm/benchmark/metrics/summac/model_summac.py +3 -3
  27. helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
  28. helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
  29. helm/benchmark/metrics/unitxt_metrics.py +17 -3
  30. helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
  31. helm/benchmark/metrics/vision_language/image_utils.py +1 -1
  32. helm/benchmark/model_metadata_registry.py +3 -3
  33. helm/benchmark/presentation/create_plots.py +1 -1
  34. helm/benchmark/presentation/schema.py +3 -0
  35. helm/benchmark/presentation/summarize.py +106 -256
  36. helm/benchmark/presentation/test_run_entry.py +1 -0
  37. helm/benchmark/presentation/test_summarize.py +145 -3
  38. helm/benchmark/run.py +15 -0
  39. helm/benchmark/run_expander.py +83 -30
  40. helm/benchmark/run_specs/bhasa_run_specs.py +652 -0
  41. helm/benchmark/run_specs/call_center_run_specs.py +152 -0
  42. helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
  43. helm/benchmark/run_specs/experimental_run_specs.py +52 -0
  44. helm/benchmark/run_specs/finance_run_specs.py +82 -1
  45. helm/benchmark/run_specs/safety_run_specs.py +154 -0
  46. helm/benchmark/run_specs/vlm_run_specs.py +100 -24
  47. helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
  48. helm/benchmark/scenarios/banking77_scenario.py +51 -0
  49. helm/benchmark/scenarios/bhasa_scenario.py +1942 -0
  50. helm/benchmark/scenarios/call_center_scenario.py +84 -0
  51. helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
  52. helm/benchmark/scenarios/ewok_scenario.py +116 -0
  53. helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
  54. helm/benchmark/scenarios/financebench_scenario.py +53 -0
  55. helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
  56. helm/benchmark/scenarios/raft_scenario.py +1 -1
  57. helm/benchmark/scenarios/scenario.py +1 -1
  58. helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
  59. helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
  60. helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
  61. helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
  62. helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
  63. helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
  64. helm/benchmark/scenarios/test_math_scenario.py +2 -8
  65. helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
  66. helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
  67. helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
  68. helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
  69. helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
  70. helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
  71. helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
  72. helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
  73. helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
  74. helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
  75. helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
  76. helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
  77. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
  78. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
  79. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
  80. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
  81. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
  82. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
  83. helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
  84. helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
  85. helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
  86. helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
  87. helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
  88. helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
  89. helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
  90. helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
  91. helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
  92. helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
  93. helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
  94. helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
  95. helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
  96. helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
  97. helm/benchmark/scenarios/xstest_scenario.py +35 -0
  98. helm/benchmark/server.py +1 -6
  99. helm/benchmark/static/schema_air_bench.yaml +750 -750
  100. helm/benchmark/static/schema_bhasa.yaml +709 -0
  101. helm/benchmark/static/schema_call_center.yaml +232 -0
  102. helm/benchmark/static/schema_cleva.yaml +768 -0
  103. helm/benchmark/static/schema_decodingtrust.yaml +444 -0
  104. helm/benchmark/static/schema_ewok.yaml +367 -0
  105. helm/benchmark/static/schema_finance.yaml +55 -9
  106. helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
  107. helm/benchmark/static/schema_legal.yaml +566 -0
  108. helm/benchmark/static/schema_safety.yaml +266 -0
  109. helm/benchmark/static/schema_tables.yaml +149 -8
  110. helm/benchmark/static/schema_thai.yaml +21 -0
  111. helm/benchmark/static/schema_vhelm.yaml +137 -101
  112. helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
  113. helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
  114. helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
  115. helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
  116. helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
  117. helm/benchmark/static_build/assets/index-3ee38b3d.js +10 -0
  118. helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
  119. helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
  120. helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
  121. helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
  122. helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
  123. helm/benchmark/static_build/index.html +2 -2
  124. helm/benchmark/window_services/test_openai_window_service.py +8 -8
  125. helm/benchmark/window_services/tokenizer_service.py +0 -5
  126. helm/clients/ai21_client.py +71 -1
  127. helm/clients/anthropic_client.py +7 -19
  128. helm/clients/huggingface_client.py +38 -37
  129. helm/clients/nvidia_nim_client.py +35 -0
  130. helm/clients/openai_client.py +18 -4
  131. helm/clients/palmyra_client.py +24 -0
  132. helm/clients/perspective_api_client.py +11 -6
  133. helm/clients/test_client.py +4 -6
  134. helm/clients/together_client.py +22 -0
  135. helm/clients/vision_language/open_flamingo_client.py +1 -2
  136. helm/clients/vision_language/palmyra_vision_client.py +28 -13
  137. helm/common/cache.py +8 -30
  138. helm/common/images_utils.py +6 -0
  139. helm/common/key_value_store.py +9 -9
  140. helm/common/mongo_key_value_store.py +5 -4
  141. helm/common/request.py +16 -0
  142. helm/common/test_cache.py +1 -48
  143. helm/common/tokenization_request.py +0 -9
  144. helm/config/model_deployments.yaml +444 -329
  145. helm/config/model_metadata.yaml +513 -111
  146. helm/config/tokenizer_configs.yaml +140 -11
  147. helm/proxy/example_queries.py +14 -21
  148. helm/proxy/server.py +0 -9
  149. helm/proxy/services/remote_service.py +0 -6
  150. helm/proxy/services/server_service.py +6 -20
  151. helm/proxy/services/service.py +0 -6
  152. helm/proxy/token_counters/test_auto_token_counter.py +2 -2
  153. helm/tokenizers/ai21_tokenizer.py +51 -59
  154. helm/tokenizers/cohere_tokenizer.py +0 -75
  155. helm/tokenizers/huggingface_tokenizer.py +0 -1
  156. helm/tokenizers/test_ai21_tokenizer.py +48 -0
  157. helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
  158. helm/benchmark/data_overlap/export_scenario_text.py +0 -119
  159. helm/benchmark/data_overlap/light_scenario.py +0 -60
  160. helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
  161. helm/benchmark/static/benchmarking.css +0 -156
  162. helm/benchmark/static/benchmarking.js +0 -1705
  163. helm/benchmark/static/config.js +0 -3
  164. helm/benchmark/static/general.js +0 -122
  165. helm/benchmark/static/images/crfm-logo.png +0 -0
  166. helm/benchmark/static/images/helm-logo-simple.png +0 -0
  167. helm/benchmark/static/images/helm-logo.png +0 -0
  168. helm/benchmark/static/images/language-model-helm.png +0 -0
  169. helm/benchmark/static/images/organizations/ai21.png +0 -0
  170. helm/benchmark/static/images/organizations/anthropic.png +0 -0
  171. helm/benchmark/static/images/organizations/bigscience.png +0 -0
  172. helm/benchmark/static/images/organizations/cohere.png +0 -0
  173. helm/benchmark/static/images/organizations/eleutherai.png +0 -0
  174. helm/benchmark/static/images/organizations/google.png +0 -0
  175. helm/benchmark/static/images/organizations/meta.png +0 -0
  176. helm/benchmark/static/images/organizations/microsoft.png +0 -0
  177. helm/benchmark/static/images/organizations/nvidia.png +0 -0
  178. helm/benchmark/static/images/organizations/openai.png +0 -0
  179. helm/benchmark/static/images/organizations/together.png +0 -0
  180. helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
  181. helm/benchmark/static/images/organizations/yandex.png +0 -0
  182. helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
  183. helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
  184. helm/benchmark/static/index.html +0 -68
  185. helm/benchmark/static/info-icon.png +0 -0
  186. helm/benchmark/static/json-urls.js +0 -69
  187. helm/benchmark/static/plot-captions.js +0 -27
  188. helm/benchmark/static/utils.js +0 -285
  189. helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
  190. helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
  191. helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
  192. helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
  193. helm/benchmark/window_services/ai21_window_service.py +0 -247
  194. helm/benchmark/window_services/cohere_window_service.py +0 -101
  195. helm/benchmark/window_services/test_ai21_window_service.py +0 -163
  196. helm/benchmark/window_services/test_cohere_window_service.py +0 -75
  197. helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
  198. helm/benchmark/window_services/test_ice_window_service.py +0 -327
  199. helm/tokenizers/ice_tokenizer.py +0 -30
  200. helm/tokenizers/test_ice_tokenizer.py +0 -57
  201. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/LICENSE +0 -0
  202. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/entry_points.txt +0 -0
  203. {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/top_level.txt +0 -0
  204. /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
  205. /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
  206. /helm/benchmark/{data_overlap → scenarios/vision_language/image2struct}/__init__.py +0 -0
  207. /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
  208. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct/webpage}/__init__.py +0 -0
  209. /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
@@ -10,6 +10,7 @@ from helm.benchmark.model_metadata_registry import (
10
10
  get_all_text_models,
11
11
  get_model_metadata,
12
12
  get_model_names_with_tag,
13
+ DEPRECATED_MODEL_TAG,
13
14
  FULL_FUNCTIONALITY_TEXT_MODEL_TAG,
14
15
  LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG,
15
16
  ABLATION_MODEL_TAG,
@@ -343,16 +344,6 @@ class AnthropicClaude3RunExpander(RunExpander):
343
344
  run_spec,
344
345
  adapter_spec=replace(run_spec.adapter_spec, stop_sequences=stop_sequences_with_non_whitespace),
345
346
  )
346
- if run_spec.adapter_spec.method == ADAPT_MULTIPLE_CHOICE_JOINT:
347
- instructions = "Answer with only a single letter."
348
- if run_spec.adapter_spec.instructions:
349
- instructions = f"{instructions}\n\n{run_spec.adapter_spec.instructions}"
350
- return [
351
- replace(
352
- run_spec,
353
- adapter_spec=replace(run_spec.adapter_spec, instructions=instructions),
354
- ),
355
- ]
356
347
  return [run_spec]
357
348
 
358
349
 
@@ -610,6 +601,12 @@ class ModelRunExpander(ReplaceValueRunExpander):
610
601
  values_dict["ablation"] = models
611
602
  else:
612
603
  values_dict[family_name] = models
604
+
605
+ # For each of the keys above, filter out deprecated models.
606
+ deprecated_models = set(get_model_names_with_tag(DEPRECATED_MODEL_TAG))
607
+ for family_name in values_dict.keys():
608
+ values_dict[family_name] = [model for model in values_dict[family_name] if model not in deprecated_models]
609
+
613
610
  return values_dict
614
611
 
615
612
 
@@ -1274,6 +1271,30 @@ class IncreaseMaxTokensRunExpander(RunExpander):
1274
1271
  ]
1275
1272
 
1276
1273
 
1274
+ class TemperatureRunExpander(RunExpander):
1275
+ """
1276
+ Run expander for setting the temperature.
1277
+ """
1278
+
1279
+ name = "temperature"
1280
+
1281
+ def __init__(self, value: float):
1282
+ """
1283
+ Args:
1284
+ value (float): The amount to set temperature to
1285
+ """
1286
+ self.value = value
1287
+
1288
+ def expand(self, run_spec: RunSpec) -> List[RunSpec]:
1289
+ adapter_spec = replace(run_spec.adapter_spec, temperature=self.value)
1290
+ return [
1291
+ replace(
1292
+ run_spec,
1293
+ adapter_spec=adapter_spec,
1294
+ ),
1295
+ ]
1296
+
1297
+
1277
1298
  class IncreaseTemperatureRunExpander(RunExpander):
1278
1299
  """
1279
1300
  Run expander for increasing the temperature.
@@ -1402,23 +1423,26 @@ class OutputFormatInstructions(RunExpander):
1402
1423
 
1403
1424
  name = "output_format_instructions"
1404
1425
 
1426
+ _SUFFIX_SUFFIX = "_suffix"
1427
+
1405
1428
  def __init__(self, scenario: str):
1406
- self.scenario = scenario
1429
+ if scenario.endswith(OutputFormatInstructions._SUFFIX_SUFFIX):
1430
+ self.scenario = scenario[: -len(OutputFormatInstructions._SUFFIX_SUFFIX)]
1431
+ self.suffix = True
1432
+ else:
1433
+ self.scenario = scenario
1434
+ self.suffix = False
1407
1435
 
1408
1436
  def expand(self, run_spec: RunSpec) -> List[RunSpec]:
1409
1437
  if run_spec.adapter_spec.method == ADAPT_MULTIPLE_CHOICE_JOINT:
1410
1438
  if self.scenario == "mmlu_only_last_question":
1411
1439
  instructions = "Answer only the last question with only a single letter."
1440
+ elif self.scenario == "mmlu":
1441
+ instructions = "Answer with only a single letter."
1442
+ elif self.scenario == "mcqa":
1443
+ instructions = "Answer with only a single letter."
1412
1444
  else:
1413
1445
  instructions = "Answer with only a single letter."
1414
- if run_spec.adapter_spec.instructions:
1415
- instructions = f"{instructions}\n\n{run_spec.adapter_spec.instructions}"
1416
- return [
1417
- replace(
1418
- run_spec,
1419
- adapter_spec=replace(run_spec.adapter_spec, instructions=instructions),
1420
- ),
1421
- ]
1422
1446
  elif run_spec.adapter_spec.method == ADAPT_GENERATION:
1423
1447
  output_noun = run_spec.adapter_spec.output_prefix.split(":")[0]
1424
1448
  if self.scenario == "narrative_qa":
@@ -1433,27 +1457,53 @@ class OutputFormatInstructions(RunExpander):
1433
1457
  instructions = f"Answer with the {output_noun.lower()}."
1434
1458
  else:
1435
1459
  instructions = "Answer yes or no."
1460
+ elif self.scenario == "legalbench_abercrombie":
1461
+ instructions = "Answer with only 'generic', 'descriptive', 'suggestive', 'arbitrary' or 'fanciful'."
1462
+ elif self.scenario == "legalbench_function_of_decision_section":
1463
+ instructions = "Answer with only 'Facts', 'Procedural History', 'Issue', 'Rule', 'Analysis', 'Conclusion' or 'Decree'." # noqa: E501
1464
+ elif self.scenario == "legalbench_yes_or_no":
1465
+ instructions = "Answer with only 'Yes' or 'No'."
1436
1466
  elif self.scenario == "wmt_14":
1437
1467
  instructions = "Answer with the English translation."
1438
- else:
1439
- raise ValueError(f"Unknown scenario {self.scenario}")
1440
-
1441
- if run_spec.adapter_spec.output_prefix:
1468
+ elif self.scenario == "wmt_14_only_last_sentence":
1469
+ instructions = "Answer with only the English translation for the last sentence."
1470
+ elif self.scenario == "math":
1471
+ instructions = "Wrap the final answer with the \\boxed{} command."
1472
+ elif self.scenario == "numeric_nlg":
1473
+ instructions = "Answer with only description of the last table as a single paragraph on a single line."
1474
+ elif self.scenario == "tab_fact":
1442
1475
  instructions = (
1443
- f"{instructions} Do not include '{run_spec.adapter_spec.output_prefix.strip()}' in your answer."
1476
+ "Answer with only the classification of the last statement, either 'refuted' or 'entailed'."
1477
+ )
1478
+ elif self.scenario == "wikitq":
1479
+ instructions = (
1480
+ "Answer only the last question with a short answer. "
1481
+ "Avoid extra, unnecessary information in the answer."
1444
1482
  )
1445
-
1446
- if run_spec.adapter_spec.instructions:
1447
- instructions = f"{instructions}\n\n{run_spec.adapter_spec.instructions}"
1448
1483
  else:
1449
- instructions = f"{instructions}\n"
1484
+ raise ValueError(f"Unknown scenario {self.scenario}")
1485
+
1486
+ if self.suffix:
1450
1487
  return [
1451
1488
  replace(
1452
1489
  run_spec,
1453
- adapter_spec=replace(run_spec.adapter_spec, instructions=instructions),
1490
+ adapter_spec=replace(
1491
+ run_spec.adapter_spec,
1492
+ global_suffix=f"{run_spec.adapter_spec.global_suffix}\n\n{instructions}",
1493
+ ),
1454
1494
  ),
1455
1495
  ]
1456
- raise ValueError(f"Unknown scenario {self.scenario}")
1496
+
1497
+ if run_spec.adapter_spec.instructions:
1498
+ instructions = f"{instructions}\n\n{run_spec.adapter_spec.instructions}"
1499
+ else:
1500
+ instructions = f"{instructions}\n"
1501
+ return [
1502
+ replace(
1503
+ run_spec,
1504
+ adapter_spec=replace(run_spec.adapter_spec, instructions=instructions),
1505
+ ),
1506
+ ]
1457
1507
 
1458
1508
 
1459
1509
  RUN_EXPANDER_SUBCLASSES: List[Type[RunExpander]] = [
@@ -1479,6 +1529,9 @@ RUN_EXPANDER_SUBCLASSES: List[Type[RunExpander]] = [
1479
1529
  ChatMLRunExpander,
1480
1530
  EvalSplitRunExpander,
1481
1531
  OutputFormatInstructions,
1532
+ TemperatureRunExpander,
1533
+ IncreaseTemperatureRunExpander,
1534
+ IncreaseMaxTokensRunExpander,
1482
1535
  ]
1483
1536
 
1484
1537