crfm-helm: crfm_helm-0.5.2-py3-none-any.whl → crfm_helm-0.5.4-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crfm-helm might be problematic. Click here for more details.
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/METADATA +81 -112
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/RECORD +165 -155
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/WHEEL +1 -1
- helm/benchmark/adaptation/adapters/multiple_choice_joint_adapter.py +12 -5
- helm/benchmark/adaptation/adapters/test_generation_adapter.py +12 -12
- helm/benchmark/adaptation/adapters/test_language_modeling_adapter.py +8 -8
- helm/benchmark/adaptation/adapters/test_multiple_choice_joint_adapter.py +77 -9
- helm/benchmark/adaptation/common_adapter_specs.py +2 -0
- helm/benchmark/annotation/anthropic_red_team_annotator.py +57 -0
- helm/benchmark/annotation/call_center_annotator.py +258 -0
- helm/benchmark/annotation/financebench_annotator.py +79 -0
- helm/benchmark/annotation/harm_bench_annotator.py +55 -0
- helm/benchmark/annotation/{image2structure → image2struct}/latex_compiler_annotator.py +2 -2
- helm/benchmark/annotation/{image2structure → image2struct}/lilypond_compiler_annotator.py +5 -3
- helm/benchmark/annotation/{image2structure → image2struct}/webpage_compiler_annotator.py +5 -5
- helm/benchmark/annotation/live_qa_annotator.py +37 -45
- helm/benchmark/annotation/medication_qa_annotator.py +36 -44
- helm/benchmark/annotation/model_as_judge.py +96 -0
- helm/benchmark/annotation/simple_safety_tests_annotator.py +50 -0
- helm/benchmark/annotation/xstest_annotator.py +100 -0
- helm/benchmark/metrics/annotation_metrics.py +108 -0
- helm/benchmark/metrics/bhasa_metrics.py +188 -0
- helm/benchmark/metrics/bhasa_metrics_specs.py +10 -0
- helm/benchmark/metrics/code_metrics_helper.py +11 -1
- helm/benchmark/metrics/safety_metrics.py +79 -0
- helm/benchmark/metrics/summac/model_summac.py +3 -3
- helm/benchmark/metrics/tokens/test_ai21_token_cost_estimator.py +2 -2
- helm/benchmark/metrics/tokens/test_openai_token_cost_estimator.py +4 -4
- helm/benchmark/metrics/unitxt_metrics.py +17 -3
- helm/benchmark/metrics/vision_language/image_metrics.py +7 -3
- helm/benchmark/metrics/vision_language/image_utils.py +1 -1
- helm/benchmark/model_metadata_registry.py +3 -3
- helm/benchmark/presentation/create_plots.py +1 -1
- helm/benchmark/presentation/schema.py +3 -0
- helm/benchmark/presentation/summarize.py +106 -256
- helm/benchmark/presentation/test_run_entry.py +1 -0
- helm/benchmark/presentation/test_summarize.py +145 -3
- helm/benchmark/run.py +15 -0
- helm/benchmark/run_expander.py +83 -30
- helm/benchmark/run_specs/bhasa_run_specs.py +652 -0
- helm/benchmark/run_specs/call_center_run_specs.py +152 -0
- helm/benchmark/run_specs/decodingtrust_run_specs.py +8 -8
- helm/benchmark/run_specs/experimental_run_specs.py +52 -0
- helm/benchmark/run_specs/finance_run_specs.py +82 -1
- helm/benchmark/run_specs/safety_run_specs.py +154 -0
- helm/benchmark/run_specs/vlm_run_specs.py +100 -24
- helm/benchmark/scenarios/anthropic_red_team_scenario.py +71 -0
- helm/benchmark/scenarios/banking77_scenario.py +51 -0
- helm/benchmark/scenarios/bhasa_scenario.py +1942 -0
- helm/benchmark/scenarios/call_center_scenario.py +84 -0
- helm/benchmark/scenarios/decodingtrust_stereotype_bias_scenario.py +2 -1
- helm/benchmark/scenarios/ewok_scenario.py +116 -0
- helm/benchmark/scenarios/fin_qa_scenario.py +2 -0
- helm/benchmark/scenarios/financebench_scenario.py +53 -0
- helm/benchmark/scenarios/harm_bench_scenario.py +59 -0
- helm/benchmark/scenarios/raft_scenario.py +1 -1
- helm/benchmark/scenarios/scenario.py +1 -1
- helm/benchmark/scenarios/simple_safety_tests_scenario.py +33 -0
- helm/benchmark/scenarios/test_commonsense_scenario.py +21 -0
- helm/benchmark/scenarios/test_ewok_scenario.py +25 -0
- helm/benchmark/scenarios/test_financebench_scenario.py +26 -0
- helm/benchmark/scenarios/test_gsm_scenario.py +31 -0
- helm/benchmark/scenarios/test_legalbench_scenario.py +30 -0
- helm/benchmark/scenarios/test_math_scenario.py +2 -8
- helm/benchmark/scenarios/test_med_qa_scenario.py +30 -0
- helm/benchmark/scenarios/test_mmlu_scenario.py +33 -0
- helm/benchmark/scenarios/test_narrativeqa_scenario.py +73 -0
- helm/benchmark/scenarios/thai_exam_scenario.py +4 -4
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/exams_v_scenario.py +104 -0
- helm/benchmark/scenarios/vision_language/fair_face_scenario.py +136 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/chart2csv_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/latex_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/musicsheet_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/utils_latex.py +31 -39
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/driver.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/utils.py +1 -1
- helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage_scenario.py +41 -12
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/mementos_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/mme_scenario.py +21 -18
- helm/benchmark/scenarios/vision_language/mmmu_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/pope_scenario.py +2 -1
- helm/benchmark/scenarios/vision_language/real_world_qa_scenario.py +57 -0
- helm/benchmark/scenarios/vision_language/seed_bench_scenario.py +7 -5
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +6 -3
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +1 -1
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +3 -1
- helm/benchmark/scenarios/xstest_scenario.py +35 -0
- helm/benchmark/server.py +1 -6
- helm/benchmark/static/schema_air_bench.yaml +750 -750
- helm/benchmark/static/schema_bhasa.yaml +709 -0
- helm/benchmark/static/schema_call_center.yaml +232 -0
- helm/benchmark/static/schema_cleva.yaml +768 -0
- helm/benchmark/static/schema_decodingtrust.yaml +444 -0
- helm/benchmark/static/schema_ewok.yaml +367 -0
- helm/benchmark/static/schema_finance.yaml +55 -9
- helm/benchmark/static/{schema_image2structure.yaml → schema_image2struct.yaml} +231 -90
- helm/benchmark/static/schema_legal.yaml +566 -0
- helm/benchmark/static/schema_safety.yaml +266 -0
- helm/benchmark/static/schema_tables.yaml +149 -8
- helm/benchmark/static/schema_thai.yaml +21 -0
- helm/benchmark/static/schema_vhelm.yaml +137 -101
- helm/benchmark/static_build/assets/accenture-6f97eeda.png +0 -0
- helm/benchmark/static_build/assets/aisingapore-6dfc9acf.png +0 -0
- helm/benchmark/static_build/assets/cresta-9e22b983.png +0 -0
- helm/benchmark/static_build/assets/cuhk-8c5631e9.png +0 -0
- helm/benchmark/static_build/assets/index-05c76bb1.css +1 -0
- helm/benchmark/static_build/assets/index-3ee38b3d.js +10 -0
- helm/benchmark/static_build/assets/scb10x-204bd786.png +0 -0
- helm/benchmark/static_build/assets/vhelm-aspects-1437d673.png +0 -0
- helm/benchmark/static_build/assets/vhelm-framework-a1ca3f3f.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-8afb7616.png +0 -0
- helm/benchmark/static_build/assets/wellsfargo-a86a6c4a.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/benchmark/window_services/test_openai_window_service.py +8 -8
- helm/benchmark/window_services/tokenizer_service.py +0 -5
- helm/clients/ai21_client.py +71 -1
- helm/clients/anthropic_client.py +7 -19
- helm/clients/huggingface_client.py +38 -37
- helm/clients/nvidia_nim_client.py +35 -0
- helm/clients/openai_client.py +18 -4
- helm/clients/palmyra_client.py +24 -0
- helm/clients/perspective_api_client.py +11 -6
- helm/clients/test_client.py +4 -6
- helm/clients/together_client.py +22 -0
- helm/clients/vision_language/open_flamingo_client.py +1 -2
- helm/clients/vision_language/palmyra_vision_client.py +28 -13
- helm/common/cache.py +8 -30
- helm/common/images_utils.py +6 -0
- helm/common/key_value_store.py +9 -9
- helm/common/mongo_key_value_store.py +5 -4
- helm/common/request.py +16 -0
- helm/common/test_cache.py +1 -48
- helm/common/tokenization_request.py +0 -9
- helm/config/model_deployments.yaml +444 -329
- helm/config/model_metadata.yaml +513 -111
- helm/config/tokenizer_configs.yaml +140 -11
- helm/proxy/example_queries.py +14 -21
- helm/proxy/server.py +0 -9
- helm/proxy/services/remote_service.py +0 -6
- helm/proxy/services/server_service.py +6 -20
- helm/proxy/services/service.py +0 -6
- helm/proxy/token_counters/test_auto_token_counter.py +2 -2
- helm/tokenizers/ai21_tokenizer.py +51 -59
- helm/tokenizers/cohere_tokenizer.py +0 -75
- helm/tokenizers/huggingface_tokenizer.py +0 -1
- helm/tokenizers/test_ai21_tokenizer.py +48 -0
- helm/benchmark/data_overlap/data_overlap_spec.py +0 -86
- helm/benchmark/data_overlap/export_scenario_text.py +0 -119
- helm/benchmark/data_overlap/light_scenario.py +0 -60
- helm/benchmark/scenarios/vision_language/image2structure/webpage/__init__.py +0 -0
- helm/benchmark/static/benchmarking.css +0 -156
- helm/benchmark/static/benchmarking.js +0 -1705
- helm/benchmark/static/config.js +0 -3
- helm/benchmark/static/general.js +0 -122
- helm/benchmark/static/images/crfm-logo.png +0 -0
- helm/benchmark/static/images/helm-logo-simple.png +0 -0
- helm/benchmark/static/images/helm-logo.png +0 -0
- helm/benchmark/static/images/language-model-helm.png +0 -0
- helm/benchmark/static/images/organizations/ai21.png +0 -0
- helm/benchmark/static/images/organizations/anthropic.png +0 -0
- helm/benchmark/static/images/organizations/bigscience.png +0 -0
- helm/benchmark/static/images/organizations/cohere.png +0 -0
- helm/benchmark/static/images/organizations/eleutherai.png +0 -0
- helm/benchmark/static/images/organizations/google.png +0 -0
- helm/benchmark/static/images/organizations/meta.png +0 -0
- helm/benchmark/static/images/organizations/microsoft.png +0 -0
- helm/benchmark/static/images/organizations/nvidia.png +0 -0
- helm/benchmark/static/images/organizations/openai.png +0 -0
- helm/benchmark/static/images/organizations/together.png +0 -0
- helm/benchmark/static/images/organizations/tsinghua-keg.png +0 -0
- helm/benchmark/static/images/organizations/yandex.png +0 -0
- helm/benchmark/static/images/scenarios-by-metrics.png +0 -0
- helm/benchmark/static/images/taxonomy-scenarios.png +0 -0
- helm/benchmark/static/index.html +0 -68
- helm/benchmark/static/info-icon.png +0 -0
- helm/benchmark/static/json-urls.js +0 -69
- helm/benchmark/static/plot-captions.js +0 -27
- helm/benchmark/static/utils.js +0 -285
- helm/benchmark/static_build/assets/index-30dbceba.js +0 -10
- helm/benchmark/static_build/assets/index-66b02d40.css +0 -1
- helm/benchmark/static_build/assets/vhelm-framework-cde7618a.png +0 -0
- helm/benchmark/static_build/assets/vhelm-model-6d812526.png +0 -0
- helm/benchmark/window_services/ai21_window_service.py +0 -247
- helm/benchmark/window_services/cohere_window_service.py +0 -101
- helm/benchmark/window_services/test_ai21_window_service.py +0 -163
- helm/benchmark/window_services/test_cohere_window_service.py +0 -75
- helm/benchmark/window_services/test_cohere_window_service_utils.py +0 -8328
- helm/benchmark/window_services/test_ice_window_service.py +0 -327
- helm/tokenizers/ice_tokenizer.py +0 -30
- helm/tokenizers/test_ice_tokenizer.py +0 -57
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.2.dist-info → crfm_helm-0.5.4.dist-info}/top_level.txt +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/__init__.py +0 -0
- /helm/benchmark/annotation/{image2structure → image2struct}/image_compiler_annotator.py +0 -0
- /helm/benchmark/{data_overlap → scenarios/vision_language/image2struct}/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure/image2structure_scenario.py → image2struct/image2struct_scenario.py} +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct/webpage}/__init__.py +0 -0
- /helm/benchmark/scenarios/vision_language/{image2structure → image2struct}/webpage/jekyll_server.py +0 -0
helm/benchmark/run_expander.py
CHANGED
|
@@ -10,6 +10,7 @@ from helm.benchmark.model_metadata_registry import (
|
|
|
10
10
|
get_all_text_models,
|
|
11
11
|
get_model_metadata,
|
|
12
12
|
get_model_names_with_tag,
|
|
13
|
+
DEPRECATED_MODEL_TAG,
|
|
13
14
|
FULL_FUNCTIONALITY_TEXT_MODEL_TAG,
|
|
14
15
|
LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG,
|
|
15
16
|
ABLATION_MODEL_TAG,
|
|
@@ -343,16 +344,6 @@ class AnthropicClaude3RunExpander(RunExpander):
|
|
|
343
344
|
run_spec,
|
|
344
345
|
adapter_spec=replace(run_spec.adapter_spec, stop_sequences=stop_sequences_with_non_whitespace),
|
|
345
346
|
)
|
|
346
|
-
if run_spec.adapter_spec.method == ADAPT_MULTIPLE_CHOICE_JOINT:
|
|
347
|
-
instructions = "Answer with only a single letter."
|
|
348
|
-
if run_spec.adapter_spec.instructions:
|
|
349
|
-
instructions = f"{instructions}\n\n{run_spec.adapter_spec.instructions}"
|
|
350
|
-
return [
|
|
351
|
-
replace(
|
|
352
|
-
run_spec,
|
|
353
|
-
adapter_spec=replace(run_spec.adapter_spec, instructions=instructions),
|
|
354
|
-
),
|
|
355
|
-
]
|
|
356
347
|
return [run_spec]
|
|
357
348
|
|
|
358
349
|
|
|
@@ -610,6 +601,12 @@ class ModelRunExpander(ReplaceValueRunExpander):
|
|
|
610
601
|
values_dict["ablation"] = models
|
|
611
602
|
else:
|
|
612
603
|
values_dict[family_name] = models
|
|
604
|
+
|
|
605
|
+
# For each of the keys above, filter out deprecated models.
|
|
606
|
+
deprecated_models = set(get_model_names_with_tag(DEPRECATED_MODEL_TAG))
|
|
607
|
+
for family_name in values_dict.keys():
|
|
608
|
+
values_dict[family_name] = [model for model in values_dict[family_name] if model not in deprecated_models]
|
|
609
|
+
|
|
613
610
|
return values_dict
|
|
614
611
|
|
|
615
612
|
|
|
@@ -1274,6 +1271,30 @@ class IncreaseMaxTokensRunExpander(RunExpander):
|
|
|
1274
1271
|
]
|
|
1275
1272
|
|
|
1276
1273
|
|
|
1274
|
+
class TemperatureRunExpander(RunExpander):
|
|
1275
|
+
"""
|
|
1276
|
+
Run expander for setting the temperature.
|
|
1277
|
+
"""
|
|
1278
|
+
|
|
1279
|
+
name = "temperature"
|
|
1280
|
+
|
|
1281
|
+
def __init__(self, value: float):
|
|
1282
|
+
"""
|
|
1283
|
+
Args:
|
|
1284
|
+
value (float): The amount to set temperature to
|
|
1285
|
+
"""
|
|
1286
|
+
self.value = value
|
|
1287
|
+
|
|
1288
|
+
def expand(self, run_spec: RunSpec) -> List[RunSpec]:
|
|
1289
|
+
adapter_spec = replace(run_spec.adapter_spec, temperature=self.value)
|
|
1290
|
+
return [
|
|
1291
|
+
replace(
|
|
1292
|
+
run_spec,
|
|
1293
|
+
adapter_spec=adapter_spec,
|
|
1294
|
+
),
|
|
1295
|
+
]
|
|
1296
|
+
|
|
1297
|
+
|
|
1277
1298
|
class IncreaseTemperatureRunExpander(RunExpander):
|
|
1278
1299
|
"""
|
|
1279
1300
|
Run expander for increasing the temperature.
|
|
@@ -1402,23 +1423,26 @@ class OutputFormatInstructions(RunExpander):
|
|
|
1402
1423
|
|
|
1403
1424
|
name = "output_format_instructions"
|
|
1404
1425
|
|
|
1426
|
+
_SUFFIX_SUFFIX = "_suffix"
|
|
1427
|
+
|
|
1405
1428
|
def __init__(self, scenario: str):
|
|
1406
|
-
|
|
1429
|
+
if scenario.endswith(OutputFormatInstructions._SUFFIX_SUFFIX):
|
|
1430
|
+
self.scenario = scenario[: -len(OutputFormatInstructions._SUFFIX_SUFFIX)]
|
|
1431
|
+
self.suffix = True
|
|
1432
|
+
else:
|
|
1433
|
+
self.scenario = scenario
|
|
1434
|
+
self.suffix = False
|
|
1407
1435
|
|
|
1408
1436
|
def expand(self, run_spec: RunSpec) -> List[RunSpec]:
|
|
1409
1437
|
if run_spec.adapter_spec.method == ADAPT_MULTIPLE_CHOICE_JOINT:
|
|
1410
1438
|
if self.scenario == "mmlu_only_last_question":
|
|
1411
1439
|
instructions = "Answer only the last question with only a single letter."
|
|
1440
|
+
elif self.scenario == "mmlu":
|
|
1441
|
+
instructions = "Answer with only a single letter."
|
|
1442
|
+
elif self.scenario == "mcqa":
|
|
1443
|
+
instructions = "Answer with only a single letter."
|
|
1412
1444
|
else:
|
|
1413
1445
|
instructions = "Answer with only a single letter."
|
|
1414
|
-
if run_spec.adapter_spec.instructions:
|
|
1415
|
-
instructions = f"{instructions}\n\n{run_spec.adapter_spec.instructions}"
|
|
1416
|
-
return [
|
|
1417
|
-
replace(
|
|
1418
|
-
run_spec,
|
|
1419
|
-
adapter_spec=replace(run_spec.adapter_spec, instructions=instructions),
|
|
1420
|
-
),
|
|
1421
|
-
]
|
|
1422
1446
|
elif run_spec.adapter_spec.method == ADAPT_GENERATION:
|
|
1423
1447
|
output_noun = run_spec.adapter_spec.output_prefix.split(":")[0]
|
|
1424
1448
|
if self.scenario == "narrative_qa":
|
|
@@ -1433,27 +1457,53 @@ class OutputFormatInstructions(RunExpander):
|
|
|
1433
1457
|
instructions = f"Answer with the {output_noun.lower()}."
|
|
1434
1458
|
else:
|
|
1435
1459
|
instructions = "Answer yes or no."
|
|
1460
|
+
elif self.scenario == "legalbench_abercrombie":
|
|
1461
|
+
instructions = "Answer with only 'generic', 'descriptive', 'suggestive', 'arbitrary' or 'fanciful'."
|
|
1462
|
+
elif self.scenario == "legalbench_function_of_decision_section":
|
|
1463
|
+
instructions = "Answer with only 'Facts', 'Procedural History', 'Issue', 'Rule', 'Analysis', 'Conclusion' or 'Decree'." # noqa: E501
|
|
1464
|
+
elif self.scenario == "legalbench_yes_or_no":
|
|
1465
|
+
instructions = "Answer with only 'Yes' or 'No'."
|
|
1436
1466
|
elif self.scenario == "wmt_14":
|
|
1437
1467
|
instructions = "Answer with the English translation."
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1468
|
+
elif self.scenario == "wmt_14_only_last_sentence":
|
|
1469
|
+
instructions = "Answer with only the English translation for the last sentence."
|
|
1470
|
+
elif self.scenario == "math":
|
|
1471
|
+
instructions = "Wrap the final answer with the \\boxed{} command."
|
|
1472
|
+
elif self.scenario == "numeric_nlg":
|
|
1473
|
+
instructions = "Answer with only description of the last table as a single paragraph on a single line."
|
|
1474
|
+
elif self.scenario == "tab_fact":
|
|
1442
1475
|
instructions = (
|
|
1443
|
-
|
|
1476
|
+
"Answer with only the classification of the last statement, either 'refuted' or 'entailed'."
|
|
1477
|
+
)
|
|
1478
|
+
elif self.scenario == "wikitq":
|
|
1479
|
+
instructions = (
|
|
1480
|
+
"Answer only the last question with a short answer. "
|
|
1481
|
+
"Avoid extra, unnecessary information in the answer."
|
|
1444
1482
|
)
|
|
1445
|
-
|
|
1446
|
-
if run_spec.adapter_spec.instructions:
|
|
1447
|
-
instructions = f"{instructions}\n\n{run_spec.adapter_spec.instructions}"
|
|
1448
1483
|
else:
|
|
1449
|
-
|
|
1484
|
+
raise ValueError(f"Unknown scenario {self.scenario}")
|
|
1485
|
+
|
|
1486
|
+
if self.suffix:
|
|
1450
1487
|
return [
|
|
1451
1488
|
replace(
|
|
1452
1489
|
run_spec,
|
|
1453
|
-
adapter_spec=replace(
|
|
1490
|
+
adapter_spec=replace(
|
|
1491
|
+
run_spec.adapter_spec,
|
|
1492
|
+
global_suffix=f"{run_spec.adapter_spec.global_suffix}\n\n{instructions}",
|
|
1493
|
+
),
|
|
1454
1494
|
),
|
|
1455
1495
|
]
|
|
1456
|
-
|
|
1496
|
+
|
|
1497
|
+
if run_spec.adapter_spec.instructions:
|
|
1498
|
+
instructions = f"{instructions}\n\n{run_spec.adapter_spec.instructions}"
|
|
1499
|
+
else:
|
|
1500
|
+
instructions = f"{instructions}\n"
|
|
1501
|
+
return [
|
|
1502
|
+
replace(
|
|
1503
|
+
run_spec,
|
|
1504
|
+
adapter_spec=replace(run_spec.adapter_spec, instructions=instructions),
|
|
1505
|
+
),
|
|
1506
|
+
]
|
|
1457
1507
|
|
|
1458
1508
|
|
|
1459
1509
|
RUN_EXPANDER_SUBCLASSES: List[Type[RunExpander]] = [
|
|
@@ -1479,6 +1529,9 @@ RUN_EXPANDER_SUBCLASSES: List[Type[RunExpander]] = [
|
|
|
1479
1529
|
ChatMLRunExpander,
|
|
1480
1530
|
EvalSplitRunExpander,
|
|
1481
1531
|
OutputFormatInstructions,
|
|
1532
|
+
TemperatureRunExpander,
|
|
1533
|
+
IncreaseTemperatureRunExpander,
|
|
1534
|
+
IncreaseMaxTokensRunExpander,
|
|
1482
1535
|
]
|
|
1483
1536
|
|
|
1484
1537
|
|