crfm-helm 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (68)
  1. {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/METADATA +11 -8
  2. {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/RECORD +67 -38
  3. {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/WHEEL +1 -1
  4. {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/entry_points.txt +2 -1
  5. helm/benchmark/__init__.py +13 -0
  6. helm/benchmark/adaptation/adapter_spec.py +3 -0
  7. helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -7
  8. helm/benchmark/augmentations/correct_to_misspelling.json +1 -0
  9. helm/benchmark/contamination/__init__.py +0 -0
  10. helm/benchmark/metrics/classification_metrics.py +70 -0
  11. helm/benchmark/metrics/machine_translation_metrics.py +36 -0
  12. helm/benchmark/metrics/summarization_metrics.py +7 -8
  13. helm/benchmark/metrics/test_classification_metrics.py +150 -0
  14. helm/benchmark/presentation/create_plots.py +617 -0
  15. helm/benchmark/presentation/run_display.py +7 -48
  16. helm/benchmark/presentation/summarize.py +4 -2
  17. helm/benchmark/presentation/test_create_plots.py +32 -0
  18. helm/benchmark/run.py +144 -48
  19. helm/benchmark/run_expander.py +164 -47
  20. helm/benchmark/run_specs.py +346 -39
  21. helm/benchmark/runner.py +34 -6
  22. helm/benchmark/scenarios/copyright_scenario.py +1 -1
  23. helm/benchmark/scenarios/covid_dialog_scenario.py +84 -0
  24. helm/benchmark/scenarios/imdb_listdir.json +50014 -0
  25. helm/benchmark/scenarios/lex_glue_scenario.py +253 -0
  26. helm/benchmark/scenarios/lextreme_scenario.py +458 -0
  27. helm/benchmark/scenarios/me_q_sum_scenario.py +86 -0
  28. helm/benchmark/scenarios/med_dialog_scenario.py +132 -0
  29. helm/benchmark/scenarios/med_mcqa_scenario.py +102 -0
  30. helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +119 -0
  31. helm/benchmark/scenarios/med_qa_scenario.py +96 -0
  32. helm/benchmark/scenarios/opinions_qa_scenario.py +194 -0
  33. helm/benchmark/scenarios/scenario.py +5 -0
  34. helm/benchmark/scenarios/the_pile_scenario.py +1 -1
  35. helm/benchmark/scenarios/wmt_14_scenario.py +96 -0
  36. helm/benchmark/static/benchmarking.css +14 -0
  37. helm/benchmark/static/benchmarking.js +43 -0
  38. helm/benchmark/static/index.html +2 -0
  39. helm/benchmark/static/json-urls.js +4 -0
  40. helm/benchmark/static/plot-captions.js +16 -0
  41. helm/benchmark/static/schema.yaml +154 -1
  42. helm/benchmark/window_services/cohere_window_service.py +20 -0
  43. helm/benchmark/window_services/flan_t5_window_service.py +29 -0
  44. helm/benchmark/window_services/huggingface_window_service.py +39 -0
  45. helm/benchmark/window_services/santacoder_window_service.py +27 -0
  46. helm/benchmark/window_services/test_flan_t5_window_service.py +12 -0
  47. helm/benchmark/window_services/wider_ai21_window_service.py +13 -0
  48. helm/benchmark/window_services/window_service_factory.py +34 -7
  49. helm/common/codec.py +123 -0
  50. helm/common/general.py +12 -5
  51. helm/common/test_codec.py +144 -0
  52. helm/proxy/clients/aleph_alpha_client.py +47 -28
  53. helm/proxy/clients/auto_client.py +32 -24
  54. helm/proxy/clients/google_client.py +88 -0
  55. helm/proxy/clients/huggingface_client.py +32 -16
  56. helm/proxy/clients/huggingface_model_registry.py +111 -0
  57. helm/proxy/clients/huggingface_tokenizer.py +25 -7
  58. helm/proxy/clients/openai_client.py +60 -2
  59. helm/proxy/clients/test_huggingface_model_registry.py +57 -0
  60. helm/proxy/clients/test_huggingface_tokenizer.py +3 -0
  61. helm/proxy/clients/together_client.py +17 -2
  62. helm/proxy/clients/yalm_tokenizer/voc_100b.sp +0 -0
  63. helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py +8 -2
  64. helm/proxy/models.py +115 -7
  65. helm/proxy/test_models.py +1 -1
  66. helm/benchmark/presentation/present.py +0 -249
  67. {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/LICENSE +0 -0
  68. {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/top_level.txt +0 -0
helm/proxy/models.py CHANGED
@@ -11,8 +11,14 @@ EMBEDDING_MODEL_TAG: str = "embedding"
 FULL_FUNCTIONALITY_TEXT_MODEL_TAG: str = "full_functionality_text"
 LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG: str = "limited_functionality_text"
 
+# ChatML format
+CHATML_MODEL_TAG: str = "chatml"
+
 # For OpenAI models with wider context windows
-WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"
+WIDER_CONTEXT_WINDOW_TAG: str = "wider_context_window"  # 4000 tokens
+
+# For AI21 Jurassic-2 models with wider context windows
+AI21_WIDER_CONTEXT_WINDOW_TAG: str = "ai21_wider_context_window"
 
 # To fetch models that use these tokenizers
 GPT2_TOKENIZER_TAG: str = "gpt2_tokenizer"
@@ -122,6 +128,31 @@ ALL_MODELS = [
         description="Jurassic-1 Large (7.5B parameters)",
         tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
     ),
+    # AI21 Jurassic-2 Models: https://www.ai21.com/blog/introducing-j2
+    Model(
+        group="jurassic",
+        creator_organization="AI21 Labs",
+        name="ai21/j2-jumbo",
+        display_name="Jurassic-2 Jumbo (178B)",
+        description="Jurassic-2 Jumbo (178B parameters)",
+        tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
+    ),
+    Model(
+        group="jurassic",
+        creator_organization="AI21 Labs",
+        name="ai21/j2-grande",
+        display_name="Jurassic-2 Grande (17B)",
+        description="Jurassic-2 Grande (17B parameters) with a few tweaks to the training process.",
+        tags=[TEXT_MODEL_TAG, AI21_WIDER_CONTEXT_WINDOW_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
+    ),
+    Model(
+        group="jurassic",
+        creator_organization="AI21 Labs",
+        name="ai21/j2-large",
+        display_name="Jurassic-2 Large (7.5B)",
+        description="Jurassic-2 Large (7.5B parameters)",
+        tags=[TEXT_MODEL_TAG, AI21_WIDER_CONTEXT_WINDOW_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, AI21_TOKENIZER_TAG],
+    ),
     # Aleph Alpha's Luminous models: https://docs.aleph-alpha.com/docs/introduction/luminous
     Model(
         group="luminous",
@@ -250,6 +281,24 @@ ALL_MODELS = [
         description="Cohere small v20220720 (410M parameters)",
         tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
     ),
+    Model(
+        group="cohere",
+        creator_organization="Cohere",
+        name="cohere/command-medium-beta",
+        display_name="Cohere Command beta (6.1B)",
+        description="Cohere Command beta (6.1B parameters) is fine-tuned from the medium model "
+        "to respond well with instruction-like prompts",
+        tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
+    ),
+    Model(
+        group="cohere",
+        creator_organization="Cohere",
+        name="cohere/command-xlarge-beta",
+        display_name="Cohere Command beta (52.4B)",
+        description="Cohere Command beta (52.4B parameters) is fine-tuned from the XL model "
+        "to respond well with instruction-like prompts",
+        tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, COHERE_TOKENIZER_TAG],
+    ),
     # EleutherAI
     Model(
         group="together",
@@ -285,6 +334,14 @@ ALL_MODELS = [
         tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, GPTJ_TOKENIZER_TAG],
     ),
     # HuggingFace
+    Model(
+        group="huggingface",
+        creator_organization="OpenAI",
+        name="huggingface/gpt2",
+        display_name="GPT-2 (1.5B)",
+        description="GPT-2 (1.5B parameters)",
+        tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, GPT2_TOKENIZER_TAG],
+    ),
     Model(
         group="huggingface",
         creator_organization="EleutherAI",
@@ -293,6 +350,15 @@ ALL_MODELS = [
         description="GPT-J (6B parameters) autoregressive language model trained on The Pile.",
         tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, GPTJ_TOKENIZER_TAG],
     ),
+    Model(
+        group="huggingface",
+        creator_organization="BigCode",
+        name="huggingface/santacoder",
+        display_name="SantaCoder (1.1B)",
+        description="SantaCoder (1.1B parameters) model trained on the Python, Java, and "
+        "JavaScript subset of The Stack (v1.1).",
+        tags=[CODE_MODEL_TAG],
+    ),
     # Google
     Model(
         group="together",
@@ -306,6 +372,15 @@ ALL_MODELS = [
         # Does not support echo=True
         tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG],
     ),
+    Model(
+        group="together",
+        creator_organization="Google",
+        name="together/flan-t5-xxl",
+        display_name="Flan-T5 (11B)",
+        description="Flan-T5 (11B parameters) is T5 fine-tuned on 1.8K tasks.",
+        # Does not support echo=True
+        tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG, NO_NEWLINES_TAG],
+    ),
     Model(
         group="together",
         creator_organization="Google",
@@ -323,12 +398,13 @@ ALL_MODELS = [
             NLG_PREFIX_TAG,
         ],
     ),
+    # H3 model
     Model(
-        group="huggingface",
-        creator_organization="OpenAI",
-        name="huggingface/gpt2",
-        display_name="GPT-2 (1.5B)",
-        description="GPT-2 (1.5B parameters)",
+        group="together",
+        creator_organization="HazyResearch",
+        name="together/h3-2.7b",
+        display_name="H3 (2.7B)",
+        description="H3 (2.7B parameters) is a decoder-only language model based on state space models.",
         tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, GPT2_TOKENIZER_TAG],
     ),
     # OPT
@@ -480,7 +556,21 @@ ALL_MODELS = [
         description="Code model that is a stronger, multilingual version of the Codex (12B) model in the paper.",
         tags=[CODE_MODEL_TAG, GPT2_TOKENIZER_TAG],
     ),
-    # ChatGPT - https://openai.com/blog/chatgpt
+    # ChatGPT: https://openai.com/blog/chatgpt
+    Model(
+        group="gpt3",
+        creator_organization="OpenAI",
+        name="openai/gpt-3.5-turbo-0301",
+        display_name="gpt-3.5-turbo-0301",
+        # https://platform.openai.com/docs/models/gpt-3-5
+        description="Sibling model of text-davinci-003 is optimized for chat but works well "
+        "for traditional completions tasks as well. Snapshot from 2023-03-01.",
+        # The claimed sequence length is 4096, but as of 2023-03-07, the empirical usable
+        # sequence length is smaller at 4087 with one user input message and one assistant
+        # output message because ChatGPT uses special tokens for message roles and boundaries.
+        # We use a rounded-down sequence length of 4000 to account for these special tokens.
+        tags=[TEXT_MODEL_TAG, WIDER_CONTEXT_WINDOW_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, GPT2_TOKENIZER_TAG],
+    ),
     Model(
         group="gpt3",
         creator_organization="OpenAI",
@@ -532,6 +622,14 @@ ALL_MODELS = [
         description="GPT-JT (6B parameters) is a fork of GPT-J",
         tags=[TEXT_MODEL_TAG, FULL_FUNCTIONALITY_TEXT_MODEL_TAG, GPTJ_TOKENIZER_TAG],
     ),
+    Model(
+        group="together",
+        creator_organization="Together",
+        name="together/gpt-neoxt-chat-base-20b",
+        display_name="GPT-NeoXT-Chat-Base (20B)",
+        description="GPT-NeoXT-Chat-Base (20B parameters) is a fork of GPT-NeoX",
+        tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, CHATML_MODEL_TAG, GPTNEO_TOKENIZER_TAG],
+    ),
     # Tsinghua
     Model(
         group="together",
@@ -557,6 +655,16 @@ ALL_MODELS = [
         # https://github.com/stanford-crfm/benchmarking/issues/738
         tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, ABLATION_MODEL_TAG],
     ),
+    # PaLM
+    Model(
+        group="google",
+        creator_organization="Google",
+        name="google/palm",
+        display_name="PaLM (540B)",
+        description="Pathways Language Model (540B parameters) is trained using 6144 TPU v4 chips "
+        "([paper](https://arxiv.org/pdf/2204.02311.pdf)).",
+        tags=[TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG],
+    ),
     # For debugging
     Model(
         group="simple",
helm/proxy/test_models.py CHANGED
@@ -24,4 +24,4 @@ def test_get_models_by_organization():
 
 
 def test_all_code_models():
-    assert get_all_code_models() == ["openai/code-davinci-002", "openai/code-davinci-001", "openai/code-cushman-001"]
+    assert "openai/code-davinci-002" in get_all_code_models()
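
The old test pinned get_all_code_models() to an exact three-element list, so registering any new code model, such as huggingface/santacoder with its tags=[CODE_MODEL_TAG] above, would break it. The membership form tolerates growth; a sketch of extending it, assuming get_all_code_models() returns the names of all models tagged CODE_MODEL_TAG:

from helm.proxy.models import get_all_code_models

# Assert on known members rather than the exact list, so new code models don't break the test.
code_models = get_all_code_models()
assert "openai/code-davinci-002" in code_models
assert "huggingface/santacoder" in code_models  # added by this diff with tags=[CODE_MODEL_TAG]
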
helm/benchmark/presentation/present.py DELETED
@@ -1,249 +0,0 @@
-import argparse
-import os
-import traceback
-
-from tqdm import tqdm
-from typing import List, Optional
-
-from helm.common.authentication import Authentication
-from helm.common.general import write_lines
-from helm.common.hierarchical_logger import hlog, htrack
-from helm.benchmark.run import run_benchmarking, add_run_args, validate_args, LATEST_SYMLINK
-from helm.benchmark.runner import RunSpec
-from helm.benchmark.presentation.run_entry import read_run_entries
-from helm.proxy.services.remote_service import add_service_args, create_authentication
-
-"""
-Runs all the RunSpecs in run_specs.conf and outputs JSON files.
-TODO: rename this file to `run_all.py`
-
-Usage:
-
-    venv/bin/helm-run
-
-"""
-
-
-class AllRunner:
-    """Runs all RunSpecs specified in the configuration file."""
-
-    def __init__(
-        self,
-        auth: Authentication,
-        conf_paths: List[str],
-        url: str,
-        local: bool,
-        local_path: str,
-        output_path: str,
-        suite: str,
-        num_threads: int,
-        dry_run: Optional[bool],
-        skip_instances: bool,
-        max_eval_instances: Optional[int],
-        num_train_trials: Optional[int],
-        models_to_run: Optional[List[str]],
-        groups_to_run: Optional[List[str]],
-        exit_on_error: bool,
-        priority: Optional[int],
-        mongo_uri: str,
-    ):
-        self.auth: Authentication = auth
-        self.conf_paths: List[str] = conf_paths
-        self.url: str = url
-        self.local: bool = local
-        self.local_path: str = local_path
-        self.output_path: str = output_path
-        self.suite: str = suite
-        self.num_threads: int = num_threads
-        self.dry_run: Optional[bool] = dry_run
-        self.skip_instances: bool = skip_instances
-        self.max_eval_instances: Optional[int] = max_eval_instances
-        self.num_train_trials: Optional[int] = num_train_trials
-        self.models_to_run: Optional[List[str]] = models_to_run
-        self.groups_to_run: Optional[List[str]] = groups_to_run
-        self.exit_on_error: bool = exit_on_error
-        self.priority: Optional[int] = priority
-        self.mongo_uri = mongo_uri
-
-    @htrack(None)
-    def run(self):
-        run_specs: List[RunSpec] = []
-        runs_dir: str = os.path.join(self.output_path, "runs")
-        suite_dir: str = os.path.join(runs_dir, self.suite)
-
-        run_entries = read_run_entries(self.conf_paths)
-
-        for entry in tqdm(run_entries.entries):
-            # Filter by priority
-            priority: int = entry.priority
-            if self.priority is not None and priority > self.priority:
-                continue
-
-            try:
-                new_run_specs = run_benchmarking(
-                    run_spec_descriptions=[entry.description],
-                    auth=self.auth,
-                    url=self.url,
-                    local=self.local,
-                    local_path=self.local_path,
-                    num_threads=self.num_threads,
-                    output_path=self.output_path,
-                    suite=self.suite,
-                    dry_run=self.dry_run,
-                    skip_instances=self.skip_instances,
-                    max_eval_instances=self.max_eval_instances,
-                    num_train_trials=self.num_train_trials,
-                    groups=entry.groups,
-                    models_to_run=self.models_to_run,
-                    groups_to_run=self.groups_to_run,
-                    mongo_uri=self.mongo_uri,
-                )
-                run_specs.extend(new_run_specs)
-
-            except Exception as e:
-                if self.exit_on_error:
-                    raise e
-                else:
-                    hlog(f"Error when running {entry.description}:\n{traceback.format_exc()}")
-
-        if len(run_specs) == 0:
-            hlog("There were no RunSpecs or they got filtered out.")
-            return
-
-        hlog(f"{len(run_entries.entries)} entries produced into {len(run_specs)} run specs")
-
-        if self.skip_instances:
-            self.write_parallel_commands(suite_dir, run_specs)
-
-        # Create a symlink runs/latest -> runs/<name_of_suite>,
-        # so runs/latest always points to the latest run suite.
-        symlink_path: str = os.path.abspath(os.path.join(runs_dir, LATEST_SYMLINK))
-        if os.path.islink(symlink_path):
-            # Remove the previous symlink if it exists.
-            os.unlink(symlink_path)
-        os.symlink(os.path.abspath(suite_dir), symlink_path)
-
-    def write_parallel_commands(self, suite_dir: str, run_specs: List[RunSpec]):
-        """
-        Print out scripts to run after.
-        """
-        # Print out all the models and groups that we're touching.
-        models = set()
-        groups = set()
-        for run_spec in run_specs:
-            models.add(run_spec.adapter_spec.model)
-            for group in run_spec.groups:
-                groups.add(group)
-        hlog(f"{len(models)} models: {' '.join(models)}")
-        hlog(f"{len(groups)} groups: {' '.join(groups)}")
-
-        # Write wrapper for helm-run that can be used through Slurm
-        lines = [
-            "#!/bin/bash",
-            "",
-            ". venv/bin/activate",
-            'helm-run "$@"',
-        ]
-        write_lines(os.path.join(suite_dir, "helm-run.sh"), lines)
-
-        # Write out bash script for launching the entire benchmark
-        lines = []
-        for model in models:
-            for group in groups:
-                # Try to match the arguments of `run_benchmarking`
-                # Build arguments
-                present_args = []
-                present_args.append(f"--confs {' '.join(self.conf_paths)}")
-                if self.local:
-                    present_args.append("--local")
-                present_args.append(f"--num-threads {self.num_threads}")
-                present_args.append(f"--suite {self.suite}")
-                if self.max_eval_instances is not None:
-                    present_args.append(f"--max-eval-instances {self.max_eval_instances}")
-                present_args.append(f"--models-to-run {model}")
-                present_args.append(f"--scenario-groups-to-run {group}")
-
-                lines.append(
-                    f"sbatch --partition john "
-                    f"--cpus {self.num_threads} "
-                    f"-o benchmark_output/runs/{self.suite}/slurm-%j.out "
-                    f"{suite_dir}/helm-run.sh "
-                    f"{' '.join(present_args)}"
-                )
-        lines.append("echo '# Run these after Slurm jobs terminate'")
-        lines.append(f"echo 'helm-run --local --suite {self.suite} --skip-instances'")
-        lines.append(f"echo 'helm-summarize --suite {self.suite}'")
-        write_lines(os.path.join(suite_dir, "run-all.sh"), lines)
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    add_service_args(parser)
-    parser.add_argument(
-        "-c",
-        "--conf-paths",
-        nargs="+",
-        help="Where to read RunSpecs to run from",
-        default=["src/helm/benchmark/presentation/run_specs.conf"],
-    )
-    parser.add_argument(
-        "--models-to-run",
-        nargs="+",
-        help="Only RunSpecs with these models specified. If no model is specified, runs with all models.",
-        default=None,
-    )
-    parser.add_argument(
-        "--groups-to-run",
-        nargs="+",
-        help="Only RunSpecs with these (scenario) groups specified. " "If no group is specified, runs with all groups.",
-        default=None,
-    )
-    parser.add_argument(
-        "--exit-on-error",
-        action="store_true",
-        default=None,
-        help="Fail and exit immediately if a particular RunSpec fails.",
-    )
-    parser.add_argument(
-        "--priority",
-        type=int,
-        default=None,
-        help="Run RunSpecs with priority less than or equal to this number. "
-        "If a value for --priority is not specified, run on everything",
-    )
-    add_run_args(parser)
-    args = parser.parse_args()
-    validate_args(args)
-
-    runner = AllRunner(
-        # Use a dummy API key when `skip_instances` or `local` is set.
-        # The benchmarking framework will not make any requests to the proxy server when
-        # `skip_instances` is set, so a valid API key is not necessary.
-        # Setting `local` will run and cache everything locally.
-        auth=Authentication("") if args.skip_instances or args.local else create_authentication(args),
-        conf_paths=args.conf_paths,
-        url=args.server_url,
-        local=args.local,
-        local_path=args.local_path,
-        output_path=args.output_path,
-        suite=args.suite,
-        num_threads=args.num_threads,
-        dry_run=args.dry_run,
-        skip_instances=args.skip_instances,
-        max_eval_instances=args.max_eval_instances,
-        num_train_trials=args.num_train_trials,
-        models_to_run=args.models_to_run,
-        groups_to_run=args.groups_to_run,
-        exit_on_error=args.exit_on_error,
-        priority=args.priority,
-        mongo_uri=args.mongo_uri,
-    )
-
-    # Run the benchmark!
-    runner.run()
-
-    hlog("Done.")
-
-
-if __name__ == "__main__":
-    main()
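
present.py is deleted outright rather than edited; its own TODO ("rename this file to `run_all.py`") and the +144 -48 change to helm/benchmark/run.py in the file list suggest the conf-driven batch runner was folded into helm-run itself. One idiom from the deleted code worth keeping in view is the runs/latest symlink maintenance; a standalone sketch of just that step (the helper name is ours, not HELM's):

import os

def update_latest_symlink(runs_dir: str, suite: str, link_name: str = "latest") -> None:
    """Point runs/<link_name> at runs/<suite>, replacing any stale link first."""
    symlink_path = os.path.abspath(os.path.join(runs_dir, link_name))
    if os.path.islink(symlink_path):
        os.unlink(symlink_path)  # remove the previous symlink if it exists
    os.symlink(os.path.abspath(os.path.join(runs_dir, suite)), symlink_path)
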