PyPI - crfm-helm - Versions diffs - 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (546) hide show

helm/benchmark/slurm_runner.py CHANGED Viewed

@@ -10,6 +10,10 @@ import sys
 from helm.common.codec import from_json, to_json
 from helm.common.general import write
+from helm.benchmark.config_registry import (
+    register_configs_from_directory,
+    register_builtin_configs_from_helm_package,
+)
 from helm.benchmark.executor import ExecutionSpec
 from helm.benchmark.runner import Runner, RunSpec, RunnerError
 from helm.benchmark.slurm_jobs import (
@@ -24,10 +28,11 @@ from helm.benchmark.slurm_jobs import (
 from helm.common.general import ensure_directory_exists
 from helm.common.hierarchical_logger import hlog, htrack_block
+from helm.benchmark.runner_config_registry import RUNNER_CONFIG
-_DEFAULT_MAX_CONCURRENT_WORKER_SLURM_JOBS = 8
 _MAX_CONCURRENT_WORKER_SLURM_JOBS_ENV_NAME = "HELM_MAX_CONCURRENT_WORKER_SLURM_JOBS"
 _SLURM_NODE_NAMES_ENV_NAME = "HELM_SLURM_NODE_NAMES"
+_DEFAULT_MAX_CONCURRENT_WORKER_SLURM = 8
 @dataclass
@@ -89,12 +94,15 @@ class SlurmRunner(Runner):
         self.slurm_runner_spec_path = os.path.join(self.slurm_base_dir, "slurm_runner_spec.json")
         # Configure max concurrent worker Slurm jobs from the environment variable.
-        # TODO: Read from a configuration file instead
         env_max_concurrent_worker_slurm_jobs = os.getenv(_MAX_CONCURRENT_WORKER_SLURM_JOBS_ENV_NAME)
         self.max_concurrent_worker_slurm_jobs = (
             int(env_max_concurrent_worker_slurm_jobs)
             if env_max_concurrent_worker_slurm_jobs
-            else _DEFAULT_MAX_CONCURRENT_WORKER_SLURM_JOBS
+            else (
+                RUNNER_CONFIG.helm_max_concurrent_workers
+                if RUNNER_CONFIG.helm_max_concurrent_workers > 0
+                else _DEFAULT_MAX_CONCURRENT_WORKER_SLURM
+            )
         )
     def run_all(self, run_specs: List[RunSpec]):
@@ -131,7 +139,7 @@ class SlurmRunner(Runner):
         # When running with multiple models, sorting by RunSpec.name is a heuristic that tries to
         # spread out the load evenly across multiple models, in order to avoid overloading any single model.
         for run_spec in sorted(run_specs, key=lambda run_spec: run_spec.name):
-            if self.skip_completed_runs and self._is_run_completed(run_spec):
+            if self.skip_completed_runs and self._is_run_completed(self._get_run_path(run_spec)):
                 skipped_run_specs.append(run_spec)
             else:
                 queued_run_specs.append(run_spec)
@@ -150,6 +158,9 @@ class SlurmRunner(Runner):
         # Info for all worker Slurm jobs
         run_name_to_slurm_job_info: Dict[str, _SlurmJobInfo] = {}
+        # Location to persist the info for all worker Slurm jobs
+        worker_slurm_jobs_path = os.path.join(self.slurm_base_dir, "worker_slurm_jobs.json")
         # Callback for cleaning up worker Slurm jobs
         def cancel_all_jobs():
             """Cancels all submitted worker Slurm jobs that are in a non-terminal state."""
@@ -159,6 +170,11 @@ class SlurmRunner(Runner):
                     if slurm_job_info.state not in TERMINAL_SLURM_JOB_STATES:
                         hlog(f"Cancelling worker Slurm job run {run_name} with Slurm job ID {slurm_job_info.id}")
                         cancel_slurm_job(slurm_job_info.id)
+                        slurm_job_info.state = SlurmJobState.CANCELLED
+            run_name_to_slurm_job_info_json = to_json(run_name_to_slurm_job_info)
+            hlog(f"Worker Slurm jobs: {run_name_to_slurm_job_info_json}")
+            hlog(f"Writing worker Slurm job states to {worker_slurm_jobs_path}")
+            write(file_path=worker_slurm_jobs_path, content=run_name_to_slurm_job_info_json)
         try:
             # Monitor submitted Slurm jobs for RunSpecs until an exit condition is triggered.
@@ -190,7 +206,6 @@ class SlurmRunner(Runner):
                     for slurm_job_info in run_name_to_slurm_job_info.values():
                         if slurm_job_info.state not in TERMINAL_SLURM_JOB_STATES:
                             slurm_job_info.state = get_slurm_job_state(slurm_job_info.id)
-                    worker_slurm_jobs_path = os.path.join(self.slurm_base_dir, "worker_slurm_jobs.json")
                     run_name_to_slurm_job_info_json = to_json(run_name_to_slurm_job_info)
                     hlog(f"Worker Slurm jobs: {run_name_to_slurm_job_info_json}")
                     hlog(f"Writing worker Slurm job states to {worker_slurm_jobs_path}")
@@ -215,8 +230,7 @@ class SlurmRunner(Runner):
                         break
                     # Refresh every minute
-                    # TODO: Make this period configurable
-                    time.sleep(60)
+                    time.sleep(RUNNER_CONFIG.slurm_monitor_interval)
         finally:
             # Cleanup by cancelling all jobs during program termination or if an exception is raised.
             cancel_all_jobs()
@@ -254,34 +268,48 @@ class SlurmRunner(Runner):
                 run_spec_path,
             ]
         )
-        # TODO: Make default Slurm arguments configurable.
-        raw_slurm_args: Dict[str, str] = {
-            "account": "nlp",
-            "cpus_per_task": "4",
-            "mem": "32G",
-            "gres": "gpu:0",
-            "open_mode": "append",
-            "partition": "john",
-            "time": "14-0",  # Deadline of 14 days
-            "mail_type": "FAIL",
-            "job_name": run_name,
-            "output": log_path,
-            "chdir": os.getcwd(),
-        }
-        # TODO: Move resource requirements into RunSpec.
-        slurm_node_names = os.getenv(_SLURM_NODE_NAMES_ENV_NAME)
-        if run_spec.name.startswith("msmarco:"):
-            raw_slurm_args["mem"] = "64G"
-        if "device=cuda" in run_spec.name:
-            raw_slurm_args["gres"] = "gpu:1"
-            raw_slurm_args["partition"] = "jag-hi"
-        if "model=huggingface" in run_spec.name:
-            raw_slurm_args["gres"] = "gpu:1"
-            raw_slurm_args["partition"] = "sphinx"
-            if not slurm_node_names or "sphinx" not in slurm_node_names:
-                raise Exception(f"Environment variable {_SLURM_NODE_NAMES_ENV_NAME} must be set to sphinx node names")
-        if slurm_node_names:
-            raw_slurm_args["nodelist"] = slurm_node_names
+        if RUNNER_CONFIG.slurm_args is None:
+            raw_slurm_args: Dict[str, str] = {
+                "account": "nlp",
+                "cpus_per_task": "4",
+                "mem": "32G",
+                "gres": "gpu:0",
+                "open_mode": "append",
+                "partition": "john",
+                "time": "14-0",  # Deadline of 14 days
+                "mail_type": "FAIL",
+                "job_name": run_name,
+                "output": log_path,
+                "chdir": os.getcwd(),
+            }
+            # TODO: Move resource requirements into RunSpec.
+            slurm_node_names = os.getenv(_SLURM_NODE_NAMES_ENV_NAME)
+            if run_spec.name.startswith("msmarco:"):
+                raw_slurm_args["mem"] = "64G"
+            if "device=cuda" in run_spec.name:
+                raw_slurm_args["gres"] = "gpu:1"
+                raw_slurm_args["partition"] = "jag-hi"
+            if "model=huggingface" in run_spec.name:
+                raw_slurm_args["gres"] = "gpu:1"
+                raw_slurm_args["partition"] = "sphinx"
+                if not slurm_node_names or "sphinx" not in slurm_node_names:
+                    raise Exception(
+                        f"Environment variable {_SLURM_NODE_NAMES_ENV_NAME} must be set to sphinx node names"
+                    )
+            if slurm_node_names:
+                raw_slurm_args["nodelist"] = slurm_node_names
+        else:
+            raw_slurm_args = RUNNER_CONFIG.slurm_args
+            dynamic_slurm_args = {
+                "job_name": run_name,
+                "output": log_path,
+                "chdir": os.getcwd(),
+            }
+            # User should not set these manually, overwrite them if necessary
+            raw_slurm_args.update(dynamic_slurm_args)
         slurm_args: Dict[str, str] = {key: shlex.quote(value) for key, value in raw_slurm_args.items()}
         # Uncomment this to get notification emails from Slurm for Slurm worker jobs.
@@ -293,27 +321,15 @@ class SlurmRunner(Runner):
         return slurm_job_id
-def run_as_worker(slurm_runner_spec_path: str, run_spec_path: str):
-    """Deserialize SlurmRunner and RunSpec from the given files, then run the RunSpec with the SlurmRunner.
-    Used by the worker Slurm jobs only."""
-    with open(slurm_runner_spec_path, "r") as f:
-        slurm_runner_spec = from_json(f.read(), SlurmRunnerSpec)
-    with open(run_spec_path, "r") as f:
-        run_spec = from_json(f.read(), RunSpec)
-    slurm_runner = SlurmRunner(**slurm_runner_spec.to_kwargs())
-    slurm_runner.run_one(run_spec)
 def main():
     """Entry point for the SlurmRunner's worker Slurm jobs that run a single RunSpec.
     This entry point should only be used by SlurmRunner. Users should use `helm-run` instead.
     SlurmRunner has to use this entry point instead of helm-run because there is no way to
     specify the worker Slurm job parameters through `helm-run`. In particular, there is no way
-    to run a specific `RunSpec` using the `--run-specs` parameter of `helm-run`, because the
-    `run-specs` argument is a `RunSpec` description (not a `RunSpec`), and there is no way to
-    convert a `RunSpec` into a `RunSpec` description."""
+    to run a specific `RunSpec` using the `--run-entries` parameter of `helm-run`, because the
+    `run-entries` argument contains `RunEntry` description (not `RunSpec`s), and there is no way to
+    convert a `RunSpec` into a `RunEntry` description."""
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--slurm-runner-spec-path",
@@ -328,7 +344,19 @@ def main():
         required=True,
     )
     args = parser.parse_args()
-    run_as_worker(slurm_runner_spec_path=args.slurm_runner_spec_path, run_spec_path=args.run_spec_path)
+    # Deserialize SlurmRunner and RunSpec from the given files, then run the RunSpec with the SlurmRunner.
+    with open(args.slurm_runner_spec_path, "r") as f:
+        slurm_runner_spec = from_json(f.read(), SlurmRunnerSpec)
+    with open(args.run_spec_path, "r") as f:
+        run_spec = from_json(f.read(), RunSpec)
+    register_builtin_configs_from_helm_package()
+    if slurm_runner_spec.execution_spec.local_path is not None:
+        register_configs_from_directory(slurm_runner_spec.execution_spec.local_path)
+    slurm_runner = SlurmRunner(**slurm_runner_spec.to_kwargs())
+    slurm_runner.run_one(run_spec)
 if __name__ == "__main__":

helm/benchmark/static/benchmarking.js CHANGED Viewed

@@ -92,7 +92,7 @@ $(function () {
     $table.append($header);
     schema.run_groups.forEach((group) => {
-      if (group.category) {
+      if (group.category && group.category !== "Scenarios") {
         return;
       }
       const href = groupUrl(group.name);
@@ -492,7 +492,7 @@ $(function () {
               {{~#if perturbation~}}
                 {{highlightNewWords input.text ../unperturbedInstance.input.text}}
               {{~else~}}
-                {{input.text}}
+                {{{input.text}}}
               {{~/if~}}
             </div>
           {{/if}}
@@ -1595,8 +1595,7 @@ $(function () {
     window.SUITE = urlParams.suite;
   }
-  const schemaPromise = $.get("schema.yaml", {}, (response) => {
-    const raw = jsyaml.load(response);
+  const schemaPromise = $.getJSON(schemaJsonUrl(), {}, (raw) => {
     console.log("schema", raw);
     schema = new Schema(raw);
   });

helm/benchmark/static/contamination.yaml CHANGED Viewed

@@ -44,6 +44,7 @@ points:
     - anthropic/stanford-online-all-v4-s3
     - anthropic/claude-v1.3
     - anthropic/claude-instant-v1
+    - anthropic/claude-instant-1.2
     groups:
     - the_pile
     level: strong
@@ -83,7 +84,6 @@ points:
     - openai/code-davinci-002
     - openai/code-davinci-001
     - openai/code-cushman-001
-    - openai/chat-gpt
     groups:
     - natural_qa_closedbook
     - natural_qa_openbook_longans

helm/benchmark/static/images/organizations/together.png CHANGED Viewed

Binary file

helm/benchmark/static/json-urls.js CHANGED Viewed

@@ -8,6 +8,10 @@ function versionBaseUrl() {
   }
 }
+function schemaJsonUrl() {
+  return `${versionBaseUrl()}/schema.json`;
+}
 function summaryJsonUrl() {
   return `${versionBaseUrl()}/summary.json`;
 }

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl