PyPI - crfm-helm - Versions diffs - 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (546) hide show

helm/benchmark/presentation/test_run_entry.py CHANGED Viewed

@@ -1,16 +1,21 @@
 import os
+import pytest
 from helm.common.object_spec import parse_object_spec
 from helm.benchmark.presentation.run_entry import read_run_entries
-from helm.benchmark.run_specs import construct_run_specs
-from helm.benchmark import vlm_run_specs  # noqa
+from helm.benchmark.run_spec_factory import construct_run_specs
-def test_read_all_specs():
-    """Read all the run entries and make sure they parse and we can instantiate them."""
+def list_fnames():
     base_path = os.path.dirname(__file__)
-    for fname in os.listdir(base_path):
-        if fname.endswith(".conf"):
-            run_entries = read_run_entries([os.path.join(base_path, fname)])
-            for entry in run_entries.entries:
-                construct_run_specs(parse_object_spec(entry.description))
+    return [os.path.join(base_path, fname) for fname in os.listdir(base_path) if fname.endswith(".conf")]
+class TestRunEntry:
+    """Read all the run entries and make sure they parse and we can instantiate them."""
+    @pytest.mark.parametrize("fname", list_fnames())
+    def test_read_all_specs(self, fname: str):
+        run_entries = read_run_entries([fname])
+        for entry in run_entries.entries:
+            construct_run_specs(parse_object_spec(entry.description))

helm/benchmark/presentation/test_summarize.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import tempfile
 from helm.benchmark.presentation.summarize import Summarizer
+from helm.benchmark.presentation.schema import get_default_schema_path
 from helm.common.general import ensure_directory_exists
@@ -12,9 +13,11 @@ def test_summarize_suite():
             release=None,
             suites=None,
             suite="test_suite",
+            schema_path=get_default_schema_path(),
             output_path=output_path,
             verbose=False,
             num_threads=4,
+            allow_unknown_models=True,
         )
         summarizer.run_pipeline(skip_completed=True, num_instances=1000)
         assert os.path.isfile(os.path.join(output_path, "runs", "test_suite", "groups.json"))
@@ -28,9 +31,11 @@ def test_summarize_release():
             release="test_release",
             suites=["test_suite_1", "test_suite_2"],
             suite=None,
+            schema_path=get_default_schema_path(),
             output_path=output_path,
             verbose=False,
             num_threads=4,
+            allow_unknown_models=True,
         )
         summarizer.run_pipeline(skip_completed=True, num_instances=1000)
         assert os.path.isfile(os.path.join(output_path, "releases", "test_release", "groups.json"))

helm/benchmark/run.py CHANGED Viewed

@@ -1,25 +1,26 @@
 import argparse
 from dataclasses import replace
+import os
 from typing import List, Optional
-from helm.benchmark.huggingface_registration import (
-    register_huggingface_hub_model_from_flag_value,
-    register_huggingface_local_model_from_flag_value,
-)
 from helm.benchmark.presentation.run_entry import RunEntry, read_run_entries
+from helm.common.cache_backend_config import MongoCacheBackendConfig, SqliteCacheBackendConfig
+from helm.common.general import ensure_directory_exists
 from helm.common.hierarchical_logger import hlog, htrack, htrack_block
 from helm.common.authentication import Authentication
 from helm.common.object_spec import parse_object_spec, get_class_by_name
-from helm.proxy.clients.remote_model_registry import check_and_register_remote_model
 from helm.proxy.services.remote_service import create_authentication, add_service_args
+from helm.proxy.services.service import CACHE_DIR
-from helm.benchmark.model_metadata_registry import register_model_metadata_from_path
-from helm.benchmark.model_deployment_registry import register_model_deployments_from_path
+from helm.benchmark.config_registry import (
+    register_configs_from_directory,
+    register_builtin_configs_from_helm_package,
+)
 from helm.benchmark.adaptation.adapter_spec import AdapterSpec
-from helm.benchmark import vlm_run_specs  # noqa
-from .executor import ExecutionSpec
-from .runner import Runner, RunSpec, LATEST_SYMLINK
-from .run_specs import construct_run_specs
+from helm.benchmark.executor import ExecutionSpec
+from helm.benchmark.runner import Runner, RunSpec, LATEST_SYMLINK, set_benchmark_output_path
+from helm.benchmark.run_spec_factory import construct_run_specs
 def run_entries_to_run_specs(
@@ -83,16 +84,29 @@ def run_benchmarking(
     skip_completed_runs: bool,
     exit_on_error: bool,
     runner_class_name: Optional[str],
-    mongo_uri: str = "",
+    mongo_uri: Optional[str] = None,
+    disable_cache: Optional[bool] = None,
 ) -> List[RunSpec]:
     """Runs RunSpecs given a list of RunSpec descriptions."""
+    sqlite_cache_backend_config: Optional[SqliteCacheBackendConfig] = None
+    mongo_cache_backend_config: Optional[MongoCacheBackendConfig] = None
+    if not disable_cache:
+        if mongo_uri:
+            mongo_cache_backend_config = MongoCacheBackendConfig(mongo_uri)
+        else:
+            sqlite_cache_path = os.path.join(local_path, CACHE_DIR)
+            ensure_directory_exists(sqlite_cache_path)
+            sqlite_cache_backend_config = SqliteCacheBackendConfig(sqlite_cache_path)
     execution_spec = ExecutionSpec(
         auth=auth,
         url=url,
         local_path=local_path,
         parallelism=num_threads,
         dry_run=dry_run,
-        mongo_uri=mongo_uri,
+        sqlite_cache_backend_config=sqlite_cache_backend_config,
+        mongo_cache_backend_config=mongo_cache_backend_config,
     )
     with htrack_block("run_specs"):
         for run_spec in run_specs:
@@ -158,13 +172,6 @@ def add_run_args(parser: argparse.ArgumentParser):
         help="Name of the suite this run belongs to (default is today's date).",
         required=True,
     )
-    parser.add_argument(
-        "--local",
-        action="store_true",
-        help="DEPRECATED: Does nothing. Do not use. Previously enabled local mode. "
-        "Now does nothing and will be removed in the next released version. "
-        "Local mode is enabled by default, and only disabled if the --server_url flag is set.",
-    )
     parser.add_argument(
         "--local-path",
         type=str,
@@ -177,6 +184,11 @@ def add_run_args(parser: argparse.ArgumentParser):
         help="If non-empty, the URL of the MongoDB database that will be used for caching instead of SQLite",
         default="",
     )
+    parser.add_argument(
+        "--disable-cache",
+        action="store_true",
+        help="If true, the request-response cache for model clients and tokenizers will be disabled.",
+    )
 def validate_args(args):
@@ -225,7 +237,14 @@ def main():
         help="Run RunSpecs with priority less than or equal to this number. "
         "If a value for --priority is not specified, run on everything",
     )
-    parser.add_argument("-r", "--run-specs", nargs="*", help="Specifies what to run", default=[])
+    parser.add_argument(
+        "--run-specs",
+        nargs="*",
+        help="DEPRECATED: Use --run-entries instead. Will be removed in a future release. "
+        "Specifies run entries to run.",
+        default=[],
+    )
+    parser.add_argument("-r", "--run-entries", nargs="*", help="Specifies run entries to run", default=[])
     parser.add_argument(
         "--enable-huggingface-models",
         nargs="+",
@@ -239,55 +258,48 @@ def main():
         default=[],
         help="Experimental: Enable using AutoModelForCausalLM models from a local path.",
     )
-    parser.add_argument(
-        "--enable-remote-models",
-        nargs="+",
-        default=[],
-        help="Experimental: Enable remote service models that are not available on the client. "
-        "The client will use RemoteWindowService for windowing.",
-    )
     parser.add_argument(
         "--runner-class-name",
         type=str,
         default=None,
         help="Full class name of the Runner class to use. If unset, uses the default Runner.",
     )
-    parser.add_argument(
-        "--model-metadata-paths",
-        nargs="+",
-        help="Experimental: Where to read model metadata from",
-        default=[],
-    )
-    parser.add_argument(
-        "--model-deployment-paths",
-        nargs="+",
-        help="Experimental: Where to read model deployments from",
-        default=[],
-    )
     add_run_args(parser)
     args = parser.parse_args()
     validate_args(args)
-    for huggingface_model_name in args.enable_huggingface_models:
-        register_huggingface_hub_model_from_flag_value(huggingface_model_name)
-    for huggingface_model_path in args.enable_local_huggingface_models:
-        register_huggingface_local_model_from_flag_value(huggingface_model_path)
-    for model_metadata_path in args.model_metadata_paths:
-        register_model_metadata_from_path(model_metadata_path)
-    for model_deployment_paths in args.model_deployment_paths:
-        register_model_deployments_from_path(model_deployment_paths)
+    register_builtin_configs_from_helm_package()
+    register_configs_from_directory(args.local_path)
+    if args.enable_huggingface_models:
+        from helm.benchmark.huggingface_registration import register_huggingface_hub_model_from_flag_value
+        for huggingface_model_name in args.enable_huggingface_models:
+            register_huggingface_hub_model_from_flag_value(huggingface_model_name)
+    if args.enable_local_huggingface_models:
+        from helm.benchmark.huggingface_registration import register_huggingface_local_model_from_flag_value
-    if args.server_url and args.enable_remote_models:
-        check_and_register_remote_model(args.server_url, args.enable_remote_models)
+        for huggingface_model_path in args.enable_local_huggingface_models:
+            register_huggingface_local_model_from_flag_value(huggingface_model_path)
     run_entries: List[RunEntry] = []
     if args.conf_paths:
         run_entries.extend(read_run_entries(args.conf_paths).entries)
+    if args.run_entries:
+        run_entries.extend(
+            [RunEntry(description=description, priority=1, groups=None) for description in args.run_entries]
+        )
+    # TODO: Remove this eventually.
     if args.run_specs:
         run_entries.extend(
             [RunEntry(description=description, priority=1, groups=None) for description in args.run_specs]
         )
+    # Must set benchmark output path before getting RunSpecs,
+    # because run spec functions can use the benchmark output directory for caching.
+    ensure_directory_exists(args.output_path)
+    set_benchmark_output_path(args.output_path)
     run_specs = run_entries_to_run_specs(
         run_entries=run_entries,
         max_eval_instances=args.max_eval_instances,
@@ -322,13 +334,13 @@ def main():
         exit_on_error=args.exit_on_error,
         runner_class_name=args.runner_class_name,
         mongo_uri=args.mongo_uri,
+        disable_cache=args.disable_cache,
     )
-    if args.local:
+    if args.run_specs:
         hlog(
-            "WARNING: The --local flag is deprecated. It now does nothing and will be removed in "
-            "the next released version. Local mode is enabled by default, and only disabled if the "
-            "--server_url flag is set. Please remove --local from your command."
+            "WARNING: The --run-specs flag is deprecated and will be removed in a future release. "
+            "Use --run-entries instead."
         )
     hlog("Done.")

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

crfm-helm 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl