crfm-helm 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/METADATA +19 -5
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/RECORD +121 -76
- helm/benchmark/adaptation/adapter_spec.py +32 -31
- helm/benchmark/adaptation/adapters/multimodal/in_context_learning_multimodal_adapter.py +1 -0
- helm/benchmark/adaptation/adapters/multimodal/multimodal_prompt.py +7 -0
- helm/benchmark/adaptation/adapters/multimodal/test_multimodal_prompt.py +2 -0
- helm/benchmark/annotation/air_bench_annotator.py +64 -0
- helm/benchmark/annotation/annotator_factory.py +6 -0
- helm/benchmark/annotation/image2structure/lilypond_compiler_annotator.py +1 -1
- helm/benchmark/annotation/live_qa_annotator.py +84 -0
- helm/benchmark/annotation/medication_qa_annotator.py +81 -0
- helm/benchmark/augmentations/perturbation.py +17 -1
- helm/benchmark/augmentations/test_perturbation.py +30 -0
- helm/benchmark/augmentations/translate_perturbation.py +1 -0
- helm/benchmark/huggingface_registration.py +16 -6
- helm/benchmark/metrics/air_bench_metrics.py +56 -0
- helm/benchmark/metrics/efficiency_metrics.py +9 -2
- helm/benchmark/metrics/evaluate_reference_metrics.py +16 -0
- helm/benchmark/metrics/fin_qa_metrics.py +60 -0
- helm/benchmark/metrics/fin_qa_metrics_helper.py +398 -0
- helm/benchmark/metrics/gpt4v_originality_critique_metrics.py +126 -0
- helm/benchmark/metrics/instruction_following_critique_metrics.py +1 -0
- helm/benchmark/metrics/live_qa_metrics.py +23 -0
- helm/benchmark/metrics/medication_qa_metrics.py +23 -0
- helm/benchmark/metrics/prometheus_vision_critique_metrics.py +185 -0
- helm/benchmark/metrics/reka_vibe_critique_metrics.py +158 -0
- helm/benchmark/metrics/unitxt_metrics.py +20 -10
- helm/benchmark/metrics/vision_language/emd_utils.py +4 -0
- helm/benchmark/metrics/vision_language/image_metrics.py +104 -21
- helm/benchmark/model_metadata_registry.py +5 -1
- helm/benchmark/presentation/schema.py +54 -4
- helm/benchmark/presentation/test_schema.py +11 -0
- helm/benchmark/run.py +16 -2
- helm/benchmark/run_expander.py +112 -63
- helm/benchmark/run_spec_factory.py +15 -10
- helm/benchmark/run_specs/air_bench_run_specs.py +40 -0
- helm/benchmark/run_specs/classic_run_specs.py +15 -11
- helm/benchmark/run_specs/decodingtrust_run_specs.py +3 -1
- helm/benchmark/run_specs/experimental_run_specs.py +33 -0
- helm/benchmark/run_specs/finance_run_specs.py +33 -0
- helm/benchmark/run_specs/vlm_run_specs.py +444 -65
- helm/benchmark/scenarios/air_bench_scenario.py +50 -0
- helm/benchmark/scenarios/ci_mcqa_scenario.py +80 -0
- helm/benchmark/scenarios/entity_data_imputation_scenario.py +8 -2
- helm/benchmark/scenarios/fin_qa_scenario.py +117 -0
- helm/benchmark/scenarios/legalbench_scenario.py +6 -2
- helm/benchmark/scenarios/math_scenario.py +1 -1
- helm/benchmark/scenarios/test_air_bench_scenario.py +27 -0
- helm/benchmark/scenarios/vision_language/a_okvqa_scenario.py +83 -0
- helm/benchmark/scenarios/vision_language/bingo_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/crossmodal_3600_scenario.py +134 -0
- helm/benchmark/scenarios/vision_language/flickr30k_scenario.py +74 -0
- helm/benchmark/scenarios/vision_language/gqa_scenario.py +91 -0
- helm/benchmark/scenarios/vision_language/hateful_memes_scenario.py +4 -2
- helm/benchmark/scenarios/vision_language/image2structure/image2structure_scenario.py +13 -2
- helm/benchmark/scenarios/vision_language/image2structure/latex_scenario.py +1 -5
- helm/benchmark/scenarios/vision_language/image2structure/musicsheet_scenario.py +1 -5
- helm/benchmark/scenarios/vision_language/image2structure/webpage_scenario.py +5 -3
- helm/benchmark/scenarios/vision_language/math_vista_scenario.py +117 -0
- helm/benchmark/scenarios/vision_language/mm_safety_bench_scenario.py +103 -0
- helm/benchmark/scenarios/vision_language/mscoco_captioning_scenario.py +92 -0
- helm/benchmark/scenarios/vision_language/mscoco_categorization_scenario.py +117 -0
- helm/benchmark/scenarios/vision_language/originality_scenario.py +35 -0
- helm/benchmark/scenarios/vision_language/pairs_scenario.py +247 -0
- helm/benchmark/scenarios/vision_language/unicorn_scenario.py +3 -3
- helm/benchmark/scenarios/vision_language/vibe_eval_scenario.py +95 -0
- helm/benchmark/scenarios/vision_language/viz_wiz_scenario.py +2 -2
- helm/benchmark/scenarios/vision_language/vqa_scenario.py +4 -2
- helm/benchmark/static/schema_air_bench.yaml +3149 -0
- helm/benchmark/static/schema_classic.yaml +3 -59
- helm/benchmark/static/schema_finance.yaml +143 -0
- helm/benchmark/static/schema_image2structure.yaml +447 -0
- helm/benchmark/static/schema_instruction_following.yaml +3 -52
- helm/benchmark/static/schema_lite.yaml +3 -61
- helm/benchmark/static/schema_medical.yaml +255 -0
- helm/benchmark/static/schema_mmlu.yaml +3 -61
- helm/benchmark/static/schema_tables.yaml +200 -0
- helm/benchmark/static/schema_thai.yaml +223 -0
- helm/benchmark/static/schema_unitxt.yaml +3 -61
- helm/benchmark/static/schema_vhelm.yaml +824 -0
- helm/benchmark/static/schema_vhelm_lite.yaml +109 -0
- helm/benchmark/static_build/assets/air-overview-d2e6c49f.png +0 -0
- helm/benchmark/static_build/assets/index-30dbceba.js +10 -0
- helm/benchmark/static_build/assets/index-66b02d40.css +1 -0
- helm/benchmark/static_build/assets/overview-74aea3d8.png +0 -0
- helm/benchmark/static_build/assets/process-flow-bd2eba96.png +0 -0
- helm/benchmark/static_build/index.html +2 -2
- helm/clients/anthropic_client.py +78 -14
- helm/clients/auto_client.py +11 -0
- helm/clients/client.py +24 -7
- helm/clients/cohere_client.py +98 -3
- helm/clients/huggingface_client.py +71 -12
- helm/clients/openai_client.py +11 -5
- helm/clients/reka_client.py +189 -0
- helm/clients/test_client.py +3 -3
- helm/clients/test_huggingface_client.py +19 -3
- helm/clients/test_together_client.py +72 -2
- helm/clients/together_client.py +199 -2
- helm/clients/vertexai_client.py +117 -64
- helm/clients/vision_language/huggingface_vision2seq_client.py +145 -0
- helm/clients/vision_language/huggingface_vlm_client.py +12 -4
- helm/clients/vision_language/idefics_client.py +2 -2
- helm/clients/vision_language/paligemma_client.py +146 -0
- helm/clients/vision_language/palmyra_vision_client.py +84 -0
- helm/clients/yi_client.py +31 -0
- helm/common/critique_request.py +10 -1
- helm/common/images_utils.py +29 -3
- helm/config/model_deployments.yaml +504 -12
- helm/config/model_metadata.yaml +579 -52
- helm/config/tokenizer_configs.yaml +100 -1
- helm/proxy/critique/model_critique_client.py +32 -4
- helm/proxy/services/server_service.py +1 -1
- helm/tokenizers/auto_tokenizer.py +1 -1
- helm/tokenizers/cohere_tokenizer.py +44 -2
- helm/tokenizers/huggingface_tokenizer.py +36 -13
- helm/tokenizers/test_cohere_tokenizer.py +39 -0
- helm/tokenizers/test_huggingface_tokenizer.py +5 -1
- helm/benchmark/static/schema_vlm.yaml +0 -576
- helm/benchmark/static_build/assets/index-5088afcb.css +0 -1
- helm/benchmark/static_build/assets/index-d839df55.js +0 -9
- helm/benchmark/test_model_deployment_definition.py +0 -90
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/LICENSE +0 -0
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/WHEEL +0 -0
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/entry_points.txt +0 -0
- {crfm_helm-0.5.0.dist-info → crfm_helm-0.5.2.dist-info}/top_level.txt +0 -0
helm/clients/vision_language/palmyra_vision_client.py
ADDED
@@ -0,0 +1,84 @@
+from typing import Dict, List
+import json
+
+import requests
+
+from helm.common.cache import CacheConfig
+from helm.common.images_utils import encode_base64
+from helm.common.media_object import TEXT_TYPE
+from helm.common.request import Request, RequestResult, GeneratedOutput
+from helm.common.request import wrap_request_time
+from helm.clients.client import CachingClient, generate_uid_for_multimodal_prompt, truncate_and_tokenize_response_text
+from helm.tokenizers.tokenizer import Tokenizer
+
+
+class PalmyraVisionClient(CachingClient):
+    def __init__(self, tokenizer: Tokenizer, tokenizer_name: str, endpoint: str, cache_config: CacheConfig):
+        super().__init__(cache_config)
+        self.tokenizer: Tokenizer = tokenizer
+        self.tokenizer_name: str = tokenizer_name
+
+        # Currently, the Palmyra Vision model does not have a public API, so we need to use a secret endpoint
+        self.endpoint: str = endpoint
+
+    def make_request(self, request: Request) -> RequestResult:
+        assert request.multimodal_prompt is not None, "Multimodal prompt is required"
+
+        # Build the prompt
+        prompt: List[Dict[str, str]] = []
+        for media_object in request.multimodal_prompt.media_objects:
+            if media_object.is_type("image") and media_object.location:
+                prompt.append(
+                    {
+                        "type": "InlineData",
+                        "value": encode_base64(media_object.location, format="JPEG"),
+                        "contentType": "image/jpeg",
+                    }
+                )
+            elif media_object.is_type(TEXT_TYPE):
+                if media_object.text is None:
+                    raise ValueError("MediaObject of text type has missing text field value")
+                prompt.append({"type": "Text", "value": media_object.text})
+            else:
+                raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
+
+        # Generate
+        try:
+
+            def do_it():
+                response = requests.post(
+                    self.endpoint, headers={"Content-Type": "application/json"}, data=json.dumps({"parts": prompt})
+                )
+                if response.status_code != 200:
+                    curl_command: str = (
+                        f"curl --location '{self.endpoint}' --header 'Content-Type: application/json' "
+                        f"--data '{json.dumps({'parts': prompt})}'"
+                    )
+                    assert False, f"Got status code {response.status_code}. Try {curl_command}"
+
+                json_response = json.loads(response.text)
+                assert (
+                    "choices" in json_response and "errors" not in json_response
+                ), f"Invalid response: {response.text}"
+                return json_response
+
+            cache_key = CachingClient.make_cache_key(
+                raw_request={"prompt": generate_uid_for_multimodal_prompt(request.multimodal_prompt)},
+                request=request,
+            )
+            result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+        except RuntimeError as ex:
+            return RequestResult(success=False, cached=False, error=str(ex), completions=[], embedding=[])
+
+        # The internal endpoint doesn't support any other parameters, so we have to truncate ourselves
+        completions: List[GeneratedOutput] = [
+            truncate_and_tokenize_response_text(choice["text"], request, self.tokenizer, self.tokenizer_name)
+            for choice in result["choices"]
+        ]
+        return RequestResult(
+            success=True,
+            cached=cached,
+            request_time=result["request_time"],
+            completions=completions,
+            embedding=[],
+        )
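
The new client serializes each multimodal prompt into a "parts" payload before POSTing it to the private endpoint. Below is a minimal sketch (not part of the package) of what that request body looks like for one image plus one text segment; the endpoint URL and image path are hypothetical placeholders.

import json

import requests

from helm.common.images_utils import encode_base64

# Hypothetical values, for illustration only.
endpoint = "https://example.internal/palmyra-vision"
image_path = "cat.jpg"

# Mirrors the "parts" structure built in PalmyraVisionClient.make_request above.
payload = {
    "parts": [
        {
            "type": "InlineData",
            "value": encode_base64(image_path, format="JPEG"),
            "contentType": "image/jpeg",
        },
        {"type": "Text", "value": "Describe this image in one sentence."},
    ]
}

response = requests.post(endpoint, headers={"Content-Type": "application/json"}, data=json.dumps(payload))
print(response.status_code, response.text)
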
helm/clients/yi_client.py
ADDED
@@ -0,0 +1,31 @@
+from typing import Optional
+
+from helm.clients.openai_client import OpenAIClient
+from helm.common.cache import CacheConfig
+from helm.tokenizers.tokenizer import Tokenizer
+
+
+class YiChatClient(OpenAIClient):
+
+    BASE_URL = "http://api.01ww.xyz/v1"
+
+    def __init__(
+        self,
+        tokenizer: Tokenizer,
+        tokenizer_name: str,
+        cache_config: CacheConfig,
+        api_key: Optional[str] = None,
+    ):
+        self.tokenizer = tokenizer
+        self.tokenizer_name = tokenizer_name
+        super().__init__(
+            tokenizer=tokenizer,
+            tokenizer_name=tokenizer_name,
+            cache_config=cache_config,
+            api_key=api_key,
+            org_id=None,
+            base_url=YiChatClient.BASE_URL,
+        )
+
+    def _is_chat_model_engine(self, model_engine: str) -> bool:
+        return True
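
YiChatClient is a thin OpenAI-compatible wrapper: it pins the base URL to the 01.AI endpoint and treats every engine as a chat model. The following is a minimal sketch of driving it through HELM's Request API; the tokenizer, cache config, API key, and model names are assumptions, not taken from the diff.

from helm.clients.yi_client import YiChatClient
from helm.common.request import Request


def run_yi_example(tokenizer, tokenizer_name, cache_config):
    # `tokenizer`, `tokenizer_name`, and `cache_config` are assumed to be a HELM
    # Tokenizer, its name, and a CacheConfig constructed elsewhere.
    client = YiChatClient(
        tokenizer=tokenizer,
        tokenizer_name=tokenizer_name,
        cache_config=cache_config,
        api_key="YOUR_01AI_API_KEY",  # placeholder
    )
    # The model names are assumptions based on the model_metadata.yaml additions in this release.
    request = Request(
        model="01-ai/yi-large",
        model_deployment="01-ai/yi-large",
        prompt="Say hello in one sentence.",
        max_tokens=32,
    )
    return client.make_request(request)
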
helm/common/critique_request.py
CHANGED
@@ -1,5 +1,6 @@
 from dataclasses import dataclass
-from typing import Dict, List, Union
+from typing import Dict, List, Union, Optional
+from helm.common.media_object import MediaObject
 
 
 class QuestionType:
@@ -34,6 +35,11 @@ class CritiqueQuestionTemplate:
 
     Can contain placeholders like {{placeholder}} that will be interpolated using the fields in CritiqueRequest."""
 
+    media_object: Optional[MediaObject] = None
+    """Path of image for multimodal input.
+
+    Image path or URL of the question."""
+
 
 @dataclass(frozen=True)
 class CritiqueTaskTemplate:
@@ -53,6 +59,9 @@ class CritiqueTaskTemplate:
     questions: List[CritiqueQuestionTemplate]
     """List of templates for the questions."""
 
+    max_tokens: Optional[int] = None
+    """Max token to be generated for the free-end generation."""
+
 
 @dataclass(frozen=True)
 class CritiqueRequest:
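
A minimal sketch of the two new optional fields. Only media_object, max_tokens, and questions are taken from the diff above; the remaining field names (name, question_type, text, options, instructions, num_respondents) and the QuestionType constant are assumptions about the surrounding 0.5.x dataclasses.

from helm.common.critique_request import CritiqueQuestionTemplate, CritiqueTaskTemplate, QuestionType
from helm.common.media_object import MediaObject

question = CritiqueQuestionTemplate(
    name="image_quality",
    question_type=QuestionType.FREE_RESPONSE,
    text="How well does the response describe the image?",
    options=[],
    # New in 0.5.2: attach an image (path or URL) to the question.
    media_object=MediaObject(location="figures/example.png", content_type="image/png"),
)

task = CritiqueTaskTemplate(
    name="vlm_originality_critique",
    instructions="Rate the model response shown below.\n{{response}}",
    num_respondents=1,
    questions=[question],
    # New in 0.5.2: cap the length of free-form critique generations.
    max_tokens=200,
)
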
helm/common/images_utils.py
CHANGED
@@ -1,8 +1,10 @@
 import base64
 import io
+import os
+
 import requests
 import shutil
-from typing import List, Optional
+from typing import List, Optional, Tuple
 from urllib.request import urlopen
 
 import numpy as np
@@ -28,6 +30,12 @@ def open_image(image_location: str) -> Image.Image:
     return image.convert("RGB")
 
 
+def get_dimensions(image_location: str) -> Tuple[int, int]:
+    """Returns the dimensions of the image."""
+    image: Image.Image = open_image(image_location)
+    return image.size
+
+
 def encode_base64(image_location: str, format="JPEG") -> str:
     """Returns the base64 representation of an image file."""
     image_file = io.BytesIO()
@@ -36,7 +44,7 @@ def encode_base64(image_location: str, format="JPEG") -> str:
     return base64.b64encode(image_file.getvalue()).decode("ascii")
 
 
-def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None):
+def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optional[int] = None) -> None:
     """
     Copies the image file from `src` path to `dest` path. If dimensions `width` and `height`
     are specified, resizes the image before copying. `src` can be a URL.
@@ -44,12 +52,30 @@ def copy_image(src: str, dest: str, width: Optional[int] = None, height: Optiona
     if (width is not None and height is not None) or is_url(src):
         image = open_image(src)
         if width is not None and height is not None:
-            image = image.resize((width, height), Image.
+            image = image.resize((width, height), Image.Resampling.LANCZOS)
         image.save(dest)
     else:
        shutil.copy(src, dest)
 
 
+def resize_image_to_max_file_size(src: str, dest: str, max_size_in_bytes: int, step=10):
+    # Open an image file
+    with Image.open(src) as img:
+        width, height = img.size
+
+        # Reduce dimensions iteratively until the file size is under the limit
+        while True:
+            # Save the image temporarily to check the file size
+            img.save(dest, quality=95)  # Start with high quality
+            if os.path.getsize(dest) < max_size_in_bytes:
+                break
+
+            # Reduce dimensions
+            width -= step
+            height -= step
+            img = img.resize((width, height), Image.Resampling.LANCZOS)
+
+
 def is_blacked_out_image(image_location: str) -> bool:
     """Returns True if the image is all black. False otherwise."""
     try:
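
The two helpers added above are self-contained, so a short usage sketch follows; the file paths and size limit are hypothetical placeholders, not from the package.

from helm.common.images_utils import get_dimensions, resize_image_to_max_file_size

# Report the dimensions of a local image.
width, height = get_dimensions("photo.jpg")
print(f"Original dimensions: {width}x{height}")

# Shrink the image in 10-pixel steps until the saved JPEG is under ~1 MB.
resize_image_to_max_file_size("photo.jpg", "photo_small.jpg", max_size_in_bytes=1_000_000)
print("Resized dimensions:", get_dimensions("photo_small.jpg"))
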