PyPI - crfm-helm - Versions diffs - 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl - Mend

crfm-helm 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crfm-helm might be problematic. Click here for more details.

Files changed (580) hide show

helm/clients/vision_language/qwen2_vlm_client.py ADDED Viewed

@@ -0,0 +1,175 @@
+from threading import Lock
+from typing import Any, Dict, List, Optional
+from dataclasses import dataclass
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
+import torch
+from helm.common.cache import CacheConfig
+from helm.common.gpu_utils import get_torch_device_name
+from helm.common.hierarchical_logger import hlog, htrack_block
+from helm.common.media_object import TEXT_TYPE
+from helm.common.request import Request, RequestResult, GeneratedOutput, Token
+from helm.common.request import wrap_request_time
+from helm.clients.client import CachingClient, generate_uid_for_multimodal_prompt
+@dataclass(frozen=True)
+class LoadedQwen2ModelProcessor:
+    """Immutable pairing of a loaded Qwen2-VL model with the processor loaded for the same model name."""
+    # Model instance produced by `Qwen2VLForConditionalGeneration.from_pretrained(...)`
+    model: Qwen2VLForConditionalGeneration
+    # Processor produced by `AutoProcessor.from_pretrained(...)`
+    processor: AutoProcessor
+# Serializes access to `_models` so that concurrent callers do not load the same model twice.
+_models_lock: Lock = Lock()
+# Loaded models keyed by Hugging Face model name.
+# A value of None means that the model has not been loaded yet.
+_models: Dict[str, Optional[LoadedQwen2ModelProcessor]] = {
+    "Qwen/Qwen2-VL-7B-Instruct": None,
+    "Qwen/Qwen2-VL-72B-Instruct": None,
+}
+class Qwen2VLMClient(CachingClient):
+    """Client that runs Qwen2-VL instruct models in-process with Hugging Face Transformers.
+    Loaded models are stored in the module-level `_models` dict and reused by later requests."""
+    # HELM model engine name -> Hugging Face model name. Every value must also be a key of `_models`.
+    _HELM_TO_HF_MODEL_NAME: Dict[str, str] = {
+        "qwen2-vl-7b-instruct": "Qwen/Qwen2-VL-7B-Instruct",
+        "qwen2-vl-72b-instruct": "Qwen/Qwen2-VL-72B-Instruct",
+    }
+    def __init__(self, cache_config: CacheConfig):
+        super().__init__(cache_config=cache_config)
+        # Device that the processed inputs are moved to before generation
+        self._device: str = get_torch_device_name()
+    def _get_model_name(self, helm_model_name: str) -> str:
+        """Return the Hugging Face model name for the given HELM model engine name.
+        Raises:
+            ValueError: If the engine name is not one of the supported Qwen2-VL models."""
+        model_name = self._HELM_TO_HF_MODEL_NAME.get(helm_model_name)
+        if model_name is None:
+            raise ValueError(f"Unhandled model name: {helm_model_name}")
+        return model_name
+    def _get_model(self, helm_model_name: str) -> LoadedQwen2ModelProcessor:
+        """Return the model and processor for the given HELM model engine name, loading them on first use.
+        The lock is held for the whole check-and-load so that concurrent callers do not load the
+        same model twice."""
+        model_name = self._get_model_name(helm_model_name)
+        # No `global` statements are needed: `_models` is mutated in place and never rebound.
+        with _models_lock:
+            loaded = _models[model_name]
+            if loaded is None:
+                hlog(f"Loading model {model_name} and caching in memory...")
+                # https://huggingface.co/docs/transformers/model_doc/qwen2_vl#flash-attention-2-to-speed-up-generation
+                model = Qwen2VLForConditionalGeneration.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.bfloat16,
+                    device_map="auto",
+                    attn_implementation="flash_attention_2",
+                ).eval()
+                processor = AutoProcessor.from_pretrained(model_name)
+                loaded = LoadedQwen2ModelProcessor(model=model, processor=processor)
+                _models[model_name] = loaded
+        return loaded
+    def make_request(self, request: Request) -> RequestResult:
+        """Generate `request.num_completions` completions for a multimodal prompt.
+        All media objects are placed into a single "user" message. Each completion is cached
+        separately, keyed by its completion index. A `RuntimeError` raised while generating is
+        reported as an unsuccessful `RequestResult` instead of being propagated.
+        Raises:
+            ValueError: If an image media object has no location, a text media object has no
+                text, or a media object is neither an image nor text."""
+        assert request.multimodal_prompt is not None, "Multimodal prompt is required"
+        loaded = self._get_model(request.model_engine)
+        model = loaded.model
+        processor = loaded.processor
+        # Build Qwen2 messages
+        # We assume all media objects go into a single "user" message:
+        # messages = [
+        #   {
+        #     "role": "user",
+        #     "content": [
+        #       {"type": "image", "image": "file:///path/to/image1.jpg"},
+        #       {"type": "image", "image": "file:///path/to/image2.jpg"},
+        #       {"type": "text", "text": "Describe these images."}
+        #     ]
+        #   }
+        # ]
+        message_content: List[Dict[str, Any]] = []
+        for media_object in request.multimodal_prompt.media_objects:
+            if media_object.is_type("image"):
+                # Report a missing location explicitly instead of falling through to the
+                # "Unrecognized MediaObject type" error, which would be misleading for an image.
+                if not media_object.location:
+                    raise ValueError("MediaObject of image type has missing location field value")
+                message_content.append({"type": "image", "image": media_object.location})
+            elif media_object.is_type(TEXT_TYPE):
+                if media_object.text is None:
+                    raise ValueError("MediaObject of text type has missing text field value")
+                message_content.append({"type": "text", "text": media_object.text})
+            else:
+                raise ValueError(f"Unrecognized MediaObject type {media_object.type}")
+        messages = [{"role": "user", "content": message_content}]
+        # Prepare text and vision inputs
+        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        image_inputs, video_inputs = process_vision_info(messages)
+        inputs = processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        ).to(self._device)
+        generation_args = {
+            "max_new_tokens": request.max_tokens,
+        }
+        # Defined once outside the loop: it only depends on values that are the same for every completion.
+        def do_it() -> Dict[str, Any]:
+            generated_ids = model.generate(**inputs, **generation_args)
+            # Remove the input prefix from outputs
+            generated_ids_trimmed = [
+                out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+            ]
+            output_text = processor.batch_decode(
+                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+            )
+            # There's only one batch element
+            completion = output_text[0]
+            # For simplicity, we split tokens by whitespace.
+            # A more accurate tokenization would require a tokenizer for Qwen2, if desired.
+            tokens = completion.split()
+            return {"output": (completion, tokens)}
+        completions: List[GeneratedOutput] = []
+        request_time: float = 0
+        request_datetime: Optional[int] = None
+        all_cached: bool = True
+        with htrack_block(f"Generating for prompt: {text}"):
+            for completion_index in range(request.num_completions):
+                try:
+                    # The completion index is part of the cache key so that each completion is cached separately
+                    cache_key = CachingClient.make_cache_key(
+                        raw_request={
+                            "completion_index": completion_index,
+                            "model": request.model,
+                            "prompt": generate_uid_for_multimodal_prompt(request.multimodal_prompt),
+                            **generation_args,
+                        },
+                        request=request,
+                    )
+                    result, cached = self.cache.get(cache_key, wrap_request_time(do_it))
+                except RuntimeError as model_error:
+                    return RequestResult(
+                        success=False, cached=False, error=str(model_error), completions=[], embedding=[]
+                    )
+                text_out, tokens = result["output"]
+                # Log probabilities are not computed, so every logprob is reported as 0
+                completions.append(
+                    GeneratedOutput(
+                        text=text_out,
+                        logprob=0,
+                        tokens=[Token(text=str(token), logprob=0) for token in tokens],
+                    )
+                )
+                hlog(f"Generated: {text_out}")
+                request_time += result["request_time"]
+                # Keep the datetime of the first completion that reports one
+                request_datetime = request_datetime or result.get("request_datetime")
+                all_cached = all_cached and cached
+        return RequestResult(
+            success=True,
+            cached=all_cached,
+            request_time=request_time,
+            request_datetime=request_datetime,
+            completions=completions,
+            embedding=[],
+        )

helm/clients/vllm_client.py CHANGED Viewed

@@ -2,13 +2,15 @@ from typing import Any, Dict, Optional
 from helm.common.cache import CacheConfig
 from helm.common.request import Request
-from helm.clients.openai_client import OpenAIClient
+from helm.clients.openai_client import OpenAILegacyCompletionsClient
 from helm.tokenizers.tokenizer import Tokenizer
-class VLLMClient(OpenAIClient):
+class VLLMClient(OpenAILegacyCompletionsClient):
     """Sends request to a vLLM server using the OpenAI-compatible API.
+    Only supports the legacy Text Completions API, rather than the Chat Completions API.
     See: https://docs.vllm.ai/en/latest/getting_started/quickstart.html#openai-compatible-server"""
     def __init__(
@@ -29,10 +31,6 @@ class VLLMClient(OpenAIClient):
         self.tokenizer = tokenizer
         self.tokenizer_name = tokenizer_name
-    def _is_chat_model_engine(self, model_engine: str) -> bool:
-        # Only support vLLM completion models for now.
-        return False
     def _get_model_for_request(self, request: Request) -> str:
         # The `model` parameter for vLLM should be the whole model name including the creator organization,
         # unlike OpenAI which only uses the model engine.

helm/clients/yi_client.py CHANGED Viewed

@@ -26,6 +26,3 @@ class YiChatClient(OpenAIClient):
             org_id=None,
             base_url=YiChatClient.BASE_URL,
         )
-    def _is_chat_model_engine(self, model_engine: str) -> bool:
-        return True

helm/common/audio_utils.py ADDED Viewed

@@ -0,0 +1,111 @@
+from io import BytesIO
+from typing import Optional
+from filelock import FileLock
+import base64
+import os
+import ffmpeg
+import numpy as np
+import soundfile as sf
+import subprocess
+from helm.common.hierarchical_logger import hlog
+from helm.common.multimodal_request_utils import get_contents_as_bytes
+from helm.common.optional_dependencies import handle_module_not_found_error
+try:
+    import librosa
+except ModuleNotFoundError as e:
+    handle_module_not_found_error(e, ["audiolm"])
+def ensure_audio_file_exists_from_array(path: str, array: np.ndarray, sample_rate: int) -> None:
+    """Create the wav or mp3 file at `path` from `array` unless the file is already there.
+    The write happens under a file lock, goes to a temporary sibling file first, and is then
+    renamed into place, so readers never see a partially written file.
+    Raises:
+        ValueError: If `path` does not end with .wav or .mp3."""
+    stem, extension = os.path.splitext(path)
+    if extension not in (".wav", ".mp3"):
+        raise ValueError(f"Path must end with .wav or .mp3: {path}")
+    with FileLock(f"{path}.lock"):
+        if not os.path.exists(path):
+            # The temporary name keeps the real extension after the `.tmp` marker
+            staging_path = f"{stem}.tmp{extension}"
+            sf.write(staging_path, array, samplerate=sample_rate)
+            os.rename(staging_path, path)
+def get_array_from_audio_file(path: str, sample_rate: Optional[int]) -> np.ndarray:
+    """Load the audio at `path` (a local file path or an http(s) URL) as an array.
+    `sample_rate` is passed to `librosa.load` as its `sr` argument."""
+    if path.startswith(("http://", "https://")):
+        # Remote audio is downloaded first and handed to librosa as a file-like object
+        source = BytesIO(get_contents_as_bytes(path))
+    else:
+        # librosa can open a local file path directly
+        source = path
+    samples, _ = librosa.load(source, sr=sample_rate)
+    return samples
+def use_ffmpeg_to_convert_audio_file(input_path: str, output_path: str) -> None:
+    """Use ffmpeg to convert an audio file type.
+    Does nothing if a file already exists at `output_path`.
+    Raises:
+        ValueError: If the `ffmpeg` executable cannot be found or exits with a non-zero status."""
+    if os.path.exists(output_path):
+        return
+    try:
+        subprocess.run(["ffmpeg", "-i", input_path, output_path], check=True)
+    except (subprocess.CalledProcessError, FileNotFoundError) as e:
+        # Chain the cause so that the underlying failure stays visible in the traceback
+        raise ValueError(
+            "Please install ffmpeg using `bash install-shelm-extras.sh` first to convert audio files."
+        ) from e
+def is_invalid_audio_file(audio_path: str) -> bool:
+    """
+    Report whether the audio file at the given path is unusable.
+    A file counts as invalid when any of the following holds:
+    1. Nothing exists at the path.
+    2. The file has a length of zero when opened with `sf.SoundFile`.
+    3. Opening or reading the file raises a `RuntimeError`.
+    """
+    if os.path.exists(audio_path):
+        try:
+            with sf.SoundFile(audio_path) as sound:
+                is_empty = len(sound) == 0
+        except RuntimeError:
+            return True
+        return is_empty
+    return True
+def extract_audio(video_path: str, output_audio_path: str) -> None:
+    """
+    Extracts audio from an MP4 video file and saves it as an MP3 file.
+    An existing file at the output path is overwritten.
+    Args:
+        video_path (str): Path to the input MP4 video file.
+        output_audio_path (str): Path to save the extracted MP3 audio file.
+    Returns:
+        None
+    Raises:
+        ffmpeg.Error: If the ffmpeg run fails. The error is logged before it is re-raised.
+    """
+    try:
+        (
+            ffmpeg.input(video_path)
+            .output(output_audio_path, format="mp3", acodec="libmp3lame", audio_bitrate="192k")
+            .run(overwrite_output=True)
+        )
+    except ffmpeg.Error as e:
+        # `run()` is called without `capture_stderr=True`, so `e.stderr` can be None.
+        # Decoding it unconditionally would replace the ffmpeg error with an AttributeError.
+        if e.stderr is not None:
+            stderr_text = e.stderr.decode(errors="replace")
+        else:
+            stderr_text = "<stderr not captured>"
+        hlog(f"Error extracting audio from video: {video_path}: {stderr_text}")
+        raise
+def encode_audio_to_base64(file_path: str) -> str:
+    """
+    Reads an audio file and returns its bytes as a Base64 string.
+    Args:
+        file_path (str): Path to the audio file. The file must exist.
+    Returns:
+        str: The file contents, Base64-encoded and decoded as UTF-8 text.
+    """
+    assert os.path.exists(file_path), f"Audio file does not exist at path: {file_path}"
+    with open(file_path, "rb") as stream:
+        raw_bytes = stream.read()
+    encoded = base64.b64encode(raw_bytes)
+    return encoded.decode("utf-8")

helm/common/file_caches/local_file_cache.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 from typing import Callable
 from helm.common.general import ensure_directory_exists, generate_unique_id
-from .file_cache import FileCache
+from helm.common.file_caches.file_cache import FileCache
 from helm.common.optional_dependencies import handle_module_not_found_error

helm/common/file_caches/test_local_file_cache.py CHANGED Viewed

@@ -3,7 +3,7 @@ import shutil
 import tempfile
 import unittest
-from .local_file_cache import LocalFileCache
+from helm.common.file_caches.local_file_cache import LocalFileCache
 class TestLocalFileCache(unittest.TestCase):

helm/common/images_utils.py CHANGED Viewed

@@ -10,7 +10,7 @@ from urllib.request import urlopen
 import numpy as np
-from .general import is_url
+from helm.common.general import is_url
 from helm.common.optional_dependencies import handle_module_not_found_error
 try:
@@ -25,7 +25,7 @@ def open_image(image_location: str) -> Image.Image:
     """
     image: Image.Image
     if is_url(image_location):
-        image = Image.open(requests.get(image_location, stream=True).raw)
+        image = Image.open(requests.get(image_location, stream=True).raw)  # type: ignore
     else:
         image = Image.open(image_location)
     return image.convert("RGB")

helm/common/media_object.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import urllib
 from copy import deepcopy
 from dataclasses import dataclass, field, replace
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 IMAGE_TYPE = "image"
@@ -27,7 +27,7 @@ class MediaObject:
     location: Optional[str] = None
     """When the media object is a file, specify the location of the media object, which can be a local path or URL."""
-    def to_dict(self) -> dict:
+    def to_dict(self) -> Dict[str, Any]:
         """Converts the media object to a dictionary."""
         return {key: value for key, value in self.__dict__.items() if value is not None}

helm/common/multimodal_request_utils.py CHANGED Viewed

@@ -1,10 +1,36 @@
+import base64
 from typing import List, Optional
+import requests
+import urllib.parse
 from helm.benchmark.adaptation.request_state import RequestState
 from helm.benchmark.scenarios.scenario import Reference
 from helm.common.request import RequestResult
+def get_contents_as_bytes(path: str) -> bytes:
+    """Return the raw bytes found at the location.
+    The location can be a local path or an http(s) URL. For a URL, an unsuccessful HTTP
+    status raises through `response.raise_for_status()`."""
+    scheme = urllib.parse.urlparse(path).scheme
+    if scheme not in ("http", "https"):
+        # Anything that is not an http(s) URL is treated as a local file path
+        with open(path, "rb") as local_file:
+            return local_file.read()
+    response = requests.get(path)
+    response.raise_for_status()
+    return response.content
+def get_contents_as_base64(path: str) -> str:
+    """Return the contents found at the location, Base64-encoded as UTF-8 text.
+    The location can be a local path or a URL; it is read with `get_contents_as_bytes`."""
+    raw_contents = get_contents_as_bytes(path)
+    encoded_contents = base64.b64encode(raw_contents)
+    return encoded_contents.decode("utf-8")
 def gather_generated_image_locations(request_result: RequestResult) -> List[str]:
     """Gathers the locations (file paths or URLs) of the generated images."""
     image_locations: List[str] = []

helm/common/reeval_parameters.py ADDED Viewed

@@ -0,0 +1,12 @@
+from dataclasses import dataclass
+from typing import Optional
+@dataclass(frozen=True)
+class REEvalParameters:
+    """
+    Parameters for reeval evaluation.
+    """
+    model_ability: Optional[float] = None
+    """The initial ability of the model to perform the task. Used for reeval evaluation."""

helm/common/request.py CHANGED Viewed

@@ -4,7 +4,8 @@ from typing import Any, Callable, Dict, List, Optional
 from helm.common.media_object import MultimediaObject
 from helm.common.image_generation_parameters import ImageGenerationParameters
-from .general import indent_lines, format_text
+from helm.common.general import indent_lines, format_text
+from helm.common.response_format import ResponseFormat
 @dataclass(frozen=True)
@@ -72,6 +73,9 @@ class Request:
     image_generation_parameters: Optional[ImageGenerationParameters] = None
     """Parameters for image generation."""
+    response_format: Optional[ResponseFormat] = None
+    """EXPERIMENTAL: Response format. Currently only supported by OpenAI and Together."""
     def validate(self):
         if (
             (self.messages and self.prompt)
@@ -193,7 +197,7 @@ class RequestResult:
     """Whether the request was actually cached"""
     request_time: Optional[float] = None
-    """How long did the request take?"""
+    """How long the request took in seconds"""
     request_datetime: Optional[int] = None
     """When was the request sent?

helm/common/response_format.py ADDED Viewed

@@ -0,0 +1,18 @@
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+@dataclass(frozen=True)
+class ResponseFormat:
+    """EXPERIMENTAL: Model response format.
+    Currently only supports JSON schema.
+    Currently only supported by OpenAI and Together.
+    See:
+    - https://platform.openai.com/docs/guides/structured-outputs
+    - https://docs.together.ai/docs/json-mode"""
+    # Defaults to None. NOTE(review): presumably None means that no schema is requested;
+    # confirm against the clients that read this field.
+    json_schema: Optional[Dict[str, Any]] = None
+    """EXPERIMENTAL: The JSON schema that the model output should conform to."""

helm/common/test_media_object.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import unittest
-from .media_object import MediaObject, MultimediaObject
+from helm.common.media_object import MediaObject, MultimediaObject
 class TestMediaObject(unittest.TestCase):

crfm-helm 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

Potentially problematic release.

crfm-helm 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl