PyPI - benchmax - Versions diffs - 0.1.2.dev7__tar.gz → 0.1.2.dev9__tar.gz - Mend

benchmax 0.1.2.dev7.tar.gz → 0.1.2.dev9.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53)

{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: benchmax
-Version: 0.1.2.dev7
+Version: 0.1.2.dev9
 Summary: Framework-Agnostic RL Environments for LLM Fine-Tuning
 Author: cgft.io
 Classifier: Programming Language :: Python :: 3

{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "benchmax"
-version = "0.1.2.dev7"
+version = "0.1.2.dev9"
 description = "Framework-Agnostic RL Environments for LLM Fine-Tuning"
 readme = "README.md"
 authors = [{ name = "cgft.io" }]
@@ -56,8 +56,8 @@ conflicts = [[{ group = "skypilot" }, { group = "skyrl" }]]
 [tool.uv.pip]
 extra = ["dev", "skypilot", "skyrl", "excel", "excel-mac-windows", "crm"]
-[tool.uv.extra-build-dependencies]
-flash-attn = [{ requirement = "torch", match-runtime = true }]
+# [tool.uv.extra-build-dependencies]
+# flash-attn = [{ requirement = "torch", match-runtime = true }]
-[tool.uv.extra-build-variables]
-flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }
+# [tool.uv.extra-build-variables]
+# flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }

{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/bundle/loader.py RENAMED Viewed

@@ -1,7 +1,8 @@
 import logging
 import subprocess
 import sys
-from typing import Type
+from pathlib import Path
+from typing import Type, Union
 import cloudpickle
@@ -36,25 +37,26 @@ def load_env(
         DependencyError: pip install failed.
         BundlingError: Unpickling failed.
     """
-    if isinstance(payload, bytes):
-        payload = EnvPayload.from_bytes(payload)
+    env_payload: EnvPayload = (
+        payload if isinstance(payload, EnvPayload) else EnvPayload.from_bytes(payload)
+    )
     # --- Python version check ---
     current_python = f"{sys.version_info.major}.{sys.version_info.minor}"
-    if payload.python_version != current_python and not allow_python_mismatch:
+    if env_payload.python_version != current_python and not allow_python_mismatch:
         raise IncompatiblePythonError(
-            f"Payload was packaged with Python {payload.python_version} "
+            f"Payload was packaged with Python {env_payload.python_version} "
             f"but this machine runs Python {current_python}. "
             "Set allow_python_mismatch=True to override."
         )
     # --- Install pip dependencies ---
-    if install_deps and payload.pip_dependencies:
-        _install_dependencies(payload.pip_dependencies)
+    if install_deps and env_payload.pip_dependencies:
+        _install_dependencies(env_payload.pip_dependencies)
     # --- Unpickle the class ---
     try:
-        env_class = cloudpickle.loads(payload.pickled_class)
+        env_class = cloudpickle.loads(env_payload.pickled_class)
     except Exception as e:
         raise BundlingError(
             f"Failed to unpickle environment class: {e}. "
@@ -85,3 +87,30 @@ def _install_dependencies(deps: list[str]) -> None:
             f"stderr: {result.stderr}"
         )
     logger.info("[bundling] Dependencies installed successfully.")
+def load_env_from_path(
+    path: Union[str, Path],
+    install_deps: bool = True,
+    allow_python_mismatch: bool = False,
+) -> Type[BaseEnv]:
+    """Load a packaged environment class from a file path.
+    Args:
+        path: Path to a .bmx file containing the serialized EnvPayload.
+        install_deps: Install pip_dependencies before unpickling.
+        allow_python_mismatch: If False, raise on Python version mismatch.
+    Returns:
+        The unpickled BaseEnv subclass (class object, not instance).
+    Raises:
+        FileNotFoundError: If the file does not exist.
+        IncompatiblePythonError: Python version mismatch.
+        DependencyError: pip install failed.
+        BundlingError: Unpickling failed.
+    """
+    path = Path(path)
+    with open(path, "rb") as f:
+        payload_bytes = f.read()
+    return load_env(payload_bytes, install_deps, allow_python_mismatch)

{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/bundle/payload.py RENAMED Viewed

@@ -1,6 +1,5 @@
 import json
 import struct
-import sys
 from dataclasses import dataclass, field
 from typing import Any, Dict, List

{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/bundle/validator.py RENAMED Viewed

@@ -221,7 +221,7 @@ def _run_isolated_validation(
                 f"Isolated smoke test failed:\n"
                 f"stdout: {result.stdout}\n"
                 f"stderr: {result.stderr}\n"
-                "This usually means a dependency is missing from pip_dependencies."
+                "This usually means a dependency is missing from pip_dependencies or local_modules."
             )
         print(f"[validator] {result.stdout.strip()}")

{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/proxy_server.py RENAMED Viewed

@@ -383,7 +383,7 @@ class ProxyServer:
             os.execv(sys.executable, [sys.executable] + sys.argv)
         if self.client:
-            await self.client._disconnect()
+            await self.client.close()
         self.cleanup_workspace()
         asyncio.create_task(do_reset())

benchmax-0.1.2.dev9/src/benchmax/prompts/__init__.py ADDED Viewed

File without changes

{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: benchmax
-Version: 0.1.2.dev7
+Version: 0.1.2.dev9
 Summary: Framework-Agnostic RL Environments for LLM Fine-Tuning
 Author: cgft.io
 Classifier: Programming Language :: Python :: 3

{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax.egg-info/SOURCES.txt RENAMED Viewed

@@ -43,7 +43,6 @@ src/benchmax/envs/mcp/provisioners/local_provisioner.py
 src/benchmax/envs/mcp/provisioners/manual_provisioner.py
 src/benchmax/envs/mcp/provisioners/skypilot_provisioner.py
 src/benchmax/envs/mcp/provisioners/utils.py
-src/benchmax/envs/search/search_env.py
 src/benchmax/envs/wikipedia/utils.py
 src/benchmax/envs/wikipedia/wiki_env.py
 src/benchmax/prompts/__init__.py

benchmax-0.1.2.dev7/src/benchmax/bundle/__init__.py DELETED Viewed

@@ -1,40 +0,0 @@
-"""benchmax.bundle - Remote class bundling for custom environments.
-Usage::
-    from benchmax.bundle import bundle_env, load_env, validate_env
-    # On the local machine (e.g., Colab notebook):
-    payload = bundle_env(
-        MySearchEnv,
-        pip_dependencies=["aiohttp"],
-    )
-    payload_bytes = payload.to_bytes()
-    # Send payload_bytes to remote machine...
-    # On the remote machine:
-    env_class = load_env(payload_bytes)
-    env = env_class(api_key="...", base_url="...")
-"""
-from benchmax.bundle.errors import (
-    DependencyError,
-    IncompatiblePythonError,
-    BundlingError,
-    ValidationError,
-)
-from benchmax.bundle.loader import load_env
-from benchmax.bundle.bundler import bundle_env
-from benchmax.bundle.payload import EnvPayload
-from benchmax.bundle.validator import validate_payload
-__all__ = [
-    "bundle_env",
-    "load_env",
-    "validate_payload",
-    "EnvPayload",
-    "BundlingError",
-    "ValidationError",
-    "DependencyError",
-    "IncompatiblePythonError",
-]

benchmax-0.1.2.dev7/src/benchmax/envs/search/search_env.py DELETED Viewed

@@ -1,269 +0,0 @@
-from difflib import SequenceMatcher
-from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple
-import aiohttp
-from benchmax.envs.base_env import BaseEnv
-from benchmax.envs.types import ToolDefinition, StandardizedExample
-SYSTEM_PROMPT = """Please use the search tool provided to find relevant information from the corpus.
-Formulate effective search queries to retrieve the most relevant chunks.
-You can filter by metadata or filename to narrow your search.
-Write your complete answer on the final line only as a concise entity, within the xml tags <answer></answer>.\n
-"""
-def percent_of_text_a_in_text_b(text_a, text_b):
-    if not text_a:
-        return 0.0
-    matcher = SequenceMatcher(None, text_a, text_b)
-    matched_chars = sum(
-        size for _, _, size in matcher.get_matching_blocks()
-    )
-    return (matched_chars / len(text_a))
-async def chunk_overlap_reward_function(
-    completion: str,
-    ground_truth: str,
-    **kwargs: Any
-) -> float:
-    """
-    Reward function that computes the percentage of overlapping text between
-    the completion and the ground truth.
-    Args:
-        completion: The model's generated text
-        ground_truth: The reference text to compare against
-        **kwargs: Additional arguments (not used here)
-    Returns:
-        float: A score between 0.0 and 1.0 representing the overlap percentage.
-    """
-    reference_chunks = kwargs.get("reference_chunks", [])
-    reference_string = " ".join(reference_chunks)
-    completion_str = completion if isinstance(completion, str) else ""
-    if isinstance(completion, list):
-        completion_str = " ".join(
-            [c.get("content", "") for c in completion if isinstance(c, dict) and c.get("role", "") != "assistant"]
-        )
-        for msg in completion:
-            if not isinstance(msg, dict):
-                continue
-            if msg.get("role", "") != "assistant":
-                continue
-            msg_content = msg.get("content", "")
-            if msg_content.count("<tool_call>") >= 4:
-                return 0.0
-    if reference_string:
-        overlap_score = percent_of_text_a_in_text_b(reference_string, completion_str)
-        if overlap_score >= 0.25:
-            return overlap_score
-    return 0.0
-class SearchEnv(BaseEnv):
-    """Search environment with BM25 corpus search tool."""
-    system_prompt: str = SYSTEM_PROMPT
-    def __init__(
-        self,
-        api_key: str,
-        corpus_id: str,
-        base_url: str,
-        **kwargs,
-    ):
-        """
-        Initialize the search environment.
-        Args:
-            api_key: API key for authentication (required)
-            corpus_id: ID of the corpus to search (required)
-            base_url: Base URL of the search API (required)
-        """
-        if not api_key:
-            raise ValueError("api_key is required")
-        if not corpus_id:
-            raise ValueError("corpus_id is required")
-        self._api_key = api_key
-        self._corpus_id = corpus_id
-        self._base_url = base_url.rstrip("/")
-        search_tool_definition = ToolDefinition(
-            name="search_corpus",
-            description="Search the corpus using BM25 with optional metadata and filename filtering.",
-            input_schema={
-                "type": "object",
-                "properties": {
-                    "query": {
-                        "type": "string",
-                        "description": "Search query string.",
-                    },
-                    "metadata": {
-                        "type": "object",
-                        "description": "Optional metadata filters (e.g., {'ticker': 'DDOG', 'year': 2024}).",
-                    },
-                    "filename": {
-                        "type": "string",
-                        "description": "Optional filename filter. Simple string for substring match (e.g., 'config') or regex pattern (e.g., '.*\\.json$').",
-                    },
-                    "limit": {
-                        "type": "integer",
-                        "description": "Max number of results to return (default 10).",
-                    },
-                },
-                "required": ["query"],
-            },
-        )
-        self._tools: Dict[str, Tuple[ToolDefinition, Callable]] = {
-            search_tool_definition.name: (search_tool_definition, self._search_corpus_tool)
-        }
-    async def _search_corpus_tool(
-        self,
-        query: str,
-        metadata: Optional[Dict[str, Any]] = None,
-        filename: Optional[str] = None,
-        limit: int = 10,
-        **kwargs
-    ) -> str:
-        """
-        Search the corpus using BM25.
-        Args:
-            query: Search query string
-            metadata: Optional metadata filters
-            filename: Optional filename filter (substring or regex)
-            limit: Maximum number of results
-        Returns:
-            Formatted search results or error message
-        """
-        if not query:
-            return "Error: Missing required parameter: 'query'"
-        # Build request body
-        request_body = {"query": query, "limit": limit}
-        if metadata:
-            request_body["metadata"] = metadata
-        if filename:
-            request_body["filename"] = filename
-        # Build URL
-        url = f"{self._base_url}/api/corpora/{self._corpus_id}/search"
-        headers = {
-            "x-api-key": self._api_key,
-            "Content-Type": "application/json",
-        }
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.post(
-                    url,
-                    json=request_body,
-                    headers=headers,
-                    timeout=aiohttp.ClientTimeout(total=10.0),
-                ) as resp:
-                    if resp.status != 200:
-                        error_text = await resp.text()
-                        return f"Error: API request failed with status {resp.status}: {error_text}"
-                    data = await resp.json()
-            results = data.get("results", [])
-            total = data.get("total", 0)
-            if not results:
-                return "No results found."
-            # Format results
-            lines = []
-            for i, item in enumerate(results, start=1):
-                filename_val = item.get("filename", "—")
-                score = item.get("score")
-                score_str = f"(score: {score:.2f})" if score is not None else "(filtered)"
-                content = item.get("content", "")
-                metadata_val = item.get("metadata", {})
-                lines.append(f"{i}. {filename_val} {score_str}")
-                lines.append(f"   Content: {content}")
-                if metadata_val:
-                    lines.append(f"   Metadata: {metadata_val}")
-            lines.append(f"\nTotal: {total} results")
-            return "\n".join(lines)
-        except aiohttp.ClientError as e:
-            return f"Error: Network error: {str(e)}"
-        except Exception as e:
-            return f"Error: {str(e)}"
-    async def shutdown(self):
-        # no cleanup required
-        pass
-    @classmethod
-    def dataset_preprocess(cls, example: Any, **kwargs) -> StandardizedExample:
-        return StandardizedExample(
-            prompt=example.get("Question", ""),
-            ground_truth=example.get("Answer", None),
-            init_rollout_args={},
-        )
-    async def list_tools(self) -> List[ToolDefinition]:
-        """List available tools."""
-        return [self._tools[k][0] for k in sorted(self._tools)]
-    async def run_tool(self, rollout_id: str, tool_name: str, **tool_args) -> Any:
-        """
-        Execute a tool.
-        Args:
-            rollout_id: Identifier for current rollout (unused for stateless env)
-            tool_name: Name of the tool (e.g., "search_corpus")
-            **tool_args: Arguments for the tool function
-        Returns:
-            Tool execution result or error message
-        """
-        _, tool_function = self._tools[tool_name]
-        return await tool_function(**tool_args)
-    async def init_rollout(self, rollout_id: str, **rollout_args) -> None:
-        """Initialize rollout (no-op for stateless environment)."""
-        pass
-    async def release_rollout(self, rollout_id: str) -> None:
-        """Release rollout (no-op for stateless environment)."""
-        pass
-    async def copy_to_workspace(
-        self, rollout_id: str, src_path: Path, dst_filename: Optional[str] = None
-    ) -> None:
-        """Not implemented for this environment."""
-        pass
-    async def copy_content_to_workspace(
-        self, rollout_id: str, src_content: str | bytes, dst_filename: str
-    ) -> None:
-        """Not implemented for this environment."""
-        pass
-    async def copy_from_workspace(
-        self, rollout_id: str, src_filename: str, dst_path: Path
-    ) -> None:
-        """Not implemented for this environment."""
-        pass
-    async def compute_reward(
-        self, rollout_id: str, completion: str, ground_truth: Any, **kwargs: Any
-    ) -> Dict[str, float]:
-        """Compute rewards using the chunk overlap reward function."""
-        return {
-            "chunk_overlap": await chunk_overlap_reward_function(completion, ground_truth, **kwargs)
-        }