benchmax 0.1.2.dev7__tar.gz → 0.1.2.dev9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/PKG-INFO +1 -1
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/pyproject.toml +5 -5
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/bundle/loader.py +37 -8
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/bundle/payload.py +0 -1
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/bundle/validator.py +1 -1
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/proxy_server.py +1 -1
- benchmax-0.1.2.dev9/src/benchmax/prompts/__init__.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax.egg-info/PKG-INFO +1 -1
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax.egg-info/SOURCES.txt +0 -1
- benchmax-0.1.2.dev7/src/benchmax/bundle/__init__.py +0 -40
- benchmax-0.1.2.dev7/src/benchmax/envs/search/search_env.py +0 -269
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/LICENSE +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/README.md +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/setup.cfg +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/adapters/__init__.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/adapters/benchmax_wrapper.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/adapters/skyrl/benchmax_data_process.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/adapters/skyrl/skyrl_adapter.py +0 -0
- {benchmax-0.1.2.dev7/src/benchmax/envs → benchmax-0.1.2.dev9/src/benchmax/bundle}/__init__.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/bundle/bundler.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/bundle/errors.py +0 -0
- {benchmax-0.1.2.dev7/src/benchmax/envs/excel/workdir → benchmax-0.1.2.dev9/src/benchmax/envs}/__init__.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/base_env.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/crm/crm_env.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/crm/workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/crm/workdir/salesforce_mcp.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/excel/data_utils.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/excel/excel_env.py +0 -0
- {benchmax-0.1.2.dev7/src/benchmax/prompts → benchmax-0.1.2.dev9/src/benchmax/envs/excel/workdir}/__init__.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/excel/workdir/excel_code_runner_mcp.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/excel/workdir/excel_utils.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/excel/workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/math/math_env.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/math/workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/__init__.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/example_workdir/demo_mcp_server.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/example_workdir/reward_fn.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/parallel_mcp_env.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/provisioners/__init__.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/provisioners/base_provisioner.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/provisioners/local_provisioner.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/provisioners/manual_provisioner.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/provisioners/skypilot_provisioner.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/provisioners/utils.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/server_pool.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/utils.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/types.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/wikipedia/utils.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/wikipedia/wiki_env.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/prompts/tools.py +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax.egg-info/dependency_links.txt +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax.egg-info/requires.txt +0 -0
- {benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "benchmax"
|
|
3
|
-
version = "0.1.2.dev7"
|
|
3
|
+
version = "0.1.2.dev9"
|
|
4
4
|
description = "Framework-Agnostic RL Environments for LLM Fine-Tuning"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
authors = [{ name = "cgft.io" }]
|
|
@@ -56,8 +56,8 @@ conflicts = [[{ group = "skypilot" }, { group = "skyrl" }]]
|
|
|
56
56
|
[tool.uv.pip]
|
|
57
57
|
extra = ["dev", "skypilot", "skyrl", "excel", "excel-mac-windows", "crm"]
|
|
58
58
|
|
|
59
|
-
[tool.uv.extra-build-dependencies]
|
|
60
|
-
flash-attn = [{ requirement = "torch", match-runtime = true }]
|
|
59
|
+
# [tool.uv.extra-build-dependencies]
|
|
60
|
+
# flash-attn = [{ requirement = "torch", match-runtime = true }]
|
|
61
61
|
|
|
62
|
-
[tool.uv.extra-build-variables]
|
|
63
|
-
flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }
|
|
62
|
+
# [tool.uv.extra-build-variables]
|
|
63
|
+
# flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" }
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import subprocess
|
|
3
3
|
import sys
|
|
4
|
-
from
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Type, Union
|
|
5
6
|
|
|
6
7
|
import cloudpickle
|
|
7
8
|
|
|
@@ -36,25 +37,26 @@ def load_env(
|
|
|
36
37
|
DependencyError: pip install failed.
|
|
37
38
|
BundlingError: Unpickling failed.
|
|
38
39
|
"""
|
|
39
|
-
|
|
40
|
-
payload
|
|
40
|
+
env_payload: EnvPayload = (
|
|
41
|
+
payload if isinstance(payload, EnvPayload) else EnvPayload.from_bytes(payload)
|
|
42
|
+
)
|
|
41
43
|
|
|
42
44
|
# --- Python version check ---
|
|
43
45
|
current_python = f"{sys.version_info.major}.{sys.version_info.minor}"
|
|
44
|
-
if
|
|
46
|
+
if env_payload.python_version != current_python and not allow_python_mismatch:
|
|
45
47
|
raise IncompatiblePythonError(
|
|
46
|
-
f"Payload was packaged with Python {
|
|
48
|
+
f"Payload was packaged with Python {env_payload.python_version} "
|
|
47
49
|
f"but this machine runs Python {current_python}. "
|
|
48
50
|
"Set allow_python_mismatch=True to override."
|
|
49
51
|
)
|
|
50
52
|
|
|
51
53
|
# --- Install pip dependencies ---
|
|
52
|
-
if install_deps and
|
|
53
|
-
_install_dependencies(
|
|
54
|
+
if install_deps and env_payload.pip_dependencies:
|
|
55
|
+
_install_dependencies(env_payload.pip_dependencies)
|
|
54
56
|
|
|
55
57
|
# --- Unpickle the class ---
|
|
56
58
|
try:
|
|
57
|
-
env_class = cloudpickle.loads(
|
|
59
|
+
env_class = cloudpickle.loads(env_payload.pickled_class)
|
|
58
60
|
except Exception as e:
|
|
59
61
|
raise BundlingError(
|
|
60
62
|
f"Failed to unpickle environment class: {e}. "
|
|
@@ -85,3 +87,30 @@ def _install_dependencies(deps: list[str]) -> None:
|
|
|
85
87
|
f"stderr: {result.stderr}"
|
|
86
88
|
)
|
|
87
89
|
logger.info("[bundling] Dependencies installed successfully.")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def load_env_from_path(
|
|
93
|
+
path: Union[str, Path],
|
|
94
|
+
install_deps: bool = True,
|
|
95
|
+
allow_python_mismatch: bool = False,
|
|
96
|
+
) -> Type[BaseEnv]:
|
|
97
|
+
"""Load a packaged environment class from a file path.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
path: Path to a .bmx file containing the serialized EnvPayload.
|
|
101
|
+
install_deps: Install pip_dependencies before unpickling.
|
|
102
|
+
allow_python_mismatch: If False, raise on Python version mismatch.
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
The unpickled BaseEnv subclass (class object, not instance).
|
|
106
|
+
|
|
107
|
+
Raises:
|
|
108
|
+
FileNotFoundError: If the file does not exist.
|
|
109
|
+
IncompatiblePythonError: Python version mismatch.
|
|
110
|
+
DependencyError: pip install failed.
|
|
111
|
+
BundlingError: Unpickling failed.
|
|
112
|
+
"""
|
|
113
|
+
path = Path(path)
|
|
114
|
+
with open(path, "rb") as f:
|
|
115
|
+
payload_bytes = f.read()
|
|
116
|
+
return load_env(payload_bytes, install_deps, allow_python_mismatch)
|
|
@@ -221,7 +221,7 @@ def _run_isolated_validation(
|
|
|
221
221
|
f"Isolated smoke test failed:\n"
|
|
222
222
|
f"stdout: {result.stdout}\n"
|
|
223
223
|
f"stderr: {result.stderr}\n"
|
|
224
|
-
"This usually means a dependency is missing from pip_dependencies."
|
|
224
|
+
"This usually means a dependency is missing from pip_dependencies or local_modules."
|
|
225
225
|
)
|
|
226
226
|
|
|
227
227
|
print(f"[validator] {result.stdout.strip()}")
|
|
File without changes
|
|
@@ -43,7 +43,6 @@ src/benchmax/envs/mcp/provisioners/local_provisioner.py
|
|
|
43
43
|
src/benchmax/envs/mcp/provisioners/manual_provisioner.py
|
|
44
44
|
src/benchmax/envs/mcp/provisioners/skypilot_provisioner.py
|
|
45
45
|
src/benchmax/envs/mcp/provisioners/utils.py
|
|
46
|
-
src/benchmax/envs/search/search_env.py
|
|
47
46
|
src/benchmax/envs/wikipedia/utils.py
|
|
48
47
|
src/benchmax/envs/wikipedia/wiki_env.py
|
|
49
48
|
src/benchmax/prompts/__init__.py
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
"""benchmax.bundle - Remote class bundling for custom environments.
|
|
2
|
-
|
|
3
|
-
Usage::
|
|
4
|
-
|
|
5
|
-
from benchmax.bundle import bundle_env, load_env, validate_env
|
|
6
|
-
|
|
7
|
-
# On the local machine (e.g., Colab notebook):
|
|
8
|
-
payload = bundle_env(
|
|
9
|
-
MySearchEnv,
|
|
10
|
-
pip_dependencies=["aiohttp"],
|
|
11
|
-
)
|
|
12
|
-
payload_bytes = payload.to_bytes()
|
|
13
|
-
# Send payload_bytes to remote machine...
|
|
14
|
-
|
|
15
|
-
# On the remote machine:
|
|
16
|
-
env_class = load_env(payload_bytes)
|
|
17
|
-
env = env_class(api_key="...", base_url="...")
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
from benchmax.bundle.errors import (
|
|
21
|
-
DependencyError,
|
|
22
|
-
IncompatiblePythonError,
|
|
23
|
-
BundlingError,
|
|
24
|
-
ValidationError,
|
|
25
|
-
)
|
|
26
|
-
from benchmax.bundle.loader import load_env
|
|
27
|
-
from benchmax.bundle.bundler import bundle_env
|
|
28
|
-
from benchmax.bundle.payload import EnvPayload
|
|
29
|
-
from benchmax.bundle.validator import validate_payload
|
|
30
|
-
|
|
31
|
-
__all__ = [
|
|
32
|
-
"bundle_env",
|
|
33
|
-
"load_env",
|
|
34
|
-
"validate_payload",
|
|
35
|
-
"EnvPayload",
|
|
36
|
-
"BundlingError",
|
|
37
|
-
"ValidationError",
|
|
38
|
-
"DependencyError",
|
|
39
|
-
"IncompatiblePythonError",
|
|
40
|
-
]
|
|
@@ -1,269 +0,0 @@
|
|
|
1
|
-
from difflib import SequenceMatcher
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
4
|
-
|
|
5
|
-
import aiohttp
|
|
6
|
-
|
|
7
|
-
from benchmax.envs.base_env import BaseEnv
|
|
8
|
-
from benchmax.envs.types import ToolDefinition, StandardizedExample
|
|
9
|
-
|
|
10
|
-
SYSTEM_PROMPT = """Please use the search tool provided to find relevant information from the corpus.
|
|
11
|
-
Formulate effective search queries to retrieve the most relevant chunks.
|
|
12
|
-
You can filter by metadata or filename to narrow your search.
|
|
13
|
-
Write your complete answer on the final line only as a concise entity, within the xml tags <answer></answer>.\n
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def percent_of_text_a_in_text_b(text_a, text_b):
|
|
18
|
-
if not text_a:
|
|
19
|
-
return 0.0
|
|
20
|
-
|
|
21
|
-
matcher = SequenceMatcher(None, text_a, text_b)
|
|
22
|
-
matched_chars = sum(
|
|
23
|
-
size for _, _, size in matcher.get_matching_blocks()
|
|
24
|
-
)
|
|
25
|
-
return (matched_chars / len(text_a))
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
async def chunk_overlap_reward_function(
|
|
29
|
-
completion: str,
|
|
30
|
-
ground_truth: str,
|
|
31
|
-
**kwargs: Any
|
|
32
|
-
) -> float:
|
|
33
|
-
"""
|
|
34
|
-
Reward function that computes the percentage of overlapping text between
|
|
35
|
-
the completion and the ground truth.
|
|
36
|
-
|
|
37
|
-
Args:
|
|
38
|
-
completion: The model's generated text
|
|
39
|
-
ground_truth: The reference text to compare against
|
|
40
|
-
**kwargs: Additional arguments (not used here)
|
|
41
|
-
Returns:
|
|
42
|
-
float: A score between 0.0 and 1.0 representing the overlap percentage.
|
|
43
|
-
"""
|
|
44
|
-
reference_chunks = kwargs.get("reference_chunks", [])
|
|
45
|
-
reference_string = " ".join(reference_chunks)
|
|
46
|
-
completion_str = completion if isinstance(completion, str) else ""
|
|
47
|
-
if isinstance(completion, list):
|
|
48
|
-
completion_str = " ".join(
|
|
49
|
-
[c.get("content", "") for c in completion if isinstance(c, dict) and c.get("role", "") != "assistant"]
|
|
50
|
-
)
|
|
51
|
-
for msg in completion:
|
|
52
|
-
if not isinstance(msg, dict):
|
|
53
|
-
continue
|
|
54
|
-
if msg.get("role", "") != "assistant":
|
|
55
|
-
continue
|
|
56
|
-
msg_content = msg.get("content", "")
|
|
57
|
-
if msg_content.count("<tool_call>") >= 4:
|
|
58
|
-
return 0.0
|
|
59
|
-
|
|
60
|
-
if reference_string:
|
|
61
|
-
overlap_score = percent_of_text_a_in_text_b(reference_string, completion_str)
|
|
62
|
-
if overlap_score >= 0.25:
|
|
63
|
-
return overlap_score
|
|
64
|
-
return 0.0
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
class SearchEnv(BaseEnv):
|
|
68
|
-
"""Search environment with BM25 corpus search tool."""
|
|
69
|
-
|
|
70
|
-
system_prompt: str = SYSTEM_PROMPT
|
|
71
|
-
|
|
72
|
-
def __init__(
|
|
73
|
-
self,
|
|
74
|
-
api_key: str,
|
|
75
|
-
corpus_id: str,
|
|
76
|
-
base_url: str,
|
|
77
|
-
**kwargs,
|
|
78
|
-
):
|
|
79
|
-
"""
|
|
80
|
-
Initialize the search environment.
|
|
81
|
-
|
|
82
|
-
Args:
|
|
83
|
-
api_key: API key for authentication (required)
|
|
84
|
-
corpus_id: ID of the corpus to search (required)
|
|
85
|
-
base_url: Base URL of the search API (required)
|
|
86
|
-
"""
|
|
87
|
-
if not api_key:
|
|
88
|
-
raise ValueError("api_key is required")
|
|
89
|
-
if not corpus_id:
|
|
90
|
-
raise ValueError("corpus_id is required")
|
|
91
|
-
|
|
92
|
-
self._api_key = api_key
|
|
93
|
-
self._corpus_id = corpus_id
|
|
94
|
-
self._base_url = base_url.rstrip("/")
|
|
95
|
-
|
|
96
|
-
search_tool_definition = ToolDefinition(
|
|
97
|
-
name="search_corpus",
|
|
98
|
-
description="Search the corpus using BM25 with optional metadata and filename filtering.",
|
|
99
|
-
input_schema={
|
|
100
|
-
"type": "object",
|
|
101
|
-
"properties": {
|
|
102
|
-
"query": {
|
|
103
|
-
"type": "string",
|
|
104
|
-
"description": "Search query string.",
|
|
105
|
-
},
|
|
106
|
-
"metadata": {
|
|
107
|
-
"type": "object",
|
|
108
|
-
"description": "Optional metadata filters (e.g., {'ticker': 'DDOG', 'year': 2024}).",
|
|
109
|
-
},
|
|
110
|
-
"filename": {
|
|
111
|
-
"type": "string",
|
|
112
|
-
"description": "Optional filename filter. Simple string for substring match (e.g., 'config') or regex pattern (e.g., '.*\\.json$').",
|
|
113
|
-
},
|
|
114
|
-
"limit": {
|
|
115
|
-
"type": "integer",
|
|
116
|
-
"description": "Max number of results to return (default 10).",
|
|
117
|
-
},
|
|
118
|
-
},
|
|
119
|
-
"required": ["query"],
|
|
120
|
-
},
|
|
121
|
-
)
|
|
122
|
-
|
|
123
|
-
self._tools: Dict[str, Tuple[ToolDefinition, Callable]] = {
|
|
124
|
-
search_tool_definition.name: (search_tool_definition, self._search_corpus_tool)
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
async def _search_corpus_tool(
|
|
128
|
-
self,
|
|
129
|
-
query: str,
|
|
130
|
-
metadata: Optional[Dict[str, Any]] = None,
|
|
131
|
-
filename: Optional[str] = None,
|
|
132
|
-
limit: int = 10,
|
|
133
|
-
**kwargs
|
|
134
|
-
) -> str:
|
|
135
|
-
"""
|
|
136
|
-
Search the corpus using BM25.
|
|
137
|
-
|
|
138
|
-
Args:
|
|
139
|
-
query: Search query string
|
|
140
|
-
metadata: Optional metadata filters
|
|
141
|
-
filename: Optional filename filter (substring or regex)
|
|
142
|
-
limit: Maximum number of results
|
|
143
|
-
|
|
144
|
-
Returns:
|
|
145
|
-
Formatted search results or error message
|
|
146
|
-
"""
|
|
147
|
-
if not query:
|
|
148
|
-
return "Error: Missing required parameter: 'query'"
|
|
149
|
-
|
|
150
|
-
# Build request body
|
|
151
|
-
request_body = {"query": query, "limit": limit}
|
|
152
|
-
if metadata:
|
|
153
|
-
request_body["metadata"] = metadata
|
|
154
|
-
if filename:
|
|
155
|
-
request_body["filename"] = filename
|
|
156
|
-
|
|
157
|
-
# Build URL
|
|
158
|
-
url = f"{self._base_url}/api/corpora/{self._corpus_id}/search"
|
|
159
|
-
headers = {
|
|
160
|
-
"x-api-key": self._api_key,
|
|
161
|
-
"Content-Type": "application/json",
|
|
162
|
-
}
|
|
163
|
-
|
|
164
|
-
try:
|
|
165
|
-
async with aiohttp.ClientSession() as session:
|
|
166
|
-
async with session.post(
|
|
167
|
-
url,
|
|
168
|
-
json=request_body,
|
|
169
|
-
headers=headers,
|
|
170
|
-
timeout=aiohttp.ClientTimeout(total=10.0),
|
|
171
|
-
) as resp:
|
|
172
|
-
if resp.status != 200:
|
|
173
|
-
error_text = await resp.text()
|
|
174
|
-
return f"Error: API request failed with status {resp.status}: {error_text}"
|
|
175
|
-
|
|
176
|
-
data = await resp.json()
|
|
177
|
-
|
|
178
|
-
results = data.get("results", [])
|
|
179
|
-
total = data.get("total", 0)
|
|
180
|
-
|
|
181
|
-
if not results:
|
|
182
|
-
return "No results found."
|
|
183
|
-
|
|
184
|
-
# Format results
|
|
185
|
-
lines = []
|
|
186
|
-
for i, item in enumerate(results, start=1):
|
|
187
|
-
filename_val = item.get("filename", "—")
|
|
188
|
-
score = item.get("score")
|
|
189
|
-
score_str = f"(score: {score:.2f})" if score is not None else "(filtered)"
|
|
190
|
-
content = item.get("content", "")
|
|
191
|
-
metadata_val = item.get("metadata", {})
|
|
192
|
-
|
|
193
|
-
lines.append(f"{i}. {filename_val} {score_str}")
|
|
194
|
-
lines.append(f" Content: {content}")
|
|
195
|
-
if metadata_val:
|
|
196
|
-
lines.append(f" Metadata: {metadata_val}")
|
|
197
|
-
|
|
198
|
-
lines.append(f"\nTotal: {total} results")
|
|
199
|
-
return "\n".join(lines)
|
|
200
|
-
|
|
201
|
-
except aiohttp.ClientError as e:
|
|
202
|
-
return f"Error: Network error: {str(e)}"
|
|
203
|
-
except Exception as e:
|
|
204
|
-
return f"Error: {str(e)}"
|
|
205
|
-
|
|
206
|
-
async def shutdown(self):
|
|
207
|
-
# no cleanup required
|
|
208
|
-
pass
|
|
209
|
-
|
|
210
|
-
@classmethod
|
|
211
|
-
def dataset_preprocess(cls, example: Any, **kwargs) -> StandardizedExample:
|
|
212
|
-
return StandardizedExample(
|
|
213
|
-
prompt=example.get("Question", ""),
|
|
214
|
-
ground_truth=example.get("Answer", None),
|
|
215
|
-
init_rollout_args={},
|
|
216
|
-
)
|
|
217
|
-
|
|
218
|
-
async def list_tools(self) -> List[ToolDefinition]:
|
|
219
|
-
"""List available tools."""
|
|
220
|
-
return [self._tools[k][0] for k in sorted(self._tools)]
|
|
221
|
-
|
|
222
|
-
async def run_tool(self, rollout_id: str, tool_name: str, **tool_args) -> Any:
|
|
223
|
-
"""
|
|
224
|
-
Execute a tool.
|
|
225
|
-
|
|
226
|
-
Args:
|
|
227
|
-
rollout_id: Identifier for current rollout (unused for stateless env)
|
|
228
|
-
tool_name: Name of the tool (e.g., "search_corpus")
|
|
229
|
-
**tool_args: Arguments for the tool function
|
|
230
|
-
|
|
231
|
-
Returns:
|
|
232
|
-
Tool execution result or error message
|
|
233
|
-
"""
|
|
234
|
-
_, tool_function = self._tools[tool_name]
|
|
235
|
-
return await tool_function(**tool_args)
|
|
236
|
-
|
|
237
|
-
async def init_rollout(self, rollout_id: str, **rollout_args) -> None:
|
|
238
|
-
"""Initialize rollout (no-op for stateless environment)."""
|
|
239
|
-
pass
|
|
240
|
-
|
|
241
|
-
async def release_rollout(self, rollout_id: str) -> None:
|
|
242
|
-
"""Release rollout (no-op for stateless environment)."""
|
|
243
|
-
pass
|
|
244
|
-
|
|
245
|
-
async def copy_to_workspace(
|
|
246
|
-
self, rollout_id: str, src_path: Path, dst_filename: Optional[str] = None
|
|
247
|
-
) -> None:
|
|
248
|
-
"""Not implemented for this environment."""
|
|
249
|
-
pass
|
|
250
|
-
|
|
251
|
-
async def copy_content_to_workspace(
|
|
252
|
-
self, rollout_id: str, src_content: str | bytes, dst_filename: str
|
|
253
|
-
) -> None:
|
|
254
|
-
"""Not implemented for this environment."""
|
|
255
|
-
pass
|
|
256
|
-
|
|
257
|
-
async def copy_from_workspace(
|
|
258
|
-
self, rollout_id: str, src_filename: str, dst_path: Path
|
|
259
|
-
) -> None:
|
|
260
|
-
"""Not implemented for this environment."""
|
|
261
|
-
pass
|
|
262
|
-
|
|
263
|
-
async def compute_reward(
|
|
264
|
-
self, rollout_id: str, completion: str, ground_truth: Any, **kwargs: Any
|
|
265
|
-
) -> Dict[str, float]:
|
|
266
|
-
"""Compute rewards using the chunk overlap reward function."""
|
|
267
|
-
return {
|
|
268
|
-
"chunk_overlap": await chunk_overlap_reward_function(completion, ground_truth, **kwargs)
|
|
269
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/adapters/skyrl/benchmax_data_process.py
RENAMED
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev7/src/benchmax/envs → benchmax-0.1.2.dev9/src/benchmax/bundle}/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/excel/workdir/excel_code_runner_mcp.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/example_workdir/demo_mcp_server.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/example_workdir/reward_fn.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/provisioners/base_provisioner.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/provisioners/local_provisioner.py
RENAMED
|
File without changes
|
{benchmax-0.1.2.dev7 → benchmax-0.1.2.dev9}/src/benchmax/envs/mcp/provisioners/manual_provisioner.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|