hyperplane-eval 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
adapters/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Infrastructure adapters for the evaluation framework."""
File without changes
@@ -0,0 +1,64 @@
1
+ import os
2
+ import json
3
+ import re
4
+ import asyncio
5
+ from typing import Any, Dict
6
+ from litellm import acompletion
7
+
8
+
9
+ class LLMClient:
10
+ """
11
+ A unified LLM client using LiteLLM to support multiple providers (OpenAI, Gemini, Anthropic, etc).
12
+ """
13
+
14
+ def __init__(self, model: str | None = None, **kwargs):
15
+ # LiteLLM accepts strings like "gpt-4o", "gemini/gemini-1.5-flash", "anthropic/claude-3-5-sonnet"
16
+ self.model = model or os.environ.get("LLM_MODEL", "gpt-4o")
17
+ self.llm_kwargs = kwargs
18
+ self._semaphore = asyncio.Semaphore(10)
19
+
20
+ def parse_json(self, response: str) -> Dict[str, Any]:
21
+ if not (text := (response or "").strip()):
22
+ return {}
23
+ candidates = [text]
24
+ if match := re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL):
25
+ candidates.insert(0, match.group(1))
26
+ start, end = text.find("{"), text.rfind("}")
27
+ if start != -1 and end != -1 and end > start:
28
+ candidates.extend([text[start : end + 1], text[start:]])
29
+ for cand in filter(None, candidates):
30
+ try:
31
+ return json.loads(cand, strict=False)
32
+ except Exception:
33
+ pass
34
+ return {}
35
+
36
+ async def generate(
37
+ self,
38
+ prompt: str,
39
+ response_schema: Dict[str, Any],
40
+ temperature: float,
41
+ ) -> str:
42
+ if response_schema:
43
+ prompt += f"\n\nYOU MUST RETURN A JSON OBJECT WITH THE EXACT FOLLOWING SCHEMA:\n{json.dumps(response_schema, indent=2)}"
44
+
45
+ kwargs = {
46
+ "model": self.model, # Force using the user-selected model
47
+ "messages": [{"role": "user", "content": prompt}],
48
+ "temperature": temperature,
49
+ **self.llm_kwargs,
50
+ }
51
+
52
+ if response_schema:
53
+ kwargs["response_format"] = {"type": "json_object"}
54
+
55
+ async with self._semaphore:
56
+ try:
57
+ response = await acompletion(**kwargs)
58
+ return response.choices[0].message.content
59
+ except Exception as e:
60
+ print(f"[LiteLLM] Error HTTP: {e}")
61
+ raise RuntimeError(f"LLM Server Error: {e}")
62
+
63
+ async def close(self) -> None:
64
+ pass
File without changes
@@ -0,0 +1,97 @@
1
+ import os
2
+ import json
3
+ import subprocess
4
+
5
+
6
async def execute_temp_runner(target_path: str, selected_func: dict, params: dict):
    """Run one function of a target source file in a child process.

    A ``.py`` target is imported by a ``python3 -c`` helper; a ``.ts`` target
    is run through ``npx -y tsx --eval``; any other extension is run through
    ``node -e``. The helper prints a single marker line
    (``VERIFY_RUN_SUCCESS:<json>`` or ``VERIFY_RUN_ERROR:<message>``) which is
    parsed from stdout here.

    Args:
        target_path: Path of the source file that defines the function.
        selected_func: Descriptor with at least ``"name"``; the JS/TS branch
            also reads ``"params"`` (a list of ``{"name": ...}`` entries used
            to order positional arguments).
        params: JSON-serialisable mapping of argument name to value.

    Returns:
        A dict with ``successVal`` (JSON text of the result, or ``""``),
        ``errorVal`` (text of the error marker line or a synthesized message,
        or ``""``), ``output`` (raw stdout) and ``stderr`` (raw stderr). Only
        the marker line itself is captured in ``errorVal``; any traceback that
        follows stays available in ``output``.
    """
    # Local import: the file header only imports os, json and subprocess.
    import asyncio

    target_dir = os.path.dirname(os.path.abspath(target_path))
    target_basename = os.path.basename(target_path)
    module_name, ext = os.path.splitext(target_basename)
    is_python = ext == ".py"
    is_ts = ext == ".ts"
    func_name = selected_func["name"]

    params_json_str = json.dumps(params)

    if is_python:
        # The directory, module and function names travel as argv values
        # rather than being pasted into the source, so quotes or backslashes
        # in them cannot break or alter the generated script.
        python_script = """
import sys, json, asyncio, inspect, importlib
params_json, target_dir, module_name, func_name = sys.argv[1:5]
sys.path.insert(0, target_dir)
try:
    target_func = getattr(importlib.import_module(module_name), func_name)
except Exception as e:
    print("VERIFY_RUN_ERROR:Load fail: " + str(e))
    sys.exit(1)

params = json.loads(params_json)
casted = {}
for name, param in inspect.signature(target_func).parameters.items():
    if name in params: casted[name] = params[name]

try:
    res = target_func(**casted)
    if inspect.iscoroutine(res): res = asyncio.run(res)
    print("VERIFY_RUN_SUCCESS:" + json.dumps(res))
except Exception as e:
    import traceback
    print("VERIFY_RUN_ERROR:" + str(e) + "\\n" + traceback.format_exc())
"""
        cmd = [
            "python3",
            "-c",
            python_script,
            params_json_str,
            target_dir,
            module_name,
            func_name,
        ]
    else:
        params_array_str = json.dumps(selected_func["params"])
        # JSON string literals are valid JavaScript string literals, so the
        # names are embedded escaped instead of as raw identifiers.
        module_literal = json.dumps(f"./{module_name}{ext}")
        func_literal = json.dumps(func_name)
        missing_literal = json.dumps(f"Function {func_name} not found in module.")
        # Handle both ES modules and CommonJS
        # For inline evaluation, we'll try to import dynamically
        ts_script = f"""
async function main() {{
  try {{
    const moduleName = {module_literal};
    let mod;
    try {{
      mod = await import(moduleName);
    }} catch(e) {{
      mod = require(moduleName);
    }}
    const func = mod[{func_literal}];
    if (!func) throw new Error({missing_literal});

    const params = JSON.parse(process.argv[1]);
    const funcParams = {params_array_str};
    const args = funcParams.map(p => params[p.name]);

    let res = func(...args);
    if (res instanceof Promise) res = await res;
    console.log("VERIFY_RUN_SUCCESS:" + JSON.stringify(res));
  }} catch (err) {{
    console.log("VERIFY_RUN_ERROR:" + (err.stack || err.message));
  }}
}}
main();
"""
        if is_ts:
            cmd = ["npx", "-y", "tsx", "--eval", ts_script, params_json_str]
        else:
            cmd = ["node", "-e", ts_script, params_json_str]

    # subprocess.run blocks until the child exits; run it in a worker thread
    # so the event loop stays responsive while the target executes.
    res = await asyncio.to_thread(
        subprocess.run, cmd, cwd=target_dir, capture_output=True, text=True
    )
    output = res.stdout
    stderr = res.stderr

    success_marker = "VERIFY_RUN_SUCCESS:"
    error_marker = "VERIFY_RUN_ERROR:"
    success_val = ""
    error_val = ""
    for line in output.splitlines():
        # The last marker line of each kind wins.
        if line.startswith(success_marker):
            success_val = line[len(success_marker):]
        elif line.startswith(error_marker):
            error_val = line[len(error_marker):]

    if not success_val and not error_val:
        # No marker at all: the helper died before it could report.
        if stderr:
            error_val = f"System Error (stderr): {stderr.strip()}"
        else:
            error_val = f"Empty execution result. Output: {output.strip()}"

    return {
        "successVal": success_val,
        "errorVal": error_val,
        "output": output,
        "stderr": stderr,
    }
@@ -0,0 +1,124 @@
1
+ import json
2
+ import subprocess
3
+ import ast
4
+
5
+
6
def extract_python_functions(filepath: str) -> list[dict]:
    """List the functions and methods defined in a Python source file.

    Every ``def`` / ``async def`` found anywhere in the module (including
    methods and nested functions) is reported, except ``__init__``.

    Args:
        filepath: Path of the Python file to inspect (read as UTF-8).

    Returns:
        A list sorted by source line. Each entry has ``name``, ``params``
        (``[{"name", "type"}]`` for the regular positional parameters, minus
        ``self``/``cls``; the type is the annotation rendered as text, or
        ``"any"`` when absent or not representable), ``code`` (the definition
        preceded by up to five lines of leading context) and ``line`` (first
        line of the definition, decorators included). An unreadable or
        unparsable file yields ``[]``.
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            src = f.read()
        tree = ast.parse(src)
    except (OSError, SyntaxError, ValueError, RecursionError):
        # Best effort: a missing, undecodable or syntactically invalid file
        # simply has no extractable functions.
        return []

    lines = src.splitlines()

    def get_ann(n):
        """Render an annotation AST node as text ("any" when unknown)."""
        if n is None:
            return "any"
        if isinstance(n, ast.Name):
            return n.id
        if isinstance(n, ast.Constant):
            if n.value is None:
                return "None"
            if n.value is Ellipsis:
                return "..."
            if isinstance(n.value, str):
                # String annotation, i.e. a forward reference.
                return n.value
            return repr(n.value)
        if isinstance(n, ast.Subscript):
            s = n.slice
            # Python < 3.9 wraps the subscript in an Index node. Only that
            # wrapper is unwrapped: Attribute, Subscript and Constant nodes
            # also have a ``value`` attribute and must stay intact.
            if type(s).__name__ == "Index":
                s = s.value
            return f"{get_ann(n.value)}[{get_ann(s)}]"
        if isinstance(n, ast.Attribute):
            return f"{get_ann(n.value)}.{n.attr}"
        if isinstance(n, ast.Tuple):
            return ", ".join(get_ann(e) for e in n.elts)
        if isinstance(n, ast.BinOp) and isinstance(n.op, ast.BitOr):
            return f"{get_ann(n.left)} | {get_ann(n.right)}"
        return "any"

    funcs = []
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        if node.name == "__init__":
            continue
        # Decorators belong to the definition, so start at the earliest one.
        start = min((d.lineno for d in node.decorator_list), default=node.lineno)
        end = getattr(node, "end_lineno", len(lines))
        # ``start`` is 1-based; ``start - 6`` keeps five lines of context
        # above the definition.
        code = "\n".join(lines[max(0, start - 6) : end])
        params = [
            {"name": a.arg, "type": get_ann(a.annotation)}
            for a in node.args.args
            if a.arg not in ("self", "cls")
        ]
        funcs.append({"name": node.name, "params": params, "code": code, "line": start})

    funcs.sort(key=lambda x: x["line"])
    return funcs
54
+
55
+
56
def extract_ts_functions(filepath: str) -> list[dict]:
    """List the functions declared in a TypeScript/JavaScript source file.

    A Node script using the TypeScript compiler API is run through ``npx``.
    It reports named function declarations and variables initialised with an
    arrow function or function expression.

    Args:
        filepath: Path of the file to inspect; passed to the script as its
            only argument.

    Returns:
        The list decoded from the script's JSON output (entries carry
        ``name``, ``params`` and ``code``), or ``[]`` when the command fails,
        exits non-zero, or prints something that is not JSON.
    """
    ts_script = """
const fs = require('fs');
const ts = require('typescript');
const content = fs.readFileSync(process.argv[1], 'utf-8');
const sourceFile = ts.createSourceFile(process.argv[1], content, ts.ScriptTarget.Latest, true);
const functions = [];
const lines = content.split(/\\r?\\n/);

function getCode(node) {
    const startPos = sourceFile.getLineAndCharacterOfPosition(node.getStart());
    const endPos = sourceFile.getLineAndCharacterOfPosition(node.getEnd());
    const startLine = Math.max(0, startPos.line - 5);
    return lines.slice(startLine, endPos.line + 1).join('\\n');
}

function getParams(paramsNode) {
    return paramsNode.map(p => {
        let type = 'any';
        if (p.type) type = p.type.getText(sourceFile);
        return { name: p.name.getText(sourceFile), type: type };
    });
}

function visit(node) {
    if (ts.isFunctionDeclaration(node) && node.name) {
        functions.push({
            name: node.name.text,
            params: getParams(node.parameters),
            code: getCode(node)
        });
    } else if (ts.isVariableStatement(node)) {
        for (const decl of node.declarationList.declarations) {
            if (decl.initializer && (ts.isArrowFunction(decl.initializer) || ts.isFunctionExpression(decl.initializer))) {
                if (ts.isIdentifier(decl.name)) {
                    functions.push({
                        name: decl.name.text,
                        params: getParams(decl.initializer.parameters),
                        code: getCode(node)
                    });
                }
            }
        }
    }
    ts.forEachChild(node, visit);
}

visit(sourceFile);
console.log(JSON.stringify(functions));
"""
    # Use npx -y to ensure typescript is available without prompts
    command = ["npx", "-y", "-p", "typescript", "node", "-e", ts_script, filepath]
    try:
        proc = subprocess.run(command, capture_output=True, text=True)
        extracted = json.loads(proc.stdout) if proc.returncode == 0 else []
    except Exception:
        # Deliberately best-effort: any failure means "no functions found".
        extracted = []
    return extracted
118
+
119
+
120
def extract_functions(filepath: str) -> list[dict]:
    """Extract function descriptors from a source file, chosen by extension.

    Paths ending in ``.py`` are handled by ``extract_python_functions``;
    every other path is handed to ``extract_ts_functions``.
    """
    extractor = (
        extract_python_functions
        if filepath.endswith(".py")
        else extract_ts_functions
    )
    return extractor(filepath)
File without changes
@@ -0,0 +1,81 @@
1
+ import json
2
+ from typing import Dict, List, Callable
3
+
4
+
5
class AgentRunner:
    """
    Interfaces with the target AI agent to collect performance data locally via direct execution.
    """

    def __init__(
        self,
        executor_func: Callable = None,
        target_path: str = "",
        selected_func: dict = None,
    ):
        """Store the executor and the description of the function under test.

        Args:
            executor_func: Awaitable callable invoked as
                ``executor_func(target_path, selected_func, params)`` and
                expected to return a dict with ``successVal`` / ``errorVal``.
            target_path: Path of the source file holding the target function.
            selected_func: Descriptor of the target function; its ``"params"``
                list (entries with ``"name"`` and ``"type"``) is consulted
                when a prompt is not a JSON object.
        """
        self.executor_func = executor_func
        self.target_path = target_path
        self.selected_func = selected_func

    def _parse_params(self, prompt: str):
        """Turn a text prompt into a params dict, or ``None`` if impossible."""
        import re

        clean_str = prompt.strip()
        first_brace = clean_str.find("{")
        last_brace = clean_str.rfind("}")
        if first_brace != -1 and last_brace != -1 and last_brace > first_brace:
            # Keep only the outermost {...} span so surrounding prose is ignored.
            clean_str = clean_str[first_brace : last_brace + 1]
        # Drop ASCII control characters, which json.loads rejects inside strings.
        clean_str = re.sub(r"[\x00-\x1F]", "", clean_str)

        try:
            parsed = json.loads(clean_str)
        except (ValueError, RecursionError):
            parsed = None
        if isinstance(parsed, dict):
            return parsed

        # Not a JSON object (undecodable, or a bare number/string/array): the
        # only safe interpretation is "the whole prompt is the single string
        # argument" when the target takes exactly one ``str`` parameter.
        fn_params = self.selected_func.get("params", []) if self.selected_func else []
        if fn_params and len(fn_params) == 1 and fn_params[0].get("type") == "str":
            return {fn_params[0]["name"]: prompt}
        return None

    async def _call_target_agent(self, messages: List[Dict[str, str]]) -> str:
        """Dispatches a multi-turn request to the agent under evaluation.

        Only the ``content`` of the last message is used. A string is parsed
        into keyword arguments; a dict is used as the arguments directly; any
        other type results in a call with no arguments.

        Returns:
            ``""`` when there are no messages or no executor is configured.
            Otherwise the executor's ``successVal`` decoded from JSON (so the
            value may be any JSON type, or the raw text if it is not valid
            JSON), an ``"Error: ..."`` string on failure, or
            ``"Unknown execution state"`` when the executor reports neither a
            success nor an error value.
        """
        if not messages:
            return ""

        prompt = messages[-1]["content"]

        if not self.executor_func:
            return ""

        # Local Execution
        params = {}
        if isinstance(prompt, str):
            params = self._parse_params(prompt)
            if params is None:
                return "Error: Failed to parse parameters from test agent"
        elif isinstance(prompt, dict):
            params = prompt

        try:
            result = await self.executor_func(
                self.target_path, self.selected_func, params
            )
            if result.get("successVal"):
                try:
                    return json.loads(result["successVal"])
                except json.JSONDecodeError:
                    return result["successVal"]
            elif result.get("errorVal"):
                return f"Error: {result['errorVal']}"
            return "Unknown execution state"
        except Exception as e:
            # Executor failures are reported as text rather than raised.
            return f"Error: {str(e)}"

    async def close(self):
        """No-op close method to satisfy framework expectation."""
        pass
cli/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # cli package