skill-runner 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_runner-0.1.0/PKG-INFO +6 -0
- skill_runner-0.1.0/pyproject.toml +18 -0
- skill_runner-0.1.0/setup.cfg +4 -0
- skill_runner-0.1.0/src/skill_runner/__init__.py +3 -0
- skill_runner-0.1.0/src/skill_runner/__main__.py +86 -0
- skill_runner-0.1.0/src/skill_runner/api.py +32 -0
- skill_runner-0.1.0/src/skill_runner/loop.py +63 -0
- skill_runner-0.1.0/src/skill_runner/skill.py +62 -0
- skill_runner-0.1.0/src/skill_runner/tools.py +309 -0
- skill_runner-0.1.0/src/skill_runner.egg-info/PKG-INFO +6 -0
- skill_runner-0.1.0/src/skill_runner.egg-info/SOURCES.txt +16 -0
- skill_runner-0.1.0/src/skill_runner.egg-info/dependency_links.txt +1 -0
- skill_runner-0.1.0/src/skill_runner.egg-info/entry_points.txt +2 -0
- skill_runner-0.1.0/src/skill_runner.egg-info/requires.txt +1 -0
- skill_runner-0.1.0/src/skill_runner.egg-info/top_level.txt +1 -0
- skill_runner-0.1.0/tests/test_loop.py +110 -0
- skill_runner-0.1.0/tests/test_skill.py +62 -0
- skill_runner-0.1.0/tests/test_tools.py +163 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Build backend configuration (PEP 517/518).
[build-system]
requires = ["setuptools>=68.0"]
build-backend = "setuptools.build_meta"

# Package metadata (PEP 621).
[project]
name = "skill-runner"
version = "0.1.0"
description = "Minimal headless agent that executes skill.md files via OpenAI-compatible Chat Completion API"
requires-python = ">=3.10"
dependencies = [
    "openai>=1.0.0",
]

# Console entry point: `skill-runner` invokes skill_runner.__main__:main.
[project.scripts]
skill-runner = "skill_runner.__main__:main"

# src-layout package discovery.
[tool.setuptools.packages.find]
where = ["src"]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""CLI entry point: python -m skill_runner"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from .api import Config, create_client
|
|
9
|
+
from .loop import run
|
|
10
|
+
from .skill import load_skill
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main() -> None:
    """CLI entry point for the skill-runner console script.

    Parses arguments, loads the skill, reads the user's input (file or
    piped stdin), runs the agent loop, and writes the final result to a
    file or stdout. Status messages go to stderr so stdout stays clean.
    """
    parser = argparse.ArgumentParser(
        prog="skill-runner",
        description="Headless agent that executes skill.md files via Chat Completion API",
    )
    parser.add_argument("--skill", required=True, help="Path to a skill.md file")
    parser.add_argument(
        "--input", default=None, help="Path to input file (reads stdin if omitted)"
    )
    parser.add_argument(
        "--output", default=None, help="Path to output file (prints to stdout if omitted)"
    )
    parser.add_argument(
        "--api-base", default=None, help="API base URL (overrides SKILL_RUNNER_BASE_URL)"
    )
    parser.add_argument(
        "--model", default=None, help="Model name (overrides SKILL_RUNNER_MODEL)"
    )
    parser.add_argument(
        "--max-turns",
        type=int,
        default=None,
        help="Max agent loop turns (overrides skill front matter)",
    )
    args = parser.parse_args()

    # Skill definition; an explicit --max-turns beats the front matter.
    skill = load_skill(args.skill)
    if args.max_turns is not None:
        skill.max_turns = args.max_turns

    # Environment-derived configuration; CLI flags take precedence.
    config = Config.from_env()
    if args.api_base:
        config.base_url = args.api_base
    if args.model:
        config.model = args.model

    # User input: an explicit file beats piped stdin; a bare TTY is an error.
    if args.input:
        with open(args.input) as f:
            user_input = f.read()
    elif not sys.stdin.isatty():
        user_input = sys.stdin.read()
    else:
        print("Error: No input provided. Use --input FILE or pipe via stdin.", file=sys.stderr)
        sys.exit(1)

    # Drive the agent loop to completion.
    client = create_client(config)
    result = run(skill=skill, user_input=user_input, client=client, model=config.model)

    # Emit the final answer.
    if args.output:
        with open(args.output, "w") as f:
            f.write(result)
        print(f"Result written to {args.output}", file=sys.stderr)
    else:
        print(result)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""OpenAI Chat Completion API wrapper."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from openai import OpenAI
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class Config:
    """Connection settings for the OpenAI-compatible endpoint.

    Attributes are plain fields so CLI flags can override them after
    construction (see __main__.main).
    """

    # Custom API endpoint; None defers to the SDK default.
    base_url: str | None = None
    # Credential; None defers to the SDK's own lookup (e.g. OPENAI_API_KEY).
    api_key: str | None = None
    # Chat model name sent with every request.
    model: str = "gpt-4o"

    @classmethod
    def from_env(cls) -> Config:
        """Build a Config from the SKILL_RUNNER_* environment variables."""
        env = os.environ.get
        return cls(
            base_url=env("SKILL_RUNNER_BASE_URL"),
            api_key=env("SKILL_RUNNER_API_KEY"),
            model=env("SKILL_RUNNER_MODEL", "gpt-4o"),
        )
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def create_client(config: Config) -> OpenAI:
    """Instantiate an OpenAI client, passing only explicitly-set options.

    Unset values are omitted entirely so the SDK falls back to its own
    defaults (e.g. the OPENAI_API_KEY environment variable and the
    public endpoint).
    """
    options = {
        key: value
        for key, value in (("base_url", config.base_url), ("api_key", config.api_key))
        if value
    }
    return OpenAI(**options)
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Agent loop — the core of skill-runner.
|
|
2
|
+
|
|
3
|
+
Calls Chat Completion API, parses tool_calls, executes tools, feeds results
|
|
4
|
+
back, and repeats until a text-only response or max_turns is reached.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import sys
|
|
10
|
+
|
|
11
|
+
from openai import OpenAI
|
|
12
|
+
|
|
13
|
+
from .skill import Skill
|
|
14
|
+
from .tools import execute_tool, get_tool_schemas
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def run(
    skill: Skill,
    user_input: str,
    client: OpenAI,
    model: str,
) -> str:
    """Execute the agent loop and return the final text response.

    Repeatedly calls the Chat Completion API; whenever the model requests
    tool calls they are executed and their results appended as ``tool``
    messages, until the model answers with plain text or the turn budget
    is exhausted.

    Args:
        skill: Parsed skill (system prompt, allowed tools, turn budget).
        user_input: The user's task, sent as the first user message.
        client: OpenAI-compatible client used to issue requests.
        model: Model name passed to the API.

    Returns:
        The model's final text content, or the sentinel string
        ``"[max_turns reached]"`` if max_turns rounds pass without one.
    """
    tools = get_tool_schemas(skill.allowed_tools)
    messages: list[dict] = [
        {"role": "system", "content": skill.system_prompt},
        {"role": "user", "content": user_input},
    ]

    for turn in range(skill.max_turns):
        kwargs: dict = {"model": model, "messages": messages}
        if tools:
            kwargs["tools"] = tools

        response = client.chat.completions.create(**kwargs)
        message = response.choices[0].message

        # Text-only response — the agent is done.
        if not message.tool_calls:
            return message.content or ""

        # Echo the assistant turn (with its tool_calls) back into history.
        # exclude_none drops null fields (e.g. function_call, content) that
        # some strict OpenAI-compatible servers reject when replayed.
        messages.append(message.model_dump(exclude_none=True))

        for call in message.tool_calls:
            args_preview = call.function.arguments
            if len(args_preview) > 80:
                args_preview = args_preview[:80] + "..."
            # Progress trace goes to stderr so stdout stays clean for the result.
            print(f"  [turn {turn + 1}] {call.function.name}({args_preview})", file=sys.stderr)
            result = execute_tool(call.function.name, call.function.arguments)
            messages.append(
                {
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": result,
                }
            )

    return "[max_turns reached]"
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""skill.md parser — loads markdown with optional YAML front matter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
# Fallback turn budget when the front matter does not specify one.
DEFAULT_MAX_TURNS = 50
# Every tool name the runner knows how to execute.
ALL_TOOLS = ["file_read", "file_write", "file_patch", "bash", "grep", "glob"]


@dataclass
class Skill:
    """A parsed skill.md: the agent's prompt plus its execution limits."""

    # Markdown body used verbatim as the system prompt.
    system_prompt: str
    # Subset of ALL_TOOLS the agent may call; defaults to a fresh copy of all.
    allowed_tools: list[str] = field(default_factory=lambda: ALL_TOOLS.copy())
    # Upper bound on agent-loop iterations.
    max_turns: int = DEFAULT_MAX_TURNS
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _parse_front_matter(text: str) -> tuple[dict, str]:
    """Split optional YAML front matter (between --- delimiters) from the body.

    A deliberately tiny regex-based parser (no PyYAML dependency): it
    understands only ``tools`` as an inline list and ``max_turns`` as an int.

    Returns:
        (meta, body): meta may contain "tools" and/or "max_turns". When no
        front matter is present, meta is empty and body is the whole text.
    """
    match = re.match(r"\A---\s*\n(.*?)\n---\s*\n(.*)", text, re.DOTALL)
    if match is None:
        return {}, text

    header, body = match.groups()
    meta: dict = {}

    # e.g.  tools: [file_read, bash]  ->  ["file_read", "bash"]
    found = re.search(r"tools:\s*\[([^\]]*)\]", header)
    if found:
        names = (name.strip() for name in found.group(1).split(","))
        meta["tools"] = [name for name in names if name]

    # e.g.  max_turns: 20  ->  20
    found = re.search(r"max_turns:\s*(\d+)", header)
    if found:
        meta["max_turns"] = int(found.group(1))

    return meta, body
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def load_skill(path: str) -> Skill:
    """Load a skill.md file and return a Skill instance.

    Raises:
        ValueError: if the front matter names a tool not in ALL_TOOLS.
        OSError: if the file cannot be read.
    """
    with open(path) as f:
        meta, body = _parse_front_matter(f.read())

    allowed_tools = meta.get("tools", list(ALL_TOOLS))
    # Fail fast on typos rather than silently running without the tool.
    unknown = [name for name in allowed_tools if name not in ALL_TOOLS]
    if unknown:
        raise ValueError(f"Unknown tool in front matter: {unknown[0]}")

    return Skill(
        system_prompt=body.strip(),
        allowed_tools=allowed_tools,
        max_turns=meta.get("max_turns", DEFAULT_MAX_TURNS),
    )
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""Tool definitions and execution for the agent loop.
|
|
2
|
+
|
|
3
|
+
Six tools: file_read, file_write, file_patch, bash, grep, glob.
|
|
4
|
+
Each tool is a plain function that returns a string result.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import fnmatch
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import re
|
|
13
|
+
import subprocess
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
# ---------- blocked commands for bash tool ----------
|
|
17
|
+
|
|
18
|
+
# Substring blocklist checked by bash() before executing a command.
# Matching is naive substring containment — a tripwire against obvious
# foot-guns, not a security boundary.
BLOCKED_COMMANDS = [
    "rm -rf /",           # recursive delete from the filesystem root
    "rm -rf /*",
    "git push --force",   # history-rewriting pushes
    "git push -f",
    "mkfs.",              # reformatting block devices
    "dd if=/dev/zero",    # raw-device overwrite
    ":(){ :|:& };:",      # classic bash fork bomb
    "> /dev/sda",         # clobbering a disk
]
|
|
28
|
+
|
|
29
|
+
# ---------- tool implementations ----------
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def file_read(path: str, offset: int = 0, limit: int = 2000) -> str:
    """Read a file and return its contents with 1-based line numbers.

    Args:
        path: File to read.
        offset: 0-based line index to start from.
        limit: Maximum number of lines returned.

    Returns:
        Tab-separated "lineno\\tcontent" lines, or "[ERROR] ..." on failure.
    """
    try:
        with open(path) as f:
            all_lines = f.readlines()
    except Exception as e:
        return f"[ERROR] {e}"
    window = all_lines[offset : offset + limit]
    return "".join(
        f"{lineno}\t{line}" for lineno, line in enumerate(window, start=offset + 1)
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def file_write(path: str, content: str) -> str:
    """Write content to a file, creating parent directories if needed.

    Returns:
        An "[OK] ..." summary on success, "[ERROR] ..." on failure.
    """
    try:
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "w") as f:
            f.write(content)
    except Exception as e:
        return f"[ERROR] {e}"
    return f"[OK] Wrote {len(content)} bytes to {path}"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def file_patch(path: str, old_string: str, new_string: str) -> str:
    """Replace a unique occurrence of old_string with new_string in a file.

    Refuses to patch when old_string is absent or ambiguous (appears more
    than once), so a caller cannot silently edit the wrong spot.
    """
    try:
        with open(path) as f:
            text = f.read()
        occurrences = text.count(old_string)
        if occurrences == 0:
            return f"[ERROR] old_string not found in {path}"
        if occurrences > 1:
            return f"[ERROR] old_string found {occurrences} times in {path} (must be unique)"
        with open(path, "w") as f:
            f.write(text.replace(old_string, new_string, 1))
        return f"[OK] Patched {path}"
    except Exception as e:
        return f"[ERROR] {e}"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def bash(command: str, timeout: int = 30) -> str:
    """Execute a shell command, returning combined stdout+stderr.

    Commands containing any BLOCKED_COMMANDS substring are refused outright.
    Output beyond 50 kB is truncated to keep the conversation bounded.
    """
    for blocked in BLOCKED_COMMANDS:
        if blocked in command:
            return f"[BLOCKED] Dangerous command detected: {blocked}"
    try:
        proc = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return f"[ERROR] Command timed out after {timeout}s"
    except Exception as e:
        return f"[ERROR] {e}"
    output = proc.stdout + proc.stderr
    if len(output) > 50000:
        output = output[:50000] + "\n[TRUNCATED]"
    return output or "[OK] (no output)"
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def grep(pattern: str, path: str = ".", include: str = "") -> str:
    """Search file contents with a regex; return "path:lineno:line" matches.

    Args:
        pattern: Regular expression applied per line.
        path: A single file, or a directory to scan.
        include: Glob filter for directory scans (defaults to every file).

    Returns:
        Newline-joined matches (truncated at 50 kB), "[NO MATCHES]",
        or "[ERROR] ..." on failure (e.g. a malformed pattern).
    """
    try:
        root = Path(path)
        # A single file is searched directly; a directory is expanded by glob.
        if root.is_file():
            candidates = [root]
        else:
            candidates = sorted(root.glob(include or "**/*"))

        regex = re.compile(pattern)
        hits: list[str] = []
        for candidate in candidates:
            if not candidate.is_file():
                continue
            try:
                with open(candidate) as f:
                    for lineno, line in enumerate(f, 1):
                        if regex.search(line):
                            hits.append(f"{candidate}:{lineno}:{line.rstrip()}")
            except (UnicodeDecodeError, PermissionError):
                # Binary or unreadable files are silently skipped.
                continue

        if not hits:
            return "[NO MATCHES]"
        report = "\n".join(hits)
        if len(report) > 50000:
            report = report[:50000] + "\n[TRUNCATED]"
        return report
    except Exception as e:
        return f"[ERROR] {e}"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def glob_search(pattern: str, path: str = ".") -> str:
    """List files (sorted, one per line) under *path* matching a glob pattern.

    Directories are excluded. Returns "[NO MATCHES]" when nothing matches,
    "[ERROR] ..." on failure.
    """
    try:
        found = sorted(str(p) for p in Path(path).glob(pattern) if p.is_file())
        return "\n".join(found) if found else "[NO MATCHES]"
    except Exception as e:
        return f"[ERROR] {e}"
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ---------- tool registry ----------
|
|
143
|
+
|
|
144
|
+
# Dispatch table mapping the API-visible tool name to its implementation.
# Note that "glob" maps to glob_search, which is named differently to avoid
# shadowing the stdlib glob module.
TOOL_FUNCTIONS = {
    "file_read": file_read,
    "file_write": file_write,
    "file_patch": file_patch,
    "bash": bash,
    "grep": grep,
    "glob": glob_search,
}
|
|
152
|
+
|
|
153
|
+
def _schema(name: str, description: str, properties: dict, required: list[str]) -> dict:
    """Wrap a property map in the OpenAI function-calling envelope."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required,
            },
        },
    }


# One schema per entry in TOOL_FUNCTIONS, in the same canonical order.
TOOL_SCHEMAS = [
    _schema(
        "file_read",
        "Read a file and return its contents with line numbers.",
        {
            "path": {"type": "string", "description": "Path to the file"},
            "offset": {
                "type": "integer",
                "description": "Line offset to start reading from (0-based)",
                "default": 0,
            },
            "limit": {
                "type": "integer",
                "description": "Maximum number of lines to read",
                "default": 2000,
            },
        },
        ["path"],
    ),
    _schema(
        "file_write",
        "Write content to a file, creating parent directories if needed.",
        {
            "path": {"type": "string", "description": "Path to the file"},
            "content": {"type": "string", "description": "Content to write"},
        },
        ["path", "content"],
    ),
    _schema(
        "file_patch",
        "Replace old_string with new_string in a file. The old_string must appear exactly once.",
        {
            "path": {"type": "string", "description": "Path to the file"},
            "old_string": {"type": "string", "description": "The exact string to find"},
            "new_string": {"type": "string", "description": "The replacement string"},
        },
        ["path", "old_string", "new_string"],
    ),
    _schema(
        "bash",
        "Execute a shell command and return stdout+stderr. Dangerous commands are blocked.",
        {
            "command": {"type": "string", "description": "The shell command to execute"},
            "timeout": {
                "type": "integer",
                "description": "Timeout in seconds",
                "default": 30,
            },
        },
        ["command"],
    ),
    _schema(
        "grep",
        "Search file contents using a regex pattern.",
        {
            "pattern": {"type": "string", "description": "Regex pattern to search for"},
            "path": {
                "type": "string",
                "description": "File or directory to search in",
                "default": ".",
            },
            "include": {
                "type": "string",
                "description": "Glob pattern to filter files (e.g. '**/*.py')",
                "default": "",
            },
        },
        ["pattern"],
    ),
    _schema(
        "glob",
        "Find files matching a glob pattern.",
        {
            "pattern": {"type": "string", "description": "Glob pattern (e.g. '**/*.py')"},
            "path": {
                "type": "string",
                "description": "Base directory to search from",
                "default": ".",
            },
        },
        ["pattern"],
    ),
]
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def get_tool_schemas(allowed_tools: list[str]) -> list[dict]:
    """Return the function-calling schemas for the allowed tool names.

    The result preserves the canonical TOOL_SCHEMAS order regardless of the
    order of *allowed_tools*.
    """
    wanted = set(allowed_tools)
    return [schema for schema in TOOL_SCHEMAS if schema["function"]["name"] in wanted]
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def execute_tool(name: str, arguments: str) -> str:
    """Execute a registered tool given its name and a JSON argument string.

    All failure modes are reported as "[ERROR] ..." strings rather than
    exceptions, so the agent loop can feed them straight back to the model.
    """
    func = TOOL_FUNCTIONS.get(name)
    if func is None:
        return f"[ERROR] Unknown tool: {name}"
    try:
        parsed = json.loads(arguments)
    except json.JSONDecodeError as e:
        return f"[ERROR] Invalid JSON arguments: {e}"
    try:
        # Mismatched/missing keys surface as TypeError from the call itself.
        return func(**parsed)
    except TypeError as e:
        return f"[ERROR] Invalid arguments for {name}: {e}"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
src/skill_runner/__init__.py
|
|
3
|
+
src/skill_runner/__main__.py
|
|
4
|
+
src/skill_runner/api.py
|
|
5
|
+
src/skill_runner/loop.py
|
|
6
|
+
src/skill_runner/skill.py
|
|
7
|
+
src/skill_runner/tools.py
|
|
8
|
+
src/skill_runner.egg-info/PKG-INFO
|
|
9
|
+
src/skill_runner.egg-info/SOURCES.txt
|
|
10
|
+
src/skill_runner.egg-info/dependency_links.txt
|
|
11
|
+
src/skill_runner.egg-info/entry_points.txt
|
|
12
|
+
src/skill_runner.egg-info/requires.txt
|
|
13
|
+
src/skill_runner.egg-info/top_level.txt
|
|
14
|
+
tests/test_loop.py
|
|
15
|
+
tests/test_skill.py
|
|
16
|
+
tests/test_tools.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
openai>=1.0.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
skill_runner
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Tests for the agent loop using mock API responses."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from unittest.mock import MagicMock, patch
|
|
6
|
+
|
|
7
|
+
from skill_runner.loop import run
|
|
8
|
+
from skill_runner.skill import Skill
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _make_text_response(content: str):
|
|
12
|
+
"""Create a mock API response with text content only."""
|
|
13
|
+
message = MagicMock()
|
|
14
|
+
message.content = content
|
|
15
|
+
message.tool_calls = None
|
|
16
|
+
choice = MagicMock()
|
|
17
|
+
choice.message = message
|
|
18
|
+
response = MagicMock()
|
|
19
|
+
response.choices = [choice]
|
|
20
|
+
return response
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _make_tool_response(tool_calls: list[dict]):
|
|
24
|
+
"""Create a mock API response with tool calls."""
|
|
25
|
+
mock_calls = []
|
|
26
|
+
for tc in tool_calls:
|
|
27
|
+
call = MagicMock()
|
|
28
|
+
call.id = tc["id"]
|
|
29
|
+
call.function.name = tc["name"]
|
|
30
|
+
call.function.arguments = tc["arguments"]
|
|
31
|
+
mock_calls.append(call)
|
|
32
|
+
|
|
33
|
+
message = MagicMock()
|
|
34
|
+
message.tool_calls = mock_calls
|
|
35
|
+
message.content = None
|
|
36
|
+
message.model_dump.return_value = {
|
|
37
|
+
"role": "assistant",
|
|
38
|
+
"content": None,
|
|
39
|
+
"tool_calls": [
|
|
40
|
+
{
|
|
41
|
+
"id": tc["id"],
|
|
42
|
+
"type": "function",
|
|
43
|
+
"function": {"name": tc["name"], "arguments": tc["arguments"]},
|
|
44
|
+
}
|
|
45
|
+
for tc in tool_calls
|
|
46
|
+
],
|
|
47
|
+
}
|
|
48
|
+
choice = MagicMock()
|
|
49
|
+
choice.message = message
|
|
50
|
+
response = MagicMock()
|
|
51
|
+
response.choices = [choice]
|
|
52
|
+
return response
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_text_only_response():
|
|
56
|
+
"""Loop returns immediately on text-only response."""
|
|
57
|
+
client = MagicMock()
|
|
58
|
+
client.chat.completions.create.return_value = _make_text_response("Hello!")
|
|
59
|
+
|
|
60
|
+
skill = Skill(system_prompt="You are helpful.", allowed_tools=[], max_turns=10)
|
|
61
|
+
result = run(skill, "Hi", client, "test-model")
|
|
62
|
+
assert result == "Hello!"
|
|
63
|
+
assert client.chat.completions.create.call_count == 1
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_tool_call_then_text(tmp_path):
|
|
67
|
+
"""Loop executes tool, feeds result back, then gets text response."""
|
|
68
|
+
test_file = tmp_path / "test.txt"
|
|
69
|
+
test_file.write_text("file content here\n")
|
|
70
|
+
|
|
71
|
+
client = MagicMock()
|
|
72
|
+
client.chat.completions.create.side_effect = [
|
|
73
|
+
_make_tool_response(
|
|
74
|
+
[
|
|
75
|
+
{
|
|
76
|
+
"id": "call_1",
|
|
77
|
+
"name": "file_read",
|
|
78
|
+
"arguments": f'{{"path": "{test_file}"}}',
|
|
79
|
+
}
|
|
80
|
+
]
|
|
81
|
+
),
|
|
82
|
+
_make_text_response("I read the file. It says: file content here"),
|
|
83
|
+
]
|
|
84
|
+
|
|
85
|
+
skill = Skill(
|
|
86
|
+
system_prompt="Read files when asked.",
|
|
87
|
+
allowed_tools=["file_read"],
|
|
88
|
+
max_turns=10,
|
|
89
|
+
)
|
|
90
|
+
result = run(skill, "Read the test file", client, "test-model")
|
|
91
|
+
assert "file content here" in result
|
|
92
|
+
assert client.chat.completions.create.call_count == 2
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_max_turns_reached():
|
|
96
|
+
"""Loop stops at max_turns and returns sentinel."""
|
|
97
|
+
client = MagicMock()
|
|
98
|
+
# Always return tool calls — should hit max_turns
|
|
99
|
+
client.chat.completions.create.return_value = _make_tool_response(
|
|
100
|
+
[{"id": "call_1", "name": "bash", "arguments": '{"command": "echo hi"}'}]
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
skill = Skill(
|
|
104
|
+
system_prompt="Keep going.",
|
|
105
|
+
allowed_tools=["bash"],
|
|
106
|
+
max_turns=3,
|
|
107
|
+
)
|
|
108
|
+
result = run(skill, "Loop forever", client, "test-model")
|
|
109
|
+
assert result == "[max_turns reached]"
|
|
110
|
+
assert client.chat.completions.create.call_count == 3
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Tests for skill.md parser."""

import os
import tempfile

from skill_runner.skill import ALL_TOOLS, DEFAULT_MAX_TURNS, load_skill


def _write_tmp(content: str) -> str:
    """Write content to a fresh temp .md file and return its path."""
    handle, path = tempfile.mkstemp(suffix=".md")
    with os.fdopen(handle, "w") as f:
        f.write(content)
    return path


def test_plain_markdown():
    """No front matter: the entire file becomes the system prompt."""
    path = _write_tmp("# Hello\n\nDo something useful.")
    try:
        skill = load_skill(path)
        assert skill.system_prompt == "# Hello\n\nDo something useful."
        assert skill.allowed_tools == ALL_TOOLS
        assert skill.max_turns == DEFAULT_MAX_TURNS
    finally:
        os.unlink(path)


def test_front_matter_tools():
    """Front matter can restrict the tool set."""
    path = _write_tmp("---\ntools: [file_read, bash]\n---\n# Review\nReview code.")
    try:
        skill = load_skill(path)
        assert skill.allowed_tools == ["file_read", "bash"]
        assert skill.system_prompt == "# Review\nReview code."
    finally:
        os.unlink(path)


def test_front_matter_max_turns():
    """Front matter can set max_turns."""
    path = _write_tmp("---\nmax_turns: 10\n---\n# Short\nQuick task.")
    try:
        skill = load_skill(path)
        assert skill.max_turns == 10
        assert skill.allowed_tools == ALL_TOOLS
    finally:
        os.unlink(path)


def test_front_matter_both():
    """Front matter can set tools and max_turns together."""
    path = _write_tmp("---\ntools: [grep, glob]\nmax_turns: 5\n---\n# Search\nFind files.")
    try:
        skill = load_skill(path)
        assert skill.allowed_tools == ["grep", "glob"]
        assert skill.max_turns == 5
    finally:
        os.unlink(path)


def test_invalid_tool_raises():
    """An unknown tool name in front matter raises ValueError."""
    path = _write_tmp("---\ntools: [file_read, unknown_tool]\n---\n# Bad\nFail.")
    try:
        raised = None
        try:
            load_skill(path)
        except ValueError as e:
            raised = e
        assert raised is not None, "Should have raised ValueError"
        assert "unknown_tool" in str(raised)
    finally:
        os.unlink(path)
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Tests for tool implementations."""

import json
import os
import tempfile

from skill_runner.tools import (
    bash,
    execute_tool,
    file_patch,
    file_read,
    file_write,
    get_tool_schemas,
    glob_search,
    grep,
)


class TestFileRead:
    def test_read_file(self, tmp_path):
        target = tmp_path / "test.txt"
        target.write_text("line1\nline2\nline3\n")
        output = file_read(str(target))
        assert "1\tline1" in output
        assert "3\tline3" in output

    def test_read_with_offset(self, tmp_path):
        target = tmp_path / "test.txt"
        target.write_text("line1\nline2\nline3\n")
        output = file_read(str(target), offset=1, limit=1)
        assert "2\tline2" in output
        assert "line1" not in output

    def test_read_nonexistent(self):
        assert "[ERROR]" in file_read("/nonexistent/file.txt")


class TestFileWrite:
    def test_write_file(self, tmp_path):
        target = tmp_path / "output.txt"
        assert "[OK]" in file_write(str(target), "hello world")
        assert target.read_text() == "hello world"

    def test_write_creates_dirs(self, tmp_path):
        target = tmp_path / "sub" / "dir" / "file.txt"
        assert "[OK]" in file_write(str(target), "nested")
        assert target.read_text() == "nested"


class TestFilePatch:
    def test_patch_unique(self, tmp_path):
        target = tmp_path / "test.txt"
        target.write_text("hello world")
        assert "[OK]" in file_patch(str(target), "hello", "goodbye")
        assert target.read_text() == "goodbye world"

    def test_patch_not_found(self, tmp_path):
        target = tmp_path / "test.txt"
        target.write_text("hello world")
        output = file_patch(str(target), "missing", "replacement")
        assert "[ERROR]" in output
        assert "not found" in output

    def test_patch_multiple_matches(self, tmp_path):
        target = tmp_path / "test.txt"
        target.write_text("aaa aaa")
        output = file_patch(str(target), "aaa", "bbb")
        assert "[ERROR]" in output
        assert "2 times" in output


class TestBash:
    def test_simple_command(self):
        assert "hello" in bash("echo hello")

    def test_blocked_rm_rf(self):
        assert "[BLOCKED]" in bash("rm -rf /")

    def test_blocked_force_push(self):
        assert "[BLOCKED]" in bash("git push --force")

    def test_timeout(self):
        output = bash("sleep 10", timeout=1)
        assert "[ERROR]" in output
        assert "timed out" in output

    def test_stderr_captured(self):
        # Even a failing command should yield non-empty output.
        assert bash("ls /nonexistent_dir_12345 2>&1 || true")


class TestGrep:
    def test_grep_file(self, tmp_path):
        target = tmp_path / "test.txt"
        target.write_text("hello world\nfoo bar\nhello again\n")
        output = grep("hello", str(target))
        assert "1:" in output
        assert "3:" in output

    def test_grep_no_match(self, tmp_path):
        target = tmp_path / "test.txt"
        target.write_text("nothing here\n")
        assert "[NO MATCHES]" in grep("missing", str(target))

    def test_grep_directory(self, tmp_path):
        (tmp_path / "a.py").write_text("import os\n")
        (tmp_path / "b.py").write_text("import sys\n")
        output = grep("import", str(tmp_path), include="*.py")
        assert "a.py" in output
        assert "b.py" in output


class TestGlob:
    def test_glob_pattern(self, tmp_path):
        for name in ("a.py", "b.py", "c.txt"):
            (tmp_path / name).write_text("")
        output = glob_search("*.py", str(tmp_path))
        assert "a.py" in output
        assert "b.py" in output
        assert "c.txt" not in output

    def test_glob_no_match(self, tmp_path):
        assert "[NO MATCHES]" in glob_search("*.xyz", str(tmp_path))


class TestExecuteTool:
    def test_execute_known_tool(self, tmp_path):
        target = tmp_path / "test.txt"
        target.write_text("content")
        assert "content" in execute_tool("file_read", json.dumps({"path": str(target)}))

    def test_execute_unknown_tool(self):
        output = execute_tool("unknown", "{}")
        assert "[ERROR]" in output
        assert "Unknown tool" in output

    def test_execute_invalid_json(self):
        assert "[ERROR]" in execute_tool("file_read", "not json")


class TestGetToolSchemas:
    def test_all_tools(self):
        every = ["file_read", "file_write", "file_patch", "bash", "grep", "glob"]
        assert len(get_tool_schemas(every)) == 6

    def test_filtered_tools(self):
        schemas = get_tool_schemas(["file_read", "bash"])
        assert len(schemas) == 2
        assert {s["function"]["name"] for s in schemas} == {"file_read", "bash"}
|