frankcode 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- frankcode/__init__.py +0 -0
- frankcode/cli.py +69 -0
- frankcode/configs/oh-my-opencode-slim.json +78 -0
- frankcode/configs/opencode-verify.json +125 -0
- frankcode/configs/opencode.json +1 -0
- frankcode/configs/prompt-rules.json +104 -0
- frankcode/environment.py +53 -0
- frankcode/orchestrator.py +343 -0
- frankcode/setup.py +112 -0
- frankcode/utils.py +16 -0
- frankcode-0.1.0.dist-info/METADATA +85 -0
- frankcode-0.1.0.dist-info/RECORD +16 -0
- frankcode-0.1.0.dist-info/WHEEL +5 -0
- frankcode-0.1.0.dist-info/entry_points.txt +2 -0
- frankcode-0.1.0.dist-info/licenses/LICENSE +21 -0
- frankcode-0.1.0.dist-info/top_level.txt +1 -0
frankcode/__init__.py
ADDED
|
File without changes
|
frankcode/cli.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
import asyncio
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Optional
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
from rich.logging import RichHandler
|
|
7
|
+
|
|
8
|
+
from .environment import LocalEnvironment
|
|
9
|
+
from .setup import FrankSetup
|
|
10
|
+
from .orchestrator import FrankOrchestrator, resolve_nvm_path
|
|
11
|
+
|
|
12
|
+
app = typer.Typer()
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
def setup_logging(verbose: bool):
    """Configure root logging so every record is rendered by Rich.

    Args:
        verbose: When true the root level is DEBUG, otherwise INFO.
    """
    chosen_level = logging.INFO
    if verbose:
        chosen_level = logging.DEBUG

    # Rich renders tracebacks itself, so the plain format only carries the message.
    rich_handler = RichHandler(rich_tracebacks=True)
    logging.basicConfig(
        level=chosen_level,
        format="%(message)s",
        datefmt="[%X]",
        handlers=[rich_handler],
    )
|
|
23
|
+
|
|
24
|
+
@app.command()
def main(
    prompt: str,
    model: str = typer.Option("google/gemini-3.1-pro-preview", help="LLM model to use"),
    verbose: bool = typer.Option(False, "--verbose", "-v", help="Show debug logs"),
    verify: bool = typer.Option(True, help="Enable verification and fix passes"),
    clean: bool = typer.Option(False, help="Wipe .frankcode directory before starting"),
    harbor_mode: bool = typer.Option(False, help="Exit with code 0 even if verification fails (for harbor compatibility)")
):
    # CLI entry point: prepare the environment, install plugins/configs, then
    # hand the prompt to the orchestrator and translate its result into an
    # exit code.
    #
    # NOTE: documented with comments rather than a docstring on purpose --
    # Typer would surface a docstring as the command's --help text.
    setup_logging(verbose)

    if clean:
        import shutil
        import os
        # Start from a blank slate by removing state left by earlier runs.
        if os.path.exists(".frankcode"):
            shutil.rmtree(".frankcode")

    env = LocalEnvironment()
    setup = FrankSetup(env, ".")
    orchestrator = FrankOrchestrator(env, setup, model)

    async def run():
        from rich.markup import escape

        # Step 0: Resolve NVM paths so npm/opencode are available
        await resolve_nvm_path(env)

        # Step 1: Install plugins and write configs
        console.print("[bold blue]Installing safety plugins and generating configs...[/bold blue]")
        await setup.ensure_directories()
        await setup.install_plugins()
        await setup.inject_plugins_into_system_configs()

        # Step 2: Run the task
        # The prompt is arbitrary user text; escape it so bracketed fragments
        # (e.g. "[/app/]") are printed literally instead of being parsed as
        # Rich markup tags, which can raise MarkupError.
        console.print(f"[bold green]Executing task:[/bold green] {escape(prompt)}")
        success = await orchestrator.run_task(prompt, verify=verify)

        if success:
            console.print("[bold green]Task completed successfully![/bold green]")
        else:
            console.print("[bold red]Task failed to verify.[/bold red]")
            # Harbor mode reports the failure but still exits with status 0.
            if not harbor_mode:
                raise typer.Exit(code=1)

    asyncio.run(run())
|
|
67
|
+
|
|
68
|
+
if __name__ == "__main__":
|
|
69
|
+
app()
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
{
|
|
2
|
+
"preset": "harbor",
|
|
3
|
+
"presets": {
|
|
4
|
+
"harbor": {
|
|
5
|
+
"build": {
|
|
6
|
+
"model": "google/gemini-3.1-pro-preview"
|
|
7
|
+
},
|
|
8
|
+
"orchestrator": {
|
|
9
|
+
"model": "google/gemini-3.1-pro-preview",
|
|
10
|
+
"skills": [
|
|
11
|
+
"*"
|
|
12
|
+
],
|
|
13
|
+
"mcps": [
|
|
14
|
+
"websearch",
|
|
15
|
+
"ast-grep"
|
|
16
|
+
]
|
|
17
|
+
},
|
|
18
|
+
"oracle": {
|
|
19
|
+
"model": "openai/gpt-5.4",
|
|
20
|
+
"variant": "high",
|
|
21
|
+
"skills": [
|
|
22
|
+
"simplify"
|
|
23
|
+
],
|
|
24
|
+
"mcps": []
|
|
25
|
+
},
|
|
26
|
+
"librarian": {
|
|
27
|
+
"model": "openai/gpt-5.4-mini",
|
|
28
|
+
"variant": "low",
|
|
29
|
+
"skills": [],
|
|
30
|
+
"mcps": [
|
|
31
|
+
"websearch",
|
|
32
|
+
"context7"
|
|
33
|
+
]
|
|
34
|
+
},
|
|
35
|
+
"explorer": {
|
|
36
|
+
"model": "openai/gpt-5.4-mini",
|
|
37
|
+
"variant": "low",
|
|
38
|
+
"skills": [],
|
|
39
|
+
"mcps": []
|
|
40
|
+
},
|
|
41
|
+
"designer": {
|
|
42
|
+
"model": "openai/gpt-5.4-mini",
|
|
43
|
+
"variant": "medium",
|
|
44
|
+
"skills": [
|
|
45
|
+
"agent-browser"
|
|
46
|
+
],
|
|
47
|
+
"mcps": []
|
|
48
|
+
},
|
|
49
|
+
"fixer": {
|
|
50
|
+
"model": "google/gemini-3.1-pro-preview",
|
|
51
|
+
"variant": "low",
|
|
52
|
+
"skills": [],
|
|
53
|
+
"mcps": []
|
|
54
|
+
},
|
|
55
|
+
"momus": {
|
|
56
|
+
"model": "google/gemini-3.1-pro-preview",
|
|
57
|
+
"skills": [],
|
|
58
|
+
"mcps": []
|
|
59
|
+
},
|
|
60
|
+
"data-scientist": {
|
|
61
|
+
"model": "google/gemini-3.1-pro-preview",
|
|
62
|
+
"skills": [],
|
|
63
|
+
"mcps": []
|
|
64
|
+
},
|
|
65
|
+
"allison": {
|
|
66
|
+
"model": "google/gemini-3.1-pro-preview",
|
|
67
|
+
"skills": [],
|
|
68
|
+
"mcps": []
|
|
69
|
+
},
|
|
70
|
+
"recon": {
|
|
71
|
+
"model": "openai/gpt-5.4-mini",
|
|
72
|
+
"variant": "low",
|
|
73
|
+
"skills": [],
|
|
74
|
+
"mcps": []
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
{
|
|
2
|
+
"snapshot": false,
|
|
3
|
+
"permission": {
|
|
4
|
+
"github_create_or_update_file": "deny"
|
|
5
|
+
},
|
|
6
|
+
"mcp": {
|
|
7
|
+
"context7": {
|
|
8
|
+
"type": "remote",
|
|
9
|
+
"url": "https://mcp.context7.com/mcp",
|
|
10
|
+
"headers": {
|
|
11
|
+
"CONTEXT7_API_KEY": "__CONTEXT7_API_KEY__"
|
|
12
|
+
},
|
|
13
|
+
"enabled": true
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"plugin": [
|
|
17
|
+
"oh-my-opencode-slim@latest",
|
|
18
|
+
"opencode-pty",
|
|
19
|
+
[
|
|
20
|
+
"opencode-auto-resume",
|
|
21
|
+
{
|
|
22
|
+
"loopMaxContinues": 3,
|
|
23
|
+
"loopWindowMs": 600000
|
|
24
|
+
}
|
|
25
|
+
]
|
|
26
|
+
],
|
|
27
|
+
"provider": {
|
|
28
|
+
"google": {
|
|
29
|
+
"options": {
|
|
30
|
+
"timeout": 600000,
|
|
31
|
+
"chunkTimeout": 90000
|
|
32
|
+
},
|
|
33
|
+
"models": {
|
|
34
|
+
"gemini-3.1-pro-preview": {
|
|
35
|
+
"name": "Gemini 3.1 Pro Preview",
|
|
36
|
+
"limit": {
|
|
37
|
+
"context": 1048576,
|
|
38
|
+
"output": 65535
|
|
39
|
+
},
|
|
40
|
+
"modalities": {
|
|
41
|
+
"input": [
|
|
42
|
+
"text",
|
|
43
|
+
"image",
|
|
44
|
+
"pdf"
|
|
45
|
+
],
|
|
46
|
+
"output": [
|
|
47
|
+
"text"
|
|
48
|
+
]
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
},
|
|
53
|
+
"openai": {
|
|
54
|
+
"models": {
|
|
55
|
+
"gpt-5.4": {
|
|
56
|
+
"name": "GPT-5.4"
|
|
57
|
+
},
|
|
58
|
+
"gpt-5.4-mini": {
|
|
59
|
+
"name": "GPT-5.4-mini"
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
},
|
|
64
|
+
"agent": {
|
|
65
|
+
"explore": {
|
|
66
|
+
"disable": true
|
|
67
|
+
},
|
|
68
|
+
"general": {
|
|
69
|
+
"disable": true
|
|
70
|
+
},
|
|
71
|
+
"build": {
|
|
72
|
+
"permission": {
|
|
73
|
+
"bash": {
|
|
74
|
+
"git add*": "deny",
|
|
75
|
+
"git commit*": "deny",
|
|
76
|
+
"sed": "deny"
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
},
|
|
80
|
+
"chat": {
|
|
81
|
+
"permission": {
|
|
82
|
+
"bash": {
|
|
83
|
+
"git add*": "deny",
|
|
84
|
+
"git commit*": "deny",
|
|
85
|
+
"python": "deny",
|
|
86
|
+
"sed": "deny"
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
},
|
|
90
|
+
"plan": {
|
|
91
|
+
"permission": {
|
|
92
|
+
"bash": {
|
|
93
|
+
"git add*": "deny",
|
|
94
|
+
"git commit*": "deny",
|
|
95
|
+
"sed": "deny"
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
},
|
|
99
|
+
"momus": {
|
|
100
|
+
"model": "google/gemini-3.1-pro-preview"
|
|
101
|
+
},
|
|
102
|
+
"data-scientist": {
|
|
103
|
+
"model": "google/gemini-3.1-pro-preview"
|
|
104
|
+
},
|
|
105
|
+
"allison": {
|
|
106
|
+
"model": "google/gemini-3.1-pro-preview"
|
|
107
|
+
},
|
|
108
|
+
"recon": {
|
|
109
|
+
"description": "Data reconnaissance specialist. Inspects unknown file formats, binary data, and APIs. Returns a concise structural summary.",
|
|
110
|
+
"mode": "subagent",
|
|
111
|
+
"model": "openai/gpt-5.4-mini",
|
|
112
|
+
"prompt": "You are a data reconnaissance agent. Your ONLY job is to inspect unknown files and report their structure.\n\nRules:\n1. You have a STRICT 3-script limit. Do not write more than 3 inspection scripts.\n2. Each script must print a concise summary (not raw data). Never dump large binary content.\n3. Always wrap execution with `timeout` and output limiting: `timeout 10s python3 script.py 2>&1 | head -n 50`\n4. When done, return a structured summary: file format, data layout, key dimensions, and any gotchas.\n5. Do NOT write implementation code. Only inspect and report.\n6. If a file is too large to read, sample it (first/last N bytes, random offsets).",
|
|
113
|
+
"permission": {
|
|
114
|
+
"bash": "allow",
|
|
115
|
+
"read": "allow",
|
|
116
|
+
"glob": "allow",
|
|
117
|
+
"grep": "allow",
|
|
118
|
+
"write": "deny",
|
|
119
|
+
"edit": "deny",
|
|
120
|
+
"task": "deny"
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
},
|
|
124
|
+
"model": "google/gemini-3.1-pro-preview"
|
|
125
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
{
|
|
2
|
+
"recon_protocol": {
|
|
3
|
+
"name": "recon_protocol",
|
|
4
|
+
"weight": 10,
|
|
5
|
+
"applies_to_all": true,
|
|
6
|
+
"instruction": "MANDATORY FIRST ACTIONS — before writing ANY code or installing ANY packages:\n1. Run `ls -la` in the working directory to understand what files are already provided.\n2. Read any source files that look relevant (e.g., model definitions, config files, existing scripts, READMEs).\n2a. For tasks involving BINARY DATA FILES (.ckpt, .bin, .weights, .safetensors, raw tensor dumps), spawn a `recon` subagent BEFORE writing any implementation. The recon agent will inspect the files and return a structural report. Do NOT write your own diagnostic scripts — that is the recon agent's job.\n3. Identify what's already given vs. what you need to build.\n4. NEVER overwrite files that came with the task environment (images, model weights, test data, config files). These are test fixtures — the verifier expects them unchanged.\n5. If libraries or headers are already present in the working directory (e.g., cJSON.h, lodepng.h), use them instead of downloading alternatives.\n6. Put all intermediate/scratch files (helper scripts, test code, temporary build artifacts) in /tmp/work/ — keep the task directory clean with ONLY the final deliverables the task asks for.\n7. IMPLEMENTATION WORKFLOW: After building a first working version (compiles, runs, produces output), test it against the reference/expected output. If there are precision issues (pixel diffs, off-by-one errors, floating-point mismatches, partial test failures), do NOT iterate more than 3 times in your current session. Instead, spawn a `fixer` subagent using the Task tool. Give it: (a) the path to your implementation file, (b) the exact diff/error output, (c) any diagnostic data you've gathered. A fixer subagent with fresh context is more effective for precision work than continuing in a polluted context."
|
|
7
|
+
},
|
|
8
|
+
"cleanup_rules": {
|
|
9
|
+
"name": "cleanup_rules",
|
|
10
|
+
"weight": 9,
|
|
11
|
+
"applies_to_all": true,
|
|
12
|
+
"instruction": "CLEANUP RULES: If you test your solution by creating mock data, making test commits, or altering state, you must clean up TEST ARTIFACTS before finishing — but be precise about WHAT you clean up.\n- CLEAN UP: temporary test scripts, test clones/repos you created to verify, mock data files, test outputs, scratch directories (e.g., /tmp/test-repo).\n- DO NOT CLEAN UP: infrastructure you built as part of the solution — servers, daemons, hooks, config files, databases, repositories that the TASK asked you to create. These are deliverables, not artifacts.\n- If you pushed test content to a repository or server that should start EMPTY for the verifier, reset only the DATA — do NOT destroy and recreate the infrastructure itself.\n- RULE OF THUMB: If the task asked you to 'set up X' or 'configure X', then X must still be running and properly configured when you finish."
|
|
13
|
+
},
|
|
14
|
+
"environment_isolation": {
|
|
15
|
+
"name": "environment_isolation",
|
|
16
|
+
"weight": 9,
|
|
17
|
+
"applies_to_all": true,
|
|
18
|
+
"instruction": "ENVIRONMENT ISOLATION: The verifier runs in a separate, clean environment. Only files inside the task directory (/app/) are carried over. Everything else — installed packages (pip, apt, npm, cargo), running services, environment variables, config files outside /app/, symlinks to external paths — will NOT exist when the verifier runs. Your deliverables must be fully self-contained. If a script requires a non-stdlib library, it must either self-install its dependencies at runtime or use an alternative approach that relies only on tools already present in the base image (see AVAILABLE TOOLS above). Do NOT assume any package you install during your session will be available to the verifier."
|
|
19
|
+
},
|
|
20
|
+
"verify_deliverables": {
|
|
21
|
+
"name": "verify_deliverables",
|
|
22
|
+
"weight": 9,
|
|
23
|
+
"applies_to_all": true,
|
|
24
|
+
"instruction": "DELIVERABLE VERIFICATION: Before calling task_complete, run `ls -la /app/` and confirm that every file the task asked you to create actually exists there. If you wrote files to /tmp/work/ but not to /app/, copy them now. The verifier only checks /app/ — files anywhere else will not be found."
|
|
25
|
+
},
|
|
26
|
+
"deliverables_manifest": {
|
|
27
|
+
"name": "deliverables_manifest",
|
|
28
|
+
"weight": 9,
|
|
29
|
+
"applies_to_all": true,
|
|
30
|
+
"instruction": "DELIVERABLES MANIFEST: Before calling task_complete or finishing your work, write a file at /tmp/deliverables.manifest listing one absolute path per line for every file in the task directory (/app/) that must exist for the verifier to succeed. Include: source files you modified, compiled binaries or shared libraries (.so, .pyd, .dll) you built, generated data files, config files you created — anything the tests will need at runtime. Do NOT include intermediate build artifacts (object files, build/ directories, dist/ directories, caches). Do NOT include files that existed before you started (fixtures). Example:\n/app/portfolio_optimized.c\n/app/portfolio_optimized.py\n/app/portfolio_optimized_c.cpython-313-x86_64-linux-gnu.so\nIf you are unsure whether to include a file, include it — it is safer to over-include than to omit a file the verifier needs."
|
|
31
|
+
},
|
|
32
|
+
"circuit_breaker": {
|
|
33
|
+
"name": "circuit_breaker",
|
|
34
|
+
"weight": 8,
|
|
35
|
+
"applies_to_all": true,
|
|
36
|
+
"instruction": "CRITICAL SYSTEM INSTRUCTION:\n1. LOOP DETECTION: If you are stuck tweaking the same file/parameters more than 5 times without passing tests, STOP. Abandon your current approach and escalate: (a) Search the web for known issues, bugs, or bypasses specific to the technology you're working with (e.g., 'BeautifulSoup XSS bypass', 'Coq 8.18 compatibility'). (b) If web search doesn't help, spawn a fixer subagent with fresh context. (c) If neither works, delete the file and try a fundamentally different architecture.\n2. BINARY FILE SAFETY: NEVER `cat` or `read` large binary or image files (.ppm, .png, etc) into your context. Write a short script to extract the properties you need and print only the summary.\n3. DEFENSIVE EXECUTION: When running any compiled binary, untrusted script, or unfamiliar command for the first time, ALWAYS wrap it: `timeout 5s ./binary arg1 arg2 2>&1 | head -c 2000`. Never run untrusted executables without both a time limit and an output cap.\n4. CONTEXT PROTECTION: If your context becomes polluted with massive error logs or repeated output, DO NOT continue in the current session. Summarize your progress and use the `Task` tool to spawn a subagent (`fixer` or `explorer`) with fresh context.\n5. DIAGNOSIS BUDGET: You get 3 read-only diagnostic steps before you must write your first implementation. If the task names a specific interface, library, or framework that requires API exploration, you get 8 read-only steps instead. After your first implementation attempt, the stricter budget applies to each debug cycle: diagnose(max 3) → fix → diagnose(max 3) → fix. NEVER write more than 3 consecutive diagnostic scripts after implementation without attempting a code change.\n6. LONG-RUNNING COMMANDS: The bash tool has a 120-second timeout. For commands that take longer (make, opam install, cargo build, training, hash cracking, large downloads), use `pty_spawn` with `notifyOnExit: true`. Use `pty_read` to check progress. 
If bash says 'shell tool terminated command after exceeding timeout', switch to `pty_spawn`. For multi-step pipelines (train → evaluate → save, compile → test → install), write a single wrapper shell script that runs all steps end-to-end, then spawn that script with pty_spawn. Do not spawn individual pipeline steps and poll between them — if your session ends before the pipeline completes, intermediate results will be lost."
|
|
37
|
+
},
|
|
38
|
+
"programmatic_verification": {
|
|
39
|
+
"name": "programmatic_verification",
|
|
40
|
+
"weight": 5,
|
|
41
|
+
"applies_to_all": true,
|
|
42
|
+
"instruction": "PROGRAMMATIC VERIFICATION: For tasks requiring precise analytical answers (chess moves, mathematical proofs, regex patterns, logic puzzles, cryptographic challenges), do NOT trust your reasoning alone. Write a short script to verify your answer programmatically before writing it to the output file."
|
|
43
|
+
},
|
|
44
|
+
"interface_fidelity": {
|
|
45
|
+
"name": "interface_fidelity",
|
|
46
|
+
"weight": 7,
|
|
47
|
+
"keywords": ["installed", "at version", "at revision", "using the", "package at", "have the"],
|
|
48
|
+
"instruction": "SPECIFIED INTERFACE FIDELITY: The task names a specific library, framework, tool, or versioned runtime. Treat that choice as semantically load-bearing — use the named interface as your primary API. Do not substitute a lower-level or different-stack alternative you happen to know better. Before implementing, inspect the named interface: Python lib → inspect.signature() / help(); CLI tool → --help / --version; compiler/runtime → sample invocation or docs. The task author chose that interface for a reason — it likely handles defaults, conventions, or edge cases the lower-level alternative does not."
|
|
49
|
+
},
|
|
50
|
+
"performance_awareness": {
|
|
51
|
+
"name": "performance_awareness",
|
|
52
|
+
"weight": 6,
|
|
53
|
+
"keywords": ["train", "inference", "compute", "gpu", "neural", "benchmark", "optimization", "faster", "optimize", "performance", "simulation", "parallelism", "parallel", "eigenval", "sampling", "sampler"],
|
|
54
|
+
"instruction": "PERFORMANCE AWARENESS: If your solution involves compute-intensive operations (matrix math, large data processing, inference, image generation, simulation), benchmark it with `time` before declaring it complete. If it takes more than 60 seconds on your test inputs, you must optimize the hot path before finishing. Verifiers typically enforce strict timeouts."
|
|
55
|
+
},
|
|
56
|
+
"binary_data_format": {
|
|
57
|
+
"name": "binary_data_format",
|
|
58
|
+
"weight": 8,
|
|
59
|
+
"keywords": ["checkpoint", ".ckpt", ".bin", "binary", "weight", "tensor", "forward pass", "inference", "model weights", "data layout", "HuggingFace", "GPT-2", "weights.json"],
|
|
60
|
+
"instruction": "BINARY DATA FORMAT RULE: The task involves binary data files (checkpoints, weight files, serialized tensors). You MUST follow this workflow:\n\nPHASE 1 — RECON (mandatory before writing implementation):\nSpawn a `recon` subagent using the Task tool with this information:\n - The binary file paths and their sizes\n - What format you suspect (TF checkpoint, PyTorch, raw floats, protobuf, etc.)\n - What you need to know: tensor names, shapes, byte offsets, and ordering\n\nWait for the recon report before proceeding. If the recon agent cannot determine the layout, try ONE of these escalation paths:\n (a) Download the format's companion metadata file (TF checkpoints have .index files; PyTorch has state_dict keys; safetensors has a JSON header). Parse it to get exact tensor names and offsets.\n (b) Install the canonical library (pip install tensorflow/torch/safetensors) and run a 5-line script to print variable names, shapes, and dtypes.\n (c) Search the web for the exact format specification.\n\nPHASE 2 — IMPLEMENT (only after recon is complete):\nWrite your implementation in ONE shot based on the recon report. Do not write the implementation incrementally with diagnostic scripts interleaved.\n\nANTI-PATTERN: Writing separate small C/Python programs to probe different byte offsets one at a time. This burns your entire time budget. The recon subagent does this in parallel and with a script budget."
|
|
61
|
+
},
|
|
62
|
+
"size_constraint": {
|
|
63
|
+
"name": "size_constraint",
|
|
64
|
+
"weight": 8,
|
|
65
|
+
"keywords": ["bytes", "must be <", "must be under", "at most", "maximum size", "size limit", "under 100"],
|
|
66
|
+
"instruction": "SIZE-CONSTRAINED DELIVERABLE: The task specifies a maximum file size or count. After EVERY write to a deliverable file, immediately check the constraint (wc -c for bytes, wc -l for lines, jq length for JSON arrays). If you exceed the limit, STOP adding features and reduce size: use short variable names, remove comments and whitespace, merge functions, eliminate redundancy. The size constraint is as important as correctness — a correct solution that exceeds the limit scores 0. Plan for the constraint from the start: estimate your budget before writing, not after."
|
|
67
|
+
},
|
|
68
|
+
"persistent_servers": {
|
|
69
|
+
"name": "persistent_servers",
|
|
70
|
+
"weight": 6,
|
|
71
|
+
"keywords": ["pypi server", "web server", "API server", "daemon", "running on port", "run on port", "on port", "webserver", "listen on", "service should run", "run the service", "Flask"],
|
|
72
|
+
"instruction": "PERSISTENT SERVERS: If your task requires a server or long-running process to remain running AFTER your session ends, you MUST start it as a background daemon using `nohup command &>/dev/null &` or `setsid command &>/dev/null &`, NOT via `pty_spawn` or foreground execution. PTY sessions and foreground processes are destroyed when your session ends. Always verify the server is actually listening with `curl` or `ss -tlnp` before declaring the task complete."
|
|
73
|
+
},
|
|
74
|
+
"client_perspective_testing": {
|
|
75
|
+
"name": "client_perspective_testing",
|
|
76
|
+
"weight": 5,
|
|
77
|
+
"keywords": ["git server", "webserver", "curl http", "git clone user@"],
|
|
78
|
+
"instruction": "CLIENT-PERSPECTIVE TESTING: When your task involves setting up any network service (web server, git server, API, database), always test it the way an external client would — over HTTP, SSH, or TCP, not via local filesystem paths. The verifier accesses your services over the network."
|
|
79
|
+
},
|
|
80
|
+
"data_preservation": {
|
|
81
|
+
"name": "data_preservation",
|
|
82
|
+
"weight": 6,
|
|
83
|
+
"keywords": ["corrupted", "deleted file", "forensic", "damaged", "WAL file", "recover the", "recovery task"],
|
|
84
|
+
"instruction": "DATA PRESERVATION: When the task involves recovering, repairing, or analyzing corrupted or damaged files, ALWAYS back up the original files before running any tools on them. Copy them to /tmp/ first. Tools like sqlite3, git, fsck may automatically 'fix' corrupted files, destroying the data you need to recover."
|
|
85
|
+
},
|
|
86
|
+
"decomposed_search": {
|
|
87
|
+
"name": "decomposed_search",
|
|
88
|
+
"weight": 5,
|
|
89
|
+
"keywords": ["disk image", "fragment", "PASSWORD=", "launchcode"],
|
|
90
|
+
"instruction": "DECOMPOSED SEARCH: When searching for a pattern with known start and end markers in binary data, disk images, or large files, ALWAYS search for each marker independently. Deleted or fragmented data is often scattered across non-contiguous disk regions. Find all fragments first, then reconstruct the complete value."
|
|
91
|
+
},
|
|
92
|
+
"prefer_binary_packages": {
|
|
93
|
+
"name": "prefer_binary_packages",
|
|
94
|
+
"weight": 4,
|
|
95
|
+
"keywords": ["set up a", "configure a", "build from source", "build the", "pypi server", "git server", "webserver"],
|
|
96
|
+
"instruction": "PREFER BINARY PACKAGES: When installing dependencies, always try pre-built binary packages first (apt-get install, pip binary wheels) before building from source. Building from source can take 10-30 minutes. Also check if tools are already installed — run `which <tool>` or `dpkg -l | grep <tool>` before installing anything. However, if a configure script or build tool specifies exact version requirements and the system package has a different version, use a language-specific version manager (opam for OCaml/Coq, pyenv for Python, nvm for Node, rustup for Rust) to install the required version. Do not force through version checks with flags like --ignore-version or --skip-check."
|
|
97
|
+
},
|
|
98
|
+
"qemu_vm": {
|
|
99
|
+
"name": "qemu_vm",
|
|
100
|
+
"weight": 6,
|
|
101
|
+
"keywords": ["qemu", "QEMU", "virtual machine", "VM", "alpine"],
|
|
102
|
+
"instruction": "QEMU VM MANAGEMENT: When booting a VM with QEMU, do NOT use pty_spawn. Instead, daemonize QEMU with a serial console socket: `qemu-system-x86_64 -display none -serial unix:/tmp/vm-serial.sock,server,nowait -monitor unix:/tmp/vm-monitor.sock,server,nowait -daemonize -pidfile /tmp/vm.pid`. Connect to the console with `nc -U /tmp/vm-serial.sock` and use `expect` for automated interaction (login, password, service config). For user-mode networking with port forwarding: `-netdev user,id=n0,hostfwd=tcp::PORT-:GUESTPORT -device e1000,netdev=n0`. Wait for the login prompt before configuring services. Test SSH locally before declaring complete."
|
|
103
|
+
}
|
|
104
|
+
}
|
frankcode/environment.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
import subprocess
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Optional, Dict
|
|
6
|
+
|
|
7
|
+
@dataclass
class ExecResult:
    """Outcome of a single command run through ``LocalEnvironment.exec``."""

    # Captured standard output, decoded and stripped of surrounding whitespace.
    stdout: str
    # Captured standard error, decoded and stripped of surrounding whitespace.
    stderr: str
    # Return code reported by the finished process.
    exit_code: Optional[int]


class LocalEnvironment:
    """Run shell commands locally, optionally with extra leading PATH entries."""

    def __init__(self):
        # Directories (already joined with the path separator) that are put in
        # front of PATH for every command; empty means "leave PATH alone".
        self._extra_path: str = ""

    def prepend_path(self, path_entry: str):
        """Prepend a directory to PATH for all future exec() calls."""
        if self._extra_path:
            self._extra_path = f"{path_entry}{os.pathsep}{self._extra_path}"
        else:
            self._extra_path = path_entry

    async def exec(
        self,
        command: str,
        cwd: Optional[str] = None,
        env: Optional[Dict[str, str]] = None
    ) -> ExecResult:
        """Run ``command`` through the shell and capture its output.

        Args:
            command: Shell command line to execute.
            cwd: Working directory; defaults to the current working directory.
            env: Variables layered on top of a copy of ``os.environ``.

        Returns:
            An ``ExecResult`` with stripped stdout/stderr text and the
            process return code.
        """
        # Merge current env with provided env
        full_env = os.environ.copy()
        if env:
            full_env.update(env)

        # Prepend resolved paths (e.g. NVM bin dir) so node/npm/opencode are found
        if self._extra_path:
            inherited = full_env.get("PATH", "")
            # Only join when there is something to join with: a trailing
            # separator is an empty PATH entry, which shells treat as the
            # current directory.
            if inherited:
                full_env["PATH"] = f"{self._extra_path}{os.pathsep}{inherited}"
            else:
                full_env["PATH"] = self._extra_path

        process = await asyncio.create_subprocess_shell(
            command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=cwd or os.getcwd(),
            env=full_env
        )

        stdout, stderr = await process.communicate()

        # Commands may emit bytes that are not valid UTF-8 (binary dumps,
        # legacy encodings); substitute them instead of raising
        # UnicodeDecodeError and losing the whole result.
        return ExecResult(
            stdout=stdout.decode(errors="replace").strip(),
            stderr=stderr.decode(errors="replace").strip(),
            exit_code=process.returncode
        )
|
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
import hashlib
|
|
6
|
+
import shlex
|
|
7
|
+
import re
|
|
8
|
+
import json
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Optional, List, Tuple, Dict
|
|
11
|
+
|
|
12
|
+
from .environment import LocalEnvironment
|
|
13
|
+
from .setup import FrankSetup
|
|
14
|
+
from .utils import check_error_loop
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
def _load_config(name: str) -> dict:
    """Load a JSON config from the configs/ directory next to this module.

    Args:
        name: File name inside ``configs/`` (for example ``"prompt-rules.json"``).

    Returns:
        The parsed JSON document, or an empty dict when the file is missing.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    config_dir = Path(__file__).parent / "configs"
    try:
        # The bundled configs contain non-ASCII punctuation, so read them as
        # UTF-8 explicitly rather than with the locale's default encoding.
        with open(config_dir / name, encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        logger.warning("Config %s not found in %s", name, config_dir)
        return {}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def resolve_nvm_path(env: LocalEnvironment) -> None:
    """Find and register the NVM bin directory so node/npm/opencode are on PATH.

    Looks for ``~/.nvm`` first and, when the home directory is not ``/root``,
    falls back to ``/root/.nvm``. The ``bin`` directory that sorts last under
    ``versions/node`` (version sort) is prepended to ``env``'s PATH. When
    nothing is found a warning is logged and ``env`` is left unchanged.

    Args:
        env: Environment used to run the lookup and to receive the PATH entry.
    """
    home = Path.home()

    candidates = [home / ".nvm"]
    if str(home) != "/root":
        candidates.append(Path("/root/.nvm"))

    # os.path.exists() reports False for paths that cannot be stat'ed (e.g.
    # /root/.nvm probed by a non-root user), whereas Path.exists() lets
    # PermissionError propagate on some Python versions.
    nvm_dir = next((c for c in candidates if os.path.exists(c)), None)

    if nvm_dir is None:
        logger.warning("NVM directory not found in any of: %s", [str(c) for c in candidates])
        return

    # Strategy: find the node bin directory directly via filesystem.
    # This avoids sourcing nvm.sh (which may contain bashisms) in /bin/sh.
    # The path is shell-quoted so unusual characters in the home directory
    # cannot break or alter the command.
    versions_root = shlex.quote(f"{nvm_dir}/versions/node")
    result = await env.exec(
        f"find {versions_root} -maxdepth 2 -name bin -type d 2>/dev/null | sort -V | tail -1"
    )

    bin_dir = result.stdout.strip()
    if result.exit_code == 0 and bin_dir:
        logger.info("Resolved NVM bin directory: %s", bin_dir)
        env.prepend_path(bin_dir)
    else:
        logger.warning("Could not resolve NVM bin directory under %s", nvm_dir)
|
|
59
|
+
|
|
60
|
+
class FrankOrchestrator:
|
|
61
|
+
def __init__(self, env: LocalEnvironment, setup: FrankSetup, model: str):
|
|
62
|
+
self.env = env
|
|
63
|
+
self.setup = setup
|
|
64
|
+
self.model = model
|
|
65
|
+
self.error_history = []
|
|
66
|
+
self.initial_file_listing = ""
|
|
67
|
+
|
|
68
|
+
def _select_rules(self, instruction: str) -> List[str]:
    """Pick the prompt-rule instructions that apply to ``instruction``.

    A rule from ``prompt-rules.json`` is chosen when it is flagged
    ``applies_to_all`` or when any of its keywords occurs in the instruction
    (case-insensitive substring match). Instructions are returned in the
    order the rules appear in the config.
    """
    rules = _load_config("prompt-rules.json")
    haystack = instruction.lower()

    chosen = []
    for rule in rules.values():
        universal = rule.get("applies_to_all")
        if universal or any(kw.lower() in haystack for kw in rule.get("keywords", [])):
            chosen.append(rule["instruction"])
    return chosen
|
|
84
|
+
|
|
85
|
+
async def run_task(self, prompt: str, verify: bool = True, max_iterations: int = 3):
    """Run the main agent session, then optionally clean up and verify/fix.

    Args:
        prompt: The user's task description.
        verify: When false, return right after the main session.
        max_iterations: Maximum number of verification passes (each failed
            pass is followed by one fix pass). Defaults to the previous
            hard-coded value of 3.

    Returns:
        With ``verify=False``: whether the main session exited successfully.
        Otherwise: ``True`` as soon as a verification pass reports VERIFIED,
        ``False`` if no pass did.
    """
    await self.setup.ensure_directories()

    # Snapshot the tree so the cleanup pass can tell fixtures from new files.
    # Best-effort: a failure only disables the agent-driven cleanup step.
    try:
        initial_result = await self.env.exec(
            "find . -type f -not -path '*/.git/*' -not -path './.frankcode/*' 2>/dev/null | sort"
        )
        self.initial_file_listing = initial_result.stdout.strip()
        logger.info(
            "Captured initial file listing (%d files).",
            len(self.initial_file_listing.splitlines()),
        )
    except Exception as exc:
        logger.warning("Could not capture initial file listing: %s", exc)

    # Append the selected rules; with no rules the prompt is passed unchanged
    # (no dangling blank-line separator).
    rules = self._select_rules(prompt)
    enhanced_prompt = "\n\n".join([prompt, *rules])

    logger.info("Starting main agent session...")
    success = await self._run_main_session(enhanced_prompt)

    if not verify:
        return success

    logger.info("Starting cleanup pass...")
    await self._run_cleanup(prompt)

    logger.info("Starting verification/fix loop...")
    for iteration in range(1, max_iterations + 1):
        logger.info("Verification iteration %d/%d", iteration, max_iterations)
        verified, feedback = await self._run_verification(prompt, iteration)
        if verified:
            logger.info("Task verified successfully!")
            return True

        logger.info(f"Verification failed. Feedback: {feedback[:100]}...")
        logger.info("Starting fix pass...")
        # NOTE: the fix applied after the final failed verification is not
        # re-verified; the method still reports failure in that case.
        await self._run_fix(prompt, feedback, iteration)

    return False
|
|
125
|
+
|
|
126
|
+
async def _run_opencode_session(
    self,
    prompt: str,
    output_path: Path,
    config_name: str = "opencode.json",
    timeout_seconds: int | None = None,
) -> bool:
    """Run one ``opencode run`` invocation and capture its output in a file.

    Args:
        prompt: Prompt text passed to opencode after ``--``.
        output_path: File that receives the combined stdout/stderr.
        config_name: Config file under ``~/.config/opencode`` to activate for
            this session. Anything other than ``opencode.json`` is copied over
            ``opencode.json`` first and ``opencode-main.json`` is copied back
            afterwards.
        timeout_seconds: When set, the command is wrapped in ``timeout <n>s``.

    Returns:
        ``True`` when the command exits with status 0.
    """
    config_dir = Path.home() / ".config" / "opencode"
    # Every interpolated value is shell-quoted so home/work directories with
    # spaces or metacharacters cannot break (or inject into) the command.
    active_config = shlex.quote(str(config_dir / "opencode.json"))
    swap_config = config_name != "opencode.json"

    # Do not override OPENCODE_CONFIG_DIR so it uses system defaults.
    env = os.environ.copy()

    timeout_prefix = f"timeout {timeout_seconds}s " if timeout_seconds else ""
    cmd = (
        f"{timeout_prefix}opencode --model={shlex.quote(self.model)} run --format=json "
        f"--thinking --dangerously-skip-permissions -- {shlex.quote(prompt)} "
        f"</dev/null > {shlex.quote(str(output_path))} 2>&1"
    )

    if swap_config:
        requested = shlex.quote(str(config_dir / config_name))
        await self.env.exec(f"cp {requested} {active_config}")

    try:
        result = await self.env.exec(cmd, env=env)
    finally:
        # Restore the main config even if the session raised, so a failure
        # here cannot leave the alternate config active.
        if swap_config:
            main_backup = shlex.quote(str(config_dir / "opencode-main.json"))
            await self.env.exec(f"cp {main_backup} {active_config}")

    return result.exit_code == 0
|
|
156
|
+
|
|
157
|
+
async def _run_main_session(self, prompt: str):
    """Run the main opencode session while watching its log file.

    The session output is redirected to ``<logs_dir>/opencode.txt``. While the
    command runs, the log is polled every 5 seconds for two conditions:
    the same error excerpt repeating for 36 consecutive polls (about three
    minutes), or the log size not changing for 10 minutes.

    NOTE: when a watchdog condition fires, the polling loop stops but the
    command itself is not killed; this method still awaits its completion.

    Returns:
        ``True`` when the command exits with status 0.
    """
    poll_seconds = 5
    idle_limit_seconds = 600
    same_error_limit = 36  # Approx 3 minutes at 5s polling

    log_path = self.setup.logs_dir / "opencode.txt"
    quoted_log = shlex.quote(str(log_path))

    cmd = (
        f"opencode --model={shlex.quote(self.model)} run --format=json --thinking "
        f"--dangerously-skip-permissions -- {shlex.quote(prompt)} "
        f"</dev/null > {quoted_log} 2>&1"
    )
    # Last few error-looking fragments from the tail of the log.
    error_probe = (
        f"tail -c 5000 {quoted_log} 2>/dev/null | "
        "grep -oiE '(Error|Traceback|FAILED|AssertionError|Exception|Arity mismatch)[^\"}]*' "
        "2>/dev/null | tail -5"
    )

    process_task = asyncio.create_task(self.env.exec(cmd, env=os.environ.copy()))

    last_size = -1
    # Monotonic clock: elapsed-time checks must not be affected by
    # wall-clock adjustments.
    last_activity = time.monotonic()
    last_error_text = None
    same_error_streak = 0

    while not process_task.done():
        await asyncio.sleep(poll_seconds)
        if not log_path.exists():
            continue

        current_size = log_path.stat().st_size
        if current_size != last_size:
            last_size = current_size
            last_activity = time.monotonic()

        # Error-repeat detection (simplified port from agent.py).
        # The probe is best-effort: a failing probe must not end the session.
        try:
            err_check = await self.env.exec(error_probe)
            error_lines = err_check.stdout.strip()
        except Exception as exc:
            logger.debug("Watchdog error probe failed: %s", exc)
            error_lines = ""

        if error_lines:
            if error_lines == last_error_text:
                same_error_streak += 1
            else:
                same_error_streak = 1
                last_error_text = error_lines

            if same_error_streak >= same_error_limit:
                logger.warning("Watchdog: Same error detected for 3+ minutes. Terminating session.")
                # We cannot directly kill the subprocess from here unless
                # LocalEnvironment is restructured; stop polling and rely on
                # opencode's own timeout/loop breaking.
                break

        if time.monotonic() - last_activity > idle_limit_seconds:
            logger.warning("Watchdog: Process idle for 10 minutes. Terminating.")
            break

    result = await process_task
    if result.exit_code != 0:
        logger.error(f"Main session failed with exit code {result.exit_code}")
    return result.exit_code == 0
|
|
215
|
+
|
|
216
|
+
async def _run_cleanup(self, instruction: str):
    """Remove Python bytecode, then ask an agent to quarantine scratch files.

    First deletes ``__pycache__`` directories and ``*.pyc`` files. Then, if an
    initial file listing was captured, diffs it against the current tree and
    hands the list of new files to an opencode session (verify config,
    60 second timeout) that is instructed to move non-deliverables into
    ``.frankcode/quarantine/``.

    Every step is best-effort: failures are logged and the method returns
    without raising.

    Args:
        instruction: Original task text, embedded in the cleanup prompt so the
            agent can recognise deliverables.
    """
    try:
        await self.env.exec(
            "find . -type d -name '__pycache__' -exec rm -rf {} + 2>/dev/null; "
            "find . -name '*.pyc' -delete 2>/dev/null; "
            "true"
        )
    except Exception as exc:
        logger.debug("Bytecode cleanup failed: %s", exc)

    # Without a baseline there is no way to tell fixtures from new files.
    if not self.initial_file_listing:
        return

    try:
        current_result = await self.env.exec(
            "find . -type f -not -path '*/.git/*' -not -path './.frankcode/*' 2>/dev/null | sort"
        )
        current_file_listing = current_result.stdout.strip()
    except Exception as exc:
        logger.debug("Could not list files for cleanup; skipping: %s", exc)
        return

    initial_set = set(self.initial_file_listing.splitlines())
    new_files = sorted(set(current_file_listing.splitlines()) - initial_set)
    if not new_files:
        return

    cleanup_prompt = (
        "You are a cleanup agent. Your ONLY job is to remove scratch files "
        "and build artifacts that a previous agent left behind in the task "
        "directory. You must NOT remove deliverable files.\n\n"
        f"TASK SPEC (what deliverables were requested):\n{instruction}\n\n"
        "FILES THAT EXISTED BEFORE THE AGENT RAN (fixtures — never touch):\n"
        f"{self.initial_file_listing}\n\n"
        "NEW FILES TO EVALUATE FOR CLEANUP:\n"
        + "\n".join(new_files) + "\n\n"
        "RULES:\n"
        "1. Files in the INITIAL listing are FIXTURES — never remove them.\n"
        "2. Deliverables named or implied by the task spec — never remove them.\n"
        "3. Everything else is a scratch artifact — move it to .frankcode/quarantine/ using mv.\n"
        "4. When UNSURE whether a file is a deliverable, KEEP it.\n\n"
        "Execute mv commands to move artifacts to .frankcode/quarantine/. "
        "Do NOT use rm. Do NOT run tests, modify deliverables, or write any new files."
    )

    await self._run_opencode_session(
        cleanup_prompt,
        self.setup.logs_dir / "cleanup.txt",
        config_name="opencode-verify.json",
        timeout_seconds=60,
    )
|
|
268
|
+
|
|
269
|
+
async def _run_verification(self, instruction: str, iteration: int) -> Tuple[bool, str]:
    """Run one adversarial verification session and parse its verdict.

    Args:
        instruction: Original task text, embedded in the verification prompt.
        iteration: 1-based pass number; selects the ``verify_<n>.txt`` log.

    Returns:
        ``(verified, feedback)`` where ``verified`` is true only for an
        explicit VERIFIED verdict and ``feedback`` is the last 5000
        characters of the session log (or a short message when the log is
        missing).
    """
    verify_prompt = (
        "You are an ADVERSARIAL VERIFICATION agent. Your job is to independently "
        "verify the deliverables — not by re-running the implementation, but by "
        "designing your own tests that check the answer from a different angle.\n\n"
        f"ORIGINAL TASK SPEC:\n{instruction}\n\n"
        "VERIFICATION PROCESS:\n"
        "1. Read the deliverable files AND the implementation code/scripts to understand what approach was used.\n"
        "2. Extract every testable condition from the task spec.\n"
        "3. For each condition, write a verification script that checks the answer using a DIFFERENT method than the implementation.\n"
        "4. If ANY of your independent tests produces a mismatch, you MUST report NEEDS_FIX.\n"
        "5. Do NOT modify any files in the task directory. Put all your verification scripts in .frankcode/verify/.\n"
        "6. Produce your verdict as the LAST line of output:\n"
        "   - VERIFIED\n"
        "   - NEEDS_FIX: followed by a detailed description\n"
    )

    log_path = self.setup.logs_dir / f"verify_{iteration}.txt"
    await self._run_opencode_session(
        verify_prompt,
        log_path,
        config_name="opencode-verify.json",
        timeout_seconds=300,
    )

    # The log holds raw stdout/stderr of an external tool: decode it
    # explicitly as UTF-8 and replace undecodable bytes instead of letting a
    # stray byte abort the whole verification loop.
    try:
        raw_tail = log_path.read_text(encoding="utf-8", errors="replace")[-5000:]
    except FileNotFoundError:
        return False, "Verification log not found"

    verdict = self._extract_verdict(raw_tail)
    return verdict == "VERIFIED", raw_tail
|
|
302
|
+
|
|
303
|
+
async def _run_fix(self, instruction: str, feedback: str, iteration: int):
    """Run one fix session driven by the verifier's feedback.

    Args:
        instruction: Original task text.
        feedback: Verification output; only its last 2000 characters are
            included in the prompt.
        iteration: Pass number; selects the ``fix_<n>.txt`` log file.
    """
    recent_feedback = feedback[-2000:]
    prompt_parts = [
        "A previous agent attempted this task but the solution may have bugs. ",
        "The implementation files are already in place.\n\n",
        f"ORIGINAL TASK:\n{instruction}\n\n",
        f"VERIFICATION RESULT:\n{recent_feedback}\n\n",
        "Read the existing files, understand what's failing, and fix it. ",
        "Test your fix before declaring complete.",
    ]
    fix_log = self.setup.logs_dir / f"fix_{iteration}.txt"

    await self._run_opencode_session(
        "".join(prompt_parts),
        fix_log,
        config_name="opencode-verify.json",
        timeout_seconds=300,
    )
|
|
319
|
+
|
|
320
|
+
@staticmethod
|
|
321
|
+
def _extract_verdict(raw_tail: str) -> str:
|
|
322
|
+
for line in reversed(raw_tail.splitlines()):
|
|
323
|
+
line = line.strip()
|
|
324
|
+
if not line:
|
|
325
|
+
continue
|
|
326
|
+
|
|
327
|
+
if '"text"' in line or '"output"' in line:
|
|
328
|
+
text_matches = re.findall(r'"(?:text|output)"\s*:\s*"((?:[^"\\]|\\.)*)"', line)
|
|
329
|
+
for text_value in reversed(text_matches):
|
|
330
|
+
text_value = text_value.replace("\\n", "\n").replace('\\"', '"')
|
|
331
|
+
for verdict_line in reversed(text_value.splitlines()):
|
|
332
|
+
verdict_line = verdict_line.strip()
|
|
333
|
+
if verdict_line.startswith("VERIFIED"):
|
|
334
|
+
return "VERIFIED"
|
|
335
|
+
if verdict_line.startswith("NEEDS_FIX"):
|
|
336
|
+
return "NEEDS_FIX"
|
|
337
|
+
|
|
338
|
+
if line.startswith("VERIFIED"):
|
|
339
|
+
return "VERIFIED"
|
|
340
|
+
if line.startswith("NEEDS_FIX"):
|
|
341
|
+
return "NEEDS_FIX"
|
|
342
|
+
|
|
343
|
+
return "UNKNOWN"
|
frankcode/setup.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, List, Any
|
|
7
|
+
from .environment import LocalEnvironment
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
class FrankSetup:
|
|
12
|
+
def __init__(self, env: LocalEnvironment, work_dir: str):
    """Record the environment and derive the ``.frankcode`` directory layout.

    Args:
        env: Environment object used to execute shell commands.
        work_dir: Task working directory; ``.frankcode`` lives directly in it.
    """
    root = Path(work_dir)
    hidden = root / ".frankcode"

    self.env = env
    self.work_dir = root
    self.frank_dir = hidden
    self.logs_dir = hidden / "logs"
    self.quarantine_dir = hidden / "quarantine"
|
|
18
|
+
|
|
19
|
+
async def ensure_directories(self):
    """Create ``.frankcode`` plus its logs and quarantine directories if missing."""
    for directory in (self.frank_dir, self.logs_dir, self.quarantine_dir):
        directory.mkdir(parents=True, exist_ok=True)
|
|
23
|
+
|
|
24
|
+
def get_os(self) -> str:
    """Return ``"macos"`` when ``sys.platform`` is ``darwin``, otherwise ``"linux"``."""
    return "macos" if sys.platform == "darwin" else "linux"
|
|
28
|
+
|
|
29
|
+
async def install_plugins(self):
    """Install the OpenCode plugins into ``.frankcode`` with ``npm install``.

    Returns:
        Whatever ``self.env.exec`` returns for the npm command.
    """
    import shlex  # local import: not part of this file's top-level imports

    plugins = [
        "opencode-anti-loop",
        "opencode-auto-reviewer",
        "opencode-context-warning",
        "opencode-deadline-pressure",
        "opencode-heartbeat",
        "opencode-ledger",
    ]
    logger.info(f"Installing plugins via npm into {self.frank_dir}...")
    # Quote the prefix: the work directory is user-supplied and may contain
    # spaces or shell metacharacters.
    prefix = shlex.quote(str(self.frank_dir))
    cmd = f"npm install --prefix {prefix} {' '.join(plugins)}"
    return await self.env.exec(cmd)
|
|
41
|
+
|
|
42
|
+
async def inject_plugins_into_system_configs(self):
    """Register the installed plugins in the user's OpenCode configs.

    Works on ``~/.config/opencode`` (created if missing):

    1. Adds the plugin paths to the ``plugin`` list of ``opencode.json``,
       keeping existing entries and skipping duplicates.
    2. Does the same for ``opencode-verify.json`` when that file exists.
    3. Writes the resulting main config to ``opencode-main.json`` so it can
       be copied back over ``opencode.json`` after a session that swapped in
       another config.

    If an existing ``opencode.json`` cannot be parsed as a JSON object it is
    moved aside to ``opencode.json.unreadable`` before a fresh config is
    written, so the user's original content is not destroyed.
    """
    config_dir = Path.home() / ".config" / "opencode"
    config_dir.mkdir(parents=True, exist_ok=True)

    modules_dir = self.frank_dir.resolve() / "node_modules"
    plugin_paths = [
        str(modules_dir / name)
        for name in (
            "opencode-anti-loop",
            "opencode-auto-reviewer",
            "opencode-context-warning",
            "opencode-deadline-pressure",
            "opencode-heartbeat",
            "opencode-ledger",
        )
    ]

    def read_json_object(path):
        # Load *path*; reject roots that are not JSON objects, because the
        # merge below needs a mapping.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            raise ValueError("top-level JSON value is not an object")
        return data

    def write_json(path, data):
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

    def add_plugins(config):
        # Merge plugin_paths into config["plugin"], preserving order and
        # skipping entries that are already present.
        existing = config.get("plugin", [])
        if not isinstance(existing, list):
            existing = [existing]
        merged = list(existing)
        for plugin_path in plugin_paths:
            if plugin_path not in merged:
                merged.append(plugin_path)
        config["plugin"] = merged
        return config

    # 1. Inject into main opencode.json
    main_config_path = config_dir / "opencode.json"
    main_config = {}
    if main_config_path.exists():
        try:
            main_config = read_json_object(main_config_path)
        except (OSError, ValueError) as e:
            logger.warning(f"Failed to read {main_config_path}: {e}")
            # The file is about to be rewritten from scratch; keep the
            # unparseable original next to it instead of losing it.
            preserved = main_config_path.with_name("opencode.json.unreadable")
            try:
                main_config_path.replace(preserved)
            except OSError as move_err:
                logger.warning(f"Could not preserve {main_config_path}: {move_err}")

    main_config = add_plugins(main_config)
    write_json(main_config_path, main_config)

    # 2. Inject into opencode-verify.json if it exists
    verify_config_path = config_dir / "opencode-verify.json"
    if verify_config_path.exists():
        try:
            write_json(verify_config_path, add_plugins(read_json_object(verify_config_path)))
        except (OSError, ValueError) as e:
            logger.warning(f"Failed to read/write {verify_config_path}: {e}")

    # 3. Save a copy of the injected main config so it can be restored after
    #    a session that temporarily replaced opencode.json.
    write_json(config_dir / "opencode-main.json", main_config)
|
|
108
|
+
|
|
109
|
+
# We do not use preinstall_deps in CLI anymore because it lacks root permissions.
|
|
110
|
+
# It is handled in agent.py now.
|
|
111
|
+
async def preinstall_deps(self, instruction: str):
    """Do nothing and return ``None``; *instruction* is ignored."""
    return None
|
frankcode/utils.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
def check_error_loop(hash_history: List[str], threshold: int = 3) -> bool:
    """Report whether the latest error hash has repeated *threshold* times in a row.

    Args:
        hash_history: Error hashes in chronological order (oldest first).
        threshold: Number of identical trailing entries that counts as a loop.

    Returns:
        ``True`` when the last ``threshold`` (or more) entries are all equal
        to the most recent one. An empty history is never a loop, whatever
        the threshold.
    """
    # The explicit emptiness check guards hash_history[-1]: with
    # threshold <= 0 the length test alone would let an empty list through.
    if not hash_history or len(hash_history) < threshold:
        return False

    last_hash = hash_history[-1]
    streak = 0
    for entry in reversed(hash_history):
        if entry != last_hash:
            break
        streak += 1

    return streak >= threshold
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: frankcode
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Advanced multi-agent orchestrator for OpenCode
|
|
5
|
+
Author: joeyism
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/joeyism/frankcode
|
|
8
|
+
Project-URL: Repository, https://github.com/joeyism/frankcode
|
|
9
|
+
Project-URL: Issues, https://github.com/joeyism/frankcode/issues
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Intended Audience :: Developers
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: typer[all]>=0.9.0
|
|
21
|
+
Requires-Dist: rich>=13.0.0
|
|
22
|
+
Requires-Dist: pydantic>=2.0.0
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# FrankCode
|
|
26
|
+
|
|
27
|
+
Advanced multi-agent orchestrator for OpenCode.
|
|
28
|
+
|
|
29
|
+
FrankCode acts as a wrapper and orchestration layer around OpenCode, managing specialized agent setups, safety plugins, verification passes, and environment configurations to accomplish complex development tasks safely and robustly.
|
|
30
|
+
|
|
31
|
+
## Requirements
|
|
32
|
+
|
|
33
|
+
Before installing FrankCode, ensure you have the following prerequisites installed on your system:
|
|
34
|
+
|
|
35
|
+
- **Python**: `>= 3.10`
|
|
36
|
+
- **Node.js & npm**: Required to install OpenCode plugins and manage the environment dynamically. FrankCode will attempt to automatically resolve Node via `nvm` if available.
|
|
37
|
+
- **OpenCode**: Make sure the `opencode` CLI is installed and available in your `PATH`.
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
You can install FrankCode directly via pip:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install frankcode
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Alternatively, to run from source:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
git clone https://github.com/joeyism/frankcode.git
|
|
51
|
+
cd frankcode
|
|
52
|
+
pip install -e .
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Usage
|
|
56
|
+
|
|
57
|
+
FrankCode provides a simple CLI. To execute a task using the default multi-agent orchestrator:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
frankcode "Create a new React component that fetches data from an API and displays it in a table"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Options
|
|
64
|
+
|
|
65
|
+
- `--model`: Specify the LLM model to use (default: `google/gemini-3.1-pro-preview`).
|
|
66
|
+
- `--verbose`, `-v`: Show debug logs.
|
|
67
|
+
- `--no-verify`: Disable the autonomous verification and fix passes.
|
|
68
|
+
- `--clean`: Wipe the `.frankcode` execution environment before starting.
|
|
69
|
+
|
|
70
|
+
Example:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
frankcode "Refactor the auth logic to use JWT" --model openai/gpt-5.4 --verbose
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## How It Works
|
|
77
|
+
|
|
78
|
+
1. **Environment Setup**: FrankCode creates an isolated `.frankcode` directory to store logs, quarantine artifacts, and install specific OpenCode plugins dynamically.
|
|
79
|
+
2. **Execution**: Your prompt is passed into an enhanced OpenCode session. FrankCode actively monitors the process for infinite error loops or idle timeouts.
|
|
80
|
+
3. **Verification Loop (Optional but enabled by default)**: After the main task completes, an adversarial validation agent reads the outputs and devises independent tests. If the tests fail, FrankCode triggers a "fix pass" and repeats the verification.
|
|
81
|
+
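4. **Cleanup**: Temporary artifacts generated during agent reasoning are quarantined. When verification is enabled, this pass runs after the main session and before the verification loop.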
|
|
82
|
+
|
|
83
|
+
## License
|
|
84
|
+
|
|
85
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
frankcode/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
frankcode/cli.py,sha256=xp8tCCuEIU3thx9eYLdMxv5_Wi9vkzeGpUQLeigSDDE,2297
|
|
3
|
+
frankcode/environment.py,sha256=IdRl2Evqo5sU1x472diLBUvA3laWpw1L3urDadD1QlQ,1504
|
|
4
|
+
frankcode/orchestrator.py,sha256=eSw5Cypz7seWM0QeDDqKGaseQDRfsOAE1EzQMVDcdAA,13767
|
|
5
|
+
frankcode/setup.py,sha256=VrX8S6J_v5y9YOj3DKTAGmVjb_kJNzmeSy99i_uhaZg,4472
|
|
6
|
+
frankcode/utils.py,sha256=_AyIggd0_X6omVlGh4QGdfjG82b2DawjlI6jiD24vkk,391
|
|
7
|
+
frankcode/configs/oh-my-opencode-slim.json,sha256=oQmCCxQrGGGyCaRkQfDCSbndDvqw4bdnyZxsiJul5X0,1638
|
|
8
|
+
frankcode/configs/opencode-verify.json,sha256=vsPenxhDqXS3ZSLTLkU00ng_MHmxkI-HSjn1rA2PApI,3218
|
|
9
|
+
frankcode/configs/opencode.json,sha256=yj0WO6sFU4GCciYUBWjzvvfqrBh869doeOC2Pp5EI1Y,3
|
|
10
|
+
frankcode/configs/prompt-rules.json,sha256=kRbDHHkr9cefWRblRMaPU7rTvmSVnKD2iIJT2zK5P3I,16102
|
|
11
|
+
frankcode-0.1.0.dist-info/licenses/LICENSE,sha256=Efkiyt9ImfICPljcBeVuDTPhI2oOwih_twu6quBoePI,1063
|
|
12
|
+
frankcode-0.1.0.dist-info/METADATA,sha256=rL0ZyA-KiACm3S9OYwSNu7DGvLGUkce78g0orIo5G3g,3141
|
|
13
|
+
frankcode-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
14
|
+
frankcode-0.1.0.dist-info/entry_points.txt,sha256=womlESdSPcIMJppDxJ5VPkAc1J2GBQkAnegxRcAIJXo,48
|
|
15
|
+
frankcode-0.1.0.dist-info/top_level.txt,sha256=bQRmX59A1cyP_gMp7iHbbIQ2yRLrxQsTtlGRcI3W_9E,10
|
|
16
|
+
frankcode-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 joeyism
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
frankcode
|