harness-evolver 3.0.4 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -64,7 +64,34 @@ Based on your strategy and diagnosis, modify the code:
64
64
  - **Error handling**: retry logic, fallback strategies, timeout handling
65
65
  - **Model selection**: which model for which task
66
66
 
67
- Use Context7 MCP tools (`resolve-library-id`, `get-library-docs`) to check current API documentation before writing code that uses library APIs.
67
+ ### Phase 3.5: Consult Documentation (MANDATORY)
68
+
69
+ **Before writing ANY code**, you MUST consult Context7 for every library you'll be modifying or using. This is NOT optional.
70
+
71
+ **Step 1 — Identify libraries from the code you read:**
72
+ Read the imports in the files you're about to modify. For each framework/library (LangGraph, OpenAI, Anthropic, CrewAI, etc.):
73
+
74
+ **Step 2 — Resolve library ID:**
75
+ ```
76
+ resolve-library-id(libraryName: "langgraph", query: "what you're trying to do")
77
+ ```
78
+ This returns up to 10 matches. Pick the one with the highest relevance.
79
+
80
+ **Step 3 — Query docs for your specific task:**
81
+ ```
82
+ get-library-docs(libraryId: "/langchain-ai/langgraph", query: "conditional edges StateGraph", topic: "routing")
83
+ ```
84
+ Ask about the SPECIFIC API you're going to use or change.
85
+
86
+ **Examples of what to query:**
87
+ - About to modify a StateGraph? → `query: "StateGraph add_conditional_edges"`
88
+ - Changing prompt template? → `query: "ChatPromptTemplate from_messages"` for langchain
89
+ - Adding a tool? → `query: "StructuredTool create tool definition"` for langchain
90
+ - Changing model? → `query: "ChatOpenAI model parameters temperature"` for openai
91
+
92
+ **Why this matters:** Your training data may be outdated. Libraries change APIs between versions. A quick Context7 lookup takes seconds and prevents proposing code that uses deprecated or incorrect patterns. The documentation is the source of truth, not your model knowledge.
93
+
94
+ **If Context7 MCP is not available:** Note in proposal.md "API patterns not verified against current docs — verify before deploying."
68
95
 
69
96
  ### Phase 4: Commit and Document
70
97
 
@@ -99,15 +126,6 @@ If `production_seed.json` exists:
99
126
 
100
127
  Prioritize changes that fix real production failures over synthetic test failures.
101
128
 
102
- ## Context7 — Documentation Lookup
103
-
104
- Use Context7 MCP tools proactively when:
105
- - Writing code that uses a library API
106
- - Unsure about method signatures or patterns
107
- - Checking if a better approach exists in the latest version
108
-
109
- If Context7 is not available, proceed with model knowledge but note in proposal.md.
110
-
111
129
  ## Rules
112
130
 
113
131
  1. **Read before writing** — understand the code before changing it
package/bin/install.js CHANGED
@@ -190,28 +190,58 @@ function installTools() {
190
190
  }
191
191
 
192
192
  function installPythonDeps() {
193
- console.log(`\n ${YELLOW}Installing Python dependencies...${RESET}`);
194
-
195
- // Try multiple pip variants
196
- const commands = [
197
- "pip install langsmith openevals",
198
- "uv pip install langsmith openevals",
199
- "pip3 install langsmith openevals",
200
- "python3 -m pip install langsmith openevals",
193
+ const venvDir = path.join(HOME, ".evolver", "venv");
194
+ const venvPython = path.join(venvDir, "bin", "python");
195
+ const venvPip = path.join(venvDir, "bin", "pip");
196
+
197
+ console.log(`\n ${YELLOW}Setting up Python environment...${RESET}`);
198
+
199
+ // Create venv if it doesn't exist
200
+ if (!fs.existsSync(venvPython)) {
201
+ console.log(` Creating isolated venv at ~/.evolver/venv/`);
202
+ const venvCommands = [
203
+ `uv venv "${venvDir}"`,
204
+ `python3 -m venv "${venvDir}"`,
205
+ ];
206
+ let created = false;
207
+ for (const cmd of venvCommands) {
208
+ try {
209
+ execSync(cmd, { stdio: "pipe", timeout: 30000 });
210
+ created = true;
211
+ break;
212
+ } catch {
213
+ continue;
214
+ }
215
+ }
216
+ if (!created) {
217
+ console.log(` ${RED}Failed to create venv.${RESET}`);
218
+ console.log(` Run manually: ${BOLD}python3 -m venv ~/.evolver/venv${RESET}`);
219
+ return false;
220
+ }
221
+ console.log(` ${GREEN}✓${RESET} venv created`);
222
+ } else {
223
+ console.log(` ${GREEN}✓${RESET} venv exists at ~/.evolver/venv/`);
224
+ }
225
+
226
+ // Install/upgrade deps in the venv
227
+ const installCommands = [
228
+ `uv pip install --python "${venvPython}" langsmith openevals`,
229
+ `"${venvPip}" install --upgrade langsmith openevals`,
230
+ `"${venvPython}" -m pip install --upgrade langsmith openevals`,
201
231
  ];
202
232
 
203
- for (const cmd of commands) {
233
+ for (const cmd of installCommands) {
204
234
  try {
205
235
  execSync(cmd, { stdio: "pipe", timeout: 120000 });
206
- console.log(` ${GREEN}✓${RESET} langsmith + openevals installed`);
236
+ console.log(` ${GREEN}✓${RESET} langsmith + openevals installed in venv`);
207
237
  return true;
208
238
  } catch {
209
239
  continue;
210
240
  }
211
241
  }
212
242
 
213
- console.log(` ${YELLOW}!${RESET} Could not auto-install Python packages.`);
214
- console.log(` Run manually: ${BOLD}pip install langsmith openevals${RESET}`);
243
+ console.log(` ${YELLOW}!${RESET} Could not install packages in venv.`);
244
+ console.log(` Run manually: ${BOLD}~/.evolver/venv/bin/pip install langsmith openevals${RESET}`);
215
245
  return false;
216
246
  }
217
247
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "harness-evolver",
3
- "version": "3.0.4",
3
+ "version": "3.0.6",
4
4
  "description": "LangSmith-native autonomous agent optimization for Claude Code",
5
5
  "author": "Raphael Valdetaro",
6
6
  "license": "MIT",
@@ -13,12 +13,15 @@ Run the autonomous propose-evaluate-iterate loop using LangSmith as the evaluati
13
13
 
14
14
  `.evolver.json` must exist. If not, tell user to run `evolver:setup`.
15
15
 
16
- ## Resolve Tool Path
16
+ ## Resolve Tool Path and Python
17
17
 
18
18
  ```bash
19
19
  TOOLS=$([ -d ".evolver/tools" ] && echo ".evolver/tools" || echo "$HOME/.evolver/tools")
20
+ EVOLVER_PY=$([ -f "$HOME/.evolver/venv/bin/python" ] && echo "$HOME/.evolver/venv/bin/python" || echo "python3")
20
21
  ```
21
22
 
23
+ Use `$EVOLVER_PY` instead of `python3` for ALL tool invocations.
24
+
22
25
  ## Parse Arguments
23
26
 
24
27
  - `--iterations N` (default: from interactive question or 5)
@@ -76,7 +79,7 @@ Run trace insights from the best experiment:
76
79
 
77
80
  ```bash
78
81
  BEST=$(python3 -c "import json; print(json.load(open('.evolver.json'))['best_experiment'])")
79
- python3 $TOOLS/trace_insights.py \
82
+ $EVOLVER_PY $TOOLS/trace_insights.py \
80
83
  --from-experiment "$BEST" \
81
84
  --output trace_insights.json 2>/dev/null
82
85
  ```
@@ -86,7 +89,7 @@ If a production project is configured, also gather production insights:
86
89
  ```bash
87
90
  PROD=$(python3 -c "import json; c=json.load(open('.evolver.json')); print(c.get('production_project',''))")
88
91
  if [ -n "$PROD" ] && [ ! -f "production_seed.json" ]; then
89
- python3 $TOOLS/seed_from_traces.py \
92
+ $EVOLVER_PY $TOOLS/seed_from_traces.py \
90
93
  --project "$PROD" --use-sdk \
91
94
  --output-md production_seed.md \
92
95
  --output-json production_seed.json \
@@ -99,7 +102,7 @@ fi
99
102
  Read the best experiment results and cluster failures:
100
103
 
101
104
  ```bash
102
- python3 $TOOLS/read_results.py \
105
+ $EVOLVER_PY $TOOLS/read_results.py \
103
106
  --experiment "$BEST" \
104
107
  --config .evolver.json \
105
108
  --output best_results.json 2>/dev/null
@@ -174,7 +177,7 @@ Wait for all 5 to complete.
174
177
  For each worktree that has changes (proposer committed something):
175
178
 
176
179
  ```bash
177
- python3 $TOOLS/run_eval.py \
180
+ $EVOLVER_PY $TOOLS/run_eval.py \
178
181
  --config .evolver.json \
179
182
  --worktree-path {worktree_path} \
180
183
  --experiment-prefix v{NNN}{suffix} \
@@ -186,7 +189,7 @@ Each candidate becomes a separate LangSmith experiment.
186
189
  ### 4. Compare All Candidates
187
190
 
188
191
  ```bash
189
- python3 $TOOLS/read_results.py \
192
+ $EVOLVER_PY $TOOLS/read_results.py \
190
193
  --experiments "v{NNN}a,v{NNN}b,v{NNN}c,v{NNN}d,v{NNN}e" \
191
194
  --config .evolver.json \
192
195
  --output comparison.json
@@ -35,35 +35,34 @@ If `MISSING`: "Set your LangSmith API key: `export LANGSMITH_API_KEY=lsv2_pt_...
35
35
 
36
36
  The tools auto-load the key from the credentials file, but the env var takes precedence.
37
37
 
38
- Python 3.10+ with `langsmith` and `openevals` packages must be installed:
39
-
40
- ```bash
41
- pip install langsmith openevals 2>/dev/null || uv pip install langsmith openevals
42
- ```
43
-
44
- ## Resolve Tool Path
38
+ ## Resolve Tool Path and Python
45
39
 
46
40
  ```bash
47
41
  TOOLS=$([ -d ".evolver/tools" ] && echo ".evolver/tools" || echo "$HOME/.evolver/tools")
42
+ EVOLVER_PY=$([ -f "$HOME/.evolver/venv/bin/python" ] && echo "$HOME/.evolver/venv/bin/python" || echo "python3")
48
43
  ```
49
44
 
45
+ Use `$EVOLVER_PY` instead of `python3` for ALL tool invocations. This ensures the venv with langsmith+openevals is used.
46
+
50
47
  ## Phase 1: Explore Project (automatic)
51
48
 
52
49
  ```bash
53
- find . -maxdepth 3 -type f -name "*.py" | head -30
54
- python3 $TOOLS/detect_stack.py .
50
+ find . -maxdepth 3 -type f -name "*.py" -not -path "*/.venv/*" -not -path "*/node_modules/*" -not -path "*/__pycache__/*" | head -30
55
51
  ```
56
52
 
53
+ **Monorepo detection**: if the project root has multiple subdirectories with their own `main.py` or `pyproject.toml`, it's a monorepo. Use AskUserQuestion to ask WHICH app to optimize before proceeding — do NOT scan everything.
54
+
57
55
  Look for:
58
56
  - Entry points: files with `if __name__`, or named `main.py`, `app.py`, `agent.py`, `graph.py`, `pipeline.py`
59
- - Framework: LangGraph, CrewAI, OpenAI SDK, Anthropic SDK, etc.
60
57
  - Existing LangSmith config: `LANGCHAIN_PROJECT` / `LANGSMITH_PROJECT` in env or `.env`
61
58
  - Existing test data: JSON files with inputs, CSV files, etc.
62
59
  - Dependencies: `requirements.txt`, `pyproject.toml`
63
60
 
64
- Identify the **run command**how to execute the agent. Examples:
61
+ To identify the **framework**, read the entry point file and its immediate imports. The proposer agents will use Context7 MCP for detailed documentation lookup — you don't need to detect every library, just identify the main framework (LangGraph, CrewAI, OpenAI Agents SDK, etc.) from the imports you see.
62
+
63
+ Identify the **run command** — how to execute the agent:
65
64
  - `python main.py` (if it accepts `--input` flag)
66
- - `python -c "from agent import run; import json,sys; print(json.dumps(run(json.load(open(sys.argv[1])))))"`
65
+ - The command in the project's README, Makefile, or scripts/
67
66
 
68
67
  ## Phase 2: Confirm Detection (interactive)
69
68
 
@@ -145,7 +144,7 @@ If "I have test data": ask for file path.
145
144
  Build the setup.py command based on all gathered information:
146
145
 
147
146
  ```bash
148
- python3 $TOOLS/setup.py \
147
+ $EVOLVER_PY $TOOLS/setup.py \
149
148
  --project-name "{project_name}" \
150
149
  --entry-point "{run_command}" \
151
150
  --framework "{framework}" \
@@ -1,173 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Detect the technology stack of a harness by analyzing Python imports via AST.
3
-
4
- Usage:
5
- detect_stack.py <file_or_directory> [-o output.json]
6
-
7
- Maps imports to known libraries and their Context7 IDs for documentation lookup.
8
- Stdlib-only. No external dependencies.
9
- """
10
-
11
- import ast
12
- import json
13
- import os
14
- import sys
15
-
16
- KNOWN_LIBRARIES = {
17
- "langchain": {
18
- "context7_id": "/langchain-ai/langchain",
19
- "display": "LangChain",
20
- "modules": ["langchain", "langchain_core", "langchain_openai",
21
- "langchain_anthropic", "langchain_community"],
22
- },
23
- "langgraph": {
24
- "context7_id": "/langchain-ai/langgraph",
25
- "display": "LangGraph",
26
- "modules": ["langgraph"],
27
- },
28
- "llamaindex": {
29
- "context7_id": "/run-llama/llama_index",
30
- "display": "LlamaIndex",
31
- "modules": ["llama_index"],
32
- },
33
- "openai": {
34
- "context7_id": "/openai/openai-python",
35
- "display": "OpenAI Python SDK",
36
- "modules": ["openai"],
37
- },
38
- "anthropic": {
39
- "context7_id": "/anthropics/anthropic-sdk-python",
40
- "display": "Anthropic Python SDK",
41
- "modules": ["anthropic"],
42
- },
43
- "dspy": {
44
- "context7_id": "/stanfordnlp/dspy",
45
- "display": "DSPy",
46
- "modules": ["dspy"],
47
- },
48
- "crewai": {
49
- "context7_id": "/crewAIInc/crewAI",
50
- "display": "CrewAI",
51
- "modules": ["crewai"],
52
- },
53
- "autogen": {
54
- "context7_id": "/microsoft/autogen",
55
- "display": "AutoGen",
56
- "modules": ["autogen"],
57
- },
58
- "chromadb": {
59
- "context7_id": "/chroma-core/chroma",
60
- "display": "ChromaDB",
61
- "modules": ["chromadb"],
62
- },
63
- "pinecone": {
64
- "context7_id": "/pinecone-io/pinecone-python-client",
65
- "display": "Pinecone",
66
- "modules": ["pinecone"],
67
- },
68
- "qdrant": {
69
- "context7_id": "/qdrant/qdrant",
70
- "display": "Qdrant",
71
- "modules": ["qdrant_client"],
72
- },
73
- "weaviate": {
74
- "context7_id": "/weaviate/weaviate",
75
- "display": "Weaviate",
76
- "modules": ["weaviate"],
77
- },
78
- "fastapi": {
79
- "context7_id": "/fastapi/fastapi",
80
- "display": "FastAPI",
81
- "modules": ["fastapi"],
82
- },
83
- "flask": {
84
- "context7_id": "/pallets/flask",
85
- "display": "Flask",
86
- "modules": ["flask"],
87
- },
88
- "pydantic": {
89
- "context7_id": "/pydantic/pydantic",
90
- "display": "Pydantic",
91
- "modules": ["pydantic"],
92
- },
93
- "pandas": {
94
- "context7_id": "/pandas-dev/pandas",
95
- "display": "Pandas",
96
- "modules": ["pandas"],
97
- },
98
- "numpy": {
99
- "context7_id": "/numpy/numpy",
100
- "display": "NumPy",
101
- "modules": ["numpy"],
102
- },
103
- }
104
-
105
-
106
- def detect_from_file(filepath):
107
- """Analyze imports of a Python file and return detected stack."""
108
- with open(filepath) as f:
109
- try:
110
- tree = ast.parse(f.read())
111
- except SyntaxError:
112
- return {}
113
-
114
- imports = set()
115
- for node in ast.walk(tree):
116
- if isinstance(node, ast.Import):
117
- for alias in node.names:
118
- imports.add(alias.name.split(".")[0])
119
- elif isinstance(node, ast.ImportFrom):
120
- if node.module:
121
- imports.add(node.module.split(".")[0])
122
-
123
- detected = {}
124
- for lib_key, lib_info in KNOWN_LIBRARIES.items():
125
- found = imports & set(lib_info["modules"])
126
- if found:
127
- detected[lib_key] = {
128
- "context7_id": lib_info["context7_id"],
129
- "display": lib_info["display"],
130
- "modules_found": sorted(found),
131
- }
132
-
133
- return detected
134
-
135
-
136
- def detect_from_directory(directory):
137
- """Analyze all .py files in a directory and consolidate the stack."""
138
- all_detected = {}
139
- for root, dirs, files in os.walk(directory):
140
- for f in files:
141
- if f.endswith(".py"):
142
- filepath = os.path.join(root, f)
143
- file_detected = detect_from_file(filepath)
144
- for lib_key, lib_info in file_detected.items():
145
- if lib_key not in all_detected:
146
- all_detected[lib_key] = lib_info
147
- else:
148
- existing = set(all_detected[lib_key]["modules_found"])
149
- existing.update(lib_info["modules_found"])
150
- all_detected[lib_key]["modules_found"] = sorted(existing)
151
- return all_detected
152
-
153
-
154
- if __name__ == "__main__":
155
- import argparse
156
-
157
- parser = argparse.ArgumentParser(description="Detect stack from Python files")
158
- parser.add_argument("path", help="File or directory to analyze")
159
- parser.add_argument("--output", "-o", help="Output JSON path")
160
- args = parser.parse_args()
161
-
162
- if os.path.isfile(args.path):
163
- result = detect_from_file(args.path)
164
- else:
165
- result = detect_from_directory(args.path)
166
-
167
- output = json.dumps(result, indent=2)
168
-
169
- if args.output:
170
- with open(args.output, "w") as f:
171
- f.write(output)
172
- else:
173
- print(output)