agentic-python-coder 2.2.1__tar.gz → 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/.gitignore +2 -3
  2. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/PKG-INFO +39 -27
  3. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/README.md +29 -13
  4. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/prompts/system.md +12 -6
  5. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/prompts/system_todo.md +12 -6
  6. agentic_python_coder-3.0.0/coder/src/agentic_python_coder/__init__.py +89 -0
  7. agentic_python_coder-3.0.0/coder/src/agentic_python_coder/agent.py +367 -0
  8. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/cli.py +17 -7
  9. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/cpmpy/cpmpy.md +12 -3
  10. agentic_python_coder-3.0.0/coder/src/agentic_python_coder/kernel.py +634 -0
  11. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/llm.py +46 -36
  12. agentic_python_coder-3.0.0/coder/src/agentic_python_coder/mcp_server.py +531 -0
  13. agentic_python_coder-3.0.0/coder/src/agentic_python_coder/models/gemini3pro.json +8 -0
  14. agentic_python_coder-2.2.1/coder/src/agentic_python_coder/models/gpt5.json → agentic_python_coder-3.0.0/coder/src/agentic_python_coder/models/gpt52.json +2 -2
  15. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/runner.py +82 -5
  16. agentic_python_coder-3.0.0/coder/src/agentic_python_coder/tools.py +350 -0
  17. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/pyproject.toml +11 -17
  18. agentic_python_coder-2.2.1/coder/src/agentic_python_coder/__init__.py +0 -39
  19. agentic_python_coder-2.2.1/coder/src/agentic_python_coder/agent.py +0 -366
  20. agentic_python_coder-2.2.1/coder/src/agentic_python_coder/kernel.py +0 -343
  21. agentic_python_coder-2.2.1/coder/src/agentic_python_coder/mcp_server.py +0 -485
  22. agentic_python_coder-2.2.1/coder/src/agentic_python_coder/tools.py +0 -231
  23. agentic_python_coder-2.2.1/coder/tests/test_kernel.py +0 -193
  24. agentic_python_coder-2.2.1/coder/tests/test_library_api.py +0 -159
  25. agentic_python_coder-2.2.1/coder/tests/test_mcp_server.py +0 -327
  26. agentic_python_coder-2.2.1/coder/tests/test_todo_flag_integration.py +0 -128
  27. agentic_python_coder-2.2.1/coder/tests/test_todo_tool_availability.py +0 -51
  28. agentic_python_coder-2.2.1/examples/clingo/README.md +0 -81
  29. agentic_python_coder-2.2.1/examples/clingo/clingo.md +0 -1220
  30. agentic_python_coder-2.2.1/examples/clingo/sample_tasks/bird_reasoning.md +0 -49
  31. agentic_python_coder-2.2.1/examples/clingo/sample_tasks/diagnosis.md +0 -57
  32. agentic_python_coder-2.2.1/examples/clingo/sample_tasks/simple_coloring.md +0 -26
  33. agentic_python_coder-2.2.1/examples/clingo/sample_tasks/stable_marriage.md +0 -26
  34. agentic_python_coder-2.2.1/examples/clingo/sample_tasks/sudoku_mini.md +0 -28
  35. agentic_python_coder-2.2.1/examples/cpmpy/README.md +0 -69
  36. agentic_python_coder-2.2.1/examples/cpmpy/cpmpy.md +0 -46
  37. agentic_python_coder-2.2.1/examples/cpmpy/sample_problems/magic_square.md +0 -46
  38. agentic_python_coder-2.2.1/examples/cpmpy/sample_problems/n_queens.md +0 -36
  39. agentic_python_coder-2.2.1/examples/regex/README.md +0 -66
  40. agentic_python_coder-2.2.1/examples/regex/regex.md +0 -114
  41. agentic_python_coder-2.2.1/examples/regex/sample_tasks/email_extraction.md +0 -31
  42. agentic_python_coder-2.2.1/examples/regex/sample_tasks/phone_validation.md +0 -34
  43. agentic_python_coder-2.2.1/examples/regex/sample_tasks/test_email/email_extractor.py +0 -77
  44. agentic_python_coder-2.2.1/examples/regex/sample_tasks/test_email/extracted_emails.txt +0 -7
  45. agentic_python_coder-2.2.1/examples/regex/sample_tasks/test_email/text.txt +0 -1
  46. agentic_python_coder-2.2.1/examples/regex/sample_tasks/url_parsing.md +0 -62
  47. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/LICENSE +0 -0
  48. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/.gitignore +0 -0
  49. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/__init__.py +0 -0
  50. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/clingo/README.md +0 -0
  51. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/clingo/clingo.md +0 -0
  52. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/clingo/sample_tasks/bird_reasoning.md +0 -0
  53. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/clingo/sample_tasks/diagnosis.md +0 -0
  54. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/clingo/sample_tasks/simple_coloring.md +0 -0
  55. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/clingo/sample_tasks/stable_marriage.md +0 -0
  56. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/clingo/sample_tasks/sudoku_mini.md +0 -0
  57. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/cpmpy/README.md +0 -0
  58. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/cpmpy/sample_tasks/magic_square.md +0 -0
  59. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/cpmpy/sample_tasks/n_queens.md +0 -0
  60. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/regex/README.md +0 -0
  61. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/regex/regex.md +0 -0
  62. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/regex/sample_tasks/email_extraction.md +0 -0
  63. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/regex/sample_tasks/phone_validation.md +0 -0
  64. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/examples/regex/sample_tasks/url_parsing.md +0 -0
  65. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/models/deepseek31.json +0 -0
  66. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/models/gemini25.json +0 -0
  67. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/models/grok41.json +0 -0
  68. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/models/opus45.json +0 -0
  69. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/models/qwen3.json +0 -0
  70. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/models/sonnet45.json +0 -0
  71. {agentic_python_coder-2.2.1 → agentic_python_coder-3.0.0}/coder/src/agentic_python_coder/project_md.py +0 -0
@@ -152,8 +152,10 @@ cython_debug/
152
152
  uv.lock
153
153
 
154
154
  # Project specific
155
+ tests/
155
156
  coder-examples/
156
157
  CLAUDE-archive.md
158
+ EXPERTISE/
157
159
  PROCESS_NOTES.md
158
160
  conversation_log.json
159
161
  coder_output.log
@@ -171,9 +173,6 @@ PAPER/
171
173
  PAPER-ASP/
172
174
  ZEBRA/
173
175
  .mcp.json
174
- examples/cpmpy/cpmpy_v*.md
175
- examples/cpmpy/cpmpy.md.backup-*
176
- examples/clingo/clingo_v*.md
177
176
 
178
177
  # Test files and folders (root level only)
179
178
  /test-*/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agentic-python-coder
3
- Version: 2.2.1
3
+ Version: 3.0.0
4
4
  Summary: A lightweight Python coding agent that writes, executes, and iterates on code through natural language instructions
5
5
  Author: Stefan Szeider
6
6
  License: Apache-2.0
@@ -14,25 +14,21 @@ Classifier: Programming Language :: Python :: 3.13
14
14
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
15
  Classifier: Topic :: Software Development :: Code Generators
16
16
  Requires-Python: <3.14,>=3.13
17
- Requires-Dist: ipykernel>=6.30.1
18
- Requires-Dist: jupyter-client>=8.6.3
19
- Requires-Dist: langchain-anthropic>=1.2.0
20
- Requires-Dist: langchain-core>=1.1.0
21
- Requires-Dist: langchain-experimental>=0.4.0
22
- Requires-Dist: langchain-openai>=1.1.0
23
- Requires-Dist: langgraph>=1.0.4
24
- Requires-Dist: mcp>=1.0.0
17
+ Requires-Dist: ipykernel>=7.1.0
18
+ Requires-Dist: jupyter-client>=8.8.0
19
+ Requires-Dist: mcp>=1.26.0
20
+ Requires-Dist: openai>=2.16.0
25
21
  Requires-Dist: python-dotenv>=1.2.1
26
22
  Requires-Dist: pyyaml>=6.0.3
27
- Requires-Dist: rich>=14.2.0
23
+ Requires-Dist: rich>=14.3.1
28
24
  Provides-Extra: dev
29
- Requires-Dist: mypy>=1.19.0; extra == 'dev'
30
- Requires-Dist: ruff>=0.14.7; extra == 'dev'
25
+ Requires-Dist: mypy>=1.19.1; extra == 'dev'
26
+ Requires-Dist: ruff>=0.14.14; extra == 'dev'
31
27
  Provides-Extra: test
32
- Requires-Dist: pytest-asyncio>=1.2.0; extra == 'test'
28
+ Requires-Dist: pytest-asyncio>=1.3.0; extra == 'test'
33
29
  Requires-Dist: pytest-cov>=7.0.0; extra == 'test'
34
30
  Requires-Dist: pytest-watch>=4.2.0; extra == 'test'
35
- Requires-Dist: pytest>=9.0.1; extra == 'test'
31
+ Requires-Dist: pytest>=9.0.2; extra == 'test'
36
32
  Description-Content-Type: text/markdown
37
33
 
38
34
  # Agentic Python Coder
@@ -41,11 +37,10 @@ Description-Content-Type: text/markdown
41
37
  [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
42
38
  [![MCP Compatible](https://img.shields.io/badge/MCP-Compatible-green.svg)](https://modelcontextprotocol.io/)
43
39
  [![UV](https://img.shields.io/badge/Packaged%20with-UV-purple)](https://github.com/astral-sh/uv)
44
- [![LangGraph](https://img.shields.io/badge/Built%20with-LangGraph-green)](https://github.com/langchain-ai/langgraph)
45
40
 
46
41
  This package provides two utilities for Python code execution:
47
42
 
48
- 1. **coder** — An autonomous coding agent using the ReAct framework (CLI + Python library)
43
+ 1. **coder** — An autonomous coding agent using the ReAct pattern (CLI + Python library)
49
44
  2. **ipython_mcp** — An MCP server that gives any MCP-compatible client (Claude Desktop, etc.) Python execution capability
50
45
 
51
46
  Both share a persistent IPython kernel for stateful code execution.
@@ -156,8 +151,9 @@ coder --model opus45 "task" # Claude Opus 4.5
156
151
  coder --model deepseek31 "task" # DeepSeek v3.1
157
152
  coder --model grok41 "task" # X.AI Grok 4.1
158
153
  coder --model qwen3 "task" # Qwen3 Coder
159
- coder --model gemini25 "task" # Gemini Pro 2.5
160
- coder --model gpt5 "task" # GPT-5
154
+ coder --model gemini25 "task" # Gemini Pro 2.5
155
+ coder --model gemini3pro "task" # Gemini 3 Pro Preview
156
+ coder --model gpt52 "task" # GPT-5.2
161
157
 
162
158
  # Custom model (JSON file)
163
159
  coder --model ./mymodel.json "task"
@@ -165,16 +161,17 @@ coder --model ./mymodel.json "task"
165
161
 
166
162
  ### Project Templates
167
163
 
168
- Domain-specific templates improve results:
164
+ Domain-specific templates improve results. Bundled examples are available on GitHub at [`coder/src/agentic_python_coder/examples/`](coder/src/agentic_python_coder/examples/). Use `--init` to copy them locally:
169
165
 
170
166
  ```bash
171
- # Initialize example templates (creates coder-examples/ directory)
167
+ # Copy all bundled examples to coder-examples/
172
168
  coder --init
173
169
 
174
- # Constraint programming with CPMpy
175
- coder --with cpmpy --project coder-examples/cpmpy/cpmpy.md "Solve 8-queens"
170
+ # Or copy a specific template
171
+ coder --init cpmpy
176
172
 
177
- # Answer Set Programming with Clingo
173
+ # Then use with your task
174
+ coder --with cpmpy --project coder-examples/cpmpy/cpmpy.md "Solve 8-queens"
178
175
  coder --with clingo --project coder-examples/clingo/clingo.md "Model bird flight"
179
176
  ```
180
177
 
@@ -262,7 +259,7 @@ from agentic_python_coder import get_openrouter_llm, list_available_models
262
259
 
263
260
  llm = get_openrouter_llm(model="sonnet45")
264
261
  print(list_available_models())
265
- # ['deepseek31', 'gemini25', 'gpt5', 'grok41', 'opus45', 'qwen3', 'sonnet45']
262
+ # ['deepseek31', 'gemini25', 'gemini3pro', 'gpt52', 'grok41', 'opus45', 'qwen3', 'sonnet45']
266
263
  ```
267
264
 
268
265
  ---
@@ -291,17 +288,32 @@ Add to your MCP settings (e.g., `~/.claude/claude_desktop_config.json` or projec
291
288
  | Tool | Description |
292
289
  |------|-------------|
293
290
  | `python_exec` | Execute Python code. Auto-starts session if needed. Default 30s timeout. |
294
- | `python_reset` | Clear session state. Optionally install packages (e.g., `packages=["numpy", "pandas"]`). |
295
- | `python_status` | Check if session is active, Python version, installed packages, defined variables. |
291
+ | `python_reset` | Create new kernel (no `kernel_id`) OR reset existing kernel (with `kernel_id`). Optionally install packages. |
292
+ | `python_status` | Check session state: active flag, all active kernel IDs, Python version, packages, variables. |
296
293
  | `python_interrupt` | Send interrupt signal to stop long-running code. Session state is preserved. |
297
294
 
295
+ ### Multi-Agent Workflow
296
+
297
+ For parallel agents, each agent gets its own kernel:
298
+
299
+ ```
300
+ Agent A                              Agent B
301
+ ────────                             ────────
302
+ python_reset() → kernel_id="aaa"     python_reset() → kernel_id="bbb"
303
+ python_exec(kernel_id="aaa", ...)    python_exec(kernel_id="bbb", ...)
304
+ python_exec(kernel_id="aaa", ...)    python_exec(kernel_id="bbb", ...)
305
+ ```
306
+
307
+ Simple single-agent use: just call `python_exec()` — the default kernel auto-starts.
308
+
298
309
  ### Features
299
310
 
300
311
  - **Persistent state**: Variables, imports, and definitions persist across executions
301
- - **Auto-start**: Session starts automatically on first `python_exec`
312
+ - **Auto-start**: Default session starts automatically on first `python_exec`
302
313
  - **Package installation**: Use `python_reset` with `packages` parameter to install dependencies
303
314
  - **Timeout handling**: Long-running code times out gracefully (session preserved)
304
315
  - **Interrupt support**: Stop runaway code without losing session state
316
+ - **Multi-kernel**: Each `python_reset()` creates an isolated kernel for parallel agents
305
317
 
306
318
  ### Usage Tips
307
319
 
@@ -4,11 +4,10 @@
4
4
  [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
5
5
  [![MCP Compatible](https://img.shields.io/badge/MCP-Compatible-green.svg)](https://modelcontextprotocol.io/)
6
6
  [![UV](https://img.shields.io/badge/Packaged%20with-UV-purple)](https://github.com/astral-sh/uv)
7
- [![LangGraph](https://img.shields.io/badge/Built%20with-LangGraph-green)](https://github.com/langchain-ai/langgraph)
8
7
 
9
8
  This package provides two utilities for Python code execution:
10
9
 
11
- 1. **coder** — An autonomous coding agent using the ReAct framework (CLI + Python library)
10
+ 1. **coder** — An autonomous coding agent using the ReAct pattern (CLI + Python library)
12
11
  2. **ipython_mcp** — An MCP server that gives any MCP-compatible client (Claude Desktop, etc.) Python execution capability
13
12
 
14
13
  Both share a persistent IPython kernel for stateful code execution.
@@ -119,8 +118,9 @@ coder --model opus45 "task" # Claude Opus 4.5
119
118
  coder --model deepseek31 "task" # DeepSeek v3.1
120
119
  coder --model grok41 "task" # X.AI Grok 4.1
121
120
  coder --model qwen3 "task" # Qwen3 Coder
122
- coder --model gemini25 "task" # Gemini Pro 2.5
123
- coder --model gpt5 "task" # GPT-5
121
+ coder --model gemini25 "task" # Gemini Pro 2.5
122
+ coder --model gemini3pro "task" # Gemini 3 Pro Preview
123
+ coder --model gpt52 "task" # GPT-5.2
124
124
 
125
125
  # Custom model (JSON file)
126
126
  coder --model ./mymodel.json "task"
@@ -128,16 +128,17 @@ coder --model ./mymodel.json "task"
128
128
 
129
129
  ### Project Templates
130
130
 
131
- Domain-specific templates improve results:
131
+ Domain-specific templates improve results. Bundled examples are available on GitHub at [`coder/src/agentic_python_coder/examples/`](coder/src/agentic_python_coder/examples/). Use `--init` to copy them locally:
132
132
 
133
133
  ```bash
134
- # Initialize example templates (creates coder-examples/ directory)
134
+ # Copy all bundled examples to coder-examples/
135
135
  coder --init
136
136
 
137
- # Constraint programming with CPMpy
138
- coder --with cpmpy --project coder-examples/cpmpy/cpmpy.md "Solve 8-queens"
137
+ # Or copy a specific template
138
+ coder --init cpmpy
139
139
 
140
- # Answer Set Programming with Clingo
140
+ # Then use with your task
141
+ coder --with cpmpy --project coder-examples/cpmpy/cpmpy.md "Solve 8-queens"
141
142
  coder --with clingo --project coder-examples/clingo/clingo.md "Model bird flight"
142
143
  ```
143
144
 
@@ -225,7 +226,7 @@ from agentic_python_coder import get_openrouter_llm, list_available_models
225
226
 
226
227
  llm = get_openrouter_llm(model="sonnet45")
227
228
  print(list_available_models())
228
- # ['deepseek31', 'gemini25', 'gpt5', 'grok41', 'opus45', 'qwen3', 'sonnet45']
229
+ # ['deepseek31', 'gemini25', 'gemini3pro', 'gpt52', 'grok41', 'opus45', 'qwen3', 'sonnet45']
229
230
  ```
230
231
 
231
232
  ---
@@ -254,17 +255,32 @@ Add to your MCP settings (e.g., `~/.claude/claude_desktop_config.json` or projec
254
255
  | Tool | Description |
255
256
  |------|-------------|
256
257
  | `python_exec` | Execute Python code. Auto-starts session if needed. Default 30s timeout. |
257
- | `python_reset` | Clear session state. Optionally install packages (e.g., `packages=["numpy", "pandas"]`). |
258
- | `python_status` | Check if session is active, Python version, installed packages, defined variables. |
258
+ | `python_reset` | Create new kernel (no `kernel_id`) OR reset existing kernel (with `kernel_id`). Optionally install packages. |
259
+ | `python_status` | Check session state: active flag, all active kernel IDs, Python version, packages, variables. |
259
260
  | `python_interrupt` | Send interrupt signal to stop long-running code. Session state is preserved. |
260
261
 
262
+ ### Multi-Agent Workflow
263
+
264
+ For parallel agents, each agent gets its own kernel:
265
+
266
+ ```
267
+ Agent A                              Agent B
268
+ ────────                             ────────
269
+ python_reset() → kernel_id="aaa"     python_reset() → kernel_id="bbb"
270
+ python_exec(kernel_id="aaa", ...)    python_exec(kernel_id="bbb", ...)
271
+ python_exec(kernel_id="aaa", ...)    python_exec(kernel_id="bbb", ...)
272
+ ```
273
+
274
+ Simple single-agent use: just call `python_exec()` — the default kernel auto-starts.
275
+
261
276
  ### Features
262
277
 
263
278
  - **Persistent state**: Variables, imports, and definitions persist across executions
264
- - **Auto-start**: Session starts automatically on first `python_exec`
279
+ - **Auto-start**: Default session starts automatically on first `python_exec`
265
280
  - **Package installation**: Use `python_reset` with `packages` parameter to install dependencies
266
281
  - **Timeout handling**: Long-running code times out gracefully (session preserved)
267
282
  - **Interrupt support**: Stop runaway code without losing session state
283
+ - **Multi-kernel**: Each `python_reset()` creates an isolated kernel for parallel agents
268
284
 
269
285
  ### Usage Tips
270
286
 
@@ -59,8 +59,13 @@ Build solutions incrementally:
59
59
  ## Important Guidelines
60
60
 
61
61
  1. **Focus on the Task**: Complete what's requested, nothing more
62
- 2. **Test Efficiently**: One or two test cases are usually sufficient
63
- 3. **Save Once**: Call save_code only when you have the final code
62
+ 2. **Verify Before Saving**: Before calling save_code, you MUST verify your solution:
63
+ - Execute the full script via python_exec and confirm it produces correct output
64
+ - For constraint/logic problems: write a verification function that checks the output against EVERY constraint in the problem statement using plain Python asserts, independent of your solver model
65
+ - For problems with a specific output format: assert that JSON keys, array shapes, and value ranges match the spec exactly
66
+ - For optimization: confirm optimality (e.g., re-solve with a stricter bound and confirm infeasibility)
67
+ - Do NOT trust that solver.solve()==True means your model is correct — your constraints may be wrong
68
+ 3. **Save Once**: Call save_code only after verification passes
64
69
  4. **Stop When Done**: Don't add features not requested
65
70
 
66
71
  ## Error Recovery
@@ -83,9 +88,10 @@ Before saving any code with save_code, your script MUST pass this checklist:
83
88
  ## Task Completion
84
89
 
85
90
  When finishing:
86
- 1. Verify the solution works correctly
87
- 2. Clean the code according to the **Code Cleaning Requirements** above
88
- 3. Call save_code with the complete, cleaned code
89
- 4. STOP - do not continue unless asked
91
+ 1. Execute the full solution and verify it produces correct, complete output
92
+ 2. For logic/constraint problems: run an independent verification that checks every constraint
93
+ 3. Clean the code according to the **Code Cleaning Requirements** above
94
+ 4. Call save_code with the complete, cleaned code
95
+ 5. STOP - do not continue unless asked
90
96
 
91
97
  Your goal is efficient, focused problem-solving.
@@ -76,8 +76,13 @@ Build solutions incrementally:
76
76
 
77
77
  1. **Todo List is Mandatory**: ALWAYS use todo_write after understanding the problem
78
78
  2. **Focus on the Task**: Complete what's requested, nothing more
79
- 3. **Test Efficiently**: One or two test cases are usually sufficient
80
- 4. **Save Once**: Call save_code only when you have the final code
79
+ 3. **Verify Before Saving**: Before calling save_code, you MUST verify your solution:
80
+ - Execute the full script via python_exec and confirm it produces correct output
81
+ - For constraint/logic problems: write a verification function that checks the output against EVERY constraint in the problem statement using plain Python asserts, independent of your solver model
82
+ - For problems with a specific output format: assert that JSON keys, array shapes, and value ranges match the spec exactly
83
+ - For optimization: confirm optimality (e.g., re-solve with a stricter bound and confirm infeasibility)
84
+ - Do NOT trust that solver.solve()==True means your model is correct — your constraints may be wrong
85
+ 4. **Save Once**: Call save_code only after verification passes
81
86
  5. **Stop When Done**: Don't add features not requested
82
87
 
83
88
  ## Error Recovery
@@ -101,10 +106,11 @@ Before saving any code with save_code, your script MUST pass this checklist:
101
106
 
102
107
  When finishing (these should be your final todo items):
103
108
  1. Ensure all todo items are marked as completed
104
- 2. Verify the solution works correctly
105
- 3. Clean the code according to the **Code Cleaning Requirements** above
106
- 4. Call save_code with the complete, cleaned code (final todo item)
107
- 5. STOP - do not continue unless asked
109
+ 2. Execute the full solution and verify it produces correct, complete output
110
+ 3. For logic/constraint problems: run an independent verification that checks every constraint
111
+ 4. Clean the code according to the **Code Cleaning Requirements** above
112
+ 5. Call save_code with the complete, cleaned code (final todo item)
113
+ 6. STOP - do not continue unless asked
108
114
 
109
115
  Note: Your todo list should show a clear progression from planning through completion.
110
116
 
@@ -0,0 +1,89 @@
1
+ """Python Coding Agent - A minimal coding assistant using direct OpenAI API and OpenRouter."""
2
+
3
+ __version__ = "3.0.0"
4
+
5
+ # High-level API (recommended for most users)
6
+ from agentic_python_coder.runner import solve_task
7
+
8
+ # Lower-level API (for custom workflows)
9
+ from agentic_python_coder.agent import (
10
+ CodingAgent,
11
+ create_coding_agent,
12
+ run_agent,
13
+ get_final_response,
14
+ DEFAULT_STEP_LIMIT,
15
+ )
16
+
17
+ # LLM utilities
18
+ from agentic_python_coder.llm import (
19
+ LLMConfig,
20
+ get_openrouter_llm,
21
+ load_model_config,
22
+ list_available_models,
23
+ DEFAULT_MODEL,
24
+ )
25
+
26
+ # Tool system
27
+ from agentic_python_coder.tools import (
28
+ Tool,
29
+ ToolRegistry,
30
+ create_tool_registry,
31
+ )
32
+
33
+ # Kernel management (multi-kernel API)
34
+ from agentic_python_coder.kernel import (
35
+ # Core functions
36
+ create_kernel,
37
+ execute_in_kernel,
38
+ shutdown_kernel_by_id,
39
+ interrupt_kernel_by_id,
40
+ restart_kernel,
41
+ # Query functions
42
+ list_kernels,
43
+ kernel_exists,
44
+ get_kernel_info,
45
+ shutdown_all_kernels,
46
+ # Backward compat
47
+ get_kernel,
48
+ shutdown_kernel,
49
+ # Constants
50
+ DEFAULT_KERNEL_ID,
51
+ MAX_KERNELS,
52
+ )
53
+
54
+ __all__ = [
55
+ # Version
56
+ "__version__",
57
+ # High-level
58
+ "solve_task",
59
+ # Low-level agent
60
+ "CodingAgent",
61
+ "create_coding_agent",
62
+ "run_agent",
63
+ "get_final_response",
64
+ "DEFAULT_STEP_LIMIT",
65
+ # LLM
66
+ "LLMConfig",
67
+ "get_openrouter_llm",
68
+ "load_model_config",
69
+ "list_available_models",
70
+ "DEFAULT_MODEL",
71
+ # Tool system
72
+ "Tool",
73
+ "ToolRegistry",
74
+ "create_tool_registry",
75
+ # Kernel management
76
+ "create_kernel",
77
+ "execute_in_kernel",
78
+ "shutdown_kernel_by_id",
79
+ "interrupt_kernel_by_id",
80
+ "restart_kernel",
81
+ "list_kernels",
82
+ "kernel_exists",
83
+ "get_kernel_info",
84
+ "shutdown_all_kernels",
85
+ "get_kernel",
86
+ "shutdown_kernel",
87
+ "DEFAULT_KERNEL_ID",
88
+ "MAX_KERNELS",
89
+ ]