deepparallel 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepparallel-0.2.0/PKG-INFO +128 -0
- deepparallel-0.2.0/README.md +106 -0
- deepparallel-0.2.0/deepparallel/__init__.py +3 -0
- deepparallel-0.2.0/deepparallel/agent.py +286 -0
- deepparallel-0.2.0/deepparallel/backend.py +302 -0
- deepparallel-0.2.0/deepparallel/branding.py +211 -0
- deepparallel-0.2.0/deepparallel/cli.py +569 -0
- deepparallel-0.2.0/deepparallel/config.py +158 -0
- deepparallel-0.2.0/deepparallel/fusion.py +225 -0
- deepparallel-0.2.0/deepparallel/licensing.py +108 -0
- deepparallel-0.2.0/deepparallel/registry.json +13 -0
- deepparallel-0.2.0/deepparallel/renderer.py +222 -0
- deepparallel-0.2.0/deepparallel/system_prompt.txt +4 -0
- deepparallel-0.2.0/deepparallel/tools/__init__.py +27 -0
- deepparallel-0.2.0/deepparallel/tools/codeast.py +171 -0
- deepparallel-0.2.0/deepparallel/tools/edit.py +29 -0
- deepparallel-0.2.0/deepparallel/tools/files.py +74 -0
- deepparallel-0.2.0/deepparallel/tools/registry.py +149 -0
- deepparallel-0.2.0/deepparallel/tools/sandbox.py +110 -0
- deepparallel-0.2.0/deepparallel/tools/search.py +38 -0
- deepparallel-0.2.0/deepparallel/tools/shell.py +38 -0
- deepparallel-0.2.0/deepparallel/tools/vision.py +54 -0
- deepparallel-0.2.0/deepparallel/tools/web.py +76 -0
- deepparallel-0.2.0/deepparallel.egg-info/PKG-INFO +128 -0
- deepparallel-0.2.0/deepparallel.egg-info/SOURCES.txt +48 -0
- deepparallel-0.2.0/deepparallel.egg-info/dependency_links.txt +1 -0
- deepparallel-0.2.0/deepparallel.egg-info/entry_points.txt +3 -0
- deepparallel-0.2.0/deepparallel.egg-info/requires.txt +12 -0
- deepparallel-0.2.0/deepparallel.egg-info/top_level.txt +1 -0
- deepparallel-0.2.0/pyproject.toml +44 -0
- deepparallel-0.2.0/setup.cfg +4 -0
- deepparallel-0.2.0/tests/test_agent.py +385 -0
- deepparallel-0.2.0/tests/test_backend.py +115 -0
- deepparallel-0.2.0/tests/test_backend_chat.py +113 -0
- deepparallel-0.2.0/tests/test_backend_stream.py +110 -0
- deepparallel-0.2.0/tests/test_branding.py +135 -0
- deepparallel-0.2.0/tests/test_cli.py +318 -0
- deepparallel-0.2.0/tests/test_config.py +168 -0
- deepparallel-0.2.0/tests/test_fusion.py +162 -0
- deepparallel-0.2.0/tests/test_licensing.py +98 -0
- deepparallel-0.2.0/tests/test_renderer.py +203 -0
- deepparallel-0.2.0/tests/test_tool_registry.py +89 -0
- deepparallel-0.2.0/tests/test_tools_codeast.py +63 -0
- deepparallel-0.2.0/tests/test_tools_edit.py +30 -0
- deepparallel-0.2.0/tests/test_tools_files.py +56 -0
- deepparallel-0.2.0/tests/test_tools_sandbox.py +59 -0
- deepparallel-0.2.0/tests/test_tools_search.py +33 -0
- deepparallel-0.2.0/tests/test_tools_shell.py +24 -0
- deepparallel-0.2.0/tests/test_tools_vision.py +57 -0
- deepparallel-0.2.0/tests/test_tools_web.py +82 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: deepparallel
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: DeepParallel - a multi-model agentic coding CLI with cross-model Guardian review, served via Crowe Logic.
|
|
5
|
+
Author-email: Michael Crowe <michael@crowelogic.com>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://crowelogic.com
|
|
8
|
+
Keywords: deepparallel,agent,coding-agent,cli,llm,code-review,crowe-logic
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
Requires-Dist: click>=8.1.0
|
|
12
|
+
Requires-Dist: rich>=14.0.0
|
|
13
|
+
Requires-Dist: prompt-toolkit>=3.0.0
|
|
14
|
+
Requires-Dist: httpx>=0.28.0
|
|
15
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
16
|
+
Requires-Dist: tree-sitter>=0.25.0
|
|
17
|
+
Requires-Dist: tree-sitter-language-pack>=1.8.0
|
|
18
|
+
Requires-Dist: cryptography>=42.0.0
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
21
|
+
Requires-Dist: ruff>=0.6.0; extra == "dev"
|
|
22
|
+
|
|
23
|
+
# DeepParallel
|
|
24
|
+
|
|
25
|
+
A focused command-line interface for the DeepParallel model, served via Crowe Logic.
|
|
26
|
+
|
|
27
|
+
## Install
|
|
28
|
+
|
|
29
|
+
uv venv
|
|
30
|
+
uv pip install -p .venv -e .
|
|
31
|
+
|
|
32
|
+
## Configure
|
|
33
|
+
|
|
34
|
+
Set the backend env vars (a `.env` file in the working directory is loaded automatically):
|
|
35
|
+
|
|
36
|
+
# default backend: azure
|
|
37
|
+
AZURE_CORE_ENDPOINT=https://<resource>.openai.azure.com
|
|
38
|
+
AZURE_CORE_API_KEY=<key>
|
|
39
|
+
# optional overrides
|
|
40
|
+
DEEPPARALLEL_API_VERSION=2024-08-01-preview
|
|
41
|
+
|
|
42
|
+
# or route through the Crowe Logic Foundry control plane:
|
|
43
|
+
DEEPPARALLEL_BACKEND=foundry
|
|
44
|
+
FOUNDRY_BASE_URL=https://<control-plane>
|
|
45
|
+
FOUNDRY_API_KEY=<token>
|
|
46
|
+
|
|
47
|
+
## Use
|
|
48
|
+
|
|
49
|
+
deepparallel # interactive agent chat
|
|
50
|
+
deepparallel run "question" # one-shot, pipe-friendly (answer on stdout)
|
|
51
|
+
deepparallel tools # list the agent's tools
|
|
52
|
+
deepparallel info # model + backend status
|
|
53
|
+
deepparallel doctor # diagnose config + reachability
|
|
54
|
+
deepparallel run --no-tools "..." # plain chat, no tools
|
|
55
|
+
deepparallel run --yes "..." # auto-approve tool actions
|
|
56
|
+
|
|
57
|
+
## Fusion (stacking models for stronger output)
|
|
58
|
+
|
|
59
|
+
`deepparallel` can stack the answerer with a separate reasoning model. All modes
|
|
60
|
+
compose hosted backends (no GPU/weight-merging), so they are API-call stacking.
|
|
61
|
+
|
|
62
|
+
deepparallel run --fuse reason "hard question" # reasoner thinks, answerer answers
|
|
63
|
+
deepparallel run --fuse escalate "question" # answer first; escalate to reasoner only if unsure
|
|
64
|
+
deepparallel run --deep "question" # heavy: many models in parallel + a judge (slow)
|
|
65
|
+
deepparallel run --dual "A,B" "question" # compare two deployments side by side
|
|
66
|
+
deepparallel run --dual "A,B" --synth "question" # ...and synthesize a merged answer
|
|
67
|
+
|
|
68
|
+
`--fuse` also works in interactive chat. Set a default with `DEEPPARALLEL_FUSION=reason`.
|
|
69
|
+
|
|
70
|
+
## Fusion-native UX (what single-model agents cannot do)
|
|
71
|
+
|
|
72
|
+
- **Guardian review** - before an edit (`write_file` / `edit_file` / `ast_replace_symbol`)
|
|
73
|
+
is applied, a second model reviews the diff and its verdict
|
|
74
|
+
(`safe` / `risky: ...` / `bug: ...`) is shown in the confirm card. Advisory:
|
|
75
|
+
you still approve. Toggle with `DEEPPARALLEL_GUARDIAN`; pick the reviewer with
|
|
76
|
+
`DEEPPARALLEL_GUARDIAN_DEPLOYMENT`.
|
|
77
|
+
- **Consensus + divergence** - `run --deep` prints a `consensus:` chip from
|
|
78
|
+
cross-model agreement (agreement, not correctness), and on low agreement
|
|
79
|
+
lists the dissenting candidates.
|
|
80
|
+
- **Live dial** - in interactive chat, `/fast`, `/fuse`, `/escalate` switch the
|
|
81
|
+
fusion mode mid-session (shown in the prompt); `/deep` runs the next prompt as
|
|
82
|
+
a multi-model query.
|
|
83
|
+
|
|
84
|
+
## Agent and tools
|
|
85
|
+
|
|
86
|
+
`deepparallel` is an agent: it can call tools to inspect and change your project.
|
|
87
|
+
|
|
88
|
+
- Read-only (run automatically): `read_file`, `list_dir`, `glob`, `grep`,
|
|
89
|
+
`ast_symbols`, `ast_show_symbol`, `web_fetch`, `web_search`, `analyze_image`.
|
|
90
|
+
- Mutating / executing (require confirmation): `write_file`, `edit_file`,
|
|
91
|
+
`ast_replace_symbol`, `run_shell`, `run_code`.
|
|
92
|
+
|
|
93
|
+
`web_search` needs `DEEPPARALLEL_SEARCH_API_KEY`; `analyze_image` works out of the
|
|
94
|
+
box on a multimodal deployment (override with `DEEPPARALLEL_VISION_DEPLOYMENT`).
|
|
95
|
+
|
|
96
|
+
In interactive chat, mutating actions prompt for y/n. In `run` (non-interactive)
|
|
97
|
+
they are denied unless you pass `--yes`. `ast_*` tools are multi-language via
|
|
98
|
+
tree-sitter; `run_code` executes in a Docker sandbox when available, else a
|
|
99
|
+
timeboxed local subprocess.
|
|
100
|
+
|
|
101
|
+
## Configuration reference
|
|
102
|
+
|
|
103
|
+
| Variable | Purpose | Default |
|
|
104
|
+
|---|---|---|
|
|
105
|
+
| `DEEPPARALLEL_BACKEND` | `azure` or `foundry` | `azure` |
|
|
106
|
+
| `AZURE_CORE_ENDPOINT` / `AZURE_CORE_API_KEY` | Azure transport | (required for azure) |
|
|
107
|
+
| `DEEPPARALLEL_API_VERSION` | Azure API version | `2024-08-01-preview` |
|
|
108
|
+
| `FOUNDRY_BASE_URL` / `FOUNDRY_API_KEY` | control-plane transport | (required for foundry) |
|
|
109
|
+
| `DEEPPARALLEL_TEMPERATURE` | default sampling temperature | `0.4` |
|
|
110
|
+
| `DEEPPARALLEL_MAX_TOKENS` | response cap | `2048` |
|
|
111
|
+
| `DEEPPARALLEL_THINK` | surface reasoning stream | `0` (answer-only) |
|
|
112
|
+
| `DEEPPARALLEL_TOOLS` | enable agent tools | `1` (on) |
|
|
113
|
+
| `DEEPPARALLEL_AUTO_APPROVE` | auto-approve mutating tools | `0` (off) |
|
|
114
|
+
| `DEEPPARALLEL_MAX_STEPS` | max agent tool-call rounds | `12` |
|
|
115
|
+
| `DEEPPARALLEL_SHELL_TIMEOUT` | run_shell timeout (s) | `120` |
|
|
116
|
+
| `DEEPPARALLEL_SANDBOX` | `auto` / `docker` / `subprocess` for run_code | `auto` |
|
|
117
|
+
| `DEEPPARALLEL_PLAIN` | force plain (non-rich) output | `0` |
|
|
118
|
+
| `DEEPPARALLEL_FUSION` | default fusion: `off` / `reason` / `escalate` | `off` |
|
|
119
|
+
| `DEEPPARALLEL_REASONER_DEPLOYMENT` | reasoner model for fusion | `DeepSeek-R1-0528` |
|
|
120
|
+
| `DEEPPARALLEL_PARALLEL_MODELS` | comma-separated chains for `--deep` | (three defaults) |
|
|
121
|
+
| `DEEPPARALLEL_JUDGE_DEPLOYMENT` | judge/synthesizer model | the primary deployment |
|
|
122
|
+
| `DEEPPARALLEL_SEARCH_API_KEY` | enables web_search (Brave Search API) | (unset) |
|
|
123
|
+
| `DEEPPARALLEL_SEARCH_URL` | search API endpoint | Brave web search |
|
|
124
|
+
| `DEEPPARALLEL_VISION_DEPLOYMENT` | multimodal model for analyze_image | `Llama-4-Scout` |
|
|
125
|
+
| `DEEPPARALLEL_GUARDIAN` | second-model review of edits before apply | `1` (on) |
|
|
126
|
+
| `DEEPPARALLEL_GUARDIAN_DEPLOYMENT` | the reviewer model | the reasoner |
|
|
127
|
+
|
|
128
|
+
DeepParallel is served via Crowe Logic infrastructure. https://crowelogic.com
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# DeepParallel
|
|
2
|
+
|
|
3
|
+
A focused command-line interface for the DeepParallel model, served via Crowe Logic.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
uv venv
|
|
8
|
+
uv pip install -p .venv -e .
|
|
9
|
+
|
|
10
|
+
## Configure
|
|
11
|
+
|
|
12
|
+
Set the backend env vars (a `.env` file in the working directory is loaded automatically):
|
|
13
|
+
|
|
14
|
+
# default backend: azure
|
|
15
|
+
AZURE_CORE_ENDPOINT=https://<resource>.openai.azure.com
|
|
16
|
+
AZURE_CORE_API_KEY=<key>
|
|
17
|
+
# optional overrides
|
|
18
|
+
DEEPPARALLEL_API_VERSION=2024-08-01-preview
|
|
19
|
+
|
|
20
|
+
# or route through the Crowe Logic Foundry control plane:
|
|
21
|
+
DEEPPARALLEL_BACKEND=foundry
|
|
22
|
+
FOUNDRY_BASE_URL=https://<control-plane>
|
|
23
|
+
FOUNDRY_API_KEY=<token>
|
|
24
|
+
|
|
25
|
+
## Use
|
|
26
|
+
|
|
27
|
+
deepparallel # interactive agent chat
|
|
28
|
+
deepparallel run "question" # one-shot, pipe-friendly (answer on stdout)
|
|
29
|
+
deepparallel tools # list the agent's tools
|
|
30
|
+
deepparallel info # model + backend status
|
|
31
|
+
deepparallel doctor # diagnose config + reachability
|
|
32
|
+
deepparallel run --no-tools "..." # plain chat, no tools
|
|
33
|
+
deepparallel run --yes "..." # auto-approve tool actions
|
|
34
|
+
|
|
35
|
+
## Fusion (stacking models for stronger output)
|
|
36
|
+
|
|
37
|
+
`deepparallel` can stack the answerer with a separate reasoning model. All modes
|
|
38
|
+
compose hosted backends (no GPU/weight-merging), so they are API-call stacking.
|
|
39
|
+
|
|
40
|
+
deepparallel run --fuse reason "hard question" # reasoner thinks, answerer answers
|
|
41
|
+
deepparallel run --fuse escalate "question" # answer first; escalate to reasoner only if unsure
|
|
42
|
+
deepparallel run --deep "question" # heavy: many models in parallel + a judge (slow)
|
|
43
|
+
deepparallel run --dual "A,B" "question" # compare two deployments side by side
|
|
44
|
+
deepparallel run --dual "A,B" --synth "question" # ...and synthesize a merged answer
|
|
45
|
+
|
|
46
|
+
`--fuse` also works in interactive chat. Set a default with `DEEPPARALLEL_FUSION=reason`.
|
|
47
|
+
|
|
48
|
+
## Fusion-native UX (what single-model agents cannot do)
|
|
49
|
+
|
|
50
|
+
- **Guardian review** - before an edit (`write_file` / `edit_file` / `ast_replace_symbol`)
|
|
51
|
+
is applied, a second model reviews the diff and its verdict
|
|
52
|
+
(`safe` / `risky: ...` / `bug: ...`) is shown in the confirm card. Advisory:
|
|
53
|
+
you still approve. Toggle with `DEEPPARALLEL_GUARDIAN`; pick the reviewer with
|
|
54
|
+
`DEEPPARALLEL_GUARDIAN_DEPLOYMENT`.
|
|
55
|
+
- **Consensus + divergence** - `run --deep` prints a `consensus:` chip from
|
|
56
|
+
cross-model agreement (agreement, not correctness), and on low agreement
|
|
57
|
+
lists the dissenting candidates.
|
|
58
|
+
- **Live dial** - in interactive chat, `/fast`, `/fuse`, `/escalate` switch the
|
|
59
|
+
fusion mode mid-session (shown in the prompt); `/deep` runs the next prompt as
|
|
60
|
+
a multi-model query.
|
|
61
|
+
|
|
62
|
+
## Agent and tools
|
|
63
|
+
|
|
64
|
+
`deepparallel` is an agent: it can call tools to inspect and change your project.
|
|
65
|
+
|
|
66
|
+
- Read-only (run automatically): `read_file`, `list_dir`, `glob`, `grep`,
|
|
67
|
+
`ast_symbols`, `ast_show_symbol`, `web_fetch`, `web_search`, `analyze_image`.
|
|
68
|
+
- Mutating / executing (require confirmation): `write_file`, `edit_file`,
|
|
69
|
+
`ast_replace_symbol`, `run_shell`, `run_code`.
|
|
70
|
+
|
|
71
|
+
`web_search` needs `DEEPPARALLEL_SEARCH_API_KEY`; `analyze_image` works out of the
|
|
72
|
+
box on a multimodal deployment (override with `DEEPPARALLEL_VISION_DEPLOYMENT`).
|
|
73
|
+
|
|
74
|
+
In interactive chat, mutating actions prompt for y/n. In `run` (non-interactive)
|
|
75
|
+
they are denied unless you pass `--yes`. `ast_*` tools are multi-language via
|
|
76
|
+
tree-sitter; `run_code` executes in a Docker sandbox when available, else a
|
|
77
|
+
timeboxed local subprocess.
|
|
78
|
+
|
|
79
|
+
## Configuration reference
|
|
80
|
+
|
|
81
|
+
| Variable | Purpose | Default |
|
|
82
|
+
|---|---|---|
|
|
83
|
+
| `DEEPPARALLEL_BACKEND` | `azure` or `foundry` | `azure` |
|
|
84
|
+
| `AZURE_CORE_ENDPOINT` / `AZURE_CORE_API_KEY` | Azure transport | (required for azure) |
|
|
85
|
+
| `DEEPPARALLEL_API_VERSION` | Azure API version | `2024-08-01-preview` |
|
|
86
|
+
| `FOUNDRY_BASE_URL` / `FOUNDRY_API_KEY` | control-plane transport | (required for foundry) |
|
|
87
|
+
| `DEEPPARALLEL_TEMPERATURE` | default sampling temperature | `0.4` |
|
|
88
|
+
| `DEEPPARALLEL_MAX_TOKENS` | response cap | `2048` |
|
|
89
|
+
| `DEEPPARALLEL_THINK` | surface reasoning stream | `0` (answer-only) |
|
|
90
|
+
| `DEEPPARALLEL_TOOLS` | enable agent tools | `1` (on) |
|
|
91
|
+
| `DEEPPARALLEL_AUTO_APPROVE` | auto-approve mutating tools | `0` (off) |
|
|
92
|
+
| `DEEPPARALLEL_MAX_STEPS` | max agent tool-call rounds | `12` |
|
|
93
|
+
| `DEEPPARALLEL_SHELL_TIMEOUT` | run_shell timeout (s) | `120` |
|
|
94
|
+
| `DEEPPARALLEL_SANDBOX` | `auto` / `docker` / `subprocess` for run_code | `auto` |
|
|
95
|
+
| `DEEPPARALLEL_PLAIN` | force plain (non-rich) output | `0` |
|
|
96
|
+
| `DEEPPARALLEL_FUSION` | default fusion: `off` / `reason` / `escalate` | `off` |
|
|
97
|
+
| `DEEPPARALLEL_REASONER_DEPLOYMENT` | reasoner model for fusion | `DeepSeek-R1-0528` |
|
|
98
|
+
| `DEEPPARALLEL_PARALLEL_MODELS` | comma-separated chains for `--deep` | (three defaults) |
|
|
99
|
+
| `DEEPPARALLEL_JUDGE_DEPLOYMENT` | judge/synthesizer model | the primary deployment |
|
|
100
|
+
| `DEEPPARALLEL_SEARCH_API_KEY` | enables web_search (Brave Search API) | (unset) |
|
|
101
|
+
| `DEEPPARALLEL_SEARCH_URL` | search API endpoint | Brave web search |
|
|
102
|
+
| `DEEPPARALLEL_VISION_DEPLOYMENT` | multimodal model for analyze_image | `Llama-4-Scout` |
|
|
103
|
+
| `DEEPPARALLEL_GUARDIAN` | second-model review of edits before apply | `1` (on) |
|
|
104
|
+
| `DEEPPARALLEL_GUARDIAN_DEPLOYMENT` | the reviewer model | the reasoner |
|
|
105
|
+
|
|
106
|
+
DeepParallel is served via Crowe Logic infrastructure. https://crowelogic.com
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
"""The agentic tool-calling loop.
|
|
2
|
+
|
|
3
|
+
UI-agnostic: it drives a backend (`chat`) and a tool registry, and surfaces
|
|
4
|
+
activity through an injected renderer. Dangerous tools are gated; the loop
|
|
5
|
+
terminates on a content answer or a step cap.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import difflib
|
|
11
|
+
import json
|
|
12
|
+
import time
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
from deepparallel.tools.registry import coerce_args
|
|
16
|
+
|
|
17
|
+
_MAX_TOOL_RESULT = 50_000
|
|
18
|
+
_GATED_PATH_TOOLS = ("write_file", "edit_file")
|
|
19
|
+
_EDIT_TOOLS = ("write_file", "edit_file", "ast_replace_symbol")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _parse_args(raw: str) -> dict:
|
|
23
|
+
if not raw:
|
|
24
|
+
return {}
|
|
25
|
+
try:
|
|
26
|
+
value = json.loads(raw)
|
|
27
|
+
except json.JSONDecodeError:
|
|
28
|
+
return {"__parse_error__": raw[:2000]}
|
|
29
|
+
return value if isinstance(value, dict) else {}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _plural(n: int, sing: str, plur: str) -> str:
|
|
33
|
+
return f"{n} {sing if n == 1 else plur}"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _summarize_result(name: str, result: str) -> str:
|
|
37
|
+
"""A short, human-readable summary of a tool result for the UI.
|
|
38
|
+
|
|
39
|
+
The model still receives the full result; this is only what the user sees
|
|
40
|
+
on the tool card, so it must be legible rather than raw truncated JSON.
|
|
41
|
+
"""
|
|
42
|
+
try:
|
|
43
|
+
obj = json.loads(result)
|
|
44
|
+
except (json.JSONDecodeError, ValueError):
|
|
45
|
+
obj = None
|
|
46
|
+
if isinstance(obj, dict) and "error" in obj:
|
|
47
|
+
return f"error: {str(obj['error'])[:100]}"
|
|
48
|
+
if isinstance(obj, dict):
|
|
49
|
+
if name == "list_dir":
|
|
50
|
+
return _plural(len(obj.get("entries", [])), "entry", "entries")
|
|
51
|
+
if name in ("glob", "grep"):
|
|
52
|
+
return _plural(len(obj.get("matches", [])), "match", "matches")
|
|
53
|
+
if name == "web_search":
|
|
54
|
+
return _plural(len(obj.get("results", [])), "result", "results")
|
|
55
|
+
if name == "ast_symbols":
|
|
56
|
+
return _plural(len(obj.get("symbols", [])), "symbol", "symbols")
|
|
57
|
+
if name == "write_file":
|
|
58
|
+
return f"wrote {obj.get('bytes', 0)} bytes"
|
|
59
|
+
if name == "edit_file":
|
|
60
|
+
return "edited"
|
|
61
|
+
if name == "ast_replace_symbol":
|
|
62
|
+
return "replaced"
|
|
63
|
+
if name == "run_shell":
|
|
64
|
+
lines = (obj.get("stdout") or "").count("\n")
|
|
65
|
+
return f"rc {obj.get('return_code')} · {_plural(lines, 'line', 'lines')}"
|
|
66
|
+
if name == "run_code":
|
|
67
|
+
return f"rc {obj.get('exit_code')} · {obj.get('sandbox', '')}"
|
|
68
|
+
if name == "web_fetch":
|
|
69
|
+
title = (obj.get("title") or "").strip()
|
|
70
|
+
chars = len(obj.get("text") or "")
|
|
71
|
+
return f'"{title[:40]}" · {chars} chars' if title else f"{chars} chars"
|
|
72
|
+
if name == "ast_show_symbol":
|
|
73
|
+
return _plural((obj.get("source") or "").count("\n") + 1, "line", "lines")
|
|
74
|
+
if name == "analyze_image":
|
|
75
|
+
return f"{len(obj.get('description') or '')} chars"
|
|
76
|
+
return next(iter(obj)) if obj else "ok"
|
|
77
|
+
if name == "read_file":
|
|
78
|
+
return _plural(result.count("\n") + 1, "line", "lines") if result else "0 lines"
|
|
79
|
+
return " ".join(result.split())[:80]
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _preview(args: dict) -> str:
|
|
83
|
+
try:
|
|
84
|
+
s = json.dumps(args)
|
|
85
|
+
except (TypeError, ValueError):
|
|
86
|
+
s = str(args)
|
|
87
|
+
return s[:80]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _outside_cwd(args: dict) -> bool:
|
|
91
|
+
p = args.get("file_path")
|
|
92
|
+
if not p:
|
|
93
|
+
return False
|
|
94
|
+
try:
|
|
95
|
+
Path(p).expanduser().resolve().relative_to(Path.cwd().resolve())
|
|
96
|
+
return False
|
|
97
|
+
except ValueError:
|
|
98
|
+
return True
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _diff_preview(name: str, args: dict) -> str:
|
|
102
|
+
path = args.get("file_path", "")
|
|
103
|
+
try:
|
|
104
|
+
target = Path(path).expanduser().resolve()
|
|
105
|
+
old = target.read_text(encoding="utf-8") if target.is_file() else ""
|
|
106
|
+
except OSError:
|
|
107
|
+
old = ""
|
|
108
|
+
if name == "write_file":
|
|
109
|
+
new = args.get("content", "")
|
|
110
|
+
if args.get("content_b64"):
|
|
111
|
+
import base64
|
|
112
|
+
|
|
113
|
+
try:
|
|
114
|
+
new = base64.b64decode(args["content_b64"]).decode("utf-8", errors="replace")
|
|
115
|
+
except Exception: # noqa: BLE001
|
|
116
|
+
new = "<base64 payload>"
|
|
117
|
+
else:
|
|
118
|
+
new = old.replace(args.get("old_string", ""), args.get("new_string", ""), 1)
|
|
119
|
+
diff = difflib.unified_diff(old.splitlines(), new.splitlines(), lineterm="", n=2)
|
|
120
|
+
text = "\n".join(list(diff)[:60])
|
|
121
|
+
return text or f"write {path}"
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _describe(name: str, args: dict) -> tuple[str, str]:
|
|
125
|
+
if name == "run_shell":
|
|
126
|
+
cmd = args.get("command", "")
|
|
127
|
+
return (
|
|
128
|
+
f"run shell: {cmd[:60]}",
|
|
129
|
+
f"command: {cmd}\ncwd: {args.get('working_directory') or '.'}",
|
|
130
|
+
)
|
|
131
|
+
if name in _GATED_PATH_TOOLS:
|
|
132
|
+
return f"{name}: {args.get('file_path', '')}", _diff_preview(name, args)
|
|
133
|
+
return name, json.dumps(args)[:500]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _guardian_review_content(name: str, args: dict) -> str:
    """Produce the text handed to the reviewer for a pending edit.

    Tools in ``_GATED_PATH_TOOLS`` are reviewed as a diff preview. Any other
    tool is described as a symbol replacement, with the new source capped at
    4000 characters.
    """
    if name not in _GATED_PATH_TOOLS:
        symbol = args.get("name", "")
        where = args.get("file_path", "")
        body = (args.get("new_source") or "")[:4000]
        return f"Replace symbol '{symbol}' in {where} with:\n{body}"
    return _diff_preview(name, args)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def guardian_review(guardian, content: str) -> str | None:
    """Ask an independent model for a one-line verdict on a proposed change.

    Calls ``guardian.chat(messages, [], 0.0, 512)`` and retries once if the
    call raises. The reply text is taken from ``content``, falling back to
    ``reasoning_content``. The first line starting with ``VERDICT:`` (in any
    letter case) supplies the verdict; without such a line the first 120
    characters of the first line are returned. Returns ``None`` when both
    attempts fail or the reply is empty.
    """
    instructions = (
        "You are an independent code reviewer giving a second opinion on a proposed "
        "change. Reply with exactly one line: 'VERDICT: safe', 'VERDICT: risky: <reason>', "
        "or 'VERDICT: bug: <reason>'. Be terse."
    )
    request = [
        {
            "role": "user",
            "content": f"{instructions}\n\nProposed change:\n{content}\n\nVerdict:",
        }
    ]

    reply = None
    for _attempt in (1, 2):  # one retry: review runs against a sometimes-flaky API
        try:
            reply = guardian.chat(request, [], 0.0, 512)
        except Exception:  # noqa: BLE001 - review is best-effort
            reply = None
            continue
        break
    if reply is None:
        return None

    body = (reply.get("content") or reply.get("reasoning_content") or "").strip()
    if not body:
        return None
    rows = body.splitlines()
    tagged = next(
        (row.strip() for row in rows if row.strip().upper().startswith("VERDICT:")),
        None,
    )
    if tagged is not None:
        return tagged.partition(":")[2].strip()
    return rows[0][:120]
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def verdict_severity(verdict: str | None) -> str:
    """Classify a verdict string into safe | risky | bug | unknown.

    Matching is case-insensitive and ignores surrounding whitespace. ``safe``
    must be the whole leading word: a free-form line such as "safety check
    removed" is not a clean verdict and is reported as ``unknown``. ``risky``
    and ``bug`` remain plain prefix matches. An empty or missing verdict is
    ``unknown``.
    """
    if not verdict:
        return "unknown"
    head = verdict.strip().lower()
    if head.startswith("safe"):
        tail = head[len("safe"):]
        # Accept "safe", "safe.", "safe: ..." but not "safety ..." / "safely ...".
        if not tail or not (tail[0].isalnum() or tail[0] == "_"):
            return "safe"
        return "unknown"
    if head.startswith("risky"):
        return "risky"
    if head.startswith("bug"):
        return "bug"
    return "unknown"
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def verdict_exit_code(verdict: str | None) -> int:
    """Exit code for `review` as a PR gate: 0 safe, 1 risky, 2 bug, 0 unknown."""
    severity = verdict_severity(verdict)
    codes = {"safe": 0, "risky": 1, "bug": 2, "unknown": 0}
    return codes[severity]
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _guardian_verdict(guardian, name: str, args: dict) -> str | None:
    """Review the pending call ``name(args)`` and return the reviewer's verdict.

    Builds the review text with ``_guardian_review_content`` and passes it to
    ``guardian_review``; the result of that call is returned unchanged.
    """
    change = _guardian_review_content(name, args)
    return guardian_review(guardian, change)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _approved(name, args, interactive, auto_approve, renderer, guardian=None) -> bool:
    """Decide whether a gated tool call may run.

    Order of checks:
    1. ``auto_approve`` grants the call, unless it is a file edit whose
       ``file_path`` resolves outside the current directory.
    2. Without an interactive session the call is denied.
    3. Otherwise the user is asked via ``renderer.confirm(title, detail)``.
       For edit tools, when a ``guardian`` is supplied, its verdict is
       appended to the detail text first (advisory only).

    Returns the renderer's answer in case 3, else ``True``/``False``.
    """
    # Every file-mutating tool is covered, including ast_replace_symbol, so an
    # edit outside the working directory can never be auto-approved.
    forced = name in _EDIT_TOOLS and _outside_cwd(args)
    if auto_approve and not forced:
        return True
    if not interactive:
        return False
    title, detail = _describe(name, args)
    if guardian is not None and name in _EDIT_TOOLS:
        verdict = _guardian_verdict(guardian, name, args)
        if verdict:
            detail = f"{detail}\n\nGuardian: {verdict}"
    return renderer.confirm(title, detail)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _stream_turn(backend, messages, schemas, settings, renderer):
|
|
209
|
+
"""Drive one streaming turn: stream content tokens live and capture the
|
|
210
|
+
assembled message (with any tool_calls) from the generator's return value."""
|
|
211
|
+
captured = {"msg": {"role": "assistant", "content": "", "tool_calls": None}}
|
|
212
|
+
|
|
213
|
+
def content_tokens():
|
|
214
|
+
gen = backend.stream_chat_tools(
|
|
215
|
+
messages, schemas, settings.temperature, settings.max_tokens
|
|
216
|
+
)
|
|
217
|
+
while True:
|
|
218
|
+
try:
|
|
219
|
+
channel, text = next(gen)
|
|
220
|
+
except StopIteration as stop:
|
|
221
|
+
captured["msg"] = stop.value
|
|
222
|
+
return
|
|
223
|
+
if channel == "content":
|
|
224
|
+
yield text
|
|
225
|
+
|
|
226
|
+
renderer.answer_stream(content_tokens())
|
|
227
|
+
return captured["msg"], True
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def run_agent(
    backend,
    registry,
    messages,
    settings,
    renderer,
    *,
    interactive: bool,
    auto_approve: bool,
    max_steps: int | None = None,
    stream: bool = False,
    guardian=None,
) -> str:
    """Drive the tool-calling loop until the model answers or the step cap is hit.

    Each step asks the backend for a message (streaming when ``stream`` is
    set and the backend has ``stream_chat_tools``). A message without tool
    calls is the final answer: it is appended to ``messages``, rendered if it
    was not already streamed, and returned. Otherwise every tool call is
    executed in order and its result (capped at ``_MAX_TOOL_RESULT``
    characters) is appended as a ``tool`` message. ``messages`` is mutated in
    place.

    Unknown tools, undecodable arguments, denied approvals and tool
    exceptions are all reported to the model as ``{"error": ...}`` results
    rather than raised. Returns the final answer text, or a fixed notice when
    the step limit is reached.
    """
    limit = settings.max_steps if max_steps is None else max_steps
    tool_schemas = registry.schemas()
    use_stream = stream and hasattr(backend, "stream_chat_tools")

    def _execute(tool_name, tool_args, meta):
        """Run one tool call and return its raw result or a JSON error."""
        if meta is None:
            return json.dumps({"error": f"unknown tool: {tool_name}"})
        if "__parse_error__" in tool_args:
            return json.dumps({"error": "invalid JSON arguments"})
        if meta.dangerous and not _approved(
            tool_name, tool_args, interactive, auto_approve, renderer, guardian
        ):
            return json.dumps({"error": "denied by user"})
        try:
            return registry.call(tool_name, **coerce_args(meta.parameters, tool_args))
        except Exception as exc:  # noqa: BLE001 - surface tool failure to model
            return json.dumps({"error": f"{type(exc).__name__}: {exc}"})

    for _step in range(limit):
        if use_stream:
            reply, streamed = _stream_turn(backend, messages, tool_schemas, settings, renderer)
        else:
            reply = backend.chat(messages, tool_schemas, settings.temperature, settings.max_tokens)
            streamed = False

        calls = reply.get("tool_calls")
        if calls:
            messages.append(
                {"role": "assistant", "content": reply.get("content"), "tool_calls": calls}
            )
            for call in calls:
                fn = call["function"]
                tool_name = fn["name"]
                tool_args = _parse_args(fn.get("arguments", ""))
                meta = registry.get(tool_name)
                renderer.tool_start(tool_name, _preview(tool_args))
                began = time.monotonic()
                text = str(_execute(tool_name, tool_args, meta))[:_MAX_TOOL_RESULT]
                # Heuristic: error results are JSON objects that lead with "error".
                succeeded = '"error"' not in text[:30]
                renderer.tool_result(
                    succeeded, _summarize_result(tool_name, text), time.monotonic() - began
                )
                messages.append({"role": "tool", "tool_call_id": call["id"], "content": text})
            continue

        # No tool calls: this message is the final answer.
        final = reply.get("content") or ""
        messages.append({"role": "assistant", "content": final})
        if not streamed:
            renderer.answer(final)
        return final

    return f"Reached the {limit}-step limit without a final answer."
|