nexhelm 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nexhelm-0.3.0.dist-info/METADATA +437 -0
- nexhelm-0.3.0.dist-info/RECORD +23 -0
- nexhelm-0.3.0.dist-info/WHEEL +4 -0
- nexhelm-0.3.0.dist-info/entry_points.txt +3 -0
- nexhelm_mcp/__init__.py +3 -0
- nexhelm_mcp/agents/__init__.py +22 -0
- nexhelm_mcp/agents/agent_workspace.py +81 -0
- nexhelm_mcp/agents/agent_workspace_proxy.py +266 -0
- nexhelm_mcp/agents/lab_workflow.py +880 -0
- nexhelm_mcp/agents/tasks.py +255 -0
- nexhelm_mcp/auth.py +130 -0
- nexhelm_mcp/cli.py +3076 -0
- nexhelm_mcp/client.py +133 -0
- nexhelm_mcp/config.py +296 -0
- nexhelm_mcp/github_device.py +84 -0
- nexhelm_mcp/login.py +280 -0
- nexhelm_mcp/server.py +461 -0
- nexhelm_mcp/ssh_config.py +113 -0
- nexhelm_mcp/streaming.py +267 -0
- nexhelm_mcp/supabase_auth.py +98 -0
- nexhelm_mcp/tasks_run.py +420 -0
- nexhelm_mcp/transcript.py +92 -0
- nexhelm_mcp/tunnel.py +75 -0
|
@@ -0,0 +1,437 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nexhelm
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Nexhelm CLI (`nex`). Public client for Nexhelm AI — login, workspaces, workflows. No core IP; the backend is the boundary.
|
|
5
|
+
Author: Nexhelm
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: httpx>=0.27.0
|
|
8
|
+
Requires-Dist: mcp>=1.0.0
|
|
9
|
+
Requires-Dist: pydantic>=2.6.0
|
|
10
|
+
Requires-Dist: rich>=13.7.0
|
|
11
|
+
Requires-Dist: typer>=0.12.0
|
|
12
|
+
Requires-Dist: websockets>=12.0
|
|
13
|
+
Provides-Extra: dev
|
|
14
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == 'dev'
|
|
15
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
16
|
+
Requires-Dist: respx>=0.21.0; extra == 'dev'
|
|
17
|
+
Requires-Dist: ruff>=0.4.0; extra == 'dev'
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# nexhelm-mcp
|
|
21
|
+
|
|
22
|
+
CLI + MCP server for Nexhelm. Lets Claude Code (and humans) design, verify,
|
|
23
|
+
and debug Workflow Lab runs end-to-end without leaving the terminal.
|
|
24
|
+
|
|
25
|
+
## Architecture
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
═══════════════ EXTERNAL ════════════════════════════════════════════
|
|
29
|
+
human ──┐
|
|
30
|
+
│ approves at gates, pastes transcript
|
|
31
|
+
▼
|
|
32
|
+
Claude Code (external) ─── reads/writes code in repo with approval
|
|
33
|
+
│ stdio
|
|
34
|
+
▼
|
|
35
|
+
═══════════════ SURFACE TIER (this package) ═════════════════════════
|
|
36
|
+
nexhelm-mcp server + nex CLI
|
|
37
|
+
- agent broker, capability scopes, view_urls, exit codes
|
|
38
|
+
- never inlines prompts; never returns LLM internals
|
|
39
|
+
- persona-as-action surface:
|
|
40
|
+
lab_workflow.{integration_engineer, designer, refine, run, verifier, trace, evaluator, show, list}
|
|
41
|
+
│ HTTPS, public schemas only
|
|
42
|
+
▼
|
|
43
|
+
═══════════════ ENGINE TIER (Nexhelm FastAPI, blackbox) ═════════════
|
|
44
|
+
Persona registry (proprietary):
|
|
45
|
+
integration_engineer services/personas/integration_engineer/
|
|
46
|
+
workflow_designer services/workflow_lab/designer.py
|
|
47
|
+
verifier_orchestrator services/workflow_lab/orchestrator.py
|
|
48
|
+
workflow_evaluator services/workflow_lab/evaluator.py
|
|
49
|
+
(future) log_aggregator, firm_wide, code, infra, cfp, ...
|
|
50
|
+
|
|
51
|
+
Each persona owns its prompts (private), model config (private), and
|
|
52
|
+
exposes a stable public input/output schema. The HTTP boundary crosses
|
|
53
|
+
ONLY public schemas. No prompt or chain-of-thought ever reaches the
|
|
54
|
+
surface tier.
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Agent broker, not REST wrapper.** The surface tier is pure transport.
|
|
58
|
+
Each agent in the broker registers via `agents/__init__.py`; the server
|
|
59
|
+
and CLI loop over the registry. Adding a "code agent" or "CFP agent" is
|
|
60
|
+
one new file plus one import.
|
|
61
|
+
|
|
62
|
+
**Persona pattern (engine).** New blackbox personas live in
|
|
63
|
+
`fastapi_app/app/services/personas/<name>/{persona.py, prompts.py,
|
|
64
|
+
schemas.py}`. `prompts.py` is private; `schemas.py` is the public
|
|
65
|
+
contract; routers serialize only the schema.
|
|
66
|
+
|
|
67
|
+
**One-click URLs everywhere.** Every agent return value includes `view_url`
|
|
68
|
+
(and `verification_task_url` where applicable) so Claude prints a link and
|
|
69
|
+
the user clicks straight into the frontend.
|
|
70
|
+
|
|
71
|
+
## Install
|
|
72
|
+
|
|
73
|
+
**Recommended (any user, any machine):** one-shot via `install.sh`:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
bash backend/tools/nexhelm-mcp/install.sh
|
|
77
|
+
# pipx-installs `nex` and `nexhelm-mcp-server` on PATH.
|
|
78
|
+
# Prints the exact Claude Code MCP config block to paste.
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**Or manual:**
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
cd backend/tools/nexhelm-mcp
|
|
85
|
+
python3.12 -m venv .venv
|
|
86
|
+
source .venv/bin/activate
|
|
87
|
+
pip install -e ".[dev]"
|
|
88
|
+
pytest -q # 117 passing
|
|
89
|
+
nex --help
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Two entrypoints (`pyproject.toml`):
|
|
93
|
+
|
|
94
|
+
- `nex` — human CLI
|
|
95
|
+
- `nexhelm-mcp-server` — stdio MCP server for Claude Code
|
|
96
|
+
|
|
97
|
+
## Setup — `nex auth setup -e EMAIL` (recommended)
|
|
98
|
+
|
|
99
|
+
Email + password, no browser, no env files. Works on any machine.
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
nex auth setup -e you@nexhelm.ai --backend-url <BACKEND_URL>
|
|
103
|
+
# Password: ****
|
|
104
|
+
# bootstrap: supabase_url=https://<project>.supabase.co; required email domain @nexhelm.ai
|
|
105
|
+
# ok authenticated as you@nexhelm.ai
|
|
106
|
+
#
|
|
107
|
+
# Your clients (12)
|
|
108
|
+
# 1 c_001 Acme Wealth ops@acme.com
|
|
109
|
+
# 2 c_002 Smith FO ...
|
|
110
|
+
# Pick by # (Enter to skip): 2
|
|
111
|
+
# ok Saved to ~/.nex/config (mode 0600)
|
|
112
|
+
|
|
113
|
+
nex doctor
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
What's happening:
|
|
117
|
+
1. Backend's `GET /auth/cli-config` returns the public Supabase URL + anon key
|
|
118
|
+
2. CLI calls Supabase Auth `/token?grant_type=password` with email + password
|
|
119
|
+
3. CLI POSTs the resulting Supabase access token to backend `POST /auth/cli-token`
|
|
120
|
+
4. Backend gates `@nexhelm.ai`, mints a 24h HS256 service JWT, returns
|
|
121
|
+
it + your clients
|
|
122
|
+
5. CLI saves the bundle to `~/.nex/config`
|
|
123
|
+
|
|
124
|
+
No `frontend/.env.local` needed. No shared HMAC secret on your machine.
|
|
125
|
+
|
|
126
|
+
### Optional: `nex auth login` (browser / PKCE)
|
|
127
|
+
|
|
128
|
+
Same end state as `setup -e`, but uses Supabase OAuth (Google by default)
|
|
129
|
+
instead of email + password. Useful if you'd rather not enter your
|
|
130
|
+
password into a CLI. Requires a one-time Supabase dashboard step:
|
|
131
|
+
|
|
132
|
+
- **Auth → Providers**: enable Google (or pass `--provider <name>`)
|
|
133
|
+
- **Auth → URL Configuration → Redirect URLs**: add `http://localhost:7866/callback`
|
|
134
|
+
|
|
135
|
+
If `redirect_to` isn't whitelisted, Supabase falls back to the project's
|
|
136
|
+
"Site URL" and the browser lands somewhere weird (usually the deployed
|
|
137
|
+
frontend behind Vercel SSO). One allowlist entry fixes it for everyone.
|
|
138
|
+
|
|
139
|
+
### Manual fallback: `nex auth setup` (legacy local-mint)
|
|
140
|
+
|
|
141
|
+
Backend devs only. Reads the shared HMAC secret from `frontend/.env.local`
|
|
142
|
+
(or `NEX_*` env vars) and mints JWTs locally per request. No backend
|
|
143
|
+
hop, no Supabase round-trip — but only works for users with that env
|
|
144
|
+
file on disk. Don't ship to the team; tell them to use `setup -e`.
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
nex auth setup
|
|
148
|
+
# Your user_id (Supabase UUID): <paste>
|
|
149
|
+
# Your client_id: <paste>
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
The CLI + the backend now live on the same branch (`marx/nex-cli-phase1`,
|
|
153
|
+
which is rebased on top of `marx/firm-workflow-context-design`). Point
|
|
154
|
+
`--backend-url` at any backend running this branch. Once both merge to
|
|
155
|
+
main/staging, point at staging/main as normal.
|
|
156
|
+
|
|
157
|
+
## Configure Claude Code
|
|
158
|
+
|
|
159
|
+
```jsonc
|
|
160
|
+
{
|
|
161
|
+
"mcpServers": {
|
|
162
|
+
"nexhelm": { "command": "nexhelm-mcp-server" }
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
The MCP server inherits the same `~/.nex/config` (and, on the legacy local-mint path only, `frontend/.env.local`)
|
|
168
|
+
that the CLI uses, so once `nex doctor` is green, Claude Code is good to go.
|
|
169
|
+
|
|
170
|
+
## Sandbox verification
|
|
171
|
+
|
|
172
|
+
Verification runs the agent **as a dedicated sandbox user**, not as you.
|
|
173
|
+
Each org has one sandbox user (a real `user_profiles` row + a real Google
|
|
174
|
+
or SharePoint account, distinct from any human operator). The verifier
|
|
175
|
+
agent's `user_id` is swapped to that sandbox user at `/agent/init` and
|
|
176
|
+
`/agent/chat` time, so every Drive / Gmail / Calendar / CRM call hits the
|
|
177
|
+
sandbox tenant — your own data is untouchable by construction.
|
|
178
|
+
|
|
179
|
+
Before each run, the orchestrator **wipes the sandbox account's Drive (or
|
|
180
|
+
SharePoint scope) and rebuilds it from a blueprint** — folders, docs,
|
|
181
|
+
sheets — recorded on the org's `sandbox_environments` row. After
|
|
182
|
+
`finalise_run` (or `cancel_run`) the same wipe runs again. So fixtures
|
|
183
|
+
are declarative; you don't preload your own Drive.
|
|
184
|
+
|
|
185
|
+
### One-time per org
|
|
186
|
+
|
|
187
|
+
1. Create a real Google / Microsoft account for the sandbox (e.g.
|
|
188
|
+
`sandbox+<orgslug>@nexhelm.ai`); have it sign up through the normal
|
|
189
|
+
user-onboarding flow so a `user_profiles` row exists with the right
|
|
190
|
+
`organization_id`.
|
|
191
|
+
2. From the sandbox account, OAuth-connect Google Drive (or SharePoint)
|
|
192
|
+
via the standard connect-apps flow so `oauth_credentials` is populated
|
|
193
|
+
— `get_fresh_access_token(sandbox_user_id, "google_drive")` must
|
|
194
|
+
succeed for setup/teardown to work.
|
|
195
|
+
3. `POST /workflow-lab/sandbox-client` — creates the dummy `clients` row
|
|
196
|
+
the verifier targets as `target_client_id`.
|
|
197
|
+
4. `POST /workflow-lab/sandbox-environments` with `provider` + a
|
|
198
|
+
`blueprint` JSON describing the directory layout the workflow
|
|
199
|
+
expects, e.g.:
|
|
200
|
+
```jsonc
|
|
201
|
+
{
|
|
202
|
+
"provider": "google",
|
|
203
|
+
"blueprint": {
|
|
204
|
+
"container": { "type": "folder_root", "name_template": "WFL Sandbox — {{organization_name}}" },
|
|
205
|
+
"folders": [{ "path": "Tax Returns 2023" }],
|
|
206
|
+
"docs": [{ "path": "Tax Returns 2023", "title": "Doe_2023_1040", "text": "..." }],
|
|
207
|
+
"sheets": [{ "path": "Tax Returns 2023", "title": "Doe_2023_W2", "rows": [...] }]
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
```
|
|
211
|
+
DB triggers enforce **exactly one sandbox user per org**; supported
|
|
212
|
+
providers are `google` and `sharepoint`.
|
|
213
|
+
|
|
214
|
+
### Per run
|
|
215
|
+
|
|
216
|
+
Pass `-s/--sandbox-user-id` at design time; discover candidates with
|
|
217
|
+
`GET /workflow-lab/sandbox-accounts`:
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
nex workflow designer -u <gdoc> -s <sandbox_user_uuid> # bind run to sandbox user
|
|
221
|
+
nex workflow run <run_id> # setup → drive wiped + rebuilt; agent runs as sandbox user
|
|
222
|
+
nex workflow evaluator <run_id> --verdict approved # cleanup wipes drive again
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Concurrency + post-hoc gate
|
|
226
|
+
|
|
227
|
+
- Only **one active run per sandbox user** (enforced by
|
|
228
|
+
`idx_workflow_lab_runs_active_sandbox_user`). Two SEs in the same org
|
|
229
|
+
cannot verify concurrently; the second `start-verification` is
|
|
230
|
+
rejected.
|
|
231
|
+
- Sandbox isolates blast radius; the evaluator still scans the trace for
|
|
232
|
+
side-effect tools (`send_/create_/update_/delete_/post_/patch_`). If
|
|
233
|
+
any fired and no reviewer verdict was supplied, terminal status is
|
|
234
|
+
`awaiting_human_verify` — a human has to walk the trace before the
|
|
235
|
+
design is stamped `verified`.
|
|
236
|
+
|
|
237
|
+
## CLI commands
|
|
238
|
+
|
|
239
|
+
### Auth
|
|
240
|
+
|
|
241
|
+
| Command | What it does |
|
|
242
|
+
|---|---|
|
|
243
|
+
| `nex auth setup [-e EMAIL] [-b BACKEND_URL] [-f FRONTEND_URL] [--user-id ID] [--client-id ID] [--skip-verify]` | Interactive setup → `~/.nex/config` (mode 0600). With `-e`, does Supabase email/password login + numbered client picker. |
|
|
244
|
+
| `nex auth status` | Show config + probe `/workflow-lab/runs`. |
|
|
245
|
+
| `nex auth clear` | Delete `~/.nex/config`. |
|
|
246
|
+
|
|
247
|
+
### Workflow
|
|
248
|
+
|
|
249
|
+
Persona-as-action grammar. Same names appear as MCP tools (`agents.lab_workflow.<persona>`) — CLI and MCP stay in sync by construction.
|
|
250
|
+
|
|
251
|
+
| Command | What it does |
|
|
252
|
+
|---|---|
|
|
253
|
+
| `nex workflow integration-engineer [-f FILE \| --transcript-text TEXT \| -u GDOC_URL] [--json]` | Run the **integration_engineer** persona: identify integration gaps. Returns capabilities the workflow needs and which are covered by existing Nexhelm tools vs missing. Run **before** `designer` when you suspect new integrations may be required. Powers the engineering loop (see below). |
|
|
254
|
+
| `nex workflow sandbox-accounts [--include-disabled] [--json]` | List sandbox user accounts available to the operator. Surfaces `sandbox_user_id` + email + organization + which providers each sandbox has `sandbox_environments` rows for (note: this is snapshot-scope provisioning, **not** OAuth/API-key connectivity — Wealthbox tokens etc. don't show up here). Use to discover the value for `designer --sandbox-user-id`. |
|
|
255
|
+
| `nex workflow designer [-f FILE \| --transcript-text TEXT \| -u GDOC_URL \| --transcript-doc-id ID] [-s SANDBOX_USER_ID] [--target-client-id ID] [--json]` | Run the **designer** persona on a transcript → returns `run_id`, `view_url`, proposal. The designer prompt favors fully-autonomous workflows (≥80% agent-only steps); HITL pauses are flagged `[HITL: <reason>]` only when there's a hard regulatory reason. **`-s/--sandbox-user-id`** binds the run to a sandbox account; the resulting task_definition lives in the operator's org but the sandbox user gets a direct edit permission so they can resolve it via Refine-with-AI URLs. Discover ids via `nex workflow sandbox-accounts`. |
|
|
256
|
+
| `nex workflow refine RUN_ID -m "FEEDBACK" \| -F FILE [--json]` | Re-run the designer with operator feedback. Produces a NEW run id. The source run is **auto-cancelled** to free the single active-sandbox slot (`idx_workflow_lab_runs_active_sandbox_user`) — no need to `cancel` manually before refining. For sandbox runs that already passed `start_verification`, the auto-cancel routes through full cleanup so adapter state stays symmetric. |
|
|
257
|
+
| `nex workflow run RUN_ID [-k PROMPT] [--as EMAIL] [--json]` | **Autonomous end-to-end.** Auto-fires the verifier (idempotent) then drains `/langchain-agent/chat`. Returns the agent transcript + a `stop_reason` so you see *where* it stopped: `complete` (call `evaluator`), `interaction` (HITL pause — refine to remove, or finish in dashboard), `error` (revise design), `cut_off` (network blip — re-run). Default kickoff prompt instructs the agent to be autonomous and fail loudly rather than ask. **Sandbox runs auto-switch identity:** if the run has `sandbox_user_id`, the backend transparently swaps the chat's `user_id` to the sandbox user (so its OAuth tokens / API keys drive tool calls) **without** forcing `read_only` — sandbox writes are allowed by design. No `--as` needed. **`--as EMAIL`** is still available for non-sandbox impersonation (e.g. debug against a real user's data); always READ-ONLY (write tools filtered). Operator must be in backend's `IMPERSONATION_ALLOWLIST`. |
|
|
258
|
+
| `nex workflow verifier RUN_ID [--json]` | Manual path: spawn a `client_task` only. Prefer `run` for autonomous verification. |
|
|
259
|
+
| `nex workflow trace RUN_ID [--json]` | Run state + verifier conversation rendered inline (role + tool calls). |
|
|
260
|
+
| `nex workflow evaluator RUN_ID [--verifier-thread-id ID] [--verifier-execution-id ID] [--verdict approved\|rejected] [--note TEXT] [--json]` | Run the **evaluator** persona: stamp run with trace ids + verdict. Verdict omitted ⇒ auto-classify. |
|
|
261
|
+
| `nex workflow show RUN_ID [--json]` | Fetch a single run by id. |
|
|
262
|
+
| `nex workflow list [-n LIMIT] [--sandbox-user-id ID] [--active] [--json]` | List runs for the configured user (1–200, default 50). `--sandbox-user-id` filters to runs bound to that sandbox; pair with `--active` to drop terminal-status rows — handy for finding which run is holding the active-sandbox slot. |
|
|
263
|
+
| `nex workflow cancel [RUN_ID] [--sandbox-user-id ID] [--all-active] [--json]` | Cancel a run. Three mutually-exclusive modes: positional `RUN_ID` cancels one specific run; `--sandbox-user-id` cancels every non-terminal run bound to that sandbox; `--all-active` cancels every non-terminal run owned by the operator. Useful as a manual rescue when refine/designer can't auto-resolve (e.g. a run is stuck in `setting_up`). |
|
|
264
|
+
|
|
265
|
+
### Diagnostics
|
|
266
|
+
|
|
267
|
+
| Command | What it does |
|
|
268
|
+
|---|---|
|
|
269
|
+
| `nex doctor` | End-to-end env checks (config, secret, JWT mint, backend reachability). Exit 0 = green. |
|
|
270
|
+
|
|
271
|
+
## MCP tools exposed to Claude Code
|
|
272
|
+
|
|
273
|
+
10 tools; all except the unscoped `agents.list` are gated by a `PHASE_1_SCOPES` capability (`auth.py`):
|
|
274
|
+
|
|
275
|
+
| Tool | Args | Notes |
|
|
276
|
+
|---|---|---|
|
|
277
|
+
| `agents.list` | — | unscoped; tool registry browse |
|
|
278
|
+
| `agents.lab_workflow.integration_engineer` | `transcript_text? \| transcript_url?` | gap analysis pre-design |
|
|
279
|
+
| `agents.lab_workflow.designer` | `transcript_text? \| transcript_url? \| transcript_doc_id?`, `target_client_id?` | designer prompt biased toward HITL-free workflows |
|
|
280
|
+
| `agents.lab_workflow.refine` | `run_id`, `feedback` | re-runs designer with feedback; produces NEW run id |
|
|
281
|
+
| `agents.lab_workflow.run` | `run_id`, `kickoff_prompt?`, `as_email?` | **autonomous run.** drains chat stream; returns `events`, `stop_reason`, `thread_id`. long-running. `as_email` impersonates that user (READ-ONLY): operator's email must be in backend `IMPERSONATION_ALLOWLIST`; backend resolves email→user_id; write tools filtered. |
|
|
282
|
+
| `agents.lab_workflow.verifier` | `run_id` | manual path; spawns client_task only |
|
|
283
|
+
| `agents.lab_workflow.trace` | `run_id` | run + verifier conversation |
|
|
284
|
+
| `agents.lab_workflow.evaluator` | `run_id`, `verifier_thread_id?`, `verifier_execution_id?`, `reviewer_verdict?`, `reviewer_note?` | stamps verdict |
|
|
285
|
+
| `agents.lab_workflow.show` | `run_id` | run record |
|
|
286
|
+
| `agents.lab_workflow.list` | `limit=50` | recent runs |
|
|
287
|
+
|
|
288
|
+
## Impersonation (admin, read-only)
|
|
289
|
+
|
|
290
|
+
For verifying a workflow against a real (non-sandbox) user's data without
|
|
291
|
+
writing to their systems. Sandbox runs **do not need `--as`** — the
|
|
292
|
+
backend auto-switches identity via the `client_task → workflow_lab_run →
|
|
293
|
+
sandbox_user_id` lookup and allows writes (sandbox is throwaway by
|
|
294
|
+
design). Use `--as` for the genuinely different case: "run as Alice from
|
|
295
|
+
firmcorp so I can see what *her* Drive looks like".
|
|
296
|
+
|
|
297
|
+
`nex workflow run --as <email>` runs the agent against the target's
|
|
298
|
+
OAuth tokens (Drive/Gmail/Calendar/...) but with **write tools filtered
|
|
299
|
+
out of the agent's vocabulary** — `send_*`, `create_*`, `update_*`,
|
|
300
|
+
`delete_*`, `upload_*`, `post_*`, etc. The LLM literally cannot call
|
|
301
|
+
them, so accidental writes are impossible by construction (not by HITL
|
|
302
|
+
gate).
|
|
303
|
+
|
|
304
|
+
```bash
|
|
305
|
+
nex workflow run <run_id> --as alice@firmcorp.com
|
|
306
|
+
# magenta banner: "impersonating user_email=alice@firmcorp.com (READ-ONLY)"
|
|
307
|
+
# kickoff: <default kickoff>
|
|
308
|
+
# running... agent may take 30-300s on complex workflows
|
|
309
|
+
# [agent transcript with no writes possible]
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
**Allowlist gate (backend):** your email must be in
|
|
313
|
+
`IMPERSONATION_ALLOWLIST` (env var, comma-separated, case-insensitive):
|
|
314
|
+
|
|
315
|
+
```bash
|
|
316
|
+
# in your backend env (e.g. .env.local for local dev):
|
|
317
|
+
IMPERSONATION_ALLOWLIST=admin@nexhelm.ai,ops@nexhelm.ai
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
Without that, `chat` returns 403 and the CLI surfaces the error. The check
|
|
321
|
+
runs at `app/services/impersonation.py::resolve_impersonation` and is
|
|
322
|
+
re-validated on every chat call — there's no token-level grant. The target
|
|
323
|
+
email is resolved to a `user_id` via `user_profiles` (404 if no match).
|
|
324
|
+
|
|
325
|
+
The CLI looks up the operator's email in `~/.nex/config` (field
|
|
326
|
+
`user_email`, set by `nex auth setup -e <email>`). If missing, `--as`
|
|
327
|
+
fails fast with a clear error before any backend call.
|
|
328
|
+
|
|
329
|
+
**What's filtered:** the prefix list in `impersonation.py:_WRITE_TOOL_PREFIXES`
|
|
330
|
+
plus a hand-audited override list. Tools matching `set_thread_name` and a
|
|
331
|
+
small list of harmless mutations are kept. New tools that should be
|
|
332
|
+
considered writes can be added to `_WRITE_TOOL_NAMES`.
|
|
333
|
+
|
|
334
|
+
**Future (post-MVP):**
|
|
335
|
+
- Replace env-var allowlist with an `impersonation_grants` table
|
|
336
|
+
(organization-scoped admin role + time-bound grants + audit log).
|
|
337
|
+
- Per-tool allow/deny lists so an admin can opt-in to a narrow write
|
|
338
|
+
(e.g. send a single confirmation email, scoped + signed).
|
|
339
|
+
|
|
340
|
+
## Engineering loop — adding an integration
|
|
341
|
+
|
|
342
|
+
The full lifecycle from "user pastes a transcript" to "PR opened":
|
|
343
|
+
|
|
344
|
+
```
|
|
345
|
+
human pastes transcript
|
|
346
|
+
│
|
|
347
|
+
▼
|
|
348
|
+
1. ANALYZE nex workflow integration-engineer -u <gdoc>
|
|
349
|
+
→ capabilities table, which exist, which are missing
|
|
350
|
+
│
|
|
351
|
+
▼ (only if any are missing)
|
|
352
|
+
2. PLAN/IMPLEMENT external Claude Code invokes the
|
|
353
|
+
add-integration skill at ~/.claude/skills/add-integration.md
|
|
354
|
+
Five phases with explicit approval gates:
|
|
355
|
+
CONSUME GAP REPORT → wait `plan`
|
|
356
|
+
PROPOSE PLAN → wait `approve`
|
|
357
|
+
IMPLEMENT → wait `lab`
|
|
358
|
+
LAB VERIFY → wait `pr`
|
|
359
|
+
PR
|
|
360
|
+
External Claude does code reads/writes locally; nex never
|
|
361
|
+
edits code.
|
|
362
|
+
│
|
|
363
|
+
▼
|
|
364
|
+
3. DESIGN nex workflow designer -u <gdoc>
|
|
365
|
+
[--sandbox-user-id <id>]
|
|
366
|
+
→ run_id, view_url, proposal
|
|
367
|
+
Designer prompt is biased toward fully-autonomous workflows
|
|
368
|
+
(≥80% agent-only steps); HITL pauses are flagged
|
|
369
|
+
`[HITL: <reason>]` only for hard regulatory gates.
|
|
370
|
+
Pass `--sandbox-user-id` to bind to a sandbox account
|
|
371
|
+
(discover with `nex workflow sandbox-accounts`); the run
|
|
372
|
+
then auto-switches identity in `run` (no `--as` needed).
|
|
373
|
+
│
|
|
374
|
+
▼ (optional: review proposal, refine if needed)
|
|
375
|
+
3a. REFINE nex workflow refine <run_id> -m "tighten X / drop Y"
|
|
376
|
+
→ NEW run_id with refined proposal. Source run is
|
|
377
|
+
auto-cancelled to free the active-sandbox slot.
|
|
378
|
+
Loop here until the proposal is close enough to verify.
|
|
379
|
+
│
|
|
380
|
+
▼
|
|
381
|
+
4. RUN nex workflow run <run_id>
|
|
382
|
+
→ autonomous: idempotently spawn verifier client_task,
|
|
383
|
+
POST kickoff to /langchain-agent/chat, drain SSE.
|
|
384
|
+
→ prints conversation + stop_reason:
|
|
385
|
+
complete → step 5
|
|
386
|
+
interaction → revise (refine) or finish in dashboard
|
|
387
|
+
error → revise the task definition; re-run
|
|
388
|
+
cut_off → network/backend blip; re-run
|
|
389
|
+
This is the visibility primitive — operator sees where
|
|
390
|
+
the agent stopped and revises until it runs end-to-end
|
|
391
|
+
without intervention. (Manual fallback: `verifier` +
|
|
392
|
+
finish in dashboard + `trace`.)
|
|
393
|
+
│
|
|
394
|
+
▼
|
|
395
|
+
5. FINALISE nex workflow evaluator <run_id> --verdict approved|rejected
|
|
396
|
+
--verifier-thread-id <from `run` output>
|
|
397
|
+
│
|
|
398
|
+
▼
|
|
399
|
+
6. PR external Claude opens PR via `gh pr create` (linked from
|
|
400
|
+
the skill); engineering review takes over.
|
|
401
|
+
```
|
|
402
|
+
|
|
403
|
+
**Engineering vs verification — separation of concerns.** Nex runs the
|
|
404
|
+
analyze + design + verify orchestration (engine tier); external Claude
|
|
405
|
+
reads and writes code in the repo (with approval gates from the skill);
|
|
406
|
+
human approves at every gate. The CLI is the only thing the human ever
|
|
407
|
+
runs; the MCP server is the only thing external Claude calls. No double-
|
|
408
|
+
implementation between nex and Claude.
|
|
409
|
+
|
|
410
|
+
**Where the skill lives.** External, in `~/.claude/skills/add-integration.md`.
|
|
411
|
+
Deliberately outside the codebase — keeps your code engine and the external
|
|
412
|
+
AI/human persona separate. Update the skill independently of any repo PR.
|
|
413
|
+
|
|
414
|
+
## Code layout (`src/nexhelm_mcp/`)
|
|
415
|
+
|
|
416
|
+
| File | Role |
|
|
417
|
+
|---|---|
|
|
418
|
+
| `cli.py` | Typer app with auth/workflow/doctor command groups |
|
|
419
|
+
| `server.py` | FastMCP server, one wrapper per agent tool |
|
|
420
|
+
| `agents/lab_workflow.py` | Reference agent over `/workflow-lab/*` |
|
|
421
|
+
| `auth.py` | HS256 JWT minting (≤90s TTL); `PHASE_1_SCOPES` |
|
|
422
|
+
| `client.py` | `BackendClient` httpx wrapper |
|
|
423
|
+
| `config.py` | `~/.nex/config` (0600); reads secret from `frontend/.env.local` at runtime |
|
|
424
|
+
| `supabase_auth.py` | Email/password login + client list helper |
|
|
425
|
+
| `login.py` | Phase-2 PKCE browser flow placeholder |
|
|
426
|
+
|
|
427
|
+
## Adding a new agent
|
|
428
|
+
|
|
429
|
+
1. Create `src/nexhelm_mcp/agents/<name>.py` exposing `AGENT = Agent(...)`
|
|
430
|
+
with one or more `AgentTool(...)` entries. Each `AgentTool.scope` must be
|
|
431
|
+
added to `auth.PHASE_1_SCOPES` (or a new scope set for that agent).
|
|
432
|
+
2. Register in `agents/__init__.py`.
|
|
433
|
+
3. Add typed wrappers in `server.py` so Claude gets a proper schema.
|
|
434
|
+
4. Optionally add CLI subcommands in `cli.py`.
|
|
435
|
+
5. Add tests with `respx` mocking the backend.
|
|
436
|
+
|
|
437
|
+
The `lab_workflow` agent is the reference template.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
nexhelm_mcp/__init__.py,sha256=_hABU_WSos0j5Ue0gUmA8TytGQU6l7xMmTJB5mvdlZI,100
|
|
2
|
+
nexhelm_mcp/auth.py,sha256=ZppGFGF4xNSXum8EaNz4O3OyBo3LWLY2cWRdmgOnJh0,4288
|
|
3
|
+
nexhelm_mcp/cli.py,sha256=S0Ek5ROz4MenRSj9ohLPiPfytNwtVsq16PwWEq7uhvY,113010
|
|
4
|
+
nexhelm_mcp/client.py,sha256=v-qG8GLvK4OUl3qw4-WtPwjzlFKh2mNM47svdUkfTss,4394
|
|
5
|
+
nexhelm_mcp/config.py,sha256=3V2UKg0guebZOjlqj5_Lvg_1vGgQVKk1icfrUv3kvNU,11214
|
|
6
|
+
nexhelm_mcp/github_device.py,sha256=aVJf3J3v21yRA1-GVBdbwij4snmLIzHUsW21KXtf97M,3113
|
|
7
|
+
nexhelm_mcp/login.py,sha256=_iffee2R0GErd5iERyLQkNtTVLGExR9hoUHEP1BmNrA,9313
|
|
8
|
+
nexhelm_mcp/server.py,sha256=orul3rxrBFiravg8WW9KlE3le8E_wsOKkH9TA2CZeZc,18052
|
|
9
|
+
nexhelm_mcp/ssh_config.py,sha256=FtU3FBhc9zMX2LYv6GDasz5bM1GmaB8FZIqZH3KfpnQ,3830
|
|
10
|
+
nexhelm_mcp/streaming.py,sha256=74u0IT_ZAM97myh7Rt3d3GGnZ0NIv2ziFf1ag_F8txo,10038
|
|
11
|
+
nexhelm_mcp/supabase_auth.py,sha256=2gymbA60lHe84FtIkyxuvyjYl5g2d5re3zGkmIOjPmo,3156
|
|
12
|
+
nexhelm_mcp/tasks_run.py,sha256=ejd8EQqIv_rF4wIcWru_S-GtjglYmkOHqb9x8S-T_Ww,15995
|
|
13
|
+
nexhelm_mcp/transcript.py,sha256=0A2rGUYwfWNE9q9D00xj9O3UF7Kj-dE1_kyDt4BW-UQ,3443
|
|
14
|
+
nexhelm_mcp/tunnel.py,sha256=86a9tnnoNM0QAZps4Q_HNFYbyFx0FyY_ZzeumKKjjug,2375
|
|
15
|
+
nexhelm_mcp/agents/__init__.py,sha256=GYmoWF3AxlcY4oPW2UiBwwvNARtUJ6uRLsf9DUEh1OY,822
|
|
16
|
+
nexhelm_mcp/agents/agent_workspace.py,sha256=iY6sd5OQmpk58rnvZ_QNCovvmPThQijuxDx1y3Zm8yo,2305
|
|
17
|
+
nexhelm_mcp/agents/agent_workspace_proxy.py,sha256=EIz_B-ClTrN4vCYnYi3eQIAFwqEuEnAqfNBrR_27PQw,9681
|
|
18
|
+
nexhelm_mcp/agents/lab_workflow.py,sha256=nehc1R9JHDA8T4aMtUFwfF-R_XGOmI-lMhNRhctjLlI,34104
|
|
19
|
+
nexhelm_mcp/agents/tasks.py,sha256=Th9HVdZhVZQcMAjLJQqdyrboaZfljTZEXeLWeXVR6NQ,9403
|
|
20
|
+
nexhelm-0.3.0.dist-info/METADATA,sha256=eaUQFIq6aaKFcAJuSMUFGAiAXnvnpok4VudIMkDXilE,22796
|
|
21
|
+
nexhelm-0.3.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
22
|
+
nexhelm-0.3.0.dist-info/entry_points.txt,sha256=T-XlZVnUZp2a_b2AWnjcH_ZuUrMTtKgrQrcYHib9ORY,89
|
|
23
|
+
nexhelm-0.3.0.dist-info/RECORD,,
|
nexhelm_mcp/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Agent registry. Each agent declares its capabilities and tool surface here.
|
|
2
|
+
|
|
3
|
+
To add a new specialized agent (firm-wide, code, infra, cfp, etc.):
|
|
4
|
+
1. Create a module under nexhelm_mcp/agents/<name>.py exposing an `AGENT`
|
|
5
|
+
constant of type Agent.
|
|
6
|
+
2. Append it to AGENT_REGISTRY below.
|
|
7
|
+
|
|
8
|
+
The MCP broker is the only thing that knows about the registry. Agents are
|
|
9
|
+
self-contained: they own their capability scopes, their tool schemas, and
|
|
10
|
+
their backend interactions.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from nexhelm_mcp.agents.lab_workflow import AGENT as _LAB_WORKFLOW_AGENT
|
|
16
|
+
from nexhelm_mcp.agents.lab_workflow import Agent
|
|
17
|
+
from nexhelm_mcp.agents.tasks import AGENT as _TASKS_AGENT
|
|
18
|
+
|
|
19
|
+
# Every agent the broker exposes, keyed by the agent's own ``name``.
# Insertion order follows the tuple below; add new agents to it.
AGENT_REGISTRY: dict[str, Agent] = {
    agent.name: agent for agent in (_LAB_WORKFLOW_AGENT, _TASKS_AGENT)
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""CLI-side helpers for the agent-workspace lifecycle.
|
|
2
|
+
|
|
3
|
+
Mirrors the persona-as-action shape used by lab_workflow:
|
|
4
|
+
|
|
5
|
+
start -> POST /agent-workspace/start
|
|
6
|
+
status -> GET /agent-workspace/{id}
|
|
7
|
+
end -> POST /agent-workspace/{id}/end
|
|
8
|
+
list -> GET /agent-workspace/
|
|
9
|
+
|
|
10
|
+
Daemon proxy verbs (read/write/patch/test/...) live in
|
|
11
|
+
`agent_workspace_proxy.py` since they don't go through the
|
|
12
|
+
backend -- they hit the daemon directly using the session token
|
|
13
|
+
returned by `start`.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import Any, Optional
|
|
19
|
+
|
|
20
|
+
from nexhelm_mcp.client import BackendClient
|
|
21
|
+
from nexhelm_mcp.config import Config
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def start(
    *,
    config: Config,
    client: BackendClient,
    scope: str = "se_default",
    branch: Optional[str] = None,
    customer: Optional[str] = None,
) -> dict[str, Any]:
    """Provision a new workspace and return its session record.

    Sends ``POST /agent-workspace/start``. The request body always carries
    ``user_id`` and ``scope``; ``github_fgt`` (taken from
    ``config.github_fgt``), ``branch`` and ``customer`` are included only
    when they are truthy.

    Args:
        config: Loaded CLI config; ``user_id`` and ``github_fgt`` are read.
        client: Backend HTTP client used to issue the request.
        scope: Workspace scope name forwarded to the backend.
        branch: Optional branch forwarded to the backend.
        customer: Optional customer forwarded to the backend.

    Returns:
        Whatever ``client.post`` returns for the request.
    """
    payload: dict[str, Any] = {"user_id": config.user_id, "scope": scope}

    # Optional fields, listed in the order they must appear in the body.
    optional_fields = (
        ("github_fgt", config.github_fgt),
        ("branch", branch),
        ("customer", customer),
    )
    payload.update({key: value for key, value in optional_fields if value})

    return client.post(
        "/agent-workspace/start",
        json=payload,
        inject_identity=False,
    )
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def status(
    *, config: Config, client: BackendClient, workspace_id: str
) -> dict[str, Any]:
    """Fetch a workspace's current state. Refreshes from ECS.

    Sends ``GET /agent-workspace/{workspace_id}`` with the operator's
    ``user_id`` as a query parameter.

    Args:
        config: Loaded CLI config; only ``user_id`` is read.
        client: Backend HTTP client used to issue the request.
        workspace_id: Identifier of the workspace to look up.

    Returns:
        Whatever ``client.get`` returns for the request.

    Raises:
        ValueError: If ``workspace_id`` is empty or whitespace-only.
    """
    # Function-scope import so the block does not depend on file-level edits.
    from urllib.parse import quote

    # An empty id would collapse the path to "/agent-workspace/", which is
    # the list endpoint, and silently return the wrong payload.
    if not workspace_id or not workspace_id.strip():
        raise ValueError("workspace_id must be a non-empty string")

    # Encode the id as a single path segment so "/", "?" or "#" inside it
    # cannot point the request at a different route.
    segment = quote(workspace_id, safe="")
    return client.get(
        f"/agent-workspace/{segment}",
        params={"user_id": config.user_id},
        inject_identity=False,
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def end(
    *, config: Config, client: BackendClient, workspace_id: str
) -> dict[str, Any]:
    """Tear down a workspace. Idempotent.

    Sends ``POST /agent-workspace/{workspace_id}/end`` with the operator's
    ``user_id`` in the JSON body.

    Args:
        config: Loaded CLI config; only ``user_id`` is read.
        client: Backend HTTP client used to issue the request.
        workspace_id: Identifier of the workspace to tear down.

    Returns:
        Whatever ``client.post`` returns for the request.

    Raises:
        ValueError: If ``workspace_id`` is empty or whitespace-only.
    """
    # Function-scope import so the block does not depend on file-level edits.
    from urllib.parse import quote

    # An empty id would produce "/agent-workspace//end", which names no
    # workspace; fail before making a backend call.
    if not workspace_id or not workspace_id.strip():
        raise ValueError("workspace_id must be a non-empty string")

    # Encode the id as a single path segment so "/", "?" or "#" inside it
    # cannot point the request at a different route.
    segment = quote(workspace_id, safe="")
    return client.post(
        f"/agent-workspace/{segment}/end",
        json={"user_id": config.user_id},
        inject_identity=False,
    )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def list_workspaces(*, config: Config, client: BackendClient) -> dict[str, Any]:
    """List the operator's active + recently-ended workspaces.

    Sends ``GET /agent-workspace/`` with the operator's ``user_id`` as a
    query parameter and returns whatever ``client.get`` returns.
    """
    query = {"user_id": config.user_id}
    return client.get("/agent-workspace/", params=query, inject_identity=False)
|