comfyui-mcp 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex/hooks.json +15 -0
- package/CHANGELOG.md +64 -0
- package/README.md +45 -1
- package/ROADMAP.md +120 -0
- package/TODO.md +75 -0
- package/blog/comfyui-mcp-tdqs-case-study.md +102 -0
- package/design/embedded-agent-panel.md +118 -0
- package/dist/experimental/agent-poc.d.ts +30 -0
- package/dist/experimental/agent-poc.d.ts.map +1 -0
- package/dist/experimental/agent-poc.js +210 -0
- package/dist/experimental/agent-poc.js.map +1 -0
- package/dist/experimental/chat-handler.d.ts +34 -0
- package/dist/experimental/chat-handler.d.ts.map +1 -0
- package/dist/experimental/chat-handler.js +60 -0
- package/dist/experimental/chat-handler.js.map +1 -0
- package/dist/experimental/provider-registry.d.ts +8 -0
- package/dist/experimental/provider-registry.d.ts.map +1 -0
- package/dist/experimental/provider-registry.js +50 -0
- package/dist/experimental/provider-registry.js.map +1 -0
- package/dist/experimental/run.d.ts +3 -0
- package/dist/experimental/run.d.ts.map +1 -0
- package/dist/experimental/run.js +34 -0
- package/dist/experimental/run.js.map +1 -0
- package/dist/services/download-auth.d.ts +23 -0
- package/dist/services/download-auth.d.ts.map +1 -0
- package/dist/services/download-auth.js +78 -0
- package/dist/services/download-auth.js.map +1 -0
- package/dist/services/download-cache.d.ts +26 -0
- package/dist/services/download-cache.d.ts.map +1 -0
- package/dist/services/download-cache.js +167 -0
- package/dist/services/download-cache.js.map +1 -0
- package/dist/services/image-management.d.ts +14 -0
- package/dist/services/image-management.d.ts.map +1 -1
- package/dist/services/image-management.js +57 -0
- package/dist/services/image-management.js.map +1 -1
- package/dist/services/job-history.d.ts +28 -0
- package/dist/services/job-history.d.ts.map +1 -0
- package/dist/services/job-history.js +130 -0
- package/dist/services/job-history.js.map +1 -0
- package/dist/services/job-watcher.d.ts +8 -0
- package/dist/services/job-watcher.d.ts.map +1 -1
- package/dist/services/job-watcher.js +16 -20
- package/dist/services/job-watcher.js.map +1 -1
- package/dist/services/model-resolver.d.ts +2 -1
- package/dist/services/model-resolver.d.ts.map +1 -1
- package/dist/services/model-resolver.js +11 -18
- package/dist/services/model-resolver.js.map +1 -1
- package/dist/services/node-authoring.d.ts +112 -0
- package/dist/services/node-authoring.d.ts.map +1 -0
- package/dist/services/node-authoring.js +418 -0
- package/dist/services/node-authoring.js.map +1 -0
- package/dist/services/node-management.d.ts +7 -0
- package/dist/services/node-management.d.ts.map +1 -1
- package/dist/services/node-management.js +123 -8
- package/dist/services/node-management.js.map +1 -1
- package/dist/services/process-control.d.ts +45 -0
- package/dist/services/process-control.d.ts.map +1 -1
- package/dist/services/process-control.js +262 -54
- package/dist/services/process-control.js.map +1 -1
- package/dist/services/queue-manager.d.ts +8 -3
- package/dist/services/queue-manager.d.ts.map +1 -1
- package/dist/services/queue-manager.js +25 -2
- package/dist/services/queue-manager.js.map +1 -1
- package/dist/services/tunnel.d.ts +23 -0
- package/dist/services/tunnel.d.ts.map +1 -0
- package/dist/services/tunnel.js +151 -0
- package/dist/services/tunnel.js.map +1 -0
- package/dist/tools/image-management.d.ts.map +1 -1
- package/dist/tools/image-management.js +57 -1
- package/dist/tools/image-management.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +2 -0
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/model-management.d.ts.map +1 -1
- package/dist/tools/model-management.js +26 -1
- package/dist/tools/model-management.js.map +1 -1
- package/dist/tools/node-authoring.d.ts +3 -0
- package/dist/tools/node-authoring.d.ts.map +1 -0
- package/dist/tools/node-authoring.js +96 -0
- package/dist/tools/node-authoring.js.map +1 -0
- package/dist/tools/node-management.d.ts.map +1 -1
- package/dist/tools/node-management.js +5 -1
- package/dist/tools/node-management.js.map +1 -1
- package/dist/tools/process-control.js +2 -2
- package/dist/tools/process-control.js.map +1 -1
- package/dist/tools/queue-management.js +1 -1
- package/dist/tools/queue-management.js.map +1 -1
- package/infra/cloudflare/docs-proxy.js +45 -0
- package/infra/cloudflare/wrangler.jsonc +19 -0
- package/package.json +11 -2
- package/scripts/gen-tool-docs.ts +357 -0
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,68 @@ All notable changes to this project are documented here. This project adheres to
|
|
|
4
4
|
[Semantic Versioning](https://semver.org/) and the format follows
|
|
5
5
|
[Keep a Changelog](https://keepachangelog.com/).
|
|
6
6
|
|
|
7
|
+
## [0.7.0] - 2026-05-25
|
|
8
|
+
|
|
9
|
+
Stability + authoring release: hardens model downloads and the ComfyUI process
|
|
10
|
+
lifecycle, makes failures actionable, and adds a custom-node authoring/publishing
|
|
11
|
+
lifecycle. Plus a hosted docs site and an experimental embedded-agent backend.
|
|
12
|
+
|
|
13
|
+
### Added
|
|
14
|
+
|
|
15
|
+
- **Custom-node authoring** — `scaffold_custom_node` (generate a Python node pack
|
|
16
|
+
from a template) and `publish_custom_node` (publish to the Comfy Registry via
|
|
17
|
+
comfy-cli; key via `REGISTRY_ACCESS_TOKEN`, never logged) (#24).
|
|
18
|
+
- **`install_custom_node` ref pinning** — pin a pack to a commit/branch/tag, parsed
|
|
19
|
+
from GitHub/GitLab/Bitbucket URLs or `repo@ref`, or an explicit `ref` arg.
|
|
20
|
+
- **`download_model` auth** — per-request `bearer` / `basic` / `header` / `query`
|
|
21
|
+
authentication for gated/private model hosts.
|
|
22
|
+
- **Model download cache** — content-addressed dedup, concurrent-download coalescing,
|
|
23
|
+
and optional LRU eviction (`COMFYUI_DOWNLOAD_CACHE_DIR`, `COMFYUI_LRU_CACHE_SIZE_GB`).
|
|
24
|
+
- **ComfyUI process supervision** — bounded startup readiness checks
|
|
25
|
+
(`COMFYUI_STARTUP_CHECK_INTERVAL_S`, `COMFYUI_STARTUP_CHECK_MAX_TRIES`) and opt-in bounded
|
|
26
|
+
auto-restart-on-crash (`COMFYUI_ALWAYS_RESTART`, `COMFYUI_RESTART_MAX_ATTEMPTS`,
|
|
27
|
+
`COMFYUI_RESTART_WINDOW_S`).
|
|
28
|
+
- **Plugin skills** — `comfyui-frontend-extensions` (v2 `@comfyorg/extension-api`
|
|
29
|
+
authoring + v1→v2 migration) and `comfyui-node-registry` (node authoring/publishing).
|
|
30
|
+
- **Hosted docs** — Mintlify site with a schema-generated tool reference at
|
|
31
|
+
[comfyui-mcp.artokun.io/docs](https://comfyui-mcp.artokun.io/docs).
|
|
32
|
+
|
|
33
|
+
### Changed
|
|
34
|
+
|
|
35
|
+
- **`get_job_status` + completion notifications** now surface ComfyUI
|
|
36
|
+
`execution_error` detail (node id/type, exception type/message, truncated traceback,
|
|
37
|
+
`current_inputs`, OOM flag) and optional per-node + total execution timing.
|
|
38
|
+
Additive and backward-compatible.
|
|
39
|
+
|
|
40
|
+
### Security
|
|
41
|
+
|
|
42
|
+
- `download_model` auth inputs are validated (reject CR/LF/control chars; HTTP-token
|
|
43
|
+
header names); query-auth secrets are redacted from logs and error details.
|
|
44
|
+
- `install_custom_node` git refs are validated and run via `git checkout
|
|
45
|
+
--end-of-options <ref>`, closing an argv-option-injection vector.
|
|
46
|
+
- Spawned ComfyUI children now have `error` listeners so a missing/failed executable
|
|
47
|
+
can't crash the MCP server.
|
|
48
|
+
|
|
49
|
+
### Experimental
|
|
50
|
+
|
|
51
|
+
- **Embedded-agent backend POC** (flag-gated via `COMFYUI_MCP_AGENT_POC`): a cloudflared
|
|
52
|
+
quick-tunnel helper + an AI SDK `/api/chat` endpoint with bearer auth, a request body
|
|
53
|
+
cap, and a server-side model allowlist. Not part of default startup. See
|
|
54
|
+
`design/embedded-agent-panel.md` and `ROADMAP.md`.
|
|
55
|
+
|
|
56
|
+
### Dependencies
|
|
57
|
+
|
|
58
|
+
- Added `ai`, `@ai-sdk/anthropic`, `@ai-sdk/openai`, `@ai-sdk/google`, and `cloudflared` (experimental POC)
|
|
59
|
+
and declared `zod-to-json-schema` (docs generation). `npm audit`: 0 high vulnerabilities.
|
|
60
|
+
|
|
61
|
+
## [0.6.1] - 2026-05-25
|
|
62
|
+
|
|
63
|
+
### Added
|
|
64
|
+
|
|
65
|
+
- **Media upload** — `upload_video` and `upload_audio` copy local video/audio
|
|
66
|
+
files into ComfyUI's input directory so they can be referenced as workflow
|
|
67
|
+
inputs, mirroring the existing `upload_image` (closes #12).
|
|
68
|
+
|
|
7
69
|
## [0.6.0] - 2026-05-25
|
|
8
70
|
|
|
9
71
|
A large feature release that ports much of the [`comfy-cli`](https://github.com/Comfy-Org/comfy-cli)
|
|
@@ -58,4 +120,6 @@ subprocess fallback where the API can't do the job.
|
|
|
58
120
|
|
|
59
121
|
Earlier releases predate this changelog.
|
|
60
122
|
|
|
123
|
+
[0.7.0]: https://github.com/artokun/comfyui-mcp/releases/tag/v0.7.0
|
|
124
|
+
[0.6.1]: https://github.com/artokun/comfyui-mcp/releases/tag/v0.6.1
|
|
61
125
|
[0.6.0]: https://github.com/artokun/comfyui-mcp/releases/tag/v0.6.0
|
package/README.md
CHANGED
|
@@ -5,13 +5,16 @@
|
|
|
5
5
|
[](https://www.npmjs.com/package/comfyui-mcp)
|
|
6
6
|
[](https://nodejs.org)
|
|
7
7
|
[](./LICENSE)
|
|
8
|
+
[](https://comfyui-mcp.artokun.io/docs)
|
|
8
9
|
|
|
9
10
|
[](https://glama.ai/mcp/servers/artokun/comfyui-mcp)
|
|
10
11
|
[](https://glama.ai/mcp/servers/artokun/comfyui-mcp)
|
|
11
12
|
|
|
12
13
|
Works on **macOS**, **Linux**, and **Windows**. Auto-detects your ComfyUI installation and port.
|
|
13
14
|
|
|
14
|
-
**
|
|
15
|
+
**80+ MCP tools** | **10 slash commands** | **6 knowledge skills** | **3 autonomous agents** | **3 hooks**
|
|
16
|
+
|
|
17
|
+
📖 **Full documentation: [comfyui-mcp.artokun.io/docs](https://comfyui-mcp.artokun.io/docs)**
|
|
15
18
|
|
|
16
19
|
---
|
|
17
20
|
|
|
@@ -413,6 +416,11 @@ The server auto-detects your ComfyUI installation and port. Override with enviro
|
|
|
413
416
|
| `CIVITAI_API_TOKEN` | | CivitAI API token for model downloads |
|
|
414
417
|
| `HUGGINGFACE_TOKEN` | | HuggingFace token for higher API rate limits |
|
|
415
418
|
| `GITHUB_TOKEN` | | GitHub token for skill generation (avoids rate limits) |
|
|
419
|
+
| `REGISTRY_ACCESS_TOKEN` | | Comfy Registry API key for `publish_custom_node` (env-only, never logged) |
|
|
420
|
+
| `COMFYUI_DOWNLOAD_CACHE_DIR` | `~/.comfyui-mcp/cache` | Content-addressed model-download cache (dedup + concurrent coalescing) |
|
|
421
|
+
| `COMFYUI_LRU_CACHE_SIZE_GB` | `0` | Cap the download cache in GB; `0` disables LRU eviction |
|
|
422
|
+
| `COMFYUI_STARTUP_CHECK_INTERVAL_S` / `COMFYUI_STARTUP_CHECK_MAX_TRIES` | `1` / `20` | Readiness-probe interval and max tries when starting a local ComfyUI |
|
|
423
|
+
| `COMFYUI_ALWAYS_RESTART` | `false` | Auto-restart a crashed local ComfyUI (bounded by `COMFYUI_RESTART_MAX_ATTEMPTS` / `COMFYUI_RESTART_WINDOW_S`) |
|
|
416
424
|
| `LOG_LEVEL` | `info` | Logging verbosity: `debug`, `info`, `warn`, `error` |
|
|
417
425
|
|
|
418
426
|
### Transports
|
|
@@ -648,6 +656,42 @@ MIT — see [LICENSE](./LICENSE) for details.
|
|
|
648
656
|
|
|
649
657
|
## Changelog
|
|
650
658
|
|
|
659
|
+
The full, structured changelog lives in [CHANGELOG.md](./CHANGELOG.md). Recent highlights:
|
|
660
|
+
|
|
661
|
+
### 0.7.0 — 2026-05-25
|
|
662
|
+
|
|
663
|
+
**Stability + authoring.**
|
|
664
|
+
|
|
665
|
+
- **Custom-node authoring** — `scaffold_custom_node` (template a Python node pack) and `publish_custom_node` (publish to the Comfy Registry; `REGISTRY_ACCESS_TOKEN`).
|
|
666
|
+
- **`install_custom_node` ref pinning** — pin to a commit/branch/tag (URL ref or explicit `ref`).
|
|
667
|
+
- **`download_model` auth** — per-request `bearer`/`basic`/`header`/`query` auth for gated/private models.
|
|
668
|
+
- **Download cache** — content-addressed dedup + concurrent coalescing + optional LRU (`COMFYUI_DOWNLOAD_CACHE_DIR`, `COMFYUI_LRU_CACHE_SIZE_GB`).
|
|
669
|
+
- **Process supervision** — bounded startup readiness checks + opt-in bounded crash auto-restart for local installs.
|
|
670
|
+
- **Actionable failures** — `get_job_status` and completion notifications now surface ComfyUI execution errors (failing node, exception type/message, truncated traceback, OOM flag) plus optional per-node and total timing.
|
|
671
|
+
- **Security** — download-auth input validation + secret redaction; git-ref argv-injection hardening; spawn `error` listeners so a bad executable can't crash the server.
|
|
672
|
+
- **Experimental** — flag-gated embedded-agent backend POC (cloudflared tunnel + AI SDK chat).
|
|
673
|
+
- **Docs** — hosted Mintlify site with a schema-generated tool reference.
|
|
674
|
+
|
|
675
|
+
### 0.6.1 — 2026-05-25
|
|
676
|
+
|
|
677
|
+
- **`upload_video` / `upload_audio`** — copy local video/audio files into ComfyUI's input directory so they can be referenced as workflow inputs, mirroring `upload_image`.
|
|
678
|
+
|
|
679
|
+
### 0.6.0 — 2026-05-25
|
|
680
|
+
|
|
681
|
+
**comfy-cli capability port** — much of the [comfy-cli](https://github.com/Comfy-Org/comfy-cli) workflow is now exposed as MCP tools, preferring the ComfyUI-Manager HTTP API with a subprocess fallback:
|
|
682
|
+
|
|
683
|
+
- **Custom nodes** — `install_custom_node`, `update_custom_node`, `reinstall_custom_node`, `fix_custom_node`, `list_installed_nodes`, `sync_node_dependencies`.
|
|
684
|
+
- **Node snapshots** — `save_node_snapshot`, `restore_node_snapshot`, `list_node_snapshots`.
|
|
685
|
+
- **Node bisect** — `bisect_start`, `bisect_good`, `bisect_bad`, `bisect_reset`, `bisect_status` to isolate a faulty custom node.
|
|
686
|
+
- **Workflow dependencies** — `extract_workflow_dependencies`, `install_workflow_dependencies` (API- and UI-format workflows).
|
|
687
|
+
- **Install / update** — `install_comfyui`, `update_comfyui`, `update_all`.
|
|
688
|
+
- **Models** — `remove_model` (path-safe) and `download_civitai_model`.
|
|
689
|
+
- **Workspace & environment** — `get_workspace`, `set_default_workspace`, `list_workspaces`, `get_environment`.
|
|
690
|
+
- **API / partner nodes** — `list_api_nodes`, `get_api_node_schema`, `generate_with_api_node`.
|
|
691
|
+
- **ComfyUI-Manager config** — `configure_manager`.
|
|
692
|
+
- **Security** — CivitAI auth moved to an `Authorization: Bearer` header (token no longer leaks into logs/URLs); model-download filenames validated against path traversal; `COMFY_API_KEY` delivered via the `/prompt` `extra_data` payload rather than the workflow.
|
|
693
|
+
- Rewrote core tool/parameter descriptions for clearer agent tool-selection; added a `Dockerfile` and the [Glama](https://glama.ai) listing.
|
|
694
|
+
|
|
651
695
|
### 0.5.0 — 2026-05-21
|
|
652
696
|
|
|
653
697
|
- **Streamable-HTTP transport** — opt in with `--http` (or `MCP_TRANSPORT=http`) to serve MCP over HTTP at `/mcp` for gateways, remote, and `fetch`-based clients. stdio remains the default; `--host`/`--port` configure the bind.
|
package/ROADMAP.md
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# ComfyUI MCP — Roadmap
|
|
2
|
+
|
|
3
|
+
**Vision:** an agent can author, run, *fix*, and *ship* ComfyUI — from a prompt, to a working
|
|
4
|
+
workflow, to an in-UI assistant that edits the graph live, to a published custom node. comfyui-mcp
|
|
5
|
+
is the backend tool surface; the pieces below extend it up into the ComfyUI frontend and out to the
|
|
6
|
+
Comfy Registry.
|
|
7
|
+
|
|
8
|
+
> Tracking: themes map to beads **epics**; items map to issues. Run `bd ready` for what's actionable.
|
|
9
|
+
> This file is the human-readable map; beads is the source of truth for status.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## ✅ Shipped (0.6.x)
|
|
14
|
+
- comfy-cli capability port (custom-node mgmt, snapshots, bisect, workflow deps, install/update,
|
|
15
|
+
models, workspace/env, API nodes, manager config) — the tool surface is now 70+.
|
|
16
|
+
- `upload_video` / `upload_audio`.
|
|
17
|
+
- Mintlify docs site (schema-generated tool reference) at comfyui-mcp.artokun.io/docs.
|
|
18
|
+
- Glama listing + TDQS A-grade pass; blog post (TDQS case study).
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Theme A — Frontend extension authoring (enabler)
|
|
23
|
+
The new ComfyUI frontend extension API (`@comfyorg/extension-api`, v2; replaces
|
|
24
|
+
`app.registerExtension`) is brand-new and absent from model training data. Teach it to agents via skills so we (and any
|
|
25
|
+
user) can write correct frontend extensions — the substrate for Theme B.
|
|
26
|
+
|
|
27
|
+
- **A1 — Skill: author v2 extensions.** `defineNode`/`defineExtension`/`defineWidget`,
|
|
28
|
+
`defineSidebarTab`, `NodeHandle`/`WidgetHandle`, event namespaces (`execution`/`graph`/`server`/
|
|
29
|
+
`workbench`), `DisposableHandle` contract, identity helpers, the event+getter/setter idiom.
|
|
30
|
+
- **A2 — Skill: migrate v1 → v2.** Map legacy `app.registerExtension` / prototype-patching patterns
|
|
31
|
+
to the v2 API (the ecosystem dashboard's api-diff/patterns are the source). DrJKL collaboration hook.
|
|
32
|
+
> Source: `Comfy-Org/ComfyUI_frontend` PRs #12142–#12145; `src/extension-api/`. Package not yet on npm.
|
|
33
|
+
|
|
34
|
+
## Theme B — Embedded agent panel (north star)
|
|
35
|
+
A ComfyUI **sidebar tab** (AI icon) hosting an [AI SDK](https://sdk.vercel.ai) chat window. You chat
|
|
36
|
+
with Claude Code / Codex / Gemini and it reads + **fixes the live workflow in the UI**. Connection
|
|
37
|
+
to the agent "app" is via a **cloudflared tunnel** (Ungate-style). Full design:
|
|
38
|
+
[`design/embedded-agent-panel.md`](./design/embedded-agent-panel.md).
|
|
39
|
+
|
|
40
|
+
- **B1 — Tunnel helper.** Port Ungate's `tunnel-manager` (the `cloudflared` npm lib:
|
|
41
|
+
`Tunnel.quick(localUrl) → public https URL`) into our server as `startQuickTunnel(port)`, behind a flag.
|
|
42
|
+
- **B2 — AI SDK chat endpoint.** `POST /api/chat` → `streamText(...).toUIMessageStreamResponse()`,
|
|
43
|
+
provider registry (Anthropic/OpenAI/Google), one real server-side tool end-to-end.
|
|
44
|
+
- **B3 — Sidebar panel.** `defineSidebarTab` + AI SDK `useChat` pointed at the tunnel; render stream.
|
|
45
|
+
- **B4 — Live graph edits.** Graph-mutation tools (`set_widget_value`, `add_node`, `connect`, …) as
|
|
46
|
+
AI SDK **client-side tools** resolved in the panel via extension-api (`NodeHandle`/`WidgetHandle`).
|
|
47
|
+
*This is the magic — "fix it in the UI."*
|
|
48
|
+
- **B5 — Wire comfyui-mcp** as the server-side tool surface via AI SDK MCP client.
|
|
49
|
+
- **B6 — Provider switch + connection/key UX + ship** as a node pack.
|
|
50
|
+
|
|
51
|
+
## Theme C — Custom-node authoring lifecycle (NEW)
|
|
52
|
+
Create a Python custom node from a template, install + restart to test, then publish to the
|
|
53
|
+
[Comfy Registry](https://docs.comfy.org/registry/overview). The full "agent builds & ships a node" loop.
|
|
54
|
+
|
|
55
|
+
- **C1 — Skill: ComfyUI Registry + custom-node authoring.** Minimal node structure
|
|
56
|
+
(`__init__.py`, `NODE_CLASS_MAPPINGS`/`NODE_DISPLAY_NAME_MAPPINGS`, `INPUT_TYPES`/`RETURN_TYPES`/
|
|
57
|
+
`FUNCTION`/`CATEGORY`, optional `WEB_DIRECTORY`), `pyproject.toml` (`[project]` + `[tool.comfy]`:
|
|
58
|
+
`PublisherId`/`DisplayName`/`Icon`), publisher + API key flow, `comfy node init`/`publish`, the
|
|
59
|
+
`Comfy-Org/publish-node-action` CI workflow + `REGISTRY_ACCESS_TOKEN`.
|
|
60
|
+
- **C2 — MCP `scaffold_custom_node`.** Generate a node pack into `custom_nodes/<name>/` from a
|
|
61
|
+
template (prefer `comfy node init`; fall back to our own template). Local-only.
|
|
62
|
+
- **C3 — Test loop.** Install → `restart_comfyui` (existing tool) → verify the new `class_type` appears in
|
|
63
|
+
`/object_info` → enqueue a smoke-test workflow using it.
|
|
64
|
+
- **C4 — MCP `publish_custom_node`.** `comfy node publish` with token; validate `pyproject.toml`
|
|
65
|
+
metadata first. Token via env (never in URLs/logs), like the CivitAI pattern.
|
|
66
|
+
- **C5 — Template + CI scaffold.** A spawnable starter (Python node + optional v2 frontend +
|
|
67
|
+
`publish_action.yml`) so `create → restart → test → publish` is one smooth path.
|
|
68
|
+
|
|
69
|
+
## Theme D — Discovery (from prior notes)
|
|
70
|
+
- **D1 — `comfy-researcher` agent + skill cache.** Problem→packs research over the Registry +
|
|
71
|
+
HF + community, with a cached skill layer. (Folded in from `TODO.md`.)
|
|
72
|
+
|
|
73
|
+
## Theme E — Production hardening & I/O (from [Salad's comfyui-api](https://github.com/SaladTechnologies/comfyui-api), MIT)
|
|
74
|
+
Harden existing tools and add production I/O, adapting patterns from comfyui-api. We are an
|
|
75
|
+
agent-facing MCP, not a horizontally-scaled web service — so we cherry-pick and skip the
|
|
76
|
+
stateless-server / Salad-specific bits (replicas, deletion-cost, k8s proxy).
|
|
77
|
+
|
|
78
|
+
**Harden existing tools**
|
|
79
|
+
- **E1 — Download cache + dedup.** Content-address downloads (SHA-256 of URL → cache dir + sidecar
|
|
80
|
+
`.meta`, symlink to target), reuse on hit, coalesce concurrent same-URL fetches, optional LRU
|
|
81
|
+
eviction. Hardens `download_model`/`download_civitai_model`. (`remote-storage-manager.ts`, `utils.hashUrlBase64`)
|
|
82
|
+
- **E2 — Download auth + storage backends.** Per-URL credential resolution (bearer/basic/header/
|
|
83
|
+
query/s3) and `s3://` / huggingface / azure-blob / http(s) sources for gated/private models.
|
|
84
|
+
(`credential-resolver.ts`, `storage-providers/*`)
|
|
85
|
+
- **E3 — ComfyUI supervision.** Auto-restart-on-crash + bounded startup readiness checks
|
|
86
|
+
(interval/max-tries) + a real readiness signal. Hardens `start/stop/restart_comfyui`. (`comfy.ts`)
|
|
87
|
+
- **E4 — Rich errors + execution stats.** Surface ComfyUI `execution_error` (exception_type,
|
|
88
|
+
traceback, current_inputs — e.g. OOM) and per-node timing in job results. Hardens
|
|
89
|
+
`get_job_status`/completion reporting. (`event-emitters.ts`)
|
|
90
|
+
- **E7 — Custom-node ref-pinning.** Install a node pack pinned to a commit/branch/tag across
|
|
91
|
+
GitHub/GitLab/Bitbucket URL formats. Hardens `install_custom_node` (reproducibility). (`git-url-parser.ts`)
|
|
92
|
+
- **E11 — Unique output filenames.** Prefix a request id to output filenames to avoid collisions.
|
|
93
|
+
|
|
94
|
+
**Additive capabilities**
|
|
95
|
+
- **E5 — Declarative environment manifest.** `apply_manifest` (yaml/json): apt/pip/custom_nodes/
|
|
96
|
+
models (before/after start), idempotent — reproducible setups. Pairs with Theme C + workspace.
|
|
97
|
+
- **E6 — Output upload to cloud storage.** Push generated outputs to S3 / Azure / HF / HTTP and
|
|
98
|
+
return URLs. (`remote-storage-manager.ts`, `storage-providers/*`)
|
|
99
|
+
- **E8 — Server-side image conversion.** `sharp` PNG↔JPEG↔WebP + quality options for compact outputs. (`image-tools.ts`)
|
|
100
|
+
- **E9 — Dynamic model loading.** URL in a model-loading node → auto-download + cache before exec. (`comfy-node-preprocessors.ts`)
|
|
101
|
+
- **E10 — Warmup.** Run a warmup workflow after `start_comfyui` to preload models. (`comfy.warmupComfyUI`)
|
|
102
|
+
- **E12 — Outbound webhooks (later).** Signed Standard Webhooks on completion/progress + retries —
|
|
103
|
+
mainly for the headless/bridge path, not the interactive plugin. (`event-emitters.ts`)
|
|
104
|
+
|
|
105
|
+
> License: comfyui-api is MIT (deps MIT/Apache-2.0; ComfyUI itself GPL-3.0). Patterns/code are safe
|
|
106
|
+
> to adapt with attribution. Clone for reference: `~/code/salad-comfyui-api`.
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## "Roadmap to the roadmap" — sequencing
|
|
111
|
+
|
|
112
|
+
| Phase | Goal | Items |
|
|
113
|
+
| --- | --- | --- |
|
|
114
|
+
| **0 — now (parallel)** | Enablers + node lifecycle + panel backend POC | A1, A2, C1, C2, C4, B1, B2 |
|
|
115
|
+
| **1 — prove the loop** | Live in-UI editing works | B3, B4, C3, C5, E5, E6 |
|
|
116
|
+
| **2 — productionize** | Full agent panel + discovery + I/O | B5, B6, D1, E8, E9, E10, E12 |
|
|
117
|
+
| **Hardening — continuous** | Reliability + I/O from comfyui-api | E1, E2, E3, E4, E7, E11 |
|
|
118
|
+
|
|
119
|
+
Phase 0 ships value immediately (skills + node tooling) and de-risks the panel (tunnel + streaming)
|
|
120
|
+
before any frontend work. Phase 1 depends on the v2 extension-api package being closer to publication, because the panel UI is built on it.
|
package/TODO.md
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# ComfyUI MCP — Future Work
|
|
2
|
+
|
|
3
|
+
## Node Discovery & Research Agent
|
|
4
|
+
|
|
5
|
+
**Priority**: High — addresses discoverability gap
|
|
6
|
+
|
|
7
|
+
### Problem
|
|
8
|
+
Users don't know what custom node packs exist for their use case. Finding, evaluating, and understanding nodes is a manual process (browsing the registry, reading READMEs, trial and error).
|
|
9
|
+
|
|
10
|
+
### Proposed Solution
|
|
11
|
+
A `comfy-researcher` agent (or enhanced `comfy-explorer`) that:
|
|
12
|
+
|
|
13
|
+
1. **Search phase**: Takes a user's problem description (e.g., "I need better face detail in my generations") and:
|
|
14
|
+
- Searches the ComfyUI Registry (`api.comfy.org`) for relevant node packs
|
|
15
|
+
- Cross-references with HuggingFace for related models
|
|
16
|
+
- Checks community forums/GitHub for recommendations
|
|
17
|
+
|
|
18
|
+
2. **Analyze phase**: For each candidate node pack:
|
|
19
|
+
- Runs `generate_node_skill` to deep-dive into the pack
|
|
20
|
+
- Checks if it's already installed (`/object_info`)
|
|
21
|
+
- Reviews GitHub stars, last update, compatibility
|
|
22
|
+
- Evaluates if it actually solves the user's problem
|
|
23
|
+
|
|
24
|
+
3. **Recommend phase**: Returns a ranked list with:
|
|
25
|
+
- Why each pack fits (or doesn't)
|
|
26
|
+
- Installation instructions
|
|
27
|
+
- Example workflow snippets showing how to integrate with the user's existing workflow
|
|
28
|
+
|
|
29
|
+
### Architecture Options
|
|
30
|
+
|
|
31
|
+
**Option A: Agent-only** — A Claude Code agent (`comfy-researcher.md`) that orchestrates existing tools:
|
|
32
|
+
- `search_custom_nodes` (already have)
|
|
33
|
+
- `get_node_pack_details` (already have)
|
|
34
|
+
- `generate_node_skill` (already have)
|
|
35
|
+
- WebSearch + WebFetch for community research
|
|
36
|
+
- Pros: Simple, uses existing infrastructure
|
|
37
|
+
- Cons: Slow (multiple API calls per session)
|
|
38
|
+
|
|
39
|
+
**Option B: Agent + Cache** — Same agent but with a skill cache layer:
|
|
40
|
+
- Skills generated by `generate_node_skill` are cached at `~/.claude/skills/comfyui-packs/{pack}@{version}/SKILL.md`
|
|
41
|
+
- Agent checks cache first before re-analyzing
|
|
42
|
+
- A cache hit also gives `visualize_workflow` an instant category/I/O lookup
|
|
43
|
+
- Compound key: `{registry_id}@{version}` or `{github_owner}/{repo}@{commit_sha}`
|
|
44
|
+
- Pros: Fast repeat lookups, shared with visualizer
|
|
45
|
+
- Cons: Cache invalidation, storage
|
|
46
|
+
|
|
47
|
+
**Option C: MCP Resource** — Expose cached skills as MCP resources:
|
|
48
|
+
- `comfyui://skills/{pack_name}` returns cached SKILL.md content
|
|
49
|
+
- Claude Code auto-loads relevant skills when working with workflows
|
|
50
|
+
- Pros: Deep integration, always-available context
|
|
51
|
+
- Cons: More complex to implement
|
|
52
|
+
|
|
53
|
+
### Skill Cache Schema
|
|
54
|
+
```
|
|
55
|
+
~/.claude/skills/comfyui-packs/
|
|
56
|
+
├── comfyui-impact-pack@4.5.0/
|
|
57
|
+
│ ├── SKILL.md
|
|
58
|
+
│ └── metadata.json # {version, category_map, output_type_map, cached_at}
|
|
59
|
+
├── comfyui-kjnodes@1.2.3/
|
|
60
|
+
│ ├── SKILL.md
|
|
61
|
+
│ └── metadata.json
|
|
62
|
+
└── index.json # {pack_id: {version, categories: [...], node_count}}
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Cache Integration Points
|
|
66
|
+
1. **`visualize_workflow`** — Before fetching `/object_info`, check skill cache for category maps
|
|
67
|
+
2. **`generate_node_skill`** — Write to cache on generation, skip if cache hit with matching version
|
|
68
|
+
3. **`comfy-researcher` agent** — Read cache for instant analysis of previously-seen packs
|
|
69
|
+
4. **`comfy-explorer` agent** — Read cache to understand node context when exploring packs
|
|
70
|
+
|
|
71
|
+
### Next Steps
|
|
72
|
+
- [ ] Create `comfy-researcher` agent definition in `plugin/agents/researcher.md`
|
|
73
|
+
- [ ] Add skill cache layer to `generate_node_skill` tool
|
|
74
|
+
- [ ] Wire cache into `visualize_workflow` for category lookups
|
|
75
|
+
- [ ] Add `/comfy-research "better face detail"` slash command
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# From B to A: sharpening 70+ tool descriptions with TDQS
|
|
2
|
+
|
|
3
|
+
*by [artokun](https://github.com/artokun) · May 25, 2026 · MCP, TDQS, case study*
|
|
4
|
+
|
|
5
|
+
[comfyui-mcp](https://github.com/artokun/comfyui-mcp) is an MCP server that lets an AI agent
|
|
6
|
+
drive [ComfyUI](https://github.com/comfyanonymous/ComfyUI) — generate images, run and author
|
|
7
|
+
workflows, manage models and custom nodes. It exposes 70+ tools. When it landed on Glama, the
|
|
8
|
+
dashboard gave it a **Tool Definition Quality Score of B**. Here's what fixing that taught us —
|
|
9
|
+
and a few rules any MCP author can steal.
|
|
10
|
+
|
|
11
|
+
## The number that actually matters
|
|
12
|
+
|
|
13
|
+
[TDQS](https://glama.ai/blog/2026-04-03-tool-definition-quality-score-tdqs) blends **Tool
|
|
14
|
+
Definition Quality (70%)** and **Server Coherence (30%)**. Each tool is scored 1–5 across six
|
|
15
|
+
dimensions, but the part that bit us is how the per-tool scores roll up into the server score:
|
|
16
|
+
|
|
17
|
+
> server definition quality = 60% × **mean** TDQS + 40% × **minimum** TDQS
|
|
18
|
+
|
|
19
|
+
That 40%-on-the-minimum is the whole game. Our *average* tool was already ~4.1 — solidly A. Most
|
|
20
|
+
tools scored 4.3–4.7. But the grade was a B, because our single weakest tool, `cancel_job`,
|
|
21
|
+
scored **3.1** and dragged everything down with it.
|
|
22
|
+
|
|
23
|
+
**One vague tool caps your whole server.** So the highest-leverage work isn't polishing your best
|
|
24
|
+
tools — it's finding and fixing your worst one.
|
|
25
|
+
|
|
26
|
+
## The pattern hiding in the dimensions
|
|
27
|
+
|
|
28
|
+
When we expanded the low scorers, the same two dimensions were weak almost everywhere — even on
|
|
29
|
+
otherwise-strong tools:
|
|
30
|
+
|
|
31
|
+
- **Usage Guidelines** (2/5 at worst): the description never said *when* to use this tool versus
|
|
32
|
+
a sibling that does something similar.
|
|
33
|
+
- **Behavior** (2/5): it didn't disclose side effects, preconditions, or whether the tool is even
|
|
34
|
+
read-only.
|
|
35
|
+
|
|
36
|
+
A third, **Parameters**, was quietly capped: our zod schemas already described every parameter,
|
|
37
|
+
so when the prose merely *restated* the schema, the dimension maxed out at 3. Descriptions have to
|
|
38
|
+
add something the schema can't.
|
|
39
|
+
|
|
40
|
+
## What we changed
|
|
41
|
+
|
|
42
|
+
We rewrote descriptions against three rules:
|
|
43
|
+
|
|
44
|
+
**1. Disambiguate against siblings.** If you ship three ways to cancel things, say which is which.
|
|
45
|
+
|
|
46
|
+
**2. Disclose behavior up front.** Read-only? Mutates disk? Requires a running server?
|
|
47
|
+
Asynchronous (returns an id you poll later)? Local-only vs. works-against-remote? Destructive and
|
|
48
|
+
irreversible?
|
|
49
|
+
|
|
50
|
+
**3. Add meaning beyond the schema.** Units, valid ranges, what omitting an optional param does,
|
|
51
|
+
and what comes back.
|
|
52
|
+
|
|
53
|
+
Here's `cancel_job`, our 3.1, before and after:
|
|
54
|
+
|
|
55
|
+
> **Before:** "Cancel or interrupt a running ComfyUI job. Optionally target by prompt_id."
|
|
56
|
+
|
|
57
|
+
> **After:** "Interrupt the **currently running** ComfyUI job, optionally only when its prompt_id
|
|
58
|
+
> matches. Stops in-progress execution — the partial result is discarded and not recoverable — and
|
|
59
|
+
> does **not** remove pending/queued jobs. Requires a reachable ComfyUI server. Use this for the
|
|
60
|
+
> job executing right now; use `cancel_queued_job` to remove one specific pending job, or
|
|
61
|
+
> `clear_queue` to drop all pending jobs."
|
|
62
|
+
|
|
63
|
+
Same tool, same parameters. The second version tells an agent when to reach for it, what it does
|
|
64
|
+
to the world, and what *not* to use it for.
|
|
65
|
+
|
|
66
|
+
## A gotcha: score what the checker actually sees
|
|
67
|
+
|
|
68
|
+
One of the lowest-scoring definitions in our own audit was the template behind *auto-loaded
|
|
69
|
+
workflow* tools — tools the server generates from `*.json` files a user drops in a directory. We
|
|
70
|
+
almost spent time polishing it. But TDQS scores the tools the **running server actually exposes**,
|
|
71
|
+
and Glama boots the server in a clean environment with no workflow files — so those tools never
|
|
72
|
+
register and never get scored.
|
|
73
|
+
|
|
74
|
+
Lesson: audit against the tool list your server emits on a *fresh* boot, not the theoretical
|
|
75
|
+
maximum.
|
|
76
|
+
|
|
77
|
+
## Keeping it from rotting
|
|
78
|
+
|
|
79
|
+
Descriptions drift. To keep ours honest, the tool reference is **generated from the live schemas**
|
|
80
|
+
— a script boots the server with a capturing mock, reads each tool's name, description, and zod
|
|
81
|
+
schema, and emits the docs. One source of truth for both the agent and humans, so a sloppy edit
|
|
82
|
+
shows up immediately. (Our [docs](https://comfyui-mcp.artokun.io/docs) are built this way.)
|
|
83
|
+
|
|
84
|
+
## The result
|
|
85
|
+
|
|
86
|
+
Raising the floor — `cancel_job`, `list_local_models`, `search_models`, and the rest of the
|
|
87
|
+
sub-3.5 cluster — plus the cross-cutting Usage/Behavior pass took the minimum from **3.1 to ~4.0**.
|
|
88
|
+
With the 60/40 split, that pulls the server out of B and into A on the next re-index.
|
|
89
|
+
|
|
90
|
+
## Takeaways for MCP authors
|
|
91
|
+
|
|
92
|
+
1. **Fix your worst tool first** — the 40%-minimum weighting means it sets your grade.
|
|
93
|
+
2. **Every description should answer three questions:** what does this do to the world, when do I
|
|
94
|
+
use it instead of a sibling, and what do the parameters mean beyond their types?
|
|
95
|
+
3. **Don't just echo the schema** — it already covers structure; prose should add intent.
|
|
96
|
+
4. **Audit the fresh-boot tool list**, not your theoretical one.
|
|
97
|
+
5. **Generate the reference from the schema** so quality can't silently regress.
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
comfyui-mcp is open source: [github.com/artokun/comfyui-mcp](https://github.com/artokun/comfyui-mcp)
|
|
102
|
+
· docs at [comfyui-mcp.artokun.io/docs](https://comfyui-mcp.artokun.io/docs).
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# Embedded agent panel — Ungate extraction → AI-SDK chat inside ComfyUI
|
|
2
|
+
|
|
3
|
+
> North star: a ComfyUI **sidebar tab** (AI icon) that hosts a chat window. You talk to a coding
|
|
4
|
+
> agent (Claude Code / Codex / Gemini) and it **reads and fixes the live workflow in the UI**.
|
|
5
|
+
> Connection to the agent "app" is set up via a **cloudflared tunnel**, modeled on how the
|
|
6
|
+
> [Ungate](https://github.com/orchidfiles/ungate) VS Code extension connects Cursor to a local proxy.
|
|
7
|
+
|
|
8
|
+
## 1. What Ungate is (reference architecture, MIT)
|
|
9
|
+
|
|
10
|
+
Three tiers in a pnpm monorepo (`~/code/ungate`):
|
|
11
|
+
|
|
12
|
+
- **`apps/extension`** (VS Code host) — lifecycle + supervision:
|
|
13
|
+
- `tunnel-manager.ts` — wraps the [`cloudflared`](https://www.npmjs.com/package/cloudflared) npm package.
|
|
14
|
+
- `api-server.ts` — spawns the local proxy as a detached child, parses its port from stdout,
|
|
15
|
+
health-checks `/health`, restarts/auto-stops.
|
|
16
|
+
- `dashboard.ts` + webview — hosts the Svelte UI, relays messages.
|
|
17
|
+
- **`apps/api`** (local proxy) — Fastify server: provider OAuth (Claude/ChatGPT), Anthropic↔OpenAI
|
|
18
|
+
translation, tool-call mapping, streaming, `/health`. Listens on `0.0.0.0:PORT`, prints
|
|
19
|
+
`localhost:PORT` to stdout. This is the part that turns a **subscription** into an OpenAI-shaped endpoint.
|
|
20
|
+
- **`apps/web`** (webview UI) — Svelte dashboard: tunnel panel, provider auth, analytics. Talks to
|
|
21
|
+
the host via `postMessage` (`start-tunnel` / `tunnel-status` / …).
|
|
22
|
+
|
|
23
|
+
Flow: `Cursor chat → Cursor backend → Cloudflare tunnel → Ungate proxy → Provider API → back`.
|
|
24
|
+
The tunnel exists because **Cursor's backend can't call `localhost`** — it needs a public HTTPS URL.
|
|
25
|
+
|
|
26
|
+
## 2. The bits that matter (verbatim mechanics)
|
|
27
|
+
|
|
28
|
+
### 2a. Tunnel mechanic — the crux (`tunnel-manager.ts`)
|
|
29
|
+
```ts
|
|
30
|
+
import { bin, install, use, Tunnel } from 'cloudflared';
|
|
31
|
+
// ensure binary: if !fs.existsSync(bin) → install(<path>) then use(<path>)
|
|
32
|
+
const t = Tunnel.quick(`http://localhost:${port}`, {
|
|
33
|
+
'--config': process.platform === 'win32' ? 'NUL' : '/dev/null',
|
|
34
|
+
'--edge-ip-version': '4',
|
|
35
|
+
});
|
|
36
|
+
t.on('url', (url) => { /* public https://<rand>.trycloudflare.com */ });
|
|
37
|
+
t.on('stderr',(line) => { /* logs */ });
|
|
38
|
+
t.on('error', (err) => { /* error state */ });
|
|
39
|
+
t.on('exit', (code) => { /* stopped/error */ });
|
|
40
|
+
t.stop();
|
|
41
|
+
```
|
|
42
|
+
State machine: `stopped → starting → (installing) → running → error`. Auto-stops on an interval
|
|
43
|
+
when there are no live clients. **This is plain Node — it drops straight into our server; no VS Code needed.**
|
|
44
|
+
|
|
45
|
+
### 2b. Local-server supervision (`api-server.ts`)
|
|
46
|
+
- `cp.spawn(node, [entry], { detached: true })`, `.unref()`.
|
|
47
|
+
- Detect ready by regex-matching `localhost:(\d+)` on stdout.
|
|
48
|
+
- Poll `GET /health` on an interval; restart on crash; stop after a no-clients grace period.
|
|
49
|
+
- Reattach to an already-running instance on `EADDRINUSE` (multi-window safe).
|
|
50
|
+
|
|
51
|
+
### 2c. Connection UX + messaging (`apps/web` tunnel-store + `$shared/vscode`)
|
|
52
|
+
- Reactive `TunnelState { status, url, error }` rendered in a panel.
|
|
53
|
+
- Buttons post intents to the host (`start-tunnel`/`stop-tunnel`/`restart-tunnel`); host pushes
|
|
54
|
+
`tunnel-status` back. User **copies the public URL + a proxy key** into the client.
|
|
55
|
+
|
|
56
|
+
### 2d. Security model (README)
|
|
57
|
+
- CORS `origin: '*'` on the proxy (tunnel is the perimeter).
|
|
58
|
+
- **Tunnel URL + proxy key are secrets**; anyone with both can drive your proxy. Key is rotatable.
|
|
59
|
+
- OAuth/provider creds stored locally only.
|
|
60
|
+
|
|
61
|
+
## 3. Mapping → our stack (AI SDK + ComfyUI extension-api)
|
|
62
|
+
|
|
63
|
+
| Ungate piece | Our equivalent | Keep / Replace / New |
|
|
64
|
+
| --- | --- | --- |
|
|
65
|
+
| `tunnel-manager.ts` (cloudflared) | Same lib, lifted into our local app | **Keep** (port nearly verbatim) |
|
|
66
|
+
| `api-server.ts` supervisor | We *are* the server (no separate child needed at first) | **Simplify** |
|
|
67
|
+
| `apps/api` Fastify proxy + provider OAuth/translation | **AI SDK** `streamText` + provider registry (`@ai-sdk/anthropic`, `@ai-sdk/openai`, `@ai-sdk/google`) | **Replace** (AI SDK does translation/streaming/tools) |
|
|
68
|
+
| Cursor (the client) | **ComfyUI sidebar tab** with AI SDK `useChat` | **Replace** |
|
|
69
|
+
| VS Code webview + `postMessage` | ComfyUI `defineSidebarTab` (Vue) + HTTP/WS to tunnel | **Replace** |
|
|
70
|
+
| Webview tunnel panel (copy URL+key) | Panel "Connection" section (paste/auto URL + key) | **Keep shape** |
|
|
71
|
+
| — (Ungate has none) | **Live graph edits** via `NodeHandle`/`WidgetHandle` as AI SDK *client-side tools* | **New (the magic)** |
|
|
72
|
+
| comfyui-mcp tools | Server-side tools via AI SDK MCP client (`experimental_createMCPClient`) | **Keep + wire in** |
|
|
73
|
+
|
|
74
|
+
## 4. Target shape
|
|
75
|
+
|
|
76
|
+
### "The app" — local bridge (we build; likely extends comfyui-mcp)
|
|
77
|
+
A Node HTTP server on `localhost:PORT`:
|
|
78
|
+
- `POST /api/chat` → AI SDK `streamText({ model, messages, tools }).toUIMessageStreamResponse()`.
|
|
79
|
+
- **Provider registry** picks Claude / Codex(OpenAI) / Gemini per request (the pluggable "agent").
|
|
80
|
+
- **Tools**:
|
|
81
|
+
- *server-side* (have `execute`): generate, search/download models, build/modify/enqueue big
|
|
82
|
+
workflows, queue mgmt — backed by **comfyui-mcp** (consumed as MCP tools).
|
|
83
|
+
- *client-side* (no `execute`; resolved in the panel): `read_graph`, `set_widget_value`,
|
|
84
|
+
`add_node`, `connect`, `move_node`, … → executed via extension-api in the browser.
|
|
85
|
+
- `GET /health` for supervision; CORS open (tunnel is the perimeter); chat requests authenticated with a bearer **session key**.
|
|
86
|
+
- **cloudflared `Tunnel.quick`** exposes it → public HTTPS URL the ComfyUI page can reach (even when
|
|
87
|
+
ComfyUI is remote or served over HTTPS — solves mixed-content + remote installs).
|
|
88
|
+
|
|
89
|
+
### ComfyUI panel — `defineSidebarTab` (AI icon)
|
|
90
|
+
- AI SDK `useChat({ api: <tunnelURL>/api/chat, headers: { Authorization: Bearer <key> } })`.
|
|
91
|
+
- **Connection** section: paste/auto-discover tunnel URL + key (Ungate `TunnelPanel` analog).
|
|
92
|
+
- Renders streamed messages + tool-call cards.
|
|
93
|
+
- `onToolCall` → for graph-mutation tools, call extension-api (`NodeHandle.setValue(...)`, etc.),
|
|
94
|
+
then `addToolResult(...)`. This is how "fix the workflow directly in the UI" happens — through the
|
|
95
|
+
same undo-able command path a human uses. Live context comes from `graph`/`execution` events.
|
|
96
|
+
|
|
97
|
+
## 5. Key adaptations vs. Ungate
|
|
98
|
+
- **Same tunnel direction** (expose local app over HTTPS so a remote/HTTPS ComfyUI reaches it).
|
|
99
|
+
- **Drop the OAuth-subscription proxy for v1** — AI SDK + provider keys is far less code. (If we
|
|
100
|
+
later want Ungate's "use your subscription, not API tokens," `apps/api/src/auth/*` is the part to lift.)
|
|
101
|
+
- **Client-side tool execution is the novel core** — Ungate only proxies; we additionally let the
|
|
102
|
+
model *act on the open graph* via extension-api.
|
|
103
|
+
|
|
104
|
+
## 6. Build order
|
|
105
|
+
0. **v2 extension-API authoring skill** (enabler — lets us write the ComfyUI extension correctly).
|
|
106
|
+
1. **Tunnel helper** — port `tunnel-manager` into our server (`startQuickTunnel(port) → url`), behind a flag.
|
|
107
|
+
2. **AI SDK chat endpoint** — `/api/chat` with one server-side tool (`generate_image`) end-to-end.
|
|
108
|
+
3. **Sidebar skeleton** — `defineSidebarTab` + `useChat` hitting the tunnel; render stream.
|
|
109
|
+
4. **Live edit** — one client-side tool (`set_widget_value`) applied via `WidgetHandle`; prove the loop.
|
|
110
|
+
5. **Wire comfyui-mcp** as the server-side tool surface (MCP client); expand client-side graph tools.
|
|
111
|
+
6. **Provider switch** (Claude/Codex/Gemini) + connection/key UX + polish into a shippable node pack.
|
|
112
|
+
|
|
113
|
+
## References
|
|
114
|
+
- Ungate (MIT): https://github.com/orchidfiles/ungate — clone at `~/code/ungate`.
|
|
115
|
+
- `cloudflared` npm: https://www.npmjs.com/package/cloudflared
|
|
116
|
+
- ComfyUI v2 extension API: `Comfy-Org/ComfyUI_frontend` PRs #12142–#12145; `src/extension-api/`.
|
|
117
|
+
- AI SDK: provider registry, `streamText`, `useChat`, client-side tools (`onToolCall`/`addToolResult`),
|
|
118
|
+
MCP client (`experimental_createMCPClient`).
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
export interface AgentPocOptions {
|
|
2
|
+
port?: number;
|
|
3
|
+
host?: string;
|
|
4
|
+
/** Open a cloudflared quick tunnel and log the public URL. */
|
|
5
|
+
tunnel?: boolean;
|
|
6
|
+
/** Bearer token required on /api/chat. Auto-generated if omitted. */
|
|
7
|
+
token?: string;
|
|
8
|
+
/** Max accepted request body size in bytes for /api/chat (default 1 MiB). */
|
|
9
|
+
maxBodyBytes?: number;
|
|
10
|
+
}
|
|
11
|
+
export interface AgentPocHandle {
|
|
12
|
+
/** Local URL the chat server is listening on. */
|
|
13
|
+
localUrl: string;
|
|
14
|
+
/** Public tunnel URL, if a tunnel was opened. */
|
|
15
|
+
publicUrl: string | null;
|
|
16
|
+
/** Bearer token clients must send as `Authorization: Bearer <token>`. */
|
|
17
|
+
token: string;
|
|
18
|
+
stop(): Promise<void>;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Start the experimental agent POC HTTP server (and optional tunnel).
|
|
22
|
+
* Returns a handle (incl. the bearer token). Callers gate this behind the COMFYUI_MCP_AGENT_POC env flag.
|
|
23
|
+
*/
|
|
24
|
+
export declare function startAgentPoc(options?: AgentPocOptions): Promise<AgentPocHandle>;
|
|
25
|
+
/**
|
|
26
|
+
* Gated bootstrap. Only starts the POC when COMFYUI_MCP_AGENT_POC is truthy.
|
|
27
|
+
* Safe to call unconditionally from a side entry; a no-op otherwise.
|
|
28
|
+
*/
|
|
29
|
+
export declare function maybeStartAgentPoc(): Promise<AgentPocHandle | null>;
|
|
30
|
+
//# sourceMappingURL=agent-poc.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"agent-poc.d.ts","sourceRoot":"","sources":["../../src/experimental/agent-poc.ts"],"names":[],"mappings":"AA0BA,MAAM,WAAW,eAAe;IAC9B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,8DAA8D;IAC9D,MAAM,CAAC,EAAE,OAAO,CAAC;IACjB,qEAAqE;IACrE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,6EAA6E;IAC7E,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED,MAAM,WAAW,cAAc;IAC7B,iDAAiD;IACjD,QAAQ,EAAE,MAAM,CAAC;IACjB,iDAAiD;IACjD,SAAS,EAAE,MAAM,GAAG,IAAI,CAAC;IACzB,yEAAyE;IACzE,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACvB;AAiGD;;;GAGG;AACH,wBAAsB,aAAa,CACjC,OAAO,GAAE,eAAoB,GAC5B,OAAO,CAAC,cAAc,CAAC,CA2DzB;AAoED;;;GAGG;AACH,wBAAsB,kBAAkB,IAAI,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC,CAOzE"}
|