npm - @swarmclawai/swarmclaw - Versions diffs - 1.5.57 → 1.5.59 - Mend

@swarmclawai/swarmclaw 1.5.57 → 1.5.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/README.md +19 -110
package/package.json +1 -1
package/src/app/api/eval/scenarios/route.ts +6 -4
package/src/app/api/eval/suite/route.ts +5 -1
package/src/app/api/eval/suites/route.ts +19 -0
package/src/app/api/s/[token]/raw/route.ts +79 -0
package/src/app/api/s/[token]/route.ts +37 -0
package/src/app/api/share/[id]/route.ts +28 -0
package/src/app/api/share/route.ts +53 -0
package/src/app/api/usage/live/route.ts +94 -0
package/src/app/s/[token]/page.tsx +138 -0
package/src/cli/index.js +15 -1
package/src/components/auth/setup-wizard/utils.test.ts +8 -1
package/src/components/auth/setup-wizard/utils.ts +8 -1
package/src/lib/server/chat-execution/chat-turn-finalization.ts +33 -6
package/src/lib/server/eval/runner.ts +16 -5
package/src/lib/server/eval/scenarios-gaia.ts +100 -0
package/src/lib/server/eval/scenarios-swebench.ts +196 -0
package/src/lib/server/eval/scenarios.ts +20 -1
package/src/lib/server/eval/types.ts +4 -0
package/src/lib/server/missions/mission-templates.ts +23 -0
package/src/lib/server/sharing/share-link-repository.test.ts +69 -0
package/src/lib/server/sharing/share-link-repository.ts +107 -0
package/src/lib/server/sharing/share-resolver.ts +153 -0
package/src/lib/server/storage.ts +1 -0
package/src/lib/setup-defaults.ts +76 -0

package/README.md CHANGED Viewed

@@ -399,6 +399,25 @@ Operational docs: https://swarmclaw.ai/docs/observability
 ## Releases
+### v1.5.59 Highlights
+Viral-loop release. Adds public share links for missions, skills, and sessions, plus a complementary raw-markdown endpoint so any shared skill installs directly through the existing `POST /api/skills/import`.
+- **Share links for missions, skills, and sessions.** New `share_links` collection in `src/lib/server/storage.ts` plus `src/lib/server/sharing/share-link-repository.ts`. `POST /api/share { entityType, entityId, expiresInSec?, label? }` mints a cryptographically random 32-char base64url token; `GET /api/share` lists; `GET /api/share/:id` fetches; `DELETE /api/share/:id` revokes (pass `?hard=true` to hard-delete). CLI: `swarmclaw share {list,mint,get,revoke,resolve,raw}`.
+- **Public read endpoints (no auth required).** `GET /api/s/:token` returns the scrubbed JSON payload; `GET /api/s/:token/raw` returns plain markdown (skills return their SKILL.md verbatim, missions render as title + goal + criteria + milestones, sessions as a transcript). Revoked and expired tokens return `404 Not found` without leaking shape information. `GET /s/:token` is a server-rendered page for dropping straight into a browser.
+- **Share-link-based skill install.** `POST /api/skills/import` already accepts an http(s) URL; pointing it at `https://<your-host>/api/s/<token>/raw` now installs a shared skill from another SwarmClaw instance without auth handshakes. Pairs naturally with the existing `swarmclaw skills import` CLI command.
+- **Share-link repository tests.** `share-link-repository.test.ts` covers mint / list / revoke / lookup-by-token round-trip plus expiry handling against a temporary data dir.
+### v1.5.58 Highlights
+This release broadens the built-in evaluation harness so SwarmClaw runs can be benchmarked against named suites, adds two targeted starter kits, exposes live per-session cost data, tightens auto-skill drafting, and ships a zero-setup demo mission template.
+- **Benchmark-style eval suites.** New `SWEBENCH_LITE_SCENARIOS` and `GAIA_L1_SCENARIOS` in `src/lib/server/eval/scenarios-swebench.ts` and `scenarios-gaia.ts` — curated parallels (not the upstream datasets) sized for a single-agent harness run. The shared `EvalScenario` type now carries an optional `suite: 'core' | 'swe-bench-lite' | 'gaia-l1' | 'tool-use' | 'code-action'` tag. `POST /api/eval/suite` accepts `{ suite: "swe-bench-lite" }` to scope a run. New `GET /api/eval/suites` lists every suite with scenario count, max score, and categories. CLI: `swarmclaw eval suites` is new, and `swarmclaw eval suite` still takes a JSON body, which can now include `suite`. Useful for advertising verifiable numbers against a named benchmark instead of a bespoke scoring rubric.
+- **Two additional starter kits.** `inbox_triage` (single Triager agent over email + memory + documents) and `data_analyst` (single Analyst agent over shell + files + web + documents) join the existing seven kits in `src/lib/setup-defaults.ts`. Both are surfaced on the intent-driven setup path alongside Personal Assistant, Research Copilot, Builder Studio, and Delegate Team.
+- **Live per-session usage API.** New `GET /api/usage/live?sessionId=...` returns a lightweight snapshot — records, tokens in/out, estimated cost, firstAt/lastAt, wallclockMs, turns — so frontends can surface a live cost meter without pulling the full aggregated `/api/usage` payload. Without a `sessionId` the route returns the ten most recently active sessions. Registered in the CLI as `swarmclaw usage live`.
+- **Auto-skill drafting is stricter and rate-limited.** `shouldAutoDraftSkillSuggestion` in `chat-turn-finalization.ts` now requires at least 3 tool events in the completed turn (was 1), and a new per-agent daily cap limits automatic drafts to 3 per day per agent to prevent suggestion-inbox spam. Both thresholds are named constants (`AUTO_DRAFT_MIN_TOOL_EVENTS`, `AUTO_DRAFT_DAILY_LIMIT`). Agents with `autoDraftSkillSuggestions = false` are unaffected (auto-drafting remains opt-in per agent).
+- **Hello World demo mission template.** New `hello-world-demo` entry in `BUILT_IN_MISSION_TEMPLATES` — a bounded, zero-setup mission that reads three files in the working directory and writes a one-paragraph markdown summary to `hello-world-report.md`. Budgets (USD 0.25, 20k tokens, 30 turns, 15 min) are small enough to run on a local Ollama model without cost. Intended as the first thing a new user watches an agent complete end to end.
 ### v1.5.57 Highlights
 This release closes the org-orchestration feature gap with Paperclip while keeping SwarmClaw's autonomous-assistant focus. Most additions are additive; nothing existing has changed shape.
@@ -428,116 +447,6 @@ This release closes the org-orchestration feature gap with Paperclip while keepi
 - **Fix: `PUT /api/webhooks/:id` now validates its body with a Zod schema.** Previously `{"events": "not_an_array"}` wiped the events list. Added `WebhookUpdateSchema` and explicit `rawKeys.has(...)` guards in the mutate closure so only fields actually present in the body are applied.
 - **Fix: classifier JSON no longer leaks into assistant responses.** Some Ollama / Ollama Cloud turns were emitting the internal `MessageClassification` object directly into the stream (e.g. `{"taskIntent":"research",...}` prepended to the real reply). The existing stripper only matched when `isDeliverableTask` was the first key, so leaks starting with `taskIntent` sailed through to the user. Replaced the regex with a principled detector that brace-matches candidate JSON (string-quote aware) and validates against `MessageClassificationSchema.safeParse` — the schema itself is the source of truth, so future schema changes can't break detection.
-### v1.5.54 Highlights
-- **Mission templates library**: the `/missions` page now opens with a curated gallery of starter missions. Each template pre-wires a goal, success criteria, USD / token / turn / wallclock budgets, and a report cadence, so non-technical users can install a working autonomous run in one click. Initial lineup: Daily News Digest, Inbox Triage, Competitor Watch, Weekly Research Report, Social Listener, and Customer Support Triage. Setup notes flag any connector or permission prerequisites before installation. Power-user overrides (budget caps, success criteria, report cadence) live behind a collapsed **Advanced Settings** panel so the default install flow stays one click.
-- **New API routes `GET /api/missions/templates` and `POST /api/missions/templates/:id/instantiate`** with matching CLI commands `swarmclaw missions templates` and `swarmclaw missions instantiate`. Installed missions persist a `templateId` so the origin is traceable for future template-update flows; legacy missions normalize to `templateId: null` on load, no data migration required.
-- **Fix: user-selected provider and model now survive the chat execution pipeline** ([#51](https://github.com/swarmclawai/swarmclaw/pull/51), thanks to [@borislavnnikolov](https://github.com/borislavnnikolov)). Switching provider or model via the inspector panel mid-session was being reverted on every turn because the agent's configured route was unconditionally reapplied in three places. `syncSessionFromAgent` now only syncs credentials / endpoint / fallbacks when the session's provider still matches the route provider, `prepareChatTurn` preserves the user's chosen model after applying the route, and `updateChatSession` auto-resolves a stored credential for the new provider (and clears the stale `apiEndpoint`) when provider changes without an explicit `credentialId`. Restores reliable switching between Copilot CLI, Codex CLI, Groq, and OpenAI-compatible providers.
-> **Note:** v1.5.53 release notes described the mission templates library, but the feature commit landed after the v1.5.53 tag was cut. v1.5.54 is the release that actually ships it.
-### v1.5.53 Highlights
-- **Fix: switching a session's model now sticks in the UI** ([#50](https://github.com/swarmclawai/swarmclaw/pull/50), thanks to [@borislavnnikolov](https://github.com/borislavnnikolov)). The **Switch Model** panel in the agent inspector was reading from `agent.provider` / `agent.model` (the agent's defaults) instead of `session.provider` / `session.model`, so after saving a model switch the collapsed pill still showed the agent default, the combobox reset to the default when reopened, and `selectedProvider` reverted on every save. `ModelSwitcherInline` now uses `session.provider || agent.provider` and `session.model || agent.model` as the source of truth, and its `useEffect` syncs to `session.provider` changes so a successful save updates the panel immediately.
-### v1.5.52 Highlights
-- **Session X-Ray now surfaces the backend execution log** ([#48](https://github.com/swarmclawai/swarmclaw/pull/48), thanks to [@borislavnnikolov](https://github.com/borislavnnikolov)). The debug panel fetches entries from the SQLite execution log on open and merges them with in-memory message events, sorted by time. Expandable entries show provider, model, stream errors, duration, and token counts — the info that was previously invisible when Ollama or other local-model runs failed silently. A new **Tools** filter tab, an `exec` badge for log-sourced entries, an entry count in the stats bar, and a Refresh button round it out. New API route `GET /api/chats/:id/execution-log` with `limit`, `since`, and `category` query params, registered in the CLI manifest as `swarmclaw chats execution-log`.
-- **Execution errors now captured in the log** ([#48](https://github.com/swarmclawai/swarmclaw/pull/48)). `finalizeChatTurn()` writes a structured `error` entry to the execution log on terminal failure, recording provider, model, stream errors, duration, token counts, and whether a response was produced — so the Session X-Ray above actually has something to show.
-- **Fix: blank task-sheet no longer shows `"null"` under *Blocked By*** ([#47](https://github.com/swarmclawai/swarmclaw/pull/47), thanks to [@borislavnnikolov](https://github.com/borislavnnikolov)). A successful task create/update returns `error: null`, and the old `'error' in res` guard treated that as a truthy error and rendered `String(null)` as a red "null" string under the Blocked By field. Now only non-empty string errors trigger the UI, and `depError` is cleared on dialog close so stale state cannot leak across re-opens.
-### v1.5.51 Highlights
-- **Desktop app now actually opens and renders on macOS**: packaged builds were broken in v1.5.50 by a stack of independent issues that each masked the next. This release unblocks the cold-boot path end to end. Measured cold-boot time on a populated install: ~1 second to first `/api/healthz` response, down from a hard 60-second timeout.
-  - Ad-hoc code signing (`identity: '-'`) via a new `scripts/electron-after-pack.cjs` hook that runs `codesign --sign - --force --deep` after electron-builder packages the bundle. The bundle identifier is now sealed as `ai.swarmclaw.desktop` with all 74k resources sealed, so quarantined dmgs surface as "unidentified developer" (right-click → Open) instead of the more confusing "damaged" error.
-  - Per-architecture native module sync: the afterPack hook copies `better-sqlite3`, `@mongodb-js/zstd`, `node-liblzma`, and `utf-8-validate` `.node` binaries from the electron-builder-rebuilt root `node_modules` into the packaged `.next/standalone/node_modules`. Without this, the standalone server hit `ERR_DLOPEN_FAILED: NODE_MODULE_VERSION 137` on launch because Next.js's output-tracing copied the Node-ABI build of better-sqlite3 into standalone while electron-builder only rebuilt the root tree for Electron's ABI.
-  - `scripts/run-next-build.mjs` now copies `mdn-data` (used by `css-tree` via `jsdom`) into standalone alongside the existing `css-tree/data` patch, so pages that depend on it don't 500 with `Cannot find module 'mdn-data/css/at-rules.json'`.
-  - `isomorphic-dompurify` replaced by the browser-only `dompurify` in `agent-avatar.tsx`. The isomorphic wrapper was pulling `jsdom`'s ESM-only `@exodus/bytes` dep into every server bundle the avatar was referenced from, which blew up SSR under Electron 33 (Node 20.18) with `ERR_REQUIRE_ESM` on every page.
-  - Session-consolidation migrations, `initWsServer`, and `ensureDaemonStarted` moved into a `setImmediate` deferred block in `src/instrumentation.ts` so Next.js can bind the HTTP listener before per-install work runs.
-- **App icon fixed**: the Dock no longer shows Electron's default `exec` placeholder. `scripts/gen-icons.mjs` generates `resources/icon.icns`, `resources/icon.ico`, and `resources/icon.png` from `public/branding/swarmclaw-org-avatar.png`; the main process sets the Dock icon at launch and passes it to every `BrowserWindow`.
-- **Embedded server log file + improved failure dialog**: the Electron wrapper now tees the child Next.js server's stdout/stderr into `<userData>/logs/server.log` (`~/Library/Application Support/@swarmclawai/swarmclaw/logs/server.log` on macOS, 1 MB rotation). If startup fails or the server exits, the error dialog shows the tail of the log inline and exposes an **Open Logs Folder** button that jumps Finder straight to the file. This is what made root-cause debugging possible in the first place — if you hit any kind of regression here, grab that log and open an issue.
-- **Embedded server timeout raised from 60s to 5 minutes**: a safety net. On a healthy install the server is up in about a second; 300 seconds is there for pathological cold boots (very large data dirs, contested Apple Silicon Gatekeeper verification on unsigned binaries, etc.) and should never be hit in normal use.
-### v1.5.50 Highlights
-- **Fix: opencode-web remote instances no longer fail with `EACCES`**: SwarmClaw used to send the local workspace path (e.g. `/root/.swarmclaw/workspace`) as a `directory=` query parameter on every opencode-web request. Remote opencode-web instances tried to `lstat` that path and rejected the call. The provider now auto-detects local vs. remote from the endpoint hostname (`localhost`, `127.0.0.1`, `::1`, `0.0.0.0`) and only sends `directory=` when the endpoint is local. Thanks to [@SteamedFish](https://github.com/SteamedFish) for the detailed root-cause writeup in [#45](https://github.com/swarmclawai/swarmclaw/issues/45).
-### v1.5.49 Highlights
-- **Autonomous Missions**: a new first-class concept for long-running, goal-driven agent work. Hand your agent team a goal on Friday, come back Monday to see what they shipped. Each mission carries a title, a natural-language objective, bulleted success criteria, hard budgets (USD, tokens, turns, wallclock), periodic markdown reports, and a full milestone timeline. Missions drive any session through the existing heartbeat pipeline, so delegation to Claude Code, Codex, OpenCode, Cursor, Droid, Goose, Qwen, or native SwarmClaw agents all work without changes.
-- **Budget enforcement in the run pipeline**: `enqueueSessionRun` now consults the mission's budget before every autonomous turn. When any cap is hit the mission transitions to `budget_exhausted`, the queue drains, and a final report fires. Warn thresholds (default 50% / 80% / 95% of each cap) emit `budget_warn` milestones exactly once each.
-- **Scheduler tick from heartbeat**: `runMissionScheduler()` fires every heartbeat tick, independent of the active-hours window, so wallclock budgets and periodic reports still fire overnight. Report cadence is configurable per mission; reports land as in-app notifications today and ship as Slack/Discord/audio in a follow-up.
-- **`/missions` dashboard**: new page with a live mission list, status pills, four-axis budget gauges, a scrollable milestone timeline, a reports drawer, and start / pause / cancel / mark-complete / generate-report-now controls.
-- **CLI commands**: `swarmclaw missions list|get|create|update|delete|control|reports|report-now|events`. Create a mission, start it, and watch the timeline from the terminal or CI.
-- **New storage collections**: `agent_missions`, `mission_reports`, and `agent_mission_events`. The legacy deprecated `missions` table is left untouched so nothing in existing installs is disturbed.
-### v1.5.48 Highlights
-- **SwarmDock MCP preset now points at the hosted endpoint**: *MCP Servers → Quick Setup → SwarmDock* is pre-filled with `streamable-http` transport pointed at `https://swarmdock-api.onrender.com/mcp` and a ready-to-edit `Authorization: Bearer <key>` header template. Users no longer need to run `npx swarmdock-mcp` locally — the SwarmDock team hosts the MCP server in-process on the existing API service. First-time setup (browser keygen + agent registration) lives at [swarmdock.ai/mcp/connect](https://www.swarmdock.ai/mcp/connect).
-- **McpPreset gains `url` and `headersTemplate`**: `applyPreset` now prefills the URL input and the Headers textarea in addition to command/args/env, so remote presets can ship complete configs.
-- **Skills doc refresh**: the `swarmclaw` skill's MCP Servers section points to the hosted flow instead of the prior stdio instructions.
-### v1.5.47 Highlights
-- **MCP injection for GitHub Copilot CLI and OpenAI Codex CLI agents**: agents using the `copilot-cli` or `codex-cli` providers now run with their assigned MCP servers attached at runtime. Copilot CLI receives the servers via `--additional-mcp-config @<tempfile>`; Codex CLI gets per-session `[mcp_servers.*]` TOML sections appended to a scoped `config.toml`. Stdio transports (command, args, env, cwd) and SSE / streamable-http transports (url, headers) are both supported. Skills assigned to the agent continue to be injected via the system prompt.
-- **Skills and MCP panel visible for copilot-cli and codex-cli in the agent editor**: the Advanced Settings section now opens for these two providers so you can attach skills and MCP servers from the UI. Routing, memory, and voice panels stay hidden since these providers are worker-only.
-- **Codex CLI approval policy change**: Codex CLI sessions now launch with `--dangerously-bypass-approvals-and-sandbox` instead of `--full-auto`. The old flag silently cancels MCP tool calls via Codex's approval gate, which is why MCP tool results were not landing. SwarmClaw itself runs in its own sandbox, so Codex's additional sandbox was not load-bearing, but be aware of the change if you were relying on it for a specific agent.
-- **Under the hood**: `~/.codex-sessions/<session.id>/` replaces `/tmp/swarmclaw-codex-*` as the per-session Codex config directory because Codex refuses to create helper binaries under `/tmp`. The Playwright MCP proxy now passes an explicit `cwd: process.cwd()` when spawning, so it no longer crashes with `uv_cwd ENOENT` when the server is restarted after a directory move.
-- **Exa as a new web search provider**: Settings > Web Search gains an Exa option alongside Tavily, Brave, SearXNG, DuckDuckGo, Google, and Bing. Exa uses neural search with AI-generated summaries and falls back to highlights, then raw text when summaries are unavailable. Configure the key via the UI, the `EXA_API_KEY` environment variable, or the secrets store. Requests carry an `x-exa-integration: swarmclaw` tracking header so usage attributed to SwarmClaw is visible to Exa.
-Thanks to [@borislavnnikolov](https://github.com/borislavnnikolov) and [@tgonzalezc5](https://github.com/tgonzalezc5) for the contributions.
-### v1.5.46 Highlights
-- **Custom base URL for built-in OpenAI and Anthropic providers**: the Endpoint field in provider settings now works for the built-in OpenAI and Anthropic providers (marked as `optionalEndpoint`). Point them at a proxy, gateway, or self-hosted endpoint and the URL persists, auto-resolves on connection test, and flows through both the live chat path and the LangGraph agent path (`ChatAnthropic` now receives `anthropicApiUrl`). Existing installs with no custom URL keep using the defaults.
-- **Test-model selector in provider settings**: when you hit "Test Connection", a new dropdown lets you pick a specific model (for example `gpt-4.1-mini` or `claude-haiku-4-5`) or leave it on Auto-detect. Useful for verifying a specific model is reachable on a given endpoint.
-- **Auto-resolution of credentials and endpoints in the connection test**: the test route now looks up the saved credential and base URL for the provider when they are not explicitly supplied, so the provider sheet's "Test" button works without needing to replay config.
-- **Anthropic streaming refactor**: the streaming handler moved from Node's `https.request()` to `fetch()`. Same behavior, cleaner cancellation, and it now respects `session.apiEndpoint` as a full base URL instead of a hostname.
-- **Connection test body**: Ollama and OpenAI-compatible test requests now send `max_completion_tokens` instead of the legacy `max_tokens`, matching current OpenAI conventions and working correctly with reasoning models that reject `max_tokens`.
-Thanks to [@Llugaes](https://github.com/Llugaes) for the contribution.
-### v1.5.45 Highlights
-- **SwarmVault MCP preset**: a new "SwarmVault" Quick Setup chip in the MCP server sheet pre-fills `npx -y @swarmvaultai/cli mcp` over `stdio` and prompts for the vault directory. One click registers a SwarmVault knowledge vault as an MCP server; agents pick it up via the existing per-agent MCP server selector. SwarmVault docs: https://swarmvault.ai
-- **`cwd` on stdio MCP servers**: `McpServerConfig` now has an optional `cwd` field. The MCP client passes it through to `StdioClientTransport` so servers that discover config from the working directory (SwarmVault, anything that reads from `cwd`-relative files) work correctly. Existing MCP servers are untouched (the field is optional and defaults to the SwarmClaw process cwd, which was the prior behaviour).
-- **Bundled `swarmvault` skill**: ships at `skills/swarmvault/SKILL.md` and is auto-discovered alongside the other bundled skills. Captures the schema-first / graph-query-first conventions (read `swarmvault.schema.md` before compile or query work, treat `raw/` as immutable, prefer `graph query|path|explain` over grep, preserve `page_id` / `source_ids` / `node_ids` / `freshness` / `source_hashes` frontmatter, save high-value answers to `wiki/outputs/`). Pin it on any agent that talks to a SwarmVault vault. Optional and decoupled from the MCP integration.
-### v1.5.44 Highlights
-- **Model lists refreshed across every provider**: dropdowns now lead with the April-2026 flagship models instead of mid-2025 names. OpenAI goes to GPT-5.4 / 5.4-mini / 5.4-nano / 5.3 / o3-mini. Google and Gemini CLI lead with Gemini 3.1 Pro, Gemini 3 Flash, and 3.1 Flash-Lite, keeping 2.5 as a legacy fallback. xAI jumps from Grok 3 to Grok 4 plus the Grok 4 / 4.1 Fast reasoning and non-reasoning variants. Groq drops the deprecated `deepseek-r1-distill-llama-70b` and leads with Llama 4 Maverick, Llama 4 Scout, Kimi K2, and gpt-oss 120b/20b. Mistral moves to Magistral 1.2, Devstral 2, Codestral, and Mistral Small 4. Fireworks / Nebius / DeepInfra now lead with DeepSeek V3.2, Kimi K2.5, and Qwen 3 235B instead of the older R1-0528 checkpoint. Anthropic and Claude CLI reorder Opus 4.6 / Sonnet 4.6 / Haiku 4.5 newest-first. OpenCode Web refreshes its `providerID/modelID` seed list.
-- **OpenRouter default set expanded**: was one model (`openai/gpt-4.1-mini`). Now ten flagship routes including `openrouter/auto`, Claude 4.6 Opus / Sonnet / Haiku, GPT-5.4, Gemini 3.1 Pro / 3 Flash, Grok 4, DeepSeek V3.2, and Llama 4 Maverick. Much better first-run experience for the "provider that routes to every other provider".
-- **`DEFAULT_AGENTS` models refreshed**: 11 starter-agent models updated to match the new flagship lineups (OpenAI → GPT-5.4, xAI → Grok 4, Google / Gemini CLI → Gemini 3.1 Pro, Groq → Llama 4 Maverick, Fireworks / Nebius / DeepInfra → DeepSeek V3.2, OpenCode Web / Copilot CLI → Claude Sonnet 4.6, OpenRouter → Claude Sonnet 4.6). Starter agents created from the setup wizard now default to the right model out of the box.
-- **Starter-agent tool bundles now include `droid_cli` and `copilot_cli`**: these delegation backends were added in v1.5.37 and v1.5.3 respectively but never made it into `STARTER_AGENT_TOOLS` / `BUILDER_AGENT_TOOLS`. Every starter kit (Sidekick, Researcher, Builder, Reviewer, Operator, OpenClaw fleet) now picks them up on new workspace creation.
-- **DeepSeek note**: `deepseek-chat` and `deepseek-reasoner` remain the recommended model names — they are stable aliases that auto-track the current `V3.2` weights. No action required.
-- **Registry sanity test**: added `provider-models.test.ts` which asserts every provider declares a non-empty deduplicated models array, matching metadata keys, and a working `handler.streamChat`. Guards against future copy-paste regressions in the registry.
-### v1.5.43 Highlights
-- **`/api/version` no longer 500s in Docker**: the route used to shell out to `git` at runtime, which fails in the production image because `.git/` is not copied. The route now returns 200 with `{ source: 'package', version }` from `package.json` when git metadata is unavailable, and `{ source: 'git', version, commit, ... }` when it is. `/api/version/update` short-circuits on Docker-style installs with a clear `no_git_metadata` reason instead of an opaque 500. ([#41](https://github.com/swarmclawai/swarmclaw/issues/41) Bug 1, reported by [@SteamedFish](https://github.com/SteamedFish).)
-- **Daemon reclaims stale `daemon-primary` leases on container restart**: when the previous container died holding the SQLite-backed lease, the new container previously waited up to the full 120 s TTL before the daemon could start. The successor now parses the recorded owner pid, probes it with `process.kill(pid, 0)`, and reclaims the lease immediately when the prior owner is provably dead on this host. When the owner is genuinely alive (or when the recorded host is ambiguous, such as multi-pod Kubernetes), behaviour is unchanged but a single deferred retry is scheduled just past the TTL so the daemon comes up automatically rather than waiting for the next API call. ([#41](https://github.com/swarmclawai/swarmclaw/issues/41) Bug 2.)
-- **Subprocess daemon fallback fails soft in Docker**: when `resolveDaemonRuntimeEntry()` cannot find `src/lib/server/daemon/daemon-runtime.ts` (the file is intentionally not in the standalone build), `ensureDaemonProcessRunning()` now logs a one-shot warning and returns `false` instead of throwing into the API handler. The in-process daemon path (with the Bug 2 fix) is the production path in Docker. ([#41](https://github.com/swarmclawai/swarmclaw/issues/41) Bug 3.)
-- **`CONTRIBUTING.md`**: dropped the broken reference to `AGENTS.md`. That file is `.gitignore`'d and not visible to external contributors. The single canonical project-conventions document is `CLAUDE.md`.
-### v1.5.42 Highlights
-- **New `opencode-web` provider — connect to remote OpenCode HTTP servers** ([#40](https://github.com/swarmclawai/swarmclaw/issues/40), requested by [@SteamedFish](https://github.com/SteamedFish)): point an agent at any host running `opencode serve` or `opencode web` (default port `4096`). Supports HTTPS endpoints, HTTP Basic Auth (encode credentials as `username:password` in the API key field; bare passwords default the username to `opencode`), automatic OpenCode session reuse across chat turns, and per-session workspace isolation via `?directory=...`. Models are entered as `providerID/modelID` (e.g. `anthropic/claude-sonnet-4-5`). The existing `opencode-cli` provider is unchanged.
-- **New `CONTRIBUTING.md`**: short, scannable guide covering bug reports, feature requests, PR expectations, commit conventions, and where to look in the codebase. Its gold-standard examples are modeled on issues #39 and #40.
-- **`GET /api/memory/:id` now returns a single entry by default**: previously it eagerly traversed linked memories and returned an array, which broke naive callers that expected a single object per REST convention. Linked traversal is now opt-in via `?depth=N` or `?envelope=true`.
-### v1.5.41 Highlights
-- **Moonshot / Kimi compatibility — duplicate `files` tool name fixed**: any agent with the default `files` extension was sending two tools both literally named `files` to the LLM. Most providers tolerated the duplicate; Moonshot's strict tool-schema validation rejected it with `MoonshotException - function name files is duplicated` ([#39](https://github.com/swarmclawai/swarmclaw/issues/39), reported by [@SteamedFish](https://github.com/SteamedFish)). Three fixes: the v2 file builder is now correctly gated on `files_v2` (not `files`), it registers under the matching capability key, and the session-tools assembler now shares a single dedup Set across native, CRUD, and extension phases so any future name collision is rejected with a clear warning instead of a silent double-register.
-### v1.5.40 Highlights
-- **Current-thread recall routing**: the message classifier now emits four explicit flags (`isCurrentThreadRecall`, `isGreeting`, `isAcknowledgement`, `isMemoryWriteIntent`) so the chat router stops treating in-thread pronouns ("your last reply", "both answers", "what I just said") as durable-memory queries. Previously small OSS models (`devstral-small-2:24b` and similar) would run `memory_search` for these, come back empty, and truthfully report "no memories found" even when the answer was three messages up.
-- **`memory_search` short-circuits thread-recall queries**: when the search query itself contains phrases like "just", "last reply", "my last", "both answers", the tool now returns a redirect pointing the model back to the visible chat history instead of executing a pointless vector search. Explicit cross-session phrasing ("yesterday", "last week", "in a previous conversation") still runs the normal search path.
-- **Explicit Routing Matrix in the system prompt**: spells out the boundary between "read the thread above" and "call a memory tool" in plain language, so routing doesn't depend on the model extrapolating a terse rule. Memory-tool lines are now tagged `(not this thread)` so the distinction is unmissable.
-- **Tool-summary retry threshold tightened**: the "trivial response" threshold used to decide whether to force a redundant `tool_summary` continuation dropped from 150 → 80 characters. A 119-char response like "I wrote X, stored Y, and confirmed both." is substantive; the old threshold forced the model to re-stream the same answer twice.
-- **Classifier timeout raised to 10 s**: 2 s was too tight for Ollama Cloud with a fully-configured agent (observed 4–6 s calls). Result caching means the latency tax only applies to first-seen messages.
-- **Reflection memories dedup across runs**: the supervisor reflection writer now compares candidate notes against recent (last 7 days) reflection memories for the same agent and skips ones that have already been stored, stopping the ~7-per-turn rediscovery churn on top of the within-run dedup shipped in v1.5.38.
 Older releases: https://swarmclaw.ai/docs/release-notes
 - GitHub releases: https://github.com/swarmclawai/swarmclaw/releases

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@swarmclawai/swarmclaw",
-  "version": "1.5.57",
+  "version": "1.5.59",
   "description": "Build and run autonomous AI agents with OpenClaw, Hermes, multiple model providers, orchestration, delegation, memory, skills, schedules, and chat connectors.",
   "main": "electron-dist/main.js",
   "license": "MIT",

package/src/app/api/eval/scenarios/route.ts CHANGED Viewed

@@ -1,19 +1,21 @@
 import { NextResponse } from 'next/server'
-import { EVAL_SCENARIOS } from '@/lib/server/eval/scenarios'
+import { EVAL_SCENARIOS, getSuiteScenarios } from '@/lib/server/eval/scenarios'
 export async function GET(req: Request) {
   const { searchParams } = new URL(req.url)
   const category = searchParams.get('category')
+  const suite = searchParams.get('suite')
-  const scenarios = category
-    ? EVAL_SCENARIOS.filter((s) => s.category === category)
-    : EVAL_SCENARIOS
+  let scenarios = EVAL_SCENARIOS
+  if (suite) scenarios = getSuiteScenarios(suite)
+  if (category) scenarios = scenarios.filter((s) => s.category === category)
   return NextResponse.json(
     scenarios.map((s) => ({
       id: s.id,
       name: s.name,
       category: s.category,
+      suite: s.suite ?? 'core',
       description: s.description,
       tools: s.tools,
       timeoutMs: s.timeoutMs,

package/src/app/api/eval/suite/route.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import { errorMessage } from '@/lib/shared-utils'
 const SuiteSchema = z.object({
   agentId: z.string().min(1),
   categories: z.array(z.string()).optional(),
+  suite: z.string().min(1).optional(),
 })
 export async function POST(req: Request) {
@@ -19,7 +20,10 @@ export async function POST(req: Request) {
       )
     }
-    const result = await runEvalSuite(parsed.data.agentId, parsed.data.categories)
+    const result = await runEvalSuite(parsed.data.agentId, {
+      categories: parsed.data.categories,
+      suite: parsed.data.suite,
+    })
     return NextResponse.json(result)
   } catch (err: unknown) {
     return NextResponse.json(

package/src/app/api/eval/suites/route.ts ADDED Viewed

@@ -0,0 +1,19 @@
+import { NextResponse } from 'next/server'
+import { EVAL_SCENARIOS, getSuiteScenarios, listSuites } from '@/lib/server/eval/scenarios'
+/**
+ * Summarises every eval suite: scenario count, maximum attainable score
+ * (the sum of all scoring-criterion weights) and the distinct categories
+ * its scenarios belong to.
+ */
+export async function GET() {
+  const report = listSuites().map((suiteName) => {
+    // Scenarios that declare no suite are counted as part of "core".
+    const members =
+      suiteName === 'core'
+        ? EVAL_SCENARIOS.filter((scenario) => !scenario.suite || scenario.suite === 'core')
+        : getSuiteScenarios(suiteName)
+    let maxScore = 0
+    for (const scenario of members) {
+      let scenarioMax = 0
+      for (const criterion of scenario.scoringCriteria) scenarioMax += criterion.weight
+      maxScore += scenarioMax
+    }
+    const categories = Array.from(new Set(members.map((scenario) => scenario.category)))
+    return { name: suiteName, count: members.length, maxScore, categories }
+  })
+  return NextResponse.json(report)
+}

package/src/app/api/s/[token]/raw/route.ts ADDED Viewed

@@ -0,0 +1,79 @@
+import {
+  isShareLinkActive,
+  loadShareLinkByToken,
+} from '@/lib/server/sharing/share-link-repository'
+import { resolveSharedEntity } from '@/lib/server/sharing/share-resolver'
+export const dynamic = 'force-dynamic'
+/** Headers sent with every successful raw response. */
+const RAW_HEADERS = {
+  'content-type': 'text/markdown; charset=utf-8',
+  'cache-control': 'public, max-age=60',
+}
+/** Wraps `body` in a 200 markdown response, merging in any extra headers. */
+function markdownResponse(
+  body: BodyInit | null | undefined,
+  extraHeaders: Record<string, string> = {},
+): Response {
+  return new Response(body, { status: 200, headers: { ...RAW_HEADERS, ...extraHeaders } })
+}
+/**
+ * Public raw-content endpoint for shared entities. Skills return their
+ * markdown so a second SwarmClaw instance can install via
+ * `POST /api/skills/import` without any auth handshake. Missions and
+ * sessions return short markdown summaries sized for quick sharing.
+ *
+ * Returns 404 for missing, expired, or revoked tokens, and for links whose
+ * entity no longer resolves, to avoid leaking shape information to a probe.
+ *
+ * NOTE(review): successful responses are sent with
+ * `cache-control: public, max-age=60`, so a shared cache may keep serving
+ * a link for up to a minute after it is revoked.
+ */
+export async function GET(_req: Request, ctx: { params: Promise<{ token: string }> }) {
+  const { token } = await ctx.params
+  const link = loadShareLinkByToken(token)
+  if (!link || !isShareLinkActive(link)) {
+    return new Response('Not found', { status: 404 })
+  }
+  const payload = resolveSharedEntity(link)
+  if (!payload) {
+    return new Response('Not found', { status: 404 })
+  }
+  if (payload.kind === 'skill') {
+    // Header values must be ASCII, hence the percent-encoded skill name.
+    return markdownResponse(payload.content, {
+      'x-skill-name': encodeURIComponent(payload.name),
+    })
+  }
+  if (payload.kind === 'mission') {
+    const lines: string[] = []
+    lines.push(`# ${payload.title}`, '')
+    if (payload.goal) lines.push(payload.goal, '')
+    if (payload.successCriteria.length > 0) {
+      lines.push('## Success criteria', '')
+      for (const c of payload.successCriteria) lines.push(`- ${c}`)
+      lines.push('')
+    }
+    if (payload.milestones.length > 0) {
+      lines.push('## Milestones', '')
+      for (const m of payload.milestones) {
+        const at = new Date(m.at)
+        // toISOString() throws a RangeError on an invalid date; drop the
+        // timestamp rather than turn a public request into a 500.
+        const stamp = Number.isNaN(at.getTime())
+          ? null
+          : at.toISOString().slice(0, 19).replace('T', ' ')
+        lines.push(stamp ? `- ${stamp}: ${m.note}` : `- ${m.note}`)
+      }
+      lines.push('')
+    }
+    return markdownResponse(lines.join('\n'))
+  }
+  // session
+  const lines: string[] = []
+  lines.push(`# ${payload.name}`, '')
+  if (payload.agentName) lines.push(`Agent: ${payload.agentName}`, '')
+  for (const m of payload.messages) {
+    lines.push(`### ${m.role}`, '', m.text, '')
+  }
+  return markdownResponse(lines.join('\n'))
+}

package/src/app/api/s/[token]/route.ts ADDED Viewed

@@ -0,0 +1,37 @@
+import { NextResponse } from 'next/server'
+import {
+  isShareLinkActive,
+  loadShareLinkByToken,
+} from '@/lib/server/sharing/share-link-repository'
+import { resolveSharedEntity } from '@/lib/server/sharing/share-resolver'
+export const dynamic = 'force-dynamic'
+/**
+ * Unauthenticated lookup of a shared entity by its public token.
+ *
+ * Responds with a small block of share metadata plus the payload produced
+ * by `resolveSharedEntity` (expected to be a scrubbed shape; that cannot be
+ * confirmed from this file). A token that does not load, or whose link is
+ * no longer active, answers 404 `not_found`, so a probe cannot tell those
+ * cases apart. An active link whose entity does not resolve answers 404
+ * `entity_missing`.
+ */
+export async function GET(_req: Request, ctx: { params: Promise<{ token: string }> }) {
+  const params = await ctx.params
+  const link = loadShareLinkByToken(params.token)
+  const notFound = () => NextResponse.json({ error: 'not_found' }, { status: 404 })
+  if (!link) return notFound()
+  if (!isShareLinkActive(link)) return notFound()
+  const payload = resolveSharedEntity(link)
+  if (!payload) {
+    return NextResponse.json({ error: 'entity_missing' }, { status: 404 })
+  }
+  // Only these fields of the link are exposed publicly.
+  const share = {
+    id: link.id,
+    entityType: link.entityType,
+    label: link.label,
+    createdAt: link.createdAt,
+    expiresAt: link.expiresAt,
+  }
+  return NextResponse.json({ share, payload })
+}

package/src/app/api/share/[id]/route.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import { NextResponse } from 'next/server'
+import {
+  loadShareLinkById,
+  revokeShareLink,
+  deleteShareLink,
+} from '@/lib/server/sharing/share-link-repository'
+export const dynamic = 'force-dynamic'
+/** Returns the stored share link with the given id, or 404 `not_found`. */
+export async function GET(_req: Request, ctx: { params: Promise<{ id: string }> }) {
+  const params = await ctx.params
+  const found = loadShareLinkById(params.id)
+  if (found) {
+    return NextResponse.json(found)
+  }
+  return NextResponse.json({ error: 'not_found' }, { status: 404 })
+}
+/**
+ * Revokes a share link, or removes it permanently when the request carries
+ * `?hard=true`.
+ *
+ * Both modes answer 404 `not_found` for an id that does not exist. A
+ * successful revoke returns the value from `revokeShareLink`; a successful
+ * hard delete returns `{ ok: true, deleted: true }`.
+ */
+export async function DELETE(req: Request, ctx: { params: Promise<{ id: string }> }) {
+  const { id } = await ctx.params
+  const hard = new URL(req.url).searchParams.get('hard') === 'true'
+  if (hard) {
+    // Check existence first so an unknown id gets the same 404 as the
+    // revoke path instead of a response claiming something was deleted.
+    if (!loadShareLinkById(id)) {
+      return NextResponse.json({ error: 'not_found' }, { status: 404 })
+    }
+    deleteShareLink(id)
+    return NextResponse.json({ ok: true, deleted: true })
+  }
+  const revoked = revokeShareLink(id)
+  if (!revoked) return NextResponse.json({ error: 'not_found' }, { status: 404 })
+  return NextResponse.json(revoked)
+}

package/src/app/api/share/route.ts ADDED Viewed

@@ -0,0 +1,53 @@
+import { NextResponse } from 'next/server'
+import { z } from 'zod'
+import {
+  createShareLink,
+  listShareLinks,
+  type ShareEntityType,
+} from '@/lib/server/sharing/share-link-repository'
+import { errorMessage } from '@/lib/shared-utils'
+export const dynamic = 'force-dynamic'
+// Field schemas for the mint request, named individually so the object
+// schema below reads as a plain field list.
+const entityTypeField = z.enum(['mission', 'skill', 'session'])
+const entityIdField = z.string().min(1)
+// Positive whole number; null or omitted are both accepted.
+const expiresInSecField = z.number().int().positive().nullable().optional()
+// Trimmed, at most 120 characters; null or omitted are both accepted.
+const labelField = z.string().trim().max(120).nullable().optional()
+/** Request body accepted by `POST` when minting a share link. */
+const MintSchema = z.object({
+  entityType: entityTypeField,
+  entityId: entityIdField,
+  expiresInSec: expiresInSecField,
+  label: labelField,
+})
+/**
+ * Lists share links, newest first, optionally narrowed by the `entityType`
+ * and/or `entityId` query parameters.
+ */
+export async function GET(req: Request) {
+  const { searchParams } = new URL(req.url)
+  // Unvalidated query input: the cast only satisfies the comparison below,
+  // and an unrecognised value simply matches nothing.
+  const entityType = searchParams.get('entityType') as ShareEntityType | null
+  const entityId = searchParams.get('entityId')
+  // filter() always yields a fresh array, so the in-place sort below never
+  // reorders the array handed out by listShareLinks().
+  const links = listShareLinks()
+    .filter(
+      (l) =>
+        (!entityType || l.entityType === entityType) &&
+        (!entityId || l.entityId === entityId),
+    )
+    .sort((a, b) => b.createdAt - a.createdAt)
+  return NextResponse.json(links)
+}
+/**
+ * Mints a new share link from a JSON body matching `MintSchema`.
+ *
+ * Answers 400 when the body is not valid JSON or fails schema validation,
+ * 500 when `createShareLink` throws, and otherwise the created link.
+ */
+export async function POST(req: Request) {
+  let body: unknown
+  try {
+    body = await req.json()
+  } catch {
+    // A malformed or missing JSON body is the caller's mistake, not a
+    // server fault, so report it as 400 rather than 500.
+    return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
+  }
+  const parsed = MintSchema.safeParse(body)
+  if (!parsed.success) {
+    return NextResponse.json(
+      { error: parsed.error.issues.map((i) => i.message).join(', ') },
+      { status: 400 },
+    )
+  }
+  try {
+    const link = createShareLink({
+      entityType: parsed.data.entityType,
+      entityId: parsed.data.entityId,
+      // Omitted optional fields are passed on as explicit nulls.
+      expiresInSec: parsed.data.expiresInSec ?? null,
+      label: parsed.data.label ?? null,
+    })
+    return NextResponse.json(link)
+  } catch (err: unknown) {
+    return NextResponse.json({ error: errorMessage(err) }, { status: 500 })
+  }
+}

package/src/app/api/usage/live/route.ts ADDED Viewed

@@ -0,0 +1,94 @@
+import { NextResponse } from 'next/server'
+import { loadUsage, loadSessions } from '@/lib/server/storage'
+import type { UsageRecord } from '@/types'
+export const dynamic = 'force-dynamic'
+/**
+ * The subset of a stored session that this route reads. Every field is
+ * optional because the stored shape is not validated here.
+ */
+interface SessionSnapshot {
+  id?: string
+  agentId?: string
+  /** Preferred start of the wall-clock window when present. */
+  createdAt?: number
+  /** Preferred end of the wall-clock window when present. */
+  lastActiveAt?: number
+  /** Only the length is used, as the turn count. */
+  messages?: unknown[]
+}
+/** Aggregated usage figures for one session, as returned by this route. */
+interface LiveUsage {
+  sessionId: string
+  /** Number of usage records that were aggregated. */
+  records: number
+  totalTokens: number
+  inputTokens: number
+  outputTokens: number
+  /** Summed cost, rounded to four decimal places. */
+  estimatedCost: number
+  /** Smallest positive record timestamp, or null when there is none. */
+  firstAt: number | null
+  /** Largest positive record timestamp, or null when there is none. */
+  lastAt: number | null
+  wallclockMs: number
+  turns: number
+}
+/**
+ * Folds one session's usage records into a single `LiveUsage` row.
+ *
+ * Token and cost fields are summed, with missing or falsy values counting
+ * as 0. `firstAt` / `lastAt` are the smallest and largest positive record
+ * timestamps. `turns` is the session's message count when a message array
+ * is available, otherwise the number of records. The wall-clock window
+ * prefers the session's own `createdAt` / `lastActiveAt`, falls back to the
+ * record timestamps, and is 0 when no positive start is known.
+ */
+function summarize(sessionId: string, records: UsageRecord[], session: SessionSnapshot | undefined): LiveUsage {
+  const sums = { total: 0, input: 0, output: 0, cost: 0 }
+  let firstAt: number | null = null
+  let lastAt: number | null = null
+  for (const record of records) {
+    sums.total += record.totalTokens || 0
+    sums.input += record.inputTokens || 0
+    sums.output += record.outputTokens || 0
+    sums.cost += record.estimatedCost || 0
+    const at = record.timestamp || 0
+    // Records without a positive timestamp do not move the window.
+    if (at <= 0) continue
+    firstAt = firstAt === null ? at : Math.min(firstAt, at)
+    lastAt = lastAt === null ? at : Math.max(lastAt, at)
+  }
+  const messageList = session?.messages
+  const turns = Array.isArray(messageList) ? messageList.length : records.length
+  const startedAt = session?.createdAt ?? firstAt ?? 0
+  const endedAt = session?.lastActiveAt ?? lastAt ?? Date.now()
+  // Never report a negative duration; without a known start report 0.
+  const wallclockMs = startedAt > 0 ? Math.max(0, endedAt - startedAt) : 0
+  return {
+    sessionId,
+    records: records.length,
+    totalTokens: sums.total,
+    inputTokens: sums.input,
+    outputTokens: sums.output,
+    estimatedCost: Math.round(sums.cost * 10000) / 10000,
+    firstAt,
+    lastAt,
+    wallclockMs,
+    turns,
+  }
+}
+/** Own-property lookup that ignores inherited keys such as `constructor`. */
+function ownEntry<T>(table: Record<string, T>, key: string): T | undefined {
+  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined
+}
+/**
+ * Live usage summary.
+ *
+ * With `?sessionId=…` returns the summary for that one session (all zeros
+ * when nothing is recorded for it). Without it, returns summaries for the
+ * 10 sessions with the most recent usage record, newest first.
+ */
+export async function GET(req: Request) {
+  const { searchParams } = new URL(req.url)
+  const sessionId = searchParams.get('sessionId')?.trim()
+  const usage = loadUsage() as Record<string, UsageRecord[]>
+  const sessions = loadSessions() as Record<string, SessionSnapshot>
+  if (sessionId) {
+    // sessionId is caller-supplied: a plain `usage[sessionId]` would resolve
+    // inherited keys like "constructor" to a non-array value that
+    // summarize() cannot iterate.
+    const records = ownEntry(usage, sessionId) ?? []
+    return NextResponse.json(summarize(sessionId, records, ownEntry(sessions, sessionId)))
+  }
+  // Without sessionId, return the 10 most recently active sessions
+  const recent = Object.keys(usage)
+    .map((id) => {
+      const records = usage[id] ?? []
+      const last = records.reduce((m, r) => Math.max(m, r.timestamp || 0), 0)
+      return { id, records, last }
+    })
+    .sort((a, b) => b.last - a.last)
+    .slice(0, 10)
+  return NextResponse.json(
+    recent.map(({ id, records }) => summarize(id, records, ownEntry(sessions, id))),
+  )
+}