npm - @swarmclawai/swarmclaw - Versions diffs - 1.2.1 → 1.2.2 - Mend

@swarmclawai/swarmclaw 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (144) hide show

package/README.md +9 -0
package/package.json +2 -2
package/skills/coding-agent/SKILL.md +111 -0
package/skills/github/SKILL.md +140 -0
package/skills/nano-banana-pro/SKILL.md +62 -0
package/skills/nano-banana-pro/scripts/generate_image.py +235 -0
package/skills/nano-pdf/SKILL.md +53 -0
package/skills/openai-image-gen/SKILL.md +78 -0
package/skills/openai-image-gen/scripts/gen.py +328 -0
package/skills/resourceful-problem-solving/SKILL.md +49 -0
package/skills/skill-creator/SKILL.md +147 -0
package/skills/skill-creator/scripts/init_skill.py +378 -0
package/skills/skill-creator/scripts/quick_validate.py +159 -0
package/skills/summarize/SKILL.md +77 -0
package/src/app/api/auth/route.ts +20 -5
package/src/app/api/chats/[id]/devserver/route.ts +13 -19
package/src/app/api/chats/[id]/messages/route.ts +13 -15
package/src/app/api/chats/[id]/route.ts +9 -10
package/src/app/api/chats/[id]/stop/route.ts +5 -7
package/src/app/api/chats/messages-route.test.ts +8 -6
package/src/app/api/chats/route.ts +9 -10
package/src/app/api/ip/route.ts +2 -2
package/src/app/api/preview-server/route.ts +1 -1
package/src/app/api/projects/[id]/route.ts +7 -46
package/src/components/chat/chat-area.tsx +45 -23
package/src/components/chat/message-bubble.test.ts +35 -0
package/src/components/chat/message-bubble.tsx +19 -9
package/src/components/chat/message-list.tsx +37 -3
package/src/components/input/chat-input.tsx +34 -14
package/src/instrumentation.ts +1 -1
package/src/lib/chat/assistant-render-id.ts +3 -0
package/src/lib/chat/chat-streaming-state.test.ts +42 -3
package/src/lib/chat/chat-streaming-state.ts +20 -8
package/src/lib/chat/queued-message-queue.test.ts +23 -1
package/src/lib/chat/queued-message-queue.ts +11 -2
package/src/lib/providers/cli-utils.test.ts +124 -0
package/src/lib/server/activity/activity-log.ts +21 -0
package/src/lib/server/agents/agent-availability.test.ts +10 -5
package/src/lib/server/agents/agent-cascade.ts +79 -59
package/src/lib/server/agents/agent-registry.ts +3 -1
package/src/lib/server/agents/agent-repository.ts +90 -0
package/src/lib/server/agents/delegation-job-repository.ts +53 -0
package/src/lib/server/agents/delegation-jobs.ts +11 -4
package/src/lib/server/agents/guardian-checkpoint-repository.ts +35 -0
package/src/lib/server/agents/guardian.ts +2 -2
package/src/lib/server/agents/main-agent-loop.ts +10 -3
package/src/lib/server/agents/main-loop-state-repository.ts +38 -0
package/src/lib/server/agents/subagent-runtime.ts +9 -6
package/src/lib/server/agents/subagent-swarm.ts +3 -2
package/src/lib/server/agents/task-session.ts +3 -4
package/src/lib/server/approvals/approval-repository.ts +30 -0
package/src/lib/server/autonomy/supervisor-incident-repository.ts +42 -0
package/src/lib/server/chat-execution/chat-execution-types.ts +38 -0
package/src/lib/server/chat-execution/chat-execution-utils.ts +1 -1
package/src/lib/server/chat-execution/chat-execution.ts +84 -1926
package/src/lib/server/chat-execution/chat-turn-finalization.ts +620 -0
package/src/lib/server/chat-execution/chat-turn-partial-persistence.ts +221 -0
package/src/lib/server/chat-execution/chat-turn-preflight.ts +133 -0
package/src/lib/server/chat-execution/chat-turn-preparation.ts +817 -0
package/src/lib/server/chat-execution/chat-turn-stream-execution.ts +296 -0
package/src/lib/server/chat-execution/chat-turn-tool-routing.ts +5 -5
package/src/lib/server/chat-execution/message-classifier.test.ts +329 -0
package/src/lib/server/chat-execution/post-stream-finalization.ts +1 -1
package/src/lib/server/chat-execution/prompt-builder.ts +11 -0
package/src/lib/server/chat-execution/prompt-sections.ts +5 -6
package/src/lib/server/chat-execution/situational-awareness.ts +12 -7
package/src/lib/server/chat-execution/stream-agent-chat.ts +16 -13
package/src/lib/server/chatrooms/chatroom-repository.ts +32 -0
package/src/lib/server/connectors/connector-repository.ts +58 -0
package/src/lib/server/connectors/runtime-state.test.ts +117 -0
package/src/lib/server/credentials/credential-repository.ts +7 -0
package/src/lib/server/gateways/gateway-profile-repository.ts +4 -0
package/src/lib/server/memory/memory-abstract.test.ts +59 -0
package/src/lib/server/missions/mission-repository.ts +74 -0
package/src/lib/server/missions/mission-service/actions.ts +6 -0
package/src/lib/server/missions/mission-service/bindings.ts +9 -0
package/src/lib/server/missions/mission-service/context.ts +4 -0
package/src/lib/server/missions/mission-service/core.ts +2269 -0
package/src/lib/server/missions/mission-service/queries.ts +12 -0
package/src/lib/server/missions/mission-service/recovery.ts +5 -0
package/src/lib/server/missions/mission-service/ticks.ts +9 -0
package/src/lib/server/missions/mission-service.test.ts +9 -2
package/src/lib/server/missions/mission-service.ts +6 -2266
package/src/lib/server/persistence/repository-utils.ts +154 -0
package/src/lib/server/persistence/storage-context.ts +51 -0
package/src/lib/server/persistence/transaction.ts +1 -0
package/src/lib/server/projects/project-repository.ts +36 -0
package/src/lib/server/projects/project-service.ts +79 -0
package/src/lib/server/protocols/protocol-normalization.test.ts +6 -4
package/src/lib/server/runtime/alert-dispatch.ts +1 -1
package/src/lib/server/runtime/daemon-policy.ts +1 -1
package/src/lib/server/runtime/daemon-state/core.ts +1570 -0
package/src/lib/server/runtime/daemon-state/health.ts +6 -0
package/src/lib/server/runtime/daemon-state/policy.ts +7 -0
package/src/lib/server/runtime/daemon-state/supervisor.ts +6 -0
package/src/lib/server/runtime/daemon-state.test.ts +48 -0
package/src/lib/server/runtime/daemon-state.ts +3 -1470
package/src/lib/server/runtime/estop-repository.ts +4 -0
package/src/lib/server/runtime/estop.ts +3 -1
package/src/lib/server/runtime/heartbeat-service.test.ts +2 -2
package/src/lib/server/runtime/heartbeat-service.ts +55 -34
package/src/lib/server/runtime/heartbeat-wake.ts +6 -4
package/src/lib/server/runtime/idle-window.ts +2 -2
package/src/lib/server/runtime/network.ts +11 -0
package/src/lib/server/runtime/orchestrator-events.ts +2 -2
package/src/lib/server/runtime/queue/claims.ts +4 -0
package/src/lib/server/runtime/queue/core.ts +2079 -0
package/src/lib/server/runtime/queue/execution.ts +7 -0
package/src/lib/server/runtime/queue/followups.ts +4 -0
package/src/lib/server/runtime/queue/queries.ts +12 -0
package/src/lib/server/runtime/queue/recovery.ts +7 -0
package/src/lib/server/runtime/queue-recovery.test.ts +48 -13
package/src/lib/server/runtime/queue-repository.ts +17 -0
package/src/lib/server/runtime/queue.ts +5 -2061
package/src/lib/server/runtime/run-ledger.ts +6 -5
package/src/lib/server/runtime/run-repository.ts +73 -0
package/src/lib/server/runtime/runtime-lock-repository.ts +8 -0
package/src/lib/server/runtime/runtime-settings.ts +1 -1
package/src/lib/server/runtime/runtime-state.ts +99 -0
package/src/lib/server/runtime/scheduler.ts +4 -2
package/src/lib/server/runtime/session-run-manager/cancellation.ts +157 -0
package/src/lib/server/runtime/session-run-manager/drain.ts +246 -0
package/src/lib/server/runtime/session-run-manager/enqueue.ts +287 -0
package/src/lib/server/runtime/session-run-manager/queries.ts +117 -0
package/src/lib/server/runtime/session-run-manager/recovery.ts +238 -0
package/src/lib/server/runtime/session-run-manager/state.ts +441 -0
package/src/lib/server/runtime/session-run-manager/types.ts +74 -0
package/src/lib/server/runtime/session-run-manager.ts +72 -1377
package/src/lib/server/runtime/watch-job-repository.ts +35 -0
package/src/lib/server/runtime/watch-jobs.ts +3 -1
package/src/lib/server/schedules/schedule-repository.ts +42 -0
package/src/lib/server/sessions/session-repository.ts +85 -0
package/src/lib/server/settings/settings-repository.ts +25 -0
package/src/lib/server/skills/skill-discovery.test.ts +2 -2
package/src/lib/server/skills/skill-discovery.ts +2 -2
package/src/lib/server/skills/skill-repository.ts +14 -0
package/src/lib/server/storage.ts +13 -24
package/src/lib/server/tasks/task-repository.ts +54 -0
package/src/lib/server/usage/usage-repository.ts +30 -0
package/src/lib/server/webhooks/webhook-repository.ts +10 -0
package/src/lib/strip-internal-metadata.test.ts +42 -41
package/src/stores/use-chat-store.test.ts +54 -0
package/src/stores/use-chat-store.ts +21 -5
/package/{bundled-skills → skills}/google-workspace/SKILL.md +0 -0

package/README.md CHANGED Viewed

@@ -190,6 +190,15 @@ The building blocks are the same: **agents, tools, memory, delegation, schedules
 ## Release Notes
+### v1.2.2 Highlights
+- **Modular chat execution pipeline**: decomposed the monolithic chat-execution module into 6 focused modules (preflight, preparation, stream execution, partial persistence, and finalization stages, plus shared types) for maintainability and testability.
+- **Repository pattern adoption**: extracted ~15 repository modules from `storage.ts`, giving each domain (agents, sessions, missions, credentials, tasks, etc.) its own data-access layer.
+- **Runtime state encapsulation**: moved process-local state (active sessions, dev servers) from storage into `runtime-state.ts` with proper HMR singleton usage.
+- **Streaming state improvements**: stable assistant render IDs, better live-row display logic, and smoother streaming phase transitions in the chat UI.
+- **8 new skills**: coding-agent, github, nano-banana-pro, nano-pdf, openai-image-gen, resourceful-problem-solving, skill-creator, summarize.
+- **Lint baseline improvements**: reduced lint violations from 414 to 396 (-18).
 ### v1.2.1 Highlights
 - **System health endpoint**: new `/api/system/status` route returns lightweight health summary for external monitoring and uptime checks.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@swarmclawai/swarmclaw",
-  "version": "1.2.1",
+  "version": "1.2.2",
   "description": "Self-hosted AI runtime for OpenClaw, delegation, autonomy, runtime skills, crypto wallets, and chat platform connectors.",
   "license": "MIT",
   "publishConfig": {
@@ -30,7 +30,7 @@
   },
   "files": [
     "bin/",
-    "bundled-skills/",
+    "skills/",
     "src/",
     "public/",
     "Dockerfile.sandbox-browser",

package/skills/coding-agent/SKILL.md ADDED Viewed

@@ -0,0 +1,111 @@
+---
+name: coding-agent
+description: 'Delegate coding tasks to external coding agents (Claude Code, Codex, Pi, OpenCode) via shell. Use when: (1) building new features or apps in a separate project, (2) reviewing PRs, (3) refactoring large codebases, (4) iterative coding that needs file exploration. NOT for: simple one-liner fixes (just edit directly), reading code (use read/file tools), or work inside the SwarmClaw workspace itself.'
+metadata:
+  {
+    "openclaw": { "emoji": "🧩", "requires": { "anyBins": ["claude", "codex", "opencode", "pi"] } },
+  }
+---
+# Coding Agent
+Delegate coding tasks to external coding agents via shell tools.
+## Agent Execution Modes
+### Claude Code (recommended)
+Use `--print --permission-mode bypassPermissions` for non-interactive execution:
+```bash
+cd /path/to/project && claude --permission-mode bypassPermissions --print 'Your task here'
+```
+For background execution, use the shell tool's background mode.
+**Do NOT use PTY mode with Claude Code** — `--print` mode keeps full tool access and avoids interactive confirmation dialogs.
+### Codex
+Codex requires a git repository and PTY mode:
+```bash
+# Quick one-shot (auto-approves changes)
+cd /path/to/project && codex exec --full-auto 'Build a dark mode toggle'
+# Codex refuses to run outside a git directory. For scratch work:
+SCRATCH=$(mktemp -d) && cd $SCRATCH && git init && codex exec "Your prompt"
+```
+### Pi Coding Agent
+```bash
+# Install: npm install -g @mariozechner/pi-coding-agent
+cd /path/to/project && pi 'Your task'
+# Non-interactive mode
+pi -p 'Summarize src/'
+# Different provider/model
+pi --provider openai --model gpt-4o-mini -p 'Your task'
+```
+### OpenCode
+```bash
+cd /path/to/project && opencode run 'Your task'
+```
+## PR Reviews
+Clone to a temp folder or use git worktree — never review PRs in the SwarmClaw project directory:
+```bash
+# Clone to temp for safe review
+REVIEW_DIR=$(mktemp -d)
+git clone https://github.com/user/repo.git $REVIEW_DIR
+cd $REVIEW_DIR && gh pr checkout 130
+codex review --base origin/main
+# Or use git worktree
+git worktree add /tmp/pr-130-review pr-130-branch
+cd /tmp/pr-130-review && codex review --base main
+```
+## Parallel Issue Fixing
+Use git worktrees to fix multiple issues in parallel:
+```bash
+# Create worktrees
+git worktree add -b fix/issue-78 /tmp/issue-78 main
+git worktree add -b fix/issue-99 /tmp/issue-99 main
+# Launch agents (use background shell execution)
+cd /tmp/issue-78 && codex --yolo 'Fix issue #78: <description>. Commit when done.'
+cd /tmp/issue-99 && codex --yolo 'Fix issue #99: <description>. Commit when done.'
+# Create PRs after
+cd /tmp/issue-78 && git push -u origin fix/issue-78
+gh pr create --repo user/repo --head fix/issue-78 --title "fix: ..." --body "..."
+# Cleanup
+git worktree remove /tmp/issue-78
+git worktree remove /tmp/issue-99
+```
+## Rules
+1. **Use the right execution mode per agent**: Claude Code uses `--print` (no PTY); Codex/Pi/OpenCode may need an interactive terminal.
+2. **Respect tool choice** — if the user asks for Codex, use Codex. Don't silently switch agents.
+3. **Be patient** — don't kill sessions because they seem slow.
+4. **Monitor progress** — check output periodically without interfering.
+5. **Never run coding agents inside the SwarmClaw project directory** — use a separate project directory or temp folder.
+## Progress Updates
+When spawning coding agents in the background:
+- Send a short message when you start (what's running, where).
+- Update only when something changes (milestone, error, completion).
+- If you kill a session, say so immediately and explain why.

package/skills/github/SKILL.md ADDED Viewed

@@ -0,0 +1,140 @@
+---
+name: github
+description: "GitHub operations via `gh` CLI: issues, PRs, CI runs, code review, API queries. Use when: (1) checking PR status or CI, (2) creating/commenting on issues, (3) listing/filtering PRs or issues, (4) viewing run logs. NOT for: local git operations (use git directly), non-GitHub repos, or cloning (use git clone)."
+metadata:
+  {
+    "openclaw":
+      {
+        "emoji": "🐙",
+        "requires": { "bins": ["gh"] },
+        "install":
+          [
+            {
+              "id": "brew",
+              "kind": "brew",
+              "formula": "gh",
+              "bins": ["gh"],
+              "label": "Install GitHub CLI (brew)",
+            },
+            {
+              "id": "apt",
+              "kind": "apt",
+              "package": "gh",
+              "bins": ["gh"],
+              "label": "Install GitHub CLI (apt)",
+            },
+          ],
+      },
+  }
+---
+# GitHub Skill
+Use the `gh` CLI to interact with GitHub repositories, issues, PRs, and CI.
+## Setup
+```bash
+# Authenticate (one-time)
+gh auth login
+# Verify
+gh auth status
+```
+## Common Commands
+### Pull Requests
+```bash
+# List PRs
+gh pr list --repo owner/repo
+# Check CI status
+gh pr checks 55 --repo owner/repo
+# View PR details
+gh pr view 55 --repo owner/repo
+# Create PR
+gh pr create --title "feat: add feature" --body "Description"
+# Merge PR
+gh pr merge 55 --squash --repo owner/repo
+```
+### Issues
+```bash
+# List issues
+gh issue list --repo owner/repo --state open
+# Create issue
+gh issue create --title "Bug: something broken" --body "Details..."
+# Close issue
+gh issue close 42 --repo owner/repo
+```
+### CI/Workflow Runs
+```bash
+# List recent runs
+gh run list --repo owner/repo --limit 10
+# View specific run
+gh run view <run-id> --repo owner/repo
+# View failed step logs only
+gh run view <run-id> --repo owner/repo --log-failed
+# Re-run failed jobs
+gh run rerun <run-id> --failed --repo owner/repo
+```
+### API Queries
+```bash
+# Get PR with specific fields
+gh api repos/owner/repo/pulls/55 --jq '.title, .state, .user.login'
+# List all labels
+gh api repos/owner/repo/labels --jq '.[].name'
+# Get repo stats
+gh api repos/owner/repo --jq '{stars: .stargazers_count, forks: .forks_count}'
+```
+## JSON Output
+Most commands support `--json` for structured output with `--jq` filtering:
+```bash
+gh issue list --repo owner/repo --json number,title --jq '.[] | "\(.number): \(.title)"'
+gh pr list --json number,title,state,mergeable --jq '.[] | select(.mergeable == "MERGEABLE")'
+```
+## Templates
+### PR Review Summary
+```bash
+PR=55 REPO=owner/repo
+echo "## PR #$PR Summary"
+gh pr view $PR --repo $REPO --json title,body,author,additions,deletions,changedFiles \
+  --jq '"**\(.title)** by @\(.author.login)\n\n\(.body)\n\n+\(.additions) -\(.deletions) across \(.changedFiles) files"'
+gh pr checks $PR --repo $REPO
+```
+### Issue Triage
+```bash
+gh issue list --repo owner/repo --state open --json number,title,labels,createdAt \
+  --jq '.[] | "[\(.number)] \(.title) - \([.labels[].name] | join(", ")) (\(.createdAt[:10]))"'
+```
+## Notes
+- Always specify `--repo owner/repo` when not in a git directory.
+- Use URLs directly: `gh pr view https://github.com/owner/repo/pull/55`
+- Rate limits apply; use `gh api --cache 1h` for repeated queries.

package/skills/nano-banana-pro/SKILL.md ADDED Viewed

@@ -0,0 +1,62 @@
+---
+name: nano-banana-pro
+description: Generate or edit images via Gemini 3 Pro Image (Nano Banana Pro). Use when asked to create, generate, or edit images and a Gemini API key is available. Supports text-to-image generation, single-image editing, and multi-image composition (up to 14 images).
+metadata:
+  {
+    "openclaw":
+      {
+        "emoji": "🍌",
+        "requires": { "bins": ["uv"], "env": ["GEMINI_API_KEY"] },
+        "primaryEnv": "GEMINI_API_KEY",
+        "install":
+          [
+            {
+              "id": "uv-brew",
+              "kind": "brew",
+              "formula": "uv",
+              "bins": ["uv"],
+              "label": "Install uv (brew)",
+            },
+          ],
+      },
+  }
+---
+# Nano Banana Pro (Gemini 3 Pro Image)
+Use the bundled script to generate or edit images.
+## Generate
+```bash
+uv run {baseDir}/scripts/generate_image.py --prompt "your image description" --filename "output.png" --resolution 1K
+```
+## Edit (Single Image)
+```bash
+uv run {baseDir}/scripts/generate_image.py --prompt "edit instructions" --filename "output.png" -i "/path/in.png" --resolution 2K
+```
+## Multi-Image Composition (up to 14 images)
+```bash
+uv run {baseDir}/scripts/generate_image.py --prompt "combine these into one scene" --filename "output.png" -i img1.png -i img2.png -i img3.png
+```
+## API Key
+Set `GEMINI_API_KEY` as an environment variable, or pass `--api-key <KEY>` to the script.
+## Aspect Ratio (optional)
+```bash
+uv run {baseDir}/scripts/generate_image.py --prompt "portrait photo" --filename "output.png" --aspect-ratio 9:16
+```
+## Notes
+- Resolutions: `1K` (default), `2K`, `4K`.
+- Aspect ratios: `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`. Without `--aspect-ratio`, the model picks freely.
+- Use timestamps in filenames for uniqueness: `yyyy-mm-dd-hh-mm-ss-name.png`.
+- Do not read the image back into context; report the saved path only.

package/skills/nano-banana-pro/scripts/generate_image.py ADDED Viewed

@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "google-genai>=1.0.0",
+#     "pillow>=10.0.0",
+# ]
+# ///
+"""
+Generate images using Google's Nano Banana Pro (Gemini 3 Pro Image) API.
+Usage:
+    uv run generate_image.py --prompt "your image description" --filename "output.png" [--resolution 1K|2K|4K] [--api-key KEY]
+Multi-image editing (up to 14 images):
+    uv run generate_image.py --prompt "combine these images" --filename "output.png" -i img1.png -i img2.png -i img3.png
+"""
+import argparse
+import os
+import sys
+from pathlib import Path
+# Aspect ratios accepted by the --aspect-ratio CLI option (used as its argparse
+# `choices` list and echoed in its help text).
+SUPPORTED_ASPECT_RATIOS = [
+    "1:1",
+    "2:3",
+    "3:2",
+    "3:4",
+    "4:3",
+    "4:5",
+    "5:4",
+    "9:16",
+    "16:9",
+    "21:9",
+]
+def get_api_key(provided_key: str | None) -> str | None:
+    """Get API key from argument first, then environment."""
+    if provided_key:
+        return provided_key
+    return os.environ.get("GEMINI_API_KEY")
+def auto_detect_resolution(max_input_dim: int) -> str:
+    """Infer output resolution from the largest input image dimension."""
+    if max_input_dim >= 3000:
+        return "4K"
+    if max_input_dim >= 1500:
+        return "2K"
+    return "1K"
+def choose_output_resolution(
+    requested_resolution: str | None,
+    max_input_dim: int,
+    has_input_images: bool,
+) -> tuple[str, bool]:
+    """Choose final resolution and whether it was auto-detected.
+    Auto-detection is only applied when the user did not pass --resolution.
+    """
+    if requested_resolution is not None:
+        return requested_resolution, False
+    if has_input_images and max_input_dim > 0:
+        return auto_detect_resolution(max_input_dim), True
+    return "1K", False
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate images using Nano Banana Pro (Gemini 3 Pro Image)"
+    )
+    parser.add_argument(
+        "--prompt", "-p",
+        required=True,
+        help="Image description/prompt"
+    )
+    parser.add_argument(
+        "--filename", "-f",
+        required=True,
+        help="Output filename (e.g., sunset-mountains.png)"
+    )
+    parser.add_argument(
+        "--input-image", "-i",
+        action="append",
+        dest="input_images",
+        metavar="IMAGE",
+        help="Input image path(s) for editing/composition. Can be specified multiple times (up to 14 images)."
+    )
+    parser.add_argument(
+        "--resolution", "-r",
+        choices=["1K", "2K", "4K"],
+        default=None,
+        help="Output resolution: 1K, 2K, or 4K. If omitted with input images, auto-detect from largest image dimension."
+    )
+    parser.add_argument(
+        "--aspect-ratio", "-a",
+        choices=SUPPORTED_ASPECT_RATIOS,
+        default=None,
+        help=f"Output aspect ratio (default: model decides). Options: {', '.join(SUPPORTED_ASPECT_RATIOS)}"
+    )
+    parser.add_argument(
+        "--api-key", "-k",
+        help="Gemini API key (overrides GEMINI_API_KEY env var)"
+    )
+    args = parser.parse_args()
+    # Get API key
+    api_key = get_api_key(args.api_key)
+    if not api_key:
+        print("Error: No API key provided.", file=sys.stderr)
+        print("Please either:", file=sys.stderr)
+        print("  1. Provide --api-key argument", file=sys.stderr)
+        print("  2. Set GEMINI_API_KEY environment variable", file=sys.stderr)
+        sys.exit(1)
+    # Import here after checking API key to avoid slow import on error
+    from google import genai
+    from google.genai import types
+    from PIL import Image as PILImage
+    # Initialise client
+    client = genai.Client(api_key=api_key)
+    # Set up output path
+    output_path = Path(args.filename)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    # Load input images if provided (up to 14 supported by Nano Banana Pro)
+    input_images = []
+    max_input_dim = 0
+    if args.input_images:
+        if len(args.input_images) > 14:
+            print(f"Error: Too many input images ({len(args.input_images)}). Maximum is 14.", file=sys.stderr)
+            sys.exit(1)
+        for img_path in args.input_images:
+            try:
+                with PILImage.open(img_path) as img:
+                    copied = img.copy()
+                    width, height = copied.size
+                input_images.append(copied)
+                print(f"Loaded input image: {img_path}")
+                # Track largest dimension for auto-resolution
+                max_input_dim = max(max_input_dim, width, height)
+            except Exception as e:
+                print(f"Error loading input image '{img_path}': {e}", file=sys.stderr)
+                sys.exit(1)
+    output_resolution, auto_detected = choose_output_resolution(
+        requested_resolution=args.resolution,
+        max_input_dim=max_input_dim,
+        has_input_images=bool(input_images),
+    )
+    if auto_detected:
+        print(
+            f"Auto-detected resolution: {output_resolution} "
+            f"(from max input dimension {max_input_dim})"
+        )
+    # Build contents (images first if editing, prompt only if generating)
+    if input_images:
+        contents = [*input_images, args.prompt]
+        img_count = len(input_images)
+        print(f"Processing {img_count} image{'s' if img_count > 1 else ''} with resolution {output_resolution}...")
+    else:
+        contents = args.prompt
+        print(f"Generating image with resolution {output_resolution}...")
+    try:
+        # Build image config with optional aspect ratio
+        image_cfg_kwargs = {"image_size": output_resolution}
+        if args.aspect_ratio:
+            image_cfg_kwargs["aspect_ratio"] = args.aspect_ratio
+        response = client.models.generate_content(
+            model="gemini-3-pro-image-preview",
+            contents=contents,
+            config=types.GenerateContentConfig(
+                response_modalities=["TEXT", "IMAGE"],
+                image_config=types.ImageConfig(**image_cfg_kwargs)
+            )
+        )
+        # Process response and convert to PNG
+        image_saved = False
+        for part in response.parts:
+            if part.text is not None:
+                print(f"Model response: {part.text}")
+            elif part.inline_data is not None:
+                # Convert inline data to PIL Image and save as PNG
+                from io import BytesIO
+                # inline_data.data is already bytes, not base64
+                image_data = part.inline_data.data
+                if isinstance(image_data, str):
+                    # If it's a string, it might be base64
+                    import base64
+                    image_data = base64.b64decode(image_data)
+                image = PILImage.open(BytesIO(image_data))
+                # Ensure RGB mode for PNG (convert RGBA to RGB with white background if needed)
+                if image.mode == 'RGBA':
+                    rgb_image = PILImage.new('RGB', image.size, (255, 255, 255))
+                    rgb_image.paste(image, mask=image.split()[3])
+                    rgb_image.save(str(output_path), 'PNG')
+                elif image.mode == 'RGB':
+                    image.save(str(output_path), 'PNG')
+                else:
+                    image.convert('RGB').save(str(output_path), 'PNG')
+                image_saved = True
+        if image_saved:
+            full_path = output_path.resolve()
+            print(f"\nImage saved: {full_path}")
+            # OpenClaw parses MEDIA: tokens and will attach the file on
+            # supported chat providers. Emit the canonical MEDIA:<path> form.
+            print(f"MEDIA:{full_path}")
+        else:
+            print("Error: No image was generated in the response.", file=sys.stderr)
+            sys.exit(1)
+    except Exception as e:
+        print(f"Error generating image: {e}", file=sys.stderr)
+        sys.exit(1)
+if __name__ == "__main__":
+    main()

package/skills/nano-pdf/SKILL.md ADDED Viewed

@@ -0,0 +1,53 @@
+---
+name: nano-pdf
+description: Edit or create PDFs with natural-language instructions using the nano-pdf CLI. Use when asked to make a PDF, edit a PDF, add pages, change text in a PDF, or convert content to PDF format.
+metadata:
+  {
+    "openclaw":
+      {
+        "emoji": "📄",
+        "requires": { "bins": ["nano-pdf"] },
+        "install":
+          [
+            {
+              "id": "uv",
+              "kind": "uv",
+              "package": "nano-pdf",
+              "bins": ["nano-pdf"],
+              "label": "Install nano-pdf (uv)",
+            },
+          ],
+      },
+  }
+---
+# nano-pdf
+Use `nano-pdf` to apply edits to a specific page in a PDF using a natural-language instruction.
+## Quick Start
+```bash
+nano-pdf edit deck.pdf 1 "Change the title to 'Q3 Results' and fix the typo in the subtitle"
+```
+## Creating a New PDF
+```bash
+nano-pdf create output.pdf "Create a one-page summary of quarterly results with a header, bullet points, and a footer"
+```
+## Usage in SwarmClaw
+When a user asks to create or edit a PDF:
+1. Check if `nano-pdf` is installed: `which nano-pdf`
+2. If not installed, install via `uv tool install nano-pdf` or `pip install nano-pdf`
+3. Run the appropriate command
+4. Report the output file path to the user
+## Notes
+- Page numbers may be 0-based or 1-based depending on the installed `nano-pdf` version; if the edit lands on the wrong page (off by one), retry with the other convention.
+- Always sanity-check the output PDF before reporting success.
+- For multi-page edits, run separate commands per page.