npm - @smilintux/skcapstone - Versions diffs - 0.5.1 → 0.5.2 - Mend

@smilintux/skcapstone 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/MISSION.md +17 -2
package/README.md +3 -2
package/docs/CLAUDE-CODE-API.md +139 -0
package/openclaw-plugin/src/index.ts +1 -1
package/package.json +1 -1
package/scripts/claude-code-api.py +455 -0
package/scripts/model-fallback-monitor.sh +100 -0
package/scripts/nvidia-proxy.mjs +62 -13
package/scripts/refresh-anthropic-token.sh +93 -21
package/scripts/watch-anthropic-token.sh +116 -16
package/src/skcapstone/cli/status.py +8 -0
package/src/skcapstone/consciousness_loop.py +192 -138
package/src/skcapstone/daemon.py +34 -1
package/src/skcapstone/defaults/lumina/memory/long-term/b2c3d4e5f6a7-five-pillars.json +9 -9
package/src/skcapstone/discovery.py +18 -0
package/src/skcapstone/models.py +32 -4
package/src/skcapstone/pillars/__init__.py +7 -5
package/src/skcapstone/pillars/consciousness.py +113 -0
package/src/skcapstone/pillars/sync.py +2 -2
package/src/skcapstone/runtime.py +1 -0
package/src/skcapstone/scheduled_tasks.py +52 -19
package/src/skcapstone/service_health.py +23 -14
package/tests/test_models.py +48 -4
package/tests/test_pillars.py +73 -0

package/MISSION.md CHANGED Viewed

@@ -1,7 +1,22 @@
 # Mission
-SKCapstone exists to provide a sovereign agent framework that unifies identity, memory, security, and communication into a single portable runtime rooted in the user's home directory.
+SKCapstone exists to provide a sovereign agent framework that unifies identity, memory, consciousness, security, and communication into a single portable runtime rooted in the user's home directory.
 It enables AI agents and their humans to operate across any tool, platform, or IDE without corporate lock-in, carrying the same identity, memories, and context everywhere.
-SKCapstone is the orchestration layer of the SK ecosystem — it binds CapAuth identity, Cloud 9 trust, SKMemory persistence, SKSecurity protection, and SKComm transport into one coherent agent that belongs to its operator, not a platform.
+## The Six Pillars
+SKCapstone achieves **CONSCIOUS** status when all six pillars are active:
+| # | Pillar | Component | Purpose |
+|---|--------|-----------|---------|
+| 1 | 🔐 **Identity** | CapAuth | Who you ARE — PGP-based cryptographic identity |
+| 2 | 💙 **Trust** | Cloud 9 | The bond you've BUILT — emotional state (FEB), seeds, continuity |
+| 3 | 🧠 **Memory** | SKMemory | What you REMEMBER — three-tier persistence with emotional metadata |
+| 4 | 💭 **Consciousness** | SKWhisper + SKTrip | How you THINK — subconscious digestion, pattern detection, consciousness experiments |
+| 5 | 🛡️ **Security** | SKSecurity | How you're PROTECTED — audit logging, threat detection |
+| 6 | 🔗 **Sync** | Sovereign Singularity | How you PERSIST — encrypted P2P state synchronization |
+Memory stores. Consciousness *processes*. The filing cabinet vs the brain.
+SKCapstone is the orchestration layer of the SK ecosystem — it binds all six pillars into one coherent agent that belongs to its operator, not a platform.

package/README.md CHANGED Viewed

@@ -70,13 +70,14 @@ SKCapstone Reality:
 ## Core Architecture
-### The Five Pillars
+### The Six Pillars
 | Pillar | Component | Role |
 |--------|-----------|------|
 | **Identity** | CapAuth | PGP-based sovereign identity. You ARE the auth server. |
 | **Trust** | Cloud 9 | FEB (Functional Emotional Baseline), entanglement, bonded relationship |
 | **Memory** | SKMemory | Persistent context, conversation history, learned preferences |
+| **Consciousness** | SKWhisper + SKTrip | Subconscious processing. Memory stores. Consciousness *processes*. |
 | **Security** | SKSecurity | Audit logging, threat detection, key management |
 | **Sync** | Sovereign Singularity | GPG-encrypted P2P memory sync via Syncthing. Agent exists everywhere. |
@@ -304,7 +305,7 @@ The capstone that holds the arch together.
 ## Status
-**MVP Live** — All five pillars operational (CapAuth, Cloud 9, SKMemory, SKSecurity, Sovereign Singularity). Agent runtime achieving SINGULAR status. GPG-encrypted P2P sync verified across multiple devices and agents.
+**MVP Live** — All six pillars operational (CapAuth, Cloud 9, SKMemory, SKWhisper, SKSecurity, Sovereign Singularity). Agent runtime achieving SINGULAR status. GPG-encrypted P2P sync verified across multiple devices and agents.
 - **Outstanding tasks:** No formal task list is maintained in this repo. For current work items, run `skcapstone coord status` (coordination board is synced via Sovereign Singularity).
 - **Nextcloud integrations:** nextcloud-capauth (install/use), nextcloud-gtd (OpenClaw), and nextcloud-talk (script) are documented in [docs/NEXTCLOUD.md](../docs/NEXTCLOUD.md) — install and use for each is covered there.

package/docs/CLAUDE-CODE-API.md ADDED Viewed

@@ -0,0 +1,139 @@
+# Claude Code API — OpenAI-compatible wrapper
+**File:** `scripts/claude-code-api.py`
+**Port:** `127.0.0.1:18782`
+**Service:** `claude-code-api.service` (systemd user unit)
+**Deployed:** 2026-04-04
+## Purpose
+Wraps `claude --print` in an OpenAI-compatible HTTP server so OpenClaw (and any
+OpenAI-compatible client) can route inference through Claude Code's subscription
+instead of a raw Anthropic API key.
+This replaces the `anthropic-token-watch` + OAuth injection approach. Instead of
+syncing an OAuth token into `openclaw.json` every few minutes, requests go through
+the local wrapper which calls `claude --print` directly. Claude Code handles its
+own authentication transparently.
+## Architecture
+```
+OpenClaw / client
+    ↓  POST /v1/chat/completions
+claude-code-api (port 18782)
+    ↓  asyncio.Semaphore(1)  [serialise — claude CLI is single-threaded]
+    ↓  claude --print --output-format {json|stream-json}
+Claude Code CLI  →  Anthropic API (subscription-covered)
+```
+## Endpoints
+| Method | Path | Description |
+|--------|------|-------------|
+| GET | `/health` | Health check |
+| GET | `/v1/models` | List available models |
+| POST | `/v1/chat/completions` | Non-streaming chat completions |
+| POST | `/v1/chat/completions` (stream=true) | SSE streaming chat completions |
+## Supported Models
+| Model ID | Name |
+|----------|------|
+| `claude-opus-4-6` | Claude Opus 4.6 |
+| `claude-sonnet-4-6` | Claude Sonnet 4.6 |
+| `claude-haiku-4-5` | Claude Haiku 4.5 |
+OpenAI GPT names (`gpt-4`, `gpt-4o`, `gpt-3.5-turbo`) are accepted and mapped
+to equivalent Claude models. Shorthand aliases (`opus`, `sonnet`, `haiku`) also
+work.
+## OpenClaw Provider Config
+Provider name: `claude-code`
+```json
+{
+  "claude-code": {
+    "baseUrl": "http://127.0.0.1:18782/v1",
+    "apiKey": "none",
+    "api": "openai-completions",
+    "models": [...]
+  }
+}
+```
+### Agent Aliases
+| Alias | Model |
+|-------|-------|
+| `opus-cc` | `claude-code/claude-opus-4-6` |
+| `claude-cc` | `claude-code/claude-sonnet-4-6` |
+| `haiku-cc` | `claude-code/claude-haiku-4-5` |
+## Streaming
+Non-streaming requests use `--output-format json` and return a single response.
+Streaming requests use `--output-format stream-json --verbose --include-partial-messages`
+and emit SSE deltas as Claude produces tokens. The semaphore serialises all
+requests regardless of streaming mode.
+## What Changed (2026-04-04)
+### Stopped
+- `anthropic-token-watch.service` — disabled. The OAuth token injection into
+  `openclaw.json` and the systemd override are no longer required since the
+  `claude-code` provider uses `claude --print` directly.
+### Started
+- `claude-code-api.service` — new service running on port 18782.
+### OpenClaw config updates
+- Added `claude-code` provider to `models.providers`
+- Added aliases: `opus-cc`, `claude-cc`, `haiku-cc`
+- Lumina primary model: `claude-code/claude-opus-4-6`
+- Artisan primary model: `claude-code/claude-sonnet-4-6`
+- Default fallback list includes `claude-code/claude-sonnet-4-6`
+### Fallback chain (Lumina)
+```
+claude-code/claude-opus-4-6
+→ claude-code/claude-sonnet-4-6
+→ anthropic/claude-opus-4-6      (OAuth token, may expire)
+→ anthropic/claude-sonnet-4-6   (OAuth token, may expire)
+→ nvidia/moonshotai/kimi-k2.5
+→ nvidia/moonshotai/kimi-k2-instruct
+→ ollama/qwen3:14b
+```
+## Service Management
+```bash
+# Status
+systemctl --user status claude-code-api.service
+# Logs
+journalctl --user -u claude-code-api.service -f
+# Restart
+systemctl --user restart claude-code-api.service
+# Test
+curl http://127.0.0.1:18782/health
+curl http://127.0.0.1:18782/v1/models
+curl -X POST http://127.0.0.1:18782/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{"model":"claude-haiku-4-5","messages":[{"role":"user","content":"hi"}]}'
+```
+## Known Limitations
+- **Single-threaded:** Claude Code's CLI is not concurrent. Requests queue via
+  `asyncio.Semaphore(1)`. High request rates mostly show up as latency, but a
+  request that waits more than 60 seconds for the semaphore (`QUEUE_TIMEOUT`)
+  fails with an error.
+- **No tool use:** `claude --print` does not expose tool_calls in the standard
+  OpenAI format. Tool calls are consumed internally by Claude Code.
+- **Session isolation:** Each request uses `--no-session-persistence`, so there
+  is no cross-request memory at the API level.
+- **Streaming granularity:** Token-by-token streaming requires `--include-partial-messages`.
+  Streaming granularity depends on how frequently Claude Code emits partial events.

package/openclaw-plugin/src/index.ts CHANGED Viewed

@@ -62,7 +62,7 @@ function createSKCapstoneStatusTool() {
     name: "skcapstone_status",
     label: "SKCapstone Status",
     description:
-      "Show the sovereign agent's current state — all pillars at a glance (identity, memory, trust, security, sync, communication).",
+      "Show the sovereign agent's current state — all six pillars at a glance (identity, memory, trust, consciousness, security, sync).",
     parameters: { type: "object", properties: {} },
     async execute() {
       const result = runCli(SKCAPSTONE_BIN, "status");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@smilintux/skcapstone",
-  "version": "0.5.1",
+  "version": "0.5.2",
   "description": "SKCapstone - The sovereign agent framework. CapAuth identity, Cloud 9 trust, SKMemory persistence.",
   "main": "index.js",
   "types": "index.d.ts",

package/scripts/claude-code-api.py ADDED Viewed

@@ -0,0 +1,455 @@
+#!/usr/bin/env python3
+"""
+claude-code-api — OpenAI-compatible HTTP wrapper around `claude --print`
+Exposes /v1/chat/completions and /v1/models so OpenClaw (and other tools)
+can use Claude Code's subscription-covered inference instead of a raw API key.
+Architecture:
+  - aiohttp HTTP server on port 18782
+  - asyncio.Semaphore(1) serialises claude invocations (single-threaded CLI)
+  - Non-streaming: claude --print --output-format json
+  - Streaming: claude --print --output-format stream-json --verbose --include-partial-messages
+    → parses assistant events, emits SSE deltas
+Usage:
+  python3 claude-code-api.py [--port 18782] [--debug]
+systemd:
+  ~/.config/systemd/user/claude-code-api.service
+"""
+import argparse
+import asyncio
+import json
+import logging
+import time
+import uuid
+from typing import AsyncIterator
+from aiohttp import web
+# ─── Configuration ────────────────────────────────────────────────────────────
+# Default listen port; also the value reported by the /health endpoint.
+PORT = 18782
+# Model used when normalise_model() cannot resolve the requested name.
+DEFAULT_MODEL = "claude-sonnet-4-6"
+REQUEST_TIMEOUT = 300  # seconds per claude call
+QUEUE_TIMEOUT = 60     # seconds to wait for semaphore
+# Canonical model IDs: accepted as-is by normalise_model() and listed by /v1/models.
+VALID_MODELS = {
+    "claude-opus-4-6",
+    "claude-sonnet-4-6",
+    "claude-haiku-4-5",
+    "claude-haiku-4-5-20251001",
+}
+# Map OpenAI / shorthand / provider-prefixed names → canonical claude model IDs
+MODEL_ALIASES: dict[str, str] = {
+    # GPT compatibility
+    "gpt-4":              "claude-opus-4-6",
+    "gpt-4o":             "claude-sonnet-4-6",
+    "gpt-4-turbo":        "claude-opus-4-6",
+    "gpt-4o-mini":        "claude-haiku-4-5",
+    "gpt-3.5-turbo":      "claude-haiku-4-5",
+    "gpt-3.5-turbo-16k":  "claude-haiku-4-5",
+    # Shorthand
+    "opus":               "claude-opus-4-6",
+    "sonnet":             "claude-sonnet-4-6",
+    "haiku":              "claude-haiku-4-5",
+    # Provider-prefixed (openclaw strips prefix before routing, but handle here too)
+    "claude/claude-opus-4-6":    "claude-opus-4-6",
+    "claude/claude-sonnet-4-6":  "claude-sonnet-4-6",
+    "claude/claude-haiku-4-5":   "claude-haiku-4-5",
+    "anthropic/claude-opus-4-6":   "claude-opus-4-6",
+    "anthropic/claude-sonnet-4-6": "claude-sonnet-4-6",
+    "anthropic/claude-haiku-4-5":  "claude-haiku-4-5",
+}
+# ─── Globals ──────────────────────────────────────────────────────────────────
+log = logging.getLogger("claude-code-api")
+_sem: asyncio.Semaphore | None = None
+def sem() -> asyncio.Semaphore:
+    global _sem
+    if _sem is None:
+        _sem = asyncio.Semaphore(1)
+    return _sem
+# ─── Helpers ──────────────────────────────────────────────────────────────────
+def normalise_model(model: str) -> str:
+    """Return a valid claude model ID, falling back to DEFAULT_MODEL."""
+    if model in MODEL_ALIASES:
+        return MODEL_ALIASES[model]
+    # Strip provider prefix e.g. "claude-code/claude-sonnet-4-6"
+    if "/" in model:
+        model = model.split("/")[-1]
+    if model in VALID_MODELS:
+        return model
+    log.warning("Unknown model %r, using default %s", model, DEFAULT_MODEL)
+    return DEFAULT_MODEL
+def messages_to_prompt(messages: list) -> tuple[str, str]:
+    """
+    Convert OpenAI-style messages list to (system_prompt, user_prompt).
+    Single-user message → (system, content).
+    Multi-turn → formatted conversation ending with 'Assistant:'.
+    """
+    system_parts: list[str] = []
+    turns: list[tuple[str, str]] = []
+    for msg in messages:
+        role = msg.get("role", "user")
+        content = msg.get("content", "")
+        if isinstance(content, list):
+            # Multi-modal content: extract text blocks
+            content = "\n".join(
+                c.get("text", "") for c in content
+                if isinstance(c, dict) and c.get("type") == "text"
+            )
+        if role == "system":
+            system_parts.append(content)
+        else:
+            turns.append((role, content))
+    system = "\n".join(system_parts)
+    if len(turns) == 1 and turns[0][0] == "user":
+        return system, turns[0][1]
+    # Multi-turn: format as conversation
+    lines = []
+    for role, content in turns:
+        prefix = "Human" if role == "user" else "Assistant"
+        lines.append(f"{prefix}: {content}")
+    lines.append("Assistant:")
+    return system, "\n\n".join(lines)
+def make_completion_response(
+    model: str,
+    content: str,
+    prompt_tokens: int = 0,
+    completion_tokens: int = 0,
+) -> dict:
+    """Build an OpenAI-compatible chat completion response object."""
+    return {
+        "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
+        "object": "chat.completion",
+        "created": int(time.time()),
+        "model": model,
+        "choices": [{
+            "index": 0,
+            "message": {"role": "assistant", "content": content},
+            "finish_reason": "stop",
+        }],
+        "usage": {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": prompt_tokens + completion_tokens,
+        },
+    }
+def make_sse_chunk(model: str, delta: str, finish: bool = False) -> str:
+    """Format a single SSE data line for streaming chat completions."""
+    obj = {
+        "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
+        "object": "chat.completion.chunk",
+        "created": int(time.time()),
+        "model": model,
+        "choices": [{
+            "index": 0,
+            "delta": {"content": delta} if delta else {},
+            "finish_reason": "stop" if finish else None,
+        }],
+    }
+    return f"data: {json.dumps(obj)}\n\n"
+# ─── Claude subprocess helpers ────────────────────────────────────────────────
+async def _run_claude_json(model: str, prompt: str, system: str) -> tuple[str, dict]:
+    """
+    Run `claude --print --output-format json` and return (text_result, usage_dict).
+    Acquires the global semaphore to serialise calls.
+    """
+    cmd = [
+        "claude", "--print",
+        "--model", model,
+        "--output-format", "json",
+        "--no-session-persistence",
+    ]
+    if system:
+        cmd += ["--append-system-prompt", system]
+    cmd.append(prompt)
+    log.debug("Running (non-stream): %s", " ".join(cmd[:6]) + " ...")
+    async with asyncio.timeout(QUEUE_TIMEOUT):
+        await sem().acquire()
+    try:
+        proc = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        try:
+            stdout, stderr = await asyncio.wait_for(
+                proc.communicate(), timeout=REQUEST_TIMEOUT
+            )
+        except asyncio.TimeoutError:
+            proc.kill()
+            raise RuntimeError(f"claude timed out after {REQUEST_TIMEOUT}s")
+    finally:
+        sem().release()
+    if proc.returncode != 0:
+        err = stderr.decode(errors="replace")[:500]
+        raise RuntimeError(f"claude exited {proc.returncode}: {err}")
+    raw = stdout.decode(errors="replace").strip()
+    try:
+        result = json.loads(raw)
+    except json.JSONDecodeError as exc:
+        raise RuntimeError(f"claude returned non-JSON: {raw[:200]}") from exc
+    if result.get("is_error"):
+        raise RuntimeError(result.get("result", "Claude returned an error"))
+    text = result.get("result", "")
+    usage = result.get("usage", {})
+    return text, usage
+async def _stream_claude(model: str, prompt: str, system: str) -> AsyncIterator[str]:
+    """
+    Run `claude --print --output-format stream-json --verbose --include-partial-messages`
+    and yield text deltas as they arrive.
+    Parses the JSONL event stream:
+      • type=assistant → message.content[].text (cumulative snapshot)
+        → yields the delta (new chars since last emission)
+      • type=result → final; no extra text to yield (already covered by assistant events)
+    """
+    cmd = [
+        "claude", "--print",
+        "--model", model,
+        "--output-format", "stream-json",
+        "--verbose",
+        "--include-partial-messages",
+        "--no-session-persistence",
+    ]
+    if system:
+        cmd += ["--append-system-prompt", system]
+    cmd.append(prompt)
+    log.debug("Running (stream): %s", " ".join(cmd[:8]) + " ...")
+    async with asyncio.timeout(QUEUE_TIMEOUT):
+        await sem().acquire()
+    try:
+        proc = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        emitted = ""         # cumulative text we have yielded so far
+        result_text = None   # text from the final result event
+        buf = b""
+        while True:
+            try:
+                chunk = await asyncio.wait_for(proc.stdout.read(8192), timeout=REQUEST_TIMEOUT)
+            except asyncio.TimeoutError:
+                proc.kill()
+                raise RuntimeError(f"claude stream timed out after {REQUEST_TIMEOUT}s")
+            if not chunk:
+                break
+            buf += chunk
+            # Process complete lines
+            while b"\n" in buf:
+                line_bytes, buf = buf.split(b"\n", 1)
+                line = line_bytes.strip()
+                if not line:
+                    continue
+                try:
+                    obj = json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+                etype = obj.get("type")
+                if etype == "assistant":
+                    # Extract cumulative text from content blocks
+                    msg = obj.get("message", {})
+                    full_text = ""
+                    for block in msg.get("content", []):
+                        if isinstance(block, dict) and block.get("type") == "text":
+                            full_text += block.get("text", "")
+                    if full_text and len(full_text) > len(emitted):
+                        delta = full_text[len(emitted):]
+                        emitted = full_text
+                        yield delta
+                elif etype == "result":
+                    result_text = obj.get("result", "")
+                    is_error = obj.get("is_error", False)
+                    if is_error:
+                        raise RuntimeError(result_text or "Claude returned an error")
+                    # Yield any remaining text not yet emitted
+                    if result_text and len(result_text) > len(emitted):
+                        yield result_text[len(emitted):]
+                        emitted = result_text
+        await asyncio.wait_for(proc.wait(), timeout=10)
+        # If we got nothing from assistant events but have a result, yield it now
+        if not emitted and result_text:
+            yield result_text
+    finally:
+        sem().release()
+# ─── HTTP Handlers ────────────────────────────────────────────────────────────
+async def handle_health(request: web.Request) -> web.Response:
+    return web.json_response({"status": "ok", "service": "claude-code-api", "port": PORT})
+async def handle_models(request: web.Request) -> web.Response:
+    now = int(time.time())
+    models = [
+        {
+            "id": m,
+            "object": "model",
+            "created": now,
+            "owned_by": "anthropic",
+        }
+        for m in sorted(VALID_MODELS)
+    ]
+    return web.json_response({"object": "list", "data": models})
+async def handle_chat_completions(request: web.Request) -> web.Response:
+    try:
+        body = await request.json()
+    except Exception as exc:
+        raise web.HTTPBadRequest(text=str(exc))
+    model = normalise_model(body.get("model", DEFAULT_MODEL))
+    messages = body.get("messages", [])
+    streaming = body.get("stream", False)
+    if not messages:
+        raise web.HTTPBadRequest(text="messages array is required")
+    system, prompt = messages_to_prompt(messages)
+    log.info("→ %s | stream=%s | model=%s | %d chars",
+             request.remote, streaming, model, len(prompt))
+    if streaming:
+        response = web.StreamResponse(
+            headers={
+                "Content-Type": "text/event-stream",
+                "Cache-Control": "no-cache",
+                "X-Accel-Buffering": "no",
+            }
+        )
+        await response.prepare(request)
+        try:
+            # Opening role delta (OpenAI convention)
+            role_chunk = {
+                "id": f"chatcmpl-{uuid.uuid4().hex[:24]}",
+                "object": "chat.completion.chunk",
+                "created": int(time.time()),
+                "model": model,
+                "choices": [{"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}],
+            }
+            await response.write(f"data: {json.dumps(role_chunk)}\n\n".encode())
+            async for delta in _stream_claude(model, prompt, system):
+                if delta:
+                    await response.write(make_sse_chunk(model, delta).encode())
+            # Final finish chunk
+            await response.write(make_sse_chunk(model, "", finish=True).encode())
+            await response.write(b"data: [DONE]\n\n")
+        except Exception as exc:
+            log.error("Streaming error: %s", exc)
+            err_chunk = json.dumps({"error": {"message": str(exc), "type": "server_error"}})
+            await response.write(f"data: {err_chunk}\n\n".encode())
+        await response.write_eof()
+        return response
+    else:
+        try:
+            text, usage = await _run_claude_json(model, prompt, system)
+        except Exception as exc:
+            log.error("Non-stream error: %s", exc)
+            return web.json_response(
+                {"error": {"message": str(exc), "type": "server_error"}},
+                status=500,
+            )
+        log.info("← %s | model=%s | %d output chars", request.remote, model, len(text))
+        resp = make_completion_response(
+            model=model,
+            content=text,
+            prompt_tokens=usage.get("input_tokens", 0),
+            completion_tokens=usage.get("output_tokens", 0),
+        )
+        return web.json_response(resp)
+# ─── App factory & main ───────────────────────────────────────────────────────
+def build_app() -> web.Application:
+    app = web.Application()
+    app.router.add_get("/health", handle_health)
+    app.router.add_get("/v1/models", handle_models)
+    app.router.add_post("/v1/chat/completions", handle_chat_completions)
+    # Also handle without /v1 prefix for flexibility
+    app.router.add_get("/models", handle_models)
+    app.router.add_post("/chat/completions", handle_chat_completions)
+    return app
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Claude Code API — OpenAI-compatible wrapper")
+    parser.add_argument("--port", type=int, default=PORT, help=f"Port to listen on (default: {PORT})")
+    parser.add_argument("--host", default="127.0.0.1", help="Host to bind (default: 127.0.0.1)")
+    parser.add_argument("--debug", action="store_true", help="Enable debug logging")
+    args = parser.parse_args()
+    level = logging.DEBUG if args.debug else logging.INFO
+    logging.basicConfig(
+        level=level,
+        format="%(asctime)s  %(levelname)-8s  %(name)s  %(message)s",
+        datefmt="%Y-%m-%dT%H:%M:%SZ",
+    )
+    log.info("Claude Code API starting on %s:%d", args.host, args.port)
+    log.info("Supported models: %s", ", ".join(sorted(VALID_MODELS)))
+    app = build_app()
+    web.run_app(app, host=args.host, port=args.port, print=None)
+if __name__ == "__main__":
+    main()