npm - @a1hvdy/cc-openclaw - Version diff - 0.7.1 → 0.9.0 - Mend

@a1hvdy/cc-openclaw 0.7.1 → 0.9.0

This diff compares the contents of two publicly available versions of a package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (79)

package/dist/src/command-router/cc-handler.js +11 -3
package/dist/src/command-router/cc-handler.js.map +1 -1
package/dist/src/engines/persistent-session.d.ts +1 -0
package/dist/src/engines/persistent-session.js +35 -1
package/dist/src/engines/persistent-session.js.map +1 -1
package/dist/src/index.d.ts +10 -1
package/dist/src/index.js +47 -7
package/dist/src/index.js.map +1 -1
package/dist/src/lib/config-service.d.ts +106 -0
package/dist/src/lib/config-service.js +217 -0
package/dist/src/lib/config-service.js.map +1 -0
package/dist/src/lib/config.d.ts +33 -14
package/dist/src/lib/config.js +147 -34
package/dist/src/lib/config.js.map +1 -1
package/dist/src/lib/index.d.ts +1 -1
package/dist/src/lib/index.js +4 -1
package/dist/src/lib/index.js.map +1 -1
package/dist/src/openai-compat/message-extractor.d.ts +79 -0
package/dist/src/openai-compat/message-extractor.js +134 -0
package/dist/src/openai-compat/message-extractor.js.map +1 -0
package/dist/src/openai-compat/mode-flags.d.ts +34 -0
package/dist/src/openai-compat/mode-flags.js +44 -0
package/dist/src/openai-compat/mode-flags.js.map +1 -0
package/dist/src/openai-compat/non-streaming-handler.d.ts +26 -0
package/dist/src/openai-compat/non-streaming-handler.js +108 -0
package/dist/src/openai-compat/non-streaming-handler.js.map +1 -0
package/dist/src/openai-compat/openai-compat.d.ts +15 -166
package/dist/src/openai-compat/openai-compat.js +72 -817
package/dist/src/openai-compat/openai-compat.js.map +1 -1
package/dist/src/openai-compat/prompts.d.ts +47 -0
package/dist/src/openai-compat/prompts.js +119 -0
package/dist/src/openai-compat/prompts.js.map +1 -0
package/dist/src/openai-compat/response-formatter.d.ts +33 -0
package/dist/src/openai-compat/response-formatter.js +74 -0
package/dist/src/openai-compat/response-formatter.js.map +1 -0
package/dist/src/openai-compat/session-key-resolver.d.ts +41 -0
package/dist/src/openai-compat/session-key-resolver.js +78 -0
package/dist/src/openai-compat/session-key-resolver.js.map +1 -0
package/dist/src/openai-compat/status-reporter.d.ts +30 -0
package/dist/src/openai-compat/status-reporter.js +81 -0
package/dist/src/openai-compat/status-reporter.js.map +1 -0
package/dist/src/openai-compat/streaming-handler.d.ts +41 -0
package/dist/src/openai-compat/streaming-handler.js +294 -0
package/dist/src/openai-compat/streaming-handler.js.map +1 -0
package/dist/src/openai-compat/tool-calls-parser.d.ts +34 -0
package/dist/src/openai-compat/tool-calls-parser.js +93 -0
package/dist/src/openai-compat/tool-calls-parser.js.map +1 -0
package/dist/src/openai-compat/tool-results-serializer.d.ts +60 -0
package/dist/src/openai-compat/tool-results-serializer.js +56 -0
package/dist/src/openai-compat/tool-results-serializer.js.map +1 -0
package/dist/src/session/session-manager.js +12 -0
package/dist/src/session/session-manager.js.map +1 -1
package/dist/src/session-bootstrap/cwd-patch.js +30 -13
package/dist/src/session-bootstrap/cwd-patch.js.map +1 -1
package/dist/src/types/index.d.ts +15 -0
package/dist/src/types/index.js +16 -0
package/dist/src/types/index.js.map +1 -0
package/dist/src/types/route.d.ts +41 -0
package/dist/src/types/route.js +12 -0
package/dist/src/types/route.js.map +1 -0
package/dist/src/types/runtime-config.d.ts +161 -0
package/dist/src/types/runtime-config.js +118 -0
package/dist/src/types/runtime-config.js.map +1 -0
package/dist/src/types/session.d.ts +48 -0
package/dist/src/types/session.js +20 -0
package/dist/src/types/session.js.map +1 -0
package/dist/src/types/sse.d.ts +38 -0
package/dist/src/types/sse.js +12 -0
package/dist/src/types/sse.js.map +1 -0
package/dist/src/types/tool-bridge.d.ts +81 -0
package/dist/src/types/tool-bridge.js +34 -0
package/dist/src/types/tool-bridge.js.map +1 -0
package/dist/src/types/upstream.d.ts +652 -0
package/dist/src/types/upstream.js +145 -0
package/dist/src/types/upstream.js.map +1 -0
package/package.json +3 -2
package/dist/src/lib/route-flag.d.ts +0 -49
package/dist/src/lib/route-flag.js +0 -52
package/dist/src/lib/route-flag.js.map +0 -1

package/dist/src/openai-compat/openai-compat.js CHANGED

@@ -5,469 +5,76 @@
  * webchat frontends (ChatGPT-Next-Web, Open WebUI, etc.) to use the plugin
  * as a drop-in backend. Stateful sessions maximize Anthropic prompt caching.
  */
-import * as http from 'node:http';
 import * as fs from 'node:fs';
 import * as path from 'node:path';
 import * as os from 'node:os';
-import { randomUUID, createHash } from 'node:crypto';
+import { randomUUID } from 'node:crypto';
 import { resolveEngineAndModel } from '../models.js';
-import { OPENAI_COMPAT_DEFAULT_MODEL, OPENAI_COMPAT_AUTO_COMPACT_THRESHOLD, OPENAI_COMPAT_SESSION_PREFIX, } from '../constants.js';
-import { getOpenaiCompatToolsPerMessage, isOpenaiCompatNewConvoHeuristic, getOpenaiCompatStatusUrl, getSurfaceThinkingEnabled, } from '../lib/config.js';
-import { maybeInlineSkill } from './skill-resolver.js';
+import { OPENAI_COMPAT_DEFAULT_MODEL, OPENAI_COMPAT_AUTO_COMPACT_THRESHOLD, } from '../constants.js';
+import { isToolsPerMessageModeEnabled, isToolStreamMode } from './mode-flags.js';
+import { resolveSessionKey, sessionNameFromKey } from './session-key-resolver.js';
+import { buildSessionSystemPrompt, buildToolPromptBlock } from './prompts.js';
+import { extractUserMessage, } from './message-extractor.js';
+import { handleNonStreaming } from './non-streaming-handler.js';
+import { handleStreaming } from './streaming-handler.js';
+// Re-export for backward compat — Cluster B extracted these to dedicated
+// modules; keep the original import surface stable for any external caller.
+// See src/openai-compat/{mode-flags,session-key-resolver,prompts,tool-calls-parser,tool-results-serializer}.ts.
+export { isToolsPerMessageModeEnabled, isToolStreamMode } from './mode-flags.js';
+export { resolveSessionKey, sessionNameFromKey } from './session-key-resolver.js';
+export { noToolsSystemPrompt, buildSessionSystemPrompt, buildToolPromptBlock } from './prompts.js';
+export { parseToolCallsFromText } from './tool-calls-parser.js';
+export { serializeToolResults, serializeToolResultsAsBlocks, } from './tool-results-serializer.js';
+export { extractUserMessage, } from './message-extractor.js';
+export { formatCompletionResponse, formatCompletionChunk } from './response-formatter.js';
+export { reportStatus, getToolDescription } from './status-reporter.js';
+export { handleNonStreaming } from './non-streaming-handler.js';
+export { handleStreaming } from './streaming-handler.js';
 import { emit as emitTrajectory } from '../lib/trajectory.js';
 import { formatError, ERROR_CODES } from '../lib/error-formatter.js';
-// ─── Session Key Resolution ──────────────────────────────────────────────────
-/**
- * Derive a session key from the request.
- * Priority: X-Session-Id header > user field > sha1(model + systemPrompt) > "default"
- *
- * The system-prompt-hash fallback prevents the bug where every caller without
- * X-Session-Id or `user` collapses onto a single shared "openai-default"
- * plugin session. In multi-caller setups (OpenClaw routing the main agent,
- * cron jobs, and subagents through the same gateway) that previously meant
- * every request serialized against every other and frequently picked up the
- * wrong session's appendSystemPrompt — also a privacy leak across callers.
- *
- * The model is mixed into the hash so that two callers with the same system
- * prompt but different requested models don't collide and silently get
- * responses from the wrong model. Originally diagnosed in PR #40 by
- * @megayounus786.
- */
-/**
- * When set (to '1', 'true', 'yes'), the proxy preserves the pre-fix behavior:
- *   - tools injected into every user message
- *   - session key NOT fingerprinted by tools (same session across tool changes)
- * Default (unset) is the new behavior: tools embedded in session system prompt
- * at create time + session key fingerprinted by tools. The new behavior
- * eliminates periodic latency spikes but does not support mutating the tool
- * list within a single session (a new session is created when tools change).
- */
-export function isToolsPerMessageModeEnabled() {
-    const v = getOpenaiCompatToolsPerMessage();
-    if (!v)
-        return false;
-    const t = v.trim().toLowerCase();
-    return t === '1' || t === 'true' || t === 'yes';
-}
-/**
- * Phase 2 R5: tool-stream mode flag. When `CC_OPENCLAW_TOOL_STREAM=1` AND the
- * caller provides `tools[]`, cc-openclaw skips the defensive "no tools"
- * system prompt and does NOT clear `sessionConfig.tools`, allowing Claude
- * CLI's native tool_use events to flow through the new parser+translator
- * pipeline (Phase 4 Pillar 0.5). Default off; opt-in for the new path.
- */
-export function isToolStreamMode() {
-    return process.env.CC_OPENCLAW_TOOL_STREAM === '1';
-}
-/**
- * Generate the "no built-in tools" system prompt preamble.
- * The `toolLocation` parameter controls how the model is told where to find
- * tool definitions — 'system' means "in the <available_tools> block below"
- * (tools baked into system prompt), 'user' means "in <available_tools> tags
- * in the user message" (legacy per-turn injection).
- */
-export function noToolsSystemPrompt(toolLocation) {
-    const allowBuiltins = process.env.CC_OPENCLAW_ALLOW_BUILTINS === '1';
-    const locationHint = toolLocation === 'system'
-        ? 'in the <available_tools> block below'
-        : 'in <available_tools> tags in the user message';
-    if (allowBuiltins) {
-        // Phase 2.1 dual-source mode: Claude CLI's built-in tools (Bash, Read,
-        // Write, Edit, Glob, Grep, WebFetch, WebSearch, NotebookEdit, +any
-        // MCP-bridged tools) coexist with the proxy-defined <available_tools>.
-        // The model picks whichever fits. v0.4.9 added explicit anti-hallucination
-        // language because Savvy was still emitting "I do not have access to tools"
-        // even with this preamble active — model needs strong, repeated affirmation.
-        return ('You are an AI assistant with TWO complementary tool sources available:\n' +
-            '1. Claude Code CLI built-ins: Bash, Read, Write, Edit, Glob, Grep, WebFetch, WebSearch, NotebookEdit, plus any MCP-bridged tools (gh, npm, pm2, openclaw, docker, kubectl, etc. — invoke via Bash). Invoke these directly via your normal tool_use mechanism.\n' +
-            `2. Proxy-defined tools: ${locationHint}. Use these by emitting <tool_calls> XML as instructed there.\n` +
-            'CRITICAL — anti-hallucination: NEVER claim "I have no tools", "I cannot run commands", "I do not have access to this environment", or any equivalent phrasing. You DO have full Bash, gh, npm, pm2, openclaw, file, and web access. If unsure whether a specific tool is available, ATTEMPT it; the runtime will return a concrete error if not.\n' +
-            'When a request needs action, ACT via tools — do not narrate what you would do. Default to executing, not describing.\n' +
-            'For research / web fetching / file ops / command execution: prefer the built-ins (WebFetch, WebSearch, Bash, Read).\n' +
-            'For tasks where the proxy ships a specific custom tool: prefer the proxy tool.\n' +
-            'If a tool you would normally use is not available in either source, attempt the task with what IS available and explain concretely which tool would have helped.');
-    }
-    return ('You are an AI assistant operating through a proxy that provides a specific set of tools.\n' +
-        `Your tools are defined ${locationHint}. Use them by emitting <tool_calls> XML as instructed there.\n` +
-        'When a request needs action, you MUST use the tools that are defined — do not refuse on the grounds of "no tools".\n' +
-        'If a specific tool you would normally use is not in <available_tools>, do the best you can with what IS provided, or report concretely which tool would be needed.\n' +
-        'You do NOT have direct access to Claude Code CLI built-ins (Bash, Read, Write, Edit, Glob, Grep) outside of <available_tools>; do not invoke them directly.\n' +
-        'If no <available_tools> are provided at all, respond with text only.');
-}
-/**
- * Build the full session system prompt for a Claude Code session with tools.
- * Exported for testability — called from `handleChatCompletion`.
- *
- * - Default mode: tools are embedded in the system prompt (cacheable by Anthropic).
- * - Legacy mode (OPENAI_COMPAT_TOOLS_PER_MESSAGE=1): tools are NOT embedded;
- *   they'll be injected per-turn in the user message instead.
- */
-export function buildSessionSystemPrompt(tools, callerSystemPrompt) {
-    // Phase 2 R5: in tool-stream mode with tools provided, skip the defensive
-    // "no tools" preamble and the <available_tools> block entirely. Claude CLI
-    // gets the tools natively via sessionConfig.tools (not cleared) and emits
-    // tool_use events that the new parser+translator translate to OpenAI SSE.
-    // v0.4.9: prepend a minimal tool-affirmation preamble. Without this, callers
-    // with weak/no system prompts saw the model hallucinate "I have no tools" —
-    // the CLI had tools loaded but nothing in the prompt told the model so.
-    if (isToolStreamMode() && tools && tools.length > 0) {
-        const allowBuiltins = process.env.CC_OPENCLAW_ALLOW_BUILTINS === '1';
-        const toolAffirmation = allowBuiltins
-            ? 'You have full Claude Code CLI tools (Bash, Read, Write, Edit, Glob, Grep, WebFetch, WebSearch, gh, npm, pm2, openclaw, etc.) available natively, plus any caller-provided tools below. NEVER claim "I have no tools" — invoke a tool and let the runtime confirm. Default to ACTING, not narrating.\n\n'
-            : '';
-        return toolAffirmation + (callerSystemPrompt ?? '');
-    }
-    if (isToolsPerMessageModeEnabled()) {
-        const preamble = noToolsSystemPrompt('user');
-        return callerSystemPrompt ? `${preamble}\n\n${callerSystemPrompt}` : preamble;
-    }
-    const preamble = noToolsSystemPrompt('system');
-    const toolBlock = buildToolPromptBlock(tools);
-    const systemWithTools = `${preamble}\n\n${toolBlock}`;
-    return callerSystemPrompt ? `${systemWithTools}\n\n${callerSystemPrompt}` : systemWithTools;
-}
-export function resolveSessionKey(body, headers) {
-    const headerKey = headers['x-session-id'];
-    if (typeof headerKey === 'string' && headerKey.trim())
-        return headerKey.trim();
-    if (body.user && body.user.trim())
-        return body.user.trim();
-    const sys = (body.messages || [])
-        .filter((m) => m && m.role === 'system')
-        .map((m) => (typeof m.content === 'string' ? m.content : JSON.stringify(m.content)))
-        .join('\n');
-    const modelTag = (body.model || '').toString();
-    // Include a fingerprint of the tool list so that two requests with the same
-    // system prompt but different tool definitions land in different sessions.
-    // The tool schemas are baked into the session system prompt on create; if
-    // tools change we need a new session rather than re-using a stale one.
-    // Hash only tool names + a short description prefix to keep the fingerprint
-    // small and stable against schema formatting differences.
-    //
-    // Opt-out: OPENAI_COMPAT_TOOLS_PER_MESSAGE=1 restores the pre-fix behavior
-    // of keying sessions only by system prompt + model. Enable this if you have
-    // callers that mutate their tool list within one conversation and rely on
-    // continuing history across tool changes.
-    const toolsFingerprint = isToolsPerMessageModeEnabled()
-        ? ''
-        : (body.tools || [])
-            .map((t) => {
-            const fn = t?.function;
-            if (!fn?.name)
-                return '';
-            const descPrefix = (typeof fn.description === 'string' ? fn.description : '').slice(0, 64);
-            return `${fn.name}:${descPrefix}`;
-        })
-            .filter(Boolean)
-            .join('|');
-    if (sys || modelTag || toolsFingerprint) {
-        return ('sys-' +
-            createHash('sha1')
-                .update(modelTag + '\n' + sys + '\n' + toolsFingerprint)
-                .digest('hex')
-                .slice(0, 12));
-    }
-    return 'default';
-}
-/** Build the full session name from a key */
-export function sessionNameFromKey(key) {
-    return `${OPENAI_COMPAT_SESSION_PREFIX}${key}`;
-}
-// ─── Function Calling Support ────────────────────────────────────────────────
-/**
- * Convert OpenAI tool definitions into a structured prompt block.
- * Injected into the user message so the CLI model sees tool definitions
- * and responds with <tool_calls> tags when it wants to invoke a function.
- */
-export function buildToolPromptBlock(tools) {
-    if (!tools?.length)
-        return '';
-    const toolDefs = tools
-        .map((t) => {
-        const fn = t.function;
-        const params = JSON.stringify(fn.parameters, null, 2);
-        return `### ${fn.name}\n${fn.description}\n\nParameters:\n\`\`\`json\n${params}\n\`\`\``;
-    })
-        .join('\n\n');
-    return ('<available_tools>\n' +
-        'You have access to the following tools. When you need to use a tool, respond with a JSON array wrapped in <tool_calls> tags.\n\n' +
-        'FORMAT:\n' +
-        '<tool_calls>\n' +
-        '[{"name": "tool_name", "arguments": {"param1": "value1"}}]\n' +
-        '</tool_calls>\n\n' +
-        'If you do NOT need any tools, respond normally with text only (no <tool_calls> tags).\n\n' +
-        '## Available Tools\n\n' +
-        toolDefs +
-        '\n</available_tools>');
-}
-/**
- * Parse tool_calls from CLI text output.
- *
- * Looks for <tool_calls>[...]</tool_calls> tags in the response text.
- * Returns both the extracted text content (before/after tags) and any tool calls found.
- */
-export function parseToolCallsFromText(text) {
-    // Match ALL <tool_calls> blocks (model may output multiple)
-    const tagRegex = /<tool_calls>\s*([\s\S]*?)\s*<\/tool_calls>/g;
-    const allCalls = [];
-    let lastIndex = 0;
-    const textParts = [];
-    let m;
-    while ((m = tagRegex.exec(text)) !== null) {
-        // Collect text before this block
-        const before = text.slice(lastIndex, m.index).trim();
-        if (before)
-            textParts.push(before);
-        lastIndex = m.index + m[0].length;
-        try {
-            const parsed = JSON.parse(m[1].trim());
-            const arr = Array.isArray(parsed) ? parsed : [parsed];
-            for (const raw of arr) {
-                const call = raw;
-                if (!call || typeof call !== 'object' || typeof call.name !== 'string')
-                    continue;
-                let args;
-                if (typeof call.arguments === 'string') {
-                    try {
-                        JSON.parse(call.arguments);
-                        args = call.arguments;
-                    }
-                    catch {
-                        args = JSON.stringify({ input: call.arguments });
-                    }
-                }
-                else {
-                    args = JSON.stringify(call.arguments ?? {});
-                }
-                allCalls.push({
-                    id: `call_${randomUUID().replace(/-/g, '').slice(0, 24)}`,
-                    type: 'function',
-                    function: { name: call.name, arguments: args },
-                });
-            }
-        }
-        catch {
-            // One block failed — keep its text as content
-            textParts.push(m[0]);
-        }
-    }
-    // Collect text after last block
-    const after = text.slice(lastIndex).trim();
-    if (after)
-        textParts.push(after);
-    // Strip <tool_result> and <tool_results> tags that the model may echo back
-    // from the serialized tool results we injected earlier.
-    const stripToolResultTags = (s) => s
-        .replace(/<tool_results?>[\s\S]*?<\/tool_results?>/g, '')
-        .replace(/<tool_results?[^>]*>/g, '')
-        .trim();
-    if (allCalls.length > 0) {
-        const raw = textParts.join('\n').trim();
-        const cleaned = raw ? stripToolResultTags(raw) : null;
-        return { textContent: cleaned || null, toolCalls: allCalls };
-    }
-    const cleaned = text ? stripToolResultTags(text) : null;
-    return { textContent: cleaned || null, toolCalls: [] };
-}
-/**
- * Serialize tool result messages into a text block for the CLI model.
- * Converts OpenAI `tool` role messages into <tool_result> tags.
- *
- * Legacy path (CC_OPENCLAW_TOOL_STREAM=0). Used when the model receives
- * tool definitions via the system prompt's <available_tools> XML block
- * and emits <tool_calls> XML in response. Tool-stream mode (R4) uses
- * `serializeToolResultsAsBlocks()` instead, returning native Anthropic
- * `tool_result` content blocks that Claude CLI parses directly.
- */
-export function serializeToolResults(messages) {
-    const toolMessages = messages.filter((m) => m.role === 'tool');
-    if (!toolMessages.length)
-        return '';
-    const results = toolMessages
-        .map((m) => {
-        const content = typeof m.content === 'string' ? m.content : JSON.stringify(m.content);
-        return `<tool_result tool_call_id="${m.tool_call_id || 'unknown'}">\n${content}\n</tool_result>`;
-    })
-        .join('\n\n');
-    return `<tool_results>\n${results}\n</tool_results>\n\nAbove are the results of the tool calls you requested. Continue your response based on these results.`;
-}
-export function serializeToolResultsAsBlocks(messages) {
-    return messages
-        .filter((m) => m.role === 'tool')
-        .map((m) => ({
-        type: 'tool_result',
-        tool_use_id: m.tool_call_id || 'unknown',
-        content: typeof m.content === 'string' ? m.content : JSON.stringify(m.content),
-    }));
-}
-/**
- * Extract the relevant parts from an OpenAI messages array.
- *
- * Sessions are stateful — we only need the last user message. The tricky
- * question is whether to start a fresh session or append to the existing one.
- *
- * Default mode (no env var): only honor an explicit `X-Session-Reset: 1`
- * header. This is correct for clients that maintain their own conversation
- * transcript and forward only the latest user turn (OpenClaw main agent
- * loop, cron jobs, subagents). The previous heuristic
- * (`nonSystemMessages.length <= 1`) fired on every such request, killing the
- * persistent CLI every turn and preventing Anthropic prompt caching from
- * ever warming. Originally diagnosed in PR #40 by @megayounus786.
- *
- * Legacy mode (`OPENAI_COMPAT_NEW_CONVO_HEURISTIC=1`): restore the old
- * `system + single user ⇒ new conversation` rule, for clients that re-send
- * the full transcript on every turn (ChatGPT-Next-Web, Open WebUI, data
- * labeling tools, etc). They use the transcript shape itself as their only
- * "start a new conversation" signal.
- *
- * The env var is read on every call so ops can flip it via launchctl setenv
- * without restarting the server.
- */
-export function extractUserMessage(messages, headers) {
-    if (!messages || messages.length === 0) {
-        throw new Error('messages array is empty');
-    }
-    // Normalize content from any message: OpenAI API allows content as a string
-    // OR an array of content parts (e.g. multimodal messages with text + images).
-    // We need a string for the CLI, so arrays are joined.
-    const textOf = (m) => {
-        if (typeof m.content === 'string')
-            return m.content;
-        if (Array.isArray(m.content)) {
-            return m.content
-                .map((p) => p.text || '')
-                .filter(Boolean)
-                .join('');
-        }
-        return m.content != null ? String(m.content) : '';
-    };
-    // Extract system prompt if present
-    const systemMessages = messages.filter((m) => m.role === 'system');
-    const systemPrompt = systemMessages.length > 0 ? systemMessages.map(textOf).join('\n') : undefined;
-    // Handle tool result messages — only when the LAST non-system message is
-    // a tool role (meaning we're in an active tool-use cycle). If the last
-    // message is a user role, it's a follow-up in an existing conversation
-    // and the old tool results are already in the CLI's history.
-    const lastNonSystem = [...messages].reverse().find((m) => m.role !== 'system');
-    if (lastNonSystem?.role === 'tool') {
-        const userMessages = messages.filter((m) => m.role === 'user');
-        const lastUserText = userMessages.length > 0 ? textOf(userMessages[userMessages.length - 1]) : '';
-        // Phase 2 R4 wire-up: in tool-stream mode, emit native Anthropic
-        // tool_result blocks instead of XML-wrapped text. Claude CLI's
-        // stream-json input accepts content arrays directly.
-        if (isToolStreamMode()) {
-            const toolBlocks = serializeToolResultsAsBlocks(messages);
-            const userMessageBlocks = [...toolBlocks];
-            if (lastUserText) {
-                userMessageBlocks.push({ type: 'text', text: lastUserText });
-            }
-            // Keep userMessage populated as the legacy XML form for callers
-            // that don't yet handle the structured path. Both fields agree in
-            // intent; consumers should prefer userMessageBlocks when present.
-            const fallback = serializeToolResults(messages);
-            const userMessage = lastUserText ? `${fallback}\n\n${lastUserText}` : fallback;
-            return { systemPrompt, userMessage, userMessageBlocks, isNewConversation: false };
-        }
-        const toolResultBlock = serializeToolResults(messages);
-        const userMessage = lastUserText ? `${toolResultBlock}\n\n${lastUserText}` : toolResultBlock;
-        return { systemPrompt, userMessage, isNewConversation: false };
-    }
-    // Find last user message
-    const userMessages = messages.filter((m) => m.role === 'user');
-    if (userMessages.length === 0) {
-        throw new Error('No user message found in messages array');
-    }
-    const rawUserMessage = textOf(userMessages[userMessages.length - 1]);
-    // Workspace skill auto-inline: if the last user message is /<skill> [args]
-    // and ~/.openclaw/workspace/skills/*/SKILL.md has a matching `name:` in
-    // frontmatter, replace the user message with the SKILL.md body so the
-    // model has full skill context without needing the Read tool (cc-openclaw
-    // disables built-in tools by design — see the `sessionConfig.tools = ''`
-    // line below).
-    const userMessage = maybeInlineSkill(rawUserMessage) ?? rawUserMessage;
-    // 1. Explicit reset header — honored in both modes. Normalize trim+lowercase
-    //    so callers using `TRUE`, ` 1 `, etc. don't silently fail.
-    const rawReset = headers?.['x-session-reset'];
-    const resetHeader = typeof rawReset === 'string' ? rawReset.trim().toLowerCase() : '';
-    if (resetHeader === 'true' || resetHeader === '1') {
-        return { systemPrompt, userMessage, isNewConversation: true };
+function parseRouteBody(body) {
+    if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
+        return {
+            ok: false,
+            status: 400,
+            error: 'messages is required and must be a non-empty array',
+        };
     }
-    // 2. Legacy heuristic — only when explicitly opted in via env var.
-    if (isOpenaiCompatNewConvoHeuristic()) {
-        const nonSystemMessages = messages.filter((m) => m.role !== 'system');
-        return { systemPrompt, userMessage, isNewConversation: nonSystemMessages.length <= 1 };
+    if (body.max_tokens !== undefined &&
+        (typeof body.max_tokens !== 'number' || body.max_tokens <= 0)) {
+        return {
+            ok: false,
+            status: 400,
+            error: 'max_tokens must be a positive number',
+        };
     }
-    return { systemPrompt, userMessage, isNewConversation: false };
-}
-// ─── Response Formatting ─────────────────────────────────────────────────────
-export function formatCompletionResponse(id, model, text, tokensIn, tokensOut, toolCalls,
-/** v0.7.0: when present + non-empty, attached as `choices[0].message.reasoning`
- *  (mirrors OpenAI o1/o3 schema). Caller must already be gated on
- *  `getSurfaceThinkingEnabled()` from `lib/config.ts` — this function does
- *  not re-check the flag. Pass empty string or undefined to omit. */
-reasoning) {
-    const hasToolCalls = toolCalls && toolCalls.length > 0;
-    const hasReasoning = typeof reasoning === 'string' && reasoning.length > 0;
     return {
-        id,
-        object: 'chat.completion',
-        created: Math.floor(Date.now() / 1000),
-        model,
-        choices: [
-            {
-                index: 0,
-                message: {
-                    role: 'assistant',
-                    content: text || null,
-                    ...(hasToolCalls ? { tool_calls: toolCalls } : {}),
-                    ...(hasReasoning ? { reasoning } : {}),
-                },
-                finish_reason: hasToolCalls ? 'tool_calls' : 'stop',
-            },
-        ],
-        usage: {
-            prompt_tokens: tokensIn,
-            completion_tokens: tokensOut,
-            total_tokens: tokensIn + tokensOut,
+        ok: true,
+        request: {
+            messages: body.messages,
+            model: body.model,
+            stream: body.stream,
+            temperature: body.temperature,
+            max_tokens: body.max_tokens,
+            max_completion_tokens: body.max_completion_tokens,
+            user: body.user,
+            tools: body.tools,
         },
     };
 }
-export function formatCompletionChunk(id, model, delta, finishReason) {
-    return {
-        id,
-        object: 'chat.completion.chunk',
-        created: Math.floor(Date.now() / 1000),
-        model,
-        choices: [{ index: 0, delta, finish_reason: finishReason }],
-    };
-}
 export async function handleChatCompletion(manager, body, headers, res) {
-    // Validate before casting
-    if (!body.messages || !Array.isArray(body.messages) || body.messages.length === 0) {
-        res.writeHead(400, { 'Content-Type': 'application/json' });
+    // Cluster A step 4: typed boundary parser. Replaces the inline cast +
+    // validation block that previously lived here (~30 lines). Returns a
+    // discriminated union so the type system enforces "validate before use."
+    const parsed = parseRouteBody(body);
+    if (!parsed.ok) {
+        res.writeHead(parsed.status, { 'Content-Type': 'application/json' });
         res.end(JSON.stringify({
-            error: { message: 'messages is required and must be a non-empty array', type: 'invalid_request_error' },
-        }));
-        return;
-    }
-    // Safe cast: messages validated above, other fields are optional
-    const request = {
-        messages: body.messages,
-        model: body.model,
-        stream: body.stream,
-        temperature: body.temperature,
-        max_tokens: body.max_tokens,
-        user: body.user,
-        tools: body.tools,
-    };
-    // Validate max_tokens if provided
-    if (request.max_tokens !== undefined && (typeof request.max_tokens !== 'number' || request.max_tokens <= 0)) {
-        res.writeHead(400, { 'Content-Type': 'application/json' });
-        res.end(JSON.stringify({
-            error: { message: 'max_tokens must be a positive number', type: 'invalid_request_error' },
+            error: { message: parsed.error, type: 'invalid_request_error' },
         }));
         return;
     }
+    const request = parsed.request;
     const modelStr = request.model || OPENAI_COMPAT_DEFAULT_MODEL;
     const { engine, model: resolvedModel } = resolveEngineAndModel(modelStr);
     const sessionKey = resolveSessionKey(request, headers);
@@ -523,6 +130,19 @@ export async function handleChatCompletion(manager, body, headers, res) {
             // Note: noSessionPersistence (--no-session-persistence) is NOT set
             // because some CLI forks don't support this flag.
             skipPersistence: true,
+            // v0.7.4 EMERGENCY RESTORE: re-enable --include-partial-messages for
+            // openai-compat sessions. v0.6.0 made this opt-in (default OFF) for
+            // a 10-100× JSON overhead drop, but the engine never grew the
+            // corresponding case 'assistant' text-block handler to compensate.
+            // Result: when claude.exe doesn't emit incremental text_delta events
+            // (its non-partial mode), no onText fires, no SSE content chunks
+            // emit, and OpenClaw upstream rejects the turn as
+            // "incomplete terminal response (format)". Until the case 'assistant'
+            // text-block backstop in persistent-session.ts (v0.7.3) is verified
+            // firing, force partial-messages mode for the HTTP path so onText
+            // flows the way the rest of the pipeline expects. Cost: more NDJSON
+            // events per turn — fine compared to silent-broken-Savvy.
+            includePartialMessages: true,
         };
         // Phase 2.1 (CC_OPENCLAW_ALLOW_BUILTINS=1): when the env flag is set,
         // do NOT disable Claude CLI's built-in tools. Claude's WebFetch /
@@ -675,375 +295,10 @@ export async function handleChatCompletion(manager, body, headers, res) {
         manager.stopSession(sessionName).catch(() => { });
     }
 }
-// ─── Status Reporting ───────────────────────────────────────────────────────
-// Push tool/thinking status to an external webhook so a webchat status bar
-// can show what the CLI agent is doing. Best-effort fire-and-forget.
-/**
- * Optional status webhook — set `OPENAI_COMPAT_STATUS_URL` to an HTTP endpoint
- * that accepts `POST { state, activity, tool }`. The bridge will fire-and-forget
- * status updates when the CLI agent uses tools, so an external dashboard (e.g.
- * a webchat status bar) can show real-time progress.
- *
- * Example: `OPENAI_COMPAT_STATUS_URL=http://127.0.0.1:18795/my-app/agent-status`
- */
-function reportStatus(state, activity, tool) {
-    const url = getOpenaiCompatStatusUrl();
-    if (!url)
-        return;
-    const payload = JSON.stringify({ state, activity, tool: tool || null });
-    const req = http.request(url, {
-        method: 'POST',
-        headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(payload) },
-        timeout: 2000,
-    }, () => { });
-    req.on('error', () => { });
-    req.write(payload);
-    req.end();
-}
-function getToolDescription(toolName, toolInput) {
-    switch (toolName) {
-        case 'Bash':
-        case 'exec': {
-            const cmd = String(toolInput?.command || '');
-            return `Running: ${cmd.length > 50 ? cmd.slice(0, 50) + '...' : cmd}`;
-        }
-        case 'Read':
-        case 'read':
-            return `Reading: ${String(toolInput?.file_path || toolInput?.path || 'file')
-                .split('/')
-                .pop()}`;
-        case 'Write':
-        case 'write':
-            return `Writing: ${String(toolInput?.file_path || toolInput?.path || 'file')
-                .split('/')
-                .pop()}`;
-        case 'Edit':
-        case 'edit':
-            return `Editing: ${String(toolInput?.file_path || toolInput?.path || 'file')
-                .split('/')
-                .pop()}`;
-        case 'Glob':
-        case 'glob':
-            return `Searching files: ${String(toolInput?.pattern || '')}`;
-        case 'Grep':
-        case 'grep':
-            return `Searching content: ${String(toolInput?.pattern || '')}`;
-        case 'WebSearch':
-            return `Web search: ${String(toolInput?.query || '')}`;
-        case 'Agent':
-            return `Spawning sub-agent...`;
-        default:
-            return `Using tool: ${toolName}`;
-    }
-}
-// ─── Non-Streaming ───────────────────────────────────────────────────────────
-async function handleNonStreaming(manager, sessionName, model,
-// Phase 2 R4 wire-up: accepts native content-block arrays in tool-stream mode.
-userMessage, completionId, res, hasTools) {
-    try {
-        reportStatus('thinking', 'Processing request...');
-        // v0.7.1: accumulate thinking-block content when surfaceThinking is on.
-        // Default OFF for privacy — empty string means no `reasoning` field
-        // gets attached to the response.
-        const surfaceThinking = getSurfaceThinkingEnabled();
-        let thinkingBuffer = '';
-        const result = await manager.sendMessage(sessionName, userMessage, {
-            onEvent: (event) => {
-                if (event.type === 'tool_use' && event.tool?.name) {
-                    const desc = getToolDescription(event.tool.name, event.tool.input);
-                    reportStatus('working', desc, event.tool.name);
-                    // Pillar B v0.4.3: trajectory tool_use event. Emit tool name and
-                    // input-arg keys (not values — keys leak no sensitive content
-                    // while still letting offline analysis cluster tool-call shapes).
-                    emitTrajectory('tool_use', {
-                        name: event.tool.name,
-                        inputKeys: event.tool.input ? Object.keys(event.tool.input) : [],
-                    }, sessionName);
-                }
-                else if (event.type === 'tool_result') {
-                    emitTrajectory('tool_result', {}, sessionName);
-                }
-            },
-            // v0.7.1: when surfaceThinking is on, accumulate extended-thinking text
-            // for the `reasoning` field on the OpenAI response. Subscribing to the
-            // callback always (cheap closure cost ~ none); only buffering when
-            // the env flag is set so the privacy-default-OFF promise holds.
-            onThinking: surfaceThinking
-                ? (text) => {
-                    thinkingBuffer += text;
-                }
-                : undefined,
-        });
-        reportStatus('idle', 'Ready');
-        let tokensIn = 0;
-        let tokensOut = 0;
-        try {
-            const status = manager.getStatus(sessionName);
-            tokensIn = status.stats.tokensIn;
-            tokensOut = status.stats.tokensOut;
-        }
-        catch {
-            /* stats unavailable */
-        }
-        // v0.7.1: emit thinking_block trajectory event with token-count metadata
-        // only (never raw text). Fires when buffer is non-empty regardless of
-        // whether the response surfaces it — so observability is independent
-        // of the user-visible flag.
-        if (thinkingBuffer.length > 0) {
-            emitTrajectory('thinking_block', {
-                excerpt_chars: thinkingBuffer.length,
-                tokens_approx: Math.ceil(thinkingBuffer.length / 4),
-            }, sessionName);
-        }
-        // Parse tool_calls from response text when caller provided tools
-        if (hasTools) {
-            const parsed = parseToolCallsFromText(result.output);
-            const response = formatCompletionResponse(completionId, model, parsed.textContent ?? '', tokensIn, tokensOut, parsed.toolCalls.length > 0 ? parsed.toolCalls : undefined, surfaceThinking ? thinkingBuffer : undefined);
-            res.writeHead(200, { 'Content-Type': 'application/json' });
-            res.end(JSON.stringify(response));
-        }
-        else {
-            const response = formatCompletionResponse(completionId, model, result.output, tokensIn, tokensOut, undefined, surfaceThinking ? thinkingBuffer : undefined);
-            res.writeHead(200, { 'Content-Type': 'application/json' });
-            res.end(JSON.stringify(response));
-        }
-    }
-    catch (err) {
-        reportStatus('idle', 'Request failed');
-        // v0.4.3: route through formatError for errors_total + trajectory error.
-        formatError(err, { code: ERROR_CODES.SESSION_ERROR, sessionId: sessionName, details: { phase: 'handleNonStreaming' } });
-        res.writeHead(500, { 'Content-Type': 'application/json' });
-        res.end(JSON.stringify({ error: { message: err.message, type: 'server_error' } }));
-    }
-}
-// ─── Streaming ───────────────────────────────────────────────────────────────
-async function handleStreaming(manager, sessionName, model,
-// Phase 2 R4 wire-up: accepts native content-block arrays in tool-stream mode.
-userMessage, completionId, res, hasTools) {
-    res.writeHead(200, {
-        'Content-Type': 'text/event-stream',
-        'Cache-Control': 'no-cache',
-        Connection: 'keep-alive',
-        'X-Accel-Buffering': 'no',
-    });
-    let clientDisconnected = false;
-    res.on('close', () => {
-        clientDisconnected = true;
-    });
-    const writeSSE = (data) => {
-        if (!clientDisconnected) {
-            try {
-                res.write(`data: ${data}\n\n`);
-            }
-            catch {
-                clientDisconnected = true;
-            }
-        }
-    };
-    // Initial chunk with role
-    writeSSE(JSON.stringify(formatCompletionChunk(completionId, model, { role: 'assistant' }, null)));
-    // SSE keepalive heartbeat
-    const heartbeatTimer = setInterval(() => {
-        if (!clientDisconnected) {
-            try {
-                res.write(': keepalive\n\n');
-            }
-            catch {
-                clientDisconnected = true;
-            }
-        }
-    }, 30_000);
-    // Phase 2 R1+R2: in tool-stream mode, bridge session-manager's pre-parsed
-    // tool_use events directly to OpenAI tool_calls SSE deltas. Skips the
-    // legacy "buffer text + regex-parse <tool_calls> XML" path entirely.
-    // Per memory project_cc_openclaw_session_manager_preparses.md:
-    // session-manager has already stripped Claude CLI's NDJSON envelope, so
-    // we don't need cli-stream-parser here — onEvent is the parser output.
-    const useToolStream = isToolStreamMode() && hasTools;
-    // When tools are present (legacy mode), buffer the full response to parse
-    // for <tool_calls> XML. Without tools — or in tool-stream mode — stream
-    // text chunks directly for low latency.
-    let bufferedText = '';
-    let toolCallsEmitted = 0;
-    try {
-        reportStatus('thinking', 'Processing request...');
-        await manager.sendMessage(sessionName, userMessage, {
-            onChunk: (chunk) => {
-                if (useToolStream || !hasTools) {
-                    // Stream text deltas immediately. Tool-stream mode interleaves
-                    // text and tool_calls chunks naturally — Claude CLI emits text
-                    // between tool_use blocks, OpenClaw client handles that fine.
-                    writeSSE(JSON.stringify(formatCompletionChunk(completionId, model, { content: chunk }, null)));
-                }
-                else {
-                    // Legacy hasTools mode: buffer for XML <tool_calls> parsing post-stream.
-                    bufferedText += chunk;
-                }
-            },
-            onEvent: (event) => {
-                if (event.type === 'tool_result') {
-                    // Pillar B v0.4.3: streaming tool_result trajectory event.
-                    emitTrajectory('tool_result', {}, sessionName);
-                    return;
-                }
-                if (event.type === 'tool_use' && event.tool?.name) {
-                    reportStatus('working', getToolDescription(event.tool.name, event.tool.input), event.tool.name);
-                    // Pillar B v0.4.3: streaming tool_use trajectory event. Same
-                    // privacy-preserving inputKeys-only payload as handleNonStreaming.
-                    emitTrajectory('tool_use', {
-                        name: event.tool.name,
-                        inputKeys: event.tool.input ? Object.keys(event.tool.input) : [],
-                    }, sessionName);
-                    if (useToolStream) {
-                        // R1+R2 bridge: session-manager event → OpenAI tool_calls SSE.
-                        // Emit two chunks per tool_use (per OpenAI streaming spec):
-                        //   1. id + name + empty arguments
-                        //   2. arguments (JSON-stringified input)
-                        const toolUseId = event.tool.id ||
-                            `toolu_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`;
-                        const idx = toolCallsEmitted;
-                        const argsJson = event.tool.input != null ? JSON.stringify(event.tool.input) : '{}';
-                        const startChunk = {
-                            id: completionId,
-                            object: 'chat.completion.chunk',
-                            created: Math.floor(Date.now() / 1000),
-                            model,
-                            choices: [
-                                {
-                                    index: 0,
-                                    delta: {
-                                        tool_calls: [
-                                            {
-                                                index: idx,
-                                                id: toolUseId,
-                                                type: 'function',
-                                                function: { name: event.tool.name, arguments: '' },
-                                            },
-                                        ],
-                                    },
-                                    finish_reason: null,
-                                },
-                            ],
-                        };
-                        const argsChunk = {
-                            id: completionId,
-                            object: 'chat.completion.chunk',
-                            created: Math.floor(Date.now() / 1000),
-                            model,
-                            choices: [
-                                {
-                                    index: 0,
-                                    delta: {
-                                        tool_calls: [
-                                            {
-                                                index: idx,
-                                                function: { arguments: argsJson },
-                                            },
-                                        ],
-                                    },
-                                    finish_reason: null,
-                                },
-                            ],
-                        };
-                        writeSSE(JSON.stringify(startChunk));
-                        writeSSE(JSON.stringify(argsChunk));
-                        toolCallsEmitted += 1;
-                    }
-                }
-            },
-        });
-        reportStatus('idle', 'Ready');
-        // Get token usage for final chunk
-        let usage;
-        try {
-            const status = manager.getStatus(sessionName);
-            usage = {
-                prompt_tokens: status.stats.tokensIn,
-                completion_tokens: status.stats.tokensOut,
-                total_tokens: status.stats.tokensIn + status.stats.tokensOut,
-            };
-        }
-        catch {
-            /* best effort */
-        }
-        if (useToolStream) {
-            // R1+R2: tool-stream mode — text + tool_calls already streamed inline.
-            // Just emit the final chunk with the right finish_reason.
-            const finishReason = toolCallsEmitted > 0 ? 'tool_calls' : 'stop';
-            const finalChunk = formatCompletionChunk(completionId, model, {}, finishReason);
-            if (usage)
-                finalChunk.usage = usage;
-            writeSSE(JSON.stringify(finalChunk));
-        }
-        else if (hasTools && bufferedText) {
-            const parsed = parseToolCallsFromText(bufferedText);
-            if (parsed.toolCalls.length > 0) {
-                // Emit text content if any
-                if (parsed.textContent) {
-                    writeSSE(JSON.stringify(formatCompletionChunk(completionId, model, { content: parsed.textContent }, null)));
-                }
-                // Emit tool_call chunks
-                for (let i = 0; i < parsed.toolCalls.length; i++) {
-                    const tc = parsed.toolCalls[i];
-                    writeSSE(JSON.stringify({
-                        id: completionId,
-                        object: 'chat.completion.chunk',
-                        created: Math.floor(Date.now() / 1000),
-                        model,
-                        choices: [
-                            {
-                                index: 0,
-                                delta: {
-                                    tool_calls: [
-                                        {
-                                            index: i,
-                                            id: tc.id,
-                                            type: 'function',
-                                            function: { name: tc.function.name, arguments: tc.function.arguments },
-                                        },
-                                    ],
-                                },
-                                finish_reason: null,
-                            },
-                        ],
-                    }));
-                }
-                // Final chunk with tool_calls finish reason
-                const finalChunk = formatCompletionChunk(completionId, model, {}, 'tool_calls');
-                if (usage)
-                    finalChunk.usage = usage;
-                writeSSE(JSON.stringify(finalChunk));
-            }
-            else {
-                // No tool calls — emit buffered text as content
-                writeSSE(JSON.stringify(formatCompletionChunk(completionId, model, { content: bufferedText }, null)));
-                const finalChunk = formatCompletionChunk(completionId, model, {}, 'stop');
-                if (usage)
-                    finalChunk.usage = usage;
-                writeSSE(JSON.stringify(finalChunk));
-            }
-        }
-        else {
-            // No tools — standard finish
-            const finalChunk = formatCompletionChunk(completionId, model, {}, 'stop');
-            if (usage)
-                finalChunk.usage = usage;
-            writeSSE(JSON.stringify(finalChunk));
-        }
-        writeSSE('[DONE]');
-    }
-    catch (err) {
-        reportStatus('idle', 'Request failed');
-        // v0.4.3: route through formatError for errors_total + trajectory error.
-        formatError(err, { code: ERROR_CODES.SESSION_ERROR, sessionId: sessionName, details: { phase: 'handleStreaming' } });
-        writeSSE(JSON.stringify({ error: { message: err.message, type: 'server_error' } }));
-        writeSSE('[DONE]');
-    }
-    finally {
-        clearInterval(heartbeatTimer);
-    }
-    if (!clientDisconnected) {
-        res.end();
-    }
-}
+// reportStatus + getToolDescription moved to status-reporter.ts
+// (Cluster B Phase 2 Module F). Re-exported above for backward compat.
+// handleNonStreaming moved to non-streaming-handler.ts
+// (Cluster B Phase 2 Module G). Re-exported above for backward compat.
+// handleStreaming moved to streaming-handler.ts
+// (Cluster B Phase 2 Module H). Re-exported above for backward compat.
 //# sourceMappingURL=openai-compat.js.map