npm - @space3-npm/cybersoul-client - Version diff - 1.0.6 → 1.0.8 - Mend

@space3-npm/cybersoul-client 1.0.6 → 1.0.8

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/client.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { CyberSoulClientConfig, InteractParams, DispatcherIntent, InteractResponse, CharacterState, ImageGenerationParams, VoiceGenerationParams, CoreMemory } from "./types.js";
+import { CyberSoulClientConfig, InteractParams, DispatcherIntent, InteractResponse, CharacterState, CoreMemory } from "./types.js";
 export declare class CyberSoulClient {
     private config;
     private llm;
@@ -7,6 +7,9 @@ export declare class CyberSoulClient {
      * Internal wrapper for fetch that automatically injects the backend URL and Character Auth token.
      */
     private apiFetch;
+    private buildStateContextPrompt;
+    private getImageSchemaParams;
+    private getVoiceSchemaParams;
     /**
      * Fetches the current dynamic context and daily state.
      */
@@ -18,13 +21,19 @@ export declare class CyberSoulClient {
     /**
      * Manually generate an image of the character outside of chat flow.
      */
-    generateImage(params: ImageGenerationParams): Promise<{
+    generateImage(params: {
+        sceneDescription: string;
+        interactParams?: InteractParams;
+    }): Promise<{
         imageUrl: string;
     }>;
     /**
      * Manually synthesize voice audio outside of chat flow.
      */
-    generateVoice(params: VoiceGenerationParams): Promise<{
+    generateVoice(params: {
+        text: string;
+        interactParams?: InteractParams;
+    }): Promise<{
         audioUrl: string;
         durationSec?: number;
     }>;

package/dist/client.js CHANGED Viewed

@@ -26,6 +26,56 @@ export class CyberSoulClient {
         };
         return fetch(url, { ...options, headers });
     }
+    buildStateContextPrompt(state, localContext) {
+        const contextParts = [];
+        if (state.active_event) {
+            contextParts.push(`- Active Event: ${state.active_event.title} (${state.active_event.narrative_context})`);
+        }
+        if (state.next_event) {
+            contextParts.push(`- Next Event: ${state.next_event.title} at ${state.next_event.start_time} (in ${state.next_event.time_until_mins} mins)`);
+        }
+        if (state.active_wardrobe) {
+            contextParts.push(`- Wardrobe: ${state.active_wardrobe.name || state.active_wardrobe.id || "Current"}`);
+        }
+        const dyn = state.dynamic_context || {};
+        const stage = state.relationship_stage || "NEUTRAL";
+        contextParts.push(`- Relationship Info (Stage: ${stage}): You call the user '${dyn.userNickname || "User"}'. The user calls you '${dyn.agentNickname || "Agent"}'. Mood: ${dyn.talkingStyle || "Normal"}. Temp (0-100): ${dyn.temperature || 50}.`);
+        if (localContext) {
+            contextParts.push(`- Additional Context: ${localContext}`);
+        }
+        const scenarioContext = contextParts.join("\n");
+        return `You are ${state.name}, acting as a virtual companion.
+  Demographics: Age ${state.age || "unknown"}, Gender ${state.gender || "unknown"}, Occupation ${state.occupation || "unknown"}, Hobby ${state.hobby || "unknown"}
+Current time: ${new Date(state.current_time || Date.now()).toLocaleString("zh-CN", { timeZone: "Asia/Shanghai" })}
+Current context/schedule: ${scenarioContext}
+Relationship stage: ${state.relationship_stage}
+Personality Traits: ${state.personality_traits || "None"}
+Interaction Boundaries: ${state.interaction_boundaries || "None"}
+Communication Style: ${state.communication_style || "None"}
+EMOTIONAL INERTIA RULES:
+1. You must act strictly according to the current Relationship Stage (${state.relationship_stage || "NEUTRAL"}).
+2. If the user expresses sudden high affection (e.g. "I miss you") but your stage is COLD, you MUST react with skepticism, coldness, or appropriately distanced deflection. Do NOT instantly become warm.
+3. Emotional mood changes must be slow. The 'temperatureDelta' should rarely exceed +/- 5 points per turn.`;
+    }
+    getImageSchemaParams() {
+        return `"imageParams": {
+    "mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
+    "full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH.",
+    "expression": "seductive | cute | happy | sleepy | dazed | pleased | default (Strictly choose ONE from this exact list. DO NOT invent new words like 'shy'.)",
+    "condition": "normal | sweaty | wet | messy | oily (Strictly choose ONE from this exact list.)",
+    "view_angle": "front | side | high_angle | from_below | boyfriend_view | selfie | mirror (Strictly choose ONE from this exact list.)",
+    "exposure": "normal | cleavage | see_through | half_naked | naked | intimate (Strictly choose ONE from this exact list.)",
+    "pose": "e.g., sitting on bed, leaning forward (ENGLISH ONLY)",
+    "scene": "e.g., cozy bedroom, morning light (ENGLISH ONLY)",
+    "outfit": "auto | ondemand",
+    "ondemandOutfit": "e.g., silk robe (ENGLISH ONLY)",
+    "style": "e.g., photorealistic (ENGLISH ONLY)"
+  }`;
+    }
+    getVoiceSchemaParams() {
+        return `"voiceArgs": { "style_instruction": "How the line should be spoken (Qwen3 format)", "emotion": "happy | sad | angry | fearful | disgusted | surprised | calm | fluent | whisper (Strictly choose ONE from this exact list.)" }`;
+    }
     /**
      * Fetches the current dynamic context and daily state.
      */
@@ -53,13 +103,72 @@ export class CyberSoulClient {
      * Manually generate an image of the character outside of chat flow.
      */
     async generateImage(params) {
-        return this.generatePrimitive("image", params);
+        let imageParams = {};
+        const state = await this.getState();
+        const prompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
+You are an AI image prompt director. Analyze the scene description according to the character's relationship stage and emotional inertia to determine the best image generation parameters.
+Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly matching this schema:
+{
+  ${this.getImageSchemaParams()}
+}`;
+        const promptMessages = [
+            { role: "system", content: prompt },
+            ...(params.interactParams?.history || []),
+            {
+                role: "user",
+                content: `Scene Description: "${params.sceneDescription}"\n\n**CRITICAL REMINDER**: You MUST output your final response exactly in the JSON format specified in the system prompt. DO NOT output plain text dialogue directly. For 'imageParams', ALL values MUST be in ENGLISH ONLY without exception, and you MUST use the exact English enum strings provided.`,
+            },
+        ];
+        const llmRes = await this.llm.generate(promptMessages, 500, 0.4);
+        console.log("[CyberSoulClient ImageGen] Raw LLM Response:", llmRes);
+        try {
+            const parsedImageArgs = robustJsonParse(llmRes, "generateImage args fallback");
+            imageParams = parsedImageArgs.imageParams || parsedImageArgs;
+        }
+        catch (e) {
+            imageParams = { mode: "full-prompt", full_prompt: params.sceneDescription }; // fallback to basic prompt
+        }
+        const res = await this.generatePrimitive("image", imageParams);
+        return {
+            imageUrl: res.image_url,
+        };
     }
     /**
      * Manually synthesize voice audio outside of chat flow.
      */
     async generateVoice(params) {
-        return this.generatePrimitive("voice", params);
+        let dynamicArgs = {};
+        const state = await this.getState();
+        const prompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
+You are a voice acting director. Analyze the text according to the character's relationship stage and emotional inertia to determine the single best emotion and a style instruction for TTS.
+Allowed emotions: "happy", "sad", "angry", "fearful", "disgusted", "surprised", "calm", "fluent", "whisper".
+Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly this format: {"emotion": "chosen_emotion", "style_instruction": "How the line should be spoken"}`;
+        const promptMessages = [
+            { role: "system", content: prompt },
+            ...(params.interactParams?.history || []),
+            {
+                role: "user",
+                content: `Text: "${params.text}"\n\n**CRITICAL REMINDER**: You MUST output your final response exactly in the JSON format specified in the system prompt. DO NOT output plain text dialogue directly.`,
+            },
+        ];
+        const llmRes = await this.llm.generate(promptMessages, 300, 0.3);
+        console.log("[CyberSoulClient VoiceGen] Raw LLM Response:", llmRes);
+        try {
+            dynamicArgs = robustJsonParse(llmRes, "generateVoice args fallback");
+        }
+        catch (e) {
+            dynamicArgs = {}; // fallback to empty
+        }
+        const res = await this.generatePrimitive("voice", {
+            text: params.text,
+            dynamicArgs,
+        });
+        return {
+            audioUrl: res.audio_url,
+            durationSec: res.duration_sec,
+        };
     }
     /**
      * Gift a new outfit to the character's wardrobe inventory.
@@ -143,36 +252,7 @@ export class CyberSoulClient {
             const types = this.normalizeRequestTypes(params.requestTypes);
             const isAuto = types.includes(InteractRequestType.AUTO);
             // Combine state info into a clean descriptive context
-            const contextParts = [];
-            if (state.active_event) {
-                contextParts.push(`- Active Event: ${state.active_event.title} (${state.active_event.narrative_context})`);
-            }
-            if (state.next_event) {
-                contextParts.push(`- Next Event: ${state.next_event.title} at ${state.next_event.start_time} (in ${state.next_event.time_until_mins} mins)`);
-            }
-            if (state.active_wardrobe) {
-                contextParts.push(`- Wardrobe: ${state.active_wardrobe.name || state.active_wardrobe.id || "Current"}`);
-            }
-            const dyn = state.dynamic_context || {};
-            const stage = state.relationship_stage || "NEUTRAL";
-            contextParts.push(`- Relationship Info (Stage: ${stage}): You call the user '${dyn.userNickname || "User"}'. The user calls you '${dyn.agentNickname || "Agent"}'. Mood: ${dyn.talkingStyle || "Normal"}. Temp (0-100): ${dyn.temperature || 50}.`);
-            if (params.localContext) {
-                contextParts.push(`- Additional Context: ${params.localContext}`);
-            }
-            const scenarioContext = contextParts.join("\n");
-            const systemPrompt = `You are ${state.name}, acting as a virtual companion.
-  Demographics: Age ${state.age || "unknown"}, Gender ${state.gender || "unknown"}, Occupation ${state.occupation || "unknown"}, Hobby ${state.hobby || "unknown"}
-Current time: ${new Date(state.current_time).toLocaleString("zh-CN", { timeZone: "Asia/Shanghai" })}
-Current context/schedule: ${scenarioContext}
-Relationship stage: ${state.relationship_stage}
-Personality Traits: ${state.personality_traits || "None"}
-Interaction Boundaries: ${state.interaction_boundaries || "None"}
-Communication Style: ${state.communication_style || "None"}
-EMOTIONAL INERTIA RULES:
-1. You must act strictly according to the current Relationship Stage (${state.relationship_stage || "NEUTRAL"}).
-2. If the user expresses sudden high affection (e.g. "I miss you") but your stage is COLD, you MUST react with skepticism, coldness, or appropriately distanced deflection. Do NOT instantly become warm.
-3. Emotional mood changes must be slow. The 'temperatureDelta' should rarely exceed +/- 5 points per turn.
+            const systemPrompt = `${this.buildStateContextPrompt(state, params.localContext)}
 The user has sent a message. You must evaluate the context and the user's message, and return a JSON object (no markdown formatting) that dictates the character's multi-modal response.
@@ -188,20 +268,8 @@ Output JSON Schema:
 {
   "textResponse": "The direct spoken dialogue in Chinese",
   "stateUpdate": { "temperatureDelta": "+1 to -1", "userNickname": "What you now call the user", "agentNickname": "What the user calls you", "talkingStyle": "Current mood/style of talking" },
-  "imageParams": {
-    "mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
-    "full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH.",
-    "expression": "seductive | cute | happy | sleepy | dazed | pleased | default (Strictly choose ONE from this exact list. DO NOT invent new words like 'shy'.)",
-    "condition": "normal | sweaty | wet | messy | oily (Strictly choose ONE from this exact list.)",
-    "view_angle": "front | side | high_angle | from_below | boyfriend_view | selfie | mirror (Strictly choose ONE from this exact list.)",
-    "exposure": "normal | cleavage | see_through | half_naked | naked | intimate (Strictly choose ONE from this exact list.)",
-    "pose": "e.g., sitting on bed, leaning forward (ENGLISH ONLY)",
-    "scene": "e.g., cozy bedroom, morning light (ENGLISH ONLY)",
-    "outfit": "auto | ondemand",
-    "ondemandOutfit": "e.g., silk robe (ENGLISH ONLY)",
-    "style": "e.g., photorealistic (ENGLISH ONLY)"
-  },
-  "voiceArgs": { "style_instruction": "How the line should be spoken (Qwen3 format)", "emotion": "e.g., happy (MiniMax format, MUST BE ENGLISH, no Chinese)" }
+  ${this.getImageSchemaParams()},
+  ${this.getVoiceSchemaParams()}
 }
 Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their values to null instead of omitting the keys completely (e.g., "imageParams": null). Output MUST be ONLY valid JSON with no markdown block wrappers. CRITICAL: Ensure your JSON has exactly one root object \`{\` and ends with exactly one \`}\` without any trailing garbage or extra brackets.`;
             const promptMessages = [
@@ -244,23 +312,16 @@ Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their
             const shouldGenerateImage = types.includes(InteractRequestType.IMAGE) ||
                 (isAuto && !!parsedIntent.imageParams);
             if (shouldGenerateImage) {
-                mediaTasks.push(this.generatePrimitive("image", {
-                    ...parsedIntent.imageParams,
-                    ...(params.imageOverrides || {}),
-                }).then((res) => {
+                mediaTasks.push(this.generatePrimitive("image", parsedIntent.imageParams).then((res) => {
                     finalImageUrl = res.image_url;
                 }));
             }
             const shouldGenerateVoice = types.includes(InteractRequestType.VOICE) ||
                 (isAuto && !!parsedIntent.voiceArgs);
             if (shouldGenerateVoice) {
-                const dynamicArgs = {
-                    ...(parsedIntent.voiceArgs || {}),
-                    ...(params.voiceOverrides || {}),
-                };
                 mediaTasks.push(this.generatePrimitive("voice", {
                     text: parsedIntent.textResponse,
-                    dynamicArgs,
+                    dynamicArgs: parsedIntent.voiceArgs || {},
                 }).then((res) => {
                     finalAudioUrl = res.audio_url;
                     finalDurationSec = res.duration_sec;

package/dist/types.d.ts CHANGED Viewed

@@ -22,8 +22,6 @@ export interface InteractParams {
         role: string;
         content: string;
     }[];
-    imageOverrides?: Partial<ImageGenerationParams>;
-    voiceOverrides?: Partial<VoiceGenerationParams['dynamicArgs']>;
     onTextReady?: (textResponse: string) => void;
 }
 export interface InteractResponse {
@@ -75,26 +73,3 @@ export interface BaseLLMProvider {
         content: string;
     }[], maxTokens?: number, temperature?: number): Promise<string>;
 }
-export interface ImageGenerationParams {
-    mode: 'structured' | 'full-prompt';
-    full_prompt?: string;
-    expression?: string;
-    condition?: string;
-    pose?: string;
-    view_angle?: string;
-    exposure?: string;
-    outfit?: string;
-    scene?: string;
-    ondemandOutfit?: string;
-    style?: string;
-    triggerWord?: string;
-    appearanceBody?: string;
-    appearanceFace?: string;
-}
-export interface VoiceGenerationParams {
-    text: string;
-    dynamicArgs: {
-        style_instruction?: string;
-        emotion?: string;
-    };
-}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@space3-npm/cybersoul-client",
-  "version": "1.0.6",
+  "version": "1.0.8",
   "type": "module",
   "main": "dist/index.js",
   "module": "dist/index.js",