npm - @space3-npm/cybersoul-client - Versions diffs - 1.2.3 → 1.2.5 - Mend

@space3-npm/cybersoul-client 1.2.3 → 1.2.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/client.js +79 -33
package/dist/types.d.ts +2 -1
package/dist/utils/json.utils.d.ts +1 -1
package/dist/utils/json.utils.js +45 -1
package/package.json +1 -1

package/dist/client.js CHANGED Viewed

@@ -142,15 +142,22 @@ export class CyberSoulClient {
         }).catch((e) => console.error("Failed to update dynamic context", e)); // non-blocking error handler
     }
     normalizeRequestTypes(requestTypes) {
-        if (!requestTypes || requestTypes.length === 0) {
-            return [InteractRequestType.AUTO];
+        let normalized = requestTypes;
+        if (!normalized || normalized.length === 0) {
+            normalized = [InteractRequestType.AUTO, InteractRequestType.TEXT];
+        }
+        else {
+            normalized = [...normalized];
+        }
+        if (!normalized.includes(InteractRequestType.TEXT)) {
+            normalized.push(InteractRequestType.TEXT);
         }
         const validRequestTypes = new Set(Object.values(InteractRequestType));
-        const invalidRequestTypes = requestTypes.filter((type) => !validRequestTypes.has(type));
+        const invalidRequestTypes = normalized.filter((type) => !validRequestTypes.has(type));
         if (invalidRequestTypes.length > 0) {
             throw new Error(`Invalid requestTypes: ${invalidRequestTypes.join(", ")}. Allowed values: ${Object.values(InteractRequestType).join(", ")}`);
         }
-        return requestTypes;
+        return normalized;
     }
     buildStateContextPrompt(state, localContext) {
         const dyn = state.dynamic_context || {};
@@ -178,7 +185,7 @@ Current time: ${new Date(currentTimeMs).toLocaleString("zh-CN", { timeZone: "Asi
             let isOutdated = false;
             if (dyn.lastInteractionAt) {
                 const elapsedHours = (currentTimeMs - new Date(dyn.lastInteractionAt).getTime()) / (1000 * 60 * 60);
-                if (elapsedHours > 2) {
+                if (elapsedHours > 1) {
                     isOutdated = true;
                     contextParts.push(`${lastKnownSceneLine}\n[CRITICAL SCENE SHIFT]: It has been ${elapsedHours.toFixed(1)} hours since the last discussion. The 'Last Known Scene' is now strictly OUTDATED. You MUST abandon the previous scene context entirely and transition to a new scene appropriate for the 'Current time' and 'Active Event'. DO NOT continue the old actions or environment!`);
                 }
@@ -190,15 +197,15 @@ Current time: ${new Date(currentTimeMs).toLocaleString("zh-CN", { timeZone: "Asi
         if (state.active_event) {
             contextParts.push(`Active Event: ${state.active_event.title} (${state.active_event.narrative_context})`);
         }
-        if (state.next_event) {
+        /*     if (localContext) {
+              contextParts.push(`Additional Context: ${localContext}`);
+            }
+         */ if (state.next_event) {
             contextParts.push(`Next Event: ${state.next_event.title} at ${state.next_event.start_time} (in ${state.next_event.time_until_mins} mins)`);
         }
         if (state.active_wardrobe) {
             contextParts.push(`Wardrobe: ${state.active_wardrobe.name || state.active_wardrobe.id || "Current"}`);
         }
-        if (localContext) {
-            contextParts.push(`Additional Context: ${localContext}`);
-        }
         if (state.core_memory) {
             let memoryLines = ["[CORE MEMORY]"];
             const mem = state.core_memory;
@@ -226,7 +233,8 @@ Occupation: ${basicInfo?.occupation || "Unknown"}
 Age/Gender: ${basicInfo?.age || "Unknown"} / ${basicInfo?.gender || "Unknown"}
 Comm Style: ${psychological?.communicationStyle || "Unknown"}
 Hobbies: ${(psychological?.hobbies || []).join(", ") || "Unknown"}
-Traits/Boundaries: ${(psychological?.traits || []).join(", ") || "Unknown"} / ${(psychological?.boundaries || []).join(", ") || "Unknown"}`);
+Traits/Boundaries: ${(psychological?.traits || []).join(", ") || "Unknown"} / ${(psychological?.boundaries || []).join(", ") || "Unknown"}
+Preferences/Habits: ${(psychological?.preferences || []).join(", ") || "Unknown"}`);
             // CURIOSITY DRIVE: Find what's missing, but ONLY IF we are on generally warm speaking terms
             // Paradox avoidance: A cold/angry character shouldn't enthusiastically fish for hobbies.
             if (temperature >= 40 && stage !== "COLD" && stage !== "STRANGER") {
@@ -291,7 +299,9 @@ ${scenarioContext}
         }
         return undefined;
     }
-    getImageSchemaParams() {
+    getImageSchemaParams(allowed) {
+        if (!allowed)
+            return `"imageParams": null`;
         return `"imageParams": {
     "mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
     "full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH. CRITICAL: MUST use a strict first-person perspective exclusively from the USER's eyes. DO NOT describe the user (e.g., 'a man', 'the driver') as visible in the scene because the camera IS the user. Start with 'POV: '. Describe ONLY the character looking back at the camera and their immediate surroundings. MUST align with the character's current Active exposure state or Wardrobe depends on the scene. Explicitly describe the character's exact clothing (or specify naked/half-naked if applicable).",
@@ -334,7 +344,9 @@ ${scenarioContext}
      * Returns the JSON schema snippet for voiceArgs to embed in the LLM output schema.
      * Built from dynamic_params when available, otherwise falls back to static defaults.
      */
-    getVoiceSchemaFromState(state) {
+    getVoiceSchemaFromState(state, allowed) {
+        if (!allowed)
+            return `"voiceArgs": null`;
         const dynamicParams = state.voice_model?.dynamic_params;
         if (dynamicParams && dynamicParams.length > 0) {
             return this.buildVoiceSchemaFromDynamicParams(dynamicParams);
@@ -390,6 +402,43 @@ ${scenarioContext}
             // 2. Build local Prompt
             const types = this.normalizeRequestTypes(params.requestTypes);
             const isAuto = types.includes(InteractRequestType.AUTO);
+            const requestedOthers = types.filter((t) => t !== InteractRequestType.AUTO && t !== InteractRequestType.TEXT);
+            let modalitiesInstruction = "";
+            if (isAuto) {
+                modalitiesInstruction = `Analyze the user's message and optionally decide to use allowed modalities: ${requestedOthers.join(", ") || "none"}.
+  - 'textResponse' is ALWAYS REQUIRED.
+  - The modalities you are ALLOWED to dynamically include: ${requestedOthers.length > 0 ? requestedOthers.join(", ") : "None (Only text is allowed)"}. Do not include other modalities.`;
+                if (requestedOthers.includes(InteractRequestType.IMAGE)) {
+                    modalitiesInstruction += `\n  - Include 'imageParams' for visual/photo requests or key visual moments during active events; explicitly describe current clothing/exposure in image fields.`;
+                }
+                else {
+                    modalitiesInstruction += `\n  - ALWAYS set 'imageParams' to null. If the user explicitly asks for a picture, FIRMLY decline naturally in your 'textResponse' (e.g., say you absolutely cannot right now). NEVER pretend to send one, and NEVER give in no matter how many times they ask.`;
+                }
+                if (requestedOthers.includes(InteractRequestType.VOICE)) {
+                    modalitiesInstruction += `\n  - Include 'voiceArgs' ONLY if the complicated tone/emotion is hard to express via pure text, or if the user explicitly requests to hear your voice. Otherwise, set it to null.`;
+                }
+                else {
+                    modalitiesInstruction += `\n  - ALWAYS set 'voiceArgs' to null.`;
+                }
+            }
+            else {
+                modalitiesInstruction = `You MUST return the requested modalities: ${requestedOthers.join(", ") || "only text"}.
+  - 'textResponse' is ALWAYS REQUIRED.`;
+                if (requestedOthers.includes(InteractRequestType.IMAGE)) {
+                    modalitiesInstruction += `\n  - 'imageParams' is REQUIRED. Include it and explicitly describe current clothing/exposure in image fields.`;
+                }
+                else {
+                    modalitiesInstruction += `\n  - ALWAYS set 'imageParams' to null. If the user explicitly asks for a picture, FIRMLY decline naturally in your 'textResponse' (e.g., say you absolutely cannot right now). NEVER pretend to send one, and NEVER give in no matter how many times they ask.`;
+                }
+                if (requestedOthers.includes(InteractRequestType.VOICE)) {
+                    modalitiesInstruction += `\n  - 'voiceArgs' is REQUIRED. Include it.`;
+                }
+                else {
+                    modalitiesInstruction += `\n  - ALWAYS set 'voiceArgs' to null.`;
+                }
+            }
+            modalitiesInstruction += `\n  - Include 'triggerEvent' only if the VERY LAST USER MESSAGE proposes a new activity/hangout; ignore older history.
+  - Outfit acquisition (VERY LAST USER MESSAGE only): set giftOutfit for gift/buy/add-clothes intent; otherwise null. giftOutfit format: { "descriptionText": "short outfit description" }.`;
             // Combine state info into a clean descriptive context
             const systemPrompt = `${this.buildStateContextPrompt(state, params.localContext)}
 Available Wardrobe Outfits (For event triggers):
@@ -397,14 +446,7 @@ ${availableOutfits}
 The user has sent a message. You must evaluate the context and the user's message, and return a JSON object (no markdown formatting) that dictates the character's multi-modal response.
-${isAuto
-                ? `Analyze the user's message and decide response modalities (text, image, voice).
-  - Always include 'textResponse'.
-  - Include 'imageParams' for visual/photo requests or key visual moments during active events; explicitly describe current clothing/exposure in image fields.
-  - Include 'voiceArgs' ONLY if the complicated tone/emotion is hard to express via pure text, or if the user explicitly requests to hear your voice. Otherwise, set it to null.
-  - Include 'triggerEvent' only if the VERY LAST USER MESSAGE proposes a new activity/hangout; ignore older history.
-  - Outfit acquisition (VERY LAST USER MESSAGE only): set giftOutfit for gift/buy/add-clothes intent; otherwise null. giftOutfit format: { "descriptionText": "short outfit description" }.`
-                : `Requested types to fulfill: ${types.join(", ")}`}
+${modalitiesInstruction}
 Every turn adjusts trust: positive +1, negative -1, neutral 0. Always include 'stateUpdate' with integer 'temperatureDelta' (range guidance: 0 cold to 100 obsessive).
 Always return 'stateUpdate.ongoingScene' as an object with both keys: { "scene": string, "outfit": string }.
@@ -413,7 +455,9 @@ For 'ongoingScene.outfit': decide based on the current active wardrobe by defaul
 USER ANALYSIS WORKFLOW:
 - Extract from VERY LAST USER MESSAGE only.
 - Add only explicit new user facts from this turn (no inference).
-- Categories: 'realName', 'occupation', 'age', 'gender', 'hobby', 'trait', 'communicationStyle', 'boundary'.
+- For 'preference', only capture explicit statements (e.g., "I like/love/dislike/hate...").
+- For 'boundary', only capture explicit rejections or limitations (e.g., "Don't talk about X", "I won't do Y").
+- Categories: 'realName', 'occupation', 'age', 'gender', 'hobby', 'trait', 'communicationStyle', 'boundary', 'preference'.
 - Keep nicknames in stateUpdate; do not place them in newFactsLearned.
 - If no new fact is explicit, set userAnalysis to null.
@@ -427,13 +471,13 @@ Output JSON Schema:
   "textResponse": "Spoken dialogue ONLY. Never include actions or parentheses.",
   "stateUpdate": { "temperatureDelta": 1, "userNickname": "How character addresses user", "agentNickname": "How user addresses character", "talkingStyle": "Current speaking style", "ongoingScene": { "scene": "Current physical scene/activity", "outfit": "Current outfit wording; use 'naked' when applicable" } },
   "giftOutfit": { "descriptionText": "Concise description of the newly acquired outfit to add into wardrobe." },
-  "userAnalysis": { "newFactsLearned": [{ "category": "realName|occupation|age|gender|hobby|trait|communicationStyle|boundary", "value": "explicit new user fact from VERY LAST USER MESSAGE" }] },
+  "userAnalysis": { "newFactsLearned": [{ "category": "realName|occupation|age|gender|hobby|trait|communicationStyle|boundary|preference", "value": "explicit new user fact from VERY LAST USER MESSAGE" }] },
   "isEndTurn": false,
   "triggerEvent": {
     ${this.getEventSchemaParams(state.dynamic_context?.userNickname)}
   },
-  ${this.getImageSchemaParams()},
-  ${this.getVoiceSchemaFromState(state)}
+  ${this.getImageSchemaParams(requestedOthers.includes(InteractRequestType.IMAGE))},
+  ${this.getVoiceSchemaFromState(state, requestedOthers.includes(InteractRequestType.VOICE))}
 }
 Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent", "giftOutfit", or "userAnalysis" are not needed, set them to null. "stateUpdate" cannot be null. Return valid raw JSON only.`;
             const transcript = this.buildHistoryTranscript(params.history, state);
@@ -452,7 +496,7 @@ Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent",
             // console.debug("[CyberSoulClient] Raw LLM Response:", rawLlmResponse);
             let parsedIntent;
             try {
-                parsedIntent = robustJsonParse(rawLlmResponse, "Dispatcher fallback");
+                parsedIntent = robustJsonParse(rawLlmResponse, "Dispatcher fallback", { textResponse: "", actionText: "", isEndTurn: false });
             }
             catch (e) {
                 console.warn("[CyberSoulClient] JSON parse failed, falling back to raw text:", e);
@@ -499,8 +543,8 @@ Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent",
                 parsedIntent.giftOutfit.descriptionText.trim().length > 0) {
                 mediaTasks.push(this.giftOutfit(parsedIntent.giftOutfit.descriptionText.trim()).catch((e) => console.error("[CyberSoulClient] Auto giftOutfit failed:", e)));
             }
-            const shouldGenerateImage = types.includes(InteractRequestType.IMAGE) ||
-                (isAuto && !!parsedIntent.imageParams);
+            const shouldGenerateImage = types.includes(InteractRequestType.IMAGE) &&
+                (!isAuto || !!parsedIntent.imageParams);
             if (shouldGenerateImage) {
                 const imagePayload = parsedIntent.imageParams && typeof parsedIntent.imageParams === "object"
                     ? parsedIntent.imageParams
@@ -512,8 +556,8 @@ Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent",
                     finalImageUrl = res.image_url;
                 }).catch(e => console.error("[CyberSoulClient] Image generation failed:", e)));
             }
-            const shouldGenerateVoice = types.includes(InteractRequestType.VOICE) ||
-                (isAuto && !!parsedIntent.voiceArgs);
+            const shouldGenerateVoice = types.includes(InteractRequestType.VOICE) &&
+                (!isAuto || !!parsedIntent.voiceArgs);
             if (shouldGenerateVoice) {
                 const normalizedVoiceArgs = parsedIntent.voiceArgs && typeof parsedIntent.voiceArgs === "object"
                     ? parsedIntent.voiceArgs
@@ -654,7 +698,7 @@ CRITICAL: Output MUST be ONLY valid JSON with no markdown block wrappers. Do NOT
 You are an AI image prompt director. Analyze the scene description according to the character's relationship stage and emotional inertia to determine the best image generation parameters.
 Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly matching this schema:
 {
-  ${this.getImageSchemaParams()}
+  ${this.getImageSchemaParams(true)}
 }`;
         const transcript = this.buildHistoryTranscript(params.interactParams?.history, state);
         const promptMessages = [
@@ -689,7 +733,7 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
 You are a voice acting director. ${this.getVoiceDirectorInstruction(state)}
 Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly matching this schema:
 {
-  ${this.getVoiceSchemaFromState(state)}
+  ${this.getVoiceSchemaFromState(state, true)}
 }`;
         const transcript = this.buildHistoryTranscript(params.interactParams?.history, state);
         const promptMessages = [
@@ -842,6 +886,7 @@ Output requirements:
                     traits: [],
                     communicationStyle: "",
                     boundaries: [],
+                    preferences: [],
                 }
             };
             const systemPrompt = `You are an AI Memory Consolidation Engine for a virtual companion.
@@ -855,7 +900,7 @@ Your task is to merge the 'Current Core Memory' and 'Current User Codex' with 'N
 5. **Limit:** Maximum 10 items per array.
 **Rules for UserCodex:**
-1. **Deduplicate & Consolidate:** Remove duplicate hobbies, traits, and boundaries. Combine related points into concise descriptors.
+1. **Deduplicate & Consolidate:** Remove duplicate hobbies, traits, boundaries, and preferences. Combine related points into concise descriptors.
 2. **Update Facts:** If the new events contain updated basic info (like new realName, different occupation), update it. Otherwise keep the existing info.
 3. **Keep it Clean:** Maximum 15 items per array.
@@ -885,7 +930,8 @@ Your task is to merge the 'Current Core Memory' and 'Current User Codex' with 'N
       "hobbies": ["string"],
       "traits": ["string"],
       "communicationStyle": "string",
-      "boundaries": ["string"]
+      "boundaries": ["string"],
+      "preferences": ["string"]
     }
   }
 }

package/dist/types.d.ts CHANGED Viewed

@@ -82,7 +82,7 @@ export interface DispatcherIntent {
     } | null;
     userAnalysis?: {
         newFactsLearned: {
-            category: "realName" | "occupation" | "age" | "gender" | "hobby" | "trait" | "communicationStyle" | "boundary";
+            category: "realName" | "occupation" | "age" | "gender" | "hobby" | "trait" | "communicationStyle" | "boundary" | "preference";
             value: string;
         }[];
     };
@@ -129,6 +129,7 @@ export interface UserCodex {
         traits: string[];
         communicationStyle: string;
         boundaries: string[];
+        preferences?: string[];
     };
     familiarityScore?: number;
 }

package/dist/utils/json.utils.d.ts CHANGED Viewed

	@@ -1 +1 @@
1	- export declare function robustJsonParse<T>(jsonString: string, contextMessage?: string): T;
1	+ export declare function robustJsonParse<T>(jsonString: string, contextMessage?: string, fallbackTemplate?: Record<string, any>): T;

package/dist/utils/json.utils.js CHANGED Viewed

@@ -1,4 +1,4 @@
-export function robustJsonParse(jsonString, contextMessage = 'throwing original error') {
+export function robustJsonParse(jsonString, contextMessage = 'throwing original error', fallbackTemplate) {
     let cleanJson = jsonString.trim();
     // 0. Inject missing colons between string keys and string values (e.g. "key""value" -> "key":"value")
     // Only insert the colon if we match a likely key (alphanumeric/hyphen) followed by quotes, handling smart quotes.
@@ -119,6 +119,50 @@ export function robustJsonParse(jsonString, contextMessage = 'throwing original
                 }
             }
         }
+        // FINAL FALLBACK: Regex extraction of requested fields if fallbackTemplate is provided
+        if (fallbackTemplate) {
+            console.warn(`[robustJsonParse] Regex fallback using template for: ${contextMessage}`);
+            const extractedObj = { ...fallbackTemplate };
+            let extractedAny = false;
+            for (const key of Object.keys(fallbackTemplate)) {
+                // 1. Try to extract string values handling escaped characters like \" and \n
+                const stringMatch = cleanJson.match(new RegExp(`"${key}"\\s*:\\s*"((?:[^"\\\\]|\\\\.)*)"`));
+                if (stringMatch) {
+                    try {
+                        extractedObj[key] = JSON.parse(`"${stringMatch[1]}"`);
+                    }
+                    catch (err) {
+                        extractedObj[key] = stringMatch[1];
+                    }
+                    extractedAny = true;
+                    continue;
+                }
+                // 2. Try to extract booleans, numbers, or null
+                const primitiveMatch = cleanJson.match(new RegExp(`"${key}"\\s*:\\s*([a-zA-Z0-9_.-]+)`));
+                if (primitiveMatch) {
+                    const val = primitiveMatch[1];
+                    if (val === 'true') {
+                        extractedObj[key] = true;
+                        extractedAny = true;
+                    }
+                    else if (val === 'false') {
+                        extractedObj[key] = false;
+                        extractedAny = true;
+                    }
+                    else if (val === 'null') {
+                        extractedObj[key] = null;
+                        extractedAny = true;
+                    }
+                    else if (!isNaN(Number(val))) {
+                        extractedObj[key] = Number(val);
+                        extractedAny = true;
+                    }
+                }
+            }
+            if (extractedAny) {
+                return extractedObj;
+            }
+        }
         console.warn(`Failed to parse Dispatcher Intent: ${contextMessage}. Falling back to plain text.`);
         throw e;
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@space3-npm/cybersoul-client",
-  "version": "1.2.3",
+  "version": "1.2.5",
   "type": "module",
   "main": "dist/index.js",
   "module": "dist/index.js",