@space3-npm/cybersoul-client 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/client.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { CyberSoulClientConfig, InteractParams, DispatcherIntent, InteractResponse, CharacterState, ImageGenerationParams, VoiceGenerationParams, CoreMemory } from "./types.js";
1
+ import { CyberSoulClientConfig, InteractParams, DispatcherIntent, InteractResponse, CharacterState, CoreMemory } from "./types.js";
2
2
  export declare class CyberSoulClient {
3
3
  private config;
4
4
  private llm;
@@ -7,6 +7,9 @@ export declare class CyberSoulClient {
7
7
  * Internal wrapper for fetch that automatically injects the backend URL and Character Auth token.
8
8
  */
9
9
  private apiFetch;
10
+ private buildStateContextPrompt;
11
+ private getImageSchemaParams;
12
+ private getVoiceSchemaParams;
10
13
  /**
11
14
  * Fetches the current dynamic context and daily state.
12
15
  */
@@ -18,13 +21,19 @@ export declare class CyberSoulClient {
18
21
  /**
19
22
  * Manually generate an image of the character outside of chat flow.
20
23
  */
21
- generateImage(params: ImageGenerationParams): Promise<{
24
+ generateImage(params: {
25
+ sceneDescription: string;
26
+ interactParams?: InteractParams;
27
+ }): Promise<{
22
28
  imageUrl: string;
23
29
  }>;
24
30
  /**
25
31
  * Manually synthesize voice audio outside of chat flow.
26
32
  */
27
- generateVoice(params: VoiceGenerationParams): Promise<{
33
+ generateVoice(params: {
34
+ text: string;
35
+ interactParams?: InteractParams;
36
+ }): Promise<{
28
37
  audioUrl: string;
29
38
  durationSec?: number;
30
39
  }>;
package/dist/client.js CHANGED
@@ -26,6 +26,56 @@ export class CyberSoulClient {
26
26
  };
27
27
  return fetch(url, { ...options, headers });
28
28
  }
29
+ buildStateContextPrompt(state, localContext) {
30
+ const contextParts = [];
31
+ if (state.active_event) {
32
+ contextParts.push(`- Active Event: ${state.active_event.title} (${state.active_event.narrative_context})`);
33
+ }
34
+ if (state.next_event) {
35
+ contextParts.push(`- Next Event: ${state.next_event.title} at ${state.next_event.start_time} (in ${state.next_event.time_until_mins} mins)`);
36
+ }
37
+ if (state.active_wardrobe) {
38
+ contextParts.push(`- Wardrobe: ${state.active_wardrobe.name || state.active_wardrobe.id || "Current"}`);
39
+ }
40
+ const dyn = state.dynamic_context || {};
41
+ const stage = state.relationship_stage || "NEUTRAL";
42
+ contextParts.push(`- Relationship Info (Stage: ${stage}): You call the user '${dyn.userNickname || "User"}'. The user calls you '${dyn.agentNickname || "Agent"}'. Mood: ${dyn.talkingStyle || "Normal"}. Temp (0-100): ${dyn.temperature || 50}.`);
43
+ if (localContext) {
44
+ contextParts.push(`- Additional Context: ${localContext}`);
45
+ }
46
+ const scenarioContext = contextParts.join("\n");
47
+ return `You are ${state.name}, acting as a virtual companion.
48
+ Demographics: Age ${state.age || "unknown"}, Gender ${state.gender || "unknown"}, Occupation ${state.occupation || "unknown"}, Hobby ${state.hobby || "unknown"}
49
+ Current time: ${new Date(state.current_time || Date.now()).toLocaleString("zh-CN", { timeZone: "Asia/Shanghai" })}
50
+ Current context/schedule: ${scenarioContext}
51
+ Relationship stage: ${state.relationship_stage}
52
+ Personality Traits: ${state.personality_traits || "None"}
53
+ Interaction Boundaries: ${state.interaction_boundaries || "None"}
54
+ Communication Style: ${state.communication_style || "None"}
55
+
56
+ EMOTIONAL INERTIA RULES:
57
+ 1. You must act strictly according to the current Relationship Stage (${state.relationship_stage || "NEUTRAL"}).
58
+ 2. If the user expresses sudden high affection (e.g. "I miss you") but your stage is COLD, you MUST react with skepticism, coldness, or appropriately distanced deflection. Do NOT instantly become warm.
59
+ 3. Emotional mood changes must be slow. The 'temperatureDelta' should rarely exceed +/- 5 points per turn.`;
60
+ }
61
+ getImageSchemaParams() {
62
+ return `"imageParams": {
63
+ "mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
64
+ "full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH.",
65
+ "expression": "seductive | cute | happy | sleepy | dazed | pleased | default (Strictly choose ONE from this exact list. DO NOT invent new words like 'shy'.)",
66
+ "condition": "normal | sweaty | wet | messy | oily (Strictly choose ONE from this exact list.)",
67
+ "view_angle": "front | side | high_angle | from_below | boyfriend_view | selfie | mirror (Strictly choose ONE from this exact list.)",
68
+ "exposure": "normal | cleavage | see_through | half_naked | naked | intimate (Strictly choose ONE from this exact list.)",
69
+ "pose": "e.g., sitting on bed, leaning forward (ENGLISH ONLY)",
70
+ "scene": "e.g., cozy bedroom, morning light (ENGLISH ONLY)",
71
+ "outfit": "auto | ondemand",
72
+ "ondemandOutfit": "e.g., silk robe (ENGLISH ONLY)",
73
+ "style": "e.g., photorealistic (ENGLISH ONLY)"
74
+ }`;
75
+ }
76
+ getVoiceSchemaParams() {
77
+ return `"voiceArgs": { "style_instruction": "How the line should be spoken (Qwen3 format)", "emotion": "happy | sad | angry | fearful | disgusted | surprised | calm | fluent | whisper (Strictly choose ONE from this exact list.)" }`;
78
+ }
29
79
  /**
30
80
  * Fetches the current dynamic context and daily state.
31
81
  */
@@ -53,13 +103,72 @@ export class CyberSoulClient {
53
103
  * Manually generate an image of the character outside of chat flow.
54
104
  */
55
105
  async generateImage(params) {
56
- return this.generatePrimitive("image", params);
106
+ let imageParams = {};
107
+ const state = await this.getState();
108
+ const prompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
109
+
110
+ You are an AI image prompt director. Analyze the scene description according to the character's relationship stage and emotional inertia to determine the best image generation parameters.
111
+ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly matching this schema:
112
+ {
113
+ ${this.getImageSchemaParams()}
114
+ }`;
115
+ const promptMessages = [
116
+ { role: "system", content: prompt },
117
+ ...(params.interactParams?.history || []),
118
+ {
119
+ role: "user",
120
+ content: `Scene Description: "${params.sceneDescription}"\n\n**CRITICAL REMINDER**: You MUST output your final response exactly in the JSON format specified in the system prompt. DO NOT output plain text dialogue directly. For 'imageParams', ALL values MUST be in ENGLISH ONLY without exception, and you MUST use the exact English enum strings provided.`,
121
+ },
122
+ ];
123
+ const llmRes = await this.llm.generate(promptMessages, 500, 0.4);
124
+ console.log("[CyberSoulClient ImageGen] Raw LLM Response:", llmRes);
125
+ try {
126
+ const parsedImageArgs = robustJsonParse(llmRes, "generateImage args fallback");
127
+ imageParams = parsedImageArgs.imageParams || parsedImageArgs;
128
+ }
129
+ catch (e) {
130
+ imageParams = { mode: "full-prompt", full_prompt: params.sceneDescription }; // fallback to basic prompt
131
+ }
132
+ const res = await this.generatePrimitive("image", imageParams);
133
+ return {
134
+ imageUrl: res.image_url,
135
+ };
57
136
  }
58
137
  /**
59
138
  * Manually synthesize voice audio outside of chat flow.
60
139
  */
61
140
  async generateVoice(params) {
62
- return this.generatePrimitive("voice", params);
141
+ let dynamicArgs = {};
142
+ const state = await this.getState();
143
+ const prompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
144
+
145
+ You are a voice acting director. Analyze the text according to the character's relationship stage and emotional inertia to determine the single best emotion and a style instruction for TTS.
146
+ Allowed emotions: "happy", "sad", "angry", "fearful", "disgusted", "surprised", "calm", "fluent", "whisper".
147
+ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly this format: {"emotion": "chosen_emotion", "style_instruction": "How the line should be spoken"}`;
148
+ const promptMessages = [
149
+ { role: "system", content: prompt },
150
+ ...(params.interactParams?.history || []),
151
+ {
152
+ role: "user",
153
+ content: `Text: "${params.text}"\n\n**CRITICAL REMINDER**: You MUST output your final response exactly in the JSON format specified in the system prompt. DO NOT output plain text dialogue directly.`,
154
+ },
155
+ ];
156
+ const llmRes = await this.llm.generate(promptMessages, 300, 0.3);
157
+ console.log("[CyberSoulClient VoiceGen] Raw LLM Response:", llmRes);
158
+ try {
159
+ dynamicArgs = robustJsonParse(llmRes, "generateVoice args fallback");
160
+ }
161
+ catch (e) {
162
+ dynamicArgs = {}; // fallback to empty
163
+ }
164
+ const res = await this.generatePrimitive("voice", {
165
+ text: params.text,
166
+ dynamicArgs,
167
+ });
168
+ return {
169
+ audioUrl: res.audio_url,
170
+ durationSec: res.duration_sec,
171
+ };
63
172
  }
64
173
  /**
65
174
  * Gift a new outfit to the character's wardrobe inventory.
@@ -143,36 +252,7 @@ export class CyberSoulClient {
143
252
  const types = this.normalizeRequestTypes(params.requestTypes);
144
253
  const isAuto = types.includes(InteractRequestType.AUTO);
145
254
  // Combine state info into a clean descriptive context
146
- const contextParts = [];
147
- if (state.active_event) {
148
- contextParts.push(`- Active Event: ${state.active_event.title} (${state.active_event.narrative_context})`);
149
- }
150
- if (state.next_event) {
151
- contextParts.push(`- Next Event: ${state.next_event.title} at ${state.next_event.start_time} (in ${state.next_event.time_until_mins} mins)`);
152
- }
153
- if (state.active_wardrobe) {
154
- contextParts.push(`- Wardrobe: ${state.active_wardrobe.name || state.active_wardrobe.id || "Current"}`);
155
- }
156
- const dyn = state.dynamic_context || {};
157
- const stage = state.relationship_stage || "NEUTRAL";
158
- contextParts.push(`- Relationship Info (Stage: ${stage}): You call the user '${dyn.userNickname || "User"}'. The user calls you '${dyn.agentNickname || "Agent"}'. Mood: ${dyn.talkingStyle || "Normal"}. Temp (0-100): ${dyn.temperature || 50}.`);
159
- if (params.localContext) {
160
- contextParts.push(`- Additional Context: ${params.localContext}`);
161
- }
162
- const scenarioContext = contextParts.join("\n");
163
- const systemPrompt = `You are ${state.name}, acting as a virtual companion.
164
- Demographics: Age ${state.age || "unknown"}, Gender ${state.gender || "unknown"}, Occupation ${state.occupation || "unknown"}, Hobby ${state.hobby || "unknown"}
165
- Current time: ${new Date(state.current_time).toLocaleString("zh-CN", { timeZone: "Asia/Shanghai" })}
166
- Current context/schedule: ${scenarioContext}
167
- Relationship stage: ${state.relationship_stage}
168
- Personality Traits: ${state.personality_traits || "None"}
169
- Interaction Boundaries: ${state.interaction_boundaries || "None"}
170
- Communication Style: ${state.communication_style || "None"}
171
-
172
- EMOTIONAL INERTIA RULES:
173
- 1. You must act strictly according to the current Relationship Stage (${state.relationship_stage || "NEUTRAL"}).
174
- 2. If the user expresses sudden high affection (e.g. "I miss you") but your stage is COLD, you MUST react with skepticism, coldness, or appropriately distanced deflection. Do NOT instantly become warm.
175
- 3. Emotional mood changes must be slow. The 'temperatureDelta' should rarely exceed +/- 5 points per turn.
255
+ const systemPrompt = `${this.buildStateContextPrompt(state, params.localContext)}
176
256
 
177
257
  The user has sent a message. You must evaluate the context and the user's message, and return a JSON object (no markdown formatting) that dictates the character's multi-modal response.
178
258
 
@@ -188,20 +268,8 @@ Output JSON Schema:
188
268
  {
189
269
  "textResponse": "The direct spoken dialogue in Chinese",
190
270
  "stateUpdate": { "temperatureDelta": "+1 to -1", "userNickname": "What you now call the user", "agentNickname": "What the user calls you", "talkingStyle": "Current mood/style of talking" },
191
- "imageParams": {
192
- "mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
193
- "full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH.",
194
- "expression": "seductive | cute | happy | sleepy | dazed | pleased | default (Strictly choose ONE from this exact list. DO NOT invent new words like 'shy'.)",
195
- "condition": "normal | sweaty | wet | messy | oily (Strictly choose ONE from this exact list.)",
196
- "view_angle": "front | side | high_angle | from_below | boyfriend_view | selfie | mirror (Strictly choose ONE from this exact list.)",
197
- "exposure": "normal | cleavage | see_through | half_naked | naked | intimate (Strictly choose ONE from this exact list.)",
198
- "pose": "e.g., sitting on bed, leaning forward (ENGLISH ONLY)",
199
- "scene": "e.g., cozy bedroom, morning light (ENGLISH ONLY)",
200
- "outfit": "auto | ondemand",
201
- "ondemandOutfit": "e.g., silk robe (ENGLISH ONLY)",
202
- "style": "e.g., photorealistic (ENGLISH ONLY)"
203
- },
204
- "voiceArgs": { "style_instruction": "How the line should be spoken (Qwen3 format)", "emotion": "e.g., happy (MiniMax format, MUST BE ENGLISH, no Chinese)" }
271
+ ${this.getImageSchemaParams()},
272
+ ${this.getVoiceSchemaParams()}
205
273
  }
206
274
  Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their values to null instead of omitting the keys completely (e.g., "imageParams": null). Output MUST be ONLY valid JSON with no markdown block wrappers. CRITICAL: Ensure your JSON has exactly one root object \`{\` and ends with exactly one \`}\` without any trailing garbage or extra brackets.`;
207
275
  const promptMessages = [
@@ -244,23 +312,16 @@ Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their
244
312
  const shouldGenerateImage = types.includes(InteractRequestType.IMAGE) ||
245
313
  (isAuto && !!parsedIntent.imageParams);
246
314
  if (shouldGenerateImage) {
247
- mediaTasks.push(this.generatePrimitive("image", {
248
- ...parsedIntent.imageParams,
249
- ...(params.imageOverrides || {}),
250
- }).then((res) => {
315
+ mediaTasks.push(this.generatePrimitive("image", parsedIntent.imageParams).then((res) => {
251
316
  finalImageUrl = res.image_url;
252
317
  }));
253
318
  }
254
319
  const shouldGenerateVoice = types.includes(InteractRequestType.VOICE) ||
255
320
  (isAuto && !!parsedIntent.voiceArgs);
256
321
  if (shouldGenerateVoice) {
257
- const dynamicArgs = {
258
- ...(parsedIntent.voiceArgs || {}),
259
- ...(params.voiceOverrides || {}),
260
- };
261
322
  mediaTasks.push(this.generatePrimitive("voice", {
262
323
  text: parsedIntent.textResponse,
263
- dynamicArgs,
324
+ dynamicArgs: parsedIntent.voiceArgs || {},
264
325
  }).then((res) => {
265
326
  finalAudioUrl = res.audio_url;
266
327
  finalDurationSec = res.duration_sec;
package/dist/types.d.ts CHANGED
@@ -22,8 +22,6 @@ export interface InteractParams {
22
22
  role: string;
23
23
  content: string;
24
24
  }[];
25
- imageOverrides?: Partial<ImageGenerationParams>;
26
- voiceOverrides?: Partial<VoiceGenerationParams['dynamicArgs']>;
27
25
  onTextReady?: (textResponse: string) => void;
28
26
  }
29
27
  export interface InteractResponse {
@@ -75,26 +73,3 @@ export interface BaseLLMProvider {
75
73
  content: string;
76
74
  }[], maxTokens?: number, temperature?: number): Promise<string>;
77
75
  }
78
- export interface ImageGenerationParams {
79
- mode: 'structured' | 'full-prompt';
80
- full_prompt?: string;
81
- expression?: string;
82
- condition?: string;
83
- pose?: string;
84
- view_angle?: string;
85
- exposure?: string;
86
- outfit?: string;
87
- scene?: string;
88
- ondemandOutfit?: string;
89
- style?: string;
90
- triggerWord?: string;
91
- appearanceBody?: string;
92
- appearanceFace?: string;
93
- }
94
- export interface VoiceGenerationParams {
95
- text: string;
96
- dynamicArgs: {
97
- style_instruction?: string;
98
- emotion?: string;
99
- };
100
- }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@space3-npm/cybersoul-client",
3
- "version": "1.0.6",
3
+ "version": "1.0.8",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",