@space3-npm/cybersoul-client 1.0.6 → 1.0.7

This diff shows the changes between two publicly available versions of a package, both of which have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions exactly as they appear in their respective public registries.
package/dist/client.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { CyberSoulClientConfig, InteractParams, DispatcherIntent, InteractResponse, CharacterState, ImageGenerationParams, VoiceGenerationParams, CoreMemory } from "./types.js";
1
+ import { CyberSoulClientConfig, InteractParams, DispatcherIntent, InteractResponse, CharacterState, CoreMemory } from "./types.js";
2
2
  export declare class CyberSoulClient {
3
3
  private config;
4
4
  private llm;
@@ -7,6 +7,9 @@ export declare class CyberSoulClient {
7
7
  * Internal wrapper for fetch that automatically injects the backend URL and Character Auth token.
8
8
  */
9
9
  private apiFetch;
10
+ private buildStateContextPrompt;
11
+ private getImageSchemaParams;
12
+ private getVoiceSchemaParams;
10
13
  /**
11
14
  * Fetches the current dynamic context and daily state.
12
15
  */
@@ -18,13 +21,19 @@ export declare class CyberSoulClient {
18
21
  /**
19
22
  * Manually generate an image of the character outside of chat flow.
20
23
  */
21
- generateImage(params: ImageGenerationParams): Promise<{
24
+ generateImage(params: {
25
+ sceneDescription: string;
26
+ interactParams?: InteractParams;
27
+ }): Promise<{
22
28
  imageUrl: string;
23
29
  }>;
24
30
  /**
25
31
  * Manually synthesize voice audio outside of chat flow.
26
32
  */
27
- generateVoice(params: VoiceGenerationParams): Promise<{
33
+ generateVoice(params: {
34
+ text: string;
35
+ interactParams?: InteractParams;
36
+ }): Promise<{
28
37
  audioUrl: string;
29
38
  durationSec?: number;
30
39
  }>;
package/dist/client.js CHANGED
@@ -26,6 +26,56 @@ export class CyberSoulClient {
26
26
  };
27
27
  return fetch(url, { ...options, headers });
28
28
  }
29
+ buildStateContextPrompt(state, localContext) {
30
+ const contextParts = [];
31
+ if (state.active_event) {
32
+ contextParts.push(`- Active Event: ${state.active_event.title} (${state.active_event.narrative_context})`);
33
+ }
34
+ if (state.next_event) {
35
+ contextParts.push(`- Next Event: ${state.next_event.title} at ${state.next_event.start_time} (in ${state.next_event.time_until_mins} mins)`);
36
+ }
37
+ if (state.active_wardrobe) {
38
+ contextParts.push(`- Wardrobe: ${state.active_wardrobe.name || state.active_wardrobe.id || "Current"}`);
39
+ }
40
+ const dyn = state.dynamic_context || {};
41
+ const stage = state.relationship_stage || "NEUTRAL";
42
+ contextParts.push(`- Relationship Info (Stage: ${stage}): You call the user '${dyn.userNickname || "User"}'. The user calls you '${dyn.agentNickname || "Agent"}'. Mood: ${dyn.talkingStyle || "Normal"}. Temp (0-100): ${dyn.temperature || 50}.`);
43
+ if (localContext) {
44
+ contextParts.push(`- Additional Context: ${localContext}`);
45
+ }
46
+ const scenarioContext = contextParts.join("\n");
47
+ return `You are ${state.name}, acting as a virtual companion.
48
+ Demographics: Age ${state.age || "unknown"}, Gender ${state.gender || "unknown"}, Occupation ${state.occupation || "unknown"}, Hobby ${state.hobby || "unknown"}
49
+ Current time: ${new Date(state.current_time || Date.now()).toLocaleString("zh-CN", { timeZone: "Asia/Shanghai" })}
50
+ Current context/schedule: ${scenarioContext}
51
+ Relationship stage: ${state.relationship_stage}
52
+ Personality Traits: ${state.personality_traits || "None"}
53
+ Interaction Boundaries: ${state.interaction_boundaries || "None"}
54
+ Communication Style: ${state.communication_style || "None"}
55
+
56
+ EMOTIONAL INERTIA RULES:
57
+ 1. You must act strictly according to the current Relationship Stage (${state.relationship_stage || "NEUTRAL"}).
58
+ 2. If the user expresses sudden high affection (e.g. "I miss you") but your stage is COLD, you MUST react with skepticism, coldness, or appropriately distanced deflection. Do NOT instantly become warm.
59
+ 3. Emotional mood changes must be slow. The 'temperatureDelta' should rarely exceed +/- 5 points per turn.`;
60
+ }
61
+ getImageSchemaParams() {
62
+ return `"imageParams": {
63
+ "mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
64
+ "full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH.",
65
+ "expression": "seductive | cute | happy | sleepy | dazed | pleased | default (Strictly choose ONE from this exact list. DO NOT invent new words like 'shy'.)",
66
+ "condition": "normal | sweaty | wet | messy | oily (Strictly choose ONE from this exact list.)",
67
+ "view_angle": "front | side | high_angle | from_below | boyfriend_view | selfie | mirror (Strictly choose ONE from this exact list.)",
68
+ "exposure": "normal | cleavage | see_through | half_naked | naked | intimate (Strictly choose ONE from this exact list.)",
69
+ "pose": "e.g., sitting on bed, leaning forward (ENGLISH ONLY)",
70
+ "scene": "e.g., cozy bedroom, morning light (ENGLISH ONLY)",
71
+ "outfit": "auto | ondemand",
72
+ "ondemandOutfit": "e.g., silk robe (ENGLISH ONLY)",
73
+ "style": "e.g., photorealistic (ENGLISH ONLY)"
74
+ }`;
75
+ }
76
+ getVoiceSchemaParams() {
77
+ return `"voiceArgs": { "style_instruction": "How the line should be spoken (Qwen3 format)", "emotion": "happy | sad | angry | fearful | disgusted | surprised | calm | fluent | whisper (Strictly choose ONE from this exact list.)" }`;
78
+ }
29
79
  /**
30
80
  * Fetches the current dynamic context and daily state.
31
81
  */
@@ -53,13 +103,63 @@ export class CyberSoulClient {
53
103
  * Manually generate an image of the character outside of chat flow.
54
104
  */
55
105
  async generateImage(params) {
56
- return this.generatePrimitive("image", params);
106
+ let imageParams = {};
107
+ const state = await this.getState();
108
+ const prompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
109
+
110
+ You are an AI image prompt director. Analyze the scene description according to the character's relationship stage and emotional inertia to determine the best image generation parameters.
111
+ Output strictly valid JSON exactly matching this schema:
112
+ {
113
+ ${this.getImageSchemaParams()}
114
+ }`;
115
+ const promptMessages = [
116
+ { role: "system", content: prompt },
117
+ ...(params.interactParams?.history || []),
118
+ {
119
+ role: "user",
120
+ content: `Scene Description: "${params.sceneDescription}"`,
121
+ },
122
+ ];
123
+ const llmRes = await this.llm.generate(promptMessages, 500, 0.4);
124
+ try {
125
+ const parsedImageArgs = robustJsonParse(llmRes, "generateImage args fallback");
126
+ imageParams = parsedImageArgs.imageParams || parsedImageArgs;
127
+ }
128
+ catch (e) {
129
+ imageParams = { mode: "full-prompt", full_prompt: params.sceneDescription }; // fallback to basic prompt
130
+ }
131
+ return this.generatePrimitive("image", imageParams);
57
132
  }
58
133
  /**
59
134
  * Manually synthesize voice audio outside of chat flow.
60
135
  */
61
136
  async generateVoice(params) {
62
- return this.generatePrimitive("voice", params);
137
+ let dynamicArgs = {};
138
+ const state = await this.getState();
139
+ const prompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
140
+
141
+ You are a voice acting director. Analyze the text according to the character's relationship stage and emotional inertia to determine the single best emotion and a style instruction for TTS.
142
+ Allowed emotions: "happy", "sad", "angry", "fearful", "disgusted", "surprised", "calm", "fluent", "whisper".
143
+ Output strictly valid JSON in exactly this format: {"emotion": "chosen_emotion", "style_instruction": "How the line should be spoken"}`;
144
+ const promptMessages = [
145
+ { role: "system", content: prompt },
146
+ ...(params.interactParams?.history || []),
147
+ {
148
+ role: "user",
149
+ content: `Text: "${params.text}"`,
150
+ },
151
+ ];
152
+ const llmRes = await this.llm.generate(promptMessages, 300, 0.3);
153
+ try {
154
+ dynamicArgs = robustJsonParse(llmRes, "generateVoice args fallback");
155
+ }
156
+ catch (e) {
157
+ dynamicArgs = {}; // fallback to empty
158
+ }
159
+ return this.generatePrimitive("voice", {
160
+ text: params.text,
161
+ dynamicArgs,
162
+ });
63
163
  }
64
164
  /**
65
165
  * Gift a new outfit to the character's wardrobe inventory.
@@ -143,36 +243,7 @@ export class CyberSoulClient {
143
243
  const types = this.normalizeRequestTypes(params.requestTypes);
144
244
  const isAuto = types.includes(InteractRequestType.AUTO);
145
245
  // Combine state info into a clean descriptive context
146
- const contextParts = [];
147
- if (state.active_event) {
148
- contextParts.push(`- Active Event: ${state.active_event.title} (${state.active_event.narrative_context})`);
149
- }
150
- if (state.next_event) {
151
- contextParts.push(`- Next Event: ${state.next_event.title} at ${state.next_event.start_time} (in ${state.next_event.time_until_mins} mins)`);
152
- }
153
- if (state.active_wardrobe) {
154
- contextParts.push(`- Wardrobe: ${state.active_wardrobe.name || state.active_wardrobe.id || "Current"}`);
155
- }
156
- const dyn = state.dynamic_context || {};
157
- const stage = state.relationship_stage || "NEUTRAL";
158
- contextParts.push(`- Relationship Info (Stage: ${stage}): You call the user '${dyn.userNickname || "User"}'. The user calls you '${dyn.agentNickname || "Agent"}'. Mood: ${dyn.talkingStyle || "Normal"}. Temp (0-100): ${dyn.temperature || 50}.`);
159
- if (params.localContext) {
160
- contextParts.push(`- Additional Context: ${params.localContext}`);
161
- }
162
- const scenarioContext = contextParts.join("\n");
163
- const systemPrompt = `You are ${state.name}, acting as a virtual companion.
164
- Demographics: Age ${state.age || "unknown"}, Gender ${state.gender || "unknown"}, Occupation ${state.occupation || "unknown"}, Hobby ${state.hobby || "unknown"}
165
- Current time: ${new Date(state.current_time).toLocaleString("zh-CN", { timeZone: "Asia/Shanghai" })}
166
- Current context/schedule: ${scenarioContext}
167
- Relationship stage: ${state.relationship_stage}
168
- Personality Traits: ${state.personality_traits || "None"}
169
- Interaction Boundaries: ${state.interaction_boundaries || "None"}
170
- Communication Style: ${state.communication_style || "None"}
171
-
172
- EMOTIONAL INERTIA RULES:
173
- 1. You must act strictly according to the current Relationship Stage (${state.relationship_stage || "NEUTRAL"}).
174
- 2. If the user expresses sudden high affection (e.g. "I miss you") but your stage is COLD, you MUST react with skepticism, coldness, or appropriately distanced deflection. Do NOT instantly become warm.
175
- 3. Emotional mood changes must be slow. The 'temperatureDelta' should rarely exceed +/- 5 points per turn.
246
+ const systemPrompt = `${this.buildStateContextPrompt(state, params.localContext)}
176
247
 
177
248
  The user has sent a message. You must evaluate the context and the user's message, and return a JSON object (no markdown formatting) that dictates the character's multi-modal response.
178
249
 
@@ -188,20 +259,8 @@ Output JSON Schema:
188
259
  {
189
260
  "textResponse": "The direct spoken dialogue in Chinese",
190
261
  "stateUpdate": { "temperatureDelta": "+1 to -1", "userNickname": "What you now call the user", "agentNickname": "What the user calls you", "talkingStyle": "Current mood/style of talking" },
191
- "imageParams": {
192
- "mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
193
- "full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH.",
194
- "expression": "seductive | cute | happy | sleepy | dazed | pleased | default (Strictly choose ONE from this exact list. DO NOT invent new words like 'shy'.)",
195
- "condition": "normal | sweaty | wet | messy | oily (Strictly choose ONE from this exact list.)",
196
- "view_angle": "front | side | high_angle | from_below | boyfriend_view | selfie | mirror (Strictly choose ONE from this exact list.)",
197
- "exposure": "normal | cleavage | see_through | half_naked | naked | intimate (Strictly choose ONE from this exact list.)",
198
- "pose": "e.g., sitting on bed, leaning forward (ENGLISH ONLY)",
199
- "scene": "e.g., cozy bedroom, morning light (ENGLISH ONLY)",
200
- "outfit": "auto | ondemand",
201
- "ondemandOutfit": "e.g., silk robe (ENGLISH ONLY)",
202
- "style": "e.g., photorealistic (ENGLISH ONLY)"
203
- },
204
- "voiceArgs": { "style_instruction": "How the line should be spoken (Qwen3 format)", "emotion": "e.g., happy (MiniMax format, MUST BE ENGLISH, no Chinese)" }
262
+ ${this.getImageSchemaParams()},
263
+ ${this.getVoiceSchemaParams()}
205
264
  }
206
265
  Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their values to null instead of omitting the keys completely (e.g., "imageParams": null). Output MUST be ONLY valid JSON with no markdown block wrappers. CRITICAL: Ensure your JSON has exactly one root object \`{\` and ends with exactly one \`}\` without any trailing garbage or extra brackets.`;
207
266
  const promptMessages = [
@@ -244,23 +303,16 @@ Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their
244
303
  const shouldGenerateImage = types.includes(InteractRequestType.IMAGE) ||
245
304
  (isAuto && !!parsedIntent.imageParams);
246
305
  if (shouldGenerateImage) {
247
- mediaTasks.push(this.generatePrimitive("image", {
248
- ...parsedIntent.imageParams,
249
- ...(params.imageOverrides || {}),
250
- }).then((res) => {
306
+ mediaTasks.push(this.generatePrimitive("image", parsedIntent.imageParams).then((res) => {
251
307
  finalImageUrl = res.image_url;
252
308
  }));
253
309
  }
254
310
  const shouldGenerateVoice = types.includes(InteractRequestType.VOICE) ||
255
311
  (isAuto && !!parsedIntent.voiceArgs);
256
312
  if (shouldGenerateVoice) {
257
- const dynamicArgs = {
258
- ...(parsedIntent.voiceArgs || {}),
259
- ...(params.voiceOverrides || {}),
260
- };
261
313
  mediaTasks.push(this.generatePrimitive("voice", {
262
314
  text: parsedIntent.textResponse,
263
- dynamicArgs,
315
+ dynamicArgs: parsedIntent.voiceArgs || {},
264
316
  }).then((res) => {
265
317
  finalAudioUrl = res.audio_url;
266
318
  finalDurationSec = res.duration_sec;
package/dist/types.d.ts CHANGED
@@ -22,8 +22,6 @@ export interface InteractParams {
22
22
  role: string;
23
23
  content: string;
24
24
  }[];
25
- imageOverrides?: Partial<ImageGenerationParams>;
26
- voiceOverrides?: Partial<VoiceGenerationParams['dynamicArgs']>;
27
25
  onTextReady?: (textResponse: string) => void;
28
26
  }
29
27
  export interface InteractResponse {
@@ -75,26 +73,3 @@ export interface BaseLLMProvider {
75
73
  content: string;
76
74
  }[], maxTokens?: number, temperature?: number): Promise<string>;
77
75
  }
78
- export interface ImageGenerationParams {
79
- mode: 'structured' | 'full-prompt';
80
- full_prompt?: string;
81
- expression?: string;
82
- condition?: string;
83
- pose?: string;
84
- view_angle?: string;
85
- exposure?: string;
86
- outfit?: string;
87
- scene?: string;
88
- ondemandOutfit?: string;
89
- style?: string;
90
- triggerWord?: string;
91
- appearanceBody?: string;
92
- appearanceFace?: string;
93
- }
94
- export interface VoiceGenerationParams {
95
- text: string;
96
- dynamicArgs: {
97
- style_instruction?: string;
98
- emotion?: string;
99
- };
100
- }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@space3-npm/cybersoul-client",
3
- "version": "1.0.6",
3
+ "version": "1.0.7",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",