@space3-npm/cybersoul-client 1.0.8 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.d.ts +17 -0
- package/dist/client.js +66 -24
- package/dist/types.d.ts +28 -1
- package/package.json +1 -1
package/dist/client.d.ts
CHANGED
|
@@ -10,6 +10,23 @@ export declare class CyberSoulClient {
|
|
|
10
10
|
private buildStateContextPrompt;
|
|
11
11
|
private getImageSchemaParams;
|
|
12
12
|
private getVoiceSchemaParams;
|
|
13
|
+
private buildVoiceSchemaFromDynamicParams;
|
|
14
|
+
/**
|
|
15
|
+
* Returns the JSON schema snippet for voiceArgs to embed in the LLM output schema.
|
|
16
|
+
* Built from dynamic_params when available, otherwise falls back to static defaults.
|
|
17
|
+
*/
|
|
18
|
+
private getVoiceSchemaFromState;
|
|
19
|
+
/**
|
|
20
|
+
* Returns the natural-language director instruction for generating voiceArgs.
|
|
21
|
+
* Uses dynamic_param_prompt_template from the voice model when configured.
|
|
22
|
+
*/
|
|
23
|
+
private getVoiceDirectorInstruction;
|
|
24
|
+
/**
|
|
25
|
+
* Extracts and types voiceArgs from a raw standalone LLM response.
|
|
26
|
+
* The voice-only prompt wraps the result as { voiceArgs: { ... } } — unwraps the inner object.
|
|
27
|
+
* If the payload is already the inner args object (no voiceArgs wrapper), uses it as-is.
|
|
28
|
+
*/
|
|
29
|
+
private extractVoiceArgsFromLlmResponse;
|
|
13
30
|
/**
|
|
14
31
|
* Fetches the current dynamic context and daily state.
|
|
15
32
|
*/
|
package/dist/client.js
CHANGED
|
@@ -74,7 +74,53 @@ EMOTIONAL INERTIA RULES:
|
|
|
74
74
|
}`;
|
|
75
75
|
}
|
|
76
76
|
getVoiceSchemaParams() {
|
|
77
|
-
|
|
77
|
+
// Only reached when no dynamic_params are configured on the voice model.
|
|
78
|
+
// Configure dynamic_params in DB to match the TTS provider; this fallback is provider-agnostic.
|
|
79
|
+
console.warn("[CyberSoulClient] voice_model.dynamic_params not configured — using generic fallback schema. Configure dynamic_params in DB for provider-specific behaviour.");
|
|
80
|
+
return `"voiceArgs": { "style_instruction": "How the line should be spoken (required)" }`;
|
|
81
|
+
}
|
|
82
|
+
buildVoiceSchemaFromDynamicParams(dynamicParams) {
|
|
83
|
+
const fields = dynamicParams
|
|
84
|
+
.map((p) => {
|
|
85
|
+
const hint = p.required ? `${p.description} (required)` : `${p.description} (optional)`;
|
|
86
|
+
return `"${p.name}": "${hint}"`;
|
|
87
|
+
})
|
|
88
|
+
.join(", ");
|
|
89
|
+
return `"voiceArgs": { ${fields} }`;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Returns the JSON schema snippet for voiceArgs to embed in the LLM output schema.
|
|
93
|
+
* Built from dynamic_params when available, otherwise falls back to static defaults.
|
|
94
|
+
*/
|
|
95
|
+
getVoiceSchemaFromState(state) {
|
|
96
|
+
const dynamicParams = state.voice_model?.dynamic_params;
|
|
97
|
+
if (dynamicParams && dynamicParams.length > 0) {
|
|
98
|
+
return this.buildVoiceSchemaFromDynamicParams(dynamicParams);
|
|
99
|
+
}
|
|
100
|
+
return this.getVoiceSchemaParams();
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Returns the natural-language director instruction for generating voiceArgs.
|
|
104
|
+
* Uses dynamic_param_prompt_template from the voice model when configured.
|
|
105
|
+
*/
|
|
106
|
+
getVoiceDirectorInstruction(state) {
|
|
107
|
+
const template = state.voice_model?.dynamic_param_prompt_template?.trim();
|
|
108
|
+
if (template) {
|
|
109
|
+
return template;
|
|
110
|
+
}
|
|
111
|
+
return "Analyze the text according to the character's relationship stage and emotional inertia to determine the best dynamic voice parameters for TTS.";
|
|
112
|
+
}
|
|
113
|
+
/**
|
|
114
|
+
* Extracts and types voiceArgs from a raw standalone LLM response.
|
|
115
|
+
* The voice-only prompt wraps the result as { voiceArgs: { ... } } — unwraps the inner object.
|
|
116
|
+
* If the payload is already the inner args object (no voiceArgs wrapper), uses it as-is.
|
|
117
|
+
*/
|
|
118
|
+
extractVoiceArgsFromLlmResponse(payload) {
|
|
119
|
+
const inner = payload.voiceArgs;
|
|
120
|
+
if (inner && typeof inner === "object" && !Array.isArray(inner)) {
|
|
121
|
+
return inner;
|
|
122
|
+
}
|
|
123
|
+
return payload;
|
|
78
124
|
}
|
|
79
125
|
/**
|
|
80
126
|
* Fetches the current dynamic context and daily state.
|
|
@@ -86,25 +132,14 @@ EMOTIONAL INERTIA RULES:
|
|
|
86
132
|
* Updates the character's relationship temperature or mood.
|
|
87
133
|
*/
|
|
88
134
|
async updateDynamicContext(stateUpdate) {
|
|
89
|
-
|
|
90
|
-
return;
|
|
91
|
-
// Map TS schema intent (temperatureDelta) to match Backend payload schema (temperature)
|
|
92
|
-
const payload = { ...stateUpdate };
|
|
93
|
-
if (payload.temperatureDelta !== undefined) {
|
|
94
|
-
payload.temperature = payload.temperatureDelta;
|
|
95
|
-
delete payload.temperatureDelta;
|
|
96
|
-
}
|
|
97
|
-
await this.apiFetch("/api/v1/cyber-soul/characters/dynamic-context", {
|
|
98
|
-
method: "PATCH",
|
|
99
|
-
body: JSON.stringify(payload),
|
|
100
|
-
}).catch((e) => console.error("Failed to update dynamic context", e)); // non-blocking error handler
|
|
135
|
+
return this._updateDynamicContextInternal(stateUpdate);
|
|
101
136
|
}
|
|
102
137
|
/**
|
|
103
138
|
* Manually generate an image of the character outside of chat flow.
|
|
104
139
|
*/
|
|
105
140
|
async generateImage(params) {
|
|
106
141
|
let imageParams = {};
|
|
107
|
-
const state = await this.
|
|
142
|
+
const state = await this.fetchRemoteState();
|
|
108
143
|
const prompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
|
|
109
144
|
|
|
110
145
|
You are an AI image prompt director. Analyze the scene description according to the character's relationship stage and emotional inertia to determine the best image generation parameters.
|
|
@@ -120,7 +155,7 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
|
|
|
120
155
|
content: `Scene Description: "${params.sceneDescription}"\n\n**CRITICAL REMINDER**: You MUST output your final response exactly in the JSON format specified in the system prompt. DO NOT output plain text dialogue directly. For 'imageParams', ALL values MUST be in ENGLISH ONLY without exception, and you MUST use the exact English enum strings provided.`,
|
|
121
156
|
},
|
|
122
157
|
];
|
|
123
|
-
const llmRes = await this.llm.generate(promptMessages,
|
|
158
|
+
const llmRes = await this.llm.generate(promptMessages, 800, 0.4);
|
|
124
159
|
console.log("[CyberSoulClient ImageGen] Raw LLM Response:", llmRes);
|
|
125
160
|
try {
|
|
126
161
|
const parsedImageArgs = robustJsonParse(llmRes, "generateImage args fallback");
|
|
@@ -139,12 +174,14 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
|
|
|
139
174
|
*/
|
|
140
175
|
async generateVoice(params) {
|
|
141
176
|
let dynamicArgs = {};
|
|
142
|
-
const state = await this.
|
|
177
|
+
const state = await this.fetchRemoteState();
|
|
143
178
|
const prompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
|
|
144
179
|
|
|
145
|
-
You are a voice acting director.
|
|
146
|
-
|
|
147
|
-
|
|
180
|
+
You are a voice acting director. ${this.getVoiceDirectorInstruction(state)}
|
|
181
|
+
Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly matching this schema:
|
|
182
|
+
{
|
|
183
|
+
${this.getVoiceSchemaFromState(state)}
|
|
184
|
+
}`;
|
|
148
185
|
const promptMessages = [
|
|
149
186
|
{ role: "system", content: prompt },
|
|
150
187
|
...(params.interactParams?.history || []),
|
|
@@ -153,13 +190,14 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
|
|
|
153
190
|
content: `Text: "${params.text}"\n\n**CRITICAL REMINDER**: You MUST output your final response exactly in the JSON format specified in the system prompt. DO NOT output plain text dialogue directly.`,
|
|
154
191
|
},
|
|
155
192
|
];
|
|
156
|
-
const llmRes = await this.llm.generate(promptMessages,
|
|
193
|
+
const llmRes = await this.llm.generate(promptMessages, 800, 0.3);
|
|
157
194
|
console.log("[CyberSoulClient VoiceGen] Raw LLM Response:", llmRes);
|
|
158
195
|
try {
|
|
159
|
-
|
|
196
|
+
const parsedVoicePayload = robustJsonParse(llmRes, "generateVoice args fallback");
|
|
197
|
+
dynamicArgs = this.extractVoiceArgsFromLlmResponse(parsedVoicePayload);
|
|
160
198
|
}
|
|
161
199
|
catch (e) {
|
|
162
|
-
dynamicArgs = {};
|
|
200
|
+
dynamicArgs = {};
|
|
163
201
|
}
|
|
164
202
|
const res = await this.generatePrimitive("voice", {
|
|
165
203
|
text: params.text,
|
|
@@ -263,13 +301,14 @@ ${isAuto
|
|
|
263
301
|
- If the user wants to hear you, or if appropriate for a voice message, include 'voiceArgs'.`
|
|
264
302
|
: `Requested types to fulfill: ${types.join(", ")}`}
|
|
265
303
|
If the user's message shifts the emotional mood, establishes new nicknames, or warrants a relationship temperature change, you MUST include a 'stateUpdate' block. Temperature goes from 0 (cold/angry) to 100 (obsessively in love).
|
|
304
|
+
Voice direction for voiceArgs: ${this.getVoiceDirectorInstruction(state)}
|
|
266
305
|
|
|
267
306
|
Output JSON Schema:
|
|
268
307
|
{
|
|
269
308
|
"textResponse": "The direct spoken dialogue in Chinese",
|
|
270
309
|
"stateUpdate": { "temperatureDelta": "+1 to -1", "userNickname": "What you now call the user", "agentNickname": "What the user calls you", "talkingStyle": "Current mood/style of talking" },
|
|
271
310
|
${this.getImageSchemaParams()},
|
|
272
|
-
${this.
|
|
311
|
+
${this.getVoiceSchemaFromState(state)}
|
|
273
312
|
}
|
|
274
313
|
Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their values to null instead of omitting the keys completely (e.g., "imageParams": null). Output MUST be ONLY valid JSON with no markdown block wrappers. CRITICAL: Ensure your JSON has exactly one root object \`{\` and ends with exactly one \`}\` without any trailing garbage or extra brackets.`;
|
|
275
314
|
const promptMessages = [
|
|
@@ -319,9 +358,12 @@ Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their
|
|
|
319
358
|
const shouldGenerateVoice = types.includes(InteractRequestType.VOICE) ||
|
|
320
359
|
(isAuto && !!parsedIntent.voiceArgs);
|
|
321
360
|
if (shouldGenerateVoice) {
|
|
361
|
+
const normalizedVoiceArgs = parsedIntent.voiceArgs && typeof parsedIntent.voiceArgs === "object"
|
|
362
|
+
? parsedIntent.voiceArgs
|
|
363
|
+
: {};
|
|
322
364
|
mediaTasks.push(this.generatePrimitive("voice", {
|
|
323
365
|
text: parsedIntent.textResponse,
|
|
324
|
-
dynamicArgs:
|
|
366
|
+
dynamicArgs: normalizedVoiceArgs,
|
|
325
367
|
}).then((res) => {
|
|
326
368
|
finalAudioUrl = res.audio_url;
|
|
327
369
|
finalDurationSec = res.duration_sec;
|
package/dist/types.d.ts
CHANGED
|
@@ -35,7 +35,7 @@ export interface InteractResponse {
|
|
|
35
35
|
export interface DispatcherIntent {
|
|
36
36
|
textResponse?: string;
|
|
37
37
|
imageParams?: any;
|
|
38
|
-
voiceArgs?:
|
|
38
|
+
voiceArgs?: VoiceArgs | null;
|
|
39
39
|
stateUpdate?: {
|
|
40
40
|
temperatureDelta?: string | number;
|
|
41
41
|
userNickname?: string;
|
|
@@ -50,6 +50,32 @@ export interface CoreMemory {
|
|
|
50
50
|
keyEvents: string[];
|
|
51
51
|
appointments: string[];
|
|
52
52
|
}
|
|
53
|
+
/**
|
|
54
|
+
* Generic dynamic voice args returned by the LLM and forwarded to backend TTS.
|
|
55
|
+
*
|
|
56
|
+
* - T lets callers/project code narrow this to model-specific fields when needed.
|
|
57
|
+
* - Defaults to fully dynamic key/value pairs for provider-agnostic SDK behavior.
|
|
58
|
+
*/
|
|
59
|
+
export type VoiceArgs<T extends Record<string, unknown> = Record<string, unknown>> = T;
|
|
60
|
+
/**
|
|
61
|
+
* Optional compatibility shape for currently common fields.
|
|
62
|
+
* Not used as the SDK contract to avoid coupling to specific providers.
|
|
63
|
+
*/
|
|
64
|
+
export interface CommonVoiceArgs {
|
|
65
|
+
style_instruction?: string;
|
|
66
|
+
emotion?: string;
|
|
67
|
+
}
|
|
68
|
+
export interface VoiceModelState {
|
|
69
|
+
tts_provider?: string;
|
|
70
|
+
dynamic_param_prompt_template?: string;
|
|
71
|
+
dynamic_params?: Array<{
|
|
72
|
+
name: string;
|
|
73
|
+
description: string;
|
|
74
|
+
type: string;
|
|
75
|
+
required: boolean;
|
|
76
|
+
default?: unknown;
|
|
77
|
+
}>;
|
|
78
|
+
}
|
|
53
79
|
export interface CharacterState {
|
|
54
80
|
current_time: string;
|
|
55
81
|
active_event?: any;
|
|
@@ -57,6 +83,7 @@ export interface CharacterState {
|
|
|
57
83
|
active_wardrobe?: any;
|
|
58
84
|
core_memory?: CoreMemory;
|
|
59
85
|
dynamic_context?: any;
|
|
86
|
+
voice_model?: VoiceModelState | null;
|
|
60
87
|
relationship_stage?: string;
|
|
61
88
|
name?: string;
|
|
62
89
|
age?: number;
|