@space3-npm/cybersoul-client 1.0.9 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -2
- package/dist/client.d.ts +25 -1
- package/dist/client.js +222 -33
- package/dist/providers/minimax.provider.js +1 -1
- package/dist/types.d.ts +57 -1
- package/package.json +4 -1
package/README.md
CHANGED
|
@@ -19,6 +19,8 @@ Cyber-Soul Service transforms static text-based virtual companions into fully re
|
|
|
19
19
|
|
|
20
20
|
## Installation
|
|
21
21
|
|
|
22
|
+
**Prerequisites:** This SDK uses the native `fetch` API and requires **Node.js 18 or higher** (or a modern browser environment).
|
|
23
|
+
|
|
22
24
|
You can install the SDK locally or via npm:
|
|
23
25
|
|
|
24
26
|
```bash
|
|
@@ -34,7 +36,7 @@ import { CyberSoulClient } from '@space3-npm/cybersoul-client';
|
|
|
34
36
|
|
|
35
37
|
const client = new CyberSoulClient({
|
|
36
38
|
characterKey: 'YOUR_CHARACTER_KEY_HASH', // Ties requests to your specific Cyber-Soul persona
|
|
37
|
-
backendUrl: '
|
|
39
|
+
backendUrl: 'https://space3.cloud', // The Cyber-Soul core service URL (e.g., http://localhost:3002 for local dev)
|
|
38
40
|
llmConfig: {
|
|
39
41
|
provider: 'minimax',
|
|
40
42
|
apiKey: 'YOUR_MINIMAX_API_KEY',
|
|
@@ -104,4 +106,6 @@ The SDK perfectly mirrors the underlying Cyber-Soul backend capabilities via typ
|
|
|
104
106
|
- `giftOutfit(descriptionText)`: Provisions a new explicit outfit descriptor to the character's backend inventory.
|
|
105
107
|
- `bootstrapCharacter(workspaceFiles)`: Automates character profile and prompt setup directly from local markdown configuration files.
|
|
106
108
|
- `generateDailyScript()`: Cron-job helper instructing the AI scheduling system to compute the next block of dynamic events and plans.
|
|
107
|
-
- `interact(params)`: The primary orchestrated multi-modal dialogue endpoint processing standard human <-> agent chat requests.
|
|
109
|
+
- `interact(params)`: The primary orchestrated multi-modal dialogue endpoint processing standard human <-> agent chat requests.
|
|
110
|
+
- `ondemandEvent(params)`: Evaluates and triggers an on-demand event, using the LLM to intelligently decide if the character accepts the event and whether an outfit change is appropriate.
|
|
111
|
+
- `consolidateCoreMemory(input)`: Uses edge LLM logic to merge recent events with the character's core memory and synchronizes the updated memory to the remote database.
|
package/dist/client.d.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
import { CyberSoulClientConfig, InteractParams, DispatcherIntent, InteractResponse, CharacterState, CoreMemory } from "./types.js";
|
|
1
|
+
import { CyberSoulClientConfig, InteractParams, OndemandEventParams, OndemandEventResponse, DispatcherIntent, InteractResponse, CharacterState, CoreMemory } from "./types.js";
|
|
2
2
|
export declare class CyberSoulClient {
|
|
3
3
|
private config;
|
|
4
4
|
private llm;
|
|
5
|
+
private cachedWardrobeStr;
|
|
6
|
+
private cachedWardrobeTime;
|
|
5
7
|
constructor(config: CyberSoulClientConfig);
|
|
6
8
|
/**
|
|
7
9
|
* Internal wrapper for fetch that automatically injects the backend URL and Character Auth token.
|
|
@@ -10,6 +12,27 @@ export declare class CyberSoulClient {
|
|
|
10
12
|
private buildStateContextPrompt;
|
|
11
13
|
private getImageSchemaParams;
|
|
12
14
|
private getVoiceSchemaParams;
|
|
15
|
+
private buildVoiceSchemaFromDynamicParams;
|
|
16
|
+
/**
|
|
17
|
+
* Returns the JSON schema snippet for voiceArgs to embed in the LLM output schema.
|
|
18
|
+
* Built from dynamic_params when available, otherwise falls back to static defaults.
|
|
19
|
+
*/
|
|
20
|
+
private getVoiceSchemaFromState;
|
|
21
|
+
/**
|
|
22
|
+
* Returns the natural-language director instruction for generating voiceArgs.
|
|
23
|
+
* Uses dynamic_param_prompt_template from the voice model when configured.
|
|
24
|
+
*/
|
|
25
|
+
private getVoiceDirectorInstruction;
|
|
26
|
+
/**
|
|
27
|
+
* Extracts and types voiceArgs from a raw standalone LLM response.
|
|
28
|
+
* The voice-only prompt wraps the result as { voiceArgs: { ... } } — unwraps the inner object.
|
|
29
|
+
* If the payload is already the inner args object (no voiceArgs wrapper), uses it as-is.
|
|
30
|
+
*/
|
|
31
|
+
private extractVoiceArgsFromLlmResponse;
|
|
32
|
+
/**
|
|
33
|
+
* Evaluates and triggers an on-demand event, intelligently deciding if an outfit change is needed.
|
|
34
|
+
*/
|
|
35
|
+
ondemandEvent(params: OndemandEventParams): Promise<OndemandEventResponse>;
|
|
13
36
|
/**
|
|
14
37
|
* Fetches the current dynamic context and daily state.
|
|
15
38
|
*/
|
|
@@ -51,6 +74,7 @@ export declare class CyberSoulClient {
|
|
|
51
74
|
*/
|
|
52
75
|
generateDailyScript(): Promise<void>;
|
|
53
76
|
private fetchRemoteState;
|
|
77
|
+
private getWardrobePromptStr;
|
|
54
78
|
private _updateDynamicContextInternal;
|
|
55
79
|
private generatePrimitive;
|
|
56
80
|
private normalizeRequestTypes;
|
package/dist/client.js
CHANGED
|
@@ -4,6 +4,8 @@ import { MinimaxProvider } from "./providers/minimax.provider.js";
|
|
|
4
4
|
export class CyberSoulClient {
|
|
5
5
|
config;
|
|
6
6
|
llm;
|
|
7
|
+
cachedWardrobeStr = null;
|
|
8
|
+
cachedWardrobeTime = 0;
|
|
7
9
|
constructor(config) {
|
|
8
10
|
this.config = config;
|
|
9
11
|
// Setup Provider
|
|
@@ -61,7 +63,7 @@ EMOTIONAL INERTIA RULES:
|
|
|
61
63
|
getImageSchemaParams() {
|
|
62
64
|
return `"imageParams": {
|
|
63
65
|
"mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
|
|
64
|
-
"full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH.",
|
|
66
|
+
"full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH. MUST align with the character's current Active Wardrobe unless the context/exposure explicitly demands otherwise (e.g., naked for intimate scenes).",
|
|
65
67
|
"expression": "seductive | cute | happy | sleepy | dazed | pleased | default (Strictly choose ONE from this exact list. DO NOT invent new words like 'shy'.)",
|
|
66
68
|
"condition": "normal | sweaty | wet | messy | oily (Strictly choose ONE from this exact list.)",
|
|
67
69
|
"view_angle": "front | side | high_angle | from_below | boyfriend_view | selfie | mirror (Strictly choose ONE from this exact list.)",
|
|
@@ -74,7 +76,142 @@ EMOTIONAL INERTIA RULES:
|
|
|
74
76
|
}`;
|
|
75
77
|
}
|
|
76
78
|
getVoiceSchemaParams() {
|
|
77
|
-
|
|
79
|
+
// Only reached when no dynamic_params are configured on the voice model.
|
|
80
|
+
// Configure dynamic_params in DB to match the TTS provider; this fallback is provider-agnostic.
|
|
81
|
+
console.warn("[CyberSoulClient] voice_model.dynamic_params not configured — using generic fallback schema. Configure dynamic_params in DB for provider-specific behaviour.");
|
|
82
|
+
return `"voiceArgs": { "style_instruction": "How the line should be spoken (required)" }`;
|
|
83
|
+
}
|
|
84
|
+
buildVoiceSchemaFromDynamicParams(dynamicParams) {
|
|
85
|
+
const fields = dynamicParams
|
|
86
|
+
.map((p) => {
|
|
87
|
+
const hint = p.required ? `${p.description} (required)` : `${p.description} (optional)`;
|
|
88
|
+
return `"${p.name}": "${hint}"`;
|
|
89
|
+
})
|
|
90
|
+
.join(", ");
|
|
91
|
+
return `"voiceArgs": { ${fields} }`;
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Returns the JSON schema snippet for voiceArgs to embed in the LLM output schema.
|
|
95
|
+
* Built from dynamic_params when available, otherwise falls back to static defaults.
|
|
96
|
+
*/
|
|
97
|
+
getVoiceSchemaFromState(state) {
|
|
98
|
+
const dynamicParams = state.voice_model?.dynamic_params;
|
|
99
|
+
if (dynamicParams && dynamicParams.length > 0) {
|
|
100
|
+
return this.buildVoiceSchemaFromDynamicParams(dynamicParams);
|
|
101
|
+
}
|
|
102
|
+
return this.getVoiceSchemaParams();
|
|
103
|
+
}
|
|
104
|
+
/**
|
|
105
|
+
* Returns the natural-language director instruction for generating voiceArgs.
|
|
106
|
+
* Uses dynamic_param_prompt_template from the voice model when configured.
|
|
107
|
+
*/
|
|
108
|
+
getVoiceDirectorInstruction(state) {
|
|
109
|
+
const template = state.voice_model?.dynamic_param_prompt_template?.trim();
|
|
110
|
+
if (template) {
|
|
111
|
+
return template;
|
|
112
|
+
}
|
|
113
|
+
return "Analyze the text according to the character's relationship stage and emotional inertia to determine the best dynamic voice parameters for TTS.";
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Extracts and types voiceArgs from a raw standalone LLM response.
|
|
117
|
+
* The voice-only prompt wraps the result as { voiceArgs: { ... } } — unwraps the inner object.
|
|
118
|
+
* If the payload is already the inner args object (no voiceArgs wrapper), uses it as-is.
|
|
119
|
+
*/
|
|
120
|
+
extractVoiceArgsFromLlmResponse(payload) {
|
|
121
|
+
const inner = payload.voiceArgs;
|
|
122
|
+
if (inner && typeof inner === "object" && !Array.isArray(inner)) {
|
|
123
|
+
return inner;
|
|
124
|
+
}
|
|
125
|
+
return payload;
|
|
126
|
+
}
|
|
127
|
+
/**
|
|
128
|
+
* Evaluates and triggers an on-demand event, intelligently deciding if an outfit change is needed.
|
|
129
|
+
*/
|
|
130
|
+
async ondemandEvent(params) {
|
|
131
|
+
try {
|
|
132
|
+
// 1. Fetch current state and wardrobe items
|
|
133
|
+
const [state, availableOutfits] = await Promise.all([
|
|
134
|
+
this.fetchRemoteState(),
|
|
135
|
+
this.getWardrobePromptStr()
|
|
136
|
+
]);
|
|
137
|
+
// 2. Build local Prompt
|
|
138
|
+
const systemPrompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
|
|
139
|
+
|
|
140
|
+
The user proposes a new event for you to participate in: "${params.eventDescription}".
|
|
141
|
+
Evaluate this based on your current state and relationship stage.
|
|
142
|
+
Decide if you will accept the event, and whether it requires changing your outfit.
|
|
143
|
+
|
|
144
|
+
Available Wardrobe Outfits:
|
|
145
|
+
${availableOutfits || "None available"}
|
|
146
|
+
|
|
147
|
+
You MUST output ONLY a valid JSON object matching this exact structure:
|
|
148
|
+
{
|
|
149
|
+
"acceptEvent": true,
|
|
150
|
+
"reason": "string (Why you accepted or declined, speaking in character)",
|
|
151
|
+
"requiresOutfitChange": false,
|
|
152
|
+
"selectedOutfitId": null
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
Example Valid Answer:
|
|
156
|
+
{
|
|
157
|
+
"acceptEvent": true,
|
|
158
|
+
"reason": "Sure, I'd love to go to the cafe. It sounds relaxing.",
|
|
159
|
+
"requiresOutfitChange": false,
|
|
160
|
+
"selectedOutfitId": null
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
CRITICAL: Output MUST be ONLY valid JSON with no markdown block wrappers. Do NOT wrap the JSON in \`\`\`json or add conversational text.`;
|
|
164
|
+
const promptMessages = [
|
|
165
|
+
{ role: "system", content: systemPrompt },
|
|
166
|
+
...(params.interactParams?.history || []).map((msg) => ({
|
|
167
|
+
role: msg.role,
|
|
168
|
+
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
|
|
169
|
+
})),
|
|
170
|
+
{
|
|
171
|
+
role: "user",
|
|
172
|
+
content: `${params.interactParams?.userMessage || `Event Proposal: ${params.eventDescription}`}\n\n**CRITICAL REMINDER**: You MUST output your final response exactly in the JSON format specified in the system prompt. DO NOT output plain text directly.`,
|
|
173
|
+
},
|
|
174
|
+
];
|
|
175
|
+
// 3. Evaluate with LLM
|
|
176
|
+
const rawLlmResponse = await this.llm.generate(promptMessages, 800, 0.5);
|
|
177
|
+
// console.debug("[CyberSoulClient ondemandEvent] Raw LLM Response:", rawLlmResponse);
|
|
178
|
+
let decisionData = {};
|
|
179
|
+
try {
|
|
180
|
+
decisionData = robustJsonParse(rawLlmResponse, "OndemandEvent fallback");
|
|
181
|
+
}
|
|
182
|
+
catch (e) {
|
|
183
|
+
throw new Error(`Failed to parse LLM decision for ondemandEvent. Raw response: ${rawLlmResponse}`);
|
|
184
|
+
}
|
|
185
|
+
// 4. API call if accepted
|
|
186
|
+
if (decisionData.acceptEvent === true) {
|
|
187
|
+
const payload = {
|
|
188
|
+
eventDescription: params.eventDescription,
|
|
189
|
+
durationMins: params.durationMins || 60,
|
|
190
|
+
outfitId: decisionData.requiresOutfitChange ? decisionData.selectedOutfitId : undefined,
|
|
191
|
+
};
|
|
192
|
+
const backendRes = await this.apiFetch("/api/v1/cyber-soul/characters/ondemand-event", {
|
|
193
|
+
method: "POST",
|
|
194
|
+
body: JSON.stringify(payload),
|
|
195
|
+
});
|
|
196
|
+
if (!backendRes.ok) {
|
|
197
|
+
throw new Error("Backend failed to schedule the on-demand event");
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
return {
|
|
201
|
+
status: "success",
|
|
202
|
+
acceptEvent: decisionData.acceptEvent,
|
|
203
|
+
reason: decisionData.reason,
|
|
204
|
+
requiresOutfitChange: decisionData.requiresOutfitChange,
|
|
205
|
+
selectedOutfitId: decisionData.selectedOutfitId,
|
|
206
|
+
};
|
|
207
|
+
}
|
|
208
|
+
catch (error) {
|
|
209
|
+
console.error("[CyberSoulClient] ondemandEvent Error: ", error);
|
|
210
|
+
return {
|
|
211
|
+
status: "error",
|
|
212
|
+
error: error.message,
|
|
213
|
+
};
|
|
214
|
+
}
|
|
78
215
|
}
|
|
79
216
|
/**
|
|
80
217
|
* Fetches the current dynamic context and daily state.
|
|
@@ -86,25 +223,14 @@ EMOTIONAL INERTIA RULES:
|
|
|
86
223
|
* Updates the character's relationship temperature or mood.
|
|
87
224
|
*/
|
|
88
225
|
async updateDynamicContext(stateUpdate) {
|
|
89
|
-
|
|
90
|
-
return;
|
|
91
|
-
// Map TS schema intent (temperatureDelta) to match Backend payload schema (temperature)
|
|
92
|
-
const payload = { ...stateUpdate };
|
|
93
|
-
if (payload.temperatureDelta !== undefined) {
|
|
94
|
-
payload.temperature = payload.temperatureDelta;
|
|
95
|
-
delete payload.temperatureDelta;
|
|
96
|
-
}
|
|
97
|
-
await this.apiFetch("/api/v1/cyber-soul/characters/dynamic-context", {
|
|
98
|
-
method: "PATCH",
|
|
99
|
-
body: JSON.stringify(payload),
|
|
100
|
-
}).catch((e) => console.error("Failed to update dynamic context", e)); // non-blocking error handler
|
|
226
|
+
return this._updateDynamicContextInternal(stateUpdate);
|
|
101
227
|
}
|
|
102
228
|
/**
|
|
103
229
|
* Manually generate an image of the character outside of chat flow.
|
|
104
230
|
*/
|
|
105
231
|
async generateImage(params) {
|
|
106
232
|
let imageParams = {};
|
|
107
|
-
const state = await this.
|
|
233
|
+
const state = await this.fetchRemoteState();
|
|
108
234
|
const prompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
|
|
109
235
|
|
|
110
236
|
You are an AI image prompt director. Analyze the scene description according to the character's relationship stage and emotional inertia to determine the best image generation parameters.
|
|
@@ -121,7 +247,7 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
|
|
|
121
247
|
},
|
|
122
248
|
];
|
|
123
249
|
const llmRes = await this.llm.generate(promptMessages, 800, 0.4);
|
|
124
|
-
console.
|
|
250
|
+
// console.debug("[CyberSoulClient ImageGen] Raw LLM Response:", llmRes);
|
|
125
251
|
try {
|
|
126
252
|
const parsedImageArgs = robustJsonParse(llmRes, "generateImage args fallback");
|
|
127
253
|
imageParams = parsedImageArgs.imageParams || parsedImageArgs;
|
|
@@ -139,12 +265,14 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
|
|
|
139
265
|
*/
|
|
140
266
|
async generateVoice(params) {
|
|
141
267
|
let dynamicArgs = {};
|
|
142
|
-
const state = await this.
|
|
268
|
+
const state = await this.fetchRemoteState();
|
|
143
269
|
const prompt = `${this.buildStateContextPrompt(state, params.interactParams?.localContext)}
|
|
144
270
|
|
|
145
|
-
You are a voice acting director.
|
|
146
|
-
|
|
147
|
-
|
|
271
|
+
You are a voice acting director. ${this.getVoiceDirectorInstruction(state)}
|
|
272
|
+
Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly matching this schema:
|
|
273
|
+
{
|
|
274
|
+
${this.getVoiceSchemaFromState(state)}
|
|
275
|
+
}`;
|
|
148
276
|
const promptMessages = [
|
|
149
277
|
{ role: "system", content: prompt },
|
|
150
278
|
...(params.interactParams?.history || []),
|
|
@@ -154,12 +282,13 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
|
|
|
154
282
|
},
|
|
155
283
|
];
|
|
156
284
|
const llmRes = await this.llm.generate(promptMessages, 800, 0.3);
|
|
157
|
-
console.
|
|
285
|
+
// console.debug("[CyberSoulClient VoiceGen] Raw LLM Response:", llmRes);
|
|
158
286
|
try {
|
|
159
|
-
|
|
287
|
+
const parsedVoicePayload = robustJsonParse(llmRes, "generateVoice args fallback");
|
|
288
|
+
dynamicArgs = this.extractVoiceArgsFromLlmResponse(parsedVoicePayload);
|
|
160
289
|
}
|
|
161
290
|
catch (e) {
|
|
162
|
-
dynamicArgs = {};
|
|
291
|
+
dynamicArgs = {};
|
|
163
292
|
}
|
|
164
293
|
const res = await this.generatePrimitive("voice", {
|
|
165
294
|
text: params.text,
|
|
@@ -210,6 +339,33 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
|
|
|
210
339
|
const json = await res.json();
|
|
211
340
|
return json.data;
|
|
212
341
|
}
|
|
342
|
+
async getWardrobePromptStr() {
|
|
343
|
+
const now = Date.now();
|
|
344
|
+
if (this.cachedWardrobeStr && (now - this.cachedWardrobeTime <= 5 * 60 * 1000)) {
|
|
345
|
+
return this.cachedWardrobeStr;
|
|
346
|
+
}
|
|
347
|
+
let availableOutfits = "None available";
|
|
348
|
+
try {
|
|
349
|
+
const wardrobeRes = await this.apiFetch("/api/v1/cyber-soul/wardrobe");
|
|
350
|
+
if (wardrobeRes.ok) {
|
|
351
|
+
let wardrobesPayload = {};
|
|
352
|
+
try {
|
|
353
|
+
wardrobesPayload = await wardrobeRes.json();
|
|
354
|
+
}
|
|
355
|
+
catch (e) { }
|
|
356
|
+
const wardrobes = wardrobesPayload.data || [];
|
|
357
|
+
if (wardrobes.length > 0) {
|
|
358
|
+
availableOutfits = wardrobes
|
|
359
|
+
.map((w) => `- ID: ${w.id} | Name: ${w.itemName} | Category: ${w.category}`)
|
|
360
|
+
.join("\n");
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
catch (e) { }
|
|
365
|
+
this.cachedWardrobeStr = availableOutfits;
|
|
366
|
+
this.cachedWardrobeTime = now;
|
|
367
|
+
return availableOutfits;
|
|
368
|
+
}
|
|
213
369
|
async _updateDynamicContextInternal(stateUpdate) {
|
|
214
370
|
if (!stateUpdate)
|
|
215
371
|
return;
|
|
@@ -229,8 +385,17 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
|
|
|
229
385
|
method: "POST",
|
|
230
386
|
body: JSON.stringify(payload),
|
|
231
387
|
});
|
|
232
|
-
if (!res.ok)
|
|
233
|
-
|
|
388
|
+
if (!res.ok) {
|
|
389
|
+
let errData;
|
|
390
|
+
try {
|
|
391
|
+
errData = await res.json();
|
|
392
|
+
}
|
|
393
|
+
catch (e) { }
|
|
394
|
+
const msg = errData?.message || errData?.error || `Status ${res.status}`;
|
|
395
|
+
const err = new Error(`Failed to generate ${type}: ${msg}`);
|
|
396
|
+
err.code = errData?.code || "UNKNOWN_ERROR";
|
|
397
|
+
throw err;
|
|
398
|
+
}
|
|
234
399
|
return res.json();
|
|
235
400
|
}
|
|
236
401
|
normalizeRequestTypes(requestTypes) {
|
|
@@ -246,13 +411,19 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
|
|
|
246
411
|
}
|
|
247
412
|
async interact(params) {
|
|
248
413
|
try {
|
|
249
|
-
// 1. Sync remote context
|
|
250
|
-
|
|
414
|
+
// 1. Sync remote context and wardrobe (for event triggering)
|
|
415
|
+
// We cache the wardrobe payload for 5 minutes to avoid huge payloads on every chat turn
|
|
416
|
+
const [state, availableOutfits] = await Promise.all([
|
|
417
|
+
this.fetchRemoteState(),
|
|
418
|
+
this.getWardrobePromptStr()
|
|
419
|
+
]);
|
|
251
420
|
// 2. Build local Prompt
|
|
252
421
|
const types = this.normalizeRequestTypes(params.requestTypes);
|
|
253
422
|
const isAuto = types.includes(InteractRequestType.AUTO);
|
|
254
423
|
// Combine state info into a clean descriptive context
|
|
255
424
|
const systemPrompt = `${this.buildStateContextPrompt(state, params.localContext)}
|
|
425
|
+
Available Wardrobe Outfits (For event triggers):
|
|
426
|
+
${availableOutfits}
|
|
256
427
|
|
|
257
428
|
The user has sent a message. You must evaluate the context and the user's message, and return a JSON object (no markdown formatting) that dictates the character's multi-modal response.
|
|
258
429
|
|
|
@@ -260,18 +431,21 @@ ${isAuto
|
|
|
260
431
|
? `Analyze the user's message to determine the appropriate response modalities (text, image, voice).
|
|
261
432
|
- Always include 'textResponse'.
|
|
262
433
|
- If the user explicitly asks to see a photo, look at you, or describing an action that warrants a photo, include 'imageParams'.
|
|
263
|
-
- If the user wants to hear you, or if appropriate for a voice message, include 'voiceArgs'
|
|
434
|
+
- If the user wants to hear you, or if appropriate for a voice message, include 'voiceArgs'.
|
|
435
|
+
- If the user proposes a new activity or hangout (e.g., "let's go to the cafe", "do you want to watch a movie?"), include 'triggerEvent' to schedule it.`
|
|
264
436
|
: `Requested types to fulfill: ${types.join(", ")}`}
|
|
265
437
|
If the user's message shifts the emotional mood, establishes new nicknames, or warrants a relationship temperature change, you MUST include a 'stateUpdate' block. Temperature goes from 0 (cold/angry) to 100 (obsessively in love).
|
|
438
|
+
Voice direction for voiceArgs: ${this.getVoiceDirectorInstruction(state)}
|
|
266
439
|
|
|
267
440
|
Output JSON Schema:
|
|
268
441
|
{
|
|
269
442
|
"textResponse": "The direct spoken dialogue in Chinese",
|
|
270
443
|
"stateUpdate": { "temperatureDelta": "+1 to -1", "userNickname": "What you now call the user", "agentNickname": "What the user calls you", "talkingStyle": "Current mood/style of talking" },
|
|
444
|
+
"triggerEvent": { "eventDescription": "Going to a cafe", "durationMins": 60, "outfitId": "optional wardrobe ID to change into if appropriate" },
|
|
271
445
|
${this.getImageSchemaParams()},
|
|
272
|
-
${this.
|
|
446
|
+
${this.getVoiceSchemaFromState(state)}
|
|
273
447
|
}
|
|
274
|
-
Note: If "imageParams", "voiceArgs", or "
|
|
448
|
+
Note: If "imageParams", "voiceArgs", "stateUpdate", or "triggerEvent" are not needed, set their values to null instead of omitting the keys completely (e.g., "imageParams": null). Output MUST be ONLY valid JSON with no markdown block wrappers. CRITICAL: Ensure your JSON has exactly one root object \`{\` and ends with exactly one \`}\` without any trailing garbage or extra brackets.`;
|
|
275
449
|
const promptMessages = [
|
|
276
450
|
{ role: "system", content: systemPrompt },
|
|
277
451
|
...(params.history || []),
|
|
@@ -283,7 +457,7 @@ Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their
|
|
|
283
457
|
];
|
|
284
458
|
// 3. Local Execute LLM
|
|
285
459
|
const rawLlmResponse = await this.llm.generate(promptMessages, 1500, 0.7);
|
|
286
|
-
console.
|
|
460
|
+
// console.debug("[CyberSoulClient] Raw LLM Response:", rawLlmResponse);
|
|
287
461
|
let parsedIntent;
|
|
288
462
|
try {
|
|
289
463
|
parsedIntent = robustJsonParse(rawLlmResponse, "Dispatcher fallback");
|
|
@@ -295,7 +469,7 @@ Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their
|
|
|
295
469
|
textResponse: rawLlmResponse.replace(/^[\`\s]+|[\`\s]+$/g, "").trim(),
|
|
296
470
|
};
|
|
297
471
|
}
|
|
298
|
-
console.
|
|
472
|
+
// console.debug("[CyberSoulClient] Parsed Intent:", parsedIntent);
|
|
299
473
|
// 4. Update Backend State async
|
|
300
474
|
if (parsedIntent && parsedIntent.stateUpdate) {
|
|
301
475
|
this._updateDynamicContextInternal(parsedIntent.stateUpdate);
|
|
@@ -309,6 +483,17 @@ Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their
|
|
|
309
483
|
let finalImageUrl = undefined;
|
|
310
484
|
let finalAudioUrl = undefined;
|
|
311
485
|
let finalDurationSec = undefined;
|
|
486
|
+
// Output Event Trigger
|
|
487
|
+
if (isAuto && parsedIntent.triggerEvent) {
|
|
488
|
+
mediaTasks.push(this.apiFetch("/api/v1/cyber-soul/characters/ondemand-event", {
|
|
489
|
+
method: "POST",
|
|
490
|
+
body: JSON.stringify({
|
|
491
|
+
eventDescription: parsedIntent.triggerEvent.eventDescription,
|
|
492
|
+
durationMins: parsedIntent.triggerEvent.durationMins || 60,
|
|
493
|
+
outfitId: parsedIntent.triggerEvent.outfitId || undefined,
|
|
494
|
+
}),
|
|
495
|
+
}).catch(e => console.error("[CyberSoulClient] Auto-triggered ondemandEvent failed:", e)));
|
|
496
|
+
}
|
|
312
497
|
const shouldGenerateImage = types.includes(InteractRequestType.IMAGE) ||
|
|
313
498
|
(isAuto && !!parsedIntent.imageParams);
|
|
314
499
|
if (shouldGenerateImage) {
|
|
@@ -319,9 +504,12 @@ Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their
|
|
|
319
504
|
const shouldGenerateVoice = types.includes(InteractRequestType.VOICE) ||
|
|
320
505
|
(isAuto && !!parsedIntent.voiceArgs);
|
|
321
506
|
if (shouldGenerateVoice) {
|
|
507
|
+
const normalizedVoiceArgs = parsedIntent.voiceArgs && typeof parsedIntent.voiceArgs === "object"
|
|
508
|
+
? parsedIntent.voiceArgs
|
|
509
|
+
: {};
|
|
322
510
|
mediaTasks.push(this.generatePrimitive("voice", {
|
|
323
511
|
text: parsedIntent.textResponse,
|
|
324
|
-
dynamicArgs:
|
|
512
|
+
dynamicArgs: normalizedVoiceArgs,
|
|
325
513
|
}).then((res) => {
|
|
326
514
|
finalAudioUrl = res.audio_url;
|
|
327
515
|
finalDurationSec = res.duration_sec;
|
|
@@ -335,6 +523,7 @@ Note: If "imageParams", "voiceArgs", or "stateUpdate" are not needed, set their
|
|
|
335
523
|
imageUrl: finalImageUrl,
|
|
336
524
|
audioUrl: finalAudioUrl,
|
|
337
525
|
durationSec: finalDurationSec,
|
|
526
|
+
triggeredEvent: parsedIntent.triggerEvent || undefined,
|
|
338
527
|
};
|
|
339
528
|
}
|
|
340
529
|
catch (error) {
|
package/dist/providers/minimax.provider.js
CHANGED
|
@@ -28,7 +28,7 @@ export class MinimaxProvider {
|
|
|
28
28
|
throw new Error(`MiniMax API returned status: ${response.status}`);
|
|
29
29
|
}
|
|
30
30
|
const data = await response.json();
|
|
31
|
-
console.
|
|
31
|
+
// console.debug("[MinimaxProvider] API Response Payload:", data);
|
|
32
32
|
if (data?.base_resp?.status_code && data.base_resp.status_code !== 0) {
|
|
33
33
|
throw new Error(`MiniMax SDK Error [${data.base_resp.status_code}]: ${data.base_resp.status_msg}`);
|
|
34
34
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -24,24 +24,53 @@ export interface InteractParams {
|
|
|
24
24
|
}[];
|
|
25
25
|
onTextReady?: (textResponse: string) => void;
|
|
26
26
|
}
|
|
27
|
+
export interface OndemandEventParams {
|
|
28
|
+
eventDescription: string;
|
|
29
|
+
durationMins?: number;
|
|
30
|
+
interactParams?: InteractParams;
|
|
31
|
+
}
|
|
32
|
+
export interface OndemandEventResponse {
|
|
33
|
+
status: 'success' | 'error';
|
|
34
|
+
acceptEvent?: boolean;
|
|
35
|
+
reason?: string;
|
|
36
|
+
requiresOutfitChange?: boolean;
|
|
37
|
+
selectedOutfitId?: string;
|
|
38
|
+
error?: string;
|
|
39
|
+
}
|
|
40
|
+
export interface WardrobeItem {
|
|
41
|
+
id: string;
|
|
42
|
+
itemName: string;
|
|
43
|
+
category: string;
|
|
44
|
+
promptModifier: string;
|
|
45
|
+
}
|
|
27
46
|
export interface InteractResponse {
|
|
28
47
|
status: 'success' | 'error';
|
|
29
48
|
textResponse: string;
|
|
30
49
|
imageUrl?: string;
|
|
31
50
|
audioUrl?: string;
|
|
32
51
|
durationSec?: number;
|
|
52
|
+
triggeredEvent?: {
|
|
53
|
+
eventDescription: string;
|
|
54
|
+
durationMins?: number;
|
|
55
|
+
outfitId?: string | null;
|
|
56
|
+
};
|
|
33
57
|
error?: string;
|
|
34
58
|
}
|
|
35
59
|
export interface DispatcherIntent {
|
|
36
60
|
textResponse?: string;
|
|
37
61
|
imageParams?: any;
|
|
38
|
-
voiceArgs?:
|
|
62
|
+
voiceArgs?: VoiceArgs | null;
|
|
39
63
|
stateUpdate?: {
|
|
40
64
|
temperatureDelta?: string | number;
|
|
41
65
|
userNickname?: string;
|
|
42
66
|
agentNickname?: string;
|
|
43
67
|
talkingStyle?: string;
|
|
44
68
|
};
|
|
69
|
+
triggerEvent?: {
|
|
70
|
+
eventDescription: string;
|
|
71
|
+
durationMins?: number;
|
|
72
|
+
outfitId?: string | null;
|
|
73
|
+
} | null;
|
|
45
74
|
}
|
|
46
75
|
export interface CoreMemory {
|
|
47
76
|
relationshipStatus: string;
|
|
@@ -50,6 +79,32 @@ export interface CoreMemory {
|
|
|
50
79
|
keyEvents: string[];
|
|
51
80
|
appointments: string[];
|
|
52
81
|
}
|
|
82
|
+
/**
|
|
83
|
+
* Generic dynamic voice args returned by the LLM and forwarded to backend TTS.
|
|
84
|
+
*
|
|
85
|
+
* - T lets callers/project code narrow this to model-specific fields when needed.
|
|
86
|
+
* - Defaults to fully dynamic key/value pairs for provider-agnostic SDK behavior.
|
|
87
|
+
*/
|
|
88
|
+
export type VoiceArgs<T extends Record<string, unknown> = Record<string, unknown>> = T;
|
|
89
|
+
/**
|
|
90
|
+
* Optional compatibility shape for currently common fields.
|
|
91
|
+
* Not used as the SDK contract to avoid coupling to specific providers.
|
|
92
|
+
*/
|
|
93
|
+
export interface CommonVoiceArgs {
|
|
94
|
+
style_instruction?: string;
|
|
95
|
+
emotion?: string;
|
|
96
|
+
}
|
|
97
|
+
export interface VoiceModelState {
|
|
98
|
+
tts_provider?: string;
|
|
99
|
+
dynamic_param_prompt_template?: string;
|
|
100
|
+
dynamic_params?: Array<{
|
|
101
|
+
name: string;
|
|
102
|
+
description: string;
|
|
103
|
+
type: string;
|
|
104
|
+
required: boolean;
|
|
105
|
+
default?: unknown;
|
|
106
|
+
}>;
|
|
107
|
+
}
|
|
53
108
|
export interface CharacterState {
|
|
54
109
|
current_time: string;
|
|
55
110
|
active_event?: any;
|
|
@@ -57,6 +112,7 @@ export interface CharacterState {
|
|
|
57
112
|
active_wardrobe?: any;
|
|
58
113
|
core_memory?: CoreMemory;
|
|
59
114
|
dynamic_context?: any;
|
|
115
|
+
voice_model?: VoiceModelState | null;
|
|
60
116
|
relationship_stage?: string;
|
|
61
117
|
name?: string;
|
|
62
118
|
age?: number;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@space3-npm/cybersoul-client",
|
|
3
|
-
"version": "1.0.9",
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -28,6 +28,9 @@
|
|
|
28
28
|
"author": "Space3 Digital Media Tech Studio",
|
|
29
29
|
"license": "MIT",
|
|
30
30
|
"description": "Cyber-Soul multimodal character interaction SDK by Space3 Digital Media Tech Studio",
|
|
31
|
+
"engines": {
|
|
32
|
+
"node": ">=18.0.0"
|
|
33
|
+
},
|
|
31
34
|
"devDependencies": {
|
|
32
35
|
"@types/node": "^25.6.0",
|
|
33
36
|
"ts-node": "^10.9.2",
|