@space3-npm/cybersoul-client 1.2.4 → 1.2.5
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/dist/client.js +70 -27
- package/package.json +1 -1
package/dist/client.js
CHANGED
|
@@ -142,15 +142,22 @@ export class CyberSoulClient {
|
|
|
142
142
|
}).catch((e) => console.error("Failed to update dynamic context", e)); // non-blocking error handler
|
|
143
143
|
}
|
|
144
144
|
normalizeRequestTypes(requestTypes) {
|
|
145
|
-
|
|
146
|
-
|
|
145
|
+
let normalized = requestTypes;
|
|
146
|
+
if (!normalized || normalized.length === 0) {
|
|
147
|
+
normalized = [InteractRequestType.AUTO, InteractRequestType.TEXT];
|
|
148
|
+
}
|
|
149
|
+
else {
|
|
150
|
+
normalized = [...normalized];
|
|
151
|
+
}
|
|
152
|
+
if (!normalized.includes(InteractRequestType.TEXT)) {
|
|
153
|
+
normalized.push(InteractRequestType.TEXT);
|
|
147
154
|
}
|
|
148
155
|
const validRequestTypes = new Set(Object.values(InteractRequestType));
|
|
149
|
-
const invalidRequestTypes =
|
|
156
|
+
const invalidRequestTypes = normalized.filter((type) => !validRequestTypes.has(type));
|
|
150
157
|
if (invalidRequestTypes.length > 0) {
|
|
151
158
|
throw new Error(`Invalid requestTypes: ${invalidRequestTypes.join(", ")}. Allowed values: ${Object.values(InteractRequestType).join(", ")}`);
|
|
152
159
|
}
|
|
153
|
-
return
|
|
160
|
+
return normalized;
|
|
154
161
|
}
|
|
155
162
|
buildStateContextPrompt(state, localContext) {
|
|
156
163
|
const dyn = state.dynamic_context || {};
|
|
@@ -178,7 +185,7 @@ Current time: ${new Date(currentTimeMs).toLocaleString("zh-CN", { timeZone: "Asi
|
|
|
178
185
|
let isOutdated = false;
|
|
179
186
|
if (dyn.lastInteractionAt) {
|
|
180
187
|
const elapsedHours = (currentTimeMs - new Date(dyn.lastInteractionAt).getTime()) / (1000 * 60 * 60);
|
|
181
|
-
if (elapsedHours >
|
|
188
|
+
if (elapsedHours > 1) {
|
|
182
189
|
isOutdated = true;
|
|
183
190
|
contextParts.push(`${lastKnownSceneLine}\n[CRITICAL SCENE SHIFT]: It has been ${elapsedHours.toFixed(1)} hours since the last discussion. The 'Last Known Scene' is now strictly OUTDATED. You MUST abandon the previous scene context entirely and transition to a new scene appropriate for the 'Current time' and 'Active Event'. DO NOT continue the old actions or environment!`);
|
|
184
191
|
}
|
|
@@ -190,15 +197,15 @@ Current time: ${new Date(currentTimeMs).toLocaleString("zh-CN", { timeZone: "Asi
|
|
|
190
197
|
if (state.active_event) {
|
|
191
198
|
contextParts.push(`Active Event: ${state.active_event.title} (${state.active_event.narrative_context})`);
|
|
192
199
|
}
|
|
193
|
-
if (
|
|
200
|
+
/* if (localContext) {
|
|
201
|
+
contextParts.push(`Additional Context: ${localContext}`);
|
|
202
|
+
}
|
|
203
|
+
*/ if (state.next_event) {
|
|
194
204
|
contextParts.push(`Next Event: ${state.next_event.title} at ${state.next_event.start_time} (in ${state.next_event.time_until_mins} mins)`);
|
|
195
205
|
}
|
|
196
206
|
if (state.active_wardrobe) {
|
|
197
207
|
contextParts.push(`Wardrobe: ${state.active_wardrobe.name || state.active_wardrobe.id || "Current"}`);
|
|
198
208
|
}
|
|
199
|
-
if (localContext) {
|
|
200
|
-
contextParts.push(`Additional Context: ${localContext}`);
|
|
201
|
-
}
|
|
202
209
|
if (state.core_memory) {
|
|
203
210
|
let memoryLines = ["[CORE MEMORY]"];
|
|
204
211
|
const mem = state.core_memory;
|
|
@@ -292,7 +299,9 @@ ${scenarioContext}
|
|
|
292
299
|
}
|
|
293
300
|
return undefined;
|
|
294
301
|
}
|
|
295
|
-
getImageSchemaParams() {
|
|
302
|
+
getImageSchemaParams(allowed) {
|
|
303
|
+
if (!allowed)
|
|
304
|
+
return `"imageParams": null`;
|
|
296
305
|
return `"imageParams": {
|
|
297
306
|
"mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
|
|
298
307
|
"full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH. CRITICAL: MUST use a strict first-person perspective exclusively from the USER's eyes. DO NOT describe the user (e.g., 'a man', 'the driver') as visible in the scene because the camera IS the user. Start with 'POV: '. Describe ONLY the character looking back at the camera and their immediate surroundings. MUST align with the character's current Active exposure state or Wardrobe depends on the scene. Explicitly describe the character's exact clothing (or specify naked/half-naked if applicable).",
|
|
@@ -335,7 +344,9 @@ ${scenarioContext}
|
|
|
335
344
|
* Returns the JSON schema snippet for voiceArgs to embed in the LLM output schema.
|
|
336
345
|
* Built from dynamic_params when available, otherwise falls back to static defaults.
|
|
337
346
|
*/
|
|
338
|
-
getVoiceSchemaFromState(state) {
|
|
347
|
+
getVoiceSchemaFromState(state, allowed) {
|
|
348
|
+
if (!allowed)
|
|
349
|
+
return `"voiceArgs": null`;
|
|
339
350
|
const dynamicParams = state.voice_model?.dynamic_params;
|
|
340
351
|
if (dynamicParams && dynamicParams.length > 0) {
|
|
341
352
|
return this.buildVoiceSchemaFromDynamicParams(dynamicParams);
|
|
@@ -391,6 +402,43 @@ ${scenarioContext}
|
|
|
391
402
|
// 2. Build local Prompt
|
|
392
403
|
const types = this.normalizeRequestTypes(params.requestTypes);
|
|
393
404
|
const isAuto = types.includes(InteractRequestType.AUTO);
|
|
405
|
+
const requestedOthers = types.filter((t) => t !== InteractRequestType.AUTO && t !== InteractRequestType.TEXT);
|
|
406
|
+
let modalitiesInstruction = "";
|
|
407
|
+
if (isAuto) {
|
|
408
|
+
modalitiesInstruction = `Analyze the user's message and optionally decide to use allowed modalities: ${requestedOthers.join(", ") || "none"}.
|
|
409
|
+
- 'textResponse' is ALWAYS REQUIRED.
|
|
410
|
+
- The modalities you are ALLOWED to dynamically include: ${requestedOthers.length > 0 ? requestedOthers.join(", ") : "None (Only text is allowed)"}. Do not include other modalities.`;
|
|
411
|
+
if (requestedOthers.includes(InteractRequestType.IMAGE)) {
|
|
412
|
+
modalitiesInstruction += `\n - Include 'imageParams' for visual/photo requests or key visual moments during active events; explicitly describe current clothing/exposure in image fields.`;
|
|
413
|
+
}
|
|
414
|
+
else {
|
|
415
|
+
modalitiesInstruction += `\n - ALWAYS set 'imageParams' to null. If the user explicitly asks for a picture, FIRMLY decline naturally in your 'textResponse' (e.g., say you absolutely cannot right now). NEVER pretend to send one, and NEVER give in no matter how many times they ask.`;
|
|
416
|
+
}
|
|
417
|
+
if (requestedOthers.includes(InteractRequestType.VOICE)) {
|
|
418
|
+
modalitiesInstruction += `\n - Include 'voiceArgs' ONLY if the complicated tone/emotion is hard to express via pure text, or if the user explicitly requests to hear your voice. Otherwise, set it to null.`;
|
|
419
|
+
}
|
|
420
|
+
else {
|
|
421
|
+
modalitiesInstruction += `\n - ALWAYS set 'voiceArgs' to null.`;
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
else {
|
|
425
|
+
modalitiesInstruction = `You MUST return the requested modalities: ${requestedOthers.join(", ") || "only text"}.
|
|
426
|
+
- 'textResponse' is ALWAYS REQUIRED.`;
|
|
427
|
+
if (requestedOthers.includes(InteractRequestType.IMAGE)) {
|
|
428
|
+
modalitiesInstruction += `\n - 'imageParams' is REQUIRED. Include it and explicitly describe current clothing/exposure in image fields.`;
|
|
429
|
+
}
|
|
430
|
+
else {
|
|
431
|
+
modalitiesInstruction += `\n - ALWAYS set 'imageParams' to null. If the user explicitly asks for a picture, FIRMLY decline naturally in your 'textResponse' (e.g., say you absolutely cannot right now). NEVER pretend to send one, and NEVER give in no matter how many times they ask.`;
|
|
432
|
+
}
|
|
433
|
+
if (requestedOthers.includes(InteractRequestType.VOICE)) {
|
|
434
|
+
modalitiesInstruction += `\n - 'voiceArgs' is REQUIRED. Include it.`;
|
|
435
|
+
}
|
|
436
|
+
else {
|
|
437
|
+
modalitiesInstruction += `\n - ALWAYS set 'voiceArgs' to null.`;
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
modalitiesInstruction += `\n - Include 'triggerEvent' only if the VERY LAST USER MESSAGE proposes a new activity/hangout; ignore older history.
|
|
441
|
+
- Outfit acquisition (VERY LAST USER MESSAGE only): set giftOutfit for gift/buy/add-clothes intent; otherwise null. giftOutfit format: { "descriptionText": "short outfit description" }.`;
|
|
394
442
|
// Combine state info into a clean descriptive context
|
|
395
443
|
const systemPrompt = `${this.buildStateContextPrompt(state, params.localContext)}
|
|
396
444
|
Available Wardrobe Outfits (For event triggers):
|
|
@@ -398,14 +446,7 @@ ${availableOutfits}
|
|
|
398
446
|
|
|
399
447
|
The user has sent a message. You must evaluate the context and the user's message, and return a JSON object (no markdown formatting) that dictates the character's multi-modal response.
|
|
400
448
|
|
|
401
|
-
${
|
|
402
|
-
? `Analyze the user's message and decide response modalities (text, image, voice).
|
|
403
|
-
- Always include 'textResponse'.
|
|
404
|
-
- Include 'imageParams' for visual/photo requests or key visual moments during active events; explicitly describe current clothing/exposure in image fields.
|
|
405
|
-
- Include 'voiceArgs' ONLY if the complicated tone/emotion is hard to express via pure text, or if the user explicitly requests to hear your voice. Otherwise, set it to null.
|
|
406
|
-
- Include 'triggerEvent' only if the VERY LAST USER MESSAGE proposes a new activity/hangout; ignore older history.
|
|
407
|
-
- Outfit acquisition (VERY LAST USER MESSAGE only): set giftOutfit for gift/buy/add-clothes intent; otherwise null. giftOutfit format: { "descriptionText": "short outfit description" }.`
|
|
408
|
-
: `Requested types to fulfill: ${types.join(", ")}`}
|
|
449
|
+
${modalitiesInstruction}
|
|
409
450
|
Every turn adjusts trust: positive +1, negative -1, neutral 0. Always include 'stateUpdate' with integer 'temperatureDelta' (range guidance: 0 cold to 100 obsessive).
|
|
410
451
|
|
|
411
452
|
Always return 'stateUpdate.ongoingScene' as an object with both keys: { "scene": string, "outfit": string }.
|
|
@@ -414,6 +455,8 @@ For 'ongoingScene.outfit': decide based on the current active wardrobe by defaul
|
|
|
414
455
|
USER ANALYSIS WORKFLOW:
|
|
415
456
|
- Extract from VERY LAST USER MESSAGE only.
|
|
416
457
|
- Add only explicit new user facts from this turn (no inference).
|
|
458
|
+
- For 'preference', only capture explicit statements (e.g., "I like/love/dislike/hate...").
|
|
459
|
+
- For 'boundary', only capture explicit rejections or limitations (e.g., "Don't talk about X", "I won't do Y").
|
|
417
460
|
- Categories: 'realName', 'occupation', 'age', 'gender', 'hobby', 'trait', 'communicationStyle', 'boundary', 'preference'.
|
|
418
461
|
- Keep nicknames in stateUpdate; do not place them in newFactsLearned.
|
|
419
462
|
- If no new fact is explicit, set userAnalysis to null.
|
|
@@ -433,8 +476,8 @@ Output JSON Schema:
|
|
|
433
476
|
"triggerEvent": {
|
|
434
477
|
${this.getEventSchemaParams(state.dynamic_context?.userNickname)}
|
|
435
478
|
},
|
|
436
|
-
${this.getImageSchemaParams()},
|
|
437
|
-
${this.getVoiceSchemaFromState(state)}
|
|
479
|
+
${this.getImageSchemaParams(requestedOthers.includes(InteractRequestType.IMAGE))},
|
|
480
|
+
${this.getVoiceSchemaFromState(state, requestedOthers.includes(InteractRequestType.VOICE))}
|
|
438
481
|
}
|
|
439
482
|
Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent", "giftOutfit", or "userAnalysis" are not needed, set them to null. "stateUpdate" cannot be null. Return valid raw JSON only.`;
|
|
440
483
|
const transcript = this.buildHistoryTranscript(params.history, state);
|
|
@@ -500,8 +543,8 @@ Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent",
|
|
|
500
543
|
parsedIntent.giftOutfit.descriptionText.trim().length > 0) {
|
|
501
544
|
mediaTasks.push(this.giftOutfit(parsedIntent.giftOutfit.descriptionText.trim()).catch((e) => console.error("[CyberSoulClient] Auto giftOutfit failed:", e)));
|
|
502
545
|
}
|
|
503
|
-
const shouldGenerateImage = types.includes(InteractRequestType.IMAGE)
|
|
504
|
-
(isAuto
|
|
546
|
+
const shouldGenerateImage = types.includes(InteractRequestType.IMAGE) &&
|
|
547
|
+
(!isAuto || !!parsedIntent.imageParams);
|
|
505
548
|
if (shouldGenerateImage) {
|
|
506
549
|
const imagePayload = parsedIntent.imageParams && typeof parsedIntent.imageParams === "object"
|
|
507
550
|
? parsedIntent.imageParams
|
|
@@ -513,8 +556,8 @@ Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent",
|
|
|
513
556
|
finalImageUrl = res.image_url;
|
|
514
557
|
}).catch(e => console.error("[CyberSoulClient] Image generation failed:", e)));
|
|
515
558
|
}
|
|
516
|
-
const shouldGenerateVoice = types.includes(InteractRequestType.VOICE)
|
|
517
|
-
(isAuto
|
|
559
|
+
const shouldGenerateVoice = types.includes(InteractRequestType.VOICE) &&
|
|
560
|
+
(!isAuto || !!parsedIntent.voiceArgs);
|
|
518
561
|
if (shouldGenerateVoice) {
|
|
519
562
|
const normalizedVoiceArgs = parsedIntent.voiceArgs && typeof parsedIntent.voiceArgs === "object"
|
|
520
563
|
? parsedIntent.voiceArgs
|
|
@@ -655,7 +698,7 @@ CRITICAL: Output MUST be ONLY valid JSON with no markdown block wrappers. Do NOT
|
|
|
655
698
|
You are an AI image prompt director. Analyze the scene description according to the character's relationship stage and emotional inertia to determine the best image generation parameters.
|
|
656
699
|
Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly matching this schema:
|
|
657
700
|
{
|
|
658
|
-
${this.getImageSchemaParams()}
|
|
701
|
+
${this.getImageSchemaParams(true)}
|
|
659
702
|
}`;
|
|
660
703
|
const transcript = this.buildHistoryTranscript(params.interactParams?.history, state);
|
|
661
704
|
const promptMessages = [
|
|
@@ -690,7 +733,7 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
|
|
|
690
733
|
You are a voice acting director. ${this.getVoiceDirectorInstruction(state)}
|
|
691
734
|
Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly matching this schema:
|
|
692
735
|
{
|
|
693
|
-
${this.getVoiceSchemaFromState(state)}
|
|
736
|
+
${this.getVoiceSchemaFromState(state, true)}
|
|
694
737
|
}`;
|
|
695
738
|
const transcript = this.buildHistoryTranscript(params.interactParams?.history, state);
|
|
696
739
|
const promptMessages = [
|