@space3-npm/cybersoul-client 1.2.4 → 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/client.js +72 -28
  2. package/package.json +1 -1
package/dist/client.js CHANGED
@@ -142,15 +142,22 @@ export class CyberSoulClient {
142
142
  }).catch((e) => console.error("Failed to update dynamic context", e)); // non-blocking error handler
143
143
  }
144
144
  normalizeRequestTypes(requestTypes) {
145
- if (!requestTypes || requestTypes.length === 0) {
146
- return [InteractRequestType.AUTO];
145
+ let normalized = requestTypes;
146
+ if (!normalized || normalized.length === 0) {
147
+ normalized = [InteractRequestType.AUTO, InteractRequestType.TEXT];
148
+ }
149
+ else {
150
+ normalized = [...normalized];
151
+ }
152
+ if (!normalized.includes(InteractRequestType.TEXT)) {
153
+ normalized.push(InteractRequestType.TEXT);
147
154
  }
148
155
  const validRequestTypes = new Set(Object.values(InteractRequestType));
149
- const invalidRequestTypes = requestTypes.filter((type) => !validRequestTypes.has(type));
156
+ const invalidRequestTypes = normalized.filter((type) => !validRequestTypes.has(type));
150
157
  if (invalidRequestTypes.length > 0) {
151
158
  throw new Error(`Invalid requestTypes: ${invalidRequestTypes.join(", ")}. Allowed values: ${Object.values(InteractRequestType).join(", ")}`);
152
159
  }
153
- return requestTypes;
160
+ return normalized;
154
161
  }
155
162
  buildStateContextPrompt(state, localContext) {
156
163
  const dyn = state.dynamic_context || {};
@@ -178,7 +185,7 @@ Current time: ${new Date(currentTimeMs).toLocaleString("zh-CN", { timeZone: "Asi
178
185
  let isOutdated = false;
179
186
  if (dyn.lastInteractionAt) {
180
187
  const elapsedHours = (currentTimeMs - new Date(dyn.lastInteractionAt).getTime()) / (1000 * 60 * 60);
181
- if (elapsedHours > 2) {
188
+ if (elapsedHours > 1) {
182
189
  isOutdated = true;
183
190
  contextParts.push(`${lastKnownSceneLine}\n[CRITICAL SCENE SHIFT]: It has been ${elapsedHours.toFixed(1)} hours since the last discussion. The 'Last Known Scene' is now strictly OUTDATED. You MUST abandon the previous scene context entirely and transition to a new scene appropriate for the 'Current time' and 'Active Event'. DO NOT continue the old actions or environment!`);
184
191
  }
@@ -190,15 +197,15 @@ Current time: ${new Date(currentTimeMs).toLocaleString("zh-CN", { timeZone: "Asi
190
197
  if (state.active_event) {
191
198
  contextParts.push(`Active Event: ${state.active_event.title} (${state.active_event.narrative_context})`);
192
199
  }
193
- if (state.next_event) {
200
+ /* if (localContext) {
201
+ contextParts.push(`Additional Context: ${localContext}`);
202
+ }
203
+ */ if (state.next_event) {
194
204
  contextParts.push(`Next Event: ${state.next_event.title} at ${state.next_event.start_time} (in ${state.next_event.time_until_mins} mins)`);
195
205
  }
196
206
  if (state.active_wardrobe) {
197
207
  contextParts.push(`Wardrobe: ${state.active_wardrobe.name || state.active_wardrobe.id || "Current"}`);
198
208
  }
199
- if (localContext) {
200
- contextParts.push(`Additional Context: ${localContext}`);
201
- }
202
209
  if (state.core_memory) {
203
210
  let memoryLines = ["[CORE MEMORY]"];
204
211
  const mem = state.core_memory;
@@ -292,7 +299,9 @@ ${scenarioContext}
292
299
  }
293
300
  return undefined;
294
301
  }
295
- getImageSchemaParams() {
302
+ getImageSchemaParams(allowed) {
303
+ if (!allowed)
304
+ return `"imageParams": null`;
296
305
  return `"imageParams": {
297
306
  "mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
298
307
  "full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH. CRITICAL: MUST use a strict first-person perspective exclusively from the USER's eyes. DO NOT describe the user (e.g., 'a man', 'the driver') as visible in the scene because the camera IS the user. Start with 'POV: '. Describe ONLY the character looking back at the camera and their immediate surroundings. MUST align with the character's current Active exposure state or Wardrobe depends on the scene. Explicitly describe the character's exact clothing (or specify naked/half-naked if applicable).",
@@ -314,7 +323,7 @@ ${scenarioContext}
314
323
  "scheduledDateStr": "YYYY-MM-DD (Optional. If the user specifies a future date like 'tomorrow', 'Saturday', or 'next week', calculate the exact calendar date based on the 'Current time' provided in the context and output it here. Otherwise, return null)",
315
324
  "scheduledStartTimeStr": "HH:MM (Optional, 24-hour format if a specific time is agreed upon, e.g., '14:30', otherwise null)",
316
325
  "durationMins": 60,
317
- "outfitId": "optional wardrobe ID to change into if appropriate"`;
326
+ "outfitId": "optional wardrobe ID to change into if appropriate. MUST match the context of the event (e.g. SLEEPWEAR for bed, INTIMATE for romance, DAILY for going out)"`;
318
327
  }
319
328
  getVoiceSchemaParams() {
320
329
  // Only reached when no dynamic_params are configured on the voice model.
@@ -335,7 +344,9 @@ ${scenarioContext}
335
344
  * Returns the JSON schema snippet for voiceArgs to embed in the LLM output schema.
336
345
  * Built from dynamic_params when available, otherwise falls back to static defaults.
337
346
  */
338
- getVoiceSchemaFromState(state) {
347
+ getVoiceSchemaFromState(state, allowed) {
348
+ if (!allowed)
349
+ return `"voiceArgs": null`;
339
350
  const dynamicParams = state.voice_model?.dynamic_params;
340
351
  if (dynamicParams && dynamicParams.length > 0) {
341
352
  return this.buildVoiceSchemaFromDynamicParams(dynamicParams);
@@ -391,6 +402,43 @@ ${scenarioContext}
391
402
  // 2. Build local Prompt
392
403
  const types = this.normalizeRequestTypes(params.requestTypes);
393
404
  const isAuto = types.includes(InteractRequestType.AUTO);
405
+ const requestedOthers = types.filter((t) => t !== InteractRequestType.AUTO && t !== InteractRequestType.TEXT);
406
+ let modalitiesInstruction = "";
407
+ if (isAuto) {
408
+ modalitiesInstruction = `Analyze the user's message and optionally decide to use allowed modalities: ${requestedOthers.join(", ") || "none"}.
409
+ - 'textResponse' is ALWAYS REQUIRED.
410
+ - The modalities you are ALLOWED to dynamically include: ${requestedOthers.length > 0 ? requestedOthers.join(", ") : "None (Only text is allowed)"}. Do not include other modalities.`;
411
+ if (requestedOthers.includes(InteractRequestType.IMAGE)) {
412
+ modalitiesInstruction += `\n - Include 'imageParams' for visual/photo requests or key visual moments during active events; explicitly describe current clothing/exposure in image fields.`;
413
+ }
414
+ else {
415
+ modalitiesInstruction += `\n - ALWAYS set 'imageParams' to null. If the user explicitly asks for a picture, FIRMLY decline naturally in your 'textResponse' (e.g., say you absolutely cannot right now). NEVER pretend to send one, and NEVER give in no matter how many times they ask.`;
416
+ }
417
+ if (requestedOthers.includes(InteractRequestType.VOICE)) {
418
+ modalitiesInstruction += `\n - Include 'voiceArgs' ONLY if the complicated tone/emotion is hard to express via pure text, or if the user explicitly requests to hear your voice. Otherwise, set it to null.`;
419
+ }
420
+ else {
421
+ modalitiesInstruction += `\n - ALWAYS set 'voiceArgs' to null.`;
422
+ }
423
+ }
424
+ else {
425
+ modalitiesInstruction = `You MUST return the requested modalities: ${requestedOthers.join(", ") || "only text"}.
426
+ - 'textResponse' is ALWAYS REQUIRED.`;
427
+ if (requestedOthers.includes(InteractRequestType.IMAGE)) {
428
+ modalitiesInstruction += `\n - 'imageParams' is REQUIRED. Include it and explicitly describe current clothing/exposure in image fields.`;
429
+ }
430
+ else {
431
+ modalitiesInstruction += `\n - ALWAYS set 'imageParams' to null. If the user explicitly asks for a picture, FIRMLY decline naturally in your 'textResponse' (e.g., say you absolutely cannot right now). NEVER pretend to send one, and NEVER give in no matter how many times they ask.`;
432
+ }
433
+ if (requestedOthers.includes(InteractRequestType.VOICE)) {
434
+ modalitiesInstruction += `\n - 'voiceArgs' is REQUIRED. Include it.`;
435
+ }
436
+ else {
437
+ modalitiesInstruction += `\n - ALWAYS set 'voiceArgs' to null.`;
438
+ }
439
+ }
440
+ modalitiesInstruction += `\n - Include 'triggerEvent' only if the VERY LAST USER MESSAGE proposes a new activity/hangout; ignore older history.
441
+ - Outfit acquisition (VERY LAST USER MESSAGE only): set giftOutfit for gift/buy/add-clothes intent; otherwise null. giftOutfit format: { "descriptionText": "short outfit description" }.`;
394
442
  // Combine state info into a clean descriptive context
395
443
  const systemPrompt = `${this.buildStateContextPrompt(state, params.localContext)}
396
444
  Available Wardrobe Outfits (For event triggers):
@@ -398,14 +446,7 @@ ${availableOutfits}
398
446
 
399
447
  The user has sent a message. You must evaluate the context and the user's message, and return a JSON object (no markdown formatting) that dictates the character's multi-modal response.
400
448
 
401
- ${isAuto
402
- ? `Analyze the user's message and decide response modalities (text, image, voice).
403
- - Always include 'textResponse'.
404
- - Include 'imageParams' for visual/photo requests or key visual moments during active events; explicitly describe current clothing/exposure in image fields.
405
- - Include 'voiceArgs' ONLY if the complicated tone/emotion is hard to express via pure text, or if the user explicitly requests to hear your voice. Otherwise, set it to null.
406
- - Include 'triggerEvent' only if the VERY LAST USER MESSAGE proposes a new activity/hangout; ignore older history.
407
- - Outfit acquisition (VERY LAST USER MESSAGE only): set giftOutfit for gift/buy/add-clothes intent; otherwise null. giftOutfit format: { "descriptionText": "short outfit description" }.`
408
- : `Requested types to fulfill: ${types.join(", ")}`}
449
+ ${modalitiesInstruction}
409
450
  Every turn adjusts trust: positive +1, negative -1, neutral 0. Always include 'stateUpdate' with integer 'temperatureDelta' (range guidance: 0 cold to 100 obsessive).
410
451
 
411
452
  Always return 'stateUpdate.ongoingScene' as an object with both keys: { "scene": string, "outfit": string }.
@@ -414,6 +455,8 @@ For 'ongoingScene.outfit': decide based on the current active wardrobe by defaul
414
455
  USER ANALYSIS WORKFLOW:
415
456
  - Extract from VERY LAST USER MESSAGE only.
416
457
  - Add only explicit new user facts from this turn (no inference).
458
+ - For 'preference', only capture explicit statements (e.g., "I like/love/dislike/hate...").
459
+ - For 'boundary', only capture explicit rejections or limitations (e.g., "Don't talk about X", "I won't do Y").
417
460
  - Categories: 'realName', 'occupation', 'age', 'gender', 'hobby', 'trait', 'communicationStyle', 'boundary', 'preference'.
418
461
  - Keep nicknames in stateUpdate; do not place them in newFactsLearned.
419
462
  - If no new fact is explicit, set userAnalysis to null.
@@ -433,8 +476,8 @@ Output JSON Schema:
433
476
  "triggerEvent": {
434
477
  ${this.getEventSchemaParams(state.dynamic_context?.userNickname)}
435
478
  },
436
- ${this.getImageSchemaParams()},
437
- ${this.getVoiceSchemaFromState(state)}
479
+ ${this.getImageSchemaParams(requestedOthers.includes(InteractRequestType.IMAGE))},
480
+ ${this.getVoiceSchemaFromState(state, requestedOthers.includes(InteractRequestType.VOICE))}
438
481
  }
439
482
  Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent", "giftOutfit", or "userAnalysis" are not needed, set them to null. "stateUpdate" cannot be null. Return valid raw JSON only.`;
440
483
  const transcript = this.buildHistoryTranscript(params.history, state);
@@ -500,8 +543,8 @@ Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent",
500
543
  parsedIntent.giftOutfit.descriptionText.trim().length > 0) {
501
544
  mediaTasks.push(this.giftOutfit(parsedIntent.giftOutfit.descriptionText.trim()).catch((e) => console.error("[CyberSoulClient] Auto giftOutfit failed:", e)));
502
545
  }
503
- const shouldGenerateImage = types.includes(InteractRequestType.IMAGE) ||
504
- (isAuto && !!parsedIntent.imageParams);
546
+ const shouldGenerateImage = types.includes(InteractRequestType.IMAGE) &&
547
+ (!isAuto || !!parsedIntent.imageParams);
505
548
  if (shouldGenerateImage) {
506
549
  const imagePayload = parsedIntent.imageParams && typeof parsedIntent.imageParams === "object"
507
550
  ? parsedIntent.imageParams
@@ -513,8 +556,8 @@ Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent",
513
556
  finalImageUrl = res.image_url;
514
557
  }).catch(e => console.error("[CyberSoulClient] Image generation failed:", e)));
515
558
  }
516
- const shouldGenerateVoice = types.includes(InteractRequestType.VOICE) ||
517
- (isAuto && !!parsedIntent.voiceArgs);
559
+ const shouldGenerateVoice = types.includes(InteractRequestType.VOICE) &&
560
+ (!isAuto || !!parsedIntent.voiceArgs);
518
561
  if (shouldGenerateVoice) {
519
562
  const normalizedVoiceArgs = parsedIntent.voiceArgs && typeof parsedIntent.voiceArgs === "object"
520
563
  ? parsedIntent.voiceArgs
@@ -575,6 +618,7 @@ Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent",
575
618
  The user proposes a new event for you to participate in: "${params.eventDescription}".
576
619
  Evaluate this based on your current state and relationship stage.
577
620
  Decide if you will accept the event, and whether it requires changing your outfit.
621
+ When changing outfits, perfectly match the outfit to the event's activity, environment, and relationship stage. Consider the wardrobe category (e.g., DAILY, INTIMATE, SLEEPWEAR).
578
622
 
579
623
  Available Wardrobe Outfits:
580
624
  ${availableOutfits || "None available"}
@@ -655,7 +699,7 @@ CRITICAL: Output MUST be ONLY valid JSON with no markdown block wrappers. Do NOT
655
699
  You are an AI image prompt director. Analyze the scene description according to the character's relationship stage and emotional inertia to determine the best image generation parameters.
656
700
  Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly matching this schema:
657
701
  {
658
- ${this.getImageSchemaParams()}
702
+ ${this.getImageSchemaParams(true)}
659
703
  }`;
660
704
  const transcript = this.buildHistoryTranscript(params.interactParams?.history, state);
661
705
  const promptMessages = [
@@ -690,7 +734,7 @@ Output strictly valid JSON ONLY. No markdown, no conversational filler. Return e
690
734
  You are a voice acting director. ${this.getVoiceDirectorInstruction(state)}
691
735
  Output strictly valid JSON ONLY. No markdown, no conversational filler. Return exactly matching this schema:
692
736
  {
693
- ${this.getVoiceSchemaFromState(state)}
737
+ ${this.getVoiceSchemaFromState(state, true)}
694
738
  }`;
695
739
  const transcript = this.buildHistoryTranscript(params.interactParams?.history, state);
696
740
  const promptMessages = [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@space3-npm/cybersoul-client",
3
- "version": "1.2.4",
3
+ "version": "1.2.6",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",