@space3-npm/cybersoul-client 1.3.1 → 1.3.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/client.js CHANGED
@@ -175,7 +175,7 @@ Current time: ${new Date(currentTimeMs).toLocaleString("zh-CN", { timeZone: "Asi
175
175
  if (dyn.lastInteractionAt) {
176
176
  contextParts.push(`Last interaction at: ${new Date(dyn.lastInteractionAt).toLocaleString("zh-CN", { timeZone: "Asia/Shanghai" })}`);
177
177
  }
178
- const ongoingScene = this.normalizeOngoingSceneState(dyn.ongoingScene, state.active_wardrobe?.name || state.active_wardrobe?.id);
178
+ const ongoingScene = this.normalizeOngoingSceneState(dyn.ongoingScene, state.active_wardrobe?.itemName);
179
179
  if (ongoingScene) {
180
180
  const lastKnownSceneLine = `Last Known Scene: ${ongoingScene.scene} | Outfit: ${ongoingScene.outfit}`;
181
181
  let isOutdated = false;
@@ -200,7 +200,7 @@ Current time: ${new Date(currentTimeMs).toLocaleString("zh-CN", { timeZone: "Asi
200
200
  contextParts.push(`Next Event: ${state.next_event.title} at ${state.next_event.start_time} (in ${state.next_event.time_until_mins} mins)`);
201
201
  }
202
202
  if (state.active_wardrobe) {
203
- contextParts.push(`Wardrobe: ${state.active_wardrobe.name || state.active_wardrobe.id || "Current"}`);
203
+ contextParts.push(`Wardrobe: ${state.active_wardrobe.itemName || "Current"}`);
204
204
  }
205
205
  if (state.core_memory) {
206
206
  let memoryLines = ["[CORE MEMORY]"];
@@ -304,10 +304,10 @@ ${isProactive
304
304
  return `"imageParams": null`;
305
305
  return `"imageParams": {
306
306
  "mode": "structured | full-prompt (use 'full-prompt' for highly dynamic actions)",
307
- "full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH. CRITICAL: MUST use a strict first-person perspective exclusively from the USER's eyes. DO NOT describe the user (e.g., 'a man', 'the driver') as visible in the scene because the camera IS the user. Start with 'POV: '. Describe ONLY the character looking back at the camera and their immediate surroundings. MUST align precisely with the character's current Wardrobe and exposure state. Explicitly describe the character's exact clothing (or specify naked/half-naked if applicable). Ensure basic appearance (makeup, body shape, hair, facial features, etc.) aligns exactly with the character's foundational appearance profile.",
307
+ "full_prompt": "Use only if mode is full-prompt. Highly detailed visual description in ENGLISH. CRITICAL RULE FOR PERSPECTIVE: If you are physically separated from the user, simulate a selfie. However, absolutely DO NOT use the words 'selfie', 'phone', 'camera', 'lens', or 'holding' in this prompt (unless taking a mirror selfie). NEVER try to use negative prompting like 'no phone visible', as simply writing the word 'phone' forces image models to mistakenly draw a phone or phone border! Instead, achieve the natural selfie look using pure composition descriptions (e.g., 'intimate portrait looking directly at the viewer', 'high-angle portrait leaning forward', or 'wide portrait with one arm reaching out of the frame'). Vary the framing distance and angle to match the mood. If you are physically together with the user, the image MUST be a strict first-person perspective exclusively from the USER's eyes (start with 'POV: '). NEVER mix perspectives together. DO NOT describe the user (e.g., 'a man', 'the driver') as visible in the scene because the view IS the user. Describe ONLY the character looking back and their immediate surroundings. MUST align precisely with the character's current Wardrobe and exposure state. Explicitly describe the character's exact clothing (or specify naked/half-naked if applicable). Ensure basic appearance (makeup, body shape, hair, facial features, etc.) aligns exactly with the character's foundational appearance profile.",
308
308
  "expression": "seductive | cute | happy | sleepy | dazed | pleased | default (Strictly choose ONE from this exact list. DO NOT invent new words like 'shy'.)",
309
309
  "condition": "normal | sweaty | wet | messy | oily (Strictly choose ONE from this exact list.)",
310
- "view_angle": "front | side | high_angle | from_below | boyfriend_view | selfie | mirror (Strictly choose ONE from this exact list.)",
310
+ "view_angle": "front | side | high_angle | from_below | boyfriend_view | selfie | mirror (Strictly choose ONE from this exact list. Use 'selfie' if physically separated from the user, otherwise use POV angles like 'boyfriend_view' or 'front' if together.)",
311
311
  "exposure": "normal | cleavage | see_through | half_naked | naked | intimate (Strictly choose ONE from this exact list. Explicitly choose naked or half_naked if the active scene takes off outfit.)",
312
312
  "pose": "e.g., sitting on bed, leaning forward (ENGLISH ONLY)",
313
313
  "scene": "e.g., cozy bedroom, morning light (ENGLISH ONLY)",
@@ -424,7 +424,12 @@ ${isProactive
424
424
  modalitiesInstruction += `\n - ALWAYS set 'imageParams' to null. If the user explicitly asks for a picture, FIRMLY decline naturally in your 'textResponse' (e.g., say you absolutely cannot right now). NEVER pretend to send one, and NEVER give in no matter how many times they ask.`;
425
425
  }
426
426
  if (requestedOthers.includes(InteractRequestType.VOICE)) {
427
- modalitiesInstruction += `\n - Include 'voiceArgs' ONLY if the complicated tone/emotion is hard to express via pure text, or if the user explicitly requests to hear your voice. Otherwise, set it to null.`;
427
+ modalitiesInstruction += `\n - 'voiceArgs' should be used sparingly to act like a real human. Include it ONLY IF AT LEAST ONE of the following is true:
428
+ 1. The response is a long text that would be tedious to type out in real life.
429
+ 2. The user explicitly requests a voice message.
430
+ 3. Your current scheduled event or action makes texting inconvenient (e.g., driving, cooking, showering).
431
+ 4. You are experiencing complicated moods or emotions that are difficult to convey accurately via pure text.
432
+ Otherwise, ALWAYS set 'voiceArgs' to null.`;
428
433
  }
429
434
  else {
430
435
  modalitiesInstruction += `\n - ALWAYS set 'voiceArgs' to null.`;
@@ -529,7 +534,13 @@ Note: Always include "isEndTurn". If "imageParams", "voiceArgs", "triggerEvent",
529
534
  : params.userMessage;
530
535
  // Fire text ready callback if provided
531
536
  if (params.onTextReady && (resolvedTextResponse || parsedIntent.actionText)) {
532
- params.onTextReady(resolvedTextResponse, parsedIntent.actionText);
537
+ params.onTextReady(resolvedTextResponse, parsedIntent.actionText, {
538
+ stateUpdate: parsedIntent.stateUpdate,
539
+ userAnalysis: parsedIntent.userAnalysis,
540
+ isEndTurn: parsedIntent.isEndTurn,
541
+ triggerEvent: parsedIntent.triggerEvent,
542
+ likePreviousPicture: parsedIntent.likePreviousPicture,
543
+ });
533
544
  }
534
545
  // 5. Build Final Media Calls parallel
535
546
  const mediaTasks = [];
@@ -818,6 +829,20 @@ You MUST output ONLY a valid JSON object matching exactly this structure:
818
829
  if (parsedIntent.stateUpdate) {
819
830
  this._updateDynamicContextInternal(parsedIntent.stateUpdate).catch(e => console.error(e));
820
831
  }
832
+ const resolvedTextResponse = typeof parsedIntent.textResponse === "string" &&
833
+ parsedIntent.textResponse.trim().length > 0
834
+ ? parsedIntent.textResponse
835
+ : "...";
836
+ // Fire text ready callback if provided
837
+ if (params.onTextReady && (resolvedTextResponse || parsedIntent.actionText)) {
838
+ params.onTextReady(resolvedTextResponse, parsedIntent.actionText, {
839
+ stateUpdate: parsedIntent.stateUpdate,
840
+ userAnalysis: parsedIntent.userAnalysis,
841
+ isEndTurn: parsedIntent.isEndTurn,
842
+ triggerEvent: parsedIntent.triggerEvent,
843
+ likePreviousPicture: parsedIntent.likePreviousPicture,
844
+ });
845
+ }
821
846
  // Handle Optional Media (Image only for proactive to save compute normally, but you can extend)
822
847
  let finalImageUrl = undefined;
823
848
  if (requestedOthers.includes(InteractRequestType.IMAGE) || !!parsedIntent.imageParams) {
package/dist/types.d.ts CHANGED
@@ -24,11 +24,19 @@ export interface HistoryEntry {
24
24
  mediaHint?: string;
25
25
  isProactive?: boolean;
26
26
  }
27
+ export interface InteractMetadata {
28
+ stateUpdate?: DispatcherIntent["stateUpdate"];
29
+ userAnalysis?: DispatcherIntent["userAnalysis"];
30
+ isEndTurn?: boolean;
31
+ triggerEvent?: DispatcherIntent["triggerEvent"];
32
+ likePreviousPicture?: boolean;
33
+ }
27
34
  export interface ProactiveParams {
28
35
  history?: HistoryEntry[];
29
36
  maxUnreplied?: number;
30
37
  requestTypes?: InteractRequestType[];
31
38
  localContext?: string;
39
+ onTextReady?: (textResponse: string, actionText?: string, metadata?: InteractMetadata) => void;
32
40
  }
33
41
  export interface ProactiveResponse {
34
42
  status: "success" | "skipped" | "error";
@@ -45,7 +53,7 @@ export interface InteractParams {
45
53
  localContext?: string;
46
54
  requestTypes?: InteractRequestType[];
47
55
  history?: HistoryEntry[];
48
- onTextReady?: (textResponse: string, actionText?: string) => void;
56
+ onTextReady?: (textResponse: string, actionText?: string, metadata?: InteractMetadata) => void;
49
57
  }
50
58
  export interface OndemandEventParams {
51
59
  eventDescription: string;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@space3-npm/cybersoul-client",
3
- "version": "1.3.1",
3
+ "version": "1.3.3",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",