@prometheusavatar/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,349 @@
1
/**
 * Shared types for the Prometheus Avatar SDK
 */
/** Supported emotion types */
type Emotion = 'neutral' | 'happy' | 'sad' | 'angry' | 'surprised' | 'thinking';
/** Result from emotion analysis */
interface EmotionResult {
    /** Winning emotion ('neutral' when nothing triggered) */
    emotion: Emotion;
    /** Score of the winning emotion, clamped to [0, 1] */
    confidence: number;
    /** Cues that fired during analysis, e.g. "keyword:happy", "punctuation:!" */
    triggers: string[];
}
/** TTS engine interface — implement this to plug in custom TTS providers */
interface ITTSEngine {
    /** Speak the given text; the returned promise resolves when speech finishes */
    speak(text: string, options?: TTSOptions): Promise<void>;
    /** Stop current speech */
    stop(): void;
    /** Whether the engine is currently speaking */
    readonly isSpeaking: boolean;
    /** Register callback for audio amplitude data (drives lip sync) */
    onAudioData?: (data: Float32Array) => void;
    /** Register callback invoked when speech ends */
    onEnd?: () => void;
}
/** TTS configuration options */
interface TTSOptions {
    /** Voice name or voiceURI to select (engine-specific) */
    voice?: string;
    /** Speech rate multiplier (default: 1) */
    rate?: number;
    /** Pitch multiplier (default: 1) */
    pitch?: number;
    /** Volume (default: 1) */
    volume?: number;
    /** Language tag, e.g. "en-US" (default: "en-US") */
    lang?: string;
}
/** A single frame of lip sync data */
interface LipSyncFrame {
    /** Mouth openness, 0 (closed) to 1 (fully open) */
    mouthOpenY: number;
    /** Mouth form/shape parameter (currently always 0 — neutral) */
    mouthForm: number;
    /** Frame time, taken from performance.now() */
    timestamp: number;
}
/** Configuration for creating a Prometheus Avatar */
interface PrometheusConfig {
    /** DOM element to mount the avatar canvas into */
    container: HTMLElement;
    /** URL to the Live2D model file (.model3.json) */
    modelUrl: string;
    /** Optional TTS engine (defaults to Web Speech API) */
    ttsEngine?: ITTSEngine;
    /** Canvas width (default: 800) */
    width?: number;
    /** Canvas height (default: 600) */
    height?: number;
    /** Background color (default: transparent) */
    backgroundColor?: number;
    /** TTS options */
    ttsOptions?: TTSOptions;
    /** Enable debug logging */
    debug?: boolean;
}
/** Avatar creation options (subset of config with defaults) */
interface AvatarOptions extends Partial<PrometheusConfig> {
    container: HTMLElement;
    modelUrl: string;
}
/** Events emitted by PrometheusAvatar */
interface AvatarEventMap {
    /** Fired after a model has loaded successfully */
    'model:loaded': {
        modelUrl: string;
    };
    /** Fired when loading a model fails */
    'model:error': {
        error: Error;
        modelUrl: string;
    };
    /** Fired when TTS playback of a text begins */
    'speech:start': {
        text: string;
    };
    /** Fired when TTS playback of a text finishes */
    'speech:end': {
        text: string;
    };
    /** Fired whenever the detected emotion changes */
    'emotion:change': {
        result: EmotionResult;
        previous: Emotion;
    };
    /** Fired on every lip-sync animation frame */
    'lipsync:frame': {
        frame: LipSyncFrame;
    };
    /** Fired when the avatar is destroyed */
    'destroy': void;
}

/**
 * PrometheusAvatar — Main orchestrator class
 *
 * Ties together renderer, TTS, lip sync, and emotion analysis into a
 * unified API for driving a Live2D avatar from LLM output.
 *
 * Usage:
 *   const avatar = await createAvatar({
 *     container: document.getElementById('avatar'),
 *     modelUrl: '/models/haru/haru.model3.json',
 *   });
 *   await avatar.speak('Hello! How are you today? 😊');
 */

/** Callback signature for a single event's payload */
type EventCallback<T> = (data: T) => void;
declare class PrometheusAvatar {
    private renderer;
    private tts;
    private lipSync;
    private emotion;
    private currentEmotion;
    private config;
    private listeners;
    private destroyed;
    constructor(config: PrometheusConfig);
    /**
     * Initialize the avatar — must be called before speak/setEmotion
     */
    init(): Promise<void>;
    /**
     * Speak text through the avatar:
     * 1. Analyze emotion from text
     * 2. Set expression
     * 3. Start TTS
     * 4. Lip sync follows audio
     */
    speak(text: string): Promise<void>;
    /**
     * Process text from an LLM stream — updates emotion and lip sync
     * without TTS (useful when TTS is handled externally)
     */
    processText(text: string): EmotionResult;
    /**
     * Directly set the avatar's emotion
     */
    setEmotion(emotion: Emotion): void;
    /**
     * Load a different avatar model
     */
    loadModel(modelUrl: string): Promise<void>;
    /**
     * Stop current speech and lip sync
     */
    stop(): void;
    /**
     * Resize the avatar canvas
     */
    resize(width: number, height: number): void;
    /**
     * Get current emotion
     */
    getEmotion(): Emotion;
    /**
     * Subscribe to events; returns an unsubscribe function
     */
    on<K extends keyof AvatarEventMap>(event: K, callback: EventCallback<AvatarEventMap[K]>): () => void;
    /**
     * Destroy the avatar and release all resources
     */
    destroy(): void;
    /**
     * Emit an event to all listeners
     */
    private emit;
}
/**
 * Factory function — convenient way to create and initialize a PrometheusAvatar
 *
 * @example
 * ```ts
 * const avatar = await createAvatar({
 *   container: document.getElementById('avatar')!,
 *   modelUrl: '/models/haru/haru.model3.json',
 * });
 * await avatar.speak('Hello world! 😊');
 * ```
 */
declare function createAvatar(options: AvatarOptions): Promise<PrometheusAvatar>;

/**
 * Live2D Renderer
 *
 * Wraps pixi-live2d-display to load and control Live2D models.
 * Handles model loading, expression parameters, motions, and layout.
 */

/** Loosely typed pixi-live2d-display model instance (avoids a hard type dependency) */
type Live2DModel = any;
interface RendererOptions {
    /** DOM element the canvas is appended to */
    container: HTMLElement;
    /** Canvas width (default: 800) */
    width?: number;
    /** Canvas height (default: 600) */
    height?: number;
    /** Background color (0 = transparent) */
    backgroundColor?: number;
    /** Enable debug logging */
    debug?: boolean;
}
declare class Live2DRenderer {
    private app;
    private model;
    private container;
    private width;
    private height;
    private backgroundColor;
    private initialized;
    private debug;
    constructor(options: RendererOptions);
    /**
     * Initialize the PixiJS application
     */
    init(): Promise<void>;
    /**
     * Load a Live2D model from URL
     */
    loadModel(modelUrl: string): Promise<void>;
    /**
     * Set mouth open parameter (for lip sync)
     */
    setMouthOpen(value: number): void;
    /**
     * Set emotion expression parameters
     */
    setEmotion(emotion: Emotion, transition?: boolean): void;
    /**
     * Set a single model parameter
     */
    setParam(paramId: string, value: number): void;
    /**
     * Get the current model instance
     */
    getModel(): Live2DModel | null;
    /**
     * Resize the renderer
     */
    resize(width: number, height: number): void;
    /**
     * Destroy the renderer and clean up resources
     */
    destroy(): void;
    /**
     * Fit model to canvas with proper scaling
     */
    private fitModel;
    /**
     * Smoothly transition parameters over time
     */
    private transitionParams;
    /**
     * Get current value of a parameter
     */
    private getParamValue;
}

/**
 * TTS (Text-to-Speech) Engine
 *
 * Pluggable interface with Web Speech API as default implementation.
 * Users can swap in ElevenLabs, Azure, or any custom TTS provider.
 */

/**
 * Default TTS engine using the Web Speech Synthesis API.
 * Zero-config, works in all modern browsers.
 */
declare class WebSpeechTTS implements ITTSEngine {
    private synth;
    private currentUtterance;
    private _isSpeaking;
    private defaultOptions;
    /** Called with audio amplitude data for lip sync */
    onAudioData?: (data: Float32Array) => void;
    /** Called when speech ends */
    onEnd?: () => void;
    constructor(options?: TTSOptions);
    get isSpeaking(): boolean;
    /**
     * Speak the given text using Web Speech API
     */
    speak(text: string, options?: TTSOptions): Promise<void>;
    /**
     * Stop current speech
     */
    stop(): void;
    /**
     * Get available voices
     */
    getVoices(): SpeechSynthesisVoice[];
    /**
     * Simulate lip sync data since Web Speech API doesn't expose raw audio.
     * Uses text analysis to generate approximate mouth movements.
     */
    private simulateLipSyncData;
    /**
     * Map character to approximate mouth amplitude
     */
    private charToAmplitude;
}

/**
 * Lip Sync Engine
 *
 * Drives Live2D mouth parameters from audio data or text analysis.
 * Two modes:
 * 1. Audio-driven: analyzes audio amplitude → mouth open/close
 * 2. Text fallback: generates syllable-based mouth animation
 */

declare class LipSyncEngine {
    private animationFrame;
    private currentMouthOpen;
    private targetMouthOpen;
    private smoothing;
    private isActive;
    /** Called each frame with updated lip sync data */
    onFrame?: (frame: LipSyncFrame) => void;
    constructor(options?: {
        smoothing?: number;
    });
    /**
     * Process audio amplitude data (called by TTS engine)
     */
    processAudioData(data: Float32Array): void;
    /**
     * Start text-based lip sync animation (fallback when no audio data)
     */
    startTextSync(text: string, durationMs?: number): void;
    /**
     * Stop lip sync animation
     */
    stop(): void;
    /**
     * Start the smooth animation loop
     */
    private startAnimation;
    /**
     * Convert text to syllable amplitude array
     */
    private textToSyllables;
}

/**
 * Rule-based Emotion Analyzer
 *
 * Analyzes text for emotional cues using punctuation, emojis, and keywords.
 * MVP approach — v2 will use LLM-based analysis.
 */

declare class EmotionAnalyzer {
    /**
     * Analyze text and return the detected emotion
     */
    analyze(text: string): EmotionResult;
}

export { type AvatarEventMap, type AvatarOptions, type Emotion, EmotionAnalyzer, type EmotionResult, type ITTSEngine, LipSyncEngine, type LipSyncFrame, Live2DRenderer, PrometheusAvatar, type PrometheusConfig, type TTSOptions, WebSpeechTTS, createAvatar };
package/dist/index.js ADDED
@@ -0,0 +1,825 @@
1
+ // src/renderer.ts
2
+ import { Application } from "pixi.js";
3
// Per-emotion Live2D Cubism parameter presets. Parameter IDs a given
// model does not define are silently ignored by Live2DRenderer.setParam.
var EMOTION_PARAMS = {
  neutral: {
    ParamEyeLOpen: 1,
    ParamEyeROpen: 1,
    ParamBrowLY: 0,
    ParamBrowRY: 0,
    ParamMouthForm: 0
  },
  happy: {
    ParamEyeLOpen: 0.8,
    ParamEyeROpen: 0.8,
    ParamBrowLY: 0.3,
    ParamBrowRY: 0.3,
    // positive mouth form = smile
    ParamMouthForm: 1,
    ParamEyeLSmile: 1,
    ParamEyeRSmile: 1
  },
  sad: {
    ParamEyeLOpen: 0.6,
    ParamEyeROpen: 0.6,
    ParamBrowLY: -0.5,
    ParamBrowRY: -0.5,
    ParamMouthForm: -0.3
  },
  angry: {
    ParamEyeLOpen: 1.2,
    ParamEyeROpen: 1.2,
    ParamBrowLY: -1,
    ParamBrowRY: -1,
    ParamBrowLAngle: -1,
    ParamBrowRAngle: -1,
    ParamMouthForm: -0.5
  },
  surprised: {
    ParamEyeLOpen: 1.3,
    ParamEyeROpen: 1.3,
    ParamBrowLY: 1,
    ParamBrowRY: 1,
    ParamMouthOpenY: 0.8,
    ParamMouthForm: 0
  },
  thinking: {
    // asymmetric eyes/brows plus a slight head turn read as "pondering"
    ParamEyeLOpen: 0.7,
    ParamEyeROpen: 0.9,
    ParamBrowLY: 0.3,
    ParamBrowRY: -0.2,
    ParamAngleX: 15,
    ParamMouthForm: 0
  }
};
54
var Live2DRenderer = class {
  /**
   * Wraps PixiJS + pixi-live2d-display to render and drive a Live2D model.
   * @param options.container DOM element the canvas is appended to
   * @param options.width Canvas width in px (default 800)
   * @param options.height Canvas height in px (default 600)
   * @param options.backgroundColor Pixi color number; 0 renders transparent
   * @param options.debug Enable console logging
   */
  constructor(options) {
    this.app = null;
    this.model = null;
    this.initialized = false;
    this.container = options.container;
    this.width = options.width ?? 800;
    this.height = options.height ?? 600;
    this.backgroundColor = options.backgroundColor ?? 0;
    this.debug = options.debug ?? false;
  }
  /**
   * Initialize the PixiJS application and attach its canvas. Idempotent.
   * (The previous build also did `await import("pixi.js")` here, which
   * was dead code — `Application` is statically imported at module top.)
   */
  async init() {
    if (this.initialized) return;
    this.app = new Application({
      width: this.width,
      height: this.height,
      // backgroundColor 0 doubles as "transparent"
      backgroundAlpha: this.backgroundColor === 0 ? 0 : 1,
      backgroundColor: this.backgroundColor,
      antialias: true,
      resolution: window.devicePixelRatio || 1,
      autoDensity: true
    });
    this.container.appendChild(this.app.view);
    this.initialized = true;
    if (this.debug) {
      console.log("[Prometheus] Renderer initialized", { width: this.width, height: this.height });
    }
  }
  /**
   * Load a Live2D model from URL, replacing any previously loaded model.
   * Initializes the renderer first if needed. Rethrows load failures.
   */
  async loadModel(modelUrl) {
    if (!this.app) {
      await this.init();
    }
    try {
      // Lazy-load the Live2D bridge so the core bundle stays lean.
      const { Live2DModel: L2DModel } = await import("pixi-live2d-display");
      if (this.model) {
        this.app.stage.removeChild(this.model);
        this.model.destroy();
      }
      if (this.debug) {
        console.log("[Prometheus] Loading model:", modelUrl);
      }
      this.model = await L2DModel.from(modelUrl);
      this.fitModel();
      this.app.stage.addChild(this.model);
      // Start the idle motion loop if the model defines one.
      this.model.motion?.("idle", 0, { loop: true });
      if (this.debug) {
        console.log("[Prometheus] Model loaded successfully");
      }
    } catch (error) {
      console.error("[Prometheus] Failed to load model:", error);
      throw error;
    }
  }
  /**
   * Set mouth open parameter (for lip sync); value is clamped to [0, 1].
   */
  setMouthOpen(value) {
    if (!this.model) return;
    this.setParam("ParamMouthOpenY", Math.max(0, Math.min(1, value)));
  }
  /**
   * Apply the preset parameters for an emotion.
   * @param transition When true (default), ease over 300ms; otherwise snap.
   */
  setEmotion(emotion, transition = true) {
    if (!this.model) return;
    const params = EMOTION_PARAMS[emotion];
    if (!params) return;
    if (transition) {
      this.transitionParams(params, 300);
    } else {
      for (const [param, value] of Object.entries(params)) {
        this.setParam(param, value);
      }
    }
  }
  /**
   * Set a single core-model parameter. Unknown IDs and core-model
   * errors are deliberately ignored (models vary in parameter sets).
   */
  setParam(paramId, value) {
    if (!this.model?.internalModel?.coreModel) return;
    try {
      const coreModel = this.model.internalModel.coreModel;
      const paramIndex = coreModel.getParameterIndex(paramId);
      if (paramIndex >= 0) {
        coreModel.setParameterValueById(paramId, value);
      }
    } catch {
      // best-effort: missing parameters are not an error
    }
  }
  /**
   * Get the current model instance (or null before loadModel).
   */
  getModel() {
    return this.model;
  }
  /**
   * Resize the renderer and re-fit the model to the new canvas.
   */
  resize(width, height) {
    this.width = width;
    this.height = height;
    if (this.app) {
      this.app.renderer.resize(width, height);
      this.fitModel();
    }
  }
  /**
   * Destroy the renderer and clean up model, canvas, and textures.
   */
  destroy() {
    if (this.model) {
      this.model.destroy();
      this.model = null;
    }
    if (this.app) {
      this.app.destroy(true, { children: true, texture: true });
      this.app = null;
    }
    this.initialized = false;
  }
  /**
   * Center the model and scale it to 80% of the smaller canvas dimension.
   */
  fitModel() {
    if (!this.model || !this.app) return;
    const scaleX = this.width / this.model.width;
    const scaleY = this.height / this.model.height;
    const scale = Math.min(scaleX, scaleY) * 0.8;
    this.model.scale.set(scale);
    this.model.x = this.width / 2;
    this.model.y = this.height / 2;
    this.model.anchor?.set(0.5, 0.5);
  }
  /**
   * Ease parameters from their current values to targets over durationMs
   * using a cubic ease-out, driven by requestAnimationFrame.
   */
  transitionParams(targetParams, durationMs) {
    const startTime = performance.now();
    const startParams = {};
    for (const paramId of Object.keys(targetParams)) {
      startParams[paramId] = this.getParamValue(paramId) ?? 0;
    }
    const animate = () => {
      const elapsed = performance.now() - startTime;
      const progress = Math.min(elapsed / durationMs, 1);
      const eased = 1 - Math.pow(1 - progress, 3);
      for (const [paramId, targetValue] of Object.entries(targetParams)) {
        const startValue = startParams[paramId] ?? 0;
        const currentValue = startValue + (targetValue - startValue) * eased;
        this.setParam(paramId, currentValue);
      }
      if (progress < 1) {
        requestAnimationFrame(animate);
      }
    };
    requestAnimationFrame(animate);
  }
  /**
   * Read a parameter's current value, or null when unavailable.
   */
  getParamValue(paramId) {
    if (!this.model?.internalModel?.coreModel) return null;
    try {
      const coreModel = this.model.internalModel.coreModel;
      return coreModel.getParameterValueById(paramId);
    } catch {
      return null;
    }
  }
};
231
+
232
+ // src/tts.ts
233
var WebSpeechTTS = class {
  /**
   * Zero-config TTS backed by the browser's SpeechSynthesis API.
   * Options passed here become defaults for every speak() call.
   */
  constructor(options) {
    this.currentUtterance = null;
    this._isSpeaking = false;
    this.synth = window.speechSynthesis;
    this.defaultOptions = {
      rate: 1,
      pitch: 1,
      volume: 1,
      lang: "en-US",
      ...options
    };
  }
  // True between the utterance's onstart and onend/onerror events.
  get isSpeaking() {
    return this._isSpeaking;
  }
  /**
   * Speak the given text using Web Speech API.
   * Cancels any in-progress speech first; per-call options override the
   * constructor defaults. Resolves when speech ends (or is canceled),
   * rejects on any other synthesis error.
   */
  async speak(text, options) {
    this.stop();
    return new Promise((resolve, reject) => {
      const utterance = new SpeechSynthesisUtterance(text);
      const opts = { ...this.defaultOptions, ...options };
      utterance.rate = opts.rate ?? 1;
      utterance.pitch = opts.pitch ?? 1;
      utterance.volume = opts.volume ?? 1;
      utterance.lang = opts.lang ?? "en-US";
      if (opts.voice) {
        // Match against either the human-readable name or the voiceURI.
        const voices = this.synth.getVoices();
        const match = voices.find(
          (v) => v.name === opts.voice || v.voiceURI === opts.voice
        );
        if (match) utterance.voice = match;
      }
      utterance.onstart = () => {
        this._isSpeaking = true;
        // No raw audio is available from Web Speech — fake lip sync from text.
        this.simulateLipSyncData(text);
      };
      utterance.onend = () => {
        this._isSpeaking = false;
        this.currentUtterance = null;
        this.onEnd?.();
        resolve();
      };
      utterance.onerror = (event) => {
        this._isSpeaking = false;
        this.currentUtterance = null;
        // "canceled" is the expected result of stop() — not a failure.
        if (event.error !== "canceled") {
          reject(new Error(`TTS error: ${event.error}`));
        } else {
          resolve();
        }
      };
      this.currentUtterance = utterance;
      this.synth.speak(utterance);
    });
  }
  /**
   * Stop current speech and reset speaking state immediately.
   */
  stop() {
    if (this.synth.speaking) {
      this.synth.cancel();
    }
    this._isSpeaking = false;
    this.currentUtterance = null;
  }
  /**
   * Get available voices.
   * NOTE(review): some browsers populate this list asynchronously, so an
   * early call may return an empty array — confirm before relying on it.
   */
  getVoices() {
    return this.synth.getVoices();
  }
  /**
   * Simulate lip sync data since Web Speech API doesn't expose raw audio.
   * Schedules one amplitude sample per character (60ms apart within a
   * word, words assumed ~250ms). Timers fired after stop() are no-ops
   * because they re-check _isSpeaking.
   */
  simulateLipSyncData(text) {
    if (!this.onAudioData) return;
    const words = text.split(/\s+/);
    const avgWordDuration = 250;
    let offset = 0;
    for (const word of words) {
      for (let i = 0; i < word.length; i++) {
        const char = word[i].toLowerCase();
        const amplitude = this.charToAmplitude(char);
        const delay = offset + i * 60;
        setTimeout(() => {
          if (this._isSpeaking && this.onAudioData) {
            const data = new Float32Array([amplitude]);
            this.onAudioData(data);
          }
        }, delay);
      }
      offset += avgWordDuration;
    }
  }
  /**
   * Map character to approximate mouth amplitude: vowels open wide,
   * bilabials (b/m/p) nearly close the mouth, other consonants fall in
   * between; small random jitter keeps the motion from looking robotic.
   */
  charToAmplitude(char) {
    if ("aeiou".includes(char)) return 0.7 + Math.random() * 0.3;
    if ("bmp".includes(char)) return 0.05;
    if ("lnr".includes(char)) return 0.4 + Math.random() * 0.2;
    if ("fvsz".includes(char)) return 0.3 + Math.random() * 0.2;
    return 0.2 + Math.random() * 0.3;
  }
};
342
+
343
+ // src/lip-sync.ts
344
var LipSyncEngine = class {
  /**
   * Drives mouth-open values from audio amplitude or text, smoothing them
   * each animation frame and reporting results via the onFrame callback.
   * @param options.smoothing Per-frame lerp factor, clamped to [0.05, 1];
   *   lower = smoother motion (default 0.3).
   */
  constructor(options) {
    this.animationFrame = null;
    this.currentMouthOpen = 0;
    this.targetMouthOpen = 0;
    this.smoothing = 0.3;
    // lower = smoother
    this.isActive = false;
    if (options?.smoothing !== void 0) {
      this.smoothing = Math.max(0.05, Math.min(1, options.smoothing));
    }
  }
  /**
   * Process audio amplitude data (called by TTS engine).
   * Converts the samples to RMS and retargets the mouth-open value.
   */
  processAudioData(data) {
    if (data.length === 0) return;
    let sum = 0;
    for (let i = 0; i < data.length; i++) {
      sum += data[i] * data[i];
    }
    const rms = Math.sqrt(sum / data.length);
    this.targetMouthOpen = Math.min(1, rms * 2);
    // startAnimation is idempotent, so calling it unconditionally is safe.
    this.startAnimation();
  }
  /**
   * Start text-based lip sync animation (fallback when no audio data).
   * @param durationMs Total duration; defaults to 80ms per character.
   */
  startTextSync(text, durationMs) {
    const syllables = this.textToSyllables(text);
    if (syllables.length === 0) {
      // Empty text: nothing to animate — also avoids duration / 0 below.
      this.targetMouthOpen = 0;
      return;
    }
    const duration = durationMs ?? text.length * 80;
    const syllableDuration = duration / syllables.length;
    let index = 0;
    this.isActive = true;
    const step = () => {
      if (index >= syllables.length || !this.isActive) {
        this.targetMouthOpen = 0;
        return;
      }
      this.targetMouthOpen = syllables[index];
      index++;
      setTimeout(step, syllableDuration);
    };
    step();
    this.startAnimation();
  }
  /**
   * Stop lip sync animation and report one final closed-mouth frame.
   */
  stop() {
    this.isActive = false;
    this.targetMouthOpen = 0;
    this.currentMouthOpen = 0;
    if (this.animationFrame !== null) {
      cancelAnimationFrame(this.animationFrame);
      this.animationFrame = null;
    }
    this.onFrame?.({
      mouthOpenY: 0,
      mouthForm: 0,
      timestamp: performance.now()
    });
  }
  /**
   * Start the smooth animation loop. Idempotent: if a frame is already
   * scheduled the existing loop is reused. (The previous build guarded
   * with isActive in one call site and animationFrame in the other,
   * which could schedule two concurrent rAF loops.)
   */
  startAnimation() {
    this.isActive = true;
    if (this.animationFrame !== null) return;
    const animate = () => {
      // Exponential approach toward the target, snapping shut below 0.02.
      this.currentMouthOpen += (this.targetMouthOpen - this.currentMouthOpen) * this.smoothing;
      if (this.currentMouthOpen < 0.02) {
        this.currentMouthOpen = 0;
      }
      this.onFrame?.({
        mouthOpenY: this.currentMouthOpen,
        mouthForm: 0,
        // neutral form for now
        timestamp: performance.now()
      });
      if (this.isActive || this.currentMouthOpen > 0.01) {
        this.animationFrame = requestAnimationFrame(animate);
      } else {
        this.animationFrame = null;
      }
    };
    this.animationFrame = requestAnimationFrame(animate);
  }
  /**
   * Convert text to a per-character amplitude array: vowels (incl. some
   * accented ones) open wide, bilabials nearly close, whitespace closes,
   * CJK characters get a medium-high value, other letters medium.
   */
  textToSyllables(text) {
    const syllables = [];
    for (const char of text) {
      const lower = char.toLowerCase();
      if ("aeiou\xE1\xE9\xED\xF3\xFA".includes(lower)) {
        syllables.push(0.6 + Math.random() * 0.4);
      } else if ("bmp".includes(lower)) {
        syllables.push(0.05);
      } else if (" \n".includes(char)) {
        syllables.push(0);
      } else if (/[\u4e00-\u9fff]/.test(char)) {
        syllables.push(0.5 + Math.random() * 0.3);
      } else if (/[a-z]/.test(lower)) {
        syllables.push(0.3 + Math.random() * 0.3);
      } else {
        syllables.push(0.05);
      }
    }
    return syllables;
  }
};
459
+
460
+ // src/emotion.ts
461
// Emoji cues grouped by the emotion they signal. Flattened into
// EMOJI_MAP below; group and list order matches the original flat map,
// so key insertion order (and therefore trigger order) is unchanged.
var EMOJI_GROUPS = {
  happy: [
    "\u{1F60A}", "\u{1F604}", "\u{1F603}", "\u{1F970}", "\u2764\uFE0F",
    "\u{1F602}", "\u{1F923}", "\u{1F601}", "\u{1F495}", "\u2728",
    "\u{1F389}", "\u{1F44D}", "\u{1F60D}", "\u{1F642}", "\u263A\uFE0F"
  ],
  sad: [
    "\u{1F622}", "\u{1F62D}", "\u{1F61E}", "\u{1F614}",
    "\u{1F494}", "\u{1F97A}", "\u{1F63F}", "\u{1F61F}"
  ],
  angry: [
    "\u{1F620}", "\u{1F621}", "\u{1F92C}", "\u{1F4A2}",
    "\u{1F44A}", "\u{1F624}", "\u{1F525}"
  ],
  surprised: [
    "\u{1F632}", "\u{1F62E}", "\u{1F92F}", "\u{1F631}",
    "\u2757", "\u2049\uFE0F", "\u203C\uFE0F", "\u{1F633}"
  ],
  thinking: ["\u{1F914}", "\u{1F4AD}", "\u{1F9D0}", "\u{1F4DD}"]
};
// Emoji -> emotion lookup used by EmotionAnalyzer.
var EMOJI_MAP = {};
for (const emotion of Object.keys(EMOJI_GROUPS)) {
  for (const emoji of EMOJI_GROUPS[emotion]) {
    EMOJI_MAP[emoji] = emotion;
  }
}
505
// Keyword cues (English + Chinese) grouped by emotion. Flattened into
// KEYWORD_MAP below; group and list order matches the original flat map,
// preserving key insertion order (and thus trigger reporting order).
var KEYWORD_GROUPS = {
  happy: [
    "happy", "glad", "great", "awesome", "wonderful", "love", "amazing",
    "excellent", "fantastic", "beautiful", "perfect", "haha", "lol",
    "yay", "hooray", "congratulations",
    "\u5F00\u5FC3", "\u9AD8\u5174", "\u592A\u597D\u4E86", "\u68D2", "\u54C8\u54C8"
  ],
  sad: [
    "sad", "sorry", "unfortunately", "miss", "disappointed", "depressed",
    "unhappy", "cry", "tragic", "heartbroken",
    "\u96BE\u8FC7", "\u4F24\u5FC3", "\u53EF\u60DC", "\u9057\u61BE"
  ],
  angry: [
    "angry", "furious", "hate", "terrible", "awful", "stupid",
    "ridiculous", "unacceptable",
    "\u751F\u6C14", "\u6124\u6012", "\u8BA8\u538C"
  ],
  surprised: [
    "wow", "whoa", "omg", "incredible", "unbelievable", "shocking",
    "unexpected",
    "\u54C7", "\u5929\u54EA", "\u4E0D\u53EF\u601D\u8BAE"
  ],
  thinking: [
    "think", "consider", "perhaps", "maybe", "hmm", "interesting",
    "wonder", "analyze",
    "\u60F3\u60F3", "\u4E5F\u8BB8", "\u6709\u610F\u601D"
  ]
};
// Keyword -> emotion lookup used by EmotionAnalyzer (matched as
// lowercase substrings).
var KEYWORD_MAP = {};
for (const emotion of Object.keys(KEYWORD_GROUPS)) {
  for (const keyword of KEYWORD_GROUPS[emotion]) {
    KEYWORD_MAP[keyword] = emotion;
  }
}
579
var EmotionAnalyzer = class {
  /**
   * Score the text against emoji, keyword, punctuation, and caps cues,
   * then return the highest-scoring emotion with its trigger list.
   * Ties go to the emotion listed first in the scores object, so the
   * 0.3 neutral baseline wins when nothing meaningful fires.
   */
  analyze(text) {
    const triggers = [];
    const scores = {
      neutral: 0.3,
      // baseline — neutral wins when nothing else triggers
      happy: 0,
      sad: 0,
      angry: 0,
      surprised: 0,
      thinking: 0
    };
    const bump = (emotion, amount, trigger) => {
      scores[emotion] += amount;
      triggers.push(trigger);
    };
    // Emoji cues are the strongest signal.
    for (const [emoji, emotion] of Object.entries(EMOJI_MAP)) {
      if (text.includes(emoji)) bump(emotion, 0.5, `emoji:${emoji}`);
    }
    // Keyword cues (case-insensitive substring match).
    const lowered = text.toLowerCase();
    for (const [keyword, emotion] of Object.entries(KEYWORD_MAP)) {
      if (lowered.includes(keyword)) bump(emotion, 0.3, `keyword:${keyword}`);
    }
    // Punctuation and capitalization heuristics.
    const exclamations = (text.match(/!/g) ?? []).length;
    if (exclamations >= 2) {
      bump("surprised", 0.3, "punctuation:!!");
    } else if (exclamations === 1) {
      bump("happy", 0.15, "punctuation:!");
    }
    if ((text.match(/\?/g) ?? []).length >= 1) {
      bump("thinking", 0.2, "punctuation:?");
    }
    const letters = text.replace(/[^a-zA-Z]/g, "");
    const capsRatio = letters.length > 0 ? (text.match(/[A-Z]/g) ?? []).length / letters.length : 0;
    if (capsRatio > 0.6 && text.length > 5) {
      // Shouting reads as anger and, to a lesser degree, surprise.
      scores.angry += 0.2;
      scores.surprised += 0.15;
      triggers.push("caps:high");
    }
    if (text.includes("...") || text.includes("\u2026")) {
      bump("thinking", 0.15, "punctuation:...");
    }
    // Argmax over the scores, preserving first-listed-wins tie-breaking.
    let best = "neutral";
    let bestScore = 0;
    for (const [emotion, score] of Object.entries(scores)) {
      if (score > bestScore) {
        bestScore = score;
        best = emotion;
      }
    }
    return {
      emotion: best,
      confidence: Math.min(bestScore, 1),
      triggers
    };
  }
};
645
+
646
+ // src/avatar.ts
647
var PrometheusAvatar = class {
  /**
   * Main orchestrator: ties renderer, TTS, lip sync, and emotion analysis
   * into one API for driving a Live2D avatar from LLM output.
   */
  constructor(config) {
    this.currentEmotion = "neutral";
    this.listeners = /* @__PURE__ */ new Map();
    this.destroyed = false;
    this.config = config;
    this.renderer = new Live2DRenderer({
      container: config.container,
      width: config.width,
      height: config.height,
      backgroundColor: config.backgroundColor,
      debug: config.debug
    });
    // Fall back to the zero-config Web Speech engine when none is injected.
    this.tts = config.ttsEngine ?? new WebSpeechTTS(config.ttsOptions);
    this.lipSync = new LipSyncEngine();
    this.emotion = new EmotionAnalyzer();
    // Wire the pipeline: TTS audio -> lip sync -> renderer mouth param.
    this.tts.onAudioData = (data) => {
      this.lipSync.processAudioData(data);
    };
    this.lipSync.onFrame = (frame) => {
      this.renderer.setMouthOpen(frame.mouthOpenY);
      this.emit("lipsync:frame", { frame });
    };
    this.tts.onEnd = () => {
      this.lipSync.stop();
    };
  }
  /**
   * Initialize the avatar — must be called before speak/setEmotion.
   * Emits "model:loaded" on success.
   */
  async init() {
    await this.renderer.init();
    await this.renderer.loadModel(this.config.modelUrl);
    this.emit("model:loaded", { modelUrl: this.config.modelUrl });
  }
  /**
   * Speak text through the avatar:
   * 1. Analyze emotion from text
   * 2. Set expression
   * 3. Start TTS
   * 4. Lip sync follows audio
   * Emits "speech:start" before TTS and "speech:end" after, even on error.
   */
  async speak(text) {
    if (this.destroyed) return;
    this.applyEmotion(this.emotion.analyze(text));
    this.emit("speech:start", { text });
    try {
      await this.tts.speak(text, this.config.ttsOptions);
    } finally {
      this.lipSync.stop();
      this.emit("speech:end", { text });
    }
  }
  /**
   * Process text from an LLM stream — updates emotion and lip sync
   * without TTS (useful when TTS is handled externally).
   * @returns the emotion analysis result for the text
   */
  processText(text) {
    if (this.destroyed) return { emotion: "neutral", confidence: 0, triggers: [] };
    const result = this.emotion.analyze(text);
    this.applyEmotion(result);
    this.lipSync.startTextSync(text);
    return result;
  }
  /**
   * Directly set the avatar's emotion (reported with confidence 1 and a
   * "manual" trigger).
   */
  setEmotion(emotion) {
    if (this.destroyed) return;
    const previous = this.currentEmotion;
    this.currentEmotion = emotion;
    this.renderer.setEmotion(emotion);
    this.emit("emotion:change", {
      result: { emotion, confidence: 1, triggers: ["manual"] },
      previous
    });
  }
  /**
   * Load a different avatar model. Resets emotion to neutral and emits
   * "model:loaded" on success, or "model:error" (then rethrows) on failure.
   */
  async loadModel(modelUrl) {
    if (this.destroyed) return;
    try {
      await this.renderer.loadModel(modelUrl);
      this.config.modelUrl = modelUrl;
      this.currentEmotion = "neutral";
      this.emit("model:loaded", { modelUrl });
    } catch (error) {
      this.emit("model:error", { error, modelUrl });
      throw error;
    }
  }
  /**
   * Stop current speech and lip sync.
   */
  stop() {
    this.tts.stop();
    this.lipSync.stop();
  }
  /**
   * Resize the avatar canvas.
   */
  resize(width, height) {
    this.renderer.resize(width, height);
  }
  /**
   * Get current emotion.
   */
  getEmotion() {
    return this.currentEmotion;
  }
  /**
   * Subscribe to events.
   * @returns an unsubscribe function
   */
  on(event, callback) {
    const key = event;
    if (!this.listeners.has(key)) {
      this.listeners.set(key, /* @__PURE__ */ new Set());
    }
    this.listeners.get(key).add(callback);
    return () => {
      this.listeners.get(key)?.delete(callback);
    };
  }
  /**
   * Destroy the avatar and release all resources. Emits "destroy".
   */
  destroy() {
    this.destroyed = true;
    this.stop();
    this.renderer.destroy();
    // BUG FIX: emit "destroy" BEFORE clearing the listener map — the
    // previous build cleared first, so subscribers were never notified.
    this.emit("destroy", void 0);
    this.listeners.clear();
  }
  /**
   * Emit an event to all its listeners; handler exceptions are logged
   * and do not interrupt the remaining handlers.
   */
  emit(event, data) {
    const key = event;
    this.listeners.get(key)?.forEach((cb) => {
      try {
        cb(data);
      } catch (error) {
        console.error(`[Prometheus] Error in event handler for '${key}':`, error);
      }
    });
  }
  /**
   * Apply an analysis result when it changes the current emotion:
   * updates the renderer expression and emits "emotion:change".
   * (Shared by speak() and processText().)
   */
  applyEmotion(result) {
    if (result.emotion === this.currentEmotion) return;
    const previous = this.currentEmotion;
    this.currentEmotion = result.emotion;
    this.renderer.setEmotion(result.emotion);
    this.emit("emotion:change", { result, previous });
  }
};
806
/**
 * Factory: build a PrometheusAvatar with SDK defaults applied, run its
 * async initialization, and hand back the ready-to-use instance.
 */
async function createAvatar(options) {
  const DEFAULTS = { width: 800, height: 600, backgroundColor: 0, debug: false };
  const avatar = new PrometheusAvatar({ ...DEFAULTS, ...options });
  await avatar.init();
  return avatar;
}
818
+ export {
819
+ EmotionAnalyzer,
820
+ LipSyncEngine,
821
+ Live2DRenderer,
822
+ PrometheusAvatar,
823
+ WebSpeechTTS,
824
+ createAvatar
825
+ };
package/package.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "name": "@prometheusavatar/core",
3
+ "version": "0.1.0",
4
+ "description": "Give your AI agent an embodied avatar — Live2D rendering, TTS, lip-sync, and emotion analysis in one SDK",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "module": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "import": "./dist/index.js"
    }
  },
15
+ "files": [
16
+ "dist",
17
+ "README.md"
18
+ ],
19
+ "scripts": {
20
+ "build": "tsup src/index.ts --format esm --dts --clean",
21
+ "dev": "tsup src/index.ts --format esm --dts --watch",
22
+ "clean": "rm -rf dist",
23
+ "lint": "tsc --noEmit"
24
+ },
25
+ "dependencies": {
26
+ "pixi.js": "^6.5.10",
27
+ "pixi-live2d-display": "^0.4.0"
28
+ },
29
+ "devDependencies": {
30
+ "tsup": "^8.0.0",
31
+ "typescript": "^5.4.0"
32
+ },
33
+ "keywords": [
34
+ "live2d",
35
+ "avatar",
36
+ "ai",
37
+ "llm",
38
+ "tts",
39
+ "lip-sync",
40
+ "embodied-intelligence",
41
+ "openclaw",
42
+ "vtuber"
43
+ ],
44
+ "license": "MIT",
45
+ "author": "Myths Labs (JC)",
46
+ "repository": {
47
+ "type": "git",
48
+ "url": "https://github.com/myths-labs/prometheus"
49
+ }
50
+ }