alive-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/Dockerfile +24 -0
  2. package/LICENSE +21 -0
  3. package/README.md +143 -0
  4. package/alive_ai/__init__.py +3 -0
  5. package/brain/__init__.py +59 -0
  6. package/brain/almost_said.py +154 -0
  7. package/brain/bid_detector.py +636 -0
  8. package/brain/conversation_flow.py +135 -0
  9. package/brain/curiosity.py +328 -0
  10. package/brain/default_mode.py +1438 -0
  11. package/brain/dreams.py +220 -0
  12. package/brain/embeddings/__init__.py +82 -0
  13. package/brain/emotional_memory.py +949 -0
  14. package/brain/global_activity.py +173 -0
  15. package/brain/group_dynamics.py +63 -0
  16. package/brain/linguistic.py +235 -0
  17. package/brain/llm/__init__.py +63 -0
  18. package/brain/llm/base.py +33 -0
  19. package/brain/llm/fallback_router.py +309 -0
  20. package/brain/llm/manifest.md +30 -0
  21. package/brain/llm/ollama.py +218 -0
  22. package/brain/llm/openrouter.py +151 -0
  23. package/brain/llm/provider.py +205 -0
  24. package/brain/llm/unified.py +423 -0
  25. package/brain/llm/zai.py +169 -0
  26. package/brain/manifest.md +23 -0
  27. package/brain/memory/__init__.py +123 -0
  28. package/brain/memory/episodic.py +92 -0
  29. package/brain/memory/fact_extractor.py +209 -0
  30. package/brain/memory/index.py +54 -0
  31. package/brain/memory/manager.py +151 -0
  32. package/brain/memory/summarizer.py +102 -0
  33. package/brain/memory/vector_store.py +297 -0
  34. package/brain/memory/working.py +43 -0
  35. package/brain/narrative.py +343 -0
  36. package/brain/stt/__init__.py +4 -0
  37. package/brain/stt/google_stt.py +83 -0
  38. package/brain/stt/whisper_stt.py +82 -0
  39. package/brain/subconscious/__init__.py +33 -0
  40. package/brain/subconscious/actions.py +136 -0
  41. package/brain/subconscious/evaluation.py +166 -0
  42. package/brain/subconscious/goal_system.py +90 -0
  43. package/brain/subconscious/goals.py +41 -0
  44. package/brain/subconscious/impulse_generator.py +200 -0
  45. package/brain/subconscious/impulses.py +48 -0
  46. package/brain/subconscious/learning.py +24 -0
  47. package/brain/subconscious/learning_system.py +79 -0
  48. package/brain/subconscious/loop.py +398 -0
  49. package/brain/subconscious/manifest.md +32 -0
  50. package/brain/subconscious/relationship.py +47 -0
  51. package/brain/subconscious/relationship_memory.py +83 -0
  52. package/brain/subconscious/response_analyzer.py +74 -0
  53. package/brain/subconscious/templates.py +70 -0
  54. package/brain/subconscious/thought.py +37 -0
  55. package/brain/subconscious/working_memory.py +97 -0
  56. package/cli/index.js +371 -0
  57. package/config/directives.example.json +28 -0
  58. package/config/instructions.example.md +16 -0
  59. package/config/self.example.json +74 -0
  60. package/config/settings.example.json +95 -0
  61. package/core/__init__.py +1 -0
  62. package/core/config.py +54 -0
  63. package/core/directives.py +198 -0
  64. package/core/events.py +50 -0
  65. package/core/follow_up.py +267 -0
  66. package/core/hot_reload.py +174 -0
  67. package/core/initialization.py +253 -0
  68. package/core/manifest.md +28 -0
  69. package/core/media_handler.py +241 -0
  70. package/core/memory_monitor.py +200 -0
  71. package/core/message_handler.py +1440 -0
  72. package/core/proactive_generator.py +277 -0
  73. package/core/self.py +188 -0
  74. package/core/settings.py +169 -0
  75. package/core/skills_registry.py +357 -0
  76. package/core/state.py +27 -0
  77. package/core/subconscious_bridge.py +93 -0
  78. package/core/thinking.py +175 -0
  79. package/core/user_manager.py +306 -0
  80. package/core/user_tracker.py +144 -0
  81. package/demo/index.html +144 -0
  82. package/docker-compose.yml +28 -0
  83. package/docs/assets/logo.svg +15 -0
  84. package/docs/index.html +355 -0
  85. package/heart/__init__.py +93 -0
  86. package/heart/afterglow.py +215 -0
  87. package/heart/attachment.py +186 -0
  88. package/heart/circadian.py +251 -0
  89. package/heart/complex_emotions.py +114 -0
  90. package/heart/conflicts.py +589 -0
  91. package/heart/core.py +387 -0
  92. package/heart/emotional_decay.py +59 -0
  93. package/heart/emotional_memory.py +261 -0
  94. package/heart/emotional_state.py +146 -0
  95. package/heart/emotional_variability.py +156 -0
  96. package/heart/hormonal.py +424 -0
  97. package/heart/inconsistency.py +1222 -0
  98. package/heart/integrity.py +469 -0
  99. package/heart/interoception.py +997 -0
  100. package/heart/love.py +120 -0
  101. package/heart/manifest.md +25 -0
  102. package/heart/mood_shifts.py +169 -0
  103. package/heart/phantom_somatic.py +259 -0
  104. package/heart/predictive.py +374 -0
  105. package/heart/scars.py +474 -0
  106. package/heart/somatic.py +482 -0
  107. package/heart/soul.py +633 -0
  108. package/heart/telemetry.py +942 -0
  109. package/heart/triggers.py +119 -0
  110. package/heart/unconscious.py +443 -0
  111. package/input/__init__.py +1 -0
  112. package/input/manifest.md +24 -0
  113. package/input/telegram/__init__.py +1 -0
  114. package/input/telegram/commands.py +762 -0
  115. package/input/telegram/listener.py +532 -0
  116. package/main.py +90 -0
  117. package/manifest.md +28 -0
  118. package/mypics/.gitkeep +1 -0
  119. package/myvids/.gitkeep +1 -0
  120. package/output/__init__.py +1 -0
  121. package/output/images/__init__.py +1 -0
  122. package/output/images/fal_gen.py +43 -0
  123. package/output/manifest.md +26 -0
  124. package/output/text/__init__.py +1 -0
  125. package/output/text/sender.py +22 -0
  126. package/output/voice/__init__.py +64 -0
  127. package/output/voice/google_tts.py +252 -0
  128. package/output/voice/gtts_tts.py +214 -0
  129. package/output/voice/vibe_tts.py +190 -0
  130. package/package.json +58 -0
  131. package/pyproject.toml +23 -0
  132. package/requirements.txt +21 -0
  133. package/skills/__init__.py +1 -0
  134. package/skills/anticipation_engine/__init__.py +8 -0
  135. package/skills/anticipation_engine/engine.py +618 -0
  136. package/skills/anticipation_engine/manifest.md +192 -0
  137. package/skills/calendar/__init__.py +1 -0
  138. package/skills/content_unlocks/__init__.py +8 -0
  139. package/skills/content_unlocks/manifest.md +231 -0
  140. package/skills/content_unlocks/unlocks.py +945 -0
  141. package/skills/exclusive_moments/__init__.py +8 -0
  142. package/skills/exclusive_moments/manifest.md +145 -0
  143. package/skills/exclusive_moments/moments.py +506 -0
  144. package/skills/intimacy_layers/__init__.py +8 -0
  145. package/skills/intimacy_layers/layers.py +703 -0
  146. package/skills/intimacy_layers/manifest.md +203 -0
  147. package/skills/manifest.md +67 -0
  148. package/skills/memory_callbacks/__init__.py +9 -0
  149. package/skills/memory_callbacks/callbacks.py +748 -0
  150. package/skills/memory_callbacks/manifest.md +170 -0
  151. package/skills/message_scheduler/__init__.py +19 -0
  152. package/skills/message_scheduler/manifest.md +107 -0
  153. package/skills/message_scheduler/scheduler.py +510 -0
  154. package/skills/photo_manager/__init__.py +1 -0
  155. package/skills/photo_manager/scanner.py +296 -0
  156. package/skills/relationship_milestones/__init__.py +8 -0
  157. package/skills/relationship_milestones/manifest.md +206 -0
  158. package/skills/relationship_milestones/tracker.py +494 -0
  159. package/skills/self_authorship/__init__.py +23 -0
  160. package/skills/self_authorship/author.py +331 -0
  161. package/skills/self_authorship/manifest.md +24 -0
  162. package/skills/video_manager/__init__.py +5 -0
  163. package/skills/video_manager/manifest.md +37 -0
  164. package/skills/video_manager/scanner.py +229 -0
  165. package/webui/__init__.py +3 -0
  166. package/webui/app.py +936 -0
  167. package/webui/bridge.py +366 -0
  168. package/webui/static/index.html +2070 -0
@@ -0,0 +1,43 @@
1
+ """
2
+ Output: Fal.ai Image Generator
3
+ Generate images using fal.ai API
4
+ """
5
+
6
+ import aiohttp
7
+
8
class FalImageGen:
    """Image generator backed by the fal.ai HTTP API.

    Holds the API key and the endpoint URL; `generate` posts a prompt and
    returns the URL of the first image in the response.
    """

    def __init__(self, api_key: str):
        """Store the credentials and the endpoint used for every request.

        Args:
            api_key: fal.ai API key. An empty/falsy key disables generation
                (`generate` then returns "" without touching the network).
        """
        self.api_key = api_key
        # NOTE(review): queue.fal.run is fal's queue-submission host; confirm
        # that its 200 response really carries an "images" list (otherwise
        # `generate` will always return "").
        self.base_url = "https://queue.fal.run/fal-ai/flux/schnell"

    @staticmethod
    def _first_image_url(result) -> str:
        """Return the URL of the first image in a decoded response, or "".

        Tolerates a missing key, an empty list, a non-dict entry and a null
        URL, so a malformed response never raises.
        """
        if not isinstance(result, dict):
            return ""
        images = result.get("images")
        if not isinstance(images, (list, tuple)) or not images:
            return ""
        first = images[0]
        if not isinstance(first, dict):
            return ""
        return first.get("url") or ""

    async def generate(self, prompt: str, size: str = "square_hd") -> str:
        """Generate an image and return its URL.

        Args:
            prompt: Text prompt sent as the "prompt" field.
            size: Value sent as the "image_size" field.

        Returns:
            The image URL, or "" when no API key is configured, the request
            fails, the status is not 200, or the response has no image URL.
        """
        if not self.api_key:
            print("[Fal.ai] No API key configured")
            return ""

        request_headers = {
            "Authorization": f"Key {self.api_key}",
            "Content-Type": "application/json"
        }
        request_body = {
            "prompt": prompt,
            "image_size": size
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    self.base_url,
                    headers=request_headers,
                    json=request_body,
                    timeout=aiohttp.ClientTimeout(total=60)
                ) as resp:
                    if resp.status != 200:
                        print(f"[Fal.ai] Error: {resp.status}")
                        return ""
                    result = await resp.json()
                    return self._first_image_url(result)
        except Exception as e:
            # Best-effort boundary: any network/decoding failure means
            # "no image" for the caller.
            print(f"[Fal.ai] Error: {e}")
            return ""
@@ -0,0 +1,26 @@
1
+ # Output - Actions
2
+
3
+ How the AI responds to users.
4
+
5
+ ## Modules
6
+ - `text/` - Text message sender
7
+ - `voice/` - Voice synthesis (VibeTTS by default; Google Cloud TTS and gTTS providers are also available)
8
+ - `vibe_tts.py` - TTS client with auto-splitting (5000 char limit)
9
+ - `sender.py` - Voice file sender
10
+ - `images/` - Image generation via Fal.ai
11
+ - `fal_gen.py` - Fal.ai API client
12
+
13
+ ## Voice (VibeTTS)
14
+ - Connects to VibeVoice server (VIBE_TTS_URL)
15
+ - Mood-based CFG scaling (high_desire=1.9, neutral=1.5)
16
+ - Auto-splits long texts at paragraph boundaries
17
+ - Output: OGG format for Telegram
18
+
19
+ ## Image Generation
20
+ - Fal.ai API for AI image generation
21
+ - Prompt enhancement based on mood/context
22
+
23
+ ## Integration Points
24
+ - Receives events: `send_text`, `send_voice_file`, `send_image`, `send_video`
25
+ - Voice triggered by: user request, high desire, is_high_desire state
26
+ - Images/videos selected based on arousal level and context
@@ -0,0 +1 @@
1
+ """Text output"""
@@ -0,0 +1,22 @@
1
+ """
2
+ Output: Text Sender
3
+ Send text messages
4
+ """
5
+
6
class TextSender:
    """Logs outgoing text messages announced on the event bus."""

    def __init__(self, nervous, config):
        """Keep references to the bus and config, then subscribe.

        Args:
            nervous: Event bus object; only its `on(event, handler)` method
                is used here.
            config: Configuration object, stored but not read by this class.
        """
        self.config = config
        self.nervous = nervous

        # Subscribe to outgoing-text events
        self.nervous.on("send_text", self._send)

    async def _send(self, data: dict):
        """Log a preview of an outgoing message.

        Only logging happens here; per the original note, the actual
        delivery is performed by TelegramListener.

        Args:
            data: Event payload; "text" (default "") and "mood"
                (default "neutral") are read.
        """
        mood = data.get("mood", "neutral")
        text = data.get("text", "")

        # Show at most 100 characters, marking truncation with an ellipsis
        preview = text[:100]
        suffix = "..." if len(text) > 100 else ""
        print(f"[Outgoing {mood}] {preview}{suffix}")
@@ -0,0 +1,64 @@
1
+ """
2
+ Output: Voice TTS Factory
3
+ Creates the appropriate TTS provider based on settings
4
+ """
5
+
6
+ from typing import Optional
7
+
8
+
9
async def create_tts(provider: str = "vibe", **kwargs) -> Optional[object]:
    """
    Build the TTS backend named by `provider` and verify it is reachable.

    The backend module is imported lazily, so only the selected provider's
    dependencies are loaded.

    Args:
        provider: Case-insensitive provider name. Accepted values:
            "vibe"/"vibevoice", "google"/"google-tts", "gtts"/"gtranslate".
        **kwargs: Provider-specific arguments:
            - vibe: url (defaults to "http://localhost:8080")
            - google: api_key (optional)
            - gtts: no arguments needed

    Returns:
        The TTS instance when its `is_available()` check passes; None when
        the check fails or the provider name is unknown.
    """
    provider = provider.lower()

    if provider in ("vibe", "vibevoice"):
        from .vibe_tts import VibeTTS
        url = kwargs.get("url", "http://localhost:8080")
        engine = VibeTTS(url)
        connected_msg = f"[TTS] Connected to VibeVoice at {url}"
        unavailable_msg = f"[TTS] VibeVoice not available at {url}"
    elif provider in ("google", "google-tts"):
        from .google_tts import GoogleTTS
        engine = GoogleTTS(kwargs.get("api_key"))
        connected_msg = "[TTS] Connected to Google Cloud TTS"
        unavailable_msg = "[TTS] Google Cloud TTS not available"
    elif provider in ("gtts", "gtranslate"):
        from .gtts_tts import GTTS
        engine = GTTS()
        connected_msg = "[TTS] Connected to gTTS (Google Translate - FREE)"
        unavailable_msg = "[TTS] gTTS not available. Install with: pip install gtts"
    else:
        print(f"[TTS] Unknown provider: {provider}")
        return None

    # Every backend exposes the same async availability probe
    if not await engine.is_available():
        print(unavailable_msg)
        return None

    print(connected_msg)
    return engine
60
+
61
+
62
def get_available_providers() -> list:
    """Return the canonical TTS provider names as a fresh list."""
    canonical_names = ("vibe", "google", "gtts")
    return list(canonical_names)
@@ -0,0 +1,252 @@
1
+ """
2
+ Output: Google Cloud TTS
3
+ Text-to-speech using Google Cloud Text-to-Speech API
4
+
5
+ Uses the free tier (up to 4M chars/month) with Emma voice
6
+ """
7
+
8
+ import aiohttp
9
+ import asyncio
10
+ import re
11
+ import tempfile
12
+ import subprocess
13
+ from pathlib import Path
14
+ from typing import Optional
15
+
16
+ VOICE_OUTPUT_PATH = "/tmp/alive_ai_voice.ogg"
17
+ MAX_CHARS = 5000 # Google TTS limit per request
18
+
19
+
20
class GoogleTTS:
    """Text-to-speech via the Google Cloud Text-to-Speech REST API.

    `generate` cleans the text, splits it into request-sized parts,
    synthesizes each part as OGG/Opus and writes the combined audio to the
    module-level VOICE_OUTPUT_PATH.
    """

    # Friendly voice names -> Google voice IDs
    AVAILABLE_VOICES = {
        "emma": "en-US-Neural2-F",     # Emma - natural female US
        "emma-uk": "en-GB-Neural2-F",  # UK female
        "guy": "en-US-Neural2-D",      # Male US
        "guy-uk": "en-GB-Neural2-D",   # Male UK
    }

    DEFAULT_VOICE = "emma"

    # Speaking rates by mood (0.25 to 4.0, 1.0 is normal)
    MOOD_RATES = {
        "chill": 0.9, "neutral": 1.0, "happy": 1.05,
        "flirty": 1.0, "excited": 1.1, "high_desire": 0.95, "intense": 1.0,
        "sad": 0.9, "tired": 0.85
    }

    # Markdown marker characters that are deleted outright
    _MARKDOWN_CHARS = str.maketrans("", "", "*_~")

    # Everything from U+24C2 upward is removed. This is exactly the union of
    # the emoji ranges this class used to strip one by one. It also removes
    # non-emoji scripts in that range (e.g. CJK), which is acceptable only
    # because every voice offered here is English.
    _STRIP_RANGE = re.compile(r'[\u24C2-\U0010FFFF]')

    def __init__(self, api_key: str = None):
        """
        Initialize Google TTS.

        Args:
            api_key: Google Cloud API key. When falsy, requests are sent
                without a `key` query parameter.
        """
        self.api_key = api_key
        self.base_url = "https://texttospeech.googleapis.com/v1"

    def prepare_text(self, text: str) -> str:
        """Clean text for TTS: drop markdown markers, emojis, extra spaces.

        Steps, in order: delete every `*`, `_` and `~`; collapse runs of
        3+ dots to "...", and runs of `!` / `?` to a single character;
        delete all characters at or above U+24C2; collapse whitespace runs
        (including newlines) to one space and trim.
        """
        text = text.translate(self._MARKDOWN_CHARS)
        text = re.sub(r'\.{3,}', '...', text)
        text = re.sub(r'!{2,}', '!', text)
        text = re.sub(r'\?{2,}', '?', text)

        # Remove ALL emojis - they break TTS
        text = self._STRIP_RANGE.sub('', text)

        # Clean up extra spaces
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    @staticmethod
    def _split_sentences(para: str, limit: int) -> list:
        """Pack the sentences of one oversize paragraph into chunks <= limit.

        Sentences are delimited by ". " or a newline and re-joined with a
        single space. A sentence longer than `limit` is hard-sliced so no
        chunk ever exceeds the limit.
        """
        chunks = []
        chunk = ""
        for sentence in para.replace('. ', '.\n').split('\n'):
            if len(chunk) + len(sentence) + 1 <= limit:
                chunk = chunk + " " + sentence if chunk else sentence
                continue
            if chunk:
                chunks.append(chunk)
            while len(sentence) > limit:
                chunks.append(sentence[:limit])
                sentence = sentence[limit:]
            chunk = sentence
        if chunk:
            chunks.append(chunk)
        return chunks

    def split_text(self, text: str, max_chars: int = None) -> list:
        """Split long text at paragraph/sentence boundaries.

        Args:
            text: Text to split.
            max_chars: Maximum length of each part; defaults to the
                module-level MAX_CHARS.

        Returns:
            A list of parts, each at most `max_chars` long. Text that
            already fits is returned as a single-element list.
        """
        limit = MAX_CHARS if max_chars is None else max_chars
        if len(text) <= limit:
            return [text]

        parts = []
        current = ""

        for para in text.split('\n\n'):
            if len(current) + len(para) + 2 <= limit:
                current = current + "\n\n" + para if current else para
                continue

            if current:
                parts.append(current)
                # Reset after flushing; otherwise the flushed text would be
                # emitted a second time together with a later paragraph.
                current = ""

            if len(para) > limit:
                # Single paragraph is too long: split by sentences
                parts.extend(self._split_sentences(para, limit))
            else:
                current = para

        if current:
            parts.append(current)

        return parts

    def get_voice_id(self, voice: str) -> str:
        """Get Google voice ID from friendly name (default voice if unknown)"""
        return self.AVAILABLE_VOICES.get(voice, self.AVAILABLE_VOICES[self.DEFAULT_VOICE])

    def get_rate_for_mood(self, mood: str) -> float:
        """Return the speaking rate for `mood`, or 1.0 for an unknown mood."""
        return self.MOOD_RATES.get(mood, 1.0)

    @staticmethod
    def _concat_parts(audio_parts: list, output_path: str) -> None:
        """Concatenate OGG byte strings into `output_path` using ffmpeg.

        Blocking; intended to run in an executor. If ffmpeg is missing,
        times out or exits non-zero, only the first part is written so the
        caller still gets playable audio. All temporary files are removed.
        """
        temp_files = []
        list_path = None
        try:
            for part in audio_parts:
                with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as tf:
                    temp_files.append(tf.name)
                    tf.write(part)
            with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as list_file:
                list_path = list_file.name
                for tf_name in temp_files:
                    list_file.write(f"file '{tf_name}'\n")
            subprocess.run(
                ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
                 "-i", list_path, "-c", "copy", output_path],
                capture_output=True, timeout=30,
                check=True  # a non-zero exit must trigger the fallback
            )
        except Exception as e:
            print(f"[GoogleTTS] ffmpeg concat failed, using first part: {e}")
            Path(output_path).write_bytes(audio_parts[0])
        finally:
            if list_path is not None:
                Path(list_path).unlink(missing_ok=True)
            for tf_name in temp_files:
                Path(tf_name).unlink(missing_ok=True)

    async def generate(self, text: str, voice: str = None,
                       cfg: float = None, mood: str = "neutral") -> str:
        """Generate audio using Google Cloud TTS.

        Args:
            text: Raw text; it is cleaned with `prepare_text` first.
            voice: Friendly voice name; defaults to DEFAULT_VOICE.
            cfg: Accepted but not used by this provider.
            mood: Selects the speaking rate via MOOD_RATES.

        Returns:
            VOICE_OUTPUT_PATH on success, or "" when every part failed.
            Parts that fail are skipped, so the audio may be incomplete.
        """
        if voice is None:
            voice = self.DEFAULT_VOICE

        voice_id = self.get_voice_id(voice)
        speaking_rate = self.get_rate_for_mood(mood)

        text = self.prepare_text(text)
        print(f"[GoogleTTS] Generating voice for {len(text)} chars with voice {voice_id}...")

        # Split if needed
        parts = self.split_text(text)
        if len(parts) > 1:
            print(f"[GoogleTTS] Split into {len(parts)} parts")

        audio_parts = []
        for i, part in enumerate(parts):
            print(f"[GoogleTTS] Processing part {i+1}/{len(parts)} ({len(part)} chars)")
            audio = await self._generate_single(part, voice_id, speaking_rate)
            if audio:
                audio_parts.append(audio)
            else:
                print(f"[GoogleTTS] Part {i+1} failed")

        if not audio_parts:
            return ""

        # Combine all parts
        if len(audio_parts) == 1:
            Path(VOICE_OUTPUT_PATH).write_bytes(audio_parts[0])
        else:
            # ffmpeg can take seconds; keep it off the event loop
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                None, self._concat_parts, audio_parts, VOICE_OUTPUT_PATH
            )
        print(f"[GoogleTTS] Generated audio file")
        return VOICE_OUTPUT_PATH

    def _synthesize_url(self) -> str:
        """Return the text:synthesize URL, with `?key=` when a key is set."""
        url = f"{self.base_url}/text:synthesize"
        if self.api_key:
            url += f"?key={self.api_key}"
        return url

    async def _generate_single(self, text: str, voice_id: str, speaking_rate: float) -> bytes:
        """Generate single audio part via Google Cloud TTS API.

        Returns:
            Decoded OGG/Opus bytes, or b"" on timeout, non-200 status,
            missing audio content or any other error.
        """
        import base64

        payload = {
            "input": {"text": text},
            "voice": {
                # Language code must match the voice's locale
                "languageCode": "en-GB" if "en-GB" in voice_id else "en-US",
                "name": voice_id
            },
            "audioConfig": {
                "audioEncoding": "OGG_OPUS",
                "speakingRate": speaking_rate,
                "pitch": 0.0
            }
        }

        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    self._synthesize_url(),
                    json=payload,
                    timeout=aiohttp.ClientTimeout(total=60)
                ) as resp:
                    if resp.status != 200:
                        error = await resp.text()
                        print(f"[GoogleTTS] Error {resp.status}: {error[:200]}")
                        return b""
                    result = await resp.json()
                    # Response contains base64-encoded audio
                    audio_content = result.get("audioContent", "")
                    if audio_content:
                        return base64.b64decode(audio_content)
                    return b""
        except asyncio.TimeoutError:
            print("[GoogleTTS] Timeout")
            return b""
        except Exception as e:
            print(f"[GoogleTTS] Error: {e}")
            return b""

    async def is_available(self) -> bool:
        """Check if Google TTS is available.

        Sends a minimal real synthesis request ("test") and reports whether
        it returned HTTP 200; any exception counts as unavailable.
        """
        payload = {
            "input": {"text": "test"},
            "voice": {"languageCode": "en-US", "name": "en-US-Neural2-F"},
            "audioConfig": {"audioEncoding": "OGG_OPUS"}
        }
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    self._synthesize_url(),
                    json=payload,
                    # Same timeout type as the other requests in this class
                    timeout=aiohttp.ClientTimeout(total=10)
                ) as resp:
                    return resp.status == 200
        except Exception:
            return False
@@ -0,0 +1,214 @@
1
+ """
2
+ Output: gTTS (Google Translate TTS)
3
+ Free text-to-speech using Google Translate's TTS API
4
+ No API key required!
5
+ """
6
+
7
+ import asyncio
8
+ import re
9
+ import tempfile
10
+ import subprocess
11
+ from pathlib import Path
12
+ from typing import Optional
13
+ from concurrent.futures import ThreadPoolExecutor
14
+
15
+ VOICE_OUTPUT_PATH = "/tmp/alive_ai_voice.ogg"
16
+ MAX_CHARS = 5000 # gTTS limit
17
+
18
+ # Thread pool for blocking gTTS calls
19
+ _executor = ThreadPoolExecutor(max_workers=2)
20
+
21
+
22
class GTTS:
    """Text-to-speech via gTTS (Google Translate TTS) - Completely free!

    `generate` cleans the text, splits it into parts, synthesizes each part
    in the module-level thread pool and writes the combined audio next to
    the module-level VOICE_OUTPUT_PATH (".ogg" or ".mp3").
    """

    # Available voices. gTTS has no named voices like "Emma": a plain value
    # is a language code, while a value containing "." is a Google Translate
    # domain suffix that selects an English accent.
    AVAILABLE_VOICES = {
        "en": "en",           # English (default)
        "en-us": "en",        # US English
        "en-uk": "co.uk",     # UK English
        "en-au": "com.au",    # Australian English
        "en-in": "co.in",     # Indian English
        "it": "it",           # Italian
        "es": "es",           # Spanish
        "fr": "fr",           # French
        "de": "de",           # German
        "pt": "pt",           # Portuguese
    }

    DEFAULT_VOICE = "en"
    DEFAULT_LANG = "en"

    # Markdown marker characters that are deleted outright
    _MARKDOWN_CHARS = str.maketrans("", "", "*_~")

    # Everything from U+24C2 upward is removed. This is exactly the union of
    # the emoji ranges this class used to strip one by one. It also removes
    # non-emoji scripts in that range (e.g. CJK); the languages offered in
    # AVAILABLE_VOICES are all written below that range.
    _STRIP_RANGE = re.compile(r'[\u24C2-\U0010FFFF]')

    def __init__(self):
        """Initialize gTTS - no API key needed!"""
        # Cached result of the import probe in is_available()
        self._available = None

    def prepare_text(self, text: str) -> str:
        """Clean text for TTS: drop markdown markers, emojis, extra spaces.

        Steps, in order: delete every `*`, `_` and `~`; collapse runs of
        3+ dots to "...", and runs of `!` / `?` to a single character;
        delete all characters at or above U+24C2; collapse whitespace runs
        (including newlines) to one space and trim.
        """
        text = text.translate(self._MARKDOWN_CHARS)
        text = re.sub(r'\.{3,}', '...', text)
        text = re.sub(r'!{2,}', '!', text)
        text = re.sub(r'\?{2,}', '?', text)

        # Remove ALL emojis
        text = self._STRIP_RANGE.sub('', text)

        # Clean up extra spaces
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    @staticmethod
    def _split_sentences(para: str, limit: int) -> list:
        """Pack the sentences of one oversize paragraph into chunks <= limit.

        Sentences are delimited by ". " or a newline and re-joined with a
        single space. A sentence longer than `limit` is hard-sliced so no
        chunk ever exceeds the limit.
        """
        chunks = []
        chunk = ""
        for sentence in para.replace('. ', '.\n').split('\n'):
            if len(chunk) + len(sentence) + 1 <= limit:
                chunk = chunk + " " + sentence if chunk else sentence
                continue
            if chunk:
                chunks.append(chunk)
            while len(sentence) > limit:
                chunks.append(sentence[:limit])
                sentence = sentence[limit:]
            chunk = sentence
        if chunk:
            chunks.append(chunk)
        return chunks

    def split_text(self, text: str, max_chars: int = None) -> list:
        """Split long text at paragraph/sentence boundaries.

        Args:
            text: Text to split.
            max_chars: Maximum length of each part; defaults to the
                module-level MAX_CHARS.

        Returns:
            A list of parts, each at most `max_chars` long. Text that
            already fits is returned as a single-element list.
        """
        limit = MAX_CHARS if max_chars is None else max_chars
        if len(text) <= limit:
            return [text]

        parts = []
        current = ""

        for para in text.split('\n\n'):
            if len(current) + len(para) + 2 <= limit:
                current = current + "\n\n" + para if current else para
                continue

            if current:
                parts.append(current)
                # Reset after flushing; otherwise the flushed text would be
                # emitted a second time together with a later paragraph.
                current = ""

            if len(para) > limit:
                parts.extend(self._split_sentences(para, limit))
            else:
                current = para

        if current:
            parts.append(current)

        return parts

    def _resolve_voice(self, voice: str) -> tuple:
        """Map a friendly voice name to a `(lang, tld)` pair for gTTS.

        Accent entries in AVAILABLE_VOICES hold a domain suffix, which must
        be passed to gTTS as `tld` together with lang "en". For all other
        voices `tld` is None and gTTS keeps its own default. Unknown voices
        fall back to DEFAULT_LANG.
        """
        target = self.AVAILABLE_VOICES.get(voice, self.DEFAULT_LANG)
        if "." in target:
            return "en", target
        return target, None

    def _generate_blocking(self, text: str, lang: str, tld: str = None) -> bytes:
        """Generate audio in a blocking manner (runs in thread pool).

        Args:
            text: Cleaned text to synthesize.
            lang: gTTS language code.
            tld: Optional Google Translate domain suffix (accent); only
                forwarded to gTTS when set.

        Returns:
            OGG bytes when pydub converts successfully, otherwise the raw
            MP3 bytes; b"" when gtts is missing or synthesis fails.
        """
        import io

        try:
            from gtts import gTTS

            options = {"text": text, "lang": lang, "slow": False}
            if tld:
                options["tld"] = tld
            tts = gTTS(**options)
            mp3_buffer = io.BytesIO()
            tts.write_to_fp(mp3_buffer)
        except ImportError:
            print("[GTTS] gtts not installed. Run: pip install gtts")
            return b""
        except Exception as e:
            print(f"[GTTS] Error: {e}")
            return b""

        mp3_bytes = mp3_buffer.getvalue()

        # Convert MP3 to OGG for Telegram when pydub is available
        try:
            from pydub import AudioSegment
        except ImportError:
            # No pydub - return MP3, Telegram accepts it too
            return mp3_bytes

        try:
            audio = AudioSegment.from_mp3(io.BytesIO(mp3_bytes))
            ogg_buffer = io.BytesIO()
            audio.export(ogg_buffer, format="ogg")
            return ogg_buffer.getvalue()
        except Exception as e:
            # Conversion failed, but the synthesized MP3 is still usable
            print(f"[GTTS] Error: {e}")
            return mp3_bytes

    @staticmethod
    def _concat_parts(audio_parts: list, output_path: str, ext: str) -> None:
        """Concatenate audio byte strings into `output_path` using ffmpeg.

        Blocking; intended to run in an executor. If ffmpeg is missing,
        times out or exits non-zero, only the first part is written so the
        caller still gets playable audio. All temporary files are removed.
        """
        temp_files = []
        list_path = None
        try:
            for part in audio_parts:
                with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tf:
                    temp_files.append(tf.name)
                    tf.write(part)
            with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as list_file:
                list_path = list_file.name
                for tf_name in temp_files:
                    list_file.write(f"file '{tf_name}'\n")
            subprocess.run(
                ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
                 "-i", list_path, "-c", "copy", output_path],
                capture_output=True, timeout=30,
                check=True  # a non-zero exit must trigger the fallback
            )
        except Exception as e:
            print(f"[GTTS] ffmpeg concat failed, using first part: {e}")
            Path(output_path).write_bytes(audio_parts[0])
        finally:
            if list_path is not None:
                Path(list_path).unlink(missing_ok=True)
            for tf_name in temp_files:
                Path(tf_name).unlink(missing_ok=True)

    async def generate(self, text: str, voice: str = None,
                       cfg: float = None, mood: str = "neutral") -> str:
        """Generate audio using gTTS.

        Args:
            text: Raw text; it is cleaned with `prepare_text` first.
            voice: Key of AVAILABLE_VOICES; defaults to DEFAULT_VOICE.
            cfg: Accepted but not used by this provider.
            mood: Accepted but not used by this provider.

        Returns:
            Path of the written audio file (VOICE_OUTPUT_PATH with ".ogg"
            or ".mp3"), or "" when every part failed. Parts that fail are
            skipped, so the audio may be incomplete.
        """
        if voice is None:
            voice = self.DEFAULT_VOICE

        # Map voice to language code (+ accent domain when applicable)
        lang, tld = self._resolve_voice(voice)

        text = self.prepare_text(text)
        print(f"[GTTS] Generating voice for {len(text)} chars with lang={lang}...")

        # Split if needed
        parts = self.split_text(text)
        if len(parts) > 1:
            print(f"[GTTS] Split into {len(parts)} parts")

        audio_parts = []
        loop = asyncio.get_running_loop()

        for i, part in enumerate(parts):
            print(f"[GTTS] Processing part {i+1}/{len(parts)} ({len(part)} chars)")
            audio = await loop.run_in_executor(
                _executor, self._generate_blocking, part, lang, tld
            )
            if audio:
                audio_parts.append(audio)
            else:
                print(f"[GTTS] Part {i+1} failed")

        if not audio_parts:
            return ""

        # Determine extension based on format ("OggS" is the OGG magic)
        ext = ".ogg" if audio_parts[0][:4] == b'OggS' else ".mp3"
        output_path = VOICE_OUTPUT_PATH.replace(".ogg", ext)

        # Combine all parts
        if len(audio_parts) == 1:
            Path(output_path).write_bytes(audio_parts[0])
        else:
            # ffmpeg can take seconds; keep it off the event loop
            await loop.run_in_executor(
                _executor, self._concat_parts, audio_parts, output_path, ext
            )
        print(f"[GTTS] Generated audio file")
        return output_path

    async def is_available(self) -> bool:
        """Check if gTTS is available (i.e. the `gtts` package imports).

        The result is cached on the instance after the first call.
        """
        if self._available is not None:
            return self._available

        try:
            from gtts import gTTS
            self._available = True
            return True
        except ImportError:
            self._available = False
            return False