alive-ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +24 -0
- package/LICENSE +21 -0
- package/README.md +143 -0
- package/alive_ai/__init__.py +3 -0
- package/brain/__init__.py +59 -0
- package/brain/almost_said.py +154 -0
- package/brain/bid_detector.py +636 -0
- package/brain/conversation_flow.py +135 -0
- package/brain/curiosity.py +328 -0
- package/brain/default_mode.py +1438 -0
- package/brain/dreams.py +220 -0
- package/brain/embeddings/__init__.py +82 -0
- package/brain/emotional_memory.py +949 -0
- package/brain/global_activity.py +173 -0
- package/brain/group_dynamics.py +63 -0
- package/brain/linguistic.py +235 -0
- package/brain/llm/__init__.py +63 -0
- package/brain/llm/base.py +33 -0
- package/brain/llm/fallback_router.py +309 -0
- package/brain/llm/manifest.md +30 -0
- package/brain/llm/ollama.py +218 -0
- package/brain/llm/openrouter.py +151 -0
- package/brain/llm/provider.py +205 -0
- package/brain/llm/unified.py +423 -0
- package/brain/llm/zai.py +169 -0
- package/brain/manifest.md +23 -0
- package/brain/memory/__init__.py +123 -0
- package/brain/memory/episodic.py +92 -0
- package/brain/memory/fact_extractor.py +209 -0
- package/brain/memory/index.py +54 -0
- package/brain/memory/manager.py +151 -0
- package/brain/memory/summarizer.py +102 -0
- package/brain/memory/vector_store.py +297 -0
- package/brain/memory/working.py +43 -0
- package/brain/narrative.py +343 -0
- package/brain/stt/__init__.py +4 -0
- package/brain/stt/google_stt.py +83 -0
- package/brain/stt/whisper_stt.py +82 -0
- package/brain/subconscious/__init__.py +33 -0
- package/brain/subconscious/actions.py +136 -0
- package/brain/subconscious/evaluation.py +166 -0
- package/brain/subconscious/goal_system.py +90 -0
- package/brain/subconscious/goals.py +41 -0
- package/brain/subconscious/impulse_generator.py +200 -0
- package/brain/subconscious/impulses.py +48 -0
- package/brain/subconscious/learning.py +24 -0
- package/brain/subconscious/learning_system.py +79 -0
- package/brain/subconscious/loop.py +398 -0
- package/brain/subconscious/manifest.md +32 -0
- package/brain/subconscious/relationship.py +47 -0
- package/brain/subconscious/relationship_memory.py +83 -0
- package/brain/subconscious/response_analyzer.py +74 -0
- package/brain/subconscious/templates.py +70 -0
- package/brain/subconscious/thought.py +37 -0
- package/brain/subconscious/working_memory.py +97 -0
- package/cli/index.js +371 -0
- package/config/directives.example.json +28 -0
- package/config/instructions.example.md +16 -0
- package/config/self.example.json +74 -0
- package/config/settings.example.json +95 -0
- package/core/__init__.py +1 -0
- package/core/config.py +54 -0
- package/core/directives.py +198 -0
- package/core/events.py +50 -0
- package/core/follow_up.py +267 -0
- package/core/hot_reload.py +174 -0
- package/core/initialization.py +253 -0
- package/core/manifest.md +28 -0
- package/core/media_handler.py +241 -0
- package/core/memory_monitor.py +200 -0
- package/core/message_handler.py +1440 -0
- package/core/proactive_generator.py +277 -0
- package/core/self.py +188 -0
- package/core/settings.py +169 -0
- package/core/skills_registry.py +357 -0
- package/core/state.py +27 -0
- package/core/subconscious_bridge.py +93 -0
- package/core/thinking.py +175 -0
- package/core/user_manager.py +306 -0
- package/core/user_tracker.py +144 -0
- package/demo/index.html +144 -0
- package/docker-compose.yml +28 -0
- package/docs/assets/logo.svg +15 -0
- package/docs/index.html +355 -0
- package/heart/__init__.py +93 -0
- package/heart/afterglow.py +215 -0
- package/heart/attachment.py +186 -0
- package/heart/circadian.py +251 -0
- package/heart/complex_emotions.py +114 -0
- package/heart/conflicts.py +589 -0
- package/heart/core.py +387 -0
- package/heart/emotional_decay.py +59 -0
- package/heart/emotional_memory.py +261 -0
- package/heart/emotional_state.py +146 -0
- package/heart/emotional_variability.py +156 -0
- package/heart/hormonal.py +424 -0
- package/heart/inconsistency.py +1222 -0
- package/heart/integrity.py +469 -0
- package/heart/interoception.py +997 -0
- package/heart/love.py +120 -0
- package/heart/manifest.md +25 -0
- package/heart/mood_shifts.py +169 -0
- package/heart/phantom_somatic.py +259 -0
- package/heart/predictive.py +374 -0
- package/heart/scars.py +474 -0
- package/heart/somatic.py +482 -0
- package/heart/soul.py +633 -0
- package/heart/telemetry.py +942 -0
- package/heart/triggers.py +119 -0
- package/heart/unconscious.py +443 -0
- package/input/__init__.py +1 -0
- package/input/manifest.md +24 -0
- package/input/telegram/__init__.py +1 -0
- package/input/telegram/commands.py +762 -0
- package/input/telegram/listener.py +532 -0
- package/main.py +90 -0
- package/manifest.md +28 -0
- package/mypics/.gitkeep +1 -0
- package/myvids/.gitkeep +1 -0
- package/output/__init__.py +1 -0
- package/output/images/__init__.py +1 -0
- package/output/images/fal_gen.py +43 -0
- package/output/manifest.md +26 -0
- package/output/text/__init__.py +1 -0
- package/output/text/sender.py +22 -0
- package/output/voice/__init__.py +64 -0
- package/output/voice/google_tts.py +252 -0
- package/output/voice/gtts_tts.py +214 -0
- package/output/voice/vibe_tts.py +190 -0
- package/package.json +58 -0
- package/pyproject.toml +23 -0
- package/requirements.txt +21 -0
- package/skills/__init__.py +1 -0
- package/skills/anticipation_engine/__init__.py +8 -0
- package/skills/anticipation_engine/engine.py +618 -0
- package/skills/anticipation_engine/manifest.md +192 -0
- package/skills/calendar/__init__.py +1 -0
- package/skills/content_unlocks/__init__.py +8 -0
- package/skills/content_unlocks/manifest.md +231 -0
- package/skills/content_unlocks/unlocks.py +945 -0
- package/skills/exclusive_moments/__init__.py +8 -0
- package/skills/exclusive_moments/manifest.md +145 -0
- package/skills/exclusive_moments/moments.py +506 -0
- package/skills/intimacy_layers/__init__.py +8 -0
- package/skills/intimacy_layers/layers.py +703 -0
- package/skills/intimacy_layers/manifest.md +203 -0
- package/skills/manifest.md +67 -0
- package/skills/memory_callbacks/__init__.py +9 -0
- package/skills/memory_callbacks/callbacks.py +748 -0
- package/skills/memory_callbacks/manifest.md +170 -0
- package/skills/message_scheduler/__init__.py +19 -0
- package/skills/message_scheduler/manifest.md +107 -0
- package/skills/message_scheduler/scheduler.py +510 -0
- package/skills/photo_manager/__init__.py +1 -0
- package/skills/photo_manager/scanner.py +296 -0
- package/skills/relationship_milestones/__init__.py +8 -0
- package/skills/relationship_milestones/manifest.md +206 -0
- package/skills/relationship_milestones/tracker.py +494 -0
- package/skills/self_authorship/__init__.py +23 -0
- package/skills/self_authorship/author.py +331 -0
- package/skills/self_authorship/manifest.md +24 -0
- package/skills/video_manager/__init__.py +5 -0
- package/skills/video_manager/manifest.md +37 -0
- package/skills/video_manager/scanner.py +229 -0
- package/webui/__init__.py +3 -0
- package/webui/app.py +936 -0
- package/webui/bridge.py +366 -0
- package/webui/static/index.html +2070 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Output: Fal.ai Image Generator
|
|
3
|
+
Generate images using fal.ai API
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import aiohttp
|
|
7
|
+
|
|
8
|
+
class FalImageGen:
    """Small async client that requests a generated image from fal.ai."""

    def __init__(self, api_key: str):
        # NOTE(review): this is the queue host of fal.ai; confirm that a 200
        # reply from it really carries an "images" list, since generate()
        # relies on that key.
        self.base_url = "https://queue.fal.run/fal-ai/flux/schnell"
        self.api_key = api_key

    async def generate(self, prompt: str, size: str = "square_hd") -> str:
        """Request one image for *prompt* and return its URL.

        Args:
            prompt: Text description sent as the "prompt" field.
            size: Value sent as the "image_size" field.

        Returns:
            The "url" of the first entry in the reply's "images" list, or an
            empty string when no API key is set, the HTTP status is not 200,
            or any exception occurs (the reason is printed).
        """
        if not self.api_key:
            print("[Fal.ai] No API key configured")
            return ""

        try:
            async with aiohttp.ClientSession() as http:
                request_headers = {
                    "Authorization": f"Key {self.api_key}",
                    "Content-Type": "application/json",
                }
                body = {"prompt": prompt, "image_size": size}
                limit = aiohttp.ClientTimeout(total=60)

                async with http.post(
                    self.base_url,
                    headers=request_headers,
                    json=body,
                    timeout=limit,
                ) as reply:
                    if reply.status != 200:
                        print(f"[Fal.ai] Error: {reply.status}")
                        return ""

                    data = await reply.json()
                    # An empty "images" list raises IndexError here, which the
                    # handler below turns into the usual "" result.
                    first_image = data.get("images", [{}])[0]
                    return first_image.get("url", "")
        except Exception as exc:
            print(f"[Fal.ai] Error: {exc}")
            return ""
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Output - Actions
|
|
2
|
+
|
|
3
|
+
How the AI responds to users.
|
|
4
|
+
|
|
5
|
+
## Modules
|
|
6
|
+
- `text/` - Text message sender
|
|
7
|
+
- `voice/` - Voice synthesis via VibeTTS, Google Cloud TTS, or gTTS (chosen by `create_tts`)
|
|
8
|
+
- `vibe_tts.py` - TTS client with auto-splitting (5000 char limit)
|
|
9
|
+
- `sender.py` - Voice file sender
|
|
10
|
+
- `images/` - Image generation via Fal.ai
|
|
11
|
+
- `fal_gen.py` - Fal.ai API client
|
|
12
|
+
|
|
13
|
+
## Voice (VibeTTS)
|
|
14
|
+
- Connects to VibeVoice server (VIBE_TTS_URL)
|
|
15
|
+
- Mood-based CFG scaling (high_desire=1.9, neutral=1.5)
|
|
16
|
+
- Auto-splits long texts at paragraph boundaries
|
|
17
|
+
- Output: OGG format for Telegram
|
|
18
|
+
|
|
19
|
+
## Image Generation
|
|
20
|
+
- Fal.ai API for AI image generation
|
|
21
|
+
- Prompt enhancement based on mood/context
|
|
22
|
+
|
|
23
|
+
## Integration Points
|
|
24
|
+
- Receives events: `send_text`, `send_voice_file`, `send_image`, `send_video`
|
|
25
|
+
- Voice triggered by: user request, high desire, is_high_desire state
|
|
26
|
+
- Images/videos selected based on arousal level and context
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Text output"""
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Output: Text Sender
|
|
3
|
+
Send text messages
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
class TextSender:
    """Logs outgoing text messages announced through the event bus."""

    def __init__(self, nervous, config):
        """Store the collaborators and subscribe to "send_text" events.

        Args:
            nervous: Event bus object; only its ``on(event, handler)`` method
                is used here.
            config: Configuration object, stored as-is.
        """
        self.nervous = nervous
        self.config = config

        # Subscribe so every "send_text" event reaches _send
        nervous.on("send_text", self._send)

    async def _send(self, data: dict):
        """Print a preview of the outgoing message.

        Only logs: per the original note, the actual sending is done by
        TelegramListener. At most the first 100 characters are shown, with
        "..." appended when the text is longer.
        """
        text = data.get("text", "")
        mood = data.get("mood", "neutral")

        preview = text[:100]
        suffix = "..." if len(text) > 100 else ""
        print(f"[Outgoing {mood}] {preview}{suffix}")
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Output: Voice TTS Factory
|
|
3
|
+
Creates the appropriate TTS provider based on settings
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
async def create_tts(provider: str = "vibe", **kwargs) -> Optional[object]:
    """
    Build the TTS backend named by *provider* and verify it is reachable.

    The provider module is imported lazily, so only the selected backend's
    dependencies are needed.

    Args:
        provider: Case-insensitive name. Accepted values are "vibe" /
            "vibevoice", "google" / "google-tts", and "gtts" / "gtranslate".
        **kwargs: Provider-specific arguments:
            - vibe: url (falls back to "http://localhost:8080")
            - google: api_key (optional)
            - gtts: no arguments needed

    Returns:
        The TTS instance when its is_available() check passes, otherwise
        None (also for an unknown provider name).
    """
    provider = provider.lower()

    if provider in ("vibe", "vibevoice"):
        from .vibe_tts import VibeTTS
        url = kwargs.get("url", "http://localhost:8080")
        engine = VibeTTS(url)
        if not await engine.is_available():
            print(f"[TTS] VibeVoice not available at {url}")
            return None
        print(f"[TTS] Connected to VibeVoice at {url}")
        return engine

    if provider in ("google", "google-tts"):
        from .google_tts import GoogleTTS
        api_key = kwargs.get("api_key")
        engine = GoogleTTS(api_key)
        if not await engine.is_available():
            print("[TTS] Google Cloud TTS not available")
            return None
        print("[TTS] Connected to Google Cloud TTS")
        return engine

    if provider in ("gtts", "gtranslate"):
        from .gtts_tts import GTTS
        engine = GTTS()
        if not await engine.is_available():
            print("[TTS] gTTS not available. Install with: pip install gtts")
            return None
        print("[TTS] Connected to gTTS (Google Translate - FREE)")
        return engine

    print(f"[TTS] Unknown provider: {provider}")
    return None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def get_available_providers() -> list:
    """Return the canonical TTS provider names as a new list."""
    providers = ["vibe", "google", "gtts"]
    return providers
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Output: Google Cloud TTS
|
|
3
|
+
Text-to-speech using Google Cloud Text-to-Speech API
|
|
4
|
+
|
|
5
|
+
Uses the free tier (up to 4M chars/month) with Emma voice
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import aiohttp
|
|
9
|
+
import asyncio
|
|
10
|
+
import re
|
|
11
|
+
import tempfile
|
|
12
|
+
import subprocess
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
# Fixed file that GoogleTTS.generate() writes its final audio to and returns;
# every call overwrites the previous result.
VOICE_OUTPUT_PATH = "/tmp/alive_ai_voice.ogg"
|
|
17
|
+
# GoogleTTS.split_text() breaks longer input into chunks of at most this size.
MAX_CHARS = 5000 # Google TTS limit per request
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class GoogleTTS:
    """Text-to-speech via the Google Cloud Text-to-Speech REST API.

    Input text is cleaned, split into request-sized chunks, synthesized chunk
    by chunk as OGG/Opus, and the result is written to VOICE_OUTPUT_PATH.
    """

    # Friendly voice name -> Google voice ID
    AVAILABLE_VOICES = {
        "emma": "en-US-Neural2-F",     # Emma - natural female US
        "emma-uk": "en-GB-Neural2-F",  # UK female
        "guy": "en-US-Neural2-D",      # Male US
        "guy-uk": "en-GB-Neural2-D",   # Male UK
    }

    DEFAULT_VOICE = "emma"

    # Speaking rates by mood (0.25 to 4.0, 1.0 is normal)
    MOOD_RATES = {
        "chill": 0.9, "neutral": 1.0, "happy": 1.05,
        "flirty": 1.0, "excited": 1.1, "high_desire": 0.95, "intense": 1.0,
        "sad": 0.9, "tired": 0.85,
    }

    def __init__(self, api_key: str = None):
        """
        Initialize Google TTS.

        Args:
            api_key: Google Cloud API key, appended to requests as the
                ``key`` query parameter. When omitted, requests are sent
                without a key. NOTE(review): the original docstring says the
                key is optional when using ADC, but this class attaches no
                credentials itself - confirm how ADC is expected to work.
        """
        self.api_key = api_key
        self.base_url = "https://texttospeech.googleapis.com/v1"

    def prepare_text(self, text: str) -> str:
        """Clean text for TTS - removes formatting and EMOJIS.

        Returns the text without markdown markers, with repeated
        punctuation collapsed, symbol/emoji characters removed, and all
        whitespace runs reduced to single spaces.
        """
        # Delete every '*', '_' and '~'. This covers the paired "**"/"__"
        # forms too, which is why no separate handling of *emphasis* spans
        # is needed afterwards.
        text = text.translate(str.maketrans("", "", "*_~"))
        text = re.sub(r'\.{3,}', '...', text)
        text = re.sub(r'!{2,}', '!', text)
        text = re.sub(r'\?{2,}', '?', text)

        # Remove ALL emojis - they break TTS. One range from U+24C2 to the
        # end of Unicode equals the union of the per-block ranges the
        # original listed one by one.
        # NOTE(review): the range also removes non-emoji scripts such as CJK
        # ideographs (U+4E00-U+9FFF); harmless for the English voices
        # offered here, but worth narrowing if other languages are added.
        text = re.sub(r'[\u24C2-\U0010FFFF]', '', text)

        # Clean up extra spaces
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def split_text(self, text: str, max_chars: int = None) -> list:
        """Split long text at paragraph/sentence boundaries.

        Paragraphs (separated by a blank line) are packed greedily into
        chunks. A paragraph that is itself too long is split after ". ",
        and a sentence that is still too long is cut at the size limit.

        Args:
            text: Text to split.
            max_chars: Maximum chunk length; defaults to MAX_CHARS.

        Returns:
            List of chunks, each at most ``max_chars`` long. Text that
            already fits is returned as a single-element list.

        Raises:
            ValueError: If ``max_chars`` is smaller than 1.
        """
        limit = MAX_CHARS if max_chars is None else max_chars
        if limit < 1:
            raise ValueError("max_chars must be at least 1")
        if len(text) <= limit:
            return [text]

        parts = []
        current = ""

        for para in text.split('\n\n'):
            # +2 accounts for the "\n\n" joiner
            if len(current) + len(para) + 2 <= limit:
                current = current + "\n\n" + para if current else para
                continue

            if current:
                parts.append(current)
                # Reset after flushing; otherwise the flushed text would be
                # emitted a second time after an oversized paragraph.
                current = ""

            if len(para) <= limit:
                current = para
                continue

            # Single paragraph is too long: split by sentences
            chunk = ""
            for sentence in para.replace('. ', '.\n').split('\n'):
                if len(chunk) + len(sentence) + 1 <= limit:
                    chunk = chunk + " " + sentence if chunk else sentence
                    continue
                if chunk:
                    parts.append(chunk)
                # A sentence with no usable boundary is cut hard so that no
                # chunk exceeds the limit.
                while len(sentence) > limit:
                    parts.append(sentence[:limit])
                    sentence = sentence[limit:]
                chunk = sentence
            if chunk:
                parts.append(chunk)

        if current:
            parts.append(current)

        return parts

    def get_voice_id(self, voice: str) -> str:
        """Get Google voice ID from friendly name (default voice if unknown)."""
        return self.AVAILABLE_VOICES.get(voice, self.AVAILABLE_VOICES[self.DEFAULT_VOICE])

    def get_rate_for_mood(self, mood: str) -> float:
        """Return the speaking rate for *mood*, or 1.0 for an unknown mood."""
        return self.MOOD_RATES.get(mood, 1.0)

    async def generate(self, text: str, voice: str = None,
                       cfg: float = None, mood: str = "neutral") -> str:
        """Generate audio using Google Cloud TTS.

        Args:
            text: Raw message text; it is cleaned with prepare_text().
            voice: Friendly voice name from AVAILABLE_VOICES (default voice
                when None or unknown).
            cfg: Accepted but not used by this provider.
            mood: Key into MOOD_RATES selecting the speaking rate.

        Returns:
            VOICE_OUTPUT_PATH on success, or "" when the cleaned text is
            empty or no chunk could be synthesized.
        """
        if voice is None:
            voice = self.DEFAULT_VOICE

        voice_id = self.get_voice_id(voice)
        speaking_rate = self.get_rate_for_mood(mood)

        text = self.prepare_text(text)
        if not text:
            # Nothing speakable is left after cleaning; skip the API call.
            return ""
        print(f"[GoogleTTS] Generating voice for {len(text)} chars with voice {voice_id}...")

        # Split if needed
        parts = self.split_text(text)
        if len(parts) > 1:
            print(f"[GoogleTTS] Split into {len(parts)} parts")

        audio_parts = []
        for i, part in enumerate(parts):
            print(f"[GoogleTTS] Processing part {i+1}/{len(parts)} ({len(part)} chars)")
            audio = await self._generate_single(part, voice_id, speaking_rate)
            if audio:
                audio_parts.append(audio)
            else:
                print(f"[GoogleTTS] Part {i+1} failed")

        if not audio_parts:
            return ""

        # Combine all parts
        if len(audio_parts) == 1:
            Path(VOICE_OUTPUT_PATH).write_bytes(audio_parts[0])
        else:
            self._concat_parts(audio_parts, VOICE_OUTPUT_PATH)
        print("[GoogleTTS] Generated audio file")
        return VOICE_OUTPUT_PATH

    def _concat_parts(self, audio_parts: list, output_path: str) -> None:
        """Join several OGG blobs into *output_path* using ffmpeg's concat demuxer.

        If ffmpeg is missing, times out, or exits with an error, only the
        first part is written. Temporary files are always removed.
        """
        temp_files = []
        list_path = None
        try:
            for blob in audio_parts:
                tf = tempfile.NamedTemporaryFile(suffix=".ogg", delete=False)
                # Record the name before writing so cleanup still finds it
                # if the write fails.
                temp_files.append(tf.name)
                with tf:
                    tf.write(blob)

            with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as list_file:
                list_path = list_file.name
                for tf_name in temp_files:
                    list_file.write(f"file '{tf_name}'\n")

            # check=True turns a non-zero ffmpeg exit into an exception so
            # the fallback below runs instead of leaving a stale or missing
            # output file behind.
            subprocess.run(
                ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
                 "-i", list_path, "-c", "copy", output_path],
                capture_output=True, timeout=30, check=True
            )
        except Exception as e:
            print(f"[GoogleTTS] ffmpeg concat failed, using first part: {e}")
            Path(output_path).write_bytes(audio_parts[0])
        finally:
            if list_path is not None:
                Path(list_path).unlink(missing_ok=True)
            for tf_name in temp_files:
                Path(tf_name).unlink(missing_ok=True)

    async def _generate_single(self, text: str, voice_id: str, speaking_rate: float) -> bytes:
        """Generate single audio part via Google Cloud TTS API.

        Returns the decoded audio bytes, or b"" on a non-200 status, a
        missing "audioContent" field, a timeout, or any other error (the
        reason is printed).
        """
        try:
            # Build request URL
            url = f"{self.base_url}/text:synthesize"
            if self.api_key:
                url += f"?key={self.api_key}"

            # The language code has to agree with the voice name
            language_code = "en-GB" if "en-GB" in voice_id else "en-US"

            # Request body
            payload = {
                "input": {"text": text},
                "voice": {
                    "languageCode": language_code,
                    "name": voice_id
                },
                "audioConfig": {
                    "audioEncoding": "OGG_OPUS",
                    "speakingRate": speaking_rate,
                    "pitch": 0.0
                }
            }

            async with aiohttp.ClientSession() as session:
                async with session.post(
                    url,
                    json=payload,
                    timeout=aiohttp.ClientTimeout(total=60)
                ) as resp:
                    if resp.status == 200:
                        result = await resp.json()
                        # Response contains base64-encoded audio
                        import base64
                        audio_content = result.get("audioContent", "")
                        if audio_content:
                            return base64.b64decode(audio_content)
                        return b""
                    else:
                        error = await resp.text()
                        print(f"[GoogleTTS] Error {resp.status}: {error[:200]}")
                        return b""
        except asyncio.TimeoutError:
            print("[GoogleTTS] Timeout")
            return b""
        except Exception as e:
            print(f"[GoogleTTS] Error: {e}")
            return b""

    async def is_available(self) -> bool:
        """Check if Google TTS is available.

        Performs a real minimal synthesis request ("test") and reports
        whether it returned HTTP 200; any exception counts as unavailable.
        """
        try:
            # Try a minimal synthesis to check
            url = f"{self.base_url}/text:synthesize"
            if self.api_key:
                url += f"?key={self.api_key}"

            payload = {
                "input": {"text": "test"},
                "voice": {"languageCode": "en-US", "name": "en-US-Neural2-F"},
                "audioConfig": {"audioEncoding": "OGG_OPUS"}
            }

            async with aiohttp.ClientSession() as session:
                # Use a ClientTimeout object, as the synthesis request does,
                # rather than a bare number.
                async with session.post(
                    url,
                    json=payload,
                    timeout=aiohttp.ClientTimeout(total=10)
                ) as resp:
                    return resp.status == 200
        except Exception:
            return False
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Output: gTTS (Google Translate TTS)
|
|
3
|
+
Free text-to-speech using Google Translate's TTS API
|
|
4
|
+
No API key required!
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import re
|
|
9
|
+
import tempfile
|
|
10
|
+
import subprocess
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional
|
|
13
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
14
|
+
|
|
15
|
+
# Base output file for GTTS.generate(); its ".ogg" suffix is swapped for
# ".mp3" when the synthesized audio is not OGG.
VOICE_OUTPUT_PATH = "/tmp/alive_ai_voice.ogg"
|
|
16
|
+
# GTTS.split_text() breaks longer input into chunks of at most this size.
MAX_CHARS = 5000 # gTTS limit
|
|
17
|
+
|
|
18
|
+
# Thread pool for blocking gTTS calls
|
|
19
|
+
# Module-wide pool shared by every GTTS instance; GTTS.generate() hands the
# blocking synthesis call to it, so at most two run at once.
_executor = ThreadPoolExecutor(max_workers=2)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class GTTS:
    """Text-to-speech via gTTS (Google Translate TTS) - Completely free!

    Text is cleaned, split into chunks, synthesized in a worker thread, and
    written next to VOICE_OUTPUT_PATH as OGG (when pydub converts it) or MP3.
    """

    # Friendly voice name -> value handed to the synthesizer.
    # gTTS doesn't have named voices like "Emma", just languages/accents.
    # Most values are language codes; the English accent entries hold the
    # Google Translate host TLD instead (see _ACCENT_TLDS).
    AVAILABLE_VOICES = {
        "en": "en",         # English (default)
        "en-us": "en",      # US English
        "en-uk": "co.uk",   # UK English
        "en-au": "com.au",  # Australian English
        "en-in": "co.in",   # Indian English
        "it": "it",         # Italian
        "es": "es",         # Spanish
        "fr": "fr",         # French
        "de": "de",         # German
        "pt": "pt",         # Portuguese
    }

    DEFAULT_VOICE = "en"
    DEFAULT_LANG = "en"

    # AVAILABLE_VOICES values that are TLDs (accents), not language codes.
    _ACCENT_TLDS = frozenset({"co.uk", "com.au", "co.in"})

    def __init__(self):
        """Initialize gTTS - no API key needed!"""
        # Cached result of is_available(); None means "not checked yet".
        self._available = None

    def prepare_text(self, text: str) -> str:
        """Clean text for TTS.

        Returns the text without markdown markers, with repeated
        punctuation collapsed, symbol/emoji characters removed, and all
        whitespace runs reduced to single spaces.
        """
        # Delete every '*', '_' and '~'. This covers the paired "**"/"__"
        # forms too, which is why no separate handling of *emphasis* spans
        # is needed afterwards.
        text = text.translate(str.maketrans("", "", "*_~"))
        text = re.sub(r'\.{3,}', '...', text)
        text = re.sub(r'!{2,}', '!', text)
        text = re.sub(r'\?{2,}', '?', text)

        # Remove ALL emojis. One range from U+24C2 to the end of Unicode
        # equals the union of the per-block ranges the original listed.
        # NOTE(review): the range also removes non-emoji scripts such as CJK
        # ideographs (U+4E00-U+9FFF); the voices offered here are all
        # Latin-script, but narrow it if other languages are added.
        text = re.sub(r'[\u24C2-\U0010FFFF]', '', text)

        # Clean up extra spaces
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def split_text(self, text: str, max_chars: int = None) -> list:
        """Split long text at paragraph boundaries.

        Paragraphs (separated by a blank line) are packed greedily into
        chunks. A paragraph that is itself too long is split after ". ",
        and a sentence that is still too long is cut at the size limit.

        Args:
            text: Text to split.
            max_chars: Maximum chunk length; defaults to MAX_CHARS.

        Returns:
            List of chunks, each at most ``max_chars`` long. Text that
            already fits is returned as a single-element list.

        Raises:
            ValueError: If ``max_chars`` is smaller than 1.
        """
        limit = MAX_CHARS if max_chars is None else max_chars
        if limit < 1:
            raise ValueError("max_chars must be at least 1")
        if len(text) <= limit:
            return [text]

        parts = []
        current = ""

        for para in text.split('\n\n'):
            # +2 accounts for the "\n\n" joiner
            if len(current) + len(para) + 2 <= limit:
                current = current + "\n\n" + para if current else para
                continue

            if current:
                parts.append(current)
                # Reset after flushing; otherwise the flushed text would be
                # emitted a second time after an oversized paragraph.
                current = ""

            if len(para) <= limit:
                current = para
                continue

            # Single paragraph is too long: split by sentences
            chunk = ""
            for sentence in para.replace('. ', '.\n').split('\n'):
                if len(chunk) + len(sentence) + 1 <= limit:
                    chunk = chunk + " " + sentence if chunk else sentence
                    continue
                if chunk:
                    parts.append(chunk)
                # A sentence with no usable boundary is cut hard so that no
                # chunk exceeds the limit.
                while len(sentence) > limit:
                    parts.append(sentence[:limit])
                    sentence = sentence[limit:]
                chunk = sentence
            if chunk:
                parts.append(chunk)

        if current:
            parts.append(current)

        return parts

    def _lang_and_tld(self, code: str) -> tuple:
        """Translate an AVAILABLE_VOICES value into ``(lang, tld)`` for gTTS.

        Accent entries are TLDs and must not be sent as the language, so
        they become ``(DEFAULT_LANG, tld)``. Any other value is a language
        code and yields ``(code, None)``, meaning "no TLD override".
        """
        if code in self._ACCENT_TLDS:
            return self.DEFAULT_LANG, code
        return code, None

    def _generate_blocking(self, text: str, lang: str) -> bytes:
        """Generate audio in a blocking manner (runs in thread pool).

        Args:
            text: Cleaned chunk to synthesize.
            lang: A value from AVAILABLE_VOICES (language code or accent TLD).

        Returns:
            OGG bytes when pydub is importable, otherwise the MP3 bytes
            produced by gTTS; b"" when gtts is missing or any error occurs.
        """
        try:
            from gtts import gTTS
            import io

            lang_code, tld = self._lang_and_tld(lang)
            options = {"text": text, "lang": lang_code, "slow": False}
            if tld is not None:
                # Only pass tld for accent voices, so language-only voices
                # keep working with gTTS releases that lack the parameter.
                options["tld"] = tld

            tts = gTTS(**options)
            mp3_buffer = io.BytesIO()
            tts.write_to_fp(mp3_buffer)
            mp3_buffer.seek(0)

            # Convert MP3 to OGG for Telegram
            # If pydub is available, convert to OGG
            try:
                from pydub import AudioSegment
                audio = AudioSegment.from_mp3(mp3_buffer)
                ogg_buffer = io.BytesIO()
                audio.export(ogg_buffer, format="ogg")
                return ogg_buffer.getvalue()
            except ImportError:
                # No pydub - return MP3 (original author's note: Telegram
                # accepts it too)
                return mp3_buffer.getvalue()

        except ImportError:
            print("[GTTS] gtts not installed. Run: pip install gtts")
            return b""
        except Exception as e:
            print(f"[GTTS] Error: {e}")
            return b""

    async def generate(self, text: str, voice: str = None,
                       cfg: float = None, mood: str = "neutral") -> str:
        """Generate audio using gTTS.

        Args:
            text: Raw message text; it is cleaned with prepare_text().
            voice: Key of AVAILABLE_VOICES (DEFAULT_LANG is used when the
                key is unknown).
            cfg: Accepted but not used by this provider.
            mood: Accepted but not used by this provider.

        Returns:
            Path of the written audio file (".ogg" or ".mp3"), or "" when
            the cleaned text is empty or no chunk could be synthesized.
        """
        if voice is None:
            voice = self.DEFAULT_VOICE

        # Map voice to language code (or accent TLD; see _lang_and_tld)
        lang = self.AVAILABLE_VOICES.get(voice, self.DEFAULT_LANG)

        text = self.prepare_text(text)
        if not text:
            # Nothing speakable is left after cleaning; skip synthesis.
            return ""
        print(f"[GTTS] Generating voice for {len(text)} chars with lang={lang}...")

        # Split if needed
        parts = self.split_text(text)
        if len(parts) > 1:
            print(f"[GTTS] Split into {len(parts)} parts")

        audio_parts = []
        loop = asyncio.get_running_loop()

        for i, part in enumerate(parts):
            print(f"[GTTS] Processing part {i+1}/{len(parts)} ({len(part)} chars)")
            # gTTS is blocking, so it runs on the shared thread pool
            audio = await loop.run_in_executor(_executor, self._generate_blocking, part, lang)
            if audio:
                audio_parts.append(audio)
            else:
                print(f"[GTTS] Part {i+1} failed")

        if not audio_parts:
            return ""

        # Determine extension based on format ("OggS" is the OGG page magic)
        ext = ".ogg" if audio_parts[0][:4] == b'OggS' else ".mp3"
        output_path = VOICE_OUTPUT_PATH.replace(".ogg", ext)

        # Combine all parts
        if len(audio_parts) == 1:
            Path(output_path).write_bytes(audio_parts[0])
        else:
            self._concat_parts(audio_parts, output_path, ext)
        print("[GTTS] Generated audio file")
        return output_path

    def _concat_parts(self, audio_parts: list, output_path: str, ext: str) -> None:
        """Join several audio blobs into *output_path* using ffmpeg's concat demuxer.

        If ffmpeg is missing, times out, or exits with an error, only the
        first part is written. Temporary files are always removed.
        """
        temp_files = []
        list_path = None
        try:
            for blob in audio_parts:
                tf = tempfile.NamedTemporaryFile(suffix=ext, delete=False)
                # Record the name before writing so cleanup still finds it
                # if the write fails.
                temp_files.append(tf.name)
                with tf:
                    tf.write(blob)

            with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as list_file:
                list_path = list_file.name
                for tf_name in temp_files:
                    list_file.write(f"file '{tf_name}'\n")

            # check=True turns a non-zero ffmpeg exit into an exception so
            # the fallback below runs instead of leaving a stale or missing
            # output file behind.
            subprocess.run(
                ["ffmpeg", "-y", "-f", "concat", "-safe", "0",
                 "-i", list_path, "-c", "copy", output_path],
                capture_output=True, timeout=30, check=True
            )
        except Exception as e:
            print(f"[GTTS] ffmpeg concat failed, using first part: {e}")
            Path(output_path).write_bytes(audio_parts[0])
        finally:
            if list_path is not None:
                Path(list_path).unlink(missing_ok=True)
            for tf_name in temp_files:
                Path(tf_name).unlink(missing_ok=True)

    async def is_available(self) -> bool:
        """Check if gTTS is available.

        Availability means the ``gtts`` package can be imported; the result
        is cached on the instance after the first check.
        """
        if self._available is not None:
            return self._available

        try:
            from gtts import gTTS
            self._available = True
            return True
        except ImportError:
            self._available = False
            return False
|