realtimex-deeptutor 0.5.0.post3__py3-none-any.whl → 0.5.0.post6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: realtimex-deeptutor
3
- Version: 0.5.0.post3
3
+ Version: 0.5.0.post6
4
4
  Summary: RealTimeX DeepTutor - Intelligent learning companion with multi-agent collaboration and LightRAG
5
5
  License: Apache-2.0
6
6
  Requires-Python: >=3.10
@@ -1,5 +1,5 @@
1
1
  realtimex_deeptutor/__init__.py,sha256=sSfuCLjJa6BnayszcU4azNl_sr1OzuKgLP10BAtdoh8,1567
2
- realtimex_deeptutor-0.5.0.post3.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
2
+ realtimex_deeptutor-0.5.0.post6.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
3
3
  scripts/__init__.py,sha256=mxMsCbci-Qon3qWU1JIi93-tYlHAy0NIUbDRmAPVcg0,54
4
4
  scripts/audit_prompts.py,sha256=Ltuk7tvsjpKhiobVbYq1volgVFKiVLgSTaE_Is4MGaM,5651
5
5
  scripts/check_install.py,sha256=GbApEcDLJ6r0QmYrCVHAFCOK4wolpSLwL3eBRmmD3og,13929
@@ -7,7 +7,7 @@ scripts/generate_roster.py,sha256=COsJ12bvZ5W9TI-wAvKpknKBgHr9uQTvJ_JCz2gVMVo,12
7
7
  scripts/install_all.py,sha256=u-A3eLhk1ua_KCjz8WZMkrVNJN6QdYs7NhGOcsm-Mks,23875
8
8
  scripts/migrate_kb.py,sha256=uyJgplkJag35rT2RrwSiT37__gpB4TiA0xh5uVcWIa4,19667
9
9
  scripts/start.py,sha256=EYbyjryor0DN_WcxQMSkKWCboM9UjMkv61fWhLyv63I,30300
10
- scripts/start_web.py,sha256=vzn7TiW7g2RNpAKYPQjHVNtOt6G9MxokEnZE_YWfHu4,22980
10
+ scripts/start_web.py,sha256=aZ5nqH-h2F6I_tAsY-_uy56jIS5ZJt8Fsjw0OHjEYGc,29755
11
11
  scripts/sync_prompts_from_en.py,sha256=TkBSFilYSwnwo0a3cgRnJ84i02zByAIW12N3ePzBwE8,4677
12
12
  src/__init__.py,sha256=UNw3C20mbskiQF3rK3HhjglrG8snhfuiVthc5UsoHX0,1046
13
13
  src/agents/__init__.py,sha256=IPhP4RZnCH2kcUDBkdKHO_ciVdyWnuHUCG2flG5Ydcw,885
@@ -19,7 +19,7 @@ src/agents/chat/prompts/en/chat_agent.yaml,sha256=K19crF4jLrtGeZYZ7_LSzlpOFljCJ_
19
19
  src/agents/chat/prompts/zh/chat_agent.yaml,sha256=dUudCfMKj_75pGdh1rjVUmWlHf8hzUhHia7qfEwmntU,929
20
20
  src/agents/co_writer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
21
  src/agents/co_writer/edit_agent.py,sha256=5NCTQtReO7S9A3jD8TefeUNv6LFSCojvo7dUyPJAc9s,9147
22
- src/agents/co_writer/narrator_agent.py,sha256=RyulrOgvVz1ZXS4njhzPed-li5wKAoJaui1MLe5ZnJ0,15397
22
+ src/agents/co_writer/narrator_agent.py,sha256=h9hjIsWvWJBXsQxPkuwBlHgAMBKFvwXyUprlvh0SXbc,16425
23
23
  src/agents/co_writer/prompts/en/edit_agent.yaml,sha256=wGGk_IBq5DlIoDAl7sSHn-xFt6jx07NWv1NbXFBQE2g,4085
24
24
  src/agents/co_writer/prompts/en/narrator_agent.yaml,sha256=zUhgsE64uTQPxOBHE34YvDxMq1a7wav1NVMP1XDYMkg,4259
25
25
  src/agents/co_writer/prompts/zh/edit_agent.yaml,sha256=Sg6RnG660LvglIaW5bqAH8yqc6EQl2xmE_wHGrixX38,3617
@@ -139,7 +139,7 @@ src/api/routers/ideagen.py,sha256=UFgQ0LjKjGwKc-wVjdNi-R-TC7RhU8kZrWxLKBuiCls,16
139
139
  src/api/routers/knowledge.py,sha256=gW6Ol6cRMqZ-reRQ30JfJHf51Wg4gNg5vvf0Zpt5MQ8,32056
140
140
  src/api/routers/notebook.py,sha256=NwLf2MPaT_rsF6cDlsZG85o3z7JMHXatl-FuWIKYaJw,6098
141
141
  src/api/routers/question.py,sha256=aAAY5nz3PRgsww75Ph1ZWY_9_Hey2uudTxdsQlMZzPA,20876
142
- src/api/routers/realtimex.py,sha256=nMmSWYyIDRoRdeQTFpTrGnpaf87jSiwvJ4zRkyEaW00,3998
142
+ src/api/routers/realtimex.py,sha256=AX4dna8tbScf0W_AWtzxqlXm0WRgPMMqmVTh7fYOZp8,4464
143
143
  src/api/routers/research.py,sha256=ZZJRNi1xhmoi8Shg8gBln_1uL1_XEW9Hxo0IM5RsOKo,16520
144
144
  src/api/routers/settings.py,sha256=7Q2edffDgD0PeiBJ0emd3kFc3i3qn1rFrTbbIm9LsXI,4549
145
145
  src/api/routers/solve.py,sha256=lLyPd_R1o-Yxe9I685nGj_SfyCeGBS74023tbDNpZGI,15939
@@ -150,7 +150,7 @@ src/api/utils/notebook_manager.py,sha256=4zTn_J10BmWlCaCJo3bcNWiWUGgLw4hGR92sfj3
150
150
  src/api/utils/progress_broadcaster.py,sha256=u1cfxZ2Rek9tSP9sP1hdQMQkh6Dr_6fhzJjBUvFf-3I,2772
151
151
  src/api/utils/task_id_manager.py,sha256=E59dJ2rg-_qY7uACNT3Nmzey0fnpZgfMYerW-8QsaNM,3687
152
152
  src/cli/__init__.py,sha256=MQ18rJWUlLlk_keWk7lrCjOYW5c_kDfJr5roKDu5ZT8,269
153
- src/cli/start.py,sha256=KNQ4ulAB4QCcS39Z5TcYWw4UtTzfEFCJg_yZOtg-Brw,6664
153
+ src/cli/start.py,sha256=g2kYPuynTDCDL66wJ8IUchRLd-ZV8q5-SjAuuZWyYto,6744
154
154
  src/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
155
  src/config/accessors.py,sha256=A06OaUK78MdgLu__6MRcKcVoog1ctDj5fHP4APOyRiI,534
156
156
  src/config/constants.py,sha256=C3370U6qkFExKkoNyDDLcf05FIJU0_5MHvpT6mvDQ7A,734
@@ -184,7 +184,7 @@ src/services/__init__.py,sha256=91ZEKiPyPtwFgYRtmkKBrdYAZKVnREUw43itPM5n-WM,1851
184
184
  src/services/config/__init__.py,sha256=Vjt0TORUt358yZvNCgoIVnuWJWVPuYiNA6ge9DV-Dyo,1792
185
185
  src/services/config/knowledge_base_config.py,sha256=l6QlAivVCBf1Q23YaEkHIHvq3TmsowPJmjDASezBf8c,7266
186
186
  src/services/config/loader.py,sha256=1PXImuGe8RpJ8UQpyGcsRhkq_YL9kWJq2f1ylQE4N3Y,8068
187
- src/services/config/unified_config.py,sha256=3sPs06oU0j6BjPZObh5Zoc1eLDusa1qa8HPGU9rncdY,32338
187
+ src/services/config/unified_config.py,sha256=oIPUJ2lVhcREf2V8PdQBwvPoBHhpcuuaWoPvoc6gBt8,34238
188
188
  src/services/embedding/__init__.py,sha256=ljDq50as6CkKNHK2sSQtX_iXu9TyOMhzzFNs2f7QYKs,1338
189
189
  src/services/embedding/client.py,sha256=X5iUbGuXjtk_6Nkl7N4eRXjbL5hPY46JcTfi65UZWus,6003
190
190
  src/services/embedding/config.py,sha256=vBJjsTcYLsIpy0bmR7V8etu8eAXdRShjQIYIMcqwIgU,5368
@@ -269,7 +269,7 @@ src/services/settings/interface_settings.py,sha256=xyiJl_N2eYWmQVufQE63wEZJftgCx
269
269
  src/services/setup/__init__.py,sha256=9AyQ8c-3ViiKpylTzqw4_qle0Y8MBo0GBPzTNP4vT_4,711
270
270
  src/services/setup/init.py,sha256=fsLMcgZ_LQRI4EoPwfbrmnI9p2lU26lgQwR6QmZW01A,10779
271
271
  src/services/tts/__init__.py,sha256=sLKSM4WvthMajSCVKWSK5hi8FOf0xmx_zpE471n97FI,343
272
- src/services/tts/config.py,sha256=by4OWjPwA16AtL2d3805Y_w3Z_UrmF9I3zfN028twWY,3088
272
+ src/services/tts/config.py,sha256=nVDC_ukWLPE8ZvWOqYA9eHQhqpjjbH23zg-BkJ4sQf8,3166
273
273
  src/tools/__init__.py,sha256=Zn9-uR4OqkAUq5oXm8Mwd3I4O_Qpg5GUJkrwEN47QLA,2777
274
274
  src/tools/code_executor.py,sha256=Tb8R2Cp-4_q1XfxUYsBUW52UDUwdQxLIwBMukjkEMXk,19436
275
275
  src/tools/paper_search_tool.py,sha256=f-CynXbOYo9-DLIaTIlc-odGYo8j3hx4sHb0sFktQF8,4745
@@ -287,10 +287,10 @@ src/utils/document_validator.py,sha256=ogNk0KvMZPQKi9zHoeQzhUxCVsYFpkP9-pjO8kC71
287
287
  src/utils/error_rate_tracker.py,sha256=MIhaNaStdrhinVFIKHNOgQMPdbrddjKJy2ok_jK8WX4,3942
288
288
  src/utils/error_utils.py,sha256=ME_9q-DlmxFl-Xvv3ETPZE_iP705x6MXiuAREgWYsjM,2262
289
289
  src/utils/json_parser.py,sha256=M_KfrsrNvQPSiFvpKHQV79Aj85_MEcLVc6hnKzvTV58,3243
290
- src/utils/realtimex.py,sha256=WlGeZ7d-ymXIJ6ZYK2nhjqxArV_7rVHHJLCqaNkcOUw,8648
290
+ src/utils/realtimex.py,sha256=vs7fAEnJJ4zpAyyBn-7vUmGWiiQvpTWQCRgax1MLTDw,9769
291
291
  src/utils/network/circuit_breaker.py,sha256=BtjogK5R3tG8fuJniS5-PJKZMtwD5P2SkP2JFiQ9sRA,2722
292
- realtimex_deeptutor-0.5.0.post3.dist-info/METADATA,sha256=cPG-bhP_0bk97uBwQaN9OzVTH9YiVceBLDrNM4_yv8Q,58304
293
- realtimex_deeptutor-0.5.0.post3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
294
- realtimex_deeptutor-0.5.0.post3.dist-info/entry_points.txt,sha256=slNAzwRLUpqiMtDRZBQIkXbU2vGMHL_om6-o19gYdh8,134
295
- realtimex_deeptutor-0.5.0.post3.dist-info/top_level.txt,sha256=zUAd6V7jDYhdL7bvg2S38YCM-gVhvd36WqkjxrT-02I,32
296
- realtimex_deeptutor-0.5.0.post3.dist-info/RECORD,,
292
+ realtimex_deeptutor-0.5.0.post6.dist-info/METADATA,sha256=ZHgtwKQVopSxjI2xUxf27e-HiHVkrPtObk6b35e2zlk,58304
293
+ realtimex_deeptutor-0.5.0.post6.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
294
+ realtimex_deeptutor-0.5.0.post6.dist-info/entry_points.txt,sha256=slNAzwRLUpqiMtDRZBQIkXbU2vGMHL_om6-o19gYdh8,134
295
+ realtimex_deeptutor-0.5.0.post6.dist-info/top_level.txt,sha256=zUAd6V7jDYhdL7bvg2S38YCM-gVhvd36WqkjxrT-02I,32
296
+ realtimex_deeptutor-0.5.0.post6.dist-info/RECORD,,
scripts/start_web.py CHANGED
@@ -69,6 +69,194 @@ else:
69
69
  return False
70
70
 
71
71
 
72
+ def check_port_in_use(port: int) -> tuple[bool, int | None]:
73
+ """
74
+ Check if a port is in use and return the PID of the process using it.
75
+
76
+ Uses connect test to check if something is actually LISTENING on the port,
77
+ rather than bind test which fails for TIME_WAIT state.
78
+
79
+ Args:
80
+ port: Port number to check
81
+
82
+ Returns:
83
+ Tuple of (is_in_use, pid_or_none)
84
+ """
85
+ import socket
86
+
87
+ # Use connect test to check if something is actually listening
88
+ # This avoids false positives from TIME_WAIT state
89
+ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
90
+ sock.settimeout(1)
91
+ try:
92
+ result = sock.connect_ex(("localhost", port))
93
+ if result != 0:
94
+ # Connection refused = nothing listening = port is free
95
+ return False, None
96
+ except (OSError, socket.timeout):
97
+ # Connection failed = port is free
98
+ return False, None
99
+ finally:
100
+ try:
101
+ sock.close()
102
+ except Exception:
103
+ pass
104
+
105
+ # Port is in use (connection succeeded), try to find the PID
106
+ pid = None
107
+ try:
108
+ if os.name == "nt":
109
+ # Windows: use netstat
110
+ result = subprocess.run(
111
+ ["netstat", "-ano"],
112
+ capture_output=True,
113
+ text=True,
114
+ timeout=5,
115
+ )
116
+ for line in result.stdout.splitlines():
117
+ if f":{port}" in line and "LISTENING" in line:
118
+ parts = line.split()
119
+ if parts:
120
+ try:
121
+ pid = int(parts[-1])
122
+ break
123
+ except ValueError:
124
+ pass
125
+ else:
126
+ # Unix: use lsof
127
+ result = subprocess.run(
128
+ ["lsof", "-ti", f":{port}"],
129
+ capture_output=True,
130
+ text=True,
131
+ timeout=5,
132
+ )
133
+ if result.returncode == 0 and result.stdout.strip():
134
+ # May return multiple PIDs, take the first one
135
+ try:
136
+ pid = int(result.stdout.strip().split()[0])
137
+ except (ValueError, IndexError):
138
+ pass
139
+ except Exception:
140
+ pass
141
+
142
+ return True, pid
143
+
144
+
145
def kill_process_on_port(port: int, force: bool = False) -> bool:
    """
    Kill the process using a specific port.

    Args:
        port: Port number
        force: If True, use SIGKILL instead of SIGTERM (POSIX only; on
            Windows ``taskkill /F`` is always used)

    Returns:
        True if the port is free afterwards (including when it was already
        free), False if the owning process could not be identified, could
        not be killed, or the port is still in use after the attempt.
    """
    in_use, pid = check_port_in_use(port)
    if not in_use:
        return True  # Port is free

    if pid is None:
        print_flush(f"⚠️ Port {port} is in use but couldn't identify the process")
        return False

    print_flush(f" Stopping process {pid} on port {port}...")

    try:
        if os.name == "nt":
            subprocess.run(["taskkill", "/F", "/PID", str(pid)], check=True, capture_output=True)
        else:
            try:
                os.kill(pid, signal.SIGKILL if force else signal.SIGTERM)
                # Wait a moment for process to terminate
                time.sleep(0.5)
                if not force:
                    # Signal 0 only probes for existence; it raises
                    # ProcessLookupError once the process is gone.
                    os.kill(pid, 0)
                    # Still alive after SIGTERM: escalate
                    os.kill(pid, signal.SIGKILL)
                    time.sleep(0.3)
            except ProcessLookupError:
                # The process exited on its own (possibly before the first
                # signal was delivered). That is not a failure; the port
                # check below decides the result.
                pass

        # Verify port is now free
        time.sleep(0.3)
        in_use, _ = check_port_in_use(port)
        if in_use:
            print_flush(f"⚠️ Port {port} still in use after killing process")
            return False

        print_flush(f"✅ Port {port} is now free")
        return True

    except Exception as e:
        # Best-effort cleanup: report (e.g. permission denied, taskkill
        # failure) and let the caller decide how to proceed.
        print_flush(f"❌ Failed to kill process {pid}: {e}")
        return False
196
+
197
+
198
def ensure_ports_available(backend_port: int, frontend_port: int, auto_kill: bool = False) -> bool:
    """
    Verify that the backend and frontend ports are free before startup.

    When a port is taken, the conflict is reported. With ``auto_kill`` the
    owning processes are stopped; otherwise instructions for resolving the
    conflict manually are printed.

    Args:
        backend_port: Backend port number
        frontend_port: Frontend port number
        auto_kill: If True, automatically kill processes using the ports

    Returns:
        True if all ports are available (or were freed), False otherwise
    """
    # Probe both ports, backend first, keeping only the ones that are taken.
    conflicts = []
    for label, candidate in (("Backend", backend_port), ("Frontend", frontend_port)):
        taken, owner = check_port_in_use(candidate)
        if taken:
            conflicts.append((candidate, label, owner))

    if not conflicts:
        return True

    print_flush("")
    print_flush("⚠️ Port conflict detected:")
    for candidate, label, owner in conflicts:
        suffix = f" (PID: {owner})" if owner else ""
        print_flush(f" - {label} port {candidate} is already in use{suffix}")

    if auto_kill:
        print_flush("")
        print_flush("🔄 AUTO_KILL_PORTS is enabled, cleaning up...")
        # Attempt every conflicting port even if an earlier one fails.
        outcomes = [kill_process_on_port(candidate) for candidate, _, _ in conflicts]
        return all(outcomes)

    on_windows = os.name == "nt"
    print_flush("")
    print_flush("💡 To resolve this, you can either:")
    print_flush(" 1. Set AUTO_KILL_PORTS=true to automatically clean up")
    print_flush(" 2. Manually kill the processes:")
    for candidate, _, owner in conflicts:
        if owner and on_windows:
            hint = f" taskkill /F /PID {owner}"
        elif owner:
            hint = f" kill -9 {owner}"
        elif on_windows:
            hint = f" netstat -ano | findstr :{candidate}"
        else:
            hint = f" lsof -ti :{candidate} | xargs kill -9"
        print_flush(hint)
    print_flush(" 3. Use different ports via environment variables:")
    print_flush(" BACKEND_PORT=8002 FRONTEND_PORT=3783 uvx realtimex-deeptutor")
    print_flush("")
    return False
258
+
259
+
72
260
  def terminate_process_tree(process, name="Process", timeout=5):
73
261
  """
74
262
  Terminate a process and all its children (process group).
@@ -541,6 +729,23 @@ if __name__ == "__main__":
541
729
  print_flush(f"⚠️ Warning: Failed to initialize user directories: {e}")
542
730
  print_flush(" Continuing anyway...")
543
731
 
732
+ # Check for port conflicts before starting services
733
+ try:
734
+ from pathlib import Path
735
+
736
+ from src.services.setup import get_ports
737
+
738
+ backend_port, frontend_port = get_ports(
739
+ Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
740
+ )
741
+
742
+ auto_kill = os.environ.get("AUTO_KILL_PORTS", "").lower() in ("true", "1", "yes")
743
+ if not ensure_ports_available(backend_port, frontend_port, auto_kill=auto_kill):
744
+ sys.exit(1)
745
+ except Exception as e:
746
+ print_flush(f"⚠️ Warning: Failed to check ports: {e}")
747
+ print_flush(" Continuing anyway...")
748
+
544
749
  backend = None
545
750
  frontend = None
546
751
 
@@ -315,30 +315,54 @@ class NarratorAgent(BaseAgent):
315
315
  self.logger.info(f"Starting TTS audio generation - ID: {audio_id}, Voice: {voice}")
316
316
 
317
317
  try:
318
- binding = os.getenv("TTS_BINDING", "openai")
319
- api_version = self.tts_config.get("api_version")
320
-
321
- # Only use Azure client if binding is explicitly Azure,
322
- # OR if binding is generic 'openai' but an Azure-specific api_version is present.
323
- if binding == "azure_openai" or (binding == "openai" and api_version):
324
- client = AsyncAzureOpenAI(
325
- api_key=self.tts_config["api_key"],
326
- azure_endpoint=self.tts_config["base_url"],
327
- api_version=api_version,
318
+ # Check if using RealTimeX SDK
319
+ if self.tts_config.get("source") == "realtimex":
320
+ from src.utils.realtimex import get_realtimex_sdk
321
+
322
+ sdk = get_realtimex_sdk()
323
+
324
+ # Use SDK to generate audio
325
+ audio_bytes = await sdk.tts.speak(
326
+ text=script,
327
+ voice=voice,
328
+ model=self.tts_config.get("model"),
329
+ provider=self.tts_config.get("provider")
330
+ if self.tts_config.get("provider") != "realtimexai"
331
+ else None,
332
+ speed=self.tts_config.get("speed"),
333
+ num_inference_steps=self.tts_config.get("quality"),
328
334
  )
335
+
336
+ # Save bytes to file
337
+ with open(audio_path, "wb") as f:
338
+ f.write(audio_bytes)
339
+
329
340
  else:
330
- # Create OpenAI client with custom base_url
331
- client = AsyncOpenAI(
332
- base_url=self.tts_config["base_url"], api_key=self.tts_config["api_key"]
341
+ # Standard OpenAI/Azure implementation
342
+ binding = os.getenv("TTS_BINDING", "openai")
343
+ api_version = self.tts_config.get("api_version")
344
+
345
+ # Only use Azure client if binding is explicitly Azure,
346
+ # OR if binding is generic 'openai' but an Azure-specific api_version is present.
347
+ if binding == "azure_openai" or (binding == "openai" and api_version):
348
+ client = AsyncAzureOpenAI(
349
+ api_key=self.tts_config["api_key"],
350
+ azure_endpoint=self.tts_config["base_url"],
351
+ api_version=api_version,
352
+ )
353
+ else:
354
+ # Create OpenAI client with custom base_url
355
+ client = AsyncOpenAI(
356
+ base_url=self.tts_config["base_url"], api_key=self.tts_config["api_key"]
357
+ )
358
+
359
+ # Call OpenAI TTS API
360
+ response = await client.audio.speech.create(
361
+ model=self.tts_config["model"], voice=voice, input=script
333
362
  )
334
363
 
335
- # Call OpenAI TTS API
336
- response = await client.audio.speech.create(
337
- model=self.tts_config["model"], voice=voice, input=script
338
- )
339
-
340
- # Save audio to file
341
- await response.stream_to_file(audio_path)
364
+ # Save audio to file
365
+ await response.stream_to_file(audio_path)
342
366
 
343
367
  self.logger.info(f"Audio saved to: {audio_path}")
344
368
 
@@ -83,12 +83,16 @@ async def get_providers():
83
83
 
84
84
 
85
85
  from pydantic import BaseModel
86
+ from typing import Optional
86
87
 
87
88
 
88
89
  class RTXConfigApplyRequest(BaseModel):
89
- config_type: str # "llm" or "embedding"
90
+ config_type: str # "llm", "embedding" or "tts"
90
91
  provider: str
91
92
  model: str
93
+ voice: Optional[str] = None
94
+ speed: Optional[float] = None
95
+ quality: Optional[int] = None
92
96
 
93
97
 
94
98
  @router.post("/realtimex/config/apply")
@@ -113,11 +117,20 @@ async def apply_rtx_config(request: RTXConfigApplyRequest):
113
117
  config_type_enum = ConfigType.LLM
114
118
  elif request.config_type == "embedding":
115
119
  config_type_enum = ConfigType.EMBEDDING
120
+ elif request.config_type == "tts":
121
+ config_type_enum = ConfigType.TTS
116
122
  else:
117
123
  raise HTTPException(400, f"Invalid config type: {request.config_type}")
118
124
 
119
125
  # Save RTX selection to rtx_active.json
120
- if not set_rtx_active_config(request.config_type, request.provider, request.model):
126
+ if not set_rtx_active_config(
127
+ request.config_type,
128
+ request.provider,
129
+ request.model,
130
+ request.voice,
131
+ request.speed,
132
+ request.quality,
133
+ ):
121
134
  raise HTTPException(500, "Failed to save RTX configuration")
122
135
 
123
136
  # Set 'rtx' as the active config in unified config manager
@@ -129,6 +142,9 @@ async def apply_rtx_config(request: RTXConfigApplyRequest):
129
142
  "config_type": request.config_type,
130
143
  "provider": request.provider,
131
144
  "model": request.model,
145
+ "voice": request.voice,
146
+ "speed": request.speed,
147
+ "quality": request.quality,
132
148
  }
133
149
 
134
150
  except HTTPException:
src/cli/start.py CHANGED
@@ -39,6 +39,7 @@ Examples:
39
39
  Environment Variables:
40
40
  FRONTEND_PORT Frontend port (default: 3782)
41
41
  BACKEND_PORT Backend port (default: 8001)
42
+ AUTO_KILL_PORTS Auto-kill processes using required ports (default: false)
42
43
  RTX_APP_ID RealTimeX App ID (auto-detected)
43
44
  API_BASE_URL Backend API URL (auto-configured)
44
45
  LOG_LEVEL Logging level (DEBUG, INFO, WARNING, ERROR)
@@ -190,6 +190,22 @@ class UnifiedConfigManager:
190
190
  self.set_active_config(ConfigType.EMBEDDING, "rtx")
191
191
  logger.info("Auto-activated RealTimeX for Embedding")
192
192
 
193
+ # Auto-activate for TTS
194
+ tts_data = self._load_configs(ConfigType.TTS)
195
+ tts_active_id = tts_data.get("active_id", "default")
196
+
197
+ # Only auto-activate if still using default
198
+ if tts_active_id == "default":
199
+ rtx_tts_config = get_rtx_active_config("tts")
200
+ if not rtx_tts_config:
201
+ # Set default RTX TTS config
202
+ set_rtx_active_config("tts", "realtimexai", "tts-1")
203
+ logger.info("Auto-configured RealTimeX TTS with default model: tts-1")
204
+
205
+ # Activate RTX for TTS
206
+ self.set_active_config(ConfigType.TTS, "rtx")
207
+ logger.info("Auto-activated RealTimeX for TTS")
208
+
193
209
  except ImportError:
194
210
  # RTX utilities not available
195
211
  pass
@@ -472,8 +488,8 @@ class UnifiedConfigManager:
472
488
  if not should_use_realtimex_sdk():
473
489
  return None
474
490
 
475
- # Only LLM and Embedding are supported via RTX
476
- if config_type not in (ConfigType.LLM, ConfigType.EMBEDDING):
491
+ # Only LLM, Embedding, and TTS are supported via RTX
492
+ if config_type not in (ConfigType.LLM, ConfigType.EMBEDDING, ConfigType.TTS):
477
493
  return None
478
494
 
479
495
  # Get user's active selection (or use defaults)
@@ -482,15 +498,23 @@ class UnifiedConfigManager:
482
498
  if active:
483
499
  provider = active.get("provider", "realtimexai")
484
500
  model = active.get("model", "")
501
+ voice = active.get("voice")
502
+ speed = active.get("speed")
503
+ quality = active.get("quality")
485
504
  else:
486
505
  # Use defaults
487
506
  provider = "realtimexai"
507
+ voice = None
508
+ speed = None
509
+ quality = None
488
510
  if config_type == ConfigType.LLM:
489
511
  model = "gpt-4o-mini"
490
- else: # Embedding
512
+ elif config_type == ConfigType.EMBEDDING:
491
513
  model = "text-embedding-3-small"
514
+ elif config_type == ConfigType.TTS:
515
+ model = "tts-1"
492
516
 
493
- return {
517
+ config = {
494
518
  "id": "rtx",
495
519
  "name": "RealTimeX",
496
520
  "is_default": False,
@@ -500,6 +524,16 @@ class UnifiedConfigManager:
500
524
  "api_key": "—", # No API key needed
501
525
  "base_url": "—", # Uses SDK proxy
502
526
  }
527
+
528
+ # Add TTS-specific fields if present
529
+ if voice:
530
+ config["voice"] = voice
531
+ if speed is not None:
532
+ config["speed"] = speed
533
+ if quality is not None:
534
+ config["quality"] = quality
535
+
536
+ return config
503
537
 
504
538
  except ImportError:
505
539
  return None
@@ -602,15 +636,22 @@ class UnifiedConfigManager:
602
636
  "id": "rtx",
603
637
  "provider": rtx_active.get("provider", "realtimexai"),
604
638
  "model": rtx_active.get("model", ""),
639
+ "voice": rtx_active.get("voice"), # Include voice for TTS
640
+ "speed": rtx_active.get("speed"), # Include speed for TTS
641
+ "quality": rtx_active.get("quality"), # Include quality for TTS
605
642
  "source": "realtimex", # This tells services to use SDK
606
643
  }
607
644
  else:
608
645
  # Return defaults when no selection exists yet
609
- default_model = (
610
- "gpt-4o-mini"
611
- if config_type == ConfigType.LLM
612
- else "text-embedding-3-small"
613
- )
646
+ if config_type == ConfigType.LLM:
647
+ default_model = "gpt-4o-mini"
648
+ elif config_type == ConfigType.EMBEDDING:
649
+ default_model = "text-embedding-3-small"
650
+ elif config_type == ConfigType.TTS:
651
+ default_model = "tts-1"
652
+ else:
653
+ default_model = ""
654
+
614
655
  return {
615
656
  "id": "rtx",
616
657
  "provider": "realtimexai",
@@ -60,6 +60,7 @@ def get_tts_config() -> dict:
60
60
  "base_url": config.get("base_url", ""),
61
61
  "api_version": config.get("api_version"),
62
62
  "voice": config.get("voice", "alloy"),
63
+ "source": config.get("source"), # "realtimex" when using RTX
63
64
  }
64
65
  except ImportError:
65
66
  # Unified config service not yet available, fall back to env
src/utils/realtimex.py CHANGED
@@ -45,6 +45,8 @@ def get_realtimex_sdk() -> "RealtimeXSDK":
45
45
  "llm.chat", # For LLM completions
46
46
  "llm.providers", # For listing available providers
47
47
  "llm.embed", # For embeddings
48
+ "tts.speak", # For TTS
49
+ "tts.providers", # For listing TTS providers
48
50
  ]
49
51
  )
50
52
  )
@@ -160,21 +162,34 @@ async def get_cached_providers() -> dict:
160
162
  try:
161
163
  sdk = get_realtimex_sdk()
162
164
 
163
- # Fetch both in parallel (conceptually, though await is sequential here)
164
- # In a real async environment we might use asyncio.gather, but sequential is safe
165
+ # Fetch all providers in parallel (conceptually)
165
166
  llm_result = await sdk.llm.chat_providers()
166
167
  embed_result = await sdk.llm.embed_providers()
168
+ tts_result = await sdk.tts.list_providers()
167
169
 
168
170
  def serialize_provider(p):
169
- return {
171
+ # Handle both object and dict responses (SDK might return dicts for TTS)
172
+ if isinstance(p, dict):
173
+ # Normalize: ensure 'provider' key exists (SDK uses 'id' for TTS)
174
+ if "provider" not in p and "id" in p:
175
+ p["provider"] = p["id"]
176
+ return p
177
+
178
+ data = {
170
179
  "provider": p.provider,
171
180
  "models": [{"id": m.id, "name": m.name} for m in p.models],
172
181
  }
182
+ # Add config metadata if available (for TTS advanced options)
183
+ if hasattr(p, "config"):
184
+ data["config"] = p.config
185
+
186
+ return data
173
187
 
174
188
  _providers_cache = {
175
189
  "rtx_enabled": True,
176
190
  "llm": [serialize_provider(p) for p in llm_result.providers],
177
191
  "embedding": [serialize_provider(p) for p in embed_result.providers],
192
+ "tts": [serialize_provider(p) for p in tts_result],
178
193
  }
179
194
  _providers_cache_time = time.time()
180
195
 
@@ -247,23 +262,41 @@ def get_rtx_active_config(config_type: str) -> Optional[dict]:
247
262
  return data.get(config_type)
248
263
 
249
264
 
250
- def set_rtx_active_config(config_type: str, provider: str, model: str) -> bool:
265
+ def set_rtx_active_config(
266
+ config_type: str,
267
+ provider: str,
268
+ model: str,
269
+ voice: Optional[str] = None,
270
+ speed: Optional[float] = None,
271
+ quality: Optional[int] = None,
272
+ ) -> bool:
251
273
  """
252
274
  Set the active RTX config for a specific config type.
253
275
 
254
276
  Args:
255
- config_type: "llm" or "embedding"
277
+ config_type: "llm", "embedding" or "tts"
256
278
  provider: Provider name (e.g., "openai")
257
279
  model: Model ID (e.g., "gpt-4o")
280
+ voice: Optional voice ID (for TTS)
281
+ speed: Optional speed multiplier (for TTS)
282
+ quality: Optional quality/inference steps (for TTS)
258
283
 
259
284
  Returns:
260
285
  True if saved successfully
261
286
  """
262
287
  data = _load_rtx_active_config()
263
- data[config_type] = {
288
+ config = {
264
289
  "provider": provider,
265
290
  "model": model,
266
291
  }
292
+ if voice:
293
+ config["voice"] = voice
294
+ if speed is not None:
295
+ config["speed"] = speed
296
+ if quality is not None:
297
+ config["quality"] = quality
298
+
299
+ data[config_type] = config
267
300
  return _save_rtx_active_config(data)
268
301
 
269
302