qyro 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qyro/__init__.py +17 -0
- qyro/adapters/__init__.py +4 -0
- qyro/adapters/language_adapters/__init__.py +4 -0
- qyro/adapters/language_adapters/c/__init__.py +4 -0
- qyro/adapters/language_adapters/python/__init__.py +4 -0
- qyro/adapters/language_adapters/python/python_adapter.py +584 -0
- qyro/cli/__init__.py +8 -0
- qyro/cli/__main__.py +5 -0
- qyro/cli/cli.py +392 -0
- qyro/cli/interactive.py +297 -0
- qyro/common/__init__.py +37 -0
- qyro/common/animation.py +82 -0
- qyro/common/builder.py +434 -0
- qyro/common/compiler.py +895 -0
- qyro/common/config.py +93 -0
- qyro/common/constants.py +99 -0
- qyro/common/errors.py +176 -0
- qyro/common/frontend.py +74 -0
- qyro/common/health.py +358 -0
- qyro/common/kafka_manager.py +192 -0
- qyro/common/logging.py +149 -0
- qyro/common/memory.py +147 -0
- qyro/common/metrics.py +301 -0
- qyro/common/monitoring.py +468 -0
- qyro/common/parser.py +91 -0
- qyro/common/platform.py +609 -0
- qyro/common/redis_memory.py +1108 -0
- qyro/common/rpc.py +287 -0
- qyro/common/sandbox.py +191 -0
- qyro/common/schema_loader.py +33 -0
- qyro/common/secure_sandbox.py +490 -0
- qyro/common/toolchain_validator.py +617 -0
- qyro/common/type_generator.py +176 -0
- qyro/common/validation.py +401 -0
- qyro/common/validator.py +204 -0
- qyro/gateway/__init__.py +8 -0
- qyro/gateway/gateway.py +303 -0
- qyro/orchestrator/__init__.py +8 -0
- qyro/orchestrator/orchestrator.py +1223 -0
- qyro-2.0.0.dist-info/METADATA +244 -0
- qyro-2.0.0.dist-info/RECORD +45 -0
- qyro-2.0.0.dist-info/WHEEL +5 -0
- qyro-2.0.0.dist-info/entry_points.txt +2 -0
- qyro-2.0.0.dist-info/licenses/LICENSE +21 -0
- qyro-2.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1223 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Nexus Process Orchestrator with Kafka Integration
|
|
3
|
+
Manages lifecycle of polyglot processes with improved supervision,
|
|
4
|
+
Kafka-based messaging, and graceful error handling.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
import threading
|
|
11
|
+
import subprocess
|
|
12
|
+
from typing import Dict, List, Any, Optional
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
|
|
15
|
+
import colorama
|
|
16
|
+
from colorama import Fore, Style
|
|
17
|
+
|
|
18
|
+
from qyro.common.parser import QyroParser
|
|
19
|
+
from qyro.common.compiler import QyroCompiler
|
|
20
|
+
from qyro.common.schema_loader import QyroSchemaLoader
|
|
21
|
+
from qyro.common.logging import get_logger
|
|
22
|
+
from qyro.common.platform import get_platform
|
|
23
|
+
from qyro.common.kafka_manager import KafkaManager
|
|
24
|
+
from qyro.common.config import QyroConfig
|
|
25
|
+
from qyro.common.redis_memory import RedisQyroMemory, RedisConnectionError
|
|
26
|
+
from qyro.common.secure_sandbox import get_secure_sandbox
|
|
27
|
+
from qyro.common.monitoring import get_monitor
|
|
28
|
+
|
|
29
|
+
# colorama.init() - Disabled to prevent interference with UTF-8 stdout wrapper
if sys.platform == 'win32':
    try:
        # Preferred modern API: fixes the Windows console in place without
        # re-wrapping sys.stdout (keeps the UTF-8 wrapper intact).
        from colorama import just_fix_windows_console
        just_fix_windows_console()
    except ImportError:
        # Fallback for older colorama: initialize without stripping or
        # converting so stdout is not wrapped destructively.
        colorama.init(strip=False, convert=False)
else:
    colorama.init()
# Module-wide structured logger and monitoring singleton.
logger = get_logger("nexus.orchestrator")
monitor = get_monitor()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class ProcessInfo:
    """Information about a supervised process."""
    # Compiler artifact dict this process was launched from
    # (keys such as 'type', 'bin', 'src', 'class' depending on language).
    artifact: Dict[str, Any]
    # Display name, used as the [name] prefix on forwarded output lines.
    name: str
    # Command line used to (re)launch the process.
    cmd: List[str]
    # Live process handle; None until launched.
    # NOTE(review): the Windows job-object launch path in _launch_process
    # stores a psutil.Process here instead of a subprocess.Popen — confirm.
    proc: Optional[subprocess.Popen] = None
    # Number of restarts performed by the supervisor so far.
    restarts: int = 0
    # time.time() of the most recent crash; 0.0 if never crashed.
    last_crash: float = 0.0
    # Current delay before the next restart attempt (doubled on each crash,
    # capped by ProcessSupervisor.max_backoff).
    backoff_seconds: float = 1.0
    status: str = "unknown"  # running, crashed, stopped, backoff
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class ProcessSupervisor:
    """Crash accounting and restart-backoff policy for supervised processes."""

    def __init__(self, max_restarts: int = 10, initial_backoff: float = 1.0, max_backoff: float = 60.0):
        # Policy knobs: give up after max_restarts crashes; the per-process
        # backoff doubles on each crash up to max_backoff seconds.
        self.max_restarts = max_restarts
        self.initial_backoff = initial_backoff
        self.max_backoff = max_backoff

    def handle_process_crash(self, p_info: ProcessInfo, return_code: int) -> bool:
        """Record a crash on *p_info* and decide whether to restart it.

        Returns True when the process should be restarted after its backoff
        window, False when it has exhausted its restart budget.
        """
        p_info.last_crash = time.time()

        logger.warning("process_crashed", name=p_info.name, code=return_code, restarts=p_info.restarts)

        # Budget exhausted: mark as permanently failed.
        if p_info.restarts >= self.max_restarts:
            logger.error(f"Process {p_info.name} exceeded max restarts ({self.max_restarts}). Giving up.")
            p_info.status = "failed"
            return False

        # Budget remains: count the attempt and double the wait (capped).
        p_info.status = "backoff"
        p_info.restarts += 1
        p_info.backoff_seconds = min(p_info.backoff_seconds * 2, self.max_backoff)

        logger.info(f"Waiting {p_info.backoff_seconds:.1f}s before restart (attempt {p_info.restarts}/{self.max_restarts})...")
        return True

    def should_restart_process(self, p_info: ProcessInfo) -> bool:
        """Return True once the backoff window since the last crash has elapsed."""
        elapsed = time.time() - p_info.last_crash
        return elapsed >= p_info.backoff_seconds
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class QyroOrchestrator:
|
|
92
|
+
"""
|
|
93
|
+
Orchestrates the lifecycle of multiple polyglot processes with Kafka integration.
|
|
94
|
+
|
|
95
|
+
Features:
|
|
96
|
+
- Process supervision with crash detection
|
|
97
|
+
- Exponential backoff for restarts (prevents crash loops)
|
|
98
|
+
- Maximum restart limit per process
|
|
99
|
+
- Kafka-based inter-module communication
|
|
100
|
+
- Redis-based shared state
|
|
101
|
+
- Graceful shutdown with timeout
|
|
102
|
+
- Health monitoring
|
|
103
|
+
"""
|
|
104
|
+
|
|
105
|
+
def __init__(
|
|
106
|
+
self,
|
|
107
|
+
qyro_file: str,
|
|
108
|
+
config: QyroConfig,
|
|
109
|
+
skip_missing: bool = True,
|
|
110
|
+
):
|
|
111
|
+
self.qyro_file = qyro_file
|
|
112
|
+
self.config = config
|
|
113
|
+
self.parser = QyroParser()()
|
|
114
|
+
self.compiler = QyroCompiler(skip_missing=skip_missing)
|
|
115
|
+
self.schema_loader = QyroSchemaLoader()()
|
|
116
|
+
self.platform = get_platform()
|
|
117
|
+
self.running = True
|
|
118
|
+
self.processes: List[ProcessInfo] = []
|
|
119
|
+
self._monitor_thread: Optional[threading.Thread] = None
|
|
120
|
+
self.skip_missing = skip_missing
|
|
121
|
+
self.supervisor = ProcessSupervisor(
|
|
122
|
+
max_restarts=config.max_restarts,
|
|
123
|
+
initial_backoff=config.restart_backoff
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
# Kafka manager for messaging
|
|
127
|
+
self.kafka_manager: Optional[KafkaManager] = None
|
|
128
|
+
|
|
129
|
+
# Redis memory instance (initialized in start())
|
|
130
|
+
self.memory: Optional[RedisQyroMemory] = None
|
|
131
|
+
self._redis_available = False
|
|
132
|
+
self._use_distributed_memory = False
|
|
133
|
+
|
|
134
|
+
# Output monitoring thread
|
|
135
|
+
self._output_monitor_thread: Optional[threading.Thread] = None
|
|
136
|
+
|
|
137
|
+
def start(self):
|
|
138
|
+
"""Start the orchestrator with Kafka and Redis integration."""
|
|
139
|
+
self._print_banner()
|
|
140
|
+
|
|
141
|
+
try:
|
|
142
|
+
logger.info(f"Parsing {self.QYRO_file}...")
|
|
143
|
+
self.parser.parse_file(self.QYRO_file)
|
|
144
|
+
|
|
145
|
+
# Initialize Kafka manager
|
|
146
|
+
self._initialize_kafka_manager()
|
|
147
|
+
|
|
148
|
+
# Initialize Redis memory
|
|
149
|
+
self._initialize_redis_memory()
|
|
150
|
+
|
|
151
|
+
if self.parser.blocks.get('schema'):
|
|
152
|
+
logger.info("Detecting Schema...")
|
|
153
|
+
try:
|
|
154
|
+
initial_state = self.schema_loader.process_schema(self.parser.blocks['schema'][0])
|
|
155
|
+
|
|
156
|
+
# Initialize schema state in Redis
|
|
157
|
+
if initial_state and self._redis_available:
|
|
158
|
+
self.memory.write(initial_state)
|
|
159
|
+
logger.info(f"Schema initialized in Redis (Size: {len(str(initial_state))})")
|
|
160
|
+
|
|
161
|
+
# Also publish initial state to Kafka
|
|
162
|
+
if self.kafka_manager:
|
|
163
|
+
import asyncio
|
|
164
|
+
asyncio.create_task(
|
|
165
|
+
self.kafka_manager.publish_state_change(initial_state, 'orchestrator')
|
|
166
|
+
)
|
|
167
|
+
elif initial_state:
|
|
168
|
+
logger.info(f"Schema detected (Size: {len(str(initial_state))})")
|
|
169
|
+
logger.warning("Redis not available, schema not persisted.")
|
|
170
|
+
except Exception as e:
|
|
171
|
+
logger.error(f"SCHEMA INIT ERROR: {e}")
|
|
172
|
+
raise e
|
|
173
|
+
else:
|
|
174
|
+
logger.info("No schema block found.")
|
|
175
|
+
|
|
176
|
+
# Cleanup stale binaries BEFORE compilation to avoid permission errors on Windows
|
|
177
|
+
self._cleanup_stale_processes()
|
|
178
|
+
|
|
179
|
+
logger.info("Compiling modules...")
|
|
180
|
+
artifacts = self.compiler.compile(self.parser.blocks)
|
|
181
|
+
|
|
182
|
+
# Check for frontend blocks and prepare them
|
|
183
|
+
frontend_types = [a['type'] for a in artifacts if a['type'] in ['react', 'nextjs']]
|
|
184
|
+
if frontend_types:
|
|
185
|
+
self._prepare_frontend(frontend_types[0])
|
|
186
|
+
|
|
187
|
+
self._spawn_processes(artifacts)
|
|
188
|
+
|
|
189
|
+
# Start the centralized output monitoring thread
|
|
190
|
+
self._output_monitor_thread = threading.Thread(target=self._monitor_all_outputs, daemon=True)
|
|
191
|
+
self._output_monitor_thread.start()
|
|
192
|
+
|
|
193
|
+
# Subscribe to Kafka state changes to update Redis
|
|
194
|
+
if self.kafka_manager:
|
|
195
|
+
import asyncio
|
|
196
|
+
asyncio.create_task(self._subscribe_to_kafka_state_changes())
|
|
197
|
+
|
|
198
|
+
self._main_loop()
|
|
199
|
+
|
|
200
|
+
except KeyboardInterrupt:
|
|
201
|
+
logger.info("keyboard_interrupt")
|
|
202
|
+
self.shutdown()
|
|
203
|
+
except Exception as e:
|
|
204
|
+
logger.error("orchestrator_error", error=str(e))
|
|
205
|
+
self.antigravity_protocol(e)
|
|
206
|
+
|
|
207
|
+
def _initialize_kafka_manager(self):
|
|
208
|
+
"""Initialize Kafka manager for messaging."""
|
|
209
|
+
try:
|
|
210
|
+
print(f"{Fore.CYAN}[QYRO] Initializing Kafka at {self.config.kafka_bootstrap_servers}...{Style.RESET_ALL}")
|
|
211
|
+
self.kafka_manager = KafkaManager(self.config)
|
|
212
|
+
|
|
213
|
+
# Start Kafka manager
|
|
214
|
+
import asyncio
|
|
215
|
+
loop = asyncio.new_event_loop()
|
|
216
|
+
asyncio.set_event_loop(loop)
|
|
217
|
+
loop.run_until_complete(self.kafka_manager.start_producer())
|
|
218
|
+
|
|
219
|
+
logger.info("kafka_manager_initialized", servers=self.config.kafka_bootstrap_servers)
|
|
220
|
+
except Exception as e:
|
|
221
|
+
logger.warning("kafka_init_error", error=str(e))
|
|
222
|
+
logger.warning("Continuing without Kafka - some features will be unavailable.")
|
|
223
|
+
self.kafka_manager = None
|
|
224
|
+
|
|
225
|
+
def _initialize_redis_memory(self):
|
|
226
|
+
"""Initialize Redis memory and set up event subscriptions."""
|
|
227
|
+
try:
|
|
228
|
+
print(f"{Fore.CYAN}[QYRO] Connecting to Redis at {self.config.redis_host}:{self.config.redis_port}...{Style.RESET_ALL}")
|
|
229
|
+
self.memory = RedisQyroMemory(
|
|
230
|
+
host=self.config.redis_host,
|
|
231
|
+
port=self.config.redis_port,
|
|
232
|
+
db=self.config.redis_db,
|
|
233
|
+
password=self.config.redis_password
|
|
234
|
+
)
|
|
235
|
+
self._redis_available = True
|
|
236
|
+
|
|
237
|
+
# Register the orchestrator as a module
|
|
238
|
+
self.memory.register_module(
|
|
239
|
+
'orchestrator',
|
|
240
|
+
{
|
|
241
|
+
'version': '2.0',
|
|
242
|
+
'language': 'python',
|
|
243
|
+
'pid': os.getpid(),
|
|
244
|
+
'metadata': {
|
|
245
|
+
'type': 'orchestrator',
|
|
246
|
+
'QYRO_file': self.QYRO_file
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
# Subscribe to state changes
|
|
252
|
+
self.memory.subscribe_to_changes(self._on_state_change)
|
|
253
|
+
|
|
254
|
+
# Subscribe to events
|
|
255
|
+
self.memory.subscribe_to_events(self._on_redis_event)
|
|
256
|
+
|
|
257
|
+
# Subscribe to broadcasts
|
|
258
|
+
self.memory.subscribe_to_broadcasts(self._on_broadcast)
|
|
259
|
+
|
|
260
|
+
logger.info("redis_memory_initialized", host=self.config.redis_host, port=self.config.redis_port)
|
|
261
|
+
|
|
262
|
+
except RedisConnectionError as e:
|
|
263
|
+
logger.warning("redis_connection_failed", error=str(e))
|
|
264
|
+
logger.warning("Continuing without Redis - some features will be unavailable.")
|
|
265
|
+
self._redis_available = False
|
|
266
|
+
except Exception as e:
|
|
267
|
+
logger.warning("redis_init_error", error=str(e))
|
|
268
|
+
logger.warning("Continuing without Redis - some features will be unavailable.")
|
|
269
|
+
self._redis_available = False
|
|
270
|
+
|
|
271
|
+
async def _subscribe_to_kafka_state_changes(self):
|
|
272
|
+
"""Subscribe to Kafka state changes and update Redis."""
|
|
273
|
+
if not self.kafka_manager:
|
|
274
|
+
return
|
|
275
|
+
|
|
276
|
+
async def state_change_handler(state_diff):
|
|
277
|
+
if self._redis_available and self.memory:
|
|
278
|
+
try:
|
|
279
|
+
self.memory.update_fields(state_diff)
|
|
280
|
+
logger.debug("state_updated_from_kafka", keys=list(state_diff.keys()))
|
|
281
|
+
except Exception as e:
|
|
282
|
+
logger.error("redis_update_from_kafka_failed", error=str(e))
|
|
283
|
+
|
|
284
|
+
await self.kafka_manager.subscribe_to_state_changes(state_change_handler)
|
|
285
|
+
|
|
286
|
+
def _on_state_change(self, state: Dict[str, Any]):
|
|
287
|
+
"""Handle state change events from Redis."""
|
|
288
|
+
logger.debug("state_change_received", keys=list(state.keys()))
|
|
289
|
+
|
|
290
|
+
def _on_redis_event(self, event):
|
|
291
|
+
"""Handle general events from Redis."""
|
|
292
|
+
logger.debug("redis_event_received", event_type=event.event_type, source=event.source)
|
|
293
|
+
|
|
294
|
+
# Handle module registration events
|
|
295
|
+
if event.event_type == 'module_registered':
|
|
296
|
+
module_name = event.data.get('name')
|
|
297
|
+
if module_name:
|
|
298
|
+
logger.info(f"Module registered: {module_name}")
|
|
299
|
+
|
|
300
|
+
# Handle module unregistration events
|
|
301
|
+
elif event.event_type == 'module_unregistered':
|
|
302
|
+
module_name = event.data.get('module')
|
|
303
|
+
if module_name:
|
|
304
|
+
logger.warning(f"Module unregistered: {module_name}")
|
|
305
|
+
|
|
306
|
+
# Handle module state change events
|
|
307
|
+
elif event.event_type == 'module_state_changed':
|
|
308
|
+
module_name = event.data.get('module')
|
|
309
|
+
if module_name:
|
|
310
|
+
logger.debug("module_state_changed", module=module_name)
|
|
311
|
+
|
|
312
|
+
def _on_broadcast(self, message: Dict[str, Any]):
|
|
313
|
+
"""Handle broadcast messages from Redis."""
|
|
314
|
+
logger.debug("broadcast_received", message=message)
|
|
315
|
+
|
|
316
|
+
def _stream_output(self, pipe, name, is_stderr=False):
|
|
317
|
+
"""Stream output from a pipe to stdout."""
|
|
318
|
+
try:
|
|
319
|
+
for line in iter(pipe.readline, ''):
|
|
320
|
+
if not line:
|
|
321
|
+
break
|
|
322
|
+
# Remove trailing newline
|
|
323
|
+
line = line.rstrip()
|
|
324
|
+
if not line:
|
|
325
|
+
continue
|
|
326
|
+
|
|
327
|
+
# Colorize output based on source
|
|
328
|
+
prefix_color = Fore.RED if is_stderr else Fore.CYAN
|
|
329
|
+
# Check for "Link:" or http/https URLs to highlight
|
|
330
|
+
if "Link:" in line or "http://" in line or "https://" in line:
|
|
331
|
+
# Highlight URL if present
|
|
332
|
+
if "http" in line:
|
|
333
|
+
import re
|
|
334
|
+
url_pattern = r'(https?://[^\s]+)'
|
|
335
|
+
line = re.sub(url_pattern, f"{Fore.YELLOW}{Style.BRIGHT}\\1{Style.RESET_ALL}{prefix_color}", line)
|
|
336
|
+
|
|
337
|
+
self._safe_print(f"{prefix_color}[{name}] {line}{Style.RESET_ALL}")
|
|
338
|
+
self._safe_print(f"{Fore.GREEN}{Style.BRIGHT}>>> ACCESS LINK ABOVE <<<{Style.RESET_ALL}")
|
|
339
|
+
else:
|
|
340
|
+
self._safe_print(f"{prefix_color}[{name}] {line}{Style.RESET_ALL}")
|
|
341
|
+
|
|
342
|
+
except ValueError:
|
|
343
|
+
pass # Pipe closed
|
|
344
|
+
except Exception as e:
|
|
345
|
+
# Only log real errors, not pipe closures during shutdown
|
|
346
|
+
if self.running:
|
|
347
|
+
logger.error("stream_error", name=name, error=str(e))
|
|
348
|
+
|
|
349
|
+
def _monitor_all_outputs(self):
|
|
350
|
+
"""Monitor output from all processes using a single thread with select() for efficiency."""
|
|
351
|
+
import select
|
|
352
|
+
import io
|
|
353
|
+
|
|
354
|
+
# Collect all pipes to monitor
|
|
355
|
+
all_pipes = []
|
|
356
|
+
pipe_info = {} # Maps pipe object to (name, is_stderr) tuple
|
|
357
|
+
|
|
358
|
+
for p_info in self.processes:
|
|
359
|
+
if p_info.proc:
|
|
360
|
+
if p_info.proc.stdout:
|
|
361
|
+
all_pipes.append(p_info.proc.stdout)
|
|
362
|
+
pipe_info[p_info.proc.stdout] = (p_info.name, False)
|
|
363
|
+
if p_info.proc.stderr:
|
|
364
|
+
all_pipes.append(p_info.proc.stderr)
|
|
365
|
+
pipe_info[p_info.proc.stderr] = (p_info.name, True)
|
|
366
|
+
|
|
367
|
+
# Use select to efficiently wait for data on any pipe (Unix-like systems)
|
|
368
|
+
# For Windows, we'll use a different approach
|
|
369
|
+
if sys.platform == 'win32':
|
|
370
|
+
# On Windows, use a simpler approach with a single queue and threads per pipe
|
|
371
|
+
import queue
|
|
372
|
+
import threading
|
|
373
|
+
|
|
374
|
+
output_queue = queue.Queue()
|
|
375
|
+
|
|
376
|
+
def pipe_reader(pipe, name, is_stderr):
|
|
377
|
+
"""Read from a single pipe and put output in shared queue."""
|
|
378
|
+
try:
|
|
379
|
+
while self.running:
|
|
380
|
+
line = pipe.readline()
|
|
381
|
+
if not line:
|
|
382
|
+
break
|
|
383
|
+
output_queue.put((name, is_stderr, line.rstrip()))
|
|
384
|
+
except Exception:
|
|
385
|
+
pass # Pipe closed or other error
|
|
386
|
+
|
|
387
|
+
# Start a thread for each pipe
|
|
388
|
+
reader_threads = []
|
|
389
|
+
for p_info in self.processes:
|
|
390
|
+
if p_info.proc:
|
|
391
|
+
if p_info.proc.stdout:
|
|
392
|
+
t = threading.Thread(
|
|
393
|
+
target=pipe_reader,
|
|
394
|
+
args=(p_info.proc.stdout, p_info.name, False),
|
|
395
|
+
daemon=True
|
|
396
|
+
)
|
|
397
|
+
t.start()
|
|
398
|
+
reader_threads.append(t)
|
|
399
|
+
|
|
400
|
+
if p_info.proc.stderr:
|
|
401
|
+
t = threading.Thread(
|
|
402
|
+
target=pipe_reader,
|
|
403
|
+
args=(p_info.proc.stderr, p_info.name, True),
|
|
404
|
+
daemon=True
|
|
405
|
+
)
|
|
406
|
+
t.start()
|
|
407
|
+
reader_threads.append(t)
|
|
408
|
+
|
|
409
|
+
# Process output from the shared queue
|
|
410
|
+
while self.running:
|
|
411
|
+
try:
|
|
412
|
+
try:
|
|
413
|
+
name, is_stderr, line = output_queue.get(timeout=0.01)
|
|
414
|
+
if line:
|
|
415
|
+
# Colorize output based on source
|
|
416
|
+
prefix_color = Fore.RED if is_stderr else Fore.CYAN
|
|
417
|
+
# Check for "Link:" or http/https URLs to highlight
|
|
418
|
+
if "Link:" in line or "http://" in line or "https://" in line:
|
|
419
|
+
if "http" in line:
|
|
420
|
+
import re
|
|
421
|
+
url_pattern = r'(https?://[^\s]+)'
|
|
422
|
+
line = re.sub(url_pattern, f"{Fore.YELLOW}{Style.BRIGHT}\\1{Style.RESET_ALL}{prefix_color}", line)
|
|
423
|
+
|
|
424
|
+
self._safe_print(f"{prefix_color}[{name}] {line}{Style.RESET_ALL}")
|
|
425
|
+
self._safe_print(f"{Fore.GREEN}{Style.BRIGHT}>>> ACCESS LINK ABOVE <<<{Style.RESET_ALL}")
|
|
426
|
+
else:
|
|
427
|
+
self._safe_print(f"{prefix_color}[{name}] {line}{Style.RESET_ALL}")
|
|
428
|
+
except queue.Empty:
|
|
429
|
+
continue
|
|
430
|
+
except Exception:
|
|
431
|
+
time.sleep(0.01)
|
|
432
|
+
else:
|
|
433
|
+
# Unix-like systems: use select for efficient I/O multiplexing
|
|
434
|
+
while self.running:
|
|
435
|
+
try:
|
|
436
|
+
# Use select to wait for data on any of the pipes
|
|
437
|
+
ready_pipes, _, _ = select.select(all_pipes, [], [], 0.1)
|
|
438
|
+
|
|
439
|
+
for pipe in ready_pipes:
|
|
440
|
+
try:
|
|
441
|
+
line = pipe.readline()
|
|
442
|
+
if line:
|
|
443
|
+
name, is_stderr = pipe_info[pipe]
|
|
444
|
+
line = line.rstrip()
|
|
445
|
+
|
|
446
|
+
if line:
|
|
447
|
+
# Colorize output based on source
|
|
448
|
+
prefix_color = Fore.RED if is_stderr else Fore.CYAN
|
|
449
|
+
# Check for "Link:" or http/https URLs to highlight
|
|
450
|
+
if "Link:" in line or "http://" in line or "https://" in line:
|
|
451
|
+
if "http" in line:
|
|
452
|
+
import re
|
|
453
|
+
url_pattern = r'(https?://[^\s]+)'
|
|
454
|
+
line = re.sub(url_pattern, f"{Fore.YELLOW}{Style.BRIGHT}\\1{Style.RESET_ALL}{prefix_color}", line)
|
|
455
|
+
|
|
456
|
+
self._safe_print(f"{prefix_color}[{name}] {line}{Style.RESET_ALL}")
|
|
457
|
+
self._safe_print(f"{Fore.GREEN}{Style.BRIGHT}>>> ACCESS LINK ABOVE <<<{Style.RESET_ALL}")
|
|
458
|
+
else:
|
|
459
|
+
self._safe_print(f"{prefix_color}[{name}] {line}{Style.RESET_ALL}")
|
|
460
|
+
else:
|
|
461
|
+
# EOF reached, remove pipe from monitoring
|
|
462
|
+
if pipe in all_pipes:
|
|
463
|
+
all_pipes.remove(pipe)
|
|
464
|
+
except Exception:
|
|
465
|
+
# If we can't read from the pipe, remove it
|
|
466
|
+
if pipe in all_pipes:
|
|
467
|
+
all_pipes.remove(pipe)
|
|
468
|
+
|
|
469
|
+
except Exception as e:
|
|
470
|
+
# Brief pause to avoid tight loop if there are issues
|
|
471
|
+
time.sleep(0.01)
|
|
472
|
+
|
|
473
|
+
def _launch_process(self, art: Dict[str, Any], existing: Optional[ProcessInfo] = None) -> Optional[ProcessInfo]:
|
|
474
|
+
"""Launch a single process from an artifact with platform abstraction."""
|
|
475
|
+
try:
|
|
476
|
+
cmd = []
|
|
477
|
+
name = "Unknown"
|
|
478
|
+
launch_cwd = None
|
|
479
|
+
|
|
480
|
+
if art['type'] == 'c':
|
|
481
|
+
name = art['bin']
|
|
482
|
+
cmd = [f"./{art['bin']}"]
|
|
483
|
+
elif art['type'] == 'rs':
|
|
484
|
+
name = art['bin']
|
|
485
|
+
cmd = [f"./{art['bin']}"]
|
|
486
|
+
elif art['type'] == 'java':
|
|
487
|
+
# Java modules now use NexusModuleRunner wrapper
|
|
488
|
+
# The wrapper expects the user class name as an argument
|
|
489
|
+
user_class = art.get('user_class', art['class'])
|
|
490
|
+
name = f"{art['class']}({user_class})"
|
|
491
|
+
cp = art.get('cp', '.')
|
|
492
|
+
|
|
493
|
+
# Add JAR files from lib directory to classpath if they exist
|
|
494
|
+
lib_dir = "lib"
|
|
495
|
+
classpath_sep = ';' if self.platform.is_windows() else ':'
|
|
496
|
+
|
|
497
|
+
if os.path.exists(lib_dir):
|
|
498
|
+
for jar_file in os.listdir(lib_dir):
|
|
499
|
+
if jar_file.endswith('.jar'):
|
|
500
|
+
cp += f"{classpath_sep}{lib_dir}/{jar_file}"
|
|
501
|
+
|
|
502
|
+
cmd = ["java", "-cp", cp, art['class'], user_class]
|
|
503
|
+
elif art['type'] == 'go':
|
|
504
|
+
name = art['bin']
|
|
505
|
+
cmd = [f"./{art['bin']}"]
|
|
506
|
+
elif art['type'] == 'ts':
|
|
507
|
+
name = art['src']
|
|
508
|
+
cmd = ["npx", "ts-node", art['src']]
|
|
509
|
+
elif art['type'] == 'web':
|
|
510
|
+
name = art['src']
|
|
511
|
+
cmd = ["uvicorn", art['src'].replace('.py', ':app'), "--reload", "--port", "8000"]
|
|
512
|
+
elif art['type'] == 'py':
|
|
513
|
+
name = art['src']
|
|
514
|
+
cmd = ["python", "-u", art['src']] # -u for unbuffered output
|
|
515
|
+
elif art['type'] == 'react':
|
|
516
|
+
name = "React-Frontend"
|
|
517
|
+
# Frontend is in QYRO_frontend/
|
|
518
|
+
cmd = ["npm", "start"]
|
|
519
|
+
launch_cwd = "QYRO_frontend"
|
|
520
|
+
# Check for package.json to ensure it's ready
|
|
521
|
+
if not os.path.exists(os.path.join(launch_cwd, "package.json")):
|
|
522
|
+
logger.warning(f"React Frontend not initialized. Skipping launch.")
|
|
523
|
+
return None
|
|
524
|
+
elif art['type'] == 'nextjs':
|
|
525
|
+
name = "NextJS-Frontend"
|
|
526
|
+
cmd = ["npm", "run", "dev"]
|
|
527
|
+
launch_cwd = "QYRO_frontend"
|
|
528
|
+
if not os.path.exists(os.path.join(launch_cwd, "package.json")):
|
|
529
|
+
logger.warning("Next.js Frontend not initialized. Skipping launch.")
|
|
530
|
+
return None
|
|
531
|
+
|
|
532
|
+
if not cmd:
|
|
533
|
+
logger.warning("unknown_artifact_type", artifact=art)
|
|
534
|
+
return None
|
|
535
|
+
|
|
536
|
+
# Windows Robustness: Kill any existing process with the same name to avoid 'Permission Denied'
|
|
537
|
+
# (Already handled in _cleanup_stale_processes, but good for dynamic restarts)
|
|
538
|
+
if self.platform.is_windows() and art['type'] in ['c', 'rs', 'go']:
|
|
539
|
+
try:
|
|
540
|
+
self.platform.kill_process_by_name(name, force=True)
|
|
541
|
+
except:
|
|
542
|
+
pass
|
|
543
|
+
|
|
544
|
+
logger.info(f"Launching {art['type'].upper()} Module: {name}")
|
|
545
|
+
|
|
546
|
+
# Use shell=True only for TypeScript (npx issues on Windows)
|
|
547
|
+
use_shell = art['type'] in ['ts', 'react', 'nextjs']
|
|
548
|
+
|
|
549
|
+
# Prepare environment with Redis and Kafka connection info
|
|
550
|
+
env = os.environ.copy()
|
|
551
|
+
if self._redis_available:
|
|
552
|
+
env['QYRO_REDIS_HOST'] = self.config.redis_host
|
|
553
|
+
env['QYRO_REDIS_PORT'] = str(self.config.redis_port)
|
|
554
|
+
env['QYRO_REDIS_DB'] = str(self.config.redis_db)
|
|
555
|
+
if self.config.redis_password:
|
|
556
|
+
env['QYRO_REDIS_PASSWORD'] = self.config.redis_password
|
|
557
|
+
env['QYRO_MODULE_NAME'] = name
|
|
558
|
+
|
|
559
|
+
# Add Kafka configuration to environment
|
|
560
|
+
if self.kafka_manager:
|
|
561
|
+
env['QYRO_KAFKA_BOOTSTRAP_SERVERS'] = self.config.kafka_bootstrap_servers
|
|
562
|
+
env['QYRO_KAFKA_TOPIC_PREFIX'] = self.config.kafka_topic_prefix
|
|
563
|
+
env['QYRO_MODULE_NAME'] = name
|
|
564
|
+
|
|
565
|
+
# SECURITY: Apply resource limits to prevent runaway processes
|
|
566
|
+
def apply_resource_limits():
|
|
567
|
+
"""Apply resource limits in child process."""
|
|
568
|
+
import resource
|
|
569
|
+
try:
|
|
570
|
+
# Limit virtual memory to max_memory_mb (in bytes)
|
|
571
|
+
if hasattr(resource, 'RLIMIT_AS'):
|
|
572
|
+
max_memory = self.config.max_memory_mb * 1024 * 1024 # Convert MB to bytes
|
|
573
|
+
resource.setrlimit(resource.RLIMIT_AS, (max_memory, max_memory))
|
|
574
|
+
|
|
575
|
+
# Limit CPU time based on configuration
|
|
576
|
+
if hasattr(resource, 'RLIMIT_CPU'):
|
|
577
|
+
max_cpu_time = 300 # 5 minutes - configurable
|
|
578
|
+
resource.setrlimit(resource.RLIMIT_CPU, (max_cpu_time, max_cpu_time))
|
|
579
|
+
|
|
580
|
+
# Limit number of file descriptors to prevent resource exhaustion
|
|
581
|
+
if hasattr(resource, 'RLIMIT_NOFILE'):
|
|
582
|
+
max_fds = 32 # Conservative limit for most applications
|
|
583
|
+
resource.setrlimit(resource.RLIMIT_NOFILE, (max_fds, max_fds))
|
|
584
|
+
|
|
585
|
+
# Limit core file size to prevent disk exhaustion
|
|
586
|
+
if hasattr(resource, 'RLIMIT_CORE'):
|
|
587
|
+
resource.setrlimit(resource.RLIMIT_CORE, (0, 0))
|
|
588
|
+
|
|
589
|
+
# Limit stack size to prevent stack overflow
|
|
590
|
+
if hasattr(resource, 'RLIMIT_STACK'):
|
|
591
|
+
max_stack = 8 * 1024 * 1024 # 8 MB
|
|
592
|
+
resource.setrlimit(resource.RLIMIT_STACK, (max_stack, max_stack))
|
|
593
|
+
|
|
594
|
+
except (AttributeError, ValueError, OSError) as e:
|
|
595
|
+
# Resource limits not available or not permitted on this platform
|
|
596
|
+
logger.warning("resource_limits_not_applied", error=str(e))
|
|
597
|
+
|
|
598
|
+
# On Windows, we'll use different approach for resource limits
|
|
599
|
+
if self.platform.is_windows():
|
|
600
|
+
# Check if required Windows modules are available
|
|
601
|
+
try:
|
|
602
|
+
import win32job
|
|
603
|
+
import win32process
|
|
604
|
+
import win32api
|
|
605
|
+
|
|
606
|
+
# Use Windows job objects to limit resources
|
|
607
|
+
# Create a job object to limit resources
|
|
608
|
+
job = win32job.CreateJobObject(None, "")
|
|
609
|
+
|
|
610
|
+
# Set extended limit information
|
|
611
|
+
extended_info = win32job.QueryInformationJobObject(job, win32job.JobObjectExtendedLimitInformation)
|
|
612
|
+
|
|
613
|
+
# Access BasicLimitInformation correctly
|
|
614
|
+
basic_limit_info = extended_info['BasicLimitInformation']
|
|
615
|
+
basic_limit_info['ActiveProcessLimit'] = 1
|
|
616
|
+
basic_limit_info['PerProcessUserTimeLimit'] = 300000000 # 5 minutes in 100ns units
|
|
617
|
+
basic_limit_info['MaximumWorkingSetSize'] = self.config.max_memory_mb * 1024 * 1024 # Convert MB to bytes
|
|
618
|
+
basic_limit_info['MinimumWorkingSetSize'] = 1 * 1024 * 1024 # 1MB
|
|
619
|
+
|
|
620
|
+
# Check if JOB_OBJECT_LIMIT_WORKING_SET is available (some Windows versions don't have it)
|
|
621
|
+
limit_flags = win32job.JOB_OBJECT_LIMIT_ACTIVE_PROCESS \
|
|
622
|
+
| win32job.JOB_OBJECT_LIMIT_PROCESS_TIME \
|
|
623
|
+
| win32job.JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE
|
|
624
|
+
|
|
625
|
+
# Add working set limit only if it's available
|
|
626
|
+
if hasattr(win32job, 'JOB_OBJECT_LIMIT_WORKING_SET'):
|
|
627
|
+
limit_flags |= win32job.JOB_OBJECT_LIMIT_WORKING_SET
|
|
628
|
+
|
|
629
|
+
# Set the limit flags
|
|
630
|
+
extended_info['LimitFlags'] = limit_flags
|
|
631
|
+
extended_info['BasicLimitInformation'] = basic_limit_info
|
|
632
|
+
|
|
633
|
+
win32job.SetInformationJobObject(job, win32job.JobObjectExtendedLimitInformation, extended_info)
|
|
634
|
+
|
|
635
|
+
# Create process suspended to assign to job first
|
|
636
|
+
startup_info = win32process.STARTUPINFO()
|
|
637
|
+
creation_flags = win32process.CREATE_SUSPENDED | win32process.CREATE_NEW_PROCESS_GROUP
|
|
638
|
+
|
|
639
|
+
proc_handle, thrd_handle, proc_id, thr_id = win32process.CreateProcess(
|
|
640
|
+
None, # Application name
|
|
641
|
+
' '.join(cmd), # Command line
|
|
642
|
+
None, # Process security attributes
|
|
643
|
+
None, # Thread security attributes
|
|
644
|
+
False, # Inherit handles
|
|
645
|
+
creation_flags, # Creation flags
|
|
646
|
+
env, # Environment
|
|
647
|
+
launch_cwd, # Current directory
|
|
648
|
+
startup_info # Startup info
|
|
649
|
+
)
|
|
650
|
+
|
|
651
|
+
# Assign process to job
|
|
652
|
+
win32job.AssignProcessToJobObject(job, proc_handle)
|
|
653
|
+
|
|
654
|
+
# Resume the process
|
|
655
|
+
win32process.ResumeThread(thrd_handle)
|
|
656
|
+
|
|
657
|
+
# Close handles
|
|
658
|
+
win32api.CloseHandle(thrd_handle)
|
|
659
|
+
|
|
660
|
+
# Create a subprocess.Popen-like object for compatibility
|
|
661
|
+
import psutil
|
|
662
|
+
proc = psutil.Process(proc_id)
|
|
663
|
+
|
|
664
|
+
except ImportError:
|
|
665
|
+
# Fallback to standard subprocess if Windows modules not available
|
|
666
|
+
logger.warning("windows_job_objects_not_available",
|
|
667
|
+
msg="pywin32 not installed, using standard subprocess")
|
|
668
|
+
proc = subprocess.Popen(
|
|
669
|
+
cmd,
|
|
670
|
+
shell=use_shell,
|
|
671
|
+
stdout=subprocess.PIPE,
|
|
672
|
+
stderr=subprocess.PIPE,
|
|
673
|
+
text=True,
|
|
674
|
+
encoding='utf-8',
|
|
675
|
+
errors='replace',
|
|
676
|
+
bufsize=1, # Line buffered
|
|
677
|
+
cwd=launch_cwd,
|
|
678
|
+
env=env,
|
|
679
|
+
# SECURITY: Prevent spawned processes from inheriting stdin
|
|
680
|
+
stdin=subprocess.DEVNULL
|
|
681
|
+
)
|
|
682
|
+
else:
|
|
683
|
+
# Unix-like systems - use resource limits
|
|
684
|
+
proc = subprocess.Popen(
|
|
685
|
+
cmd,
|
|
686
|
+
shell=use_shell,
|
|
687
|
+
stdout=subprocess.PIPE,
|
|
688
|
+
stderr=subprocess.PIPE,
|
|
689
|
+
text=True,
|
|
690
|
+
encoding='utf-8',
|
|
691
|
+
errors='replace',
|
|
692
|
+
bufsize=1, # Line buffered
|
|
693
|
+
cwd=launch_cwd,
|
|
694
|
+
env=env,
|
|
695
|
+
# SECURITY: Prevent spawned processes from inheriting stdin
|
|
696
|
+
stdin=subprocess.DEVNULL,
|
|
697
|
+
# Apply resource limits to child process (Unix only)
|
|
698
|
+
preexec_fn=apply_resource_limits
|
|
699
|
+
)
|
|
700
|
+
|
|
701
|
+
# Instead of creating threads per process, we'll collect all processes
|
|
702
|
+
# and handle output in a single monitoring thread later
|
|
703
|
+
pass # Output will be handled by the centralized monitor
|
|
704
|
+
|
|
705
|
+
if existing:
|
|
706
|
+
existing.proc = proc
|
|
707
|
+
existing.restarts += 1
|
|
708
|
+
existing.status = "running"
|
|
709
|
+
existing.backoff_seconds = self.supervisor.initial_backoff
|
|
710
|
+
return existing
|
|
711
|
+
else:
|
|
712
|
+
info = ProcessInfo(
|
|
713
|
+
artifact=art,
|
|
714
|
+
name=name,
|
|
715
|
+
cmd=cmd,
|
|
716
|
+
proc=proc,
|
|
717
|
+
status="running"
|
|
718
|
+
)
|
|
719
|
+
self.processes.append(info)
|
|
720
|
+
self._save_pid(proc.pid if hasattr(proc, 'pid') else proc.pid)
|
|
721
|
+
|
|
722
|
+
# Publish module start event to Kafka
|
|
723
|
+
if self.kafka_manager:
|
|
724
|
+
import asyncio
|
|
725
|
+
asyncio.create_task(
|
|
726
|
+
self.kafka_manager.publish_module_event('module_started', name, {
|
|
727
|
+
'pid': proc.pid if hasattr(proc, 'pid') else proc.pid,
|
|
728
|
+
'command': cmd
|
|
729
|
+
})
|
|
730
|
+
)
|
|
731
|
+
|
|
732
|
+
return info
|
|
733
|
+
|
|
734
|
+
except Exception as e:
|
|
735
|
+
logger.error("spawn_failed", artifact=art, error=str(e))
|
|
736
|
+
return None
|
|
737
|
+
|
|
738
|
+
def _spawn_processes(self, artifacts: List[Dict[str, Any]]):
    """Launch a supervised subprocess for every build artifact.

    Args:
        artifacts: Build-artifact descriptors, one per module to run.
    """
    logger.info(f"Supervisor: Spawning {len(artifacts)} subprocesses...")
    for artifact in artifacts:
        self._launch_process(artifact)
|
|
743
|
+
|
|
744
|
+
def _main_loop(self):
    """Main supervision loop using Kafka and Redis for communication.

    Announces which transport is active, then polls process health on a
    coarse cadence while sleeping in short slices so shutdown requests
    (``self.running`` flipping to False) are noticed quickly.
    """
    logger.info("EXECUTION STARTED. Press Ctrl+C to abort.")

    if self.kafka_manager:
        logger.info("SUPERVISOR: Using Kafka for reliable messaging.")
    elif self._redis_available:
        logger.info("SUPERVISOR: Using Redis pub/sub for real-time communication.")
    else:
        logger.warning("SUPERVISOR: Both Kafka and Redis unavailable, using polling fallback.")

    # Health checks every 2s; the 0.1s sleep keeps the loop responsive.
    check_interval = 2.0
    last_check = time.time()

    while self.running:
        now = time.time()
        if now - last_check >= check_interval:
            self._check_processes()
            last_check = now
        time.sleep(0.1)
|
|
770
|
+
|
|
771
|
+
def _check_processes(self):
    """Check all processes and handle crashes using more efficient monitoring.

    Iterates every tracked ProcessInfo once: processes in "backoff" are
    respawned when their backoff window has elapsed; all others get a
    non-blocking liveness check, and any exit is routed to
    _handle_process_exit().  Called periodically from _main_loop().
    """
    now = time.time()

    # Use a more efficient approach - only check processes that need attention
    for p_info in self.processes:
        # No handle yet (spawn failed or not started) - nothing to check.
        if p_info.proc is None:
            continue

        # Check if in backoff period
        if p_info.status == "backoff":
            if self.supervisor.should_restart_process(p_info):
                logger.info(f"PHOENIX PROTOCOL: Respawning {p_info.name} after {p_info.backoff_seconds:.1f}s backoff...")
                # Reuses the existing ProcessInfo record so restart counts persist.
                self._launch_process(p_info.artifact, existing=p_info)
            continue

        # Check if process has exited (non-blocking check)
        try:
            # For Windows, use psutil to check if process is still alive
            if self.platform.is_windows():
                import psutil
                try:
                    proc = p_info.proc
                    # If proc is a psutil Process object
                    if hasattr(proc, 'is_running'):
                        if not proc.is_running():
                            # Process has died
                            # NOTE(review): psutil's status() returns a status
                            # *string* (e.g. 'dead'), not an exit code, yet it is
                            # passed on as `ret` here - confirm downstream handling.
                            ret = proc.status() if proc.status() != 'zombie' else 1
                            self._handle_process_exit(p_info, ret, now)
                    else:
                        # If proc is a subprocess.Popen object
                        ret = p_info.proc.poll()
                        if ret is not None:
                            self._handle_process_exit(p_info, ret, now)
                except psutil.NoSuchProcess:
                    # Process definitely died
                    self._handle_process_exit(p_info, 1, now)
            else:
                # Unix-like systems - use poll
                ret = p_info.proc.poll()
                if ret is not None:
                    self._handle_process_exit(p_info, ret, now)

        except Exception as e:
            logger.warning("process_check_error", name=p_info.name, error=str(e))
            # Assume process died if we can't check
            self._handle_process_exit(p_info, 1, now)
|
|
818
|
+
|
|
819
|
+
def _handle_process_exit(self, p_info, ret, now):
    """Handle a process exit with proper cleanup.

    Args:
        p_info: ProcessInfo record for the exited process (mutated in place).
        ret: Exit code (or status value) reported for the process.
        now: time.time() timestamp at which the exit was observed.
    """

    def _publish(event, payload):
        # Best-effort Kafka notification.  BUGFIX: the original called
        # asyncio.create_task() unconditionally, which raises RuntimeError
        # when no event loop is running - and this method is invoked from
        # the synchronous supervision loop, so it raised whenever Kafka
        # was enabled.  We now use a running loop if one exists, otherwise
        # fall back to a short-lived asyncio.run().
        if not self.kafka_manager:
            return
        import asyncio
        coro = self.kafka_manager.publish_module_event(event, p_info.name, payload)
        try:
            asyncio.get_running_loop().create_task(coro)
        except RuntimeError:
            try:
                asyncio.run(coro)
            except Exception as e:
                logger.warning("kafka_publish_failed", event=event, error=str(e))

    p_info.last_crash = now

    if ret == 0:
        logger.info(f"Process {p_info.name} completed successfully.")
        p_info.status = "stopped"
        # Publish module stop event to Kafka
        _publish('module_stopped', {'exit_code': ret})
        return

    logger.error(f"ALERT: Process {p_info.name} died with code {ret}!")

    # Print stderr for debugging if available
    try:
        if hasattr(p_info.proc, 'stderr') and p_info.proc.stderr:
            # Try to read any remaining stderr
            try:
                err_out = p_info.proc.stderr.read()
                if err_out:
                    logger.error(f"{p_info.name} STDERR:\n{err_out}")
            except Exception:
                pass  # May fail if stream is closed
    except Exception:
        pass

    # Use supervisor to handle the crash
    should_restart = self.supervisor.handle_process_crash(p_info, ret)
    if should_restart:
        logger.info(f"Respawning {p_info.name} after backoff period...")
    else:
        # Publish module failure event to Kafka
        _publish('module_failed', {
            'exit_code': ret,
            'restart_attempts': p_info.restarts
        })
|
|
866
|
+
|
|
867
|
+
def get_status(self) -> Dict[str, Any]:
    """Return a snapshot of supervisor state, transports and child processes.

    Returns:
        Dict with keys ``running``, ``redis_available``, ``kafka_available``
        and ``processes``; ``redis``/``registered_modules`` and ``kafka``
        are added when the respective transport is active.
    """
    process_rows = []
    for p in self.processes:
        process_rows.append({
            "name": p.name,
            "status": p.status,
            "restarts": p.restarts,
            "pid": p.proc.pid if p.proc and hasattr(p.proc, 'pid') else None,
        })

    status: Dict[str, Any] = {
        "running": self.running,
        "redis_available": self._redis_available,
        "kafka_available": self.kafka_manager is not None,
        "processes": process_rows,
    }

    # Redis statistics are best-effort; a failure is recorded, not raised.
    if self._redis_available and self.memory:
        try:
            status["redis"] = self.memory.get_stats()
            status["registered_modules"] = self.memory.get_registered_modules()
        except Exception as e:
            logger.warning("redis_stats_failed", error=str(e))
            status["redis"] = {"error": str(e)}

    # Kafka configuration summary, likewise best-effort.
    if self.kafka_manager:
        try:
            status["kafka"] = {
                "bootstrap_servers": self.config.kafka_bootstrap_servers,
                "topics": self.config.kafka_topics,
            }
        except Exception as e:
            logger.warning("kafka_stats_failed", error=str(e))
            status["kafka"] = {"error": str(e)}

    return status
|
|
905
|
+
|
|
906
|
+
def shutdown(self, timeout: float = 5.0):
    """Graceful shutdown with timeout.

    Closes Redis and Kafka connections, terminates every child process
    (terminate(), then kill() after *timeout* seconds), joins the output
    monitor thread, and removes PID/temp files.

    Args:
        timeout: Seconds to wait for each child after terminate() before
            escalating to kill().
    """
    self.running = False
    logger.info("Initiating graceful shutdown...")

    # Unregister from Redis and close connections
    if self._redis_available and self.memory:
        try:
            logger.info("Unregistering orchestrator from Redis...")
            self.memory.unregister_module('orchestrator')
            self.memory.close()
            logger.info("Redis connections closed.")
        except Exception as e:
            logger.warning("redis_shutdown_error", error=str(e))

    # Close Kafka connections
    if self.kafka_manager:
        try:
            logger.info("Closing Kafka connections...")
            import asyncio
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            try:
                loop.run_until_complete(self.kafka_manager.stop_producer())
            finally:
                # BUGFIX: the temporary loop was previously leaked.
                loop.close()
                asyncio.set_event_loop(None)
            logger.info("Kafka connections closed.")
        except Exception as e:
            logger.warning("kafka_shutdown_error", error=str(e))

    # Terminate all processes with proper cleanup
    for p_info in self.processes:
        if p_info.proc is None:
            continue

        try:
            logger.info(f"Terminating {p_info.name}...")

            # Both subprocess.Popen and psutil.Process expose terminate();
            # the original code had two byte-identical branches here.
            try:
                p_info.proc.terminate()
            except Exception:
                pass  # Already gone, or terminate unsupported

            try:
                # wait() exists on both Popen and psutil.Process.
                exit_code = p_info.proc.wait(timeout=timeout)
                logger.info(f"{p_info.name} terminated gracefully with exit code {exit_code}.")
            except Exception:
                # BUGFIX: catching only subprocess.TimeoutExpired missed
                # psutil.TimeoutExpired (not a subclass), so psutil-managed
                # children on Windows were never force-killed.
                logger.warning(f"{p_info.name} didn't respond, forcing termination...")
                try:
                    p_info.proc.kill()
                    # Wait briefly for the process to be cleaned up
                    p_info.proc.wait(timeout=1)
                except Exception:
                    # Process may have already exited
                    pass

        except ProcessLookupError:
            # Process already terminated
            logger.info(f"{p_info.name} already terminated.")
        except Exception as e:
            logger.error("shutdown_error", name=p_info.name, error=str(e))

    # Wait for the output monitoring thread to finish
    if self._output_monitor_thread and self._output_monitor_thread.is_alive():
        self._output_monitor_thread.join(timeout=2.0)

    # Clean up PID files and temporary resources
    self._cleanup_pid_files()
    self._cleanup_temp_resources()

    logger.info("System Shutdown Complete. All resources cleaned up.")
|
|
991
|
+
|
|
992
|
+
def _cleanup_pid_files(self):
    """Delete the persisted PID bookkeeping file, if present."""
    pid_dir = ".qyro"
    if not os.path.exists(pid_dir):
        return
    pid_file = os.path.join(pid_dir, "pids")
    if not os.path.exists(pid_file):
        return
    try:
        os.remove(pid_file)
        logger.debug("pid_file_removed", path=pid_file)
    except Exception as e:
        # Removal is best-effort; a stale file is merely cosmetic.
        logger.warning("pid_file_remove_failed", path=pid_file, error=str(e))
|
|
1003
|
+
|
|
1004
|
+
def _cleanup_temp_resources(self):
    """Best-effort removal of temporary files and directories left by a run.

    NOTE(review): the "*.tmp" glob matches ANY .tmp file in the working
    directory, not just Qyro's own - confirm that breadth is intended.
    """
    import glob
    import shutil

    temp_patterns = [
        "QYRO_*.tmp",
        "*.tmp",
        ".QYRO_*.lock",
        "QYRO_generated/*",
    ]

    for pattern in temp_patterns:
        for match in glob.glob(pattern):
            try:
                if os.path.isfile(match):
                    os.remove(match)
                    logger.debug("temp_file_removed", path=match)
                elif os.path.isdir(match):
                    shutil.rmtree(match)
                    logger.debug("temp_dir_removed", path=match)
            except Exception as e:
                logger.warning("temp_resource_remove_failed", path=match, error=str(e))

    # Known scratch directories are removed wholesale.
    for temp_dir in [".QYRO_tmp", "__pycache__", "QYRO_generated"]:
        if os.path.exists(temp_dir) and os.path.isdir(temp_dir):
            try:
                shutil.rmtree(temp_dir)
                logger.debug("temp_dir_removed", path=temp_dir)
            except Exception as e:
                logger.warning("temp_dir_remove_failed", path=temp_dir, error=str(e))
|
|
1038
|
+
|
|
1039
|
+
def broadcast_to_all(self, message: Dict[str, Any]) -> bool:
    """
    Broadcast a message to all modules via Kafka.

    Args:
        message: Dictionary containing the message to broadcast

    Returns:
        True if broadcast was successful, False otherwise
    """
    if not self.kafka_manager:
        logger.warning("broadcast_failed", reason="kafka_unavailable")
        return False

    try:
        import uuid
        import asyncio
        broadcast_id = str(uuid.uuid4())
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            loop.run_until_complete(self.kafka_manager.broadcast_message(message, broadcast_id))
        finally:
            # BUGFIX: the event loop was created on every call and never
            # closed, leaking its selector/file descriptors.
            loop.close()
            asyncio.set_event_loop(None)
        logger.debug("message_broadcast", message=message)
        return True
    except Exception as e:
        logger.error("broadcast_error", error=str(e))
        return False
|
|
1065
|
+
|
|
1066
|
+
def antigravity_protocol(self, error: Exception):
    """Last-resort handler: log the fatal error, shut down, exit with 1."""
    try:
        logger.error(f"CRITICAL ERROR: {error}")
    except UnicodeEncodeError:
        # Narrow console encodings (e.g. cp1252) can reject exotic characters.
        enc = getattr(sys.stdout, 'encoding', 'utf-8') or 'utf-8'
        safe_err = str(error).encode(enc, errors='replace').decode(enc)
        logger.error(f"CRITICAL ERROR: {safe_err}")

    logger.critical("antigravity_protocol", error=str(error))
    self.shutdown()
    sys.exit(1)
|
|
1078
|
+
|
|
1079
|
+
def _safe_print(self, message: str):
|
|
1080
|
+
"""Thread-safe and encoding-safe print."""
|
|
1081
|
+
try:
|
|
1082
|
+
print(message, flush=True)
|
|
1083
|
+
except UnicodeEncodeError:
|
|
1084
|
+
encoding = getattr(sys.stdout, 'encoding', 'utf-8') or 'utf-8'
|
|
1085
|
+
safe_msg = message.encode(encoding, errors='replace').decode(encoding)
|
|
1086
|
+
print(safe_msg, flush=True)
|
|
1087
|
+
|
|
1088
|
+
def _save_pid(self, pid: int):
|
|
1089
|
+
"""Save a PID to the active PIDs file."""
|
|
1090
|
+
pid_file = ".qyro/pids"
|
|
1091
|
+
os.makedirs(".qyro", exist_ok=True)
|
|
1092
|
+
with open(pid_file, "a") as f:
|
|
1093
|
+
f.write(f"{pid}\n")
|
|
1094
|
+
|
|
1095
|
+
def _cleanup_stale_processes(self):
    """Kill stale processes using persisted PIDs from previous runs.

    Reads the ``.qyro/pids`` file written by ``_save_pid``, force-kills
    every PID listed (best effort - PIDs may be gone or reused), then
    removes the file so the next run starts clean.
    """
    pid_file = ".qyro/pids"
    if not os.path.exists(pid_file):
        return

    logger.info("Cleaning up stale processes from previous run...")
    try:
        with open(pid_file, "r") as f:
            pids = [line.strip() for line in f if line.strip()]

        for pid_str in pids:
            try:
                pid = int(pid_str)
                # Try to terminate
                self.platform.kill_process(pid, force=True)
            except Exception:
                # BUGFIX: narrowed from a bare ``except`` so
                # KeyboardInterrupt/SystemExit are no longer swallowed
                # during cleanup.
                pass

        # Clear the file
        os.remove(pid_file)
    except Exception as e:
        logger.warning("cleanup_stale_processes_failed", error=str(e))
|
|
1118
|
+
|
|
1119
|
+
def _prepare_frontend(self, framework: str):
    """Prepare frontend environment with manual package.json to avoid CRA conflicts.

    Creates a minimal Vite + React + TS scaffold under QYRO_frontend/ on the
    first run, then installs npm dependencies when node_modules is absent.

    Args:
        framework: Display name of the frontend framework (logging only).
    """
    cwd = "QYRO_frontend"
    if not os.path.exists(cwd):
        os.makedirs(cwd)

    # Manually create a minimal Vite + React + TS project structure
    pkg_json_path = os.path.join(cwd, "package.json")
    if not os.path.exists(pkg_json_path):
        logger.info(f"Frontend: Initializing {framework} project structure...")

        pkg_json = {
            "name": "nexus-frontend",
            "private": True,
            "version": "0.0.0",
            "type": "module",
            "scripts": {
                "dev": "vite",
                "build": "tsc && vite build",
                "preview": "vite preview",
                "start": "vite"
            },
            "dependencies": {
                "react": "^18.2.0",
                "react-dom": "^18.2.0",
                "nexus-react": "file:../qyro/adapters/language_adapters/node_adapter",
                "lucide-react": "^0.300.0"
            },
            "devDependencies": {
                "@types/react": "^18.2.0",
                "@types/react-dom": "^18.2.0",
                "@vitejs/plugin-react": "^4.0.0",
                "typescript": "^5.0.0",
                "vite": "^4.3.0"
            }
        }
        import json
        with open(pkg_json_path, 'w') as f:
            json.dump(pkg_json, f, indent=2)

        # Create vite.config.ts
        with open(os.path.join(cwd, "vite.config.ts"), 'w') as f:
            f.write("""
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'
export default defineConfig({
plugins: [react()],
server: { port: 3000 }
})
""")

        # Create index.html
        with open(os.path.join(cwd, "index.html"), 'w') as f:
            f.write("""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Nexus UI</title>
</head>
<body>
<div id="root"></div>
<script type="module" src="/src/main.tsx"></script>
</body>
</html>
""")

        # Create src/main.tsx
        os.makedirs(os.path.join(cwd, "src"), exist_ok=True)
        with open(os.path.join(cwd, "src", "main.tsx"), 'w') as f:
            f.write("""
import React from 'react'
import ReactDOM from 'react-dom/client'
import NexusComponent from './NexusComponent'
import './index.css'

ReactDOM.createRoot(document.getElementById('root')!).render(
<React.StrictMode>
<NexusComponent />
</React.StrictMode>,
)
""")

        # Create empty index.css
        with open(os.path.join(cwd, "src", "index.css"), 'w') as f:
            f.write("body { margin: 0; font-family: sans-serif; background: #0f172a; color: white; }")

    # Always check for node_modules
    if not os.path.exists(os.path.join(cwd, "node_modules")):
        logger.info("Frontend: Installing dependencies... (First time only)")
        # BUGFIX: the original ran ["npm", "install"] with shell=True; on
        # POSIX that executes only "npm" (the remaining list items become
        # shell positional args), so "install" was silently dropped.
        # Resolve the actual binary (npm.cmd on Windows) and run shell-less.
        import shutil
        npm = shutil.which("npm") or "npm"
        try:
            subprocess.run([npm, "install"], cwd=cwd)
        except OSError as e:
            # npm not installed at all - log instead of crashing startup.
            logger.warning("npm_install_failed", error=str(e))
|
|
1211
|
+
|
|
1212
|
+
def _print_banner(self):
    """Render the Qyro ASCII-art banner and version tagline to stdout."""
    # NOTE(review): the banner's internal spacing appears damaged by
    # extraction/formatting - confirm the ASCII art against the original file.
    banner = r"""
_ _ ________ __ _______ _____
| \ | | ____\ \ / / |__ __||_ _|
| \| | |__ \ V / | | | |
| . ` | __| > < | | | |
| |\ | |____ / . \ | | _| |_
|_| \_|______/_/ \_\ |_| |_____|
"""
    # Fore/Style come from colorama (imported elsewhere in this module).
    print(f"{Fore.GREEN}{banner}{Style.RESET_ALL}", flush=True)
    print(f"{Fore.CYAN} Polyglot Runtime v2.0 - NBP v3 Protocol{Style.RESET_ALL}", flush=True)
    print(flush=True)
|