lumen-app 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. lumen_app/__init__.py +7 -0
  2. lumen_app/core/__init__.py +0 -0
  3. lumen_app/core/config.py +661 -0
  4. lumen_app/core/installer.py +274 -0
  5. lumen_app/core/loader.py +45 -0
  6. lumen_app/core/router.py +87 -0
  7. lumen_app/core/server.py +389 -0
  8. lumen_app/core/service.py +49 -0
  9. lumen_app/core/tests/__init__.py +1 -0
  10. lumen_app/core/tests/test_core_integration.py +561 -0
  11. lumen_app/core/tests/test_env_checker.py +487 -0
  12. lumen_app/proto/README.md +12 -0
  13. lumen_app/proto/ml_service.proto +88 -0
  14. lumen_app/proto/ml_service_pb2.py +66 -0
  15. lumen_app/proto/ml_service_pb2.pyi +136 -0
  16. lumen_app/proto/ml_service_pb2_grpc.py +251 -0
  17. lumen_app/server.py +362 -0
  18. lumen_app/utils/env_checker.py +752 -0
  19. lumen_app/utils/installation/__init__.py +25 -0
  20. lumen_app/utils/installation/env_manager.py +152 -0
  21. lumen_app/utils/installation/micromamba_installer.py +459 -0
  22. lumen_app/utils/installation/package_installer.py +149 -0
  23. lumen_app/utils/installation/verifier.py +95 -0
  24. lumen_app/utils/logger.py +181 -0
  25. lumen_app/utils/mamba/cuda.yaml +12 -0
  26. lumen_app/utils/mamba/default.yaml +6 -0
  27. lumen_app/utils/mamba/openvino.yaml +7 -0
  28. lumen_app/utils/mamba/tensorrt.yaml +13 -0
  29. lumen_app/utils/package_resolver.py +309 -0
  30. lumen_app/utils/preset_registry.py +219 -0
  31. lumen_app/web/__init__.py +3 -0
  32. lumen_app/web/api/__init__.py +1 -0
  33. lumen_app/web/api/config.py +229 -0
  34. lumen_app/web/api/hardware.py +201 -0
  35. lumen_app/web/api/install.py +608 -0
  36. lumen_app/web/api/server.py +253 -0
  37. lumen_app/web/core/__init__.py +1 -0
  38. lumen_app/web/core/server_manager.py +348 -0
  39. lumen_app/web/core/state.py +264 -0
  40. lumen_app/web/main.py +145 -0
  41. lumen_app/web/models/__init__.py +28 -0
  42. lumen_app/web/models/config.py +63 -0
  43. lumen_app/web/models/hardware.py +64 -0
  44. lumen_app/web/models/install.py +134 -0
  45. lumen_app/web/models/server.py +95 -0
  46. lumen_app/web/static/assets/index-CGuhGHC9.css +1 -0
  47. lumen_app/web/static/assets/index-DN6HmxWS.js +56 -0
  48. lumen_app/web/static/index.html +14 -0
  49. lumen_app/web/static/vite.svg +1 -0
  50. lumen_app/web/websockets/__init__.py +1 -0
  51. lumen_app/web/websockets/logs.py +159 -0
  52. lumen_app-0.4.2.dist-info/METADATA +23 -0
  53. lumen_app-0.4.2.dist-info/RECORD +56 -0
  54. lumen_app-0.4.2.dist-info/WHEEL +5 -0
  55. lumen_app-0.4.2.dist-info/entry_points.txt +3 -0
  56. lumen_app-0.4.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,253 @@
1
+ """Server management API endpoints."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from fastapi import APIRouter, HTTPException
6
+
7
+ from lumen_app.utils.logger import get_logger
8
+ from lumen_app.web.core.state import app_state
9
+ from lumen_app.web.models.server import (
10
+ ServerLogs,
11
+ ServerRestartRequest,
12
+ ServerStartRequest,
13
+ ServerStatus,
14
+ ServerStopRequest,
15
+ )
16
+
17
+ logger = get_logger("lumen.web.api.server")
18
+ router = APIRouter()
19
+
20
+
21
@router.get("/status", response_model=ServerStatus)
async def get_server_status():
    """
    Get the current ML server status.

    Returns detailed information about the running server including:
    - Running state and PID
    - Port and host configuration
    - Uptime in seconds
    - Health status
    - Configuration path
    """
    manager = app_state.server_manager

    # Snapshot the process state once, then only probe health if alive.
    is_up = manager.is_running
    health_state = "unknown"
    if is_up:
        health_state = await manager.health_check()

    return ServerStatus(
        running=is_up,
        pid=manager.pid,
        port=manager.port or 50051,
        host="0.0.0.0",
        uptime_seconds=manager.uptime_seconds,
        service_name="lumen-ai",  # TODO: Get from config
        config_path=manager.config_path,
        environment="lumen_env",  # TODO: Get from config
        health=health_state,
        last_error=None,  # TODO: Track last error
    )
57
+
58
+
59
@router.post("/start", response_model=ServerStatus)
async def start_server(request: ServerStartRequest):
    """
    Start the ML server with specified configuration.

    Args:
        request: Server start configuration including:
            - config_path: Path to the Lumen YAML configuration
            - port: Optional port override
            - host: Host address (currently unused, always 0.0.0.0)
            - environment: Conda environment name

    Returns:
        Current server status after startup

    Raises:
        HTTPException 400: If server is already running
        HTTPException 404: If config file not found
        HTTPException 500: If server fails to start
    """
    logger.info(f"Starting ML server with config: {request.config_path}")

    manager = app_state.server_manager

    # Check if already running
    if manager.is_running:
        raise HTTPException(
            status_code=400,
            detail="Server is already running. Stop it first or use restart.",
        )

    try:
        # Start the server
        success = await manager.start(
            config_path=request.config_path,
            port=request.port,
            log_level="INFO",  # TODO: Make configurable
            environment=request.environment,
        )

        if not success:
            raise HTTPException(
                status_code=500,
                detail="Server failed to start. Check logs for details.",
            )

        logger.info("✓ ML server started successfully")

        # Return current status
        return await get_server_status()

    except HTTPException:
        # Re-raise our own HTTP errors untouched. Without this clause the
        # generic `except Exception` below would catch the 500 raised above
        # and rewrap it into a mangled "Failed to start server: 500: ..." detail.
        raise

    except FileNotFoundError as e:
        logger.error(f"Config file not found: {e}")
        raise HTTPException(status_code=404, detail=str(e)) from e

    except RuntimeError as e:
        logger.error(f"Runtime error starting server: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e

    except Exception as e:
        logger.error(f"Unexpected error starting server: {e}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to start server: {str(e)}"
        ) from e
121
+
122
+
123
@router.post("/stop", response_model=ServerStatus)
async def stop_server(request: ServerStopRequest):
    """
    Stop the running ML server.

    Args:
        request: Stop configuration including:
            - force: If True, force kill immediately without graceful shutdown
            - timeout: Maximum seconds to wait for graceful shutdown

    Returns:
        Current server status after shutdown

    Raises:
        HTTPException 400: If server is not running
        HTTPException 500: If server fails to stop
    """
    logger.info("Stopping ML server")

    manager = app_state.server_manager

    # Check if running
    if not manager.is_running:
        raise HTTPException(
            status_code=400, detail="Server is not running. Nothing to stop."
        )

    try:
        # Stop the server
        success = await manager.stop(timeout=request.timeout, force=request.force)

        if not success:
            raise HTTPException(
                status_code=500,
                detail="Server failed to stop gracefully. Check logs for details.",
            )

        logger.info("✓ ML server stopped successfully")

        # Return current status
        return await get_server_status()

    except HTTPException:
        # Propagate our own 500 as-is; the generic handler below would
        # otherwise catch it and rewrap the detail message.
        raise

    except Exception as e:
        logger.error(f"Error stopping server: {e}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to stop server: {str(e)}"
        ) from e
168
+
169
+
170
@router.post("/restart", response_model=ServerStatus)
async def restart_server(request: ServerRestartRequest):
    """
    Restart the ML server with optional new configuration.

    This is equivalent to stop + start, but handles the sequencing automatically.

    Args:
        request: Restart configuration including:
            - config_path: Optional new config path (uses existing if not provided)
            - port: Optional new port (uses existing if not provided)
            - host: Host address (currently unused)
            - environment: Environment name
            - force: If True, force kill during stop
            - timeout: Maximum seconds to wait for graceful shutdown

    Returns:
        Current server status after restart

    Raises:
        HTTPException 400: If no config path available
        HTTPException 500: If restart fails
    """
    logger.info("Restarting ML server")

    manager = app_state.server_manager

    try:
        # Restart the server.
        # NOTE(review): request.environment and request.force are accepted by the
        # model but not forwarded here — confirm whether the manager should use them.
        success = await manager.restart(
            config_path=request.config_path,
            port=request.port,
            log_level="INFO",  # TODO: Make configurable
            timeout=request.timeout,
        )

        if not success:
            raise HTTPException(
                status_code=500,
                detail="Server failed to restart. Check logs for details.",
            )

        logger.info("✓ ML server restarted successfully")

        # Return current status
        return await get_server_status()

    except HTTPException:
        # Re-raise our own HTTP errors untouched; the generic handler below
        # would otherwise swallow the 500 raised above and mangle its detail.
        raise

    except ValueError as e:
        logger.error(f"Invalid restart configuration: {e}")
        raise HTTPException(status_code=400, detail=str(e)) from e

    except Exception as e:
        logger.error(f"Error restarting server: {e}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to restart server: {str(e)}"
        ) from e
226
+
227
+
228
@router.get("/logs", response_model=ServerLogs)
async def get_server_logs(lines: int = 100, since: float | None = None):
    """
    Get recent server logs.

    Args:
        lines: Number of recent log lines to return (default: 100, 0 for all)
        since: Unix timestamp to filter logs (currently unused)

    Returns:
        Server logs with metadata

    Note:
        The 'since' parameter is reserved for future filtering implementation.
        Currently returns the most recent N lines from the log buffer.
    """
    manager = app_state.server_manager

    # Pull the tail of the in-memory buffer from the manager.
    recent = manager.get_logs(tail=lines)

    return ServerLogs(
        logs=recent,
        total_lines=len(recent),
        new_lines=0,  # TODO: Implement incremental log fetching
    )
@@ -0,0 +1 @@
1
+ """Core modules for Lumen Web API."""
@@ -0,0 +1,348 @@
1
+ """
2
+ Server Manager for gRPC ML Server Process.
3
+
4
+ This module provides lifecycle management for the gRPC ML server subprocess,
5
+ including starting, stopping, health checking, and log capture.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import time
12
+ from collections import deque
13
+ from pathlib import Path
14
+ from typing import Literal
15
+
16
+ from lumen_app.utils.logger import get_logger
17
+
18
+ logger = get_logger("lumen.web.server_manager")
19
+
20
+
21
+ class ServerManager:
22
+ """
23
+ Manages the gRPC ML server as a subprocess.
24
+
25
+ This class handles:
26
+ - Starting the server with proper configuration
27
+ - Monitoring server health and status
28
+ - Capturing and buffering logs
29
+ - Graceful shutdown with timeout handling
30
+ """
31
+
32
+ def __init__(self, max_log_lines: int = 1000):
33
+ """
34
+ Initialize the server manager.
35
+
36
+ Args:
37
+ max_log_lines: Maximum number of log lines to keep in memory
38
+ """
39
+ self.process: asyncio.subprocess.Process | None = None
40
+ self.config_path: str | None = None
41
+ self.port: int | None = None
42
+ self.start_time: float | None = None
43
+ self.log_buffer: deque[str] = deque(maxlen=max_log_lines)
44
+ self._log_task: asyncio.Task | None = None
45
+ self._shutdown_event = asyncio.Event()
46
+
47
+ @property
48
+ def is_running(self) -> bool:
49
+ """Check if the server process is running."""
50
+ if self.process is None:
51
+ return False
52
+
53
+ # Check if process is still alive
54
+ return self.process.returncode is None
55
+
56
+ @property
57
+ def pid(self) -> int | None:
58
+ """Get the server process PID."""
59
+ if self.process:
60
+ return self.process.pid
61
+ return None
62
+
63
+ @property
64
+ def uptime_seconds(self) -> float | None:
65
+ """Get server uptime in seconds."""
66
+ if self.start_time and self.is_running:
67
+ return time.time() - self.start_time
68
+ return None
69
+
70
+ async def start(
71
+ self,
72
+ config_path: str,
73
+ port: int | None = None,
74
+ log_level: str = "INFO",
75
+ environment: str | None = None,
76
+ ) -> bool:
77
+ """
78
+ Start the gRPC ML server as a subprocess.
79
+
80
+ Args:
81
+ config_path: Path to the Lumen configuration YAML file
82
+ port: Port number (overrides config file)
83
+ log_level: Logging level for the server
84
+ environment: Conda environment name (if using conda)
85
+
86
+ Returns:
87
+ True if server started successfully, False otherwise
88
+
89
+ Raises:
90
+ RuntimeError: If server is already running or config is invalid
91
+ """
92
+ if self.is_running:
93
+ raise RuntimeError("Server is already running")
94
+
95
+ # Validate config path
96
+ config_file = Path(config_path).expanduser()
97
+ if not config_file.exists():
98
+ raise FileNotFoundError(f"Config file not found: {config_path}")
99
+
100
+ logger.info(f"Starting ML server with config: {config_path}")
101
+
102
+ # Build command
103
+ # We'll use python -m to run the server module
104
+ cmd = [
105
+ "python",
106
+ "-m",
107
+ "lumen_app.core.server",
108
+ "--config",
109
+ str(config_file),
110
+ "--log-level",
111
+ log_level,
112
+ ]
113
+
114
+ if port:
115
+ cmd.extend(["--port", str(port)])
116
+
117
+ logger.debug(f"Server command: {' '.join(cmd)}")
118
+
119
+ try:
120
+ # Start the subprocess
121
+ self.process = await asyncio.create_subprocess_exec(
122
+ *cmd,
123
+ stdout=asyncio.subprocess.PIPE,
124
+ stderr=asyncio.subprocess.STDOUT, # Merge stderr into stdout
125
+ stdin=asyncio.subprocess.DEVNULL,
126
+ )
127
+
128
+ self.config_path = config_path
129
+ self.port = port
130
+ self.start_time = time.time()
131
+
132
+ logger.info(f"Server process started with PID: {self.process.pid}")
133
+
134
+ # Start log capture task
135
+ self._log_task = asyncio.create_task(self._capture_logs())
136
+
137
+ # Wait for server to be ready (with timeout)
138
+ ready = await self._wait_for_ready(timeout=30.0)
139
+
140
+ if not ready:
141
+ logger.error("Server failed to start within timeout")
142
+ await self.stop(force=True)
143
+ return False
144
+
145
+ logger.info("✓ ML server is ready")
146
+ return True
147
+
148
+ except Exception as e:
149
+ logger.error(f"Failed to start server: {e}", exc_info=True)
150
+ if self.process:
151
+ await self.stop(force=True)
152
+ return False
153
+
154
+ async def stop(self, timeout: float = 30.0, force: bool = False) -> bool:
155
+ """
156
+ Stop the gRPC ML server.
157
+
158
+ Args:
159
+ timeout: Maximum time to wait for graceful shutdown
160
+ force: If True, skip graceful shutdown and kill immediately
161
+
162
+ Returns:
163
+ True if server stopped successfully, False otherwise
164
+ """
165
+ if not self.process:
166
+ logger.warning("No server process to stop")
167
+ return True
168
+
169
+ if not self.is_running:
170
+ logger.info("Server process already stopped")
171
+ self._cleanup()
172
+ return True
173
+
174
+ logger.info(f"Stopping ML server (PID: {self.process.pid})")
175
+
176
+ try:
177
+ if force:
178
+ # Force kill immediately
179
+ logger.warning("Force killing server process")
180
+ self.process.kill()
181
+ else:
182
+ # Try graceful shutdown first
183
+ logger.info("Sending SIGTERM for graceful shutdown")
184
+ self.process.terminate()
185
+
186
+ # Wait for graceful shutdown with timeout
187
+ try:
188
+ await asyncio.wait_for(self.process.wait(), timeout=timeout)
189
+ logger.info("Server stopped gracefully")
190
+ except asyncio.TimeoutError:
191
+ logger.warning(
192
+ f"Server did not stop within {timeout}s, force killing"
193
+ )
194
+ self.process.kill()
195
+ await self.process.wait()
196
+
197
+ self._cleanup()
198
+ logger.info("✓ Server stopped")
199
+ return True
200
+
201
+ except Exception as e:
202
+ logger.error(f"Error stopping server: {e}", exc_info=True)
203
+ return False
204
+
205
+ async def restart(
206
+ self,
207
+ config_path: str | None = None,
208
+ port: int | None = None,
209
+ log_level: str = "INFO",
210
+ timeout: float = 30.0,
211
+ ) -> bool:
212
+ """
213
+ Restart the server.
214
+
215
+ Args:
216
+ config_path: New config path (or use existing)
217
+ port: New port (or use existing)
218
+ log_level: Logging level
219
+ timeout: Timeout for stop operation
220
+
221
+ Returns:
222
+ True if restart successful, False otherwise
223
+ """
224
+ logger.info("Restarting ML server")
225
+
226
+ # Use existing config if not provided
227
+ config_path = config_path or self.config_path
228
+ port = port or self.port
229
+
230
+ if not config_path:
231
+ raise ValueError("No config path specified for restart")
232
+
233
+ # Stop if running
234
+ if self.is_running:
235
+ success = await self.stop(timeout=timeout)
236
+ if not success:
237
+ logger.error("Failed to stop server for restart")
238
+ return False
239
+
240
+ # Wait a bit for resources to be freed
241
+ await asyncio.sleep(1.0)
242
+
243
+ # Start with new/existing config
244
+ return await self.start(config_path=config_path, port=port, log_level=log_level)
245
+
246
+ async def health_check(self) -> Literal["healthy", "unhealthy", "unknown"]:
247
+ """
248
+ Perform a health check on the server.
249
+
250
+ Returns:
251
+ "healthy" if server is running and responsive
252
+ "unhealthy" if server process exists but not responsive
253
+ "unknown" if server is not running
254
+ """
255
+ if not self.is_running:
256
+ return "unknown"
257
+
258
+ # TODO: Implement actual gRPC health check
259
+ # For now, just check if process is running
260
+ return "healthy"
261
+
262
+ def get_logs(self, tail: int = 100) -> list[str]:
263
+ """
264
+ Get recent log lines.
265
+
266
+ Args:
267
+ tail: Number of recent lines to return (0 for all)
268
+
269
+ Returns:
270
+ List of log lines
271
+ """
272
+ if tail > 0:
273
+ # Return last N lines
274
+ return list(self.log_buffer)[-tail:]
275
+ else:
276
+ # Return all lines
277
+ return list(self.log_buffer)
278
+
279
+ async def _capture_logs(self):
280
+ """Background task to capture server stdout/stderr."""
281
+ if not self.process or not self.process.stdout:
282
+ return
283
+
284
+ logger.debug("Starting log capture")
285
+
286
+ try:
287
+ async for line in self.process.stdout:
288
+ try:
289
+ log_line = line.decode("utf-8").rstrip()
290
+ self.log_buffer.append(log_line)
291
+
292
+ # Also log to our logger for debugging
293
+ logger.debug(f"[Server] {log_line}")
294
+
295
+ except Exception as e:
296
+ logger.warning(f"Error decoding log line: {e}")
297
+
298
+ except asyncio.CancelledError:
299
+ logger.debug("Log capture cancelled")
300
+ except Exception as e:
301
+ logger.error(f"Error capturing logs: {e}", exc_info=True)
302
+
303
+ async def _wait_for_ready(self, timeout: float = 30.0) -> bool:
304
+ """
305
+ Wait for the server to be ready.
306
+
307
+ Args:
308
+ timeout: Maximum time to wait
309
+
310
+ Returns:
311
+ True if server is ready, False if timeout or error
312
+ """
313
+ logger.info(f"Waiting for server to be ready (timeout: {timeout}s)...")
314
+
315
+ start_time = time.time()
316
+
317
+ while time.time() - start_time < timeout:
318
+ # Check if process died
319
+ if not self.is_running:
320
+ logger.error("Server process died during startup")
321
+ return False
322
+
323
+ # Look for startup success indicators in logs
324
+ recent_logs = self.get_logs(tail=10)
325
+ for log_line in recent_logs:
326
+ if (
327
+ "listening on" in log_line.lower()
328
+ or "server running" in log_line.lower()
329
+ ):
330
+ logger.info("Server startup detected in logs")
331
+ # Give it a moment to fully initialize
332
+ await asyncio.sleep(1.0)
333
+ return True
334
+
335
+ # Wait a bit before checking again
336
+ await asyncio.sleep(0.5)
337
+
338
+ logger.error(f"Server did not start within {timeout}s")
339
+ return False
340
+
341
+ def _cleanup(self):
342
+ """Clean up server state after shutdown."""
343
+ if self._log_task and not self._log_task.done():
344
+ self._log_task.cancel()
345
+
346
+ self.process = None
347
+ self.start_time = None
348
+ self._log_task = None