mcli-framework 7.0.0 (mcli_framework-7.0.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mcli-framework might be problematic.

Files changed (186)
  1. mcli/app/chat_cmd.py +42 -0
  2. mcli/app/commands_cmd.py +226 -0
  3. mcli/app/completion_cmd.py +216 -0
  4. mcli/app/completion_helpers.py +288 -0
  5. mcli/app/cron_test_cmd.py +697 -0
  6. mcli/app/logs_cmd.py +419 -0
  7. mcli/app/main.py +492 -0
  8. mcli/app/model/model.py +1060 -0
  9. mcli/app/model_cmd.py +227 -0
  10. mcli/app/redis_cmd.py +269 -0
  11. mcli/app/video/video.py +1114 -0
  12. mcli/app/visual_cmd.py +303 -0
  13. mcli/chat/chat.py +2409 -0
  14. mcli/chat/command_rag.py +514 -0
  15. mcli/chat/enhanced_chat.py +652 -0
  16. mcli/chat/system_controller.py +1010 -0
  17. mcli/chat/system_integration.py +1016 -0
  18. mcli/cli.py +25 -0
  19. mcli/config.toml +20 -0
  20. mcli/lib/api/api.py +586 -0
  21. mcli/lib/api/daemon_client.py +203 -0
  22. mcli/lib/api/daemon_client_local.py +44 -0
  23. mcli/lib/api/daemon_decorator.py +217 -0
  24. mcli/lib/api/mcli_decorators.py +1032 -0
  25. mcli/lib/auth/auth.py +85 -0
  26. mcli/lib/auth/aws_manager.py +85 -0
  27. mcli/lib/auth/azure_manager.py +91 -0
  28. mcli/lib/auth/credential_manager.py +192 -0
  29. mcli/lib/auth/gcp_manager.py +93 -0
  30. mcli/lib/auth/key_manager.py +117 -0
  31. mcli/lib/auth/mcli_manager.py +93 -0
  32. mcli/lib/auth/token_manager.py +75 -0
  33. mcli/lib/auth/token_util.py +1011 -0
  34. mcli/lib/config/config.py +47 -0
  35. mcli/lib/discovery/__init__.py +1 -0
  36. mcli/lib/discovery/command_discovery.py +274 -0
  37. mcli/lib/erd/erd.py +1345 -0
  38. mcli/lib/erd/generate_graph.py +453 -0
  39. mcli/lib/files/files.py +76 -0
  40. mcli/lib/fs/fs.py +109 -0
  41. mcli/lib/lib.py +29 -0
  42. mcli/lib/logger/logger.py +611 -0
  43. mcli/lib/performance/optimizer.py +409 -0
  44. mcli/lib/performance/rust_bridge.py +502 -0
  45. mcli/lib/performance/uvloop_config.py +154 -0
  46. mcli/lib/pickles/pickles.py +50 -0
  47. mcli/lib/search/cached_vectorizer.py +479 -0
  48. mcli/lib/services/data_pipeline.py +460 -0
  49. mcli/lib/services/lsh_client.py +441 -0
  50. mcli/lib/services/redis_service.py +387 -0
  51. mcli/lib/shell/shell.py +137 -0
  52. mcli/lib/toml/toml.py +33 -0
  53. mcli/lib/ui/styling.py +47 -0
  54. mcli/lib/ui/visual_effects.py +634 -0
  55. mcli/lib/watcher/watcher.py +185 -0
  56. mcli/ml/api/app.py +215 -0
  57. mcli/ml/api/middleware.py +224 -0
  58. mcli/ml/api/routers/admin_router.py +12 -0
  59. mcli/ml/api/routers/auth_router.py +244 -0
  60. mcli/ml/api/routers/backtest_router.py +12 -0
  61. mcli/ml/api/routers/data_router.py +12 -0
  62. mcli/ml/api/routers/model_router.py +302 -0
  63. mcli/ml/api/routers/monitoring_router.py +12 -0
  64. mcli/ml/api/routers/portfolio_router.py +12 -0
  65. mcli/ml/api/routers/prediction_router.py +267 -0
  66. mcli/ml/api/routers/trade_router.py +12 -0
  67. mcli/ml/api/routers/websocket_router.py +76 -0
  68. mcli/ml/api/schemas.py +64 -0
  69. mcli/ml/auth/auth_manager.py +425 -0
  70. mcli/ml/auth/models.py +154 -0
  71. mcli/ml/auth/permissions.py +302 -0
  72. mcli/ml/backtesting/backtest_engine.py +502 -0
  73. mcli/ml/backtesting/performance_metrics.py +393 -0
  74. mcli/ml/cache.py +400 -0
  75. mcli/ml/cli/main.py +398 -0
  76. mcli/ml/config/settings.py +394 -0
  77. mcli/ml/configs/dvc_config.py +230 -0
  78. mcli/ml/configs/mlflow_config.py +131 -0
  79. mcli/ml/configs/mlops_manager.py +293 -0
  80. mcli/ml/dashboard/app.py +532 -0
  81. mcli/ml/dashboard/app_integrated.py +738 -0
  82. mcli/ml/dashboard/app_supabase.py +560 -0
  83. mcli/ml/dashboard/app_training.py +615 -0
  84. mcli/ml/dashboard/cli.py +51 -0
  85. mcli/ml/data_ingestion/api_connectors.py +501 -0
  86. mcli/ml/data_ingestion/data_pipeline.py +567 -0
  87. mcli/ml/data_ingestion/stream_processor.py +512 -0
  88. mcli/ml/database/migrations/env.py +94 -0
  89. mcli/ml/database/models.py +667 -0
  90. mcli/ml/database/session.py +200 -0
  91. mcli/ml/experimentation/ab_testing.py +845 -0
  92. mcli/ml/features/ensemble_features.py +607 -0
  93. mcli/ml/features/political_features.py +676 -0
  94. mcli/ml/features/recommendation_engine.py +809 -0
  95. mcli/ml/features/stock_features.py +573 -0
  96. mcli/ml/features/test_feature_engineering.py +346 -0
  97. mcli/ml/logging.py +85 -0
  98. mcli/ml/mlops/data_versioning.py +518 -0
  99. mcli/ml/mlops/experiment_tracker.py +377 -0
  100. mcli/ml/mlops/model_serving.py +481 -0
  101. mcli/ml/mlops/pipeline_orchestrator.py +614 -0
  102. mcli/ml/models/base_models.py +324 -0
  103. mcli/ml/models/ensemble_models.py +675 -0
  104. mcli/ml/models/recommendation_models.py +474 -0
  105. mcli/ml/models/test_models.py +487 -0
  106. mcli/ml/monitoring/drift_detection.py +676 -0
  107. mcli/ml/monitoring/metrics.py +45 -0
  108. mcli/ml/optimization/portfolio_optimizer.py +834 -0
  109. mcli/ml/preprocessing/data_cleaners.py +451 -0
  110. mcli/ml/preprocessing/feature_extractors.py +491 -0
  111. mcli/ml/preprocessing/ml_pipeline.py +382 -0
  112. mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
  113. mcli/ml/preprocessing/test_preprocessing.py +294 -0
  114. mcli/ml/scripts/populate_sample_data.py +200 -0
  115. mcli/ml/tasks.py +400 -0
  116. mcli/ml/tests/test_integration.py +429 -0
  117. mcli/ml/tests/test_training_dashboard.py +387 -0
  118. mcli/public/oi/oi.py +15 -0
  119. mcli/public/public.py +4 -0
  120. mcli/self/self_cmd.py +1246 -0
  121. mcli/workflow/daemon/api_daemon.py +800 -0
  122. mcli/workflow/daemon/async_command_database.py +681 -0
  123. mcli/workflow/daemon/async_process_manager.py +591 -0
  124. mcli/workflow/daemon/client.py +530 -0
  125. mcli/workflow/daemon/commands.py +1196 -0
  126. mcli/workflow/daemon/daemon.py +905 -0
  127. mcli/workflow/daemon/daemon_api.py +59 -0
  128. mcli/workflow/daemon/enhanced_daemon.py +571 -0
  129. mcli/workflow/daemon/process_cli.py +244 -0
  130. mcli/workflow/daemon/process_manager.py +439 -0
  131. mcli/workflow/daemon/test_daemon.py +275 -0
  132. mcli/workflow/dashboard/dashboard_cmd.py +113 -0
  133. mcli/workflow/docker/docker.py +0 -0
  134. mcli/workflow/file/file.py +100 -0
  135. mcli/workflow/gcloud/config.toml +21 -0
  136. mcli/workflow/gcloud/gcloud.py +58 -0
  137. mcli/workflow/git_commit/ai_service.py +328 -0
  138. mcli/workflow/git_commit/commands.py +430 -0
  139. mcli/workflow/lsh_integration.py +355 -0
  140. mcli/workflow/model_service/client.py +594 -0
  141. mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
  142. mcli/workflow/model_service/lightweight_embedder.py +397 -0
  143. mcli/workflow/model_service/lightweight_model_server.py +714 -0
  144. mcli/workflow/model_service/lightweight_test.py +241 -0
  145. mcli/workflow/model_service/model_service.py +1955 -0
  146. mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
  147. mcli/workflow/model_service/pdf_processor.py +386 -0
  148. mcli/workflow/model_service/test_efficient_runner.py +234 -0
  149. mcli/workflow/model_service/test_example.py +315 -0
  150. mcli/workflow/model_service/test_integration.py +131 -0
  151. mcli/workflow/model_service/test_new_features.py +149 -0
  152. mcli/workflow/openai/openai.py +99 -0
  153. mcli/workflow/politician_trading/commands.py +1790 -0
  154. mcli/workflow/politician_trading/config.py +134 -0
  155. mcli/workflow/politician_trading/connectivity.py +490 -0
  156. mcli/workflow/politician_trading/data_sources.py +395 -0
  157. mcli/workflow/politician_trading/database.py +410 -0
  158. mcli/workflow/politician_trading/demo.py +248 -0
  159. mcli/workflow/politician_trading/models.py +165 -0
  160. mcli/workflow/politician_trading/monitoring.py +413 -0
  161. mcli/workflow/politician_trading/scrapers.py +966 -0
  162. mcli/workflow/politician_trading/scrapers_california.py +412 -0
  163. mcli/workflow/politician_trading/scrapers_eu.py +377 -0
  164. mcli/workflow/politician_trading/scrapers_uk.py +350 -0
  165. mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
  166. mcli/workflow/politician_trading/supabase_functions.py +354 -0
  167. mcli/workflow/politician_trading/workflow.py +852 -0
  168. mcli/workflow/registry/registry.py +180 -0
  169. mcli/workflow/repo/repo.py +223 -0
  170. mcli/workflow/scheduler/commands.py +493 -0
  171. mcli/workflow/scheduler/cron_parser.py +238 -0
  172. mcli/workflow/scheduler/job.py +182 -0
  173. mcli/workflow/scheduler/monitor.py +139 -0
  174. mcli/workflow/scheduler/persistence.py +324 -0
  175. mcli/workflow/scheduler/scheduler.py +679 -0
  176. mcli/workflow/sync/sync_cmd.py +437 -0
  177. mcli/workflow/sync/test_cmd.py +314 -0
  178. mcli/workflow/videos/videos.py +242 -0
  179. mcli/workflow/wakatime/wakatime.py +11 -0
  180. mcli/workflow/workflow.py +37 -0
  181. mcli_framework-7.0.0.dist-info/METADATA +479 -0
  182. mcli_framework-7.0.0.dist-info/RECORD +186 -0
  183. mcli_framework-7.0.0.dist-info/WHEEL +5 -0
  184. mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
  185. mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
  186. mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
mcli/workflow/model_service/model_service.py
@@ -0,0 +1,1955 @@
+ import asyncio
+ import base64
+ import json
+ import logging
+ import os
+ import shutil
+ import signal
+ import sqlite3
+ import sys
+ import tempfile
+ import threading
+ import time
+ import urllib.parse
+ import uuid
+ from contextlib import asynccontextmanager
+ from dataclasses import asdict, dataclass
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional
+ from urllib.parse import urlparse
+
+ # CLI Commands
+ import click
+ import numpy as np
+ import psutil
+ import requests
+
+ # Model loading and inference
+ import torch
+ import transformers
+ import uvicorn
+
+ # FastAPI for REST API
+ from fastapi import BackgroundTasks, FastAPI, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from PIL import Image
+ from pydantic import BaseModel, Field
+ from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
+
+ # Import existing utilities
+ from mcli.lib.logger.logger import get_logger
+ from mcli.lib.toml.toml import read_from_toml
+
+ from .lightweight_embedder import LightweightEmbedder
+
+ # Import lightweight model server
+ from .lightweight_model_server import LIGHTWEIGHT_MODELS, LightweightModelServer
+ from .pdf_processor import PDFProcessor
+
+ logger = get_logger(__name__)
+
+ # Configuration
+ DEFAULT_CONFIG = {
+     "host": "0.0.0.0",
+     "port": 8000,
+     "models_dir": "./models",
+     "temp_dir": "./temp",
+     "max_concurrent_requests": 4,
+     "request_timeout": 300,
+     "model_cache_size": 2,
+     "enable_cors": True,
+     "cors_origins": ["*"],
+     "log_level": "INFO",
+ }
+
+
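+ # Any of these keys can be overridden per-key, either programmatically via
+ # ModelService(config={...}) or from the [model_service] table of a TOML file
+ # passed to the `start` CLI command defined below.
+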
+ @dataclass
+ class ModelInfo:
+     """Represents a loaded model"""
+
+     id: str
+     name: str
+     model_type: str  # 'text-generation', 'text-classification', 'translation', 'image-generation', etc.
+     model_path: str
+     tokenizer_path: Optional[str] = None
+     device: str = "auto"  # 'cpu', 'cuda', 'auto'
+     max_length: int = 512
+     temperature: float = 0.7
+     top_p: float = 0.9
+     top_k: int = 50
+     is_loaded: bool = False
+     memory_usage_mb: float = 0.0
+     parameters_count: int = 0
+     created_at: Optional[datetime] = None  # filled in by __post_init__
+
+     def __post_init__(self):
+         if self.created_at is None:
+             self.created_at = datetime.now()
+
+
+ class ModelDatabase:
+     """Manages model metadata storage"""
+
+     def __init__(self, db_path: Optional[str] = None):
+         if db_path is None:
+             db_path = str(Path.home() / ".local" / "mcli" / "model_service" / "models.db")
+         else:
+             db_path = str(db_path)
+         self.db_path = db_path
+         Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
+         self.init_database()
+
+     def init_database(self):
+         """Initialize SQLite database"""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         # Models table
+         cursor.execute(
+             """
+             CREATE TABLE IF NOT EXISTS models (
+                 id TEXT PRIMARY KEY,
+                 name TEXT NOT NULL,
+                 model_type TEXT NOT NULL,
+                 model_path TEXT NOT NULL,
+                 tokenizer_path TEXT,
+                 device TEXT DEFAULT 'auto',
+                 max_length INTEGER DEFAULT 512,
+                 temperature REAL DEFAULT 0.7,
+                 top_p REAL DEFAULT 0.9,
+                 top_k INTEGER DEFAULT 50,
+                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                 is_loaded BOOLEAN DEFAULT 0,
+                 memory_usage_mb REAL DEFAULT 0.0,
+                 parameters_count INTEGER DEFAULT 0
+             )
+             """
+         )
+
+         # Inference history
+         cursor.execute(
+             """
+             CREATE TABLE IF NOT EXISTS inferences (
+                 id TEXT PRIMARY KEY,
+                 model_id TEXT NOT NULL,
+                 request_type TEXT NOT NULL,
+                 input_data TEXT,
+                 output_data TEXT,
+                 execution_time_ms INTEGER,
+                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                 error_message TEXT,
+                 FOREIGN KEY (model_id) REFERENCES models (id)
+             )
+             """
+         )
+
+         conn.commit()
+         conn.close()
+
+     def add_model(self, model_info: ModelInfo) -> str:
+         """Add a new model to the database"""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         try:
+             cursor.execute(
+                 """
+                 INSERT INTO models
+                 (id, name, model_type, model_path, tokenizer_path, device,
+                  max_length, temperature, top_p, top_k, created_at, is_loaded,
+                  memory_usage_mb, parameters_count)
+                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                 """,
+                 (
+                     model_info.id,
+                     model_info.name,
+                     model_info.model_type,
+                     model_info.model_path,
+                     model_info.tokenizer_path,
+                     model_info.device,
+                     model_info.max_length,
+                     model_info.temperature,
+                     model_info.top_p,
+                     model_info.top_k,
+                     model_info.created_at.isoformat(),
+                     model_info.is_loaded,
+                     model_info.memory_usage_mb,
+                     model_info.parameters_count,
+                 ),
+             )
+
+             conn.commit()
+             return model_info.id
+
+         except Exception as e:
+             logger.error(f"Error adding model: {e}")
+             conn.rollback()
+             raise
+         finally:
+             conn.close()
+
+     def get_model(self, model_id: str) -> Optional[ModelInfo]:
+         """Get a model by ID"""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         try:
+             cursor.execute(
+                 """
+                 SELECT id, name, model_type, model_path, tokenizer_path, device,
+                        max_length, temperature, top_p, top_k, created_at, is_loaded,
+                        memory_usage_mb, parameters_count
+                 FROM models WHERE id = ?
+                 """,
+                 (model_id,),
+             )
+
+             row = cursor.fetchone()
+             if row:
+                 return self._row_to_model_info(row)
+             return None
+
+         finally:
+             conn.close()
+
+     def get_all_models(self) -> List[ModelInfo]:
+         """Get all models"""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         try:
+             cursor.execute(
+                 """
+                 SELECT id, name, model_type, model_path, tokenizer_path, device,
+                        max_length, temperature, top_p, top_k, created_at, is_loaded,
+                        memory_usage_mb, parameters_count
+                 FROM models ORDER BY name
+                 """
+             )
+
+             return [self._row_to_model_info(row) for row in cursor.fetchall()]
+
+         finally:
+             conn.close()
+
+     def update_model(self, model_info: ModelInfo) -> bool:
+         """Update model information"""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         try:
+             cursor.execute(
+                 """
+                 UPDATE models
+                 SET name = ?, model_type = ?, model_path = ?, tokenizer_path = ?,
+                     device = ?, max_length = ?, temperature = ?, top_p = ?, top_k = ?,
+                     is_loaded = ?, memory_usage_mb = ?, parameters_count = ?
+                 WHERE id = ?
+                 """,
+                 (
+                     model_info.name,
+                     model_info.model_type,
+                     model_info.model_path,
+                     model_info.tokenizer_path,
+                     model_info.device,
+                     model_info.max_length,
+                     model_info.temperature,
+                     model_info.top_p,
+                     model_info.top_k,
+                     model_info.is_loaded,
+                     model_info.memory_usage_mb,
+                     model_info.parameters_count,
+                     model_info.id,
+                 ),
+             )
+
+             conn.commit()
+             return cursor.rowcount > 0
+
+         except Exception as e:
+             logger.error(f"Error updating model: {e}")
+             conn.rollback()
+             return False
+         finally:
+             conn.close()
+
+     def delete_model(self, model_id: str) -> bool:
+         """Delete a model"""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         try:
+             cursor.execute("DELETE FROM models WHERE id = ?", (model_id,))
+             conn.commit()
+             return cursor.rowcount > 0
+
+         except Exception as e:
+             logger.error(f"Error deleting model: {e}")
+             conn.rollback()
+             return False
+         finally:
+             conn.close()
+
+     def record_inference(
+         self,
+         model_id: str,
+         request_type: str,
+         input_data: str = "",
+         output_data: str = "",
+         execution_time_ms: int = 0,
+         error_message: str = "",
+     ):
+         """Record inference request"""
+         conn = sqlite3.connect(self.db_path)
+         cursor = conn.cursor()
+
+         try:
+             inference_id = str(uuid.uuid4())
+             cursor.execute(
+                 """
+                 INSERT INTO inferences
+                 (id, model_id, request_type, input_data, output_data,
+                  execution_time_ms, error_message)
+                 VALUES (?, ?, ?, ?, ?, ?, ?)
+                 """,
+                 (
+                     inference_id,
+                     model_id,
+                     request_type,
+                     input_data,
+                     output_data,
+                     execution_time_ms,
+                     error_message,
+                 ),
+             )
+
+             conn.commit()
+
+         except Exception as e:
+             logger.error(f"Error recording inference: {e}")
+             conn.rollback()
+         finally:
+             conn.close()
+
+     def _row_to_model_info(self, row) -> ModelInfo:
+         """Convert database row to ModelInfo object"""
+         return ModelInfo(
+             id=row[0],
+             name=row[1],
+             model_type=row[2],
+             model_path=row[3],
+             tokenizer_path=row[4],
+             device=row[5],
+             max_length=row[6],
+             temperature=row[7],
+             top_p=row[8],
+             top_k=row[9],
+             created_at=datetime.fromisoformat(row[10]),
+             is_loaded=bool(row[11]),
+             memory_usage_mb=row[12],
+             parameters_count=row[13],
+         )
+
+
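+ # Usage sketch (illustrative, not part of the published API surface):
+ # persisting and re-reading a model record.
+ #
+ #     db = ModelDatabase()
+ #     info = ModelInfo(id=str(uuid.uuid4()), name="tiny-gpt2",
+ #                      model_type="text-generation", model_path="sshleifer/tiny-gpt2")
+ #     db.add_model(info)
+ #     assert db.get_model(info.id).name == "tiny-gpt2"
+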
357
+ class ModelManager:
358
+ """Manages model loading, caching, and inference"""
359
+
360
+ def __init__(self, models_dir: str = "./models", max_cache_size: int = 2):
361
+ self.models_dir = Path(models_dir)
362
+ self.models_dir.mkdir(parents=True, exist_ok=True)
363
+ self.max_cache_size = max_cache_size
364
+ self.loaded_models: Dict[str, Any] = {}
365
+ self.model_lock = threading.Lock()
366
+ self.db = ModelDatabase()
367
+
368
+ # Device detection
369
+ self.device = "cuda" if torch.cuda.is_available() else "cpu"
370
+ logger.info(f"Using device: {self.device}")
371
+
372
+ def load_model(self, model_info: ModelInfo) -> bool:
373
+ """Load a model into memory"""
374
+ with self.model_lock:
375
+ try:
376
+ logger.info(f"Loading model: {model_info.name}")
377
+
378
+ # Check if model is already loaded
379
+ if model_info.id in self.loaded_models:
380
+ logger.info(f"Model {model_info.name} already loaded")
381
+ return True
382
+
383
+ # Manage cache size
384
+ if len(self.loaded_models) >= self.max_cache_size:
385
+ self._evict_oldest_model()
386
+
387
+ # Load model based on type
388
+ if model_info.model_type == "text-generation":
389
+ model, tokenizer = self._load_text_generation_model(model_info)
390
+ elif model_info.model_type == "text-classification":
391
+ model, tokenizer = self._load_text_classification_model(model_info)
392
+ elif model_info.model_type == "translation":
393
+ model, tokenizer = self._load_translation_model(model_info)
394
+ elif model_info.model_type == "image-generation":
395
+ model, tokenizer = self._load_image_generation_model(model_info)
396
+ else:
397
+ raise ValueError(f"Unsupported model type: {model_info.model_type}")
398
+
399
+ # Store loaded model
400
+ self.loaded_models[model_info.id] = {
401
+ "model": model,
402
+ "tokenizer": tokenizer,
403
+ "model_info": model_info,
404
+ "loaded_at": datetime.now(),
405
+ }
406
+
407
+ # Update model info
408
+ model_info.is_loaded = True
409
+ model_info.memory_usage_mb = self._get_model_memory_usage(model)
410
+ model_info.parameters_count = sum(p.numel() for p in model.parameters())
411
+ self.db.update_model(model_info)
412
+
413
+ logger.info(f"Successfully loaded model: {model_info.name}")
414
+ return True
415
+
416
+ except Exception as e:
417
+ logger.error(f"Error loading model {model_info.name}: {e}")
418
+ return False
419
+
420
+ def unload_model(self, model_id: str) -> bool:
421
+ """Unload a model from memory"""
422
+ with self.model_lock:
423
+ if model_id in self.loaded_models:
424
+ del self.loaded_models[model_id]
425
+
426
+ # Update model info
427
+ model_info = self.db.get_model(model_id)
428
+ if model_info:
429
+ model_info.is_loaded = False
430
+ model_info.memory_usage_mb = 0.0
431
+ self.db.update_model(model_info)
432
+
433
+ logger.info(f"Unloaded model: {model_id}")
434
+ return True
435
+ return False
436
+
437
+ def _load_text_generation_model(self, model_info: ModelInfo):
438
+ """Load a text generation model"""
439
+ tokenizer = AutoTokenizer.from_pretrained(
440
+ model_info.tokenizer_path or model_info.model_path, trust_remote_code=True
441
+ )
442
+
443
+ model = AutoModelForCausalLM.from_pretrained(
444
+ model_info.model_path,
445
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
446
+ device_map="auto" if self.device == "cuda" else None,
447
+ trust_remote_code=True,
448
+ )
449
+
450
+ if self.device == "cpu":
451
+ model = model.to(self.device)
452
+
453
+ return model, tokenizer
454
+
455
+ def _load_text_classification_model(self, model_info: ModelInfo):
456
+ """Load a text classification model"""
457
+ tokenizer = AutoTokenizer.from_pretrained(
458
+ model_info.tokenizer_path or model_info.model_path
459
+ )
460
+
461
+ model = AutoModel.from_pretrained(
462
+ model_info.model_path,
463
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
464
+ device_map="auto" if self.device == "cuda" else None,
465
+ )
466
+
467
+ if self.device == "cpu":
468
+ model = model.to(self.device)
469
+
470
+ return model, tokenizer
471
+
472
+ def _load_translation_model(self, model_info: ModelInfo):
473
+ """Load a translation model"""
474
+ tokenizer = AutoTokenizer.from_pretrained(
475
+ model_info.tokenizer_path or model_info.model_path
476
+ )
477
+
478
+ model = AutoModelForSeq2SeqLM.from_pretrained(
479
+ model_info.model_path,
480
+ torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
481
+ device_map="auto" if self.device == "cuda" else None,
482
+ )
483
+
484
+ if self.device == "cpu":
485
+ model = model.to(self.device)
486
+
487
+ return model, tokenizer
488
+
489
+ def _load_image_generation_model(self, model_info: ModelInfo):
490
+ """Load an image generation model (placeholder)"""
491
+ # This would be implemented based on specific image generation frameworks
492
+ # like Stable Diffusion, DALL-E, etc.
493
+ raise NotImplementedError("Image generation models not yet implemented")
494
+
495
+ def _evict_oldest_model(self):
496
+ """Evict the oldest loaded model from cache"""
497
+ if not self.loaded_models:
498
+ return
499
+
500
+ oldest_id = min(self.loaded_models.keys(), key=lambda k: self.loaded_models[k]["loaded_at"])
501
+ self.unload_model(oldest_id)
502
+
503
+ def _get_model_memory_usage(self, model) -> float:
504
+ """Get model memory usage in MB"""
505
+ try:
506
+ if self.device == "cuda":
507
+ return torch.cuda.memory_allocated() / 1024 / 1024
508
+ else:
509
+ # Rough estimation for CPU
510
+ total_params = sum(p.numel() for p in model.parameters())
511
+ return total_params * 4 / 1024 / 1024 # 4 bytes per float32
512
+ except:
513
+ return 0.0
514
+
+     def generate_text(
+         self,
+         model_id: str,
+         prompt: str,
+         max_length: Optional[int] = None,
+         temperature: Optional[float] = None,
+         top_p: Optional[float] = None,
+         top_k: Optional[int] = None,
+     ) -> str:
+         """Generate text using a loaded model"""
+         if model_id not in self.loaded_models:
+             raise ValueError(f"Model {model_id} not loaded")
+
+         model_data = self.loaded_models[model_id]
+         model = model_data["model"]
+         tokenizer = model_data["tokenizer"]
+         model_info = model_data["model_info"]
+
+         # Use provided parameters or fall back to the model's stored defaults
+         max_length = max_length or model_info.max_length
+         temperature = temperature or model_info.temperature
+         top_p = top_p or model_info.top_p
+         top_k = top_k or model_info.top_k
+
+         try:
+             # Tokenize input
+             inputs = tokenizer(prompt, return_tensors="pt")
+             if self.device == "cuda":
+                 inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+             # Generate
+             with torch.no_grad():
+                 outputs = model.generate(
+                     **inputs,
+                     max_length=max_length,
+                     temperature=temperature,
+                     top_p=top_p,
+                     top_k=top_k,
+                     do_sample=True,
+                     pad_token_id=tokenizer.eos_token_id,
+                 )
+
+             # Decode output
+             generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+             # Remove input prompt from output
+             if generated_text.startswith(prompt):
+                 generated_text = generated_text[len(prompt) :].strip()
+
+             return generated_text
+
+         except Exception as e:
+             logger.error(f"Error generating text: {e}")
+             raise
+
+     def classify_text(self, model_id: str, text: str) -> Dict[str, float]:
+         """Classify text using a loaded model"""
+         if model_id not in self.loaded_models:
+             raise ValueError(f"Model {model_id} not loaded")
+
+         model_data = self.loaded_models[model_id]
+         model = model_data["model"]
+         tokenizer = model_data["tokenizer"]
+
+         try:
+             # Tokenize input
+             inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+             if self.device == "cuda":
+                 inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+             # Get predictions
+             with torch.no_grad():
+                 outputs = model(**inputs)
+                 logits = outputs.logits
+                 probabilities = torch.softmax(logits, dim=-1)
+
+             # Convert to dictionary
+             probs = probabilities[0].cpu().numpy()
+             return {f"class_{i}": float(prob) for i, prob in enumerate(probs)}
+
+         except Exception as e:
+             logger.error(f"Error classifying text: {e}")
+             raise
+
+     def translate_text(
+         self, model_id: str, text: str, source_lang: str = "en", target_lang: str = "fr"
+     ) -> str:
+         """Translate text using a loaded model"""
+         if model_id not in self.loaded_models:
+             raise ValueError(f"Model {model_id} not loaded")
+
+         model_data = self.loaded_models[model_id]
+         model = model_data["model"]
+         tokenizer = model_data["tokenizer"]
+
+         try:
+             # Prepare input
+             if hasattr(tokenizer, "lang_code_to_token"):
+                 # For models like mBART
+                 inputs = tokenizer(text, return_tensors="pt")
+                 inputs["labels"] = tokenizer(f"{target_lang} {text}", return_tensors="pt").input_ids
+             else:
+                 # For other translation models
+                 inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+
+             if self.device == "cuda":
+                 inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+             # Generate translation
+             with torch.no_grad():
+                 outputs = model.generate(**inputs, max_length=512)
+
+             # Decode output
+             translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+             return translated_text
+
+         except Exception as e:
+             logger.error(f"Error translating text: {e}")
+             raise
+
+     def download_model_from_url(
+         self, model_url: str, tokenizer_url: Optional[str] = None
+     ) -> tuple[str, Optional[str]]:
+         """Download model and tokenizer from URLs and return local paths"""
+         try:
+             # Parse URLs
+             model_parsed = urlparse(model_url)
+             model_filename = os.path.basename(model_parsed.path) or "model"
+
+             # Create model directory
+             model_dir = self.models_dir / model_filename
+             model_dir.mkdir(parents=True, exist_ok=True)
+
+             # Download model
+             logger.info(f"Downloading model from: {model_url}")
+             model_response = requests.get(model_url, stream=True)
+             model_response.raise_for_status()
+
+             model_path = model_dir / "model"
+             with open(model_path, "wb") as f:
+                 for chunk in model_response.iter_content(chunk_size=8192):
+                     f.write(chunk)
+
+             # Download tokenizer if provided
+             tokenizer_path = None
+             if tokenizer_url:
+                 logger.info(f"Downloading tokenizer from: {tokenizer_url}")
+                 tokenizer_response = requests.get(tokenizer_url, stream=True)
+                 tokenizer_response.raise_for_status()
+
+                 tokenizer_path = model_dir / "tokenizer"
+                 with open(tokenizer_path, "wb") as f:
+                     for chunk in tokenizer_response.iter_content(chunk_size=8192):
+                         f.write(chunk)
+
+             logger.info(f"Model downloaded to: {model_path}")
+             return str(model_path), str(tokenizer_path) if tokenizer_path else None
+
+         except Exception as e:
+             logger.error(f"Error downloading model from URL: {e}")
+             raise
+
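+     # Note: the download is written to a single opaque file, while transformers'
+     # from_pretrained expects a Hub model id or a directory containing
+     # config.json plus weights, so archived downloads may need to be extracted
+     # before load_model() can succeed on them.
+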
+     def add_model_from_url(
+         self,
+         name: str,
+         model_type: str,
+         model_url: str,
+         tokenizer_url: Optional[str] = None,
+         device: str = "auto",
+         max_length: int = 512,
+         temperature: float = 0.7,
+         top_p: float = 0.9,
+         top_k: int = 50,
+     ) -> str:
+         """Add a model from URL by downloading it first"""
+         try:
+             # Download model and tokenizer
+             model_path, tokenizer_path = self.download_model_from_url(model_url, tokenizer_url)
+
+             # Create model info
+             model_info = ModelInfo(
+                 id=str(uuid.uuid4()),
+                 name=name,
+                 model_type=model_type,
+                 model_path=model_path,
+                 tokenizer_path=tokenizer_path,
+                 device=device,
+                 max_length=max_length,
+                 temperature=temperature,
+                 top_p=top_p,
+                 top_k=top_k,
+             )
+
+             # Add to database
+             model_id = self.db.add_model(model_info)
+
+             # Try to load the model
+             if self.load_model(model_info):
+                 logger.info(f"Successfully added and loaded model from URL: {name}")
+             else:
+                 logger.warning(f"Model added from URL but failed to load: {name}")
+
+             return model_id
+
+         except Exception as e:
+             logger.error(f"Error adding model from URL: {e}")
+             raise
+
+     def get_models_summary(self) -> Dict[str, Any]:
+         """Get a summary of all models with statistics"""
+         models = self.db.get_all_models()
+
+         summary = {
+             "total_models": len(models),
+             "loaded_models": len([m for m in models if m.is_loaded]),
+             "total_memory_mb": sum(m.memory_usage_mb for m in models if m.is_loaded),
+             "models_by_type": {},
+             "models": [],
+         }
+
+         for model in models:
+             # Add to type statistics
+             model_type = model.model_type
+             if model_type not in summary["models_by_type"]:
+                 summary["models_by_type"][model_type] = {"count": 0, "loaded": 0, "memory_mb": 0.0}
+             summary["models_by_type"][model_type]["count"] += 1
+             if model.is_loaded:
+                 summary["models_by_type"][model_type]["loaded"] += 1
+                 summary["models_by_type"][model_type]["memory_mb"] += model.memory_usage_mb
+
+             # Add model details
+             summary["models"].append(
+                 {
+                     "id": model.id,
+                     "name": model.name,
+                     "type": model.model_type,
+                     "loaded": model.is_loaded,
+                     "memory_mb": model.memory_usage_mb,
+                     "parameters_count": model.parameters_count,
+                     "created_at": model.created_at.isoformat(),
+                 }
+             )
+
+         return summary
+
+
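+ # Usage sketch (illustrative): driving ModelManager directly, bypassing the
+ # HTTP layer.
+ #
+ #     manager = ModelManager(models_dir="./models", max_cache_size=2)
+ #     for info in manager.db.get_all_models():
+ #         if manager.load_model(info):
+ #             print(manager.generate_text(info.id, "Hello", max_length=64))
+ #             break
+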
+ # Pydantic models for API
+ class ModelLoadRequest(BaseModel):
+     name: str
+     model_type: str
+     model_path: str
+     tokenizer_path: Optional[str] = None
+     device: str = "auto"
+     max_length: int = 512
+     temperature: float = 0.7
+     top_p: float = 0.9
+     top_k: int = 50
+
+
+ class ModelLoadFromUrlRequest(BaseModel):
+     name: str
+     model_type: str
+     model_url: str
+     tokenizer_url: Optional[str] = None
+     device: str = "auto"
+     max_length: int = 512
+     temperature: float = 0.7
+     top_p: float = 0.9
+     top_k: int = 50
+
+
+ class TextGenerationRequest(BaseModel):
+     prompt: str
+     max_length: Optional[int] = None
+     temperature: Optional[float] = None
+     top_p: Optional[float] = None
+     top_k: Optional[int] = None
+
+
+ class TextClassificationRequest(BaseModel):
+     text: str
+
+
+ class TranslationRequest(BaseModel):
+     text: str
+     source_lang: str = "en"
+     target_lang: str = "fr"
+
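+ # Example request body for POST /models/{model_id}/generate (illustrative):
+ #
+ #     {"prompt": "Once upon a time", "max_length": 128, "temperature": 0.8}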
+
+ class ModelService:
+     """Main model service daemon"""
+
+     def __init__(self, config: Optional[Dict[str, Any]] = None):
+         self.config = {**DEFAULT_CONFIG, **(config or {})}
+         self.model_manager = ModelManager(
+             models_dir=self.config["models_dir"], max_cache_size=self.config["model_cache_size"]
+         )
+
+         # Initialize lightweight server
+         self.lightweight_server = LightweightModelServer(
+             models_dir=f"{self.config['models_dir']}/lightweight",
+             port=self.config["port"] + 1,  # Use next port
+         )
+
+         # Initialize PDF processor
+         self.pdf_processor = PDFProcessor(
+             models_dir=f"{self.config['models_dir']}/lightweight",
+             port=self.config["port"] + 2,  # Use next port after lightweight server
+         )
+
+         # Initialize lightweight embedder
+         self.embedder = LightweightEmbedder(models_dir=f"{self.config['models_dir']}/embeddings")
+
+         self.running = False
+         self.pid_file = Path.home() / ".local" / "mcli" / "model_service" / "model_service.pid"
+         self.pid_file.parent.mkdir(parents=True, exist_ok=True)
+
+         # FastAPI app
+         self.app = FastAPI(
+             title="MCLI Model Service",
+             description="A service for hosting and providing inference APIs for language models",
+             version="1.0.0",
+         )
+
+         # Add CORS middleware
+         if self.config["enable_cors"]:
+             self.app.add_middleware(
+                 CORSMiddleware,
+                 allow_origins=self.config["cors_origins"],
+                 allow_credentials=True,
+                 allow_methods=["*"],
+                 allow_headers=["*"],
+             )
+
+         # Setup routes
+         self._setup_routes()
+
+     def _setup_routes(self):
+         """Setup FastAPI routes"""
+
+         @self.app.get("/")
+         async def root():
+             return {
+                 "service": "MCLI Model Service",
+                 "version": "1.0.0",
+                 "status": "running",
+                 "models_loaded": len(self.model_manager.loaded_models),
+             }
+
+         @self.app.get("/models")
+         async def list_models():
+             """List all available models"""
+             models = self.model_manager.db.get_all_models()
+             return [asdict(model) for model in models]
+
+         @self.app.get("/models/summary")
+         async def get_models_summary():
+             """Get a summary of all models with statistics"""
+             return self.model_manager.get_models_summary()
+
+         @self.app.post("/models")
+         async def load_model(request: ModelLoadRequest):
+             """Load a new model"""
+             try:
+                 model_info = ModelInfo(
+                     id=str(uuid.uuid4()),
+                     name=request.name,
+                     model_type=request.model_type,
+                     model_path=request.model_path,
+                     tokenizer_path=request.tokenizer_path,
+                     device=request.device,
+                     max_length=request.max_length,
+                     temperature=request.temperature,
+                     top_p=request.top_p,
+                     top_k=request.top_k,
+                 )
+
+                 # Add to database
+                 model_id = self.model_manager.db.add_model(model_info)
+
+                 # Load model
+                 success = self.model_manager.load_model(model_info)
+
+                 if success:
+                     return {"model_id": model_id, "status": "loaded"}
+                 else:
+                     # Remove from database if loading failed
+                     self.model_manager.db.delete_model(model_id)
+                     raise HTTPException(status_code=500, detail="Failed to load model")
+
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.post("/models/from-url")
+         async def load_model_from_url(request: ModelLoadFromUrlRequest):
+             """Load a new model from URL"""
+             try:
+                 model_id = self.model_manager.add_model_from_url(
+                     name=request.name,
+                     model_type=request.model_type,
+                     model_url=request.model_url,
+                     tokenizer_url=request.tokenizer_url,
+                     device=request.device,
+                     max_length=request.max_length,
+                     temperature=request.temperature,
+                     top_p=request.top_p,
+                     top_k=request.top_k,
+                 )
+
+                 return {"model_id": model_id, "status": "loaded"}
+
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.delete("/models/{model_id}")
+         async def unload_model(model_id: str):
+             """Unload a model"""
+             try:
+                 success = self.model_manager.unload_model(model_id)
+                 if success:
+                     return {"status": "unloaded"}
+                 else:
+                     raise HTTPException(status_code=404, detail="Model not found")
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.put("/models/{model_id}")
+         async def update_model(model_id: str, request: Dict[str, Any]):
+             """Update model configuration"""
+             try:
+                 # Get current model info
+                 model_info = self.model_manager.db.get_model(model_id)
+                 if not model_info:
+                     raise HTTPException(status_code=404, detail="Model not found")
+
+                 # Update model info with new values
+                 for key, value in request.items():
+                     if hasattr(model_info, key):
+                         setattr(model_info, key, value)
+
+                 # Update in database
+                 success = self.model_manager.db.update_model(model_info)
+                 if success:
+                     return {"status": "updated", "model_id": model_id}
+                 else:
+                     raise HTTPException(status_code=500, detail="Failed to update model")
+
+             except HTTPException:
+                 raise
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.delete("/models/{model_id}/remove")
+         async def remove_model(model_id: str):
+             """Remove a model from the database"""
+             try:
+                 # First unload if loaded
+                 self.model_manager.unload_model(model_id)
+
+                 # Remove from database
+                 success = self.model_manager.db.delete_model(model_id)
+                 if success:
+                     return {"status": "removed", "model_id": model_id}
+                 else:
+                     raise HTTPException(status_code=404, detail="Model not found")
+
+             except HTTPException:
+                 raise
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.post("/models/{model_id}/generate")
+         async def generate_text(model_id: str, request: TextGenerationRequest):
+             """Generate text using a model"""
+             try:
+                 start_time = time.time()
+
+                 generated_text = self.model_manager.generate_text(
+                     model_id=model_id,
+                     prompt=request.prompt,
+                     max_length=request.max_length or 512,
+                     temperature=request.temperature or 0.7,
+                     top_p=request.top_p or 0.9,
+                     top_k=request.top_k or 50,
+                 )
+
+                 execution_time = int((time.time() - start_time) * 1000)
+
+                 # Record inference
+                 self.model_manager.db.record_inference(
+                     model_id=model_id,
+                     request_type="text-generation",
+                     input_data=request.prompt,
+                     output_data=generated_text,
+                     execution_time_ms=execution_time,
+                 )
+
+                 return {"generated_text": generated_text, "execution_time_ms": execution_time}
+
+             except Exception as e:
+                 # Record error
+                 self.model_manager.db.record_inference(
+                     model_id=model_id,
+                     request_type="text-generation",
+                     input_data=request.prompt,
+                     error_message=str(e),
+                 )
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.post("/models/{model_id}/classify")
+         async def classify_text(model_id: str, request: TextClassificationRequest):
+             """Classify text using a model"""
+             try:
+                 start_time = time.time()
+
+                 classifications = self.model_manager.classify_text(
+                     model_id=model_id, text=request.text
+                 )
+
+                 execution_time = int((time.time() - start_time) * 1000)
+
+                 # Record inference
+                 self.model_manager.db.record_inference(
+                     model_id=model_id,
+                     request_type="text-classification",
+                     input_data=request.text,
+                     output_data=json.dumps(classifications),
+                     execution_time_ms=execution_time,
+                 )
+
+                 return {"classifications": classifications, "execution_time_ms": execution_time}
+
+             except Exception as e:
+                 # Record error
+                 self.model_manager.db.record_inference(
+                     model_id=model_id,
+                     request_type="text-classification",
+                     input_data=request.text,
+                     error_message=str(e),
+                 )
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.post("/models/{model_id}/translate")
+         async def translate_text(model_id: str, request: TranslationRequest):
+             """Translate text using a model"""
+             try:
+                 start_time = time.time()
+
+                 translated_text = self.model_manager.translate_text(
+                     model_id=model_id,
+                     text=request.text,
+                     source_lang=request.source_lang,
+                     target_lang=request.target_lang,
+                 )
+
+                 execution_time = int((time.time() - start_time) * 1000)
+
+                 # Record inference
+                 self.model_manager.db.record_inference(
+                     model_id=model_id,
+                     request_type="translation",
+                     input_data=request.text,
+                     output_data=translated_text,
+                     execution_time_ms=execution_time,
+                 )
+
+                 return {"translated_text": translated_text, "execution_time_ms": execution_time}
+
+             except Exception as e:
+                 # Record error
+                 self.model_manager.db.record_inference(
+                     model_id=model_id,
+                     request_type="translation",
+                     input_data=request.text,
+                     error_message=str(e),
+                 )
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.get("/health")
+         async def health_check():
+             """Health check endpoint"""
+             return {
+                 "status": "healthy",
+                 "models_loaded": len(self.model_manager.loaded_models),
+                 "memory_usage_mb": sum(
+                     model_data["model_info"].memory_usage_mb
+                     for model_data in self.model_manager.loaded_models.values()
+                 ),
+             }
+
+         # Lightweight server endpoints
+         @self.app.get("/lightweight/models")
+         async def list_lightweight_models():
+             """List available lightweight models"""
+             return {
+                 "models": LIGHTWEIGHT_MODELS,
+                 "downloaded": self.lightweight_server.downloader.get_downloaded_models(),
+                 "loaded": list(self.lightweight_server.loaded_models.keys()),
+             }
+
+         @self.app.post("/lightweight/models/{model_key}/download")
+         async def download_lightweight_model(model_key: str):
+             """Download a lightweight model"""
+             if model_key not in LIGHTWEIGHT_MODELS:
+                 raise HTTPException(status_code=404, detail="Model not found")
+
+             try:
+                 success = self.lightweight_server.download_and_load_model(model_key)
+                 if success:
+                     return {"status": "downloaded", "model": model_key}
+                 else:
+                     raise HTTPException(status_code=500, detail="Failed to download model")
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.post("/lightweight/start")
+         async def start_lightweight_server():
+             """Start the lightweight server"""
+             try:
+                 self.lightweight_server.start_server()
+                 return {
+                     "status": "started",
+                     "port": self.lightweight_server.port,
+                     "url": f"http://localhost:{self.lightweight_server.port}",
+                 }
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.get("/lightweight/status")
+         async def lightweight_status():
+             """Get lightweight server status"""
+             return {
+                 "running": self.lightweight_server.running,
+                 "port": self.lightweight_server.port,
+                 "loaded_models": list(self.lightweight_server.loaded_models.keys()),
+                 "system_info": self.lightweight_server.get_system_info(),
+             }
+
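+         # Illustrative: exercising the lightweight endpoints from a shell:
+         #
+         #     curl http://localhost:8000/lightweight/models
+         #     curl -X POST http://localhost:8000/lightweight/models/<model_key>/download
+         #     curl -X POST http://localhost:8000/lightweight/start
+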
+         # PDF processing endpoints
+         @self.app.post("/pdf/extract-text")
+         async def extract_pdf_text(request: Dict[str, Any]):
+             """Extract text from PDF"""
+             try:
+                 pdf_path = request.get("pdf_path")
+                 if not pdf_path:
+                     raise HTTPException(status_code=400, detail="PDF path is required")
+
+                 result = self.pdf_processor.extract_text_from_pdf(pdf_path)
+                 return result
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.post("/pdf/process-with-ai")
+         async def process_pdf_with_ai(request: Dict[str, Any]):
+             """Process PDF with AI analysis"""
+             try:
+                 pdf_path = request.get("pdf_path")
+                 model_key = request.get("model_key")
+
+                 if not pdf_path:
+                     raise HTTPException(status_code=400, detail="PDF path is required")
+
+                 # Handle optional model_key parameter
+                 if model_key:
+                     result = self.pdf_processor.process_pdf_with_ai(pdf_path, str(model_key))
+                 else:
+                     result = self.pdf_processor.process_pdf_with_ai(pdf_path)
+                 return result
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.get("/pdf/status")
+         async def pdf_processor_status():
+             """Get PDF processor status"""
+             return self.pdf_processor.get_service_status()
+
+         # Embedding endpoints
+         @self.app.post("/embed/text")
+         async def embed_text(request: Dict[str, Any]):
+             """Embed text using lightweight embedder"""
+             try:
+                 text = request.get("text")
+                 method = request.get("method")
+
+                 if not text:
+                     raise HTTPException(status_code=400, detail="Text is required")
+
+                 # Handle optional method parameter
+                 if method:
+                     result = self.embedder.embed_text(text, str(method))
+                 else:
+                     result = self.embedder.embed_text(text)
+                 return result
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.post("/embed/document")
+         async def embed_document(request: Dict[str, Any]):
+             """Embed document using lightweight embedder"""
+             try:
+                 text = request.get("text")
+                 chunk_size = request.get("chunk_size", 1000)
+
+                 if not text:
+                     raise HTTPException(status_code=400, detail="Text is required")
+
+                 result = self.embedder.embed_document(text, chunk_size)
+                 return result
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.post("/embed/search")
+         async def search_embeddings(request: Dict[str, Any]):
+             """Search similar documents using embeddings"""
+             try:
+                 query = request.get("query")
+                 embeddings = request.get("embeddings", [])
+                 top_k = request.get("top_k", 5)
+
+                 if not query:
+                     raise HTTPException(status_code=400, detail="Query is required")
+
+                 results = self.embedder.search_similar(query, embeddings, top_k)
+                 return {"results": results}
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=str(e))
+
+         @self.app.get("/embed/status")
+         async def embedder_status():
+             """Get embedder status"""
+             return self.embedder.get_status()
+
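+     # Quick smoke test against a running service (illustrative; default port 8000):
+     #
+     #     curl http://localhost:8000/health
+     #     curl -X POST http://localhost:8000/models/<model_id>/generate \
+     #          -H "Content-Type: application/json" \
+     #          -d '{"prompt": "Hello, world"}'
+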
+     def start(self):
+         """Start the model service"""
+         if self.running:
+             logger.info("Model service is already running")
+             return
+
+         # Check if already running
+         if self.pid_file.exists():
+             try:
+                 with open(self.pid_file, "r") as f:
+                     pid = int(f.read().strip())
+                 if psutil.pid_exists(pid):
+                     logger.info(f"Model service already running with PID {pid}")
+                     return
+             except Exception:
+                 pass
+
+         # Start service
+         self.running = True
+
+         # Write PID file
+         with open(self.pid_file, "w") as f:
+             f.write(str(os.getpid()))
+
+         logger.info(f"Model service started with PID {os.getpid()}")
+         logger.info(f"API available at http://{self.config['host']}:{self.config['port']}")
+
+         # Set up signal handlers
+         signal.signal(signal.SIGTERM, self._signal_handler)
+         signal.signal(signal.SIGINT, self._signal_handler)
+
+         # Start FastAPI server
+         try:
+             uvicorn.run(
+                 self.app,
+                 host=self.config["host"],
+                 port=self.config["port"],
+                 log_level=self.config["log_level"].lower(),
+             )
+         except KeyboardInterrupt:
+             logger.info("Model service interrupted")
+         finally:
+             self.stop()
+
+     def stop(self):
+         """Stop the model service"""
+         if not self.running:
+             return
+
+         self.running = False
+
+         # Unload all models
+         for model_id in list(self.model_manager.loaded_models.keys()):
+             self.model_manager.unload_model(model_id)
+
+         # Remove PID file
+         if self.pid_file.exists():
+             self.pid_file.unlink()
+
+         logger.info("Model service stopped")
+
+     def _signal_handler(self, signum, frame):
+         """Handle shutdown signals"""
+         logger.info(f"Received signal {signum}, shutting down...")
+         self.stop()
+         sys.exit(0)
+
+     def status(self) -> Dict[str, Any]:
+         """Get service status"""
+         is_running = False
+         pid = None
+
+         if self.pid_file.exists():
+             try:
+                 with open(self.pid_file, "r") as f:
+                     pid = int(f.read().strip())
+                 is_running = psutil.pid_exists(pid)
+             except Exception:
+                 pass
+
+         return {
+             "running": is_running,
+             "pid": pid,
+             "pid_file": str(self.pid_file),
+             "models_loaded": len(self.model_manager.loaded_models),
+             "api_url": f"http://{self.config['host']}:{self.config['port']}",
+         }
+
+
+ # CLI Commands (click is already imported at the top of the module)
+
+
+ @click.group(name="model-service")
+ def model_service():
+     """Model service daemon for hosting language models"""
+     pass
+
+
+ @model_service.command()
+ @click.option("--config", help="Path to configuration file")
+ @click.option("--host", default="0.0.0.0", help="Host to bind to")
+ @click.option("--port", default=8000, help="Port to bind to")
+ @click.option("--models-dir", default="./models", help="Directory for model storage")
+ def start(config: Optional[str], host: str, port: int, models_dir: str):
+     """Start the model service daemon"""
+     # Load config if provided
+     service_config = DEFAULT_CONFIG.copy()
+     if config:
+         try:
+             config_data = read_from_toml(config, "model_service")
+             if config_data:
+                 service_config.update(config_data)
+         except Exception as e:
+             logger.warning(f"Could not load config from {config}: {e}")
+
+     # Override with command line options
+     service_config["host"] = host
+     service_config["port"] = port
+     service_config["models_dir"] = models_dir
+
+     service = ModelService(service_config)
+     service.start()
+
+
+ @model_service.command()
+ def stop():
+     """Stop the model service daemon"""
+     pid_file = Path.home() / ".local" / "mcli" / "model_service" / "model_service.pid"
+
+     if not pid_file.exists():
+         click.echo("Model service is not running")
+         return
+
+     try:
+         with open(pid_file, "r") as f:
+             pid = int(f.read().strip())
+
+         # Send SIGTERM
+         os.kill(pid, signal.SIGTERM)
+         click.echo(f"Sent stop signal to model service (PID {pid})")
+
+         # Wait a bit and check if it stopped
+         time.sleep(2)
+         if not psutil.pid_exists(pid):
+             click.echo("Model service stopped successfully")
+         else:
+             click.echo("Model service may still be running")
+
+     except Exception as e:
+         click.echo(f"Error stopping model service: {e}")
+
+
+ @model_service.command()
+ def status():
+     """Show model service status"""
+     service = ModelService()
+     status_info = service.status()
+
+     if status_info["running"]:
+         click.echo(f"āœ… Model service is running (PID: {status_info['pid']})")
+         click.echo(f"🌐 API available at: {status_info['api_url']}")
+         click.echo(f"šŸ“Š Models loaded: {status_info['models_loaded']}")
+     else:
+         click.echo("āŒ Model service is not running")
+
+     click.echo(f"šŸ“ PID file: {status_info['pid_file']}")
+
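+ # CLI usage sketch (assumes the model-service group is wired into the mcli
+ # entry point; invoke via the installed console script):
+ #
+ #     mcli model-service start --port 8000 --models-dir ./models
+ #     mcli model-service status
+ #     mcli model-service stop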
1415
+
1416
+ @model_service.command()
1417
+ @click.option("--summary", is_flag=True, help="Show summary statistics")
1418
+ def list_models(summary: bool = False):
1419
+ """List all models in the service"""
1420
+ service = ModelService()
1421
+
1422
+ try:
1423
+ if summary:
1424
+ # Show summary
1425
+ summary_data = service.model_manager.get_models_summary()
1426
+ click.echo("šŸ“Š Model Service Summary")
1427
+ click.echo("=" * 50)
1428
+ click.echo(f"Total Models: {summary_data['total_models']}")
1429
+ click.echo(f"Loaded Models: {summary_data['loaded_models']}")
1430
+ click.echo(f"Total Memory: {summary_data['total_memory_mb']:.1f} MB")
1431
+ click.echo()
1432
+
1433
+ if summary_data["models_by_type"]:
1434
+ click.echo("Models by Type:")
1435
+ for model_type, stats in summary_data["models_by_type"].items():
1436
+ click.echo(
1437
+ f" {model_type}: {stats['loaded']}/{stats['count']} loaded ({stats['memory_mb']:.1f} MB)"
1438
+ )
1439
+ click.echo()
1440
+ else:
1441
+ # Show detailed list
1442
+ models = service.model_manager.db.get_all_models()
1443
+
1444
+ if not models:
1445
+ click.echo("šŸ“ No models found in the service")
1446
+ return
1447
+
1448
+ click.echo(f"šŸ“ Found {len(models)} model(s):")
1449
+ click.echo("=" * 80)
1450
+
1451
+ for model in models:
1452
+ status_icon = "🟢" if model.is_loaded else "⚪"
1453
+ click.echo(f"{status_icon} {model.name} (ID: {model.id})")
1454
+ click.echo(f" Type: {model.model_type}")
1455
+ click.echo(f" Path: {model.model_path}")
1456
+ if model.tokenizer_path:
1457
+ click.echo(f" Tokenizer: {model.tokenizer_path}")
1458
+ click.echo(f" Device: {model.device}")
1459
+ click.echo(f" Loaded: {'Yes' if model.is_loaded else 'No'}")
1460
+ if model.is_loaded:
1461
+ click.echo(f" Memory: {model.memory_usage_mb:.1f} MB")
1462
+ click.echo(f" Parameters: {model.parameters_count:,}")
1463
+ click.echo(f" Created: {model.created_at.strftime('%Y-%m-%d %H:%M:%S')}")
1464
+ click.echo()
1465
+
1466
+ except Exception as e:
1467
+ click.echo(f"āŒ Error listing models: {e}")
1468
+
1469
+
1470
+ @model_service.command()
+ @click.argument("model_path")
+ @click.option("--name", required=True, help="Model name")
+ @click.option(
+     "--type",
+     "model_type",
+     required=True,
+     help="Model type (text-generation, text-classification, translation)",
+ )
+ @click.option("--tokenizer-path", help="Path to tokenizer (optional)")
+ @click.option("--device", default="auto", help="Device to use (cpu, cuda, auto)")
+ def add_model(
+     model_path: str, name: str, model_type: str, tokenizer_path: Optional[str] = None, device: str = "auto"
+ ):
+     """Add a model to the service"""
+     service = ModelService()
+
+     try:
+         model_info = ModelInfo(
+             id=str(uuid.uuid4()),
+             name=name,
+             model_type=model_type,
+             model_path=model_path,
+             tokenizer_path=tokenizer_path,
+             device=device,
+         )
+
+         # Add to database
+         model_id = service.model_manager.db.add_model(model_info)
+         click.echo(f"āœ… Model '{name}' added with ID: {model_id}")
+
+         # Try to load the model
+         if service.model_manager.load_model(model_info):
+             click.echo(f"āœ… Model '{name}' loaded successfully")
+         else:
+             click.echo(f"āš ļø Model '{name}' added but failed to load")
+
+     except Exception as e:
+         click.echo(f"āŒ Error adding model: {e}")
+
+
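A hedged invocation sketch using Click's test runner; the path and model name below are placeholders, not files shipped with the package:

    from click.testing import CliRunner

    runner = CliRunner()
    result = runner.invoke(
        add_model,
        ["/tmp/models/distilgpt2", "--name", "distilgpt2", "--type", "text-generation"],
    )
    print(result.exit_code, result.output)
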
+ @model_service.command()
+ @click.argument("model_url")
+ @click.option("--name", required=True, help="Model name")
+ @click.option(
+     "--type",
+     "model_type",
+     required=True,
+     help="Model type (text-generation, text-classification, translation)",
+ )
+ @click.option("--tokenizer-url", help="URL to tokenizer (optional)")
+ @click.option("--device", default="auto", help="Device to use (cpu, cuda, auto)")
+ @click.option("--max-length", default=512, help="Maximum sequence length")
+ @click.option("--temperature", default=0.7, help="Temperature for generation")
+ @click.option("--top-p", default=0.9, help="Top-p for generation")
+ @click.option("--top-k", default=50, help="Top-k for generation")
+ def add_model_from_url(
+     model_url: str,
+     name: str,
+     model_type: str,
+     tokenizer_url: Optional[str] = None,
+     device: str = "auto",
+     max_length: int = 512,
+     temperature: float = 0.7,
+     top_p: float = 0.9,
+     top_k: int = 50,
+ ):
+     """Add a model from a URL to the service"""
+     service = ModelService()
+
+     try:
+         click.echo(f"🌐 Downloading model from: {model_url}")
+         if tokenizer_url:
+             click.echo(f"🌐 Downloading tokenizer from: {tokenizer_url}")
+
+         model_id = service.model_manager.add_model_from_url(
+             name=name,
+             model_type=model_type,
+             model_url=model_url,
+             tokenizer_url=tokenizer_url,
+             device=device,
+             max_length=max_length,
+             temperature=temperature,
+             top_p=top_p,
+             top_k=top_k,
+         )
+
+         click.echo(f"āœ… Model '{name}' downloaded and added with ID: {model_id}")
+
+     except Exception as e:
+         click.echo(f"āŒ Error adding model from URL: {e}")
+
+
+ @model_service.command()
+ @click.argument("model_id")
+ @click.option("--name", help="New model name")
+ @click.option("--temperature", type=float, help="New temperature value")
+ @click.option("--max-length", type=int, help="New max length value")
+ @click.option("--top-p", type=float, help="New top-p value")
+ @click.option("--top-k", type=int, help="New top-k value")
+ @click.option("--device", help="New device setting")
+ def update_model(
+     model_id: str,
+     name: Optional[str] = None,
+     temperature: Optional[float] = None,
+     max_length: Optional[int] = None,
+     top_p: Optional[float] = None,
+     top_k: Optional[int] = None,
+     device: Optional[str] = None,
+ ):
+     """Update model configuration"""
+     service = ModelService()
+
+     try:
+         # Get current model info
+         model_info = service.model_manager.db.get_model(model_id)
+         if not model_info:
+             click.echo(f"āŒ Model {model_id} not found")
+             return
+
+         # Build updates
+         updates = {}
+         if name is not None:
+             updates["name"] = name
+         if temperature is not None:
+             updates["temperature"] = temperature
+         if max_length is not None:
+             updates["max_length"] = max_length
+         if top_p is not None:
+             updates["top_p"] = top_p
+         if top_k is not None:
+             updates["top_k"] = top_k
+         if device is not None:
+             updates["device"] = device
+
+         if not updates:
+             click.echo("āŒ No updates specified. Use --help to see available options.")
+             return
+
+         # Update model
+         for key, value in updates.items():
+             setattr(model_info, key, value)
+
+         success = service.model_manager.db.update_model(model_info)
+         if success:
+             click.echo(f"āœ… Model {model_id} updated successfully!")
+             click.echo("Updated parameters:")
+             for key, value in updates.items():
+                 click.echo(f"  {key}: {value}")
+         else:
+             click.echo(f"āŒ Failed to update model {model_id}")
+
+     except Exception as e:
+         click.echo(f"āŒ Error updating model: {e}")
+
+
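The if-chain that builds `updates` can be collapsed into a single comprehension over the option values; an equivalent sketch using the command's own parameter names:

    candidates = {
        "name": name,
        "temperature": temperature,
        "max_length": max_length,
        "top_p": top_p,
        "top_k": top_k,
        "device": device,
    }
    updates = {key: value for key, value in candidates.items() if value is not None}
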
+ @model_service.command()
+ @click.argument("model_id")
+ @click.option("--force", is_flag=True, help="Force removal without confirmation")
+ def remove_model(model_id: str, force: bool = False):
+     """Remove a model from the service"""
+     service = ModelService()
+
+     try:
+         # Get model info first
+         model_info = service.model_manager.db.get_model(model_id)
+         if not model_info:
+             click.echo(f"āŒ Model {model_id} not found")
+             return
+
+         if not force:
+             click.echo("Model to remove:")
+             click.echo(f"  Name: {model_info.name}")
+             click.echo(f"  Type: {model_info.model_type}")
+             click.echo(f"  Path: {model_info.model_path}")
+             click.echo(f"  Loaded: {'Yes' if model_info.is_loaded else 'No'}")
+
+             if not click.confirm("Are you sure you want to remove this model?"):
+                 click.echo("Operation cancelled.")
+                 return
+
+         # Unload first if loaded
+         if model_info.is_loaded:
+             service.model_manager.unload_model(model_id)
+             click.echo(f"āœ… Model {model_id} unloaded")
+
+         # Remove from database
+         success = service.model_manager.db.delete_model(model_id)
+         if success:
+             click.echo(f"āœ… Model {model_id} removed successfully!")
+         else:
+             click.echo(f"āŒ Failed to remove model {model_id}")
+
+     except Exception as e:
+         click.echo(f"āŒ Error removing model: {e}")
+
+
+ # Lightweight server commands
+ @model_service.command()
+ @click.option("--list", "show_list", is_flag=True, help="List available lightweight models")
+ @click.option("--download", help="Download a specific lightweight model")
+ @click.option("--auto", is_flag=True, help="Automatically select best model for your system")
+ @click.option("--start-server", is_flag=True, help="Start the lightweight server")
+ @click.option("--port", default=8080, help="Port for lightweight server")
+ def lightweight(show_list: bool, download: Optional[str], auto: bool, start_server: bool, port: int):
+     """Manage lightweight models and server"""
+     service = ModelService()
+
+     if show_list:
+         click.echo("šŸš€ Available Lightweight Models:")
+         click.echo("=" * 60)
+
+         for key, info in LIGHTWEIGHT_MODELS.items():
+             status = (
+                 "āœ… Downloaded"
+                 if key in service.lightweight_server.loaded_models
+                 else "ā³ Not downloaded"
+             )
+             click.echo(f"{status} - {info['name']} ({info['parameters']})")
+             click.echo(
+                 f"  Size: {info['size_mb']} MB | Efficiency: {info['efficiency_score']}/10"
+             )
+             click.echo(f"  Type: {info['model_type']} | Tags: {', '.join(info['tags'])}")
+             click.echo()
+         return
+
+     if download:
+         if download not in LIGHTWEIGHT_MODELS:
+             click.echo(f"āŒ Model '{download}' not found")
+             click.echo("Available models:")
+             for key in LIGHTWEIGHT_MODELS.keys():
+                 click.echo(f"  {key}")
+             return
+
+         click.echo(f"šŸ“„ Downloading {download}...")
+         success = service.lightweight_server.download_and_load_model(download)
+         if success:
+             click.echo(f"āœ… Model '{download}' downloaded successfully!")
+         else:
+             click.echo(f"āŒ Failed to download model '{download}'")
+         return
+
+     if auto:
+         recommended = service.lightweight_server.recommend_model()
+         click.echo(f"šŸŽÆ Recommended model: {recommended}")
+         click.echo(f"šŸ“„ Downloading {recommended}...")
+         success = service.lightweight_server.download_and_load_model(recommended)
+         if success:
+             click.echo(f"āœ… Model '{recommended}' downloaded successfully!")
+         else:
+             click.echo(f"āŒ Failed to download model '{recommended}'")
+         return
+
+     if start_server:
+         click.echo(f"šŸš€ Starting lightweight server on port {port}...")
+         service.lightweight_server.port = port
+         service.lightweight_server.start_server()
+
+         click.echo("āœ… Server started!")
+         click.echo(f"🌐 API: http://localhost:{port}")
+         click.echo(f"šŸ“Š Health: http://localhost:{port}/health")
+         click.echo(f"šŸ“‹ Models: http://localhost:{port}/models")
+
+         try:
+             while True:
+                 time.sleep(1)
+         except KeyboardInterrupt:
+             click.echo("\nšŸ›‘ Server stopped")
+         return
+
+     # Show help if no options were provided
+     click.echo("Lightweight model server commands:")
+     click.echo("  --list            List available models")
+     click.echo("  --download MODEL  Download a specific model")
+     click.echo("  --auto            Download recommended model for your system")
+     click.echo("  --start-server    Start the lightweight server")
+
+
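The fields read from `LIGHTWEIGHT_MODELS` above imply registry entries of roughly this shape; the key and values here are illustrative assumptions, not actual entries from the package:

    LIGHTWEIGHT_MODELS = {
        "distilgpt2": {  # hypothetical key
            "name": "DistilGPT-2",
            "parameters": "82M",
            "size_mb": 330,
            "efficiency_score": 9,
            "model_type": "text-generation",
            "tags": ["small", "cpu-friendly"],
        },
    }
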
+ @model_service.command()
+ @click.option(
+     "--model",
+     type=click.Choice(list(LIGHTWEIGHT_MODELS.keys())),
+     help="Specific model to download and run",
+ )
+ @click.option(
+     "--auto", is_flag=True, default=True, help="Automatically select best model for your system"
+ )
+ @click.option("--port", default=8080, help="Port to run server on")
+ @click.option("--list-models", is_flag=True, help="List available models")
+ @click.option("--download-only", is_flag=True, help="Only download models, don't start server")
+ def lightweight_run(
+     model: Optional[str], auto: bool, port: int, list_models: bool, download_only: bool
+ ):
+     """Run lightweight model server (standalone mode)"""
+     service = ModelService()
+
+     click.echo("šŸš€ MCLI Lightweight Model Server")
+     click.echo("=" * 50)
+
+     if list_models:
+         service.lightweight_server.list_models()
+         return 0
+
+     # Pick a model: explicit flag, system recommendation, or interactive prompt
+     if model:
+         selected_model = model
+         click.echo(f"šŸŽÆ Using specified model: {selected_model}")
+     elif auto:
+         selected_model = service.lightweight_server.recommend_model()
+         click.echo(f"šŸŽÆ Recommended model: {selected_model}")
+     else:
+         click.echo("Available models:")
+         for key, info in LIGHTWEIGHT_MODELS.items():
+             click.echo(f"  {key}: {info['name']} ({info['parameters']})")
+         selected_model = click.prompt(
+             "Select model", type=click.Choice(list(LIGHTWEIGHT_MODELS.keys()))
+         )
+
+     # Download and load model
+     if not service.lightweight_server.download_and_load_model(selected_model):
+         click.echo("āŒ Failed to download model")
+         return 1
+
+     if download_only:
+         click.echo("āœ… Model downloaded successfully")
+         return 0
+
+     # Start server
+     click.echo(f"\nšŸš€ Starting lightweight server on port {port}...")
+     service.lightweight_server.port = port
+     service.lightweight_server.start_server()
+
+     click.echo("\nšŸ“ Usage:")
+     click.echo(f"  - API: http://localhost:{port}")
+     click.echo(f"  - Health: http://localhost:{port}/health")
+     click.echo(f"  - Models: http://localhost:{port}/models")
+
+     try:
+         # Keep server running
+         while True:
+             time.sleep(1)
+     except KeyboardInterrupt:
+         click.echo("\nšŸ›‘ Server stopped")
+
+
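The `while True: time.sleep(1)` loop wakes once per second purely to stay interruptible. A standard-library alternative that sleeps until a signal arrives (POSIX-only, since `signal.pause` is unavailable on Windows); this is a sketch of an option, not what the package does:

    import signal

    def block_until_interrupt() -> None:
        """Idle without polling; Ctrl-C (SIGINT) still raises KeyboardInterrupt."""
        try:
            signal.pause()  # sleeps until any signal is delivered
        except KeyboardInterrupt:
            print("\nšŸ›‘ Server stopped")
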
+ # PDF processing commands
+ @model_service.command()
+ @click.argument("pdf_path")
+ @click.option("--model", help="Specific model to use for AI analysis")
+ @click.option("--extract-only", is_flag=True, help="Only extract text, no AI analysis")
+ def process_pdf(pdf_path: str, model: Optional[str], extract_only: bool):
+     """Process a PDF with AI analysis"""
+     service = ModelService()
+
+     try:
+         if extract_only:
+             click.echo(f"šŸ“„ Extracting text from: {pdf_path}")
+             result = service.pdf_processor.extract_text_from_pdf(pdf_path)
+         else:
+             click.echo(f"šŸ¤– Processing PDF with AI: {pdf_path}")
+             if model:
+                 click.echo(f"šŸŽÆ Using model: {model}")
+                 result = service.pdf_processor.process_pdf_with_ai(pdf_path, model)
+             else:
+                 result = service.pdf_processor.process_pdf_with_ai(pdf_path)
+
+         if result.get("success"):
+             if extract_only:
+                 click.echo(f"āœ… Text extracted: {result['text_length']} characters")
+                 click.echo(f"šŸ“ Preview: {result['text'][:200]}...")
+             else:
+                 analysis = result["pdf_analysis"]["ai_analysis"]
+                 click.echo("āœ… PDF processed successfully!")
+                 click.echo(f"šŸ“Š Document type: {analysis['document_type']}")
+                 click.echo(f"šŸ“ Summary: {analysis['summary'][:200]}...")
+                 click.echo(f"šŸ”‘ Key topics: {', '.join(analysis['key_topics'])}")
+                 click.echo(f"šŸ“ˆ Complexity score: {analysis['complexity_score']:.2f}")
+         else:
+             click.echo(f"āŒ Error: {result.get('error', 'Unknown error')}")
+
+     except Exception as e:
+         click.echo(f"āŒ Error processing PDF: {e}")
+
+
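The fields consumed above imply two result shapes from the PDF processor: `extract_text_from_pdf` is read as `{"success": ..., "text": ..., "text_length": ...}`, while `process_pdf_with_ai` is read as a nested analysis. A sketch with illustrative values (not data from the package):

    result = {
        "success": True,
        "pdf_analysis": {
            "ai_analysis": {
                "document_type": "report",
                "summary": "...",
                "key_topics": ["finance", "forecasting"],
                "complexity_score": 0.72,
            },
        },
    }
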
+ @model_service.command()
+ @click.option("--port", default=8080, help="Port for PDF processing service")
+ def start_pdf_service(port: int):
+     """Start the PDF processing service"""
+     service = ModelService()
+
+     try:
+         click.echo(f"šŸš€ Starting PDF processing service on port {port}...")
+         success = service.pdf_processor.start_pdf_processing_service(port)
+
+         if success:
+             click.echo("āœ… PDF processing service started!")
+             click.echo(f"🌐 API: http://localhost:{port}")
+             click.echo(f"šŸ“Š Status: http://localhost:{port}/status")
+
+             try:
+                 while True:
+                     time.sleep(1)
+             except KeyboardInterrupt:
+                 click.echo("\nšŸ›‘ PDF processing service stopped")
+         else:
+             click.echo("āŒ Failed to start PDF processing service")
+
+     except Exception as e:
+         click.echo(f"āŒ Error starting PDF service: {e}")
+
+
+ # Embedding commands
+ @model_service.command()
+ @click.argument("text")
+ @click.option("--method", help="Embedding method (sentence_transformers, tfidf, simple_hash)")
+ def embed_text(text: str, method: Optional[str]):
+     """Embed text using the lightweight embedder"""
+     service = ModelService()
+
+     try:
+         click.echo(f"šŸ”¤ Embedding text: {text[:50]}...")
+         if method:
+             click.echo(f"šŸŽÆ Using method: {method}")
+             result = service.embedder.embed_text(text, method)
+         else:
+             result = service.embedder.embed_text(text)
+
+         if result:
+             click.echo("āœ… Text embedded successfully!")
+             click.echo(f"šŸ“Š Method: {result['method']}")
+             click.echo(f"šŸ“ Dimensions: {result['dimensions']}")
+             click.echo(f"šŸ“ Text length: {result['text_length']}")
+         else:
+             click.echo("āŒ Failed to embed text")
+
+     except Exception as e:
+         click.echo(f"āŒ Error embedding text: {e}")
+
+
+ @model_service.command()
+ @click.argument("text")
+ @click.option("--chunk-size", default=1000, help="Chunk size for document embedding")
+ def embed_document(text: str, chunk_size: int):
+     """Embed a document using the lightweight embedder"""
+     service = ModelService()
+
+     try:
+         click.echo(f"šŸ“„ Embedding document: {text[:50]}...")
+         result = service.embedder.embed_document(text, chunk_size)
+
+         if result.get("success"):
+             doc_embedding = result["document_embedding"]
+             click.echo("āœ… Document embedded successfully!")
+             click.echo(f"šŸ“Š Method: {doc_embedding['method']}")
+             click.echo(f"šŸ“„ Total chunks: {doc_embedding['total_chunks']}")
+             click.echo(f"šŸ“ Text length: {doc_embedding['total_text_length']}")
+         else:
+             click.echo(f"āŒ Failed to embed document: {result.get('error', 'Unknown error')}")
+
+     except Exception as e:
+         click.echo(f"āŒ Error embedding document: {e}")
+
+
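Taken together, the two embedding commands read results of roughly these shapes (illustrative values only, inferred from the keys accessed above):

    # embed_text result
    result = {"method": "tfidf", "dimensions": 384, "text_length": 42}

    # embed_document result
    result = {
        "success": True,
        "document_embedding": {
            "method": "tfidf",
            "total_chunks": 3,
            "total_text_length": 2500,
        },
    }
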
+ @model_service.command()
+ def embedder_status():
+     """Show embedder status"""
+     service = ModelService()
+
+     try:
+         status = service.embedder.get_status()
+         click.echo("šŸ”¤ Lightweight Embedder Status")
+         click.echo("=" * 40)
+         click.echo(f"Current method: {status['current_method']}")
+         click.echo(f"Models directory: {status['models_dir']}")
+         click.echo(f"Cache size: {status['cache_size']}")
+         click.echo("\nAvailable methods:")
+         for method, available in status["available_methods"].items():
+             status_icon = "āœ…" if available else "āŒ"
+             click.echo(f"  {status_icon} {method}")
+
+     except Exception as e:
+         click.echo(f"āŒ Error getting embedder status: {e}")
+
+
+ if __name__ == "__main__":
+     model_service()
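
Finally, the whole group can be exercised in-process with Click's test runner. A minimal smoke-test sketch; note that with Click 7+ the default command names derive from the function names with underscores turned into dashes:

    from click.testing import CliRunner

    runner = CliRunner()
    for args in (["status"], ["list-models", "--summary"], ["embedder-status"]):
        result = runner.invoke(model_service, args)
        print(args, "->", result.exit_code)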