mcli_framework-7.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mcli-framework has been flagged as possibly problematic.
- mcli/app/chat_cmd.py +42 -0
- mcli/app/commands_cmd.py +226 -0
- mcli/app/completion_cmd.py +216 -0
- mcli/app/completion_helpers.py +288 -0
- mcli/app/cron_test_cmd.py +697 -0
- mcli/app/logs_cmd.py +419 -0
- mcli/app/main.py +492 -0
- mcli/app/model/model.py +1060 -0
- mcli/app/model_cmd.py +227 -0
- mcli/app/redis_cmd.py +269 -0
- mcli/app/video/video.py +1114 -0
- mcli/app/visual_cmd.py +303 -0
- mcli/chat/chat.py +2409 -0
- mcli/chat/command_rag.py +514 -0
- mcli/chat/enhanced_chat.py +652 -0
- mcli/chat/system_controller.py +1010 -0
- mcli/chat/system_integration.py +1016 -0
- mcli/cli.py +25 -0
- mcli/config.toml +20 -0
- mcli/lib/api/api.py +586 -0
- mcli/lib/api/daemon_client.py +203 -0
- mcli/lib/api/daemon_client_local.py +44 -0
- mcli/lib/api/daemon_decorator.py +217 -0
- mcli/lib/api/mcli_decorators.py +1032 -0
- mcli/lib/auth/auth.py +85 -0
- mcli/lib/auth/aws_manager.py +85 -0
- mcli/lib/auth/azure_manager.py +91 -0
- mcli/lib/auth/credential_manager.py +192 -0
- mcli/lib/auth/gcp_manager.py +93 -0
- mcli/lib/auth/key_manager.py +117 -0
- mcli/lib/auth/mcli_manager.py +93 -0
- mcli/lib/auth/token_manager.py +75 -0
- mcli/lib/auth/token_util.py +1011 -0
- mcli/lib/config/config.py +47 -0
- mcli/lib/discovery/__init__.py +1 -0
- mcli/lib/discovery/command_discovery.py +274 -0
- mcli/lib/erd/erd.py +1345 -0
- mcli/lib/erd/generate_graph.py +453 -0
- mcli/lib/files/files.py +76 -0
- mcli/lib/fs/fs.py +109 -0
- mcli/lib/lib.py +29 -0
- mcli/lib/logger/logger.py +611 -0
- mcli/lib/performance/optimizer.py +409 -0
- mcli/lib/performance/rust_bridge.py +502 -0
- mcli/lib/performance/uvloop_config.py +154 -0
- mcli/lib/pickles/pickles.py +50 -0
- mcli/lib/search/cached_vectorizer.py +479 -0
- mcli/lib/services/data_pipeline.py +460 -0
- mcli/lib/services/lsh_client.py +441 -0
- mcli/lib/services/redis_service.py +387 -0
- mcli/lib/shell/shell.py +137 -0
- mcli/lib/toml/toml.py +33 -0
- mcli/lib/ui/styling.py +47 -0
- mcli/lib/ui/visual_effects.py +634 -0
- mcli/lib/watcher/watcher.py +185 -0
- mcli/ml/api/app.py +215 -0
- mcli/ml/api/middleware.py +224 -0
- mcli/ml/api/routers/admin_router.py +12 -0
- mcli/ml/api/routers/auth_router.py +244 -0
- mcli/ml/api/routers/backtest_router.py +12 -0
- mcli/ml/api/routers/data_router.py +12 -0
- mcli/ml/api/routers/model_router.py +302 -0
- mcli/ml/api/routers/monitoring_router.py +12 -0
- mcli/ml/api/routers/portfolio_router.py +12 -0
- mcli/ml/api/routers/prediction_router.py +267 -0
- mcli/ml/api/routers/trade_router.py +12 -0
- mcli/ml/api/routers/websocket_router.py +76 -0
- mcli/ml/api/schemas.py +64 -0
- mcli/ml/auth/auth_manager.py +425 -0
- mcli/ml/auth/models.py +154 -0
- mcli/ml/auth/permissions.py +302 -0
- mcli/ml/backtesting/backtest_engine.py +502 -0
- mcli/ml/backtesting/performance_metrics.py +393 -0
- mcli/ml/cache.py +400 -0
- mcli/ml/cli/main.py +398 -0
- mcli/ml/config/settings.py +394 -0
- mcli/ml/configs/dvc_config.py +230 -0
- mcli/ml/configs/mlflow_config.py +131 -0
- mcli/ml/configs/mlops_manager.py +293 -0
- mcli/ml/dashboard/app.py +532 -0
- mcli/ml/dashboard/app_integrated.py +738 -0
- mcli/ml/dashboard/app_supabase.py +560 -0
- mcli/ml/dashboard/app_training.py +615 -0
- mcli/ml/dashboard/cli.py +51 -0
- mcli/ml/data_ingestion/api_connectors.py +501 -0
- mcli/ml/data_ingestion/data_pipeline.py +567 -0
- mcli/ml/data_ingestion/stream_processor.py +512 -0
- mcli/ml/database/migrations/env.py +94 -0
- mcli/ml/database/models.py +667 -0
- mcli/ml/database/session.py +200 -0
- mcli/ml/experimentation/ab_testing.py +845 -0
- mcli/ml/features/ensemble_features.py +607 -0
- mcli/ml/features/political_features.py +676 -0
- mcli/ml/features/recommendation_engine.py +809 -0
- mcli/ml/features/stock_features.py +573 -0
- mcli/ml/features/test_feature_engineering.py +346 -0
- mcli/ml/logging.py +85 -0
- mcli/ml/mlops/data_versioning.py +518 -0
- mcli/ml/mlops/experiment_tracker.py +377 -0
- mcli/ml/mlops/model_serving.py +481 -0
- mcli/ml/mlops/pipeline_orchestrator.py +614 -0
- mcli/ml/models/base_models.py +324 -0
- mcli/ml/models/ensemble_models.py +675 -0
- mcli/ml/models/recommendation_models.py +474 -0
- mcli/ml/models/test_models.py +487 -0
- mcli/ml/monitoring/drift_detection.py +676 -0
- mcli/ml/monitoring/metrics.py +45 -0
- mcli/ml/optimization/portfolio_optimizer.py +834 -0
- mcli/ml/preprocessing/data_cleaners.py +451 -0
- mcli/ml/preprocessing/feature_extractors.py +491 -0
- mcli/ml/preprocessing/ml_pipeline.py +382 -0
- mcli/ml/preprocessing/politician_trading_preprocessor.py +569 -0
- mcli/ml/preprocessing/test_preprocessing.py +294 -0
- mcli/ml/scripts/populate_sample_data.py +200 -0
- mcli/ml/tasks.py +400 -0
- mcli/ml/tests/test_integration.py +429 -0
- mcli/ml/tests/test_training_dashboard.py +387 -0
- mcli/public/oi/oi.py +15 -0
- mcli/public/public.py +4 -0
- mcli/self/self_cmd.py +1246 -0
- mcli/workflow/daemon/api_daemon.py +800 -0
- mcli/workflow/daemon/async_command_database.py +681 -0
- mcli/workflow/daemon/async_process_manager.py +591 -0
- mcli/workflow/daemon/client.py +530 -0
- mcli/workflow/daemon/commands.py +1196 -0
- mcli/workflow/daemon/daemon.py +905 -0
- mcli/workflow/daemon/daemon_api.py +59 -0
- mcli/workflow/daemon/enhanced_daemon.py +571 -0
- mcli/workflow/daemon/process_cli.py +244 -0
- mcli/workflow/daemon/process_manager.py +439 -0
- mcli/workflow/daemon/test_daemon.py +275 -0
- mcli/workflow/dashboard/dashboard_cmd.py +113 -0
- mcli/workflow/docker/docker.py +0 -0
- mcli/workflow/file/file.py +100 -0
- mcli/workflow/gcloud/config.toml +21 -0
- mcli/workflow/gcloud/gcloud.py +58 -0
- mcli/workflow/git_commit/ai_service.py +328 -0
- mcli/workflow/git_commit/commands.py +430 -0
- mcli/workflow/lsh_integration.py +355 -0
- mcli/workflow/model_service/client.py +594 -0
- mcli/workflow/model_service/download_and_run_efficient_models.py +288 -0
- mcli/workflow/model_service/lightweight_embedder.py +397 -0
- mcli/workflow/model_service/lightweight_model_server.py +714 -0
- mcli/workflow/model_service/lightweight_test.py +241 -0
- mcli/workflow/model_service/model_service.py +1955 -0
- mcli/workflow/model_service/ollama_efficient_runner.py +425 -0
- mcli/workflow/model_service/pdf_processor.py +386 -0
- mcli/workflow/model_service/test_efficient_runner.py +234 -0
- mcli/workflow/model_service/test_example.py +315 -0
- mcli/workflow/model_service/test_integration.py +131 -0
- mcli/workflow/model_service/test_new_features.py +149 -0
- mcli/workflow/openai/openai.py +99 -0
- mcli/workflow/politician_trading/commands.py +1790 -0
- mcli/workflow/politician_trading/config.py +134 -0
- mcli/workflow/politician_trading/connectivity.py +490 -0
- mcli/workflow/politician_trading/data_sources.py +395 -0
- mcli/workflow/politician_trading/database.py +410 -0
- mcli/workflow/politician_trading/demo.py +248 -0
- mcli/workflow/politician_trading/models.py +165 -0
- mcli/workflow/politician_trading/monitoring.py +413 -0
- mcli/workflow/politician_trading/scrapers.py +966 -0
- mcli/workflow/politician_trading/scrapers_california.py +412 -0
- mcli/workflow/politician_trading/scrapers_eu.py +377 -0
- mcli/workflow/politician_trading/scrapers_uk.py +350 -0
- mcli/workflow/politician_trading/scrapers_us_states.py +438 -0
- mcli/workflow/politician_trading/supabase_functions.py +354 -0
- mcli/workflow/politician_trading/workflow.py +852 -0
- mcli/workflow/registry/registry.py +180 -0
- mcli/workflow/repo/repo.py +223 -0
- mcli/workflow/scheduler/commands.py +493 -0
- mcli/workflow/scheduler/cron_parser.py +238 -0
- mcli/workflow/scheduler/job.py +182 -0
- mcli/workflow/scheduler/monitor.py +139 -0
- mcli/workflow/scheduler/persistence.py +324 -0
- mcli/workflow/scheduler/scheduler.py +679 -0
- mcli/workflow/sync/sync_cmd.py +437 -0
- mcli/workflow/sync/test_cmd.py +314 -0
- mcli/workflow/videos/videos.py +242 -0
- mcli/workflow/wakatime/wakatime.py +11 -0
- mcli/workflow/workflow.py +37 -0
- mcli_framework-7.0.0.dist-info/METADATA +479 -0
- mcli_framework-7.0.0.dist-info/RECORD +186 -0
- mcli_framework-7.0.0.dist-info/WHEEL +5 -0
- mcli_framework-7.0.0.dist-info/entry_points.txt +7 -0
- mcli_framework-7.0.0.dist-info/licenses/LICENSE +21 -0
- mcli_framework-7.0.0.dist-info/top_level.txt +1 -0
mcli/workflow/model_service/model_service.py
@@ -0,0 +1,1955 @@
import asyncio
import base64
import json
import logging
import os
import shutil
import signal
import sqlite3
import sys
import tempfile
import threading
import time
import urllib.parse
import uuid
from contextlib import asynccontextmanager
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse

# CLI Commands
import click
import numpy as np
import psutil
import requests

# Model loading and inference
import torch
import transformers
import uvicorn

# FastAPI for REST API
from fastapi import BackgroundTasks, FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from PIL import Image
from pydantic import BaseModel, Field
from transformers import AutoModel, AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer

# Import existing utilities
from mcli.lib.logger.logger import get_logger
from mcli.lib.toml.toml import read_from_toml

from .lightweight_embedder import LightweightEmbedder

# Import lightweight model server
from .lightweight_model_server import LIGHTWEIGHT_MODELS, LightweightModelServer
from .pdf_processor import PDFProcessor

logger = get_logger(__name__)

# Configuration
DEFAULT_CONFIG = {
    "host": "0.0.0.0",
    "port": 8000,
    "models_dir": "./models",
    "temp_dir": "./temp",
    "max_concurrent_requests": 4,
    "request_timeout": 300,
    "model_cache_size": 2,
    "enable_cors": True,
    "cors_origins": ["*"],
    "log_level": "INFO",
}


@dataclass
class ModelInfo:
    """Represents a loaded model"""

    id: str
    name: str
    # 'text-generation', 'text-classification', 'translation', 'image-generation', etc.
    model_type: str
    model_path: str
    tokenizer_path: Optional[str] = None
    device: str = "auto"  # 'cpu', 'cuda', 'auto'
    max_length: int = 512
    temperature: float = 0.7
    top_p: float = 0.9
    top_k: int = 50
    is_loaded: bool = False
    memory_usage_mb: float = 0.0
    parameters_count: int = 0
    # Default to None and fill in __post_init__ so every instance gets its own
    # timestamp; a `datetime.now()` default would be evaluated once at import time
    # and shared by all instances.
    created_at: Optional[datetime] = None

    def __post_init__(self):
        if self.created_at is None:
            self.created_at = datetime.now()


class ModelDatabase:
    """Manages model metadata storage"""

    def __init__(self, db_path: Optional[str] = None):
        if db_path is None:
            db_path = str(Path.home() / ".local" / "mcli" / "model_service" / "models.db")
        else:
            db_path = str(db_path)
        self.db_path = db_path
        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
        self.init_database()

    def init_database(self):
        """Initialize SQLite database"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        # Models table
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS models (
                id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                model_type TEXT NOT NULL,
                model_path TEXT NOT NULL,
                tokenizer_path TEXT,
                device TEXT DEFAULT 'auto',
                max_length INTEGER DEFAULT 512,
                temperature REAL DEFAULT 0.7,
                top_p REAL DEFAULT 0.9,
                top_k INTEGER DEFAULT 50,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                is_loaded BOOLEAN DEFAULT 0,
                memory_usage_mb REAL DEFAULT 0.0,
                parameters_count INTEGER DEFAULT 0
            )
            """
        )

        # Inference history
        cursor.execute(
            """
            CREATE TABLE IF NOT EXISTS inferences (
                id TEXT PRIMARY KEY,
                model_id TEXT NOT NULL,
                request_type TEXT NOT NULL,
                input_data TEXT,
                output_data TEXT,
                execution_time_ms INTEGER,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                error_message TEXT,
                FOREIGN KEY (model_id) REFERENCES models (id)
            )
            """
        )

        conn.commit()
        conn.close()

    def add_model(self, model_info: ModelInfo) -> str:
        """Add a new model to the database"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            cursor.execute(
                """
                INSERT INTO models
                (id, name, model_type, model_path, tokenizer_path, device,
                 max_length, temperature, top_p, top_k, created_at, is_loaded,
                 memory_usage_mb, parameters_count)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    model_info.id,
                    model_info.name,
                    model_info.model_type,
                    model_info.model_path,
                    model_info.tokenizer_path,
                    model_info.device,
                    model_info.max_length,
                    model_info.temperature,
                    model_info.top_p,
                    model_info.top_k,
                    model_info.created_at.isoformat(),
                    model_info.is_loaded,
                    model_info.memory_usage_mb,
                    model_info.parameters_count,
                ),
            )

            conn.commit()
            return model_info.id

        except Exception as e:
            logger.error(f"Error adding model: {e}")
            conn.rollback()
            raise
        finally:
            conn.close()

    def get_model(self, model_id: str) -> Optional[ModelInfo]:
        """Get a model by ID"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            cursor.execute(
                """
                SELECT id, name, model_type, model_path, tokenizer_path, device,
                       max_length, temperature, top_p, top_k, created_at, is_loaded,
                       memory_usage_mb, parameters_count
                FROM models WHERE id = ?
                """,
                (model_id,),
            )

            row = cursor.fetchone()
            if row:
                return self._row_to_model_info(row)
            return None

        finally:
            conn.close()

    def get_all_models(self) -> List[ModelInfo]:
        """Get all models"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            cursor.execute(
                """
                SELECT id, name, model_type, model_path, tokenizer_path, device,
                       max_length, temperature, top_p, top_k, created_at, is_loaded,
                       memory_usage_mb, parameters_count
                FROM models ORDER BY name
                """
            )

            return [self._row_to_model_info(row) for row in cursor.fetchall()]

        finally:
            conn.close()

    def update_model(self, model_info: ModelInfo) -> bool:
        """Update model information"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            cursor.execute(
                """
                UPDATE models
                SET name = ?, model_type = ?, model_path = ?, tokenizer_path = ?,
                    device = ?, max_length = ?, temperature = ?, top_p = ?, top_k = ?,
                    is_loaded = ?, memory_usage_mb = ?, parameters_count = ?
                WHERE id = ?
                """,
                (
                    model_info.name,
                    model_info.model_type,
                    model_info.model_path,
                    model_info.tokenizer_path,
                    model_info.device,
                    model_info.max_length,
                    model_info.temperature,
                    model_info.top_p,
                    model_info.top_k,
                    model_info.is_loaded,
                    model_info.memory_usage_mb,
                    model_info.parameters_count,
                    model_info.id,
                ),
            )

            conn.commit()
            return cursor.rowcount > 0

        except Exception as e:
            logger.error(f"Error updating model: {e}")
            conn.rollback()
            return False
        finally:
            conn.close()

    def delete_model(self, model_id: str) -> bool:
        """Delete a model"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            cursor.execute("DELETE FROM models WHERE id = ?", (model_id,))
            conn.commit()
            return cursor.rowcount > 0

        except Exception as e:
            logger.error(f"Error deleting model: {e}")
            conn.rollback()
            return False
        finally:
            conn.close()

    def record_inference(
        self,
        model_id: str,
        request_type: str,
        input_data: str = "",
        output_data: str = "",
        execution_time_ms: int = 0,
        error_message: str = "",
    ):
        """Record inference request"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()

        try:
            inference_id = str(uuid.uuid4())
            cursor.execute(
                """
                INSERT INTO inferences
                (id, model_id, request_type, input_data, output_data,
                 execution_time_ms, error_message)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    inference_id,
                    model_id,
                    request_type,
                    input_data,
                    output_data,
                    execution_time_ms,
                    error_message,
                ),
            )

            conn.commit()

        except Exception as e:
            logger.error(f"Error recording inference: {e}")
            conn.rollback()
        finally:
            conn.close()

    def _row_to_model_info(self, row) -> ModelInfo:
        """Convert database row to ModelInfo object"""
        return ModelInfo(
            id=row[0],
            name=row[1],
            model_type=row[2],
            model_path=row[3],
            tokenizer_path=row[4],
            device=row[5],
            max_length=row[6],
            temperature=row[7],
            top_p=row[8],
            top_k=row[9],
            created_at=datetime.fromisoformat(row[10]),
            is_loaded=bool(row[11]),
            memory_usage_mb=row[12],
            parameters_count=row[13],
        )

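As an aside (not part of the package source): a minimal sketch of how ModelInfo and ModelDatabase above fit together. The model name and path are hypothetical placeholders.

    import uuid

    db = ModelDatabase()  # defaults to ~/.local/mcli/model_service/models.db
    info = ModelInfo(
        id=str(uuid.uuid4()),
        name="demo-model",              # hypothetical
        model_type="text-generation",
        model_path="/tmp/models/demo",  # hypothetical path
    )
    db.add_model(info)
    fetched = db.get_model(info.id)
    assert fetched is not None and fetched.name == "demo-model"
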
class ModelManager:
    """Manages model loading, caching, and inference"""

    def __init__(self, models_dir: str = "./models", max_cache_size: int = 2):
        self.models_dir = Path(models_dir)
        self.models_dir.mkdir(parents=True, exist_ok=True)
        self.max_cache_size = max_cache_size
        self.loaded_models: Dict[str, Any] = {}
        # Reentrant lock: load_model holds it while _evict_oldest_model calls
        # unload_model, which acquires it again; a plain Lock would deadlock here.
        self.model_lock = threading.RLock()
        self.db = ModelDatabase()

        # Device detection
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        logger.info(f"Using device: {self.device}")

    def load_model(self, model_info: ModelInfo) -> bool:
        """Load a model into memory"""
        with self.model_lock:
            try:
                logger.info(f"Loading model: {model_info.name}")

                # Check if model is already loaded
                if model_info.id in self.loaded_models:
                    logger.info(f"Model {model_info.name} already loaded")
                    return True

                # Manage cache size
                if len(self.loaded_models) >= self.max_cache_size:
                    self._evict_oldest_model()

                # Load model based on type
                if model_info.model_type == "text-generation":
                    model, tokenizer = self._load_text_generation_model(model_info)
                elif model_info.model_type == "text-classification":
                    model, tokenizer = self._load_text_classification_model(model_info)
                elif model_info.model_type == "translation":
                    model, tokenizer = self._load_translation_model(model_info)
                elif model_info.model_type == "image-generation":
                    model, tokenizer = self._load_image_generation_model(model_info)
                else:
                    raise ValueError(f"Unsupported model type: {model_info.model_type}")

                # Store loaded model
                self.loaded_models[model_info.id] = {
                    "model": model,
                    "tokenizer": tokenizer,
                    "model_info": model_info,
                    "loaded_at": datetime.now(),
                }

                # Update model info
                model_info.is_loaded = True
                model_info.memory_usage_mb = self._get_model_memory_usage(model)
                model_info.parameters_count = sum(p.numel() for p in model.parameters())
                self.db.update_model(model_info)

                logger.info(f"Successfully loaded model: {model_info.name}")
                return True

            except Exception as e:
                logger.error(f"Error loading model {model_info.name}: {e}")
                return False

    def unload_model(self, model_id: str) -> bool:
        """Unload a model from memory"""
        with self.model_lock:
            if model_id in self.loaded_models:
                del self.loaded_models[model_id]

                # Update model info
                model_info = self.db.get_model(model_id)
                if model_info:
                    model_info.is_loaded = False
                    model_info.memory_usage_mb = 0.0
                    self.db.update_model(model_info)

                logger.info(f"Unloaded model: {model_id}")
                return True
            return False

    def _load_text_generation_model(self, model_info: ModelInfo):
        """Load a text generation model"""
        tokenizer = AutoTokenizer.from_pretrained(
            model_info.tokenizer_path or model_info.model_path, trust_remote_code=True
        )

        model = AutoModelForCausalLM.from_pretrained(
            model_info.model_path,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map="auto" if self.device == "cuda" else None,
            trust_remote_code=True,
        )

        if self.device == "cpu":
            model = model.to(self.device)

        return model, tokenizer

    def _load_text_classification_model(self, model_info: ModelInfo):
        """Load a text classification model"""
        tokenizer = AutoTokenizer.from_pretrained(
            model_info.tokenizer_path or model_info.model_path
        )

        model = AutoModel.from_pretrained(
            model_info.model_path,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map="auto" if self.device == "cuda" else None,
        )

        if self.device == "cpu":
            model = model.to(self.device)

        return model, tokenizer

    def _load_translation_model(self, model_info: ModelInfo):
        """Load a translation model"""
        tokenizer = AutoTokenizer.from_pretrained(
            model_info.tokenizer_path or model_info.model_path
        )

        model = AutoModelForSeq2SeqLM.from_pretrained(
            model_info.model_path,
            torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
            device_map="auto" if self.device == "cuda" else None,
        )

        if self.device == "cpu":
            model = model.to(self.device)

        return model, tokenizer

    def _load_image_generation_model(self, model_info: ModelInfo):
        """Load an image generation model (placeholder)"""
        # This would be implemented based on specific image generation frameworks
        # like Stable Diffusion, DALL-E, etc.
        raise NotImplementedError("Image generation models not yet implemented")

    def _evict_oldest_model(self):
        """Evict the oldest loaded model from cache"""
        if not self.loaded_models:
            return

        oldest_id = min(self.loaded_models.keys(), key=lambda k: self.loaded_models[k]["loaded_at"])
        self.unload_model(oldest_id)

    def _get_model_memory_usage(self, model) -> float:
        """Get model memory usage in MB"""
        try:
            if self.device == "cuda":
                return torch.cuda.memory_allocated() / 1024 / 1024
            else:
                # Rough estimation for CPU
                total_params = sum(p.numel() for p in model.parameters())
                return total_params * 4 / 1024 / 1024  # 4 bytes per float32
        except Exception:
            return 0.0

    def generate_text(
        self,
        model_id: str,
        prompt: str,
        max_length: Optional[int] = None,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
    ) -> str:
        """Generate text using a loaded model"""
        if model_id not in self.loaded_models:
            raise ValueError(f"Model {model_id} not loaded")

        model_data = self.loaded_models[model_id]
        model = model_data["model"]
        tokenizer = model_data["tokenizer"]
        model_info = model_data["model_info"]

        # Use provided parameters or defaults
        max_length = max_length or model_info.max_length
        temperature = temperature or model_info.temperature
        top_p = top_p or model_info.top_p
        top_k = top_k or model_info.top_k

        try:
            # Tokenize input
            inputs = tokenizer(prompt, return_tensors="pt")
            if self.device == "cuda":
                inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Generate
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_length=max_length,
                    temperature=temperature,
                    top_p=top_p,
                    top_k=top_k,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id,
                )

            # Decode output
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

            # Remove input prompt from output
            if generated_text.startswith(prompt):
                generated_text = generated_text[len(prompt):].strip()

            return generated_text

        except Exception as e:
            logger.error(f"Error generating text: {e}")
            raise

    def classify_text(self, model_id: str, text: str) -> Dict[str, float]:
        """Classify text using a loaded model"""
        if model_id not in self.loaded_models:
            raise ValueError(f"Model {model_id} not loaded")

        model_data = self.loaded_models[model_id]
        model = model_data["model"]
        tokenizer = model_data["tokenizer"]

        try:
            # Tokenize input
            inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            if self.device == "cuda":
                inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Get predictions
            with torch.no_grad():
                outputs = model(**inputs)
                logits = outputs.logits
                probabilities = torch.softmax(logits, dim=-1)

            # Convert to dictionary
            probs = probabilities[0].cpu().numpy()
            return {f"class_{i}": float(prob) for i, prob in enumerate(probs)}

        except Exception as e:
            logger.error(f"Error classifying text: {e}")
            raise

    def translate_text(
        self, model_id: str, text: str, source_lang: str = "en", target_lang: str = "fr"
    ) -> str:
        """Translate text using a loaded model"""
        if model_id not in self.loaded_models:
            raise ValueError(f"Model {model_id} not loaded")

        model_data = self.loaded_models[model_id]
        model = model_data["model"]
        tokenizer = model_data["tokenizer"]

        try:
            # Prepare input
            if hasattr(tokenizer, "lang_code_to_token"):
                # For models like mBART
                inputs = tokenizer(text, return_tensors="pt")
                inputs["labels"] = tokenizer(f"{target_lang} {text}", return_tensors="pt").input_ids
            else:
                # For other translation models
                inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

            if self.device == "cuda":
                inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Generate translation
            with torch.no_grad():
                outputs = model.generate(**inputs, max_length=512)

            # Decode output
            translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            return translated_text

        except Exception as e:
            logger.error(f"Error translating text: {e}")
            raise

    def download_model_from_url(
        self, model_url: str, tokenizer_url: Optional[str] = None
    ) -> tuple[str, Optional[str]]:
        """Download model and tokenizer from URLs and return local paths"""
        try:
            # Parse URLs
            model_parsed = urlparse(model_url)
            model_filename = os.path.basename(model_parsed.path) or "model"

            # Create model directory
            model_dir = self.models_dir / model_filename
            model_dir.mkdir(parents=True, exist_ok=True)

            # Download model
            logger.info(f"Downloading model from: {model_url}")
            model_response = requests.get(model_url, stream=True)
            model_response.raise_for_status()

            model_path = model_dir / "model"
            with open(model_path, "wb") as f:
                for chunk in model_response.iter_content(chunk_size=8192):
                    f.write(chunk)

            # Download tokenizer if provided
            tokenizer_path = None
            if tokenizer_url:
                logger.info(f"Downloading tokenizer from: {tokenizer_url}")
                tokenizer_response = requests.get(tokenizer_url, stream=True)
                tokenizer_response.raise_for_status()

                tokenizer_path = model_dir / "tokenizer"
                with open(tokenizer_path, "wb") as f:
                    for chunk in tokenizer_response.iter_content(chunk_size=8192):
                        f.write(chunk)

            logger.info(f"Model downloaded to: {model_path}")
            return str(model_path), str(tokenizer_path) if tokenizer_path else None

        except Exception as e:
            logger.error(f"Error downloading model from URL: {e}")
            raise

    def add_model_from_url(
        self,
        name: str,
        model_type: str,
        model_url: str,
        tokenizer_url: Optional[str] = None,
        device: str = "auto",
        max_length: int = 512,
        temperature: float = 0.7,
        top_p: float = 0.9,
        top_k: int = 50,
    ) -> str:
        """Add a model from URL by downloading it first"""
        try:
            # Download model and tokenizer
            model_path, tokenizer_path = self.download_model_from_url(model_url, tokenizer_url)

            # Create model info
            model_info = ModelInfo(
                id=str(uuid.uuid4()),
                name=name,
                model_type=model_type,
                model_path=model_path,
                tokenizer_path=tokenizer_path,
                device=device,
                max_length=max_length,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
            )

            # Add to database
            model_id = self.db.add_model(model_info)

            # Try to load the model
            if self.load_model(model_info):
                logger.info(f"Successfully added and loaded model from URL: {name}")
            else:
                logger.warning(f"Model added from URL but failed to load: {name}")

            return model_id

        except Exception as e:
            logger.error(f"Error adding model from URL: {e}")
            raise

    def get_models_summary(self) -> Dict[str, Any]:
        """Get a summary of all models with statistics"""
        models = self.db.get_all_models()

        summary = {
            "total_models": len(models),
            "loaded_models": len([m for m in models if m.is_loaded]),
            "total_memory_mb": sum(m.memory_usage_mb for m in models if m.is_loaded),
            "models_by_type": {},
            "models": [],
        }

        for model in models:
            # Add to type statistics
            model_type = model.model_type
            if model_type not in summary["models_by_type"]:
                summary["models_by_type"][model_type] = {"count": 0, "loaded": 0, "memory_mb": 0.0}
            summary["models_by_type"][model_type]["count"] += 1
            if model.is_loaded:
                summary["models_by_type"][model_type]["loaded"] += 1
                summary["models_by_type"][model_type]["memory_mb"] += model.memory_usage_mb

            # Add model details
            summary["models"].append(
                {
                    "id": model.id,
                    "name": model.name,
                    "type": model.model_type,
                    "loaded": model.is_loaded,
                    "memory_mb": model.memory_usage_mb,
                    "parameters_count": model.parameters_count,
                    "created_at": model.created_at.isoformat(),
                }
            )

        return summary

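As an aside (not part of the package source): a minimal sketch of driving ModelManager directly, assuming a small Hugging Face checkpoint such as "gpt2" that transformers can resolve from its cache or the hub.

    import uuid

    manager = ModelManager(models_dir="./models", max_cache_size=2)
    info = ModelInfo(
        id=str(uuid.uuid4()),
        name="gpt2-demo",             # hypothetical name
        model_type="text-generation",
        model_path="gpt2",            # assumption: resolvable by from_pretrained
    )
    if manager.load_model(info):
        print(manager.generate_text(info.id, "Hello, world", max_length=40))
        manager.unload_model(info.id)
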
# Pydantic models for API
class ModelLoadRequest(BaseModel):
    name: str
    model_type: str
    model_path: str
    tokenizer_path: Optional[str] = None
    device: str = "auto"
    max_length: int = 512
    temperature: float = 0.7
    top_p: float = 0.9
    top_k: int = 50


class ModelLoadFromUrlRequest(BaseModel):
    name: str
    model_type: str
    model_url: str
    tokenizer_url: Optional[str] = None
    device: str = "auto"
    max_length: int = 512
    temperature: float = 0.7
    top_p: float = 0.9
    top_k: int = 50


class TextGenerationRequest(BaseModel):
    prompt: str
    max_length: Optional[int] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None


class TextClassificationRequest(BaseModel):
    text: str


class TranslationRequest(BaseModel):
    text: str
    source_lang: str = "en"
    target_lang: str = "fr"

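As an aside (not part of the package source): these request models define the JSON bodies the API accepts. For example, TextGenerationRequest serializes as below (pydantic v1-style .json(); only prompt is required).

    payload = TextGenerationRequest(prompt="Once upon a time", max_length=100)
    print(payload.json())
    # {"prompt": "Once upon a time", "max_length": 100, "temperature": null, ...}
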
class ModelService:
    """Main model service daemon"""

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        self.config = {**DEFAULT_CONFIG, **(config or {})}
        self.model_manager = ModelManager(
            models_dir=self.config["models_dir"], max_cache_size=self.config["model_cache_size"]
        )

        # Initialize lightweight server
        self.lightweight_server = LightweightModelServer(
            models_dir=f"{self.config['models_dir']}/lightweight",
            port=self.config["port"] + 1,  # Use next port
        )

        # Initialize PDF processor
        self.pdf_processor = PDFProcessor(
            models_dir=f"{self.config['models_dir']}/lightweight",
            port=self.config["port"] + 2,  # Use next port after lightweight server
        )

        # Initialize lightweight embedder
        self.embedder = LightweightEmbedder(models_dir=f"{self.config['models_dir']}/embeddings")

        self.running = False
        self.pid_file = Path.home() / ".local" / "mcli" / "model_service" / "model_service.pid"
        self.pid_file.parent.mkdir(parents=True, exist_ok=True)

        # FastAPI app
        self.app = FastAPI(
            title="MCLI Model Service",
            description="A service for hosting and providing inference APIs for language models",
            version="1.0.0",
        )

        # Add CORS middleware
        if self.config["enable_cors"]:
            self.app.add_middleware(
                CORSMiddleware,
                allow_origins=self.config["cors_origins"],
                allow_credentials=True,
                allow_methods=["*"],
                allow_headers=["*"],
            )

        # Setup routes
        self._setup_routes()

    def _setup_routes(self):
        """Setup FastAPI routes"""

        @self.app.get("/")
        async def root():
            return {
                "service": "MCLI Model Service",
                "version": "1.0.0",
                "status": "running",
                "models_loaded": len(self.model_manager.loaded_models),
            }

        @self.app.get("/models")
        async def list_models():
            """List all available models"""
            models = self.model_manager.db.get_all_models()
            return [asdict(model) for model in models]

        @self.app.get("/models/summary")
        async def get_models_summary():
            """Get a summary of all models with statistics"""
            return self.model_manager.get_models_summary()

        @self.app.post("/models")
        async def load_model(request: ModelLoadRequest):
            """Load a new model"""
            try:
                model_info = ModelInfo(
                    id=str(uuid.uuid4()),
                    name=request.name,
                    model_type=request.model_type,
                    model_path=request.model_path,
                    tokenizer_path=request.tokenizer_path,
                    device=request.device,
                    max_length=request.max_length,
                    temperature=request.temperature,
                    top_p=request.top_p,
                    top_k=request.top_k,
                )

                # Add to database
                model_id = self.model_manager.db.add_model(model_info)

                # Load model
                success = self.model_manager.load_model(model_info)

                if success:
                    return {"model_id": model_id, "status": "loaded"}
                else:
                    # Remove from database if loading failed
                    self.model_manager.db.delete_model(model_id)
                    raise HTTPException(status_code=500, detail="Failed to load model")

            except HTTPException:
                raise
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/models/from-url")
        async def load_model_from_url(request: ModelLoadFromUrlRequest):
            """Load a new model from URL"""
            try:
                model_id = self.model_manager.add_model_from_url(
                    name=request.name,
                    model_type=request.model_type,
                    model_url=request.model_url,
                    tokenizer_url=request.tokenizer_url,
                    device=request.device,
                    max_length=request.max_length,
                    temperature=request.temperature,
                    top_p=request.top_p,
                    top_k=request.top_k,
                )

                return {"model_id": model_id, "status": "loaded"}

            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.delete("/models/{model_id}")
        async def unload_model(model_id: str):
            """Unload a model"""
            try:
                success = self.model_manager.unload_model(model_id)
                if success:
                    return {"status": "unloaded"}
                else:
                    raise HTTPException(status_code=404, detail="Model not found")
            except HTTPException:
                # Pass through the 404 rather than re-wrapping it as a 500
                raise
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.put("/models/{model_id}")
        async def update_model(model_id: str, request: Dict[str, Any]):
            """Update model configuration"""
            try:
                # Get current model info
                model_info = self.model_manager.db.get_model(model_id)
                if not model_info:
                    raise HTTPException(status_code=404, detail="Model not found")

                # Update model info with new values
                for key, value in request.items():
                    if hasattr(model_info, key):
                        setattr(model_info, key, value)

                # Update in database
                success = self.model_manager.db.update_model(model_info)
                if success:
                    return {"status": "updated", "model_id": model_id}
                else:
                    raise HTTPException(status_code=500, detail="Failed to update model")

            except HTTPException:
                raise
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.delete("/models/{model_id}/remove")
        async def remove_model(model_id: str):
            """Remove a model from the database"""
            try:
                # First unload if loaded
                self.model_manager.unload_model(model_id)

                # Remove from database
                success = self.model_manager.db.delete_model(model_id)
                if success:
                    return {"status": "removed", "model_id": model_id}
                else:
                    raise HTTPException(status_code=404, detail="Model not found")

            except HTTPException:
                raise
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/models/{model_id}/generate")
        async def generate_text(model_id: str, request: TextGenerationRequest):
            """Generate text using a model"""
            try:
                start_time = time.time()

                generated_text = self.model_manager.generate_text(
                    model_id=model_id,
                    prompt=request.prompt,
                    max_length=request.max_length or 512,
                    temperature=request.temperature or 0.7,
                    top_p=request.top_p or 0.9,
                    top_k=request.top_k or 50,
                )

                execution_time = int((time.time() - start_time) * 1000)

                # Record inference
                self.model_manager.db.record_inference(
                    model_id=model_id,
                    request_type="text-generation",
                    input_data=request.prompt,
                    output_data=generated_text,
                    execution_time_ms=execution_time,
                )

                return {"generated_text": generated_text, "execution_time_ms": execution_time}

            except Exception as e:
                # Record error
                self.model_manager.db.record_inference(
                    model_id=model_id,
                    request_type="text-generation",
                    input_data=request.prompt,
                    error_message=str(e),
                )
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/models/{model_id}/classify")
        async def classify_text(model_id: str, request: TextClassificationRequest):
            """Classify text using a model"""
            try:
                start_time = time.time()

                classifications = self.model_manager.classify_text(
                    model_id=model_id, text=request.text
                )

                execution_time = int((time.time() - start_time) * 1000)

                # Record inference
                self.model_manager.db.record_inference(
                    model_id=model_id,
                    request_type="text-classification",
                    input_data=request.text,
                    output_data=json.dumps(classifications),
                    execution_time_ms=execution_time,
                )

                return {"classifications": classifications, "execution_time_ms": execution_time}

            except Exception as e:
                # Record error
                self.model_manager.db.record_inference(
                    model_id=model_id,
                    request_type="text-classification",
                    input_data=request.text,
                    error_message=str(e),
                )
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/models/{model_id}/translate")
        async def translate_text(model_id: str, request: TranslationRequest):
            """Translate text using a model"""
            try:
                start_time = time.time()

                translated_text = self.model_manager.translate_text(
                    model_id=model_id,
                    text=request.text,
                    source_lang=request.source_lang,
                    target_lang=request.target_lang,
                )

                execution_time = int((time.time() - start_time) * 1000)

                # Record inference
                self.model_manager.db.record_inference(
                    model_id=model_id,
                    request_type="translation",
                    input_data=request.text,
                    output_data=translated_text,
                    execution_time_ms=execution_time,
                )

                return {"translated_text": translated_text, "execution_time_ms": execution_time}

            except Exception as e:
                # Record error
                self.model_manager.db.record_inference(
                    model_id=model_id,
                    request_type="translation",
                    input_data=request.text,
                    error_message=str(e),
                )
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.get("/health")
        async def health_check():
            """Health check endpoint"""
            return {
                "status": "healthy",
                "models_loaded": len(self.model_manager.loaded_models),
                "memory_usage_mb": sum(
                    model_data["model_info"].memory_usage_mb
                    for model_data in self.model_manager.loaded_models.values()
                ),
            }

        # Lightweight server endpoints
        @self.app.get("/lightweight/models")
        async def list_lightweight_models():
            """List available lightweight models"""
            return {
                "models": LIGHTWEIGHT_MODELS,
                "downloaded": self.lightweight_server.downloader.get_downloaded_models(),
                "loaded": list(self.lightweight_server.loaded_models.keys()),
            }

        @self.app.post("/lightweight/models/{model_key}/download")
        async def download_lightweight_model(model_key: str):
            """Download a lightweight model"""
            if model_key not in LIGHTWEIGHT_MODELS:
                raise HTTPException(status_code=404, detail="Model not found")

            try:
                success = self.lightweight_server.download_and_load_model(model_key)
                if success:
                    return {"status": "downloaded", "model": model_key}
                else:
                    raise HTTPException(status_code=500, detail="Failed to download model")
            except HTTPException:
                raise
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/lightweight/start")
        async def start_lightweight_server():
            """Start the lightweight server"""
            try:
                self.lightweight_server.start_server()
                return {
                    "status": "started",
                    "port": self.lightweight_server.port,
                    "url": f"http://localhost:{self.lightweight_server.port}",
                }
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.get("/lightweight/status")
        async def lightweight_status():
            """Get lightweight server status"""
            return {
                "running": self.lightweight_server.running,
                "port": self.lightweight_server.port,
                "loaded_models": list(self.lightweight_server.loaded_models.keys()),
                "system_info": self.lightweight_server.get_system_info(),
            }

        # PDF processing endpoints
        @self.app.post("/pdf/extract-text")
        async def extract_pdf_text(request: Dict[str, Any]):
            """Extract text from PDF"""
            try:
                pdf_path = request.get("pdf_path")
                if not pdf_path:
                    raise HTTPException(status_code=400, detail="PDF path is required")

                result = self.pdf_processor.extract_text_from_pdf(pdf_path)
                return result
            except HTTPException:
                raise
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/pdf/process-with-ai")
        async def process_pdf_with_ai(request: Dict[str, Any]):
            """Process PDF with AI analysis"""
            try:
                pdf_path = request.get("pdf_path")
                model_key = request.get("model_key")

                if not pdf_path:
                    raise HTTPException(status_code=400, detail="PDF path is required")

                # Handle optional model_key parameter
                if model_key:
                    result = self.pdf_processor.process_pdf_with_ai(pdf_path, str(model_key))
                else:
                    result = self.pdf_processor.process_pdf_with_ai(pdf_path)
                return result
            except HTTPException:
                raise
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.get("/pdf/status")
        async def pdf_processor_status():
            """Get PDF processor status"""
            return self.pdf_processor.get_service_status()

        # Embedding endpoints
        @self.app.post("/embed/text")
        async def embed_text(request: Dict[str, Any]):
            """Embed text using lightweight embedder"""
            try:
                text = request.get("text")
                method = request.get("method")

                if not text:
                    raise HTTPException(status_code=400, detail="Text is required")

                # Handle optional method parameter
                if method:
                    result = self.embedder.embed_text(text, str(method))
                else:
                    result = self.embedder.embed_text(text)
                return result
            except HTTPException:
                raise
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/embed/document")
        async def embed_document(request: Dict[str, Any]):
            """Embed document using lightweight embedder"""
            try:
                text = request.get("text")
                chunk_size = request.get("chunk_size", 1000)

                if not text:
                    raise HTTPException(status_code=400, detail="Text is required")

                result = self.embedder.embed_document(text, chunk_size)
                return result
            except HTTPException:
                raise
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.post("/embed/search")
        async def search_embeddings(request: Dict[str, Any]):
            """Search similar documents using embeddings"""
            try:
                query = request.get("query")
                embeddings = request.get("embeddings", [])
                top_k = request.get("top_k", 5)

                if not query:
                    raise HTTPException(status_code=400, detail="Query is required")

                results = self.embedder.search_similar(query, embeddings, top_k)
                return {"results": results}
            except HTTPException:
                raise
            except Exception as e:
                raise HTTPException(status_code=500, detail=str(e))

        @self.app.get("/embed/status")
        async def embedder_status():
            """Get embedder status"""
            return self.embedder.get_status()

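As an aside (not part of the package source): a minimal client sketch against the routes above, assuming the service listens on DEFAULT_CONFIG's host and port and that the model path is resolvable by transformers.

    import requests

    BASE = "http://localhost:8000"
    print(requests.get(f"{BASE}/health").json())

    r = requests.post(f"{BASE}/models", json={
        "name": "demo",                   # hypothetical
        "model_type": "text-generation",
        "model_path": "gpt2",             # assumption
    })
    model_id = r.json()["model_id"]
    out = requests.post(f"{BASE}/models/{model_id}/generate",
                        json={"prompt": "Hello", "max_length": 40})
    print(out.json()["generated_text"])
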
    def start(self):
        """Start the model service"""
        if self.running:
            logger.info("Model service is already running")
            return

        # Check if already running
        if self.pid_file.exists():
            try:
                with open(self.pid_file, "r") as f:
                    pid = int(f.read().strip())
                if psutil.pid_exists(pid):
                    logger.info(f"Model service already running with PID {pid}")
                    return
            except Exception:
                pass

        # Start service
        self.running = True

        # Write PID file
        with open(self.pid_file, "w") as f:
            f.write(str(os.getpid()))

        logger.info(f"Model service started with PID {os.getpid()}")
        logger.info(f"API available at http://{self.config['host']}:{self.config['port']}")

        # Set up signal handlers
        signal.signal(signal.SIGTERM, self._signal_handler)
        signal.signal(signal.SIGINT, self._signal_handler)

        # Start FastAPI server
        try:
            uvicorn.run(
                self.app,
                host=self.config["host"],
                port=self.config["port"],
                log_level=self.config["log_level"].lower(),
            )
        except KeyboardInterrupt:
            logger.info("Model service interrupted")
        finally:
            self.stop()

    def stop(self):
        """Stop the model service"""
        if not self.running:
            return

        self.running = False

        # Unload all models
        for model_id in list(self.model_manager.loaded_models.keys()):
            self.model_manager.unload_model(model_id)

        # Remove PID file
        if self.pid_file.exists():
            self.pid_file.unlink()

        logger.info("Model service stopped")

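    # Illustrative only: stop() is normally reached via the signal handlers
    # registered in start(). A shell-level sketch of the same flow, using the
    # PID file path mirrored by the `stop` CLI command below:
    #
    #   kill -TERM "$(cat ~/.local/mcli/model_service/model_service.pid)"
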
    def _signal_handler(self, signum, frame):
        """Handle shutdown signals"""
        logger.info(f"Received signal {signum}, shutting down...")
        self.stop()
        sys.exit(0)

    def status(self) -> Dict[str, Any]:
        """Get service status"""
        is_running = False
        pid = None

        if self.pid_file.exists():
            try:
                with open(self.pid_file, "r") as f:
                    pid = int(f.read().strip())
                is_running = psutil.pid_exists(pid)
            except Exception:
                pass

        return {
            "running": is_running,
            "pid": pid,
            "pid_file": str(self.pid_file),
            "models_loaded": len(self.model_manager.loaded_models),
            "api_url": f"http://{self.config['host']}:{self.config['port']}",
        }


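# Illustrative only -- reading the service state programmatically, assuming a
# default-constructed ModelService as the CLI commands below use:
#
#   svc = ModelService()
#   info = svc.status()  # e.g. {"running": False, "pid": None, "models_loaded": 0, ...}
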
# CLI Commands
import click


@click.group(name="model-service")
def model_service():
    """Model service daemon for hosting language models"""
    pass

@model_service.command()
@click.option("--config", help="Path to configuration file")
@click.option("--host", default="0.0.0.0", help="Host to bind to")
@click.option("--port", default=8000, help="Port to bind to")
@click.option("--models-dir", default="./models", help="Directory for model storage")
def start(config: Optional[str], host: str, port: int, models_dir: str):
    """Start the model service daemon"""
    # Load config if provided
    service_config = DEFAULT_CONFIG.copy()
    if config:
        try:
            config_data = read_from_toml(config, "model_service")
            if config_data:
                service_config.update(config_data)
        except Exception as e:
            logger.warning(f"Could not load config from {config}: {e}")

    # Override with command line options
    service_config["host"] = host
    service_config["port"] = port
    service_config["models_dir"] = models_dir

    service = ModelService(service_config)
    service.start()

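# Illustrative only: an example invocation, assuming this group is registered
# under the mcli entry point:
#
#   mcli model-service start --host 127.0.0.1 --port 8000 --models-dir ~/models
#
# A hypothetical config file consumed via read_from_toml(config, "model_service");
# the key names mirror the settings used above, but the exact schema is an assumption:
#
#   [model_service]
#   host = "127.0.0.1"
#   port = 8000
#   models_dir = "~/models"
#   log_level = "INFO"
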
@model_service.command()
def stop():
    """Stop the model service daemon"""
    pid_file = Path.home() / ".local" / "mcli" / "model_service" / "model_service.pid"

    if not pid_file.exists():
        click.echo("Model service is not running")
        return

    try:
        with open(pid_file, "r") as f:
            pid = int(f.read().strip())

        # Send SIGTERM
        os.kill(pid, signal.SIGTERM)
        click.echo(f"Sent stop signal to model service (PID {pid})")

        # Wait a bit and check if it stopped
        time.sleep(2)
        if not psutil.pid_exists(pid):
            click.echo("Model service stopped successfully")
        else:
            click.echo("Model service may still be running")

    except Exception as e:
        click.echo(f"Error stopping model service: {e}")

@model_service.command()
def status():
    """Show model service status"""
    service = ModelService()
    status_info = service.status()

    if status_info["running"]:
        click.echo(f"✅ Model service is running (PID: {status_info['pid']})")
        click.echo(f"🌐 API available at: {status_info['api_url']}")
        click.echo(f"📊 Models loaded: {status_info['models_loaded']}")
    else:
        click.echo("❌ Model service is not running")

    click.echo(f"📄 PID file: {status_info['pid_file']}")

@model_service.command()
@click.option("--summary", is_flag=True, help="Show summary statistics")
def list_models(summary: bool = False):
    """List all models in the service"""
    service = ModelService()

    try:
        if summary:
            # Show summary
            summary_data = service.model_manager.get_models_summary()
            click.echo("📊 Model Service Summary")
            click.echo("=" * 50)
            click.echo(f"Total Models: {summary_data['total_models']}")
            click.echo(f"Loaded Models: {summary_data['loaded_models']}")
            click.echo(f"Total Memory: {summary_data['total_memory_mb']:.1f} MB")
            click.echo()

            if summary_data["models_by_type"]:
                click.echo("Models by Type:")
                for model_type, stats in summary_data["models_by_type"].items():
                    click.echo(
                        f"  {model_type}: {stats['loaded']}/{stats['count']} loaded ({stats['memory_mb']:.1f} MB)"
                    )
                click.echo()
        else:
            # Show detailed list
            models = service.model_manager.db.get_all_models()

            if not models:
                click.echo("📋 No models found in the service")
                return

            click.echo(f"📋 Found {len(models)} model(s):")
            click.echo("=" * 80)

            for model in models:
                status_icon = "🟢" if model.is_loaded else "⚪"
                click.echo(f"{status_icon} {model.name} (ID: {model.id})")
                click.echo(f"   Type: {model.model_type}")
                click.echo(f"   Path: {model.model_path}")
                if model.tokenizer_path:
                    click.echo(f"   Tokenizer: {model.tokenizer_path}")
                click.echo(f"   Device: {model.device}")
                click.echo(f"   Loaded: {'Yes' if model.is_loaded else 'No'}")
                if model.is_loaded:
                    click.echo(f"   Memory: {model.memory_usage_mb:.1f} MB")
                    click.echo(f"   Parameters: {model.parameters_count:,}")
                click.echo(f"   Created: {model.created_at.strftime('%Y-%m-%d %H:%M:%S')}")
                click.echo()

    except Exception as e:
        click.echo(f"❌ Error listing models: {e}")

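# Illustrative only (entry-point name assumed; Click >= 7 derives dashed
# command names from the function names):
#
#   mcli model-service list-models            # detailed listing
#   mcli model-service list-models --summary  # aggregate statistics
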
@model_service.command()
@click.argument("model_path")
@click.option("--name", required=True, help="Model name")
@click.option(
    "--type",
    "model_type",
    required=True,
    help="Model type (text-generation, text-classification, translation)",
)
@click.option("--tokenizer-path", help="Path to tokenizer (optional)")
@click.option("--device", default="auto", help="Device to use (cpu, cuda, auto)")
def add_model(
    model_path: str, name: str, model_type: str, tokenizer_path: str = "", device: str = "auto"
):
    """Add a model to the service"""
    service = ModelService()

    try:
        model_info = ModelInfo(
            id=str(uuid.uuid4()),
            name=name,
            model_type=model_type,
            model_path=model_path,
            tokenizer_path=tokenizer_path,
            device=device,
        )

        # Add to database
        model_id = service.model_manager.db.add_model(model_info)
        click.echo(f"✅ Model '{name}' added with ID: {model_id}")

        # Try to load the model
        if service.model_manager.load_model(model_info):
            click.echo(f"✅ Model '{name}' loaded successfully")
        else:
            click.echo(f"⚠️ Model '{name}' added but failed to load")

    except Exception as e:
        click.echo(f"❌ Error adding model: {e}")

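# Illustrative only; the path and name are placeholders:
#
#   mcli model-service add-model ./models/my-model.bin \
#       --name my-model --type text-generation --device cpu
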
@model_service.command()
@click.argument("model_url")
@click.option("--name", required=True, help="Model name")
@click.option(
    "--type",
    "model_type",
    required=True,
    help="Model type (text-generation, text-classification, translation)",
)
@click.option("--tokenizer-url", help="URL to tokenizer (optional)")
@click.option("--device", default="auto", help="Device to use (cpu, cuda, auto)")
@click.option("--max-length", default=512, help="Maximum sequence length")
@click.option("--temperature", default=0.7, help="Temperature for generation")
@click.option("--top-p", default=0.9, help="Top-p for generation")
@click.option("--top-k", default=50, help="Top-k for generation")
def add_model_from_url(
    model_url: str,
    name: str,
    model_type: str,
    tokenizer_url: str = "",
    device: str = "auto",
    max_length: int = 512,
    temperature: float = 0.7,
    top_p: float = 0.9,
    top_k: int = 50,
):
    """Add a model from URL to the service"""
    service = ModelService()

    try:
        click.echo(f"📥 Downloading model from: {model_url}")
        if tokenizer_url:
            click.echo(f"📥 Downloading tokenizer from: {tokenizer_url}")

        model_id = service.model_manager.add_model_from_url(
            name=name,
            model_type=model_type,
            model_url=model_url,
            tokenizer_url=tokenizer_url,
            device=device,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
        )

        click.echo(f"✅ Model '{name}' downloaded and added with ID: {model_id}")

    except Exception as e:
        click.echo(f"❌ Error adding model from URL: {e}")

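# Illustrative only; the URL is a placeholder:
#
#   mcli model-service add-model-from-url https://example.com/model.bin \
#       --name remote-model --type text-generation \
#       --max-length 512 --temperature 0.7 --top-p 0.9 --top-k 50
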
@model_service.command()
@click.argument("model_id")
@click.option("--name", help="New model name")
@click.option("--temperature", type=float, help="New temperature value")
@click.option("--max-length", type=int, help="New max length value")
@click.option("--top-p", type=float, help="New top-p value")
@click.option("--top-k", type=int, help="New top-k value")
@click.option("--device", help="New device setting")
def update_model(
    model_id: str,
    name: Optional[str] = None,
    temperature: Optional[float] = None,
    max_length: Optional[int] = None,
    top_p: Optional[float] = None,
    top_k: Optional[int] = None,
    device: Optional[str] = None,
):
    """Update model configuration"""
    service = ModelService()

    try:
        # Get current model info
        model_info = service.model_manager.db.get_model(model_id)
        if not model_info:
            click.echo(f"❌ Model {model_id} not found")
            return

        # Build updates
        updates = {}
        if name is not None:
            updates["name"] = name
        if temperature is not None:
            updates["temperature"] = temperature
        if max_length is not None:
            updates["max_length"] = max_length
        if top_p is not None:
            updates["top_p"] = top_p
        if top_k is not None:
            updates["top_k"] = top_k
        if device is not None:
            updates["device"] = device

        if not updates:
            click.echo("❌ No updates specified. Use --help to see available options.")
            return

        # Update model
        for key, value in updates.items():
            setattr(model_info, key, value)

        success = service.model_manager.db.update_model(model_info)
        if success:
            click.echo(f"✅ Model {model_id} updated successfully!")
            click.echo("Updated parameters:")
            for key, value in updates.items():
                click.echo(f"  {key}: {value}")
        else:
            click.echo(f"❌ Failed to update model {model_id}")

    except Exception as e:
        click.echo(f"❌ Error updating model: {e}")

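# Illustrative only; <MODEL_ID> is a placeholder:
#
#   mcli model-service update-model <MODEL_ID> --temperature 0.5 --max-length 1024
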
@model_service.command()
@click.argument("model_id")
@click.option("--force", is_flag=True, help="Force removal without confirmation")
def remove_model(model_id: str, force: bool = False):
    """Remove a model from the service"""
    service = ModelService()

    try:
        # Get model info first
        model_info = service.model_manager.db.get_model(model_id)
        if not model_info:
            click.echo(f"❌ Model {model_id} not found")
            return

        if not force:
            click.echo("Model to remove:")
            click.echo(f"  Name: {model_info.name}")
            click.echo(f"  Type: {model_info.model_type}")
            click.echo(f"  Path: {model_info.model_path}")
            click.echo(f"  Loaded: {'Yes' if model_info.is_loaded else 'No'}")

            if not click.confirm("Are you sure you want to remove this model?"):
                click.echo("Operation cancelled.")
                return

        # First unload if loaded
        if model_info.is_loaded:
            service.model_manager.unload_model(model_id)
            click.echo(f"✅ Model {model_id} unloaded")

        # Remove from database
        success = service.model_manager.db.delete_model(model_id)
        if success:
            click.echo(f"✅ Model {model_id} removed successfully!")
        else:
            click.echo(f"❌ Failed to remove model {model_id}")

    except Exception as e:
        click.echo(f"❌ Error removing model: {e}")

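# Illustrative only:
#
#   mcli model-service remove-model <MODEL_ID>          # prompts for confirmation
#   mcli model-service remove-model <MODEL_ID> --force  # skips the prompt
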
# Lightweight server commands
@model_service.command()
@click.option("--list", "list_flag", is_flag=True, help="List available lightweight models")
@click.option("--download", help="Download a specific lightweight model")
@click.option("--auto", is_flag=True, help="Automatically select best model for your system")
@click.option("--start-server", is_flag=True, help="Start the lightweight server")
@click.option("--port", default=8080, help="Port for lightweight server")
def lightweight(list_flag: bool, download: str, auto: bool, start_server: bool, port: int):
    """Manage lightweight models and server"""
    service = ModelService()

    if list_flag:
        click.echo("📋 Available Lightweight Models:")
        click.echo("=" * 60)

        for key, info in LIGHTWEIGHT_MODELS.items():
            status = (
                "✅ Downloaded"
                if key in service.lightweight_server.loaded_models
                else "⏳ Not downloaded"
            )
            click.echo(f"{status} - {info['name']} ({info['parameters']})")
            click.echo(
                f"   Size: {info['size_mb']} MB | Efficiency: {info['efficiency_score']}/10"
            )
            click.echo(f"   Type: {info['model_type']} | Tags: {', '.join(info['tags'])}")
            click.echo()
        return

    if download:
        if download not in LIGHTWEIGHT_MODELS:
            click.echo(f"❌ Model '{download}' not found")
            click.echo("Available models:")
            for key in LIGHTWEIGHT_MODELS.keys():
                click.echo(f"  {key}")
            return

        click.echo(f"📥 Downloading {download}...")
        success = service.lightweight_server.download_and_load_model(download)
        if success:
            click.echo(f"✅ Model '{download}' downloaded successfully!")
        else:
            click.echo(f"❌ Failed to download model '{download}'")
        return

    if auto:
        recommended = service.lightweight_server.recommend_model()
        click.echo(f"🎯 Recommended model: {recommended}")
        click.echo(f"📥 Downloading {recommended}...")
        success = service.lightweight_server.download_and_load_model(recommended)
        if success:
            click.echo(f"✅ Model '{recommended}' downloaded successfully!")
        else:
            click.echo(f"❌ Failed to download model '{recommended}'")
        return

    if start_server:
        click.echo(f"🚀 Starting lightweight server on port {port}...")
        service.lightweight_server.port = port
        service.lightweight_server.start_server()

        click.echo("✅ Server started!")
        click.echo(f"🌐 API: http://localhost:{port}")
        click.echo(f"📊 Health: http://localhost:{port}/health")
        click.echo(f"📋 Models: http://localhost:{port}/models")

        try:
            while True:
                time.sleep(1)
        except KeyboardInterrupt:
            click.echo("\n🛑 Server stopped")
        return

    # Show help if no options provided
    click.echo("Lightweight model server commands:")
    click.echo("  --list           List available models")
    click.echo("  --download MODEL Download a specific model")
    click.echo("  --auto           Download recommended model for your system")
    click.echo("  --start-server   Start the lightweight server")

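# Illustrative only; <MODEL_KEY> stands for a key from LIGHTWEIGHT_MODELS:
#
#   mcli model-service lightweight --list
#   mcli model-service lightweight --auto
#   mcli model-service lightweight --download <MODEL_KEY>
#   mcli model-service lightweight --start-server --port 8080
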
@model_service.command()
@click.option(
    "--model",
    type=click.Choice(list(LIGHTWEIGHT_MODELS.keys())),
    help="Specific model to download and run",
)
@click.option(
    "--auto", is_flag=True, default=True, help="Automatically select best model for your system"
)
@click.option("--port", default=8080, help="Port to run server on")
@click.option("--list-models", is_flag=True, help="List available models")
@click.option("--download-only", is_flag=True, help="Only download models, don't start server")
def lightweight_run(
    model: Optional[str], auto: bool, port: int, list_models: bool, download_only: bool
):
    """Run lightweight model server (standalone mode)"""
    service = ModelService()

    click.echo("🚀 MCLI Lightweight Model Server")
    click.echo("=" * 50)

    if list_models:
        service.lightweight_server.list_models()
        return 0

    # Get system info and recommend model
    if model:
        selected_model = model
        click.echo(f"🎯 Using specified model: {selected_model}")
    elif auto:
        selected_model = service.lightweight_server.recommend_model()
        click.echo(f"🎯 Recommended model: {selected_model}")
    else:
        click.echo("Available models:")
        for key, info in LIGHTWEIGHT_MODELS.items():
            click.echo(f"  {key}: {info['name']} ({info['parameters']})")
        selected_model = click.prompt(
            "Select model", type=click.Choice(list(LIGHTWEIGHT_MODELS.keys()))
        )

    # Download and load model
    if not service.lightweight_server.download_and_load_model(selected_model):
        click.echo("❌ Failed to download model")
        return 1

    if download_only:
        click.echo("✅ Model downloaded successfully")
        return 0

    # Start server
    click.echo(f"\n🚀 Starting lightweight server on port {port}...")
    service.lightweight_server.port = port
    service.lightweight_server.start_server()

    click.echo("\n📋 Usage:")
    click.echo(f"  - API: http://localhost:{port}")
    click.echo(f"  - Health: http://localhost:{port}/health")
    click.echo(f"  - Models: http://localhost:{port}/models")

    try:
        # Keep server running
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        click.echo("\n🛑 Server stopped")

# PDF processing commands
@model_service.command()
@click.argument("pdf_path")
@click.option("--model", help="Specific model to use for AI analysis")
@click.option("--extract-only", is_flag=True, help="Only extract text, no AI analysis")
def process_pdf(pdf_path: str, model: str, extract_only: bool):
    """Process PDF with AI analysis"""
    service = ModelService()

    try:
        if extract_only:
            click.echo(f"📄 Extracting text from: {pdf_path}")
            result = service.pdf_processor.extract_text_from_pdf(pdf_path)
        else:
            click.echo(f"🤖 Processing PDF with AI: {pdf_path}")
            if model:
                click.echo(f"🎯 Using model: {model}")
                result = service.pdf_processor.process_pdf_with_ai(pdf_path, model)
            else:
                result = service.pdf_processor.process_pdf_with_ai(pdf_path)

        if result.get("success"):
            if extract_only:
                click.echo(f"✅ Text extracted: {result['text_length']} characters")
                click.echo(f"📝 Preview: {result['text'][:200]}...")
            else:
                analysis = result["pdf_analysis"]["ai_analysis"]
                click.echo("✅ PDF processed successfully!")
                click.echo(f"📄 Document type: {analysis['document_type']}")
                click.echo(f"📝 Summary: {analysis['summary'][:200]}...")
                click.echo(f"📋 Key topics: {', '.join(analysis['key_topics'])}")
                click.echo(f"📊 Complexity score: {analysis['complexity_score']:.2f}")
        else:
            click.echo(f"❌ Error: {result.get('error', 'Unknown error')}")

    except Exception as e:
        click.echo(f"❌ Error processing PDF: {e}")

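# Illustrative only; the path is a placeholder:
#
#   mcli model-service process-pdf ./report.pdf --extract-only
#   mcli model-service process-pdf ./report.pdf --model <MODEL_KEY>
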
@model_service.command()
@click.option("--port", default=8080, help="Port for PDF processing service")
def start_pdf_service(port: int):
    """Start PDF processing service"""
    service = ModelService()

    try:
        click.echo(f"🚀 Starting PDF processing service on port {port}...")
        success = service.pdf_processor.start_pdf_processing_service(port)

        if success:
            click.echo("✅ PDF processing service started!")
            click.echo(f"🌐 API: http://localhost:{port}")
            click.echo(f"📊 Status: http://localhost:{port}/status")

            try:
                while True:
                    time.sleep(1)
            except KeyboardInterrupt:
                click.echo("\n🛑 PDF processing service stopped")
        else:
            click.echo("❌ Failed to start PDF processing service")

    except Exception as e:
        click.echo(f"❌ Error starting PDF service: {e}")

# Embedding commands
@model_service.command()
@click.argument("text")
@click.option("--method", help="Embedding method (sentence_transformers, tfidf, simple_hash)")
def embed_text(text: str, method: str):
    """Embed text using lightweight embedder"""
    service = ModelService()

    try:
        click.echo(f"🤖 Embedding text: {text[:50]}...")
        if method:
            click.echo(f"🎯 Using method: {method}")
            result = service.embedder.embed_text(text, method)
        else:
            result = service.embedder.embed_text(text)

        if result:
            click.echo("✅ Text embedded successfully!")
            click.echo(f"📊 Method: {result['method']}")
            click.echo(f"📏 Dimensions: {result['dimensions']}")
            click.echo(f"📝 Text length: {result['text_length']}")
        else:
            click.echo("❌ Failed to embed text")

    except Exception as e:
        click.echo(f"❌ Error embedding text: {e}")

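# Illustrative only; the methods mirror the --method help text:
#
#   mcli model-service embed-text "hello world" --method tfidf
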
@model_service.command()
@click.argument("text")
@click.option("--chunk-size", default=1000, help="Chunk size for document embedding")
def embed_document(text: str, chunk_size: int):
    """Embed document using lightweight embedder"""
    service = ModelService()

    try:
        click.echo(f"📄 Embedding document: {text[:50]}...")
        result = service.embedder.embed_document(text, chunk_size)

        if result.get("success"):
            doc_embedding = result["document_embedding"]
            click.echo("✅ Document embedded successfully!")
            click.echo(f"📊 Method: {doc_embedding['method']}")
            click.echo(f"📄 Total chunks: {doc_embedding['total_chunks']}")
            click.echo(f"📝 Text length: {doc_embedding['total_text_length']}")
        else:
            click.echo(f"❌ Failed to embed document: {result.get('error', 'Unknown error')}")

    except Exception as e:
        click.echo(f"❌ Error embedding document: {e}")

@model_service.command()
def embedder_status():
    """Show embedder status"""
    service = ModelService()

    try:
        status = service.embedder.get_status()
        click.echo("🤖 Lightweight Embedder Status")
        click.echo("=" * 40)
        click.echo(f"Current method: {status['current_method']}")
        click.echo(f"Models directory: {status['models_dir']}")
        click.echo(f"Cache size: {status['cache_size']}")
        click.echo("\nAvailable methods:")
        for method, available in status["available_methods"].items():
            status_icon = "✅" if available else "❌"
            click.echo(f"  {status_icon} {method}")

    except Exception as e:
        click.echo(f"❌ Error getting embedder status: {e}")

if __name__ == "__main__":
    model_service()