cortex_llm-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortex/__init__.py +73 -0
- cortex/__main__.py +83 -0
- cortex/config.py +329 -0
- cortex/conversation_manager.py +468 -0
- cortex/fine_tuning/__init__.py +8 -0
- cortex/fine_tuning/dataset.py +332 -0
- cortex/fine_tuning/mlx_lora_trainer.py +502 -0
- cortex/fine_tuning/trainer.py +957 -0
- cortex/fine_tuning/wizard.py +707 -0
- cortex/gpu_validator.py +467 -0
- cortex/inference_engine.py +727 -0
- cortex/metal/__init__.py +275 -0
- cortex/metal/gpu_validator.py +177 -0
- cortex/metal/memory_pool.py +886 -0
- cortex/metal/mlx_accelerator.py +678 -0
- cortex/metal/mlx_converter.py +638 -0
- cortex/metal/mps_optimizer.py +417 -0
- cortex/metal/optimizer.py +665 -0
- cortex/metal/performance_profiler.py +364 -0
- cortex/model_downloader.py +130 -0
- cortex/model_manager.py +2187 -0
- cortex/quantization/__init__.py +5 -0
- cortex/quantization/dynamic_quantizer.py +736 -0
- cortex/template_registry/__init__.py +15 -0
- cortex/template_registry/auto_detector.py +144 -0
- cortex/template_registry/config_manager.py +234 -0
- cortex/template_registry/interactive.py +260 -0
- cortex/template_registry/registry.py +347 -0
- cortex/template_registry/template_profiles/__init__.py +5 -0
- cortex/template_registry/template_profiles/base.py +142 -0
- cortex/template_registry/template_profiles/complex/__init__.py +5 -0
- cortex/template_registry/template_profiles/complex/reasoning.py +263 -0
- cortex/template_registry/template_profiles/standard/__init__.py +9 -0
- cortex/template_registry/template_profiles/standard/alpaca.py +73 -0
- cortex/template_registry/template_profiles/standard/chatml.py +82 -0
- cortex/template_registry/template_profiles/standard/gemma.py +103 -0
- cortex/template_registry/template_profiles/standard/llama.py +87 -0
- cortex/template_registry/template_profiles/standard/simple.py +65 -0
- cortex/ui/__init__.py +120 -0
- cortex/ui/cli.py +1685 -0
- cortex/ui/markdown_render.py +185 -0
- cortex/ui/terminal_app.py +534 -0
- cortex_llm-1.0.0.dist-info/METADATA +275 -0
- cortex_llm-1.0.0.dist-info/RECORD +48 -0
- cortex_llm-1.0.0.dist-info/WHEEL +5 -0
- cortex_llm-1.0.0.dist-info/entry_points.txt +2 -0
- cortex_llm-1.0.0.dist-info/licenses/LICENSE +21 -0
- cortex_llm-1.0.0.dist-info/top_level.txt +1 -0
--- /dev/null
+++ cortex/ui/terminal_app.py
@@ -0,0 +1,534 @@
"""Terminal application UI using Textual."""

from typing import Optional, List
from datetime import datetime
import threading

from textual.app import App, ComposeResult
from textual.binding import Binding
from textual.containers import ScrollableContainer
from textual.widgets import Header, Footer, Input, Static
from textual.reactive import reactive
from rich.text import Text
from rich.console import Group

from cortex.config import Config
from cortex.gpu_validator import GPUValidator
from cortex.model_manager import ModelManager
from cortex.inference_engine import InferenceEngine, GenerationRequest
from cortex.conversation_manager import ConversationManager, MessageRole
from cortex.ui import UIComponents
from cortex.ui.markdown_render import ThinkMarkdown

class MessageDisplay(Static):
    """Widget to display a single message."""

    def __init__(self, role: str, content: str, timestamp: datetime, **kwargs):
        super().__init__(**kwargs)
        self.role = role
        self.content = content
        self.timestamp = timestamp

    def render(self):
        """Render the message."""
        role_colors = {
            "system": "yellow",
            "user": "cyan",
            "assistant": "green"
        }

        role_text = Text(f"{self.role.title()}", style=f"bold {role_colors.get(self.role, 'white')}")
        timestamp_text = Text(f" ({self.timestamp.strftime('%H:%M:%S')})", style="dim")

        header = Text()
        header.append(role_text)
        header.append(timestamp_text)
        header.append("\n")

        content_renderable = ThinkMarkdown(
            self.content,
            code_theme="monokai",
            use_line_numbers=True,
        )

        return Group(header, content_renderable)

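MessageDisplay builds its output from two Rich renderables: a styled header Text and a ThinkMarkdown body, stacked vertically with rich.console.Group. ThinkMarkdown is cortex's own Markdown subclass (defined in cortex/ui/markdown_render.py, shown elsewhere in this diff); a minimal stand-alone sketch of the same composition, substituting plain rich.markdown.Markdown for it:

    # Illustrative sketch, not part of the package: rich.markdown.Markdown
    # stands in for cortex's ThinkMarkdown subclass.
    from datetime import datetime

    from rich.console import Console, Group
    from rich.markdown import Markdown
    from rich.text import Text

    header = Text("Assistant", style="bold green")
    header.append(f" ({datetime.now().strftime('%H:%M:%S')})", style="dim")

    body = Markdown("Some **markdown** with `inline code`.")

    # Group stacks its renderables top to bottom, so the header prints above the body.
    Console().print(Group(header, body))
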
class ConversationView(ScrollableContainer):
    """Widget to display conversation messages."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.messages: List[MessageDisplay] = []

    def add_message(self, role: str, content: str, timestamp: Optional[datetime] = None):
        """Add a message to the conversation view."""
        timestamp = timestamp or datetime.now()
        message_widget = MessageDisplay(role, content, timestamp)
        self.messages.append(message_widget)
        self.mount(message_widget)
        self.scroll_end()

    def clear_messages(self):
        """Clear all messages."""
        for message in self.messages:
            message.remove()
        self.messages.clear()

    def update_last_message(self, content: str):
        """Update the content of the last message."""
        if self.messages:
            last_message = self.messages[-1]
            last_message.content = content
            last_message.refresh()

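ConversationView uses Textual's dynamic-mounting pattern: widgets constructed after startup are attached with mount(), and the container is kept pinned to the newest entry with scroll_end(). A self-contained sketch of that pattern (requires textual; the widget names here are invented for the demo):

    # Illustrative sketch, not part of the package: append Static widgets to a
    # ScrollableContainer at runtime, mirroring ConversationView.add_message().
    from textual.app import App, ComposeResult
    from textual.containers import ScrollableContainer
    from textual.widgets import Static


    class FeedApp(App):
        def compose(self) -> ComposeResult:
            yield ScrollableContainer(id="feed")

        def on_mount(self) -> None:
            # Add a new line once per second and keep the view at the bottom.
            self.counter = 0
            self.set_interval(1.0, self.add_line)

        def add_line(self) -> None:
            self.counter += 1
            feed = self.query_one("#feed", ScrollableContainer)
            feed.mount(Static(f"message {self.counter}"))
            feed.scroll_end()


    if __name__ == "__main__":
        FeedApp().run()
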
class StatusBar(Static):
    """Status bar showing model and performance info."""

    model_name = reactive("No model loaded")
    status = reactive("idle")
    tokens_per_second = reactive(0.0)
    gpu_utilization = reactive(0.0)
    memory_gb = reactive(0.0)

    def render(self) -> Text:
        """Render the status bar."""
        status_icons = UIComponents.STATUS_ICONS
        icon = status_icons.get(self.status, "⚪")

        if self.model_name != "No model loaded":
            perf_text = UIComponents.format_performance_metrics(
                self.tokens_per_second,
                self.gpu_utilization,
                self.memory_gb
            )
            return Text(f"{icon} {self.model_name} | {perf_text}")
        else:
            return Text(f"{icon} {self.model_name}")

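StatusBar's fields are Textual reactives: assigning to one invalidates the widget, so render() runs again with the new value and no explicit refresh() is needed. That is why update_status() and the generation worker further down can simply assign to status_bar.tokens_per_second. A minimal sketch of the mechanism:

    # Illustrative sketch, not part of the package: assigning to a reactive
    # attribute triggers an automatic re-render.
    from textual.app import App, ComposeResult
    from textual.reactive import reactive
    from textual.widgets import Static
    from rich.text import Text


    class Ticker(Static):
        tokens_per_second = reactive(0.0)

        def render(self) -> Text:
            return Text(f"{self.tokens_per_second:.1f} tok/s")


    class TickerApp(App):
        def compose(self) -> ComposeResult:
            yield Ticker()

        def on_mount(self) -> None:
            # Bump the reactive once per second; no manual refresh() call.
            self.set_interval(1.0, self.bump)

        def bump(self) -> None:
            self.query_one(Ticker).tokens_per_second += 12.5


    if __name__ == "__main__":
        TickerApp().run()
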
class CommandInput(Input):
    """Input widget with command support."""

    def __init__(self, **kwargs):
        super().__init__(placeholder="Type your message or / for commands...", **kwargs)
        self.command_mode = False

    def on_key(self, event):
        """Handle key events."""
        if event.key == "escape":
            self.command_mode = False
            self.placeholder = "Type your message or / for commands..."
        elif event.key == "/" and len(self.value) == 0:
            self.command_mode = True
            self.placeholder = "Enter command..."

        return super().on_key(event)

class TerminalApp(App):
    """Main terminal application."""

    CSS = """
    Screen {
        background: $background;
    }

    Header {
        background: $primary;
        color: $text;
        height: 3;
    }

    Footer {
        background: $primary;
        color: $text;
        height: 2;
    }

    ConversationView {
        border: solid $border;
        padding: 1;
        margin: 1;
        height: 100%;
    }

    MessageDisplay {
        margin-bottom: 1;
        padding: 1;
    }

    StatusBar {
        dock: bottom;
        height: 1;
        background: $panel;
        color: $text;
        padding: 0 1;
    }

    CommandInput {
        dock: bottom;
        height: 3;
        margin: 0;
        width: 100%;
    }
    """

    BINDINGS = [
        Binding("ctrl+n", "new_conversation", "New Conversation"),
        Binding("ctrl+c", "cancel_generation", "Cancel"),
        Binding("ctrl+s", "save_conversation", "Save"),
        Binding("ctrl+l", "load_model", "Load Model"),
        Binding("ctrl+q", "quit", "Quit"),
        Binding("ctrl+h", "help", "Help"),
    ]

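The second argument of each Binding is an action name, which Textual resolves to a method named action_<name> on the app; the action_* methods at the bottom of this file are those targets. A tiny sketch of the convention (the demo names are hypothetical):

    # Illustrative sketch, not part of the package: Binding("ctrl+n", "greet", ...)
    # dispatches to a method named action_greet.
    from textual.app import App, ComposeResult
    from textual.binding import Binding
    from textual.widgets import Footer, Static


    class BindingDemo(App):
        BINDINGS = [Binding("ctrl+n", "greet", "Greet")]

        def compose(self) -> ComposeResult:
            yield Static("Press Ctrl+N", id="out")
            yield Footer()

        def action_greet(self) -> None:
            self.query_one("#out", Static).update("Hello from an action!")


    if __name__ == "__main__":
        BindingDemo().run()
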
    def __init__(
        self,
        config: Config,
        gpu_validator: GPUValidator,
        model_manager: ModelManager,
        inference_engine: InferenceEngine,
        conversation_manager: ConversationManager,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.config = config
        self.gpu_validator = gpu_validator
        self.model_manager = model_manager
        self.inference_engine = inference_engine
        self.conversation_manager = conversation_manager

        self.title = "Cortex - GPU-Accelerated LLM Terminal"
        self.sub_title = "Apple Silicon | Metal Performance Shaders"

        self.conversation_view: Optional[ConversationView] = None
        self.status_bar: Optional[StatusBar] = None
        self.input_widget: Optional[CommandInput] = None

        self.generating = False
        self.generation_thread: Optional[threading.Thread] = None

    def compose(self) -> ComposeResult:
        """Compose the UI."""
        yield Header()

        self.conversation_view = ConversationView()
        yield self.conversation_view

        self.status_bar = StatusBar()
        yield self.status_bar

        self.input_widget = CommandInput()
        yield self.input_widget

        yield Footer()

    async def on_mount(self):
        """Called when app is mounted."""
        self.conversation_manager.new_conversation()

        if self.config.ui.show_gpu_utilization:
            self.set_interval(1.0, self.update_status)

        if self.config.model.default_model:
            await self.load_default_model()

        self.conversation_view.add_message(
            "system",
            "Welcome to Cortex! Type your message or use / for commands.",
            datetime.now()
        )

    async def load_default_model(self):
        """Load the default model."""
        model_path = str(self.config.model.model_path / self.config.model.default_model)
        success, message = self.model_manager.load_model(model_path)

        if success:
            model_info = self.model_manager.get_current_model()
            if model_info:
                self.status_bar.model_name = model_info.name
                self.conversation_view.add_message(
                    "system",
                    f"Loaded model: {model_info.name} ({model_info.size_gb:.1f}GB)",
                    datetime.now()
                )
        else:
            self.conversation_view.add_message(
                "system",
                f"Failed to load default model: {message}",
                datetime.now()
            )

    async def on_input_submitted(self, event):
        """Handle input submission."""
        input_text = self.input_widget.value.strip()

        if not input_text:
            return

        self.input_widget.value = ""

        if input_text.startswith("/"):
            await self.handle_command(input_text)
        else:
            await self.handle_message(input_text)

    async def handle_command(self, command: str):
        """Handle slash commands."""
        parts = command.split(maxsplit=1)
        cmd = parts[0].lower()
        args = parts[1] if len(parts) > 1 else ""

        if cmd == "/model":
            await self.command_model(args)
        elif cmd == "/clear":
            self.command_clear()
        elif cmd == "/save":
            self.command_save()
        elif cmd == "/help":
            self.command_help()
        elif cmd == "/gpu":
            self.command_gpu_status()
        elif cmd == "/benchmark":
            await self.command_benchmark()
        elif cmd == "/quit":
            self.exit()
        else:
            self.conversation_view.add_message(
                "system",
                f"Unknown command: {cmd}",
                datetime.now()
            )

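handle_command routes through an if/elif chain, which reads fine at eight commands; a dict-based dispatch table is a common alternative once the command set grows, since it keeps registration and lookup in one place. A sketch of that variant (the helper names are hypothetical, not from the package):

    # Illustrative sketch, not part of the package: table-driven command
    # dispatch as an alternative to the if/elif chain above.
    import asyncio
    import inspect


    class Dispatcher:
        def __init__(self):
            self.handlers = {
                "/clear": self.cmd_clear,
                "/help": self.cmd_help,
            }

        async def handle_command(self, command: str) -> str:
            parts = command.split(maxsplit=1)
            cmd, args = parts[0].lower(), parts[1] if len(parts) > 1 else ""
            handler = self.handlers.get(cmd)
            if handler is None:
                return f"Unknown command: {cmd}"
            result = handler(args)
            # Await coroutine handlers; pass plain return values through.
            return await result if inspect.isawaitable(result) else result

        def cmd_clear(self, args: str) -> str:
            return "Conversation cleared."

        async def cmd_help(self, args: str) -> str:
            return "Available commands: /clear, /help"


    if __name__ == "__main__":
        print(asyncio.run(Dispatcher().handle_command("/help")))
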
    async def handle_message(self, message: str):
        """Handle user message and generate response."""
        if self.generating:
            self.conversation_view.add_message(
                "system",
                "Generation already in progress. Press Ctrl+C to cancel.",
                datetime.now()
            )
            return

        if not self.model_manager.current_model:
            self.conversation_view.add_message(
                "system",
                "No model loaded. Use /model <path> to load a model.",
                datetime.now()
            )
            return

        self.conversation_view.add_message("user", message, datetime.now())
        self.conversation_manager.add_message(MessageRole.USER, message)

        self.conversation_view.add_message("assistant", "", datetime.now())

        self.generating = True
        self.status_bar.status = "generating"

        request = GenerationRequest(
            prompt=message,
            max_tokens=self.config.inference.max_tokens,
            temperature=self.config.inference.temperature,
            top_p=self.config.inference.top_p,
            top_k=self.config.inference.top_k,
            repetition_penalty=self.config.inference.repetition_penalty,
            stream=True
        )

        generated_text = ""

        def generate_worker():
            nonlocal generated_text
            try:
                for token in self.inference_engine.generate(request):
                    generated_text += token
                    self.call_from_thread(
                        self.conversation_view.update_last_message,
                        generated_text
                    )

                self.conversation_manager.add_message(MessageRole.ASSISTANT, generated_text)

                if self.inference_engine.current_metrics:
                    metrics = self.inference_engine.current_metrics
                    self.status_bar.tokens_per_second = metrics.tokens_per_second
                    self.status_bar.gpu_utilization = metrics.gpu_utilization
                    self.status_bar.memory_gb = metrics.memory_used_gb

            except Exception as e:
                self.call_from_thread(
                    self.conversation_view.add_message,
                    "system",
                    f"Error during generation: {str(e)}",
                    datetime.now()
                )
            finally:
                self.generating = False
                self.status_bar.status = "idle"

        self.generation_thread = threading.Thread(target=generate_worker)
        self.generation_thread.start()

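Token streaming runs on a plain thread so the Textual event loop stays responsive, and each widget update from the worker is marshalled back with App.call_from_thread, since widget methods are not safe to call from other threads. A stripped-down sketch of the same pattern, with a fake generator standing in for InferenceEngine.generate():

    # Illustrative sketch, not part of the package: stream text from a worker
    # thread into a widget via call_from_thread. fake_tokens() is a stand-in
    # for InferenceEngine.generate().
    import threading
    import time

    from textual.app import App, ComposeResult
    from textual.widgets import Static


    def fake_tokens():
        for word in "streamed one token at a time".split():
            time.sleep(0.3)
            yield word + " "


    class StreamApp(App):
        def compose(self) -> ComposeResult:
            yield Static("", id="out")

        def on_mount(self) -> None:
            threading.Thread(target=self.worker, daemon=True).start()

        def worker(self) -> None:
            text = ""
            for token in fake_tokens():
                text += token
                # Marshal the update onto the UI thread.
                self.call_from_thread(self.query_one("#out", Static).update, text)


    if __name__ == "__main__":
        StreamApp().run()
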
    async def command_model(self, model_path: str):
        """Load a model."""
        if not model_path:
            models = self.model_manager.list_models()
            if models:
                model_list = "\n".join([f"- {m['name']} ({m['size_gb']:.1f}GB)" for m in models])
                self.conversation_view.add_message(
                    "system",
                    f"Loaded models:\n{model_list}",
                    datetime.now()
                )
            else:
                self.conversation_view.add_message(
                    "system",
                    "No models loaded. Use /model <path> to load a model.",
                    datetime.now()
                )
            return

        self.status_bar.status = "loading"
        success, message = self.model_manager.load_model(model_path)

        if success:
            model_info = self.model_manager.get_current_model()
            if model_info:
                self.status_bar.model_name = model_info.name
                self.conversation_view.add_message(
                    "system",
                    f"Loaded model: {model_info.name} ({model_info.size_gb:.1f}GB)",
                    datetime.now()
                )
        else:
            self.conversation_view.add_message(
                "system",
                f"Failed to load model: {message}",
                datetime.now()
            )

        self.status_bar.status = "idle"

    def command_clear(self):
        """Clear conversation."""
        self.conversation_view.clear_messages()
        self.conversation_manager.new_conversation()
        self.conversation_view.add_message(
            "system",
            "Conversation cleared.",
            datetime.now()
        )

    def command_save(self):
        """Save conversation."""
        try:
            export_data = self.conversation_manager.export_conversation(format="json")
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = self.config.conversation.save_directory / f"conversation_{timestamp}.json"

            with open(filename, 'w') as f:
                f.write(export_data)

            self.conversation_view.add_message(
                "system",
                f"Conversation saved to {filename}",
                datetime.now()
            )
        except Exception as e:
            self.conversation_view.add_message(
                "system",
                f"Failed to save conversation: {str(e)}",
                datetime.now()
            )

    def command_help(self):
        """Show help information."""
        help_text = "Available commands:\n"
        for cmd, desc in UIComponents.COMMANDS.items():
            help_text += f" {cmd:<15} - {desc}\n"

        help_text += "\nKeyboard shortcuts:\n"
        for action, key in UIComponents.SHORTCUTS.items():
            help_text += f" {key:<15} - {action.replace('_', ' ').title()}\n"

        self.conversation_view.add_message("system", help_text, datetime.now())

    def command_gpu_status(self):
        """Show GPU status."""
        self.gpu_validator.print_gpu_info()
        memory_status = self.model_manager.get_memory_status()

        status_text = f"GPU Status:\n"
        status_text += f" Total Memory: {memory_status['total_gb']:.1f} GB\n"
        status_text += f" Available: {memory_status['available_gb']:.1f} GB\n"
        status_text += f" Models Loaded: {memory_status['models_loaded']}\n"
        status_text += f" Model Memory: {memory_status['model_memory_gb']:.1f} GB\n"

        self.conversation_view.add_message("system", status_text, datetime.now())

    async def command_benchmark(self):
        """Run benchmark."""
        if not self.model_manager.current_model:
            self.conversation_view.add_message(
                "system",
                "No model loaded for benchmark.",
                datetime.now()
            )
            return

        self.conversation_view.add_message(
            "system",
            "Running benchmark (100 tokens)...",
            datetime.now()
        )

        metrics = self.inference_engine.benchmark()

        if metrics:
            benchmark_text = f"Benchmark Results:\n"
            benchmark_text += f" Tokens Generated: {metrics.tokens_generated}\n"
            benchmark_text += f" Time Elapsed: {metrics.time_elapsed:.2f}s\n"
            benchmark_text += f" Tokens/Second: {metrics.tokens_per_second:.1f}\n"
            benchmark_text += f" First Token Latency: {metrics.first_token_latency:.3f}s\n"
            benchmark_text += f" GPU Utilization: {metrics.gpu_utilization:.1f}%\n"
            benchmark_text += f" Memory Used: {metrics.memory_used_gb:.1f}GB\n"

            self.conversation_view.add_message("system", benchmark_text, datetime.now())

    def update_status(self):
        """Update status bar with current metrics."""
        if self.inference_engine.current_metrics:
            metrics = self.inference_engine.current_metrics
            self.status_bar.tokens_per_second = metrics.tokens_per_second
            self.status_bar.gpu_utilization = metrics.gpu_utilization
            self.status_bar.memory_gb = metrics.memory_used_gb

    def action_new_conversation(self):
        """Action for new conversation."""
        self.command_clear()

    def action_cancel_generation(self):
        """Action to cancel generation."""
        if self.generating:
            self.inference_engine.cancel_generation()
            self.conversation_view.add_message(
                "system",
                "Generation cancelled.",
                datetime.now()
            )

    def action_save_conversation(self):
        """Action to save conversation."""
        self.command_save()

    def action_load_model(self):
        """Action to load model."""
        self.conversation_view.add_message(
            "system",
            "Use /model <path> to load a model.",
            datetime.now()
        )

    def action_help(self):
        """Action to show help."""
        self.command_help()
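
TerminalApp takes all of its collaborators through constructor injection, so launching it means building the five cortex objects and calling run(). The real wiring lives in cortex/__main__.py, which this hunk does not show; the sketch below is a rough guess at such a launcher, and every constructor signature in it is an assumption:

    # Illustrative sketch, not part of the package: constructor-injection
    # wiring for TerminalApp. The cortex constructor signatures below are
    # assumptions; the real entry point is cortex/__main__.py (not shown here).
    from cortex.config import Config
    from cortex.gpu_validator import GPUValidator
    from cortex.model_manager import ModelManager
    from cortex.inference_engine import InferenceEngine
    from cortex.conversation_manager import ConversationManager
    from cortex.ui.terminal_app import TerminalApp


    def main() -> None:
        config = Config()                     # assumed default constructor
        gpu_validator = GPUValidator()
        model_manager = ModelManager(config)  # assumed to take the config
        inference_engine = InferenceEngine(config, model_manager)
        conversation_manager = ConversationManager(config)

        TerminalApp(
            config=config,
            gpu_validator=gpu_validator,
            model_manager=model_manager,
            inference_engine=inference_engine,
            conversation_manager=conversation_manager,
        ).run()


    if __name__ == "__main__":
        main()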