cortex-llm 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortex/__init__.py +1 -1
- cortex/config.py +46 -10
- cortex/inference_engine.py +69 -32
- cortex/tools/fs_ops.py +60 -13
- cortex/tools/search.py +76 -11
- cortex/tools/tool_runner.py +68 -8
- cortex/ui/box_rendering.py +97 -0
- cortex/ui/cli.py +65 -1071
- cortex/ui/cli_commands.py +61 -0
- cortex/ui/cli_prompt.py +96 -0
- cortex/ui/help_ui.py +66 -0
- cortex/ui/input_box.py +205 -0
- cortex/ui/model_ui.py +408 -0
- cortex/ui/status_ui.py +78 -0
- cortex/ui/tool_activity.py +82 -0
- {cortex_llm-1.0.10.dist-info → cortex_llm-1.0.11.dist-info}/METADATA +3 -1
- {cortex_llm-1.0.10.dist-info → cortex_llm-1.0.11.dist-info}/RECORD +21 -13
- {cortex_llm-1.0.10.dist-info → cortex_llm-1.0.11.dist-info}/WHEEL +0 -0
- {cortex_llm-1.0.10.dist-info → cortex_llm-1.0.11.dist-info}/entry_points.txt +0 -0
- {cortex_llm-1.0.10.dist-info → cortex_llm-1.0.11.dist-info}/licenses/LICENSE +0 -0
- {cortex_llm-1.0.10.dist-info → cortex_llm-1.0.11.dist-info}/top_level.txt +0 -0
cortex/ui/cli.py
CHANGED
|
@@ -6,19 +6,13 @@ import signal
|
|
|
6
6
|
import shutil
|
|
7
7
|
import readline
|
|
8
8
|
import time
|
|
9
|
-
import threading
|
|
10
9
|
import logging
|
|
11
|
-
import
|
|
12
|
-
import tty
|
|
13
|
-
import getpass
|
|
14
|
-
from typing import Optional, List, Tuple
|
|
10
|
+
from typing import Optional, List
|
|
15
11
|
from datetime import datetime
|
|
16
12
|
from pathlib import Path
|
|
17
|
-
from textwrap import wrap
|
|
18
13
|
|
|
19
14
|
from rich.live import Live
|
|
20
15
|
from rich.style import Style
|
|
21
|
-
from rich.text import Text
|
|
22
16
|
|
|
23
17
|
|
|
24
18
|
logger = logging.getLogger(__name__)
|
|
@@ -34,6 +28,14 @@ from cortex.fine_tuning import FineTuneWizard
|
|
|
34
28
|
from cortex.tools import ToolRunner
|
|
35
29
|
from cortex.tools import protocol as tool_protocol
|
|
36
30
|
from cortex.ui.markdown_render import ThinkMarkdown, PrefixedRenderable, render_plain_with_think
|
|
31
|
+
from cortex.ui import box_rendering
|
|
32
|
+
from cortex.ui.cli_commands import CommandHandlers, handle_command as dispatch_command
|
|
33
|
+
from cortex.ui.cli_prompt import format_prompt_with_chat_template
|
|
34
|
+
from cortex.ui.tool_activity import print_tool_activity
|
|
35
|
+
from cortex.ui import help_ui
|
|
36
|
+
from cortex.ui import model_ui
|
|
37
|
+
from cortex.ui import status_ui
|
|
38
|
+
from cortex.ui.input_box import prompt_input_box
|
|
37
39
|
|
|
38
40
|
|
|
39
41
|
class CortexCLI:
|
|
@@ -152,75 +154,12 @@ class CortexCLI:
|
|
|
152
154
|
conversation = self.conversation_manager.get_current_conversation()
|
|
153
155
|
if conversation is None:
|
|
154
156
|
conversation = self.conversation_manager.new_conversation()
|
|
155
|
-
marker = "[CORTEX_TOOL_INSTRUCTIONS
|
|
157
|
+
marker = "[CORTEX_TOOL_INSTRUCTIONS v3]"
|
|
156
158
|
for message in conversation.messages:
|
|
157
159
|
if message.role == MessageRole.SYSTEM and marker in message.content:
|
|
158
160
|
return
|
|
159
161
|
self.conversation_manager.add_message(MessageRole.SYSTEM, self.tool_runner.tool_instructions())
|
|
160
162
|
|
|
161
|
-
def _summarize_tool_call(self, call: dict) -> str:
|
|
162
|
-
name = str(call.get("name", "tool"))
|
|
163
|
-
args = call.get("arguments") or {}
|
|
164
|
-
parts = []
|
|
165
|
-
preferred = ("path", "query", "anchor", "start_line", "end_line", "recursive", "max_results")
|
|
166
|
-
for key in preferred:
|
|
167
|
-
if key in args:
|
|
168
|
-
value = args[key]
|
|
169
|
-
if isinstance(value, str) and len(value) > 60:
|
|
170
|
-
value = value[:57] + "..."
|
|
171
|
-
parts.append(f"{key}={value!r}")
|
|
172
|
-
if not parts and args:
|
|
173
|
-
for key in list(args.keys())[:3]:
|
|
174
|
-
value = args[key]
|
|
175
|
-
if isinstance(value, str) and len(value) > 60:
|
|
176
|
-
value = value[:57] + "..."
|
|
177
|
-
parts.append(f"{key}={value!r}")
|
|
178
|
-
arg_str = ", ".join(parts)
|
|
179
|
-
return f"{name}({arg_str})" if arg_str else f"{name}()"
|
|
180
|
-
|
|
181
|
-
def _summarize_tool_result(self, result: dict) -> str:
|
|
182
|
-
name = str(result.get("name", "tool"))
|
|
183
|
-
if not result.get("ok", False):
|
|
184
|
-
error = result.get("error") or "unknown error"
|
|
185
|
-
return f"{name} -> error: {error}"
|
|
186
|
-
payload = result.get("result") or {}
|
|
187
|
-
if name == "list_dir":
|
|
188
|
-
entries = payload.get("entries") or []
|
|
189
|
-
return f"{name} -> entries={len(entries)}"
|
|
190
|
-
if name == "search":
|
|
191
|
-
matches = payload.get("results") or []
|
|
192
|
-
return f"{name} -> results={len(matches)}"
|
|
193
|
-
if name == "read_file":
|
|
194
|
-
path = payload.get("path") or ""
|
|
195
|
-
start = payload.get("start_line")
|
|
196
|
-
end = payload.get("end_line")
|
|
197
|
-
if start and end:
|
|
198
|
-
return f"{name} -> {path} lines {start}-{end}"
|
|
199
|
-
if start:
|
|
200
|
-
return f"{name} -> {path} from line {start}"
|
|
201
|
-
return f"{name} -> {path}"
|
|
202
|
-
if name in {"write_file", "create_file", "delete_file", "replace_in_file", "insert_after", "insert_before"}:
|
|
203
|
-
path = payload.get("path") or ""
|
|
204
|
-
return f"{name} -> {path}"
|
|
205
|
-
return f"{name} -> ok"
|
|
206
|
-
|
|
207
|
-
def _print_tool_activity(self, tool_calls: list, tool_results: list) -> None:
|
|
208
|
-
lines = []
|
|
209
|
-
for call, result in zip(tool_calls, tool_results):
|
|
210
|
-
lines.append(f"tool {self._summarize_tool_call(call)} -> {self._summarize_tool_result(result)}")
|
|
211
|
-
if not lines:
|
|
212
|
-
return
|
|
213
|
-
text = Text("\n".join(lines), style=Style(color="bright_black", italic=True))
|
|
214
|
-
renderable = PrefixedRenderable(text, prefix=" ", prefix_style=Style(dim=True), indent=" ", auto_space=False)
|
|
215
|
-
original_console_width = self.console._width
|
|
216
|
-
target_width = max(40, int(self.get_terminal_width() * 0.75))
|
|
217
|
-
self.console.width = target_width
|
|
218
|
-
try:
|
|
219
|
-
self.console.print(renderable, highlight=False, soft_wrap=True)
|
|
220
|
-
self.console.print()
|
|
221
|
-
finally:
|
|
222
|
-
self.console._width = original_console_width
|
|
223
|
-
|
|
224
163
|
|
|
225
164
|
def get_terminal_width(self) -> int:
|
|
226
165
|
"""Get terminal width."""
|
|
@@ -232,95 +171,35 @@ class CortexCLI:
|
|
|
232
171
|
|
|
233
172
|
def get_visible_length(self, text: str) -> int:
|
|
234
173
|
"""Get visible length of text, ignoring ANSI escape codes and accounting for wide characters."""
|
|
235
|
-
|
|
236
|
-
import unicodedata
|
|
237
|
-
|
|
238
|
-
# Remove ANSI escape sequences
|
|
239
|
-
ansi_escape = re.compile(r'\x1b\[[0-9;]*m')
|
|
240
|
-
visible_text = ansi_escape.sub('', text)
|
|
241
|
-
|
|
242
|
-
# Calculate display width accounting for wide/ambiguous characters
|
|
243
|
-
display_width = 0
|
|
244
|
-
for char in visible_text:
|
|
245
|
-
width = unicodedata.east_asian_width(char)
|
|
246
|
-
if width in ('W', 'F'): # Wide or Fullwidth - always 2 columns
|
|
247
|
-
display_width += 2
|
|
248
|
-
elif width == 'A' and char in '●○': # Ambiguous - might be 2 in some terminals
|
|
249
|
-
# For now, treat these as single-width since most Western terminals do
|
|
250
|
-
# But if alignment issues appear with these characters, change to += 2
|
|
251
|
-
display_width += 1
|
|
252
|
-
else:
|
|
253
|
-
display_width += 1
|
|
254
|
-
|
|
255
|
-
return display_width
|
|
174
|
+
return box_rendering.get_visible_length(text)
|
|
256
175
|
|
|
257
176
|
def print_box_line(self, content: str, width: int, align: str = 'left'):
|
|
258
177
|
"""Print a single line in a box with proper padding."""
|
|
259
|
-
|
|
260
|
-
padding = width - visible_len - 2 # -2 for the borders
|
|
261
|
-
|
|
262
|
-
if align == 'center':
|
|
263
|
-
left_pad = padding // 2
|
|
264
|
-
right_pad = padding - left_pad
|
|
265
|
-
print(f"│{' ' * left_pad}{content}{' ' * right_pad}│")
|
|
266
|
-
else: # left align
|
|
267
|
-
print(f"│{content}{' ' * padding}│")
|
|
178
|
+
box_rendering.print_box_line(content, width, align=align)
|
|
268
179
|
|
|
269
180
|
def print_box_header(self, title: str, width: int):
|
|
270
181
|
"""Print a box header with title."""
|
|
271
|
-
|
|
272
|
-
title_with_color = f" \033[96m{title}\033[0m "
|
|
273
|
-
visible_len = self.get_visible_length(title_with_color)
|
|
274
|
-
padding = width - visible_len - 3 # -3 for "╭─" and "╮"
|
|
275
|
-
print(f"╭─{title_with_color}" + "─" * padding + "╮")
|
|
276
|
-
else:
|
|
277
|
-
print("╭" + "─" * (width - 2) + "╮")
|
|
182
|
+
box_rendering.print_box_header(title, width)
|
|
278
183
|
|
|
279
184
|
def print_box_footer(self, width: int):
|
|
280
185
|
"""Print a box footer."""
|
|
281
|
-
|
|
186
|
+
box_rendering.print_box_footer(width)
|
|
282
187
|
|
|
283
188
|
def print_box_separator(self, width: int):
|
|
284
189
|
"""Print a separator line inside a box."""
|
|
285
|
-
|
|
286
|
-
print("├" + "─" * (width - 2) + "┤")
|
|
190
|
+
box_rendering.print_box_separator(width)
|
|
287
191
|
|
|
288
192
|
def print_empty_line(self, width: int):
|
|
289
193
|
"""Print an empty line inside a box."""
|
|
290
|
-
|
|
194
|
+
box_rendering.print_empty_line(width)
|
|
291
195
|
|
|
292
196
|
def create_box(self, lines: List[str], width: Optional[int] = None) -> str:
|
|
293
197
|
"""Create a box with Unicode borders."""
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
top_right = "╮"
|
|
300
|
-
bottom_left = "╰"
|
|
301
|
-
bottom_right = "╯"
|
|
302
|
-
horizontal = "─"
|
|
303
|
-
vertical = "│"
|
|
304
|
-
|
|
305
|
-
# Calculate inner width
|
|
306
|
-
inner_width = width - 4 # Account for borders and padding
|
|
307
|
-
|
|
308
|
-
# Build box
|
|
309
|
-
result = []
|
|
310
|
-
result.append(top_left + horizontal * (width - 2) + top_right)
|
|
311
|
-
|
|
312
|
-
for line in lines:
|
|
313
|
-
# Calculate visible length to handle ANSI codes
|
|
314
|
-
visible_len = self.get_visible_length(line)
|
|
315
|
-
# Calculate padding needed
|
|
316
|
-
padding_needed = inner_width - visible_len
|
|
317
|
-
# Create padded line with correct spacing
|
|
318
|
-
padded = f" {line}{' ' * padding_needed} "
|
|
319
|
-
result.append(vertical + padded + vertical)
|
|
320
|
-
|
|
321
|
-
result.append(bottom_left + horizontal * (width - 2) + bottom_right)
|
|
322
|
-
|
|
323
|
-
return "\n".join(result)
|
|
198
|
+
return box_rendering.create_box(
|
|
199
|
+
lines,
|
|
200
|
+
width=width,
|
|
201
|
+
terminal_width=self.get_terminal_width(),
|
|
202
|
+
)
|
|
324
203
|
|
|
325
204
|
def print_welcome(self):
|
|
326
205
|
"""Print welcome message in Claude Code style."""
|
|
@@ -480,543 +359,48 @@ class CortexCLI:
|
|
|
480
359
|
|
|
481
360
|
def handle_command(self, command: str) -> bool:
|
|
482
361
|
"""Handle slash commands. Returns False to exit."""
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
self.
|
|
489
|
-
|
|
490
|
-
self.
|
|
491
|
-
|
|
492
|
-
self.
|
|
493
|
-
|
|
494
|
-
self.
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
self.show_status()
|
|
499
|
-
elif cmd == "/gpu":
|
|
500
|
-
self.show_gpu_status()
|
|
501
|
-
elif cmd == "/benchmark":
|
|
502
|
-
self.run_benchmark()
|
|
503
|
-
elif cmd == "/template":
|
|
504
|
-
self.manage_template(args)
|
|
505
|
-
elif cmd == "/finetune":
|
|
506
|
-
self.run_finetune()
|
|
507
|
-
elif cmd == "/login":
|
|
508
|
-
self.hf_login()
|
|
509
|
-
elif cmd in ["/quit", "/exit"]:
|
|
510
|
-
return False
|
|
511
|
-
elif cmd == "?":
|
|
512
|
-
self.show_shortcuts()
|
|
513
|
-
else:
|
|
514
|
-
print(f"\033[31mUnknown command: {cmd}\033[0m")
|
|
515
|
-
print("\033[2mType /help for available commands\033[0m")
|
|
516
|
-
|
|
517
|
-
return True
|
|
362
|
+
handlers = CommandHandlers(
|
|
363
|
+
show_help=self.show_help,
|
|
364
|
+
manage_models=self.manage_models,
|
|
365
|
+
download_model=self.download_model,
|
|
366
|
+
clear_conversation=self.clear_conversation,
|
|
367
|
+
save_conversation=self.save_conversation,
|
|
368
|
+
show_status=self.show_status,
|
|
369
|
+
show_gpu_status=self.show_gpu_status,
|
|
370
|
+
run_benchmark=self.run_benchmark,
|
|
371
|
+
manage_template=self.manage_template,
|
|
372
|
+
run_finetune=self.run_finetune,
|
|
373
|
+
hf_login=self.hf_login,
|
|
374
|
+
show_shortcuts=self.show_shortcuts,
|
|
375
|
+
)
|
|
376
|
+
return dispatch_command(command, handlers)
|
|
518
377
|
|
|
519
378
|
def show_shortcuts(self):
|
|
520
379
|
"""Show keyboard shortcuts."""
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
self.print_empty_line(width)
|
|
526
|
-
|
|
527
|
-
shortcuts = [
|
|
528
|
-
("Ctrl+C", "Cancel current generation"),
|
|
529
|
-
("Ctrl+D", "Exit Cortex"),
|
|
530
|
-
("Tab", "Auto-complete commands"),
|
|
531
|
-
("/help", "Show all commands"),
|
|
532
|
-
("?", "Show this help")
|
|
533
|
-
]
|
|
534
|
-
|
|
535
|
-
for key, desc in shortcuts:
|
|
536
|
-
# Color the key/command in yellow
|
|
537
|
-
colored_key = f"\033[93m{key}\033[0m"
|
|
538
|
-
# Calculate padding
|
|
539
|
-
key_width = len(key)
|
|
540
|
-
padding = " " * (12 - key_width) # Align descriptions at column 14
|
|
541
|
-
line = f" {colored_key}{padding}{desc}"
|
|
542
|
-
self.print_box_line(line, width)
|
|
543
|
-
|
|
544
|
-
self.print_empty_line(width)
|
|
545
|
-
self.print_box_footer(width)
|
|
380
|
+
help_ui.show_shortcuts(
|
|
381
|
+
terminal_width=self.get_terminal_width(),
|
|
382
|
+
box=self,
|
|
383
|
+
)
|
|
546
384
|
|
|
547
385
|
def show_help(self):
|
|
548
386
|
"""Show available commands."""
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
self.print_empty_line(width)
|
|
554
|
-
|
|
555
|
-
commands = [
|
|
556
|
-
("/help", "Show this help message"),
|
|
557
|
-
("/status", "Show current setup and GPU info"),
|
|
558
|
-
("/download", "Download a model from HuggingFace"),
|
|
559
|
-
("/model", "Manage models (load/delete/info)"),
|
|
560
|
-
("/finetune", "Fine-tune a model interactively"),
|
|
561
|
-
("/clear", "Clear conversation history"),
|
|
562
|
-
("/save", "Save current conversation"),
|
|
563
|
-
("/template", "Manage chat templates"),
|
|
564
|
-
("/gpu", "Show GPU status"),
|
|
565
|
-
("/benchmark", "Run performance benchmark"),
|
|
566
|
-
("/login", "Login to HuggingFace for gated models"),
|
|
567
|
-
("/quit", "Exit Cortex")
|
|
568
|
-
]
|
|
569
|
-
|
|
570
|
-
for cmd, desc in commands:
|
|
571
|
-
# Format: " /command description"
|
|
572
|
-
# Color the command in yellow
|
|
573
|
-
colored_cmd = f"\033[93m{cmd}\033[0m"
|
|
574
|
-
# Calculate padding between command and description
|
|
575
|
-
cmd_width = len(cmd)
|
|
576
|
-
padding = " " * (12 - cmd_width) # Align descriptions at column 14
|
|
577
|
-
line = f" {colored_cmd}{padding}{desc}"
|
|
578
|
-
self.print_box_line(line, width)
|
|
579
|
-
|
|
580
|
-
self.print_empty_line(width)
|
|
581
|
-
self.print_box_footer(width)
|
|
387
|
+
help_ui.show_help(
|
|
388
|
+
terminal_width=self.get_terminal_width(),
|
|
389
|
+
box=self,
|
|
390
|
+
)
|
|
582
391
|
|
|
583
392
|
def download_model(self, args: str = ""):
|
|
584
393
|
"""Download a model from HuggingFace."""
|
|
585
|
-
|
|
586
|
-
# Direct download with provided args
|
|
587
|
-
parts = args.split()
|
|
588
|
-
repo_id = parts[0]
|
|
589
|
-
filename = parts[1] if len(parts) > 1 else None
|
|
590
|
-
else:
|
|
591
|
-
# Interactive mode with numbered options
|
|
592
|
-
width = min(self.get_terminal_width() - 2, 70)
|
|
593
|
-
|
|
594
|
-
# Create download UI box using helper methods
|
|
595
|
-
print()
|
|
596
|
-
self.print_box_header("Model Manager", width)
|
|
597
|
-
self.print_empty_line(width)
|
|
598
|
-
|
|
599
|
-
option_num = 1
|
|
600
|
-
available = self.model_manager.discover_available_models()
|
|
601
|
-
|
|
602
|
-
# Show already downloaded models with numbers to load
|
|
603
|
-
if available:
|
|
604
|
-
self.print_box_line(" \033[96mLoad Existing Model:\033[0m", width)
|
|
605
|
-
self.print_empty_line(width)
|
|
606
|
-
|
|
607
|
-
for model in available[:5]: # Show up to 5 downloaded models
|
|
608
|
-
name = model['name'][:width-15]
|
|
609
|
-
size = f"{model['size_gb']:.1f}GB"
|
|
610
|
-
line = f" \033[93m[{option_num}]\033[0m {name} \033[2m({size})\033[0m"
|
|
611
|
-
self.print_box_line(line, width)
|
|
612
|
-
option_num += 1
|
|
613
|
-
|
|
614
|
-
if len(available) > 5:
|
|
615
|
-
line = f" \033[93m[{option_num}]\033[0m \033[2mShow all {len(available)} models...\033[0m"
|
|
616
|
-
self.print_box_line(line, width)
|
|
617
|
-
option_num += 1
|
|
618
|
-
|
|
619
|
-
self.print_empty_line(width)
|
|
620
|
-
self.print_box_separator(width)
|
|
621
|
-
self.print_empty_line(width)
|
|
622
|
-
|
|
623
|
-
# Download new model options
|
|
624
|
-
self.print_box_line(" \033[96mDownload New Model:\033[0m", width)
|
|
625
|
-
self.print_empty_line(width)
|
|
626
|
-
|
|
627
|
-
# Show format in dimmed color
|
|
628
|
-
line = f" \033[2mEnter repository ID (e.g., meta-llama/Llama-3.2-3B)\033[0m"
|
|
629
|
-
self.print_box_line(line, width)
|
|
630
|
-
|
|
631
|
-
self.print_empty_line(width)
|
|
632
|
-
self.print_box_footer(width)
|
|
633
|
-
|
|
634
|
-
# Get user choice
|
|
635
|
-
choice = self.get_input_with_escape("Choice or repo ID")
|
|
636
|
-
|
|
637
|
-
if choice is None:
|
|
638
|
-
return
|
|
639
|
-
|
|
640
|
-
try:
|
|
641
|
-
choice_num = int(choice)
|
|
642
|
-
|
|
643
|
-
# Load existing model
|
|
644
|
-
if available and choice_num <= len(available[:5]):
|
|
645
|
-
model = available[choice_num - 1]
|
|
646
|
-
print(f"\n\033[96m⚡\033[0m Loading {model['name']}...")
|
|
647
|
-
success, msg = self.model_manager.load_model(model['path'])
|
|
648
|
-
if success:
|
|
649
|
-
print(f"\033[32m✓\033[0m Model loaded successfully!")
|
|
650
|
-
|
|
651
|
-
# Show template information
|
|
652
|
-
model_info = self.model_manager.get_current_model()
|
|
653
|
-
if model_info:
|
|
654
|
-
tokenizer = self.model_manager.tokenizers.get(model_info.name)
|
|
655
|
-
profile = self.template_registry.setup_model(
|
|
656
|
-
model_info.name,
|
|
657
|
-
tokenizer=tokenizer,
|
|
658
|
-
interactive=False
|
|
659
|
-
)
|
|
660
|
-
if profile:
|
|
661
|
-
template_name = profile.config.name
|
|
662
|
-
print(f" \033[2m• Template: {template_name}\033[0m")
|
|
663
|
-
else:
|
|
664
|
-
print(f"\033[31m✗\033[0m Failed to load: {msg}")
|
|
665
|
-
return
|
|
666
|
-
|
|
667
|
-
# Show all models
|
|
668
|
-
elif available and choice_num == len(available[:5]) + 1 and len(available) > 5:
|
|
669
|
-
print()
|
|
670
|
-
self.manage_models() # Use the unified model manager
|
|
671
|
-
return
|
|
672
|
-
else:
|
|
673
|
-
print(f"\033[31m✗ Invalid choice\033[0m")
|
|
674
|
-
return
|
|
675
|
-
|
|
676
|
-
except ValueError:
|
|
677
|
-
# Not a number, treat as repository ID
|
|
678
|
-
repo_id = choice
|
|
679
|
-
# Check if filename is provided
|
|
680
|
-
parts = repo_id.split()
|
|
681
|
-
repo_id = parts[0]
|
|
682
|
-
filename = parts[1] if len(parts) > 1 else None
|
|
683
|
-
|
|
684
|
-
# Validate format
|
|
685
|
-
if '/' not in repo_id:
|
|
686
|
-
print(f"\n\033[31m✗ Invalid format. Expected: username/model-name\033[0m")
|
|
687
|
-
return
|
|
688
|
-
|
|
689
|
-
# Show download starting
|
|
690
|
-
print(f"\n\033[96m⬇\033[0m Downloading: \033[93m{repo_id}\033[0m")
|
|
691
|
-
if filename:
|
|
692
|
-
print(f" File: \033[93m{filename}\033[0m")
|
|
693
|
-
print()
|
|
694
|
-
|
|
695
|
-
success, message, path = self.model_downloader.download_model(repo_id, filename)
|
|
696
|
-
|
|
697
|
-
if success:
|
|
698
|
-
# Success message in a nice box
|
|
699
|
-
width = min(self.get_terminal_width() - 2, 70)
|
|
700
|
-
print()
|
|
701
|
-
# Create a custom header with green color for success
|
|
702
|
-
title_with_color = " \033[32mDownload Complete\033[0m "
|
|
703
|
-
visible_len = self.get_visible_length(title_with_color)
|
|
704
|
-
padding = width - visible_len - 3 # -3 for "╭─" and "╮"
|
|
705
|
-
print(f"╭─{title_with_color}" + "─" * padding + "╮")
|
|
706
|
-
self.print_box_line(" \033[32m✓\033[0m Model downloaded successfully!", width)
|
|
707
|
-
|
|
708
|
-
location_str = str(path)[:width-13]
|
|
709
|
-
self.print_box_line(f" \033[2mLocation: {location_str}\033[0m", width)
|
|
710
|
-
self.print_empty_line(width)
|
|
711
|
-
self.print_box_line(" \033[96mLoad this model now?\033[0m", width)
|
|
712
|
-
self.print_box_line(" \033[93m[Y]es\033[0m \033[2m[N]o\033[0m", width)
|
|
713
|
-
self.print_box_footer(width)
|
|
714
|
-
|
|
715
|
-
try:
|
|
716
|
-
choice = input("\n\033[96m▶\033[0m Choice (\033[93my\033[0m/\033[2mn\033[0m): ").strip().lower()
|
|
717
|
-
if choice in ['y', 'yes']:
|
|
718
|
-
print(f"\n\033[96m⚡\033[0m Loading model...")
|
|
719
|
-
load_success, load_msg = self.model_manager.load_model(str(path))
|
|
720
|
-
if load_success:
|
|
721
|
-
print(f"\033[32m✓\033[0m Model loaded successfully!")
|
|
722
|
-
else:
|
|
723
|
-
print(f"\033[31m✗\033[0m Failed to load: {load_msg}")
|
|
724
|
-
except KeyboardInterrupt:
|
|
725
|
-
print("\n\033[2mCancelled\033[0m")
|
|
726
|
-
else:
|
|
727
|
-
print(f"\n\033[31m✗\033[0m {message}")
|
|
394
|
+
model_ui.download_model(cli=self, args=args)
|
|
728
395
|
|
|
729
396
|
def hf_login(self):
|
|
730
397
|
"""Login to HuggingFace for accessing gated models."""
|
|
731
|
-
|
|
732
|
-
from huggingface_hub import login, HfApi
|
|
733
|
-
from huggingface_hub.utils import HfHubHTTPError
|
|
734
|
-
except ImportError:
|
|
735
|
-
print("\n\033[31m✗\033[0m huggingface-hub not installed. Install with: pip install huggingface-hub")
|
|
736
|
-
return
|
|
737
|
-
|
|
738
|
-
width = min(self.get_terminal_width() - 2, 70)
|
|
739
|
-
|
|
740
|
-
# Create login UI box
|
|
741
|
-
print()
|
|
742
|
-
self.print_box_header("HuggingFace Login", width)
|
|
743
|
-
self.print_empty_line(width)
|
|
744
|
-
|
|
745
|
-
# Check if already logged in
|
|
746
|
-
try:
|
|
747
|
-
api = HfApi()
|
|
748
|
-
user_info = api.whoami()
|
|
749
|
-
if user_info:
|
|
750
|
-
username = user_info.get('name', 'Unknown')
|
|
751
|
-
self.print_box_line(f" \033[32m✓\033[0m Already logged in as: \033[93m{username}\033[0m", width)
|
|
752
|
-
self.print_empty_line(width)
|
|
753
|
-
self.print_box_line(" \033[96mOptions:\033[0m", width)
|
|
754
|
-
self.print_box_line(" \033[93m[1]\033[0m Login with new token", width)
|
|
755
|
-
self.print_box_line(" \033[93m[2]\033[0m Logout", width)
|
|
756
|
-
self.print_box_line(" \033[93m[3]\033[0m Cancel", width)
|
|
757
|
-
self.print_box_footer(width)
|
|
758
|
-
|
|
759
|
-
choice = self.get_input_with_escape("Select option (1-3)")
|
|
760
|
-
if choice == '1':
|
|
761
|
-
# Continue to login flow
|
|
762
|
-
pass
|
|
763
|
-
elif choice == '2':
|
|
764
|
-
# Logout
|
|
765
|
-
from huggingface_hub import logout
|
|
766
|
-
logout()
|
|
767
|
-
print("\n\033[32m✓\033[0m Successfully logged out from HuggingFace")
|
|
768
|
-
return
|
|
769
|
-
else:
|
|
770
|
-
return
|
|
771
|
-
except:
|
|
772
|
-
# Not logged in, continue to login flow
|
|
773
|
-
pass
|
|
774
|
-
|
|
775
|
-
# Show login instructions
|
|
776
|
-
print()
|
|
777
|
-
self.print_box_header("HuggingFace Login", width)
|
|
778
|
-
self.print_empty_line(width)
|
|
779
|
-
self.print_box_line(" To access gated models, you need a HuggingFace token.", width)
|
|
780
|
-
self.print_empty_line(width)
|
|
781
|
-
self.print_box_line(" \033[96m1.\033[0m Get your token from:", width)
|
|
782
|
-
self.print_box_line(" \033[93mhttps://huggingface.co/settings/tokens\033[0m", width)
|
|
783
|
-
self.print_empty_line(width)
|
|
784
|
-
self.print_box_line(" \033[96m2.\033[0m Create a token with \033[93mread\033[0m permissions", width)
|
|
785
|
-
self.print_empty_line(width)
|
|
786
|
-
self.print_box_line(" \033[96m3.\033[0m Paste the token below (input hidden)", width)
|
|
787
|
-
self.print_box_footer(width)
|
|
788
|
-
|
|
789
|
-
# Get token with hidden input
|
|
790
|
-
print()
|
|
791
|
-
token = getpass.getpass("\033[96m▶\033[0m Enter token \033[2m(or press Enter to cancel)\033[0m: ")
|
|
792
|
-
|
|
793
|
-
if not token:
|
|
794
|
-
print("\033[2mCancelled\033[0m")
|
|
795
|
-
return
|
|
796
|
-
|
|
797
|
-
# Try to login
|
|
798
|
-
print("\n\033[96m⚡\033[0m Authenticating with HuggingFace...")
|
|
799
|
-
try:
|
|
800
|
-
login(token=token, add_to_git_credential=True)
|
|
801
|
-
|
|
802
|
-
# Verify login
|
|
803
|
-
api = HfApi()
|
|
804
|
-
user_info = api.whoami()
|
|
805
|
-
username = user_info.get('name', 'Unknown')
|
|
806
|
-
|
|
807
|
-
print(f"\033[32m✓\033[0m Successfully logged in as: \033[93m{username}\033[0m")
|
|
808
|
-
print("\033[2m Token saved for future use\033[0m")
|
|
809
|
-
print("\033[2m You can now download gated models\033[0m")
|
|
810
|
-
|
|
811
|
-
except HfHubHTTPError as e:
|
|
812
|
-
if "Invalid token" in str(e):
|
|
813
|
-
print("\033[31m✗\033[0m Invalid token. Please check your token and try again.")
|
|
814
|
-
else:
|
|
815
|
-
print(f"\033[31m✗\033[0m Login failed: {str(e)}")
|
|
816
|
-
except Exception as e:
|
|
817
|
-
print(f"\033[31m✗\033[0m Login failed: {str(e)}")
|
|
398
|
+
model_ui.hf_login(cli=self)
|
|
818
399
|
|
|
819
400
|
def manage_models(self, args: str = ""):
|
|
820
401
|
"""Interactive model manager - simplified for better UX.
|
|
821
402
|
If args provided, tries to load that model directly."""
|
|
822
|
-
|
|
823
|
-
# If args provided, try direct load
|
|
824
|
-
if args:
|
|
825
|
-
print(f"\033[96m⚡\033[0m Loading model: \033[93m{args}\033[0m...")
|
|
826
|
-
success, message = self.model_manager.load_model(args)
|
|
827
|
-
if success:
|
|
828
|
-
print(f"\033[32m✓\033[0m Model loaded successfully")
|
|
829
|
-
else:
|
|
830
|
-
print(f"\033[31m✗\033[0m Failed: {message}", file=sys.stderr)
|
|
831
|
-
return
|
|
832
|
-
|
|
833
|
-
# Interactive mode
|
|
834
|
-
available = self.model_manager.discover_available_models()
|
|
835
|
-
|
|
836
|
-
if not available:
|
|
837
|
-
print(f"\n\033[31m✗\033[0m No models found in \033[2m{self.config.model.model_path}\033[0m")
|
|
838
|
-
print("Use \033[93m/download\033[0m to download models from HuggingFace")
|
|
839
|
-
return
|
|
840
|
-
|
|
841
|
-
width = min(self.get_terminal_width() - 2, 70)
|
|
842
|
-
|
|
843
|
-
# Build the model manager dialog using helper methods
|
|
844
|
-
print()
|
|
845
|
-
self.print_box_header("Select Model", width)
|
|
846
|
-
self.print_empty_line(width)
|
|
847
|
-
|
|
848
|
-
# List models with numbers - simplified view
|
|
849
|
-
for i, model in enumerate(available, 1):
|
|
850
|
-
# Model name and size
|
|
851
|
-
name = model['name'][:width-30]
|
|
852
|
-
size = f"{model['size_gb']:.1f}GB"
|
|
853
|
-
|
|
854
|
-
# Check if currently loaded (handle both original name and MLX cached name)
|
|
855
|
-
current_model = self.model_manager.current_model or ""
|
|
856
|
-
is_current = (model['name'] == current_model or
|
|
857
|
-
model.get('mlx_name') == current_model or
|
|
858
|
-
current_model.endswith(model['name']))
|
|
859
|
-
|
|
860
|
-
# Build status indicators
|
|
861
|
-
status_parts = []
|
|
862
|
-
if model.get('mlx_optimized'):
|
|
863
|
-
status_parts.append("\033[36m⚡ MLX\033[0m") # Cyan lightning for MLX
|
|
864
|
-
elif model.get('mlx_available'):
|
|
865
|
-
status_parts.append("\033[2m○ MLX ready\033[0m") # Dim circle for can be optimized
|
|
866
|
-
|
|
867
|
-
if is_current:
|
|
868
|
-
status_parts.append("\033[32m● loaded\033[0m")
|
|
869
|
-
|
|
870
|
-
status = " ".join(status_parts) if status_parts else ""
|
|
871
|
-
|
|
872
|
-
# Format the line
|
|
873
|
-
if model.get('mlx_optimized'):
|
|
874
|
-
# Show optimized model with special formatting
|
|
875
|
-
line = f" \033[93m[{i}]\033[0m {name} \033[2m({size})\033[0m {status}"
|
|
876
|
-
else:
|
|
877
|
-
line = f" \033[93m[{i}]\033[0m {name} \033[2m({size})\033[0m {status}"
|
|
878
|
-
|
|
879
|
-
self.print_box_line(line, width)
|
|
880
|
-
|
|
881
|
-
self.print_empty_line(width)
|
|
882
|
-
self.print_box_separator(width)
|
|
883
|
-
self.print_empty_line(width)
|
|
884
|
-
|
|
885
|
-
# Additional options
|
|
886
|
-
self.print_box_line(f" \033[93m[D]\033[0m Delete a model", width)
|
|
887
|
-
self.print_box_line(f" \033[93m[N]\033[0m Download new model", width)
|
|
888
|
-
|
|
889
|
-
self.print_empty_line(width)
|
|
890
|
-
self.print_box_footer(width)
|
|
891
|
-
|
|
892
|
-
# Get user choice
|
|
893
|
-
choice = self.get_input_with_escape(f"Select model to load (1-{len(available)}) or option")
|
|
894
|
-
|
|
895
|
-
if choice is None:
|
|
896
|
-
return
|
|
897
|
-
|
|
898
|
-
choice = choice.lower()
|
|
899
|
-
|
|
900
|
-
if choice == 'n':
|
|
901
|
-
self.download_model()
|
|
902
|
-
return
|
|
903
|
-
elif choice == 'd':
|
|
904
|
-
# Delete mode - show models again for deletion
|
|
905
|
-
del_choice = self.get_input_with_escape(f"Select model to delete (1-{len(available)})")
|
|
906
|
-
if del_choice is None:
|
|
907
|
-
return
|
|
908
|
-
try:
|
|
909
|
-
model_idx = int(del_choice) - 1
|
|
910
|
-
if 0 <= model_idx < len(available):
|
|
911
|
-
selected_model = available[model_idx]
|
|
912
|
-
print(f"\n\033[31m⚠\033[0m Delete \033[93m{selected_model['name']}\033[0m?")
|
|
913
|
-
print(f" This will free \033[93m{selected_model['size_gb']:.1f}GB\033[0m of disk space.")
|
|
914
|
-
confirm = self.get_input_with_escape("Confirm deletion (\033[93my\033[0m/\033[2mN\033[0m)")
|
|
915
|
-
if confirm is None:
|
|
916
|
-
return
|
|
917
|
-
confirm = confirm.lower()
|
|
918
|
-
|
|
919
|
-
if confirm == 'y':
|
|
920
|
-
# Delete the model
|
|
921
|
-
model_path = Path(selected_model['path'])
|
|
922
|
-
try:
|
|
923
|
-
if model_path.is_file():
|
|
924
|
-
model_path.unlink()
|
|
925
|
-
elif model_path.is_dir():
|
|
926
|
-
import shutil
|
|
927
|
-
shutil.rmtree(model_path)
|
|
928
|
-
|
|
929
|
-
print(f"\033[32m✓\033[0m Model deleted successfully. Freed \033[93m{selected_model['size_gb']:.1f}GB\033[0m.")
|
|
930
|
-
|
|
931
|
-
# If this was the current model, clear it
|
|
932
|
-
if selected_model['name'] == self.model_manager.current_model:
|
|
933
|
-
self.model_manager.current_model = None
|
|
934
|
-
print("\033[2mNote: Deleted model was currently loaded. Load another model to continue.\033[0m")
|
|
935
|
-
except Exception as e:
|
|
936
|
-
print(f"\033[31m✗\033[0m Failed to delete: {str(e)}")
|
|
937
|
-
else:
|
|
938
|
-
print("\033[2mDeletion cancelled.\033[0m")
|
|
939
|
-
except (ValueError, IndexError):
|
|
940
|
-
print("\033[31m✗\033[0m Invalid selection")
|
|
941
|
-
return
|
|
942
|
-
|
|
943
|
-
try:
|
|
944
|
-
model_idx = int(choice) - 1
|
|
945
|
-
if 0 <= model_idx < len(available):
|
|
946
|
-
selected_model = available[model_idx]
|
|
947
|
-
|
|
948
|
-
# If already loaded, inform user
|
|
949
|
-
if selected_model['name'] == self.model_manager.current_model:
|
|
950
|
-
print(f"\033[2mModel already loaded: {selected_model['name']}\033[0m")
|
|
951
|
-
return
|
|
952
|
-
|
|
953
|
-
# Load model directly - no second prompt
|
|
954
|
-
print(f"\n\033[96m⚡\033[0m Loading \033[93m{selected_model['name']}\033[0m...")
|
|
955
|
-
success, message = self.model_manager.load_model(selected_model['path'])
|
|
956
|
-
if success:
|
|
957
|
-
# Show the same detailed info as startup
|
|
958
|
-
model_info = self.model_manager.get_current_model()
|
|
959
|
-
if model_info:
|
|
960
|
-
# Determine quantization type from name or model info
|
|
961
|
-
model_name = model_info.name
|
|
962
|
-
if "_4bit" in model_name or "4bit" in str(model_info.quantization):
|
|
963
|
-
quant_type = "4-bit"
|
|
964
|
-
elif "_5bit" in model_name or "5bit" in str(model_info.quantization):
|
|
965
|
-
quant_type = "5-bit"
|
|
966
|
-
elif "_8bit" in model_name or "8bit" in str(model_info.quantization):
|
|
967
|
-
quant_type = "8-bit"
|
|
968
|
-
else:
|
|
969
|
-
quant_type = "" # Don't duplicate "quantized"
|
|
970
|
-
|
|
971
|
-
# Clean model name for display
|
|
972
|
-
clean_name = selected_model['name']
|
|
973
|
-
if clean_name.startswith("_Users_"):
|
|
974
|
-
# Extract just the model name from the path
|
|
975
|
-
parts = clean_name.split("_")
|
|
976
|
-
for i, part in enumerate(parts):
|
|
977
|
-
if "models" in part:
|
|
978
|
-
clean_name = "_".join(parts[i+1:])
|
|
979
|
-
break
|
|
980
|
-
clean_name = clean_name.replace("_4bit", "").replace("_5bit", "").replace("_8bit", "")
|
|
981
|
-
|
|
982
|
-
# Format the model format nicely
|
|
983
|
-
format_display = model_info.format.value
|
|
984
|
-
if format_display.lower() == "mlx":
|
|
985
|
-
format_display = "MLX (Apple Silicon optimized)"
|
|
986
|
-
elif format_display.lower() == "gguf":
|
|
987
|
-
format_display = "GGUF" # Remove redundant "(quantized)"
|
|
988
|
-
elif format_display.lower() == "safetensors":
|
|
989
|
-
format_display = "SafeTensors"
|
|
990
|
-
elif format_display.lower() == "pytorch":
|
|
991
|
-
format_display = "PyTorch"
|
|
992
|
-
|
|
993
|
-
print(f" \033[32m✓\033[0m Model ready: \033[93m{clean_name}\033[0m")
|
|
994
|
-
# Show quantization info only if we have specific type
|
|
995
|
-
if quant_type:
|
|
996
|
-
print(f" \033[2m• Size: {model_info.size_gb:.1f}GB ({quant_type} quantized)\033[0m")
|
|
997
|
-
else:
|
|
998
|
-
print(f" \033[2m• Size: {model_info.size_gb:.1f}GB (quantized)\033[0m")
|
|
999
|
-
print(f" \033[2m• Optimizations: AMX acceleration, operation fusion\033[0m")
|
|
1000
|
-
print(f" \033[2m• Format: {format_display}\033[0m")
|
|
1001
|
-
|
|
1002
|
-
# Show template information
|
|
1003
|
-
tokenizer = self.model_manager.tokenizers.get(model_info.name)
|
|
1004
|
-
profile = self.template_registry.setup_model(
|
|
1005
|
-
model_info.name,
|
|
1006
|
-
tokenizer=tokenizer,
|
|
1007
|
-
interactive=False
|
|
1008
|
-
)
|
|
1009
|
-
if profile:
|
|
1010
|
-
template_name = profile.config.name
|
|
1011
|
-
print(f" \033[2m• Template: {template_name}\033[0m")
|
|
1012
|
-
else:
|
|
1013
|
-
print(f"\033[32m✓\033[0m Model loaded successfully!")
|
|
1014
|
-
else:
|
|
1015
|
-
print(f"\033[31m✗\033[0m Failed to load: {message}")
|
|
1016
|
-
else:
|
|
1017
|
-
print("\033[31m✗\033[0m Invalid selection")
|
|
1018
|
-
except ValueError:
|
|
1019
|
-
print("\033[31m✗\033[0m Invalid choice")
|
|
403
|
+
model_ui.manage_models(cli=self, args=args)
|
|
1020
404
|
|
|
1021
405
|
def clear_conversation(self):
|
|
1022
406
|
"""Clear conversation history."""
|
|
@@ -1039,76 +423,15 @@ class CortexCLI:
|
|
|
1039
423
|
|
|
1040
424
|
def show_status(self):
|
|
1041
425
|
"""Show current setup status."""
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
width = min(self.get_terminal_width() - 2, 70) # Consistent width with other dialogs
|
|
1045
|
-
|
|
1046
|
-
print()
|
|
1047
|
-
self.print_box_header("Current Setup", width)
|
|
1048
|
-
self.print_empty_line(width)
|
|
1049
|
-
|
|
1050
|
-
# GPU Info
|
|
1051
|
-
if gpu_info:
|
|
1052
|
-
self.print_box_line(f" \033[2mGPU:\033[0m \033[93m{gpu_info.chip_name}\033[0m", width)
|
|
1053
|
-
self.print_box_line(f" \033[2mCores:\033[0m \033[93m{gpu_info.gpu_cores}\033[0m", width)
|
|
1054
|
-
|
|
1055
|
-
mem_gb = gpu_info.total_memory / (1024**3)
|
|
1056
|
-
mem_str = f"{mem_gb:.1f} GB"
|
|
1057
|
-
self.print_box_line(f" \033[2mMemory:\033[0m \033[93m{mem_str}\033[0m", width)
|
|
1058
|
-
|
|
1059
|
-
# Model Info
|
|
1060
|
-
if self.model_manager.current_model:
|
|
1061
|
-
model_info = self.model_manager.get_current_model()
|
|
1062
|
-
if model_info:
|
|
1063
|
-
self.print_box_line(f" \033[2mModel:\033[0m \033[93m{model_info.name[:43]}\033[0m", width)
|
|
1064
|
-
|
|
1065
|
-
# Template info
|
|
1066
|
-
tokenizer = self.model_manager.tokenizers.get(model_info.name)
|
|
1067
|
-
profile = self.template_registry.get_template(model_info.name)
|
|
1068
|
-
if profile:
|
|
1069
|
-
template_name = profile.config.name
|
|
1070
|
-
self.print_box_line(f" \033[2mTemplate:\033[0m \033[93m{template_name}\033[0m", width)
|
|
1071
|
-
else:
|
|
1072
|
-
self.print_box_line(f" \033[2mModel:\033[0m \033[31mNone loaded\033[0m", width)
|
|
1073
|
-
|
|
1074
|
-
self.print_empty_line(width)
|
|
1075
|
-
self.print_box_footer(width)
|
|
426
|
+
status_ui.show_status(cli=self)
|
|
1076
427
|
|
|
1077
428
|
def show_gpu_status(self):
|
|
1078
429
|
"""Show GPU status."""
|
|
1079
|
-
|
|
1080
|
-
if gpu_info:
|
|
1081
|
-
print(f"\n\033[96mGPU Information:\033[0m")
|
|
1082
|
-
print(f" Chip: \033[93m{gpu_info.chip_name}\033[0m")
|
|
1083
|
-
print(f" GPU Cores: \033[93m{gpu_info.gpu_cores}\033[0m")
|
|
1084
|
-
print(f" Total Memory: \033[93m{gpu_info.total_memory / (1024**3):.1f} GB\033[0m")
|
|
1085
|
-
print(f" Available Memory: \033[93m{gpu_info.available_memory / (1024**3):.1f} GB\033[0m")
|
|
1086
|
-
print(f" Metal Support: {'\033[32mYes\033[0m' if gpu_info.has_metal else '\033[31mNo\033[0m'}")
|
|
1087
|
-
print(f" MPS Support: {'\033[32mYes\033[0m' if gpu_info.has_mps else '\033[31mNo\033[0m'}")
|
|
1088
|
-
|
|
1089
|
-
memory_status = self.model_manager.get_memory_status()
|
|
1090
|
-
print(f"\n\033[96mMemory Status:\033[0m")
|
|
1091
|
-
print(f" Available: \033[93m{memory_status['available_gb']:.1f} GB\033[0m")
|
|
1092
|
-
print(f" Models Loaded: \033[93m{memory_status['models_loaded']}\033[0m")
|
|
1093
|
-
print(f" Model Memory: \033[93m{memory_status['model_memory_gb']:.1f} GB\033[0m")
|
|
430
|
+
status_ui.show_gpu_status(cli=self)
|
|
1094
431
|
|
|
1095
432
|
def run_benchmark(self):
|
|
1096
433
|
"""Run performance benchmark."""
|
|
1097
|
-
|
|
1098
|
-
print("\033[31m✗\033[0m No model loaded.")
|
|
1099
|
-
return
|
|
1100
|
-
|
|
1101
|
-
print("\033[96m⚡\033[0m Running benchmark (100 tokens)...")
|
|
1102
|
-
metrics = self.inference_engine.benchmark()
|
|
1103
|
-
|
|
1104
|
-
if metrics:
|
|
1105
|
-
print(f"\n\033[96mBenchmark Results:\033[0m")
|
|
1106
|
-
print(f" Tokens Generated: \033[93m{metrics.tokens_generated}\033[0m")
|
|
1107
|
-
print(f" Time: \033[93m{metrics.time_elapsed:.2f}s\033[0m")
|
|
1108
|
-
print(f" Tokens/Second: \033[93m{metrics.tokens_per_second:.1f}\033[0m")
|
|
1109
|
-
print(f" First Token: \033[93m{metrics.first_token_latency:.3f}s\033[0m")
|
|
1110
|
-
print(f" GPU Usage: \033[93m{metrics.gpu_utilization:.1f}%\033[0m")
|
|
1111
|
-
print(f" Memory: \033[93m{metrics.memory_used_gb:.1f}GB\033[0m")
|
|
434
|
+
status_ui.run_benchmark(cli=self)
|
|
1112
435
|
|
|
1113
436
|
def manage_template(self, args: str = ""):
|
|
1114
437
|
"""Manage template configuration for the current model."""
|
|
@@ -1329,7 +652,7 @@ class CortexCLI:
|
|
|
1329
652
|
|
|
1330
653
|
if tool_calls:
|
|
1331
654
|
tool_results = self.tool_runner.run_calls(tool_calls)
|
|
1332
|
-
self.
|
|
655
|
+
print_tool_activity(self.console, tool_calls, tool_results, self.get_terminal_width())
|
|
1333
656
|
self.conversation_manager.add_message(
|
|
1334
657
|
MessageRole.SYSTEM,
|
|
1335
658
|
tool_protocol.format_tool_results(tool_results)
|
|
@@ -1382,351 +705,22 @@ class CortexCLI:
|
|
|
1382
705
|
|
|
1383
706
|
def _format_prompt_with_chat_template(self, user_input: str, include_user: bool = True) -> str:
|
|
1384
707
|
"""Format the prompt with appropriate chat template for the model."""
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
messages = []
|
|
1394
|
-
|
|
1395
|
-
# Add conversation history if exists
|
|
1396
|
-
if conversation and conversation.messages:
|
|
1397
|
-
# Include recent context (last few messages)
|
|
1398
|
-
context_messages = conversation.messages[-10:] # Last 10 messages for context
|
|
1399
|
-
for msg in context_messages:
|
|
1400
|
-
messages.append({
|
|
1401
|
-
"role": msg.role.value,
|
|
1402
|
-
"content": msg.content
|
|
1403
|
-
})
|
|
1404
|
-
|
|
1405
|
-
# Add current user message
|
|
1406
|
-
if include_user:
|
|
1407
|
-
messages.append({
|
|
1408
|
-
"role": "user",
|
|
1409
|
-
"content": user_input
|
|
1410
|
-
})
|
|
1411
|
-
|
|
1412
|
-
# Use template registry to format messages
|
|
1413
|
-
try:
|
|
1414
|
-
# Setup model template if not already configured
|
|
1415
|
-
profile = self.template_registry.setup_model(
|
|
1416
|
-
model_name,
|
|
1417
|
-
tokenizer=tokenizer,
|
|
1418
|
-
interactive=False # Non-interactive for smoother experience
|
|
1419
|
-
)
|
|
1420
|
-
|
|
1421
|
-
# Format messages using the template
|
|
1422
|
-
formatted = profile.format_messages(messages, add_generation_prompt=True)
|
|
1423
|
-
|
|
1424
|
-
# DEBUG: Uncomment to see formatted prompt
|
|
1425
|
-
# print(f"\033[36m[DEBUG] Using template: {profile.config.name}\033[0m", file=sys.stderr)
|
|
1426
|
-
# print(f"\033[36m[DEBUG] Formatted prompt preview: {formatted[:200]}...\033[0m", file=sys.stderr)
|
|
1427
|
-
|
|
1428
|
-
return formatted
|
|
1429
|
-
|
|
1430
|
-
except (AttributeError, TypeError, ValueError) as e:
|
|
1431
|
-
# Fallback to old method if template registry fails
|
|
1432
|
-
logger.debug(f"Template registry failed: {e}, using fallback")
|
|
1433
|
-
|
|
1434
|
-
if tokenizer and hasattr(tokenizer, 'apply_chat_template'):
|
|
1435
|
-
# Try direct tokenizer method
|
|
1436
|
-
try:
|
|
1437
|
-
formatted = tokenizer.apply_chat_template(
|
|
1438
|
-
messages,
|
|
1439
|
-
tokenize=False,
|
|
1440
|
-
add_generation_prompt=True
|
|
1441
|
-
)
|
|
1442
|
-
return formatted
|
|
1443
|
-
except (AttributeError, TypeError, ValueError) as e:
|
|
1444
|
-
logger.debug(f"Tokenizer apply_chat_template failed: {e}")
|
|
1445
|
-
|
|
1446
|
-
# Fallback: For TinyLlama and other chat models, use the proper format
|
|
1447
|
-
# Check if it's a chat model
|
|
1448
|
-
if model_name and "chat" in model_name.lower():
|
|
1449
|
-
# DEBUG: Uncomment to see when fallback chat format is used
|
|
1450
|
-
# This occurs when tokenizer doesn't have apply_chat_template method
|
|
1451
|
-
# print(f"\033[35m[DEBUG] Using chat model fallback for: {model_name}\033[0m", file=sys.stderr)
|
|
1452
|
-
|
|
1453
|
-
# Use the proper chat format for TinyLlama and similar models
|
|
1454
|
-
# Build conversation history
|
|
1455
|
-
history = ""
|
|
1456
|
-
if conversation and conversation.messages:
|
|
1457
|
-
recent_messages = conversation.messages[-6:] # Get last few messages
|
|
1458
|
-
for msg in recent_messages:
|
|
1459
|
-
if msg.role == MessageRole.USER:
|
|
1460
|
-
history += f"<|user|>\n{msg.content}</s>\n"
|
|
1461
|
-
elif msg.role == MessageRole.ASSISTANT:
|
|
1462
|
-
history += f"<|assistant|>\n{msg.content}</s>\n"
|
|
1463
|
-
|
|
1464
|
-
# Add current user message with proper format
|
|
1465
|
-
prompt = f"{history}<|user|>\n{user_input}</s>\n<|assistant|>\n"
|
|
1466
|
-
|
|
1467
|
-
# DEBUG: Uncomment to confirm fallback format was applied
|
|
1468
|
-
# print(f"\033[35m[DEBUG] Chat fallback format used\033[0m", file=sys.stderr)
|
|
1469
|
-
return prompt
|
|
1470
|
-
|
|
1471
|
-
# Generic fallback for non-chat models
|
|
1472
|
-
if conversation and len(conversation.messages) > 0:
|
|
1473
|
-
# Include some conversation history
|
|
1474
|
-
context = ""
|
|
1475
|
-
recent_messages = conversation.messages[-6:] # Get last few messages
|
|
1476
|
-
for msg in recent_messages:
|
|
1477
|
-
if msg.role == MessageRole.USER:
|
|
1478
|
-
context += f"User: {msg.content}\n"
|
|
1479
|
-
elif msg.role == MessageRole.ASSISTANT:
|
|
1480
|
-
context += f"Assistant: {msg.content}\n"
|
|
1481
|
-
|
|
1482
|
-
# Add current exchange
|
|
1483
|
-
prompt = f"{context}User: {user_input}\nAssistant:"
|
|
1484
|
-
else:
|
|
1485
|
-
# First message in conversation - use simple format
|
|
1486
|
-
prompt = f"User: {user_input}\nAssistant:"
|
|
1487
|
-
|
|
1488
|
-
return prompt
|
|
708
|
+
return format_prompt_with_chat_template(
|
|
709
|
+
conversation_manager=self.conversation_manager,
|
|
710
|
+
model_manager=self.model_manager,
|
|
711
|
+
template_registry=self.template_registry,
|
|
712
|
+
user_input=user_input,
|
|
713
|
+
include_user=include_user,
|
|
714
|
+
logger=logger,
|
|
715
|
+
)
|
|
1489
716
|
|
|
1490
717
|
def get_input_from_box(self) -> str:
|
|
1491
|
-
"""Get user input from a styled input box.
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
no borders/pipes remain on screen.
|
|
1498
|
-
"""
|
|
1499
|
-
width = self.get_terminal_width()
|
|
1500
|
-
|
|
1501
|
-
# ANSI codes
|
|
1502
|
-
GREEN = "\033[92m"
|
|
1503
|
-
YELLOW = "\033[93m"
|
|
1504
|
-
DIM = "\033[2m"
|
|
1505
|
-
RESET = "\033[0m"
|
|
1506
|
-
CLEAR_LINE = "\033[2K"
|
|
1507
|
-
CLEAR_TO_EOL = "\033[K"
|
|
1508
|
-
CURSOR_UP = "\033[A"
|
|
1509
|
-
CURSOR_DOWN = "\033[B"
|
|
1510
|
-
MOVE_COL = lambda n: f"\033[{n}G"
|
|
1511
|
-
|
|
1512
|
-
# Get current model name for display
|
|
1513
|
-
current_model = ""
|
|
1514
|
-
if self.model_manager.current_model:
|
|
1515
|
-
model_name = os.path.basename(self.model_manager.current_model)
|
|
1516
|
-
# Display full model name without truncation
|
|
1517
|
-
current_model = f"{DIM}Model:{RESET} {YELLOW}{model_name}{RESET}"
|
|
1518
|
-
|
|
1519
|
-
# Draw the input box with dim borders
|
|
1520
|
-
print()
|
|
1521
|
-
print(f"{DIM}╭{'─' * (width - 2)}╮{RESET}")
|
|
1522
|
-
print(f"{DIM}│{RESET}{' ' * (width - 2)}{DIM}│{RESET}")
|
|
1523
|
-
print(f"{DIM}│{RESET}{' ' * (width - 2)}{DIM}│{RESET}")
|
|
1524
|
-
print(f"{DIM}│{RESET}{' ' * (width - 2)}{DIM}│{RESET}")
|
|
1525
|
-
print(f"{DIM}╰{'─' * (width - 2)}╯{RESET}")
|
|
1526
|
-
|
|
1527
|
-
# Bottom hint: show current model aligned with box
|
|
1528
|
-
if current_model:
|
|
1529
|
-
print(f"{current_model}")
|
|
1530
|
-
else:
|
|
1531
|
-
print() # Empty line if no model loaded
|
|
1532
|
-
|
|
1533
|
-
# Move cursor to input position inside the box (center of 3 interior lines)
|
|
1534
|
-
sys.stdout.write("\033[4A") # Move up 4 lines to the input line
|
|
1535
|
-
sys.stdout.write(f"\r{DIM}│{RESET} > ") # Position at prompt
|
|
1536
|
-
sys.stdout.flush()
|
|
1537
|
-
|
|
1538
|
-
try:
|
|
1539
|
-
# Get user input with custom character handling
|
|
1540
|
-
user_input = self._get_protected_input(width)
|
|
1541
|
-
|
|
1542
|
-
# After _get_protected_input returns, the cursor is at the start of the
|
|
1543
|
-
# bottom border line (due to CRLFs when Enter was pressed).
|
|
1544
|
-
# Explicitly clear the entire input box region using relative moves.
|
|
1545
|
-
# 1) Clear hint line (one line below bottom border)
|
|
1546
|
-
sys.stdout.write(f"{CURSOR_DOWN}\r{CLEAR_LINE}")
|
|
1547
|
-
# 2) Clear bottom border
|
|
1548
|
-
sys.stdout.write(f"{CURSOR_UP}\r{CLEAR_LINE}")
|
|
1549
|
-
# 3) Clear padding line
|
|
1550
|
-
sys.stdout.write(f"{CURSOR_UP}\r{CLEAR_LINE}")
|
|
1551
|
-
# 4) Clear input line
|
|
1552
|
-
sys.stdout.write(f"{CURSOR_UP}\r{CLEAR_LINE}")
|
|
1553
|
-
# 5) Clear padding line
|
|
1554
|
-
sys.stdout.write(f"{CURSOR_UP}\r{CLEAR_LINE}")
|
|
1555
|
-
# 6) Clear top border
|
|
1556
|
-
sys.stdout.write(f"{CURSOR_UP}\r{CLEAR_LINE}")
|
|
1557
|
-
|
|
1558
|
-
# Position cursor at the start of where the top border was and print
|
|
1559
|
-
# the clean prompt that represents the submitted user message.
|
|
1560
|
-
sys.stdout.write("\r> " + user_input.strip() + "\n")
|
|
1561
|
-
sys.stdout.flush()
|
|
1562
|
-
|
|
1563
|
-
return user_input.strip()
|
|
1564
|
-
|
|
1565
|
-
except KeyboardInterrupt:
|
|
1566
|
-
# Cleanup already done in _get_protected_input before raising
|
|
1567
|
-
raise
|
|
1568
|
-
except EOFError:
|
|
1569
|
-
# Clean up the box on Ctrl+D by clearing the lines if possible.
|
|
1570
|
-
# We are on the input line.
|
|
1571
|
-
try:
|
|
1572
|
-
sys.stdout.write(f"\r{CLEAR_LINE}") # input line
|
|
1573
|
-
sys.stdout.write(f"{CURSOR_DOWN}\r{CLEAR_LINE}") # padding line
|
|
1574
|
-
sys.stdout.write(f"{CURSOR_DOWN}\r{CLEAR_LINE}") # bottom border
|
|
1575
|
-
sys.stdout.write(f"{CURSOR_DOWN}\r{CLEAR_LINE}") # hint line
|
|
1576
|
-
sys.stdout.write(f"{CURSOR_UP}\r{CLEAR_LINE}") # bottom border
|
|
1577
|
-
sys.stdout.write(f"{CURSOR_UP}\r{CLEAR_LINE}") # padding line
|
|
1578
|
-
sys.stdout.write(f"{CURSOR_UP}\r{CLEAR_LINE}") # input line
|
|
1579
|
-
sys.stdout.write(f"{CURSOR_UP}\r{CLEAR_LINE}") # padding line
|
|
1580
|
-
sys.stdout.write(f"{CURSOR_UP}\r{CLEAR_LINE}") # top border
|
|
1581
|
-
sys.stdout.flush()
|
|
1582
|
-
finally:
|
|
1583
|
-
pass
|
|
1584
|
-
raise
|
|
1585
|
-
|
|
1586
|
-
def _get_protected_input(self, box_width: int) -> str:
|
|
1587
|
-
"""Get input with protection against deleting the prompt.
|
|
1588
|
-
|
|
1589
|
-
This method reads input character by character and prevents
|
|
1590
|
-
the user from backspacing past the beginning of their input.
|
|
1591
|
-
"""
|
|
1592
|
-
DIM = "\033[2m"
|
|
1593
|
-
RESET = "\033[0m"
|
|
1594
|
-
CLEAR_TO_END = "\033[K"
|
|
1595
|
-
SAVE_CURSOR = "\033[s"
|
|
1596
|
-
RESTORE_CURSOR = "\033[u"
|
|
1597
|
-
|
|
1598
|
-
# Calculate usable width for text (box_width - borders - prompt)
|
|
1599
|
-
# box_width - 2 (borders) - 4 (prompt " > ")
|
|
1600
|
-
max_display_width = box_width - 6
|
|
1601
|
-
|
|
1602
|
-
# Store terminal settings
|
|
1603
|
-
old_settings = termios.tcgetattr(sys.stdin)
|
|
1604
|
-
|
|
1605
|
-
try:
|
|
1606
|
-
# Set terminal to raw mode for character-by-character input
|
|
1607
|
-
# Disable ISIG so we can handle Ctrl+C manually for clean exit
|
|
1608
|
-
new_settings = termios.tcgetattr(sys.stdin)
|
|
1609
|
-
new_settings[3] = new_settings[3] & ~termios.ICANON # Disable canonical mode
|
|
1610
|
-
new_settings[3] = new_settings[3] & ~termios.ECHO # Disable echo
|
|
1611
|
-
new_settings[3] = new_settings[3] & ~termios.ISIG # Disable signals - we'll handle Ctrl+C manually
|
|
1612
|
-
new_settings[6][termios.VMIN] = 1 # Read at least 1 character
|
|
1613
|
-
new_settings[6][termios.VTIME] = 0 # No timeout
|
|
1614
|
-
termios.tcsetattr(sys.stdin, termios.TCSADRAIN, new_settings)
|
|
1615
|
-
|
|
1616
|
-
input_buffer = []
|
|
1617
|
-
cursor_pos = 0
|
|
1618
|
-
view_offset = 0 # For horizontal scrolling when text exceeds width
|
|
1619
|
-
|
|
1620
|
-
def redraw_line():
|
|
1621
|
-
"""Redraw the entire input line with proper boundaries."""
|
|
1622
|
-
nonlocal view_offset
|
|
1623
|
-
|
|
1624
|
-
# Calculate what portion of text to display
|
|
1625
|
-
if len(input_buffer) <= max_display_width:
|
|
1626
|
-
# Text fits within box
|
|
1627
|
-
display_text = ''.join(input_buffer)
|
|
1628
|
-
display_cursor_pos = cursor_pos
|
|
1629
|
-
else:
|
|
1630
|
-
# Text needs scrolling
|
|
1631
|
-
# Ensure cursor is visible in the viewport
|
|
1632
|
-
if cursor_pos < view_offset:
|
|
1633
|
-
# Cursor moved left out of view
|
|
1634
|
-
view_offset = cursor_pos
|
|
1635
|
-
elif cursor_pos >= view_offset + max_display_width:
|
|
1636
|
-
# Cursor moved right out of view
|
|
1637
|
-
view_offset = cursor_pos - max_display_width + 1
|
|
1638
|
-
|
|
1639
|
-
# Extract visible portion
|
|
1640
|
-
display_text = ''.join(input_buffer[view_offset:view_offset + max_display_width])
|
|
1641
|
-
display_cursor_pos = cursor_pos - view_offset
|
|
1642
|
-
|
|
1643
|
-
# Clear line and redraw
|
|
1644
|
-
sys.stdout.write(f"\r{DIM}│{RESET} > {display_text}{CLEAR_TO_END}")
|
|
1645
|
-
|
|
1646
|
-
# Draw right border at the correct position
|
|
1647
|
-
# box_width is the full width including borders, so border is at box_width position
|
|
1648
|
-
sys.stdout.write(f"\033[{box_width}G") # Move to border column
|
|
1649
|
-
sys.stdout.write(f"{DIM}│{RESET}")
|
|
1650
|
-
|
|
1651
|
-
# Position cursor at the correct location
|
|
1652
|
-
cursor_column = 5 + display_cursor_pos # 5 = "│ > "
|
|
1653
|
-
sys.stdout.write(f"\033[{cursor_column}G")
|
|
1654
|
-
sys.stdout.flush()
|
|
1655
|
-
|
|
1656
|
-
# Initial display
|
|
1657
|
-
redraw_line()
|
|
1658
|
-
|
|
1659
|
-
while True:
|
|
1660
|
-
char = sys.stdin.read(1)
|
|
1661
|
-
|
|
1662
|
-
# Handle special characters
|
|
1663
|
-
if char == '\r' or char == '\n': # Enter key
|
|
1664
|
-
sys.stdout.write('\r\n')
|
|
1665
|
-
sys.stdout.write('\r\n')
|
|
1666
|
-
sys.stdout.flush()
|
|
1667
|
-
break
|
|
1668
|
-
|
|
1669
|
-
elif char == '\x7f' or char == '\x08': # Backspace (DEL or BS)
|
|
1670
|
-
# Only allow backspace if there are characters to delete
|
|
1671
|
-
if cursor_pos > 0:
|
|
1672
|
-
cursor_pos -= 1
|
|
1673
|
-
input_buffer.pop(cursor_pos)
|
|
1674
|
-
redraw_line()
|
|
1675
|
-
# If cursor_pos is 0, do nothing (can't delete the prompt)
|
|
1676
|
-
|
|
1677
|
-
elif char == '\x03': # Ctrl+C
|
|
1678
|
-
# Clean up the display before raising KeyboardInterrupt
|
|
1679
|
-
# We're in the input line, need to clear the entire box
|
|
1680
|
-
sys.stdout.write("\r\033[2K") # Clear current line
|
|
1681
|
-
sys.stdout.write("\033[1B\r\033[2K") # Down 1, clear padding line
|
|
1682
|
-
sys.stdout.write("\033[1B\r\033[2K") # Down 1, clear bottom border
|
|
1683
|
-
sys.stdout.write("\033[1B\r\033[2K") # Down 1, clear model line
|
|
1684
|
-
sys.stdout.write("\033[4A\r\033[2K") # Up 4 to padding line, clear
|
|
1685
|
-
sys.stdout.write("\033[1A\r\033[2K") # Up 1 to top border, clear
|
|
1686
|
-
sys.stdout.write("\033[1A\r\033[2K") # Up 1 to empty line, clear
|
|
1687
|
-
sys.stdout.write("\r") # Position at start
|
|
1688
|
-
sys.stdout.flush()
|
|
1689
|
-
# Now raise the interrupt for clean exit
|
|
1690
|
-
raise KeyboardInterrupt
|
|
1691
|
-
|
|
1692
|
-
elif char == '\x04': # Ctrl+D
|
|
1693
|
-
raise EOFError
|
|
1694
|
-
|
|
1695
|
-
elif char == '\x1b': # ESC sequence (arrow keys, etc.)
|
|
1696
|
-
# Read the rest of the escape sequence
|
|
1697
|
-
next1 = sys.stdin.read(1)
|
|
1698
|
-
if next1 == '[':
|
|
1699
|
-
next2 = sys.stdin.read(1)
|
|
1700
|
-
if next2 == 'D': # Left arrow
|
|
1701
|
-
if cursor_pos > 0:
|
|
1702
|
-
cursor_pos -= 1
|
|
1703
|
-
redraw_line()
|
|
1704
|
-
elif next2 == 'C': # Right arrow
|
|
1705
|
-
if cursor_pos < len(input_buffer):
|
|
1706
|
-
cursor_pos += 1
|
|
1707
|
-
redraw_line()
|
|
1708
|
-
elif next2 == 'H': # Home
|
|
1709
|
-
cursor_pos = 0
|
|
1710
|
-
view_offset = 0
|
|
1711
|
-
redraw_line()
|
|
1712
|
-
elif next2 == 'F': # End
|
|
1713
|
-
cursor_pos = len(input_buffer)
|
|
1714
|
-
redraw_line()
|
|
1715
|
-
# For other sequences, continue without action
|
|
1716
|
-
continue
|
|
1717
|
-
|
|
1718
|
-
elif ord(char) >= 32: # Printable character
|
|
1719
|
-
# Insert character at cursor position
|
|
1720
|
-
input_buffer.insert(cursor_pos, char)
|
|
1721
|
-
cursor_pos += 1
|
|
1722
|
-
redraw_line()
|
|
1723
|
-
|
|
1724
|
-
return ''.join(input_buffer)
|
|
1725
|
-
|
|
1726
|
-
finally:
|
|
1727
|
-
# Restore terminal settings
|
|
1728
|
-
termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
|
|
1729
|
-
|
|
718
|
+
"""Get user input from a styled input box."""
|
|
719
|
+
return prompt_input_box(
|
|
720
|
+
console=self.console,
|
|
721
|
+
terminal_width=self.get_terminal_width(),
|
|
722
|
+
current_model_path=self.model_manager.current_model,
|
|
723
|
+
)
|
|
1730
724
|
|
|
1731
725
|
def run(self):
|
|
1732
726
|
"""Main REPL loop."""
|