@oneciel-ai/claude-any 0.1.88 → 0.1.89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1949 +1,1949 @@
1
- #!/usr/bin/env python3
2
- from __future__ import annotations
3
-
4
import json
import os
import select
import shutil
import subprocess
import sys
import textwrap
import time
import urllib.error
import urllib.parse
import urllib.request
from pathlib import Path

try:
    # termios/tty only exist on POSIX. Importing them unconditionally makes
    # the script fail at import time on Windows, before any of the win32
    # code paths below can run. They are only used when
    # sys.platform != "win32".
    import termios
    import tty
except ImportError:
    termios = None
    tty = None
17
-
18
# msvcrt is only importable on Windows; HAS_MSVCRT records whether the
# console-key helpers (kbhit/getch) used by _getch are available.
try:
    import msvcrt
    HAS_MSVCRT = True
except ImportError:
    HAS_MSVCRT = False

# Switch stdout/stderr to UTF-8 (presumably so the localized, non-ASCII UI
# strings can be written regardless of the console code page). Best effort:
# streams without reconfigure(), or that reject it, are left as they are.
try:
    sys.stdout.reconfigure(encoding="utf-8")
    sys.stderr.reconfigure(encoding="utf-8")
except Exception:
    pass
29
-
30
-
31
def _enable_windows_ansi() -> None:
    """Turn on ANSI escape-sequence processing for the Windows console.

    Does nothing on other platforms. The call is best effort: any failure
    while talking to kernel32 is ignored and the console is left unchanged.
    """
    if sys.platform == "win32":
        try:
            import ctypes

            vt_processing = 0x0004  # ENABLE_VIRTUAL_TERMINAL_PROCESSING
            api = ctypes.windll.kernel32
            stdout_handle = api.GetStdHandle(-11)
            console_mode = ctypes.c_ulong()
            api.GetConsoleMode(stdout_handle, ctypes.byref(console_mode))
            api.SetConsoleMode(stdout_handle, console_mode.value | vt_processing)
        except Exception:
            pass
44
-
45
-
46
class _RawTerminal:
    """Context manager that switches a POSIX tty on stdin to raw mode.

    On entry it enables ANSI output on Windows and, when stdin is a tty on a
    non-Windows platform, saves the terminal attributes and calls
    ``tty.setraw``. On exit the saved attributes are restored. Exceptions
    raised inside the ``with`` body are never suppressed.
    """

    def __enter__(self):
        _enable_windows_ansi()
        on_posix_tty = sys.platform != "win32" and sys.stdin.isatty()
        if on_posix_tty:
            fd = sys.stdin.fileno()
            self._fd = fd
            # Remember the current settings so __exit__ can restore them.
            self._old = termios.tcgetattr(fd)
            tty.setraw(fd)
        return self

    def __exit__(self, *a):
        # _old only exists when __enter__ actually captured the settings.
        if sys.platform == "win32" or not hasattr(self, "_old"):
            return False
        termios.tcsetattr(self._fd, termios.TCSADRAIN, self._old)
        return False
59
-
60
-
61
def _getch(timeout: float = 60.0) -> bytes | None:
    """Read a single byte of keyboard input, waiting at most ``timeout`` seconds.

    Returns the byte read, ``b""`` at end of input on POSIX, or ``None`` when
    nothing arrived in time.

    On Windows (with msvcrt) the console is polled every 10 ms. Elsewhere the
    stdin file descriptor is waited on with ``select`` and read with
    ``os.read``. The read must go through the descriptor rather than
    ``sys.stdin.buffer``: a buffered read of one byte pulls every pending byte
    into Python's buffer, so the rest of a multi-byte escape sequence becomes
    invisible to the next ``select`` call and the sequence is split.
    """
    if sys.platform == "win32" and HAS_MSVCRT:
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if msvcrt.kbhit():
                return msvcrt.getch()
            time.sleep(0.01)
        return None

    fd = sys.stdin.fileno()
    ready, _, _ = select.select([fd], [], [], timeout)
    if not ready:
        return None
    return os.read(fd, 1)
74
-
75
-
76
def _debug_log(msg: str) -> None:
    """Append a timestamped line to the menu debug log, when debugging is enabled.

    Logging is opt-in through the ``CLAUDE_ANY_MENU_DEBUG`` environment
    variable. The messages passed in by the key reader contain every raw
    keystroke, so writing them unconditionally to a predictable file in /tmp
    would leave typed input readable by other local users.

    Failures to write are ignored: the debug log must never break the menu.
    """
    if not os.environ.get("CLAUDE_ANY_MENU_DEBUG"):
        return
    try:
        # The with-block flushes and closes the file on exit.
        with open("/tmp/ca-menu-debug.log", "a", encoding="utf-8") as f:
            f.write(f"{time.monotonic():.3f} {msg}\n")
    except Exception:
        pass
83
-
84
-
85
def read_menu_key() -> str:
    """Read one key press and translate it to a menu key name.

    Returns one of ``KEY_UP``, ``KEY_DOWN``, ``KEY_LEFT``, ``KEY_RIGHT``,
    ``KEY_PPAGE``, ``KEY_NPAGE``, ``KEY_ESC``, ``KEY_ENTER``,
    ``KEY_BACKSPACE``, a single printable ASCII character, or ``""`` when
    nothing usable was read (timeout, end of input, unsupported key).

    Escape sequences are read byte by byte, at most three bytes after ESC,
    and reading stops as soon as the sequence is complete. Without that stop
    a three-byte arrow sequence would wait out the full timeout for a fourth
    byte, or swallow the next keystroke and be misread as a bare Esc.
    """
    ch = _getch()
    _debug_log(f"_getch returned: {repr(ch)}")
    if ch is None:
        return ""
    if ch == b"\x1b":
        seq = b"\x1b"
        for _ in range(3):
            nxt = _getch(1.0)
            _debug_log(f"  seq byte: {repr(nxt)}")
            if not nxt:
                # Timeout (None) or end of input (b""): nothing more to read.
                break
            seq += nxt
            if len(seq) == 2:
                # Only CSI ("ESC [") and SS3 ("ESC O") introducers are
                # followed by further bytes that belong to this key.
                if nxt not in (b"[", b"O"):
                    break
            elif 0x40 <= nxt[0] <= 0x7E:
                # A byte in 0x40-0x7E terminates a CSI/SS3 sequence.
                break
        _debug_log(f"  full seq: {repr(seq)} hex: {seq.hex()}")
        if seq in (b"\x1b[A", b"\x1bOA"):
            return "KEY_UP"
        if seq in (b"\x1b[B", b"\x1bOB"):
            return "KEY_DOWN"
        if seq == b"\x1b[5~":
            return "KEY_PPAGE"
        if seq == b"\x1b[6~":
            return "KEY_NPAGE"
        return "KEY_ESC"
    if sys.platform == "win32" and HAS_MSVCRT:
        # The Windows console reports special keys as a 0x00/0xE0 prefix
        # followed by a one-byte key code.
        if ch in (b"\x00", b"\xe0"):
            ch2 = _getch(0.05)
            if ch2 == b"H":
                return "KEY_UP"
            if ch2 == b"P":
                return "KEY_DOWN"
            if ch2 == b"K":
                return "KEY_LEFT"
            if ch2 == b"M":
                return "KEY_RIGHT"
            if ch2 == b"I":
                return "KEY_PPAGE"
            if ch2 == b"Q":
                return "KEY_NPAGE"
            return ""
    if ch in (b"\r", b"\n"):
        return "KEY_ENTER"
    if ch in (b"\x7f", b"\x08"):
        return "KEY_BACKSPACE"
    if ch and 0 < ch[0] < 128 and chr(ch[0]).isprintable():
        return chr(ch[0])
    return ""
131
-
132
-
133
def _term_size() -> tuple[int, int]:
    """Return the terminal size as ``(columns, lines)``.

    Delegates to ``shutil.get_terminal_size`` and falls back to ``(80, 24)``
    if the query itself raises.
    """
    default = (80, 24)
    try:
        size = shutil.get_terminal_size(fallback=default)
    except Exception:
        size = default
    return size
138
-
139
-
140
def _clear() -> None:
    """Erase the whole screen and move the cursor to the top-left corner."""
    out = sys.stdout
    out.write("\033[2J\033[H")
    out.flush()
143
-
144
-
145
def _move(row: int, col: int) -> None:
    """Position the cursor at zero-based ``(row, col)``.

    The escape sequence is one-based, hence the ``+ 1`` on both coordinates.
    The output is not flushed here.
    """
    ansi_row = row + 1
    ansi_col = col + 1
    sys.stdout.write(f"\033[{ansi_row};{ansi_col}H")
147
-
148
-
149
def _style(fg: int | None = None, bg: int | None = None, bold: bool = False, dim: bool = False, reverse: bool = False) -> str:
    """Build an ANSI SGR escape sequence for the requested text attributes.

    Args:
        fg: 256-colour palette index for the foreground, or None.
        bg: 256-colour palette index for the background, or None.
        bold: Add the bold attribute.
        dim: Add the dim attribute.
        reverse: Add the reverse-video attribute.

    Returns:
        The escape sequence, or ``""`` when no attribute was requested.
    """
    codes: list[str] = []
    if bold:
        codes.append("1")
    if dim:
        codes.append("2")
    if reverse:
        codes.append("7")
    if fg is not None:
        codes.append(f"38;5;{fg}")
    if bg is not None:
        codes.append(f"48;5;{bg}")
    if not codes:
        return ""
    # SGR parameters are separated by a bare ";". Any other separator makes
    # the terminal reject or misrender the sequence.
    return "\033[" + ";".join(codes) + "m"
162
-
163
-
164
def _reset() -> str:
    """Return the ANSI sequence that clears all text attributes."""
    sgr_reset = "\033[0m"
    return sgr_reset
166
-
167
-
168
# 256-colour palette indices that animated_text() cycles through, one per
# non-space character, offset by the animation phase.
ANIMATED_TEXT_PALETTE = (203, 209, 215, 221, 229, 187, 151, 116, 111, 147, 183, 219)
169
-
170
-
171
def animated_text(text: str, *, phase: int | None = None, bold: bool = True) -> str:
    """Colour each non-space character of ``text`` from the animation palette.

    Args:
        text: The text to colour.
        phase: Palette offset. When None it is derived from the monotonic
            clock (eight steps per second), so repeated calls shift colours.
        bold: Whether each coloured character is also bold.

    Returns:
        ``text`` unchanged when stdout is not a tty; otherwise the coloured
        text followed by an attribute reset.
    """
    if not sys.stdout.isatty():
        return text
    offset = int(time.monotonic() * 8) if phase is None else phase
    palette_size = len(ANIMATED_TEXT_PALETTE)
    pieces: list[str] = []
    for position, char in enumerate(text):
        if char.isspace():
            # Whitespace is passed through uncoloured.
            pieces.append(char)
        else:
            colour = ANIMATED_TEXT_PALETTE[(offset + position) % palette_size]
            pieces.append(_style(fg=colour, bold=bold) + char)
    pieces.append(_reset())
    return "".join(pieces)
185
-
186
-
187
def _write(row: int, col: int, text: str, style: str = "") -> None:
    """Write ``text`` at zero-based ``(row, col)`` and flush stdout.

    Nothing is written when either coordinate is negative. When ``style`` is
    non-empty it is emitted before the text and an attribute reset after it.
    """
    if min(row, col) < 0:
        return
    _move(row, col)
    out = sys.stdout
    if style:
        out.write(style)
        out.write(text)
        out.write(_reset())
    else:
        out.write(text)
    out.flush()
197
-
198
-
199
def _write_safe(row: int, col: int, text: str, style: str = "") -> None:
    """Write ``text`` at ``(row, col)``, clipped to the current terminal size.

    Nothing is written when the row is outside the screen or the column is at
    or beyond the right edge. The text is cut so it stops one column before
    the right edge.
    """
    # _term_size() returns (columns, lines), the order used by
    # shutil.get_terminal_size. Unpacking it as (height, width) would clip
    # against the wrong dimension.
    width, height = _term_size()
    if row < 0 or row >= height or col >= width:
        return
    _write(row, col, text[: max(0, width - max(0, col) - 1)], style)
204
-
205
-
206
# Path of the control command this menu runs (see run_cmd callers).
CTL = str(Path.home() / ".local/bin/claude-anyctl")
# JSON configuration file read by load_cfg().
CONFIG = Path.home() / ".config/claude-any/config.json"
# Env file consulted for NVIDIA_API_KEY by api_key_status().
NCP_ENV = Path.home() / ".config/nvd-claude-proxy/.env"
# (provider key, display label) pairs, in the order shown in the provider submenu.
PROVIDERS = [
    ("anthropic", "Anthropic"),
    ("ollama", "Ollama"),
    ("ollama-cloud", "Ollama Cloud"),
    ("vllm", "vLLM"),
    ("nvidia-hosted", "Nvidia Hosted"),
    ("self-hosted-nim", "Self Hosted NIM"),
]
APP_NAME = "Claude Any"
CREDITS = "Credits: One Ciel LLC"
# Supported UI language codes mapped to their display names.
LANGUAGES = {
    "en": "English",
    "ko": "한국어",
    "ja": "日本語",
    "zh": "中文",
}
225
- UI_TEXT = {
226
- "en": {
227
- "language": "Language",
228
- "provider": "Provider",
229
- "api_key": "API key",
230
- "base_url": "Base URL",
231
- "model": "Model",
232
- "advisor_model": "Advisor Model",
233
- "ollama_options": "Ollama options",
234
- "provider_options": "Provider options",
235
- "test": "Test compatibility",
236
- "launch": "Launch Claude Code",
237
- "quit": "Quit",
238
- "title": "claude-any pre-launch",
239
- "select_language": "Enter selects language. Up/Down moves inside submenu. Esc closes submenu.",
240
- "select_provider": "Enter selects provider. Up/Down moves inside submenu. Esc closes submenu.",
241
- "select_model": "Enter selects model. Up/Down moves inside submenu. Esc closes submenu. Custom input is at the end.",
242
- "select_advisor_model": "Enter selects advisor model. Use a long-context model such as deepseek-v4-pro.",
243
- "select_ollama_options": "Enter applies this Ollama option. Custom input accepts KEY=VALUE or unset:KEY.",
244
- "select_provider_options": "Enter applies this provider option. Custom input accepts KEY=VALUE or unset:KEY.",
245
- "test_result": "Compatibility result is shown inline. Esc closes the result. Enter runs the test again.",
246
- "help_launch": "Enter launches Claude Code with the selected provider and model.",
247
- "help_test": "Enter tests current provider/model with a minimal Claude Code tool request.",
248
- "help_language": "Enter expands language submenu inline.",
249
- "help_provider": "Enter expands provider submenu inline.",
250
- "help_model": "Enter expands model submenu inline when the provider endpoint is reachable.",
251
- "help_advisor_model": "Enter selects the larger model used by claude-any advisor routing.",
252
- "help_ollama_options": "Enter expands Ollama context and generation options.",
253
- "help_provider_options": "Enter expands provider output/context/timeout options.",
254
- "help_api_key": "Enter opens secure API key setup in the terminal. Keys are not pasted into Claude Code.",
255
- "help_base_url": "Enter edits the current provider base URL on this row.",
256
- "help_quit": "Enter exits without launching Claude Code.",
257
- "running_test": "Running compatibility test...",
258
- "test_passed": "Compatibility test passed.",
259
- "test_failed": "Compatibility test failed.",
260
- "loading_models": "Loading models from current provider...",
261
- "api_key_unchanged": "API key unchanged.",
262
- },
263
- "ko": {
264
- "language": "언어",
265
- "provider": "프로바이더",
266
- "api_key": "API 키",
267
- "base_url": "Base URL",
268
- "model": "모델",
269
- "advisor_model": "Advisor Model",
270
- "ollama_options": "Ollama 옵션",
271
- "provider_options": "프로바이더 옵션",
272
- "test": "호환성 테스트",
273
- "launch": "Claude Code 실행",
274
- "quit": "종료",
275
- "title": "claude-any 실행 전 설정",
276
- "select_language": "Enter로 언어를 선택합니다. 위/아래로 이동, Esc로 닫기.",
277
- "select_provider": "Enter로 프로바이더를 선택합니다. 위/아래로 이동, Esc로 닫기.",
278
- "select_model": "Enter로 모델을 선택합니다. 위/아래로 이동, Esc로 닫기. 마지막 항목은 직접 입력입니다.",
279
- "select_advisor_model": "Advisor Model을 선택합니다. deepseek-v4-pro 같은 긴 컨텍스트 모델을 권장합니다.",
280
- "select_ollama_options": "Enter로 Ollama 옵션을 적용합니다. 직접 입력은 KEY=VALUE 또는 unset:KEY를 받습니다.",
281
- "select_provider_options": "Enter로 프로바이더 옵션을 적용합니다. 직접 입력은 KEY=VALUE 또는 unset:KEY를 받습니다.",
282
- "test_result": "호환성 결과가 메뉴 안에 표시됩니다. Esc로 닫고 Enter로 다시 테스트합니다.",
283
- "help_launch": "선택한 프로바이더와 모델로 Claude Code를 실행합니다.",
284
- "help_test": "현재 프로바이더/모델에 최소 Claude Code 도구 요청을 보내 호환성을 확인합니다.",
285
- "help_language": "언어 선택 메뉴를 펼칩니다.",
286
- "help_provider": "프로바이더 선택 메뉴를 펼칩니다.",
287
- "help_model": "프로바이더 엔드포인트가 유효하면 모델 선택 메뉴를 펼칩니다.",
288
- "help_advisor_model": "claude-any advisor 라우팅에 사용할 더 큰 모델을 선택합니다.",
289
- "help_ollama_options": "Ollama 컨텍스트 크기와 생성 파라미터 메뉴를 펼칩니다.",
290
- "help_provider_options": "프로바이더의 출력 토큰, 컨텍스트, 타임아웃 옵션 메뉴를 펼칩니다.",
291
- "help_api_key": "API 키 입력을 이 터미널에서 안전하게 엽니다. 키는 Claude Code 채팅에 붙여넣지 않습니다.",
292
- "help_base_url": "현재 프로바이더의 Base URL을 이 줄에서 수정합니다.",
293
- "help_quit": "Claude Code를 실행하지 않고 종료합니다.",
294
- "running_test": "호환성 테스트 실행 중...",
295
- "test_passed": "호환성 테스트 성공.",
296
- "test_failed": "호환성 테스트 실패.",
297
- "loading_models": "현재 프로바이더에서 모델을 불러오는 중...",
298
- "api_key_unchanged": "API 키는 변경되지 않았습니다.",
299
- },
300
- "ja": {
301
- "language": "言語",
302
- "provider": "プロバイダー",
303
- "api_key": "APIキー",
304
- "base_url": "Base URL",
305
- "model": "モデル",
306
- "advisor_model": "Advisor Model",
307
- "ollama_options": "Ollamaオプション",
308
- "provider_options": "プロバイダーオプション",
309
- "test": "互換性テスト",
310
- "launch": "Claude Codeを起動",
311
- "quit": "終了",
312
- "title": "claude-any 起動前設定",
313
- "select_language": "Enterで言語を選択します。上下で移動、Escで閉じます。",
314
- "select_provider": "Enterでプロバイダーを選択します。上下で移動、Escで閉じます。",
315
- "select_model": "Enterでモデルを選択します。上下で移動、Escで閉じます。最後は手入力です。",
316
- "select_advisor_model": "Advisor Modelを選択します。deepseek-v4-proのような長コンテキストモデルを推奨します。",
317
- "select_ollama_options": "EnterでOllamaオプションを適用します。手入力はKEY=VALUEまたはunset:KEYです。",
318
- "select_provider_options": "Enterでプロバイダーオプションを適用します。手入力はKEY=VALUEまたはunset:KEYです。",
319
- "test_result": "互換性結果はメニュー内に表示されます。Escで閉じ、Enterで再テストします。",
320
- "help_launch": "選択したプロバイダーとモデルでClaude Codeを起動します。",
321
- "help_test": "現在のプロバイダー/モデルへ最小のClaude Codeツール要求を送り互換性を確認します。",
322
- "help_language": "言語選択メニューを展開します。",
323
- "help_provider": "プロバイダー選択メニューを展開します。",
324
- "help_model": "プロバイダーのエンドポイントが有効な場合、モデル選択メニューを展開します。",
325
- "help_advisor_model": "claude-any advisorルーティングで使う大きなモデルを選択します。",
326
- "help_ollama_options": "Ollamaのコンテキストサイズと生成パラメータを開きます。",
327
- "help_provider_options": "プロバイダーの出力トークン、コンテキスト、タイムアウト設定を開きます。",
328
- "help_api_key": "APIキー入力をこの端末で安全に開きます。キーはClaude Codeチャットに貼り付けません。",
329
- "help_base_url": "現在のプロバイダーのBase URLをこの行で編集します。",
330
- "help_quit": "Claude Codeを起動せずに終了します。",
331
- "running_test": "互換性テストを実行中...",
332
- "test_passed": "互換性テスト成功。",
333
- "test_failed": "互換性テスト失敗。",
334
- "loading_models": "現在のプロバイダーからモデルを読み込み中...",
335
- "api_key_unchanged": "APIキーは変更されませんでした。",
336
- },
337
- "zh": {
338
- "language": "语言",
339
- "provider": "提供商",
340
- "api_key": "API 密钥",
341
- "base_url": "Base URL",
342
- "model": "模型",
343
- "advisor_model": "Advisor Model",
344
- "ollama_options": "Ollama 选项",
345
- "provider_options": "提供商选项",
346
- "test": "兼容性测试",
347
- "launch": "启动 Claude Code",
348
- "quit": "退出",
349
- "title": "claude-any 启动前设置",
350
- "select_language": "按 Enter 选择语言。上下移动,Esc 关闭。",
351
- "select_provider": "按 Enter 选择提供商。上下移动,Esc 关闭。",
352
- "select_model": "按 Enter 选择模型。上下移动,Esc 关闭。最后一项可手动输入。",
353
- "select_advisor_model": "选择 Advisor Model。建议使用 deepseek-v4-pro 等长上下文模型。",
354
- "select_ollama_options": "按 Enter 应用 Ollama 选项。手动输入支持 KEY=VALUE 或 unset:KEY。",
355
- "select_provider_options": "按 Enter 应用提供商选项。手动输入支持 KEY=VALUE 或 unset:KEY。",
356
- "test_result": "兼容性结果会在菜单内显示。Esc 关闭,Enter 重新测试。",
357
- "help_launch": "使用所选提供商和模型启动 Claude Code。",
358
- "help_test": "向当前提供商/模型发送最小 Claude Code 工具请求以检查兼容性。",
359
- "help_language": "展开语言选择菜单。",
360
- "help_provider": "展开提供商选择菜单。",
361
- "help_model": "当提供商端点可用时展开模型选择菜单。",
362
- "help_advisor_model": "选择 claude-any advisor 路由使用的更大模型。",
363
- "help_ollama_options": "展开 Ollama 上下文大小和生成参数。",
364
- "help_provider_options": "展开提供商输出 token、上下文和超时选项。",
365
- "help_api_key": "在此终端安全输入 API 密钥。不要把密钥粘贴到 Claude Code 聊天中。",
366
- "help_base_url": "在这一行编辑当前提供商的 Base URL。",
367
- "help_quit": "不启动 Claude Code 并退出。",
368
- "running_test": "正在运行兼容性测试...",
369
- "test_passed": "兼容性测试成功。",
370
- "test_failed": "兼容性测试失败。",
371
- "loading_models": "正在从当前提供商加载模型...",
372
- "api_key_unchanged": "API 密钥未更改。",
373
- },
374
- }
375
-
376
-
377
- PROVIDER_NOTES = {
378
- "en": {
379
- "anthropic": [
380
- "Anthropic: uses Claude Code's native Anthropic connection.",
381
- "Set an Anthropic API key here, or run `claude /login` separately to use your Claude account login.",
382
- ],
383
- "ollama": [
384
- "Ollama: uses your local Ollama daemon; API key is normally not required.",
385
- "To use :cloud models through local Ollama, sign in on the Ollama host with `ollama signin`.",
386
- ],
387
- "ollama-cloud": [
388
- "Ollama Cloud: calls https://ollama.com/api directly; an Ollama API key is required.",
389
- "Use this when you want cloud models without relying on the local Ollama daemon's sign-in state.",
390
- ],
391
- "vllm": [
392
- "vLLM: enter the vLLM server root that implements the Anthropic Messages API.",
393
- "Do not enter an OpenAI-only chat completions endpoint; use a compatibility proxy for those servers.",
394
- ],
395
- "self-hosted-nim": [
396
- "Self-hosted NIM: enter the NIM server root that exposes Anthropic-compatible /v1/messages.",
397
- "This native path does not use the NVIDIA hosted API Catalog proxy.",
398
- ],
399
- "nvidia-hosted": [
400
- "NVIDIA hosted: uses NVIDIA API Catalog at https://integrate.api.nvidia.com/v1.",
401
- "Hosted catalog models are OpenAI-style, so claude-any keeps a compatibility route for Claude Code.",
402
- ],
403
- },
404
- "ko": {
405
- "anthropic": [
406
- "Anthropic: Claude Code의 기본 Anthropic 연결을 사용합니다.",
407
- "여기에 Anthropic API key를 넣거나, 별도로 `claude /login`을 실행해 Claude 계정 로그인을 사용하세요.",
408
- ],
409
- "ollama": [
410
- "Ollama: 로컬 Ollama 데몬을 사용합니다. 일반 로컬 모델은 API key가 필요 없습니다.",
411
- "로컬 Ollama로 :cloud 모델을 쓰려면 Ollama가 실행되는 호스트에서 `ollama signin`이 필요합니다.",
412
- ],
413
- "ollama-cloud": [
414
- "Ollama Cloud: https://ollama.com/api를 직접 호출합니다. Ollama API key가 필요합니다.",
415
- "로컬 Ollama 데몬의 로그인 상태와 무관하게 클라우드 모델을 쓰고 싶을 때 사용합니다.",
416
- ],
417
- "vllm": [
418
- "vLLM: Anthropic Messages API를 구현한 vLLM 서버 root를 넣으세요.",
419
- "OpenAI 전용 chat completions endpoint를 넣지 마세요. 그런 서버는 호환 프록시가 필요합니다.",
420
- ],
421
- "self-hosted-nim": [
422
- "Self-hosted NIM: Anthropic 호환 /v1/messages를 노출하는 NIM 서버 root를 넣으세요.",
423
- "이 native 경로는 NVIDIA hosted API Catalog 프록시를 사용하지 않습니다.",
424
- ],
425
- "nvidia-hosted": [
426
- "NVIDIA hosted: https://integrate.api.nvidia.com/v1 의 NVIDIA API Catalog를 사용합니다.",
427
- "Hosted catalog 모델은 OpenAI 방식이므로 Claude Code에는 claude-any 호환 라우트를 유지합니다.",
428
- ],
429
- },
430
- "ja": {
431
- "anthropic": [
432
- "Anthropic: Claude CodeのネイティブAnthropic接続を使います。",
433
- "ここでAnthropic API keyを設定するか、別途`claude /login`を実行してClaudeアカウントログインを使ってください。",
434
- ],
435
- "ollama": [
436
- "Ollama: ローカルのOllama daemonを使います。通常のローカルモデルではAPI keyは不要です。",
437
- "ローカルOllama経由で:cloudモデルを使うには、Ollamaホストで`ollama signin`が必要です。",
438
- ],
439
- "ollama-cloud": [
440
- "Ollama Cloud: https://ollama.com/api を直接呼び出します。Ollama API keyが必要です。",
441
- "ローカルOllama daemonのサインイン状態に依存せずクラウドモデルを使う場合に選びます。",
442
- ],
443
- "vllm": [
444
- "vLLM: Anthropic Messages APIを実装したvLLMサーバーrootを入力してください。",
445
- "OpenAI専用chat completions endpointは入力しないでください。その場合は互換プロキシが必要です。",
446
- ],
447
- "self-hosted-nim": [
448
- "Self-hosted NIM: Anthropic互換/v1/messagesを公開するNIMサーバーrootを入力してください。",
449
- "このnative経路はNVIDIA hosted API Catalog proxyを使いません。",
450
- ],
451
- "nvidia-hosted": [
452
- "NVIDIA hosted: https://integrate.api.nvidia.com/v1 のNVIDIA API Catalogを使います。",
453
- "Hosted catalogモデルはOpenAI形式のため、Claude Codeにはclaude-any互換ルートを維持します。",
454
- ],
455
- },
456
- "zh": {
457
- "anthropic": [
458
- "Anthropic: 使用Claude Code原生Anthropic连接。",
459
- "可在此设置Anthropic API key,或另行运行`claude /login`使用Claude账号登录。",
460
- ],
461
- "ollama": [
462
- "Ollama: 使用本地Ollama daemon;普通本地模型通常不需要API key。",
463
- "若通过本地Ollama使用:cloud模型,需要在运行Ollama的主机上执行`ollama signin`。",
464
- ],
465
- "ollama-cloud": [
466
- "Ollama Cloud: 直接调用 https://ollama.com/api;需要Ollama API key。",
467
- "当你想不依赖本地Ollama daemon登录状态使用云端模型时选择它。",
468
- ],
469
- "vllm": [
470
- "vLLM: 请输入实现Anthropic Messages API的vLLM服务器root。",
471
- "不要输入仅OpenAI chat completions的端点;这类服务器需要兼容代理。",
472
- ],
473
- "self-hosted-nim": [
474
- "Self-hosted NIM: 请输入暴露 Anthropic-compatible /v1/messages 的 NIM 服务器 root。",
475
- "此 native 路径不使用 NVIDIA hosted API Catalog 代理。",
476
- ],
477
- "nvidia-hosted": [
478
- "NVIDIA hosted: 使用 https://integrate.api.nvidia.com/v1 的 NVIDIA API Catalog。",
479
- "Hosted catalog 模型是 OpenAI 风格,因此 Claude Code 仍使用 claude-any 兼容路由。",
480
- ],
481
- },
482
- }
483
-
484
-
485
def init_colors() -> None:
    """Do nothing; colours are produced directly as ANSI sequences by cp()."""
    return None
487
-
488
-
489
def cp(n: int) -> str:
    """Return the foreground-colour escape sequence for colour pair ``n``.

    Pairs 1-6 map to fixed 256-colour palette indices; any other value
    yields ``""`` (no styling).
    """
    foreground_by_pair = {1: 255, 2: 10, 3: 11, 4: 9, 5: 255, 6: 208}
    colour = foreground_by_pair.get(n)
    if colour is None:
        return ""
    return _style(fg=colour)
503
-
504
-
505
def load_cfg() -> dict:
    """Load the claude-any configuration from ``CONFIG``.

    Returns:
        The parsed JSON object. When the file is missing, unreadable, not
        valid UTF-8 JSON, or its top-level value is not an object, a default
        configuration with ``nvidia-hosted`` as the current provider is
        returned instead.
    """
    default = {"current_provider": "nvidia-hosted", "providers": {}}
    try:
        # Read explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        data = json.loads(CONFIG.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: bad JSON or bad UTF-8.
        return default
    # Every caller uses dict methods on the result; a JSON list or scalar
    # would otherwise crash them.
    return data if isinstance(data, dict) else default
512
-
513
-
514
# Hard-coded statuses for NVIDIA-hosted models, keyed by model name:
# (badge text, explanation). These take precedence over cached test results
# in compatibility_badge().
KNOWN_NVIDIA_MODEL_STATUS = {
    "claude-nvidia-llama-3.1-nemotron-ultra-253b-v1": ("FAIL 404", "listed but not callable for this NVIDIA account"),
}
DEFAULT_ADVISOR_MODELS = ["deepseek-v4-pro", "claude-opus-4-6", "claude-sonnet-4-6", "glm-5.1"]
# How long a cached compatibility result stays fresh: 24 hours for a pass,
# 5 minutes for a failure (see cache_entry_fresh).
COMPAT_OK_TTL_SECONDS = 24 * 60 * 60
COMPAT_FAIL_TTL_SECONDS = 5 * 60
520
-
521
-
522
def cache_age_seconds(entry: dict) -> int | None:
    """Return how many whole seconds ago a cache entry was tested.

    The entry's ``tested_at`` value is interpreted as a Unix timestamp.
    Returns None when it is missing or cannot be converted to an integer.
    Timestamps in the future count as age 0.
    """
    try:
        stamp = int(entry.get("tested_at"))
    except Exception:
        return None
    elapsed = int(time.time()) - stamp
    return elapsed if elapsed > 0 else 0
528
-
529
-
530
def cache_entry_fresh(entry: dict) -> bool:
    """Tell whether a compatibility cache entry is still within its TTL.

    Passing entries (``ok`` truthy) use the OK TTL, failing ones the shorter
    failure TTL. Entries without a usable timestamp are never fresh.
    """
    age = cache_age_seconds(entry)
    if age is None:
        return False
    if entry.get("ok"):
        limit = COMPAT_OK_TTL_SECONDS
    else:
        limit = COMPAT_FAIL_TTL_SECONDS
    return age <= limit
536
-
537
-
538
def human_age(seconds: int | None) -> str:
    """Format an age in seconds as a short "Ns/Nm/Nh ago" string.

    None is rendered as "unknown age". Minutes and hours are rounded down.
    """
    if seconds is None:
        return "unknown age"
    if seconds >= 3600:
        return f"{seconds // 3600}h ago"
    if seconds >= 60:
        return f"{seconds // 60}m ago"
    return f"{seconds}s ago"
546
-
547
-
548
def compatibility_entry(provider: str, upstream: str, alias: str | None = None) -> dict | None:
    """Look up a fresh cached compatibility result for a model.

    Args:
        provider: Provider key whose cache section is searched.
        upstream: Upstream model name.
        alias: Optional alias; checked before ``upstream``.

    Returns:
        The first fresh cache entry found under the alias or the upstream
        name, or None when there is no usable entry.
    """
    cache = load_cfg().get("compatibility_cache", {})
    if not isinstance(cache, dict):
        return None
    provider_cache = cache.get(provider, {})
    if not isinstance(provider_cache, dict):
        return None
    for key in (alias, upstream):
        if not key:
            continue
        entry = provider_cache.get(key)
        # Keep looking when an entry is stale: an expired alias entry must
        # not hide a still-fresh entry stored under the upstream name.
        if isinstance(entry, dict) and cache_entry_fresh(entry):
            return entry
    return None
560
-
561
-
562
def compatibility_badge(provider: str, upstream: str, alias: str | None = None) -> str:
    """Return a short bracketed compatibility badge for a model.

    For ``nvidia-hosted`` a hard-coded known status wins; otherwise the badge
    comes from the fresh cache entry: ``[OK]``, ``[FAIL <code>]``,
    ``[TIMEOUT]`` or ``[FAIL]``. Without a cache entry the result is
    ``[untested]`` for ``nvidia-hosted`` and ``""`` for other providers.
    """
    nvidia = provider == "nvidia-hosted"
    if nvidia:
        known = KNOWN_NVIDIA_MODEL_STATUS.get(upstream)
        if not known and alias:
            known = KNOWN_NVIDIA_MODEL_STATUS.get(alias)
        if known:
            return f"[{known[0]}]"

    entry = compatibility_entry(provider, upstream, alias)
    if not entry:
        if nvidia:
            return "[untested]"
        return ""
    if entry.get("ok"):
        return "[OK]"

    status_code = entry.get("code")
    if status_code:
        return f"[FAIL {status_code}]"

    # No status code recorded: recognise timeouts from the message text.
    message = str(entry.get("message") or "").lower()
    timed_out = "timeout" in message or "timed out" in message
    return "[TIMEOUT]" if timed_out else "[FAIL]"
579
-
580
-
581
def current_compatibility_line(provider: str, pcfg: dict) -> str | None:
    """Build the "Compatibility: ..." status line for the selected model.

    Args:
        provider: Current provider key.
        pcfg: That provider's configuration section.

    Returns:
        A status line, or None when there is a model but no badge to show.
        With no model selected a fixed "no model selected" line is returned.
    """
    model = str(pcfg.get("current_model") or "")
    if not model:
        return "Compatibility: no model selected"

    badge = compatibility_badge(provider, model, model)
    if not badge:
        return None

    entry = compatibility_entry(provider, model, model)
    failed = bool(entry) and not entry.get("ok")
    if failed:
        # Show at most 90 characters of the recorded failure detail.
        detail = str(entry.get("message") or entry.get("diagnosis") or "")[:90]
        return f"Compatibility: {badge} {model} {detail}".strip()

    known = KNOWN_NVIDIA_MODEL_STATUS.get(model)
    if known:
        return f"Compatibility: {badge} {model} - {known[1]}"
    return f"Compatibility: {badge} {model}"
596
-
597
-
598
def current_language() -> str:
    """Return the configured UI language code, defaulting to ``"en"``.

    Codes that are not listed in ``LANGUAGES`` also fall back to ``"en"``.
    """
    configured = load_cfg().get("language", "en")
    if configured in LANGUAGES:
        return configured
    return "en"
601
-
602
-
603
def t(key: str) -> str:
    """Translate a UI text key into the current language.

    Falls back to the English text, and finally to the key itself, when no
    translation exists.
    """
    english = UI_TEXT["en"]
    table = UI_TEXT.get(current_language(), english)
    if key in table:
        return table[key]
    return english.get(key, key)
606
-
607
-
608
def run_cmd(args: list[str]) -> tuple[int, str]:
    """Run a command and capture its combined stdout/stderr.

    Args:
        args: Program and arguments, passed without a shell.

    Returns:
        ``(exit_code, output)``. If the program cannot be started at all
        (missing binary, no execute permission) the result is ``(127, message)``
        instead of an exception, so callers display an error line rather than
        crashing the menu.
    """
    try:
        proc = subprocess.run(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            # Decode as UTF-8 and never fail on undecodable bytes, instead of
            # depending on the platform's locale codec.
            encoding="utf-8",
            errors="replace",
        )
    except OSError as exc:
        program = args[0] if args else "command"
        return 127, f"{program}: {exc}"
    return proc.returncode, proc.stdout
611
-
612
-
613
def read_env_file(path: Path) -> dict[str, str]:
    """Parse a simple ``KEY=VALUE`` env file into a dict.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    Keys and values are stripped of surrounding whitespace, and values also
    of surrounding single/double quote characters. A missing file yields an
    empty dict.
    """
    if not path.exists():
        return {}
    parsed: dict[str, str] = {}
    for raw in path.read_text(errors="ignore").splitlines():
        entry = raw.strip()
        if entry == "" or entry.startswith("#"):
            continue
        name, sep, value = entry.partition("=")
        if not sep:
            continue
        parsed[name.strip()] = value.strip().strip("'\"")
    return parsed
624
-
625
-
626
def meaningful_key(value: str | None) -> bool:
    """Tell whether ``value`` is a real API key rather than empty or a placeholder."""
    if not value:
        return False
    placeholders = ("dummy", "not-used", "ollama")
    return value not in placeholders
628
-
629
-
630
def api_key_status(provider: str, pcfg: dict) -> str:
    """Describe whether an API key is configured for ``provider``.

    For ``nvidia-hosted`` the key is read from ``NVIDIA_API_KEY`` in the
    ``NCP_ENV`` file; for every other provider from ``pcfg["api_key"]``.
    """
    if provider == "nvidia-hosted":
        nvidia_key = read_env_file(NCP_ENV).get("NVIDIA_API_KEY")
        if meaningful_key(nvidia_key):
            return "API key: set (NVIDIA)"
        return "API key: missing (NVIDIA required)"

    configured = meaningful_key(pcfg.get("api_key"))
    if provider == "anthropic":
        if configured:
            return "API key: set (Anthropic)"
        return "API key: not set (use API key or Claude login)"
    if provider == "ollama-cloud":
        if configured:
            return "API key: set (Ollama Cloud)"
        return "API key: missing (Ollama Cloud required)"
    if configured:
        return "API key: set"
    if provider == "ollama":
        return "API key: not required for Ollama"
    return "API key: optional or not configured"
643
-
644
-
645
def join_url(base: str, path: str) -> str:
    """Join a base URL and an absolute path without doubling ``/v1``.

    Trailing slashes on ``base`` are dropped. When ``base`` already ends in
    ``/v1`` and ``path`` starts with ``/v1/``, the prefix is taken from the
    base only.
    """
    root = base.rstrip("/")
    if root.endswith("/v1") and path.startswith("/v1/"):
        path = path[len("/v1"):]
    return root + path
650
-
651
-
652
def probe_base_url(provider: str, pcfg: dict) -> str:
    """Check the provider's base URL and return a one-line status.

    Args:
        provider: Provider key.
        pcfg: That provider's configuration section (``base_url``, ``api_key``).

    Returns:
        A "Base URL: ..." line. Missing and placeholder URLs are reported
        without any network access, and ``nvidia-hosted`` is never probed.
        Other providers get a short GET of their model-list endpoint
        (``/api/tags`` for Ollama providers, ``/v1/models`` otherwise).
    """
    base = (pcfg.get("base_url") or "").rstrip("/")
    if not base:
        return "Base URL: missing"
    if "your-" in base:
        return f"Base URL: placeholder ({base})"
    if provider == "nvidia-hosted":
        return f"Base URL: NVIDIA hosted ({base}); local router http://127.0.0.1:8799 starts on launch"
    path = "/api/tags" if provider in ("ollama", "ollama-cloud") else "/v1/models"
    url = join_url(base, path)
    headers = {}
    key = pcfg.get("api_key")
    if meaningful_key(key):
        # Send the key in both header styles so either kind of server accepts it.
        headers = {"x-api-key": key, "authorization": f"Bearer {key}"}
    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=2.5) as resp:
            body = resp.read(131072).decode("utf-8", errors="ignore")
        count = ""
        try:
            data = json.loads(body)
            if provider in ("ollama", "ollama-cloud"):
                count = f", {len(data.get('models', []))} models"
            elif isinstance(data.get("data"), list):
                count = f", {len(data['data'])} models"
        except Exception:
            # The count is informational only; an unexpected or truncated
            # body still means the endpoint answered.
            pass
        return f"Base URL: model list reachable ({path}{count})"
    except urllib.error.HTTPError as exc:
        if exc.code in (401, 403):
            return f"Base URL: model list reachable, auth rejected ({exc.code})"
        return f"Base URL: HTTP {exc.code}"
    except Exception as exc:
        # nvidia-hosted returned before any request was made, so no
        # provider-specific handling is reachable here.
        return f"Base URL: unreachable ({type(exc).__name__})"
688
-
689
-
690
def preflight_checks() -> list[str]:
    """Collect the status lines shown for the current provider.

    The list holds the base-URL probe result, the API-key status, the
    localized provider notes and, when available, the compatibility line.
    """
    provider, pcfg = current_provider_cfg()
    notes_by_provider = PROVIDER_NOTES.get(current_language(), PROVIDER_NOTES["en"])
    lines = [probe_base_url(provider, pcfg), api_key_status(provider, pcfg)]
    lines.extend(notes_by_provider.get(provider, []))
    compat = current_compatibility_line(provider, pcfg)
    if compat:
        lines.append(compat)
    return lines
703
-
704
-
705
def provider_preview_checks(provider: str) -> list[str]:
    """Return preview lines for ``provider`` without probing the network.

    The list holds the configured base URL (or "unset"), the API-key status
    and the localized provider notes.
    """
    pcfg = load_cfg().get("providers", {}).get(provider, {})
    notes_by_provider = PROVIDER_NOTES.get(current_language(), PROVIDER_NOTES["en"])
    preview = [f"Base URL: {pcfg.get('base_url') or 'unset'}"]
    preview.append(api_key_status(provider, pcfg))
    preview.extend(notes_by_provider.get(provider, []))
    return preview
715
-
716
-
717
def selected_provider_value(sub: dict | None) -> str | None:
    """Return the provider key highlighted in an open provider submenu.

    Returns None when there is no submenu, when it is not a provider
    submenu, or when its current index does not resolve to an item value.
    """
    is_provider_menu = bool(sub) and sub.get("kind") == "provider"
    if not is_provider_menu:
        return None
    try:
        highlighted = sub["items"][sub["idx"]]
        return str(highlighted["value"])
    except Exception:
        return None
724
-
725
-
726
def status_text() -> list[str]:
    """Return the lines printed by ``<CTL> status``.

    An empty output is replaced by a single "status unavailable" line.
    """
    _, output = run_cmd([CTL, "status"])
    if not output:
        return ["status unavailable"]
    return output.strip().splitlines()
729
-
730
-
731
def current_provider() -> str:
    """Return the configured provider key, defaulting to ``nvidia-hosted``."""
    cfg = load_cfg()
    return cfg.get("current_provider", "nvidia-hosted")
733
-
734
-
735
def current_provider_cfg() -> tuple[str, dict]:
    """Return the current provider key together with its config section.

    The provider defaults to ``nvidia-hosted`` and the section to ``{}``
    when absent from the configuration.
    """
    cfg = load_cfg()
    name = cfg.get("current_provider", "nvidia-hosted")
    section = cfg.get("providers", {}).get(name, {})
    return name, section
739
-
740
-
741
def is_ollama_provider(provider: str) -> bool:
    """Tell whether ``provider`` is one of the two Ollama providers."""
    return provider == "ollama" or provider == "ollama-cloud"
743
-
744
-
745
def has_provider_options(provider: str) -> bool:
    """Tell whether the provider-options menu row applies to ``provider``."""
    with_options = ("vllm", "nvidia-hosted", "self-hosted-nim", "ollama", "ollama-cloud")
    return provider in with_options
747
-
748
-
749
def ollama_ctx_text(pcfg: dict) -> str:
    """Describe the configured Ollama context size.

    A fixed ``num_ctx`` is shown as-is. ``auto`` (the default, matched
    case-insensitively) is shown with its min-max range, defaulting to
    32768-131072.
    """
    num_ctx = pcfg.get("num_ctx", "auto")
    if str(num_ctx).lower() != "auto":
        return str(num_ctx)
    low = pcfg.get("num_ctx_min", 32768)
    high = pcfg.get("num_ctx_max", 131072)
    return f"auto {low}-{high}"
754
-
755
-
756
def ollama_options_summary(pcfg: dict) -> str:
    """Summarize the Ollama options of a provider config on one line.

    The parts (context, keep-alive, think, timeout, rpm, stream, optional
    flags and up to three extra ``ollama_options``) are joined with "; ".
    """
    timeout = pcfg.get("request_timeout_ms", "default")
    # Only append the unit to a real value; an unset timeout reads "default",
    # not "defaultms", matching provider_options_summary().
    timeout_text = f"{timeout}ms" if timeout != "default" else "default"
    parts = [
        f"ctx {ollama_ctx_text(pcfg)}",
        f"keep {pcfg.get('keep_alive', 'default')}",
        f"think {str(bool(pcfg.get('think', False))).lower()}",
        f"timeout {timeout_text}",
        f"rpm {pcfg.get('rate_limit_rpm', 40)}",
        f"stream {'on' if bool(pcfg.get('stream_enabled', True)) else 'off'}",
    ]
    if bool(pcfg.get("rate_limit_status", True)):
        parts.append("rpm_status on")
    if bool(pcfg.get("stream_word_chunking", False)):
        parts.append("word_chunk on")
    opts = pcfg.get("ollama_options") or {}
    if isinstance(opts, dict) and opts:
        # Show at most the first three extra options, in key order.
        extra = ", ".join(f"{k}={v}" for k, v in sorted(opts.items())[:3])
        parts.append(extra)
    return "; ".join(parts)
774
-
775
-
776
def provider_options_summary(provider: str, pcfg: dict) -> str:
    """Summarize the provider options of a config section on one line.

    Which parts appear depends on the provider: context/reserve/native for
    vLLM and self-hosted NIM, rpm for rate-limited providers, and stream
    settings for the non-Ollama ones. Parts are joined with "; ".
    """
    native_group = provider in ("vllm", "self-hosted-nim")
    rpm_group = provider in ("nvidia-hosted", "self-hosted-nim", "ollama", "ollama-cloud")
    stream_group = provider in ("vllm", "nvidia-hosted", "self-hosted-nim")

    timeout = pcfg.get("request_timeout_ms", "default")
    timeout_text = "default" if timeout == "default" else f"{timeout}ms"

    summary: list[str] = []
    if native_group:
        summary.append(f"ctx {pcfg.get('context_window', 'default')}")
        summary.append(f"reserve {pcfg.get('context_reserve_tokens', 'default')}")
    summary.append(f"max {pcfg.get('max_output_tokens', 'default')}")
    summary.append(f"timeout {timeout_text}")
    if rpm_group:
        summary.append(f"rpm {pcfg.get('rate_limit_rpm', 40)}")
        if bool(pcfg.get("rate_limit_status", True)):
            summary.append("rpm_status on")
    if native_group:
        summary.append(f"native {str(bool(pcfg.get('native_compat', True))).lower()}")
    if stream_group:
        summary.append(f"stream {'on' if bool(pcfg.get('stream_enabled', True)) else 'off'}")
        if bool(pcfg.get("stream_word_chunking", False)):
            summary.append("word_chunk on")
    return "; ".join(summary)
796
-
797
-
798
def main_items() -> list[tuple[str, str]]:
    """Build the main menu rows as ``(action key, label)`` pairs.

    Every row except the final "quit" row is prefixed with its zero-based
    position. The Ollama-options and provider-options rows only appear for
    providers they apply to.
    """
    provider, pcfg = current_provider_cfg()
    lang = current_language()
    model = pcfg.get("current_model", "unset")
    advisor_model = pcfg.get("advisor_model") or "off"
    base = pcfg.get("base_url", "unset")

    entries: list[tuple[str, str]] = [
        ("language", f"{t('language')} [{LANGUAGES.get(lang, lang)}]"),
        ("provider", f"{t('provider')} [{provider}]"),
        ("api-key", t("api_key")),
        ("base-url", f"{t('base_url')} [{base}]"),
        ("model", f"{t('model')} [{model}]"),
        ("advisor-model", f"{t('advisor_model')} [{advisor_model}]"),
    ]
    if is_ollama_provider(provider):
        entries.append(("ollama-options", f"{t('ollama_options')} [{ollama_options_summary(pcfg)}]"))
    if has_provider_options(provider):
        entries.append(("provider-options", f"{t('provider_options')} [{provider_options_summary(provider, pcfg)}]"))
    entries.append(("test", t("test")))
    entries.append(("launch", t("launch")))

    rows = [(key, f"{number}. {label}") for number, (key, label) in enumerate(entries)]
    # The quit row is deliberately left unnumbered.
    rows.append(("quit", t("quit")))
    return rows
823
-
824
-
825
def settings_ready_except_api_key() -> bool:
    """Tell whether provider, base URL and model are all configured.

    A base URL that still contains the ``your-`` placeholder does not count
    as configured. The API key is not considered.
    """
    provider, pcfg = current_provider_cfg()
    base = pcfg.get("base_url", "")
    model = pcfg.get("current_model", "")
    if not (provider and base and model):
        return False
    return "your-" not in base
830
-
831
def default_base_url(provider: str) -> str:
    """Return the default base URL for ``provider``.

    Unknown providers get ``http://localhost:8000``. The ``your-...`` hosts
    are placeholders the user is expected to replace.
    """
    defaults = {
        "anthropic": "https://api.anthropic.com",
        "ollama": "http://your-ollama:11434",
        "ollama-cloud": "https://ollama.com",
        "vllm": "http://your-vllm:8000",
        "nvidia-hosted": "https://integrate.api.nvidia.com/v1",
        "self-hosted-nim": "http://your-nim:8000",
    }
    if provider in defaults:
        return defaults[provider]
    return "http://localhost:8000"
840
-
841
-
842
def help_for_action(action: str, sub_kind: str | None = None) -> str:
    """Return the localized help line for the highlighted menu item.

    An open submenu (``sub_kind``) takes precedence over the main-menu
    ``action``. Unknown actions get a generic English hint.
    """
    submenu_text_keys = {
        "language": "select_language",
        "provider": "select_provider",
        "model": "select_model",
        "advisor-model": "select_advisor_model",
        "ollama-options": "select_ollama_options",
        "provider-options": "select_provider_options",
        "test-result": "test_result",
    }
    if sub_kind in submenu_text_keys:
        return t(submenu_text_keys[sub_kind])

    action_text_keys = {
        "launch": "help_launch",
        "test": "help_test",
        "language": "help_language",
        "provider": "help_provider",
        "model": "help_model",
        "advisor-model": "help_advisor_model",
        "ollama-options": "help_ollama_options",
        "provider-options": "help_provider_options",
        "api-key": "help_api_key",
        "base-url": "help_base_url",
        "quit": "help_quit",
    }
    if action in action_text_keys:
        return t(action_text_keys[action])
    return "Enter selects this action."
870
-
871
-
872
def get_models_for_current_provider() -> tuple[list[tuple[str, str]], str]:
    """Run the ``models`` control command and parse its tab-separated output.

    The first output line is skipped. Every later line of the form
    ``alias<TAB>upstream`` with both fields non-blank becomes one entry.

    Returns:
        ``(models, raw_output)`` where ``models`` is a list of
        ``(upstream, alias)`` pairs and ``raw_output`` is the unparsed
        command output.
    """
    _status, out = run_cmd([CTL, "models"])
    models: list[tuple[str, str]] = []
    for row in out.splitlines()[1:]:
        alias, tab, upstream = row.partition("\t")
        alias, upstream = alias.strip(), upstream.strip()
        if tab and alias and upstream:
            models.append((upstream, alias))
    return models, out
882
-
883
-
884
def build_provider_submenu() -> dict:
    """Build the provider picker submenu state.

    Each item shows the provider label, its key and its configured base
    URL. The cursor starts on the current provider, or on the first row
    when the current provider is not listed.
    """
    cfg = load_cfg()
    current = cfg.get("current_provider", "nvidia-hosted")
    items = []
    for key, label in PROVIDERS:
        base = cfg.get("providers", {}).get(key, {}).get("base_url", "")
        items.append({
            "value": key,
            "label": f"{label:<16} {key:<15} {base}",
            "current": key == current,
        })
    # Index of the (last) row matching the current provider, else the first row.
    idx = max((pos for pos, (key, _) in enumerate(PROVIDERS) if key == current), default=0)
    return {"kind": "provider", "parent": "provider", "items": items, "idx": idx, "offset": 0}
895
-
896
-
897
def build_language_submenu() -> dict:
    """Build the language picker submenu state.

    The cursor starts on the current language, or on the first row when
    the current language is not one of the known codes.
    """
    current = current_language()
    codes = list(LANGUAGES)
    items = [
        {"value": code, "label": f"{code:<2} {LANGUAGES[code]}", "current": code == current}
        for code in codes
    ]
    idx = codes.index(current) if current in codes else 0
    return {"kind": "language", "parent": "language", "items": items, "idx": idx, "offset": 0}
906
-
907
-
908
def build_api_key_submenu() -> dict:
    """Build the submenu that picks which provider's API key to edit.

    The cursor starts on the current provider, or on the first row when
    the current provider is not listed.
    """
    current = current_provider()
    items = [
        {"value": key, "label": f"{label:<16} {key:<15}", "current": key == current}
        for key, label in PROVIDERS
    ]
    # Index of the (last) row matching the current provider, else the first row.
    idx = max((pos for pos, (key, _) in enumerate(PROVIDERS) if key == current), default=0)
    return {"kind": "api-key", "parent": "api-key", "items": items, "idx": idx, "offset": 0}
917
-
918
def build_model_submenu() -> tuple[dict | None, list[str]]:
    """Build the model picker submenu for the current provider.

    Returns:
        ``(submenu, [])`` when at least one model was listed. Otherwise
        ``(None, lines)``, where ``lines`` holds at most the first two
        lines of the raw listing output, or a fixed hint when that output
        is blank.
    """
    models, raw = get_models_for_current_provider()
    if not models:
        message_lines = raw.strip().splitlines() or ["No models found. Use custom input."]
        return None, message_lines[:2]

    provider, pcfg = current_provider_cfg()
    current = pcfg.get("current_model", "")
    items = []
    idx = 0
    for position, (upstream, alias) in enumerate(models):
        is_current = current in (upstream, alias)
        if is_current:
            idx = position
        badge = compatibility_badge(provider, upstream, alias)
        known = KNOWN_NVIDIA_MODEL_STATUS.get(upstream) or KNOWN_NVIDIA_MODEL_STATUS.get(alias)
        entry = compatibility_entry(provider, upstream, alias)
        if known:
            # A built-in status entry takes precedence over cached test results.
            description = known[1]
        elif entry:
            state = "OK" if entry.get("ok") else "failed"
            detail = entry.get("diagnosis") or entry.get("message") or ""
            description = f"Last compatibility test: {state} ({human_age(cache_age_seconds(entry))}). {detail}".strip()
        else:
            description = ""
        items.append({
            "value": upstream,
            "label": f"{badge:<11} {upstream:<58} {alias}",
            "current": is_current,
            "description": description,
        })
    # The free-form entry always comes last.
    items.append({"value": "__custom__", "label": "Custom model id...", "current": False})
    return {"kind": "model", "parent": "model", "items": items, "idx": idx, "offset": 0}, []
949
-
950
-
951
def build_advisor_model_submenu() -> dict:
    """Build the advisor-model picker submenu.

    The first row disables the advisor and the last row offers a custom
    id. In between come the default advisor models followed by the models
    listed for the current provider, with empty ids and duplicates dropped.
    """
    _provider, pcfg = current_provider_cfg()
    current = pcfg.get("advisor_model") or ""
    listed = [upstream for upstream, _ in get_models_for_current_provider()[0]]
    # dict.fromkeys keeps first-seen order while removing duplicates.
    values = list(dict.fromkeys(mid for mid in DEFAULT_ADVISOR_MODELS + listed if mid))

    items = [{
        "value": "",
        "label": "Disable Advisor Model",
        "current": not current,
        "description": "Disable claude-any advisor routing.",
    }]
    idx = 0
    # Rows are numbered from 1 because row 0 is the "disable" entry.
    for position, mid in enumerate(values, 1):
        is_current = mid == current
        if is_current:
            idx = position
        if mid == "deepseek-v4-pro":
            desc = "Recommended long-context advisor model."
        else:
            desc = ""
        items.append({"value": mid, "label": mid, "current": is_current, "description": desc})
    items.append({"value": "__custom__", "label": "Custom advisor model id...", "current": False})
    return {"kind": "advisor-model", "parent": "advisor-model", "items": items, "idx": idx, "offset": 0}
968
-
969
-
970
- OLLAMA_OPTION_DESCRIPTIONS = {
971
- "__edit_num_ctx__": {
972
- "en": "Edit Ollama num_ctx. This is the context window sent to Ollama; it cannot exceed the server/model limit.",
973
- "ko": "Ollama num_ctx를 수정합니다. 한 번에 볼 컨텍스트 창이며 서버/모델 한계를 넘게 설정해도 실제 한계는 늘지 않습니다.",
974
- "ja": "Ollamaのnum_ctxを編集します。Ollamaへ送るコンテキスト幅で、サーバー/モデル上限は超えられません。",
975
- "zh": "编辑 Ollama num_ctx。这是发送给 Ollama 的上下文窗口,不能超过服务器/模型上限。",
976
- },
977
- "__edit_min__": {
978
- "en": "Edit the minimum context used when num_ctx is auto. Small requests will not go below this value.",
979
- "ko": "num_ctx=auto일 때 사용할 최소 컨텍스트입니다. 작은 요청도 이 값보다 작게 내려가지 않습니다.",
980
- "ja": "num_ctx=auto時の最小コンテキストです。小さな要求でもこの値未満にはなりません。",
981
- "zh": "编辑 num_ctx=auto 时的最小上下文。小请求也不会低于此值。",
982
- },
983
- "__edit_max__": {
984
- "en": "Edit the maximum context used when num_ctx is auto. Keep it at or below the real server context limit.",
985
- "ko": "num_ctx=auto일 때 사용할 최대 컨텍스트입니다. 실제 서버 컨텍스트 한계 이하로 두는 것이 맞습니다.",
986
- "ja": "num_ctx=auto時の最大コンテキストです。実際のサーバー上限以下にしてください。",
987
- "zh": "编辑 num_ctx=auto 时的最大上下文。应不高于真实服务器上下文上限。",
988
- },
989
- "__edit_keep_alive__": {
990
- "en": "Edit how long Ollama keeps the model loaded after a request. Longer values reduce reloads but hold memory.",
991
- "ko": "요청 후 Ollama가 모델을 메모리에 유지하는 시간입니다. 길수록 재로딩은 줄지만 메모리를 더 오래 잡습니다.",
992
- "ja": "要求後にOllamaがモデルを保持する時間です。長いほど再読み込みは減りますがメモリを保持します。",
993
- "zh": "编辑请求后 Ollama 保持模型加载的时间。更长可减少重载,但会占用内存。",
994
- },
995
- "__edit_temperature__": {
996
- "en": "Edit sampling temperature. Higher is more varied; lower is more deterministic.",
997
- "ko": "샘플링 temperature입니다. 높을수록 답변이 다양해지고, 낮을수록 결정적으로 동작합니다.",
998
- "ja": "サンプリングtemperatureです。高いほど多様、低いほど決定的になります。",
999
- "zh": "编辑采样 temperature。越高越多样,越低越确定。",
1000
- },
1001
- "__edit_top_p__": {
1002
- "en": "Edit nucleus sampling top_p. Lower values restrict token choices; 0.8 is a moderate default.",
1003
- "ko": "누적 확률 top_p입니다. 낮을수록 후보 토큰을 좁히며, 0.8은 중간 정도의 기본값입니다.",
1004
- "ja": "nucleus samplingのtop_pです。低いほど候補を絞り、0.8は中程度の既定値です。",
1005
- "zh": "编辑 nucleus sampling top_p。越低候选越窄;0.8 是中等默认值。",
1006
- },
1007
- "__edit_max_tokens__": {
1008
- "en": "Edit max output tokens (Ollama num_predict). Input plus reserved output must fit in the context window.",
1009
- "ko": "최대 출력 토큰(Ollama num_predict)입니다. 입력과 예약 출력이 컨텍스트 창 안에 같이 들어가야 합니다.",
1010
- "ja": "最大出力トークン(Ollama num_predict)です。入力と予約出力は同じコンテキスト内に収まる必要があります。",
1011
- "zh": "编辑最大输出 token(Ollama num_predict)。输入加预留输出必须放进上下文窗口。",
1012
- },
1013
- "__edit_timeout__": {
1014
- "en": "Edit upstream wait timeout in milliseconds. 300000 means 5 minutes.",
1015
- "ko": "업스트림 응답 대기 시간(ms)입니다. 300000은 5분입니다.",
1016
- "ja": "上流応答待ちタイムアウト(ms)です。300000は5分です。",
1017
- "zh": "编辑上游响应等待超时(毫秒)。300000 表示 5 分钟。",
1018
- },
1019
- "__custom__": {
1020
- "en": "Enter any Ollama option as KEY=VALUE, or unset:KEY to remove it.",
1021
- "ko": "임의의 Ollama 옵션을 KEY=VALUE로 입력합니다. 삭제하려면 unset:KEY를 입력합니다.",
1022
- "ja": "任意のOllamaオプションをKEY=VALUEで入力します。削除はunset:KEYです。",
1023
- "zh": "用 KEY=VALUE 输入任意 Ollama 选项;用 unset:KEY 删除。",
1024
- },
1025
- }
1026
-
1027
-
1028
def ollama_option_description(value: str) -> str:
    """Return the localized description for an Ollama options menu entry.

    Args:
        value: Either one of the keys of ``OLLAMA_OPTION_DESCRIPTIONS``
            or a ``KEY=VALUE`` preset string such as ``num_ctx=auto``.

    Returns:
        The description in the current UI language, falling back to
        English text. Values that match nothing get the description of
        the custom-input entry.
    """
    lang = current_language()

    def shared(edit_key: str) -> str:
        # Descriptions that are shared with the "__edit_*__" menu rows.
        texts = OLLAMA_OPTION_DESCRIPTIONS[edit_key]
        return texts.get(lang, texts["en"])

    if value in OLLAMA_OPTION_DESCRIPTIONS:
        return shared(value)

    # (prefix, translations, fallback). "num_ctx=auto" must be tested
    # before the more general "num_ctx=" prefix.
    preset_texts = (
        (
            "num_ctx=auto",
            {
                "en": "Use automatic context sizing based on request size, bounded by the configured min/max.",
                "ko": "요청 크기에 따라 컨텍스트를 자동 선택합니다. 설정된 최소/최대 범위 안에서만 움직입니다.",
                "ja": "要求サイズに応じてコンテキストを自動選択します。設定した最小/最大範囲内です。",
                "zh": "根据请求大小自动选择上下文,并限制在设置的最小/最大范围内。",
            },
            "Use automatic context sizing based on request size, bounded by the configured min/max.",
        ),
        (
            "num_ctx=",
            {
                "en": "Use a fixed context window for every Ollama request. Larger values use more memory and may be slower.",
                "ko": "모든 Ollama 요청에 고정 컨텍스트를 사용합니다. 값이 클수록 메모리를 더 쓰고 느려질 수 있습니다.",
                "ja": "全てのOllama要求で固定コンテキストを使います。大きいほどメモリ使用量と遅延が増えます。",
                "zh": "为每个 Ollama 请求使用固定上下文。值越大内存占用越高,也可能更慢。",
            },
            "Use a fixed context window for every Ollama request.",
        ),
        (
            "min=",
            {
                "en": "Set the lower bound for automatic num_ctx selection.",
                "ko": "자동 num_ctx 선택의 하한값을 설정합니다.",
                "ja": "自動num_ctx選択の下限を設定します。",
                "zh": "设置自动 num_ctx 选择的下限。",
            },
            "Set the lower bound for automatic num_ctx selection.",
        ),
        (
            "max=",
            {
                "en": "Set the upper bound for automatic num_ctx selection.",
                "ko": "자동 num_ctx 선택의 상한값을 설정합니다.",
                "ja": "自動num_ctx選択の上限を設定します。",
                "zh": "设置自动 num_ctx 选择的上限。",
            },
            "Set the upper bound for automatic num_ctx selection.",
        ),
        (
            "think=",
            {
                "en": "Toggle Ollama thinking output support. Claude Code may not display provider-specific thinking cleanly.",
                "ko": "Ollama thinking 출력 요청 여부입니다. Claude Code가 provider별 thinking을 항상 깔끔하게 표시하지는 않습니다.",
                "ja": "Ollama thinking出力の要求を切り替えます。Claude Code側で常に綺麗に表示されるとは限りません。",
                "zh": "切换 Ollama thinking 输出请求。Claude Code 不一定能完整显示各提供商的 thinking。",
            },
            "Toggle Ollama thinking output support.",
        ),
        (
            "stream=",
            {
                "en": "Toggle streaming. When off, the router waits for the full upstream response before sending it to Claude Code. Use this when streaming fragmentation causes tool-call or JSON parse errors.",
                "ko": "스트리밍을 켜고/끕니다. off 면 업스트림 응답이 전부 모일 때까지 기다렸다가 Claude Code에 한 번에 보냅니다. 스트리밍 단편화로 tool-call/JSON 파싱이 실패할 때 사용합니다.",
                "ja": "ストリーミングを切り替えます。offにすると、ルーターは上流応答が揃ってからClaude Codeへ一括送信します。ストリーミング断片化でtool-call/JSON解析が失敗する時に使用します。",
                "zh": "切换流式输出。off 时路由器会等待上游完整响应再发送给 Claude Code。流式分片导致 tool-call/JSON 解析失败时使用。",
            },
            "Toggle streaming. When off, the router waits for the full upstream response.",
        ),
        (
            "stream_word_chunking=",
            {
                "en": "Buffer text tokens until a whitespace/word boundary before sending the SSE delta. Reduces SSE event volume and can mitigate tool/JSON fragmentation issues. Tool call inputs are not affected.",
                "ko": "토큰을 공백 단위(단어 경계)까지 버퍼링해서 SSE delta로 전송합니다. SSE 이벤트 빈도를 줄이고 tool/JSON 단편화 문제를 완화합니다. tool call 입력은 영향을 받지 않습니다.",
                "ja": "テキストトークンを空白/単語境界までバッファしてSSE deltaを送信します。SSEイベント量を減らし、tool/JSON断片化を緩和できます。tool call入力には影響しません。",
                "zh": "在空白/单词边界处批量发送 SSE 文本 delta。降低 SSE 事件频率并缓解 tool/JSON 分片问题。工具调用输入不受影响。",
            },
            "Buffer text tokens until a word boundary before sending the SSE delta.",
        ),
    )
    for prefix, texts, fallback in preset_texts:
        if value.startswith(prefix):
            return texts.get(lang, fallback)

    # Presets that reuse the description of the matching edit row.
    shared_by_prefix = (
        ("keep_alive=", "__edit_keep_alive__"),
        ("temperature=", "__edit_temperature__"),
        ("top_p=", "__edit_top_p__"),
        (("max_tokens=", "num_predict="), "__edit_max_tokens__"),
        ("timeout=", "__edit_timeout__"),
    )
    for prefix, edit_key in shared_by_prefix:
        if value.startswith(prefix):
            return shared(edit_key)
    return shared("__custom__")
1093
-
1094
-
1095
- def build_ollama_options_submenu() -> dict:
1096
- provider, pcfg = current_provider_cfg()
1097
- ctx = pcfg.get("num_ctx", "auto")
1098
- keep = str(pcfg.get("keep_alive", "5m"))
1099
- think = bool(pcfg.get("think", False))
1100
- stream_on = bool(pcfg.get("stream_enabled", True))
1101
- word_chunk_on = bool(pcfg.get("stream_word_chunking", False))
1102
- options = pcfg.get("ollama_options") or {}
1103
- if not isinstance(options, dict):
1104
- options = {}
1105
- choices = [
1106
- ("__edit_num_ctx__", f"Edit num_ctx [{ollama_ctx_text(pcfg)}]", False),
1107
- ("__edit_min__", f"Edit auto minimum [{pcfg.get('num_ctx_min', 32768)}]", False),
1108
- ("__edit_max__", f"Edit auto maximum [{pcfg.get('num_ctx_max', 131072)}]", False),
1109
- ("__edit_keep_alive__", f"Edit keep_alive [{keep}]", False),
1110
- ("__edit_temperature__", f"Edit temperature [{options.get('temperature', 'unset')}]", False),
1111
- ("__edit_top_p__", f"Edit top_p [{options.get('top_p', 'unset')}]", False),
1112
- ("__edit_max_tokens__", f"Edit max_tokens/num_predict [{options.get('num_predict', 'unset')}]", False),
1113
- ("__edit_timeout__", f"Edit timeout ms [{pcfg.get('request_timeout_ms', 'default')}]", False),
1114
- ("__custom__", "Custom KEY=VALUE or unset:KEY...", False),
1115
- ("num_ctx=auto", f"num_ctx auto ({pcfg.get('num_ctx_min', 32768)}-{pcfg.get('num_ctx_max', 131072)})", str(ctx).lower() == "auto"),
1116
- ("num_ctx=32768", "num_ctx 32768", ctx == 32768),
1117
- ("num_ctx=65536", "num_ctx 65536", ctx == 65536),
1118
- ("num_ctx=131072", "num_ctx 131072", ctx == 131072),
1119
- ("min=32768", "auto minimum 32768", pcfg.get("num_ctx_min", 32768) == 32768),
1120
- ("max=131072", "auto maximum 131072", pcfg.get("num_ctx_max", 131072) == 131072),
1121
- ("keep_alive=5m", "keep_alive 5m", keep == "5m"),
1122
- ("keep_alive=30m", "keep_alive 30m", keep == "30m"),
1123
- ("think=false", "think false", not think),
1124
- ("think=true", "think true", think),
1125
- ("stream=true", "stream on", stream_on),
1126
- ("stream=false", "stream off (buffer full response)", not stream_on),
1127
- ("stream_word_chunking=true", "stream_word_chunking on (flush at word boundary)", word_chunk_on),
1128
- ("stream_word_chunking=false", "stream_word_chunking off (token-by-token)", not word_chunk_on),
1129
- ("temperature=0.7", f"temperature 0.7 (current {options.get('temperature', 'unset')})", options.get("temperature") == 0.7),
1130
- ("top_p=0.8", f"top_p 0.8 (current {options.get('top_p', 'unset')})", options.get("top_p") == 0.8),
1131
- ("max_tokens=4096", f"max_tokens 4096 (current {options.get('num_predict', 'unset')})", options.get("num_predict") == 4096),
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import select
6
+ import shutil
7
+ import subprocess
8
+ import sys
9
+ import termios
10
+ import time
11
+ import textwrap
12
+ import tty
13
+ import urllib.error
14
+ import urllib.parse
15
+ import urllib.request
16
+ from pathlib import Path
17
+
18
+ try:
19
+ import msvcrt
20
+ HAS_MSVCRT = True
21
+ except ImportError:
22
+ HAS_MSVCRT = False
23
+
24
+ try:
25
+ sys.stdout.reconfigure(encoding="utf-8")
26
+ sys.stderr.reconfigure(encoding="utf-8")
27
+ except Exception:
28
+ pass
29
+
30
+
31
def _enable_windows_ansi() -> None:
    """Ask the Windows console to interpret ANSI escape sequences.

    Does nothing on other platforms. Any failure is ignored, so the call
    is best-effort.
    """
    if sys.platform == "win32":
        try:
            import ctypes

            vt_processing = 0x0004  # ENABLE_VIRTUAL_TERMINAL_PROCESSING
            k32 = ctypes.windll.kernel32
            stdout_handle = k32.GetStdHandle(-11)
            console_mode = ctypes.c_ulong()
            k32.GetConsoleMode(stdout_handle, ctypes.byref(console_mode))
            k32.SetConsoleMode(stdout_handle, console_mode.value | vt_processing)
        except Exception:
            pass
44
+
45
+
46
class _RawTerminal:
    """Context manager that switches a POSIX tty stdin to raw mode.

    On entry it enables Windows ANSI handling (a no-op elsewhere) and,
    when stdin is a tty on a non-Windows platform, saves the terminal
    attributes and calls ``tty.setraw``. On exit the saved attributes are
    restored. Exceptions raised inside the ``with`` body are not suppressed.
    """

    def __enter__(self):
        _enable_windows_ansi()
        posix_tty = sys.platform != "win32" and sys.stdin.isatty()
        if posix_tty:
            self._fd = sys.stdin.fileno()
            # Remember the cooked-mode attributes so __exit__ can restore them.
            self._old = termios.tcgetattr(self._fd)
            tty.setraw(self._fd)
        return self

    def __exit__(self, *exc_info):
        # "_old" only exists when __enter__ actually changed the terminal mode.
        if sys.platform == "win32" or not hasattr(self, "_old"):
            return False
        termios.tcsetattr(self._fd, termios.TCSADRAIN, self._old)
        return False
59
+
60
+
61
+ def _getch(timeout: float = 60.0) -> bytes | None:
62
+ if sys.platform == "win32" and HAS_MSVCRT:
63
+ start = time.monotonic()
64
+ while time.monotonic() - start < timeout:
65
+ if msvcrt.kbhit():
66
+ return msvcrt.getch()
67
+ time.sleep(0.01)
68
+ return None
69
+ else:
70
+ r, _, _ = select.select([sys.stdin.buffer], [], [], timeout)
71
+ if r:
72
+ return sys.stdin.buffer.read(1)
73
+ return None
74
+
75
+
76
def _debug_log(msg: str) -> None:
    """Append *msg*, prefixed with a monotonic timestamp, to the debug log.

    The log lives at ``/tmp/ca-menu-debug.log``. Every failure (missing
    directory, permissions, ...) is ignored so logging can never break
    the menu.
    """
    try:
        with open("/tmp/ca-menu-debug.log", "a", encoding="utf-8") as log:
            print(f"{time.monotonic():.3f} {msg}", file=log, flush=True)
    except Exception:
        pass
83
+
84
+
85
def _read_escape_sequence() -> bytes:
    """Collect the bytes of one escape sequence whose ESC was already read.

    Reading stops as soon as the sequence is complete, so a finished
    arrow-key sequence never waits for the follow-up timeout.
    """
    seq = b"\x1b"
    # The length cap keeps a malformed stream from holding us here forever.
    while len(seq) < 16:
        nxt = _getch(1.0)
        _debug_log(f"  seq byte: {repr(nxt)}")
        if not nxt:
            # Timeout (None) or end of input (b""): a lone ESC or a cut-off sequence.
            break
        seq += nxt
        if len(seq) == 2:
            # Only CSI ("ESC [") and SS3 ("ESC O") introduce longer sequences.
            if nxt in (b"[", b"O"):
                continue
            break
        if seq == b"\x1b[[":
            # Some consoles send "ESC [ [ X" for function keys; keep reading.
            continue
        if seq[1:2] == b"O" or 0x40 <= nxt[0] <= 0x7E:
            # SS3 has exactly one final byte; CSI ends at the first byte in 0x40-0x7E.
            break
    return seq


def read_menu_key() -> str:
    """Read one key press and translate it into a menu key name.

    Returns:
        ``"KEY_UP"``, ``"KEY_DOWN"``, ``"KEY_PPAGE"``, ``"KEY_NPAGE"``,
        ``"KEY_ESC"`` (a lone ESC or any unrecognised escape sequence),
        ``"KEY_LEFT"`` / ``"KEY_RIGHT"`` (Windows console only),
        ``"KEY_ENTER"``, ``"KEY_BACKSPACE"``, the character itself for
        printable ASCII, or ``""`` when nothing usable was read.
    """
    ch = _getch()
    _debug_log(f"_getch returned: {repr(ch)}")
    if ch is None:
        return ""
    if ch == b"\x1b":
        seq = _read_escape_sequence()
        _debug_log(f"  full seq: {repr(seq)} hex: {seq.hex()}")
        escape_keys = {
            b"\x1b[A": "KEY_UP",
            b"\x1bOA": "KEY_UP",
            b"\x1b[B": "KEY_DOWN",
            b"\x1bOB": "KEY_DOWN",
            b"\x1b[5~": "KEY_PPAGE",
            b"\x1b[6~": "KEY_NPAGE",
        }
        return escape_keys.get(seq, "KEY_ESC")
    if sys.platform == "win32" and HAS_MSVCRT:
        if ch in (b"\x00", b"\xe0"):
            # The Windows console reports special keys as a prefix byte plus a scan code.
            scan_code = _getch(0.05)
            windows_keys = {
                b"H": "KEY_UP",
                b"P": "KEY_DOWN",
                b"K": "KEY_LEFT",
                b"M": "KEY_RIGHT",
                b"I": "KEY_PPAGE",
                b"Q": "KEY_NPAGE",
            }
            return windows_keys.get(scan_code, "")
    if ch in (b"\r", b"\n"):
        return "KEY_ENTER"
    if ch in (b"\x7f", b"\x08"):
        return "KEY_BACKSPACE"
    if ch and 0 < ch[0] < 128 and chr(ch[0]).isprintable():
        return chr(ch[0])
    return ""
131
+
132
+
133
def _term_size() -> tuple[int, int]:
    """Return the terminal size as ``(columns, lines)``.

    Falls back to ``(80, 24)`` when the size cannot be determined.
    """
    default_size = (80, 24)
    try:
        size = shutil.get_terminal_size(fallback=default_size)
    except Exception:
        return default_size
    return size
138
+
139
+
140
def _clear() -> None:
    """Clear the screen and move the cursor to the top-left corner."""
    out = sys.stdout
    out.write("\033[2J\033[H")
    out.flush()
143
+
144
+
145
def _move(row: int, col: int) -> None:
    """Move the cursor to the zero-based (*row*, *col*) position.

    The escape sequence itself is one-based, hence the ``+ 1``. The
    output is not flushed here.
    """
    line = row + 1
    column = col + 1
    sys.stdout.write("\033[" + str(line) + ";" + str(column) + "H")
147
+
148
+
149
+ def _style(fg: int | None = None, bg: int | None = None, bold: bool = False, dim: bool = False, reverse: bool = False) -> str:
150
+ codes: list[str] = []
151
+ if bold:
152
+ codes.append("1")
153
+ if dim:
154
+ codes.append("2")
155
+ if reverse:
156
+ codes.append("7")
157
+ if fg is not None:
158
+ codes.append(f"38;5;{fg}")
159
+ if bg is not None:
160
+ codes.append(f"48;5;{bg}")
161
+ return f"\033[{';&'.join(codes)}m" if codes else ""
162
+
163
+
164
def _reset() -> str:
    """Return the ANSI sequence that resets all text attributes."""
    reset_sequence = "\033[0m"
    return reset_sequence
166
+
167
+
168
+ ANIMATED_TEXT_PALETTE = (203, 209, 215, 221, 229, 187, 151, 116, 111, 147, 183, 219)
169
+
170
+
171
+ def animated_text(text: str, *, phase: int | None = None, bold: bool = True) -> str:
172
+ if not sys.stdout.isatty():
173
+ return text
174
+ if phase is None:
175
+ phase = int(time.monotonic() * 8)
176
+ parts: list[str] = []
177
+ for i, ch in enumerate(text):
178
+ if ch.isspace():
179
+ parts.append(ch)
180
+ continue
181
+ color = ANIMATED_TEXT_PALETTE[(phase + i) % len(ANIMATED_TEXT_PALETTE)]
182
+ parts.append(_style(fg=color, bold=bold) + ch)
183
+ parts.append(_reset())
184
+ return "".join(parts)
185
+
186
+
187
def _write(row: int, col: int, text: str, style: str = "") -> None:
    """Write *text* at the zero-based (*row*, *col*) position and flush.

    Nothing is written when either coordinate is negative. When *style*
    is non-empty, the text is wrapped in that style sequence and a reset.
    """
    if min(row, col) < 0:
        return
    _move(row, col)
    chunks = [style, text, _reset()] if style else [text]
    out = sys.stdout
    for chunk in chunks:
        out.write(chunk)
    out.flush()
197
+
198
+
199
def _write_safe(row: int, col: int, text: str, style: str = "") -> None:
    """Write *text* at (*row*, *col*), clipped to the terminal.

    Nothing is written when the position lies outside the terminal. The
    text is cut so that it ends one column before the right edge.

    Args:
        row: Zero-based screen row.
        col: Zero-based screen column.
        text: Text to write.
        style: Optional style sequence passed through to ``_write``.
    """
    # _term_size() reports (columns, lines), i.e. width first. Unpacking it
    # as (height, width) would check rows against the column count and clip
    # text to the line count.
    w, h = _term_size()
    if row < 0 or row >= h or col >= w:
        return
    visible = max(0, w - max(0, col) - 1)
    _write(row, col, text[:visible], style)
204
+
205
+
206
+ CTL = str(Path.home() / ".local/bin/claude-anyctl")
207
+ CONFIG = Path.home() / ".config/claude-any/config.json"
208
+ NCP_ENV = Path.home() / ".config/nvd-claude-proxy/.env"
209
+ PROVIDERS = [
210
+ ("anthropic", "Anthropic"),
211
+ ("ollama", "Ollama"),
212
+ ("ollama-cloud", "Ollama Cloud"),
213
+ ("vllm", "vLLM"),
214
+ ("nvidia-hosted", "Nvidia Hosted"),
215
+ ("self-hosted-nim", "Self Hosted NIM"),
216
+ ]
217
+ APP_NAME = "Claude Any"
218
+ CREDITS = "Credits: One Ciel LLC"
219
+ LANGUAGES = {
220
+ "en": "English",
221
+ "ko": "한국어",
222
+ "ja": "日本語",
223
+ "zh": "中文",
224
+ }
225
+ UI_TEXT = {
226
+ "en": {
227
+ "language": "Language",
228
+ "provider": "Provider",
229
+ "api_key": "API key",
230
+ "base_url": "Base URL",
231
+ "model": "Model",
232
+ "advisor_model": "Advisor Model",
233
+ "ollama_options": "Ollama options",
234
+ "provider_options": "Provider options",
235
+ "test": "Test compatibility",
236
+ "launch": "Launch Claude Code",
237
+ "quit": "Quit",
238
+ "title": "claude-any pre-launch",
239
+ "select_language": "Enter selects language. Up/Down moves inside submenu. Esc closes submenu.",
240
+ "select_provider": "Enter selects provider. Up/Down moves inside submenu. Esc closes submenu.",
241
+ "select_model": "Enter selects model. Up/Down moves inside submenu. Esc closes submenu. Custom input is at the end.",
242
+ "select_advisor_model": "Enter selects advisor model. Use a long-context model such as deepseek-v4-pro.",
243
+ "select_ollama_options": "Enter applies this Ollama option. Custom input accepts KEY=VALUE or unset:KEY.",
244
+ "select_provider_options": "Enter applies this provider option. Custom input accepts KEY=VALUE or unset:KEY.",
245
+ "test_result": "Compatibility result is shown inline. Esc closes the result. Enter runs the test again.",
246
+ "help_launch": "Enter launches Claude Code with the selected provider and model.",
247
+ "help_test": "Enter tests current provider/model with a minimal Claude Code tool request.",
248
+ "help_language": "Enter expands language submenu inline.",
249
+ "help_provider": "Enter expands provider submenu inline.",
250
+ "help_model": "Enter expands model submenu inline when the provider endpoint is reachable.",
251
+ "help_advisor_model": "Enter selects the larger model used by claude-any advisor routing.",
252
+ "help_ollama_options": "Enter expands Ollama context and generation options.",
253
+ "help_provider_options": "Enter expands provider output/context/timeout options.",
254
+ "help_api_key": "Enter opens secure API key setup in the terminal. Keys are not pasted into Claude Code.",
255
+ "help_base_url": "Enter edits the current provider base URL on this row.",
256
+ "help_quit": "Enter exits without launching Claude Code.",
257
+ "running_test": "Running compatibility test...",
258
+ "test_passed": "Compatibility test passed.",
259
+ "test_failed": "Compatibility test failed.",
260
+ "loading_models": "Loading models from current provider...",
261
+ "api_key_unchanged": "API key unchanged.",
262
+ },
263
+ "ko": {
264
+ "language": "언어",
265
+ "provider": "프로바이더",
266
+ "api_key": "API 키",
267
+ "base_url": "Base URL",
268
+ "model": "모델",
269
+ "advisor_model": "Advisor Model",
270
+ "ollama_options": "Ollama 옵션",
271
+ "provider_options": "프로바이더 옵션",
272
+ "test": "호환성 테스트",
273
+ "launch": "Claude Code 실행",
274
+ "quit": "종료",
275
+ "title": "claude-any 실행 전 설정",
276
+ "select_language": "Enter로 언어를 선택합니다. 위/아래로 이동, Esc로 닫기.",
277
+ "select_provider": "Enter로 프로바이더를 선택합니다. 위/아래로 이동, Esc로 닫기.",
278
+ "select_model": "Enter로 모델을 선택합니다. 위/아래로 이동, Esc로 닫기. 마지막 항목은 직접 입력입니다.",
279
+ "select_advisor_model": "Advisor Model을 선택합니다. deepseek-v4-pro 같은 긴 컨텍스트 모델을 권장합니다.",
280
+ "select_ollama_options": "Enter로 Ollama 옵션을 적용합니다. 직접 입력은 KEY=VALUE 또는 unset:KEY를 받습니다.",
281
+ "select_provider_options": "Enter로 프로바이더 옵션을 적용합니다. 직접 입력은 KEY=VALUE 또는 unset:KEY를 받습니다.",
282
+ "test_result": "호환성 결과가 메뉴 안에 표시됩니다. Esc로 닫고 Enter로 다시 테스트합니다.",
283
+ "help_launch": "선택한 프로바이더와 모델로 Claude Code를 실행합니다.",
284
+ "help_test": "현재 프로바이더/모델에 최소 Claude Code 도구 요청을 보내 호환성을 확인합니다.",
285
+ "help_language": "언어 선택 메뉴를 펼칩니다.",
286
+ "help_provider": "프로바이더 선택 메뉴를 펼칩니다.",
287
+ "help_model": "프로바이더 엔드포인트가 유효하면 모델 선택 메뉴를 펼칩니다.",
288
+ "help_advisor_model": "claude-any advisor 라우팅에 사용할 더 큰 모델을 선택합니다.",
289
+ "help_ollama_options": "Ollama 컨텍스트 크기와 생성 파라미터 메뉴를 펼칩니다.",
290
+ "help_provider_options": "프로바이더의 출력 토큰, 컨텍스트, 타임아웃 옵션 메뉴를 펼칩니다.",
291
+ "help_api_key": "API 키 입력을 이 터미널에서 안전하게 엽니다. 키는 Claude Code 채팅에 붙여넣지 않습니다.",
292
+ "help_base_url": "현재 프로바이더의 Base URL을 이 줄에서 수정합니다.",
293
+ "help_quit": "Claude Code를 실행하지 않고 종료합니다.",
294
+ "running_test": "호환성 테스트 실행 중...",
295
+ "test_passed": "호환성 테스트 성공.",
296
+ "test_failed": "호환성 테스트 실패.",
297
+ "loading_models": "현재 프로바이더에서 모델을 불러오는 중...",
298
+ "api_key_unchanged": "API 키는 변경되지 않았습니다.",
299
+ },
300
+ "ja": {
301
+ "language": "言語",
302
+ "provider": "プロバイダー",
303
+ "api_key": "APIキー",
304
+ "base_url": "Base URL",
305
+ "model": "モデル",
306
+ "advisor_model": "Advisor Model",
307
+ "ollama_options": "Ollamaオプション",
308
+ "provider_options": "プロバイダーオプション",
309
+ "test": "互換性テスト",
310
+ "launch": "Claude Codeを起動",
311
+ "quit": "終了",
312
+ "title": "claude-any 起動前設定",
313
+ "select_language": "Enterで言語を選択します。上下で移動、Escで閉じます。",
314
+ "select_provider": "Enterでプロバイダーを選択します。上下で移動、Escで閉じます。",
315
+ "select_model": "Enterでモデルを選択します。上下で移動、Escで閉じます。最後は手入力です。",
316
+ "select_advisor_model": "Advisor Modelを選択します。deepseek-v4-proのような長コンテキストモデルを推奨します。",
317
+ "select_ollama_options": "EnterでOllamaオプションを適用します。手入力はKEY=VALUEまたはunset:KEYです。",
318
+ "select_provider_options": "Enterでプロバイダーオプションを適用します。手入力はKEY=VALUEまたはunset:KEYです。",
319
+ "test_result": "互換性結果はメニュー内に表示されます。Escで閉じ、Enterで再テストします。",
320
+ "help_launch": "選択したプロバイダーとモデルでClaude Codeを起動します。",
321
+ "help_test": "現在のプロバイダー/モデルへ最小のClaude Codeツール要求を送り互換性を確認します。",
322
+ "help_language": "言語選択メニューを展開します。",
323
+ "help_provider": "プロバイダー選択メニューを展開します。",
324
+ "help_model": "プロバイダーのエンドポイントが有効な場合、モデル選択メニューを展開します。",
325
+ "help_advisor_model": "claude-any advisorルーティングで使う大きなモデルを選択します。",
326
+ "help_ollama_options": "Ollamaのコンテキストサイズと生成パラメータを開きます。",
327
+ "help_provider_options": "プロバイダーの出力トークン、コンテキスト、タイムアウト設定を開きます。",
328
+ "help_api_key": "APIキー入力をこの端末で安全に開きます。キーはClaude Codeチャットに貼り付けません。",
329
+ "help_base_url": "現在のプロバイダーのBase URLをこの行で編集します。",
330
+ "help_quit": "Claude Codeを起動せずに終了します。",
331
+ "running_test": "互換性テストを実行中...",
332
+ "test_passed": "互換性テスト成功。",
333
+ "test_failed": "互換性テスト失敗。",
334
+ "loading_models": "現在のプロバイダーからモデルを読み込み中...",
335
+ "api_key_unchanged": "APIキーは変更されませんでした。",
336
+ },
337
+ "zh": {
338
+ "language": "语言",
339
+ "provider": "提供商",
340
+ "api_key": "API 密钥",
341
+ "base_url": "Base URL",
342
+ "model": "模型",
343
+ "advisor_model": "Advisor Model",
344
+ "ollama_options": "Ollama 选项",
345
+ "provider_options": "提供商选项",
346
+ "test": "兼容性测试",
347
+ "launch": "启动 Claude Code",
348
+ "quit": "退出",
349
+ "title": "claude-any 启动前设置",
350
+ "select_language": "按 Enter 选择语言。上下移动,Esc 关闭。",
351
+ "select_provider": "按 Enter 选择提供商。上下移动,Esc 关闭。",
352
+ "select_model": "按 Enter 选择模型。上下移动,Esc 关闭。最后一项可手动输入。",
353
+ "select_advisor_model": "选择 Advisor Model。建议使用 deepseek-v4-pro 等长上下文模型。",
354
+ "select_ollama_options": "按 Enter 应用 Ollama 选项。手动输入支持 KEY=VALUE 或 unset:KEY。",
355
+ "select_provider_options": "按 Enter 应用提供商选项。手动输入支持 KEY=VALUE 或 unset:KEY。",
356
+ "test_result": "兼容性结果会在菜单内显示。Esc 关闭,Enter 重新测试。",
357
+ "help_launch": "使用所选提供商和模型启动 Claude Code。",
358
+ "help_test": "向当前提供商/模型发送最小 Claude Code 工具请求以检查兼容性。",
359
+ "help_language": "展开语言选择菜单。",
360
+ "help_provider": "展开提供商选择菜单。",
361
+ "help_model": "当提供商端点可用时展开模型选择菜单。",
362
+ "help_advisor_model": "选择 claude-any advisor 路由使用的更大模型。",
363
+ "help_ollama_options": "展开 Ollama 上下文大小和生成参数。",
364
+ "help_provider_options": "展开提供商输出 token、上下文和超时选项。",
365
+ "help_api_key": "在此终端安全输入 API 密钥。不要把密钥粘贴到 Claude Code 聊天中。",
366
+ "help_base_url": "在这一行编辑当前提供商的 Base URL。",
367
+ "help_quit": "不启动 Claude Code 并退出。",
368
+ "running_test": "正在运行兼容性测试...",
369
+ "test_passed": "兼容性测试成功。",
370
+ "test_failed": "兼容性测试失败。",
371
+ "loading_models": "正在从当前提供商加载模型...",
372
+ "api_key_unchanged": "API 密钥未更改。",
373
+ },
374
+ }
375
+
376
+
377
+ PROVIDER_NOTES = {
378
+ "en": {
379
+ "anthropic": [
380
+ "Anthropic: uses Claude Code's native Anthropic connection.",
381
+ "Set an Anthropic API key here, or run `claude /login` separately to use your Claude account login.",
382
+ ],
383
+ "ollama": [
384
+ "Ollama: uses your local Ollama daemon; API key is normally not required.",
385
+ "To use :cloud models through local Ollama, sign in on the Ollama host with `ollama signin`.",
386
+ ],
387
+ "ollama-cloud": [
388
+ "Ollama Cloud: calls https://ollama.com/api directly; an Ollama API key is required.",
389
+ "Use this when you want cloud models without relying on the local Ollama daemon's sign-in state.",
390
+ ],
391
+ "vllm": [
392
+ "vLLM: enter the vLLM server root that implements the Anthropic Messages API.",
393
+ "Do not enter an OpenAI-only chat completions endpoint; use a compatibility proxy for those servers.",
394
+ ],
395
+ "self-hosted-nim": [
396
+ "Self-hosted NIM: enter the NIM server root that exposes Anthropic-compatible /v1/messages.",
397
+ "This native path does not use the NVIDIA hosted API Catalog proxy.",
398
+ ],
399
+ "nvidia-hosted": [
400
+ "NVIDIA hosted: uses NVIDIA API Catalog at https://integrate.api.nvidia.com/v1.",
401
+ "Hosted catalog models are OpenAI-style, so claude-any keeps a compatibility route for Claude Code.",
402
+ ],
403
+ },
404
+ "ko": {
405
+ "anthropic": [
406
+ "Anthropic: Claude Code의 기본 Anthropic 연결을 사용합니다.",
407
+ "여기에 Anthropic API key를 넣거나, 별도로 `claude /login`을 실행해 Claude 계정 로그인을 사용하세요.",
408
+ ],
409
+ "ollama": [
410
+ "Ollama: 로컬 Ollama 데몬을 사용합니다. 일반 로컬 모델은 API key가 필요 없습니다.",
411
+ "로컬 Ollama로 :cloud 모델을 쓰려면 Ollama가 실행되는 호스트에서 `ollama signin`이 필요합니다.",
412
+ ],
413
+ "ollama-cloud": [
414
+ "Ollama Cloud: https://ollama.com/api를 직접 호출합니다. Ollama API key가 필요합니다.",
415
+ "로컬 Ollama 데몬의 로그인 상태와 무관하게 클라우드 모델을 쓰고 싶을 때 사용합니다.",
416
+ ],
417
+ "vllm": [
418
+ "vLLM: Anthropic Messages API를 구현한 vLLM 서버 root를 넣으세요.",
419
+ "OpenAI 전용 chat completions endpoint를 넣지 마세요. 그런 서버는 호환 프록시가 필요합니다.",
420
+ ],
421
+ "self-hosted-nim": [
422
+ "Self-hosted NIM: Anthropic 호환 /v1/messages를 노출하는 NIM 서버 root를 넣으세요.",
423
+ "이 native 경로는 NVIDIA hosted API Catalog 프록시를 사용하지 않습니다.",
424
+ ],
425
+ "nvidia-hosted": [
426
+ "NVIDIA hosted: https://integrate.api.nvidia.com/v1 의 NVIDIA API Catalog를 사용합니다.",
427
+ "Hosted catalog 모델은 OpenAI 방식이므로 Claude Code에는 claude-any 호환 라우트를 유지합니다.",
428
+ ],
429
+ },
430
+ "ja": {
431
+ "anthropic": [
432
+ "Anthropic: Claude CodeのネイティブAnthropic接続を使います。",
433
+ "ここでAnthropic API keyを設定するか、別途`claude /login`を実行してClaudeアカウントログインを使ってください。",
434
+ ],
435
+ "ollama": [
436
+ "Ollama: ローカルのOllama daemonを使います。通常のローカルモデルではAPI keyは不要です。",
437
+ "ローカルOllama経由で:cloudモデルを使うには、Ollamaホストで`ollama signin`が必要です。",
438
+ ],
439
+ "ollama-cloud": [
440
+ "Ollama Cloud: https://ollama.com/api を直接呼び出します。Ollama API keyが必要です。",
441
+ "ローカルOllama daemonのサインイン状態に依存せずクラウドモデルを使う場合に選びます。",
442
+ ],
443
+ "vllm": [
444
+ "vLLM: Anthropic Messages APIを実装したvLLMサーバーrootを入力してください。",
445
+ "OpenAI専用chat completions endpointは入力しないでください。その場合は互換プロキシが必要です。",
446
+ ],
447
+ "self-hosted-nim": [
448
+ "Self-hosted NIM: Anthropic互換/v1/messagesを公開するNIMサーバーrootを入力してください。",
449
+ "このnative経路はNVIDIA hosted API Catalog proxyを使いません。",
450
+ ],
451
+ "nvidia-hosted": [
452
+ "NVIDIA hosted: https://integrate.api.nvidia.com/v1 のNVIDIA API Catalogを使います。",
453
+ "Hosted catalogモデルはOpenAI形式のため、Claude Codeにはclaude-any互換ルートを維持します。",
454
+ ],
455
+ },
456
+ "zh": {
457
+ "anthropic": [
458
+ "Anthropic: 使用Claude Code原生Anthropic连接。",
459
+ "可在此设置Anthropic API key,或另行运行`claude /login`使用Claude账号登录。",
460
+ ],
461
+ "ollama": [
462
+ "Ollama: 使用本地Ollama daemon;普通本地模型通常不需要API key。",
463
+ "若通过本地Ollama使用:cloud模型,需要在运行Ollama的主机上执行`ollama signin`。",
464
+ ],
465
+ "ollama-cloud": [
466
+ "Ollama Cloud: 直接调用 https://ollama.com/api;需要Ollama API key。",
467
+ "当你想不依赖本地Ollama daemon登录状态使用云端模型时选择它。",
468
+ ],
469
+ "vllm": [
470
+ "vLLM: 请输入实现Anthropic Messages API的vLLM服务器root。",
471
+ "不要输入仅OpenAI chat completions的端点;这类服务器需要兼容代理。",
472
+ ],
473
+ "self-hosted-nim": [
474
+ "Self-hosted NIM: 请输入暴露 Anthropic-compatible /v1/messages 的 NIM 服务器 root。",
475
+ "此 native 路径不使用 NVIDIA hosted API Catalog 代理。",
476
+ ],
477
+ "nvidia-hosted": [
478
+ "NVIDIA hosted: 使用 https://integrate.api.nvidia.com/v1 的 NVIDIA API Catalog。",
479
+ "Hosted catalog 模型是 OpenAI 风格,因此 Claude Code 仍使用 claude-any 兼容路由。",
480
+ ],
481
+ },
482
+ }
483
+
484
+
485
def init_colors() -> None:
    """Do nothing; the body of this function is intentionally empty."""
    return None
487
+
488
+
489
def cp(n: int) -> str:
    """Return the style string for colour-pair number *n*.

    Pairs 1 to 6 each select a foreground colour that is handed to
    ``_style``; every other value yields an empty string.
    """
    foreground_by_pair = (
        (1, 255),
        (2, 10),
        (3, 11),
        (4, 9),
        (5, 255),
        (6, 208),
    )
    for pair, colour in foreground_by_pair:
        if n == pair:
            return _style(fg=colour)
    return ""
503
+
504
+
505
def load_cfg() -> dict:
    """Load the JSON configuration stored at ``CONFIG``.

    Returns:
        The parsed configuration when the file exists, can be read, and
        contains a JSON object. In every other case (missing file, read
        error, invalid JSON, or valid JSON that is not an object) a fresh
        default configuration is returned.
    """
    fallback = {"current_provider": "nvidia-hosted", "providers": {}}
    if not CONFIG.exists():
        return fallback
    try:
        loaded = json.loads(CONFIG.read_text())
    except (OSError, ValueError, RecursionError):
        # Best effort: an unreadable or corrupt file falls back to defaults.
        return fallback
    # Callers immediately call ``.get`` on the result, so a JSON list,
    # string, number or null must not leak out of this function.
    return loaded if isinstance(loaded, dict) else fallback
512
+
513
+
514
# Hard-coded status per model id: (badge text, longer explanation).
KNOWN_NVIDIA_MODEL_STATUS = {
    "claude-nvidia-llama-3.1-nemotron-ultra-253b-v1": (
        "FAIL 404",
        "listed but not callable for this NVIDIA account",
    ),
}

# Model ids offered first in the advisor-model menu.
DEFAULT_ADVISOR_MODELS = [
    "deepseek-v4-pro",
    "claude-opus-4-6",
    "claude-sonnet-4-6",
    "glm-5.1",
]

# Cached compatibility results stay fresh for 24 hours when the test passed
# and for 5 minutes when it failed.
COMPAT_OK_TTL_SECONDS = 86_400
COMPAT_FAIL_TTL_SECONDS = 300
520
+
521
+
522
def cache_age_seconds(entry: dict) -> int | None:
    """Return how many whole seconds ago *entry* was tested.

    The ``tested_at`` value is converted with ``int``; if that fails for
    any reason the result is ``None``. The age is never negative.
    """
    try:
        stamp = int(entry.get("tested_at"))
    except Exception:
        return None
    elapsed = int(time.time()) - stamp
    return elapsed if elapsed > 0 else 0
528
+
529
+
530
def cache_entry_fresh(entry: dict) -> bool:
    """Tell whether a cached compatibility *entry* is still within its TTL.

    Entries with a truthy ``ok`` use ``COMPAT_OK_TTL_SECONDS``; all others
    use ``COMPAT_FAIL_TTL_SECONDS``. An entry whose age cannot be computed
    is never fresh.
    """
    age = cache_age_seconds(entry)
    if age is None:
        return False
    if entry.get("ok"):
        return age <= COMPAT_OK_TTL_SECONDS
    return age <= COMPAT_FAIL_TTL_SECONDS
536
+
537
+
538
def human_age(seconds: int | None) -> str:
    """Format an age in seconds as ``"Ns ago"``, ``"Nm ago"`` or ``"Nh ago"``.

    ``None`` is rendered as ``"unknown age"``. Anything of an hour or more
    is reported in whole hours.
    """
    if seconds is None:
        return "unknown age"
    if seconds >= 3600:
        return f"{seconds // 3600}h ago"
    if seconds >= 60:
        return f"{seconds // 60}m ago"
    return f"{seconds}s ago"
546
+
547
+
548
def compatibility_entry(provider: str, upstream: str, alias: str | None = None) -> dict | None:
    """Return a fresh cached compatibility result for a model, if any.

    The cache is read from the ``compatibility_cache`` section of the
    configuration and is looked up first by *alias*, then by *upstream*.

    Args:
        provider: Provider key whose cache section is consulted.
        upstream: Upstream model id.
        alias: Optional alias for the same model.

    Returns:
        The first cache entry that is a dict and still fresh according to
        ``cache_entry_fresh``, or ``None`` when there is none.
    """
    cache = load_cfg().get("compatibility_cache", {})
    if not isinstance(cache, dict):
        return None
    provider_cache = cache.get(provider, {})
    if not isinstance(provider_cache, dict):
        return None
    for key in (alias, upstream):
        if not key:
            continue
        entry = provider_cache.get(key)
        # Keep looking when this entry is stale: a stale alias entry must
        # not hide a fresh result stored under the upstream id.
        if isinstance(entry, dict) and cache_entry_fresh(entry):
            return entry
    return None
560
+
561
+
562
def compatibility_badge(provider: str, upstream: str, alias: str | None = None) -> str:
    """Build the short bracketed compatibility badge for a model.

    For ``nvidia-hosted`` a match in ``KNOWN_NVIDIA_MODEL_STATUS`` wins.
    Otherwise the cached entry decides: ``[OK]``, ``[FAIL <code>]``,
    ``[TIMEOUT]`` or ``[FAIL]``. Without a cached entry the badge is
    ``[untested]`` for ``nvidia-hosted`` and empty for other providers.
    """
    is_nvidia_hosted = provider == "nvidia-hosted"
    if is_nvidia_hosted:
        known = KNOWN_NVIDIA_MODEL_STATUS.get(upstream)
        if not known and alias:
            known = KNOWN_NVIDIA_MODEL_STATUS.get(alias or "")
        if known:
            return f"[{known[0]}]"

    entry = compatibility_entry(provider, upstream, alias)
    if not entry:
        return "[untested]" if is_nvidia_hosted else ""
    if entry.get("ok"):
        return "[OK]"

    code = entry.get("code")
    if code:
        return f"[FAIL {code}]"

    message = str(entry.get("message") or "").lower()
    timed_out = any(marker in message for marker in ("timeout", "timed out"))
    return "[TIMEOUT]" if timed_out else "[FAIL]"
579
+
580
+
581
def current_compatibility_line(provider: str, pcfg: dict) -> str | None:
    """Describe the compatibility state of the currently selected model.

    Returns a ``"Compatibility: ..."`` line, or ``None`` when the model has
    no badge. A failing cached entry adds up to 90 characters of its
    message or diagnosis; a known NVIDIA status adds its explanation.
    """
    model = str(pcfg.get("current_model") or "")
    if not model:
        return "Compatibility: no model selected"

    badge = compatibility_badge(provider, model, model)
    if not badge:
        return None

    entry = compatibility_entry(provider, model, model)
    if entry and not entry.get("ok"):
        detail = str(entry.get("message") or entry.get("diagnosis") or "")[:90]
        return f"Compatibility: {badge} {model} {detail}".strip()

    known = KNOWN_NVIDIA_MODEL_STATUS.get(model)
    if known:
        return f"Compatibility: {badge} {model} - {known[1]}"
    return f"Compatibility: {badge} {model}"
596
+
597
+
598
def current_language() -> str:
    """Return the configured language code, or ``"en"`` if it is not in ``LANGUAGES``."""
    configured = load_cfg().get("language", "en")
    if configured in LANGUAGES:
        return configured
    return "en"
601
+
602
+
603
def t(key: str) -> str:
    """Look up UI text *key* in the current language.

    Falls back to the English table, and finally to *key* itself.
    """
    lang = current_language()
    english = UI_TEXT["en"]
    localized = UI_TEXT.get(lang, english)
    if key in localized:
        return localized[key]
    return english.get(key, key)
606
+
607
+
608
def run_cmd(args: list[str]) -> tuple[int, str]:
    """Run a command and capture its combined stdout/stderr as text.

    Args:
        args: Program and arguments, passed to ``subprocess.run`` as a list
            (no shell is involved).

    Returns:
        ``(returncode, output)``. When the program cannot be started at all
        (missing binary, permission error, ...) the function returns
        ``(127, <error text>)`` instead of raising, so callers that only
        display the output keep working.
    """
    try:
        proc = subprocess.run(
            args,
            text=True,
            # Undecodable bytes in the child's output must not raise here.
            errors="replace",
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )
    except OSError as exc:
        return 127, f"failed to run {args!r}: {exc}"
    return proc.returncode, proc.stdout
611
+
612
+
613
def read_env_file(path: Path) -> dict[str, str]:
    """Parse ``KEY=VALUE`` lines from the file at *path*.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Keys and values are stripped of surrounding whitespace, and
    values additionally of leading/trailing single or double quotes. A
    missing file yields an empty dict.
    """
    if not path.exists():
        return {}
    parsed: dict[str, str] = {}
    for raw in path.read_text(errors="ignore").splitlines():
        entry = raw.strip()
        if not entry or entry.startswith("#"):
            continue
        name, separator, value = entry.partition("=")
        if not separator:
            continue
        parsed[name.strip()] = value.strip().strip("'\"")
    return parsed
624
+
625
+
626
def meaningful_key(value: str | None) -> bool:
    """Return True when *value* is non-empty and not one of the placeholder keys."""
    if not value:
        return False
    return value not in ("dummy", "not-used", "ollama")
628
+
629
+
630
def api_key_status(provider: str, pcfg: dict) -> str:
    """Return a one-line description of the API-key state for *provider*.

    ``nvidia-hosted`` reads ``NVIDIA_API_KEY`` from the ``NCP_ENV`` file;
    every other provider uses ``pcfg["api_key"]``.
    """
    if provider == "nvidia-hosted":
        nvidia_key = read_env_file(NCP_ENV).get("NVIDIA_API_KEY")
        if meaningful_key(nvidia_key):
            return "API key: set (NVIDIA)"
        return "API key: missing (NVIDIA required)"

    has_key = meaningful_key(pcfg.get("api_key"))
    if provider == "anthropic":
        if has_key:
            return "API key: set (Anthropic)"
        return "API key: not set (use API key or Claude login)"
    if provider == "ollama-cloud":
        if has_key:
            return "API key: set (Ollama Cloud)"
        return "API key: missing (Ollama Cloud required)"
    if has_key:
        return "API key: set"
    if provider == "ollama":
        return "API key: not required for Ollama"
    return "API key: optional or not configured"
643
+
644
+
645
def join_url(base: str, path: str) -> str:
    """Append *path* to *base* without doubling a ``/v1`` segment.

    Trailing slashes on *base* are removed first. When *base* ends in
    ``/v1`` and *path* starts with ``/v1/``, the leading ``/v1`` of *path*
    is dropped.
    """
    trimmed = base.rstrip("/")
    duplicated_v1 = trimmed.endswith("/v1") and path.startswith("/v1/")
    suffix = path[3:] if duplicated_v1 else path
    return trimmed + suffix
650
+
651
+
652
def probe_base_url(provider: str, pcfg: dict) -> str:
    """Check the provider's base URL and return a one-line status.

    No request is made when the URL is missing, still contains a ``your-``
    placeholder, or the provider is ``nvidia-hosted``. Otherwise the model
    list endpoint (``/api/tags`` for the Ollama providers, ``/v1/models``
    for the rest) is fetched with a 2.5 second timeout.

    Args:
        provider: Provider key.
        pcfg: That provider's configuration; ``base_url`` and ``api_key``
            are read.

    Returns:
        A ``"Base URL: ..."`` status string. Network and HTTP failures are
        reported in the string, never raised.
    """
    base = (pcfg.get("base_url") or "").rstrip("/")
    if not base:
        return "Base URL: missing"
    if "your-" in base:
        return f"Base URL: placeholder ({base})"
    if provider == "nvidia-hosted":
        return f"Base URL: NVIDIA hosted ({base}); local router http://127.0.0.1:8799 starts on launch"

    uses_ollama_api = provider in ("ollama", "ollama-cloud")
    path = "/api/tags" if uses_ollama_api else "/v1/models"
    url = join_url(base, path)
    headers = {}
    key = pcfg.get("api_key")
    if meaningful_key(key):
        # Send the key in both header styles.
        headers = {"x-api-key": key, "authorization": f"Bearer {key}"}

    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=2.5) as resp:
            # Cap the read; the body is only used to count models.
            body = resp.read(131072).decode("utf-8", errors="ignore")
    except urllib.error.HTTPError as exc:
        if exc.code in (401, 403):
            return f"Base URL: model list reachable, auth rejected ({exc.code})"
        return f"Base URL: HTTP {exc.code}"
    except Exception as exc:
        # Deliberately broad: any failure to reach the server (bad URL,
        # DNS, refused connection, timeout) becomes a status line.
        # nvidia-hosted never gets here; it returned above.
        return f"Base URL: unreachable ({type(exc).__name__})"

    count = ""
    try:
        data = json.loads(body)
    except ValueError:
        # Truncated or non-JSON body: reachable, but no count to show.
        data = None
    if isinstance(data, dict):
        models = data.get("models", []) if uses_ollama_api else data.get("data")
        if isinstance(models, list):
            count = f", {len(models)} models"
    return f"Base URL: model list reachable ({path}{count})"
688
+
689
+
690
def preflight_checks() -> list[str]:
    """Collect status lines for the currently selected provider.

    The list holds the base-URL probe result, the API-key status, the
    localized provider notes and, when available, the compatibility line.
    """
    provider, pcfg = current_provider_cfg()
    localized_notes = PROVIDER_NOTES.get(current_language(), PROVIDER_NOTES["en"])
    provider_notes = localized_notes.get(provider, [])

    lines = [probe_base_url(provider, pcfg), api_key_status(provider, pcfg)]
    lines.extend(provider_notes)

    compat = current_compatibility_line(provider, pcfg)
    if compat:
        lines.append(compat)
    return lines
703
+
704
+
705
def provider_preview_checks(provider: str) -> list[str]:
    """Return preview lines for *provider* without probing the network.

    The lines are the configured base URL (or ``unset``), the API-key
    status and the localized provider notes.
    """
    pcfg = load_cfg().get("providers", {}).get(provider, {})
    localized_notes = PROVIDER_NOTES.get(current_language(), PROVIDER_NOTES["en"])
    provider_notes = localized_notes.get(provider, [])

    preview = [
        f"Base URL: {pcfg.get('base_url') or 'unset'}",
        api_key_status(provider, pcfg),
    ]
    preview.extend(provider_notes)
    return preview
715
+
716
+
717
def selected_provider_value(sub: dict | None) -> str | None:
    """Return the value of the highlighted item in a provider submenu.

    ``None`` is returned when *sub* is empty, is not of kind ``provider``,
    or the highlighted item cannot be read.
    """
    if not sub:
        return None
    if sub.get("kind") != "provider":
        return None
    try:
        highlighted = sub["items"][sub["idx"]]
        return str(highlighted["value"])
    except Exception:
        return None
724
+
725
+
726
def status_text() -> list[str]:
    """Return the lines printed by ``CTL status``.

    Returns:
        The stripped output split into lines, or ``["status unavailable"]``
        when the command produced no output or only whitespace.
    """
    _, out = run_cmd([CTL, "status"])
    lines = out.strip().splitlines() if out else []
    # Whitespace-only output would otherwise yield an empty list.
    return lines or ["status unavailable"]
729
+
730
+
731
def current_provider() -> str:
    """Return the configured provider key, defaulting to ``nvidia-hosted``."""
    cfg = load_cfg()
    return cfg.get("current_provider", "nvidia-hosted")
733
+
734
+
735
def current_provider_cfg() -> tuple[str, dict]:
    """Return the current provider key together with its config section.

    The section is an empty dict when the provider has no entry yet.
    """
    cfg = load_cfg()
    name = cfg.get("current_provider", "nvidia-hosted")
    sections = cfg.get("providers", {})
    return name, sections.get(name, {})
739
+
740
+
741
def is_ollama_provider(provider: str) -> bool:
    """Return True for the ``ollama`` and ``ollama-cloud`` providers."""
    return provider == "ollama" or provider == "ollama-cloud"
743
+
744
+
745
def has_provider_options(provider: str) -> bool:
    """Return True when *provider* is one of those with a provider-options menu."""
    with_options = ("vllm", "nvidia-hosted", "self-hosted-nim", "ollama", "ollama-cloud")
    return any(provider == name for name in with_options)
747
+
748
+
749
def ollama_ctx_text(pcfg: dict) -> str:
    """Describe the configured ``num_ctx``.

    When the value is ``auto`` (any letter case) the configured minimum and
    maximum are shown as ``auto MIN-MAX``; otherwise the value itself is
    returned as a string.
    """
    value = pcfg.get("num_ctx", "auto")
    if str(value).lower() == "auto":
        return f"auto {pcfg.get('num_ctx_min', 32768)}-{pcfg.get('num_ctx_max', 131072)}"
    return str(value)


def ollama_options_summary(pcfg: dict) -> str:
    """Summarize the Ollama options in *pcfg* as one ``"; "``-joined line.

    Args:
        pcfg: Provider configuration for an Ollama provider.

    Returns:
        A summary of context, keep-alive, thinking, timeout, rate limit and
        streaming settings, followed by up to three extra ``ollama_options``
        entries in sorted key order.
    """
    timeout = pcfg.get("request_timeout_ms", "default")
    # Only append the unit to a real value; an unset timeout must read
    # "default", matching provider_options_summary.
    timeout_text = f"{timeout}ms" if timeout != "default" else "default"
    parts = [
        f"ctx {ollama_ctx_text(pcfg)}",
        f"keep {pcfg.get('keep_alive', 'default')}",
        f"think {str(bool(pcfg.get('think', False))).lower()}",
        f"timeout {timeout_text}",
        f"rpm {pcfg.get('rate_limit_rpm', 40)}",
        f"stream {'on' if bool(pcfg.get('stream_enabled', True)) else 'off'}",
    ]
    if bool(pcfg.get("rate_limit_status", True)):
        parts.append("rpm_status on")
    if bool(pcfg.get("stream_word_chunking", False)):
        parts.append("word_chunk on")
    opts = pcfg.get("ollama_options") or {}
    if isinstance(opts, dict) and opts:
        extra = ", ".join(f"{k}={v}" for k, v in sorted(opts.items())[:3])
        parts.append(extra)
    return "; ".join(parts)
774
+
775
+
776
def provider_options_summary(provider: str, pcfg: dict) -> str:
    """Summarize provider options as one ``"; "``-joined line.

    Which fields appear depends on *provider*: context fields and the
    native flag for ``vllm``/``self-hosted-nim``, rate-limit fields for the
    NVIDIA, NIM and Ollama providers, and streaming fields for ``vllm``,
    ``nvidia-hosted`` and ``self-hosted-nim``.
    """
    has_context_fields = provider in ("vllm", "self-hosted-nim")
    has_rate_limit = provider in ("nvidia-hosted", "self-hosted-nim", "ollama", "ollama-cloud")
    has_streaming = provider in ("vllm", "nvidia-hosted", "self-hosted-nim")

    timeout = pcfg.get("request_timeout_ms", "default")
    timeout_text = "default" if timeout == "default" else f"{timeout}ms"

    summary = []
    if has_context_fields:
        summary.append(f"ctx {pcfg.get('context_window', 'default')}")
        summary.append(f"reserve {pcfg.get('context_reserve_tokens', 'default')}")
    summary.append(f"max {pcfg.get('max_output_tokens', 'default')}")
    summary.append(f"timeout {timeout_text}")
    if has_rate_limit:
        summary.append(f"rpm {pcfg.get('rate_limit_rpm', 40)}")
        if bool(pcfg.get("rate_limit_status", True)):
            summary.append("rpm_status on")
    if has_context_fields:
        summary.append(f"native {str(bool(pcfg.get('native_compat', True))).lower()}")
    if has_streaming:
        stream_state = "on" if bool(pcfg.get("stream_enabled", True)) else "off"
        summary.append(f"stream {stream_state}")
        if bool(pcfg.get("stream_word_chunking", False)):
            summary.append("word_chunk on")
    return "; ".join(summary)
796
+
797
+
798
def main_items() -> list[tuple[str, str]]:
    """Build the main-menu rows as ``(action key, label)`` pairs.

    Every row except the final ``quit`` row is prefixed with its zero-based
    position. The Ollama and provider option rows only appear for
    providers that support them.
    """
    provider, pcfg = current_provider_cfg()
    lang = current_language()
    model = pcfg.get("current_model", "unset")
    advisor_model = pcfg.get("advisor_model") or "off"
    base = pcfg.get("base_url", "unset")

    entries: list[tuple[str, str]] = [
        ("language", f"{t('language')} [{LANGUAGES.get(lang, lang)}]"),
        ("provider", f"{t('provider')} [{provider}]"),
        ("api-key", t("api_key")),
        ("base-url", f"{t('base_url')} [{base}]"),
        ("model", f"{t('model')} [{model}]"),
        ("advisor-model", f"{t('advisor_model')} [{advisor_model}]"),
    ]
    if is_ollama_provider(provider):
        entries.append(("ollama-options", f"{t('ollama_options')} [{ollama_options_summary(pcfg)}]"))
    if has_provider_options(provider):
        entries.append(
            ("provider-options", f"{t('provider_options')} [{provider_options_summary(provider, pcfg)}]")
        )
    entries.append(("test", t("test")))
    entries.append(("launch", t("launch")))

    rows = [(key, f"{number}. {label}") for number, (key, label) in enumerate(entries)]
    # The quit row is the only one without a number.
    rows.append(("quit", t("quit")))
    return rows
823
+
824
+
825
def settings_ready_except_api_key() -> bool:
    """Tell whether provider, base URL and model are all set.

    A base URL that still contains the ``your-`` placeholder does not count.
    """
    provider, pcfg = current_provider_cfg()
    base = pcfg.get("base_url", "")
    model = pcfg.get("current_model", "")
    if not (provider and base and model):
        return False
    return "your-" not in base
830
+
831
def default_base_url(provider: str) -> str:
    """Return the default base URL for *provider*.

    Providers not in the table get ``http://localhost:8000``.
    """
    defaults = dict((
        ("anthropic", "https://api.anthropic.com"),
        ("ollama", "http://your-ollama:11434"),
        ("ollama-cloud", "https://ollama.com"),
        ("vllm", "http://your-vllm:8000"),
        ("nvidia-hosted", "https://integrate.api.nvidia.com/v1"),
        ("self-hosted-nim", "http://your-nim:8000"),
    ))
    try:
        return defaults[provider]
    except KeyError:
        return "http://localhost:8000"
840
+
841
+
842
def help_for_action(action: str, sub_kind: str | None = None) -> str:
    """Return the help text for the highlighted menu entry.

    An open submenu (*sub_kind*) takes precedence over *action*. Unknown
    actions get a generic English hint.
    """
    submenu_text_keys = {
        "language": "select_language",
        "provider": "select_provider",
        "model": "select_model",
        "advisor-model": "select_advisor_model",
        "ollama-options": "select_ollama_options",
        "provider-options": "select_provider_options",
        "test-result": "test_result",
    }
    if sub_kind in submenu_text_keys:
        return t(submenu_text_keys[sub_kind])

    action_text_keys = {
        "launch": "help_launch",
        "test": "help_test",
        "language": "help_language",
        "provider": "help_provider",
        "model": "help_model",
        "advisor-model": "help_advisor_model",
        "ollama-options": "help_ollama_options",
        "provider-options": "help_provider_options",
        "api-key": "help_api_key",
        "base-url": "help_base_url",
        "quit": "help_quit",
    }
    if action in action_text_keys:
        return t(action_text_keys[action])
    return "Enter selects this action."
870
+
871
+
872
def get_models_for_current_provider() -> tuple[list[tuple[str, str]], str]:
    """Run ``CTL models`` and parse its tab-separated output.

    The first output line is skipped. Each remaining ``alias<TAB>upstream``
    line with both parts non-empty becomes an ``(upstream, alias)`` pair.

    Returns:
        The parsed pairs and the raw command output.
    """
    _, out = run_cmd([CTL, "models"])
    models: list[tuple[str, str]] = []
    for row in out.splitlines()[1:]:
        alias, tab, upstream = row.partition("\t")
        if not tab:
            continue
        alias, upstream = alias.strip(), upstream.strip()
        if alias and upstream:
            models.append((upstream, alias))
    return models, out
882
+
883
+
884
def build_provider_submenu() -> dict:
    """Build the provider-selection submenu state.

    One item is created per entry in ``PROVIDERS``; the cursor starts on
    the currently configured provider.
    """
    cfg = load_cfg()
    current = cfg.get("current_provider", "nvidia-hosted")
    items = []
    for key, label in PROVIDERS:
        base = cfg.get("providers", {}).get(key, {}).get("base_url", "")
        items.append({
            "value": key,
            "label": f"{label:<16} {key:<15} {base}",
            "current": key == current,
        })
    selected = [position for position, item in enumerate(items) if item["current"]]
    return {
        "kind": "provider",
        "parent": "provider",
        "items": items,
        "idx": selected[-1] if selected else 0,
        "offset": 0,
    }
895
+
896
+
897
def build_language_submenu() -> dict:
    """Build the language-selection submenu state.

    One item is created per entry in ``LANGUAGES``; the cursor starts on
    the current language.
    """
    current = current_language()
    items = [
        {"value": code, "label": f"{code:<2} {label}", "current": code == current}
        for code, label in LANGUAGES.items()
    ]
    selected = [position for position, item in enumerate(items) if item["current"]]
    return {
        "kind": "language",
        "parent": "language",
        "items": items,
        "idx": selected[-1] if selected else 0,
        "offset": 0,
    }
906
+
907
+
908
def build_api_key_submenu() -> dict:
    """Build the submenu used to pick the provider whose API key is edited.

    One item is created per entry in ``PROVIDERS``; the cursor starts on
    the current provider.
    """
    current = current_provider()
    items = [
        {"value": key, "label": f"{label:<16} {key:<15}", "current": key == current}
        for key, label in PROVIDERS
    ]
    selected = [position for position, item in enumerate(items) if item["current"]]
    return {
        "kind": "api-key",
        "parent": "api-key",
        "items": items,
        "idx": selected[-1] if selected else 0,
        "offset": 0,
    }
917
+
918
def build_model_submenu() -> tuple[dict | None, list[str]]:
    """Build the model-selection submenu state.

    Returns:
        ``(submenu, [])`` when models are available, with a trailing
        "custom" item. Otherwise ``(None, lines)``, where *lines* holds at
        most the first two lines of the raw command output, or a default
        message when there was no output.
    """
    models, raw = get_models_for_current_provider()
    if not models:
        fallback = raw.strip().splitlines() or ["No models found. Use custom input."]
        return None, fallback[:2]

    provider, pcfg = current_provider_cfg()
    current = pcfg.get("current_model", "")
    items = []
    idx = 0
    for position, (upstream, alias) in enumerate(models):
        selected = current in (upstream, alias)
        if selected:
            idx = position
        badge = compatibility_badge(provider, upstream, alias)
        known = KNOWN_NVIDIA_MODEL_STATUS.get(upstream) or KNOWN_NVIDIA_MODEL_STATUS.get(alias)
        entry = compatibility_entry(provider, upstream, alias)
        if known:
            description = known[1]
        elif entry:
            state = "OK" if entry.get("ok") else "failed"
            detail = entry.get("diagnosis") or entry.get("message") or ""
            age_text = human_age(cache_age_seconds(entry))
            description = f"Last compatibility test: {state} ({age_text}). {detail}".strip()
        else:
            description = ""
        items.append({
            "value": upstream,
            "label": f"{badge:<11} {upstream:<58} {alias}",
            "current": selected,
            "description": description,
        })
    items.append({"value": "__custom__", "label": "Custom model id...", "current": False})
    submenu = {"kind": "model", "parent": "model", "items": items, "idx": idx, "offset": 0}
    return submenu, []
949
+
950
+
951
def build_advisor_model_submenu() -> dict:
    """Build the advisor-model submenu state.

    The list starts with a "disable" item, then the default advisor models
    followed by the provider's models (duplicates removed, order kept),
    and ends with a "custom" item.
    """
    _, pcfg = current_provider_cfg()
    current = pcfg.get("advisor_model") or ""

    provider_models = [upstream for upstream, _ in get_models_for_current_provider()[0]]
    candidates = DEFAULT_ADVISOR_MODELS + provider_models
    # dict.fromkeys keeps the first occurrence of each id in order.
    values = list(dict.fromkeys(mid for mid in candidates if mid))

    items = [{
        "value": "",
        "label": "Disable Advisor Model",
        "current": not current,
        "description": "Disable claude-any advisor routing.",
    }]
    idx = 0
    for position, mid in enumerate(values, 1):
        selected = mid == current
        if selected:
            idx = position
        if mid == "deepseek-v4-pro":
            desc = "Recommended long-context advisor model."
        else:
            desc = ""
        items.append({"value": mid, "label": mid, "current": selected, "description": desc})
    items.append({"value": "__custom__", "label": "Custom advisor model id...", "current": False})
    return {
        "kind": "advisor-model",
        "parent": "advisor-model",
        "items": items,
        "idx": idx,
        "offset": 0,
    }
968
+
969
+
970
# Help text for the Ollama options menu: item value -> language code -> text.
OLLAMA_OPTION_DESCRIPTIONS = {
    "__edit_num_ctx__": dict(
        en="Edit Ollama num_ctx. This is the context window sent to Ollama; it cannot exceed the server/model limit.",
        ko="Ollama num_ctx를 수정합니다. 한 번에 볼 컨텍스트 창이며 서버/모델 한계를 넘게 설정해도 실제 한계는 늘지 않습니다.",
        ja="Ollamaのnum_ctxを編集します。Ollamaへ送るコンテキスト幅で、サーバー/モデル上限は超えられません。",
        zh="编辑 Ollama num_ctx。这是发送给 Ollama 的上下文窗口,不能超过服务器/模型上限。",
    ),
    "__edit_min__": dict(
        en="Edit the minimum context used when num_ctx is auto. Small requests will not go below this value.",
        ko="num_ctx=auto일 때 사용할 최소 컨텍스트입니다. 작은 요청도 이 값보다 작게 내려가지 않습니다.",
        ja="num_ctx=auto時の最小コンテキストです。小さな要求でもこの値未満にはなりません。",
        zh="编辑 num_ctx=auto 时的最小上下文。小请求也不会低于此值。",
    ),
    "__edit_max__": dict(
        en="Edit the maximum context used when num_ctx is auto. Keep it at or below the real server context limit.",
        ko="num_ctx=auto일 때 사용할 최대 컨텍스트입니다. 실제 서버 컨텍스트 한계 이하로 두는 것이 맞습니다.",
        ja="num_ctx=auto時の最大コンテキストです。実際のサーバー上限以下にしてください。",
        zh="编辑 num_ctx=auto 时的最大上下文。应不高于真实服务器上下文上限。",
    ),
    "__edit_keep_alive__": dict(
        en="Edit how long Ollama keeps the model loaded after a request. Longer values reduce reloads but hold memory.",
        ko="요청 후 Ollama가 모델을 메모리에 유지하는 시간입니다. 길수록 재로딩은 줄지만 메모리를 더 오래 잡습니다.",
        ja="要求後にOllamaがモデルを保持する時間です。長いほど再読み込みは減りますがメモリを保持します。",
        zh="编辑请求后 Ollama 保持模型加载的时间。更长可减少重载,但会占用内存。",
    ),
    "__edit_temperature__": dict(
        en="Edit sampling temperature. Higher is more varied; lower is more deterministic.",
        ko="샘플링 temperature입니다. 높을수록 답변이 다양해지고, 낮을수록 결정적으로 동작합니다.",
        ja="サンプリングtemperatureです。高いほど多様、低いほど決定的になります。",
        zh="编辑采样 temperature。越高越多样,越低越确定。",
    ),
    "__edit_top_p__": dict(
        en="Edit nucleus sampling top_p. Lower values restrict token choices; 0.8 is a moderate default.",
        ko="누적 확률 top_p입니다. 낮을수록 후보 토큰을 좁히며, 0.8은 중간 정도의 기본값입니다.",
        ja="nucleus samplingのtop_pです。低いほど候補を絞り、0.8は中程度の既定値です。",
        zh="编辑 nucleus sampling top_p。越低候选越窄;0.8 是中等默认值。",
    ),
    "__edit_max_tokens__": dict(
        en="Edit max output tokens (Ollama num_predict). Input plus reserved output must fit in the context window.",
        ko="최대 출력 토큰(Ollama num_predict)입니다. 입력과 예약 출력이 컨텍스트 창 안에 같이 들어가야 합니다.",
        ja="最大出力トークン(Ollama num_predict)です。入力と予約出力は同じコンテキスト内に収まる必要があります。",
        zh="编辑最大输出 token(Ollama num_predict)。输入加预留输出必须放进上下文窗口。",
    ),
    "__edit_timeout__": dict(
        en="Edit upstream wait timeout in milliseconds. 300000 means 5 minutes.",
        ko="업스트림 응답 대기 시간(ms)입니다. 300000은 5분입니다.",
        ja="上流応答待ちタイムアウト(ms)です。300000は5分です。",
        zh="编辑上游响应等待超时(毫秒)。300000 表示 5 分钟。",
    ),
    "__custom__": dict(
        en="Enter any Ollama option as KEY=VALUE, or unset:KEY to remove it.",
        ko="임의의 Ollama 옵션을 KEY=VALUE로 입력합니다. 삭제하려면 unset:KEY를 입력합니다.",
        ja="任意のOllamaオプションをKEY=VALUEで入力します。削除はunset:KEYです。",
        zh="用 KEY=VALUE 输入任意 Ollama 选项;用 unset:KEY 删除。",
    ),
}
1026
+
1027
+
1028
def ollama_option_description(value: str) -> str:
    """Return the localized help text for an Ollama options menu value.

    Exact keys of ``OLLAMA_OPTION_DESCRIPTIONS`` are looked up directly.
    Otherwise the first matching ``KEY=`` prefix rule decides, and values
    that match no rule get the ``__custom__`` text.
    """
    lang = current_language()

    def shared(option_key: str) -> str:
        texts = OLLAMA_OPTION_DESCRIPTIONS[option_key]
        return texts.get(lang, texts["en"])

    if value in OLLAMA_OPTION_DESCRIPTIONS:
        return shared(value)

    # Ordered rules: (prefixes, shared table key, inline texts, inline fallback).
    # "num_ctx=auto" must stay ahead of the more general "num_ctx=".
    rules = (
        (
            ("num_ctx=auto",),
            None,
            {
                "en": "Use automatic context sizing based on request size, bounded by the configured min/max.",
                "ko": "요청 크기에 따라 컨텍스트를 자동 선택합니다. 설정된 최소/최대 범위 안에서만 움직입니다.",
                "ja": "要求サイズに応じてコンテキストを自動選択します。設定した最小/最大範囲内です。",
                "zh": "根据请求大小自动选择上下文,并限制在设置的最小/最大范围内。",
            },
            "Use automatic context sizing based on request size, bounded by the configured min/max.",
        ),
        (
            ("num_ctx=",),
            None,
            {
                "en": "Use a fixed context window for every Ollama request. Larger values use more memory and may be slower.",
                "ko": "모든 Ollama 요청에 고정 컨텍스트를 사용합니다. 값이 클수록 메모리를 더 쓰고 느려질 수 있습니다.",
                "ja": "全てのOllama要求で固定コンテキストを使います。大きいほどメモリ使用量と遅延が増えます。",
                "zh": "为每个 Ollama 请求使用固定上下文。值越大内存占用越高,也可能更慢。",
            },
            "Use a fixed context window for every Ollama request.",
        ),
        (
            ("min=",),
            None,
            {
                "en": "Set the lower bound for automatic num_ctx selection.",
                "ko": "자동 num_ctx 선택의 하한값을 설정합니다.",
                "ja": "自動num_ctx選択の下限を設定します。",
                "zh": "设置自动 num_ctx 选择的下限。",
            },
            "Set the lower bound for automatic num_ctx selection.",
        ),
        (
            ("max=",),
            None,
            {
                "en": "Set the upper bound for automatic num_ctx selection.",
                "ko": "자동 num_ctx 선택의 상한값을 설정합니다.",
                "ja": "自動num_ctx選択の上限を設定します。",
                "zh": "设置自动 num_ctx 选择的上限。",
            },
            "Set the upper bound for automatic num_ctx selection.",
        ),
        (("keep_alive=",), "__edit_keep_alive__", None, None),
        (
            ("think=",),
            None,
            {
                "en": "Toggle Ollama thinking output support. Claude Code may not display provider-specific thinking cleanly.",
                "ko": "Ollama thinking 출력 요청 여부입니다. Claude Code가 provider별 thinking을 항상 깔끔하게 표시하지는 않습니다.",
                "ja": "Ollama thinking出力の要求を切り替えます。Claude Code側で常に綺麗に表示されるとは限りません。",
                "zh": "切换 Ollama thinking 输出请求。Claude Code 不一定能完整显示各提供商的 thinking。",
            },
            "Toggle Ollama thinking output support.",
        ),
        (
            ("stream=",),
            None,
            {
                "en": "Toggle streaming. When off, the router waits for the full upstream response before sending it to Claude Code. Use this when streaming fragmentation causes tool-call or JSON parse errors.",
                "ko": "스트리밍을 켜고/끕니다. off 면 업스트림 응답이 전부 모일 때까지 기다렸다가 Claude Code에 한 번에 보냅니다. 스트리밍 단편화로 tool-call/JSON 파싱이 실패할 때 사용합니다.",
                "ja": "ストリーミングを切り替えます。offにすると、ルーターは上流応答が揃ってからClaude Codeへ一括送信します。ストリーミング断片化でtool-call/JSON解析が失敗する時に使用します。",
                "zh": "切换流式输出。off 时路由器会等待上游完整响应再发送给 Claude Code。流式分片导致 tool-call/JSON 解析失败时使用。",
            },
            "Toggle streaming. When off, the router waits for the full upstream response.",
        ),
        (
            ("stream_word_chunking=",),
            None,
            {
                "en": "Buffer text tokens until a whitespace/word boundary before sending the SSE delta. Reduces SSE event volume and can mitigate tool/JSON fragmentation issues. Tool call inputs are not affected.",
                "ko": "토큰을 공백 단위(단어 경계)까지 버퍼링해서 SSE delta로 전송합니다. SSE 이벤트 빈도를 줄이고 tool/JSON 단편화 문제를 완화합니다. tool call 입력은 영향을 받지 않습니다.",
                "ja": "テキストトークンを空白/単語境界までバッファしてSSE deltaを送信します。SSEイベント量を減らし、tool/JSON断片化を緩和できます。tool call入力には影響しません。",
                "zh": "在空白/单词边界处批量发送 SSE 文本 delta。降低 SSE 事件频率并缓解 tool/JSON 分片问题。工具调用输入不受影响。",
            },
            "Buffer text tokens until a word boundary before sending the SSE delta.",
        ),
        (("temperature=",), "__edit_temperature__", None, None),
        (("top_p=",), "__edit_top_p__", None, None),
        (("max_tokens=", "num_predict="), "__edit_max_tokens__", None, None),
        (("timeout=",), "__edit_timeout__", None, None),
    )
    for prefixes, shared_key, inline_texts, inline_fallback in rules:
        if not value.startswith(prefixes):
            continue
        if shared_key is not None:
            return shared(shared_key)
        return inline_texts.get(lang, inline_fallback)
    return shared("__custom__")
1093
+
1094
+
1095
+ def build_ollama_options_submenu() -> dict:
1096
+ provider, pcfg = current_provider_cfg()
1097
+ ctx = pcfg.get("num_ctx", "auto")
1098
+ keep = str(pcfg.get("keep_alive", "5m"))
1099
+ think = bool(pcfg.get("think", False))
1100
+ stream_on = bool(pcfg.get("stream_enabled", True))
1101
+ word_chunk_on = bool(pcfg.get("stream_word_chunking", False))
1102
+ options = pcfg.get("ollama_options") or {}
1103
+ if not isinstance(options, dict):
1104
+ options = {}
1105
+ choices = [
1106
+ ("__edit_num_ctx__", f"Edit num_ctx [{ollama_ctx_text(pcfg)}]", False),
1107
+ ("__edit_min__", f"Edit auto minimum [{pcfg.get('num_ctx_min', 32768)}]", False),
1108
+ ("__edit_max__", f"Edit auto maximum [{pcfg.get('num_ctx_max', 131072)}]", False),
1109
+ ("__edit_keep_alive__", f"Edit keep_alive [{keep}]", False),
1110
+ ("__edit_temperature__", f"Edit temperature [{options.get('temperature', 'unset')}]", False),
1111
+ ("__edit_top_p__", f"Edit top_p [{options.get('top_p', 'unset')}]", False),
1112
+ ("__edit_max_tokens__", f"Edit max_tokens/num_predict [{options.get('num_predict', 'unset')}]", False),
1113
+ ("__edit_timeout__", f"Edit timeout ms [{pcfg.get('request_timeout_ms', 'default')}]", False),
1114
+ ("__custom__", "Custom KEY=VALUE or unset:KEY...", False),
1115
+ ("num_ctx=auto", f"num_ctx auto ({pcfg.get('num_ctx_min', 32768)}-{pcfg.get('num_ctx_max', 131072)})", str(ctx).lower() == "auto"),
1116
+ ("num_ctx=32768", "num_ctx 32768", ctx == 32768),
1117
+ ("num_ctx=65536", "num_ctx 65536", ctx == 65536),
1118
+ ("num_ctx=131072", "num_ctx 131072", ctx == 131072),
1119
+ ("min=32768", "auto minimum 32768", pcfg.get("num_ctx_min", 32768) == 32768),
1120
+ ("max=131072", "auto maximum 131072", pcfg.get("num_ctx_max", 131072) == 131072),
1121
+ ("keep_alive=5m", "keep_alive 5m", keep == "5m"),
1122
+ ("keep_alive=30m", "keep_alive 30m", keep == "30m"),
1123
+ ("think=false", "think false", not think),
1124
+ ("think=true", "think true", think),
1125
+ ("stream=true", "stream on", stream_on),
1126
+ ("stream=false", "stream off (buffer full response)", not stream_on),
1127
+ ("stream_word_chunking=true", "stream_word_chunking on (flush at word boundary)", word_chunk_on),
1128
+ ("stream_word_chunking=false", "stream_word_chunking off (token-by-token)", not word_chunk_on),
1129
+ ("temperature=0.7", f"temperature 0.7 (current {options.get('temperature', 'unset')})", options.get("temperature") == 0.7),
1130
+ ("top_p=0.8", f"top_p 0.8 (current {options.get('top_p', 'unset')})", options.get("top_p") == 0.8),
1131
+ ("max_tokens=4096", f"max_tokens 4096 (current {options.get('num_predict', 'unset')})", options.get("num_predict") == 4096),
1132
1132
  ("timeout=300000", f"timeout 300000ms (current {pcfg.get('request_timeout_ms', 'default')})", pcfg.get("request_timeout_ms") == 300000),
1133
- ]
1134
- items = [
1135
- {"value": value, "label": label, "current": current, "description": ollama_option_description(value)}
1136
- for value, label, current in choices
1137
- ]
1138
- return {"kind": "ollama-options", "parent": "ollama-options", "items": items, "idx": 0, "offset": 0}
1139
-
1140
-
1141
# Localized help text for entries of the provider-options submenu.
# Outer keys are menu sentinel values; inner keys are language codes
# ("en", "ko", "ja", "zh"). provider_option_description() reads this table
# and falls back to the "en" text when the active language is missing.
PROVIDER_OPTION_DESCRIPTIONS = {
    "__edit_context_window__": {
        "en": "Edit the context window value used by claude-any tests and router caps. Native mode cannot raise the real server limit.",
        "ko": "claude-any 테스트와 라우터 제한 계산에 쓰는 컨텍스트 값입니다. native 모드에서는 실제 서버 한계를 늘리지 못합니다.",
        "ja": "claude-anyのテストとルーター制限計算に使うコンテキスト値です。nativeモードでは実サーバー上限は増やせません。",
        "zh": "编辑 claude-any 测试和路由器限制计算使用的上下文值。native 模式不能提高真实服务器上限。",
    },
    "__edit_reserve__": {
        "en": "Reserve input-side room when claude-any router caps max_tokens. This is ignored by direct native Claude Code requests.",
        "ko": "claude-any 라우터가 max_tokens를 줄일 때 입력 쪽 여유로 남기는 토큰입니다. direct native 요청에는 적용되지 않습니다.",
        "ja": "claude-anyルーターがmax_tokensを制限する時に入力側へ残す余裕です。direct native要求では無視されます。",
        "zh": "claude-any 路由器限制 max_tokens 时预留给输入侧的空间。direct native 请求会忽略它。",
    },
    "__edit_max_output__": {
        "en": "Set Claude Code's CLAUDE_CODE_MAX_OUTPUT_TOKENS and the claude-any router cap. 4096 is the default.",
        "ko": "Claude Code의 CLAUDE_CODE_MAX_OUTPUT_TOKENS와 claude-any 라우터 출력 제한입니다. 기본값은 4096입니다.",
        "ja": "Claude CodeのCLAUDE_CODE_MAX_OUTPUT_TOKENSとclaude-anyルーターの出力制限です。既定値は4096です。",
        "zh": "设置 Claude Code 的 CLAUDE_CODE_MAX_OUTPUT_TOKENS 和 claude-any 路由器输出上限。默认 4096。",
    },
    "__edit_timeout__": {
        "en": "Edit claude-any compatibility-test/router upstream timeout in milliseconds. Claude Code native networking has its own timeout behavior.",
        "ko": "claude-any 호환성 테스트/라우터의 업스트림 대기 시간(ms)입니다. Claude Code native 네트워크 대기는 자체 동작을 따릅니다.",
        "ja": "claude-any互換性テスト/ルーターの上流タイムアウト(ms)です。Claude Code native通信は独自の挙動です。",
        "zh": "编辑 claude-any 兼容性测试/路由器上游超时(毫秒)。Claude Code native 网络有自身超时行为。",
    },
    "__edit_native__": {
        "en": "Toggle direct Anthropic Messages compatibility. Use it for vLLM/self-hosted NIM servers that implement /v1/messages.",
        "ko": "Anthropic Messages 호환 엔드포인트에 직접 연결할지 정합니다. /v1/messages를 구현한 vLLM/self-hosted NIM에서 사용합니다.",
        "ja": "Anthropic Messages互換エンドポイントへ直接接続するかを切り替えます。/v1/messages対応のvLLM/self-hosted NIMで使います。",
        "zh": "切换是否直接连接 Anthropic Messages 兼容端点。用于实现 /v1/messages 的 vLLM/self-hosted NIM。",
    },
    "__custom__": {
        "en": "Enter provider option as KEY=VALUE, or unset:KEY to remove it.",
        "ko": "프로바이더 옵션을 KEY=VALUE로 입력합니다. 삭제하려면 unset:KEY를 입력합니다.",
        "ja": "プロバイダーオプションをKEY=VALUEで入力します。削除はunset:KEYです。",
        "zh": "用 KEY=VALUE 输入提供商选项;用 unset:KEY 删除。",
    },
}
1179
-
1180
-
1181
def provider_option_description(value: str) -> str:
    """Return the help text shown for one provider-options menu value.

    ``value`` is either a sentinel key of ``PROVIDER_OPTION_DESCRIPTIONS``
    (for example ``"__edit_timeout__"``) or a ``KEY=VALUE`` preset. The text
    is picked for the language returned by ``current_language()``. Values
    that match nothing get the ``"__custom__"`` description.
    """
    lang = current_language()

    def shared(key: str) -> str:
        # Table entries fall back to their own English text.
        texts = PROVIDER_OPTION_DESCRIPTIONS[key]
        return texts.get(lang, texts["en"])

    if value in PROVIDER_OPTION_DESCRIPTIONS:
        return shared(value)

    # Presets that reuse the description of the matching "edit" entry.
    # The prefixes are mutually exclusive, so lookup order is irrelevant.
    table_backed = (
        ("context_window=", "__edit_context_window__"),
        ("context_reserve_tokens=", "__edit_reserve__"),
        ("max_output_tokens=", "__edit_max_output__"),
        (("timeout=", "request_timeout_ms="), "__edit_timeout__"),
        (("native=", "native_compat="), "__edit_native__"),
    )
    for prefixes, key in table_backed:
        if value.startswith(prefixes):
            return shared(key)

    # The remaining options carry their own texts and a shorter fallback
    # sentence for languages without a translation.
    if value == "__edit_rate_limit__" or value.startswith(("rate_limit=", "rate_limit_rpm=", "rpm=")):
        texts = {
            "en": "Router-side upstream requests per minute. NIM hosted defaults to 40 RPM; 0 disables waiting.",
            "ko": "라우터가 업스트림 요청 수를 분당 제한합니다. NIM hosted 기본값은 40 RPM이고, 0이면 대기하지 않습니다.",
            "ja": "ルーター側の上流リクエスト数/分。NIM hosted は既定 40 RPM、0 で待機なし。",
            "zh": "路由器侧上游每分钟请求限制。NIM hosted 默认 40 RPM;0 表示不等待。",
        }
        return texts.get(lang, "Router-side upstream requests per minute.")

    if value.startswith(("rate_limit_status=", "rpm_status=")):
        texts = {
            "en": "Show optional colored RPM usage status in Claude responses.",
            "ko": "Claude 응답에 RPM 사용량 상태를 색상 텍스트로 표시합니다.",
            "ja": "Claude応答にRPM使用量状態を色付きテキストで表示します。",
            "zh": "在 Claude 响应中显示彩色 RPM 使用量状态。",
        }
        return texts.get(lang, "Show optional colored RPM usage status.")

    if value.startswith("stream="):
        texts = {
            "en": "Toggle streaming. When off, the router forces stream:false upstream and returns the full response to Claude Code. Use this if streaming fragmentation causes tool-call or JSON parse errors.",
            "ko": "스트리밍 on/off. off 면 업스트림에 stream:false 를 강제하고 응답 전체를 Claude Code에 보냅니다. 스트리밍 단편화로 tool-call/JSON 파싱이 실패할 때 사용합니다.",
            "ja": "ストリーミングを切り替えます。offにすると上流にstream:falseを強制し、応答全体をClaude Codeへ返します。ストリーミング断片化でtool-call/JSONが失敗する時に使います。",
            "zh": "切换流式输出。off 时强制对上游设置 stream:false 并返回完整响应给 Claude Code。流式分片导致 tool-call/JSON 解析失败时使用。",
        }
        return texts.get(lang, "Toggle streaming. When off, the router forces stream:false upstream and returns the full response.")

    if value.startswith("stream_word_chunking="):
        texts = {
            "en": "Parse upstream Anthropic SSE and re-emit text_delta events buffered to word boundaries. Reduces SSE event volume; tool deltas and non-text events pass through unchanged.",
            "ko": "업스트림 Anthropic SSE를 파싱해서 text_delta 를 단어 경계 단위로 모아서 다시 전송합니다. SSE 이벤트 빈도를 낮춥니다. tool delta와 텍스트가 아닌 이벤트는 그대로 통과합니다.",
            "ja": "上流のAnthropic SSEを解析し、text_deltaを単語境界でまとめて再送します。SSEイベント量を削減します。tool deltaやテキスト以外のイベントはそのまま透過します。",
            "zh": "解析上游 Anthropic SSE 并将 text_delta 在单词边界处合并后重新发送。降低 SSE 事件频率。工具 delta 与非文本事件原样透传。",
        }
        return texts.get(lang, "Buffer text_delta events at word boundaries; tool deltas pass through unchanged.")

    return shared("__custom__")
1225
-
1226
-
1227
def build_provider_options_submenu() -> dict:
    """Build the provider-options submenu for the active provider.

    Reads the provider name and its config via ``current_provider_cfg()``
    and assembles ``(value, label, current)`` choices: interactive "edit"
    entries first, then ready-made ``KEY=VALUE`` presets. Which entries
    appear depends on the provider name.

    Returns:
        A submenu dict with ``kind``/``parent`` set to ``"provider-options"``,
        the rendered ``items`` and ``idx``/``offset`` initialised to 0.
    """
    provider, pcfg = current_provider_cfg()
    max_output = pcfg.get("max_output_tokens", "4096")
    timeout = pcfg.get("request_timeout_ms", "300000")
    context_window = pcfg.get("context_window", "default")
    stream_on = bool(pcfg.get("stream_enabled", True))
    word_chunk_on = bool(pcfg.get("stream_word_chunking", False))
    native = bool(pcfg.get("native_compat", True))
    rate_status_on = bool(pcfg.get("rate_limit_status", True))
    has_context_options = provider in ("vllm", "self-hosted-nim")

    def context_is(preset: int) -> bool:
        # Accept both numeric and string-valued config, matching the str()
        # comparison used for max_output_tokens and timeout below. The
        # numeric comparison is kept so existing int/float configs still match.
        return context_window == preset or str(context_window) == str(preset)

    choices = [
        ("__edit_max_output__", f"Edit max_output_tokens [{max_output}]", False),
        ("__edit_timeout__", f"Edit timeout ms [{timeout}]", False),
    ]
    if provider in ("nvidia-hosted", "self-hosted-nim", "ollama", "ollama-cloud"):
        choices.append(("__edit_rate_limit__", f"Edit rate_limit_rpm [{pcfg.get('rate_limit_rpm', 40)}]", False))
        choices.append(("rate_limit_status=true", "rate_limit_status on", rate_status_on))
        choices.append(("rate_limit_status=false", "rate_limit_status off", not rate_status_on))
    if has_context_options:
        # Context editors go first and the native toggle last, wrapping the
        # entries collected so far.
        choices = [
            ("__edit_context_window__", f"Edit context_window [{context_window}]", False),
            ("__edit_reserve__", f"Edit context reserve [{pcfg.get('context_reserve_tokens', 'default')}]", False),
            *choices,
            ("__edit_native__", f"Edit native mode [{str(native).lower()}]", False),
        ]
    choices.extend([
        ("__custom__", "Custom KEY=VALUE or unset:KEY...", False),
        ("max_output_tokens=4096", f"max_output_tokens 4096 (current {max_output})", str(max_output) == "4096"),
        ("max_output_tokens=8192", f"max_output_tokens 8192 (current {max_output})", str(max_output) == "8192"),
        ("timeout=300000", f"timeout 300000ms (current {timeout})", str(timeout) == "300000"),
    ])
    if provider in ("vllm", "nvidia-hosted", "self-hosted-nim"):
        choices.extend([
            ("stream=true", "stream on", stream_on),
            ("stream=false", "stream off (buffer full response)", not stream_on),
            ("stream_word_chunking=true", "stream_word_chunking on (flush at word boundary)", word_chunk_on),
            ("stream_word_chunking=false", "stream_word_chunking off (raw upstream SSE)", not word_chunk_on),
        ])
    if has_context_options:
        choices.extend([
            ("context_window=32768", f"context_window 32768 (current {context_window})", context_is(32768)),
            ("context_window=65536", f"context_window 65536 (current {context_window})", context_is(65536)),
            ("native=true", "native true", native),
            ("native=false", "native false", not native),
        ])
    items = [
        {"value": value, "label": label, "current": current, "description": provider_option_description(value)}
        for value, label, current in choices
    ]
    return {"kind": "provider-options", "parent": "provider-options", "items": items, "idx": 0, "offset": 0}
1274
-
1275
-
1276
def after_model_action() -> str:
    """Return the main-menu action to focus after a model has been chosen.

    Ollama providers go to ``"ollama-options"``, providers with extra
    options go to ``"provider-options"``, everything else to ``"test"``.
    """
    provider = current_provider()
    if is_ollama_provider(provider):
        target = "ollama-options"
    elif has_provider_options(provider):
        target = "provider-options"
    else:
        target = "test"
    return target
1283
-
1284
-
1285
def summarize_test_output(code: int, out: str) -> list[str]:
    """Condense raw compatibility-test output into at most 12 display lines.

    Args:
        code: Exit status of the test command; non-zero means failure.
        out: Combined text the test command printed.

    Returns:
        The lines to show in the test-result submenu. Empty output and
        Python tracebacks are replaced by short fixed summaries; otherwise
        only lines starting with a known label are kept, or the first eight
        raw lines when no label matches.
    """
    raw = out.strip().splitlines()
    if not raw:
        verdict = "Compatibility: FAIL" if code else "Compatibility: OK"
        return [verdict, "No output from compatibility test."]

    crashed = False
    for line in raw:
        if line.startswith("Traceback "):
            crashed = True
            break
    if crashed:
        # Report the last meaningful line, skipping "~" marker lines.
        reason = "Internal test error"
        for line in reversed(raw):
            candidate = line.strip()
            if candidate and not candidate.startswith("~"):
                reason = candidate
                break
        return ["Compatibility: FAIL", "Reason: internal claude-any test error", reason[:160]]

    keep_prefixes = (
        "Testing provider:",
        "Test mode:",
        "Mode:",
        "URL:",
        "Claude API URL:",
        "Upstream base URL:",
        "Model:",
        "Compatibility:",
        "HTTP:",
        "Reason:",
        "Diagnosis:",
        "Stop reason:",
        "Content blocks:",
        "Tokens:",
        "Tool result text:",
        "Note:",
    )
    lines = [line for line in raw if line.startswith(keep_prefixes)] or raw[:8]

    has_verdict = any(line.startswith("Compatibility:") for line in lines)
    if code != 0 and not has_verdict:
        # A failing exit status must always be visible in the summary.
        lines.insert(0, "Compatibility: FAIL")
    return lines[:12]
1316
-
1317
-
1318
- def test_submenu(lines: list[str]) -> dict:
1319
- return {
1320
- "kind": "test-result",
1321
- "parent": "test",
1322
- "items": [{"value": "", "label": line, "current": False} for line in lines],
1323
- "idx": 0,
1324
- "offset": 0,
1325
- "readonly": True,
1326
- }
1327
-
1328
-
1329
def run_test_with_animation(idx: int, checks: list[str]) -> tuple[int, str]:
    """Run the compatibility test while animating a spinner in the menu.

    Starts ``CTL test <timeout> auto`` as a child process and redraws the
    menu with a spinner and elapsed time about five times per second.

    Args:
        idx: Index of the highlighted main-menu entry, passed to ``render``.
        checks: Preflight check lines, passed to ``render``.

    Returns:
        ``(exit_code, output)``. When the child outlives the hard timeout it
        is stopped and the result is ``124`` with a timeout explanation
        appended to whatever output was captured.
    """
    frames = ["|", "/", "-", "\\"]
    started = time.monotonic()
    test_timeout = 60
    # Grace period on top of the timeout handed to the child itself.
    hard_timeout = test_timeout + 15
    proc = subprocess.Popen(
        [CTL, "test", str(test_timeout), "auto"],
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    frame = 0
    while True:
        elapsed = int(time.monotonic() - started)
        notice = [f"{frames[frame % len(frames)]} {t('running_test')} ({elapsed}s/{test_timeout}s)"]
        render(None, idx, None, notice, checks)
        if elapsed >= hard_timeout:
            proc.terminate()
            try:
                out, _ = proc.communicate(timeout=5)
            except subprocess.TimeoutExpired:
                # terminate() was ignored; force the child down.
                proc.kill()
                out, _ = proc.communicate()
            timeout_msg = (
                f"Compatibility: FAIL\n"
                f"Reason: compatibility test exceeded {test_timeout}s and was stopped by the menu.\n"
                "Diagnosis: retry the test or choose a faster/more reliable model."
            )
            return 124, ((out or "").rstrip() + "\n" + timeout_msg).strip()
        try:
            # communicate() drains the pipe while waiting, so a chatty child
            # cannot block on a full pipe buffer. Retrying after
            # TimeoutExpired does not lose output.
            out, _ = proc.communicate(timeout=0.2)
        except subprocess.TimeoutExpired:
            frame += 1
            continue
        return proc.returncode or 0, out or ""
1362
-
1363
-
1364
def inline_prompt(stdscr, prompt_text: str, row: int, default: str = "") -> str:
    """Read a line of text on a highlighted row of the menu screen.

    Args:
        stdscr: Unused by this function.
        prompt_text: Label drawn in bold before the input area.
        row: Preferred screen row; clamped to the visible area.
        default: Text shown initially and returned when nothing is entered.

    Returns:
        The stripped text typed before Enter, or ``default`` when the input
        is empty or Escape is pressed.
    """
    height, width = _term_size()
    line_no = max(1, min(row, height - 3))
    plain = _style(reverse=True)
    strong = _style(reverse=True, bold=True)
    blank = " " * max(0, width - 1)
    label = prompt_text[: max(0, width - 4)]
    last_col = max(0, width - 2)
    start = min(len(prompt_text) + 2, last_col)
    room = max(0, width - start - 1)

    def paint(text=None) -> None:
        # Redraw the whole row; ``None`` draws only the prompt.
        _write(line_no, 0, blank, plain)
        _write(line_no, 2, label, strong)
        if text is None:
            _move(line_no, start)
        else:
            _write(line_no, start, text[:room], plain)
            _move(line_no, min(start + len(text), last_col))
        sys.stdout.flush()

    paint(default if default else None)

    typed = []
    while True:
        key = read_menu_key()
        if key == "KEY_ENTER":
            return "".join(typed).strip() or default
        if key == "KEY_ESC":
            return default
        if key == "KEY_BACKSPACE":
            # Slice deletion is a no-op on an empty buffer.
            del typed[-1:]
        elif len(key) == 1 and key.isprintable():
            typed.append(key)
        paint("".join(typed))
1396
-
1397
-
1398
def inline_secret_prompt(stdscr, prompt_text: str, row: int) -> str:
    """Read a secret on a highlighted row, echoing ``*`` per character.

    Args:
        stdscr: Unused by this function.
        prompt_text: Label drawn in bold before the input area.
        row: Preferred screen row; clamped to the visible area.

    Returns:
        The stripped text typed before Enter, or ``""`` when Escape is
        pressed.
    """
    height, width = _term_size()
    line_no = max(1, min(row, height - 3))
    plain = _style(reverse=True)
    strong = _style(reverse=True, bold=True)
    blank = " " * max(0, width - 1)
    label = prompt_text[: max(0, width - 4)]
    last_col = max(0, width - 2)
    start = min(len(prompt_text) + 2, last_col)
    room = max(0, width - start - 1)

    def paint_frame() -> None:
        # Clear the row and draw the prompt label.
        _write(line_no, 0, blank, plain)
        _write(line_no, 2, label, strong)

    paint_frame()
    _move(line_no, start)
    sys.stdout.flush()

    typed = []
    while True:
        key = read_menu_key()
        if key == "KEY_ENTER":
            return "".join(typed).strip()
        if key == "KEY_ESC":
            return ""
        if key == "KEY_BACKSPACE":
            # Slice deletion is a no-op on an empty buffer.
            del typed[-1:]
        elif len(key) == 1 and key.isprintable():
            typed.append(key)
        paint_frame()
        stars = "*" * len(typed)
        _write(line_no, start, stars[:room], plain)
        _move(line_no, min(start + len(stars), last_col))
        sys.stdout.flush()
1427
-
1428
-
1429
def message(stdscr, title: str, lines: list[str]) -> None:
    """Show a full-screen message and wait for a key press.

    Args:
        stdscr: Unused by this function.
        title: Bold heading drawn on the first row.
        lines: Body lines drawn from row 2 downwards; lines that do not fit
            above the footer are dropped.
    """
    _clear()
    h, w = _term_size()
    # Clamp the slice bounds: a negative bound would cut from the end of the
    # text/list instead of limiting its length on very small terminals.
    text_width = max(0, w - 1)
    body_rows = max(0, h - 4)
    _write_safe(0, 0, title[:text_width], _style(bold=True))
    for i, line in enumerate(lines[:body_rows]):
        _write_safe(2 + i, 0, line[:text_width])
    _write_safe(h - 2, 0, "Press any key to continue", _style(dim=True) + cp(5))
    sys.stdout.flush()
    read_menu_key()
1438
-
1439
-
1440
def api_key_flow(stdscr) -> list[str]:
    """Run the external ``api-key`` command for the current provider.

    Blocks until the command exits and the user presses Enter, then returns
    a one-line notice naming the provider. ``stdscr`` is unused.
    """
    provider = current_provider()
    command = [CTL, "api-key", provider]
    # The exit status is deliberately not checked.
    subprocess.run(command, check=False)
    input("Press Enter to return to claude-any menu...")
    return ["API key flow completed for {}".format(provider)]
1445
-
1446
-
1447
def visible_sub_window(sub: dict, max_rows: int) -> tuple[int, int]:
    """Scroll a submenu so its selected item is visible.

    Updates ``sub["offset"]`` in place and returns the half-open range
    ``(start, end)`` of item indices to draw, at most ``max_rows`` long.
    """
    total = len(sub["items"])
    cursor = sub["idx"]
    # Pull the window up to the cursor, then push it down if the cursor
    # would fall below the last visible row.
    first = min(sub.get("offset", 0), cursor)
    first = max(first, cursor - max_rows + 1)
    # Never scroll past the end of the list or before its start.
    furthest = max(0, total - max_rows)
    first = max(0, min(first, furthest))
    sub["offset"] = first
    return first, min(total, first + max_rows)
1458
-
1459
-
1460
def selected_sub_description(sub: dict | None) -> str:
    """Return the description of the submenu's selected item, or ``""``.

    An absent/empty submenu, a failed item lookup and a missing or empty
    description all yield the empty string.
    """
    if sub:
        try:
            chosen = sub["items"][sub["idx"]]
        except Exception:
            # Any failure to locate the selected item means "no description".
            pass
        else:
            return str(chosen.get("description") or "")
    return ""
1468
-
1469
-
1470
def index_for_action(action: str) -> int:
    """Return the position of ``action`` in ``main_items()``, or 0 if absent."""
    for position, (key, _label) in enumerate(main_items()):
        if key == action:
            return position
    return 0
1473
-
1474
-
1475
def add(stdscr, y: int, x: int, text: str, style: str = "") -> None:
    """Write ``text`` at row ``y``, treating a negative column as column 0.

    ``stdscr`` is unused; the write is delegated to ``_write_safe``.
    """
    column = x if x > 0 else 0
    _write_safe(y, column, text, style)
1477
-
1478
-
1479
def draw_intro_panel(stdscr) -> int:
    """Draw the banner panel at the top of the menu screen.

    Terminals shorter than 20 rows get a single title line. Otherwise a
    bordered panel is drawn: a two-column layout when the terminal is at
    least 92 columns wide, a single-column layout when it is narrower.

    Returns:
        The first row available below the banner (1 for the compact title,
        ``panel_h + 1`` for the panel).
    """
    h, w = _term_size()
    if h < 20:
        _write(0, 0, animated_text(APP_NAME) + f" - {CREDITS}")
        return 1

    panel_w = max(40, w - 2)
    panel_h = 8 if h >= 24 else 7
    border = cp(4)
    horizontal = "+" + "-" * (panel_w - 2) + "+"
    add(stdscr, 0, 0, horizontal, border)
    # The animated title is written over the top border.
    _write(0, 4, " " + animated_text(APP_NAME) + " ", border)
    for y in range(1, panel_h - 1):
        add(stdscr, y, 0, "|", border)
        add(stdscr, y, panel_w - 1, "|", border)
    add(stdscr, panel_h - 1, 0, horizontal, border)

    # NOTE(review): both layouts put CREDITS on row 6, which is the bottom
    # border row when panel_h == 7. Confirm whether that overlap is intended.
    if w >= 92:
        split = min(44, panel_w // 2)
        for y in range(1, panel_h - 1):
            add(stdscr, y, split, "|", border)
        add(stdscr, 1, 8, "Welcome back!", _style(bold=True) + cp(5))
        _write(3, 9, animated_text("CLAUDE"))
        _write(4, 12, animated_text("ANY", phase=int(time.monotonic() * 8) + 4))
        add(stdscr, 6, 6, CREDITS, _style(bold=True) + cp(5))

        right = split + 3
        add(stdscr, 1, right, "Tips for getting started", _style(bold=True) + cp(4))
        add(stdscr, 2, right, "Choose provider, model, base URL, and API key before launch.", cp(5))
        add(stdscr, 3, right, "Routes Claude Code to Anthropic, Ollama, vLLM, Nvidia, or NIM.", cp(5))
        add(stdscr, 4, right, "Adds DuckDuckGo web search tooling for non-native providers.", cp(5))
        add(stdscr, 5, right, "Use --ca-* flags for headless runs; Claude flags pass through.", cp(5))
    else:
        add(stdscr, 1, 3, f"{APP_NAME} routes Claude Code through selectable providers.", _style(bold=True) + cp(5))
        add(stdscr, 2, 3, "Anthropic, Ollama, vLLM, Nvidia Hosted, and self-hosted NIM.", cp(5))
        add(stdscr, 3, 3, "DuckDuckGo web search is attached for non-native providers.", cp(5))
        add(stdscr, 4, 3, "Headless setup uses --ca-* flags; Claude flags pass through.", cp(5))
        # panel_h is always 7 or 8 here, so the former "panel_h > 6" test was
        # always true and its row-5 alternative could never run.
        add(stdscr, 6, 3, CREDITS, _style(bold=True) + cp(3))

    return panel_h + 1
1521
-
1522
-
1523
def render(stdscr, idx: int, sub: dict | None, notice: list[str], checks: list[str]) -> dict[str, int]:
    """Redraw the whole menu screen.

    Draws, top to bottom: the intro panel, a few status lines, the main menu
    with the open submenu listed under its parent entry, the description of
    the selected submenu item, the check lines, up to two notice lines and a
    help line on the second-to-last row.

    Args:
        stdscr: Passed through to ``draw_intro_panel`` and ``add``.
        idx: Index of the highlighted main-menu entry.
        sub: Open submenu dict, or ``None``.
        notice: Transient messages; only the first two are shown.
        checks: Check lines shown below the menu.

    Returns:
        A mapping from main action key to the screen row assigned to it, plus
        ``"__sub_selected__"`` holding the row of the highlighted submenu item
        (``-1`` when none was drawn).
    """
    lines = status_text()
    items = main_items()
    h, w = _term_size()
    _clear()
    top = draw_intro_panel(stdscr)
    # Taller terminals show more status lines.
    status_count = 5 if h >= 28 else 4 if h >= 23 else 2
    for i, line in enumerate(lines[:status_count]):
        color = cp(2) if line.startswith("provider:") or line.startswith("model:") else cp(5)
        add(stdscr, top + i, 2, line, color)

    row = top + status_count + 1
    row_by_action: dict[str, int] = {}
    sub_selected_row = -1
    # Number of submenu rows to show at once: between 3 and 10, shrinking
    # with the space left after the menu entries and check lines.
    submenu_budget = max(3, min(10, h - row - len(items) - len(checks) - 4))
    if sub and sub.get("kind") == "test-result":
        submenu_budget = max(4, min(10, h - row - len(items) - len(checks) - 3))

    for i, (key, label) in enumerate(items):
        # The row is recorded before the overflow check, so the entry that
        # triggers the break still gets a row.
        row_by_action[key] = row
        if row >= h - 3:
            break
        # The main entry is highlighted only while no editable submenu is open.
        if i == idx and (sub is None or sub.get("readonly")):
            style = _style(reverse=True, bold=True)
        elif key == "launch":
            style = cp(2) + _style(bold=True)
        elif key == "test":
            style = cp(3) + _style(bold=True)
        elif key == "quit":
            style = cp(4)
        elif key in ("language", "provider", "model", "advisor-model", "ollama-options", "provider-options", "api-key", "base-url"):
            style = cp(3)
        else:
            style = ""
        _write_safe(row, 2, label[: max(0, w - 4)], style)
        row += 1

        # The open submenu is listed directly below its parent entry.
        if sub and sub.get("parent") == key:
            start, end = visible_sub_window(sub, submenu_budget)
            if start > 0 and row < h - 3:
                _write_safe(row, 6, f"... {start} above", _style(dim=True) + cp(5))
                row += 1
            for si in range(start, end):
                if row >= h - 3:
                    break
                item = sub["items"][si]
                if sub.get("kind") == "test-result":
                    # Result lines are styled by their content.
                    text = f"  {item['label']}"
                    if "FAIL" in item["label"] or "TIMEOUT" in item["label"] or item["label"].startswith(("HTTP:", "Reason:", "Diagnosis:")):
                        style = cp(4) + _style(bold=True)
                    elif "OK" in item["label"]:
                        style = cp(2) + _style(bold=True)
                    else:
                        style = _style(dim=True) + cp(5)
                else:
                    # "*" marks the configured value, ">" the cursor.
                    marker = "*" if item.get("current") else " "
                    prefix = ">" if si == sub["idx"] else " "
                    text = f"{prefix} {marker} {item['label']}"
                    if si == sub["idx"]:
                        style = _style(reverse=True, bold=True)
                        sub_selected_row = row
                    elif item.get("current"):
                        style = cp(2) + _style(bold=True)
                    elif "[OK]" in item["label"]:
                        style = cp(2)
                    elif "[FAIL" in item["label"] or "[TIMEOUT]" in item["label"]:
                        style = cp(4) + _style(bold=True)
                    else:
                        style = _style(dim=True)
                # Highlight the cursor row of any editable submenu.
                if si == sub["idx"] and not sub.get("readonly"):
                    style = _style(reverse=True, bold=True)
                    sub_selected_row = row
                _write_safe(row, 4, text[: max(0, w - 6)], style)
                row += 1
            remaining = len(sub["items"]) - end
            if remaining > 0 and row < h - 3:
                _write_safe(row, 6, f"... {remaining} more", _style(dim=True) + cp(5))
                row += 1

    # NOTE(review): the source was flattened, so the nesting of this section
    # is reconstructed; it is assumed to run once, after the menu loop.
    desc = selected_sub_description(sub)
    if desc and row < h - 5:
        _write_safe(row, 2, ("-" * max(8, w - 4))[: max(0, w - 4)], _style(dim=True) + cp(6))
        row += 1
        # At most two wrapped lines of description are shown.
        for line in textwrap.wrap(desc, width=max(24, w - 6))[:2]:
            if row >= h - 4:
                break
            _write_safe(row, 2, line[: max(0, w - 4)], _style(bold=True) + cp(6))
            row += 1

    if row < h - 4:
        _write_safe(row, 2, ("-" * max(8, w - 4))[: max(0, w - 4)], _style(dim=True) + cp(6))
        row += 1
    # NOTE(review): assumed to sit outside the guard above, since the slice
    # already limits the lines to the remaining rows. Confirm against the
    # original indentation.
    for line in checks[: max(0, h - row - 3)]:
        _write_safe(row, 2, line[: max(0, w - 4)], _style(bold=True) + cp(6))
        row += 1

    if notice:
        # Notices are placed relative to the bottom of the screen.
        y = max(0, h - 5 - min(len(notice), 2))
        for j, line in enumerate(notice[:2]):
            _write_safe(y + j, 0, line[: w - 1], cp(2) if j == 0 else _style(dim=True))

    current_action = items[idx][0]
    _write_safe(h - 2, 0, help_for_action(current_action, sub.get("kind") if sub else None)[: w - 1], _style(dim=True) + cp(5))
    sys.stdout.flush()
    row_by_action["__sub_selected__"] = sub_selected_row
    return row_by_action
1629
-
1630
-
1631
- def main() -> int:
1632
- init_colors()
1633
- idx = index_for_action("launch") if settings_ready_except_api_key() else 0
1634
- sub: dict | None = None
1635
- notice: list[str] = []
1636
- checks = preflight_checks()
1637
- row_by_action: dict[str, int] = {}
1638
-
1639
- def apply_test_result(code: int, out: str) -> None:
1640
- nonlocal sub, notice, checks, idx
1641
- ok = code == 0
1642
- sub = test_submenu(summarize_test_output(code, out))
1643
- if ok:
1644
- notice = [t("test_passed")]
1645
- elif "TIMEOUT" in out.upper() or "timed out" in out.lower():
1646
- notice = ["Compatibility test timed out. The provider or model took too long to respond."]
1647
- else:
1648
- notice = [t("test_failed")]
1649
- checks = preflight_checks()
1650
- idx = index_for_action("launch" if ok else "model")
1651
-
1652
- while True:
1653
- items = main_items()
1654
- idx = max(0, min(idx, len(items) - 1))
1655
- row_by_action = render(None, idx, sub, notice, checks)
1656
- ch = read_menu_key()
1657
-
1658
- if sub and sub.get("readonly"):
1659
- if ch in ("KEY_ESC", "q"):
1660
- sub = None
1661
- notice = []
1662
- continue
1663
- if ch in ("KEY_UP", "k"):
1664
- notice = []
1665
- idx = (idx - 1) % len(items)
1666
- continue
1667
- if ch in ("KEY_DOWN", "j"):
1668
- notice = []
1669
- idx = (idx + 1) % len(items)
1670
- continue
1671
- if ch == "KEY_ENTER":
1672
- action = items[idx][0]
1673
- if action == "launch":
1674
- return 0
1675
- if action == "test":
1676
- code, out = run_test_with_animation(idx, checks)
1677
- apply_test_result(code, out)
1678
- continue
1679
- sub = None
1680
- else:
1681
- continue
1682
-
1683
- if sub:
1684
- if ch in ("KEY_ESC", "q"):
1685
- sub = None
1686
- notice = []
1687
- checks = preflight_checks()
1688
- continue
1689
- if ch in ("KEY_UP", "k"):
1690
- notice = []
1691
- sub["idx"] = (sub["idx"] - 1) % len(sub["items"])
1692
- provider_preview = selected_provider_value(sub)
1693
- if provider_preview:
1694
- checks = provider_preview_checks(provider_preview)
1695
- continue
1696
- if ch in ("KEY_DOWN", "j"):
1697
- notice = []
1698
- sub["idx"] = (sub["idx"] + 1) % len(sub["items"])
1699
- provider_preview = selected_provider_value(sub)
1700
- if provider_preview:
1701
- checks = provider_preview_checks(provider_preview)
1702
- continue
1703
- if ch == "KEY_NPAGE":
1704
- sub["idx"] = min(len(sub["items"]) - 1, sub["idx"] + 10)
1705
- provider_preview = selected_provider_value(sub)
1706
- if provider_preview:
1707
- checks = provider_preview_checks(provider_preview)
1708
- continue
1709
- if ch == "KEY_PPAGE":
1710
- sub["idx"] = max(0, sub["idx"] - 10)
1711
- provider_preview = selected_provider_value(sub)
1712
- if provider_preview:
1713
- checks = provider_preview_checks(provider_preview)
1714
- continue
1715
- if ch == "KEY_ENTER":
1716
- item = sub["items"][sub["idx"]]
1717
- if sub["kind"] == "language":
1718
- _, out = run_cmd([CTL, "language", item["value"]])
1719
- notice = (out.strip().splitlines() or [item["value"]])[:2]
1720
- checks = preflight_checks()
1721
- sub = None
1722
- idx = index_for_action("provider")
1723
- elif sub["kind"] == "provider":
1724
- _, out = run_cmd([CTL, "provider", item["value"]])
1725
- notice = (out.strip().splitlines() or [item["value"]])[:2]
1726
- checks = preflight_checks()
1727
- sub = None
1728
- idx = index_for_action("api-key")
1729
- elif sub["kind"] == "api-key":
1730
- row = row_by_action.get("__sub_selected__", row_by_action.get("api-key", 10))
1731
- key = inline_secret_prompt(None, f"API key for {item['value']}: ", row)
1732
- if key:
1733
- _, out = run_cmd([CTL, "set-api-key", item["value"], key])
1734
- notice = (out.strip().splitlines() or [item["value"]])[:2]
1735
- checks = preflight_checks()
1736
- idx = index_for_action("base-url")
1737
- else:
1738
- notice = [t("api_key_unchanged")]
1739
- sub = None
1740
- elif sub["kind"] == "model":
1741
- if item["value"] == "__custom__":
1742
- row = row_by_action.get("__sub_selected__", row_by_action.get("model", 10))
1743
- value = inline_prompt(None, "Model id or alias: ", row)
1744
- if value:
1745
- _, out = run_cmd([CTL, "model", value])
1746
- notice = (out.strip().splitlines() or [value])[:2]
1747
- checks = preflight_checks()
1748
- idx = index_for_action(after_model_action())
1749
- sub = None
1750
- else:
1751
- _, out = run_cmd([CTL, "model", item["value"]])
1752
- notice = (out.strip().splitlines() or [item["value"]])[:2]
1753
- checks = preflight_checks()
1754
- sub = None
1755
- idx = index_for_action(after_model_action())
1756
- elif sub["kind"] == "advisor-model":
1757
- row = row_by_action.get("__sub_selected__", row_by_action.get("advisor-model", 10))
1758
- if item["value"] == "__custom__":
1759
- value = inline_prompt(None, "Advisor model id: ", row, "deepseek-v4-pro")
1760
- else:
1761
- value = item["value"] or "off"
1762
- _, out = run_cmd([CTL, "advisor-model", value])
1763
- notice = (out.strip().splitlines() or [value])[:2]
1764
- checks = preflight_checks()
1765
- sub = None
1766
- idx = index_for_action("ollama-options" if is_ollama_provider(current_provider()) else ("provider-options" if has_provider_options(current_provider()) else "test"))
1767
- elif sub["kind"] == "ollama-options":
1768
- provider = current_provider()
1769
- row = row_by_action.get("__sub_selected__", row_by_action.get("ollama-options", 10))
1770
- provider_now, pcfg_now = current_provider_cfg()
1771
- opts_now = pcfg_now.get("ollama_options") or {}
1772
- if not isinstance(opts_now, dict):
1773
- opts_now = {}
1774
- action_value = item["value"]
1775
- value = ""
1776
- if action_value == "__edit_num_ctx__":
1777
- default = str(pcfg_now.get("num_ctx", "auto"))
1778
- entered = inline_prompt(None, "num_ctx (auto or integer): ", row, default)
1779
- value = f"num_ctx={entered}" if entered else ""
1780
- elif action_value == "__edit_min__":
1781
- default = str(pcfg_now.get("num_ctx_min", 32768))
1782
- entered = inline_prompt(None, "num_ctx auto minimum: ", row, default)
1783
- value = f"min={entered}" if entered else ""
1784
- elif action_value == "__edit_max__":
1785
- default = str(pcfg_now.get("num_ctx_max", 131072))
1786
- entered = inline_prompt(None, "num_ctx auto maximum: ", row, default)
1787
- value = f"max={entered}" if entered else ""
1788
- elif action_value == "__edit_keep_alive__":
1789
- default = str(pcfg_now.get("keep_alive", "5m"))
1790
- entered = inline_prompt(None, "keep_alive: ", row, default)
1791
- value = f"keep_alive={entered}" if entered else ""
1792
- elif action_value == "__edit_temperature__":
1793
- default = str(opts_now.get("temperature", "0.7"))
1794
- entered = inline_prompt(None, "temperature (unset:temperature clears): ", row, default)
1795
- value = entered if entered.startswith("unset:") else (f"temperature={entered}" if entered else "")
1796
- elif action_value == "__edit_top_p__":
1797
- default = str(opts_now.get("top_p", "0.8"))
1798
- entered = inline_prompt(None, "top_p (unset:top_p clears): ", row, default)
1799
- value = entered if entered.startswith("unset:") else (f"top_p={entered}" if entered else "")
1800
- elif action_value == "__edit_max_tokens__":
1801
- default = str(opts_now.get("num_predict", "4096"))
1802
- entered = inline_prompt(None, "max_tokens / num_predict: ", row, default)
1803
- value = f"max_tokens={entered}" if entered else ""
1804
- elif action_value == "__edit_timeout__":
1805
- default = str(pcfg_now.get("request_timeout_ms", "300000"))
1806
- entered = inline_prompt(None, "timeout ms: ", row, default)
1807
- value = f"timeout={entered}" if entered else ""
1808
- elif action_value == "__edit_rate_limit__":
1809
- default = str(pcfg_now.get("rate_limit_rpm", "40"))
1810
- entered = inline_prompt(None, "rate_limit_rpm (0 disables): ", row, default)
1811
- value = f"rate_limit_rpm={entered}" if entered else ""
1812
- elif action_value == "__custom__":
1813
- value = inline_prompt(None, "Ollama option KEY=VALUE: ", row, "temperature=0.7")
1814
- else:
1815
- value = action_value
1816
- if value:
1817
- _, out = run_cmd([CTL, "ollama-options", provider, value])
1818
- notice = (out.strip().splitlines() or [value])[:2]
1819
- checks = preflight_checks()
1820
- idx = index_for_action("test")
1821
- sub = None
1822
- elif sub["kind"] == "provider-options":
1823
- provider = current_provider()
1824
- row = row_by_action.get("__sub_selected__", row_by_action.get("provider-options", 10))
1825
- provider_now, pcfg_now = current_provider_cfg()
1826
- action_value = item["value"]
1827
- value = ""
1828
- if action_value == "__edit_context_window__":
1829
- default = str(pcfg_now.get("context_window", "32768"))
1830
- entered = inline_prompt(None, "context_window: ", row, default)
1831
- value = f"context_window={entered}" if entered else ""
1832
- elif action_value == "__edit_reserve__":
1833
- default = str(pcfg_now.get("context_reserve_tokens", "1024"))
1834
- entered = inline_prompt(None, "context_reserve_tokens: ", row, default)
1835
- value = f"context_reserve_tokens={entered}" if entered else ""
1836
- elif action_value == "__edit_max_output__":
1837
- default = str(pcfg_now.get("max_output_tokens", "4096"))
1838
- entered = inline_prompt(None, "max_output_tokens: ", row, default)
1839
- value = f"max_output_tokens={entered}" if entered else ""
1840
- elif action_value == "__edit_timeout__":
1841
- default = str(pcfg_now.get("request_timeout_ms", "300000"))
1842
- entered = inline_prompt(None, "timeout ms: ", row, default)
1843
- value = f"timeout={entered}" if entered else ""
1844
- elif action_value == "__edit_native__":
1845
- default = "true" if pcfg_now.get("native_compat", True) else "false"
1846
- entered = inline_prompt(None, "native true/false: ", row, default)
1847
- value = f"native={entered}" if entered else ""
1848
- elif action_value == "__custom__":
1849
- value = inline_prompt(None, "Provider option KEY=VALUE: ", row, "max_output_tokens=4096")
1850
- else:
1851
- value = action_value
1852
- if value:
1853
- _, out = run_cmd([CTL, "provider-options", provider, value])
1854
- notice = (out.strip().splitlines() or [value])[:2]
1855
- checks = preflight_checks()
1856
- idx = index_for_action("test")
1857
- sub = None
1858
- continue
1859
- continue
1860
-
1861
- if ch in ("KEY_ESC", "q"):
1862
- return 10
1863
- if ch in ("KEY_UP", "k"):
1864
- notice = []
1865
- idx = (idx - 1) % len(items)
1866
- continue
1867
- if ch in ("KEY_DOWN", "j"):
1868
- notice = []
1869
- idx = (idx + 1) % len(items)
1870
- continue
1871
- if ch != "KEY_ENTER":
1872
- continue
1873
-
1874
- action = items[idx][0]
1875
- if action == "launch":
1876
- return 0
1877
- if action == "test":
1878
- code, out = run_test_with_animation(idx, checks)
1879
- apply_test_result(code, out)
1880
- continue
1881
- if action == "quit":
1882
- return 10
1883
- if action == "language":
1884
- sub = build_language_submenu()
1885
- notice = []
1886
- elif action == "provider":
1887
- sub = build_provider_submenu()
1888
- notice = []
1889
- provider_preview = selected_provider_value(sub)
1890
- if provider_preview:
1891
- checks = provider_preview_checks(provider_preview)
1892
- elif action == "model":
1893
- notice = [t("loading_models")]
1894
- render(None, idx, None, notice, checks)
1895
- sub, fallback_notice = build_model_submenu()
1896
- notice = fallback_notice
1897
- if sub is None:
1898
- row = row_by_action.get("model", 10)
1899
- value = inline_prompt(None, "Model id or alias: ", row)
1900
- if value:
1901
- _, out = run_cmd([CTL, "model", value])
1902
- notice = (out.strip().splitlines() or [value])[:2]
1903
- checks = preflight_checks()
1904
- idx = index_for_action(after_model_action())
1905
- elif action == "advisor-model":
1906
- notice = []
1907
- sub = build_advisor_model_submenu()
1908
- elif action == "ollama-options":
1909
- provider = current_provider()
1910
- if is_ollama_provider(provider):
1911
- sub = build_ollama_options_submenu()
1912
- notice = []
1913
- else:
1914
- notice = ["Ollama options are available only for ollama and ollama-cloud."]
1915
- elif action == "provider-options":
1916
- provider = current_provider()
1917
- if has_provider_options(provider):
1918
- sub = build_provider_options_submenu()
1919
- notice = []
1920
- else:
1921
- notice = ["Provider options are available for vLLM, NVIDIA hosted, and self-hosted NIM."]
1922
- elif action == "api-key":
1923
- provider = current_provider()
1924
- row = row_by_action.get("api-key", 10)
1925
- key = inline_secret_prompt(None, f"API key for {provider}: ", row)
1926
- if key:
1927
- _, out = run_cmd([CTL, "set-api-key", provider, key])
1928
- notice = (out.strip().splitlines() or [provider])[:2]
1929
- checks = preflight_checks()
1930
- else:
1931
- notice = [t("api_key_unchanged")]
1932
- idx = index_for_action("base-url")
1933
- elif action == "base-url":
1934
- provider = current_provider()
1935
- row = row_by_action.get("base-url", 12)
1936
- value = inline_prompt(None, f"Base URL for {provider}: ", row, default_base_url(provider))
1937
- if value:
1938
- _, out = run_cmd([CTL, "base-url", provider, value])
1939
- notice = (out.strip().splitlines() or [value])[:2]
1940
- checks = preflight_checks()
1941
- idx = index_for_action("model")
1942
-
1943
-
1944
if __name__ == "__main__":
    # Script entry point: run the menu inside the raw-terminal context manager
    # and use main()'s return value as the process exit status. Ctrl-C exits
    # with status 10, the same value main() returns for "quit".
    try:
        with _RawTerminal():
            sys.exit(main())
    except KeyboardInterrupt:
        sys.exit(10)
1133
+ ]
1134
+ items = [
1135
+ {"value": value, "label": label, "current": current, "description": ollama_option_description(value)}
1136
+ for value, label, current in choices
1137
+ ]
1138
+ return {"kind": "ollama-options", "parent": "ollama-options", "items": items, "idx": 0, "offset": 0}
1139
+
1140
+
1141
# Localized help text for the special rows of the provider-options submenu.
# Outer keys are submenu item values ("__edit_*__" / "__custom__"); inner keys
# are language codes. provider_option_description() looks entries up by the
# current language and falls back to the "en" text.
PROVIDER_OPTION_DESCRIPTIONS = {
    "__edit_context_window__": {
        "en": "Edit the context window value used by claude-any tests and router caps. Native mode cannot raise the real server limit.",
        "ko": "claude-any 테스트와 라우터 제한 계산에 쓰는 컨텍스트 값입니다. native 모드에서는 실제 서버 한계를 늘리지 못합니다.",
        "ja": "claude-anyのテストとルーター制限計算に使うコンテキスト値です。nativeモードでは実サーバー上限は増やせません。",
        "zh": "编辑 claude-any 测试和路由器限制计算使用的上下文值。native 模式不能提高真实服务器上限。",
    },
    "__edit_reserve__": {
        "en": "Reserve input-side room when claude-any router caps max_tokens. This is ignored by direct native Claude Code requests.",
        "ko": "claude-any 라우터가 max_tokens를 줄일 때 입력 쪽 여유로 남기는 토큰입니다. direct native 요청에는 적용되지 않습니다.",
        "ja": "claude-anyルーターがmax_tokensを制限する時に入力側へ残す余裕です。direct native要求では無視されます。",
        "zh": "claude-any 路由器限制 max_tokens 时预留给输入侧的空间。direct native 请求会忽略它。",
    },
    "__edit_max_output__": {
        "en": "Set Claude Code's CLAUDE_CODE_MAX_OUTPUT_TOKENS and the claude-any router cap. 4096 is the default.",
        "ko": "Claude Code의 CLAUDE_CODE_MAX_OUTPUT_TOKENS와 claude-any 라우터 출력 제한입니다. 기본값은 4096입니다.",
        "ja": "Claude CodeのCLAUDE_CODE_MAX_OUTPUT_TOKENSとclaude-anyルーターの出力制限です。既定値は4096です。",
        "zh": "设置 Claude Code 的 CLAUDE_CODE_MAX_OUTPUT_TOKENS 和 claude-any 路由器输出上限。默认 4096。",
    },
    "__edit_timeout__": {
        "en": "Edit claude-any compatibility-test/router upstream timeout in milliseconds. Claude Code native networking has its own timeout behavior.",
        "ko": "claude-any 호환성 테스트/라우터의 업스트림 대기 시간(ms)입니다. Claude Code native 네트워크 대기는 자체 동작을 따릅니다.",
        "ja": "claude-any互換性テスト/ルーターの上流タイムアウト(ms)です。Claude Code native通信は独自の挙動です。",
        "zh": "编辑 claude-any 兼容性测试/路由器上游超时(毫秒)。Claude Code native 网络有自身超时行为。",
    },
    "__edit_native__": {
        "en": "Toggle direct Anthropic Messages compatibility. Use it for vLLM/self-hosted NIM servers that implement /v1/messages.",
        "ko": "Anthropic Messages 호환 엔드포인트에 직접 연결할지 정합니다. /v1/messages를 구현한 vLLM/self-hosted NIM에서 사용합니다.",
        "ja": "Anthropic Messages互換エンドポイントへ直接接続するかを切り替えます。/v1/messages対応のvLLM/self-hosted NIMで使います。",
        "zh": "切换是否直接连接 Anthropic Messages 兼容端点。用于实现 /v1/messages 的 vLLM/self-hosted NIM。",
    },
    "__custom__": {
        "en": "Enter provider option as KEY=VALUE, or unset:KEY to remove it.",
        "ko": "프로바이더 옵션을 KEY=VALUE로 입력합니다. 삭제하려면 unset:KEY를 입력합니다.",
        "ja": "プロバイダーオプションをKEY=VALUEで入力します。削除はunset:KEYです。",
        "zh": "用 KEY=VALUE 输入提供商选项;用 unset:KEY 删除。",
    },
}
1179
+
1180
+
1181
def provider_option_description(value: str) -> str:
    """Return the localized help text for one provider-options submenu value.

    ``value`` is either a special row id (``__edit_*__`` / ``__custom__``) or a
    ``KEY=VALUE`` preset. Presets share the text of the matching edit row where
    one exists; anything unrecognised gets the ``__custom__`` text.
    """
    lang = current_language()

    def from_table(entry_key: str) -> str:
        # Table entries fall back to their English text.
        entry = PROVIDER_OPTION_DESCRIPTIONS[entry_key]
        return entry.get(lang, entry["en"])

    if value in PROVIDER_OPTION_DESCRIPTIONS:
        return from_table(value)

    # "KEY=VALUE" -> "KEY"; values without "=" have no option name.
    name, equals, _rest = value.partition("=")
    option = name if equals else None
    if value == "__edit_rate_limit__":
        option = "rate_limit_rpm"

    shared_rows = {
        "context_window": "__edit_context_window__",
        "context_reserve_tokens": "__edit_reserve__",
        "max_output_tokens": "__edit_max_output__",
        "timeout": "__edit_timeout__",
        "request_timeout_ms": "__edit_timeout__",
        "native": "__edit_native__",
        "native_compat": "__edit_native__",
    }
    if option in shared_rows:
        return from_table(shared_rows[option])

    # The remaining options carry their own texts and a shorter English
    # fallback for languages without a translation.
    if option in ("rate_limit", "rate_limit_rpm", "rpm"):
        translations = {
            "en": "Router-side upstream requests per minute. NIM hosted defaults to 40 RPM; 0 disables waiting.",
            "ko": "라우터가 업스트림 요청 수를 분당 제한합니다. NIM hosted 기본값은 40 RPM이고, 0이면 대기하지 않습니다.",
            "ja": "ルーター側の上流リクエスト数/分。NIM hosted は既定 40 RPM、0 で待機なし。",
            "zh": "路由器侧上游每分钟请求限制。NIM hosted 默认 40 RPM;0 表示不等待。",
        }
        return translations.get(lang, "Router-side upstream requests per minute.")

    if option in ("rate_limit_status", "rpm_status"):
        translations = {
            "en": "Show optional colored RPM usage status in Claude responses.",
            "ko": "Claude 응답에 RPM 사용량 상태를 색상 텍스트로 표시합니다.",
            "ja": "Claude応答にRPM使用量状態を色付きテキストで表示します。",
            "zh": "在 Claude 响应中显示彩色 RPM 使用量状态。",
        }
        return translations.get(lang, "Show optional colored RPM usage status.")

    if option == "stream":
        translations = {
            "en": "Toggle streaming. When off, the router forces stream:false upstream and returns the full response to Claude Code. Use this if streaming fragmentation causes tool-call or JSON parse errors.",
            "ko": "스트리밍 on/off. off 면 업스트림에 stream:false 를 강제하고 응답 전체를 Claude Code에 보냅니다. 스트리밍 단편화로 tool-call/JSON 파싱이 실패할 때 사용합니다.",
            "ja": "ストリーミングを切り替えます。offにすると上流にstream:falseを強制し、応答全体をClaude Codeへ返します。ストリーミング断片化でtool-call/JSONが失敗する時に使います。",
            "zh": "切换流式输出。off 时强制对上游设置 stream:false 并返回完整响应给 Claude Code。流式分片导致 tool-call/JSON 解析失败时使用。",
        }
        return translations.get(lang, "Toggle streaming. When off, the router forces stream:false upstream and returns the full response.")

    if option == "stream_word_chunking":
        translations = {
            "en": "Parse upstream Anthropic SSE and re-emit text_delta events buffered to word boundaries. Reduces SSE event volume; tool deltas and non-text events pass through unchanged.",
            "ko": "업스트림 Anthropic SSE를 파싱해서 text_delta 를 단어 경계 단위로 모아서 다시 전송합니다. SSE 이벤트 빈도를 낮춥니다. tool delta와 텍스트가 아닌 이벤트는 그대로 통과합니다.",
            "ja": "上流のAnthropic SSEを解析し、text_deltaを単語境界でまとめて再送します。SSEイベント量を削減します。tool deltaやテキスト以外のイベントはそのまま透過します。",
            "zh": "解析上游 Anthropic SSE 并将 text_delta 在单词边界处合并后重新发送。降低 SSE 事件频率。工具 delta 与非文本事件原样透传。",
        }
        return translations.get(lang, "Buffer text_delta events at word boundaries; tool deltas pass through unchanged.")

    return from_table("__custom__")
1225
+
1226
+
1227
def build_provider_options_submenu() -> dict:
    """Build the provider-options submenu for the currently selected provider.

    Returns a submenu dict with ``kind``/``parent`` set to
    ``"provider-options"``, ``idx`` and ``offset`` at 0, and ``items`` holding
    one ``{"value", "label", "current", "description"}`` dict per row. Which
    rows appear depends on the provider name: rate-limit rows, streaming rows,
    and context-window/native rows are each limited to specific providers.
    """
    provider, pcfg = current_provider_cfg()
    max_output = pcfg.get("max_output_tokens", "4096")
    timeout = pcfg.get("request_timeout_ms", "300000")
    context_window = pcfg.get("context_window", "default")
    stream_on = bool(pcfg.get("stream_enabled", True))
    word_chunk_on = bool(pcfg.get("stream_word_chunking", False))
    rate_status_on = bool(pcfg.get("rate_limit_status", True))
    native = bool(pcfg.get("native_compat", True))
    has_context_options = provider in ("vllm", "self-hosted-nim")

    def is_preset(current, preset: int) -> bool:
        # Config values may be stored as int or str; accept both so the
        # "current" marker does not depend on how the value was saved.
        return current == preset or str(current) == str(preset)

    choices = [
        ("__edit_max_output__", f"Edit max_output_tokens [{max_output}]", False),
        ("__edit_timeout__", f"Edit timeout ms [{timeout}]", False),
    ]
    if provider in ("nvidia-hosted", "self-hosted-nim", "ollama", "ollama-cloud"):
        choices.append(("__edit_rate_limit__", f"Edit rate_limit_rpm [{pcfg.get('rate_limit_rpm', 40)}]", False))
        choices.append(("rate_limit_status=true", "rate_limit_status on", rate_status_on))
        choices.append(("rate_limit_status=false", "rate_limit_status off", not rate_status_on))
    if has_context_options:
        # Context rows go first and the native toggle last, around the rows
        # collected so far.
        choices = [
            ("__edit_context_window__", f"Edit context_window [{context_window}]", False),
            ("__edit_reserve__", f"Edit context reserve [{pcfg.get('context_reserve_tokens', 'default')}]", False),
            *choices,
            ("__edit_native__", f"Edit native mode [{str(native).lower()}]", False),
        ]
    choices.extend([
        ("__custom__", "Custom KEY=VALUE or unset:KEY...", False),
        ("max_output_tokens=4096", f"max_output_tokens 4096 (current {max_output})", is_preset(max_output, 4096)),
        ("max_output_tokens=8192", f"max_output_tokens 8192 (current {max_output})", is_preset(max_output, 8192)),
        ("timeout=300000", f"timeout 300000ms (current {timeout})", is_preset(timeout, 300000)),
    ])
    if provider in ("vllm", "nvidia-hosted", "self-hosted-nim"):
        choices.extend([
            ("stream=true", "stream on", stream_on),
            ("stream=false", "stream off (buffer full response)", not stream_on),
            ("stream_word_chunking=true", "stream_word_chunking on (flush at word boundary)", word_chunk_on),
            ("stream_word_chunking=false", "stream_word_chunking off (raw upstream SSE)", not word_chunk_on),
        ])
    if has_context_options:
        choices.extend([
            ("context_window=32768", f"context_window 32768 (current {context_window})", is_preset(context_window, 32768)),
            ("context_window=65536", f"context_window 65536 (current {context_window})", is_preset(context_window, 65536)),
            ("native=true", "native true", native),
            ("native=false", "native false", not native),
        ])
    items = [
        {"value": value, "label": label, "current": current, "description": provider_option_description(value)}
        for value, label, current in choices
    ]
    return {"kind": "provider-options", "parent": "provider-options", "items": items, "idx": 0, "offset": 0}
1274
+
1275
+
1276
def after_model_action() -> str:
    """Return the main-menu action to focus after a model has been chosen.

    Ollama providers go to their options row, providers with provider options
    go to that row, and everything else goes straight to the test row.
    """
    active = current_provider()
    if not is_ollama_provider(active):
        return "provider-options" if has_provider_options(active) else "test"
    return "ollama-options"
1283
+
1284
+
1285
def summarize_test_output(code: int, out: str) -> list[str]:
    """Condense compatibility-test output into at most 12 display lines.

    ``code`` is the test's exit status and ``out`` its combined output. Empty
    output and Python tracebacks get fixed summaries; otherwise only lines with
    a known prefix are kept (or the first 8 lines when none match), and a
    ``Compatibility: FAIL`` line is prepended when a failing run has none.
    """
    output_lines = out.strip().splitlines()
    if not output_lines:
        verdict = "Compatibility: FAIL" if code else "Compatibility: OK"
        return [verdict, "No output from compatibility test."]

    crashed = False
    for entry in output_lines:
        if entry.startswith("Traceback "):
            crashed = True
            break
    if crashed:
        # Report the last meaningful line, skipping "~~~" marker lines.
        reason = "Internal test error"
        for entry in reversed(output_lines):
            stripped = entry.strip()
            if stripped and not stripped.startswith("~"):
                reason = stripped
                break
        return ["Compatibility: FAIL", "Reason: internal claude-any test error", reason[:160]]

    keep_prefixes = (
        "Testing provider:",
        "Test mode:",
        "Mode:",
        "URL:",
        "Claude API URL:",
        "Upstream base URL:",
        "Model:",
        "Compatibility:",
        "HTTP:",
        "Reason:",
        "Diagnosis:",
        "Stop reason:",
        "Content blocks:",
        "Tokens:",
        "Tool result text:",
        "Note:",
    )
    summary = [entry for entry in output_lines if entry.startswith(keep_prefixes)] or output_lines[:8]

    has_verdict = any(entry.startswith("Compatibility:") for entry in summary)
    if code != 0 and not has_verdict:
        summary.insert(0, "Compatibility: FAIL")
    return summary[:12]
1316
+
1317
+
1318
+ def test_submenu(lines: list[str]) -> dict:
1319
+ return {
1320
+ "kind": "test-result",
1321
+ "parent": "test",
1322
+ "items": [{"value": "", "label": line, "current": False} for line in lines],
1323
+ "idx": 0,
1324
+ "offset": 0,
1325
+ "readonly": True,
1326
+ }
1327
+
1328
+
1329
def run_test_with_animation(idx: int, checks: list[str]) -> tuple[int, str]:
    """Run the compatibility test as a subprocess while animating a spinner.

    The menu is re-rendered about every 0.2s with a spinner and elapsed time.
    If the child is still running 15 seconds past the 60-second test timeout it
    is terminated (killed if it does not exit within 5 seconds) and status 124
    is returned with a timeout explanation appended to whatever was captured.

    Returns ``(exit_code, combined_stdout_and_stderr)``.
    """
    spinner = "|/-\\"
    began = time.monotonic()
    test_timeout = 60
    kill_after = test_timeout + 15
    child = subprocess.Popen(
        [CTL, "test", str(test_timeout), "auto"],
        text=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    tick = 0
    while True:
        if child.poll() is not None:
            break
        waited = int(time.monotonic() - began)
        glyph = spinner[tick % len(spinner)]
        render(None, idx, None, [f"{glyph} {t('running_test')} ({waited}s/{test_timeout}s)"], checks)
        if waited < kill_after:
            tick += 1
            time.sleep(0.2)
            continue

        # Hard limit reached: stop the child, escalating to kill if needed.
        child.terminate()
        try:
            captured, _ = child.communicate(timeout=5)
        except subprocess.TimeoutExpired:
            child.kill()
            captured, _ = child.communicate()
        timeout_msg = (
            f"Compatibility: FAIL\n"
            f"Reason: compatibility test exceeded {test_timeout}s and was stopped by the menu.\n"
            "Diagnosis: retry the test or choose a faster/more reliable model."
        )
        return 124, ((captured or "").rstrip() + "\n" + timeout_msg).strip()

    captured, _ = child.communicate()
    return child.returncode or 0, captured or ""
1362
+
1363
+
1364
def inline_prompt(stdscr, prompt_text: str, row: int, default: str = "") -> str:
    """Read one line of text on a highlighted bar drawn at ``row``.

    ``default`` is shown after the prompt until the first key is handled. Enter
    returns the stripped typed text, or ``default`` when nothing was typed; Esc
    returns ``default`` immediately. ``stdscr`` is accepted but not used.
    """
    height, width = _term_size()
    line = max(1, min(row, height - 3))
    plain = _style(reverse=True)
    strong = _style(reverse=True, bold=True)
    blank_bar = " " * max(0, width - 1)
    shown_prompt = prompt_text[: max(0, width - 4)]
    last_col = max(0, width - 2)

    def paint_bar() -> None:
        # Clear the row, then draw the (possibly truncated) prompt label.
        _write(line, 0, blank_bar, plain)
        _write(line, 2, shown_prompt, strong)

    paint_bar()
    input_col = min(len(prompt_text) + 2, last_col)
    room = max(0, width - input_col - 1)
    if default:
        _write(line, input_col, default[:room], plain)
        _move(line, min(input_col + len(default), last_col))
    else:
        _move(line, input_col)
    sys.stdout.flush()

    typed = ""
    while True:
        key = read_menu_key()
        if key == "KEY_ENTER":
            return typed.strip() or default
        if key == "KEY_ESC":
            return default
        if key == "KEY_BACKSPACE":
            typed = typed[:-1]
        elif len(key) == 1 and key.isprintable():
            typed += key
        # Redraw after every other key so the bar shows the current buffer.
        paint_bar()
        _write(line, input_col, typed[:room], plain)
        _move(line, min(input_col + len(typed), last_col))
        sys.stdout.flush()
1396
+
1397
+
1398
def inline_secret_prompt(stdscr, prompt_text: str, row: int) -> str:
    """Read a secret on a highlighted bar at ``row``, echoing ``*`` per character.

    Enter returns the stripped typed text; Esc returns an empty string.
    ``stdscr`` is accepted but not used.
    """
    height, width = _term_size()
    line = max(1, min(row, height - 3))
    plain = _style(reverse=True)
    strong = _style(reverse=True, bold=True)
    blank_bar = " " * max(0, width - 1)
    shown_prompt = prompt_text[: max(0, width - 4)]
    last_col = max(0, width - 2)

    def paint_bar() -> None:
        # Clear the row, then draw the (possibly truncated) prompt label.
        _write(line, 0, blank_bar, plain)
        _write(line, 2, shown_prompt, strong)

    paint_bar()
    input_col = min(len(prompt_text) + 2, last_col)
    room = max(0, width - input_col - 1)
    _move(line, input_col)
    sys.stdout.flush()

    secret = ""
    while True:
        key = read_menu_key()
        if key == "KEY_ENTER":
            return secret.strip()
        if key == "KEY_ESC":
            return ""
        if key == "KEY_BACKSPACE":
            secret = secret[:-1]
        elif len(key) == 1 and key.isprintable():
            secret += key
        # Redraw with one asterisk per typed character.
        paint_bar()
        stars = "*" * len(secret)
        _write(line, input_col, stars[:room], plain)
        _move(line, min(input_col + len(stars), last_col))
        sys.stdout.flush()
1427
+
1428
+
1429
def message(stdscr, title: str, lines: list[str]) -> None:
    """Show a full-screen message and block until a key is pressed.

    The title goes on row 0 in bold, body lines start at row 2, and a
    "Press any key" hint is drawn two rows above the bottom. ``stdscr`` is
    accepted but not used.
    """
    _clear()
    height, width = _term_size()
    text_width = width - 1
    _write_safe(0, 0, title[:text_width], _style(bold=True))
    for y, body_line in enumerate(lines[: height - 4], start=2):
        _write_safe(y, 0, body_line[:text_width])
    hint_style = _style(dim=True) + cp(5)
    _write_safe(height - 2, 0, "Press any key to continue", hint_style)
    sys.stdout.flush()
    read_menu_key()
1438
+
1439
+
1440
def api_key_flow(stdscr) -> list[str]:
    """Run the external ``api-key`` command for the current provider.

    Waits for Enter afterwards and returns a one-line notice naming the
    provider. The command's exit status is ignored. ``stdscr`` is accepted but
    not used.
    """
    active = current_provider()
    command = [CTL, "api-key", active]
    subprocess.run(command, check=False)
    input("Press Enter to return to claude-any menu...")
    notice_line = f"API key flow completed for {active}"
    return [notice_line]
1445
+
1446
+
1447
def visible_sub_window(sub: dict, max_rows: int) -> tuple[int, int]:
    """Return the ``(start, end)`` slice of submenu items to display.

    Scrolls the stored offset just far enough to keep ``sub["idx"]`` inside a
    window of ``max_rows`` rows, clamps it to the item count, and writes the
    result back to ``sub["offset"]``. ``end`` is exclusive.
    """
    total = len(sub["items"])
    cursor = sub["idx"]
    first = min(sub.get("offset", 0), cursor)
    if cursor >= first + max_rows:
        # Cursor fell below the window: scroll so it is the last visible row.
        first = cursor - max_rows + 1
    highest_start = max(0, total - max_rows)
    first = max(0, min(first, highest_start))
    sub["offset"] = first
    return first, min(total, first + max_rows)
1458
+
1459
+
1460
def selected_sub_description(sub: dict | None) -> str:
    """Return the description of the highlighted submenu item, or ``""``.

    An absent/empty submenu, a failed item lookup, or a missing/empty
    description all yield an empty string.
    """
    if not sub:
        return ""
    try:
        chosen = sub["items"][sub["idx"]]
    except Exception:
        text = ""
    else:
        text = str(chosen.get("description") or "")
    return text
1468
+
1469
+
1470
def index_for_action(action: str) -> int:
    """Return the main-menu index of ``action``, or 0 when it is not listed."""
    for position, (key, _label) in enumerate(main_items()):
        if key == action:
            return position
    return 0
1473
+
1474
+
1475
def add(stdscr, y: int, x: int, text: str, style: str = "") -> None:
    """Write ``text`` at row ``y``, column ``x``, treating a negative column as 0.

    ``stdscr`` is accepted but not used.
    """
    column = max(0, x)
    _write_safe(y, column, text, style)
1477
+
1478
+
1479
def draw_intro_panel(stdscr) -> int:
    """Draw the banner at the top of the screen and return the next free row.

    Terminals shorter than 20 rows get a single title line (returns 1).
    Otherwise a bordered panel 7 or 8 rows high is drawn, with a two-column
    layout when the terminal is at least 92 columns wide and a single-column
    layout below that.
    """
    rows, cols = _term_size()
    if rows < 20:
        _write(0, 0, animated_text(APP_NAME) + f" - {CREDITS}")
        return 1

    box_w = max(40, cols - 2)
    box_h = 7 if rows < 24 else 8
    frame = cp(4)
    edge = "+" + "-" * (box_w - 2) + "+"
    inner_rows = range(1, box_h - 1)

    # Outline: top edge with the title overlaid, side bars, bottom edge.
    add(stdscr, 0, 0, edge, frame)
    _write(0, 4, " " + animated_text(APP_NAME) + " ", frame)
    for y in inner_rows:
        add(stdscr, y, 0, "|", frame)
        add(stdscr, y, box_w - 1, "|", frame)
    add(stdscr, box_h - 1, 0, edge, frame)

    if cols < 92:
        # Single-column layout.
        add(stdscr, 1, 3, f"{APP_NAME} routes Claude Code through selectable providers.", _style(bold=True) + cp(5))
        add(stdscr, 2, 3, "Anthropic, Ollama, vLLM, Nvidia Hosted, and self-hosted NIM.", cp(5))
        add(stdscr, 3, 3, "DuckDuckGo web search is attached for non-native providers.", cp(5))
        add(stdscr, 4, 3, "Headless setup uses --ca-* flags; Claude flags pass through.", cp(5))
        credits_row = 6 if box_h > 6 else 5
        add(stdscr, credits_row, 3, CREDITS, _style(bold=True) + cp(3))
        return box_h + 1

    # Two-column layout: logo on the left of the divider, tips on the right.
    split = min(44, box_w // 2)
    for y in inner_rows:
        add(stdscr, y, split, "|", frame)
    add(stdscr, 1, 8, "Welcome back!", _style(bold=True) + cp(5))
    _write(3, 9, animated_text("CLAUDE"))
    _write(4, 12, animated_text("ANY", phase=int(time.monotonic() * 8) + 4))
    add(stdscr, 6, 6, CREDITS, _style(bold=True) + cp(5))

    right = split + 3
    add(stdscr, 1, right, "Tips for getting started", _style(bold=True) + cp(4))
    tips = (
        "Choose provider, model, base URL, and API key before launch.",
        "Routes Claude Code to Anthropic, Ollama, vLLM, Nvidia, or NIM.",
        "Adds DuckDuckGo web search tooling for non-native providers.",
        "Use --ca-* flags for headless runs; Claude flags pass through.",
    )
    for tip_row, tip in enumerate(tips, start=2):
        add(stdscr, tip_row, right, tip, cp(5))

    return box_h + 1
1521
+
1522
+
1523
def render(stdscr, idx: int, sub: dict | None, notice: list[str], checks: list[str]) -> dict[str, int]:
    """Redraw the whole menu screen and report where each row was drawn.

    Draws, top to bottom: the intro panel, a few status lines, the main menu
    (with ``sub`` expanded under its parent row when present), the description
    of the highlighted submenu item, the preflight ``checks``, up to two
    ``notice`` lines near the bottom, and a help line on row ``h - 2``.

    Args:
        stdscr: Passed through to the drawing helpers.
        idx: Index of the highlighted main-menu item.
        sub: Open submenu dict, or None.
        notice: Transient message lines; only the first two are shown.
        checks: Preflight check lines.

    Returns:
        A dict mapping each drawn main-menu action key to its terminal row,
        plus ``"__sub_selected__"`` holding the row of the highlighted submenu
        item (-1 when none was drawn).
    """
    lines = status_text()
    items = main_items()
    h, w = _term_size()
    _clear()
    top = draw_intro_panel(stdscr)
    # Taller terminals show more status lines.
    status_count = 5 if h >= 28 else 4 if h >= 23 else 2
    for i, line in enumerate(lines[:status_count]):
        color = cp(2) if line.startswith("provider:") or line.startswith("model:") else cp(5)
        add(stdscr, top + i, 2, line, color)

    row = top + status_count + 1
    row_by_action: dict[str, int] = {}
    sub_selected_row = -1
    # Rows available to an expanded submenu: between 3 and 10, depending on
    # what is left after the main items and checks.
    submenu_budget = max(3, min(10, h - row - len(items) - len(checks) - 4))
    if sub and sub.get("kind") == "test-result":
        submenu_budget = max(4, min(10, h - row - len(items) - len(checks) - 3))

    for i, (key, label) in enumerate(items):
        # The row is recorded before the overflow check, so the first item
        # that does not fit still gets an entry.
        row_by_action[key] = row
        if row >= h - 3:
            break
        # The main item is highlighted only when no interactive submenu is open.
        if i == idx and (sub is None or sub.get("readonly")):
            style = _style(reverse=True, bold=True)
        elif key == "launch":
            style = cp(2) + _style(bold=True)
        elif key == "test":
            style = cp(3) + _style(bold=True)
        elif key == "quit":
            style = cp(4)
        elif key in ("language", "provider", "model", "advisor-model", "ollama-options", "provider-options", "api-key", "base-url"):
            style = cp(3)
        else:
            style = ""
        _write_safe(row, 2, label[: max(0, w - 4)], style)
        row += 1

        # Expand the submenu directly beneath its parent row.
        if sub and sub.get("parent") == key:
            start, end = visible_sub_window(sub, submenu_budget)
            if start > 0 and row < h - 3:
                _write_safe(row, 6, f"... {start} above", _style(dim=True) + cp(5))
                row += 1
            for si in range(start, end):
                if row >= h - 3:
                    break
                item = sub["items"][si]
                if sub.get("kind") == "test-result":
                    # Test results are plain text lines styled by their content.
                    text = f"  {item['label']}"
                    if "FAIL" in item["label"] or "TIMEOUT" in item["label"] or item["label"].startswith(("HTTP:", "Reason:", "Diagnosis:")):
                        style = cp(4) + _style(bold=True)
                    elif "OK" in item["label"]:
                        style = cp(2) + _style(bold=True)
                    else:
                        style = _style(dim=True) + cp(5)
                else:
                    # "*" marks the configured value, ">" the highlighted row.
                    marker = "*" if item.get("current") else " "
                    prefix = ">" if si == sub["idx"] else " "
                    text = f"{prefix} {marker} {item['label']}"
                    if si == sub["idx"]:
                        style = _style(reverse=True, bold=True)
                        sub_selected_row = row
                    elif item.get("current"):
                        style = cp(2) + _style(bold=True)
                    elif "[OK]" in item["label"]:
                        style = cp(2)
                    elif "[FAIL" in item["label"] or "[TIMEOUT]" in item["label"]:
                        style = cp(4) + _style(bold=True)
                    else:
                        style = _style(dim=True)
                # NOTE(review): this repeats the highlight applied in the branch
                # above for interactive submenus — confirm it is still needed.
                if si == sub["idx"] and not sub.get("readonly"):
                    style = _style(reverse=True, bold=True)
                    sub_selected_row = row
                _write_safe(row, 4, text[: max(0, w - 6)], style)
                row += 1
            remaining = len(sub["items"]) - end
            if remaining > 0 and row < h - 3:
                _write_safe(row, 6, f"... {remaining} more", _style(dim=True) + cp(5))
                row += 1

    # Description of the highlighted submenu item: a rule plus at most two
    # wrapped lines.
    desc = selected_sub_description(sub)
    if desc and row < h - 5:
        _write_safe(row, 2, ("-" * max(8, w - 4))[: max(0, w - 4)], _style(dim=True) + cp(6))
        row += 1
        for line in textwrap.wrap(desc, width=max(24, w - 6))[:2]:
            if row >= h - 4:
                break
            _write_safe(row, 2, line[: max(0, w - 4)], _style(bold=True) + cp(6))
            row += 1

    # Preflight checks: a rule, then as many lines as fit.
    if row < h - 4:
        _write_safe(row, 2, ("-" * max(8, w - 4))[: max(0, w - 4)], _style(dim=True) + cp(6))
        row += 1
        for line in checks[: max(0, h - row - 3)]:
            _write_safe(row, 2, line[: max(0, w - 4)], _style(bold=True) + cp(6))
            row += 1

    # Notices are anchored near the bottom, independent of `row`.
    if notice:
        y = max(0, h - 5 - min(len(notice), 2))
        for j, line in enumerate(notice[:2]):
            _write_safe(y + j, 0, line[: w - 1], cp(2) if j == 0 else _style(dim=True))

    current_action = items[idx][0]
    _write_safe(h - 2, 0, help_for_action(current_action, sub.get("kind") if sub else None)[: w - 1], _style(dim=True) + cp(5))
    sys.stdout.flush()
    row_by_action["__sub_selected__"] = sub_selected_row
    return row_by_action
1629
+
1630
+
1631
+ def main() -> int:
1632
+ init_colors()
1633
+ idx = index_for_action("launch") if settings_ready_except_api_key() else 0
1634
+ sub: dict | None = None
1635
+ notice: list[str] = []
1636
+ checks = preflight_checks()
1637
+ row_by_action: dict[str, int] = {}
1638
+
1639
+ def apply_test_result(code: int, out: str) -> None:
1640
+ nonlocal sub, notice, checks, idx
1641
+ ok = code == 0
1642
+ sub = test_submenu(summarize_test_output(code, out))
1643
+ if ok:
1644
+ notice = [t("test_passed")]
1645
+ elif "TIMEOUT" in out.upper() or "timed out" in out.lower():
1646
+ notice = ["Compatibility test timed out. The provider or model took too long to respond."]
1647
+ else:
1648
+ notice = [t("test_failed")]
1649
+ checks = preflight_checks()
1650
+ idx = index_for_action("launch" if ok else "model")
1651
+
1652
+ while True:
1653
+ items = main_items()
1654
+ idx = max(0, min(idx, len(items) - 1))
1655
+ row_by_action = render(None, idx, sub, notice, checks)
1656
+ ch = read_menu_key()
1657
+
1658
+ if sub and sub.get("readonly"):
1659
+ if ch in ("KEY_ESC", "q"):
1660
+ sub = None
1661
+ notice = []
1662
+ continue
1663
+ if ch in ("KEY_UP", "k"):
1664
+ notice = []
1665
+ idx = (idx - 1) % len(items)
1666
+ continue
1667
+ if ch in ("KEY_DOWN", "j"):
1668
+ notice = []
1669
+ idx = (idx + 1) % len(items)
1670
+ continue
1671
+ if ch == "KEY_ENTER":
1672
+ action = items[idx][0]
1673
+ if action == "launch":
1674
+ return 0
1675
+ if action == "test":
1676
+ code, out = run_test_with_animation(idx, checks)
1677
+ apply_test_result(code, out)
1678
+ continue
1679
+ sub = None
1680
+ else:
1681
+ continue
1682
+
1683
+ if sub:
1684
+ if ch in ("KEY_ESC", "q"):
1685
+ sub = None
1686
+ notice = []
1687
+ checks = preflight_checks()
1688
+ continue
1689
+ if ch in ("KEY_UP", "k"):
1690
+ notice = []
1691
+ sub["idx"] = (sub["idx"] - 1) % len(sub["items"])
1692
+ provider_preview = selected_provider_value(sub)
1693
+ if provider_preview:
1694
+ checks = provider_preview_checks(provider_preview)
1695
+ continue
1696
+ if ch in ("KEY_DOWN", "j"):
1697
+ notice = []
1698
+ sub["idx"] = (sub["idx"] + 1) % len(sub["items"])
1699
+ provider_preview = selected_provider_value(sub)
1700
+ if provider_preview:
1701
+ checks = provider_preview_checks(provider_preview)
1702
+ continue
1703
+ if ch == "KEY_NPAGE":
1704
+ sub["idx"] = min(len(sub["items"]) - 1, sub["idx"] + 10)
1705
+ provider_preview = selected_provider_value(sub)
1706
+ if provider_preview:
1707
+ checks = provider_preview_checks(provider_preview)
1708
+ continue
1709
+ if ch == "KEY_PPAGE":
1710
+ sub["idx"] = max(0, sub["idx"] - 10)
1711
+ provider_preview = selected_provider_value(sub)
1712
+ if provider_preview:
1713
+ checks = provider_preview_checks(provider_preview)
1714
+ continue
1715
+ if ch == "KEY_ENTER":
1716
+ item = sub["items"][sub["idx"]]
1717
+ if sub["kind"] == "language":
1718
+ _, out = run_cmd([CTL, "language", item["value"]])
1719
+ notice = (out.strip().splitlines() or [item["value"]])[:2]
1720
+ checks = preflight_checks()
1721
+ sub = None
1722
+ idx = index_for_action("provider")
1723
+ elif sub["kind"] == "provider":
1724
+ _, out = run_cmd([CTL, "provider", item["value"]])
1725
+ notice = (out.strip().splitlines() or [item["value"]])[:2]
1726
+ checks = preflight_checks()
1727
+ sub = None
1728
+ idx = index_for_action("api-key")
1729
+ elif sub["kind"] == "api-key":
1730
+ row = row_by_action.get("__sub_selected__", row_by_action.get("api-key", 10))
1731
+ key = inline_secret_prompt(None, f"API key for {item['value']}: ", row)
1732
+ if key:
1733
+ _, out = run_cmd([CTL, "set-api-key", item["value"], key])
1734
+ notice = (out.strip().splitlines() or [item["value"]])[:2]
1735
+ checks = preflight_checks()
1736
+ idx = index_for_action("base-url")
1737
+ else:
1738
+ notice = [t("api_key_unchanged")]
1739
+ sub = None
1740
+ elif sub["kind"] == "model":
1741
+ if item["value"] == "__custom__":
1742
+ row = row_by_action.get("__sub_selected__", row_by_action.get("model", 10))
1743
+ value = inline_prompt(None, "Model id or alias: ", row)
1744
+ if value:
1745
+ _, out = run_cmd([CTL, "model", value])
1746
+ notice = (out.strip().splitlines() or [value])[:2]
1747
+ checks = preflight_checks()
1748
+ idx = index_for_action(after_model_action())
1749
+ sub = None
1750
+ else:
1751
+ _, out = run_cmd([CTL, "model", item["value"]])
1752
+ notice = (out.strip().splitlines() or [item["value"]])[:2]
1753
+ checks = preflight_checks()
1754
+ sub = None
1755
+ idx = index_for_action(after_model_action())
1756
+ elif sub["kind"] == "advisor-model":
1757
+ row = row_by_action.get("__sub_selected__", row_by_action.get("advisor-model", 10))
1758
+ if item["value"] == "__custom__":
1759
+ value = inline_prompt(None, "Advisor model id: ", row, "deepseek-v4-pro")
1760
+ else:
1761
+ value = item["value"] or "off"
1762
+ _, out = run_cmd([CTL, "advisor-model", value])
1763
+ notice = (out.strip().splitlines() or [value])[:2]
1764
+ checks = preflight_checks()
1765
+ sub = None
1766
+ idx = index_for_action("ollama-options" if is_ollama_provider(current_provider()) else ("provider-options" if has_provider_options(current_provider()) else "test"))
1767
+ elif sub["kind"] == "ollama-options":
1768
+ provider = current_provider()
1769
+ row = row_by_action.get("__sub_selected__", row_by_action.get("ollama-options", 10))
1770
+ provider_now, pcfg_now = current_provider_cfg()
1771
+ opts_now = pcfg_now.get("ollama_options") or {}
1772
+ if not isinstance(opts_now, dict):
1773
+ opts_now = {}
1774
+ action_value = item["value"]
1775
+ value = ""
1776
+ if action_value == "__edit_num_ctx__":
1777
+ default = str(pcfg_now.get("num_ctx", "auto"))
1778
+ entered = inline_prompt(None, "num_ctx (auto or integer): ", row, default)
1779
+ value = f"num_ctx={entered}" if entered else ""
1780
+ elif action_value == "__edit_min__":
1781
+ default = str(pcfg_now.get("num_ctx_min", 32768))
1782
+ entered = inline_prompt(None, "num_ctx auto minimum: ", row, default)
1783
+ value = f"min={entered}" if entered else ""
1784
+ elif action_value == "__edit_max__":
1785
+ default = str(pcfg_now.get("num_ctx_max", 131072))
1786
+ entered = inline_prompt(None, "num_ctx auto maximum: ", row, default)
1787
+ value = f"max={entered}" if entered else ""
1788
+ elif action_value == "__edit_keep_alive__":
1789
+ default = str(pcfg_now.get("keep_alive", "5m"))
1790
+ entered = inline_prompt(None, "keep_alive: ", row, default)
1791
+ value = f"keep_alive={entered}" if entered else ""
1792
+ elif action_value == "__edit_temperature__":
1793
+ default = str(opts_now.get("temperature", "0.7"))
1794
+ entered = inline_prompt(None, "temperature (unset:temperature clears): ", row, default)
1795
+ value = entered if entered.startswith("unset:") else (f"temperature={entered}" if entered else "")
1796
+ elif action_value == "__edit_top_p__":
1797
+ default = str(opts_now.get("top_p", "0.8"))
1798
+ entered = inline_prompt(None, "top_p (unset:top_p clears): ", row, default)
1799
+ value = entered if entered.startswith("unset:") else (f"top_p={entered}" if entered else "")
1800
+ elif action_value == "__edit_max_tokens__":
1801
+ default = str(opts_now.get("num_predict", "4096"))
1802
+ entered = inline_prompt(None, "max_tokens / num_predict: ", row, default)
1803
+ value = f"max_tokens={entered}" if entered else ""
1804
+ elif action_value == "__edit_timeout__":
1805
+ default = str(pcfg_now.get("request_timeout_ms", "300000"))
1806
+ entered = inline_prompt(None, "timeout ms: ", row, default)
1807
+ value = f"timeout={entered}" if entered else ""
1808
+ elif action_value == "__edit_rate_limit__":
1809
+ default = str(pcfg_now.get("rate_limit_rpm", "40"))
1810
+ entered = inline_prompt(None, "rate_limit_rpm (0 disables): ", row, default)
1811
+ value = f"rate_limit_rpm={entered}" if entered else ""
1812
+ elif action_value == "__custom__":
1813
+ value = inline_prompt(None, "Ollama option KEY=VALUE: ", row, "temperature=0.7")
1814
+ else:
1815
+ value = action_value
1816
+ if value:
1817
+ _, out = run_cmd([CTL, "ollama-options", provider, value])
1818
+ notice = (out.strip().splitlines() or [value])[:2]
1819
+ checks = preflight_checks()
1820
+ idx = index_for_action("test")
1821
+ sub = None
1822
+ elif sub["kind"] == "provider-options":
1823
+ provider = current_provider()
1824
+ row = row_by_action.get("__sub_selected__", row_by_action.get("provider-options", 10))
1825
+ provider_now, pcfg_now = current_provider_cfg()
1826
+ action_value = item["value"]
1827
+ value = ""
1828
+ if action_value == "__edit_context_window__":
1829
+ default = str(pcfg_now.get("context_window", "32768"))
1830
+ entered = inline_prompt(None, "context_window: ", row, default)
1831
+ value = f"context_window={entered}" if entered else ""
1832
+ elif action_value == "__edit_reserve__":
1833
+ default = str(pcfg_now.get("context_reserve_tokens", "1024"))
1834
+ entered = inline_prompt(None, "context_reserve_tokens: ", row, default)
1835
+ value = f"context_reserve_tokens={entered}" if entered else ""
1836
+ elif action_value == "__edit_max_output__":
1837
+ default = str(pcfg_now.get("max_output_tokens", "4096"))
1838
+ entered = inline_prompt(None, "max_output_tokens: ", row, default)
1839
+ value = f"max_output_tokens={entered}" if entered else ""
1840
+ elif action_value == "__edit_timeout__":
1841
+ default = str(pcfg_now.get("request_timeout_ms", "300000"))
1842
+ entered = inline_prompt(None, "timeout ms: ", row, default)
1843
+ value = f"timeout={entered}" if entered else ""
1844
+ elif action_value == "__edit_native__":
1845
+ default = "true" if pcfg_now.get("native_compat", True) else "false"
1846
+ entered = inline_prompt(None, "native true/false: ", row, default)
1847
+ value = f"native={entered}" if entered else ""
1848
+ elif action_value == "__custom__":
1849
+ value = inline_prompt(None, "Provider option KEY=VALUE: ", row, "max_output_tokens=4096")
1850
+ else:
1851
+ value = action_value
1852
+ if value:
1853
+ _, out = run_cmd([CTL, "provider-options", provider, value])
1854
+ notice = (out.strip().splitlines() or [value])[:2]
1855
+ checks = preflight_checks()
1856
+ idx = index_for_action("test")
1857
+ sub = None
1858
+ continue
1859
+ continue
1860
+
1861
+ if ch in ("KEY_ESC", "q"):
1862
+ return 10
1863
+ if ch in ("KEY_UP", "k"):
1864
+ notice = []
1865
+ idx = (idx - 1) % len(items)
1866
+ continue
1867
+ if ch in ("KEY_DOWN", "j"):
1868
+ notice = []
1869
+ idx = (idx + 1) % len(items)
1870
+ continue
1871
+ if ch != "KEY_ENTER":
1872
+ continue
1873
+
1874
+ action = items[idx][0]
1875
+ if action == "launch":
1876
+ return 0
1877
+ if action == "test":
1878
+ code, out = run_test_with_animation(idx, checks)
1879
+ apply_test_result(code, out)
1880
+ continue
1881
+ if action == "quit":
1882
+ return 10
1883
+ if action == "language":
1884
+ sub = build_language_submenu()
1885
+ notice = []
1886
+ elif action == "provider":
1887
+ sub = build_provider_submenu()
1888
+ notice = []
1889
+ provider_preview = selected_provider_value(sub)
1890
+ if provider_preview:
1891
+ checks = provider_preview_checks(provider_preview)
1892
+ elif action == "model":
1893
+ notice = [t("loading_models")]
1894
+ render(None, idx, None, notice, checks)
1895
+ sub, fallback_notice = build_model_submenu()
1896
+ notice = fallback_notice
1897
+ if sub is None:
1898
+ row = row_by_action.get("model", 10)
1899
+ value = inline_prompt(None, "Model id or alias: ", row)
1900
+ if value:
1901
+ _, out = run_cmd([CTL, "model", value])
1902
+ notice = (out.strip().splitlines() or [value])[:2]
1903
+ checks = preflight_checks()
1904
+ idx = index_for_action(after_model_action())
1905
+ elif action == "advisor-model":
1906
+ notice = []
1907
+ sub = build_advisor_model_submenu()
1908
+ elif action == "ollama-options":
1909
+ provider = current_provider()
1910
+ if is_ollama_provider(provider):
1911
+ sub = build_ollama_options_submenu()
1912
+ notice = []
1913
+ else:
1914
+ notice = ["Ollama options are available only for ollama and ollama-cloud."]
1915
+ elif action == "provider-options":
1916
+ provider = current_provider()
1917
+ if has_provider_options(provider):
1918
+ sub = build_provider_options_submenu()
1919
+ notice = []
1920
+ else:
1921
+ notice = ["Provider options are available for vLLM, NVIDIA hosted, and self-hosted NIM."]
1922
+ elif action == "api-key":
1923
+ provider = current_provider()
1924
+ row = row_by_action.get("api-key", 10)
1925
+ key = inline_secret_prompt(None, f"API key for {provider}: ", row)
1926
+ if key:
1927
+ _, out = run_cmd([CTL, "set-api-key", provider, key])
1928
+ notice = (out.strip().splitlines() or [provider])[:2]
1929
+ checks = preflight_checks()
1930
+ else:
1931
+ notice = [t("api_key_unchanged")]
1932
+ idx = index_for_action("base-url")
1933
+ elif action == "base-url":
1934
+ provider = current_provider()
1935
+ row = row_by_action.get("base-url", 12)
1936
+ value = inline_prompt(None, f"Base URL for {provider}: ", row, default_base_url(provider))
1937
+ if value:
1938
+ _, out = run_cmd([CTL, "base-url", provider, value])
1939
+ notice = (out.strip().splitlines() or [value])[:2]
1940
+ checks = preflight_checks()
1941
+ idx = index_for_action("model")
1942
+
1943
+
1944
if __name__ == "__main__":
    # Script entry point: run main() inside the _RawTerminal context manager
    # and use its return value as the process exit status.
    try:
        with _RawTerminal():
            exit_status = main()
        sys.exit(exit_status)
    except KeyboardInterrupt:
        # An interrupt exits with status 10, the same value main() returns
        # on its quit / ESC paths.
        sys.exit(10)