@oneciel-ai/claude-any 0.1.38 → 0.1.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -1
- package/claude_any.py +231 -97
- package/docs/README.ja.md +25 -1
- package/docs/README.ko.md +25 -1
- package/docs/README.zh.md +24 -1
- package/docs/manual.md +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -48,7 +48,7 @@ arguments through unchanged.
|
|
|
48
48
|
|
|
49
49
|
Credits: One Ciel LLC
|
|
50
50
|
|
|
51
|
-
Current version: `0.1.38`
|
|
51
|
+
Current version: `0.1.42`
|
|
52
52
|
|
|
53
53
|
## Why This Exists
|
|
54
54
|
|
|
@@ -381,6 +381,30 @@ steps under that larger model's supervision.
|
|
|
381
381
|
|
|
382
382
|
## Changelog
|
|
383
383
|
|
|
384
|
+
### 0.1.42
|
|
385
|
+
|
|
386
|
+
- **Live stream progress**: the statusline now updates streamed upstream output
|
|
387
|
+
progress with formatted input/output token estimates and chunk counts.
|
|
388
|
+
|
|
389
|
+
### 0.1.41
|
|
390
|
+
|
|
391
|
+
- **Statusline formatting**: upstream token counts now use thousands separators
|
|
392
|
+
and a space before `tok`, for example `27,501 tok`.
|
|
393
|
+
|
|
394
|
+
### 0.1.40
|
|
395
|
+
|
|
396
|
+
- **RPM 0 is preserved**: setting `rate_limit_rpm=0` now stores an explicit
|
|
397
|
+
unlimited mode instead of falling back to the provider default.
|
|
398
|
+
|
|
399
|
+
### 0.1.39
|
|
400
|
+
|
|
401
|
+
- **Menu input fixes**: restores terminal line/echo mode before text or number
|
|
402
|
+
prompts, so typed numeric values are visible in the prelaunch UI.
|
|
403
|
+
- **Safer numeric validation**: invalid numeric option input now shows an
|
|
404
|
+
inline message instead of crashing the menu.
|
|
405
|
+
- **Preset visibility**: applied presets report the effective context, reserve,
|
|
406
|
+
output, and timeout values.
|
|
407
|
+
|
|
384
408
|
### 0.1.38
|
|
385
409
|
|
|
386
410
|
- **User-selected context windows**: removes the NVIDIA hosted 32K safety cap.
|
package/claude_any.py
CHANGED
|
@@ -85,7 +85,7 @@ PROVIDER_LABELS = {
|
|
|
85
85
|
"self-hosted-nim": "Self Hosted NIM",
|
|
86
86
|
}
|
|
87
87
|
APP_NAME = "Claude Any"
|
|
88
|
-
VERSION = "0.1.
|
|
88
|
+
VERSION = "0.1.42"
|
|
89
89
|
CREDITS = "Credits: One Ciel LLC"
|
|
90
90
|
|
|
91
91
|
LOG_LEVELS = {"SILENT": 0, "ERROR": 1, "WARN": 2, "INFO": 3, "DEBUG": 4, "TRACE": 5}
|
|
@@ -1339,7 +1339,22 @@ def main():
|
|
|
1339
1339
|
tokens = activity.get("tokens")
|
|
1340
1340
|
rpm_text += f" | upstream {age:.0f}s"
|
|
1341
1341
|
if tokens:
|
|
1342
|
-
|
|
1342
|
+
try:
|
|
1343
|
+
rpm_text += f" {int(tokens):,} tok"
|
|
1344
|
+
except Exception:
|
|
1345
|
+
rpm_text += f" {tokens} tok"
|
|
1346
|
+
output_tokens = activity.get("output_tokens")
|
|
1347
|
+
if output_tokens:
|
|
1348
|
+
try:
|
|
1349
|
+
rpm_text += f" -> {int(output_tokens):,} tok"
|
|
1350
|
+
except Exception:
|
|
1351
|
+
rpm_text += f" -> {output_tokens} tok"
|
|
1352
|
+
chunks = activity.get("chunks")
|
|
1353
|
+
if chunks:
|
|
1354
|
+
try:
|
|
1355
|
+
rpm_text += f" ({int(chunks):,} chunks)"
|
|
1356
|
+
except Exception:
|
|
1357
|
+
rpm_text += f" ({chunks} chunks)"
|
|
1343
1358
|
elif event in ("success", "error"):
|
|
1344
1359
|
rpm_text += f" | {event} {age:.0f}s"
|
|
1345
1360
|
print(f"{left} | {color(rpm_text)}")
|
|
@@ -2207,8 +2222,10 @@ def router_rate_limit_recent(timestamps: Any, now: float, window: float, *, incl
|
|
|
2207
2222
|
|
|
2208
2223
|
def router_rate_limit_usage(provider: str, pcfg: dict[str, Any], model: str | None = None) -> tuple[int, int | None]:
|
|
2209
2224
|
rpm = router_rate_limit_effective_rpm(provider, pcfg, model)
|
|
2210
|
-
if rpm is None:
|
|
2211
|
-
return 0, None
|
|
2225
|
+
if rpm is None:
|
|
2226
|
+
return 0, None
|
|
2227
|
+
if rpm == 0:
|
|
2228
|
+
return 0, 0
|
|
2212
2229
|
key = router_rate_limit_key(provider, pcfg, model)
|
|
2213
2230
|
now = time.time()
|
|
2214
2231
|
try:
|
|
@@ -4697,6 +4714,8 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4697
4714
|
source_body: dict[str, Any] | None = None,
|
|
4698
4715
|
start_index: int = 0,
|
|
4699
4716
|
word_chunking: bool = False,
|
|
4717
|
+
input_tokens: int | None = None,
|
|
4718
|
+
input_bytes: int | None = None,
|
|
4700
4719
|
) -> None:
|
|
4701
4720
|
next_content_index = start_index
|
|
4702
4721
|
text_started = False
|
|
@@ -4709,6 +4728,8 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4709
4728
|
tool_fragments: dict[int, dict[str, Any]] = {}
|
|
4710
4729
|
output_tokens = 0
|
|
4711
4730
|
finish_reason = "stop"
|
|
4731
|
+
chunks_seen = 0
|
|
4732
|
+
last_activity_update = 0.0
|
|
4712
4733
|
|
|
4713
4734
|
def emit(event_name: str, payload: dict[str, Any]) -> None:
|
|
4714
4735
|
handler.wfile.write(f"event: {event_name}\ndata: {json.dumps(payload, ensure_ascii=False)}\n\n".encode())
|
|
@@ -4736,8 +4757,27 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4736
4757
|
{"type": "content_block_delta", "index": idx, "delta": {"type": "text_delta", "text": text}},
|
|
4737
4758
|
)
|
|
4738
4759
|
|
|
4760
|
+
def update_stream_activity(force: bool = False) -> None:
|
|
4761
|
+
nonlocal last_activity_update
|
|
4762
|
+
now = time.time()
|
|
4763
|
+
if not force and now - last_activity_update < 0.5:
|
|
4764
|
+
return
|
|
4765
|
+
last_activity_update = now
|
|
4766
|
+
estimated_output = output_tokens or max(0, len(text_so_far) // 4)
|
|
4767
|
+
write_router_activity(
|
|
4768
|
+
"request",
|
|
4769
|
+
provider,
|
|
4770
|
+
model,
|
|
4771
|
+
tokens=input_tokens,
|
|
4772
|
+
bytes=input_bytes,
|
|
4773
|
+
output_tokens=estimated_output,
|
|
4774
|
+
chunks=chunks_seen,
|
|
4775
|
+
stream=True,
|
|
4776
|
+
)
|
|
4777
|
+
|
|
4739
4778
|
try:
|
|
4740
4779
|
for raw_line in resp:
|
|
4780
|
+
chunks_seen += 1
|
|
4741
4781
|
line = raw_line.decode("utf-8", errors="ignore").strip()
|
|
4742
4782
|
if not line or line.startswith(":"):
|
|
4743
4783
|
continue
|
|
@@ -4789,6 +4829,7 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4789
4829
|
emit_text_delta(to_flush)
|
|
4790
4830
|
else:
|
|
4791
4831
|
emit_text_delta(text_chunk)
|
|
4832
|
+
update_stream_activity()
|
|
4792
4833
|
for call in delta.get("tool_calls") or []:
|
|
4793
4834
|
if not isinstance(call, dict):
|
|
4794
4835
|
continue
|
|
@@ -4804,6 +4845,8 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4804
4845
|
slot["name"] += str(fn.get("name"))
|
|
4805
4846
|
if fn.get("arguments"):
|
|
4806
4847
|
slot["arguments"] += str(fn.get("arguments"))
|
|
4848
|
+
update_stream_activity()
|
|
4849
|
+
update_stream_activity(force=True)
|
|
4807
4850
|
if word_chunking and text_buffer:
|
|
4808
4851
|
to_flush, text_buffer = _split_word_buffer(text_buffer, force=True)
|
|
4809
4852
|
emit_text_delta(to_flush)
|
|
@@ -5108,6 +5151,8 @@ def forward_openai_compatible_chat(handler: BaseHTTPRequestHandler, provider: st
|
|
|
5108
5151
|
model,
|
|
5109
5152
|
emit_retry_notice,
|
|
5110
5153
|
)
|
|
5154
|
+
req_tokens = estimate_tokens(req_body)
|
|
5155
|
+
req_bytes = len(json.dumps(req_body, ensure_ascii=False).encode("utf-8"))
|
|
5111
5156
|
stream_openai_chat_to_anthropic_sse(
|
|
5112
5157
|
handler,
|
|
5113
5158
|
resp,
|
|
@@ -5116,8 +5161,10 @@ def forward_openai_compatible_chat(handler: BaseHTTPRequestHandler, provider: st
|
|
|
5116
5161
|
source_body=body,
|
|
5117
5162
|
start_index=index,
|
|
5118
5163
|
word_chunking=bool(pcfg.get("stream_word_chunking", False)),
|
|
5164
|
+
input_tokens=req_tokens,
|
|
5165
|
+
input_bytes=req_bytes,
|
|
5119
5166
|
)
|
|
5120
|
-
write_router_activity("success", provider, model, tokens=
|
|
5167
|
+
write_router_activity("success", provider, model, tokens=req_tokens, bytes=req_bytes, stream=True)
|
|
5121
5168
|
except RuntimeError as exc:
|
|
5122
5169
|
msg = str(exc)
|
|
5123
5170
|
write_anthropic_stream_blocks(handler, [{"type": "text", "text": f"Upstream error: {msg}"}], index)
|
|
@@ -6282,16 +6329,38 @@ def apply_llm_preset_to_provider(provider: str, pcfg: dict[str, Any], preset_id:
|
|
|
6282
6329
|
f"{ui_text('apply_preset', lang)}: {label}",
|
|
6283
6330
|
f"Provider: {provider}; {ui_text('model_family', lang)}: {model_family_text(family, lang)}",
|
|
6284
6331
|
]
|
|
6285
|
-
if provider in ("vllm", "self-hosted-nim"):
|
|
6286
|
-
server_limit = upstream_model_context_limit(provider, pcfg)
|
|
6287
|
-
if server_limit:
|
|
6288
|
-
lines.append(f"Server max_model_len: {server_limit}")
|
|
6289
|
-
if preset_id in ("long-context-65k", "large-output") and server_limit < 65536:
|
|
6290
|
-
lines.append("Long-context preset requires restarting the server with --max-model-len 65536 or higher.")
|
|
6291
|
-
lines.append("Client settings were capped to the server-reported context length.")
|
|
6292
|
-
elif preset_id in ("long-context-65k", "large-output"):
|
|
6293
|
-
lines.append("Could not verify server max_model_len; vLLM/NIM must be started with a matching context limit.")
|
|
6294
|
-
|
|
6332
|
+
if provider in ("vllm", "nvidia-hosted", "self-hosted-nim"):
|
|
6333
|
+
server_limit = upstream_model_context_limit(provider, pcfg)
|
|
6334
|
+
if server_limit:
|
|
6335
|
+
lines.append(f"Server max_model_len: {server_limit}")
|
|
6336
|
+
if preset_id in ("long-context-65k", "large-output") and server_limit < 65536:
|
|
6337
|
+
lines.append("Long-context preset requires restarting the server with --max-model-len 65536 or higher.")
|
|
6338
|
+
lines.append("Client settings were capped to the server-reported context length.")
|
|
6339
|
+
elif preset_id in ("long-context-65k", "large-output"):
|
|
6340
|
+
lines.append("Could not verify server max_model_len; vLLM/NIM must be started with a matching context limit.")
|
|
6341
|
+
if provider in ("vllm", "nvidia-hosted", "self-hosted-nim"):
|
|
6342
|
+
lines.append(
|
|
6343
|
+
"Applied options: "
|
|
6344
|
+
f"context_window={pcfg.get('context_window', 'default')}, "
|
|
6345
|
+
f"reserve={pcfg.get('context_reserve_tokens', 'default')}, "
|
|
6346
|
+
f"max_output_tokens={pcfg.get('max_output_tokens', 'default')}, "
|
|
6347
|
+
f"timeout={pcfg.get('request_timeout_ms', 'default')}ms"
|
|
6348
|
+
)
|
|
6349
|
+
elif provider in ("ollama", "ollama-cloud"):
|
|
6350
|
+
opts = ollama_extra_options(pcfg)
|
|
6351
|
+
lines.append(
|
|
6352
|
+
"Applied options: "
|
|
6353
|
+
f"num_ctx={ollama_num_ctx_status(pcfg)}, "
|
|
6354
|
+
f"num_predict={opts.get('num_predict', 'default')}, "
|
|
6355
|
+
f"timeout={pcfg.get('request_timeout_ms', 'default')}ms"
|
|
6356
|
+
)
|
|
6357
|
+
elif provider == "anthropic":
|
|
6358
|
+
lines.append(
|
|
6359
|
+
"Applied options: "
|
|
6360
|
+
f"max_output_tokens={pcfg.get('max_output_tokens', 'default')}, "
|
|
6361
|
+
f"timeout={pcfg.get('request_timeout_ms', 'default')}ms"
|
|
6362
|
+
)
|
|
6363
|
+
return lines
|
|
6295
6364
|
|
|
6296
6365
|
|
|
6297
6366
|
def apply_llm_preset_config(provider: str, preset_id: str) -> list[str]:
|
|
@@ -6489,9 +6558,9 @@ def llm_option_panel_rows(provider: str, pcfg: dict[str, Any], lang: str | None
|
|
|
6489
6558
|
add("Rate limit RPM", "rate_limit_rpm", pcfg.get("rate_limit_rpm", 40))
|
|
6490
6559
|
add("Rate limit status", "rate_limit_status", "on" if bool(pcfg.get("rate_limit_status", True)) else "off")
|
|
6491
6560
|
else:
|
|
6492
|
-
if provider in ("vllm", "self-hosted-nim"):
|
|
6493
|
-
add("Context window", "context_window", pcfg.get("context_window", "default"))
|
|
6494
|
-
add("Context reserve", "context_reserve_tokens", pcfg.get("context_reserve_tokens", "default"))
|
|
6561
|
+
if provider in ("vllm", "nvidia-hosted", "self-hosted-nim"):
|
|
6562
|
+
add("Context window", "context_window", pcfg.get("context_window", "default"))
|
|
6563
|
+
add("Context reserve", "context_reserve_tokens", pcfg.get("context_reserve_tokens", "default"))
|
|
6495
6564
|
add("Max output tokens", "max_output_tokens", pcfg.get("max_output_tokens", "default"))
|
|
6496
6565
|
if provider in ("vllm", "nvidia-hosted", "self-hosted-nim"):
|
|
6497
6566
|
add("Timeout ms", "request_timeout_ms", pcfg.get("request_timeout_ms", "default"))
|
|
@@ -6532,13 +6601,38 @@ def llm_option_prompt_default(provider: str, pcfg: dict[str, Any], key: str) ->
|
|
|
6532
6601
|
return "" if value is None else str(value)
|
|
6533
6602
|
|
|
6534
6603
|
|
|
6535
|
-
def set_llm_option_config(provider: str, key: str, raw_value: str) -> list[str]:
|
|
6604
|
+
def set_llm_option_config(provider: str, key: str, raw_value: str) -> list[str]:
|
|
6536
6605
|
cfg = load_config()
|
|
6537
6606
|
pcfg = cfg["providers"][provider]
|
|
6538
|
-
value = raw_value.strip()
|
|
6539
|
-
if not value:
|
|
6540
|
-
return ["Option unchanged."]
|
|
6541
|
-
|
|
6607
|
+
value = raw_value.strip()
|
|
6608
|
+
if not value:
|
|
6609
|
+
return ["Option unchanged."]
|
|
6610
|
+
numeric_keys = {
|
|
6611
|
+
"context_window",
|
|
6612
|
+
"context",
|
|
6613
|
+
"max_model_len",
|
|
6614
|
+
"context_reserve_tokens",
|
|
6615
|
+
"reserve",
|
|
6616
|
+
"max_output_tokens",
|
|
6617
|
+
"max_tokens",
|
|
6618
|
+
"maxtoken",
|
|
6619
|
+
"max_token",
|
|
6620
|
+
"num_ctx_min",
|
|
6621
|
+
"num_ctx_max",
|
|
6622
|
+
"num_predict",
|
|
6623
|
+
"timeout",
|
|
6624
|
+
"timeout_ms",
|
|
6625
|
+
"request_timeout",
|
|
6626
|
+
"request_timeout_ms",
|
|
6627
|
+
"rate_limit",
|
|
6628
|
+
"rate_limit_rpm",
|
|
6629
|
+
"rpm",
|
|
6630
|
+
"top_k",
|
|
6631
|
+
}
|
|
6632
|
+
if key in numeric_keys and value.lower() not in ("default", "unset", "none", "null", "0"):
|
|
6633
|
+
if not re.fullmatch(r"\d+", value):
|
|
6634
|
+
return [f"{key}: enter digits only, or use default/unset to clear."]
|
|
6635
|
+
clear_words = ("default", "unset", "none", "null")
|
|
6542
6636
|
token = f"unset:{key}" if value.lower() in clear_words else f"{key}={value}"
|
|
6543
6637
|
if provider in ("ollama", "ollama-cloud"):
|
|
6544
6638
|
apply_ollama_option(pcfg, token)
|
|
@@ -6616,11 +6710,11 @@ def apply_provider_option(provider: str, pcfg: dict[str, Any], token: str) -> No
|
|
|
6616
6710
|
raise SystemExit("timeout must be a positive integer; values above 10000 are treated as milliseconds")
|
|
6617
6711
|
pcfg["request_timeout_ms"] = fixed if key.endswith("_ms") or fixed > 10000 else fixed * 1000
|
|
6618
6712
|
return
|
|
6619
|
-
if key in ("rate_limit", "rate_limit_rpm", "rpm"):
|
|
6620
|
-
fixed = positive_int(value)
|
|
6621
|
-
if value in (0, "0", False, None):
|
|
6622
|
-
pcfg
|
|
6623
|
-
return
|
|
6713
|
+
if key in ("rate_limit", "rate_limit_rpm", "rpm"):
|
|
6714
|
+
fixed = positive_int(value)
|
|
6715
|
+
if value in (0, "0", False, None):
|
|
6716
|
+
pcfg["rate_limit_rpm"] = 0
|
|
6717
|
+
return
|
|
6624
6718
|
if not fixed:
|
|
6625
6719
|
raise SystemExit("rate_limit_rpm must be a positive integer, or 0/unset to disable")
|
|
6626
6720
|
pcfg["rate_limit_rpm"] = fixed
|
|
@@ -8116,14 +8210,16 @@ def render_prelaunch_screen(
|
|
|
8116
8210
|
return False
|
|
8117
8211
|
|
|
8118
8212
|
|
|
8119
|
-
def prompt_menu_value(prompt: str, default: str = "", secret: bool = False) -> str:
|
|
8120
|
-
label = f"{prompt}"
|
|
8121
|
-
if default:
|
|
8122
|
-
label += f" [{default}]"
|
|
8123
|
-
label += ": "
|
|
8124
|
-
if
|
|
8125
|
-
|
|
8126
|
-
|
|
8213
|
+
def prompt_menu_value(prompt: str, default: str = "", secret: bool = False, restore_tty: Callable[[], None] | None = None, raw_tty: Callable[[], None] | None = None) -> str:
|
|
8214
|
+
label = f"{prompt}"
|
|
8215
|
+
if default:
|
|
8216
|
+
label += f" [{default}]"
|
|
8217
|
+
label += ": "
|
|
8218
|
+
if restore_tty:
|
|
8219
|
+
restore_tty()
|
|
8220
|
+
if sys.stdout.isatty():
|
|
8221
|
+
sys.stdout.write("\033[?25h")
|
|
8222
|
+
sys.stdout.flush()
|
|
8127
8223
|
sys.stdout.write("\n" + ansi(label, "1;38;5;208"))
|
|
8128
8224
|
sys.stdout.flush()
|
|
8129
8225
|
try:
|
|
@@ -8131,12 +8227,14 @@ def prompt_menu_value(prompt: str, default: str = "", secret: bool = False) -> s
|
|
|
8131
8227
|
value = getpass.getpass("")
|
|
8132
8228
|
else:
|
|
8133
8229
|
value = input()
|
|
8134
|
-
finally:
|
|
8135
|
-
if sys.stdout.isatty():
|
|
8136
|
-
sys.stdout.write("\033[?25l")
|
|
8137
|
-
sys.stdout.flush()
|
|
8138
|
-
|
|
8139
|
-
|
|
8230
|
+
finally:
|
|
8231
|
+
if sys.stdout.isatty():
|
|
8232
|
+
sys.stdout.write("\033[?25l")
|
|
8233
|
+
sys.stdout.flush()
|
|
8234
|
+
if raw_tty:
|
|
8235
|
+
raw_tty()
|
|
8236
|
+
value = value.strip()
|
|
8237
|
+
return value or default
|
|
8140
8238
|
|
|
8141
8239
|
|
|
8142
8240
|
def portable_provider_menu() -> int:
|
|
@@ -8168,22 +8266,23 @@ def portable_prelaunch_menu() -> int:
|
|
|
8168
8266
|
enable_ansi()
|
|
8169
8267
|
main_idx = 7 if settings_ready_except_api_key() else 0
|
|
8170
8268
|
panel: str | None = None
|
|
8171
|
-
panel_idx = 0
|
|
8172
|
-
panel_rows: list[str] = []
|
|
8173
|
-
panel_values: list[str] = []
|
|
8269
|
+
panel_idx = 0
|
|
8270
|
+
panel_rows: list[str] = []
|
|
8271
|
+
panel_values: list[str] = []
|
|
8272
|
+
panel_last_idx: dict[str, int] = {}
|
|
8174
8273
|
checks = preflight_lines()
|
|
8175
8274
|
messages: list[str] = []
|
|
8176
8275
|
first_render = True
|
|
8177
8276
|
|
|
8178
|
-
def open_panel(name: str) -> None:
|
|
8179
|
-
nonlocal panel, panel_idx, panel_rows, panel_values, messages, first_render
|
|
8180
|
-
cfg = load_config()
|
|
8181
|
-
provider, pcfg = get_current_provider(cfg)
|
|
8182
|
-
panel = name
|
|
8183
|
-
panel_idx = 0
|
|
8184
|
-
if name == "language":
|
|
8185
|
-
panel_rows, panel_values = language_panel_rows(cfg)
|
|
8186
|
-
panel_idx = panel_values.index(cfg.get("language", "en"))
|
|
8277
|
+
def open_panel(name: str) -> None:
|
|
8278
|
+
nonlocal panel, panel_idx, panel_rows, panel_values, messages, first_render
|
|
8279
|
+
cfg = load_config()
|
|
8280
|
+
provider, pcfg = get_current_provider(cfg)
|
|
8281
|
+
panel = name
|
|
8282
|
+
panel_idx = panel_last_idx.get(name, 0)
|
|
8283
|
+
if name == "language":
|
|
8284
|
+
panel_rows, panel_values = language_panel_rows(cfg)
|
|
8285
|
+
panel_idx = panel_values.index(cfg.get("language", "en"))
|
|
8187
8286
|
elif name == "provider":
|
|
8188
8287
|
panel_rows, panel_values = provider_panel_rows(cfg)
|
|
8189
8288
|
panel_idx = panel_values.index(provider)
|
|
@@ -8205,15 +8304,19 @@ def portable_prelaunch_menu() -> int:
|
|
|
8205
8304
|
panel_rows, panel_values = [f"Advisor model list failed: {type(exc).__name__}: {exc}", "+ Custom advisor model id..."], []
|
|
8206
8305
|
elif name == "test":
|
|
8207
8306
|
panel_rows, panel_values = ["Run compatibility test", "Back"], ["run", "back"]
|
|
8208
|
-
elif name == "options":
|
|
8209
|
-
panel_rows, panel_values = llm_option_panel_rows(provider, pcfg, cfg.get("language", "en"))
|
|
8210
|
-
elif name == "preset":
|
|
8211
|
-
panel_rows, panel_values = llm_preset_panel_rows(provider, pcfg, cfg.get("language", "en"))
|
|
8212
|
-
|
|
8213
|
-
|
|
8214
|
-
|
|
8215
|
-
|
|
8216
|
-
panel_idx
|
|
8307
|
+
elif name == "options":
|
|
8308
|
+
panel_rows, panel_values = llm_option_panel_rows(provider, pcfg, cfg.get("language", "en"))
|
|
8309
|
+
elif name == "preset":
|
|
8310
|
+
panel_rows, panel_values = llm_preset_panel_rows(provider, pcfg, cfg.get("language", "en"))
|
|
8311
|
+
if panel_rows:
|
|
8312
|
+
panel_idx = max(0, min(panel_idx, len(panel_rows) - 1))
|
|
8313
|
+
|
|
8314
|
+
def close_panel(next_idx: int | None = None) -> None:
|
|
8315
|
+
nonlocal panel, panel_idx, panel_rows, panel_values, main_idx
|
|
8316
|
+
if panel:
|
|
8317
|
+
panel_last_idx[panel] = panel_idx
|
|
8318
|
+
panel = None
|
|
8319
|
+
panel_idx = 0
|
|
8217
8320
|
panel_rows = []
|
|
8218
8321
|
panel_values = []
|
|
8219
8322
|
if next_idx is not None:
|
|
@@ -8236,20 +8339,43 @@ def portable_prelaunch_menu() -> int:
|
|
|
8236
8339
|
termios.tcsetattr(fd, termios.TCSANOW, new)
|
|
8237
8340
|
except Exception:
|
|
8238
8341
|
fd = -1
|
|
8239
|
-
if sys.stdout.isatty():
|
|
8240
|
-
sys.stdout.write("\033[?25l")
|
|
8241
|
-
sys.stdout.flush()
|
|
8242
|
-
|
|
8342
|
+
if sys.stdout.isatty():
|
|
8343
|
+
sys.stdout.write("\033[?25l")
|
|
8344
|
+
sys.stdout.flush()
|
|
8345
|
+
def restore_line_mode() -> None:
|
|
8346
|
+
if old_settings is not None and fd >= 0:
|
|
8347
|
+
try:
|
|
8348
|
+
import termios
|
|
8349
|
+
termios.tcsetattr(fd, termios.TCSANOW, old_settings)
|
|
8350
|
+
except Exception:
|
|
8351
|
+
pass
|
|
8352
|
+
|
|
8353
|
+
def restore_raw_mode() -> None:
|
|
8354
|
+
if old_settings is not None and fd >= 0:
|
|
8355
|
+
try:
|
|
8356
|
+
import termios
|
|
8357
|
+
new = termios.tcgetattr(fd)
|
|
8358
|
+
new[3] = new[3] & ~(termios.ECHO | termios.ICANON)
|
|
8359
|
+
new[6][termios.VMIN] = 1
|
|
8360
|
+
new[6][termios.VTIME] = 0
|
|
8361
|
+
termios.tcsetattr(fd, termios.TCSANOW, new)
|
|
8362
|
+
except Exception:
|
|
8363
|
+
pass
|
|
8364
|
+
|
|
8365
|
+
try:
|
|
8243
8366
|
while True:
|
|
8244
8367
|
first_render = render_prelaunch_screen(main_idx, panel, panel_idx, panel_rows, checks, messages, first_render)
|
|
8245
8368
|
key = read_menu_key(fd) if fd >= 0 else read_menu_key()
|
|
8246
|
-
if panel:
|
|
8247
|
-
|
|
8248
|
-
|
|
8249
|
-
|
|
8250
|
-
|
|
8251
|
-
|
|
8252
|
-
|
|
8369
|
+
if panel:
|
|
8370
|
+
panel_name = panel
|
|
8371
|
+
if key in ("up", "k"):
|
|
8372
|
+
panel_idx = (panel_idx - 1) % max(1, len(panel_rows))
|
|
8373
|
+
panel_last_idx[panel_name] = panel_idx
|
|
8374
|
+
continue
|
|
8375
|
+
if key in ("down", "j"):
|
|
8376
|
+
panel_idx = (panel_idx + 1) % max(1, len(panel_rows))
|
|
8377
|
+
panel_last_idx[panel_name] = panel_idx
|
|
8378
|
+
continue
|
|
8253
8379
|
if key in ("esc", "left", "q"):
|
|
8254
8380
|
close_panel()
|
|
8255
8381
|
continue
|
|
@@ -8274,7 +8400,7 @@ def portable_prelaunch_menu() -> int:
|
|
|
8274
8400
|
close_panel()
|
|
8275
8401
|
continue
|
|
8276
8402
|
if value == "__custom__" or panel_idx >= len(panel_values):
|
|
8277
|
-
model_value = prompt_menu_value("Model id or alias")
|
|
8403
|
+
model_value = prompt_menu_value("Model id or alias", restore_tty=restore_line_mode, raw_tty=restore_raw_mode)
|
|
8278
8404
|
else:
|
|
8279
8405
|
model_value = value
|
|
8280
8406
|
if model_value:
|
|
@@ -8286,7 +8412,7 @@ def portable_prelaunch_menu() -> int:
|
|
|
8286
8412
|
close_panel()
|
|
8287
8413
|
continue
|
|
8288
8414
|
if value == "__custom__" or panel_idx >= len(panel_values):
|
|
8289
|
-
advisor_value = prompt_menu_value("Advisor model id", "deepseek-v4-pro")
|
|
8415
|
+
advisor_value = prompt_menu_value("Advisor model id", "deepseek-v4-pro", restore_tty=restore_line_mode, raw_tty=restore_raw_mode)
|
|
8290
8416
|
else:
|
|
8291
8417
|
advisor_value = value
|
|
8292
8418
|
messages = set_advisor_model_config(advisor_value)
|
|
@@ -8296,7 +8422,7 @@ def portable_prelaunch_menu() -> int:
|
|
|
8296
8422
|
if value == "back":
|
|
8297
8423
|
close_panel()
|
|
8298
8424
|
elif value == "input":
|
|
8299
|
-
key_value = prompt_menu_value(f"API key for {provider}", secret=True)
|
|
8425
|
+
key_value = prompt_menu_value(f"API key for {provider}", secret=True, restore_tty=restore_line_mode, raw_tty=restore_raw_mode)
|
|
8300
8426
|
if key_value:
|
|
8301
8427
|
messages = store_api_key_config(provider, key_value)
|
|
8302
8428
|
refresh_checks()
|
|
@@ -8307,7 +8433,7 @@ def portable_prelaunch_menu() -> int:
|
|
|
8307
8433
|
"nvidia-hosted": "NVIDIA_API_KEY",
|
|
8308
8434
|
"ollama-cloud": "OLLAMA_API_KEY",
|
|
8309
8435
|
}.get(provider, "API_KEY")
|
|
8310
|
-
env_name = prompt_menu_value("Environment variable name", default_env)
|
|
8436
|
+
env_name = prompt_menu_value("Environment variable name", default_env, restore_tty=restore_line_mode, raw_tty=restore_raw_mode)
|
|
8311
8437
|
key_value = os.environ.get(env_name, "").strip()
|
|
8312
8438
|
if key_value:
|
|
8313
8439
|
messages = store_api_key_config(provider, key_value)
|
|
@@ -8320,7 +8446,7 @@ def portable_prelaunch_menu() -> int:
|
|
|
8320
8446
|
if not key_value:
|
|
8321
8447
|
messages = ["Clipboard did not contain readable text."]
|
|
8322
8448
|
else:
|
|
8323
|
-
confirm = prompt_menu_value(f"Clipboard contains {mask_secret(key_value)}. Store it? y/N")
|
|
8449
|
+
confirm = prompt_menu_value(f"Clipboard contains {mask_secret(key_value)}. Store it? y/N", restore_tty=restore_line_mode, raw_tty=restore_raw_mode)
|
|
8324
8450
|
if confirm.lower().startswith("y"):
|
|
8325
8451
|
messages = store_api_key_config(provider, key_value)
|
|
8326
8452
|
else:
|
|
@@ -8336,7 +8462,7 @@ def portable_prelaunch_menu() -> int:
|
|
|
8336
8462
|
close_panel(4)
|
|
8337
8463
|
elif value == "edit":
|
|
8338
8464
|
default = pcfg.get("base_url") or default_base_url(provider)
|
|
8339
|
-
url = prompt_menu_value(f"Base URL for {provider}", default)
|
|
8465
|
+
url = prompt_menu_value(f"Base URL for {provider}", default, restore_tty=restore_line_mode, raw_tty=restore_raw_mode)
|
|
8340
8466
|
if url:
|
|
8341
8467
|
messages = set_base_url_config(provider, url)
|
|
8342
8468
|
refresh_checks()
|
|
@@ -8365,21 +8491,27 @@ def portable_prelaunch_menu() -> int:
|
|
|
8365
8491
|
messages = set_llm_option_config(provider, value, "false" if current else "true")
|
|
8366
8492
|
except Exception as exc:
|
|
8367
8493
|
messages = [f"Option update failed: {type(exc).__name__}: {exc}"]
|
|
8368
|
-
refresh_checks()
|
|
8369
|
-
cfg = load_config()
|
|
8370
|
-
provider, pcfg = get_current_provider(cfg)
|
|
8371
|
-
|
|
8372
|
-
|
|
8373
|
-
|
|
8374
|
-
|
|
8494
|
+
refresh_checks()
|
|
8495
|
+
cfg = load_config()
|
|
8496
|
+
provider, pcfg = get_current_provider(cfg)
|
|
8497
|
+
old_idx = panel_idx
|
|
8498
|
+
panel_rows, panel_values = llm_option_panel_rows(provider, pcfg, cfg.get("language", "en"))
|
|
8499
|
+
panel_idx = max(0, min(old_idx, len(panel_rows) - 1))
|
|
8500
|
+
panel_last_idx["options"] = panel_idx
|
|
8501
|
+
else:
|
|
8502
|
+
default = llm_option_prompt_default(provider, pcfg, value)
|
|
8503
|
+
entered = prompt_menu_value(f"{value} for {provider} (default/unset clears)", default, restore_tty=restore_line_mode, raw_tty=restore_raw_mode)
|
|
8375
8504
|
try:
|
|
8376
8505
|
messages = set_llm_option_config(provider, value, entered)
|
|
8377
8506
|
except Exception as exc:
|
|
8378
8507
|
messages = [f"Option update failed: {type(exc).__name__}: {exc}"]
|
|
8379
|
-
refresh_checks()
|
|
8380
|
-
cfg = load_config()
|
|
8381
|
-
provider, pcfg = get_current_provider(cfg)
|
|
8382
|
-
|
|
8508
|
+
refresh_checks()
|
|
8509
|
+
cfg = load_config()
|
|
8510
|
+
provider, pcfg = get_current_provider(cfg)
|
|
8511
|
+
old_idx = panel_idx
|
|
8512
|
+
panel_rows, panel_values = llm_option_panel_rows(provider, pcfg, cfg.get("language", "en"))
|
|
8513
|
+
panel_idx = max(0, min(old_idx, len(panel_rows) - 1))
|
|
8514
|
+
panel_last_idx["options"] = panel_idx
|
|
8383
8515
|
elif panel == "preset":
|
|
8384
8516
|
if value == "back":
|
|
8385
8517
|
open_panel("options")
|
|
@@ -8391,12 +8523,14 @@ def portable_prelaunch_menu() -> int:
|
|
|
8391
8523
|
except Exception as exc:
|
|
8392
8524
|
messages = [f"Preset failed: {type(exc).__name__}: {exc}"]
|
|
8393
8525
|
refresh_checks()
|
|
8394
|
-
cfg = load_config()
|
|
8395
|
-
provider, pcfg = get_current_provider(cfg)
|
|
8396
|
-
panel = "options"
|
|
8397
|
-
panel_idx = 0
|
|
8398
|
-
panel_rows, panel_values = llm_option_panel_rows(provider, pcfg, cfg.get("language", "en"))
|
|
8399
|
-
|
|
8526
|
+
cfg = load_config()
|
|
8527
|
+
provider, pcfg = get_current_provider(cfg)
|
|
8528
|
+
panel = "options"
|
|
8529
|
+
panel_idx = panel_last_idx.get("options", 0)
|
|
8530
|
+
panel_rows, panel_values = llm_option_panel_rows(provider, pcfg, cfg.get("language", "en"))
|
|
8531
|
+
panel_idx = max(0, min(panel_idx, len(panel_rows) - 1))
|
|
8532
|
+
panel_last_idx["options"] = panel_idx
|
|
8533
|
+
continue
|
|
8400
8534
|
|
|
8401
8535
|
if key in ("up", "k"):
|
|
8402
8536
|
main_idx = (main_idx - 1) % 10
|
package/docs/README.ja.md
CHANGED
|
@@ -47,7 +47,7 @@ vLLM、NVIDIA hosted、self-hosted NIM を選択し、通常の Claude Code 引
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
現在のバージョン: `0.1.38`
|
|
50
|
+
現在のバージョン: `0.1.42`
|
|
51
51
|
|
|
52
52
|
## 作られた理由
|
|
53
53
|
|
|
@@ -351,6 +351,30 @@ Windows/Linux 管理、クリーンアップスクリプト、定期的なセキ
|
|
|
351
351
|
|
|
352
352
|
## 変更履歴
|
|
353
353
|
|
|
354
|
+
### 0.1.42
|
|
355
|
+
|
|
356
|
+
- **ライブストリーム進捗**: statusline が upstream streaming の出力進捗を
|
|
357
|
+
入力/出力 token 推定値と chunk 数で継続更新します。
|
|
358
|
+
|
|
359
|
+
### 0.1.41
|
|
360
|
+
|
|
361
|
+
- **Statusline 表示改善**: upstream token 数に桁区切りと `tok` 前の空白を入れ、
|
|
362
|
+
`27,501 tok` のように表示します。
|
|
363
|
+
|
|
364
|
+
### 0.1.40
|
|
365
|
+
|
|
366
|
+
- **RPM 0 を保持**: `rate_limit_rpm=0` の設定が provider 既定値に戻らず、
|
|
367
|
+
明示的な無制限モードとして保存されます。
|
|
368
|
+
|
|
369
|
+
### 0.1.39
|
|
370
|
+
|
|
371
|
+
- **メニュー入力修正**: テキスト/数字プロンプトの前に terminal line/echo mode を
|
|
372
|
+
復元し、prelaunch UI で入力した数字が見えるようにしました。
|
|
373
|
+
- **数字検証の安定化**: 数字オプションへ不正な文字を入れてもメニューが
|
|
374
|
+
クラッシュせず、案内メッセージを表示します。
|
|
375
|
+
- **プリセット表示改善**: preset 適用後に実際の context、reserve、output、
|
|
376
|
+
timeout 値をメッセージに表示します。
|
|
377
|
+
|
|
354
378
|
### 0.1.38
|
|
355
379
|
|
|
356
380
|
- **ユーザー選択の context window を優先**: NVIDIA hosted の 32K safety cap を
|
package/docs/README.ko.md
CHANGED
|
@@ -47,7 +47,7 @@ NVIDIA hosted, self-hosted NIM을 선택하고, Claude Code의 일반 인자는
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
현재 버전: `0.1.38`
|
|
50
|
+
현재 버전: `0.1.42`
|
|
51
51
|
|
|
52
52
|
## 왜 만들었나
|
|
53
53
|
|
|
@@ -351,6 +351,30 @@ Windows 이벤트 로그 리뷰, 바이러스/랜섬웨어 침입 시도 정리,
|
|
|
351
351
|
|
|
352
352
|
## 변경 이력
|
|
353
353
|
|
|
354
|
+
### 0.1.42
|
|
355
|
+
|
|
356
|
+
- **실시간 스트림 진행 표시**: statusline이 upstream streaming 출력 진행을
|
|
357
|
+
입력/출력 token 추정치와 chunk 수로 계속 갱신합니다.
|
|
358
|
+
|
|
359
|
+
### 0.1.41
|
|
360
|
+
|
|
361
|
+
- **Statusline 표시 개선**: upstream token 수에 천 단위 구분자와 `tok` 앞 공백을
|
|
362
|
+
넣어 `27,501 tok`처럼 표시합니다.
|
|
363
|
+
|
|
364
|
+
### 0.1.40
|
|
365
|
+
|
|
366
|
+
- **RPM 0 유지**: `rate_limit_rpm=0` 설정이 provider 기본값으로 되돌아가지 않고
|
|
367
|
+
명시적인 무제한 모드로 저장됩니다.
|
|
368
|
+
|
|
369
|
+
### 0.1.39
|
|
370
|
+
|
|
371
|
+
- **메뉴 입력 수정**: 텍스트/숫자 프롬프트 전에 터미널 line/echo 모드를 복구하여
|
|
372
|
+
prelaunch UI에서 입력한 숫자가 보이게 했습니다.
|
|
373
|
+
- **숫자 검증 안정화**: 숫자 옵션에 잘못된 문자를 넣어도 메뉴가 크래시되지 않고
|
|
374
|
+
안내 메시지를 표시합니다.
|
|
375
|
+
- **프리셋 표시 개선**: preset 적용 후 실제 context, reserve, output, timeout 값을
|
|
376
|
+
메시지에 표시합니다.
|
|
377
|
+
|
|
354
378
|
### 0.1.38
|
|
355
379
|
|
|
356
380
|
- **사용자 선택 context window 우선**: NVIDIA hosted 32K safety cap을 제거했습니다.
|
package/docs/README.zh.md
CHANGED
|
@@ -47,7 +47,7 @@ NIM,并把普通 Claude Code 参数原样传递。
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
当前版本: `0.1.38`
|
|
50
|
+
当前版本: `0.1.42`
|
|
51
51
|
|
|
52
52
|
## 为什么存在
|
|
53
53
|
|
|
@@ -337,6 +337,29 @@ Hermes 格式模型或部分较旧的 Qwen tool template。
|
|
|
337
337
|
|
|
338
338
|
## 更新日志
|
|
339
339
|
|
|
340
|
+
### 0.1.42
|
|
341
|
+
|
|
342
|
+
- **实时流式进度**:statusline 会持续更新 upstream streaming 输出进度,
|
|
343
|
+
显示输入/输出 token 估算值和 chunk 数。
|
|
344
|
+
|
|
345
|
+
### 0.1.41
|
|
346
|
+
|
|
347
|
+
- **Statusline 格式优化**:upstream token 数现在带千位分隔符,并在 `tok` 前加入空格,
|
|
348
|
+
例如 `27,501 tok`。
|
|
349
|
+
|
|
350
|
+
### 0.1.40
|
|
351
|
+
|
|
352
|
+
- **保留 RPM 0**:`rate_limit_rpm=0` 现在会保存为明确的无限制模式,
|
|
353
|
+
不会回退到 provider 默认值。
|
|
354
|
+
|
|
355
|
+
### 0.1.39
|
|
356
|
+
|
|
357
|
+
- **菜单输入修复**:在文本/数字提示前恢复 terminal line/echo mode,
|
|
358
|
+
prelaunch UI 中输入的数字现在可见。
|
|
359
|
+
- **数字校验更稳**:数字选项输入非法字符时不再让菜单崩溃,而是显示提示消息。
|
|
360
|
+
- **Preset 可见性**:应用 preset 后会显示实际 context、reserve、output、
|
|
361
|
+
timeout 值。
|
|
362
|
+
|
|
340
363
|
### 0.1.38
|
|
341
364
|
|
|
342
365
|
- **优先使用用户选择的 context window**:移除 NVIDIA hosted 的 32K safety cap。
|
package/docs/manual.md
CHANGED
package/package.json
CHANGED