@oneciel-ai/claude-any 0.1.39 → 0.1.43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -1
- package/claude_any.py +92 -24
- package/docs/README.ja.md +21 -1
- package/docs/README.ko.md +21 -1
- package/docs/README.zh.md +21 -1
- package/docs/manual.md +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -48,7 +48,7 @@ arguments through unchanged.
|
|
|
48
48
|
|
|
49
49
|
Credits: One Ciel LLC
|
|
50
50
|
|
|
51
|
-
Current version: `0.1.39`
|
|
51
|
+
Current version: `0.1.43`
|
|
52
52
|
|
|
53
53
|
## Why This Exists
|
|
54
54
|
|
|
@@ -381,6 +381,27 @@ steps under that larger model's supervision.
|
|
|
381
381
|
|
|
382
382
|
## Changelog
|
|
383
383
|
|
|
384
|
+
### 0.1.43
|
|
385
|
+
|
|
386
|
+
- **429 backoff retry**: upstream `429 Too Many Requests` responses are now
|
|
387
|
+
handled as retry/backoff events across all retry attempts instead of leaking
|
|
388
|
+
the raw upstream error after the first backoff.
|
|
389
|
+
|
|
390
|
+
### 0.1.42
|
|
391
|
+
|
|
392
|
+
- **Live stream progress**: the statusline now updates streamed upstream output
|
|
393
|
+
progress with formatted input/output token estimates and chunk counts.
|
|
394
|
+
|
|
395
|
+
### 0.1.41
|
|
396
|
+
|
|
397
|
+
- **Statusline formatting**: upstream token counts now use thousands separators
|
|
398
|
+
and a space before `tok`, for example `27,501 tok`.
|
|
399
|
+
|
|
400
|
+
### 0.1.40
|
|
401
|
+
|
|
402
|
+
- **RPM 0 is preserved**: setting `rate_limit_rpm=0` now stores an explicit
|
|
403
|
+
unlimited mode instead of falling back to the provider default.
|
|
404
|
+
|
|
384
405
|
### 0.1.39
|
|
385
406
|
|
|
386
407
|
- **Menu input fixes**: restores terminal line/echo mode before text or number
|
package/claude_any.py
CHANGED
|
@@ -85,7 +85,7 @@ PROVIDER_LABELS = {
|
|
|
85
85
|
"self-hosted-nim": "Self Hosted NIM",
|
|
86
86
|
}
|
|
87
87
|
APP_NAME = "Claude Any"
|
|
88
|
-
VERSION = "0.1.39"
|
|
88
|
+
VERSION = "0.1.43"
|
|
89
89
|
CREDITS = "Credits: One Ciel LLC"
|
|
90
90
|
|
|
91
91
|
LOG_LEVELS = {"SILENT": 0, "ERROR": 1, "WARN": 2, "INFO": 3, "DEBUG": 4, "TRACE": 5}
|
|
@@ -1339,7 +1339,22 @@ def main():
|
|
|
1339
1339
|
tokens = activity.get("tokens")
|
|
1340
1340
|
rpm_text += f" | upstream {age:.0f}s"
|
|
1341
1341
|
if tokens:
|
|
1342
|
-
|
|
1342
|
+
try:
|
|
1343
|
+
rpm_text += f" {int(tokens):,} tok"
|
|
1344
|
+
except Exception:
|
|
1345
|
+
rpm_text += f" {tokens} tok"
|
|
1346
|
+
output_tokens = activity.get("output_tokens")
|
|
1347
|
+
if output_tokens:
|
|
1348
|
+
try:
|
|
1349
|
+
rpm_text += f" -> {int(output_tokens):,} tok"
|
|
1350
|
+
except Exception:
|
|
1351
|
+
rpm_text += f" -> {output_tokens} tok"
|
|
1352
|
+
chunks = activity.get("chunks")
|
|
1353
|
+
if chunks:
|
|
1354
|
+
try:
|
|
1355
|
+
rpm_text += f" ({int(chunks):,} chunks)"
|
|
1356
|
+
except Exception:
|
|
1357
|
+
rpm_text += f" ({chunks} chunks)"
|
|
1343
1358
|
elif event in ("success", "error"):
|
|
1344
1359
|
rpm_text += f" | {event} {age:.0f}s"
|
|
1345
1360
|
print(f"{left} | {color(rpm_text)}")
|
|
@@ -2207,8 +2222,10 @@ def router_rate_limit_recent(timestamps: Any, now: float, window: float, *, incl
|
|
|
2207
2222
|
|
|
2208
2223
|
def router_rate_limit_usage(provider: str, pcfg: dict[str, Any], model: str | None = None) -> tuple[int, int | None]:
|
|
2209
2224
|
rpm = router_rate_limit_effective_rpm(provider, pcfg, model)
|
|
2210
|
-
if rpm is None:
|
|
2211
|
-
return 0, None
|
|
2225
|
+
if rpm is None:
|
|
2226
|
+
return 0, None
|
|
2227
|
+
if rpm == 0:
|
|
2228
|
+
return 0, 0
|
|
2212
2229
|
key = router_rate_limit_key(provider, pcfg, model)
|
|
2213
2230
|
now = time.time()
|
|
2214
2231
|
try:
|
|
@@ -4697,6 +4714,8 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4697
4714
|
source_body: dict[str, Any] | None = None,
|
|
4698
4715
|
start_index: int = 0,
|
|
4699
4716
|
word_chunking: bool = False,
|
|
4717
|
+
input_tokens: int | None = None,
|
|
4718
|
+
input_bytes: int | None = None,
|
|
4700
4719
|
) -> None:
|
|
4701
4720
|
next_content_index = start_index
|
|
4702
4721
|
text_started = False
|
|
@@ -4709,6 +4728,8 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4709
4728
|
tool_fragments: dict[int, dict[str, Any]] = {}
|
|
4710
4729
|
output_tokens = 0
|
|
4711
4730
|
finish_reason = "stop"
|
|
4731
|
+
chunks_seen = 0
|
|
4732
|
+
last_activity_update = 0.0
|
|
4712
4733
|
|
|
4713
4734
|
def emit(event_name: str, payload: dict[str, Any]) -> None:
|
|
4714
4735
|
handler.wfile.write(f"event: {event_name}\ndata: {json.dumps(payload, ensure_ascii=False)}\n\n".encode())
|
|
@@ -4736,8 +4757,27 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4736
4757
|
{"type": "content_block_delta", "index": idx, "delta": {"type": "text_delta", "text": text}},
|
|
4737
4758
|
)
|
|
4738
4759
|
|
|
4760
|
+
def update_stream_activity(force: bool = False) -> None:
|
|
4761
|
+
nonlocal last_activity_update
|
|
4762
|
+
now = time.time()
|
|
4763
|
+
if not force and now - last_activity_update < 0.5:
|
|
4764
|
+
return
|
|
4765
|
+
last_activity_update = now
|
|
4766
|
+
estimated_output = output_tokens or max(0, len(text_so_far) // 4)
|
|
4767
|
+
write_router_activity(
|
|
4768
|
+
"request",
|
|
4769
|
+
provider,
|
|
4770
|
+
model,
|
|
4771
|
+
tokens=input_tokens,
|
|
4772
|
+
bytes=input_bytes,
|
|
4773
|
+
output_tokens=estimated_output,
|
|
4774
|
+
chunks=chunks_seen,
|
|
4775
|
+
stream=True,
|
|
4776
|
+
)
|
|
4777
|
+
|
|
4739
4778
|
try:
|
|
4740
4779
|
for raw_line in resp:
|
|
4780
|
+
chunks_seen += 1
|
|
4741
4781
|
line = raw_line.decode("utf-8", errors="ignore").strip()
|
|
4742
4782
|
if not line or line.startswith(":"):
|
|
4743
4783
|
continue
|
|
@@ -4789,6 +4829,7 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4789
4829
|
emit_text_delta(to_flush)
|
|
4790
4830
|
else:
|
|
4791
4831
|
emit_text_delta(text_chunk)
|
|
4832
|
+
update_stream_activity()
|
|
4792
4833
|
for call in delta.get("tool_calls") or []:
|
|
4793
4834
|
if not isinstance(call, dict):
|
|
4794
4835
|
continue
|
|
@@ -4804,6 +4845,8 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4804
4845
|
slot["name"] += str(fn.get("name"))
|
|
4805
4846
|
if fn.get("arguments"):
|
|
4806
4847
|
slot["arguments"] += str(fn.get("arguments"))
|
|
4848
|
+
update_stream_activity()
|
|
4849
|
+
update_stream_activity(force=True)
|
|
4807
4850
|
if word_chunking and text_buffer:
|
|
4808
4851
|
to_flush, text_buffer = _split_word_buffer(text_buffer, force=True)
|
|
4809
4852
|
emit_text_delta(to_flush)
|
|
@@ -4928,19 +4971,30 @@ def upstream_http_error_message(exc: urllib.error.HTTPError, raw: str | None = N
|
|
|
4928
4971
|
UPSTREAM_RETRY_HTTP_CODES: frozenset[int] = frozenset({502, 503, 504})
|
|
4929
4972
|
|
|
4930
4973
|
|
|
4931
|
-
def upstream_retry_message(attempt: int, total: int) -> str:
|
|
4932
|
-
lang = str(load_config().get("language") or "en")
|
|
4933
|
-
if lang == "ko":
|
|
4934
|
-
return f"서버가 응답하지 않아 재시도합니다 ({attempt}/{total})."
|
|
4974
|
+
def upstream_retry_message(attempt: int, total: int) -> str:
|
|
4975
|
+
lang = str(load_config().get("language") or "en")
|
|
4976
|
+
if lang == "ko":
|
|
4977
|
+
return f"서버가 응답하지 않아 재시도합니다 ({attempt}/{total})."
|
|
4935
4978
|
if lang == "ja":
|
|
4936
4979
|
return f"サーバーが応答しないため再試行します ({attempt}/{total})。"
|
|
4937
4980
|
if lang == "zh":
|
|
4938
4981
|
return f"服务器未响应,正在重试 ({attempt}/{total})。"
|
|
4939
|
-
return f"Upstream server did not respond; retrying ({attempt}/{total})."
|
|
4940
|
-
|
|
4941
|
-
|
|
4942
|
-
def
|
|
4943
|
-
|
|
4982
|
+
return f"Upstream server did not respond; retrying ({attempt}/{total})."
|
|
4983
|
+
|
|
4984
|
+
|
|
4985
|
+
def upstream_rate_limit_retry_message(attempt: int, total: int) -> str:
|
|
4986
|
+
lang = str(load_config().get("language") or "en")
|
|
4987
|
+
if lang == "ko":
|
|
4988
|
+
return f"Upstream rate limit에 도달해 대기 후 재시도합니다 ({attempt}/{total})."
|
|
4989
|
+
if lang == "ja":
|
|
4990
|
+
return f"Upstream rate limit に達したため、待機して再試行します ({attempt}/{total})。"
|
|
4991
|
+
if lang == "zh":
|
|
4992
|
+
return f"已达到 upstream rate limit,等待后重试 ({attempt}/{total})。"
|
|
4993
|
+
return f"Upstream rate limit reached; waiting before retry ({attempt}/{total})."
|
|
4994
|
+
|
|
4995
|
+
|
|
4996
|
+
def upstream_retry_wait_seconds(attempt: int) -> float:
|
|
4997
|
+
return min(20.0, 2.0 * max(1, attempt))
|
|
4944
4998
|
|
|
4945
4999
|
|
|
4946
5000
|
def retryable_timeout_exception(exc: BaseException) -> bool:
|
|
@@ -4985,10 +5039,15 @@ def post_json_with_rate_retry(
|
|
|
4985
5039
|
except urllib.error.HTTPError as exc:
|
|
4986
5040
|
raw = exc.read().decode("utf-8", errors="ignore")
|
|
4987
5041
|
learn_router_rate_limit_headers(provider, pcfg, model, exc.headers)
|
|
4988
|
-
if exc.code == 429 and attempt
|
|
4989
|
-
|
|
4990
|
-
|
|
4991
|
-
|
|
5042
|
+
if exc.code == 429 and attempt + 1 < max_attempts:
|
|
5043
|
+
retry_no = attempt + 1
|
|
5044
|
+
wait = register_router_rate_limit_backoff(provider, pcfg, model, exc.headers.get("Retry-After"))
|
|
5045
|
+
write_router_activity("retry", provider, model, attempt=retry_no, total=gateway_retries, code=exc.code, wait=wait, tokens=token_estimate, bytes=byte_estimate)
|
|
5046
|
+
router_log("WARN", f"upstream_rate_limit_retry provider={provider} model={model} attempt={retry_no}/{gateway_retries} wait={wait:.2f}s tokens={token_estimate} bytes={byte_estimate}")
|
|
5047
|
+
if retry_notice:
|
|
5048
|
+
retry_notice(upstream_rate_limit_retry_message(retry_no, gateway_retries))
|
|
5049
|
+
time.sleep(wait)
|
|
5050
|
+
continue
|
|
4992
5051
|
if exc.code in UPSTREAM_RETRY_HTTP_CODES and attempt + 1 < max_attempts:
|
|
4993
5052
|
retry_no = attempt + 1
|
|
4994
5053
|
write_router_activity("retry", provider, model, attempt=retry_no, total=gateway_retries, code=exc.code, tokens=token_estimate, bytes=byte_estimate)
|
|
@@ -5049,8 +5108,13 @@ def open_openai_stream_with_rate_retry(
|
|
|
5049
5108
|
except urllib.error.HTTPError as exc:
|
|
5050
5109
|
raw = exc.read().decode("utf-8", errors="ignore")
|
|
5051
5110
|
learn_router_rate_limit_headers(provider, pcfg, model, exc.headers)
|
|
5052
|
-
if exc.code == 429 and attempt
|
|
5111
|
+
if exc.code == 429 and attempt + 1 < max_attempts:
|
|
5112
|
+
retry_no = attempt + 1
|
|
5053
5113
|
wait = register_router_rate_limit_backoff(provider, pcfg, model, exc.headers.get("Retry-After"))
|
|
5114
|
+
write_router_activity("retry", provider, model, attempt=retry_no, total=gateway_retries, code=exc.code, wait=wait, tokens=token_estimate, bytes=byte_estimate, stream=True)
|
|
5115
|
+
router_log("WARN", f"upstream_stream_rate_limit_retry provider={provider} model={model} attempt={retry_no}/{gateway_retries} wait={wait:.2f}s tokens={token_estimate} bytes={byte_estimate}")
|
|
5116
|
+
if retry_notice:
|
|
5117
|
+
retry_notice(upstream_rate_limit_retry_message(retry_no, gateway_retries))
|
|
5054
5118
|
time.sleep(wait)
|
|
5055
5119
|
continue
|
|
5056
5120
|
if exc.code in UPSTREAM_RETRY_HTTP_CODES and attempt + 1 < max_attempts:
|
|
@@ -5108,6 +5172,8 @@ def forward_openai_compatible_chat(handler: BaseHTTPRequestHandler, provider: st
|
|
|
5108
5172
|
model,
|
|
5109
5173
|
emit_retry_notice,
|
|
5110
5174
|
)
|
|
5175
|
+
req_tokens = estimate_tokens(req_body)
|
|
5176
|
+
req_bytes = len(json.dumps(req_body, ensure_ascii=False).encode("utf-8"))
|
|
5111
5177
|
stream_openai_chat_to_anthropic_sse(
|
|
5112
5178
|
handler,
|
|
5113
5179
|
resp,
|
|
@@ -5116,8 +5182,10 @@ def forward_openai_compatible_chat(handler: BaseHTTPRequestHandler, provider: st
|
|
|
5116
5182
|
source_body=body,
|
|
5117
5183
|
start_index=index,
|
|
5118
5184
|
word_chunking=bool(pcfg.get("stream_word_chunking", False)),
|
|
5185
|
+
input_tokens=req_tokens,
|
|
5186
|
+
input_bytes=req_bytes,
|
|
5119
5187
|
)
|
|
5120
|
-
write_router_activity("success", provider, model, tokens=
|
|
5188
|
+
write_router_activity("success", provider, model, tokens=req_tokens, bytes=req_bytes, stream=True)
|
|
5121
5189
|
except RuntimeError as exc:
|
|
5122
5190
|
msg = str(exc)
|
|
5123
5191
|
write_anthropic_stream_blocks(handler, [{"type": "text", "text": f"Upstream error: {msg}"}], index)
|
|
@@ -6663,11 +6731,11 @@ def apply_provider_option(provider: str, pcfg: dict[str, Any], token: str) -> No
|
|
|
6663
6731
|
raise SystemExit("timeout must be a positive integer; values above 10000 are treated as milliseconds")
|
|
6664
6732
|
pcfg["request_timeout_ms"] = fixed if key.endswith("_ms") or fixed > 10000 else fixed * 1000
|
|
6665
6733
|
return
|
|
6666
|
-
if key in ("rate_limit", "rate_limit_rpm", "rpm"):
|
|
6667
|
-
fixed = positive_int(value)
|
|
6668
|
-
if value in (0, "0", False, None):
|
|
6669
|
-
pcfg
|
|
6670
|
-
return
|
|
6734
|
+
if key in ("rate_limit", "rate_limit_rpm", "rpm"):
|
|
6735
|
+
fixed = positive_int(value)
|
|
6736
|
+
if value in (0, "0", False, None):
|
|
6737
|
+
pcfg["rate_limit_rpm"] = 0
|
|
6738
|
+
return
|
|
6671
6739
|
if not fixed:
|
|
6672
6740
|
raise SystemExit("rate_limit_rpm must be a positive integer, or 0/unset to disable")
|
|
6673
6741
|
pcfg["rate_limit_rpm"] = fixed
|
package/docs/README.ja.md
CHANGED
|
@@ -47,7 +47,7 @@ vLLM、NVIDIA hosted、self-hosted NIM を選択し、通常の Claude Code 引
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
現在のバージョン: `0.1.39`
|
|
50
|
+
現在のバージョン: `0.1.43`
|
|
51
51
|
|
|
52
52
|
## 作られた理由
|
|
53
53
|
|
|
@@ -351,6 +351,26 @@ Windows/Linux 管理、クリーンアップスクリプト、定期的なセキ
|
|
|
351
351
|
|
|
352
352
|
## 変更履歴
|
|
353
353
|
|
|
354
|
+
### 0.1.43
|
|
355
|
+
|
|
356
|
+
- **429 backoff retry**: upstream `429 Too Many Requests` 応答を初回 backoff 後に
|
|
357
|
+
raw error として漏らさず、すべての retry attempt で backoff/retry event として処理します。
|
|
358
|
+
|
|
359
|
+
### 0.1.42
|
|
360
|
+
|
|
361
|
+
- **ライブストリーム進捗**: statusline が upstream streaming の出力進捗を
|
|
362
|
+
入力/出力 token 推定値と chunk 数で継続更新します。
|
|
363
|
+
|
|
364
|
+
### 0.1.41
|
|
365
|
+
|
|
366
|
+
- **Statusline 表示改善**: upstream token 数に桁区切りと `tok` 前の空白を入れ、
|
|
367
|
+
`27,501 tok` のように表示します。
|
|
368
|
+
|
|
369
|
+
### 0.1.40
|
|
370
|
+
|
|
371
|
+
- **RPM 0 を保持**: `rate_limit_rpm=0` の設定が provider 既定値に戻らず、
|
|
372
|
+
明示的な無制限モードとして保存されます。
|
|
373
|
+
|
|
354
374
|
### 0.1.39
|
|
355
375
|
|
|
356
376
|
- **メニュー入力修正**: テキスト/数字プロンプトの前に terminal line/echo mode を
|
package/docs/README.ko.md
CHANGED
|
@@ -47,7 +47,7 @@ NVIDIA hosted, self-hosted NIM을 선택하고, Claude Code의 일반 인자는
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
현재 버전: `0.1.39`
|
|
50
|
+
현재 버전: `0.1.43`
|
|
51
51
|
|
|
52
52
|
## 왜 만들었나
|
|
53
53
|
|
|
@@ -351,6 +351,26 @@ Windows 이벤트 로그 리뷰, 바이러스/랜섬웨어 침입 시도 정리,
|
|
|
351
351
|
|
|
352
352
|
## 변경 이력
|
|
353
353
|
|
|
354
|
+
### 0.1.43
|
|
355
|
+
|
|
356
|
+
- **429 backoff retry**: upstream `429 Too Many Requests` 응답을 첫 backoff 이후
|
|
357
|
+
raw error로 흘리지 않고, 모든 retry attempt에서 backoff/retry 이벤트로 처리합니다.
|
|
358
|
+
|
|
359
|
+
### 0.1.42
|
|
360
|
+
|
|
361
|
+
- **실시간 스트림 진행 표시**: statusline이 upstream streaming 출력 진행을
|
|
362
|
+
입력/출력 token 추정치와 chunk 수로 계속 갱신합니다.
|
|
363
|
+
|
|
364
|
+
### 0.1.41
|
|
365
|
+
|
|
366
|
+
- **Statusline 표시 개선**: upstream token 수에 천 단위 구분자와 `tok` 앞 공백을
|
|
367
|
+
넣어 `27,501 tok`처럼 표시합니다.
|
|
368
|
+
|
|
369
|
+
### 0.1.40
|
|
370
|
+
|
|
371
|
+
- **RPM 0 유지**: `rate_limit_rpm=0` 설정이 provider 기본값으로 되돌아가지 않고
|
|
372
|
+
명시적인 무제한 모드로 저장됩니다.
|
|
373
|
+
|
|
354
374
|
### 0.1.39
|
|
355
375
|
|
|
356
376
|
- **메뉴 입력 수정**: 텍스트/숫자 프롬프트 전에 터미널 line/echo 모드를 복구하여
|
package/docs/README.zh.md
CHANGED
|
@@ -47,7 +47,7 @@ NIM,并把普通 Claude Code 参数原样传递。
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
当前版本: `0.1.39`
|
|
50
|
+
当前版本: `0.1.43`
|
|
51
51
|
|
|
52
52
|
## 为什么存在
|
|
53
53
|
|
|
@@ -337,6 +337,26 @@ Hermes 格式模型或部分较旧的 Qwen tool template。
|
|
|
337
337
|
|
|
338
338
|
## 更新日志
|
|
339
339
|
|
|
340
|
+
### 0.1.43
|
|
341
|
+
|
|
342
|
+
- **429 backoff retry**:upstream `429 Too Many Requests` 响应现在会在所有 retry
|
|
343
|
+
attempt 中作为 backoff/retry event 处理,不再在首次 backoff 后泄漏 raw error。
|
|
344
|
+
|
|
345
|
+
### 0.1.42
|
|
346
|
+
|
|
347
|
+
- **实时流式进度**:statusline 会持续更新 upstream streaming 输出进度,
|
|
348
|
+
显示输入/输出 token 估算值和 chunk 数。
|
|
349
|
+
|
|
350
|
+
### 0.1.41
|
|
351
|
+
|
|
352
|
+
- **Statusline 格式优化**:upstream token 数现在带千位分隔符,并在 `tok` 前加入空格,
|
|
353
|
+
例如 `27,501 tok`。
|
|
354
|
+
|
|
355
|
+
### 0.1.40
|
|
356
|
+
|
|
357
|
+
- **保留 RPM 0**:`rate_limit_rpm=0` 现在会保存为明确的无限制模式,
|
|
358
|
+
不会回退到 provider 默认值。
|
|
359
|
+
|
|
340
360
|
### 0.1.39
|
|
341
361
|
|
|
342
362
|
- **菜单输入修复**:在文本/数字提示前恢复 terminal line/echo mode,
|
package/docs/manual.md
CHANGED
package/package.json
CHANGED