@oneciel-ai/claude-any 0.1.42 → 0.1.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -1
- package/claude_any.py +96 -52
- package/docs/README.ja.md +12 -1
- package/docs/README.ko.md +12 -1
- package/docs/README.zh.md +11 -1
- package/docs/manual.md +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -48,7 +48,7 @@ arguments through unchanged.
|
|
|
48
48
|
|
|
49
49
|
Credits: One Ciel LLC
|
|
50
50
|
|
|
51
|
-
Current version: `0.1.42`
|
|
51
|
+
Current version: `0.1.44`
|
|
52
52
|
|
|
53
53
|
## Why This Exists
|
|
54
54
|
|
|
@@ -381,6 +381,18 @@ steps under that larger model's supervision.
|
|
|
381
381
|
|
|
382
382
|
## Changelog
|
|
383
383
|
|
|
384
|
+
### 0.1.44
|
|
385
|
+
|
|
386
|
+
- **Statusline split**: turning Rate Limit status off now hides only RPM,
|
|
387
|
+
server-limit, and wait counters. Upstream progress, retry, error, and token
|
|
388
|
+
diagnostics remain visible.
|
|
389
|
+
|
|
390
|
+
### 0.1.43
|
|
391
|
+
|
|
392
|
+
- **429 backoff retry**: upstream `429 Too Many Requests` responses are now
|
|
393
|
+
handled as retry/backoff events across all retry attempts instead of leaking
|
|
394
|
+
the raw upstream error after the first backoff.
|
|
395
|
+
|
|
384
396
|
### 0.1.42
|
|
385
397
|
|
|
386
398
|
- **Live stream progress**: the statusline now updates streamed upstream output
|
package/claude_any.py
CHANGED
|
@@ -85,7 +85,7 @@ PROVIDER_LABELS = {
|
|
|
85
85
|
"self-hosted-nim": "Self Hosted NIM",
|
|
86
86
|
}
|
|
87
87
|
APP_NAME = "Claude Any"
|
|
88
|
-
VERSION = "0.1.42"
|
|
88
|
+
VERSION = "0.1.44"
|
|
89
89
|
CREDITS = "Credits: One Ciel LLC"
|
|
90
90
|
|
|
91
91
|
LOG_LEVELS = {"SILENT": 0, "ERROR": 1, "WARN": 2, "INFO": 3, "DEBUG": 4, "TRACE": 5}
|
|
@@ -1249,10 +1249,11 @@ def main():
|
|
|
1249
1249
|
except Exception:
|
|
1250
1250
|
session = {}
|
|
1251
1251
|
cfg = load_json(CONFIG_PATH, {})
|
|
1252
|
-
providers = cfg.get("providers") if isinstance(cfg.get("providers"), dict) else {}
|
|
1253
|
-
provider = str(cfg.get("current_provider") or "")
|
|
1254
|
-
pcfg = providers.get(provider) if isinstance(providers.get(provider), dict) else {}
|
|
1255
|
-
|
|
1252
|
+
providers = cfg.get("providers") if isinstance(cfg.get("providers"), dict) else {}
|
|
1253
|
+
provider = str(cfg.get("current_provider") or "")
|
|
1254
|
+
pcfg = providers.get(provider) if isinstance(providers.get(provider), dict) else {}
|
|
1255
|
+
rpm_status = bool(pcfg.get("rate_limit_status", True))
|
|
1256
|
+
model = str(pcfg.get("current_model") or "")
|
|
1256
1257
|
raw_rpm = pcfg.get("rate_limit_rpm")
|
|
1257
1258
|
if raw_rpm is None and provider in ("nvidia-hosted", "self-hosted-nim", "ollama", "ollama-cloud"):
|
|
1258
1259
|
raw_rpm = 40
|
|
@@ -1303,29 +1304,33 @@ def main():
|
|
|
1303
1304
|
left = f"[{model_name}]"
|
|
1304
1305
|
if dir_name:
|
|
1305
1306
|
left += f" {dir_name}"
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
if server_remaining is not None:
|
|
1315
|
-
parts
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
|
|
1307
|
+
status_parts = []
|
|
1308
|
+
if rpm_status:
|
|
1309
|
+
if rpm > 0:
|
|
1310
|
+
shown_limit = display_capacity(rpm)
|
|
1311
|
+
shown_used = min(used, shown_limit)
|
|
1312
|
+
rpm_text = f"RPM used: {shown_used}/{shown_limit}"
|
|
1313
|
+
else:
|
|
1314
|
+
rpm_text = f"RPM used: {used}/min (unlimited)"
|
|
1315
|
+
if server_rpm or server_remaining is not None or server_reset_seconds is not None:
|
|
1316
|
+
parts = []
|
|
1317
|
+
if server_remaining is not None:
|
|
1318
|
+
parts.append(f"remaining {server_remaining}")
|
|
1319
|
+
if server_rpm:
|
|
1320
|
+
parts.append(f"limit {server_rpm}")
|
|
1321
|
+
try:
|
|
1322
|
+
if server_reset_seconds is not None and float(server_reset_seconds) > 0:
|
|
1323
|
+
parts.append(f"reset {float(server_reset_seconds):.0f}s")
|
|
1324
|
+
except Exception:
|
|
1325
|
+
pass
|
|
1326
|
+
if parts:
|
|
1327
|
+
rpm_text += " | server " + ", ".join(parts)
|
|
1328
|
+
if penalty_until > now:
|
|
1329
|
+
rpm_text += f" | wait {max(0.0, penalty_until - now):.0f}s"
|
|
1330
|
+
elif last_wait >= 0.5 and 0.0 <= now - updated_at < 60.0:
|
|
1331
|
+
rpm_text += f" | wait {last_wait:.1f}s"
|
|
1332
|
+
status_parts.append(rpm_text)
|
|
1333
|
+
activity_text = ""
|
|
1329
1334
|
if isinstance(activity, dict):
|
|
1330
1335
|
try:
|
|
1331
1336
|
age = now - float(activity.get("updated_at") or 0)
|
|
@@ -1334,30 +1339,48 @@ def main():
|
|
|
1334
1339
|
if 0 <= age < 180:
|
|
1335
1340
|
event = str(activity.get("event") or "")
|
|
1336
1341
|
if event == "retry":
|
|
1337
|
-
|
|
1342
|
+
activity_text = f"retry {activity.get('attempt')}/{activity.get('total')}"
|
|
1343
|
+
wait = activity.get("wait")
|
|
1344
|
+
try:
|
|
1345
|
+
if wait is not None and float(wait) > 0:
|
|
1346
|
+
activity_text += f" wait {float(wait):.0f}s"
|
|
1347
|
+
except Exception:
|
|
1348
|
+
pass
|
|
1349
|
+
tokens = activity.get("tokens")
|
|
1350
|
+
if tokens:
|
|
1351
|
+
try:
|
|
1352
|
+
activity_text += f" last input {int(tokens):,} tok"
|
|
1353
|
+
except Exception:
|
|
1354
|
+
activity_text += f" last input {tokens} tok"
|
|
1338
1355
|
elif event == "request":
|
|
1339
1356
|
tokens = activity.get("tokens")
|
|
1340
|
-
|
|
1357
|
+
activity_text = f"upstream {age:.0f}s"
|
|
1341
1358
|
if tokens:
|
|
1342
1359
|
try:
|
|
1343
|
-
|
|
1360
|
+
activity_text += f" {int(tokens):,} tok"
|
|
1344
1361
|
except Exception:
|
|
1345
|
-
|
|
1362
|
+
activity_text += f" {tokens} tok"
|
|
1346
1363
|
output_tokens = activity.get("output_tokens")
|
|
1347
1364
|
if output_tokens:
|
|
1348
1365
|
try:
|
|
1349
|
-
|
|
1366
|
+
activity_text += f" -> {int(output_tokens):,} tok"
|
|
1350
1367
|
except Exception:
|
|
1351
|
-
|
|
1368
|
+
activity_text += f" -> {output_tokens} tok"
|
|
1352
1369
|
chunks = activity.get("chunks")
|
|
1353
1370
|
if chunks:
|
|
1354
1371
|
try:
|
|
1355
|
-
|
|
1372
|
+
activity_text += f" ({int(chunks):,} chunks)"
|
|
1356
1373
|
except Exception:
|
|
1357
|
-
|
|
1374
|
+
activity_text += f" ({chunks} chunks)"
|
|
1358
1375
|
elif event in ("success", "error"):
|
|
1359
|
-
|
|
1360
|
-
|
|
1376
|
+
activity_text = f"{event} {age:.0f}s"
|
|
1377
|
+
if activity_text:
|
|
1378
|
+
status_parts.append(activity_text)
|
|
1379
|
+
status_text = " | ".join(status_parts)
|
|
1380
|
+
if status_text:
|
|
1381
|
+
print(f"{left} | {color(status_text)}")
|
|
1382
|
+
else:
|
|
1383
|
+
print(left)
|
|
1361
1384
|
|
|
1362
1385
|
|
|
1363
1386
|
if __name__ == "__main__":
|
|
@@ -4971,19 +4994,30 @@ def upstream_http_error_message(exc: urllib.error.HTTPError, raw: str | None = N
|
|
|
4971
4994
|
UPSTREAM_RETRY_HTTP_CODES: frozenset[int] = frozenset({502, 503, 504})
|
|
4972
4995
|
|
|
4973
4996
|
|
|
4974
|
-
def upstream_retry_message(attempt: int, total: int) -> str:
|
|
4975
|
-
lang = str(load_config().get("language") or "en")
|
|
4976
|
-
if lang == "ko":
|
|
4977
|
-
return f"서버가 응답하지 않아 재시도합니다 ({attempt}/{total})."
|
|
4997
|
+
def upstream_retry_message(attempt: int, total: int) -> str:
|
|
4998
|
+
lang = str(load_config().get("language") or "en")
|
|
4999
|
+
if lang == "ko":
|
|
5000
|
+
return f"서버가 응답하지 않아 재시도합니다 ({attempt}/{total})."
|
|
4978
5001
|
if lang == "ja":
|
|
4979
5002
|
return f"サーバーが応答しないため再試行します ({attempt}/{total})。"
|
|
4980
5003
|
if lang == "zh":
|
|
4981
5004
|
return f"服务器未响应,正在重试 ({attempt}/{total})。"
|
|
4982
|
-
return f"Upstream server did not respond; retrying ({attempt}/{total})."
|
|
4983
|
-
|
|
4984
|
-
|
|
4985
|
-
def
|
|
4986
|
-
|
|
5005
|
+
return f"Upstream server did not respond; retrying ({attempt}/{total})."
|
|
5006
|
+
|
|
5007
|
+
|
|
5008
|
+
def upstream_rate_limit_retry_message(attempt: int, total: int) -> str:
|
|
5009
|
+
lang = str(load_config().get("language") or "en")
|
|
5010
|
+
if lang == "ko":
|
|
5011
|
+
return f"Upstream rate limit에 도달해 대기 후 재시도합니다 ({attempt}/{total})."
|
|
5012
|
+
if lang == "ja":
|
|
5013
|
+
return f"Upstream rate limit に達したため、待機して再試行します ({attempt}/{total})。"
|
|
5014
|
+
if lang == "zh":
|
|
5015
|
+
return f"已达到 upstream rate limit,等待后重试 ({attempt}/{total})。"
|
|
5016
|
+
return f"Upstream rate limit reached; waiting before retry ({attempt}/{total})."
|
|
5017
|
+
|
|
5018
|
+
|
|
5019
|
+
def upstream_retry_wait_seconds(attempt: int) -> float:
|
|
5020
|
+
return min(20.0, 2.0 * max(1, attempt))
|
|
4987
5021
|
|
|
4988
5022
|
|
|
4989
5023
|
def retryable_timeout_exception(exc: BaseException) -> bool:
|
|
@@ -5028,10 +5062,15 @@ def post_json_with_rate_retry(
|
|
|
5028
5062
|
except urllib.error.HTTPError as exc:
|
|
5029
5063
|
raw = exc.read().decode("utf-8", errors="ignore")
|
|
5030
5064
|
learn_router_rate_limit_headers(provider, pcfg, model, exc.headers)
|
|
5031
|
-
if exc.code == 429 and attempt
|
|
5032
|
-
|
|
5033
|
-
|
|
5034
|
-
|
|
5065
|
+
if exc.code == 429 and attempt + 1 < max_attempts:
|
|
5066
|
+
retry_no = attempt + 1
|
|
5067
|
+
wait = register_router_rate_limit_backoff(provider, pcfg, model, exc.headers.get("Retry-After"))
|
|
5068
|
+
write_router_activity("retry", provider, model, attempt=retry_no, total=gateway_retries, code=exc.code, wait=wait, tokens=token_estimate, bytes=byte_estimate)
|
|
5069
|
+
router_log("WARN", f"upstream_rate_limit_retry provider={provider} model={model} attempt={retry_no}/{gateway_retries} wait={wait:.2f}s tokens={token_estimate} bytes={byte_estimate}")
|
|
5070
|
+
if retry_notice:
|
|
5071
|
+
retry_notice(upstream_rate_limit_retry_message(retry_no, gateway_retries))
|
|
5072
|
+
time.sleep(wait)
|
|
5073
|
+
continue
|
|
5035
5074
|
if exc.code in UPSTREAM_RETRY_HTTP_CODES and attempt + 1 < max_attempts:
|
|
5036
5075
|
retry_no = attempt + 1
|
|
5037
5076
|
write_router_activity("retry", provider, model, attempt=retry_no, total=gateway_retries, code=exc.code, tokens=token_estimate, bytes=byte_estimate)
|
|
@@ -5092,8 +5131,13 @@ def open_openai_stream_with_rate_retry(
|
|
|
5092
5131
|
except urllib.error.HTTPError as exc:
|
|
5093
5132
|
raw = exc.read().decode("utf-8", errors="ignore")
|
|
5094
5133
|
learn_router_rate_limit_headers(provider, pcfg, model, exc.headers)
|
|
5095
|
-
if exc.code == 429 and attempt
|
|
5134
|
+
if exc.code == 429 and attempt + 1 < max_attempts:
|
|
5135
|
+
retry_no = attempt + 1
|
|
5096
5136
|
wait = register_router_rate_limit_backoff(provider, pcfg, model, exc.headers.get("Retry-After"))
|
|
5137
|
+
write_router_activity("retry", provider, model, attempt=retry_no, total=gateway_retries, code=exc.code, wait=wait, tokens=token_estimate, bytes=byte_estimate, stream=True)
|
|
5138
|
+
router_log("WARN", f"upstream_stream_rate_limit_retry provider={provider} model={model} attempt={retry_no}/{gateway_retries} wait={wait:.2f}s tokens={token_estimate} bytes={byte_estimate}")
|
|
5139
|
+
if retry_notice:
|
|
5140
|
+
retry_notice(upstream_rate_limit_retry_message(retry_no, gateway_retries))
|
|
5097
5141
|
time.sleep(wait)
|
|
5098
5142
|
continue
|
|
5099
5143
|
if exc.code in UPSTREAM_RETRY_HTTP_CODES and attempt + 1 < max_attempts:
|
package/docs/README.ja.md
CHANGED
|
@@ -47,7 +47,7 @@ vLLM、NVIDIA hosted、self-hosted NIM を選択し、通常の Claude Code 引
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
現在のバージョン: `0.1.42`
|
|
50
|
+
現在のバージョン: `0.1.44`
|
|
51
51
|
|
|
52
52
|
## 作られた理由
|
|
53
53
|
|
|
@@ -351,6 +351,17 @@ Windows/Linux 管理、クリーンアップスクリプト、定期的なセキ
|
|
|
351
351
|
|
|
352
352
|
## 変更履歴
|
|
353
353
|
|
|
354
|
+
### 0.1.44
|
|
355
|
+
|
|
356
|
+
- **Statusline split**: Rate Limit status を off にした場合、RPM、server-limit、
|
|
357
|
+
wait counter だけを非表示にします。Upstream 進捗、retry、error、token 診断は
|
|
358
|
+
引き続き表示されます。
|
|
359
|
+
|
|
360
|
+
### 0.1.43
|
|
361
|
+
|
|
362
|
+
- **429 backoff retry**: upstream `429 Too Many Requests` 応答を初回 backoff 後に
|
|
363
|
+
raw error として漏らさず、すべての retry attempt で backoff/retry event として処理します。
|
|
364
|
+
|
|
354
365
|
### 0.1.42
|
|
355
366
|
|
|
356
367
|
- **ライブストリーム進捗**: statusline が upstream streaming の出力進捗を
|
package/docs/README.ko.md
CHANGED
|
@@ -47,7 +47,7 @@ NVIDIA hosted, self-hosted NIM을 선택하고, Claude Code의 일반 인자는
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
현재 버전: `0.1.42`
|
|
50
|
+
현재 버전: `0.1.44`
|
|
51
51
|
|
|
52
52
|
## 왜 만들었나
|
|
53
53
|
|
|
@@ -351,6 +351,17 @@ Windows 이벤트 로그 리뷰, 바이러스/랜섬웨어 침입 시도 정리,
|
|
|
351
351
|
|
|
352
352
|
## 변경 이력
|
|
353
353
|
|
|
354
|
+
### 0.1.44
|
|
355
|
+
|
|
356
|
+
- **Statusline 분리**: Rate Limit status를 off로 바꾸면 RPM, server-limit,
|
|
357
|
+
wait 카운터만 숨깁니다. Upstream 진행, retry, error, token 진단은 계속
|
|
358
|
+
표시됩니다.
|
|
359
|
+
|
|
360
|
+
### 0.1.43
|
|
361
|
+
|
|
362
|
+
- **429 backoff retry**: upstream `429 Too Many Requests` 응답을 첫 backoff 이후
|
|
363
|
+
raw error로 흘리지 않고, 모든 retry attempt에서 backoff/retry 이벤트로 처리합니다.
|
|
364
|
+
|
|
354
365
|
### 0.1.42
|
|
355
366
|
|
|
356
367
|
- **실시간 스트림 진행 표시**: statusline이 upstream streaming 출력 진행을
|
package/docs/README.zh.md
CHANGED
|
@@ -47,7 +47,7 @@ NIM,并把普通 Claude Code 参数原样传递。
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
当前版本: `0.1.42`
|
|
50
|
+
当前版本: `0.1.44`
|
|
51
51
|
|
|
52
52
|
## 为什么存在
|
|
53
53
|
|
|
@@ -337,6 +337,16 @@ Hermes 格式模型或部分较旧的 Qwen tool template。
|
|
|
337
337
|
|
|
338
338
|
## 更新日志
|
|
339
339
|
|
|
340
|
+
### 0.1.44
|
|
341
|
+
|
|
342
|
+
- **Statusline split**:关闭 Rate Limit status 后只隐藏 RPM、server-limit 和
|
|
343
|
+
wait 计数。Upstream 进度、retry、error 和 token 诊断仍会显示。
|
|
344
|
+
|
|
345
|
+
### 0.1.43
|
|
346
|
+
|
|
347
|
+
- **429 backoff retry**:upstream `429 Too Many Requests` 响应现在会在所有 retry
|
|
348
|
+
attempt 中作为 backoff/retry event 处理,不再在首次 backoff 后泄漏 raw error。
|
|
349
|
+
|
|
340
350
|
### 0.1.42
|
|
341
351
|
|
|
342
352
|
- **实时流式进度**:statusline 会持续更新 upstream streaming 输出进度,
|
package/docs/manual.md
CHANGED
package/package.json
CHANGED