@oneciel-ai/claude-any 0.1.37 → 0.1.38

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -48,7 +48,7 @@ arguments through unchanged.
48
48
 
49
49
  Credits: One Ciel LLC
50
50
 
51
- Current version: `0.1.37`
51
+ Current version: `0.1.38`
52
52
 
53
53
  ## Why This Exists
54
54
 
@@ -381,6 +381,14 @@ steps under that larger model's supervision.
381
381
 
382
382
  ## Changelog
383
383
 
384
+ ### 0.1.38
385
+
386
+ - **User-selected context windows**: removes the NVIDIA hosted 32K safety cap.
387
+ The router now uses the context window selected in LLM options or headless
388
+ configuration, with model-aware fallback only when no value is configured.
389
+ - **NVIDIA presets updated**: NVIDIA hosted presets now start at 65K and scale
390
+ up to 256K for large-output/reasoning workflows.
391
+
384
392
  ### 0.1.37
385
393
 
386
394
  - **Pseudo tool-call recovery**: the NVIDIA/OpenAI-compatible stream path now
package/claude_any.py CHANGED
@@ -85,7 +85,7 @@ PROVIDER_LABELS = {
85
85
  "self-hosted-nim": "Self Hosted NIM",
86
86
  }
87
87
  APP_NAME = "Claude Any"
88
- VERSION = "0.1.37"
88
+ VERSION = "0.1.38"
89
89
  CREDITS = "Credits: One Ciel LLC"
90
90
 
91
91
  LOG_LEVELS = {"SILENT": 0, "ERROR": 1, "WARN": 2, "INFO": 3, "DEBUG": 4, "TRACE": 5}
@@ -144,7 +144,7 @@ LANGUAGES = {
144
144
  "zh": "中文",
145
145
  }
146
146
 
147
- MODEL_PRESETS: dict[str, dict[str, Any]] = {
147
+ MODEL_PRESETS: dict[str, dict[str, Any]] = {
148
148
  "glm-4.7": {"compat_max_tokens": 64, "thinking": True, "num_ctx_min": 32768, "num_ctx_max": 131072},
149
149
  "glm-5.1": {"compat_max_tokens": 64, "thinking": True, "num_ctx_min": 32768, "num_ctx_max": 131072},
150
150
  "glm-4.7:cloud": {"compat_max_tokens": 64, "thinking": True, "num_ctx_min": 32768, "num_ctx_max": 131072},
@@ -154,10 +154,21 @@ MODEL_PRESETS: dict[str, dict[str, Any]] = {
154
154
  "qwen3.6:27b": {"compat_max_tokens": 16, "thinking": False, "num_ctx_min": 32768, "num_ctx_max": 65536},
155
155
  "deepseek-r1": {"compat_max_tokens": 64, "thinking": True, "num_ctx_min": 32768, "num_ctx_max": 131072},
156
156
  "llama3.3:70b": {"compat_max_tokens": 16, "thinking": False, "num_ctx_min": 32768, "num_ctx_max": 131072},
157
- }
158
-
159
-
160
- def model_preset(model_id: str) -> dict[str, Any]:
157
+ }
158
+
159
+
160
+ def nvidia_hosted_context_default(model_id: str) -> int:
161
+ model = model_id.lower()
162
+ if "kimi-k2.6" in model or "kimi_k2.6" in model:
163
+ return 262144
164
+ if "deepseek" in model:
165
+ return 131072
166
+ if "glm" in model or "qwen" in model:
167
+ return 65536
168
+ return 65536
169
+
170
+
171
+ def model_preset(model_id: str) -> dict[str, Any]:
161
172
  """Return preset dict for a model ID, checking exact match then prefix match."""
162
173
  if model_id in MODEL_PRESETS:
163
174
  return MODEL_PRESETS[model_id]
@@ -722,7 +733,7 @@ DEFAULT_CONFIG: dict[str, Any] = {
722
733
  "native_compat": False,
723
734
  "rate_limit_rpm": 40,
724
735
  "rate_limit_status": True,
725
- "context_window": 32768,
736
+ "context_window": 65536,
726
737
  "max_output_tokens": 4096,
727
738
  "temperature": 0.7,
728
739
  "top_p": 0.8,
@@ -788,7 +799,14 @@ def apply_config_migrations(cfg: dict[str, Any]) -> None:
788
799
  if not migrations.get(marker):
789
800
  pcfg = cfg.get("providers", {}).get("nvidia-hosted", {})
790
801
  if isinstance(pcfg, dict) and not positive_int(pcfg.get("context_window")):
791
- pcfg["context_window"] = 32768
802
+ pcfg["context_window"] = nvidia_hosted_context_default(str(pcfg.get("current_model") or ""))
803
+ migrations[marker] = True
804
+
805
+ marker = "nvidia_context_window_unforce_32k_20260513"
806
+ if not migrations.get(marker):
807
+ pcfg = cfg.get("providers", {}).get("nvidia-hosted", {})
808
+ if isinstance(pcfg, dict) and positive_int(pcfg.get("context_window")) == 32768:
809
+ pcfg["context_window"] = nvidia_hosted_context_default(str(pcfg.get("current_model") or ""))
792
810
  migrations[marker] = True
793
811
 
794
812
  marker = "stream_enabled_default_true_20260513"
@@ -3620,7 +3638,7 @@ def openai_context_limit_for_budget(provider: str, pcfg: dict[str, Any]) -> int:
3620
3638
  if configured:
3621
3639
  return configured
3622
3640
  if provider == "nvidia-hosted":
3623
- return 32768
3641
+ return nvidia_hosted_context_default(str(pcfg.get("current_model") or ""))
3624
3642
  return 65536
3625
3643
 
3626
3644
 
@@ -6124,14 +6142,72 @@ def apply_llm_preset_to_provider(provider: str, pcfg: dict[str, Any], preset_id:
6124
6142
  }
6125
6143
  for token in tokens_by_preset[preset_id]:
6126
6144
  apply_provider_option(provider, pcfg, token)
6127
- else:
6128
- native_default = "false" if provider == "nvidia-hosted" else "true"
6129
- server_limit = upstream_model_context_limit(provider, pcfg) if provider in ("vllm", "self-hosted-nim") else None
6130
- tokens_by_preset = {
6131
- "balanced": [
6132
- "context_window=32768",
6133
- "reserve=2048",
6134
- "max_output_tokens=4096",
6145
+ else:
6146
+ native_default = "false" if provider == "nvidia-hosted" else "true"
6147
+ server_limit = upstream_model_context_limit(provider, pcfg) if provider in ("vllm", "self-hosted-nim") else None
6148
+ if provider == "nvidia-hosted":
6149
+ tokens_by_preset = {
6150
+ "balanced": [
6151
+ "context_window=65536",
6152
+ "reserve=4096",
6153
+ "max_output_tokens=4096",
6154
+ "timeout=300000",
6155
+ "temperature=0.3",
6156
+ "unset:top_p",
6157
+ "unset:top_k",
6158
+ ],
6159
+ "coding": [
6160
+ "context_window=65536",
6161
+ "reserve=4096",
6162
+ "max_output_tokens=4096",
6163
+ "timeout=300000",
6164
+ "temperature=0.2",
6165
+ "unset:top_p",
6166
+ "unset:top_k",
6167
+ ],
6168
+ "fast": [
6169
+ "context_window=65536",
6170
+ "reserve=2048",
6171
+ "max_output_tokens=2048",
6172
+ "timeout=300000",
6173
+ "temperature=0.2",
6174
+ "unset:top_p",
6175
+ "unset:top_k",
6176
+ ],
6177
+ "long-context-65k": [
6178
+ "context_window=131072",
6179
+ "reserve=8192",
6180
+ "max_output_tokens=4096",
6181
+ "timeout=900000",
6182
+ "temperature=0.3",
6183
+ "unset:top_p",
6184
+ "unset:top_k",
6185
+ ],
6186
+ "large-output": [
6187
+ "context_window=262144",
6188
+ "reserve=8192",
6189
+ "max_output_tokens=8192",
6190
+ "timeout=1200000",
6191
+ "temperature=0.3",
6192
+ "unset:top_p",
6193
+ "unset:top_k",
6194
+ ],
6195
+ "reasoning": [
6196
+ "context_window=262144",
6197
+ "reserve=8192",
6198
+ "max_output_tokens=4096",
6199
+ "timeout=1800000",
6200
+ "temperature=0.6",
6201
+ "unset:top_p",
6202
+ "unset:top_k",
6203
+ ],
6204
+ }
6205
+ else:
6206
+ tokens_by_preset = {
6207
+ "balanced": [
6208
+ "context_window=32768",
6209
+ "reserve=2048",
6210
+ "max_output_tokens=4096",
6135
6211
  "timeout=300000",
6136
6212
  "temperature=0.3",
6137
6213
  "unset:top_p",
@@ -6185,10 +6261,10 @@ def apply_llm_preset_to_provider(provider: str, pcfg: dict[str, Any], preset_id:
6185
6261
  "timeout=1800000",
6186
6262
  "temperature=0.6",
6187
6263
  "unset:top_p",
6188
- "unset:top_k",
6189
- f"native={native_default}",
6190
- ],
6191
- }
6264
+ "unset:top_k",
6265
+ f"native={native_default}",
6266
+ ],
6267
+ }
6192
6268
  for token in tokens_by_preset[preset_id]:
6193
6269
  if provider == "nvidia-hosted" and token.startswith("native="):
6194
6270
  continue
package/docs/README.ja.md CHANGED
@@ -47,7 +47,7 @@ vLLM、NVIDIA hosted、self-hosted NIM を選択し、通常の Claude Code 引
47
47
 
48
48
  Credits: One Ciel LLC
49
49
 
50
- 現在のバージョン: `0.1.37`
50
+ 現在のバージョン: `0.1.38`
51
51
 
52
52
  ## 作られた理由
53
53
 
@@ -351,6 +351,14 @@ Windows/Linux 管理、クリーンアップスクリプト、定期的なセキ
351
351
 
352
352
  ## 変更履歴
353
353
 
354
+ ### 0.1.38
355
+
356
+ - **ユーザー選択の context window を優先**: NVIDIA hosted の 32K safety cap を
357
+ 削除しました。router は LLM options または headless 設定で選ばれた
358
+ context window を使い、未設定の場合のみモデル別 fallback を使います。
359
+ - **NVIDIA preset 更新**: NVIDIA hosted preset は 65K から開始し、
360
+ large-output/reasoning workflow では 256K まで使います。
361
+
354
362
  ### 0.1.37
355
363
 
356
364
  - **Pseudo tool-call recovery**: NVIDIA/OpenAI-compatible stream 経路で
package/docs/README.ko.md CHANGED
@@ -47,7 +47,7 @@ NVIDIA hosted, self-hosted NIM을 선택하고, Claude Code의 일반 인자는
47
47
 
48
48
  Credits: One Ciel LLC
49
49
 
50
- 현재 버전: `0.1.37`
50
+ 현재 버전: `0.1.38`
51
51
 
52
52
  ## 왜 만들었나
53
53
 
@@ -351,6 +351,14 @@ Windows 이벤트 로그 리뷰, 바이러스/랜섬웨어 침입 시도 정리,
351
351
 
352
352
  ## 변경 이력
353
353
 
354
+ ### 0.1.38
355
+
356
+ - **사용자 선택 context window 우선**: NVIDIA hosted 32K safety cap을 제거했습니다.
357
+ router는 LLM 옵션 또는 headless 설정에서 선택한 context window를 사용하고,
358
+ 값이 없을 때만 모델별 fallback을 사용합니다.
359
+ - **NVIDIA preset 업데이트**: NVIDIA hosted preset은 65K부터 시작하고,
360
+ large-output/reasoning 워크플로에서는 256K까지 사용합니다.
361
+
354
362
  ### 0.1.37
355
363
 
356
364
  - **Pseudo tool-call recovery**: NVIDIA/OpenAI-compatible stream 경로에서
package/docs/README.zh.md CHANGED
@@ -47,7 +47,7 @@ NIM,并把普通 Claude Code 参数原样传递。
47
47
 
48
48
  Credits: One Ciel LLC
49
49
 
50
- 当前版本: `0.1.37`
50
+ 当前版本: `0.1.38`
51
51
 
52
52
  ## 为什么存在
53
53
 
@@ -337,6 +337,14 @@ Hermes 格式模型或部分较旧的 Qwen tool template。
337
337
 
338
338
  ## 更新日志
339
339
 
340
+ ### 0.1.38
341
+
342
+ - **优先使用用户选择的 context window**:移除 NVIDIA hosted 的 32K safety cap。
343
+ router 会使用 LLM options 或 headless 配置中选择的 context window,
344
+ 只有未配置时才使用按模型推断的 fallback。
345
+ - **NVIDIA preset 更新**:NVIDIA hosted preset 从 65K 起步,
346
+ large-output/reasoning 工作流最高使用 256K。
347
+
340
348
  ### 0.1.37
341
349
 
342
350
  - **Pseudo tool-call recovery**:NVIDIA/OpenAI-compatible stream 路径现在会
package/docs/manual.md CHANGED
@@ -10,7 +10,7 @@ Code starts, while passing normal Claude Code arguments through unchanged.
10
10
 
11
11
  Credits: One Ciel LLC
12
12
 
13
- Current version: `0.1.37`
13
+ Current version: `0.1.38`
14
14
 
15
15
  ## Install
16
16
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oneciel-ai/claude-any",
3
- "version": "0.1.37",
3
+ "version": "0.1.38",
4
4
  "description": "Claude Code provider selector for Anthropic, Ollama, Ollama Cloud, vLLM, NVIDIA hosted, and self-hosted NIM.",
5
5
  "license": "MIT",
6
6
  "author": "One Ciel LLC",