@oneciel-ai/claude-any 0.1.36 → 0.1.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -1
- package/claude_any.py +213 -34
- package/docs/README.ja.md +17 -1
- package/docs/README.ko.md +17 -1
- package/docs/README.zh.md +17 -1
- package/docs/manual.md +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -48,7 +48,7 @@ arguments through unchanged.
|
|
|
48
48
|
|
|
49
49
|
Credits: One Ciel LLC
|
|
50
50
|
|
|
51
|
-
Current version: `0.1.36`
|
|
51
|
+
Current version: `0.1.38`
|
|
52
52
|
|
|
53
53
|
## Why This Exists
|
|
54
54
|
|
|
@@ -381,6 +381,22 @@ steps under that larger model's supervision.
|
|
|
381
381
|
|
|
382
382
|
## Changelog
|
|
383
383
|
|
|
384
|
+
### 0.1.38
|
|
385
|
+
|
|
386
|
+
- **User-selected context windows**: removes the NVIDIA hosted 32K safety cap.
|
|
387
|
+
The router now uses the context window selected in LLM options or headless
|
|
388
|
+
configuration, with model-aware fallback only when no value is configured.
|
|
389
|
+
- **NVIDIA presets updated**: NVIDIA hosted presets now start at 65K and scale
|
|
390
|
+
up to 256K for large-output/reasoning workflows.
|
|
391
|
+
|
|
392
|
+
### 0.1.37
|
|
393
|
+
|
|
394
|
+
- **Pseudo tool-call recovery**: the NVIDIA/OpenAI-compatible stream path now
|
|
395
|
+
suppresses `<|tool_calls_section_begin|>...` pseudo tool-call text and
|
|
396
|
+
converts it back into Claude `tool_use` blocks when possible.
|
|
397
|
+
- **Streaming defaults**: provider streaming defaults to on; NVIDIA hosted
|
|
398
|
+
remains forced to the streaming upstream path for stability.
|
|
399
|
+
|
|
384
400
|
### 0.1.36
|
|
385
401
|
|
|
386
402
|
- **NVIDIA upstream streaming**: NVIDIA hosted router calls now use upstream
|
package/claude_any.py
CHANGED
|
@@ -85,7 +85,7 @@ PROVIDER_LABELS = {
|
|
|
85
85
|
"self-hosted-nim": "Self Hosted NIM",
|
|
86
86
|
}
|
|
87
87
|
APP_NAME = "Claude Any"
|
|
88
|
-
VERSION = "0.1.
|
|
88
|
+
VERSION = "0.1.38"
|
|
89
89
|
CREDITS = "Credits: One Ciel LLC"
|
|
90
90
|
|
|
91
91
|
LOG_LEVELS = {"SILENT": 0, "ERROR": 1, "WARN": 2, "INFO": 3, "DEBUG": 4, "TRACE": 5}
|
|
@@ -144,7 +144,7 @@ LANGUAGES = {
|
|
|
144
144
|
"zh": "中文",
|
|
145
145
|
}
|
|
146
146
|
|
|
147
|
-
MODEL_PRESETS: dict[str, dict[str, Any]] = {
|
|
147
|
+
MODEL_PRESETS: dict[str, dict[str, Any]] = {
|
|
148
148
|
"glm-4.7": {"compat_max_tokens": 64, "thinking": True, "num_ctx_min": 32768, "num_ctx_max": 131072},
|
|
149
149
|
"glm-5.1": {"compat_max_tokens": 64, "thinking": True, "num_ctx_min": 32768, "num_ctx_max": 131072},
|
|
150
150
|
"glm-4.7:cloud": {"compat_max_tokens": 64, "thinking": True, "num_ctx_min": 32768, "num_ctx_max": 131072},
|
|
@@ -154,10 +154,21 @@ MODEL_PRESETS: dict[str, dict[str, Any]] = {
|
|
|
154
154
|
"qwen3.6:27b": {"compat_max_tokens": 16, "thinking": False, "num_ctx_min": 32768, "num_ctx_max": 65536},
|
|
155
155
|
"deepseek-r1": {"compat_max_tokens": 64, "thinking": True, "num_ctx_min": 32768, "num_ctx_max": 131072},
|
|
156
156
|
"llama3.3:70b": {"compat_max_tokens": 16, "thinking": False, "num_ctx_min": 32768, "num_ctx_max": 131072},
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
def
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def nvidia_hosted_context_default(model_id: str) -> int:
    """Return the fallback context window (in tokens) for an NVIDIA hosted model.

    The match is a case-insensitive substring test against ``model_id``.
    Rules are checked in order and the first hit wins, so a more specific
    model family must be listed before a broader one.

    Args:
        model_id: Model identifier as configured for the provider. An empty
            or ``None`` value is treated as "unknown model".

    Returns:
        262144 for Kimi K2.6 ids, 131072 for DeepSeek ids, and 65536 for
        everything else (including GLM and Qwen ids, which use the general
        default).
    """
    default_window = 65536
    # (substrings, window) pairs; evaluated top to bottom.
    rules: tuple[tuple[tuple[str, ...], int], ...] = (
        (("kimi-k2.6", "kimi_k2.6"), 262144),
        (("deepseek",), 131072),
    )
    # Tolerate a missing model id instead of raising AttributeError on None.
    model = str(model_id or "").lower()
    for needles, window in rules:
        if any(needle in model for needle in needles):
            return window
    return default_window
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def model_preset(model_id: str) -> dict[str, Any]:
|
|
161
172
|
"""Return preset dict for a model ID, checking exact match then prefix match."""
|
|
162
173
|
if model_id in MODEL_PRESETS:
|
|
163
174
|
return MODEL_PRESETS[model_id]
|
|
@@ -722,7 +733,7 @@ DEFAULT_CONFIG: dict[str, Any] = {
|
|
|
722
733
|
"native_compat": False,
|
|
723
734
|
"rate_limit_rpm": 40,
|
|
724
735
|
"rate_limit_status": True,
|
|
725
|
-
"context_window":
|
|
736
|
+
"context_window": 65536,
|
|
726
737
|
"max_output_tokens": 4096,
|
|
727
738
|
"temperature": 0.7,
|
|
728
739
|
"top_p": 0.8,
|
|
@@ -788,7 +799,21 @@ def apply_config_migrations(cfg: dict[str, Any]) -> None:
|
|
|
788
799
|
if not migrations.get(marker):
|
|
789
800
|
pcfg = cfg.get("providers", {}).get("nvidia-hosted", {})
|
|
790
801
|
if isinstance(pcfg, dict) and not positive_int(pcfg.get("context_window")):
|
|
791
|
-
pcfg["context_window"] =
|
|
802
|
+
pcfg["context_window"] = nvidia_hosted_context_default(str(pcfg.get("current_model") or ""))
|
|
803
|
+
migrations[marker] = True
|
|
804
|
+
|
|
805
|
+
marker = "nvidia_context_window_unforce_32k_20260513"
|
|
806
|
+
if not migrations.get(marker):
|
|
807
|
+
pcfg = cfg.get("providers", {}).get("nvidia-hosted", {})
|
|
808
|
+
if isinstance(pcfg, dict) and positive_int(pcfg.get("context_window")) == 32768:
|
|
809
|
+
pcfg["context_window"] = nvidia_hosted_context_default(str(pcfg.get("current_model") or ""))
|
|
810
|
+
migrations[marker] = True
|
|
811
|
+
|
|
812
|
+
marker = "stream_enabled_default_true_20260513"
|
|
813
|
+
if not migrations.get(marker):
|
|
814
|
+
for pcfg in (cfg.get("providers") or {}).values():
|
|
815
|
+
if isinstance(pcfg, dict) and "stream_enabled" not in pcfg:
|
|
816
|
+
pcfg["stream_enabled"] = True
|
|
792
817
|
migrations[marker] = True
|
|
793
818
|
|
|
794
819
|
|
|
@@ -3613,7 +3638,7 @@ def openai_context_limit_for_budget(provider: str, pcfg: dict[str, Any]) -> int:
|
|
|
3613
3638
|
if configured:
|
|
3614
3639
|
return configured
|
|
3615
3640
|
if provider == "nvidia-hosted":
|
|
3616
|
-
return
|
|
3641
|
+
return nvidia_hosted_context_default(str(pcfg.get("current_model") or ""))
|
|
3617
3642
|
return 65536
|
|
3618
3643
|
|
|
3619
3644
|
|
|
@@ -3988,7 +4013,7 @@ def maybe_handle_advisor_request(handler: BaseHTTPRequestHandler, provider: str,
|
|
|
3988
4013
|
return True
|
|
3989
4014
|
|
|
3990
4015
|
|
|
3991
|
-
def normalize_tool_arguments(tool_name: str, args: Any) -> dict[str, Any]:
|
|
4016
|
+
def normalize_tool_arguments(tool_name: str, args: Any) -> dict[str, Any]:
|
|
3992
4017
|
if isinstance(args, dict):
|
|
3993
4018
|
return args
|
|
3994
4019
|
if isinstance(args, str):
|
|
@@ -4003,17 +4028,86 @@ def normalize_tool_arguments(tool_name: str, args: Any) -> dict[str, Any]:
|
|
|
4003
4028
|
pass
|
|
4004
4029
|
if tool_name == "Bash":
|
|
4005
4030
|
return {"command": text}
|
|
4006
|
-
return {}
|
|
4007
|
-
|
|
4008
|
-
|
|
4009
|
-
|
|
4010
|
-
|
|
4011
|
-
|
|
4012
|
-
|
|
4013
|
-
|
|
4014
|
-
|
|
4015
|
-
|
|
4016
|
-
|
|
4031
|
+
return {}
|
|
4032
|
+
|
|
4033
|
+
|
|
4034
|
+
PSEUDO_TOOL_START = "<|tool_calls_section_begin|>"
|
|
4035
|
+
PSEUDO_TOOL_END = "<|tool_calls_section_end|>"
|
|
4036
|
+
PSEUDO_CALL_BEGIN = "<|tool_call_begin|>"
|
|
4037
|
+
PSEUDO_ARG_BEGIN = "<|tool_call_argument_begin|>"
|
|
4038
|
+
PSEUDO_CALL_END = "<|tool_call_end|>"
|
|
4039
|
+
|
|
4040
|
+
|
|
4041
|
+
def infer_tool_name_from_args(args: dict[str, Any]) -> str:
    """Guess a tool name from the argument keys of a tool call.

    Used when a call carries no recognizable tool name. The rules are
    checked in order and the first one whose required keys are all present
    wins.

    Args:
        args: Parsed tool-call arguments.

    Returns:
        ``"TaskList"`` for empty arguments; otherwise ``"Bash"``, ``"Write"``,
        ``"Edit"``, ``"Read"`` or ``"TaskUpdate"`` according to the keys
        present, falling back to ``"Write"`` when nothing matches.
    """
    if not args:
        return "TaskList"
    # Order matters: e.g. file_path+content must be tried before bare file_path.
    ordered_rules = (
        (("command",), "Bash"),
        (("file_path", "content"), "Write"),
        (("file_path", "old_string", "new_string"), "Edit"),
        (("file_path",), "Read"),
        (("taskId", "status"), "TaskUpdate"),
    )
    for required_keys, tool_name in ordered_rules:
        if all(key in args for key in required_keys):
            return tool_name
    return "Write"
|
|
4054
|
+
|
|
4055
|
+
|
|
4056
|
+
def parse_pseudo_tool_calls(text: str) -> tuple[str, list[dict[str, Any]]]:
    """Separate pseudo tool-call markup from visible text.

    Every span between ``PSEUDO_TOOL_START`` and ``PSEUDO_TOOL_END`` is
    removed from the text. Inside each span, every
    ``PSEUDO_CALL_BEGIN <header> PSEUDO_ARG_BEGIN <json> PSEUDO_CALL_END``
    entry whose payload decodes to a JSON object becomes one call dict.
    A section with no end marker is taken to run to the end of ``text``.

    Args:
        text: Raw model output that may contain pseudo tool-call sections.

    Returns:
        A ``(visible_text, calls)`` tuple. ``visible_text`` is ``text`` with
        all sections removed. Each call has the shape
        ``{"function": {"name": ..., "arguments": {...}}, "id": <header>}``.
        When no start marker is present, ``text`` is returned unchanged with
        an empty list.
    """
    if PSEUDO_TOOL_START not in text:
        return text, []
    # Compile once per call rather than rebuilding the pattern for every section.
    call_pattern = re.compile(
        re.escape(PSEUDO_CALL_BEGIN) + r"(.*?)" + re.escape(PSEUDO_ARG_BEGIN) + r"(.*?)" + re.escape(PSEUDO_CALL_END),
        flags=re.DOTALL,
    )
    visible_parts: list[str] = []
    calls: list[dict[str, Any]] = []
    pos = 0
    while True:
        start = text.find(PSEUDO_TOOL_START, pos)
        if start < 0:
            # No further sections: the remainder is ordinary text.
            visible_parts.append(text[pos:])
            break
        visible_parts.append(text[pos:start])
        end = text.find(PSEUDO_TOOL_END, start)
        if end < 0:
            # Unterminated section: consume everything after the start marker.
            section = text[start + len(PSEUDO_TOOL_START):]
            pos = len(text)
        else:
            section = text[start + len(PSEUDO_TOOL_START):end]
            pos = end + len(PSEUDO_TOOL_END)
        for match in call_pattern.finditer(section):
            raw_header = match.group(1).strip()
            raw_args = match.group(2).strip()
            try:
                args = json.loads(raw_args)
            except (ValueError, RecursionError):
                # Malformed or pathologically nested JSON: drop this call only.
                continue
            if not isinstance(args, dict):
                continue
            name = ""
            # The header may bundle the tool name with other tokens; try each
            # piece. NOTE(review): assumes _fuzzy_match_tool_name returns a
            # falsy value when a piece is not a known tool - confirm.
            for part in re.split(r"[\s:|,]+", raw_header):
                candidate = _fuzzy_match_tool_name(part)
                if candidate:
                    name = candidate
                    break
            if not name:
                name = infer_tool_name_from_args(args)
            calls.append({"function": {"name": name, "arguments": args}, "id": raw_header})
        if end < 0:
            break
    return "".join(visible_parts), calls
|
|
4100
|
+
|
|
4101
|
+
|
|
4102
|
+
def ollama_chat_to_anthropic(data: dict[str, Any], model: str, source_body: dict[str, Any] | None = None) -> dict[str, Any]:
|
|
4103
|
+
message = data.get("message") if isinstance(data.get("message"), dict) else {}
|
|
4104
|
+
content: list[dict[str, Any]] = []
|
|
4105
|
+
text = message.get("content") or ""
|
|
4106
|
+
text, pseudo_tool_calls = parse_pseudo_tool_calls(text)
|
|
4107
|
+
if text:
|
|
4108
|
+
content.append({"type": "text", "text": text})
|
|
4109
|
+
tool_id_prefix = f"toolu_ollama_{int(time.time() * 1000)}_{os.getpid()}"
|
|
4110
|
+
for i, call in enumerate(list(message.get("tool_calls") or []) + pseudo_tool_calls):
|
|
4017
4111
|
fn = call.get("function") if isinstance(call, dict) else {}
|
|
4018
4112
|
if not isinstance(fn, dict) or not fn.get("name"):
|
|
4019
4113
|
continue
|
|
@@ -4609,6 +4703,8 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4609
4703
|
text_suppressed_for_plan = False
|
|
4610
4704
|
text_index: int | None = None
|
|
4611
4705
|
text_so_far = ""
|
|
4706
|
+
pseudo_text = ""
|
|
4707
|
+
pseudo_mode = False
|
|
4612
4708
|
text_buffer = ""
|
|
4613
4709
|
tool_fragments: dict[int, dict[str, Any]] = {}
|
|
4614
4710
|
output_tokens = 0
|
|
@@ -4667,6 +4763,21 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4667
4763
|
delta = choice.get("delta") if isinstance(choice.get("delta"), dict) else {}
|
|
4668
4764
|
text_chunk = delta.get("content") or ""
|
|
4669
4765
|
if text_chunk:
|
|
4766
|
+
if pseudo_mode or PSEUDO_TOOL_START in text_chunk:
|
|
4767
|
+
before, sep, after = text_chunk.partition(PSEUDO_TOOL_START)
|
|
4768
|
+
if before and not pseudo_mode:
|
|
4769
|
+
text_so_far += before
|
|
4770
|
+
if word_chunking:
|
|
4771
|
+
text_buffer += before
|
|
4772
|
+
to_flush, text_buffer = _split_word_buffer(text_buffer, force=False)
|
|
4773
|
+
emit_text_delta(to_flush)
|
|
4774
|
+
else:
|
|
4775
|
+
emit_text_delta(before)
|
|
4776
|
+
pseudo_mode = True
|
|
4777
|
+
pseudo_text += (sep + after) if sep else text_chunk
|
|
4778
|
+
if PSEUDO_TOOL_END in pseudo_text:
|
|
4779
|
+
pseudo_mode = False
|
|
4780
|
+
continue
|
|
4670
4781
|
if source_body is not None and not text_started and not tool_fragments and should_auto_enter_plan_mode(source_body, text_so_far + text_chunk, []):
|
|
4671
4782
|
text_so_far += text_chunk
|
|
4672
4783
|
text_suppressed_for_plan = True
|
|
@@ -4698,6 +4809,15 @@ def stream_openai_chat_to_anthropic_sse(
|
|
|
4698
4809
|
emit_text_delta(to_flush)
|
|
4699
4810
|
|
|
4700
4811
|
tool_calls: list[dict[str, Any]] = []
|
|
4812
|
+
_, pseudo_tool_calls = parse_pseudo_tool_calls(pseudo_text)
|
|
4813
|
+
for i, pseudo in enumerate(pseudo_tool_calls):
|
|
4814
|
+
fn = pseudo.get("function") if isinstance(pseudo, dict) else {}
|
|
4815
|
+
if isinstance(fn, dict):
|
|
4816
|
+
tool_fragments.setdefault(100000 + i, {
|
|
4817
|
+
"id": str(pseudo.get("id") or ""),
|
|
4818
|
+
"name": str(fn.get("name") or ""),
|
|
4819
|
+
"arguments": json.dumps(fn.get("arguments") or {}, ensure_ascii=False),
|
|
4820
|
+
})
|
|
4701
4821
|
for _, fragment in sorted(tool_fragments.items()):
|
|
4702
4822
|
raw_name = str(fragment.get("name") or "")
|
|
4703
4823
|
if not raw_name:
|
|
@@ -4964,7 +5084,8 @@ def forward_openai_compatible_chat(handler: BaseHTTPRequestHandler, provider: st
|
|
|
4964
5084
|
model = ncp_model_id_for_nvidia_hosted(model)
|
|
4965
5085
|
url = join_url(provider_upstream_request_base(provider, pcfg), "/chat/completions")
|
|
4966
5086
|
waited, rpm_used, rpm_limit = apply_router_rate_limit(provider, pcfg, model)
|
|
4967
|
-
|
|
5087
|
+
stream_enabled = bool(pcfg.get("stream_enabled", True))
|
|
5088
|
+
stream = True if provider == "nvidia-hosted" else bool(body.get("stream", stream_enabled)) and stream_enabled
|
|
4968
5089
|
notice = rate_limit_notice(waited, rpm_used, rpm_limit, bool(pcfg.get("rate_limit_status", True)))
|
|
4969
5090
|
if stream:
|
|
4970
5091
|
req_body = openai_compatible_chat_request(provider, model, body, pcfg, stream=True)
|
|
@@ -6021,14 +6142,72 @@ def apply_llm_preset_to_provider(provider: str, pcfg: dict[str, Any], preset_id:
|
|
|
6021
6142
|
}
|
|
6022
6143
|
for token in tokens_by_preset[preset_id]:
|
|
6023
6144
|
apply_provider_option(provider, pcfg, token)
|
|
6024
|
-
else:
|
|
6025
|
-
native_default = "false" if provider == "nvidia-hosted" else "true"
|
|
6026
|
-
server_limit = upstream_model_context_limit(provider, pcfg) if provider in ("vllm", "self-hosted-nim") else None
|
|
6027
|
-
|
|
6028
|
-
|
|
6029
|
-
"
|
|
6030
|
-
|
|
6031
|
-
|
|
6145
|
+
else:
|
|
6146
|
+
native_default = "false" if provider == "nvidia-hosted" else "true"
|
|
6147
|
+
server_limit = upstream_model_context_limit(provider, pcfg) if provider in ("vllm", "self-hosted-nim") else None
|
|
6148
|
+
if provider == "nvidia-hosted":
|
|
6149
|
+
tokens_by_preset = {
|
|
6150
|
+
"balanced": [
|
|
6151
|
+
"context_window=65536",
|
|
6152
|
+
"reserve=4096",
|
|
6153
|
+
"max_output_tokens=4096",
|
|
6154
|
+
"timeout=300000",
|
|
6155
|
+
"temperature=0.3",
|
|
6156
|
+
"unset:top_p",
|
|
6157
|
+
"unset:top_k",
|
|
6158
|
+
],
|
|
6159
|
+
"coding": [
|
|
6160
|
+
"context_window=65536",
|
|
6161
|
+
"reserve=4096",
|
|
6162
|
+
"max_output_tokens=4096",
|
|
6163
|
+
"timeout=300000",
|
|
6164
|
+
"temperature=0.2",
|
|
6165
|
+
"unset:top_p",
|
|
6166
|
+
"unset:top_k",
|
|
6167
|
+
],
|
|
6168
|
+
"fast": [
|
|
6169
|
+
"context_window=65536",
|
|
6170
|
+
"reserve=2048",
|
|
6171
|
+
"max_output_tokens=2048",
|
|
6172
|
+
"timeout=300000",
|
|
6173
|
+
"temperature=0.2",
|
|
6174
|
+
"unset:top_p",
|
|
6175
|
+
"unset:top_k",
|
|
6176
|
+
],
|
|
6177
|
+
"long-context-65k": [
|
|
6178
|
+
"context_window=131072",
|
|
6179
|
+
"reserve=8192",
|
|
6180
|
+
"max_output_tokens=4096",
|
|
6181
|
+
"timeout=900000",
|
|
6182
|
+
"temperature=0.3",
|
|
6183
|
+
"unset:top_p",
|
|
6184
|
+
"unset:top_k",
|
|
6185
|
+
],
|
|
6186
|
+
"large-output": [
|
|
6187
|
+
"context_window=262144",
|
|
6188
|
+
"reserve=8192",
|
|
6189
|
+
"max_output_tokens=8192",
|
|
6190
|
+
"timeout=1200000",
|
|
6191
|
+
"temperature=0.3",
|
|
6192
|
+
"unset:top_p",
|
|
6193
|
+
"unset:top_k",
|
|
6194
|
+
],
|
|
6195
|
+
"reasoning": [
|
|
6196
|
+
"context_window=262144",
|
|
6197
|
+
"reserve=8192",
|
|
6198
|
+
"max_output_tokens=4096",
|
|
6199
|
+
"timeout=1800000",
|
|
6200
|
+
"temperature=0.6",
|
|
6201
|
+
"unset:top_p",
|
|
6202
|
+
"unset:top_k",
|
|
6203
|
+
],
|
|
6204
|
+
}
|
|
6205
|
+
else:
|
|
6206
|
+
tokens_by_preset = {
|
|
6207
|
+
"balanced": [
|
|
6208
|
+
"context_window=32768",
|
|
6209
|
+
"reserve=2048",
|
|
6210
|
+
"max_output_tokens=4096",
|
|
6032
6211
|
"timeout=300000",
|
|
6033
6212
|
"temperature=0.3",
|
|
6034
6213
|
"unset:top_p",
|
|
@@ -6082,10 +6261,10 @@ def apply_llm_preset_to_provider(provider: str, pcfg: dict[str, Any], preset_id:
|
|
|
6082
6261
|
"timeout=1800000",
|
|
6083
6262
|
"temperature=0.6",
|
|
6084
6263
|
"unset:top_p",
|
|
6085
|
-
"unset:top_k",
|
|
6086
|
-
f"native={native_default}",
|
|
6087
|
-
],
|
|
6088
|
-
|
|
6264
|
+
"unset:top_k",
|
|
6265
|
+
f"native={native_default}",
|
|
6266
|
+
],
|
|
6267
|
+
}
|
|
6089
6268
|
for token in tokens_by_preset[preset_id]:
|
|
6090
6269
|
if provider == "nvidia-hosted" and token.startswith("native="):
|
|
6091
6270
|
continue
|
package/docs/README.ja.md
CHANGED
|
@@ -47,7 +47,7 @@ vLLM、NVIDIA hosted、self-hosted NIM を選択し、通常の Claude Code 引
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
現在のバージョン: `0.1.36`
|
|
50
|
+
現在のバージョン: `0.1.38`
|
|
51
51
|
|
|
52
52
|
## 作られた理由
|
|
53
53
|
|
|
@@ -351,6 +351,22 @@ Windows/Linux 管理、クリーンアップスクリプト、定期的なセキ
|
|
|
351
351
|
|
|
352
352
|
## 変更履歴
|
|
353
353
|
|
|
354
|
+
### 0.1.38
|
|
355
|
+
|
|
356
|
+
- **ユーザー選択の context window を優先**: NVIDIA hosted の 32K safety cap を
|
|
357
|
+
削除しました。router は LLM options または headless 設定で選ばれた
|
|
358
|
+
context window を使い、未設定の場合のみモデル別 fallback を使います。
|
|
359
|
+
- **NVIDIA preset 更新**: NVIDIA hosted preset は 65K から開始し、
|
|
360
|
+
large-output/reasoning workflow では 256K まで使います。
|
|
361
|
+
|
|
362
|
+
### 0.1.37
|
|
363
|
+
|
|
364
|
+
- **Pseudo tool-call recovery**: NVIDIA/OpenAI-compatible stream 経路で
|
|
365
|
+
`<|tool_calls_section_begin|>...` pseudo tool-call テキストを画面に出さず、
|
|
366
|
+
可能な場合は Claude `tool_use` ブロックへ復元します。
|
|
367
|
+
- **Streaming defaults**: provider streaming の既定値は on です。NVIDIA hosted
|
|
368
|
+
は安定性のため upstream streaming 経路に固定されます。
|
|
369
|
+
|
|
354
370
|
### 0.1.36
|
|
355
371
|
|
|
356
372
|
- **NVIDIA upstream streaming**: NVIDIA hosted router 呼び出しは upstream にも
|
package/docs/README.ko.md
CHANGED
|
@@ -47,7 +47,7 @@ NVIDIA hosted, self-hosted NIM을 선택하고, Claude Code의 일반 인자는
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
현재 버전: `0.1.36`
|
|
50
|
+
현재 버전: `0.1.38`
|
|
51
51
|
|
|
52
52
|
## 왜 만들었나
|
|
53
53
|
|
|
@@ -351,6 +351,22 @@ Windows 이벤트 로그 리뷰, 바이러스/랜섬웨어 침입 시도 정리,
|
|
|
351
351
|
|
|
352
352
|
## 변경 이력
|
|
353
353
|
|
|
354
|
+
### 0.1.38
|
|
355
|
+
|
|
356
|
+
- **사용자 선택 context window 우선**: NVIDIA hosted 32K safety cap을 제거했습니다.
|
|
357
|
+
router는 LLM 옵션 또는 headless 설정에서 선택한 context window를 사용하고,
|
|
358
|
+
값이 없을 때만 모델별 fallback을 사용합니다.
|
|
359
|
+
- **NVIDIA preset 업데이트**: NVIDIA hosted preset은 65K부터 시작하고,
|
|
360
|
+
large-output/reasoning 워크플로에서는 256K까지 사용합니다.
|
|
361
|
+
|
|
362
|
+
### 0.1.37
|
|
363
|
+
|
|
364
|
+
- **Pseudo tool-call recovery**: NVIDIA/OpenAI-compatible stream 경로에서
|
|
365
|
+
`<|tool_calls_section_begin|>...` pseudo tool-call 텍스트를 화면에 출력하지
|
|
366
|
+
않고 가능한 경우 Claude `tool_use` 블록으로 복구합니다.
|
|
367
|
+
- **Streaming defaults**: provider streaming 기본값은 on이며, NVIDIA hosted는
|
|
368
|
+
안정성을 위해 upstream streaming 경로로 고정됩니다.
|
|
369
|
+
|
|
354
370
|
### 0.1.36
|
|
355
371
|
|
|
356
372
|
- **NVIDIA upstream streaming**: NVIDIA hosted router 호출은 이제 upstream에도
|
package/docs/README.zh.md
CHANGED
|
@@ -47,7 +47,7 @@ NIM,并把普通 Claude Code 参数原样传递。
|
|
|
47
47
|
|
|
48
48
|
Credits: One Ciel LLC
|
|
49
49
|
|
|
50
|
-
当前版本: `0.1.36`
|
|
50
|
+
当前版本: `0.1.38`
|
|
51
51
|
|
|
52
52
|
## 为什么存在
|
|
53
53
|
|
|
@@ -337,6 +337,22 @@ Hermes 格式模型或部分较旧的 Qwen tool template。
|
|
|
337
337
|
|
|
338
338
|
## 更新日志
|
|
339
339
|
|
|
340
|
+
### 0.1.38
|
|
341
|
+
|
|
342
|
+
- **优先使用用户选择的 context window**:移除 NVIDIA hosted 的 32K safety cap。
|
|
343
|
+
router 会使用 LLM options 或 headless 配置中选择的 context window,
|
|
344
|
+
只有未配置时才使用按模型推断的 fallback。
|
|
345
|
+
- **NVIDIA preset 更新**:NVIDIA hosted preset 从 65K 起步,
|
|
346
|
+
large-output/reasoning 工作流最高使用 256K。
|
|
347
|
+
|
|
348
|
+
### 0.1.37
|
|
349
|
+
|
|
350
|
+
- **Pseudo tool-call recovery**:NVIDIA/OpenAI-compatible stream 路径现在会
|
|
351
|
+
隐藏 `<|tool_calls_section_begin|>...` pseudo tool-call 文本,并尽可能恢复为
|
|
352
|
+
Claude `tool_use` block。
|
|
353
|
+
- **Streaming defaults**:provider streaming 默认开启;NVIDIA hosted 为了稳定性
|
|
354
|
+
固定使用 upstream streaming 路径。
|
|
355
|
+
|
|
340
356
|
### 0.1.36
|
|
341
357
|
|
|
342
358
|
- **NVIDIA upstream streaming**:NVIDIA hosted router 调用现在也会向 upstream
|
package/docs/manual.md
CHANGED
package/package.json
CHANGED