klaude-code 2.8.1__py3-none-any.whl → 2.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. klaude_code/app/runtime.py +2 -1
  2. klaude_code/auth/antigravity/oauth.py +33 -38
  3. klaude_code/auth/antigravity/token_manager.py +0 -18
  4. klaude_code/auth/base.py +53 -0
  5. klaude_code/auth/claude/oauth.py +34 -49
  6. klaude_code/auth/codex/exceptions.py +0 -4
  7. klaude_code/auth/codex/oauth.py +32 -28
  8. klaude_code/auth/codex/token_manager.py +0 -18
  9. klaude_code/cli/cost_cmd.py +128 -39
  10. klaude_code/cli/list_model.py +27 -10
  11. klaude_code/cli/main.py +14 -3
  12. klaude_code/config/assets/builtin_config.yaml +25 -24
  13. klaude_code/config/config.py +47 -25
  14. klaude_code/config/sub_agent_model_helper.py +18 -13
  15. klaude_code/config/thinking.py +0 -8
  16. klaude_code/const.py +1 -1
  17. klaude_code/core/agent_profile.py +11 -56
  18. klaude_code/core/compaction/overflow.py +0 -4
  19. klaude_code/core/executor.py +33 -5
  20. klaude_code/core/manager/llm_clients.py +9 -1
  21. klaude_code/core/prompts/prompt-claude-code.md +4 -4
  22. klaude_code/core/reminders.py +21 -23
  23. klaude_code/core/task.py +1 -5
  24. klaude_code/core/tool/__init__.py +3 -2
  25. klaude_code/core/tool/file/apply_patch.py +0 -27
  26. klaude_code/core/tool/file/read_tool.md +3 -2
  27. klaude_code/core/tool/file/read_tool.py +27 -3
  28. klaude_code/core/tool/offload.py +0 -35
  29. klaude_code/core/tool/shell/bash_tool.py +1 -1
  30. klaude_code/core/tool/sub_agent/__init__.py +6 -0
  31. klaude_code/core/tool/sub_agent/image_gen.md +16 -0
  32. klaude_code/core/tool/sub_agent/image_gen.py +146 -0
  33. klaude_code/core/tool/sub_agent/task.md +20 -0
  34. klaude_code/core/tool/sub_agent/task.py +205 -0
  35. klaude_code/core/tool/tool_registry.py +0 -16
  36. klaude_code/core/turn.py +1 -1
  37. klaude_code/llm/anthropic/input.py +6 -5
  38. klaude_code/llm/antigravity/input.py +14 -7
  39. klaude_code/llm/bedrock_anthropic/__init__.py +3 -0
  40. klaude_code/llm/google/client.py +8 -6
  41. klaude_code/llm/google/input.py +20 -12
  42. klaude_code/llm/image.py +18 -11
  43. klaude_code/llm/input_common.py +32 -6
  44. klaude_code/llm/json_stable.py +37 -0
  45. klaude_code/llm/{codex → openai_codex}/__init__.py +1 -1
  46. klaude_code/llm/{codex → openai_codex}/client.py +24 -2
  47. klaude_code/llm/openai_codex/prompt_sync.py +237 -0
  48. klaude_code/llm/openai_compatible/client.py +3 -1
  49. klaude_code/llm/openai_compatible/input.py +0 -10
  50. klaude_code/llm/openai_compatible/stream.py +35 -10
  51. klaude_code/llm/{responses → openai_responses}/client.py +1 -1
  52. klaude_code/llm/{responses → openai_responses}/input.py +15 -5
  53. klaude_code/llm/registry.py +3 -8
  54. klaude_code/llm/stream_parts.py +3 -1
  55. klaude_code/llm/usage.py +1 -9
  56. klaude_code/protocol/events.py +2 -2
  57. klaude_code/protocol/message.py +3 -2
  58. klaude_code/protocol/model.py +34 -2
  59. klaude_code/protocol/op.py +13 -0
  60. klaude_code/protocol/op_handler.py +5 -0
  61. klaude_code/protocol/sub_agent/AGENTS.md +5 -5
  62. klaude_code/protocol/sub_agent/__init__.py +13 -34
  63. klaude_code/protocol/sub_agent/explore.py +7 -34
  64. klaude_code/protocol/sub_agent/image_gen.py +3 -74
  65. klaude_code/protocol/sub_agent/task.py +3 -47
  66. klaude_code/protocol/sub_agent/web.py +8 -52
  67. klaude_code/protocol/tools.py +2 -0
  68. klaude_code/session/session.py +80 -22
  69. klaude_code/session/store.py +0 -4
  70. klaude_code/skill/assets/deslop/SKILL.md +9 -0
  71. klaude_code/skill/system_skills.py +0 -20
  72. klaude_code/tui/command/fork_session_cmd.py +5 -2
  73. klaude_code/tui/command/resume_cmd.py +9 -2
  74. klaude_code/tui/command/sub_agent_model_cmd.py +85 -18
  75. klaude_code/tui/components/assistant.py +0 -26
  76. klaude_code/tui/components/bash_syntax.py +4 -0
  77. klaude_code/tui/components/command_output.py +3 -1
  78. klaude_code/tui/components/developer.py +3 -0
  79. klaude_code/tui/components/diffs.py +4 -209
  80. klaude_code/tui/components/errors.py +4 -0
  81. klaude_code/tui/components/mermaid_viewer.py +2 -2
  82. klaude_code/tui/components/metadata.py +0 -3
  83. klaude_code/tui/components/rich/markdown.py +120 -87
  84. klaude_code/tui/components/rich/status.py +2 -2
  85. klaude_code/tui/components/rich/theme.py +11 -6
  86. klaude_code/tui/components/sub_agent.py +2 -46
  87. klaude_code/tui/components/thinking.py +0 -33
  88. klaude_code/tui/components/tools.py +65 -21
  89. klaude_code/tui/components/user_input.py +2 -0
  90. klaude_code/tui/input/images.py +21 -18
  91. klaude_code/tui/input/key_bindings.py +2 -2
  92. klaude_code/tui/input/prompt_toolkit.py +49 -49
  93. klaude_code/tui/machine.py +29 -47
  94. klaude_code/tui/renderer.py +48 -33
  95. klaude_code/tui/runner.py +2 -1
  96. klaude_code/tui/terminal/image.py +27 -34
  97. klaude_code/ui/common.py +0 -70
  98. {klaude_code-2.8.1.dist-info → klaude_code-2.9.1.dist-info}/METADATA +3 -6
  99. {klaude_code-2.8.1.dist-info → klaude_code-2.9.1.dist-info}/RECORD +103 -99
  100. klaude_code/core/tool/sub_agent_tool.py +0 -126
  101. klaude_code/llm/bedrock/__init__.py +0 -3
  102. klaude_code/llm/openai_compatible/tool_call_accumulator.py +0 -108
  103. klaude_code/tui/components/rich/searchable_text.py +0 -68
  104. /klaude_code/llm/{bedrock → bedrock_anthropic}/client.py +0 -0
  105. /klaude_code/llm/{responses → openai_responses}/__init__.py +0 -0
  106. {klaude_code-2.8.1.dist-info → klaude_code-2.9.1.dist-info}/WHEEL +0 -0
  107. {klaude_code-2.8.1.dist-info → klaude_code-2.9.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,205 @@
1
+ """Task tool implementation for running sub-agents by type."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any, cast
8
+
9
+ from klaude_code.core.tool.context import ToolContext
10
+ from klaude_code.core.tool.tool_abc import ToolABC, ToolConcurrencyPolicy, ToolMetadata, load_desc
11
+ from klaude_code.core.tool.tool_registry import register
12
+ from klaude_code.protocol import llm_param, message, model, tools
13
+ from klaude_code.protocol.sub_agent import get_sub_agent_profile, iter_sub_agent_profiles
14
+ from klaude_code.session.session import Session
15
+
16
# Maps the public ``type`` selector accepted by the Task tool to the name of
# the sub-agent profile that serves it. Insertion order is significant: it is
# reused for the schema ``enum`` and for the bullet list in the description.
TASK_TYPE_TO_SUB_AGENT: dict[str, str] = {
    "general-purpose": "Task",
    "explore": "Explore",
    "web": "Web",
}
21
+
22
+
23
def _task_description() -> str:
    """Render the Task tool description from the ``task.md`` template.

    The invoker summary of every sub-agent profile that declares an
    ``invoker_type`` is collected, then one bullet is produced per key of
    ``TASK_TYPE_TO_SUB_AGENT`` (with the summary appended when a non-empty one
    exists). The joined bullets are handed to ``load_desc`` under the
    ``types_section`` key.
    """
    summary_by_type: dict[str, str] = {
        profile.invoker_type: profile.invoker_summary.strip()
        for profile in iter_sub_agent_profiles()
        if profile.invoker_type
    }

    def _bullet(task_type: str) -> str:
        text = summary_by_type.get(task_type, "")
        return f"- {task_type}: {text}" if text else f"- {task_type}"

    bullets = [_bullet(task_type) for task_type in TASK_TYPE_TO_SUB_AGENT]

    # Every bullet is non-empty, so the join is only falsy when there are no
    # task types at all; in that case fall back to the default type.
    types_section = "\n".join(bullets) or "- general-purpose"

    template_path = Path(__file__).with_name("task.md")
    return load_desc(template_path, {"types_section": types_section})
40
+
41
+
42
# Schema advertised to the model for the Task tool. Only ``description`` and
# ``prompt`` are listed as required, and the schema declares that no
# additional properties are allowed.
TASK_SCHEMA = llm_param.ToolSchema(
    name=tools.TASK,
    type="function",
    description=_task_description(),
    parameters={
        "type": "object",
        "properties": {
            # Selector restricted to the keys of TASK_TYPE_TO_SUB_AGENT.
            "type": {
                "type": "string",
                "enum": list(TASK_TYPE_TO_SUB_AGENT),
                "description": "Sub-agent type selector.",
            },
            "description": {
                "type": "string",
                "description": "A short (3-5 word) description of the task.",
            },
            "prompt": {
                "type": "string",
                "description": "The task for the agent to perform.",
            },
            "output_schema": {
                "type": "object",
                "description": "Optional JSON Schema for structured output.",
            },
            "resume": {
                "type": "string",
                "description": "Optional agent ID to resume from.",
            },
        },
        "required": ["description", "prompt"],
        "additionalProperties": False,
    },
)
75
+
76
+
77
@register(tools.TASK)
class TaskTool(ToolABC):
    """Tool that hands a prompt to the sub-agent selected by ``type``.

    The tool validates its JSON arguments, optionally resolves a sub-agent
    session to resume, maps the requested type to a sub-agent profile and
    delegates execution to the subtask runner found on the tool context.
    """

    @classmethod
    def metadata(cls) -> ToolMetadata:
        """Declare the tool as concurrently runnable and side-effecting."""
        return ToolMetadata(
            concurrency_policy=ToolConcurrencyPolicy.CONCURRENT,
            has_side_effects=True,
        )

    @classmethod
    def schema(cls) -> llm_param.ToolSchema:
        """Return the module-level schema for this tool."""
        return TASK_SCHEMA

    @staticmethod
    def _error_result(text: str) -> message.ToolResultMessage:
        """Build an error tool result carrying ``text`` as its output."""
        return message.ToolResultMessage(status="error", output_text=text)

    @classmethod
    async def call(cls, arguments: str, context: ToolContext) -> message.ToolResultMessage:
        """Run (or resume) a sub-agent described by the JSON ``arguments``.

        Args:
            arguments: JSON text expected to decode to an object with the
                keys described by ``TASK_SCHEMA``.
            context: Tool context supplying the subtask runner, the optional
                resume-claim tracker and the sub-agent bookkeeping callbacks.

        Returns:
            A tool result; every validation or execution failure is reported
            as an ``"error"`` result rather than raised.
        """
        try:
            decoded = json.loads(arguments)
        except json.JSONDecodeError as exc:
            return cls._error_result(f"Invalid JSON arguments: {exc}")
        if not isinstance(decoded, dict):
            return cls._error_result("Invalid arguments: expected object")
        payload = cast(dict[str, Any], decoded)

        run_subtask = context.run_subtask
        if run_subtask is None:
            return cls._error_result("No subtask runner available in this context")

        task_desc = str(payload.get("description") or "")

        # --- Optional resume of an existing sub-agent session -------------
        resume_session_id: str | None = None
        resumed_type: str | None = None
        resume_arg = payload.get("resume")
        if isinstance(resume_arg, str) and resume_arg.strip():
            try:
                resume_session_id = Session.resolve_sub_agent_session_id(resume_arg)
            except ValueError as exc:
                return cls._error_result(str(exc))

            try:
                resumed = Session.load(resume_session_id)
            except (OSError, ValueError, json.JSONDecodeError) as exc:
                return cls._error_result(f"Failed to resume sub-agent session '{resume_session_id}': {exc}")

            state = resumed.sub_agent_state
            if state is None:
                return cls._error_result(
                    f"Invalid resume id '{resume_session_id}': target session is not a sub-agent session"
                )

            resumed_type = state.sub_agent_type
            if resumed_type == tools.IMAGE_GEN:
                return cls._error_result("This resume id belongs to ImageGen; use the ImageGen tool to resume it.")

            # When a claim tracker is present, a falsy claim is reported as a
            # duplicate resume of the same session within one response.
            claim_tracker = context.sub_agent_resume_claims
            if claim_tracker is not None and not await claim_tracker.claim(resume_session_id):
                return cls._error_result(
                    "Duplicate sub-agent resume in the same response: "
                    f"resume='{resume_arg.strip()}' (resolved='{resume_session_id[:7]}…'). "
                    "Merge into a single call or resume in a later turn."
                )

        # --- Decide which sub-agent profile to use -------------------------
        type_arg = payload.get("type")
        requested_type = type_arg.strip() if isinstance(type_arg, str) else ""

        if requested_type or not resume_session_id:
            # An explicit type, or a fresh run: look the selector up, using
            # the general-purpose agent when none was given.
            selector = requested_type or "general-purpose"
            sub_agent_type = TASK_TYPE_TO_SUB_AGENT.get(selector)
            if sub_agent_type is None:
                return cls._error_result(f"Unknown Task type '{selector}'.")
        else:
            # Resuming without an explicit type: inherit the resumed
            # session's type.
            sub_agent_type = resumed_type or TASK_TYPE_TO_SUB_AGENT["general-purpose"]

        if resume_session_id and resumed_type and resumed_type != sub_agent_type:
            return cls._error_result(
                "Invalid resume id: sub-agent type mismatch. "
                f"Expected '{sub_agent_type}', got '{resumed_type}'."
            )

        try:
            profile = get_sub_agent_profile(sub_agent_type)
        except KeyError as exc:
            return cls._error_result(str(exc))

        prompt_text = profile.prompt_builder(payload)

        schema_arg = payload.get("output_schema")
        output_schema = cast(dict[str, Any], schema_arg) if isinstance(schema_arg, dict) else None

        # --- Execute ------------------------------------------------------
        sub_agent_state = model.SubAgentState(
            sub_agent_type=profile.name,
            sub_agent_desc=task_desc,
            sub_agent_prompt=prompt_text,
            resume=resume_session_id,
            output_schema=output_schema,
            generation=None,
        )
        try:
            outcome = await run_subtask(
                sub_agent_state,
                context.record_sub_agent_session_id,
                context.register_sub_agent_metadata_getter,
            )
        except Exception as exc:
            # Tool boundary: any runner failure is surfaced as an error result.
            return cls._error_result(f"Failed to run subtask: {exc}")

        return message.ToolResultMessage(
            status="error" if outcome.error else "success",
            output_text=outcome.task_result,
            ui_extra=model.SessionIdUIExtra(session_id=outcome.session_id),
            task_metadata=outcome.task_metadata,
        )
@@ -1,10 +1,8 @@
1
1
  from collections.abc import Callable
2
2
  from typing import TypeVar
3
3
 
4
- from klaude_code.core.tool.sub_agent_tool import SubAgentTool
5
4
  from klaude_code.core.tool.tool_abc import ToolABC
6
5
  from klaude_code.protocol import llm_param
7
- from klaude_code.protocol.sub_agent import iter_sub_agent_profiles
8
6
 
9
7
  _REGISTRY: dict[str, type[ToolABC]] = {}
10
8
 
@@ -19,20 +17,6 @@ def register(name: str) -> Callable[[type[T]], type[T]]:
19
17
  return _decorator
20
18
 
21
19
 
22
- def _register_sub_agent_tools() -> None:
23
- """Automatically register all sub-agent tools based on their profiles."""
24
- for profile in iter_sub_agent_profiles():
25
- tool_cls = SubAgentTool.for_profile(profile)
26
- _REGISTRY[profile.name] = tool_cls
27
-
28
-
29
- _register_sub_agent_tools()
30
-
31
-
32
- def list_tools() -> list[str]:
33
- return list(_REGISTRY.keys())
34
-
35
-
36
20
  def get_tool_schemas(tool_names: list[str]) -> list[llm_param.ToolSchema]:
37
21
  schemas: list[llm_param.ToolSchema] = []
38
22
  for tool_name in tool_names:
klaude_code/core/turn.py CHANGED
@@ -243,7 +243,7 @@ class TurnExecutor:
243
243
  )
244
244
 
245
245
  # ImageGen per-call overrides (tool-level `generation` parameters)
246
- if ctx.sub_agent_state is not None and ctx.sub_agent_state.sub_agent_type == "ImageGen":
246
+ if ctx.sub_agent_state is not None and ctx.sub_agent_state.sub_agent_type == tools.IMAGE_GEN:
247
247
  call_param.modalities = ["image", "text"]
248
248
  generation = ctx.sub_agent_state.generation or {}
249
249
  image_config = llm_param.ImageConfig()
@@ -18,9 +18,10 @@ from anthropic.types.beta.beta_tool_use_block_param import BetaToolUseBlockParam
18
18
  from anthropic.types.beta.beta_url_image_source_param import BetaURLImageSourceParam
19
19
 
20
20
  from klaude_code.const import EMPTY_TOOL_OUTPUT_MESSAGE
21
- from klaude_code.llm.image import parse_data_url
21
+ from klaude_code.llm.image import image_file_to_data_url, parse_data_url
22
22
  from klaude_code.llm.input_common import (
23
23
  DeveloperAttachment,
24
+ ImagePart,
24
25
  attach_developer_messages,
25
26
  merge_reminder_text,
26
27
  split_thinking_parts,
@@ -36,8 +37,8 @@ _INLINE_IMAGE_MEDIA_TYPES: tuple[AllowedMediaType, ...] = (
36
37
  )
37
38
 
38
39
 
39
- def _image_part_to_block(image: message.ImageURLPart) -> BetaImageBlockParam:
40
- url = image.url
40
+ def _image_part_to_block(image: ImagePart) -> BetaImageBlockParam:
41
+ url = image_file_to_data_url(image) if isinstance(image, message.ImageFilePart) else image.url
41
42
  if url.startswith("data:"):
42
43
  media_type, base64_payload, _ = parse_data_url(url)
43
44
  if media_type not in _INLINE_IMAGE_MEDIA_TYPES:
@@ -64,7 +65,7 @@ def _user_message_to_message(
64
65
  for part in msg.parts:
65
66
  if isinstance(part, message.TextPart):
66
67
  blocks.append(cast(BetaTextBlockParam, {"type": "text", "text": part.text}))
67
- elif isinstance(part, message.ImageURLPart):
68
+ elif isinstance(part, (message.ImageURLPart, message.ImageFilePart)):
68
69
  blocks.append(_image_part_to_block(part))
69
70
  if attachment.text:
70
71
  blocks.append(cast(BetaTextBlockParam, {"type": "text", "text": attachment.text}))
@@ -86,7 +87,7 @@ def _tool_message_to_block(
86
87
  attachment.text,
87
88
  )
88
89
  tool_content.append(cast(BetaTextBlockParam, {"type": "text", "text": merged_text}))
89
- for image in [part for part in msg.parts if isinstance(part, message.ImageURLPart)]:
90
+ for image in [part for part in msg.parts if isinstance(part, (message.ImageURLPart, message.ImageFilePart))]:
90
91
  tool_content.append(_image_part_to_block(image))
91
92
  for image in attachment.images:
92
93
  tool_content.append(_image_part_to_block(image))
@@ -6,9 +6,10 @@ from binascii import Error as BinasciiError
6
6
  from typing import Any, TypedDict
7
7
 
8
8
  from klaude_code.const import EMPTY_TOOL_OUTPUT_MESSAGE
9
- from klaude_code.llm.image import assistant_image_to_data_url, parse_data_url
9
+ from klaude_code.llm.image import assistant_image_to_data_url, image_file_to_data_url, parse_data_url
10
10
  from klaude_code.llm.input_common import (
11
11
  DeveloperAttachment,
12
+ ImagePart,
12
13
  attach_developer_messages,
13
14
  merge_reminder_text,
14
15
  split_thinking_parts,
@@ -66,9 +67,9 @@ def _data_url_to_inline_data(url: str) -> InlineData:
66
67
  return InlineData(mimeType=media_type, data=base64.b64encode(decoded).decode("ascii"))
67
68
 
68
69
 
69
- def _image_part_to_part(image: message.ImageURLPart) -> Part:
70
- """Convert ImageURLPart to Part dict."""
71
- url = image.url
70
+ def _image_part_to_part(image: ImagePart) -> Part:
71
+ """Convert ImageURLPart or ImageFilePart to Part dict."""
72
+ url = image_file_to_data_url(image) if isinstance(image, message.ImageFilePart) else image.url
72
73
  if url.startswith("data:"):
73
74
  return Part(inlineData=_data_url_to_inline_data(url))
74
75
  # For non-data URLs, best-effort using inline_data format
@@ -81,7 +82,7 @@ def _user_message_to_content(msg: message.UserMessage, attachment: DeveloperAtta
81
82
  for part in msg.parts:
82
83
  if isinstance(part, message.TextPart):
83
84
  parts.append(Part(text=part.text))
84
- elif isinstance(part, message.ImageURLPart):
85
+ elif isinstance(part, (message.ImageURLPart, message.ImageFilePart)):
85
86
  parts.append(_image_part_to_part(part))
86
87
  if attachment.text:
87
88
  parts.append(Part(text=attachment.text))
@@ -108,14 +109,20 @@ def _tool_messages_to_contents(
108
109
  )
109
110
  has_text = merged_text.strip() != ""
110
111
 
111
- images = [part for part in msg.parts if isinstance(part, message.ImageURLPart)] + attachment.images
112
+ images: list[ImagePart] = [
113
+ part for part in msg.parts if isinstance(part, (message.ImageURLPart, message.ImageFilePart))
114
+ ]
115
+ images.extend(attachment.images)
112
116
  image_parts: list[Part] = []
113
117
  function_response_parts: list[dict[str, Any]] = []
114
118
 
115
119
  for image in images:
116
120
  try:
117
121
  image_parts.append(_image_part_to_part(image))
118
- if image.url.startswith("data:"):
122
+ if isinstance(image, message.ImageFilePart):
123
+ inline_data = _data_url_to_inline_data(image_file_to_data_url(image))
124
+ function_response_parts.append({"inlineData": inline_data})
125
+ elif image.url.startswith("data:"):
119
126
  inline_data = _data_url_to_inline_data(image.url)
120
127
  function_response_parts.append({"inlineData": inline_data})
121
128
  except ValueError:
@@ -0,0 +1,3 @@
1
+ from klaude_code.llm.bedrock_anthropic.client import BedrockClient
2
+
3
+ __all__ = ["BedrockClient"]
@@ -3,7 +3,6 @@
3
3
  # pyright: reportUnknownArgumentType=false
4
4
  # pyright: reportAttributeAccessIssue=false
5
5
 
6
- import json
7
6
  from base64 import b64encode
8
7
  from collections.abc import AsyncGenerator, AsyncIterator
9
8
  from typing import Any, cast, override
@@ -33,6 +32,7 @@ from klaude_code.llm.client import LLMClientABC, LLMStreamABC
33
32
  from klaude_code.llm.google.input import convert_history_to_contents, convert_tool_schema
34
33
  from klaude_code.llm.image import save_assistant_image
35
34
  from klaude_code.llm.input_common import apply_config_defaults
35
+ from klaude_code.llm.json_stable import dumps_canonical_json
36
36
  from klaude_code.llm.registry import register
37
37
  from klaude_code.llm.stream_parts import (
38
38
  append_text_part,
@@ -122,6 +122,8 @@ def _usage_from_metadata(
122
122
  if usage is None:
123
123
  return None
124
124
 
125
+ # In Gemini usage metadata, prompt_token_count represents the full prompt tokens
126
+ # (including cached tokens). cached_content_token_count is a subset of prompt tokens.
125
127
  cached = usage.cached_content_token_count or 0
126
128
  prompt = usage.prompt_token_count or 0
127
129
  response = usage.candidates_token_count or 0
@@ -136,10 +138,10 @@ def _usage_from_metadata(
136
138
 
137
139
  total = usage.total_token_count
138
140
  if total is None:
139
- total = prompt + cached + response + thoughts
141
+ total = prompt + response + thoughts
140
142
 
141
143
  return model.Usage(
142
- input_tokens=prompt + cached,
144
+ input_tokens=prompt,
143
145
  cached_tokens=cached,
144
146
  output_tokens=response + thoughts,
145
147
  reasoning_tokens=thoughts,
@@ -385,7 +387,7 @@ async def parse_google_stream(
385
387
  args_obj = function_call.args
386
388
  if args_obj is not None:
387
389
  # Add ToolCallPart, then ThinkingSignaturePart after it
388
- state.append_tool_call(call_id, name, json.dumps(args_obj, ensure_ascii=False))
390
+ state.append_tool_call(call_id, name, dumps_canonical_json(args_obj))
389
391
  encoded_sig = _encode_thought_signature(thought_signature)
390
392
  if encoded_sig:
391
393
  state.append_thinking_signature(encoded_sig)
@@ -400,7 +402,7 @@ async def parse_google_stream(
400
402
  will_continue = function_call.will_continue
401
403
  if will_continue is False and call_id in partial_args_by_call and call_id not in completed_tool_items:
402
404
  # Add ToolCallPart, then ThinkingSignaturePart after it
403
- state.append_tool_call(call_id, name, json.dumps(partial_args_by_call[call_id], ensure_ascii=False))
405
+ state.append_tool_call(call_id, name, dumps_canonical_json(partial_args_by_call[call_id]))
404
406
  stored_sig = started_tool_calls.get(call_id, (name, None))[1]
405
407
  encoded_stored_sig = _encode_thought_signature(stored_sig)
406
408
  if encoded_stored_sig:
@@ -412,7 +414,7 @@ async def parse_google_stream(
412
414
  if call_id in completed_tool_items:
413
415
  continue
414
416
  args = partial_args_by_call.get(call_id, {})
415
- state.append_tool_call(call_id, name, json.dumps(args, ensure_ascii=False))
417
+ state.append_tool_call(call_id, name, dumps_canonical_json(args))
416
418
  encoded_stored_sig = _encode_thought_signature(stored_sig)
417
419
  if encoded_stored_sig:
418
420
  state.append_thinking_signature(encoded_stored_sig)
@@ -6,18 +6,20 @@
6
6
  import json
7
7
  from base64 import b64decode
8
8
  from binascii import Error as BinasciiError
9
- from typing import Any
9
+ from typing import Any, cast
10
10
 
11
11
  from google.genai import types
12
12
 
13
13
  from klaude_code.const import EMPTY_TOOL_OUTPUT_MESSAGE
14
- from klaude_code.llm.image import assistant_image_to_data_url, parse_data_url
14
+ from klaude_code.llm.image import assistant_image_to_data_url, image_file_to_data_url, parse_data_url
15
15
  from klaude_code.llm.input_common import (
16
16
  DeveloperAttachment,
17
+ ImagePart,
17
18
  attach_developer_messages,
18
19
  merge_reminder_text,
19
20
  split_thinking_parts,
20
21
  )
22
+ from klaude_code.llm.json_stable import canonicalize_json
21
23
  from klaude_code.protocol import llm_param, message
22
24
 
23
25
 
@@ -26,16 +28,16 @@ def _data_url_to_blob(url: str) -> types.Blob:
26
28
  return types.Blob(data=decoded, mime_type=media_type)
27
29
 
28
30
 
29
- def _image_part_to_part(image: message.ImageURLPart) -> types.Part:
30
- url = image.url
31
+ def _image_part_to_part(image: ImagePart) -> types.Part:
32
+ url = image_file_to_data_url(image) if isinstance(image, message.ImageFilePart) else image.url
31
33
  if url.startswith("data:"):
32
34
  return types.Part(inline_data=_data_url_to_blob(url))
33
35
  # Best-effort: Gemini supports file URIs, and may accept public HTTPS URLs.
34
36
  return types.Part(file_data=types.FileData(file_uri=url))
35
37
 
36
38
 
37
- def _image_part_to_function_response_part(image: message.ImageURLPart) -> types.FunctionResponsePart:
38
- url = image.url
39
+ def _image_part_to_function_response_part(image: ImagePart) -> types.FunctionResponsePart:
40
+ url = image_file_to_data_url(image) if isinstance(image, message.ImageFilePart) else image.url
39
41
  if url.startswith("data:"):
40
42
  media_type, _, decoded = parse_data_url(url)
41
43
  return types.FunctionResponsePart.from_bytes(data=decoded, mime_type=media_type)
@@ -47,7 +49,7 @@ def _user_message_to_content(msg: message.UserMessage, attachment: DeveloperAtta
47
49
  for part in msg.parts:
48
50
  if isinstance(part, message.TextPart):
49
51
  parts.append(types.Part(text=part.text))
50
- elif isinstance(part, message.ImageURLPart):
52
+ elif isinstance(part, (message.ImageURLPart, message.ImageFilePart)):
51
53
  parts.append(_image_part_to_part(part))
52
54
  if attachment.text:
53
55
  parts.append(types.Part(text=attachment.text))
@@ -73,7 +75,10 @@ def _tool_messages_to_contents(
73
75
  )
74
76
  has_text = merged_text.strip() != ""
75
77
 
76
- images = [part for part in msg.parts if isinstance(part, message.ImageURLPart)] + attachment.images
78
+ images: list[ImagePart] = [
79
+ part for part in msg.parts if isinstance(part, (message.ImageURLPart, message.ImageFilePart))
80
+ ]
81
+ images.extend(attachment.images)
77
82
  image_parts: list[types.Part] = []
78
83
  function_response_parts: list[types.FunctionResponsePart] = []
79
84
 
@@ -155,11 +160,14 @@ def _assistant_message_to_content(msg: message.AssistantMessage, model_name: str
155
160
  args: dict[str, Any]
156
161
  if part.arguments_json:
157
162
  try:
158
- args = json.loads(part.arguments_json)
163
+ loaded: object = json.loads(part.arguments_json)
159
164
  except json.JSONDecodeError:
160
- args = {"_raw": part.arguments_json}
165
+ loaded = {"_raw": part.arguments_json}
161
166
  else:
162
- args = {}
167
+ loaded = {}
168
+
169
+ canonical = canonicalize_json(loaded)
170
+ args = cast(dict[str, Any], canonical) if isinstance(canonical, dict) else {"_value": canonical}
163
171
  parts.append(
164
172
  types.Part(
165
173
  function_call=types.FunctionCall(id=part.call_id, name=part.tool_name, args=args),
@@ -223,7 +231,7 @@ def convert_tool_schema(tools: list[llm_param.ToolSchema] | None) -> list[types.
223
231
  types.FunctionDeclaration(
224
232
  name=tool.name,
225
233
  description=tool.description,
226
- parameters_json_schema=tool.parameters,
234
+ parameters_json_schema=canonicalize_json(tool.parameters),
227
235
  )
228
236
  for tool in tools
229
237
  ]
klaude_code/llm/image.py CHANGED
@@ -99,21 +99,12 @@ def save_assistant_image(
99
99
  )
100
100
 
101
101
 
102
- def assistant_image_to_data_url(image: message.ImageFilePart) -> str:
103
- """Load an assistant image from disk and encode it as a base64 data URL.
104
-
105
- This is primarily used for multi-turn image editing, where providers require
106
- sending the previous assistant message (including images) back to the model.
107
- """
102
+ def image_file_to_data_url(image: message.ImageFilePart) -> str:
103
+ """Load an image file from disk and encode it as a base64 data URL."""
108
104
 
109
105
  file_path = Path(image.file_path)
110
106
  decoded = file_path.read_bytes()
111
107
 
112
- if len(decoded) > IMAGE_OUTPUT_MAX_BYTES:
113
- decoded_mb = len(decoded) / (1024 * 1024)
114
- limit_mb = IMAGE_OUTPUT_MAX_BYTES / (1024 * 1024)
115
- raise ValueError(f"Assistant image size ({decoded_mb:.2f}MB) exceeds limit ({limit_mb:.2f}MB)")
116
-
117
108
  mime_type = image.mime_type
118
109
  if not mime_type:
119
110
  guessed, _ = mimetypes.guess_type(str(file_path))
@@ -121,3 +112,19 @@ def assistant_image_to_data_url(image: message.ImageFilePart) -> str:
121
112
 
122
113
  encoded = b64encode(decoded).decode("ascii")
123
114
  return f"data:{mime_type};base64,{encoded}"
115
+
116
+
117
def assistant_image_to_data_url(image: message.ImageFilePart) -> str:
    """Load an assistant image from disk and encode it as a base64 data URL.

    This is primarily used for multi-turn image editing, where providers require
    sending the previous assistant message (including images) back to the model.

    Args:
        image: The image part whose ``file_path`` points at the file to encode.

    Returns:
        The data URL produced by ``image_file_to_data_url``.

    Raises:
        ValueError: If the file is larger than ``IMAGE_OUTPUT_MAX_BYTES``.
        OSError: If the file cannot be stat'ed (propagated from ``Path.stat``).
    """

    # Stat once: the size that is checked is the size that is reported, and
    # the file is not queried twice.
    size_bytes = Path(image.file_path).stat().st_size
    if size_bytes > IMAGE_OUTPUT_MAX_BYTES:
        size_mb = size_bytes / (1024 * 1024)
        limit_mb = IMAGE_OUTPUT_MAX_BYTES / (1024 * 1024)
        raise ValueError(f"Assistant image size ({size_mb:.2f}MB) exceeds limit ({limit_mb:.2f}MB)")

    return image_file_to_data_url(image)
@@ -8,26 +8,29 @@ if TYPE_CHECKING:
8
8
  from klaude_code.protocol.llm_param import LLMCallParameter, LLMConfigParameter
9
9
 
10
10
  from klaude_code.const import EMPTY_TOOL_OUTPUT_MESSAGE
11
+ from klaude_code.llm.image import image_file_to_data_url
11
12
  from klaude_code.protocol import message
12
13
 
14
+ ImagePart = message.ImageURLPart | message.ImageFilePart
13
15
 
14
- def _empty_image_parts() -> list[message.ImageURLPart]:
16
+
17
+ def _empty_image_parts() -> list[ImagePart]:
15
18
  return []
16
19
 
17
20
 
18
21
  @dataclass
19
22
  class DeveloperAttachment:
20
23
  text: str = ""
21
- images: list[message.ImageURLPart] = field(default_factory=_empty_image_parts)
24
+ images: list[ImagePart] = field(default_factory=_empty_image_parts)
22
25
 
23
26
 
24
- def _extract_developer_content(msg: message.DeveloperMessage) -> tuple[str, list[message.ImageURLPart]]:
27
+ def _extract_developer_content(msg: message.DeveloperMessage) -> tuple[str, list[ImagePart]]:
25
28
  text_parts: list[str] = []
26
- images: list[message.ImageURLPart] = []
29
+ images: list[ImagePart] = []
27
30
  for part in msg.parts:
28
31
  if isinstance(part, message.TextPart):
29
32
  text_parts.append(part.text + "\n")
30
- elif isinstance(part, message.ImageURLPart):
33
+ elif isinstance(part, (message.ImageURLPart, message.ImageFilePart)):
31
34
  images.append(part)
32
35
  return "".join(text_parts), images
33
36
 
@@ -87,10 +90,15 @@ def build_chat_content_parts(
87
90
  parts.append({"type": "text", "text": part.text})
88
91
  elif isinstance(part, message.ImageURLPart):
89
92
  parts.append({"type": "image_url", "image_url": {"url": part.url}})
93
+ elif isinstance(part, message.ImageFilePart):
94
+ parts.append({"type": "image_url", "image_url": {"url": image_file_to_data_url(part)}})
90
95
  if attachment.text:
91
96
  parts.append({"type": "text", "text": attachment.text})
92
97
  for image in attachment.images:
93
- parts.append({"type": "image_url", "image_url": {"url": image.url}})
98
+ if isinstance(image, message.ImageFilePart):
99
+ parts.append({"type": "image_url", "image_url": {"url": image_file_to_data_url(image)}})
100
+ else:
101
+ parts.append({"type": "image_url", "image_url": {"url": image.url}})
94
102
  if not parts:
95
103
  parts.append({"type": "text", "text": ""})
96
104
  return parts
@@ -141,6 +149,14 @@ def build_assistant_common_fields(
141
149
  }
142
150
  for tc in tool_calls
143
151
  ]
152
+
153
+ thinking_parts = [part for part in msg.parts if isinstance(part, message.ThinkingTextPart)]
154
+ if thinking_parts:
155
+ thinking_text = "".join(part.text for part in thinking_parts)
156
+ reasoning_field = next((p.reasoning_field for p in thinking_parts if p.reasoning_field), None)
157
+ if thinking_text and reasoning_field:
158
+ result[reasoning_field] = thinking_text
159
+
144
160
  return result
145
161
 
146
162
 
@@ -177,4 +193,14 @@ def apply_config_defaults(param: "LLMCallParameter", config: "LLMConfigParameter
177
193
  param.verbosity = config.verbosity
178
194
  if param.thinking is None:
179
195
  param.thinking = config.thinking
196
+ if param.modalities is None:
197
+ param.modalities = config.modalities
198
+ if param.image_config is None:
199
+ param.image_config = config.image_config
200
+ elif config.image_config is not None:
201
+ # Merge field-level: param overrides config defaults
202
+ if param.image_config.aspect_ratio is None:
203
+ param.image_config.aspect_ratio = config.image_config.aspect_ratio
204
+ if param.image_config.image_size is None:
205
+ param.image_config.image_size = config.image_config.image_size
180
206
  return param
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from collections.abc import Mapping
5
+ from typing import cast
6
+
7
+ type JsonValue = str | int | float | bool | None | list["JsonValue"] | dict[str, "JsonValue"]
8
+
9
+
10
def _json_object_key(key: object) -> str:
    """Spell a mapping key the way ``json.dumps`` would serialize it."""
    if isinstance(key, str):
        return key
    if key is None:
        return "null"
    if isinstance(key, bool):
        return "true" if key else "false"
    # Numbers stringify the same way under JSON; any other key type keeps the
    # best-effort ``str()`` conversion instead of raising.
    return str(key)


def canonicalize_json(value: object) -> "JsonValue":
    """Return a JSON-equivalent value with stable dict key ordering.

    This is used to make provider payload serialization stable across runs so that
    prefix caching has a better chance to hit.

    Mappings become plain dicts whose keys are strings sorted in ascending
    order; ``True``/``False``/``None`` keys are spelled ``"true"``/``"false"``/
    ``"null"`` to match JSON. Lists and tuples become lists with their element
    order preserved. Every other value is returned unchanged.

    Args:
        value: Any JSON-like Python value.

    Returns:
        The canonicalized value; containers are rebuilt, scalars are passed
        through as-is.
    """

    if isinstance(value, Mapping):
        mapping = cast(Mapping[object, object], value)
        pairs = [(_json_object_key(key), canonicalize_json(item)) for key, item in mapping.items()]
        # Sort on the key only so values never need to be comparable.
        pairs.sort(key=lambda pair: pair[0])
        return dict(pairs)

    if isinstance(value, (list, tuple)):
        sequence = cast("list[object] | tuple[object, ...]", value)
        return [canonicalize_json(item) for item in sequence]

    return cast("JsonValue", value)
31
+
32
+
33
def dumps_canonical_json(value: object) -> str:
    """Serialize ``value`` as compact JSON with a stable key order.

    The value is first passed through ``canonicalize_json``; the result is
    dumped without ASCII escaping and without whitespace after separators.
    """

    return json.dumps(
        canonicalize_json(value),
        ensure_ascii=False,
        separators=(",", ":"),
        # Ordering was already fixed by canonicalization.
        sort_keys=False,
    )