bone-agent 1.3.3 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +17 -0
  2. package/config.yaml.example +5 -2
  3. package/package.json +1 -1
  4. package/prompts/main/communication_style.md +1 -1
  5. package/prompts/main/dream.md +23 -9
  6. package/prompts/main/skills.md +3 -0
  7. package/prompts/micro/communication_style.md +1 -1
  8. package/prompts/micro/skills.md +1 -0
  9. package/src/core/agentic.py +138 -38
  10. package/src/core/chat_manager.py +19 -6
  11. package/src/core/config_manager.py +8 -1
  12. package/src/core/cron.py +0 -4
  13. package/src/core/metadata.py +75 -0
  14. package/src/core/skills.py +463 -0
  15. package/src/core/sub_agent.py +93 -43
  16. package/src/core/tool_feedback.py +87 -76
  17. package/src/llm/client.py +7 -2
  18. package/src/llm/codex_provider.py +350 -0
  19. package/src/llm/config.py +46 -2
  20. package/src/llm/prompts.py +12 -7
  21. package/src/llm/providers.py +3 -1
  22. package/src/llm/token_tracker.py +15 -0
  23. package/src/tools/__init__.py +24 -85
  24. package/src/tools/create_file.py +1 -1
  25. package/src/tools/directory.py +1 -1
  26. package/src/tools/edit.py +5 -1
  27. package/src/tools/file_reader.py +1 -1
  28. package/src/tools/helpers/__init__.py +1 -7
  29. package/src/tools/helpers/base.py +65 -16
  30. package/src/tools/helpers/loader.py +2 -88
  31. package/src/tools/helpers/path_resolver.py +54 -3
  32. package/src/tools/helpers/plugin_manifest.py +99 -70
  33. package/src/tools/review_sub_agent.py +2 -1
  34. package/src/tools/rg_search.py +24 -7
  35. package/src/tools/search_plugins.py +140 -72
  36. package/src/tools/shell.py +3 -3
  37. package/src/ui/commands.py +355 -33
  38. package/src/ui/displays.py +26 -1
  39. package/src/ui/main.py +0 -4
  40. package/src/ui/tool_confirmation.py +16 -5
  41. package/src/utils/editor.py +88 -39
  42. package/src/utils/settings.py +6 -2
  43. package/src/utils/validation.py +10 -0
@@ -0,0 +1,350 @@
1
+ """Codex provider adapter.
2
+
3
+ Codex is intentionally isolated from the normal provider handlers because it is
4
+ not a Chat Completions-compatible API. It targets the ChatGPT Codex Responses
5
+ backend and adapts that protocol back into vmCode's OpenAI-style internal shape.
6
+ """
7
+
8
+ import copy
9
+ import hashlib
10
+ import json
11
+ from typing import Any, Dict, Iterator, Optional
12
+
13
+ import requests
14
+
15
+ from exceptions import LLMResponseError
16
+
17
+
18
class CodexResponsesHandler:
    """Adapter for the ChatGPT Codex Responses backend.

    Codex-specific behavior kept here:
    - Uses `instructions` + `input` instead of Chat Completions `messages`.
    - Always sends `stream: true`; the backend returns SSE even for logical
      non-streaming agent calls.
    - Stores `_responses_output` replay metadata so tool-call turns can be sent
      back in Responses-native form while using `store: false`.
    """

    # Class-level flag; presumably read by the client to decide whether a
    # non-streaming call may be parsed with parse_sse_response -- confirm
    # against the caller.
    supports_sse_response_fallback = True

    def build_headers(self, config: Dict[str, Any]) -> Dict[str, str]:
        """Build request headers.

        Args:
            config: Provider configuration. Reads `type`, `api_key` and the
                optional `headers_extra` mapping.

        Returns:
            Header dict with a JSON content type, a bearer token when the
            provider type is "api" and a key is set, and any extra headers
            merged on top.
        """
        headers = {"Content-Type": "application/json"}
        if config.get("type") == "api" and config.get("api_key"):
            headers["Authorization"] = f"Bearer {config['api_key']}"
        # A present-but-None/empty `headers_extra` is treated as "no extras"
        # instead of raising inside dict.update().
        extra_headers = config.get("headers_extra")
        if extra_headers:
            headers.update(extra_headers)
        return headers

    def build_payload(self, config: Dict[str, Any], messages: list,
                      tools: Optional[list] = None, stream: bool = True) -> Dict[str, Any]:
        """Build request payload for Codex backend Responses API.

        Args:
            config: Provider configuration. Reads `payload`, `api_model`,
                `model`, `allow_temperature`, `allow_top_p`,
                `default_temperature` and `default_top_p`.
            messages: Chat Completions-style message dicts.
            tools: Optional Chat Completions-style tool schemas.
            stream: Accepted for signature compatibility only; the payload
                always carries `stream: True`.

        Returns:
            Payload dict with `instructions`, `input`, `store`, `stream` and,
            when available, `model`, `tools`, `prompt_cache_key`,
            `temperature` and `top_p`.
        """
        # System messages become the single `instructions` string. A None
        # content is coerced to "" so the join cannot fail.
        system_parts = [
            m.get("content") or ""
            for m in messages
            if m.get("role") == "system"
        ]
        instructions = "\n".join(system_parts) if system_parts else "You are a helpful assistant."

        codex_input = []
        for m in messages:
            if m.get("role") == "system":
                continue
            codex_input.extend(self._convert_message_to_input(m))

        # Keys listed after the config payload win, so `store` and `stream`
        # cannot be overridden from configuration.
        payload = {
            **(config.get("payload") or {}),
            "instructions": instructions,
            "input": codex_input,
            "store": False,
            "stream": True,
        }

        if "model" not in payload:
            model_name = config.get("api_model") or config.get("model")
            if model_name:
                payload["model"] = model_name

        if tools:
            payload["tools"] = [self._convert_tool_to_responses(tool) for tool in tools]

        if "prompt_cache_key" not in payload:
            model = payload.get("model") or "unknown-model"
            payload["prompt_cache_key"] = self._build_prompt_cache_key(
                model=model,
                instructions=instructions,
                tools=payload.get("tools"),
            )

        if "temperature" not in payload and config.get("allow_temperature", True):
            payload["temperature"] = config.get("default_temperature", 0.1)
        if "top_p" not in payload and config.get("allow_top_p", True):
            payload["top_p"] = config.get("default_top_p", 0.9)

        return payload

    def _convert_message_to_input(self, m: Dict[str, Any]) -> list:
        """Translate one non-system chat message into Responses input items."""
        role = m.get("role", "user")
        # Tool-call turns commonly carry `content: None`; normalize to "".
        content = m.get("content") or ""

        # Prefer the stored Responses-native items when this assistant turn
        # came from this adapter.
        if role == "assistant" and m.get("_responses_output"):
            return list(m.get("_responses_output") or [])

        if role == "assistant" and m.get("tool_calls"):
            items = []
            if content:
                # Assistant text is `output_text`, matching the plain
                # assistant branch below.
                items.append({
                    "role": "assistant",
                    "content": [{"type": "output_text", "text": content}]
                })
            for tool_call in m.get("tool_calls", []):
                function = tool_call.get("function", {})
                items.append({
                    "type": "function_call",
                    "call_id": tool_call.get("id"),
                    "name": function.get("name", ""),
                    "arguments": function.get("arguments", "{}"),
                })
            return items

        if role == "tool":
            return [{
                "type": "function_call_output",
                "call_id": m.get("tool_call_id"),
                "output": content,
            }]

        content_type = "output_text" if role == "assistant" else "input_text"
        return [{
            "role": role,
            "content": [{"type": content_type, "text": content}]
        }]

    def _build_prompt_cache_key(
        self,
        *,
        model: str,
        instructions: str,
        tools: Optional[list] = None,
    ) -> str:
        """Build a stable prompt-cache key for the reusable Codex prefix.

        The key is the first 24 hex characters of the SHA-256 of a canonical
        JSON dump (sorted keys, compact separators) of model, instructions
        and tools, prefixed with "bone-agent:".
        """
        cache_scope = {
            "model": model,
            "instructions": instructions,
            "tools": tools or [],
        }
        canonical = json.dumps(
            cache_scope,
            sort_keys=True,
            separators=(",", ":"),
            ensure_ascii=True,
        )
        cache_hash = hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:24]
        return f"bone-agent:{cache_hash}"

    def parse_response(self, response_json: Dict[str, Any]) -> Dict[str, Any]:
        """Parse Responses API output into Chat Completions format."""
        return self._normalize_response(response_json)

    def parse_sse_response(self, response_text: str) -> Dict[str, Any]:
        """Parse a full SSE response body into Chat Completions format.

        Args:
            response_text: The complete SSE body as text.

        Returns:
            The normalized response built from the `response.completed`
            event. If that event has no `output`, the items collected from
            `response.output_item.done` events are used instead.

        Raises:
            LLMResponseError: If a data line is not valid JSON, an event
                carries an `error`, or no `response.completed` event is found.
        """
        completed_response = None
        output_items = []

        for raw_line in response_text.splitlines():
            line = raw_line.strip()
            if not line.startswith("data: "):
                continue
            data_str = line[6:]
            if data_str == "[DONE]":
                break
            try:
                data = json.loads(data_str)
            except json.JSONDecodeError as e:
                raise LLMResponseError(
                    "Failed to decode SSE response from Codex backend",
                    details={"original_error": str(e)}
                ) from e

            # Valid JSON that is not an object carries no event to inspect.
            if not isinstance(data, dict):
                continue

            # Same error check as parse_stream, so a failure is reported
            # directly rather than as a missing completed event.
            self._raise_for_error_event(data)

            event_type = data.get("type")
            if event_type == "response.output_item.done":
                item = data.get("item")
                if item:
                    output_items.append(item)
                continue

            if event_type == "response.completed":
                completed_response = data.get("response")
                break

        if completed_response is None:
            raise LLMResponseError(
                "Codex backend returned streaming data without a completed response event"
            )

        if not completed_response.get("output") and output_items:
            # Copy before patching so the decoded event is not mutated.
            completed_response = dict(completed_response)
            completed_response["output"] = output_items

        return self._normalize_response(completed_response)

    def parse_stream(self, response: "requests.Response") -> Iterator[Dict[str, Any]]:
        """Parse streaming Responses API.

        Yields each non-empty `response.output_text.delta` string as it
        arrives and, after the stream ends, one `{'__usage__': ...}` dict when
        the `response.completed` event reported usage. Note that text deltas
        are plain strings even though the annotation names dicts.

        Raises:
            LLMResponseError: If a data line is not valid JSON or an event
                carries an `error`.
        """
        usage_data = None

        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode('utf-8')
            if not line.startswith('data: '):
                continue

            data_str = line[6:]
            if data_str.strip() == '[DONE]':
                break

            # Only the decode sits inside the try, so unrelated failures are
            # never reported as decode errors.
            try:
                data = json.loads(data_str)
            except json.JSONDecodeError as e:
                raise LLMResponseError(
                    "Failed to decode streaming response",
                    details={"original_error": str(e)}
                ) from e

            if not isinstance(data, dict):
                continue

            self._raise_for_error_event(data)

            event_type = data.get("type", "")

            if event_type == "response.completed":
                resp = data.get("response") or {}
                if "usage" in resp:
                    usage_data = self._normalize_usage(resp["usage"])

            if event_type == "response.output_text.delta":
                delta = data.get("delta", "")
                if delta:
                    yield delta

        if usage_data:
            yield {'__usage__': usage_data}

    def _raise_for_error_event(self, data: Dict[str, Any]) -> None:
        """Raise LLMResponseError when a decoded event carries an `error`.

        A missing or null `error` is treated as "no error". The error may be
        a dict with a `message`, or a bare string.
        """
        error = data.get("error")
        if error is None:
            return
        if isinstance(error, dict):
            error_msg = error.get("message")
        elif isinstance(error, str):
            error_msg = error
        else:
            error_msg = None
        raise LLMResponseError(
            f"Streaming error: {error_msg or 'Unknown streaming error'}",
            details={"error_data": error}
        )

    def _convert_tool_to_responses(self, tool: Dict[str, Any]) -> Dict[str, Any]:
        """Convert Chat Completions tool schema to Responses/Codex schema.

        Function tools are flattened (name, description and parameters move
        to the top level) and sent with `strict: False`. Anything else is
        returned unchanged.
        """
        if tool.get("type") == "function" and "function" in tool:
            function = tool["function"]
            return {
                "type": "function",
                "name": function.get("name", ""),
                "description": function.get("description", ""),
                "parameters": self._normalize_json_schema(function.get("parameters", {})),
                "strict": False,
            }
        return tool

    def _normalize_response(self, response_json: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize Responses output into Chat Completions message shape.

        Returns:
            Dict with a single-element `choices` list and `usage`. The
            message carries `content`, optional `tool_calls`, and
            `_responses_output` (a deep copy of the output items without
            their `id`) for replay by build_payload.
        """
        usage = self._normalize_usage(response_json.get("usage", {}))

        # `output` may be missing or null; both mean "no items".
        output_items = response_json.get("output") or []
        content_parts = []
        tool_calls = []

        for item in output_items:
            if not isinstance(item, dict):
                continue
            item_type = item.get("type")

            if item_type == "function_call":
                call_id = item.get("call_id") or item.get("id")
                tool_calls.append({
                    "id": call_id,
                    "type": "function",
                    "function": {
                        "name": item.get("name", ""),
                        "arguments": item.get("arguments", "{}"),
                    }
                })
                continue

            if item_type != "message":
                continue

            for c in item.get("content") or []:
                if c.get("type") in {"output_text", "text"}:
                    text = c.get("text")
                    if text is not None:
                        content_parts.append(text)

        message = {"role": "assistant"}
        text_content = "\n".join(content_parts) if content_parts else ""
        if tool_calls:
            message["tool_calls"] = tool_calls
            # Tool-call turns use None rather than "" when there is no text.
            message["content"] = text_content or None
        else:
            message["content"] = text_content

        # Item ids are stripped from the replay copy. NOTE(review): presumably
        # because requests use `store: false` and the backend cannot resolve
        # earlier item ids -- confirm.
        replay_items = copy.deepcopy(output_items)
        for item in replay_items:
            if isinstance(item, dict):
                item.pop("id", None)
        message["_responses_output"] = replay_items

        return {
            "choices": [{
                "message": message,
                "finish_reason": "tool_calls" if tool_calls else "stop",
            }],
            "usage": usage,
        }

    def _normalize_usage(self, usage: Any) -> Dict[str, Any]:
        """Normalize Codex Responses usage into vmCode's OpenAI-style usage shape.

        Works on a shallow copy of the input. Fills `prompt_tokens`,
        `completion_tokens` and `total_tokens` from `input_tokens` /
        `output_tokens` when absent, and mirrors
        `input_tokens_details.cached_tokens` into `prompt_tokens_details` and
        a top-level `cached_tokens`. Returns {} for non-dict input.
        """
        if not isinstance(usage, dict):
            return {}

        normalized = dict(usage)

        input_tokens = normalized.get("input_tokens")
        output_tokens = normalized.get("output_tokens")

        if normalized.get("prompt_tokens") is None and input_tokens is not None:
            normalized["prompt_tokens"] = input_tokens
        if normalized.get("completion_tokens") is None and output_tokens is not None:
            normalized["completion_tokens"] = output_tokens
        if normalized.get("total_tokens") is None:
            prompt_tokens = normalized.get("prompt_tokens")
            completion_tokens = normalized.get("completion_tokens")
            if prompt_tokens is not None and completion_tokens is not None:
                normalized["total_tokens"] = prompt_tokens + completion_tokens

        input_details = normalized.get("input_tokens_details")
        if isinstance(input_details, dict) and input_details.get("cached_tokens") is not None:
            cached_tokens = input_details["cached_tokens"]
            if normalized.get("prompt_tokens_details") is None:
                normalized["prompt_tokens_details"] = {"cached_tokens": cached_tokens}
            elif isinstance(normalized["prompt_tokens_details"], dict):
                normalized["prompt_tokens_details"].setdefault("cached_tokens", cached_tokens)
            normalized.setdefault("cached_tokens", cached_tokens)

        return normalized

    def _normalize_json_schema(self, schema: Any) -> Any:
        """Normalize a JSON Schema for Responses function tools.

        Returns a shallow copy in which every `object` schema has a
        `properties` mapping and defaults `additionalProperties` to False,
        recursing through `properties`, array `items` and
        `anyOf`/`oneOf`/`allOf`. Non-dict input is returned unchanged.
        """
        if not isinstance(schema, dict):
            return schema

        normalized = dict(schema)
        schema_type = normalized.get("type")

        if schema_type == "object":
            properties = normalized.get("properties")
            # A missing or malformed `properties` is treated as empty.
            if not isinstance(properties, dict):
                properties = {}
            normalized["properties"] = {
                key: self._normalize_json_schema(value)
                for key, value in properties.items()
            }
            normalized.setdefault("additionalProperties", False)

        if schema_type == "array" and "items" in normalized:
            normalized["items"] = self._normalize_json_schema(normalized["items"])

        for key in ("anyOf", "oneOf", "allOf"):
            if key in normalized and isinstance(normalized[key], list):
                normalized[key] = [self._normalize_json_schema(item) for item in normalized[key]]

        return normalized
package/src/llm/config.py CHANGED
@@ -32,6 +32,7 @@ ENV_API_KEYS = {
32
32
  'MINIMAX_PLAN_API_KEY': os.environ.get('MINIMAX_PLAN_API_KEY'),
33
33
  'MINIMAX_API_KEY': os.environ.get('MINIMAX_API_KEY'),
34
34
  'BONE_PROXY_API_KEY': os.environ.get('BONE_PROXY_API_KEY'),
35
+ 'CODEX_PLAN_API_KEY': os.environ.get('CODEX_PLAN_API_KEY'),
35
36
  }
36
37
 
37
38
  # Detect platform for llama.cpp paths
@@ -73,6 +74,24 @@ def _load_config():
73
74
 
74
75
  _CONFIG = _load_config()
75
76
 
77
+
78
def _get_codex_token() -> str:
    """Read access token from Codex CLI's cached auth (~/.codex/auth.json).

    Codex CLI stores OAuth tokens here after `codex login`.

    Returns:
        The `tokens.access_token` string if the file exists, is valid JSON
        and holds a string token; an empty string in every other case. This
        lookup is best-effort and never raises for a missing, unreadable or
        malformed file.
    """
    import json

    try:
        auth_path = Path.home() / ".codex" / "auth.json"
        data = json.loads(auth_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RuntimeError):
        # OSError: file missing or unreadable. ValueError: invalid JSON or
        # undecodable bytes. RuntimeError: home directory cannot be resolved.
        return ""

    # Validate the shape explicitly so a null or non-string token can never
    # leak out of a function annotated to return str.
    tokens = data.get("tokens") if isinstance(data, dict) else None
    token = tokens.get("access_token") if isinstance(tokens, dict) else None
    return token if isinstance(token, str) else ""
93
+
94
+
76
95
  # Cache for provider registry (built once at module load)
77
96
  _provider_registry_cache = None
78
97
  _cached_provider = None
@@ -134,8 +153,8 @@ def _get_provider_registry():
134
153
  },
135
154
  "default_temperature": 0.1,
136
155
  "default_top_p": 0.9,
137
- "allow_top_p": True,
138
- "allow_temperature": True,
156
+ "allow_top_p": False,
157
+ "allow_temperature": False,
139
158
  "cost_in": 0.0,
140
159
  "cost_out": 0.0
141
160
  },
@@ -311,6 +330,22 @@ def _get_provider_registry():
311
330
  "allow_temperature": True,
312
331
  **_model_cost("KIMI_MODEL"),
313
332
  },
333
+ "codex": {
334
+ "type": "api",
335
+ "api_key": _CONFIG.get("CODEX_PLAN_API_KEY", "") or _get_codex_token(),
336
+ "model": _CONFIG.get("CODEX_PLAN_MODEL", "gpt-5.4-mini"),
337
+ "api_base": _CONFIG.get("CODEX_PLAN_API_BASE", "https://chatgpt.com/backend-api/codex"),
338
+ "endpoint": "/responses",
339
+ "error_prefix": "Codex",
340
+ "config_keys": {
341
+ "CODEX_PLAN_API_KEY": "",
342
+ "CODEX_PLAN_MODEL": "",
343
+ "CODEX_PLAN_API_BASE": "https://chatgpt.com/backend-api/codex",
344
+ },
345
+ "allow_temperature": False,
346
+ "allow_top_p": False,
347
+ **_model_cost("CODEX_PLAN_MODEL"),
348
+ },
314
349
  "bone": {
315
350
  "type": "api",
316
351
  "api_key": _CONFIG.get("BONE_PROXY_API_KEY", ""),
@@ -421,6 +456,14 @@ def get_providers():
421
456
  return list(PROVIDER_REGISTRY.keys())
422
457
 
423
458
 
459
def get_provider_display_name(provider: str) -> str:
    """Return the user-facing provider name for a provider key.

    Keys with an explicit override use it; any other key is title-cased
    with underscores turned into spaces.
    """
    overrides = {"codex": "Codex"}
    fallback = provider.replace("_", " ").title()
    if provider in overrides:
        return overrides[provider]
    return fallback
465
+
466
+
424
467
  # ============================================================================
425
468
  # PROVIDER REGISTRY - Centralized provider configuration
426
469
  # ============================================================================
@@ -434,6 +477,7 @@ __all__ = [
434
477
  "CONFIG_PATH",
435
478
  "PROVIDER_REGISTRY",
436
479
  "get_providers",
480
+ "get_provider_display_name",
437
481
  "get_model_cost",
438
482
  "LLM_PROVIDER",
439
483
  "TOOLS_ENABLED",
@@ -262,18 +262,17 @@ def _build_vault_section(variant: str = "main") -> str | None:
262
262
 
263
263
 
264
264
  def _build_context_section() -> str:
265
- """Build a dynamic section with current date, time, and location."""
265
+ """Build a dynamic section with current date and location."""
266
266
  from datetime import datetime
267
+ import os
267
268
 
268
269
  now = datetime.now()
269
270
  date_str = now.strftime("%A, %B %d, %Y")
270
- time_str = now.strftime("%I:%M %p")
271
- timezone = now.astimezone().tzinfo
272
271
 
273
272
  return (
274
273
  "## Current Context\n\n"
275
274
  f"**Date:** {date_str}\n"
276
- f"**Time:** {time_str} ({timezone})\n"
275
+ f"**Working directory:** {os.getcwd()}\n"
277
276
  )
278
277
 
279
278
 
@@ -364,6 +363,7 @@ def _main_sections(variant: str) -> list[tuple[str, callable]]:
364
363
  ("communication_style", lambda: _static(variant, "communication_style.md")),
365
364
  ("trust_subagent_context", lambda: _static(variant, "trust_subagent_context.md")),
366
365
  ("context_reliability", lambda: _static(variant, "context_reliability.md")),
366
+ ("skills", lambda: _static(variant, "skills.md")),
367
367
  ("conversational_tool_calling", lambda: _static(variant, "conversational_tool_calling.md")),
368
368
  ("professional_objectivity", lambda: _static(variant, "professional_objectivity.md")),
369
369
  ("think_before_acting", lambda: _static(variant, "think_before_acting.md")),
@@ -405,12 +405,14 @@ def _sub_agent_sections(variant: str) -> list[tuple[str, callable]]:
405
405
  middle = [
406
406
  ("trust_subagent_context", lambda: _static(variant, "trust_subagent_context.md")),
407
407
  ("context_reliability", lambda: _static(variant, "context_reliability.md")),
408
+ ("skills", lambda: _static(variant, "skills.md")),
408
409
  ("exploration_pattern", lambda: _static(variant, "exploration_pattern.md")),
409
410
  ("targeted_searching", lambda: _static(variant, "targeted_searching.md")),
410
411
  ("tool_preferences", lambda: _static(variant, "tool_preferences.md")),
411
412
  ]
412
413
  else:
413
414
  middle = [
415
+ ("skills", lambda: _static(variant, "skills.md")),
414
416
  ("conversational_tool_calling", lambda: _static(variant, "conversational_tool_calling.md")),
415
417
  ("professional_objectivity", lambda: _static(variant, "professional_objectivity.md")),
416
418
  ("think_before_acting", lambda: _static(variant, "think_before_acting.md")),
@@ -426,7 +428,7 @@ def _sub_agent_sections(variant: str) -> list[tuple[str, callable]]:
426
428
  return base + middle
427
429
 
428
430
 
429
- def build_system_prompt(variant: str | None = None) -> str:
431
+ def build_system_prompt(variant: str | None = None, active_skills_section: str = "") -> str:
430
432
  """Build system prompt for main agent.
431
433
 
432
434
  Loads section content from prompts/<variant>/. Order is defined by
@@ -435,6 +437,7 @@ def build_system_prompt(variant: str | None = None) -> str:
435
437
  Args:
436
438
  variant: Variant name (e.g. 'main', 'micro').
437
439
  If None, reads from settings.
440
+ active_skills_section: Optional rendered active-skills block to append.
438
441
 
439
442
  Returns:
440
443
  Complete system prompt string
@@ -446,7 +449,10 @@ def build_system_prompt(variant: str | None = None) -> str:
446
449
  f"Prompt variant '{variant}' not found: "
447
450
  f"{_PROMPTS_DIR / variant} does not exist"
448
451
  )
449
- return _build_prompt(_main_sections(variant))
452
+ result = _build_prompt(_main_sections(variant))
453
+ if active_skills_section.strip():
454
+ result += "\n\n" + active_skills_section.strip()
455
+ return result
450
456
 
451
457
 
452
458
  def build_sub_agent_prompt(sub_agent_type: str = "research", soft_limit_tokens: int | None = None, hard_limit_tokens: int | None = None) -> str:
@@ -486,4 +492,3 @@ def build_sub_agent_prompt(sub_agent_type: str = "research", soft_limit_tokens:
486
492
  return "\n\n".join(result)
487
493
 
488
494
 
489
-
@@ -8,6 +8,7 @@ from typing import Optional, Dict, Any, Iterator
8
8
  import requests
9
9
 
10
10
  from exceptions import LLMResponseError
11
+ from .codex_provider import CodexResponsesHandler
11
12
 
12
13
 
13
14
  class OpenAIHandler:
@@ -417,6 +418,7 @@ HANDLER_REGISTRY = {
417
418
  "kimi": OpenAIHandler,
418
419
  "anthropic": AnthropicHandler,
419
420
  "local": OpenAIHandler,
421
+ "codex": CodexResponsesHandler,
420
422
  }
421
423
 
422
424
 
@@ -433,4 +435,4 @@ def get_handler(provider_name: str):
433
435
  return handler_class()
434
436
 
435
437
 
436
- __all__ = ['OpenAIHandler', 'AnthropicHandler', 'get_handler']
438
+ __all__ = ['OpenAIHandler', 'AnthropicHandler', 'CodexResponsesHandler', 'get_handler']
@@ -56,6 +56,11 @@ class TokenTracker:
56
56
  self.conv_cache_read_tokens = 0 # Per-conversation cache read tokens
57
57
  self.conv_cache_creation_tokens = 0 # Per-conversation cache creation tokens
58
58
 
59
+ # Last usage payload diagnostics (useful for debugging provider reporting gaps)
60
+ self.last_usage_snapshot = None
61
+ self.last_usage_keys = []
62
+ self.last_cache_metrics_reported = None
63
+
59
64
  # Active prompt variant (loaded from prompts/ directory)
60
65
  self.current_variant = "main"
61
66
 
@@ -85,6 +90,16 @@ class TokenTracker:
85
90
  if "usage" in usage_data:
86
91
  usage_data = usage_with_cost(usage_data)
87
92
 
93
+ self.last_usage_snapshot = dict(usage_data)
94
+ self.last_usage_keys = sorted(usage_data.keys())
95
+ details = usage_data.get('prompt_tokens_details')
96
+ self.last_cache_metrics_reported = (
97
+ usage_data.get('cache_read_input_tokens') is not None
98
+ or usage_data.get('cache_creation_input_tokens') is not None
99
+ or usage_data.get('cached_tokens') is not None
100
+ or (isinstance(details, dict) and details.get('cached_tokens') is not None)
101
+ )
102
+
88
103
  # Update cumulative token counts (accumulated for billing, never reset by compaction)
89
104
  prompt_tokens = usage_data.get('prompt_tokens', 0)
90
105
  completion_tokens = usage_data.get('completion_tokens', 0)