gobby 0.2.5__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. gobby/adapters/claude_code.py +13 -4
  2. gobby/adapters/codex.py +43 -3
  3. gobby/agents/runner.py +8 -0
  4. gobby/cli/__init__.py +6 -0
  5. gobby/cli/clones.py +419 -0
  6. gobby/cli/conductor.py +266 -0
  7. gobby/cli/installers/antigravity.py +3 -9
  8. gobby/cli/installers/claude.py +9 -9
  9. gobby/cli/installers/codex.py +2 -8
  10. gobby/cli/installers/gemini.py +2 -8
  11. gobby/cli/installers/shared.py +71 -8
  12. gobby/cli/skills.py +858 -0
  13. gobby/cli/tasks/ai.py +0 -440
  14. gobby/cli/tasks/crud.py +44 -6
  15. gobby/cli/tasks/main.py +0 -4
  16. gobby/cli/tui.py +2 -2
  17. gobby/cli/utils.py +3 -3
  18. gobby/clones/__init__.py +13 -0
  19. gobby/clones/git.py +547 -0
  20. gobby/conductor/__init__.py +16 -0
  21. gobby/conductor/alerts.py +135 -0
  22. gobby/conductor/loop.py +164 -0
  23. gobby/conductor/monitors/__init__.py +11 -0
  24. gobby/conductor/monitors/agents.py +116 -0
  25. gobby/conductor/monitors/tasks.py +155 -0
  26. gobby/conductor/pricing.py +234 -0
  27. gobby/conductor/token_tracker.py +160 -0
  28. gobby/config/app.py +63 -1
  29. gobby/config/search.py +110 -0
  30. gobby/config/servers.py +1 -1
  31. gobby/config/skills.py +43 -0
  32. gobby/config/tasks.py +6 -14
  33. gobby/hooks/event_handlers.py +145 -2
  34. gobby/hooks/hook_manager.py +48 -2
  35. gobby/hooks/skill_manager.py +130 -0
  36. gobby/install/claude/hooks/hook_dispatcher.py +4 -4
  37. gobby/install/codex/hooks/hook_dispatcher.py +1 -1
  38. gobby/install/gemini/hooks/hook_dispatcher.py +87 -12
  39. gobby/llm/claude.py +22 -34
  40. gobby/llm/claude_executor.py +46 -256
  41. gobby/llm/codex_executor.py +59 -291
  42. gobby/llm/executor.py +21 -0
  43. gobby/llm/gemini.py +134 -110
  44. gobby/llm/litellm_executor.py +143 -6
  45. gobby/llm/resolver.py +95 -33
  46. gobby/mcp_proxy/instructions.py +54 -0
  47. gobby/mcp_proxy/models.py +15 -0
  48. gobby/mcp_proxy/registries.py +68 -5
  49. gobby/mcp_proxy/server.py +33 -3
  50. gobby/mcp_proxy/services/tool_proxy.py +81 -1
  51. gobby/mcp_proxy/stdio.py +2 -1
  52. gobby/mcp_proxy/tools/__init__.py +0 -2
  53. gobby/mcp_proxy/tools/agent_messaging.py +317 -0
  54. gobby/mcp_proxy/tools/clones.py +903 -0
  55. gobby/mcp_proxy/tools/memory.py +1 -24
  56. gobby/mcp_proxy/tools/metrics.py +65 -1
  57. gobby/mcp_proxy/tools/orchestration/__init__.py +3 -0
  58. gobby/mcp_proxy/tools/orchestration/cleanup.py +151 -0
  59. gobby/mcp_proxy/tools/orchestration/wait.py +467 -0
  60. gobby/mcp_proxy/tools/session_messages.py +1 -2
  61. gobby/mcp_proxy/tools/skills/__init__.py +631 -0
  62. gobby/mcp_proxy/tools/task_orchestration.py +7 -0
  63. gobby/mcp_proxy/tools/task_readiness.py +14 -0
  64. gobby/mcp_proxy/tools/task_sync.py +1 -1
  65. gobby/mcp_proxy/tools/tasks/_context.py +0 -20
  66. gobby/mcp_proxy/tools/tasks/_crud.py +91 -4
  67. gobby/mcp_proxy/tools/tasks/_expansion.py +348 -0
  68. gobby/mcp_proxy/tools/tasks/_factory.py +6 -16
  69. gobby/mcp_proxy/tools/tasks/_lifecycle.py +60 -29
  70. gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +18 -29
  71. gobby/mcp_proxy/tools/workflows.py +1 -1
  72. gobby/mcp_proxy/tools/worktrees.py +5 -0
  73. gobby/memory/backends/__init__.py +6 -1
  74. gobby/memory/backends/mem0.py +6 -1
  75. gobby/memory/extractor.py +477 -0
  76. gobby/memory/manager.py +11 -2
  77. gobby/prompts/defaults/handoff/compact.md +63 -0
  78. gobby/prompts/defaults/handoff/session_end.md +57 -0
  79. gobby/prompts/defaults/memory/extract.md +61 -0
  80. gobby/runner.py +37 -16
  81. gobby/search/__init__.py +48 -6
  82. gobby/search/backends/__init__.py +159 -0
  83. gobby/search/backends/embedding.py +225 -0
  84. gobby/search/embeddings.py +238 -0
  85. gobby/search/models.py +148 -0
  86. gobby/search/unified.py +496 -0
  87. gobby/servers/http.py +23 -8
  88. gobby/servers/routes/admin.py +280 -0
  89. gobby/servers/routes/mcp/tools.py +241 -52
  90. gobby/servers/websocket.py +2 -2
  91. gobby/sessions/analyzer.py +2 -0
  92. gobby/sessions/transcripts/base.py +1 -0
  93. gobby/sessions/transcripts/claude.py +64 -5
  94. gobby/skills/__init__.py +91 -0
  95. gobby/skills/loader.py +685 -0
  96. gobby/skills/manager.py +384 -0
  97. gobby/skills/parser.py +258 -0
  98. gobby/skills/search.py +463 -0
  99. gobby/skills/sync.py +119 -0
  100. gobby/skills/updater.py +385 -0
  101. gobby/skills/validator.py +368 -0
  102. gobby/storage/clones.py +378 -0
  103. gobby/storage/database.py +1 -1
  104. gobby/storage/memories.py +43 -13
  105. gobby/storage/migrations.py +180 -6
  106. gobby/storage/sessions.py +73 -0
  107. gobby/storage/skills.py +749 -0
  108. gobby/storage/tasks/_crud.py +4 -4
  109. gobby/storage/tasks/_lifecycle.py +41 -6
  110. gobby/storage/tasks/_manager.py +14 -5
  111. gobby/storage/tasks/_models.py +8 -3
  112. gobby/sync/memories.py +39 -4
  113. gobby/sync/tasks.py +83 -6
  114. gobby/tasks/__init__.py +1 -2
  115. gobby/tasks/validation.py +24 -15
  116. gobby/tui/api_client.py +4 -7
  117. gobby/tui/app.py +5 -3
  118. gobby/tui/screens/orchestrator.py +1 -2
  119. gobby/tui/screens/tasks.py +2 -4
  120. gobby/tui/ws_client.py +1 -1
  121. gobby/utils/daemon_client.py +2 -2
  122. gobby/workflows/actions.py +84 -2
  123. gobby/workflows/context_actions.py +43 -0
  124. gobby/workflows/detection_helpers.py +115 -31
  125. gobby/workflows/engine.py +13 -2
  126. gobby/workflows/lifecycle_evaluator.py +29 -1
  127. gobby/workflows/loader.py +19 -6
  128. gobby/workflows/memory_actions.py +74 -0
  129. gobby/workflows/summary_actions.py +17 -0
  130. gobby/workflows/task_enforcement_actions.py +448 -6
  131. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/METADATA +82 -21
  132. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/RECORD +136 -107
  133. gobby/install/codex/prompts/forget.md +0 -7
  134. gobby/install/codex/prompts/memories.md +0 -7
  135. gobby/install/codex/prompts/recall.md +0 -7
  136. gobby/install/codex/prompts/remember.md +0 -13
  137. gobby/llm/gemini_executor.py +0 -339
  138. gobby/mcp_proxy/tools/task_expansion.py +0 -591
  139. gobby/tasks/context.py +0 -747
  140. gobby/tasks/criteria.py +0 -342
  141. gobby/tasks/expansion.py +0 -626
  142. gobby/tasks/prompts/expand.py +0 -327
  143. gobby/tasks/research.py +0 -421
  144. gobby/tasks/tdd.py +0 -352
  145. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/WHEEL +0 -0
  146. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/entry_points.txt +0 -0
  147. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/licenses/LICENSE.md +0 -0
  148. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/top_level.txt +0 -0
gobby/conductor/loop.py
@@ -0,0 +1,164 @@
+"""Conductor loop for orchestrating monitors and agents.
+
+The main daemon loop that:
+- Runs TaskMonitor and AgentWatcher periodically
+- Dispatches alerts based on monitor results
+- Checks budget before running
+- Optionally spawns agents in autonomous mode
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING, Any, Protocol
+
+if TYPE_CHECKING:
+    from gobby.conductor.alerts import AlertDispatcher
+    from gobby.conductor.monitors.agents import AgentWatcher
+    from gobby.conductor.monitors.tasks import TaskMonitor
+
+logger = logging.getLogger(__name__)
+
+
+class BudgetChecker(Protocol):
+    """Protocol for budget checking."""
+
+    def is_budget_exceeded(self) -> bool:
+        """Check if budget is exceeded."""
+        ...
+
+
+class AgentSpawner(Protocol):
+    """Protocol for agent spawning."""
+
+    def spawn(self, task_id: str) -> dict[str, Any]:
+        """Spawn an agent for a task."""
+        ...
+
+
+@dataclass
+class ConductorLoop:
+    """Main conductor loop that orchestrates monitors and agents.
+
+    Runs periodic checks:
+    - TaskMonitor: Detects stale tasks and blocked chains
+    - AgentWatcher: Detects stuck agents
+    - AlertDispatcher: Sends alerts for issues
+
+    Supports optional autonomous mode for auto-spawning agents.
+    """
+
+    task_monitor: TaskMonitor
+    """Monitor for task health."""
+
+    agent_watcher: AgentWatcher
+    """Watcher for agent health."""
+
+    alert_dispatcher: AlertDispatcher
+    """Dispatcher for alerts."""
+
+    budget_checker: BudgetChecker | None = None
+    """Optional budget checker for throttling."""
+
+    agent_spawner: AgentSpawner | None = None
+    """Optional agent spawner for autonomous mode."""
+
+    autonomous_mode: bool = False
+    """Whether to auto-spawn agents for ready tasks."""
+
+    _logger: logging.Logger = field(default_factory=lambda: logging.getLogger(__name__))
+    """Logger instance."""
+
+    def tick(self) -> dict[str, Any]:
+        """
+        Run one iteration of the conductor loop.
+
+        This method:
+        1. Checks budget (if budget_checker configured)
+        2. Runs TaskMonitor.check()
+        3. Runs AgentWatcher.check()
+        4. Dispatches alerts for any issues found
+        5. Optionally spawns agents in autonomous mode
+
+        Returns:
+            Dict with results from all checks and alerts
+        """
+        now = datetime.now(UTC)
+
+        # Check budget first
+        if self.budget_checker is not None:
+            if self.budget_checker.is_budget_exceeded():
+                self._logger.warning("Budget exceeded, throttling conductor")
+                return {
+                    "success": True,
+                    "throttled": True,
+                    "reason": "budget_exceeded",
+                    "checked_at": now.isoformat(),
+                }
+
+        # Run monitors
+        task_result = self.task_monitor.check()
+        agent_result = self.agent_watcher.check()
+
+        # Track alerts dispatched
+        alerts_dispatched = []
+
+        # Alert for stale tasks
+        stale_count = task_result["summary"]["stale_count"]
+        if stale_count > 0:
+            alert_result = self.alert_dispatcher.dispatch(
+                priority="urgent",
+                message=f"{stale_count} stale task(s) detected",
+                context={"stale_tasks": task_result["stale_tasks"]},
+                source="TaskMonitor",
+            )
+            alerts_dispatched.append(alert_result)
+
+        # Alert for blocked chains
+        blocked_count = task_result["summary"]["blocked_count"]
+        if blocked_count > 0:
+            alert_result = self.alert_dispatcher.dispatch(
+                priority="info",
+                message=f"{blocked_count} blocked task chain(s) detected",
+                context={"blocked_chains": task_result["blocked_chains"]},
+                source="TaskMonitor",
+            )
+            alerts_dispatched.append(alert_result)
+
+        # Alert for stuck agents
+        stuck_count = agent_result["summary"]["stuck_count"]
+        if stuck_count > 0:
+            alert_result = self.alert_dispatcher.dispatch(
+                priority="urgent",
+                message=f"{stuck_count} stuck agent(s) detected",
+                context={"stuck_agents": agent_result["stuck_agents"]},
+                source="AgentWatcher",
+            )
+            alerts_dispatched.append(alert_result)
+
+        # Build result
+        result: dict[str, Any] = {
+            "success": True,
+            "task_monitor_result": task_result,
+            "agent_watcher_result": agent_result,
+            "alerts_dispatched": len(alerts_dispatched),
+            "checked_at": now.isoformat(),
+        }
+
+        # Handle autonomous mode
+        if self.autonomous_mode:
+            result["autonomous_mode"] = True
+            if self.agent_spawner is not None:
+                result["spawner_available"] = True
+                # TODO: implement auto-spawn - see issue tracker for orchestration epic
+                self._logger.warning(
+                    "Autonomous mode enabled but auto-spawning not yet implemented. "
+                    f"Spawner available: {self.agent_spawner is not None}"
+                )
+            else:
+                result["spawner_available"] = False
+                self._logger.warning("Autonomous mode enabled but no spawner configured")
+
+        return result
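
A minimal sketch of driving ConductorLoop by hand, assuming in-memory stand-ins for its collaborators. FakeTaskMonitor, FakeAgentWatcher, and PrintDispatcher below are hypothetical test doubles, not part of gobby; they only mirror the result shapes tick() reads (summary.stale_count, summary.blocked_count, summary.stuck_count, and the corresponding detail lists).

    # Hypothetical test doubles for exercising ConductorLoop.tick().
    from typing import Any

    from gobby.conductor.loop import ConductorLoop

    class FakeTaskMonitor:
        def check(self) -> dict[str, Any]:
            # Same shape as TaskMonitor.check(): one stale task, no blocked chains.
            return {
                "stale_tasks": [{"task_id": "t1", "title": "demo", "hours_stale": 30.0}],
                "blocked_chains": [],
                "summary": {"stale_count": 1, "blocked_count": 0},
            }

    class FakeAgentWatcher:
        def check(self) -> dict[str, Any]:
            return {"stuck_agents": [], "summary": {"stuck_count": 0, "total_running": 2}}

    class PrintDispatcher:
        def dispatch(self, priority: str, message: str, context: dict, source: str) -> dict:
            print(f"[{priority}] {source}: {message}")
            return {"delivered": True}

    loop = ConductorLoop(
        task_monitor=FakeTaskMonitor(),      # type: ignore[arg-type]
        agent_watcher=FakeAgentWatcher(),    # type: ignore[arg-type]
        alert_dispatcher=PrintDispatcher(),  # type: ignore[arg-type]
    )
    result = loop.tick()
    # Prints "[urgent] TaskMonitor: 1 stale task(s) detected"
    assert result["alerts_dispatched"] == 1

With no budget_checker configured, the budget branch is skipped entirely; the loop only throttles when a checker is present and reports the budget as exceeded.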
gobby/conductor/monitors/__init__.py
@@ -0,0 +1,11 @@
+"""Conductor monitors for task and system health.
+
+Monitors detect issues that need attention:
+- TaskMonitor: Stale tasks, blocked chains
+- AgentWatcher: Stuck agents
+"""
+
+from gobby.conductor.monitors.agents import AgentWatcher
+from gobby.conductor.monitors.tasks import TaskMonitor
+
+__all__ = ["AgentWatcher", "TaskMonitor"]
gobby/conductor/monitors/agents.py
@@ -0,0 +1,116 @@
+"""Agent watcher for detecting stuck agents.
+
+Provides monitoring for:
+- Stuck agents: Running longer than threshold without progress
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from datetime import UTC, datetime, timedelta
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from gobby.agents.registry import RunningAgentRegistry
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class AgentWatcher:
+    """Watcher for agent health and status.
+
+    Detects:
+    - Stuck agents: Running longer than threshold
+    """
+
+    agent_registry: RunningAgentRegistry
+
+    def check(
+        self,
+        stuck_threshold_minutes: int = 15,
+        mode: str | None = None,
+    ) -> dict[str, Any]:
+        """
+        Check for agent health issues.
+
+        Args:
+            stuck_threshold_minutes: Minutes before an agent is considered stuck
+            mode: Optional filter by agent mode (terminal, headless, etc.)
+
+        Returns:
+            Dict with stuck_agents and summary
+        """
+        stuck_agents = self._find_stuck_agents(
+            threshold_minutes=stuck_threshold_minutes,
+            mode=mode,
+        )
+
+        # Get all running agents for total count
+        all_agents = self.agent_registry.list_all()
+        if mode:
+            all_agents = [a for a in all_agents if a.mode == mode]
+
+        return {
+            "stuck_agents": stuck_agents,
+            "summary": {
+                "stuck_count": len(stuck_agents),
+                "total_running": len(all_agents),
+                "checked_at": datetime.now(UTC).isoformat(),
+            },
+        }
+
+    def _find_stuck_agents(
+        self,
+        threshold_minutes: int = 15,
+        mode: str | None = None,
+    ) -> list[dict[str, Any]]:
+        """
+        Find agents that have been running longer than threshold.
+
+        Args:
+            threshold_minutes: Minutes before agent is considered stuck
+            mode: Optional filter by agent mode
+
+        Returns:
+            List of stuck agent info dicts
+        """
+        all_agents = self.agent_registry.list_all()
+
+        # Apply mode filter if specified
+        if mode:
+            all_agents = [a for a in all_agents if a.mode == mode]
+
+        threshold = datetime.now(UTC) - timedelta(minutes=threshold_minutes)
+        stuck_agents = []
+
+        for agent in all_agents:
+            # Check if agent has been running longer than threshold
+            started_at = agent.started_at
+
+            # Timezone policy: All timestamps are expected to be in UTC.
+            # If started_at is naive (no tzinfo), log a warning and skip this agent
+            # rather than assume UTC, as the source may be using local time.
+            if started_at.tzinfo is None:
+                logger.warning(
+                    f"Agent {agent.run_id} has naive started_at timestamp "
+                    f"({started_at}); skipping stuck detection. "
+                    "Ensure the agent registry stores UTC timestamps."
+                )
+                continue
+
+            if started_at < threshold:
+                minutes_running = (datetime.now(UTC) - started_at).total_seconds() / 60
+                stuck_agents.append(
+                    {
+                        "run_id": agent.run_id,
+                        "session_id": agent.session_id,
+                        "mode": agent.mode,
+                        "started_at": started_at.isoformat(),
+                        "minutes_running": round(minutes_running, 1),
+                        "provider": getattr(agent, "provider", "unknown"),
+                    }
+                )
+
+        return stuck_agents
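
A sketch of the stuck-agent check against a hypothetical in-memory registry; FakeAgent and FakeRegistry are illustration only (the real RunningAgentRegistry lives in gobby.agents.registry and its record type may differ). It also exercises the timezone policy above: the naive started_at on run r3 is skipped with a warning rather than assumed to be UTC.

    # Hypothetical registry stub for exercising AgentWatcher.
    from dataclasses import dataclass
    from datetime import UTC, datetime, timedelta

    from gobby.conductor.monitors.agents import AgentWatcher

    @dataclass
    class FakeAgent:
        run_id: str
        session_id: str
        mode: str
        started_at: datetime

    class FakeRegistry:
        def list_all(self) -> list[FakeAgent]:
            now = datetime.now(UTC)
            return [
                FakeAgent("r1", "s1", "headless", now - timedelta(minutes=90)),  # stuck
                FakeAgent("r2", "s2", "terminal", now - timedelta(minutes=2)),   # fresh
                FakeAgent("r3", "s3", "headless", datetime.now()),  # naive -> warned, skipped
            ]

    watcher = AgentWatcher(agent_registry=FakeRegistry())  # type: ignore[arg-type]
    report = watcher.check(stuck_threshold_minutes=15)
    assert report["summary"]["stuck_count"] == 1       # only r1 exceeds the threshold
    assert report["summary"]["total_running"] == 3     # all listed agents, r3 included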
gobby/conductor/monitors/tasks.py
@@ -0,0 +1,155 @@
+"""Task monitor for detecting stale and blocked tasks.
+
+Provides monitoring for:
+- Stale tasks: Tasks in_progress longer than a threshold
+- Blocked chains: Tasks blocked by open dependencies
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from datetime import UTC, datetime, timedelta
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from gobby.storage.tasks import LocalTaskManager
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TaskMonitor:
+    """Monitor for task health and status.
+
+    Detects:
+    - Stale tasks: in_progress longer than threshold
+    - Blocked chains: tasks waiting on open dependencies
+    """
+
+    task_manager: LocalTaskManager
+
+    def check(
+        self,
+        project_id: str | None = None,
+        stale_threshold_hours: int = 24,
+    ) -> dict[str, Any]:
+        """
+        Check for task health issues.
+
+        Args:
+            project_id: Optional project filter
+            stale_threshold_hours: Hours before an in_progress task is considered stale
+
+        Returns:
+            Dict with stale_tasks, blocked_chains, and summary
+        """
+        stale_tasks = self._find_stale_tasks(
+            project_id=project_id,
+            threshold_hours=stale_threshold_hours,
+        )
+        blocked_chains = self._find_blocked_chains(project_id=project_id)
+
+        return {
+            "stale_tasks": stale_tasks,
+            "blocked_chains": blocked_chains,
+            "summary": {
+                "stale_count": len(stale_tasks),
+                "blocked_count": len(blocked_chains),
+                "checked_at": datetime.now(UTC).isoformat(),
+            },
+        }
+
+    def _find_stale_tasks(
+        self,
+        project_id: str | None = None,
+        threshold_hours: int = 24,
+    ) -> list[dict[str, Any]]:
+        """
+        Find tasks that have been in_progress longer than threshold.
+
+        Args:
+            project_id: Optional project filter
+            threshold_hours: Hours before task is considered stale
+
+        Returns:
+            List of stale task info dicts
+        """
+        # Get all in_progress tasks
+        in_progress_tasks = self.task_manager.list_tasks(
+            project_id=project_id,
+            status="in_progress",
+            limit=1000,
+        )
+
+        threshold = datetime.now(UTC) - timedelta(hours=threshold_hours)
+        stale_tasks = []
+
+        for task in in_progress_tasks:
+            # Parse updated_at timestamp
+            try:
+                if task.updated_at:
+                    # Handle both string and datetime types
+                    if isinstance(task.updated_at, str):
+                        # Parse ISO format, handle both Z and +00:00 formats
+                        updated_str = task.updated_at.replace("Z", "+00:00")
+                        updated_at = datetime.fromisoformat(updated_str)
+                    else:
+                        updated_at = task.updated_at
+
+                    # Timezone policy: All timestamps are expected to be stored in UTC.
+                    # If updated_at is naive (no tzinfo), log a warning and skip
+                    # rather than assuming UTC which could cause incorrect staleness detection.
+                    if updated_at.tzinfo is None:
+                        logger.warning(
+                            f"Task {task.id} has naive updated_at timestamp "
+                            f"({updated_at}); skipping staleness check. "
+                            "Ensure the task storage stores UTC timestamps."
+                        )
+                        continue
+
+                    if updated_at < threshold:
+                        hours_stale = (datetime.now(UTC) - updated_at).total_seconds() / 3600
+                        stale_tasks.append(
+                            {
+                                "task_id": task.id,
+                                "title": task.title,
+                                "updated_at": task.updated_at,
+                                "hours_stale": round(hours_stale, 1),
+                            }
+                        )
+            except (ValueError, TypeError) as e:
+                logger.warning(f"Could not parse updated_at for task {task.id}: {e}")
+                continue
+
+        return stale_tasks
+
+    def _find_blocked_chains(
+        self,
+        project_id: str | None = None,
+    ) -> list[dict[str, Any]]:
+        """
+        Find blocked task chains.
+
+        Args:
+            project_id: Optional project filter
+
+        Returns:
+            List of blocked chain info dicts
+        """
+        blocked_tasks = self.task_manager.list_blocked_tasks(
+            project_id=project_id,
+            limit=1000,
+        )
+
+        blocked_chains = []
+        for task in blocked_tasks:
+            blocked_chains.append(
+                {
+                    "task_id": task.id,
+                    "title": task.title,
+                    "status": task.status,
+                }
+            )
+
+        return blocked_chains
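
The staleness test reduces to one timezone-aware comparison; here is the arithmetic _find_stale_tasks applies, pulled out as a standalone sketch (the timestamp literal is invented for illustration).

    from datetime import UTC, datetime, timedelta

    threshold_hours = 24
    threshold = datetime.now(UTC) - timedelta(hours=threshold_hours)

    # "Z"-suffixed ISO timestamps are normalized to "+00:00" before parsing,
    # matching the monitor's handling of both serialized forms.
    updated_at = datetime.fromisoformat("2025-01-01T12:00:00Z".replace("Z", "+00:00"))

    # A naive timestamp would be skipped at this point; this one carries tzinfo.
    if updated_at.tzinfo is not None and updated_at < threshold:
        hours_stale = (datetime.now(UTC) - updated_at).total_seconds() / 3600
        print(f"stale for {round(hours_stale, 1)} hours")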
gobby/conductor/pricing.py
@@ -0,0 +1,234 @@
+"""Token tracking and cost calculation using LiteLLM pricing utilities.
+
+LiteLLM maintains model_prices_and_context_window.json with current pricing
+for 100+ models, so we don't need to maintain our own pricing data.
+
+See: https://docs.litellm.ai/docs/completion/token_usage
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass, field
+from typing import Any
+
+try:
+    import litellm
+except ImportError:
+    litellm = None  # type: ignore[assignment]
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class TokenTracker:
+    """Track token usage and calculate costs using LiteLLM pricing utilities.
+
+    LiteLLM automatically maintains pricing data for 100+ models including:
+    - Claude models (Anthropic)
+    - GPT models (OpenAI)
+    - Gemini models (Google)
+    - And many more
+
+    Example:
+        tracker = TokenTracker()
+        cost = tracker.calculate_cost("claude-3-5-sonnet", 1000, 500)
+        tracker.track_usage("claude-3-5-sonnet", 1000, 500)
+        print(tracker.get_summary())
+    """
+
+    # Accumulated token counts
+    total_input_tokens: int = 0
+    total_output_tokens: int = 0
+    total_cache_read_tokens: int = 0
+    total_cache_write_tokens: int = 0
+    total_cost: float = 0.0
+
+    # Track usage by model
+    usage_by_model: dict[str, dict[str, Any]] = field(default_factory=dict)
+
+    def calculate_cost(
+        self,
+        model: str,
+        input_tokens: int,
+        output_tokens: int,
+        cache_read_tokens: int = 0,
+        cache_write_tokens: int = 0,
+    ) -> float:
+        """Calculate cost for given token usage using LiteLLM pricing.
+
+        Uses litellm.cost_per_token() to get per-token pricing for the model,
+        then calculates total cost.
+
+        Args:
+            model: Model name (e.g., "claude-3-5-sonnet-20241022", "gpt-4o")
+            input_tokens: Number of input (prompt) tokens
+            output_tokens: Number of output (completion) tokens
+            cache_read_tokens: Number of cache read tokens (if model supports)
+            cache_write_tokens: Number of cache write tokens (if model supports)
+
+        Returns:
+            Total cost in USD, or 0.0 if pricing is unavailable
+        """
+        if input_tokens == 0 and output_tokens == 0:
+            return 0.0
+
+        if litellm is None:
+            logger.debug("litellm not available, cannot calculate cost")
+            return 0.0
+
+        try:
+            # Get cost per token for this model
+            prompt_cost, completion_cost = litellm.cost_per_token(
+                model=model,
+                prompt_tokens=input_tokens,
+                completion_tokens=output_tokens,
+            )
+
+            total = prompt_cost + completion_cost
+
+            # Handle cache tokens if provided using LiteLLM's native cache pricing
+            if cache_read_tokens > 0 or cache_write_tokens > 0:
+                try:
+                    # Get model cost info from LiteLLM
+                    model_info = litellm.get_model_info(model=model)
+
+                    # Check for cache-specific pricing in model info
+                    cache_read_cost_per_token = model_info.get("cache_read_input_token_cost")
+                    cache_creation_cost_per_token = model_info.get(
+                        "cache_creation_input_token_cost"
+                    )
+
+                    if cache_read_tokens > 0:
+                        if cache_read_cost_per_token is not None:
+                            # Use native cache read pricing
+                            total += cache_read_tokens * cache_read_cost_per_token
+                        else:
+                            # Fallback: cache reads are typically 10% of input cost
+                            input_cost_per_token = model_info.get("input_cost_per_token", 0)
+                            total += cache_read_tokens * input_cost_per_token * 0.1
+
+                    if cache_write_tokens > 0:
+                        if cache_creation_cost_per_token is not None:
+                            # Use native cache creation pricing
+                            total += cache_write_tokens * cache_creation_cost_per_token
+                        else:
+                            # Fallback: cache writes are typically 1.25x input cost
+                            input_cost_per_token = model_info.get("input_cost_per_token", 0)
+                            total += cache_write_tokens * input_cost_per_token * 1.25
+
+                    # Note: For Anthropic models with prompt caching, LiteLLM may
+                    # already account for cache tokens in cost_per_token. Check if
+                    # the response usage includes cached_tokens to avoid double-counting.
+
+                except Exception:  # nosec B110 - best effort cache pricing, failure is non-critical
+                    # If cache pricing lookup fails, skip cache cost calculation
+                    pass
+
+            return total
+
+        except Exception as e:
+            # Model not found in LiteLLM pricing data
+            logger.debug(f"Could not calculate cost for model {model}: {e}")
+            return 0.0
+
+    def calculate_cost_from_response(self, response: Any) -> float:
+        """Calculate cost directly from a LiteLLM response object.
+
+        Uses litellm.completion_cost() which extracts usage info from the response
+        and calculates the total cost.
+
+        Args:
+            response: LiteLLM response object from acompletion/completion
+
+        Returns:
+            Total cost in USD, or 0.0 if calculation fails
+        """
+        if litellm is None:
+            logger.debug("litellm not available, cannot calculate cost from response")
+            return 0.0
+
+        try:
+            return litellm.completion_cost(response)
+        except Exception as e:
+            logger.debug(f"Could not calculate cost from response: {e}")
+            return 0.0
+
+    def track_usage(
+        self,
+        model: str,
+        input_tokens: int,
+        output_tokens: int,
+        cache_read_tokens: int = 0,
+        cache_write_tokens: int = 0,
+    ) -> float:
+        """Track token usage and accumulate costs.
+
+        Args:
+            model: Model name
+            input_tokens: Number of input tokens
+            output_tokens: Number of output tokens
+            cache_read_tokens: Number of cache read tokens
+            cache_write_tokens: Number of cache write tokens
+
+        Returns:
+            Cost for this usage
+        """
+        cost = self.calculate_cost(
+            model=model,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            cache_read_tokens=cache_read_tokens,
+            cache_write_tokens=cache_write_tokens,
+        )
+
+        # Accumulate totals
+        self.total_input_tokens += input_tokens
+        self.total_output_tokens += output_tokens
+        self.total_cache_read_tokens += cache_read_tokens
+        self.total_cache_write_tokens += cache_write_tokens
+        self.total_cost += cost
+
+        # Track by model
+        if model not in self.usage_by_model:
+            self.usage_by_model[model] = {
+                "input_tokens": 0,
+                "output_tokens": 0,
+                "cache_read_tokens": 0,
+                "cache_write_tokens": 0,
+                "cost": 0.0,
+                "calls": 0,
+            }
+
+        self.usage_by_model[model]["input_tokens"] += input_tokens
+        self.usage_by_model[model]["output_tokens"] += output_tokens
+        self.usage_by_model[model]["cache_read_tokens"] += cache_read_tokens
+        self.usage_by_model[model]["cache_write_tokens"] += cache_write_tokens
+        self.usage_by_model[model]["cost"] += cost
+        self.usage_by_model[model]["calls"] += 1
+
+        return cost
+
+    def reset(self) -> None:
+        """Reset all tracked usage."""
+        self.total_input_tokens = 0
+        self.total_output_tokens = 0
+        self.total_cache_read_tokens = 0
+        self.total_cache_write_tokens = 0
+        self.total_cost = 0.0
+        self.usage_by_model.clear()
+
+    def get_summary(self) -> dict[str, Any]:
+        """Get a summary of all tracked usage.
+
+        Returns:
+            Dict with total tokens, cost, and per-model breakdown
+        """
+        return {
+            "total_input_tokens": self.total_input_tokens,
+            "total_output_tokens": self.total_output_tokens,
+            "total_cache_read_tokens": self.total_cache_read_tokens,
+            "total_cache_write_tokens": self.total_cache_write_tokens,
+            "total_cost": self.total_cost,
+            "usage_by_model": dict(self.usage_by_model),
+        }
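
A usage sketch for TokenTracker, extending the class docstring's example with cache tokens. The import path follows this diff's file layout, and costs come from LiteLLM's bundled pricing table, so the exact dollar values are illustrative only.

    from gobby.conductor.pricing import TokenTracker  # path assumed from this diff

    tracker = TokenTracker()

    # Accumulate two calls; a model missing from LiteLLM's table simply costs 0.0.
    tracker.track_usage("gpt-4o", input_tokens=1000, output_tokens=500)
    tracker.track_usage("gpt-4o", input_tokens=2000, output_tokens=250, cache_read_tokens=1500)

    summary = tracker.get_summary()
    assert summary["total_input_tokens"] == 3000
    assert summary["total_cache_read_tokens"] == 1500
    assert summary["usage_by_model"]["gpt-4o"]["calls"] == 2

    # Fallback cache arithmetic (used only when a model has no native cache
    # pricing): reads bill at 10% of the input rate, writes at 125%. With a
    # hypothetical input rate of $3 per 1M tokens, 1500 cached reads add
    # 1500 * 0.000003 * 0.1 = $0.00045.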