gobby-0.2.5-py3-none-any.whl → gobby-0.2.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gobby/adapters/claude_code.py +13 -4
- gobby/adapters/codex.py +43 -3
- gobby/agents/runner.py +8 -0
- gobby/cli/__init__.py +6 -0
- gobby/cli/clones.py +419 -0
- gobby/cli/conductor.py +266 -0
- gobby/cli/installers/antigravity.py +3 -9
- gobby/cli/installers/claude.py +9 -9
- gobby/cli/installers/codex.py +2 -8
- gobby/cli/installers/gemini.py +2 -8
- gobby/cli/installers/shared.py +71 -8
- gobby/cli/skills.py +858 -0
- gobby/cli/tasks/ai.py +0 -440
- gobby/cli/tasks/crud.py +44 -6
- gobby/cli/tasks/main.py +0 -4
- gobby/cli/tui.py +2 -2
- gobby/cli/utils.py +3 -3
- gobby/clones/__init__.py +13 -0
- gobby/clones/git.py +547 -0
- gobby/conductor/__init__.py +16 -0
- gobby/conductor/alerts.py +135 -0
- gobby/conductor/loop.py +164 -0
- gobby/conductor/monitors/__init__.py +11 -0
- gobby/conductor/monitors/agents.py +116 -0
- gobby/conductor/monitors/tasks.py +155 -0
- gobby/conductor/pricing.py +234 -0
- gobby/conductor/token_tracker.py +160 -0
- gobby/config/app.py +63 -1
- gobby/config/search.py +110 -0
- gobby/config/servers.py +1 -1
- gobby/config/skills.py +43 -0
- gobby/config/tasks.py +6 -14
- gobby/hooks/event_handlers.py +145 -2
- gobby/hooks/hook_manager.py +48 -2
- gobby/hooks/skill_manager.py +130 -0
- gobby/install/claude/hooks/hook_dispatcher.py +4 -4
- gobby/install/codex/hooks/hook_dispatcher.py +1 -1
- gobby/install/gemini/hooks/hook_dispatcher.py +87 -12
- gobby/llm/claude.py +22 -34
- gobby/llm/claude_executor.py +46 -256
- gobby/llm/codex_executor.py +59 -291
- gobby/llm/executor.py +21 -0
- gobby/llm/gemini.py +134 -110
- gobby/llm/litellm_executor.py +143 -6
- gobby/llm/resolver.py +95 -33
- gobby/mcp_proxy/instructions.py +54 -0
- gobby/mcp_proxy/models.py +15 -0
- gobby/mcp_proxy/registries.py +68 -5
- gobby/mcp_proxy/server.py +33 -3
- gobby/mcp_proxy/services/tool_proxy.py +81 -1
- gobby/mcp_proxy/stdio.py +2 -1
- gobby/mcp_proxy/tools/__init__.py +0 -2
- gobby/mcp_proxy/tools/agent_messaging.py +317 -0
- gobby/mcp_proxy/tools/clones.py +903 -0
- gobby/mcp_proxy/tools/memory.py +1 -24
- gobby/mcp_proxy/tools/metrics.py +65 -1
- gobby/mcp_proxy/tools/orchestration/__init__.py +3 -0
- gobby/mcp_proxy/tools/orchestration/cleanup.py +151 -0
- gobby/mcp_proxy/tools/orchestration/wait.py +467 -0
- gobby/mcp_proxy/tools/session_messages.py +1 -2
- gobby/mcp_proxy/tools/skills/__init__.py +631 -0
- gobby/mcp_proxy/tools/task_orchestration.py +7 -0
- gobby/mcp_proxy/tools/task_readiness.py +14 -0
- gobby/mcp_proxy/tools/task_sync.py +1 -1
- gobby/mcp_proxy/tools/tasks/_context.py +0 -20
- gobby/mcp_proxy/tools/tasks/_crud.py +91 -4
- gobby/mcp_proxy/tools/tasks/_expansion.py +348 -0
- gobby/mcp_proxy/tools/tasks/_factory.py +6 -16
- gobby/mcp_proxy/tools/tasks/_lifecycle.py +60 -29
- gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +18 -29
- gobby/mcp_proxy/tools/workflows.py +1 -1
- gobby/mcp_proxy/tools/worktrees.py +5 -0
- gobby/memory/backends/__init__.py +6 -1
- gobby/memory/backends/mem0.py +6 -1
- gobby/memory/extractor.py +477 -0
- gobby/memory/manager.py +11 -2
- gobby/prompts/defaults/handoff/compact.md +63 -0
- gobby/prompts/defaults/handoff/session_end.md +57 -0
- gobby/prompts/defaults/memory/extract.md +61 -0
- gobby/runner.py +37 -16
- gobby/search/__init__.py +48 -6
- gobby/search/backends/__init__.py +159 -0
- gobby/search/backends/embedding.py +225 -0
- gobby/search/embeddings.py +238 -0
- gobby/search/models.py +148 -0
- gobby/search/unified.py +496 -0
- gobby/servers/http.py +23 -8
- gobby/servers/routes/admin.py +280 -0
- gobby/servers/routes/mcp/tools.py +241 -52
- gobby/servers/websocket.py +2 -2
- gobby/sessions/analyzer.py +2 -0
- gobby/sessions/transcripts/base.py +1 -0
- gobby/sessions/transcripts/claude.py +64 -5
- gobby/skills/__init__.py +91 -0
- gobby/skills/loader.py +685 -0
- gobby/skills/manager.py +384 -0
- gobby/skills/parser.py +258 -0
- gobby/skills/search.py +463 -0
- gobby/skills/sync.py +119 -0
- gobby/skills/updater.py +385 -0
- gobby/skills/validator.py +368 -0
- gobby/storage/clones.py +378 -0
- gobby/storage/database.py +1 -1
- gobby/storage/memories.py +43 -13
- gobby/storage/migrations.py +180 -6
- gobby/storage/sessions.py +73 -0
- gobby/storage/skills.py +749 -0
- gobby/storage/tasks/_crud.py +4 -4
- gobby/storage/tasks/_lifecycle.py +41 -6
- gobby/storage/tasks/_manager.py +14 -5
- gobby/storage/tasks/_models.py +8 -3
- gobby/sync/memories.py +39 -4
- gobby/sync/tasks.py +83 -6
- gobby/tasks/__init__.py +1 -2
- gobby/tasks/validation.py +24 -15
- gobby/tui/api_client.py +4 -7
- gobby/tui/app.py +5 -3
- gobby/tui/screens/orchestrator.py +1 -2
- gobby/tui/screens/tasks.py +2 -4
- gobby/tui/ws_client.py +1 -1
- gobby/utils/daemon_client.py +2 -2
- gobby/workflows/actions.py +84 -2
- gobby/workflows/context_actions.py +43 -0
- gobby/workflows/detection_helpers.py +115 -31
- gobby/workflows/engine.py +13 -2
- gobby/workflows/lifecycle_evaluator.py +29 -1
- gobby/workflows/loader.py +19 -6
- gobby/workflows/memory_actions.py +74 -0
- gobby/workflows/summary_actions.py +17 -0
- gobby/workflows/task_enforcement_actions.py +448 -6
- {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/METADATA +82 -21
- {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/RECORD +136 -107
- gobby/install/codex/prompts/forget.md +0 -7
- gobby/install/codex/prompts/memories.md +0 -7
- gobby/install/codex/prompts/recall.md +0 -7
- gobby/install/codex/prompts/remember.md +0 -13
- gobby/llm/gemini_executor.py +0 -339
- gobby/mcp_proxy/tools/task_expansion.py +0 -591
- gobby/tasks/context.py +0 -747
- gobby/tasks/criteria.py +0 -342
- gobby/tasks/expansion.py +0 -626
- gobby/tasks/prompts/expand.py +0 -327
- gobby/tasks/research.py +0 -421
- gobby/tasks/tdd.py +0 -352
- {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/WHEEL +0 -0
- {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/entry_points.txt +0 -0
- {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/licenses/LICENSE.md +0 -0
- {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/top_level.txt +0 -0
gobby/conductor/loop.py
ADDED
@@ -0,0 +1,164 @@

```python
"""Conductor loop for orchestrating monitors and agents.

The main daemon loop that:
- Runs TaskMonitor and AgentWatcher periodically
- Dispatches alerts based on monitor results
- Checks budget before running
- Optionally spawns agents in autonomous mode
"""

from __future__ import annotations

import logging
from dataclasses import dataclass, field
from datetime import UTC, datetime
from typing import TYPE_CHECKING, Any, Protocol

if TYPE_CHECKING:
    from gobby.conductor.alerts import AlertDispatcher
    from gobby.conductor.monitors.agents import AgentWatcher
    from gobby.conductor.monitors.tasks import TaskMonitor

logger = logging.getLogger(__name__)


class BudgetChecker(Protocol):
    """Protocol for budget checking."""

    def is_budget_exceeded(self) -> bool:
        """Check if budget is exceeded."""
        ...


class AgentSpawner(Protocol):
    """Protocol for agent spawning."""

    def spawn(self, task_id: str) -> dict[str, Any]:
        """Spawn an agent for a task."""
        ...


@dataclass
class ConductorLoop:
    """Main conductor loop that orchestrates monitors and agents.

    Runs periodic checks:
    - TaskMonitor: Detects stale tasks and blocked chains
    - AgentWatcher: Detects stuck agents
    - AlertDispatcher: Sends alerts for issues

    Supports optional autonomous mode for auto-spawning agents.
    """

    task_monitor: TaskMonitor
    """Monitor for task health."""

    agent_watcher: AgentWatcher
    """Watcher for agent health."""

    alert_dispatcher: AlertDispatcher
    """Dispatcher for alerts."""

    budget_checker: BudgetChecker | None = None
    """Optional budget checker for throttling."""

    agent_spawner: AgentSpawner | None = None
    """Optional agent spawner for autonomous mode."""

    autonomous_mode: bool = False
    """Whether to auto-spawn agents for ready tasks."""

    _logger: logging.Logger = field(default_factory=lambda: logging.getLogger(__name__))
    """Logger instance."""

    def tick(self) -> dict[str, Any]:
        """
        Run one iteration of the conductor loop.

        This method:
        1. Checks budget (if budget_checker configured)
        2. Runs TaskMonitor.check()
        3. Runs AgentWatcher.check()
        4. Dispatches alerts for any issues found
        5. Optionally spawns agents in autonomous mode

        Returns:
            Dict with results from all checks and alerts
        """
        now = datetime.now(UTC)

        # Check budget first
        if self.budget_checker is not None:
            if self.budget_checker.is_budget_exceeded():
                self._logger.warning("Budget exceeded, throttling conductor")
                return {
                    "success": True,
                    "throttled": True,
                    "reason": "budget_exceeded",
                    "checked_at": now.isoformat(),
                }

        # Run monitors
        task_result = self.task_monitor.check()
        agent_result = self.agent_watcher.check()

        # Track alerts dispatched
        alerts_dispatched = []

        # Alert for stale tasks
        stale_count = task_result["summary"]["stale_count"]
        if stale_count > 0:
            alert_result = self.alert_dispatcher.dispatch(
                priority="urgent",
                message=f"{stale_count} stale task(s) detected",
                context={"stale_tasks": task_result["stale_tasks"]},
                source="TaskMonitor",
            )
            alerts_dispatched.append(alert_result)

        # Alert for blocked chains
        blocked_count = task_result["summary"]["blocked_count"]
        if blocked_count > 0:
            alert_result = self.alert_dispatcher.dispatch(
                priority="info",
                message=f"{blocked_count} blocked task chain(s) detected",
                context={"blocked_chains": task_result["blocked_chains"]},
                source="TaskMonitor",
            )
            alerts_dispatched.append(alert_result)

        # Alert for stuck agents
        stuck_count = agent_result["summary"]["stuck_count"]
        if stuck_count > 0:
            alert_result = self.alert_dispatcher.dispatch(
                priority="urgent",
                message=f"{stuck_count} stuck agent(s) detected",
                context={"stuck_agents": agent_result["stuck_agents"]},
                source="AgentWatcher",
            )
            alerts_dispatched.append(alert_result)

        # Build result
        result: dict[str, Any] = {
            "success": True,
            "task_monitor_result": task_result,
            "agent_watcher_result": agent_result,
            "alerts_dispatched": len(alerts_dispatched),
            "checked_at": now.isoformat(),
        }

        # Handle autonomous mode
        if self.autonomous_mode:
            result["autonomous_mode"] = True
            if self.agent_spawner is not None:
                result["spawner_available"] = True
                # TODO: implement auto-spawn - see issue tracker for orchestration epic
                self._logger.warning(
                    "Autonomous mode enabled but auto-spawning not yet implemented. "
                    f"Spawner available: {self.agent_spawner is not None}"
                )
            else:
                result["spawner_available"] = False
                self._logger.warning("Autonomous mode enabled but no spawner configured")

        return result
```
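For orientation, here is a minimal sketch of driving the loop once. The stub classes are hypothetical stand-ins for the real `TaskMonitor`, `AgentWatcher`, and `AlertDispatcher` added in this release; only the method shapes that `tick()` actually calls are reproduced.

```python
# Hypothetical wiring sketch -- the Stub* classes below are illustrative
# test doubles, not part of the gobby package.
from typing import Any

from gobby.conductor.loop import ConductorLoop


class StubTaskMonitor:
    def check(self) -> dict[str, Any]:
        return {
            "stale_tasks": [],
            "blocked_chains": [],
            "summary": {"stale_count": 0, "blocked_count": 0},
        }


class StubAgentWatcher:
    def check(self) -> dict[str, Any]:
        return {"stuck_agents": [], "summary": {"stuck_count": 0, "total_running": 0}}


class StubAlertDispatcher:
    def dispatch(
        self, priority: str, message: str, context: dict[str, Any], source: str
    ) -> dict[str, Any]:
        print(f"[{priority}] {source}: {message}")
        return {"dispatched": True}


loop = ConductorLoop(
    task_monitor=StubTaskMonitor(),          # type: ignore[arg-type]
    agent_watcher=StubAgentWatcher(),        # type: ignore[arg-type]
    alert_dispatcher=StubAlertDispatcher(),  # type: ignore[arg-type]
)
result = loop.tick()
assert result["success"] and result["alerts_dispatched"] == 0
```

In a daemon this `tick()` would presumably be called on a timer; the method itself is deliberately single-shot, which keeps it easy to test.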
gobby/conductor/monitors/__init__.py
ADDED
@@ -0,0 +1,11 @@

```python
"""Conductor monitors for task and system health.

Monitors detect issues that need attention:
- TaskMonitor: Stale tasks, blocked chains
- AgentWatcher: Stuck agents
"""

from gobby.conductor.monitors.agents import AgentWatcher
from gobby.conductor.monitors.tasks import TaskMonitor

__all__ = ["AgentWatcher", "TaskMonitor"]
```
gobby/conductor/monitors/agents.py
ADDED
@@ -0,0 +1,116 @@

```python
"""Agent watcher for detecting stuck agents.

Provides monitoring for:
- Stuck agents: Running longer than threshold without progress
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from gobby.agents.registry import RunningAgentRegistry

logger = logging.getLogger(__name__)


@dataclass
class AgentWatcher:
    """Watcher for agent health and status.

    Detects:
    - Stuck agents: Running longer than threshold
    """

    agent_registry: RunningAgentRegistry

    def check(
        self,
        stuck_threshold_minutes: int = 15,
        mode: str | None = None,
    ) -> dict[str, Any]:
        """
        Check for agent health issues.

        Args:
            stuck_threshold_minutes: Minutes before an agent is considered stuck
            mode: Optional filter by agent mode (terminal, headless, etc.)

        Returns:
            Dict with stuck_agents and summary
        """
        stuck_agents = self._find_stuck_agents(
            threshold_minutes=stuck_threshold_minutes,
            mode=mode,
        )

        # Get all running agents for total count
        all_agents = self.agent_registry.list_all()
        if mode:
            all_agents = [a for a in all_agents if a.mode == mode]

        return {
            "stuck_agents": stuck_agents,
            "summary": {
                "stuck_count": len(stuck_agents),
                "total_running": len(all_agents),
                "checked_at": datetime.now(UTC).isoformat(),
            },
        }

    def _find_stuck_agents(
        self,
        threshold_minutes: int = 15,
        mode: str | None = None,
    ) -> list[dict[str, Any]]:
        """
        Find agents that have been running longer than threshold.

        Args:
            threshold_minutes: Minutes before agent is considered stuck
            mode: Optional filter by agent mode

        Returns:
            List of stuck agent info dicts
        """
        all_agents = self.agent_registry.list_all()

        # Apply mode filter if specified
        if mode:
            all_agents = [a for a in all_agents if a.mode == mode]

        threshold = datetime.now(UTC) - timedelta(minutes=threshold_minutes)
        stuck_agents = []

        for agent in all_agents:
            # Check if agent has been running longer than threshold
            started_at = agent.started_at

            # Timezone policy: All timestamps are expected to be in UTC.
            # If started_at is naive (no tzinfo), log a warning and skip this agent
            # rather than assume UTC, as the source may be using local time.
            if started_at.tzinfo is None:
                logger.warning(
                    f"Agent {agent.run_id} has naive started_at timestamp "
                    f"({started_at}); skipping stuck detection. "
                    "Ensure the agent registry stores UTC timestamps."
                )
                continue

            if started_at < threshold:
                minutes_running = (datetime.now(UTC) - started_at).total_seconds() / 60
                stuck_agents.append(
                    {
                        "run_id": agent.run_id,
                        "session_id": agent.session_id,
                        "mode": agent.mode,
                        "started_at": started_at.isoformat(),
                        "minutes_running": round(minutes_running, 1),
                        "provider": getattr(agent, "provider", "unknown"),
                    }
                )

        return stuck_agents
```
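A quick sketch of exercising the watcher against a fake registry. `FakeAgent` and `FakeRegistry` are hypothetical test doubles; the real `RunningAgentRegistry` lives in `gobby.agents.registry`, and only the `list_all()` call the watcher makes is assumed here.

```python
# Illustrative only: fake agent records with the attributes the watcher reads.
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta

from gobby.conductor.monitors.agents import AgentWatcher


@dataclass
class FakeAgent:
    run_id: str
    session_id: str
    mode: str
    started_at: datetime
    provider: str = "claude"


class FakeRegistry:
    def list_all(self) -> list[FakeAgent]:
        # One agent that started 30 minutes ago, with a tz-aware UTC timestamp.
        return [
            FakeAgent("r1", "s1", "headless", datetime.now(UTC) - timedelta(minutes=30))
        ]


watcher = AgentWatcher(agent_registry=FakeRegistry())  # type: ignore[arg-type]
report = watcher.check(stuck_threshold_minutes=15)
print(report["summary"])  # stuck_count=1, total_running=1, plus checked_at
```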
gobby/conductor/monitors/tasks.py
ADDED
@@ -0,0 +1,155 @@

```python
"""Task monitor for detecting stale and blocked tasks.

Provides monitoring for:
- Stale tasks: Tasks in_progress longer than a threshold
- Blocked chains: Tasks blocked by open dependencies
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from gobby.storage.tasks import LocalTaskManager

logger = logging.getLogger(__name__)


@dataclass
class TaskMonitor:
    """Monitor for task health and status.

    Detects:
    - Stale tasks: in_progress longer than threshold
    - Blocked chains: tasks waiting on open dependencies
    """

    task_manager: LocalTaskManager

    def check(
        self,
        project_id: str | None = None,
        stale_threshold_hours: int = 24,
    ) -> dict[str, Any]:
        """
        Check for task health issues.

        Args:
            project_id: Optional project filter
            stale_threshold_hours: Hours before an in_progress task is considered stale

        Returns:
            Dict with stale_tasks, blocked_chains, and summary
        """
        stale_tasks = self._find_stale_tasks(
            project_id=project_id,
            threshold_hours=stale_threshold_hours,
        )
        blocked_chains = self._find_blocked_chains(project_id=project_id)

        return {
            "stale_tasks": stale_tasks,
            "blocked_chains": blocked_chains,
            "summary": {
                "stale_count": len(stale_tasks),
                "blocked_count": len(blocked_chains),
                "checked_at": datetime.now(UTC).isoformat(),
            },
        }

    def _find_stale_tasks(
        self,
        project_id: str | None = None,
        threshold_hours: int = 24,
    ) -> list[dict[str, Any]]:
        """
        Find tasks that have been in_progress longer than threshold.

        Args:
            project_id: Optional project filter
            threshold_hours: Hours before task is considered stale

        Returns:
            List of stale task info dicts
        """
        # Get all in_progress tasks
        in_progress_tasks = self.task_manager.list_tasks(
            project_id=project_id,
            status="in_progress",
            limit=1000,
        )

        threshold = datetime.now(UTC) - timedelta(hours=threshold_hours)
        stale_tasks = []

        for task in in_progress_tasks:
            # Parse updated_at timestamp
            try:
                if task.updated_at:
                    # Handle both string and datetime types
                    if isinstance(task.updated_at, str):
                        # Parse ISO format, handle both Z and +00:00 formats
                        updated_str = task.updated_at.replace("Z", "+00:00")
                        updated_at = datetime.fromisoformat(updated_str)
                    else:
                        updated_at = task.updated_at

                    # Timezone policy: All timestamps are expected to be stored in UTC.
                    # If updated_at is naive (no tzinfo), log a warning and skip
                    # rather than assuming UTC which could cause incorrect staleness detection.
                    if updated_at.tzinfo is None:
                        logger.warning(
                            f"Task {task.id} has naive updated_at timestamp "
                            f"({updated_at}); skipping staleness check. "
                            "Ensure the task storage stores UTC timestamps."
                        )
                        continue

                    if updated_at < threshold:
                        hours_stale = (datetime.now(UTC) - updated_at).total_seconds() / 3600
                        stale_tasks.append(
                            {
                                "task_id": task.id,
                                "title": task.title,
                                "updated_at": task.updated_at,
                                "hours_stale": round(hours_stale, 1),
                            }
                        )
            except (ValueError, TypeError) as e:
                logger.warning(f"Could not parse updated_at for task {task.id}: {e}")
                continue

        return stale_tasks

    def _find_blocked_chains(
        self,
        project_id: str | None = None,
    ) -> list[dict[str, Any]]:
        """
        Find blocked task chains.

        Args:
            project_id: Optional project filter

        Returns:
            List of blocked chain info dicts
        """
        blocked_tasks = self.task_manager.list_blocked_tasks(
            project_id=project_id,
            limit=1000,
        )

        blocked_chains = []
        for task in blocked_tasks:
            blocked_chains.append(
                {
                    "task_id": task.id,
                    "title": task.title,
                    "status": task.status,
                }
            )

        return blocked_chains
```
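The same pattern works for the task monitor. `FakeTask` and `FakeTaskManager` below are hypothetical doubles that mimic just the two manager methods the monitor calls (`list_tasks` and `list_blocked_tasks`); the real `LocalTaskManager` is in `gobby.storage.tasks`.

```python
# Illustrative only: a fake task manager with one task stale past the threshold.
from dataclasses import dataclass
from datetime import UTC, datetime, timedelta

from gobby.conductor.monitors.tasks import TaskMonitor


@dataclass
class FakeTask:
    id: str
    title: str
    status: str
    updated_at: datetime


class FakeTaskManager:
    def list_tasks(self, project_id=None, status=None, limit=1000):
        # One task last updated 48 hours ago (tz-aware UTC).
        return [
            FakeTask("t1", "Refactor parser", "in_progress",
                     datetime.now(UTC) - timedelta(hours=48))
        ]

    def list_blocked_tasks(self, project_id=None, limit=1000):
        return []


monitor = TaskMonitor(task_manager=FakeTaskManager())  # type: ignore[arg-type]
report = monitor.check(stale_threshold_hours=24)
print(report["summary"]["stale_count"])  # 1
```

Note the monitor skips naive timestamps rather than guessing a timezone, so doubles like this must hand back tz-aware datetimes to register as stale.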
gobby/conductor/pricing.py
ADDED
@@ -0,0 +1,234 @@

```python
"""Token tracking and cost calculation using LiteLLM pricing utilities.

LiteLLM maintains model_prices_and_context_window.json with current pricing
for 100+ models, so we don't need to maintain our own pricing data.

See: https://docs.litellm.ai/docs/completion/token_usage
"""

from __future__ import annotations

import logging
from dataclasses import dataclass, field
from typing import Any

try:
    import litellm
except ImportError:
    litellm = None  # type: ignore[assignment]

logger = logging.getLogger(__name__)


@dataclass
class TokenTracker:
    """Track token usage and calculate costs using LiteLLM pricing utilities.

    LiteLLM automatically maintains pricing data for 100+ models including:
    - Claude models (Anthropic)
    - GPT models (OpenAI)
    - Gemini models (Google)
    - And many more

    Example:
        tracker = TokenTracker()
        cost = tracker.calculate_cost("claude-3-5-sonnet", 1000, 500)
        tracker.track_usage("claude-3-5-sonnet", 1000, 500)
        print(tracker.get_summary())
    """

    # Accumulated token counts
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cache_read_tokens: int = 0
    total_cache_write_tokens: int = 0
    total_cost: float = 0.0

    # Track usage by model
    usage_by_model: dict[str, dict[str, Any]] = field(default_factory=dict)

    def calculate_cost(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
    ) -> float:
        """Calculate cost for given token usage using LiteLLM pricing.

        Uses litellm.cost_per_token() to get per-token pricing for the model,
        then calculates total cost.

        Args:
            model: Model name (e.g., "claude-3-5-sonnet-20241022", "gpt-4o")
            input_tokens: Number of input (prompt) tokens
            output_tokens: Number of output (completion) tokens
            cache_read_tokens: Number of cache read tokens (if model supports)
            cache_write_tokens: Number of cache write tokens (if model supports)

        Returns:
            Total cost in USD, or 0.0 if pricing is unavailable
        """
        if input_tokens == 0 and output_tokens == 0:
            return 0.0

        if litellm is None:
            logger.debug("litellm not available, cannot calculate cost")
            return 0.0

        try:
            # Get cost per token for this model
            prompt_cost, completion_cost = litellm.cost_per_token(
                model=model,
                prompt_tokens=input_tokens,
                completion_tokens=output_tokens,
            )

            total = prompt_cost + completion_cost

            # Handle cache tokens if provided using LiteLLM's native cache pricing
            if cache_read_tokens > 0 or cache_write_tokens > 0:
                try:
                    # Get model cost info from LiteLLM
                    model_info = litellm.get_model_info(model=model)

                    # Check for cache-specific pricing in model info
                    cache_read_cost_per_token = model_info.get("cache_read_input_token_cost")
                    cache_creation_cost_per_token = model_info.get(
                        "cache_creation_input_token_cost"
                    )

                    if cache_read_tokens > 0:
                        if cache_read_cost_per_token is not None:
                            # Use native cache read pricing
                            total += cache_read_tokens * cache_read_cost_per_token
                        else:
                            # Fallback: cache reads are typically 10% of input cost
                            input_cost_per_token = model_info.get("input_cost_per_token", 0)
                            total += cache_read_tokens * input_cost_per_token * 0.1

                    if cache_write_tokens > 0:
                        if cache_creation_cost_per_token is not None:
                            # Use native cache creation pricing
                            total += cache_write_tokens * cache_creation_cost_per_token
                        else:
                            # Fallback: cache writes are typically 1.25x input cost
                            input_cost_per_token = model_info.get("input_cost_per_token", 0)
                            total += cache_write_tokens * input_cost_per_token * 1.25

                    # Note: For Anthropic models with prompt caching, LiteLLM may
                    # already account for cache tokens in cost_per_token. Check if
                    # the response usage includes cached_tokens to avoid double-counting.

                except Exception:  # nosec B110 - best effort cache pricing, failure is non-critical
                    # If cache pricing lookup fails, skip cache cost calculation
                    pass

            return total

        except Exception as e:
            # Model not found in LiteLLM pricing data
            logger.debug(f"Could not calculate cost for model {model}: {e}")
            return 0.0

    def calculate_cost_from_response(self, response: Any) -> float:
        """Calculate cost directly from a LiteLLM response object.

        Uses litellm.completion_cost() which extracts usage info from the response
        and calculates the total cost.

        Args:
            response: LiteLLM response object from acompletion/completion

        Returns:
            Total cost in USD, or 0.0 if calculation fails
        """
        if litellm is None:
            logger.debug("litellm not available, cannot calculate cost from response")
            return 0.0

        try:
            return litellm.completion_cost(response)
        except Exception as e:
            logger.debug(f"Could not calculate cost from response: {e}")
            return 0.0

    def track_usage(
        self,
        model: str,
        input_tokens: int,
        output_tokens: int,
        cache_read_tokens: int = 0,
        cache_write_tokens: int = 0,
    ) -> float:
        """Track token usage and accumulate costs.

        Args:
            model: Model name
            input_tokens: Number of input tokens
            output_tokens: Number of output tokens
            cache_read_tokens: Number of cache read tokens
            cache_write_tokens: Number of cache write tokens

        Returns:
            Cost for this usage
        """
        cost = self.calculate_cost(
            model=model,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cache_read_tokens=cache_read_tokens,
            cache_write_tokens=cache_write_tokens,
        )

        # Accumulate totals
        self.total_input_tokens += input_tokens
        self.total_output_tokens += output_tokens
        self.total_cache_read_tokens += cache_read_tokens
        self.total_cache_write_tokens += cache_write_tokens
        self.total_cost += cost

        # Track by model
        if model not in self.usage_by_model:
            self.usage_by_model[model] = {
                "input_tokens": 0,
                "output_tokens": 0,
                "cache_read_tokens": 0,
                "cache_write_tokens": 0,
                "cost": 0.0,
                "calls": 0,
            }

        self.usage_by_model[model]["input_tokens"] += input_tokens
        self.usage_by_model[model]["output_tokens"] += output_tokens
        self.usage_by_model[model]["cache_read_tokens"] += cache_read_tokens
        self.usage_by_model[model]["cache_write_tokens"] += cache_write_tokens
        self.usage_by_model[model]["cost"] += cost
        self.usage_by_model[model]["calls"] += 1

        return cost

    def reset(self) -> None:
        """Reset all tracked usage."""
        self.total_input_tokens = 0
        self.total_output_tokens = 0
        self.total_cache_read_tokens = 0
        self.total_cache_write_tokens = 0
        self.total_cost = 0.0
        self.usage_by_model.clear()

    def get_summary(self) -> dict[str, Any]:
        """Get a summary of all tracked usage.

        Returns:
            Dict with total tokens, cost, and per-model breakdown
        """
        return {
            "total_input_tokens": self.total_input_tokens,
            "total_output_tokens": self.total_output_tokens,
            "total_cache_read_tokens": self.total_cache_read_tokens,
            "total_cache_write_tokens": self.total_cache_write_tokens,
            "total_cost": self.total_cost,
            "usage_by_model": dict(self.usage_by_model),
        }
```
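A usage sketch for the tracker. With litellm missing or the model absent from its pricing table, every cost comes back as 0.0 by design, so the printed cost below depends on the environment; the model name is illustrative, and the import path is inferred from the file list above.

```python
# Sketch only: module path inferred from this release's file list.
from gobby.conductor.pricing import TokenTracker

tracker = TokenTracker()
# Two calls against the same model; tokens and cost accumulate per model and in total.
tracker.track_usage("gpt-4o", input_tokens=1_000, output_tokens=500)
tracker.track_usage("gpt-4o", input_tokens=2_000, output_tokens=250,
                    cache_read_tokens=1_500)

summary = tracker.get_summary()
print(summary["total_input_tokens"])                 # 3000
print(summary["usage_by_model"]["gpt-4o"]["calls"])  # 2
print(f"${summary['total_cost']:.4f}")               # 0.0000 if pricing is unavailable

tracker.reset()  # clear accumulated usage, e.g. between budget windows
```

A tracker like this pairs naturally with the conductor's `BudgetChecker` protocol: a checker can compare `total_cost` against a configured ceiling and make `tick()` throttle when it is exceeded.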