htmlgraph 0.26.13__py3-none-any.whl → 0.26.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
htmlgraph/__init__.py CHANGED
@@ -95,7 +95,7 @@ from htmlgraph.types import (
95
95
  )
96
96
  from htmlgraph.work_type_utils import infer_work_type, infer_work_type_from_id
97
97
 
98
- __version__ = "0.26.13"
98
+ __version__ = "0.26.15"
99
99
  __all__ = [
100
100
  # Exceptions
101
101
  "HtmlGraphError",
@@ -1,9 +1,12 @@
1
1
  """
2
2
  Analytics modules for HtmlGraph.
3
3
 
4
- Provides work type analysis, dependency analytics, cross-session analytics, and CLI analytics.
4
+ Provides work type analysis, dependency analytics, cross-session analytics, CLI analytics,
5
+ and cost attribution analysis for OTEL ROI.
5
6
  """
6
7
 
8
+ from htmlgraph.analytics.cost_analyzer import CostAnalyzer
9
+ from htmlgraph.analytics.cost_reporter import CostReporter
7
10
  from htmlgraph.analytics.cross_session import CrossSessionAnalytics
8
11
  from htmlgraph.analytics.dependency import DependencyAnalytics
9
12
  from htmlgraph.analytics.work_type import Analytics
@@ -12,4 +15,6 @@ __all__ = [
12
15
  "Analytics",
13
16
  "DependencyAnalytics",
14
17
  "CrossSessionAnalytics",
18
+ "CostAnalyzer",
19
+ "CostReporter",
15
20
  ]
@@ -0,0 +1,387 @@
1
+ """
2
+ CostAnalyzer for OTEL ROI Analysis - Phase 1.
3
+
4
+ Analyzes cost attribution of Task() delegations vs direct tool execution.
5
+ Provides insights into which delegations are most expensive and their ROI.
6
+
7
+ Components:
8
+ 1. get_task_delegations() - Query all task_delegation events with hierarchy
9
+ 2. calculate_task_cost(event_id) - Sum token costs of all child tool calls
10
+ 3. get_cost_by_subagent_type() - Group costs by subagent type
11
+ 4. get_cost_by_tool_type() - Show which tools cost most
12
+ 5. get_roi_stats() - Calculate parallelization savings and benefits
13
+
14
+ Usage:
15
+ from htmlgraph.analytics.cost_analyzer import CostAnalyzer
16
+ analyzer = CostAnalyzer()
17
+ delegations = analyzer.get_task_delegations_with_costs()
18
+ print(f"Total delegation cost: ${delegations['total_cost_usd']:.2f}")
19
+ """
20
+
21
+ import sqlite3
22
+ from dataclasses import dataclass, field
23
+ from datetime import datetime
24
+ from pathlib import Path
25
+ from typing import Any
26
+
27
+ from htmlgraph.cigs.cost import CostCalculator
28
+
29
+
30
+ @dataclass
31
+ class TaskDelegation:
32
+ """Represents a single Task delegation with cost analysis."""
33
+
34
+ event_id: str
35
+ session_id: str
36
+ timestamp: datetime
37
+ subagent_type: str
38
+ parent_event_id: str | None
39
+ tool_count: int = 0
40
+ total_cost_tokens: int = 0
41
+ child_events: list[dict[str, Any]] = field(default_factory=list)
42
+
43
+ def to_dict(self) -> dict[str, Any]:
44
+ """Convert to dictionary."""
45
+ return {
46
+ "event_id": self.event_id,
47
+ "session_id": self.session_id,
48
+ "timestamp": self.timestamp.isoformat(),
49
+ "subagent_type": self.subagent_type,
50
+ "parent_event_id": self.parent_event_id,
51
+ "tool_count": self.tool_count,
52
+ "total_cost_tokens": self.total_cost_tokens,
53
+ "child_events": self.child_events,
54
+ }
55
+
56
+
57
@dataclass
class CostBreakdown:
    """Aggregated token costs across all Task delegations."""

    # subagent_type -> total tokens spent by delegations of that type
    by_subagent: dict[str, int] = field(default_factory=dict)
    # tool_name -> total tokens spent on that tool across all delegations
    by_tool: dict[str, int] = field(default_factory=dict)
    # Grand total of tokens over every delegation
    total_cost_tokens: int = 0
    # Number of delegations the totals were computed from
    total_delegations: int = 0
    # total_cost_tokens divided by total_delegations (0.0 when there are none)
    avg_cost_per_delegation: float = 0.0
66
+
67
+
68
@dataclass
class ROIStats:
    """Estimated return on investment of delegating work to subagents.

    All cost figures are token counts. The values are heuristic estimates
    produced by ``CostAnalyzer.get_roi_stats``, not measurements.
    """

    # Tokens actually attributed to delegations
    total_delegation_cost: int = 0
    # Estimated tokens had the same work been executed directly
    estimated_direct_cost: int = 0
    # Estimated tokens saved by delegating
    estimated_savings: int = 0
    # estimated_savings as a percentage of estimated_direct_cost
    savings_percentage: float = 0.0
    # Efficiency factor assumed for parallel subagent work
    avg_parallelization_factor: float = 1.0
    # Fraction of tokens assumed to be saved through context preservation
    context_preservation_benefit: float = 0.0
    # Number of delegations the statistics cover
    total_delegations: int = 0
    # Mean token cost of one delegation
    avg_cost_per_delegation: float = 0.0
80
+
81
+
82
class CostAnalyzer:
    """
    Analyze cost attribution of Task delegations.

    Queries the ``agent_events`` table of the HtmlGraph SQLite database to derive:
    - Total cost of each Task delegation (sum of its child tool events)
    - Cost breakdown by subagent type and tool type
    - ROI estimates comparing direct vs delegated execution

    The USD and ROI figures are heuristic estimates driven by the class-level
    constants below; override them on a subclass or an instance to apply a
    different pricing or savings model.
    """

    # Blended price: roughly $3 per 1M input tokens and $6 per 1M output
    # tokens, averaged to ~$4.50 per 1M tokens.
    USD_PER_TOKEN: float = 0.0000045
    # Assumed cost multiplier had the same work run directly on the main
    # agent (context overhead).
    DIRECT_COST_MULTIPLIER: float = 2.5
    # Assumed efficiency factor of parallel subagent execution.
    PARALLELIZATION_FACTOR: float = 1.2
    # Assumed fraction of tokens saved through context preservation.
    CONTEXT_PRESERVATION_BENEFIT: float = 0.30

    def __init__(self, graph_dir: Path | None = None):
        """
        Initialize CostAnalyzer.

        Args:
            graph_dir: Root directory for HtmlGraph (defaults to
                ``.htmlgraph`` under the current working directory)

        Raises:
            FileNotFoundError: If ``htmlgraph.db`` does not exist in graph_dir
        """
        if graph_dir is None:
            graph_dir = Path.cwd() / ".htmlgraph"

        self.graph_dir = Path(graph_dir)
        self.db_path = self.graph_dir / "htmlgraph.db"
        self.cost_calculator = CostCalculator()

        # sqlite3.connect() would silently create an empty database, so fail
        # early when the file is missing.
        if not self.db_path.exists():
            raise FileNotFoundError(f"Database not found at {self.db_path}")

    def _get_connection(self) -> sqlite3.Connection:
        """Open a new database connection whose rows support access by column name."""
        conn = sqlite3.connect(str(self.db_path))
        conn.row_factory = sqlite3.Row
        return conn

    @staticmethod
    def _parse_timestamp(value: Any) -> Any:
        """Return *value* as a datetime when it is an ISO-8601 string, else unchanged."""
        if not isinstance(value, str):
            return value
        # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
        # on; rewrite it as an explicit UTC offset so older interpreters parse
        # it to the same value.
        if value.endswith("Z"):
            value = value[:-1] + "+00:00"
        return datetime.fromisoformat(value)

    def get_task_delegations(self) -> list[TaskDelegation]:
        """
        Query all task_delegation events from the database.

        Returns:
            List of TaskDelegation objects ordered by timestamp (newest first).
            Cost fields are left at their defaults; a missing subagent_type is
            reported as "unknown".
        """
        conn = self._get_connection()
        try:
            rows = conn.execute(
                """
                SELECT
                    event_id,
                    session_id,
                    timestamp,
                    subagent_type,
                    parent_event_id
                FROM agent_events
                WHERE event_type = 'task_delegation'
                ORDER BY timestamp DESC
                """
            ).fetchall()
        finally:
            conn.close()

        return [
            TaskDelegation(
                event_id=row["event_id"],
                session_id=row["session_id"],
                timestamp=self._parse_timestamp(row["timestamp"]),
                subagent_type=row["subagent_type"] or "unknown",
                parent_event_id=row["parent_event_id"],
            )
            for row in rows
        ]

    def calculate_task_cost(self, event_id: str) -> tuple[int, list[dict[str, Any]]]:
        """
        Calculate total cost of a Task delegation.

        Sums the ``cost_tokens`` of the delegation's direct child tool events.
        When a child has no stored cost but has a tool name, the cost is
        estimated via ``self.cost_calculator.predict_cost``.

        Args:
            event_id: Task delegation event ID

        Returns:
            Tuple of (total_cost_tokens, child_events_list); each child entry
            holds ``event_id``, ``tool_name``, ``cost_tokens`` and ``timestamp``,
            ordered by timestamp (oldest first).
        """
        conn = self._get_connection()
        try:
            # NOTE(review): both 'tool_call' and 'tool_result' rows are summed;
            # if one tool invocation records a cost on both rows the total is
            # double counted -- confirm against the event schema.
            rows = conn.execute(
                """
                SELECT
                    event_id,
                    tool_name,
                    cost_tokens,
                    timestamp
                FROM agent_events
                WHERE parent_event_id = ?
                AND event_type IN ('tool_call', 'tool_result')
                ORDER BY timestamp ASC
                """,
                (event_id,),
            ).fetchall()
        finally:
            conn.close()

        total_cost = 0
        child_events: list[dict[str, Any]] = []

        for row in rows:
            # A NULL or zero stored cost is treated as "not recorded".
            cost = row["cost_tokens"] or 0

            # If no stored cost, estimate based on tool type
            if cost == 0 and row["tool_name"]:
                cost = self.cost_calculator.predict_cost(row["tool_name"], {})

            total_cost += cost
            child_events.append(
                {
                    "event_id": row["event_id"],
                    "tool_name": row["tool_name"],
                    "cost_tokens": cost,
                    "timestamp": row["timestamp"],
                }
            )

        return total_cost, child_events

    def get_task_delegations_with_costs(self) -> dict[str, Any]:
        """
        Get all task delegations with calculated costs.

        Returns:
            Dictionary with:
            - delegations: List of TaskDelegation with costs filled in
            - total_cost_tokens: Sum of all delegation costs
            - total_cost_usd: total_cost_tokens converted with USD_PER_TOKEN
            - total_delegations: Count of delegations
            - avg_cost_per_delegation: Mean tokens per delegation (0.0 if none)
            - by_subagent_type: Cost breakdown by subagent
            - by_tool_type: Cost breakdown by tool
        """
        delegations = self.get_task_delegations()

        total_cost = 0
        by_subagent: dict[str, int] = {}
        by_tool: dict[str, int] = {}

        for delegation in delegations:
            cost, child_events = self.calculate_task_cost(delegation.event_id)
            delegation.total_cost_tokens = cost
            delegation.child_events = child_events
            delegation.tool_count = len(child_events)

            total_cost += cost

            # Track by subagent type
            subagent = delegation.subagent_type
            by_subagent[subagent] = by_subagent.get(subagent, 0) + cost

            # Track by tool type
            for child in child_events:
                tool = child["tool_name"] or "unknown"
                by_tool[tool] = by_tool.get(tool, 0) + child["cost_tokens"]

        return {
            "delegations": delegations,
            "total_cost_tokens": total_cost,
            "total_cost_usd": total_cost * self.USD_PER_TOKEN,
            "total_delegations": len(delegations),
            "avg_cost_per_delegation": (
                total_cost / len(delegations) if delegations else 0.0
            ),
            "by_subagent_type": by_subagent,
            "by_tool_type": by_tool,
        }

    def get_cost_by_subagent_type(self) -> dict[str, int]:
        """
        Group delegation costs by subagent type.

        Returns:
            Dictionary mapping subagent_type to total tokens spent
        """
        return self.get_task_delegations_with_costs()["by_subagent_type"]

    def get_cost_by_tool_type(self) -> dict[str, int]:
        """
        Show which tools cost most across all delegations.

        Returns:
            Dictionary mapping tool_name to total tokens spent
        """
        return self.get_task_delegations_with_costs()["by_tool_type"]

    def get_roi_stats(self) -> ROIStats:
        """
        Calculate ROI statistics comparing delegation vs direct execution.

        The model is a fixed heuristic, not a measurement:
        - estimated_direct_cost = delegation cost * DIRECT_COST_MULTIPLIER
        - estimated_savings = estimated_direct_cost
          - delegation cost * PARALLELIZATION_FACTOR
          * (1 - CONTEXT_PRESERVATION_BENEFIT)
        - savings_percentage = estimated_savings / estimated_direct_cost * 100
          (0.0 when there is no cost at all)

        Returns:
            ROIStats with cost and savings analysis
        """
        data = self.get_task_delegations_with_costs()

        total_delegation_cost = data["total_cost_tokens"]
        total_delegations = data["total_delegations"]

        parallelization_factor = self.PARALLELIZATION_FACTOR
        context_benefit = self.CONTEXT_PRESERVATION_BENEFIT

        estimated_direct_cost = int(total_delegation_cost * self.DIRECT_COST_MULTIPLIER)
        estimated_savings = int(
            estimated_direct_cost
            - (total_delegation_cost * parallelization_factor * (1.0 - context_benefit))
        )

        # Guard the division: with no recorded cost there is nothing to compare.
        savings_percentage = (
            (estimated_savings / estimated_direct_cost * 100)
            if estimated_direct_cost > 0
            else 0.0
        )

        return ROIStats(
            total_delegation_cost=total_delegation_cost,
            estimated_direct_cost=estimated_direct_cost,
            estimated_savings=estimated_savings,
            savings_percentage=savings_percentage,
            avg_parallelization_factor=parallelization_factor,
            context_preservation_benefit=context_benefit,
            total_delegations=total_delegations,
            avg_cost_per_delegation=(
                total_delegation_cost / total_delegations
                if total_delegations > 0
                else 0.0
            ),
        )

    def get_top_delegations(self, limit: int = 10) -> list[TaskDelegation]:
        """
        Get the most expensive Task delegations.

        Args:
            limit: Maximum number of delegations to return (default: 10);
                zero or a negative value yields an empty list

        Returns:
            List of TaskDelegation sorted by cost (descending)
        """
        # A negative slice bound would drop items from the end instead of
        # limiting the result, so handle non-positive limits explicitly.
        if limit <= 0:
            return []

        delegations = self.get_task_delegations_with_costs()["delegations"]
        ranked = sorted(delegations, key=lambda d: d.total_cost_tokens, reverse=True)
        return ranked[:limit]

    def get_cost_breakdown(self) -> CostBreakdown:
        """
        Get comprehensive cost breakdown.

        Returns:
            CostBreakdown with by_subagent and by_tool analysis
        """
        data = self.get_task_delegations_with_costs()

        return CostBreakdown(
            by_subagent=data["by_subagent_type"],
            by_tool=data["by_tool_type"],
            total_cost_tokens=data["total_cost_tokens"],
            total_delegations=data["total_delegations"],
            avg_cost_per_delegation=data["avg_cost_per_delegation"],
        )