htmlgraph 0.9.3__py3-none-any.whl → 0.27.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. htmlgraph/.htmlgraph/.session-warning-state.json +6 -0
  2. htmlgraph/.htmlgraph/agents.json +72 -0
  3. htmlgraph/.htmlgraph/htmlgraph.db +0 -0
  4. htmlgraph/__init__.py +173 -17
  5. htmlgraph/__init__.pyi +123 -0
  6. htmlgraph/agent_detection.py +127 -0
  7. htmlgraph/agent_registry.py +45 -30
  8. htmlgraph/agents.py +160 -107
  9. htmlgraph/analytics/__init__.py +9 -2
  10. htmlgraph/analytics/cli.py +190 -51
  11. htmlgraph/analytics/cost_analyzer.py +391 -0
  12. htmlgraph/analytics/cost_monitor.py +664 -0
  13. htmlgraph/analytics/cost_reporter.py +675 -0
  14. htmlgraph/analytics/cross_session.py +617 -0
  15. htmlgraph/analytics/dependency.py +192 -100
  16. htmlgraph/analytics/pattern_learning.py +771 -0
  17. htmlgraph/analytics/session_graph.py +707 -0
  18. htmlgraph/analytics/strategic/__init__.py +80 -0
  19. htmlgraph/analytics/strategic/cost_optimizer.py +611 -0
  20. htmlgraph/analytics/strategic/pattern_detector.py +876 -0
  21. htmlgraph/analytics/strategic/preference_manager.py +709 -0
  22. htmlgraph/analytics/strategic/suggestion_engine.py +747 -0
  23. htmlgraph/analytics/work_type.py +190 -14
  24. htmlgraph/analytics_index.py +135 -51
  25. htmlgraph/api/__init__.py +3 -0
  26. htmlgraph/api/cost_alerts_websocket.py +416 -0
  27. htmlgraph/api/main.py +2498 -0
  28. htmlgraph/api/static/htmx.min.js +1 -0
  29. htmlgraph/api/static/style-redesign.css +1344 -0
  30. htmlgraph/api/static/style.css +1079 -0
  31. htmlgraph/api/templates/dashboard-redesign.html +1366 -0
  32. htmlgraph/api/templates/dashboard.html +794 -0
  33. htmlgraph/api/templates/partials/activity-feed-hierarchical.html +326 -0
  34. htmlgraph/api/templates/partials/activity-feed.html +1100 -0
  35. htmlgraph/api/templates/partials/agents-redesign.html +317 -0
  36. htmlgraph/api/templates/partials/agents.html +317 -0
  37. htmlgraph/api/templates/partials/event-traces.html +373 -0
  38. htmlgraph/api/templates/partials/features-kanban-redesign.html +509 -0
  39. htmlgraph/api/templates/partials/features.html +578 -0
  40. htmlgraph/api/templates/partials/metrics-redesign.html +346 -0
  41. htmlgraph/api/templates/partials/metrics.html +346 -0
  42. htmlgraph/api/templates/partials/orchestration-redesign.html +443 -0
  43. htmlgraph/api/templates/partials/orchestration.html +198 -0
  44. htmlgraph/api/templates/partials/spawners.html +375 -0
  45. htmlgraph/api/templates/partials/work-items.html +613 -0
  46. htmlgraph/api/websocket.py +538 -0
  47. htmlgraph/archive/__init__.py +24 -0
  48. htmlgraph/archive/bloom.py +234 -0
  49. htmlgraph/archive/fts.py +297 -0
  50. htmlgraph/archive/manager.py +583 -0
  51. htmlgraph/archive/search.py +244 -0
  52. htmlgraph/atomic_ops.py +560 -0
  53. htmlgraph/attribute_index.py +208 -0
  54. htmlgraph/bounded_paths.py +539 -0
  55. htmlgraph/builders/__init__.py +14 -0
  56. htmlgraph/builders/base.py +118 -29
  57. htmlgraph/builders/bug.py +150 -0
  58. htmlgraph/builders/chore.py +119 -0
  59. htmlgraph/builders/epic.py +150 -0
  60. htmlgraph/builders/feature.py +31 -6
  61. htmlgraph/builders/insight.py +195 -0
  62. htmlgraph/builders/metric.py +217 -0
  63. htmlgraph/builders/pattern.py +202 -0
  64. htmlgraph/builders/phase.py +162 -0
  65. htmlgraph/builders/spike.py +52 -19
  66. htmlgraph/builders/track.py +148 -72
  67. htmlgraph/cigs/__init__.py +81 -0
  68. htmlgraph/cigs/autonomy.py +385 -0
  69. htmlgraph/cigs/cost.py +475 -0
  70. htmlgraph/cigs/messages_basic.py +472 -0
  71. htmlgraph/cigs/messaging.py +365 -0
  72. htmlgraph/cigs/models.py +771 -0
  73. htmlgraph/cigs/pattern_storage.py +427 -0
  74. htmlgraph/cigs/patterns.py +503 -0
  75. htmlgraph/cigs/posttool_analyzer.py +234 -0
  76. htmlgraph/cigs/reporter.py +818 -0
  77. htmlgraph/cigs/tracker.py +317 -0
  78. htmlgraph/cli/.htmlgraph/.session-warning-state.json +6 -0
  79. htmlgraph/cli/.htmlgraph/agents.json +72 -0
  80. htmlgraph/cli/.htmlgraph/htmlgraph.db +0 -0
  81. htmlgraph/cli/__init__.py +42 -0
  82. htmlgraph/cli/__main__.py +6 -0
  83. htmlgraph/cli/analytics.py +1424 -0
  84. htmlgraph/cli/base.py +685 -0
  85. htmlgraph/cli/constants.py +206 -0
  86. htmlgraph/cli/core.py +954 -0
  87. htmlgraph/cli/main.py +147 -0
  88. htmlgraph/cli/models.py +475 -0
  89. htmlgraph/cli/templates/__init__.py +1 -0
  90. htmlgraph/cli/templates/cost_dashboard.py +399 -0
  91. htmlgraph/cli/work/__init__.py +239 -0
  92. htmlgraph/cli/work/browse.py +115 -0
  93. htmlgraph/cli/work/features.py +568 -0
  94. htmlgraph/cli/work/orchestration.py +676 -0
  95. htmlgraph/cli/work/report.py +728 -0
  96. htmlgraph/cli/work/sessions.py +466 -0
  97. htmlgraph/cli/work/snapshot.py +559 -0
  98. htmlgraph/cli/work/tracks.py +486 -0
  99. htmlgraph/cli_commands/__init__.py +1 -0
  100. htmlgraph/cli_commands/feature.py +195 -0
  101. htmlgraph/cli_framework.py +115 -0
  102. htmlgraph/collections/__init__.py +18 -0
  103. htmlgraph/collections/base.py +415 -98
  104. htmlgraph/collections/bug.py +53 -0
  105. htmlgraph/collections/chore.py +53 -0
  106. htmlgraph/collections/epic.py +53 -0
  107. htmlgraph/collections/feature.py +12 -26
  108. htmlgraph/collections/insight.py +100 -0
  109. htmlgraph/collections/metric.py +92 -0
  110. htmlgraph/collections/pattern.py +97 -0
  111. htmlgraph/collections/phase.py +53 -0
  112. htmlgraph/collections/session.py +194 -0
  113. htmlgraph/collections/spike.py +56 -16
  114. htmlgraph/collections/task_delegation.py +241 -0
  115. htmlgraph/collections/todo.py +511 -0
  116. htmlgraph/collections/traces.py +487 -0
  117. htmlgraph/config/cost_models.json +56 -0
  118. htmlgraph/config.py +190 -0
  119. htmlgraph/context_analytics.py +344 -0
  120. htmlgraph/converter.py +216 -28
  121. htmlgraph/cost_analysis/__init__.py +5 -0
  122. htmlgraph/cost_analysis/analyzer.py +438 -0
  123. htmlgraph/dashboard.html +2406 -307
  124. htmlgraph/dashboard.html.backup +6592 -0
  125. htmlgraph/dashboard.html.bak +7181 -0
  126. htmlgraph/dashboard.html.bak2 +7231 -0
  127. htmlgraph/dashboard.html.bak3 +7232 -0
  128. htmlgraph/db/__init__.py +38 -0
  129. htmlgraph/db/queries.py +790 -0
  130. htmlgraph/db/schema.py +1788 -0
  131. htmlgraph/decorators.py +317 -0
  132. htmlgraph/dependency_models.py +19 -2
  133. htmlgraph/deploy.py +142 -125
  134. htmlgraph/deployment_models.py +474 -0
  135. htmlgraph/docs/API_REFERENCE.md +841 -0
  136. htmlgraph/docs/HTTP_API.md +750 -0
  137. htmlgraph/docs/INTEGRATION_GUIDE.md +752 -0
  138. htmlgraph/docs/ORCHESTRATION_PATTERNS.md +717 -0
  139. htmlgraph/docs/README.md +532 -0
  140. htmlgraph/docs/__init__.py +77 -0
  141. htmlgraph/docs/docs_version.py +55 -0
  142. htmlgraph/docs/metadata.py +93 -0
  143. htmlgraph/docs/migrations.py +232 -0
  144. htmlgraph/docs/template_engine.py +143 -0
  145. htmlgraph/docs/templates/_sections/cli_reference.md.j2 +52 -0
  146. htmlgraph/docs/templates/_sections/core_concepts.md.j2 +29 -0
  147. htmlgraph/docs/templates/_sections/sdk_basics.md.j2 +69 -0
  148. htmlgraph/docs/templates/base_agents.md.j2 +78 -0
  149. htmlgraph/docs/templates/example_user_override.md.j2 +47 -0
  150. htmlgraph/docs/version_check.py +163 -0
  151. htmlgraph/edge_index.py +182 -27
  152. htmlgraph/error_handler.py +544 -0
  153. htmlgraph/event_log.py +100 -52
  154. htmlgraph/event_migration.py +13 -4
  155. htmlgraph/exceptions.py +49 -0
  156. htmlgraph/file_watcher.py +101 -28
  157. htmlgraph/find_api.py +75 -63
  158. htmlgraph/git_events.py +145 -63
  159. htmlgraph/graph.py +1122 -106
  160. htmlgraph/hooks/.htmlgraph/.session-warning-state.json +6 -0
  161. htmlgraph/hooks/.htmlgraph/agents.json +72 -0
  162. htmlgraph/hooks/.htmlgraph/index.sqlite +0 -0
  163. htmlgraph/hooks/__init__.py +45 -0
  164. htmlgraph/hooks/bootstrap.py +169 -0
  165. htmlgraph/hooks/cigs_pretool_enforcer.py +354 -0
  166. htmlgraph/hooks/concurrent_sessions.py +208 -0
  167. htmlgraph/hooks/context.py +350 -0
  168. htmlgraph/hooks/drift_handler.py +525 -0
  169. htmlgraph/hooks/event_tracker.py +1314 -0
  170. htmlgraph/hooks/git_commands.py +175 -0
  171. htmlgraph/hooks/hooks-config.example.json +12 -0
  172. htmlgraph/hooks/installer.py +343 -0
  173. htmlgraph/hooks/orchestrator.py +674 -0
  174. htmlgraph/hooks/orchestrator_reflector.py +223 -0
  175. htmlgraph/hooks/post-checkout.sh +28 -0
  176. htmlgraph/hooks/post-commit.sh +24 -0
  177. htmlgraph/hooks/post-merge.sh +26 -0
  178. htmlgraph/hooks/post_tool_use_failure.py +273 -0
  179. htmlgraph/hooks/post_tool_use_handler.py +257 -0
  180. htmlgraph/hooks/posttooluse.py +408 -0
  181. htmlgraph/hooks/pre-commit.sh +94 -0
  182. htmlgraph/hooks/pre-push.sh +28 -0
  183. htmlgraph/hooks/pretooluse.py +819 -0
  184. htmlgraph/hooks/prompt_analyzer.py +637 -0
  185. htmlgraph/hooks/session_handler.py +668 -0
  186. htmlgraph/hooks/session_summary.py +395 -0
  187. htmlgraph/hooks/state_manager.py +504 -0
  188. htmlgraph/hooks/subagent_detection.py +202 -0
  189. htmlgraph/hooks/subagent_stop.py +369 -0
  190. htmlgraph/hooks/task_enforcer.py +255 -0
  191. htmlgraph/hooks/task_validator.py +177 -0
  192. htmlgraph/hooks/validator.py +628 -0
  193. htmlgraph/ids.py +41 -27
  194. htmlgraph/index.d.ts +286 -0
  195. htmlgraph/learning.py +767 -0
  196. htmlgraph/mcp_server.py +69 -23
  197. htmlgraph/models.py +1586 -87
  198. htmlgraph/operations/README.md +62 -0
  199. htmlgraph/operations/__init__.py +79 -0
  200. htmlgraph/operations/analytics.py +339 -0
  201. htmlgraph/operations/bootstrap.py +289 -0
  202. htmlgraph/operations/events.py +244 -0
  203. htmlgraph/operations/fastapi_server.py +231 -0
  204. htmlgraph/operations/hooks.py +350 -0
  205. htmlgraph/operations/initialization.py +597 -0
  206. htmlgraph/operations/initialization.py.backup +228 -0
  207. htmlgraph/operations/server.py +303 -0
  208. htmlgraph/orchestration/__init__.py +58 -0
  209. htmlgraph/orchestration/claude_launcher.py +179 -0
  210. htmlgraph/orchestration/command_builder.py +72 -0
  211. htmlgraph/orchestration/headless_spawner.py +281 -0
  212. htmlgraph/orchestration/live_events.py +377 -0
  213. htmlgraph/orchestration/model_selection.py +327 -0
  214. htmlgraph/orchestration/plugin_manager.py +140 -0
  215. htmlgraph/orchestration/prompts.py +137 -0
  216. htmlgraph/orchestration/spawner_event_tracker.py +383 -0
  217. htmlgraph/orchestration/spawners/__init__.py +16 -0
  218. htmlgraph/orchestration/spawners/base.py +194 -0
  219. htmlgraph/orchestration/spawners/claude.py +173 -0
  220. htmlgraph/orchestration/spawners/codex.py +435 -0
  221. htmlgraph/orchestration/spawners/copilot.py +294 -0
  222. htmlgraph/orchestration/spawners/gemini.py +471 -0
  223. htmlgraph/orchestration/subprocess_runner.py +36 -0
  224. htmlgraph/orchestration/task_coordination.py +343 -0
  225. htmlgraph/orchestration.md +563 -0
  226. htmlgraph/orchestrator-system-prompt-optimized.txt +863 -0
  227. htmlgraph/orchestrator.py +669 -0
  228. htmlgraph/orchestrator_config.py +357 -0
  229. htmlgraph/orchestrator_mode.py +328 -0
  230. htmlgraph/orchestrator_validator.py +133 -0
  231. htmlgraph/parallel.py +646 -0
  232. htmlgraph/parser.py +160 -35
  233. htmlgraph/path_query.py +608 -0
  234. htmlgraph/pattern_matcher.py +636 -0
  235. htmlgraph/planning.py +147 -52
  236. htmlgraph/pydantic_models.py +476 -0
  237. htmlgraph/quality_gates.py +350 -0
  238. htmlgraph/query_builder.py +109 -72
  239. htmlgraph/query_composer.py +509 -0
  240. htmlgraph/reflection.py +443 -0
  241. htmlgraph/refs.py +344 -0
  242. htmlgraph/repo_hash.py +512 -0
  243. htmlgraph/repositories/__init__.py +292 -0
  244. htmlgraph/repositories/analytics_repository.py +455 -0
  245. htmlgraph/repositories/analytics_repository_standard.py +628 -0
  246. htmlgraph/repositories/feature_repository.py +581 -0
  247. htmlgraph/repositories/feature_repository_htmlfile.py +668 -0
  248. htmlgraph/repositories/feature_repository_memory.py +607 -0
  249. htmlgraph/repositories/feature_repository_sqlite.py +858 -0
  250. htmlgraph/repositories/filter_service.py +620 -0
  251. htmlgraph/repositories/filter_service_standard.py +445 -0
  252. htmlgraph/repositories/shared_cache.py +621 -0
  253. htmlgraph/repositories/shared_cache_memory.py +395 -0
  254. htmlgraph/repositories/track_repository.py +552 -0
  255. htmlgraph/repositories/track_repository_htmlfile.py +619 -0
  256. htmlgraph/repositories/track_repository_memory.py +508 -0
  257. htmlgraph/repositories/track_repository_sqlite.py +711 -0
  258. htmlgraph/routing.py +8 -19
  259. htmlgraph/scripts/deploy.py +1 -2
  260. htmlgraph/sdk/__init__.py +398 -0
  261. htmlgraph/sdk/__init__.pyi +14 -0
  262. htmlgraph/sdk/analytics/__init__.py +19 -0
  263. htmlgraph/sdk/analytics/engine.py +155 -0
  264. htmlgraph/sdk/analytics/helpers.py +178 -0
  265. htmlgraph/sdk/analytics/registry.py +109 -0
  266. htmlgraph/sdk/base.py +484 -0
  267. htmlgraph/sdk/constants.py +216 -0
  268. htmlgraph/sdk/core.pyi +308 -0
  269. htmlgraph/sdk/discovery.py +120 -0
  270. htmlgraph/sdk/help/__init__.py +12 -0
  271. htmlgraph/sdk/help/mixin.py +699 -0
  272. htmlgraph/sdk/mixins/__init__.py +15 -0
  273. htmlgraph/sdk/mixins/attribution.py +113 -0
  274. htmlgraph/sdk/mixins/mixin.py +410 -0
  275. htmlgraph/sdk/operations/__init__.py +12 -0
  276. htmlgraph/sdk/operations/mixin.py +427 -0
  277. htmlgraph/sdk/orchestration/__init__.py +17 -0
  278. htmlgraph/sdk/orchestration/coordinator.py +203 -0
  279. htmlgraph/sdk/orchestration/spawner.py +204 -0
  280. htmlgraph/sdk/planning/__init__.py +19 -0
  281. htmlgraph/sdk/planning/bottlenecks.py +93 -0
  282. htmlgraph/sdk/planning/mixin.py +211 -0
  283. htmlgraph/sdk/planning/parallel.py +186 -0
  284. htmlgraph/sdk/planning/queue.py +210 -0
  285. htmlgraph/sdk/planning/recommendations.py +87 -0
  286. htmlgraph/sdk/planning/smart_planning.py +319 -0
  287. htmlgraph/sdk/session/__init__.py +19 -0
  288. htmlgraph/sdk/session/continuity.py +57 -0
  289. htmlgraph/sdk/session/handoff.py +110 -0
  290. htmlgraph/sdk/session/info.py +309 -0
  291. htmlgraph/sdk/session/manager.py +103 -0
  292. htmlgraph/sdk/strategic/__init__.py +26 -0
  293. htmlgraph/sdk/strategic/mixin.py +563 -0
  294. htmlgraph/server.py +685 -180
  295. htmlgraph/services/__init__.py +10 -0
  296. htmlgraph/services/claiming.py +199 -0
  297. htmlgraph/session_hooks.py +300 -0
  298. htmlgraph/session_manager.py +1392 -175
  299. htmlgraph/session_registry.py +587 -0
  300. htmlgraph/session_state.py +436 -0
  301. htmlgraph/session_warning.py +201 -0
  302. htmlgraph/sessions/__init__.py +23 -0
  303. htmlgraph/sessions/handoff.py +756 -0
  304. htmlgraph/setup.py +34 -17
  305. htmlgraph/spike_index.py +143 -0
  306. htmlgraph/sync_docs.py +12 -15
  307. htmlgraph/system_prompts.py +450 -0
  308. htmlgraph/templates/AGENTS.md.template +366 -0
  309. htmlgraph/templates/CLAUDE.md.template +97 -0
  310. htmlgraph/templates/GEMINI.md.template +87 -0
  311. htmlgraph/templates/orchestration-view.html +350 -0
  312. htmlgraph/track_builder.py +146 -15
  313. htmlgraph/track_manager.py +69 -21
  314. htmlgraph/transcript.py +890 -0
  315. htmlgraph/transcript_analytics.py +699 -0
  316. htmlgraph/types.py +323 -0
  317. htmlgraph/validation.py +115 -0
  318. htmlgraph/watch.py +8 -5
  319. htmlgraph/work_type_utils.py +3 -2
  320. {htmlgraph-0.9.3.data → htmlgraph-0.27.5.data}/data/htmlgraph/dashboard.html +2406 -307
  321. htmlgraph-0.27.5.data/data/htmlgraph/templates/AGENTS.md.template +366 -0
  322. htmlgraph-0.27.5.data/data/htmlgraph/templates/CLAUDE.md.template +97 -0
  323. htmlgraph-0.27.5.data/data/htmlgraph/templates/GEMINI.md.template +87 -0
  324. {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/METADATA +97 -64
  325. htmlgraph-0.27.5.dist-info/RECORD +337 -0
  326. {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/entry_points.txt +1 -1
  327. htmlgraph/cli.py +0 -2688
  328. htmlgraph/sdk.py +0 -709
  329. htmlgraph-0.9.3.dist-info/RECORD +0 -61
  330. {htmlgraph-0.9.3.data → htmlgraph-0.27.5.data}/data/htmlgraph/styles.css +0 -0
  331. {htmlgraph-0.9.3.dist-info → htmlgraph-0.27.5.dist-info}/WHEEL +0 -0
@@ -0,0 +1,699 @@
1
+ """
2
+ Transcript Analytics & Learning System.
3
+
4
+ Extracts patterns, metrics, and insights from Claude Code transcripts
5
+ to enable active learning, pattern recognition, and workflow improvements.
6
+
7
+ Key capabilities:
8
+ - Tool transition analysis (which tools follow which)
9
+ - Session health scoring (efficiency, retry rates, context rebuilds)
10
+ - Workflow pattern detection (common sequences, anti-patterns)
11
+ - Cross-session learning (compare and improve over time)
12
+ """
13
+
14
+ from collections import Counter, defaultdict
15
+ from dataclasses import dataclass, field
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+ from htmlgraph.transcript import TranscriptReader, TranscriptSession
20
+
21
+
22
@dataclass
class ToolTransition:
    """One directed edge between two tools, with usage statistics.

    The edge runs from ``from_tool`` to ``to_tool``. ``count`` defaults to a
    single observation and ``avg_time_between`` is expressed in seconds.
    """

    from_tool: str  # tool at the start of the transition
    to_tool: str  # tool at the end of the transition
    count: int = 1  # number of times this transition was recorded
    avg_time_between: float = 0.0  # seconds
30
+
31
+
32
@dataclass
class WorkflowPattern:
    """A tool sequence found in transcripts, plus summary statistics for it."""

    sequence: list[str]  # tool names, in the order they occurred
    count: int  # number of occurrences
    success_rate: float  # between 0.0 and 1.0
    avg_duration: float  # seconds
    category: str = "neutral"  # "optimal", "neutral" or "anti-pattern"
41
+
42
+
43
@dataclass
class SessionHealth:
    """Per-session quality metrics and the weighted score derived from them."""

    session_id: str
    efficiency_score: float  # 0.0 to 1.0
    retry_rate: float  # share of operations that were retried
    context_rebuild_count: int  # how often the same files were read again
    tool_diversity: float  # 0.0 to 1.0, larger means a wider mix of tools
    prompt_clarity_score: float  # estimated from the iterations needed
    error_recovery_rate: float  # successful recoveries / total errors
    duration_seconds: float
    tools_per_minute: float

    def overall_score(self) -> float:
        """Combine the individual metrics into one weighted health score.

        Weights: efficiency 0.3, low retry rate 0.2, few context rebuilds
        0.15, tool diversity 0.1, prompt clarity 0.15, error recovery 0.1.
        """
        # Fewer rebuilds is better; the component reaches zero at 10 rebuilds
        # and never goes negative.
        rebuild_component = max(0, 1 - (self.context_rebuild_count / 10))

        weighted_terms = (
            0.3 * self.efficiency_score,
            0.2 * (1 - self.retry_rate),
            0.15 * rebuild_component,
            0.1 * self.tool_diversity,
            0.15 * self.prompt_clarity_score,
            0.1 * self.error_recovery_rate,
        )

        # Plain left-to-right addition, one term at a time.
        score = weighted_terms[0]
        for term in weighted_terms[1:]:
            score = score + term
        return score
79
+
80
+
81
@dataclass
class TranscriptInsights:
    """Roll-up of an analysis run over one or more transcripts."""

    # Totals
    total_sessions: int
    total_user_messages: int
    total_tool_calls: int

    # Tool usage
    tool_frequency: dict[str, int] = field(default_factory=dict)
    tool_transitions: list[ToolTransition] = field(default_factory=list)

    # Detected workflow patterns
    common_patterns: list[WorkflowPattern] = field(default_factory=list)
    anti_patterns: list[WorkflowPattern] = field(default_factory=list)

    # Session health
    avg_session_health: float = 0.0
    health_trend: str = "stable"  # "improving", "stable" or "declining"

    # Suggested improvements, as display strings
    recommendations: list[str] = field(default_factory=list)
103
+
104
+
105
@dataclass
class TrackTranscriptStats:
    """Transcript statistics summed over every session of one track."""

    track_id: str
    session_count: int
    total_user_messages: int
    total_tool_calls: int
    total_duration_seconds: float

    # Breakdown by session
    session_ids: list[str] = field(default_factory=list)
    session_healths: list[float] = field(default_factory=list)

    # Tool usage summed over the sessions
    tool_frequency: dict[str, int] = field(default_factory=dict)
    tool_transitions: dict[str, dict[str, int]] = field(default_factory=dict)

    # Cross-session patterns
    common_patterns: list[WorkflowPattern] = field(default_factory=list)
    anti_patterns_detected: int = 0

    # Learning metrics
    avg_session_health: float = 0.0
    health_trend: str = "stable"  # "improving", "declining" or "stable"

    def to_dict(self) -> dict[str, Any]:
        """Return the summary fields as a plain dict for API responses.

        The average health is rounded to two decimals and the duration is
        included both raw (seconds) and formatted.
        """
        formatted_duration = self._format_duration(self.total_duration_seconds)
        rounded_health = round(self.avg_session_health, 2)
        return {
            "track_id": self.track_id,
            "session_count": self.session_count,
            "total_user_messages": self.total_user_messages,
            "total_tool_calls": self.total_tool_calls,
            "total_duration_seconds": self.total_duration_seconds,
            "total_duration_formatted": formatted_duration,
            "session_ids": self.session_ids,
            "tool_frequency": self.tool_frequency,
            "avg_session_health": rounded_health,
            "health_trend": self.health_trend,
            "anti_patterns_detected": self.anti_patterns_detected,
        }

    def _format_duration(self, seconds: float) -> str:
        """Render *seconds* as ``"<h>h <m>m"``, or ``"<m>m"`` under one hour."""
        whole_hours = int(seconds // 3600)
        leftover_minutes = int((seconds % 3600) // 60)
        if whole_hours > 0:
            return f"{whole_hours}h {leftover_minutes}m"
        return f"{leftover_minutes}m"
156
+
157
+
158
class TranscriptAnalytics:
    """
    Analysis engine for Claude Code transcripts.

    Pulls workflow patterns out of recorded sessions, computes metrics
    and produces insights aimed at improving agent workflows over time.
    """

    # Tool sequences treated as anti-patterns, each with its explanation.
    ANTI_PATTERNS = [
        (["Grep"] * 3, "Repeated search without reading results"),
        (["Read"] * 3, "Excessive file reading - consider caching"),
        (["Edit"] * 3, "Multiple edits - consider batching"),
        (["Bash"] * 4, "Command loop - check for errors"),
    ]

    # Tool sequences treated as optimal, each with a short label.
    OPTIMAL_PATTERNS = [
        (["Grep", "Read", "Edit"], "Search → Read → Edit flow"),
        (["Read", "Edit", "Bash"], "Read → Edit → Test flow"),
        (["Glob", "Read", "Edit", "Bash"], "Find → Read → Edit → Verify"),
    ]
180
+
181
def __init__(self, graph_dir: Path | None = None):
    """Set up the reader and an empty transcript cache.

    Args:
        graph_dir: Graph directory; a falsy value selects ``.htmlgraph``.
    """
    if graph_dir:
        self.graph_dir = Path(graph_dir)
    else:
        self.graph_dir = Path(".htmlgraph")
    self.reader = TranscriptReader()
    # Transcripts already loaded, keyed by transcript id.
    self._cache: dict[str, TranscriptSession] = {}
185
+
186
def get_transcript(self, transcript_id: str) -> TranscriptSession | None:
    """Return the transcript for *transcript_id*, loading it on first use.

    A transcript that the reader returns is stored in the cache; a falsy
    reader result is not cached and yields ``None``.
    """
    if transcript_id in self._cache:
        return self._cache.get(transcript_id)

    loaded = self.reader.read_session(transcript_id)
    if not loaded:
        return None

    self._cache[transcript_id] = loaded
    return loaded
193
+
194
+ def get_tool_transitions(
195
+ self,
196
+ transcript_id: str | None = None,
197
+ feature_id: str | None = None,
198
+ ) -> dict[str, dict[str, int]]:
199
+ """
200
+ Calculate tool transition matrix.
201
+
202
+ Returns dict of {from_tool: {to_tool: count}}
203
+ """
204
+ transitions: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
205
+
206
+ transcripts = self._get_transcripts(transcript_id, feature_id)
207
+
208
+ for transcript in transcripts:
209
+ tools = [e.tool_name for e in transcript.entries if e.tool_name]
210
+
211
+ for i in range(len(tools) - 1):
212
+ from_tool = tools[i]
213
+ to_tool = tools[i + 1]
214
+ transitions[from_tool][to_tool] += 1
215
+
216
+ # Convert to regular dict
217
+ return {k: dict(v) for k, v in transitions.items()}
218
+
219
+ def get_tool_frequency(
220
+ self,
221
+ transcript_id: str | None = None,
222
+ feature_id: str | None = None,
223
+ ) -> dict[str, int]:
224
+ """Get frequency count for each tool."""
225
+ frequency: Counter[str] = Counter()
226
+
227
+ transcripts = self._get_transcripts(transcript_id, feature_id)
228
+
229
+ for transcript in transcripts:
230
+ for entry in transcript.entries:
231
+ if entry.tool_name:
232
+ frequency[entry.tool_name] += 1
233
+
234
+ return dict(frequency.most_common())
235
+
236
def calculate_session_health(self, transcript_id: str) -> SessionHealth | None:
    """Calculate heuristic health metrics for one session.

    Returns:
        ``None`` when the transcript cannot be loaded, an all-zero
        ``SessionHealth`` when it has no entries, otherwise the computed
        metrics.
    """
    transcript = self.get_transcript(transcript_id)
    if not transcript:
        return None

    entries = transcript.entries
    if not entries:
        return SessionHealth(
            session_id=transcript_id,
            efficiency_score=0.0,
            retry_rate=0.0,
            context_rebuild_count=0,
            tool_diversity=0.0,
            prompt_clarity_score=0.0,
            error_recovery_rate=0.0,
            duration_seconds=0.0,
            tools_per_minute=0.0,
        )

    tools = [e.tool_name for e in entries if e.tool_name]
    user_message_count = sum(1 for e in entries if e.entry_type == "user")

    # Duration: first to last entry; 0 when either timestamp is missing.
    first_ts = entries[0].timestamp
    last_ts = entries[-1].timestamp
    if first_ts and last_ts:
        duration = (last_ts - first_ts).total_seconds()
    else:
        duration = 0.0

    # Efficiency: tool calls per user message, saturating at 5 per message.
    efficiency = min(1.0, len(tools) / max(1, user_message_count * 5))

    # Retry rate: share of tool calls that repeat the previous tool.
    retries = sum(1 for prev, cur in zip(tools, tools[1:]) if prev == cur)
    retry_rate = retries / max(1, len(tools))

    # Context rebuilds: Read calls on a file path that was already read.
    # A set keeps the membership test O(1) per entry.
    seen_files: set[str] = set()
    rebuilds = 0
    for e in entries:
        if e.tool_name == "Read" and e.tool_input:
            file_path = e.tool_input.get("file_path", "")
            if file_path in seen_files:
                rebuilds += 1
            else:
                seen_files.add(file_path)

    # Tool diversity: distinct tools used, saturating at 10.
    diversity = min(1.0, len(set(tools)) / 10)

    # Prompt clarity: full marks up to 5 user messages, then decreasing.
    clarity = min(1.0, 1 / max(1, user_message_count / 5))

    # Error recovery is a placeholder estimate based only on session length;
    # tool results are not inspected.
    recovery_rate = 0.8 if duration > 60 else 0.5

    # Tools per minute; sessions shorter than a minute count as one minute.
    tools_per_min = len(tools) / max(1, duration / 60)

    return SessionHealth(
        session_id=transcript_id,
        efficiency_score=efficiency,
        retry_rate=retry_rate,
        context_rebuild_count=rebuilds,
        tool_diversity=diversity,
        prompt_clarity_score=clarity,
        error_recovery_rate=recovery_rate,
        duration_seconds=duration,
        tools_per_minute=tools_per_min,
    )
310
+
311
def detect_patterns(
    self,
    transcript_id: str | None = None,
    min_length: int = 3,
    max_length: int = 5,
) -> list[WorkflowPattern]:
    """Find the 20 most frequent tool subsequences in the transcript(s).

    Every contiguous run of ``min_length`` to ``max_length`` tool calls is
    counted. Sequences categorized as "optimal" get a success rate of 0.8,
    all others 0.5; the average duration is always reported as 0.0.
    """
    tally: Counter[tuple[str, ...]] = Counter()

    for session in self._get_transcripts(transcript_id, None):
        names = [entry.tool_name for entry in session.entries if entry.tool_name]
        longest = min(max_length + 1, len(names) + 1)

        # Sliding window for each allowed length.
        for width in range(min_length, longest):
            tally.update(
                tuple(names[start : start + width])
                for start in range(len(names) - width + 1)
            )

    ranked = [
        (seq, occurrences, self._categorize_pattern(list(seq)))
        for seq, occurrences in tally.most_common(20)
    ]
    return [
        WorkflowPattern(
            sequence=list(seq),
            count=occurrences,
            success_rate=0.8 if label == "optimal" else 0.5,
            avg_duration=0.0,
            category=label,
        )
        for seq, occurrences, label in ranked
    ]
346
+
347
def detect_anti_patterns(
    self,
    transcript_id: str | None = None,
) -> list[tuple[WorkflowPattern, str]]:
    """Detect the known anti-patterns, each paired with its explanation.

    Produces one result per transcript per anti-pattern that occurs in it.
    ``count`` is the number of non-overlapping occurrences in that
    transcript.

    Matching is done on whole tool names. Searching a comma-joined string
    instead would let a pattern start inside a longer name, so
    ``MultiEdit, Edit, Edit`` would wrongly count as ``Edit, Edit, Edit``.
    """
    results: list[tuple[WorkflowPattern, str]] = []

    for transcript in self._get_transcripts(transcript_id, None):
        tools = [e.tool_name for e in transcript.entries if e.tool_name]

        for pattern, explanation in self.ANTI_PATTERNS:
            width = len(pattern)
            if width == 0:
                # An empty pattern can never be a meaningful match.
                continue

            count = 0
            start = 0
            while start + width <= len(tools):
                if tools[start : start + width] == pattern:
                    count += 1
                    start += width  # non-overlapping, like str.count
                else:
                    start += 1

            if count > 0:
                results.append(
                    (
                        WorkflowPattern(
                            # Copy so callers cannot mutate the class constant.
                            sequence=list(pattern),
                            count=count,
                            success_rate=0.3,
                            avg_duration=0.0,
                            category="anti-pattern",
                        ),
                        explanation,
                    )
                )

    return results
378
+
379
def compare_sessions(
    self,
    session_ids: list[str],
) -> dict[str, Any]:
    """Compare sessions by health and report the best, worst and averages.

    Sessions for which no health can be calculated are left out. If none
    remain, the result is ``{"error": "No valid sessions found"}``.
    """
    healths = [
        health
        for health in map(self.calculate_session_health, session_ids)
        if health
    ]
    if not healths:
        return {"error": "No valid sessions found"}

    # Highest overall score first.
    ranked = sorted(healths, key=lambda h: h.overall_score(), reverse=True)
    best, worst = ranked[0], ranked[-1]
    compared = len(healths)

    return {
        "sessions_compared": compared,
        "best_session": {"id": best.session_id, "score": best.overall_score()},
        "worst_session": {"id": worst.session_id, "score": worst.overall_score()},
        "avg_efficiency": sum(h.efficiency_score for h in healths) / compared,
        "avg_retry_rate": sum(h.retry_rate for h in healths) / compared,
        "total_context_rebuilds": sum(h.context_rebuild_count for h in healths),
    }
410
+
411
+ def generate_recommendations(
412
+ self,
413
+ transcript_id: str | None = None,
414
+ ) -> list[str]:
415
+ """Generate workflow improvement recommendations."""
416
+ recommendations = []
417
+
418
+ # Analyze anti-patterns
419
+ anti_patterns = self.detect_anti_patterns(transcript_id)
420
+ for pattern, explanation in anti_patterns:
421
+ if pattern.count >= 2:
422
+ recommendations.append(
423
+ f"⚠️ Detected: {' → '.join(pattern.sequence)} ({pattern.count}x) - {explanation}"
424
+ )
425
+
426
+ # Analyze health if single session
427
+ if transcript_id:
428
+ health = self.calculate_session_health(transcript_id)
429
+ if health:
430
+ if health.retry_rate > 0.3:
431
+ recommendations.append(
432
+ "📊 High retry rate detected. Consider reading more context before acting."
433
+ )
434
+ if health.context_rebuild_count > 5:
435
+ recommendations.append(
436
+ "🔄 Many context rebuilds. Consider keeping file content in memory."
437
+ )
438
+ if health.tool_diversity < 0.3:
439
+ recommendations.append(
440
+ "🔧 Low tool diversity. Explore using more specialized tools."
441
+ )
442
+
443
+ # Tool frequency analysis
444
+ freq = self.get_tool_frequency(transcript_id)
445
+ if freq:
446
+ top_tool = max(freq, key=lambda k: freq[k])
447
+ if freq[top_tool] > 50:
448
+ recommendations.append(
449
+ f"📈 Heavy use of {top_tool} ({freq[top_tool]}x). Consider if this is optimal."
450
+ )
451
+
452
+ if not recommendations:
453
+ recommendations.append(
454
+ "✅ No major issues detected. Workflow looks healthy!"
455
+ )
456
+
457
+ return recommendations
458
+
459
def get_insights(
    self,
    transcript_ids: list[str] | None = None,
) -> TranscriptInsights:
    """Generate comprehensive insights from transcripts.

    Args:
        transcript_ids: Transcripts to analyze. When omitted or empty, all
            available transcripts are used.

    Returns:
        Aggregated insights. Every field is computed from the same set of
        transcripts, so a request for specific ids is not mixed with data
        from unrelated sessions. All counts are zero when nothing loads.
    """
    scoped_ids: list[str] = []
    transcripts: list[TranscriptSession] = []
    if transcript_ids:
        for tid in transcript_ids:
            loaded = self.get_transcript(tid)
            if loaded is not None:
                scoped_ids.append(tid)
                transcripts.append(loaded)
    else:
        transcripts = list(self._get_transcripts(None, None))

    if not transcripts:
        return TranscriptInsights(
            total_sessions=0,
            total_user_messages=0,
            total_tool_calls=0,
        )

    # Aggregate stats over exactly the selected transcripts.
    total_user = sum(
        1 for t in transcripts for e in t.entries if e.entry_type == "user"
    )
    tool_counts: Counter[str] = Counter(
        e.tool_name for t in transcripts for e in t.entries if e.tool_name
    )
    total_tools = sum(tool_counts.values())

    recommendations: list[str]
    if scoped_ids:
        # Merge per-transcript results so the requested subset is honoured.
        merged: dict[tuple[str, ...], WorkflowPattern] = {}
        recommendations = []
        for tid in scoped_ids:
            for found in self.detect_patterns(tid):
                key = tuple(found.sequence)
                if key in merged:
                    merged[key].count += found.count
                else:
                    merged[key] = found
            # De-duplicate while keeping first-seen order. A healthy
            # session's all-clear note can appear next to warnings raised
            # by another session.
            for note in self.generate_recommendations(tid):
                if note not in recommendations:
                    recommendations.append(note)
        patterns = sorted(merged.values(), key=lambda p: p.count, reverse=True)
    else:
        patterns = self.detect_patterns()
        recommendations = self.generate_recommendations()

    optimal = [p for p in patterns if p.category == "optimal"]
    anti = [p for p in patterns if p.category == "anti-pattern"]

    # Average the overall health score of the sessions that yield one.
    healths = []
    for t in transcripts:
        h = self.calculate_session_health(t.session_id)
        if h:
            healths.append(h.overall_score())

    avg_health = sum(healths) / len(healths) if healths else 0.0

    return TranscriptInsights(
        total_sessions=len(transcripts),
        total_user_messages=total_user,
        total_tool_calls=total_tools,
        tool_frequency=dict(tool_counts.most_common()),
        common_patterns=optimal[:5],
        anti_patterns=anti[:5],
        avg_session_health=avg_health,
        recommendations=recommendations,
    )
510
+
511
+ def _get_transcripts(
512
+ self,
513
+ transcript_id: str | None,
514
+ feature_id: str | None,
515
+ ) -> list[TranscriptSession]:
516
+ """Get transcripts to analyze."""
517
+ if transcript_id:
518
+ t = self.get_transcript(transcript_id)
519
+ return [t] if t else []
520
+
521
+ # Get all available transcripts
522
+ transcripts = []
523
+ for session in self.reader.list_sessions():
524
+ t = self.get_transcript(session.session_id)
525
+ if t:
526
+ transcripts.append(t)
527
+
528
+ return transcripts
529
+
530
def get_track_stats(self, track_id: str) -> TrackTranscriptStats | None:
    """
    Summarize transcript activity for the sessions linked to a track.

    The track is loaded from ``<graph_dir>/tracks``.  Its linked sessions
    are read from ``track.edges["sessions"]`` and, if that is empty, from
    ``track.properties["sessions"]``.  Each linked session that has a
    loadable transcript contributes message counts, duration, tool usage,
    tool transitions, a health score and anti-pattern counts.

    Args:
        track_id: Track ID to aggregate

    Returns:
        ``None`` when the tracks directory does not exist, when loading or
        looking up the track raises, or when the track is falsy.
        Otherwise a ``TrackTranscriptStats``; if the track links no
        sessions, only the zeroed totals are supplied.

    Note:
        ``common_patterns`` comes from ``detect_patterns()`` called with
        no arguments, i.e. it is not restricted to this track's sessions
        here.  The health trend compares the first and second half of the
        collected scores, so it follows the order in which the track lists
        its sessions.
    """
    from htmlgraph.graph import HtmlGraph
    from htmlgraph.session_manager import SessionManager

    session_mgr = SessionManager(self.graph_dir)

    # Locate and load the track.
    tracks_dir = self.graph_dir / "tracks"
    if not tracks_dir.exists():
        return None

    try:
        track = HtmlGraph(
            tracks_dir, auto_load=True, pattern=["*.html", "*/index.html"]
        ).get(track_id)
    except Exception:
        return None

    if not track:
        return None

    # Linked sessions live in edges, with properties as a fallback.
    linked = track.edges.get("sessions", []) if hasattr(track, "edges") else []
    if not linked and hasattr(track, "properties"):
        linked = track.properties.get("sessions", [])

    # Normalize to plain ID strings: strings pass through, Edge-like
    # objects contribute their ``target``, anything else is stringified.
    session_ids: list[str] = []
    for ref in linked:
        if not isinstance(ref, str):
            ref = str(getattr(ref, "target", ref))
        session_ids.append(ref)

    if not session_ids:
        return TrackTranscriptStats(
            track_id=track_id,
            session_count=0,
            total_user_messages=0,
            total_tool_calls=0,
            total_duration_seconds=0.0,
        )

    # Running aggregates over every session that has a usable transcript.
    user_message_total = 0
    tool_call_total = 0
    duration_seconds = 0.0
    analyzed_ids: list[str] = []
    health_scores = []
    tool_usage: Counter[str] = Counter()
    merged_transitions: dict[str, dict[str, int]] = {}
    anti_pattern_hits = 0

    for session_id in session_ids:
        session = session_mgr.get_session(session_id)
        transcript_key = session.transcript_id if session else None
        if not transcript_key:
            continue

        transcript = self.get_transcript(transcript_key)
        if not transcript:
            continue

        analyzed_ids.append(session_id)
        entries = transcript.entries

        # Message counts.
        user_message_total += sum(1 for e in entries if e.entry_type == "user")
        tool_names = [e.tool_name for e in entries if e.tool_name]
        tool_call_total += len(tool_names)

        # Duration is first-to-last entry; needs two entries with timestamps.
        if len(entries) >= 2:
            started = entries[0].timestamp
            ended = entries[-1].timestamp
            if started and ended:
                duration_seconds += (ended - started).total_seconds()

        # Tool frequency.
        tool_usage.update(tool_names)

        # Tool transitions, summed per (from, to) pair.
        for src, targets in self.get_tool_transitions(transcript_key).items():
            bucket = merged_transitions.setdefault(src, {})
            for dst, seen in targets.items():
                bucket[dst] = bucket.get(dst, 0) + seen

        # Session health.
        report = self.calculate_session_health(transcript_key)
        if report:
            health_scores.append(report.overall_score())

        # Anti-patterns: add up the occurrence counts.
        anti_pattern_hits += sum(
            match[0].count for match in self.detect_anti_patterns(transcript_key)
        )

    score_count = len(health_scores)
    mean_health = sum(health_scores) / score_count if health_scores else 0.0

    # Trend: later half vs. earlier half, with a 0.1 dead band.
    trend = "stable"
    if score_count >= 4:
        half = score_count // 2
        earlier = sum(health_scores[:half]) / half
        later = sum(health_scores[half:]) / (score_count - half)
        delta = later - earlier
        if delta > 0.1:
            trend = "improving"
        elif delta < -0.1:
            trend = "declining"

    # Up to five patterns categorized as optimal.
    top_optimal = [
        candidate
        for candidate in self.detect_patterns()
        if candidate.category == "optimal"
    ][:5]

    return TrackTranscriptStats(
        track_id=track_id,
        session_count=len(analyzed_ids),
        total_user_messages=user_message_total,
        total_tool_calls=tool_call_total,
        total_duration_seconds=duration_seconds,
        session_ids=analyzed_ids,
        session_healths=health_scores,
        tool_frequency=dict(tool_usage.most_common()),
        tool_transitions=merged_transitions,
        common_patterns=top_optimal,
        anti_patterns_detected=anti_pattern_hits,
        avg_session_health=mean_health,
        health_trend=trend,
    )
688
+
689
+ def _categorize_pattern(self, sequence: list[str]) -> str:
690
+ """Categorize a pattern as optimal, anti-pattern, or neutral."""
691
+ for pattern, _ in self.OPTIMAL_PATTERNS:
692
+ if sequence == pattern:
693
+ return "optimal"
694
+
695
+ for pattern, _ in self.ANTI_PATTERNS:
696
+ if sequence == pattern:
697
+ return "anti-pattern"
698
+
699
+ return "neutral"