emdash_core-0.1.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. emdash_core/__init__.py +3 -0
  2. emdash_core/agent/__init__.py +37 -0
  3. emdash_core/agent/agents.py +225 -0
  4. emdash_core/agent/code_reviewer.py +476 -0
  5. emdash_core/agent/compaction.py +143 -0
  6. emdash_core/agent/context_manager.py +140 -0
  7. emdash_core/agent/events.py +338 -0
  8. emdash_core/agent/handlers.py +224 -0
  9. emdash_core/agent/inprocess_subagent.py +377 -0
  10. emdash_core/agent/mcp/__init__.py +50 -0
  11. emdash_core/agent/mcp/client.py +346 -0
  12. emdash_core/agent/mcp/config.py +302 -0
  13. emdash_core/agent/mcp/manager.py +496 -0
  14. emdash_core/agent/mcp/tool_factory.py +213 -0
  15. emdash_core/agent/prompts/__init__.py +38 -0
  16. emdash_core/agent/prompts/main_agent.py +104 -0
  17. emdash_core/agent/prompts/subagents.py +131 -0
  18. emdash_core/agent/prompts/workflow.py +136 -0
  19. emdash_core/agent/providers/__init__.py +34 -0
  20. emdash_core/agent/providers/base.py +143 -0
  21. emdash_core/agent/providers/factory.py +80 -0
  22. emdash_core/agent/providers/models.py +220 -0
  23. emdash_core/agent/providers/openai_provider.py +463 -0
  24. emdash_core/agent/providers/transformers_provider.py +217 -0
  25. emdash_core/agent/research/__init__.py +81 -0
  26. emdash_core/agent/research/agent.py +143 -0
  27. emdash_core/agent/research/controller.py +254 -0
  28. emdash_core/agent/research/critic.py +428 -0
  29. emdash_core/agent/research/macros.py +469 -0
  30. emdash_core/agent/research/planner.py +449 -0
  31. emdash_core/agent/research/researcher.py +436 -0
  32. emdash_core/agent/research/state.py +523 -0
  33. emdash_core/agent/research/synthesizer.py +594 -0
  34. emdash_core/agent/reviewer_profile.py +475 -0
  35. emdash_core/agent/rules.py +123 -0
  36. emdash_core/agent/runner.py +601 -0
  37. emdash_core/agent/session.py +262 -0
  38. emdash_core/agent/spec_schema.py +66 -0
  39. emdash_core/agent/specification.py +479 -0
  40. emdash_core/agent/subagent.py +397 -0
  41. emdash_core/agent/subagent_prompts.py +13 -0
  42. emdash_core/agent/toolkit.py +482 -0
  43. emdash_core/agent/toolkits/__init__.py +64 -0
  44. emdash_core/agent/toolkits/base.py +96 -0
  45. emdash_core/agent/toolkits/explore.py +47 -0
  46. emdash_core/agent/toolkits/plan.py +55 -0
  47. emdash_core/agent/tools/__init__.py +141 -0
  48. emdash_core/agent/tools/analytics.py +436 -0
  49. emdash_core/agent/tools/base.py +131 -0
  50. emdash_core/agent/tools/coding.py +484 -0
  51. emdash_core/agent/tools/github_mcp.py +592 -0
  52. emdash_core/agent/tools/history.py +13 -0
  53. emdash_core/agent/tools/modes.py +153 -0
  54. emdash_core/agent/tools/plan.py +206 -0
  55. emdash_core/agent/tools/plan_write.py +135 -0
  56. emdash_core/agent/tools/search.py +412 -0
  57. emdash_core/agent/tools/spec.py +341 -0
  58. emdash_core/agent/tools/task.py +262 -0
  59. emdash_core/agent/tools/task_output.py +204 -0
  60. emdash_core/agent/tools/tasks.py +454 -0
  61. emdash_core/agent/tools/traversal.py +588 -0
  62. emdash_core/agent/tools/web.py +179 -0
  63. emdash_core/analytics/__init__.py +5 -0
  64. emdash_core/analytics/engine.py +1286 -0
  65. emdash_core/api/__init__.py +5 -0
  66. emdash_core/api/agent.py +308 -0
  67. emdash_core/api/agents.py +154 -0
  68. emdash_core/api/analyze.py +264 -0
  69. emdash_core/api/auth.py +173 -0
  70. emdash_core/api/context.py +77 -0
  71. emdash_core/api/db.py +121 -0
  72. emdash_core/api/embed.py +131 -0
  73. emdash_core/api/feature.py +143 -0
  74. emdash_core/api/health.py +93 -0
  75. emdash_core/api/index.py +162 -0
  76. emdash_core/api/plan.py +110 -0
  77. emdash_core/api/projectmd.py +210 -0
  78. emdash_core/api/query.py +320 -0
  79. emdash_core/api/research.py +122 -0
  80. emdash_core/api/review.py +161 -0
  81. emdash_core/api/router.py +76 -0
  82. emdash_core/api/rules.py +116 -0
  83. emdash_core/api/search.py +119 -0
  84. emdash_core/api/spec.py +99 -0
  85. emdash_core/api/swarm.py +223 -0
  86. emdash_core/api/tasks.py +109 -0
  87. emdash_core/api/team.py +120 -0
  88. emdash_core/auth/__init__.py +17 -0
  89. emdash_core/auth/github.py +389 -0
  90. emdash_core/config.py +74 -0
  91. emdash_core/context/__init__.py +52 -0
  92. emdash_core/context/models.py +50 -0
  93. emdash_core/context/providers/__init__.py +11 -0
  94. emdash_core/context/providers/base.py +74 -0
  95. emdash_core/context/providers/explored_areas.py +183 -0
  96. emdash_core/context/providers/touched_areas.py +360 -0
  97. emdash_core/context/registry.py +73 -0
  98. emdash_core/context/reranker.py +199 -0
  99. emdash_core/context/service.py +260 -0
  100. emdash_core/context/session.py +352 -0
  101. emdash_core/core/__init__.py +104 -0
  102. emdash_core/core/config.py +454 -0
  103. emdash_core/core/exceptions.py +55 -0
  104. emdash_core/core/models.py +265 -0
  105. emdash_core/core/review_config.py +57 -0
  106. emdash_core/db/__init__.py +67 -0
  107. emdash_core/db/auth.py +134 -0
  108. emdash_core/db/models.py +91 -0
  109. emdash_core/db/provider.py +222 -0
  110. emdash_core/db/providers/__init__.py +5 -0
  111. emdash_core/db/providers/supabase.py +452 -0
  112. emdash_core/embeddings/__init__.py +24 -0
  113. emdash_core/embeddings/indexer.py +534 -0
  114. emdash_core/embeddings/models.py +192 -0
  115. emdash_core/embeddings/providers/__init__.py +7 -0
  116. emdash_core/embeddings/providers/base.py +112 -0
  117. emdash_core/embeddings/providers/fireworks.py +141 -0
  118. emdash_core/embeddings/providers/openai.py +104 -0
  119. emdash_core/embeddings/registry.py +146 -0
  120. emdash_core/embeddings/service.py +215 -0
  121. emdash_core/graph/__init__.py +26 -0
  122. emdash_core/graph/builder.py +134 -0
  123. emdash_core/graph/connection.py +692 -0
  124. emdash_core/graph/schema.py +416 -0
  125. emdash_core/graph/writer.py +667 -0
  126. emdash_core/ingestion/__init__.py +7 -0
  127. emdash_core/ingestion/change_detector.py +150 -0
  128. emdash_core/ingestion/git/__init__.py +5 -0
  129. emdash_core/ingestion/git/commit_analyzer.py +196 -0
  130. emdash_core/ingestion/github/__init__.py +6 -0
  131. emdash_core/ingestion/github/pr_fetcher.py +296 -0
  132. emdash_core/ingestion/github/task_extractor.py +100 -0
  133. emdash_core/ingestion/orchestrator.py +540 -0
  134. emdash_core/ingestion/parsers/__init__.py +10 -0
  135. emdash_core/ingestion/parsers/base_parser.py +66 -0
  136. emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
  137. emdash_core/ingestion/parsers/class_extractor.py +154 -0
  138. emdash_core/ingestion/parsers/function_extractor.py +202 -0
  139. emdash_core/ingestion/parsers/import_analyzer.py +119 -0
  140. emdash_core/ingestion/parsers/python_parser.py +123 -0
  141. emdash_core/ingestion/parsers/registry.py +72 -0
  142. emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
  143. emdash_core/ingestion/parsers/typescript_parser.py +278 -0
  144. emdash_core/ingestion/repository.py +346 -0
  145. emdash_core/models/__init__.py +38 -0
  146. emdash_core/models/agent.py +68 -0
  147. emdash_core/models/index.py +77 -0
  148. emdash_core/models/query.py +113 -0
  149. emdash_core/planning/__init__.py +7 -0
  150. emdash_core/planning/agent_api.py +413 -0
  151. emdash_core/planning/context_builder.py +265 -0
  152. emdash_core/planning/feature_context.py +232 -0
  153. emdash_core/planning/feature_expander.py +646 -0
  154. emdash_core/planning/llm_explainer.py +198 -0
  155. emdash_core/planning/similarity.py +509 -0
  156. emdash_core/planning/team_focus.py +821 -0
  157. emdash_core/server.py +153 -0
  158. emdash_core/sse/__init__.py +5 -0
  159. emdash_core/sse/stream.py +196 -0
  160. emdash_core/swarm/__init__.py +17 -0
  161. emdash_core/swarm/merge_agent.py +383 -0
  162. emdash_core/swarm/session_manager.py +274 -0
  163. emdash_core/swarm/swarm_runner.py +226 -0
  164. emdash_core/swarm/task_definition.py +137 -0
  165. emdash_core/swarm/worker_spawner.py +319 -0
  166. emdash_core/swarm/worktree_manager.py +278 -0
  167. emdash_core/templates/__init__.py +10 -0
  168. emdash_core/templates/defaults/agent-builder.md.template +82 -0
  169. emdash_core/templates/defaults/focus.md.template +115 -0
  170. emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
  171. emdash_core/templates/defaults/pr-review.md.template +80 -0
  172. emdash_core/templates/defaults/project.md.template +85 -0
  173. emdash_core/templates/defaults/research_critic.md.template +112 -0
  174. emdash_core/templates/defaults/research_planner.md.template +85 -0
  175. emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
  176. emdash_core/templates/defaults/reviewer.md.template +81 -0
  177. emdash_core/templates/defaults/spec.md.template +41 -0
  178. emdash_core/templates/defaults/tasks.md.template +78 -0
  179. emdash_core/templates/loader.py +296 -0
  180. emdash_core/utils/__init__.py +45 -0
  181. emdash_core/utils/git.py +84 -0
  182. emdash_core/utils/image.py +502 -0
  183. emdash_core/utils/logger.py +51 -0
  184. emdash_core-0.1.7.dist-info/METADATA +35 -0
  185. emdash_core-0.1.7.dist-info/RECORD +187 -0
  186. emdash_core-0.1.7.dist-info/WHEEL +4 -0
  187. emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,821 @@
+"""Team focus analyzer using graph data and LLM synthesis."""
+
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+
+from ..analytics.engine import AnalyticsEngine
+from ..graph.connection import KuzuConnection, get_connection
+from ..ingestion.github.pr_fetcher import PRFetcher
+from .feature_expander import FeatureExpander, FeatureGraph
+from ..agent.providers import get_provider
+from ..agent.providers.factory import DEFAULT_MODEL
+from ..templates.loader import load_template_for_agent
+from ..utils.logger import log
+
+
+@dataclass
+class TeamFocusData:
+    """Data collected for team focus analysis."""
+
+    # Repository info for GitHub links
+    github_url: Optional[str] = None
+
+    # Area focus (directories)
+    hot_areas: list[dict] = field(default_factory=list)
+
+    # File focus (individual files)
+    hot_files: list[dict] = field(default_factory=list)
+
+    # Code context for hot files (classes, functions, docstrings)
+    hot_file_code_context: list[dict] = field(default_factory=list)
+
+    # Open PRs (work in progress)
+    open_prs: list[dict] = field(default_factory=list)
+
+    # Recently merged PRs
+    merged_prs: list[dict] = field(default_factory=list)
+
+    # Detailed graph context from PR files (code entities, call graph)
+    pr_graph_context: list[dict] = field(default_factory=list)
+
+    # Contributors active in the time window
+    active_contributors: list[dict] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary."""
+        return {
+            "github_url": self.github_url,
+            "hot_areas": self.hot_areas,
+            "hot_files": self.hot_files,
+            "hot_file_code_context": self.hot_file_code_context,
+            "open_prs": self.open_prs,
+            "merged_prs": self.merged_prs,
+            "pr_graph_context": self.pr_graph_context,
+            "active_contributors": self.active_contributors,
+        }
+
+
+def _get_system_prompt() -> str:
+    """Get the system prompt for team focus analysis.
+
+    Loads the focus template if available, otherwise uses a fallback.
+    """
+    try:
+        return load_template_for_agent("focus")
+    except Exception as e:
+        log.warning(f"Could not load focus template, using fallback: {e}")
+        return """You are a senior engineering manager analyzing your team's recent activity and focus areas.
+
+You have access to DETAILED CODE CONTEXT including:
+- What classes and functions are in each hot file
+- What code entities each PR is modifying
+- Docstrings explaining what the code does
+- Call graph relationships showing code flow
+
+YOUR JOB: Use this code context to explain WHAT the team is actually building/changing, not just which files they're touching.
+
+## LINK FORMATTING (REQUIRED)
+
+You MUST use markdown links for PRs and contributors:
+- PRs: `[PR #123](https://github.com/owner/repo/pull/123)` - use the GitHub URL provided in the data
+- Contributors: `[@username](https://github.com/username)` - always link to their GitHub profile
+
+Example: "[@liorfo](https://github.com/liorfo) opened [PR #1847](https://github.com/wix-private/picasso/pull/1847)"
+
+## ANALYSIS GUIDELINES
+
+For each work stream, explain:
+1. **WHAT** is being built/changed (based on class/function names and docstrings)
+2. **WHY** it matters (infer from the code purpose)
+3. **HOW** it fits together (use call graph info)
+
+Example of GOOD analysis:
+"The team is building a **streaming response handler** for the AI coder:
+- `StreamProcessor` class handles chunked LLM responses
+- `createAppStreamStep()` orchestrates the streaming workflow
+- Changes to `WorkflowManager` integrate this into the main pipeline"
+
+Example of BAD analysis (too vague):
+"The team is working on packages/picasso-mastra-server/"
+
+When describing PRs, explain WHAT the code change accomplishes based on:
+- The PR description
+- The function/class names being modified
+- Their docstrings
+
+Be specific about the technical changes. Use the code entity information provided.
+
+Output in markdown with these sections:
+1. **Executive Summary** - 2-3 sentences on main focus
+2. **Work Streams** - Grouped by theme, with specific code details
+3. **PR Analysis** - What each is changing (use code context!) - USE PR LINKS
+4. **Key Contributors** - Who's working on what - USE CONTRIBUTOR LINKS
+5. **Technical Insights** - Patterns, risks, or recommendations"""
+
+
+class TeamFocusAnalyzer:
+    """Analyzes team focus and work-in-progress using graph data and LLM."""
+
+    def __init__(
+        self,
+        connection: Optional[KuzuConnection] = None,
+        model: str = DEFAULT_MODEL,
+    ):
+        """Initialize team focus analyzer.
+
+        Args:
+            connection: Kuzu connection. If None, uses global connection.
+            model: LLM model to use for synthesis.
+        """
+        self.connection = connection or get_connection()
+        self.model = model
+        self.analytics = AnalyticsEngine(self.connection)
+        self.expander = FeatureExpander(self.connection)
+        self._provider = None
+
+    @property
+    def provider(self):
+        """Get LLM provider lazily."""
+        if self._provider is None:
+            self._provider = get_provider(self.model)
+        return self._provider
+
+    def _get_github_url(self) -> Optional[str]:
+        """Get the GitHub repository URL from the database.
+
+        Returns:
+            GitHub URL like 'https://github.com/owner/repo' or None if not found
+        """
+        import re
+
+        with self.connection.session() as session:
+            result = session.run(
+                """
+                MATCH (r:Repository)
+                RETURN r.owner as owner, r.name as name, r.url as url
+                LIMIT 1
+                """
+            )
+            record = result.single()
+            if not record:
+                return None
+
+            # Try owner/name first
+            if record["owner"] and record["name"]:
+                return f"https://github.com/{record['owner']}/{record['name']}"
+
+            # Fall back to parsing the URL
+            url = record.get("url", "")
+            if not url:
+                return None
+
+            # Parse SSH format: git@github.com:owner/repo.git
+            ssh_match = re.match(r"git@github\.com:([^/]+)/([^/]+?)(?:\.git)?$", url)
+            if ssh_match:
+                return f"https://github.com/{ssh_match.group(1)}/{ssh_match.group(2)}"
+
+            # Parse HTTPS format: https://github.com/owner/repo.git
+            https_match = re.match(r"https://github\.com/([^/]+)/([^/]+?)(?:\.git)?/?$", url)
+            if https_match:
+                return f"https://github.com/{https_match.group(1)}/{https_match.group(2)}"
+
+        return None
+
+    def gather_data(
+        self,
+        days: int = 14,
+        top_areas: int = 8,
+        top_files: int = 25,
+        max_prs: int = 20,
+        include_graph_context: bool = True,
+    ) -> TeamFocusData:
+        """Gather all data needed for team focus analysis.
+
+        Args:
+            days: Time window for recency scoring
+            top_areas: Number of top areas to include
+            top_files: Number of top files to include (primary focus)
+            max_prs: Maximum number of PRs to include
+            include_graph_context: Whether to expand PR files through graph
+
+        Returns:
+            TeamFocusData with all gathered information
+        """
+        log.info(f"Gathering team focus data (last {days} days)...")
+
+        data = TeamFocusData()
+
+        # Get GitHub URL for generating links
+        data.github_url = self._get_github_url()
+        if data.github_url:
+            log.info(f"GitHub repo: {data.github_url}")
+
+        # 1. Get hot files FIRST (sorted by recent activity) - this is the primary data
+        files = self.analytics.compute_file_importance(days=days, limit=top_files * 2)
+        # Sort by recent commits
+        files_with_recent = [f for f in files if f.get("recent_commits", 0) > 0]
+        files_with_recent.sort(key=lambda x: x.get("recent_commits", 0), reverse=True)
+        data.hot_files = files_with_recent[:top_files]
+        log.info(f"Found {len(data.hot_files)} hot files")
+
+        # 2. Get hot areas (sorted by focus) - secondary/summary data
+        areas = self.analytics.compute_area_importance(
+            depth=2, days=days, limit=top_areas * 2
+        )
+        # Sort by focus percentage and filter to those with recent activity
+        areas_with_focus = [a for a in areas if a.get("focus_pct", 0) > 0]
+        areas_with_focus.sort(key=lambda x: x.get("focus_pct", 0), reverse=True)
+        data.hot_areas = areas_with_focus[:top_areas]
+        log.info(f"Found {len(data.hot_areas)} hot areas")
+
+        # 3. Get open PRs
+        data.open_prs = self._get_prs(state="open", limit=max_prs)
+        log.info(f"Found {len(data.open_prs)} open PRs")
+
+        # 4. Get recently merged PRs
+        data.merged_prs = self._get_prs(state="merged", limit=max_prs)
+        log.info(f"Found {len(data.merged_prs)} merged PRs")
+
+        # 5. Get active contributors
+        data.active_contributors = self._get_active_contributors(days=days)
+        log.info(f"Found {len(data.active_contributors)} active contributors")
+
+        # 6. Optionally expand PR files through graph for context
+        if include_graph_context:
+            # Get detailed PR context
+            data.pr_graph_context = self._get_pr_graph_context(
+                data.open_prs + data.merged_prs[:10]  # More PRs for better coverage
+            )
+            log.info(f"Gathered graph context for {len(data.pr_graph_context)} PRs")
+
+            # Get code context for hot files
+            data.hot_file_code_context = self._get_hot_file_code_context(
+                data.hot_files, limit=12
+            )
+            log.info(f"Gathered code context for {len(data.hot_file_code_context)} hot files")
+
+        return data
+
+    def _get_prs(self, state: str = "all", limit: int = 20) -> list[dict]:
+        """Get PRs from the graph database, with GitHub fallback.
+
+        Args:
+            state: PR state (open, merged, closed, all)
+            limit: Maximum PRs to return
+
+        Returns:
+            List of PR dictionaries
+        """
+        # First try database
+        prs = self._get_prs_from_db(state, limit)
+
+        # If database is empty, try fetching from GitHub
+        if not prs:
+            log.info("No PRs in database, fetching from GitHub...")
+            prs = self._fetch_prs_from_github(state, limit)
+
+        return prs
+
+    def _get_prs_from_db(self, state: str, limit: int) -> list[dict]:
+        """Get PRs from the graph database."""
+        with self.connection.session() as session:
+            state_filter = ""
+            if state == "open":
+                state_filter = "AND pr.state = 'open'"
+            elif state == "merged":
+                state_filter = "AND pr.state = 'merged'"
+            elif state == "closed":
+                state_filter = "AND pr.state IN ['closed', 'merged']"
+
+            result = session.run(
+                f"""
+                MATCH (pr:PullRequest)
+                WHERE pr.number IS NOT NULL
+                {state_filter}
+                OPTIONAL MATCH (pr)-[:PR_MODIFIES]->(f:File)
+                WITH pr, collect(DISTINCT f.path) as files
+                RETURN pr.number as number,
+                       pr.title as title,
+                       pr.author as author,
+                       pr.state as state,
+                       pr.created_at as created_at,
+                       pr.merged_at as merged_at,
+                       pr.additions as additions,
+                       pr.deletions as deletions,
+                       pr.description as description,
+                       size(files) as files_count,
+                       files[0:10] as files
+                ORDER BY COALESCE(pr.merged_at, pr.created_at) DESC
+                LIMIT $limit
+                """,
+                limit=limit,
+            )
+
+            return [dict(record) for record in result]
+
+    def _fetch_prs_from_github(self, state: str, limit: int) -> list[dict]:
+        """Fetch PRs directly from GitHub using gh CLI.
+
+        Falls back to this when database has no PR data.
+        """
+        # Get GitHub URL to extract owner/repo
+        github_url = self._get_github_url()
+        if not github_url:
+            log.warning("Cannot fetch PRs: GitHub URL not available")
+            return []
+
+        # Extract owner/repo from URL
+        # URL format: https://github.com/owner/repo
+        parts = github_url.replace("https://github.com/", "").split("/")
+        if len(parts) < 2:
+            log.warning(f"Cannot parse GitHub URL: {github_url}")
+            return []
+
+        owner, repo = parts[0], parts[1]
+
+        try:
+            fetcher = PRFetcher(owner=owner, repo=repo)
+            if not fetcher.gh_path:
+                log.warning("gh CLI not available, cannot fetch PRs")
+                return []
+
+            # Fetch PRs from GitHub
+            pr_entities = fetcher.fetch_prs(state=state, limit=limit)
+
+            # Convert to dict format expected by the analyzer
+            prs = []
+            for pr in pr_entities:
+                files = pr.files_changed or []
+                prs.append({
+                    "number": pr.number,
+                    "title": pr.title,
+                    "author": pr.author,
+                    "state": pr.state,
+                    "created_at": pr.created_at,
+                    "merged_at": pr.merged_at,
+                    "additions": pr.additions,
+                    "deletions": pr.deletions,
+                    "description": pr.description,
+                    "files_count": len(files),
+                    "files": files[:10],
+                })
+
+            log.info(f"Fetched {len(prs)} PRs from GitHub")
+            return prs
+
+        except Exception as e:
+            log.warning(f"Failed to fetch PRs from GitHub: {e}")
+            return []
+
+    def _get_active_contributors(self, days: int = 14) -> list[dict]:
+        """Get contributors active in the time window.
+
+        Args:
+            days: Time window in days
+
+        Returns:
+            List of contributor dictionaries
+        """
+        # Calculate cutoff timestamp
+        cutoff = datetime.now(timezone.utc) - timedelta(days=days)
+
+        with self.connection.session() as session:
+            # Get contributors from recent commits
+            result = session.run(
+                """
+                MATCH (c:GitCommit)-[:AUTHORED_BY]->(a:Author)
+                WHERE c.timestamp >= $cutoff
+                WITH a, count(c) as commit_count
+                OPTIONAL MATCH (c2:GitCommit)-[:AUTHORED_BY]->(a)
+                WHERE c2.timestamp >= $cutoff
+                OPTIONAL MATCH (c2)-[:COMMIT_MODIFIES]->(f:File)
+                WITH a, commit_count, collect(DISTINCT f.path) as files
+                RETURN a.name as name,
+                       a.email as email,
+                       commit_count,
+                       size(files) as files_touched,
+                       files[0:5] as sample_files
+                ORDER BY commit_count DESC
+                LIMIT 15
+                """,
+                cutoff=cutoff,
+            )
+
+            contributors = [dict(record) for record in result]
+
+            # Also get contributors from PRs
+            pr_result = session.run(
+                """
+                MATCH (pr:PullRequest)
+                WHERE pr.created_at >= $cutoff
+                   OR pr.merged_at >= $cutoff
+                WITH pr.author as author, count(pr) as pr_count
+                WHERE author IS NOT NULL
+                RETURN author as name, pr_count
+                ORDER BY pr_count DESC
+                LIMIT 10
+                """,
+                cutoff=cutoff,
+            )
+
+            # Merge PR authors into contributors
+            pr_authors = {r["name"]: r["pr_count"] for r in pr_result}
+            for contrib in contributors:
+                name = contrib.get("name", "")
+                if name in pr_authors:
+                    contrib["pr_count"] = pr_authors[name]
+
+        return contributors
+
+    def _get_pr_graph_context(self, prs: list[dict], max_files_per_pr: int = 5) -> list[dict]:
+        """Get detailed graph context for PR files including code insights.
+
+        Args:
+            prs: List of PR dictionaries
+            max_files_per_pr: Maximum files to expand per PR
+
+        Returns:
+            List of detailed context dictionaries per PR
+        """
+        contexts = []
+
+        for pr in prs[:15]:  # Process more PRs for better coverage
+            pr_context = {
+                "pr_number": pr.get("number"),
+                "pr_title": pr.get("title"),
+                "pr_description": pr.get("description", ""),
+                "pr_author": pr.get("author", "unknown"),
+                "files_changed": pr.get("files", [])[:10],
+                "additions": pr.get("additions", 0),
+                "deletions": pr.get("deletions", 0),
+                "code_entities": [],
+                "call_graph_samples": [],
+            }
+
+            files = pr.get("files", [])[:max_files_per_pr]
+
+            for file_path in files:
+                if not file_path:
+                    continue
+
+                try:
+                    graph = self.expander.expand_from_file(file_path, max_hops=1)
+
+                    # Extract filename for context
+                    filename = file_path.split("/")[-1] if "/" in file_path else file_path
+
+                    # Extract classes with more detail
+                    for cls in graph.classes[:3]:
+                        pr_context["code_entities"].append({
+                            "type": "Class",
+                            "name": cls.get("name"),
+                            "file": filename,
+                            "docstring": (cls.get("docstring") or "")[:200],
+                            "qualified_name": cls.get("qualified_name", ""),
+                        })
+
+                    # Extract functions with more detail
+                    for func in graph.functions[:6]:
+                        pr_context["code_entities"].append({
+                            "type": "Function",
+                            "name": func.get("name"),
+                            "file": filename,
+                            "docstring": (func.get("docstring") or "")[:200],
+                            "qualified_name": func.get("qualified_name", ""),
+                        })
+
+                    # Extract call graph samples to understand code flow
+                    for call in graph.call_graph[:5]:
+                        pr_context["call_graph_samples"].append({
+                            "caller": call.get("caller", ""),
+                            "callee": call.get("callee", ""),
+                        })
+
+                except Exception as e:
+                    log.debug(f"Could not expand file {file_path}: {e}")
+                    continue
+
+            contexts.append(pr_context)
+
+        return contexts
+
+    def _get_hot_file_code_context(self, hot_files: list[dict], limit: int = 10) -> list[dict]:
+        """Get code context for the hottest files.
+
+        Args:
+            hot_files: List of hot file dictionaries
+            limit: Maximum files to analyze
+
+        Returns:
+            List of file context with code entities
+        """
+        file_contexts = []
+
+        for f in hot_files[:limit]:
+            file_path = f.get("file_path", "")
+            if not file_path:
+                continue
+
+            file_ctx = {
+                "file_path": file_path,
+                "filename": file_path.split("/")[-1] if "/" in file_path else file_path,
+                "recent_commits": f.get("recent_commits", 0),
+                "classes": [],
+                "functions": [],
+            }
+
+            try:
+                graph = self.expander.expand_from_file(file_path, max_hops=1)
+
+                for cls in graph.classes[:3]:
+                    file_ctx["classes"].append({
+                        "name": cls.get("name"),
+                        "docstring": (cls.get("docstring") or "")[:150],
+                    })
+
+                for func in graph.functions[:5]:
+                    file_ctx["functions"].append({
+                        "name": func.get("name"),
+                        "docstring": (func.get("docstring") or "")[:150],
+                    })
+
+                if file_ctx["classes"] or file_ctx["functions"]:
+                    file_contexts.append(file_ctx)
+
+            except Exception as e:
+                log.debug(f"Could not get context for {file_path}: {e}")
+                continue
+
+        return file_contexts
+
+    def _build_prompt(self, data: TeamFocusData, days: int) -> str:
+        """Build the LLM prompt from gathered data.
+
+        Args:
+            data: TeamFocusData with all gathered information
+            days: Time window for context
+
+        Returns:
+            Formatted prompt string
+        """
+        sections = []
+
+        sections.append(f"# Team Focus Analysis (Last {days} Days)\n")
+
+        # Add GitHub URL for link generation
+        if data.github_url:
+            sections.append(f"**Repository:** {data.github_url}")
+            sections.append("")
+            sections.append("**IMPORTANT - Use these link formats in your output:**")
+            sections.append(f"- PRs: `[PR #123]({data.github_url}/pull/123)`")
+            sections.append("- Contributors: `[@username](https://github.com/username)`")
+            sections.append("")
+
+        # Hot Files FIRST - this is the PRIMARY data, most specific and actionable
+        if data.hot_files:
+            sections.append("## HOT FILES - Most Active Individual Files")
+            sections.append("")
+            sections.append("**IMPORTANT: These specific files have the most recent activity. Reference these in your summary!**")
+            sections.append("")
+            sections.append("| File | Recent Commits | Total Commits | Authors |")
+            sections.append("|------|----------------|---------------|---------|")
+            for f in data.hot_files[:20]:  # Show up to 20 files
+                path = f.get("file_path", "unknown")
+                # Shorten path for display but keep enough context
+                if "/" in path:
+                    parts = path.split("/")
+                    # Keep last 4 parts for better context
+                    path = "/".join(parts[-4:]) if len(parts) > 4 else path
+                recent = f.get("recent_commits", 0)
+                total = f.get("commits", 0)
+                authors = f.get("authors", 0)
+                sections.append(f"| `{path}` | {recent} | {total} | {authors} |")
+            sections.append("")
+
+        # Hot Areas - summary view
+        if data.hot_areas:
+            sections.append("## Hot Areas (Directory Summary)")
+            sections.append("")
+            sections.append("Aggregated view of where activity is concentrated:")
+            sections.append("")
+            for area in data.hot_areas:
+                path = area.get("path", "unknown")
+                focus = area.get("focus_pct", 0)
+                commits = area.get("total_commits", 0)
+                authors = area.get("unique_authors", 0)
+                file_count = area.get("file_count", 0)
+                sections.append(
+                    f"- **{path}**: {focus:.1f}% of recent commits, "
+                    f"{commits} total commits, {file_count} files, {authors} contributors"
+                )
+            sections.append("")
+
+        # Code context for hot files - what's IN those files
+        if data.hot_file_code_context:
+            sections.append("## Code Context for Hot Files")
+            sections.append("")
+            sections.append("What code entities are in the most active files:")
+            sections.append("")
+            for fctx in data.hot_file_code_context[:10]:
+                filename = fctx.get("filename", "unknown")
+                recent = fctx.get("recent_commits", 0)
+                sections.append(f"### `{filename}` ({recent} recent commits)")
+
+                classes = fctx.get("classes", [])
+                if classes:
+                    for cls in classes[:2]:
+                        name = cls.get("name", "")
+                        doc = cls.get("docstring", "")
+                        if doc:
+                            sections.append(f"- **Class `{name}`**: {doc}")
+                        else:
+                            sections.append(f"- **Class `{name}`**")
+
+                funcs = fctx.get("functions", [])
+                if funcs:
+                    for func in funcs[:4]:
+                        name = func.get("name", "")
+                        doc = func.get("docstring", "")
+                        if doc:
+                            sections.append(f"- `{name}()`: {doc}")
+                        else:
+                            sections.append(f"- `{name}()`")
+                sections.append("")
+
+        # Detailed PR context with code entities
+        if data.pr_graph_context:
+            sections.append("## PR Deep Dive - What Code is Being Changed")
+            sections.append("")
+            sections.append("Detailed analysis of what each PR is modifying:")
+            sections.append("")
+
+            for pr_ctx in data.pr_graph_context[:12]:
+                pr_num = pr_ctx.get("pr_number", "?")
+                pr_title = pr_ctx.get("pr_title", "Unknown")
+                pr_desc = pr_ctx.get("pr_description", "")
+                pr_author = pr_ctx.get("pr_author", "unknown")
+                additions = pr_ctx.get("additions", 0)
+                deletions = pr_ctx.get("deletions", 0)
+                files = pr_ctx.get("files_changed", [])
+
+                sections.append(f"### PR #{pr_num}: {pr_title}")
+                sections.append(f"**Author:** @{pr_author} | **Changes:** +{additions}/-{deletions}")
+
+                if pr_desc:
+                    # Clean and truncate description
+                    desc = pr_desc.replace("\n", " ").strip()[:300]
+                    sections.append(f"**Description:** {desc}")
+
+                if files:
+                    file_names = [f.split("/")[-1] for f in files[:5]]
+                    sections.append(f"**Files:** {', '.join(file_names)}")
+
+                entities = pr_ctx.get("code_entities", [])
+                if entities:
+                    sections.append("**Code being modified:**")
+                    for ent in entities[:6]:
+                        etype = ent.get("type", "")
+                        ename = ent.get("name", "")
+                        efile = ent.get("file", "")
+                        edoc = ent.get("docstring", "")
+                        if edoc:
+                            sections.append(f" - {etype} `{ename}` in {efile}: {edoc[:100]}")
+                        else:
+                            sections.append(f" - {etype} `{ename}` in {efile}")
+
+                # Show call graph if available
+                calls = pr_ctx.get("call_graph_samples", [])
+                if calls:
+                    call_strs = [f"{c['caller']}->{c['callee']}" for c in calls[:3]]
+                    sections.append(f"**Call flow:** {', '.join(call_strs)}")
+
+                sections.append("")
+
+        # Open PRs summary (shorter, since we have deep dive above)
+        if data.open_prs:
+            sections.append("## Open PRs Summary")
+            sections.append("")
+            for pr in data.open_prs[:8]:
+                number = pr.get("number", "?")
+                title = pr.get("title", "Unknown")[:50]
+                author = pr.get("author", "unknown")
+                sections.append(f"- **#{number}**: {title} (@{author})")
+            sections.append("")
+
+        # Merged PRs summary
+        if data.merged_prs:
+            sections.append("## Recently Merged PRs")
+            sections.append("")
+            for pr in data.merged_prs[:8]:
+                number = pr.get("number", "?")
+                title = pr.get("title", "Unknown")[:50]
+                author = pr.get("author", "unknown")
+                sections.append(f"- **#{number}**: {title} (@{author})")
+            sections.append("")
+
+        # Graph Context (entities touched by PRs)
+        if data.pr_graph_context:
+            sections.append("## Key Code Entities Affected by PRs")
+            sections.append("")
+            for ctx in data.pr_graph_context[:5]:
+                pr_num = ctx.get("pr_number", "?")
+                pr_title = ctx.get("pr_title", "")[:40]
+                entities = ctx.get("code_entities", [])
+                if entities:
+                    sections.append(f"**PR #{pr_num}** ({pr_title}...):")
+                    for ent in entities[:4]:
+                        etype = ent.get("type", "?")
+                        ename = ent.get("name", "?")
+                        edoc = ent.get("docstring", "")
+                        if edoc:
+                            sections.append(f" - {etype} `{ename}`: {edoc}")
+                        else:
+                            sections.append(f" - {etype} `{ename}`")
+                sections.append("")
+
+        # Active Contributors
+        if data.active_contributors:
+            sections.append("## Active Contributors")
+            sections.append("")
+            for contrib in data.active_contributors[:8]:
+                name = contrib.get("name", "unknown")
+                commits = contrib.get("commit_count", 0)
+                files = contrib.get("files_touched", 0)
+                pr_count = contrib.get("pr_count", 0)
+                parts = [f"{commits} commits"]
+                if files:
+                    parts.append(f"{files} files")
+                if pr_count:
+                    parts.append(f"{pr_count} PRs")
+                sections.append(f"- **{name}**: {', '.join(parts)}")
+            sections.append("")
+
+        # Final instruction
+        sections.append("---")
+        sections.append("")
+        sections.append(
+            "Based on the CODE CONTEXT above, provide a technical analysis:\n\n"
+            "## Executive Summary\n"
+            "2-3 sentences: What is the team primarily building/improving?\n\n"
+            "## Work Streams (Use Code Context!)\n"
+            "Group related work and explain WHAT is being built using the class/function info:\n"
+            "- Name the actual classes and functions being modified\n"
+            "- Explain what they do (use the docstrings provided)\n"
+            "- Show how they connect (use call graph info)\n\n"
+            "## PR Analysis\n"
+            "For each major PR, explain the technical change:\n"
+            "- What code entities are being modified?\n"
+            "- What capability is being added/changed?\n\n"
+            "## Key Contributors\n"
+            "Who's working on what technical areas?\n\n"
+            "## Technical Insights\n"
+            "Patterns, architectural changes, or areas needing attention."
+        )
+
+        return "\n".join(sections)
+
+    def analyze(
+        self,
+        days: int = 14,
+        include_graph_context: bool = True,
+    ) -> str:
+        """Analyze team focus and generate LLM summary.
+
+        Args:
+            days: Time window for recency scoring
+            include_graph_context: Whether to expand PR files through graph
+
+        Returns:
+            LLM-generated summary in markdown format
+        """
+        # Gather all data
+        data = self.gather_data(
+            days=days,
+            include_graph_context=include_graph_context,
+        )
+
+        # Build prompt
+        prompt = self._build_prompt(data, days)
+
+        # Call LLM
+        log.info("Generating team focus summary with LLM...")
+
+        response = self.provider.chat(
+            messages=[{"role": "user", "content": prompt}],
+            system=_get_system_prompt(),
+        )
+
+        return response.content or ""
+
+    def get_raw_data(self, days: int = 14) -> dict:
+        """Get raw data without LLM synthesis (for JSON output).
+
+        Args:
+            days: Time window for recency scoring
+
+        Returns:
+            Dictionary with all gathered data
+        """
+        data = self.gather_data(days=days, include_graph_context=False)
+        return data.to_dict()
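
For readers evaluating the package, here is a minimal usage sketch of the `TeamFocusAnalyzer` class added in this file. The sketch is hypothetical, not part of the published code: it assumes a repository has already been ingested into the graph reachable via the package's default `get_connection()` and that LLM provider credentials are configured in the environment. Only the import path, class name, and method signatures come from the diff above.

```python
# Hypothetical usage sketch; assumes an already-indexed graph and
# configured LLM provider credentials (neither is shown in this diff).
from emdash_core.planning.team_focus import TeamFocusAnalyzer

analyzer = TeamFocusAnalyzer()  # defaults: global connection, DEFAULT_MODEL

# LLM-synthesized markdown report covering the last 14 days
report = analyzer.analyze(days=14, include_graph_context=True)
print(report)

# Raw gathered data without LLM synthesis (e.g., for JSON output)
raw = analyzer.get_raw_data(days=14)
print(len(raw["hot_files"]), "hot files")
```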