emdash-core 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. emdash_core/__init__.py +3 -0
  2. emdash_core/agent/__init__.py +37 -0
  3. emdash_core/agent/agents.py +225 -0
  4. emdash_core/agent/code_reviewer.py +476 -0
  5. emdash_core/agent/compaction.py +143 -0
  6. emdash_core/agent/context_manager.py +140 -0
  7. emdash_core/agent/events.py +338 -0
  8. emdash_core/agent/handlers.py +224 -0
  9. emdash_core/agent/inprocess_subagent.py +377 -0
  10. emdash_core/agent/mcp/__init__.py +50 -0
  11. emdash_core/agent/mcp/client.py +346 -0
  12. emdash_core/agent/mcp/config.py +302 -0
  13. emdash_core/agent/mcp/manager.py +496 -0
  14. emdash_core/agent/mcp/tool_factory.py +213 -0
  15. emdash_core/agent/prompts/__init__.py +38 -0
  16. emdash_core/agent/prompts/main_agent.py +104 -0
  17. emdash_core/agent/prompts/subagents.py +131 -0
  18. emdash_core/agent/prompts/workflow.py +136 -0
  19. emdash_core/agent/providers/__init__.py +34 -0
  20. emdash_core/agent/providers/base.py +143 -0
  21. emdash_core/agent/providers/factory.py +80 -0
  22. emdash_core/agent/providers/models.py +220 -0
  23. emdash_core/agent/providers/openai_provider.py +463 -0
  24. emdash_core/agent/providers/transformers_provider.py +217 -0
  25. emdash_core/agent/research/__init__.py +81 -0
  26. emdash_core/agent/research/agent.py +143 -0
  27. emdash_core/agent/research/controller.py +254 -0
  28. emdash_core/agent/research/critic.py +428 -0
  29. emdash_core/agent/research/macros.py +469 -0
  30. emdash_core/agent/research/planner.py +449 -0
  31. emdash_core/agent/research/researcher.py +436 -0
  32. emdash_core/agent/research/state.py +523 -0
  33. emdash_core/agent/research/synthesizer.py +594 -0
  34. emdash_core/agent/reviewer_profile.py +475 -0
  35. emdash_core/agent/rules.py +123 -0
  36. emdash_core/agent/runner.py +601 -0
  37. emdash_core/agent/session.py +262 -0
  38. emdash_core/agent/spec_schema.py +66 -0
  39. emdash_core/agent/specification.py +479 -0
  40. emdash_core/agent/subagent.py +397 -0
  41. emdash_core/agent/subagent_prompts.py +13 -0
  42. emdash_core/agent/toolkit.py +482 -0
  43. emdash_core/agent/toolkits/__init__.py +64 -0
  44. emdash_core/agent/toolkits/base.py +96 -0
  45. emdash_core/agent/toolkits/explore.py +47 -0
  46. emdash_core/agent/toolkits/plan.py +55 -0
  47. emdash_core/agent/tools/__init__.py +141 -0
  48. emdash_core/agent/tools/analytics.py +436 -0
  49. emdash_core/agent/tools/base.py +131 -0
  50. emdash_core/agent/tools/coding.py +484 -0
  51. emdash_core/agent/tools/github_mcp.py +592 -0
  52. emdash_core/agent/tools/history.py +13 -0
  53. emdash_core/agent/tools/modes.py +153 -0
  54. emdash_core/agent/tools/plan.py +206 -0
  55. emdash_core/agent/tools/plan_write.py +135 -0
  56. emdash_core/agent/tools/search.py +412 -0
  57. emdash_core/agent/tools/spec.py +341 -0
  58. emdash_core/agent/tools/task.py +262 -0
  59. emdash_core/agent/tools/task_output.py +204 -0
  60. emdash_core/agent/tools/tasks.py +454 -0
  61. emdash_core/agent/tools/traversal.py +588 -0
  62. emdash_core/agent/tools/web.py +179 -0
  63. emdash_core/analytics/__init__.py +5 -0
  64. emdash_core/analytics/engine.py +1286 -0
  65. emdash_core/api/__init__.py +5 -0
  66. emdash_core/api/agent.py +308 -0
  67. emdash_core/api/agents.py +154 -0
  68. emdash_core/api/analyze.py +264 -0
  69. emdash_core/api/auth.py +173 -0
  70. emdash_core/api/context.py +77 -0
  71. emdash_core/api/db.py +121 -0
  72. emdash_core/api/embed.py +131 -0
  73. emdash_core/api/feature.py +143 -0
  74. emdash_core/api/health.py +93 -0
  75. emdash_core/api/index.py +162 -0
  76. emdash_core/api/plan.py +110 -0
  77. emdash_core/api/projectmd.py +210 -0
  78. emdash_core/api/query.py +320 -0
  79. emdash_core/api/research.py +122 -0
  80. emdash_core/api/review.py +161 -0
  81. emdash_core/api/router.py +76 -0
  82. emdash_core/api/rules.py +116 -0
  83. emdash_core/api/search.py +119 -0
  84. emdash_core/api/spec.py +99 -0
  85. emdash_core/api/swarm.py +223 -0
  86. emdash_core/api/tasks.py +109 -0
  87. emdash_core/api/team.py +120 -0
  88. emdash_core/auth/__init__.py +17 -0
  89. emdash_core/auth/github.py +389 -0
  90. emdash_core/config.py +74 -0
  91. emdash_core/context/__init__.py +52 -0
  92. emdash_core/context/models.py +50 -0
  93. emdash_core/context/providers/__init__.py +11 -0
  94. emdash_core/context/providers/base.py +74 -0
  95. emdash_core/context/providers/explored_areas.py +183 -0
  96. emdash_core/context/providers/touched_areas.py +360 -0
  97. emdash_core/context/registry.py +73 -0
  98. emdash_core/context/reranker.py +199 -0
  99. emdash_core/context/service.py +260 -0
  100. emdash_core/context/session.py +352 -0
  101. emdash_core/core/__init__.py +104 -0
  102. emdash_core/core/config.py +454 -0
  103. emdash_core/core/exceptions.py +55 -0
  104. emdash_core/core/models.py +265 -0
  105. emdash_core/core/review_config.py +57 -0
  106. emdash_core/db/__init__.py +67 -0
  107. emdash_core/db/auth.py +134 -0
  108. emdash_core/db/models.py +91 -0
  109. emdash_core/db/provider.py +222 -0
  110. emdash_core/db/providers/__init__.py +5 -0
  111. emdash_core/db/providers/supabase.py +452 -0
  112. emdash_core/embeddings/__init__.py +24 -0
  113. emdash_core/embeddings/indexer.py +534 -0
  114. emdash_core/embeddings/models.py +192 -0
  115. emdash_core/embeddings/providers/__init__.py +7 -0
  116. emdash_core/embeddings/providers/base.py +112 -0
  117. emdash_core/embeddings/providers/fireworks.py +141 -0
  118. emdash_core/embeddings/providers/openai.py +104 -0
  119. emdash_core/embeddings/registry.py +146 -0
  120. emdash_core/embeddings/service.py +215 -0
  121. emdash_core/graph/__init__.py +26 -0
  122. emdash_core/graph/builder.py +134 -0
  123. emdash_core/graph/connection.py +692 -0
  124. emdash_core/graph/schema.py +416 -0
  125. emdash_core/graph/writer.py +667 -0
  126. emdash_core/ingestion/__init__.py +7 -0
  127. emdash_core/ingestion/change_detector.py +150 -0
  128. emdash_core/ingestion/git/__init__.py +5 -0
  129. emdash_core/ingestion/git/commit_analyzer.py +196 -0
  130. emdash_core/ingestion/github/__init__.py +6 -0
  131. emdash_core/ingestion/github/pr_fetcher.py +296 -0
  132. emdash_core/ingestion/github/task_extractor.py +100 -0
  133. emdash_core/ingestion/orchestrator.py +540 -0
  134. emdash_core/ingestion/parsers/__init__.py +10 -0
  135. emdash_core/ingestion/parsers/base_parser.py +66 -0
  136. emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
  137. emdash_core/ingestion/parsers/class_extractor.py +154 -0
  138. emdash_core/ingestion/parsers/function_extractor.py +202 -0
  139. emdash_core/ingestion/parsers/import_analyzer.py +119 -0
  140. emdash_core/ingestion/parsers/python_parser.py +123 -0
  141. emdash_core/ingestion/parsers/registry.py +72 -0
  142. emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
  143. emdash_core/ingestion/parsers/typescript_parser.py +278 -0
  144. emdash_core/ingestion/repository.py +346 -0
  145. emdash_core/models/__init__.py +38 -0
  146. emdash_core/models/agent.py +68 -0
  147. emdash_core/models/index.py +77 -0
  148. emdash_core/models/query.py +113 -0
  149. emdash_core/planning/__init__.py +7 -0
  150. emdash_core/planning/agent_api.py +413 -0
  151. emdash_core/planning/context_builder.py +265 -0
  152. emdash_core/planning/feature_context.py +232 -0
  153. emdash_core/planning/feature_expander.py +646 -0
  154. emdash_core/planning/llm_explainer.py +198 -0
  155. emdash_core/planning/similarity.py +509 -0
  156. emdash_core/planning/team_focus.py +821 -0
  157. emdash_core/server.py +153 -0
  158. emdash_core/sse/__init__.py +5 -0
  159. emdash_core/sse/stream.py +196 -0
  160. emdash_core/swarm/__init__.py +17 -0
  161. emdash_core/swarm/merge_agent.py +383 -0
  162. emdash_core/swarm/session_manager.py +274 -0
  163. emdash_core/swarm/swarm_runner.py +226 -0
  164. emdash_core/swarm/task_definition.py +137 -0
  165. emdash_core/swarm/worker_spawner.py +319 -0
  166. emdash_core/swarm/worktree_manager.py +278 -0
  167. emdash_core/templates/__init__.py +10 -0
  168. emdash_core/templates/defaults/agent-builder.md.template +82 -0
  169. emdash_core/templates/defaults/focus.md.template +115 -0
  170. emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
  171. emdash_core/templates/defaults/pr-review.md.template +80 -0
  172. emdash_core/templates/defaults/project.md.template +85 -0
  173. emdash_core/templates/defaults/research_critic.md.template +112 -0
  174. emdash_core/templates/defaults/research_planner.md.template +85 -0
  175. emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
  176. emdash_core/templates/defaults/reviewer.md.template +81 -0
  177. emdash_core/templates/defaults/spec.md.template +41 -0
  178. emdash_core/templates/defaults/tasks.md.template +78 -0
  179. emdash_core/templates/loader.py +296 -0
  180. emdash_core/utils/__init__.py +45 -0
  181. emdash_core/utils/git.py +84 -0
  182. emdash_core/utils/image.py +502 -0
  183. emdash_core/utils/logger.py +51 -0
  184. emdash_core-0.1.7.dist-info/METADATA +35 -0
  185. emdash_core-0.1.7.dist-info/RECORD +187 -0
  186. emdash_core-0.1.7.dist-info/WHEEL +4 -0
  187. emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,150 @@
1
+ """Change detection for incremental indexing using git diff."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ import git
8
+
9
+ from ..utils.logger import log
10
+
11
+
@dataclass
class ChangedFiles:
    """Paths that differ from the last indexed state, grouped by change kind.

    An instance is truthy as soon as any of the three groups is non-empty.
    """

    # Newly added paths.
    added: list[Path] = field(default_factory=list)
    # Paths whose content changed.
    modified: list[Path] = field(default_factory=list)
    # Paths that no longer exist.
    deleted: list[Path] = field(default_factory=list)

    @property
    def all_to_index(self) -> list[Path]:
        """Return the added paths followed by the modified paths."""
        to_index = self.added + self.modified
        return to_index

    @property
    def total_changes(self) -> int:
        """Return how many paths are recorded across all three groups."""
        groups = (self.added, self.modified, self.deleted)
        return sum(len(group) for group in groups)

    def __bool__(self) -> bool:
        """Report whether at least one change is recorded."""
        return bool(self.total_changes)
33
+
34
+
class ChangeDetector:
    """Detects files changed since last indexing using git diff.

    The commit recorded at the previous indexing run is diffed against the
    repository's current HEAD and the differing paths are sorted into
    added / modified / deleted groups.
    """

    def __init__(self, repo: "git.Repo", last_indexed_commit: Optional[str] = None):
        """Initialize change detector.

        Args:
            repo: Git repository
            last_indexed_commit: SHA of commit at last index, or None for full index
        """
        self.repo = repo
        self.last_indexed_commit = last_indexed_commit
        self.repo_root = Path(repo.working_dir)

    def get_current_commit(self) -> str:
        """Get current HEAD commit SHA."""
        return self.repo.head.commit.hexsha

    def get_changed_files(self, extensions: Optional[list[str]] = None) -> "ChangedFiles":
        """Find files changed since last index.

        Uses git diff to detect:
        - Added files (A) and copied files (C) - reported as added
        - Modified files (M) and type changes (T) - reported as modified
        - Deleted files (D)
        - Renamed files (R) - treated as delete + add

        Args:
            extensions: Optional list of extensions to filter (e.g., ['.py', '.ts']).
                Matching is case-insensitive and the leading dot is optional.

        Returns:
            ChangedFiles with categorized changes. An empty ChangedFiles is
            also returned when there is no previous commit, when that commit
            cannot be found, or when the diff fails; each of those cases is
            logged.
        """
        if not self.last_indexed_commit:
            log.info("No previous index commit - full index required")
            return ChangedFiles()

        try:
            # Verify the commit exists
            try:
                old_commit = self.repo.commit(self.last_indexed_commit)
            except git.BadName:
                log.warning(f"Previous commit {self.last_indexed_commit[:8]} not found - full index required")
                return ChangedFiles()

            current_commit = self.repo.head.commit

            # Get diff between last indexed commit and current HEAD
            diff = old_commit.diff(current_commit)

            # Normalize the filter once instead of once per changed file.
            wanted = self._normalize_extensions(extensions)

            def include(path: Path) -> bool:
                return wanted is None or path.suffix.lower() in wanted

            added = []
            modified = []
            deleted = []

            for change in diff:
                kind = change.change_type

                if kind == 'R':  # Renamed: delete old + add new
                    old_path = self.repo_root / change.a_path
                    new_path = self.repo_root / change.b_path
                    if include(old_path):
                        deleted.append(old_path)
                    if include(new_path):
                        added.append(new_path)

                elif kind in ('A', 'C'):  # Added, or copied to a new path
                    file_path = self.repo_root / change.b_path
                    if include(file_path):
                        added.append(file_path)

                elif kind in ('M', 'T'):  # Modified or type changed
                    file_path = self.repo_root / change.b_path
                    if include(file_path):
                        modified.append(file_path)

                elif kind == 'D':  # Deleted
                    file_path = self.repo_root / change.a_path
                    if include(file_path):
                        deleted.append(file_path)

            result = ChangedFiles(added=added, modified=modified, deleted=deleted)

            log.info(f"Detected changes: {len(added)} added, {len(modified)} modified, {len(deleted)} deleted")

            return result

        except Exception as e:
            # Best effort: a failed diff is logged and reported as "no changes".
            log.error(f"Error detecting changes: {e}")
            return ChangedFiles()

    @staticmethod
    def _normalize_extensions(extensions: Optional[list[str]]) -> Optional[frozenset]:
        """Return the filter as a set of lower-cased, dot-prefixed suffixes.

        None (no filtering) is passed through unchanged. An empty string is
        kept as-is so it still matches files that have no suffix.
        """
        if extensions is None:
            return None
        normalized = set()
        for ext in extensions:
            ext = ext.lower()
            if ext and not ext.startswith('.'):
                ext = '.' + ext
            normalized.add(ext)
        return frozenset(normalized)

    def _should_include(self, file_path: Path, extensions: Optional[list[str]]) -> bool:
        """Check if file should be included based on extension filter.

        Args:
            file_path: Path to file
            extensions: Optional list of extensions to include; None includes
                every file, an empty list includes none

        Returns:
            True if file should be included
        """
        wanted = self._normalize_extensions(extensions)
        return wanted is None or file_path.suffix.lower() in wanted

    def has_uncommitted_changes(self) -> bool:
        """Check if there are uncommitted changes in the working directory.

        Untracked files count as changes.

        Returns:
            True if there are uncommitted changes
        """
        return self.repo.is_dirty(untracked_files=True)
@@ -0,0 +1,5 @@
1
+ """Git analysis module for EmDash."""
2
+
3
+ from .commit_analyzer import CommitAnalyzer
4
+
5
+ __all__ = ["CommitAnalyzer"]
@@ -0,0 +1,196 @@
1
+ """Analyze Git commit history."""
2
+
3
+ from collections import defaultdict
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import List, Dict
7
+
8
+ from git import Repo
9
+
10
+ from ...core.models import (
11
+ CommitEntity,
12
+ AuthorEntity,
13
+ FileModification,
14
+ GitData,
15
+ RepositoryEntity,
16
+ )
17
+ from ...utils.logger import log
18
+
19
+
class CommitAnalyzer:
    """Analyzes Git commit history and extracts metadata.

    Walks the commits yielded by ``repo.iter_commits()`` and turns them into
    commit, author and file-modification records.
    """

    def __init__(self, repo: "Repo", max_commits: int = None):
        """Initialize commit analyzer.

        Args:
            repo: Git repository
            max_commits: Maximum number of commits to analyze (None or 0 = all)
        """
        self.repo = repo
        self.max_commits = max_commits

    def analyze(self, repo_entity: "RepositoryEntity") -> "GitData":
        """Analyze commit history and extract Git data.

        Args:
            repo_entity: Repository entity

        Returns:
            GitData containing commits, authors, and modifications
        """
        log.info("Analyzing Git commit history...")

        commits = []
        modifications = []
        # One aggregate record per author e-mail address.
        author_stats = defaultdict(self._new_author_record)

        for index, commit in enumerate(self.repo.iter_commits()):
            if self.max_commits and index >= self.max_commits:
                break

            # GitPython computes `commit.stats` on access, so read it once
            # and share the totals with every consumer below.
            totals = commit.stats.total

            commits.append(self._extract_commit(commit, totals))
            modifications.extend(self._extract_modifications(commit))
            self._record_commit(author_stats[commit.author.email], commit, totals)

        # Create author entities
        authors = [
            AuthorEntity(
                email=stats['email'],
                name=stats['name'],
                first_commit=stats['first_commit'],
                last_commit=stats['last_commit'],
                total_commits=stats['commits'],
                total_lines_added=stats['lines_added'],
                total_lines_deleted=stats['lines_deleted'],
            )
            for stats in author_stats.values()
        ]

        log.info(f"Analyzed {len(commits)} commits, {len(authors)} authors")

        return GitData(
            repository=repo_entity,
            commits=commits,
            modifications=modifications,
            authors=authors,
        )

    @staticmethod
    def _new_author_record() -> dict:
        """Return an empty per-author aggregate."""
        return {
            'name': '',
            'email': '',
            'commits': 0,
            'lines_added': 0,
            'lines_deleted': 0,
            'first_commit': None,
            'last_commit': None,
        }

    @staticmethod
    def _record_commit(record: dict, commit, totals: dict) -> None:
        """Fold one commit into an author's aggregate record.

        The first/last timestamps are tracked with explicit comparisons so
        the result does not depend on the order commits are visited in
        (``iter_commits()`` normally yields the newest commit first). The
        display name is taken from the author's most recent commit.

        Args:
            record: Aggregate created by ``_new_author_record``; updated in place
            commit: GitPython commit object
            totals: The commit's ``stats.total`` mapping
        """
        record['email'] = commit.author.email
        record['commits'] += 1
        record['lines_added'] += totals['insertions']
        record['lines_deleted'] += totals['deletions']

        # Naive local time, matching the timestamps stored on commit entities.
        timestamp = datetime.fromtimestamp(commit.committed_date)
        if record['first_commit'] is None or timestamp < record['first_commit']:
            record['first_commit'] = timestamp
        if record['last_commit'] is None or timestamp > record['last_commit']:
            record['last_commit'] = timestamp
            record['name'] = commit.author.name

    def _extract_commit(self, commit, stats_total: dict = None) -> "CommitEntity":
        """Extract a commit entity from a git commit.

        Args:
            commit: GitPython commit object
            stats_total: Optional precomputed ``commit.stats.total`` mapping;
                computed here when omitted

        Returns:
            CommitEntity
        """
        totals = stats_total if stats_total is not None else commit.stats.total
        parent_shas = [parent.hexsha for parent in commit.parents]

        return CommitEntity(
            sha=commit.hexsha,
            message=commit.message,
            timestamp=datetime.fromtimestamp(commit.committed_date),
            author_name=commit.author.name,
            author_email=commit.author.email,
            committer_name=commit.committer.name,
            committer_email=commit.committer.email,
            insertions=totals['insertions'],
            deletions=totals['deletions'],
            files_changed=totals['files'],
            is_merge=len(parent_shas) > 1,
            parent_shas=parent_shas,
        )

    @staticmethod
    def _count_patch_lines(patch) -> tuple:
        """Roughly count added and removed lines in a patch.

        Lines starting with '+' or '-' are counted, except the '+++' / '---'
        file-header lines.

        Args:
            patch: Patch text as bytes or str; may be empty or None

        Returns:
            Tuple of (insertions, deletions)
        """
        if not patch:
            return 0, 0
        if isinstance(patch, bytes):
            patch = patch.decode('utf-8', errors='ignore')

        insertions = 0
        deletions = 0
        for line in patch.split('\n'):
            if line.startswith('+') and not line.startswith('+++'):
                insertions += 1
            elif line.startswith('-') and not line.startswith('---'):
                deletions += 1
        return insertions, deletions

    def _extract_modifications(self, commit) -> "List[FileModification]":
        """Extract file modifications from a commit.

        Merge commits are compared against their first parent only. Failures
        are logged and whatever was collected so far is returned.

        Args:
            commit: GitPython commit object

        Returns:
            List of FileModification objects
        """
        modifications = []

        try:
            if commit.parents:
                diffs = commit.parents[0].diff(commit, create_patch=True)
            else:
                # Root commit: compare against the empty tree so every file is
                # reported as added. Passing None would diff against the
                # working tree instead.
                from git import NULL_TREE
                diffs = commit.diff(NULL_TREE, create_patch=True)

            # Absolute paths are built from the repo root (matching File nodes).
            repo_root = Path(self.repo.working_dir)

            for diff in diffs:
                # Determine change type
                if diff.new_file:
                    change_type = "added"
                elif diff.deleted_file:
                    change_type = "deleted"
                elif diff.renamed_file:
                    change_type = "renamed"
                else:
                    change_type = "modified"

                # Get file path (handle renames)
                relative_path = diff.b_path if diff.b_path else diff.a_path
                old_relative_path = diff.a_path if diff.renamed_file else None

                file_path = str(repo_root / relative_path) if relative_path else None
                old_path = str(repo_root / old_relative_path) if old_relative_path else None

                insertions, deletions = self._count_patch_lines(getattr(diff, 'diff', None))

                modifications.append(FileModification(
                    commit_sha=commit.hexsha,
                    file_path=file_path,
                    change_type=change_type,
                    insertions=insertions,
                    deletions=deletions,
                    old_path=old_path,
                ))

        except Exception as e:
            log.warning(f"Failed to extract modifications for commit {commit.hexsha}: {e}")

        return modifications
@@ -0,0 +1,6 @@
1
+ """GitHub integration for EmDash."""
2
+
3
+ from .pr_fetcher import PRFetcher
4
+ from .task_extractor import TaskExtractor
5
+
6
+ __all__ = ["PRFetcher", "TaskExtractor"]
@@ -0,0 +1,296 @@
1
+ """GitHub Pull Request fetcher using gh CLI."""
2
+
3
+ import json
4
+ import os
5
+ import re
6
+ import shutil
7
+ import subprocess
8
+ from datetime import datetime
9
+ from typing import Optional
10
+
11
+ from ...core.models import PullRequestEntity
12
+ from ...utils.logger import log
13
+
14
+
class PRFetcher:
    """Fetches pull requests using gh CLI (supports private repos)."""

    # Pattern to extract owner/repo from GitHub remote URLs. Accepts an
    # optional scheme (http, https, ssh, git), optional user info ("git@"),
    # either "/" or ":" after the host, and an optional ".git" suffix and
    # trailing slash. The repository group is lazy so that names containing
    # dots ("next.js", "me.github.io") are kept intact.
    GITHUB_URL_PATTERN = re.compile(
        r"^(?:(?:https?|ssh|git)://)?(?:[^@/\s]+@)?(?:www\.)?github\.com[/:]"
        r"([^/\s]+)/([^/\s]+?)(?:\.git)?/?$"
    )

    def __init__(
        self,
        owner: str,
        repo: str,
        token: Optional[str] = None,
    ):
        """Initialize PR fetcher.

        Args:
            owner: GitHub repository owner
            repo: GitHub repository name
            token: GitHub personal access token (ignored, uses gh CLI auth)
        """
        self.owner = owner
        self.repo = repo
        self.repo_path = f"{owner}/{repo}"

        # Find gh CLI binary
        self.gh_path = self._find_gh_cli()
        if not self.gh_path:
            log.warning(
                "gh CLI not found. Install with 'brew install gh' and authenticate with 'gh auth login'. "
                "PR fetching will be skipped."
            )

    def _find_gh_cli(self) -> Optional[str]:
        """Find the gh CLI binary path.

        Returns:
            The PATH lookup result if it is a file, otherwise the first
            existing well-known install location, otherwise None
        """
        candidates = [
            shutil.which("gh"),
            "/opt/homebrew/bin/gh",
            "/usr/local/bin/gh",
            "/usr/bin/gh",
        ]
        for candidate in candidates:
            if candidate and os.path.isfile(candidate):
                return candidate
        return None

    def _run_gh(self, args: list[str]) -> Optional[str]:
        """Run a gh CLI command and return output.

        Args:
            args: Arguments passed to gh (without the binary itself)

        Returns:
            Captured stdout on exit code 0; None when gh is unavailable,
            exits non-zero, times out (120 s) or cannot be started
        """
        if not self.gh_path:
            return None
        try:
            # Copy environment but remove GITHUB_TOKEN to let gh use its own OAuth
            env = os.environ.copy()
            env.pop("GITHUB_TOKEN", None)
            env.pop("GH_TOKEN", None)

            result = subprocess.run(
                [self.gh_path, *args],
                capture_output=True,
                text=True,
                timeout=120,
                env=env,
            )
        except subprocess.TimeoutExpired:
            log.error("gh CLI command timed out")
            return None
        except Exception as e:
            log.error(f"gh CLI error: {e}")
            return None

        if result.returncode == 0:
            return result.stdout
        log.error(f"gh CLI error: {result.stderr}")
        return None

    @classmethod
    def extract_repo_info(cls, remote_url: str) -> tuple[Optional[str], Optional[str]]:
        """Extract owner and repo name from a GitHub remote URL.

        Args:
            remote_url: Git remote URL (HTTPS, SSH or scp-like format);
                surrounding whitespace is ignored

        Returns:
            Tuple of (owner, repo) or (None, None) if not a GitHub URL

        Examples:
            >>> PRFetcher.extract_repo_info("https://github.com/owner/repo.git")
            ('owner', 'repo')
            >>> PRFetcher.extract_repo_info("git@github.com:owner/repo.git")
            ('owner', 'repo')
            >>> PRFetcher.extract_repo_info("https://github.com/vercel/next.js.git")
            ('vercel', 'next.js')
        """
        if not remote_url:
            return None, None

        match = cls.GITHUB_URL_PATTERN.match(remote_url.strip())
        if match:
            return match.group(1), match.group(2)

        return None, None

    @staticmethod
    def _as_aware(value: Optional[datetime]) -> Optional[datetime]:
        """Return a timezone-aware datetime so values can be compared safely.

        Naive values are interpreted as system local time, which is what
        ``datetime.astimezone()`` presumes for naive input.
        """
        if value is None:
            return None
        if value.tzinfo is None or value.utcoffset() is None:
            return value.astimezone()
        return value

    def fetch_prs(
        self,
        state: str = "all",
        limit: Optional[int] = 100,
        since: Optional[datetime] = None,
    ) -> "list[PullRequestEntity]":
        """Fetch pull requests from the repository using gh CLI.

        One ``gh pr list`` query is issued per underlying state and the
        results are merged; a PR number is only ever returned once.

        Args:
            state: PR state filter ("open", "closed", "all")
            limit: Maximum number of PRs to return. None removes the overall
                cap, but each state query still asks gh for at most 100 PRs
            since: Skip PRs created before this datetime (PRs whose creation
                date cannot be parsed are kept); naive values are treated as
                local time

        Returns:
            List of PullRequestEntity objects
        """
        if not self.gh_path:
            log.error("gh CLI not available. Cannot fetch PRs.")
            return []

        # Map state to gh CLI format
        if state == "all":
            states_to_fetch = ["open", "closed", "merged"]
        elif state == "closed":
            states_to_fetch = ["closed", "merged"]
        else:
            states_to_fetch = [state]

        # gh returns timezone-aware timestamps; comparing them with a naive
        # `since` would raise TypeError, so normalize once up front.
        since_aware = self._as_aware(since)
        per_query_limit = str(limit or 100)

        prs = []
        # The "closed" and "merged" queries can both return a merged PR.
        seen_numbers = set()

        for pr_state in states_to_fetch:
            # Build gh pr list command - request minimal fields to avoid GraphQL limits
            args = [
                "pr", "list",
                "-R", self.repo_path,
                "--state", pr_state,
                "--limit", per_query_limit,
                "--json", "number,title,body,state,createdAt,author,mergedAt,labels,additions,deletions,baseRefName,headRefName"
            ]

            output = self._run_gh(args)
            if not output:
                continue

            try:
                pr_list = json.loads(output)
            except json.JSONDecodeError as e:
                log.error(f"Failed to parse gh output: {e}")
                continue

            for pr_data in pr_list:
                number = pr_data.get("number")
                if number is not None and number in seen_numbers:
                    continue

                # Check date filter
                if since_aware is not None:
                    created_at = self._as_aware(self._parse_datetime(pr_data.get("createdAt")))
                    if created_at and created_at < since_aware:
                        continue

                if number is not None:
                    seen_numbers.add(number)
                prs.append(self._extract_pr_from_json(pr_data))

                if len(prs) % 10 == 0:
                    log.info(f"Fetched {len(prs)} PRs...")

                # Check limit
                if limit and len(prs) >= limit:
                    break

            if limit and len(prs) >= limit:
                break

        log.info(f"Fetched {len(prs)} pull requests from {self.repo_path}")
        return prs

    def fetch_pr_files(self, pr_number: int) -> list[str]:
        """Get files changed in a specific PR.

        Args:
            pr_number: Pull request number

        Returns:
            List of file paths that were modified; empty when gh is
            unavailable, the command fails or its output is not valid JSON
        """
        if not self.gh_path:
            return []

        args = [
            "pr", "view", str(pr_number),
            "-R", self.repo_path,
            "--json", "files"
        ]

        output = self._run_gh(args)
        if not output:
            return []

        try:
            data = json.loads(output)
        except json.JSONDecodeError:
            return []
        return [f.get("path", "") for f in data.get("files") or []]

    def _parse_datetime(self, dt_str: Optional[str]) -> Optional[datetime]:
        """Parse ISO datetime string.

        Args:
            dt_str: ISO-8601 timestamp, optionally ending in "Z"

        Returns:
            Parsed datetime, or None for empty or unparseable input
        """
        if not dt_str:
            return None
        try:
            # Handle ISO format with Z suffix
            return datetime.fromisoformat(dt_str.replace("Z", "+00:00"))
        except (ValueError, AttributeError, TypeError):
            return None

    def _extract_pr_from_json(self, pr_data: dict) -> "PullRequestEntity":
        """Extract PR data from gh CLI JSON output.

        Fields that are absent or null in ``pr_data`` fall back to empty
        values instead of raising.

        Args:
            pr_data: PR data from gh CLI

        Returns:
            PullRequestEntity with extracted data
        """
        # Determine state; a merge timestamp always wins.
        state = (pr_data.get("state") or "open").lower()
        if pr_data.get("mergedAt"):
            state = "merged"

        # Get reviewers (requested reviewers plus authors of submitted reviews)
        reviewers = set()
        for req in pr_data.get("reviewRequests") or []:
            if isinstance(req, dict) and req.get("login"):
                reviewers.add(req["login"])
        for review in pr_data.get("reviews") or []:
            review_author = review.get("author") if isinstance(review, dict) else None
            if isinstance(review_author, dict) and review_author.get("login"):
                reviewers.add(review_author["login"])

        # Get commit SHAs
        commit_shas = [
            commit["oid"]
            for commit in pr_data.get("commits") or []
            if isinstance(commit, dict) and commit.get("oid")
        ]

        # Get files changed
        files_changed = [
            f["path"]
            for f in pr_data.get("files") or []
            if isinstance(f, dict) and f.get("path")
        ]

        # Get labels
        labels = [
            label["name"]
            for label in pr_data.get("labels") or []
            if isinstance(label, dict) and label.get("name")
        ]

        # Get author
        author = "unknown"
        author_data = pr_data.get("author")
        if isinstance(author_data, dict):
            author = author_data.get("login") or "unknown"

        return PullRequestEntity(
            number=pr_data.get("number", 0),
            title=pr_data.get("title", ""),
            description=pr_data.get("body"),
            state=state,
            created_at=self._parse_datetime(pr_data.get("createdAt")),
            author=author,
            merged_at=self._parse_datetime(pr_data.get("mergedAt")),
            # Sorted so the result is deterministic across runs.
            reviewers=sorted(reviewers),
            labels=labels,
            additions=pr_data.get("additions", 0),
            deletions=pr_data.get("deletions", 0),
            files_changed=files_changed,
            commit_shas=commit_shas,
            base_branch=pr_data.get("baseRefName", "main"),
            head_branch=pr_data.get("headRefName", ""),
        )