emdash-core 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- emdash_core/__init__.py +3 -0
- emdash_core/agent/__init__.py +37 -0
- emdash_core/agent/agents.py +225 -0
- emdash_core/agent/code_reviewer.py +476 -0
- emdash_core/agent/compaction.py +143 -0
- emdash_core/agent/context_manager.py +140 -0
- emdash_core/agent/events.py +338 -0
- emdash_core/agent/handlers.py +224 -0
- emdash_core/agent/inprocess_subagent.py +377 -0
- emdash_core/agent/mcp/__init__.py +50 -0
- emdash_core/agent/mcp/client.py +346 -0
- emdash_core/agent/mcp/config.py +302 -0
- emdash_core/agent/mcp/manager.py +496 -0
- emdash_core/agent/mcp/tool_factory.py +213 -0
- emdash_core/agent/prompts/__init__.py +38 -0
- emdash_core/agent/prompts/main_agent.py +104 -0
- emdash_core/agent/prompts/subagents.py +131 -0
- emdash_core/agent/prompts/workflow.py +136 -0
- emdash_core/agent/providers/__init__.py +34 -0
- emdash_core/agent/providers/base.py +143 -0
- emdash_core/agent/providers/factory.py +80 -0
- emdash_core/agent/providers/models.py +220 -0
- emdash_core/agent/providers/openai_provider.py +463 -0
- emdash_core/agent/providers/transformers_provider.py +217 -0
- emdash_core/agent/research/__init__.py +81 -0
- emdash_core/agent/research/agent.py +143 -0
- emdash_core/agent/research/controller.py +254 -0
- emdash_core/agent/research/critic.py +428 -0
- emdash_core/agent/research/macros.py +469 -0
- emdash_core/agent/research/planner.py +449 -0
- emdash_core/agent/research/researcher.py +436 -0
- emdash_core/agent/research/state.py +523 -0
- emdash_core/agent/research/synthesizer.py +594 -0
- emdash_core/agent/reviewer_profile.py +475 -0
- emdash_core/agent/rules.py +123 -0
- emdash_core/agent/runner.py +601 -0
- emdash_core/agent/session.py +262 -0
- emdash_core/agent/spec_schema.py +66 -0
- emdash_core/agent/specification.py +479 -0
- emdash_core/agent/subagent.py +397 -0
- emdash_core/agent/subagent_prompts.py +13 -0
- emdash_core/agent/toolkit.py +482 -0
- emdash_core/agent/toolkits/__init__.py +64 -0
- emdash_core/agent/toolkits/base.py +96 -0
- emdash_core/agent/toolkits/explore.py +47 -0
- emdash_core/agent/toolkits/plan.py +55 -0
- emdash_core/agent/tools/__init__.py +141 -0
- emdash_core/agent/tools/analytics.py +436 -0
- emdash_core/agent/tools/base.py +131 -0
- emdash_core/agent/tools/coding.py +484 -0
- emdash_core/agent/tools/github_mcp.py +592 -0
- emdash_core/agent/tools/history.py +13 -0
- emdash_core/agent/tools/modes.py +153 -0
- emdash_core/agent/tools/plan.py +206 -0
- emdash_core/agent/tools/plan_write.py +135 -0
- emdash_core/agent/tools/search.py +412 -0
- emdash_core/agent/tools/spec.py +341 -0
- emdash_core/agent/tools/task.py +262 -0
- emdash_core/agent/tools/task_output.py +204 -0
- emdash_core/agent/tools/tasks.py +454 -0
- emdash_core/agent/tools/traversal.py +588 -0
- emdash_core/agent/tools/web.py +179 -0
- emdash_core/analytics/__init__.py +5 -0
- emdash_core/analytics/engine.py +1286 -0
- emdash_core/api/__init__.py +5 -0
- emdash_core/api/agent.py +308 -0
- emdash_core/api/agents.py +154 -0
- emdash_core/api/analyze.py +264 -0
- emdash_core/api/auth.py +173 -0
- emdash_core/api/context.py +77 -0
- emdash_core/api/db.py +121 -0
- emdash_core/api/embed.py +131 -0
- emdash_core/api/feature.py +143 -0
- emdash_core/api/health.py +93 -0
- emdash_core/api/index.py +162 -0
- emdash_core/api/plan.py +110 -0
- emdash_core/api/projectmd.py +210 -0
- emdash_core/api/query.py +320 -0
- emdash_core/api/research.py +122 -0
- emdash_core/api/review.py +161 -0
- emdash_core/api/router.py +76 -0
- emdash_core/api/rules.py +116 -0
- emdash_core/api/search.py +119 -0
- emdash_core/api/spec.py +99 -0
- emdash_core/api/swarm.py +223 -0
- emdash_core/api/tasks.py +109 -0
- emdash_core/api/team.py +120 -0
- emdash_core/auth/__init__.py +17 -0
- emdash_core/auth/github.py +389 -0
- emdash_core/config.py +74 -0
- emdash_core/context/__init__.py +52 -0
- emdash_core/context/models.py +50 -0
- emdash_core/context/providers/__init__.py +11 -0
- emdash_core/context/providers/base.py +74 -0
- emdash_core/context/providers/explored_areas.py +183 -0
- emdash_core/context/providers/touched_areas.py +360 -0
- emdash_core/context/registry.py +73 -0
- emdash_core/context/reranker.py +199 -0
- emdash_core/context/service.py +260 -0
- emdash_core/context/session.py +352 -0
- emdash_core/core/__init__.py +104 -0
- emdash_core/core/config.py +454 -0
- emdash_core/core/exceptions.py +55 -0
- emdash_core/core/models.py +265 -0
- emdash_core/core/review_config.py +57 -0
- emdash_core/db/__init__.py +67 -0
- emdash_core/db/auth.py +134 -0
- emdash_core/db/models.py +91 -0
- emdash_core/db/provider.py +222 -0
- emdash_core/db/providers/__init__.py +5 -0
- emdash_core/db/providers/supabase.py +452 -0
- emdash_core/embeddings/__init__.py +24 -0
- emdash_core/embeddings/indexer.py +534 -0
- emdash_core/embeddings/models.py +192 -0
- emdash_core/embeddings/providers/__init__.py +7 -0
- emdash_core/embeddings/providers/base.py +112 -0
- emdash_core/embeddings/providers/fireworks.py +141 -0
- emdash_core/embeddings/providers/openai.py +104 -0
- emdash_core/embeddings/registry.py +146 -0
- emdash_core/embeddings/service.py +215 -0
- emdash_core/graph/__init__.py +26 -0
- emdash_core/graph/builder.py +134 -0
- emdash_core/graph/connection.py +692 -0
- emdash_core/graph/schema.py +416 -0
- emdash_core/graph/writer.py +667 -0
- emdash_core/ingestion/__init__.py +7 -0
- emdash_core/ingestion/change_detector.py +150 -0
- emdash_core/ingestion/git/__init__.py +5 -0
- emdash_core/ingestion/git/commit_analyzer.py +196 -0
- emdash_core/ingestion/github/__init__.py +6 -0
- emdash_core/ingestion/github/pr_fetcher.py +296 -0
- emdash_core/ingestion/github/task_extractor.py +100 -0
- emdash_core/ingestion/orchestrator.py +540 -0
- emdash_core/ingestion/parsers/__init__.py +10 -0
- emdash_core/ingestion/parsers/base_parser.py +66 -0
- emdash_core/ingestion/parsers/call_graph_builder.py +121 -0
- emdash_core/ingestion/parsers/class_extractor.py +154 -0
- emdash_core/ingestion/parsers/function_extractor.py +202 -0
- emdash_core/ingestion/parsers/import_analyzer.py +119 -0
- emdash_core/ingestion/parsers/python_parser.py +123 -0
- emdash_core/ingestion/parsers/registry.py +72 -0
- emdash_core/ingestion/parsers/ts_ast_parser.js +313 -0
- emdash_core/ingestion/parsers/typescript_parser.py +278 -0
- emdash_core/ingestion/repository.py +346 -0
- emdash_core/models/__init__.py +38 -0
- emdash_core/models/agent.py +68 -0
- emdash_core/models/index.py +77 -0
- emdash_core/models/query.py +113 -0
- emdash_core/planning/__init__.py +7 -0
- emdash_core/planning/agent_api.py +413 -0
- emdash_core/planning/context_builder.py +265 -0
- emdash_core/planning/feature_context.py +232 -0
- emdash_core/planning/feature_expander.py +646 -0
- emdash_core/planning/llm_explainer.py +198 -0
- emdash_core/planning/similarity.py +509 -0
- emdash_core/planning/team_focus.py +821 -0
- emdash_core/server.py +153 -0
- emdash_core/sse/__init__.py +5 -0
- emdash_core/sse/stream.py +196 -0
- emdash_core/swarm/__init__.py +17 -0
- emdash_core/swarm/merge_agent.py +383 -0
- emdash_core/swarm/session_manager.py +274 -0
- emdash_core/swarm/swarm_runner.py +226 -0
- emdash_core/swarm/task_definition.py +137 -0
- emdash_core/swarm/worker_spawner.py +319 -0
- emdash_core/swarm/worktree_manager.py +278 -0
- emdash_core/templates/__init__.py +10 -0
- emdash_core/templates/defaults/agent-builder.md.template +82 -0
- emdash_core/templates/defaults/focus.md.template +115 -0
- emdash_core/templates/defaults/pr-review-enhanced.md.template +309 -0
- emdash_core/templates/defaults/pr-review.md.template +80 -0
- emdash_core/templates/defaults/project.md.template +85 -0
- emdash_core/templates/defaults/research_critic.md.template +112 -0
- emdash_core/templates/defaults/research_planner.md.template +85 -0
- emdash_core/templates/defaults/research_synthesizer.md.template +128 -0
- emdash_core/templates/defaults/reviewer.md.template +81 -0
- emdash_core/templates/defaults/spec.md.template +41 -0
- emdash_core/templates/defaults/tasks.md.template +78 -0
- emdash_core/templates/loader.py +296 -0
- emdash_core/utils/__init__.py +45 -0
- emdash_core/utils/git.py +84 -0
- emdash_core/utils/image.py +502 -0
- emdash_core/utils/logger.py +51 -0
- emdash_core-0.1.7.dist-info/METADATA +35 -0
- emdash_core-0.1.7.dist-info/RECORD +187 -0
- emdash_core-0.1.7.dist-info/WHEEL +4 -0
- emdash_core-0.1.7.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Change detection for incremental indexing using git diff."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import git
|
|
8
|
+
|
|
9
|
+
from ..utils.logger import log
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class ChangedFiles:
    """Snapshot of files changed since the last indexing run."""

    added: list[Path] = field(default_factory=list)
    modified: list[Path] = field(default_factory=list)
    deleted: list[Path] = field(default_factory=list)

    @property
    def all_to_index(self) -> list[Path]:
        """All files that need to be (re)indexed: added first, then modified."""
        return [*self.added, *self.modified]

    @property
    def total_changes(self) -> int:
        """Total number of changed files across all three categories."""
        return sum(len(group) for group in (self.added, self.modified, self.deleted))

    def __bool__(self) -> bool:
        """Truthy when at least one file changed."""
        return self.total_changes > 0
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ChangeDetector:
    """Detects files changed since last indexing using git diff.

    Compares the commit recorded at the last index run against the current
    HEAD and categorizes each changed file so the indexer can work
    incrementally instead of doing a full re-scan.
    """

    def __init__(self, repo: git.Repo, last_indexed_commit: Optional[str] = None):
        """Initialize change detector.

        Args:
            repo: Git repository
            last_indexed_commit: SHA of commit at last index, or None for full index
        """
        self.repo = repo
        self.last_indexed_commit = last_indexed_commit
        self.repo_root = Path(repo.working_dir)

    def get_current_commit(self) -> str:
        """Get current HEAD commit SHA."""
        return self.repo.head.commit.hexsha

    def get_changed_files(self, extensions: Optional[list[str]] = None) -> ChangedFiles:
        """Find files changed since last index.

        Uses git diff to detect:
        - Added files (A)
        - Modified files (M)
        - Deleted files (D)
        - Renamed files (R) - treated as delete + add
        - Copied / type-changed files (C, T) - treated as modified

        Note: an empty ChangedFiles is returned both when a full index is
        required (no/unknown previous commit) and on unexpected git errors;
        callers cannot distinguish the two cases from the return value alone.

        Args:
            extensions: Optional list of extensions to filter (e.g., ['.py', '.ts'])

        Returns:
            ChangedFiles with categorized changes
        """
        if not self.last_indexed_commit:
            log.info("No previous index commit - full index required")
            return ChangedFiles()

        try:
            # Verify the commit exists
            try:
                old_commit = self.repo.commit(self.last_indexed_commit)
            except git.BadName:
                log.warning(f"Previous commit {self.last_indexed_commit[:8]} not found - full index required")
                return ChangedFiles()

            current_commit = self.repo.head.commit

            # Get diff between last indexed commit and current HEAD
            diff = old_commit.diff(current_commit)

            # Hoist extension normalization out of the per-file check:
            # previously the lowercased list was rebuilt for every file.
            ext_filter = (
                {ext.lower() for ext in extensions} if extensions is not None else None
            )

            def include(path: Path) -> bool:
                # No filter means everything is included.
                return ext_filter is None or path.suffix.lower() in ext_filter

            added: list[Path] = []
            modified: list[Path] = []
            deleted: list[Path] = []

            for change in diff:
                ctype = change.change_type
                if ctype == 'A':  # Added
                    file_path = self.repo_root / change.b_path
                    if include(file_path):
                        added.append(file_path)

                elif ctype == 'M':  # Modified
                    file_path = self.repo_root / change.b_path
                    if include(file_path):
                        modified.append(file_path)

                elif ctype == 'D':  # Deleted
                    file_path = self.repo_root / change.a_path
                    if include(file_path):
                        deleted.append(file_path)

                elif ctype == 'R':  # Renamed: treat as delete old + add new
                    old_path = self.repo_root / change.a_path
                    new_path = self.repo_root / change.b_path
                    if include(old_path):
                        deleted.append(old_path)
                    if include(new_path):
                        added.append(new_path)

                elif ctype in ('C', 'T'):  # Copied or Type changed
                    file_path = self.repo_root / change.b_path
                    if include(file_path):
                        modified.append(file_path)

            result = ChangedFiles(added=added, modified=modified, deleted=deleted)

            log.info(f"Detected changes: {len(added)} added, {len(modified)} modified, {len(deleted)} deleted")

            return result

        except Exception as e:
            # Deliberate best-effort: any unexpected git failure falls back
            # to "no changes detected" instead of aborting indexing.
            log.error(f"Error detecting changes: {e}")
            return ChangedFiles()

    def _should_include(self, file_path: Path, extensions: Optional[list[str]]) -> bool:
        """Check if file should be included based on extension filter.

        Args:
            file_path: Path to file
            extensions: Optional list of extensions to include

        Returns:
            True if file should be included
        """
        if extensions is None:
            return True
        # Set membership instead of a rebuilt list; comparison is case-insensitive.
        return file_path.suffix.lower() in {ext.lower() for ext in extensions}

    def has_uncommitted_changes(self) -> bool:
        """Check if there are uncommitted changes in the working directory.

        Returns:
            True if there are uncommitted changes (including untracked files)
        """
        return self.repo.is_dirty(untracked_files=True)
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Analyze Git commit history."""
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
from git import Repo
|
|
9
|
+
|
|
10
|
+
from ...core.models import (
|
|
11
|
+
CommitEntity,
|
|
12
|
+
AuthorEntity,
|
|
13
|
+
FileModification,
|
|
14
|
+
GitData,
|
|
15
|
+
RepositoryEntity,
|
|
16
|
+
)
|
|
17
|
+
from ...utils.logger import log
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CommitAnalyzer:
    """Analyzes Git commit history and extracts metadata."""

    def __init__(self, repo: Repo, max_commits: Optional[int] = None):
        """Initialize commit analyzer.

        Args:
            repo: Git repository
            max_commits: Maximum number of commits to analyze (None = all)
        """
        self.repo = repo
        self.max_commits = max_commits

    def analyze(self, repo_entity: RepositoryEntity) -> GitData:
        """Analyze commit history and extract Git data.

        Args:
            repo_entity: Repository entity

        Returns:
            GitData containing commits, authors, and modifications
        """
        log.info("Analyzing Git commit history...")

        commits = []
        modifications = []
        author_stats = defaultdict(lambda: {
            'name': '',
            'email': '',
            'commits': 0,
            'lines_added': 0,
            'lines_deleted': 0,
            'first_commit': None,
            'last_commit': None,
        })

        # Iterate through commits (GitPython yields them newest-first)
        commit_count = 0
        for commit in self.repo.iter_commits():
            if self.max_commits and commit_count >= self.max_commits:
                break

            commits.append(self._extract_commit(commit))

            # Extract file modifications
            modifications.extend(self._extract_modifications(commit))

            # Update author statistics. Hoist commit.stats: every access
            # recomputes the commit's diff stats, which is expensive.
            totals = commit.stats.total
            email = commit.author.email
            stats = author_stats[email]
            stats['name'] = commit.author.name
            stats['email'] = email
            stats['commits'] += 1
            stats['lines_added'] += totals['insertions']
            stats['lines_deleted'] += totals['deletions']

            # NOTE(review): naive local-time datetime; confirm whether
            # downstream expects UTC-aware timestamps.
            timestamp = datetime.fromtimestamp(commit.committed_date)
            # BUG FIX: the old code set first_commit once and overwrote
            # last_commit on every iteration; because iter_commits() walks
            # newest -> oldest, that inverted the semantics (first ended up
            # as the newest commit, last as the oldest). Track min/max
            # explicitly so iteration order does not matter.
            if stats['first_commit'] is None or timestamp < stats['first_commit']:
                stats['first_commit'] = timestamp
            if stats['last_commit'] is None or timestamp > stats['last_commit']:
                stats['last_commit'] = timestamp

            commit_count += 1

        # Create author entities
        authors = [
            AuthorEntity(
                email=stats['email'],
                name=stats['name'],
                first_commit=stats['first_commit'],
                last_commit=stats['last_commit'],
                total_commits=stats['commits'],
                total_lines_added=stats['lines_added'],
                total_lines_deleted=stats['lines_deleted'],
            )
            for stats in author_stats.values()
        ]

        log.info(f"Analyzed {len(commits)} commits, {len(authors)} authors")

        return GitData(
            repository=repo_entity,
            commits=commits,
            modifications=modifications,
            authors=authors,
        )

    def _extract_commit(self, commit) -> CommitEntity:
        """Extract a commit entity from a git commit.

        Args:
            commit: GitPython commit object

        Returns:
            CommitEntity
        """
        parent_shas = [parent.hexsha for parent in commit.parents]
        # Single stats computation; each commit.stats access re-walks the diff.
        totals = commit.stats.total

        return CommitEntity(
            sha=commit.hexsha,
            message=commit.message,
            timestamp=datetime.fromtimestamp(commit.committed_date),
            author_name=commit.author.name,
            author_email=commit.author.email,
            committer_name=commit.committer.name,
            committer_email=commit.committer.email,
            insertions=totals['insertions'],
            deletions=totals['deletions'],
            files_changed=totals['files'],
            is_merge=len(parent_shas) > 1,
            parent_shas=parent_shas,
        )

    def _extract_modifications(self, commit) -> List[FileModification]:
        """Extract file modifications from a commit.

        Args:
            commit: GitPython commit object

        Returns:
            List of FileModification objects (empty on extraction failure)
        """
        modifications = []

        try:
            # Get diffs from parent (if exists)
            if commit.parents:
                diffs = commit.parents[0].diff(commit, create_patch=True)
            else:
                # First commit - all files are new.
                # NOTE(review): diff(None) compares against the working tree,
                # not an empty tree; for a root commit this may not reflect
                # the commit's actual content - confirm intended.
                diffs = commit.diff(None, create_patch=True)

            # Resolve against the repo root once, outside the per-file loop.
            repo_root = Path(self.repo.working_dir)

            for diff in diffs:
                # Determine change type
                if diff.new_file:
                    change_type = "added"
                elif diff.deleted_file:
                    change_type = "deleted"
                elif diff.renamed_file:
                    change_type = "renamed"
                else:
                    change_type = "modified"

                # Get file path (handle renames)
                relative_path = diff.b_path if diff.b_path else diff.a_path
                old_relative_path = diff.a_path if diff.renamed_file else None

                # Convert relative paths to absolute paths (matching File nodes)
                file_path = str(repo_root / relative_path) if relative_path else None
                old_path = str(repo_root / old_relative_path) if old_relative_path else None

                # Calculate insertions/deletions (rough estimate by counting
                # +/- lines in the patch text, excluding the +++/--- headers)
                insertions = 0
                deletions = 0

                if hasattr(diff, 'diff') and diff.diff:
                    diff_text = diff.diff.decode('utf-8', errors='ignore')
                    for line in diff_text.split('\n'):
                        if line.startswith('+') and not line.startswith('+++'):
                            insertions += 1
                        elif line.startswith('-') and not line.startswith('---'):
                            deletions += 1

                modifications.append(FileModification(
                    commit_sha=commit.hexsha,
                    file_path=file_path,
                    change_type=change_type,
                    insertions=insertions,
                    deletions=deletions,
                    old_path=old_path,
                ))

        except Exception as e:
            # Best-effort: one unreadable commit should not abort the analysis.
            log.warning(f"Failed to extract modifications for commit {commit.hexsha}: {e}")

        return modifications
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
"""GitHub Pull Request fetcher using gh CLI."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
import shutil
|
|
7
|
+
import subprocess
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from ...core.models import PullRequestEntity
|
|
12
|
+
from ...utils.logger import log
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class PRFetcher:
    """Fetches pull requests using gh CLI (supports private repos)."""

    # Pattern to extract owner/repo from GitHub URLs
    GITHUB_URL_PATTERN = re.compile(
        r"(?:https?://)?(?:www\.)?github\.com[/:]([^/]+)/([^/.]+)(?:\.git)?/?$"
    )

    def __init__(
        self,
        owner: str,
        repo: str,
        token: Optional[str] = None,
    ):
        """Initialize PR fetcher.

        Args:
            owner: GitHub repository owner
            repo: GitHub repository name
            token: GitHub personal access token (ignored, uses gh CLI auth)
        """
        self.owner = owner
        self.repo = repo
        self.repo_path = f"{owner}/{repo}"

        # Find gh CLI binary
        self.gh_path = self._find_gh_cli()
        if not self.gh_path:
            log.warning(
                "gh CLI not found. Install with 'brew install gh' and authenticate with 'gh auth login'. "
                "PR fetching will be skipped."
            )

    def _find_gh_cli(self) -> Optional[str]:
        """Find the gh CLI binary path.

        Returns:
            Absolute path to the gh binary, or None if not installed.
        """
        # PATH lookup first, then common install locations.
        candidates = [
            shutil.which("gh"),
            "/opt/homebrew/bin/gh",
            "/usr/local/bin/gh",
            "/usr/bin/gh",
        ]
        for path in candidates:
            # FIX: was shutil.os.path.isfile - reaching os through shutil's
            # internal import; use the os module this file imports directly.
            if path and os.path.isfile(path):
                return path
        return None

    def _run_gh(self, args: list[str]) -> Optional[str]:
        """Run a gh CLI command and return its stdout, or None on any failure."""
        if not self.gh_path:
            return None
        try:
            # Copy environment but remove GITHUB_TOKEN to let gh use its own OAuth
            env = os.environ.copy()
            env.pop("GITHUB_TOKEN", None)
            env.pop("GH_TOKEN", None)

            result = subprocess.run(
                [self.gh_path] + args,
                capture_output=True,
                text=True,
                timeout=120,
                env=env,
            )
            if result.returncode == 0:
                return result.stdout
            log.error(f"gh CLI error: {result.stderr}")
            return None
        except subprocess.TimeoutExpired:
            log.error("gh CLI command timed out")
            return None
        except Exception as e:
            log.error(f"gh CLI error: {e}")
            return None

    @classmethod
    def extract_repo_info(cls, remote_url: str) -> tuple[Optional[str], Optional[str]]:
        """Extract owner and repo name from a GitHub remote URL.

        Args:
            remote_url: Git remote URL (HTTPS or SSH format)

        Returns:
            Tuple of (owner, repo) or (None, None) if not a GitHub URL

        Examples:
            >>> PRFetcher.extract_repo_info("https://github.com/owner/repo.git")
            ('owner', 'repo')
            >>> PRFetcher.extract_repo_info("git@github.com:owner/repo.git")
            ('owner', 'repo')
        """
        # Handle SSH format: git@github.com:owner/repo.git
        if remote_url.startswith("git@github.com:"):
            # FIX: replace(".git", "") stripped the substring anywhere in the
            # name (e.g. "my.github.repo" -> "myhub.repo"); only strip the
            # prefix and a trailing ".git" suffix.
            trimmed = remote_url.removeprefix("git@github.com:").removesuffix(".git")
            parts = trimmed.split("/")
            if len(parts) >= 2:
                return parts[0], parts[1]

        # Handle HTTPS format
        match = cls.GITHUB_URL_PATTERN.match(remote_url)
        if match:
            return match.group(1), match.group(2)

        return None, None

    def fetch_prs(
        self,
        state: str = "all",
        limit: Optional[int] = 100,
        since: Optional[datetime] = None,
    ) -> list[PullRequestEntity]:
        """Fetch pull requests from the repository using gh CLI.

        Args:
            state: PR state filter ("open", "closed", "all")
            limit: Maximum number of PRs to fetch (None for all)
            since: Only fetch PRs created after this datetime

        Returns:
            List of PullRequestEntity objects (deduplicated by PR number)
        """
        if not self.gh_path:
            log.error("gh CLI not available. Cannot fetch PRs.")
            return []

        prs: list[PullRequestEntity] = []
        # FIX: gh treats merged PRs as a subset of "closed", so querying both
        # states could return the same PR twice; dedupe by number.
        seen_numbers: set[int] = set()

        # Map state to gh CLI format
        if state == "all":
            states_to_fetch = ["open", "closed", "merged"]
        elif state == "closed":
            states_to_fetch = ["closed", "merged"]
        else:
            states_to_fetch = [state]

        for pr_state in states_to_fetch:
            # Build gh pr list command - request minimal fields to avoid GraphQL limits
            args = [
                "pr", "list",
                "-R", self.repo_path,
                "--state", pr_state,
                "--limit", str(limit or 100),
                "--json", "number,title,body,state,createdAt,author,mergedAt,labels,additions,deletions,baseRefName,headRefName"
            ]

            output = self._run_gh(args)
            if not output:
                continue

            try:
                pr_list = json.loads(output)
            except json.JSONDecodeError as e:
                log.error(f"Failed to parse gh output: {e}")
                continue

            for pr_data in pr_list:
                number = pr_data.get("number")
                if number in seen_numbers:
                    continue

                # Check date filter
                if since:
                    created_at = self._parse_datetime(pr_data.get("createdAt"))
                    if created_at and created_at < since:
                        continue

                prs.append(self._extract_pr_from_json(pr_data))
                seen_numbers.add(number)

                if len(prs) % 10 == 0:
                    log.info(f"Fetched {len(prs)} PRs...")

                # Check limit
                if limit and len(prs) >= limit:
                    break

            if limit and len(prs) >= limit:
                break

        log.info(f"Fetched {len(prs)} pull requests from {self.repo_path}")
        return prs

    def fetch_pr_files(self, pr_number: int) -> list[str]:
        """Get files changed in a specific PR.

        Args:
            pr_number: Pull request number

        Returns:
            List of file paths that were modified (empty on any failure)
        """
        if not self.gh_path:
            return []

        args = [
            "pr", "view", str(pr_number),
            "-R", self.repo_path,
            "--json", "files"
        ]

        output = self._run_gh(args)
        if not output:
            return []

        try:
            data = json.loads(output)
            return [f.get("path", "") for f in data.get("files", [])]
        except json.JSONDecodeError:
            return []

    def _parse_datetime(self, dt_str: Optional[str]) -> Optional[datetime]:
        """Parse an ISO-8601 datetime string; returns None on bad input."""
        if not dt_str:
            return None
        try:
            # Handle ISO format with Z suffix
            dt_str = dt_str.replace("Z", "+00:00")
            return datetime.fromisoformat(dt_str)
        except (ValueError, AttributeError):
            return None

    def _extract_pr_from_json(self, pr_data: dict) -> PullRequestEntity:
        """Extract PR data from gh CLI JSON output.

        Note: reviewRequests, reviews, commits and files are only present if
        the caller requested those fields via --json; fetch_prs currently
        requests a minimal field set, so those lists may be empty.

        Args:
            pr_data: PR data from gh CLI

        Returns:
            PullRequestEntity with extracted data
        """
        # Determine state: a merge timestamp overrides the reported state
        state = pr_data.get("state", "open").lower()
        if pr_data.get("mergedAt"):
            state = "merged"

        # Get reviewers (union of requested reviewers and actual review authors)
        reviewers = set()
        for req in pr_data.get("reviewRequests", []):
            if isinstance(req, dict) and req.get("login"):
                reviewers.add(req["login"])
        for review in pr_data.get("reviews", []):
            if isinstance(review, dict) and review.get("author", {}).get("login"):
                reviewers.add(review["author"]["login"])

        # Get commit SHAs
        commit_shas = []
        for commit in pr_data.get("commits", []):
            if isinstance(commit, dict) and commit.get("oid"):
                commit_shas.append(commit["oid"])

        # Get files changed
        files_changed = []
        for f in pr_data.get("files", []):
            if isinstance(f, dict) and f.get("path"):
                files_changed.append(f["path"])

        # Get labels
        labels = []
        for label in pr_data.get("labels", []):
            if isinstance(label, dict) and label.get("name"):
                labels.append(label["name"])

        # Get author (missing/deleted accounts come through as null)
        author = "unknown"
        author_data = pr_data.get("author")
        if isinstance(author_data, dict):
            author = author_data.get("login", "unknown")

        return PullRequestEntity(
            number=pr_data.get("number", 0),
            title=pr_data.get("title", ""),
            description=pr_data.get("body"),
            state=state,
            created_at=self._parse_datetime(pr_data.get("createdAt")),
            author=author,
            merged_at=self._parse_datetime(pr_data.get("mergedAt")),
            reviewers=list(reviewers),
            labels=labels,
            additions=pr_data.get("additions", 0),
            deletions=pr_data.get("deletions", 0),
            files_changed=files_changed,
            commit_shas=commit_shas,
            base_branch=pr_data.get("baseRefName", "main"),
            head_branch=pr_data.get("headRefName", ""),
        )
|