git-aware-coding-agent 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. avos_cli/__init__.py +3 -0
  2. avos_cli/agents/avos_ask_agent.md +47 -0
  3. avos_cli/agents/avos_ask_agent_JSON_converter.md +78 -0
  4. avos_cli/agents/avos_hisotry_agent_JSON_converter.md +92 -0
  5. avos_cli/agents/avos_history_agent.md +58 -0
  6. avos_cli/agents/git_diff_agent.md +63 -0
  7. avos_cli/artifacts/__init__.py +17 -0
  8. avos_cli/artifacts/base.py +47 -0
  9. avos_cli/artifacts/commit_builder.py +35 -0
  10. avos_cli/artifacts/doc_builder.py +30 -0
  11. avos_cli/artifacts/issue_builder.py +37 -0
  12. avos_cli/artifacts/pr_builder.py +50 -0
  13. avos_cli/cli/__init__.py +1 -0
  14. avos_cli/cli/main.py +504 -0
  15. avos_cli/commands/__init__.py +1 -0
  16. avos_cli/commands/ask.py +541 -0
  17. avos_cli/commands/connect.py +363 -0
  18. avos_cli/commands/history.py +549 -0
  19. avos_cli/commands/hook_install.py +260 -0
  20. avos_cli/commands/hook_sync.py +231 -0
  21. avos_cli/commands/ingest.py +506 -0
  22. avos_cli/commands/ingest_pr.py +239 -0
  23. avos_cli/config/__init__.py +1 -0
  24. avos_cli/config/hash_store.py +93 -0
  25. avos_cli/config/lock.py +122 -0
  26. avos_cli/config/manager.py +180 -0
  27. avos_cli/config/state.py +90 -0
  28. avos_cli/exceptions.py +272 -0
  29. avos_cli/models/__init__.py +58 -0
  30. avos_cli/models/api.py +75 -0
  31. avos_cli/models/artifacts.py +99 -0
  32. avos_cli/models/config.py +56 -0
  33. avos_cli/models/diff.py +117 -0
  34. avos_cli/models/query.py +234 -0
  35. avos_cli/parsers/__init__.py +21 -0
  36. avos_cli/parsers/artifact_ref_extractor.py +173 -0
  37. avos_cli/parsers/reference_parser.py +117 -0
  38. avos_cli/services/__init__.py +1 -0
  39. avos_cli/services/chronology_service.py +68 -0
  40. avos_cli/services/citation_validator.py +134 -0
  41. avos_cli/services/context_budget_service.py +104 -0
  42. avos_cli/services/diff_resolver.py +398 -0
  43. avos_cli/services/diff_summary_service.py +141 -0
  44. avos_cli/services/git_client.py +351 -0
  45. avos_cli/services/github_client.py +443 -0
  46. avos_cli/services/llm_client.py +312 -0
  47. avos_cli/services/memory_client.py +323 -0
  48. avos_cli/services/query_fallback_formatter.py +108 -0
  49. avos_cli/services/reply_output_service.py +341 -0
  50. avos_cli/services/sanitization_service.py +218 -0
  51. avos_cli/utils/__init__.py +1 -0
  52. avos_cli/utils/dotenv_load.py +50 -0
  53. avos_cli/utils/hashing.py +22 -0
  54. avos_cli/utils/logger.py +77 -0
  55. avos_cli/utils/output.py +232 -0
  56. avos_cli/utils/sanitization_diagnostics.py +81 -0
  57. avos_cli/utils/time_helpers.py +56 -0
  58. git_aware_coding_agent-1.0.0.dist-info/METADATA +390 -0
  59. git_aware_coding_agent-1.0.0.dist-info/RECORD +62 -0
  60. git_aware_coding_agent-1.0.0.dist-info/WHEEL +4 -0
  61. git_aware_coding_agent-1.0.0.dist-info/entry_points.txt +2 -0
  62. git_aware_coding_agent-1.0.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,68 @@
1
+ """Chronology service for deterministic timeline ordering.
2
+
3
+ Parses ISO 8601 timestamps, normalizes to UTC, and sorts artifacts
4
+ using a stable deterministic comparator for the history command pipeline.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import datetime, timezone
10
+
11
+ from avos_cli.models.query import RetrievedArtifact
12
+ from avos_cli.utils.logger import get_logger
13
+
14
+ _log = get_logger("chronology")
15
+
16
+ _MAX_DATETIME = datetime(9999, 12, 31, 23, 59, 59, tzinfo=timezone.utc)
17
+
18
+
19
class ChronologyService:
    """Sorts artifacts chronologically with deterministic tie-breakers.

    Sort contract: timestamp ASC, rank ASC, note_id ASC.
    Invalid/null timestamps are sorted last.
    """

    def sort(self, artifacts: list[RetrievedArtifact]) -> list[RetrievedArtifact]:
        """Sort artifacts in chronological order.

        Args:
            artifacts: Artifacts to sort (not mutated).

        Returns:
            New list sorted by (timestamp ASC, rank ASC, note_id ASC).
        """
        # sorted() is stable, so artifacts with fully equal keys keep their
        # input order -- the result is deterministic across runs.
        return sorted(artifacts, key=self._sort_key)

    def _sort_key(self, art: RetrievedArtifact) -> tuple[datetime, int, str]:
        """Build deterministic sort key for an artifact."""
        ts = self._parse_timestamp(art.created_at)
        return (ts, art.rank, art.note_id)

    def _parse_timestamp(self, value: str) -> datetime:
        """Parse ISO 8601 timestamp to UTC datetime.

        Invalid or empty values return _MAX_DATETIME (sorted last).
        Naive datetimes are assumed UTC.
        """
        if not value or not value.strip():
            return _MAX_DATETIME

        cleaned = value.strip()

        # Primary path: datetime.fromisoformat. Unlike the strptime fallbacks
        # below it also accepts fractional seconds ("...T00:00:00.123Z"),
        # which many API timestamps carry; previously those fell through every
        # format and were wrongly sorted last. Python < 3.11 rejects a
        # trailing "Z", so normalize it to an explicit UTC offset first.
        iso_candidate = cleaned[:-1] + "+00:00" if cleaned.endswith("Z") else cleaned
        try:
            dt = datetime.fromisoformat(iso_candidate)
            if dt.tzinfo is None:
                dt = dt.replace(tzinfo=timezone.utc)
            return dt.astimezone(timezone.utc)
        except ValueError:
            pass

        # Fallback: explicit formats, kept for any input fromisoformat
        # cannot handle on older interpreters.
        for fmt in (
            "%Y-%m-%dT%H:%M:%S%z",
            "%Y-%m-%dT%H:%M:%SZ",
            "%Y-%m-%dT%H:%M:%S",
            "%Y-%m-%d",
        ):
            try:
                dt = datetime.strptime(cleaned, fmt)
                if dt.tzinfo is None:
                    dt = dt.replace(tzinfo=timezone.utc)
                return dt.astimezone(timezone.utc)
            except ValueError:
                continue

        _log.warning("Unparseable timestamp '%s', sorting last", value)
        return _MAX_DATETIME
@@ -0,0 +1,134 @@
1
+ """Citation validator for query pipeline grounding enforcement.
2
+
3
+ Extracts citations from LLM synthesis responses and validates each
4
+ against retrieved artifact note_ids. Ungrounded citations are dropped
5
+ with warnings. Supports structured JSON and inline [note_id] fallback.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import re
12
+
13
+ from avos_cli.models.query import (
14
+ GroundedCitation,
15
+ GroundingStatus,
16
+ ReferenceType,
17
+ SanitizedArtifact,
18
+ )
19
+ from avos_cli.utils.logger import get_logger
20
+
21
+ _log = get_logger("citation_validator")
22
+
23
+ _INLINE_CITATION_PATTERN = re.compile(r"\[([a-zA-Z0-9_\-]+)\]")
24
+
25
+ _MIN_GROUNDED_THRESHOLD = 2
26
+
27
+
28
class CitationValidator:
    """Validates LLM synthesis citations against retrieved artifacts.

    Grounding rule: exact note_id match only. No fuzzy matching.
    Minimum threshold: 2 grounded citations for synthesis acceptance.
    """

    def validate(
        self,
        response_text: str,
        artifacts: list[SanitizedArtifact],
    ) -> tuple[list[GroundedCitation], list[GroundedCitation], list[str]]:
        """Validate citations in LLM response against retrieved artifacts.

        Args:
            response_text: Raw LLM response text (may contain JSON or inline refs).
            artifacts: The sanitized artifacts that were sent to the LLM.

        Returns:
            Tuple of (grounded_citations, dropped_citations, warnings).
        """
        valid_ids = {art.note_id for art in artifacts}
        raw_citations = self._extract_citations(response_text)

        seen: set[str] = set()
        grounded: list[GroundedCitation] = []
        dropped: list[GroundedCitation] = []
        warnings: list[str] = []

        for note_id, display_label in raw_citations:
            # First occurrence wins; later duplicates are ignored.
            if note_id in seen:
                continue
            seen.add(note_id)

            if note_id in valid_ids:
                grounded.append(
                    GroundedCitation(
                        note_id=note_id,
                        display_label=display_label or note_id,
                        reference_type=ReferenceType.NOTE_ID,
                        grounding_status=GroundingStatus.GROUNDED,
                    )
                )
            else:
                dropped.append(
                    GroundedCitation(
                        note_id=note_id,
                        display_label=display_label or note_id,
                        reference_type=ReferenceType.NOTE_ID,
                        grounding_status=GroundingStatus.DROPPED_UNVERIFIABLE,
                    )
                )

        if dropped:
            warnings.append(
                f"{len(dropped)} citation(s) dropped as unverifiable: "
                f"{', '.join(c.note_id for c in dropped)}"
            )

        return grounded, dropped, warnings

    def _extract_citations(
        self, response_text: str
    ) -> list[tuple[str, str | None]]:
        """Extract citation note_ids from response, preferring structured JSON.

        Returns:
            List of (note_id, display_label_or_none) tuples.
        """
        structured = self._try_structured_extraction(response_text)
        if structured is not None:
            return structured
        return self._inline_extraction(response_text)

    def _try_structured_extraction(
        self, response_text: str
    ) -> list[tuple[str, str | None]] | None:
        """Try to parse citations from JSON structure in response.

        Returns None (triggering inline fallback) when the response is not a
        JSON object with a non-empty "citations" list.
        """
        # Strip an optional markdown code fence (``` or ```json) around the JSON.
        cleaned = re.sub(r"^```(?:json)?\s*\n?", "", response_text.strip())
        cleaned = re.sub(r"\n?```\s*$", "", cleaned)
        try:
            data = json.loads(cleaned)
        except (json.JSONDecodeError, TypeError):
            return None

        if not isinstance(data, dict):
            return None

        citations_raw = data.get("citations")
        if not isinstance(citations_raw, list):
            return None

        results: list[tuple[str, str | None]] = []
        for item in citations_raw:
            if not (isinstance(item, dict) and "note_id" in item):
                continue
            note_id = str(item["note_id"]).strip()
            if not note_id:
                # An empty note_id can never ground; skip rather than emit junk.
                continue
            raw_label = item.get("display_label")
            # Accept only string labels. Previously any JSON value (number,
            # dict, list) leaked through into GroundedCitation.display_label.
            display_label = raw_label if isinstance(raw_label, str) else None
            results.append((note_id, display_label))

        return results if results else None

    def _inline_extraction(
        self, response_text: str
    ) -> list[tuple[str, str | None]]:
        """Fallback: extract [note_id] patterns from plain text."""
        matches = _INLINE_CITATION_PATTERN.findall(response_text)
        return [(m, None) for m in matches]
@@ -0,0 +1,104 @@
1
+ """Context budget service for query pipeline artifact packing.
2
+
3
+ Applies deterministic ranking, truncation, and hard caps to sanitized
4
+ artifacts before LLM synthesis. Ensures stable ordering across runs
5
+ and preserves citation metadata for downstream grounding.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from avos_cli.models.query import BudgetResult, SanitizedArtifact
11
+ from avos_cli.utils.logger import get_logger
12
+
13
+ _log = get_logger("context_budget")
14
+
15
+ _ASK_MAX_ARTIFACTS = 6
16
+ _ASK_EXCERPT_CAP = 800
17
+ _HISTORY_MAX_ARTIFACTS = 10
18
+ _HISTORY_EXCERPT_CAP = 600
19
+
20
+ _EPOCH = "1970-01-01T00:00:00Z"
21
+ _MAX_RANK = 999999999
22
+
23
+
24
class ContextBudgetService:
    """Packs sanitized artifacts within model context budget.

    Sorting contract: rank ASC, created_at DESC, note_id ASC.
    Null rank -> max int (sorted last). Empty created_at -> epoch (sorted last for DESC).
    """

    def pack(
        self,
        artifacts: list[SanitizedArtifact],
        mode: str,
    ) -> BudgetResult:
        """Rank, sort, truncate, and cap artifacts for synthesis.

        Args:
            artifacts: Sanitized artifacts to pack.
            mode: 'ask' or 'history' -- determines caps and excerpt limits.

        Returns:
            BudgetResult with included/excluded artifacts and metadata.
        """
        # Any mode other than 'ask' gets the (larger) history budget.
        if mode == "ask":
            max_count = _ASK_MAX_ARTIFACTS
            excerpt_cap = _ASK_EXCERPT_CAP
        else:
            max_count = _HISTORY_MAX_ARTIFACTS
            excerpt_cap = _HISTORY_EXCERPT_CAP

        sorted_arts = sorted(artifacts, key=self._sort_key)

        included_raw = sorted_arts[:max_count]
        excluded_raw = sorted_arts[max_count:]

        included: list[SanitizedArtifact] = []
        truncation_flags: dict[str, bool] = {}

        for art in included_raw:
            truncated_content, was_truncated = self._truncate(art.content, excerpt_cap)
            truncation_flags[art.note_id] = was_truncated
            if was_truncated:
                # Rebuild instead of mutating: the caller's artifacts must
                # remain untouched; only the packed copy is shortened.
                included.append(
                    SanitizedArtifact(
                        note_id=art.note_id,
                        content=truncated_content,
                        created_at=art.created_at,
                        rank=art.rank,
                        source_type=art.source_type,
                        display_ref=art.display_ref,
                        redaction_applied=art.redaction_applied,
                        redaction_types=art.redaction_types,
                    )
                )
            else:
                included.append(art)

        return BudgetResult(
            included=included,
            excluded=list(excluded_raw),
            truncation_flags=truncation_flags,
            included_count=len(included),
            excluded_count=len(excluded_raw),
        )

    def _sort_key(self, art: SanitizedArtifact) -> tuple[int, str, str]:
        """Deterministic sort key: rank ASC, created_at DESC, note_id ASC.

        For DESC on created_at, we invert the string's character ordinals so
        a normal ascending sort yields descending date order.
        """
        # A missing rank must sort LAST, not crash: `None < int` raises
        # TypeError in Python 3, so map None -> _MAX_RANK explicitly
        # (the documented "Null rank -> max int" contract).
        rank = _MAX_RANK if art.rank is None else min(art.rank, _MAX_RANK)
        created_at = art.created_at if art.created_at else _EPOCH
        # For descending created_at: invert by negating character ordinals
        inverted_date = "".join(chr(0xFFFF - ord(c)) for c in created_at)
        return (rank, inverted_date, art.note_id)

    def _truncate(self, content: str, cap: int) -> tuple[str, bool]:
        """Truncate content to cap with '...' marker if needed."""
        if len(content) <= cap:
            return content, False
        return content[:cap] + "...", True
@@ -0,0 +1,398 @@
1
+ """Diff resolver for PR and commit references.
2
+
3
+ Implements the PR-Wins deduplication strategy: commits that are part of
4
+ a referenced PR are suppressed to avoid duplicate content. Extracts
5
+ unified diffs from the GitHub REST API for both PRs and commits.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import TYPE_CHECKING
11
+
12
+ from avos_cli.exceptions import AvosError
13
+ from avos_cli.models.diff import (
14
+ DedupDecision,
15
+ DedupPlanItem,
16
+ DiffReferenceType,
17
+ DiffResult,
18
+ DiffStatus,
19
+ ParsedReference,
20
+ ResolvedReference,
21
+ )
22
+ from avos_cli.utils.logger import get_logger
23
+
24
+ if TYPE_CHECKING:
25
+ from avos_cli.services.github_client import GitHubClient
26
+
27
+ _log = get_logger("diff_resolver")
28
+
29
+
30
class DiffResolver:
    """Resolves PR and commit references to unified diffs via GitHub API.

    Implements the PR-Wins deduplication strategy:
    1. Build coverage index: map each commit SHA to PRs containing it
    2. For each commit reference, check if it's covered by any PR
    3. Suppress covered commits, keep independent commits
    4. Extract diffs for all kept references

    Args:
        github_client: GitHub REST API client for PR and commit endpoints.
    """

    def __init__(self, github_client: GitHubClient) -> None:
        # Injected client; all network access goes through it.
        self._github = github_client

    def resolve(self, references: list[ParsedReference]) -> list[DiffResult]:
        """Resolve references to diffs with PR-Wins deduplication.

        Args:
            references: List of parsed PR/commit references.

        Returns:
            List of DiffResult objects with extracted diffs or status info.
        """
        if not references:
            return []

        # Coverage index is built from PR refs only, BEFORE dedup, so every
        # commit ref can be tested against all referenced PRs.
        pr_refs = [r for r in references if r.reference_type == DiffReferenceType.PR]
        coverage_index = self._build_coverage_index(pr_refs)
        dedup_plan = self._apply_dedup(references, coverage_index)

        # Output order follows the dedup plan, which follows input order.
        results: list[DiffResult] = []
        for plan_item in dedup_plan:
            result = self._extract_diff(plan_item)
            results.append(result)

        return results

    def _build_coverage_index(
        self, pr_refs: list[ParsedReference]
    ) -> dict[str, set[int]]:
        """Build index mapping commit SHAs to PR numbers that contain them.

        Args:
            pr_refs: List of PR references to index.

        Returns:
            Dict mapping full commit SHA to set of PR numbers.
        """
        index: dict[str, set[int]] = {}

        for pr_ref in pr_refs:
            # Without a repo slug we cannot query the API for this PR.
            if pr_ref.repo_slug is None:
                continue

            # NOTE(review): assumes raw_id is numeric for PR refs (ValueError
            # otherwise) -- confirm the reference parser guarantees this.
            pr_number = int(pr_ref.raw_id)
            owner, repo = pr_ref.repo_slug.split("/", 1)

            # Best-effort: a failing PR lookup only logs a warning, so one
            # bad PR cannot abort coverage indexing for the others.
            try:
                commit_shas = self._github.list_pr_commits(owner, repo, pr_number)
                for sha in commit_shas:
                    if sha not in index:
                        index[sha] = set()
                    index[sha].add(pr_number)
            except AvosError as e:
                _log.warning("Failed to fetch commits for PR #%d: %s", pr_number, e)

        return index

    def _apply_dedup(
        self,
        references: list[ParsedReference],
        coverage_index: dict[str, set[int]],
    ) -> list[DedupPlanItem]:
        """Apply PR-Wins deduplication rule to all references.

        Args:
            references: All parsed references.
            coverage_index: Mapping of commit SHA to covering PR numbers.

        Returns:
            List of DedupPlanItem with keep/suppress decisions.
        """
        plan: list[DedupPlanItem] = []

        for ref in references:
            if ref.reference_type == DiffReferenceType.PR:
                # PRs are always kept (PR-Wins); only resolution can fail.
                resolved = self._resolve_pr_reference(ref)
                if resolved is None:
                    plan.append(self._make_unresolved_plan_item(ref, "PR resolution failed"))
                else:
                    plan.append(
                        DedupPlanItem(reference=resolved, decision=DedupDecision.KEEP)
                    )
            else:
                resolved, commit_detail = self._resolve_commit_reference(ref)
                if resolved is None:
                    plan.append(
                        self._make_unresolved_plan_item(
                            ref,
                            commit_detail or "Commit SHA expansion failed",
                        )
                    )
                    continue

                full_sha = resolved.full_sha
                if full_sha and full_sha in coverage_index:
                    covering_prs = coverage_index[full_sha]
                    # min() makes attribution deterministic when several PRs
                    # contain the same commit.
                    first_pr = min(covering_prs)
                    plan.append(
                        DedupPlanItem(
                            reference=resolved,
                            decision=DedupDecision.SUPPRESS_COVERED_BY_PR,
                            covered_by_pr=first_pr,
                            reason=f"Covered by PR #{first_pr}",
                        )
                    )
                else:
                    plan.append(
                        DedupPlanItem(reference=resolved, decision=DedupDecision.KEEP)
                    )

        return plan

    def _resolve_pr_reference(self, ref: ParsedReference) -> ResolvedReference | None:
        """Resolve a PR reference to canonical form.

        Args:
            ref: Parsed PR reference.

        Returns:
            ResolvedReference or None if resolution fails.
        """
        if ref.repo_slug is None:
            return None

        pr_number = int(ref.raw_id)
        owner, repo = ref.repo_slug.split("/", 1)

        # Commit list is optional metadata here; an API failure still yields
        # a resolved PR reference (with an empty commit list).
        try:
            commit_shas = self._github.list_pr_commits(owner, repo, pr_number)
        except AvosError:
            commit_shas = []

        return ResolvedReference(
            reference_type=DiffReferenceType.PR,
            canonical_id=f"PR #{pr_number}",
            repo_slug=ref.repo_slug,
            pr_number=pr_number,
            commit_shas=commit_shas,
        )

    def _resolve_commit_reference(
        self, ref: ParsedReference
    ) -> tuple[ResolvedReference | None, str | None]:
        """Resolve a commit reference to canonical form with full SHA.

        Uses the GitHub commits API (same as the hosted repo).

        Args:
            ref: Parsed commit reference.

        Returns:
            (resolved, None) on success, or (None, detail) where detail is a
            short error string (e.g. GitHub API message) when resolution fails.
        """
        if ref.repo_slug is None:
            return None, None

        owner, repo = ref.repo_slug.split("/", 1)

        try:
            payload = self._github.get_commit(owner, repo, ref.raw_id)
        except AvosError as e:
            return None, str(e)
        full_sha = str(payload.get("sha", ""))
        # A full git SHA-1 is exactly 40 hex chars; anything else means the
        # API response is unusable for dedup matching.
        if len(full_sha) != 40:
            return None, "Commit response missing full SHA"
        return (
            ResolvedReference(
                reference_type=DiffReferenceType.COMMIT,
                canonical_id=full_sha,
                repo_slug=ref.repo_slug,
                full_sha=full_sha,
            ),
            None,
        )

    def _make_unresolved_plan_item(
        self, ref: ParsedReference, reason: str
    ) -> DedupPlanItem:
        """Create a plan item for an unresolved reference.

        Args:
            ref: The unresolved reference.
            reason: Reason for failure.

        Returns:
            DedupPlanItem with KEEP decision (will produce UNRESOLVED result).
        """
        # KEEP + reason is the sentinel _extract_diff uses to emit an
        # UNRESOLVED result instead of attempting extraction.
        resolved = ResolvedReference(
            reference_type=ref.reference_type,
            canonical_id=ref.raw_id,
            repo_slug=ref.repo_slug or "unknown",
            pr_number=int(ref.raw_id) if ref.reference_type == DiffReferenceType.PR else None,
            full_sha=ref.raw_id if ref.reference_type == DiffReferenceType.COMMIT else None,
        )
        return DedupPlanItem(
            reference=resolved,
            decision=DedupDecision.KEEP,
            reason=reason,
        )

    def _extract_diff(self, plan_item: DedupPlanItem) -> DiffResult:
        """Extract diff for a single plan item.

        Guard order matters: suppression first, then unknown repo, then the
        unresolved sentinel (KEEP + reason), then actual extraction.

        Args:
            plan_item: Dedup plan item with reference and decision.

        Returns:
            DiffResult with diff text or status info.
        """
        ref = plan_item.reference

        if plan_item.decision == DedupDecision.SUPPRESS_COVERED_BY_PR:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                status=DiffStatus.SUPPRESSED,
                suppressed_reason=f"covered_by_pr:{plan_item.covered_by_pr}",
            )

        if ref.repo_slug is None or ref.repo_slug == "unknown":
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug or "unknown",
                status=DiffStatus.UNRESOLVED,
                error_message="Repository context unknown",
            )

        if plan_item.decision == DedupDecision.KEEP and plan_item.reason:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                status=DiffStatus.UNRESOLVED,
                error_message=plan_item.reason,
            )

        if ref.reference_type == DiffReferenceType.PR:
            return self._extract_pr_diff(ref)
        return self._extract_commit_diff(ref)

    def _extract_pr_diff(self, ref: ResolvedReference) -> DiffResult:
        """Extract diff for a PR reference.

        Args:
            ref: Resolved PR reference.

        Returns:
            DiffResult with PR diff or error.
        """
        if ref.pr_number is None or ref.repo_slug is None:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug or "unknown",
                status=DiffStatus.UNRESOLVED,
                error_message="Invalid PR reference",
            )

        owner, repo = ref.repo_slug.split("/", 1)

        # API failure degrades to an UNRESOLVED result; never raises to caller.
        try:
            diff_text = self._github.get_pr_diff(owner, repo, ref.pr_number)
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                diff_text=diff_text,
                status=DiffStatus.RESOLVED,
            )
        except AvosError as e:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                status=DiffStatus.UNRESOLVED,
                error_message=str(e),
            )

    def _extract_commit_diff(self, ref: ResolvedReference) -> DiffResult:
        """Extract diff for a commit reference via GitHub commits API.

        Args:
            ref: Resolved commit reference.

        Returns:
            DiffResult with commit diff or error.
        """
        if ref.full_sha is None:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                status=DiffStatus.UNRESOLVED,
                error_message="Commit SHA not resolved",
            )

        owner, repo = ref.repo_slug.split("/", 1)

        try:
            diff_text = self._github.get_commit_diff(owner, repo, ref.full_sha)
            # An empty diff body is treated as a failure, not as RESOLVED
            # with empty content.
            if not diff_text:
                return DiffResult(
                    reference_type=ref.reference_type,
                    canonical_id=ref.canonical_id,
                    repo=ref.repo_slug,
                    status=DiffStatus.UNRESOLVED,
                    error_message="Commit has no diff (empty response)",
                )
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                diff_text=diff_text,
                status=DiffStatus.RESOLVED,
            )
        except AvosError as e:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                status=DiffStatus.UNRESOLVED,
                error_message=str(e),
            )

    def format_output(self, results: list[DiffResult]) -> str:
        """Format diff results as grouped output text.

        Args:
            results: List of DiffResult objects.

        Returns:
            Formatted string with headers and diff content.
        """
        lines: list[str] = []

        for result in results:
            if result.reference_type == DiffReferenceType.PR:
                # canonical_id is "PR #N" for resolved PRs and a bare number
                # for unresolved ones; replace() normalizes both cases.
                header = f"=== PR #{result.canonical_id.replace('PR #', '')} ==="
            else:
                header = f"=== COMMIT {result.canonical_id} ==="

            lines.append(header)

            if result.status == DiffStatus.RESOLVED:
                lines.append(result.diff_text or "")
            elif result.status == DiffStatus.SUPPRESSED:
                lines.append(f"[suppressed: {result.suppressed_reason}]")
            else:
                lines.append(f"[unresolved: {result.error_message}]")

            # Blank separator line between sections.
            lines.append("")

        return "\n".join(lines)