git-aware-coding-agent 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avos_cli/__init__.py +3 -0
- avos_cli/agents/avos_ask_agent.md +47 -0
- avos_cli/agents/avos_ask_agent_JSON_converter.md +78 -0
- avos_cli/agents/avos_hisotry_agent_JSON_converter.md +92 -0
- avos_cli/agents/avos_history_agent.md +58 -0
- avos_cli/agents/git_diff_agent.md +63 -0
- avos_cli/artifacts/__init__.py +17 -0
- avos_cli/artifacts/base.py +47 -0
- avos_cli/artifacts/commit_builder.py +35 -0
- avos_cli/artifacts/doc_builder.py +30 -0
- avos_cli/artifacts/issue_builder.py +37 -0
- avos_cli/artifacts/pr_builder.py +50 -0
- avos_cli/cli/__init__.py +1 -0
- avos_cli/cli/main.py +504 -0
- avos_cli/commands/__init__.py +1 -0
- avos_cli/commands/ask.py +541 -0
- avos_cli/commands/connect.py +363 -0
- avos_cli/commands/history.py +549 -0
- avos_cli/commands/hook_install.py +260 -0
- avos_cli/commands/hook_sync.py +231 -0
- avos_cli/commands/ingest.py +506 -0
- avos_cli/commands/ingest_pr.py +239 -0
- avos_cli/config/__init__.py +1 -0
- avos_cli/config/hash_store.py +93 -0
- avos_cli/config/lock.py +122 -0
- avos_cli/config/manager.py +180 -0
- avos_cli/config/state.py +90 -0
- avos_cli/exceptions.py +272 -0
- avos_cli/models/__init__.py +58 -0
- avos_cli/models/api.py +75 -0
- avos_cli/models/artifacts.py +99 -0
- avos_cli/models/config.py +56 -0
- avos_cli/models/diff.py +117 -0
- avos_cli/models/query.py +234 -0
- avos_cli/parsers/__init__.py +21 -0
- avos_cli/parsers/artifact_ref_extractor.py +173 -0
- avos_cli/parsers/reference_parser.py +117 -0
- avos_cli/services/__init__.py +1 -0
- avos_cli/services/chronology_service.py +68 -0
- avos_cli/services/citation_validator.py +134 -0
- avos_cli/services/context_budget_service.py +104 -0
- avos_cli/services/diff_resolver.py +398 -0
- avos_cli/services/diff_summary_service.py +141 -0
- avos_cli/services/git_client.py +351 -0
- avos_cli/services/github_client.py +443 -0
- avos_cli/services/llm_client.py +312 -0
- avos_cli/services/memory_client.py +323 -0
- avos_cli/services/query_fallback_formatter.py +108 -0
- avos_cli/services/reply_output_service.py +341 -0
- avos_cli/services/sanitization_service.py +218 -0
- avos_cli/utils/__init__.py +1 -0
- avos_cli/utils/dotenv_load.py +50 -0
- avos_cli/utils/hashing.py +22 -0
- avos_cli/utils/logger.py +77 -0
- avos_cli/utils/output.py +232 -0
- avos_cli/utils/sanitization_diagnostics.py +81 -0
- avos_cli/utils/time_helpers.py +56 -0
- git_aware_coding_agent-1.0.0.dist-info/METADATA +390 -0
- git_aware_coding_agent-1.0.0.dist-info/RECORD +62 -0
- git_aware_coding_agent-1.0.0.dist-info/WHEEL +4 -0
- git_aware_coding_agent-1.0.0.dist-info/entry_points.txt +2 -0
- git_aware_coding_agent-1.0.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Chronology service for deterministic timeline ordering.
|
|
2
|
+
|
|
3
|
+
Parses ISO 8601 timestamps, normalizes to UTC, and sorts artifacts
|
|
4
|
+
using a stable deterministic comparator for the history command pipeline.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
|
|
11
|
+
from avos_cli.models.query import RetrievedArtifact
|
|
12
|
+
from avos_cli.utils.logger import get_logger
|
|
13
|
+
|
|
14
|
+
_log = get_logger("chronology")
|
|
15
|
+
|
|
16
|
+
_MAX_DATETIME = datetime(9999, 12, 31, 23, 59, 59, tzinfo=timezone.utc)


class ChronologyService:
    """Sorts artifacts chronologically with deterministic tie-breakers.

    Sort contract: timestamp ASC, rank ASC, note_id ASC.
    Invalid/null timestamps are sorted last.
    """

    def sort(self, artifacts: list[RetrievedArtifact]) -> list[RetrievedArtifact]:
        """Sort artifacts in chronological order.

        Args:
            artifacts: Artifacts to sort (not mutated).

        Returns:
            New list sorted by (timestamp ASC, rank ASC, note_id ASC).
        """
        return sorted(artifacts, key=self._sort_key)

    def _sort_key(self, art: RetrievedArtifact) -> tuple[datetime, int, str]:
        """Build deterministic sort key for an artifact."""
        ts = self._parse_timestamp(art.created_at)
        return (ts, art.rank, art.note_id)

    def _parse_timestamp(self, value: str) -> datetime:
        """Parse ISO 8601 timestamp to UTC datetime.

        Invalid or empty values return _MAX_DATETIME (sorted last).
        Naive datetimes are assumed UTC.

        Bug fix: timestamps with fractional seconds (e.g.
        '2024-01-01T00:00:00.123456Z', common in API payloads) previously
        failed every strptime format and were silently sorted last. We now
        try datetime.fromisoformat first, which accepts them.
        """
        if not value or not value.strip():
            return _MAX_DATETIME

        cleaned = value.strip()

        # fromisoformat rejects a trailing 'Z' before Python 3.11; normalize
        # it to an explicit UTC offset so the fast path works everywhere.
        iso_candidate = cleaned[:-1] + "+00:00" if cleaned.endswith("Z") else cleaned
        dt: datetime | None = None
        try:
            dt = datetime.fromisoformat(iso_candidate)
        except ValueError:
            dt = None

        if dt is None:
            # Legacy fallback formats, kept for any input fromisoformat
            # rejects that the original implementation accepted.
            for fmt in (
                "%Y-%m-%dT%H:%M:%S%z",
                "%Y-%m-%dT%H:%M:%SZ",
                "%Y-%m-%dT%H:%M:%S",
                "%Y-%m-%d",
            ):
                try:
                    dt = datetime.strptime(cleaned, fmt)
                    break
                except ValueError:
                    continue

        if dt is None:
            _log.warning("Unparseable timestamp '%s', sorting last", value)
            return _MAX_DATETIME

        # Naive datetimes are assumed UTC; aware ones are normalized to UTC.
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.astimezone(timezone.utc)
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""Citation validator for query pipeline grounding enforcement.
|
|
2
|
+
|
|
3
|
+
Extracts citations from LLM synthesis responses and validates each
|
|
4
|
+
against retrieved artifact note_ids. Ungrounded citations are dropped
|
|
5
|
+
with warnings. Supports structured JSON and inline [note_id] fallback.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import re
|
|
12
|
+
|
|
13
|
+
from avos_cli.models.query import (
|
|
14
|
+
GroundedCitation,
|
|
15
|
+
GroundingStatus,
|
|
16
|
+
ReferenceType,
|
|
17
|
+
SanitizedArtifact,
|
|
18
|
+
)
|
|
19
|
+
from avos_cli.utils.logger import get_logger
|
|
20
|
+
|
|
21
|
+
_log = get_logger("citation_validator")
|
|
22
|
+
|
|
23
|
+
_INLINE_CITATION_PATTERN = re.compile(r"\[([a-zA-Z0-9_\-]+)\]")

_MIN_GROUNDED_THRESHOLD = 2


class CitationValidator:
    """Validates LLM synthesis citations against retrieved artifacts.

    Grounding rule: exact note_id match only. No fuzzy matching.
    Minimum threshold: 2 grounded citations for synthesis acceptance.
    """

    def validate(
        self,
        response_text: str,
        artifacts: list[SanitizedArtifact],
    ) -> tuple[list[GroundedCitation], list[GroundedCitation], list[str]]:
        """Validate citations in LLM response against retrieved artifacts.

        Args:
            response_text: Raw LLM response text (may contain JSON or inline refs).
            artifacts: The sanitized artifacts that were sent to the LLM.

        Returns:
            Tuple of (grounded_citations, dropped_citations, warnings).
        """
        known_ids = {artifact.note_id for artifact in artifacts}

        grounded: list[GroundedCitation] = []
        dropped: list[GroundedCitation] = []
        warnings: list[str] = []
        processed: set[str] = set()

        for cited_id, label in self._extract_citations(response_text):
            # First occurrence wins; later duplicates are skipped outright.
            if cited_id in processed:
                continue
            processed.add(cited_id)

            is_grounded = cited_id in known_ids
            citation = GroundedCitation(
                note_id=cited_id,
                display_label=label or cited_id,
                reference_type=ReferenceType.NOTE_ID,
                grounding_status=(
                    GroundingStatus.GROUNDED
                    if is_grounded
                    else GroundingStatus.DROPPED_UNVERIFIABLE
                ),
            )
            if is_grounded:
                grounded.append(citation)
            else:
                dropped.append(citation)

        if dropped:
            warnings.append(
                f"{len(dropped)} citation(s) dropped as unverifiable: "
                f"{', '.join(c.note_id for c in dropped)}"
            )

        return grounded, dropped, warnings

    def _extract_citations(
        self, response_text: str
    ) -> list[tuple[str, str | None]]:
        """Extract citation note_ids from response, preferring structured JSON.

        Returns:
            List of (note_id, display_label_or_none) tuples.
        """
        parsed = self._try_structured_extraction(response_text)
        if parsed is not None:
            return parsed
        return self._inline_extraction(response_text)

    def _try_structured_extraction(
        self, response_text: str
    ) -> list[tuple[str, str | None]] | None:
        """Try to parse citations from a JSON structure in the response."""
        # Strip an optional markdown code fence before attempting to parse.
        stripped = re.sub(r"^```(?:json)?\s*\n?", "", response_text.strip())
        stripped = re.sub(r"\n?```\s*$", "", stripped)
        try:
            payload = json.loads(stripped)
        except (json.JSONDecodeError, TypeError):
            return None

        if not isinstance(payload, dict):
            return None

        raw_entries = payload.get("citations")
        if not isinstance(raw_entries, list):
            return None

        extracted = [
            (str(entry["note_id"]), entry.get("display_label"))
            for entry in raw_entries
            if isinstance(entry, dict) and "note_id" in entry
        ]
        # An empty citations list means "nothing structured here" -- fall
        # back to inline extraction rather than returning no citations.
        return extracted or None

    def _inline_extraction(
        self, response_text: str
    ) -> list[tuple[str, str | None]]:
        """Fallback: extract [note_id] patterns from plain text."""
        return [
            (candidate, None)
            for candidate in _INLINE_CITATION_PATTERN.findall(response_text)
        ]
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Context budget service for query pipeline artifact packing.
|
|
2
|
+
|
|
3
|
+
Applies deterministic ranking, truncation, and hard caps to sanitized
|
|
4
|
+
artifacts before LLM synthesis. Ensures stable ordering across runs
|
|
5
|
+
and preserves citation metadata for downstream grounding.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from avos_cli.models.query import BudgetResult, SanitizedArtifact
|
|
11
|
+
from avos_cli.utils.logger import get_logger
|
|
12
|
+
|
|
13
|
+
_log = get_logger("context_budget")
|
|
14
|
+
|
|
15
|
+
_ASK_MAX_ARTIFACTS = 6
_ASK_EXCERPT_CAP = 800
_HISTORY_MAX_ARTIFACTS = 10
_HISTORY_EXCERPT_CAP = 600

_EPOCH = "1970-01-01T00:00:00Z"
_MAX_RANK = 999999999


class ContextBudgetService:
    """Packs sanitized artifacts within model context budget.

    Sorting contract: rank ASC, created_at DESC, note_id ASC.
    Null rank -> max int (sorted last). Empty created_at -> epoch (sorted last for DESC).
    """

    def pack(
        self,
        artifacts: list[SanitizedArtifact],
        mode: str,
    ) -> BudgetResult:
        """Rank, sort, truncate, and cap artifacts for synthesis.

        Args:
            artifacts: Sanitized artifacts to pack.
            mode: 'ask' or 'history' -- determines caps and excerpt limits.
                Any value other than 'ask' falls back to the history limits.

        Returns:
            BudgetResult with included/excluded artifacts and metadata.
        """
        if mode == "ask":
            max_count = _ASK_MAX_ARTIFACTS
            excerpt_cap = _ASK_EXCERPT_CAP
        else:
            max_count = _HISTORY_MAX_ARTIFACTS
            excerpt_cap = _HISTORY_EXCERPT_CAP

        sorted_arts = sorted(artifacts, key=self._sort_key)

        included_raw = sorted_arts[:max_count]
        excluded_raw = sorted_arts[max_count:]

        included: list[SanitizedArtifact] = []
        truncation_flags: dict[str, bool] = {}

        for art in included_raw:
            truncated_content, was_truncated = self._truncate(art.content, excerpt_cap)
            truncation_flags[art.note_id] = was_truncated
            if not was_truncated:
                included.append(art)
                continue
            # Rebuild the artifact with trimmed content; citation and
            # redaction metadata is carried over unchanged.
            included.append(
                SanitizedArtifact(
                    note_id=art.note_id,
                    content=truncated_content,
                    created_at=art.created_at,
                    rank=art.rank,
                    source_type=art.source_type,
                    display_ref=art.display_ref,
                    redaction_applied=art.redaction_applied,
                    redaction_types=art.redaction_types,
                )
            )

        return BudgetResult(
            included=included,
            excluded=list(excluded_raw),
            truncation_flags=truncation_flags,
            included_count=len(included),
            excluded_count=len(excluded_raw),
        )

    def _sort_key(self, art: SanitizedArtifact) -> tuple[int, str, str]:
        """Deterministic sort key: rank ASC, created_at DESC, note_id ASC.

        A None or out-of-range rank maps to _MAX_RANK so unranked artifacts
        sort last; an empty created_at maps to the epoch so it sorts last
        under the DESC ordering.
        """
        # Bug fix: the previous `art.rank < _MAX_RANK` comparison raised
        # TypeError for rank=None, despite the class's documented
        # "null rank -> max int" contract.
        if art.rank is None or art.rank >= _MAX_RANK:
            rank = _MAX_RANK
        else:
            rank = art.rank
        created_at = art.created_at if art.created_at else _EPOCH
        # Descending created_at inside a single ascending sort: invert each
        # character's ordinal so lexicographic order reverses.
        # NOTE(review): assumes timestamps stay within the BMP (code points
        # <= 0xFFFF); anything above would make chr() raise. ISO 8601
        # strings are ASCII, but confirm upstream guarantees the format.
        inverted_date = "".join(chr(0xFFFF - ord(c)) for c in created_at)
        return (rank, inverted_date, art.note_id)

    def _truncate(self, content: str, cap: int) -> tuple[str, bool]:
        """Truncate content to cap chars with a '...' marker if needed."""
        if len(content) <= cap:
            return content, False
        return content[:cap] + "...", True
|
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
"""Diff resolver for PR and commit references.
|
|
2
|
+
|
|
3
|
+
Implements the PR-Wins deduplication strategy: commits that are part of
|
|
4
|
+
a referenced PR are suppressed to avoid duplicate content. Extracts
|
|
5
|
+
unified diffs from the GitHub REST API for both PRs and commits.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
from avos_cli.exceptions import AvosError
|
|
13
|
+
from avos_cli.models.diff import (
|
|
14
|
+
DedupDecision,
|
|
15
|
+
DedupPlanItem,
|
|
16
|
+
DiffReferenceType,
|
|
17
|
+
DiffResult,
|
|
18
|
+
DiffStatus,
|
|
19
|
+
ParsedReference,
|
|
20
|
+
ResolvedReference,
|
|
21
|
+
)
|
|
22
|
+
from avos_cli.utils.logger import get_logger
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from avos_cli.services.github_client import GitHubClient
|
|
26
|
+
|
|
27
|
+
_log = get_logger("diff_resolver")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class DiffResolver:
    """Resolves PR and commit references to unified diffs via GitHub API.

    Implements the PR-Wins deduplication strategy:
    1. Build coverage index: map each commit SHA to PRs containing it
    2. For each commit reference, check if it's covered by any PR
    3. Suppress covered commits, keep independent commits
    4. Extract diffs for all kept references

    Args:
        github_client: GitHub REST API client for PR and commit endpoints.
    """

    def __init__(self, github_client: GitHubClient) -> None:
        self._github = github_client
        # Memoizes successful list_pr_commits responses so coverage-index
        # construction and PR resolution don't each hit the API per PR.
        self._pr_commits_cache: dict[tuple[str, str, int], list[str]] = {}

    def resolve(self, references: list[ParsedReference]) -> list[DiffResult]:
        """Resolve references to diffs with PR-Wins deduplication.

        Args:
            references: List of parsed PR/commit references.

        Returns:
            List of DiffResult objects with extracted diffs or status info.
        """
        if not references:
            return []

        pr_refs = [r for r in references if r.reference_type == DiffReferenceType.PR]
        coverage_index = self._build_coverage_index(pr_refs)
        dedup_plan = self._apply_dedup(references, coverage_index)

        results: list[DiffResult] = []
        for plan_item in dedup_plan:
            result = self._extract_diff(plan_item)
            results.append(result)

        return results

    def _list_pr_commits(self, owner: str, repo: str, pr_number: int) -> list[str]:
        """Fetch commit SHAs for a PR, caching successful responses.

        Perf fix: previously the same PR's commits were fetched twice per
        resolve() call (once for the coverage index, once during PR
        resolution). Failures are NOT cached, so callers keep their original
        retry and error-handling behavior (AvosError propagates as before).
        """
        key = (owner, repo, pr_number)
        if key not in self._pr_commits_cache:
            self._pr_commits_cache[key] = self._github.list_pr_commits(
                owner, repo, pr_number
            )
        return self._pr_commits_cache[key]

    def _build_coverage_index(
        self, pr_refs: list[ParsedReference]
    ) -> dict[str, set[int]]:
        """Build index mapping commit SHAs to PR numbers that contain them.

        Args:
            pr_refs: List of PR references to index.

        Returns:
            Dict mapping full commit SHA to set of PR numbers.
        """
        index: dict[str, set[int]] = {}

        for pr_ref in pr_refs:
            # Without a repo slug we cannot query the API; skip silently --
            # the reference will surface as unresolved later in the plan.
            if pr_ref.repo_slug is None:
                continue

            # NOTE(review): assumes the parser only emits numeric raw_id for
            # PR references; a non-numeric value would raise ValueError here.
            pr_number = int(pr_ref.raw_id)
            owner, repo = pr_ref.repo_slug.split("/", 1)

            try:
                commit_shas = self._list_pr_commits(owner, repo, pr_number)
                for sha in commit_shas:
                    index.setdefault(sha, set()).add(pr_number)
            except AvosError as e:
                # Best-effort: a failed fetch just means this PR covers
                # nothing in the index; its commits won't be suppressed.
                _log.warning("Failed to fetch commits for PR #%d: %s", pr_number, e)

        return index

    def _apply_dedup(
        self,
        references: list[ParsedReference],
        coverage_index: dict[str, set[int]],
    ) -> list[DedupPlanItem]:
        """Apply PR-Wins deduplication rule to all references.

        Args:
            references: All parsed references.
            coverage_index: Mapping of commit SHA to covering PR numbers.

        Returns:
            List of DedupPlanItem with keep/suppress decisions.
        """
        plan: list[DedupPlanItem] = []

        for ref in references:
            if ref.reference_type == DiffReferenceType.PR:
                resolved = self._resolve_pr_reference(ref)
                if resolved is None:
                    plan.append(self._make_unresolved_plan_item(ref, "PR resolution failed"))
                else:
                    plan.append(
                        DedupPlanItem(reference=resolved, decision=DedupDecision.KEEP)
                    )
            else:
                resolved, commit_detail = self._resolve_commit_reference(ref)
                if resolved is None:
                    plan.append(
                        self._make_unresolved_plan_item(
                            ref,
                            commit_detail or "Commit SHA expansion failed",
                        )
                    )
                    continue

                full_sha = resolved.full_sha
                if full_sha and full_sha in coverage_index:
                    # PR-Wins: the commit's content already arrives via a
                    # referenced PR. min() picks the lowest PR number so the
                    # attribution is deterministic across runs.
                    covering_prs = coverage_index[full_sha]
                    first_pr = min(covering_prs)
                    plan.append(
                        DedupPlanItem(
                            reference=resolved,
                            decision=DedupDecision.SUPPRESS_COVERED_BY_PR,
                            covered_by_pr=first_pr,
                            reason=f"Covered by PR #{first_pr}",
                        )
                    )
                else:
                    plan.append(
                        DedupPlanItem(reference=resolved, decision=DedupDecision.KEEP)
                    )

        return plan

    def _resolve_pr_reference(self, ref: ParsedReference) -> ResolvedReference | None:
        """Resolve a PR reference to canonical form.

        Args:
            ref: Parsed PR reference.

        Returns:
            ResolvedReference or None if resolution fails.
        """
        if ref.repo_slug is None:
            return None

        pr_number = int(ref.raw_id)
        owner, repo = ref.repo_slug.split("/", 1)

        try:
            commit_shas = self._list_pr_commits(owner, repo, pr_number)
        except AvosError:
            # A PR we can't enumerate commits for is still resolvable; it
            # just contributes nothing to commit deduplication.
            commit_shas = []

        return ResolvedReference(
            reference_type=DiffReferenceType.PR,
            canonical_id=f"PR #{pr_number}",
            repo_slug=ref.repo_slug,
            pr_number=pr_number,
            commit_shas=commit_shas,
        )

    def _resolve_commit_reference(
        self, ref: ParsedReference
    ) -> tuple[ResolvedReference | None, str | None]:
        """Resolve a commit reference to canonical form with full SHA.

        Uses the GitHub commits API (same as the hosted repo).

        Args:
            ref: Parsed commit reference.

        Returns:
            (resolved, None) on success, or (None, detail) where detail is a
            short error string (e.g. GitHub API message) when resolution fails.
        """
        if ref.repo_slug is None:
            return None, None

        owner, repo = ref.repo_slug.split("/", 1)

        try:
            payload = self._github.get_commit(owner, repo, ref.raw_id)
        except AvosError as e:
            return None, str(e)
        full_sha = str(payload.get("sha", ""))
        # A full git SHA-1 is exactly 40 hex chars; anything else means the
        # API response is malformed or the commit lookup went wrong.
        if len(full_sha) != 40:
            return None, "Commit response missing full SHA"
        return (
            ResolvedReference(
                reference_type=DiffReferenceType.COMMIT,
                canonical_id=full_sha,
                repo_slug=ref.repo_slug,
                full_sha=full_sha,
            ),
            None,
        )

    def _make_unresolved_plan_item(
        self, ref: ParsedReference, reason: str
    ) -> DedupPlanItem:
        """Create a plan item for an unresolved reference.

        Args:
            ref: The unresolved reference.
            reason: Reason for failure.

        Returns:
            DedupPlanItem with KEEP decision (will produce UNRESOLVED result).
        """
        resolved = ResolvedReference(
            reference_type=ref.reference_type,
            canonical_id=ref.raw_id,
            repo_slug=ref.repo_slug or "unknown",
            # NOTE(review): int() assumes numeric raw_id for PR refs -- see
            # the same assumption in _build_coverage_index.
            pr_number=int(ref.raw_id) if ref.reference_type == DiffReferenceType.PR else None,
            full_sha=ref.raw_id if ref.reference_type == DiffReferenceType.COMMIT else None,
        )
        # KEEP plus a reason is the sentinel _extract_diff turns into an
        # UNRESOLVED DiffResult.
        return DedupPlanItem(
            reference=resolved,
            decision=DedupDecision.KEEP,
            reason=reason,
        )

    def _extract_diff(self, plan_item: DedupPlanItem) -> DiffResult:
        """Extract diff for a single plan item.

        Args:
            plan_item: Dedup plan item with reference and decision.

        Returns:
            DiffResult with diff text or status info.
        """
        ref = plan_item.reference

        if plan_item.decision == DedupDecision.SUPPRESS_COVERED_BY_PR:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                status=DiffStatus.SUPPRESSED,
                suppressed_reason=f"covered_by_pr:{plan_item.covered_by_pr}",
            )

        if ref.repo_slug is None or ref.repo_slug == "unknown":
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug or "unknown",
                status=DiffStatus.UNRESOLVED,
                error_message="Repository context unknown",
            )

        # KEEP with a reason marks a reference that failed resolution
        # upstream (see _make_unresolved_plan_item).
        if plan_item.decision == DedupDecision.KEEP and plan_item.reason:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                status=DiffStatus.UNRESOLVED,
                error_message=plan_item.reason,
            )

        if ref.reference_type == DiffReferenceType.PR:
            return self._extract_pr_diff(ref)
        return self._extract_commit_diff(ref)

    def _extract_pr_diff(self, ref: ResolvedReference) -> DiffResult:
        """Extract diff for a PR reference.

        Args:
            ref: Resolved PR reference.

        Returns:
            DiffResult with PR diff or error.
        """
        if ref.pr_number is None or ref.repo_slug is None:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug or "unknown",
                status=DiffStatus.UNRESOLVED,
                error_message="Invalid PR reference",
            )

        owner, repo = ref.repo_slug.split("/", 1)

        try:
            diff_text = self._github.get_pr_diff(owner, repo, ref.pr_number)
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                diff_text=diff_text,
                status=DiffStatus.RESOLVED,
            )
        except AvosError as e:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                status=DiffStatus.UNRESOLVED,
                error_message=str(e),
            )

    def _extract_commit_diff(self, ref: ResolvedReference) -> DiffResult:
        """Extract diff for a commit reference via GitHub commits API.

        Args:
            ref: Resolved commit reference.

        Returns:
            DiffResult with commit diff or error.
        """
        if ref.full_sha is None:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                status=DiffStatus.UNRESOLVED,
                error_message="Commit SHA not resolved",
            )

        owner, repo = ref.repo_slug.split("/", 1)

        try:
            diff_text = self._github.get_commit_diff(owner, repo, ref.full_sha)
            if not diff_text:
                return DiffResult(
                    reference_type=ref.reference_type,
                    canonical_id=ref.canonical_id,
                    repo=ref.repo_slug,
                    status=DiffStatus.UNRESOLVED,
                    error_message="Commit has no diff (empty response)",
                )
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                diff_text=diff_text,
                status=DiffStatus.RESOLVED,
            )
        except AvosError as e:
            return DiffResult(
                reference_type=ref.reference_type,
                canonical_id=ref.canonical_id,
                repo=ref.repo_slug,
                status=DiffStatus.UNRESOLVED,
                error_message=str(e),
            )

    def format_output(self, results: list[DiffResult]) -> str:
        """Format diff results as grouped output text.

        Args:
            results: List of DiffResult objects.

        Returns:
            Formatted string with headers and diff content.
        """
        lines: list[str] = []

        for result in results:
            if result.reference_type == DiffReferenceType.PR:
                # canonical_id for a resolved PR is "PR #N"; strip the prefix
                # so the header doesn't double it.
                header = f"=== PR #{result.canonical_id.replace('PR #', '')} ==="
            else:
                header = f"=== COMMIT {result.canonical_id} ==="

            lines.append(header)

            if result.status == DiffStatus.RESOLVED:
                lines.append(result.diff_text or "")
            elif result.status == DiffStatus.SUPPRESSED:
                lines.append(f"[suppressed: {result.suppressed_reason}]")
            else:
                lines.append(f"[unresolved: {result.error_message}]")

            lines.append("")

        return "\n".join(lines)
|