git-aware-coding-agent 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. avos_cli/__init__.py +3 -0
  2. avos_cli/agents/avos_ask_agent.md +47 -0
  3. avos_cli/agents/avos_ask_agent_JSON_converter.md +78 -0
  4. avos_cli/agents/avos_hisotry_agent_JSON_converter.md +92 -0
  5. avos_cli/agents/avos_history_agent.md +58 -0
  6. avos_cli/agents/git_diff_agent.md +63 -0
  7. avos_cli/artifacts/__init__.py +17 -0
  8. avos_cli/artifacts/base.py +47 -0
  9. avos_cli/artifacts/commit_builder.py +35 -0
  10. avos_cli/artifacts/doc_builder.py +30 -0
  11. avos_cli/artifacts/issue_builder.py +37 -0
  12. avos_cli/artifacts/pr_builder.py +50 -0
  13. avos_cli/cli/__init__.py +1 -0
  14. avos_cli/cli/main.py +504 -0
  15. avos_cli/commands/__init__.py +1 -0
  16. avos_cli/commands/ask.py +541 -0
  17. avos_cli/commands/connect.py +363 -0
  18. avos_cli/commands/history.py +549 -0
  19. avos_cli/commands/hook_install.py +260 -0
  20. avos_cli/commands/hook_sync.py +231 -0
  21. avos_cli/commands/ingest.py +506 -0
  22. avos_cli/commands/ingest_pr.py +239 -0
  23. avos_cli/config/__init__.py +1 -0
  24. avos_cli/config/hash_store.py +93 -0
  25. avos_cli/config/lock.py +122 -0
  26. avos_cli/config/manager.py +180 -0
  27. avos_cli/config/state.py +90 -0
  28. avos_cli/exceptions.py +272 -0
  29. avos_cli/models/__init__.py +58 -0
  30. avos_cli/models/api.py +75 -0
  31. avos_cli/models/artifacts.py +99 -0
  32. avos_cli/models/config.py +56 -0
  33. avos_cli/models/diff.py +117 -0
  34. avos_cli/models/query.py +234 -0
  35. avos_cli/parsers/__init__.py +21 -0
  36. avos_cli/parsers/artifact_ref_extractor.py +173 -0
  37. avos_cli/parsers/reference_parser.py +117 -0
  38. avos_cli/services/__init__.py +1 -0
  39. avos_cli/services/chronology_service.py +68 -0
  40. avos_cli/services/citation_validator.py +134 -0
  41. avos_cli/services/context_budget_service.py +104 -0
  42. avos_cli/services/diff_resolver.py +398 -0
  43. avos_cli/services/diff_summary_service.py +141 -0
  44. avos_cli/services/git_client.py +351 -0
  45. avos_cli/services/github_client.py +443 -0
  46. avos_cli/services/llm_client.py +312 -0
  47. avos_cli/services/memory_client.py +323 -0
  48. avos_cli/services/query_fallback_formatter.py +108 -0
  49. avos_cli/services/reply_output_service.py +341 -0
  50. avos_cli/services/sanitization_service.py +218 -0
  51. avos_cli/utils/__init__.py +1 -0
  52. avos_cli/utils/dotenv_load.py +50 -0
  53. avos_cli/utils/hashing.py +22 -0
  54. avos_cli/utils/logger.py +77 -0
  55. avos_cli/utils/output.py +232 -0
  56. avos_cli/utils/sanitization_diagnostics.py +81 -0
  57. avos_cli/utils/time_helpers.py +56 -0
  58. git_aware_coding_agent-1.0.0.dist-info/METADATA +390 -0
  59. git_aware_coding_agent-1.0.0.dist-info/RECORD +62 -0
  60. git_aware_coding_agent-1.0.0.dist-info/WHEEL +4 -0
  61. git_aware_coding_agent-1.0.0.dist-info/entry_points.txt +2 -0
  62. git_aware_coding_agent-1.0.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,117 @@
1
+ """Diff pipeline models for PR and commit reference resolution.
2
+
3
+ Defines Pydantic models for the git diff extraction pipeline:
4
+ parsed references, resolved references, deduplication plan items,
5
+ and final diff results.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from enum import Enum
11
+
12
+ from pydantic import BaseModel, ConfigDict
13
+
14
+
15
class DiffReferenceType(str, Enum):
    """Kind of object a diff reference points at.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    PR = "pr"  # a pull request
    COMMIT = "commit"  # a single commit
20
+
21
+
22
class DedupDecision(str, Enum):
    """Outcome of deduplication for one reference.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    KEEP = "keep"  # the reference is retained
    SUPPRESS_COVERED_BY_PR = "suppress_covered_by_pr"  # dropped: a PR covers it
27
+
28
+
29
class DiffStatus(str, Enum):
    """Outcome of a diff extraction for a single reference.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    RESOLVED = "resolved"
    UNRESOLVED = "unresolved"
    SUPPRESSED = "suppressed"
35
+
36
+
37
class ParsedReference(BaseModel):
    """Immutable result of parsing one raw PR or commit reference.

    Attributes:
        reference_type: Whether the reference names a PR or a commit.
        raw_id: Identifier exactly as parsed (PR number or short/full SHA).
        repo_slug: Repository slug in 'org/repo' form, or None when the
            repository is ambiguous.
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    reference_type: DiffReferenceType
    raw_id: str
    repo_slug: str | None
51
+
52
+
53
class ResolvedReference(BaseModel):
    """Immutable reference whose identifiers have been made canonical.

    A PR reference carries the commit SHAs contained in that PR; a commit
    reference carries its expanded full SHA.

    Attributes:
        reference_type: Whether the reference names a PR or a commit.
        canonical_id: Canonical display ID (e.g., 'PR #1245' or full SHA).
        repo_slug: Repository slug in 'org/repo' form.
        pr_number: PR number (PR references only).
        full_sha: Full 40-char commit SHA (commit references only).
        commit_shas: Commit SHAs contained in the PR (PR references only).
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    reference_type: DiffReferenceType
    canonical_id: str
    repo_slug: str
    pr_number: int | None = None
    full_sha: str | None = None
    commit_shas: list[str] = []
76
+
77
+
78
class DedupPlanItem(BaseModel):
    """One entry of the deduplication plan: a reference plus its verdict.

    Attributes:
        reference: The resolved reference that was evaluated.
        decision: Whether the reference is kept or suppressed.
        covered_by_pr: Number of the PR covering this commit (if suppressed).
        reason: Human-readable explanation of the decision.
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    reference: ResolvedReference
    decision: DedupDecision
    covered_by_pr: int | None = None
    reason: str | None = None
94
+
95
+
96
class DiffResult(BaseModel):
    """Immutable end product of diff extraction for one reference.

    Attributes:
        reference_type: Whether the reference names a PR or a commit.
        canonical_id: Canonical display ID (e.g., 'PR #1245' or full SHA).
        repo: Repository slug in 'org/repo' form.
        diff_text: Unified diff text, or None if unresolved/suppressed.
        status: Resolution status (resolved, unresolved, suppressed).
        suppressed_reason: Why the reference was suppressed
            (e.g., 'covered_by_pr:1245').
        error_message: Error message if unresolved.
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    reference_type: DiffReferenceType
    canonical_id: str
    repo: str
    diff_text: str | None = None
    status: DiffStatus
    suppressed_reason: str | None = None
    error_message: str | None = None
@@ -0,0 +1,234 @@
1
+ """Query pipeline internal contracts for Sprint 3 (AVOS-012..015).
2
+
3
+ Defines frozen Pydantic models for the query synthesis pipeline:
4
+ retrieval, sanitization, budget packing, citation grounding,
5
+ chronology, synthesis request/response, and result envelopes.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from enum import Enum
11
+
12
+ from pydantic import BaseModel, ConfigDict
13
+
14
+
15
class QueryMode(str, Enum):
    """Selects which synthesis pipeline runs: ask or history.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    ASK = "ask"
    HISTORY = "history"
20
+
21
+
22
class FallbackReason(str, Enum):
    """Why the pipeline fell back from synthesis to raw results.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    LLM_UNAVAILABLE = "llm_unavailable"
    GROUNDING_FAILED = "grounding_failed"
    SAFETY_BLOCK = "safety_block"
    BUDGET_EXHAUSTED = "budget_exhausted"
29
+
30
+
31
class GroundingStatus(str, Enum):
    """Result of validating a citation against the retrieved artifacts.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    GROUNDED = "grounded"
    DROPPED_UNVERIFIABLE = "dropped_unverifiable"
36
+
37
+
38
class ReferenceType(str, Enum):
    """Category of an evidence reference, used for display.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    NOTE_ID = "note_id"
    PR = "pr"
    ISSUE = "issue"
    COMMIT = "commit"
45
+
46
+
47
class RetrievedArtifact(BaseModel):
    """One artifact as returned by a Memory API search.

    Attributes:
        note_id: Unique identifier from Memory API.
        content: Full text content of the note.
        created_at: ISO 8601 creation timestamp.
        rank: Relevance rank (1 = best match).
        source_type: Classified artifact type (e.g. raw_pr_thread).
        display_ref: Optional human-friendly label (e.g. PR #101).
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    note_id: str
    content: str
    created_at: str
    rank: int
    source_type: str | None = None
    display_ref: str | None = None
67
+
68
+
69
class SanitizedArtifact(BaseModel):
    """An artifact after it has passed through sanitization/redaction.

    Has the same fields as RetrievedArtifact, plus metadata recording
    which redactions (if any) were applied.

    Attributes:
        note_id: Unique identifier from Memory API.
        content: Sanitized text content (secrets/PII redacted).
        created_at: ISO 8601 creation timestamp.
        rank: Relevance rank.
        source_type: Classified artifact type.
        display_ref: Optional human-friendly label.
        redaction_applied: Whether any redaction was performed.
        redaction_types: Redaction categories that were applied.
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    # Fields mirrored from RetrievedArtifact.
    note_id: str
    content: str
    created_at: str
    rank: int
    source_type: str | None = None
    display_ref: str | None = None

    # Redaction audit metadata.
    redaction_applied: bool = False
    redaction_types: list[str] = []
95
+
96
+
97
class SanitizationResult(BaseModel):
    """Sanitization outcome aggregated over a whole set of artifacts.

    Attributes:
        artifacts: The sanitized artifacts.
        redaction_applied: Whether any redaction occurred across the set.
        redaction_types: Union of all redaction categories applied.
        confidence_score: Sanitization confidence (0-100).
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    artifacts: list[SanitizedArtifact]
    redaction_applied: bool
    redaction_types: list[str]
    confidence_score: int
113
+
114
+
115
class BudgetResult(BaseModel):
    """Outcome of packing artifacts into the context budget.

    Attributes:
        included: Artifacts selected for synthesis (within budget).
        excluded: Artifacts cut due to budget constraints.
        truncation_flags: Map of note_id -> was_truncated.
        included_count: Number of included artifacts.
        excluded_count: Number of excluded artifacts.
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    included: list[SanitizedArtifact]
    excluded: list[SanitizedArtifact]
    truncation_flags: dict[str, bool]
    included_count: int
    excluded_count: int
133
+
134
+
135
class GroundedCitation(BaseModel):
    """A citation after validation against the retrieved artifacts.

    Attributes:
        note_id: note_id of the artifact this citation references.
        display_label: Human-friendly label (e.g. PR #101, Issue #42).
        reference_type: Category of the reference.
        grounding_status: Whether the citation is grounded or dropped.
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    note_id: str
    display_label: str
    reference_type: ReferenceType
    grounding_status: GroundingStatus
151
+
152
+
153
class TimelineEvent(BaseModel):
    """One classified entry in a chronological history timeline.

    Attributes:
        timestamp: ISO 8601 timestamp of the event.
        event_class: Classification (Introduction, Expansion, Bug Fix, etc.).
        summary: Brief description of the event.
        supporting_refs: note_ids supporting this event.
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    timestamp: str
    event_class: str
    summary: str
    supporting_refs: list[str] = []
169
+
170
+
171
class SynthesisRequest(BaseModel):
    """Payload handed to the LLM for synthesis.

    Attributes:
        mode: ask or history pipeline selector.
        query: User's question or subject.
        provider: LLM provider name.
        model: LLM model identifier.
        prompt_template_version: Version tag for prompt template.
        artifacts: Packed, sanitized artifacts for context.
        budget_meta: Optional budget metadata for diagnostics.
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    mode: QueryMode
    query: str
    provider: str
    model: str
    prompt_template_version: str
    artifacts: list[SanitizedArtifact]
    budget_meta: dict[str, object] | None = None
193
+
194
+
195
class SynthesisResponse(BaseModel):
    """What LLM synthesis produced.

    Attributes:
        answer_text: The synthesized answer or timeline narrative.
        evidence_refs: Grounded citation references.
        timeline_events: Classified timeline events (history mode).
        warnings: Any warnings (truncation, partial grounding, etc.).
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    answer_text: str
    evidence_refs: list[GroundedCitation] = []
    timeline_events: list[TimelineEvent] = []
    warnings: list[str] = []
211
+
212
+
213
class QueryResultEnvelope(BaseModel):
    """Final query result handed back to the CLI layer.

    Attributes:
        mode: ask or history.
        answer: Synthesized answer text (ask mode).
        timeline: Chronological events (history mode).
        citations: Grounded citations for evidence display.
        fallback_used: Whether fallback was triggered.
        warnings: Any warnings to display.
        fallback_reason: Categorized reason if fallback was used.
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    mode: QueryMode
    answer: str | None = None
    timeline: list[TimelineEvent] | None = None
    citations: list[GroundedCitation]
    fallback_used: bool
    warnings: list[str] = []
    fallback_reason: FallbackReason | None = None
@@ -0,0 +1,21 @@
1
+ """Parsers for reference extraction and normalization."""
2
+
3
+ from avos_cli.parsers.artifact_ref_extractor import (
4
+ ArtifactRef,
5
+ NoteRefs,
6
+ collect_all_refs,
7
+ extract_refs,
8
+ extract_refs_by_note,
9
+ extract_refs_from_hits,
10
+ )
11
+ from avos_cli.parsers.reference_parser import ReferenceParser
12
+
13
+ __all__ = [
14
+ "ArtifactRef",
15
+ "NoteRefs",
16
+ "ReferenceParser",
17
+ "collect_all_refs",
18
+ "extract_refs",
19
+ "extract_refs_by_note",
20
+ "extract_refs_from_hits",
21
+ ]
@@ -0,0 +1,173 @@
1
+ """Extract PR numbers and commit hashes from memory search hit content.
2
+
3
+ This module provides utilities to parse structured tags from artifact content
4
+ returned by the Avos Memory API. The tags follow the format established by
5
+ the ingest builders (pr_builder.py, commit_builder.py):
6
+
7
+ [pr: #42]
8
+ [hash: abc1234...]
9
+ [repo: org/repo]
10
+
11
+ Functions:
12
+ extract_refs: Extract refs from a single content string.
13
+ extract_refs_from_hits: Map extraction over a list of SearchHit objects.
14
+ collect_all_refs: Aggregate unique refs across all hits.
15
+ extract_refs_by_note: Extract refs grouped by note_id as unified string arrays.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ from typing import TYPE_CHECKING
22
+
23
+ from pydantic import BaseModel, ConfigDict, Field
24
+
25
+ if TYPE_CHECKING:
26
+ from avos_cli.models.api import SearchHit
27
+
28
+ # Compiled regex patterns for tag extraction (shared across the codebase)
29
+ _PR_RE = re.compile(r"\[pr:\s*#(\d+)\]", re.IGNORECASE)
30
+ _HASH_RE = re.compile(r"\[hash:\s*([a-f0-9]+)\]", re.IGNORECASE)
31
+ _REPO_RE = re.compile(r"\[repo:\s*([a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+)\]", re.IGNORECASE)
32
+
33
+
34
+ def _build_references(pr_numbers: list[int], commit_hashes: list[str]) -> list[str]:
35
+ """Build unified reference strings from PR numbers and commit hashes.
36
+
37
+ Args:
38
+ pr_numbers: List of PR numbers.
39
+ commit_hashes: List of commit hashes.
40
+
41
+ Returns:
42
+ List of formatted reference strings like ["pr #42", "commit abc1234"].
43
+ """
44
+ refs: list[str] = []
45
+ for pr in pr_numbers:
46
+ refs.append(f"pr #{pr}")
47
+ for h in commit_hashes:
48
+ refs.append(f"commit {h}")
49
+ return refs
50
+
51
+
52
class ArtifactRef(BaseModel):
    """References pulled out of a memory artifact's content.

    Attributes:
        pr_numbers: Unique PR numbers found in the content.
        commit_hashes: Unique commit hashes found in the content.
        references: Unified reference strings
            (e.g., ["pr #42", "commit abc1234"]).
        repo: Repository slug (owner/name) if found, else None.
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    pr_numbers: list[int] = Field(default_factory=list)
    commit_hashes: list[str] = Field(default_factory=list)
    references: list[str] = Field(default_factory=list)
    repo: str | None = None
68
+
69
+
70
class NoteRefs(BaseModel):
    """Reference strings associated with a single note.

    Attributes:
        note_id: The note identifier from the memory artifact.
        references: Unified reference strings
            (e.g., ["pr #42", "commit abc1234"]).
    """

    # Frozen: fields cannot be reassigned after construction.
    model_config = ConfigDict(frozen=True)

    note_id: str
    references: list[str] = Field(default_factory=list)
82
+
83
+
84
def extract_refs(content: str) -> ArtifactRef:
    """Pull PR numbers, commit hashes, and the repo slug out of *content*.

    Args:
        content: Raw text content from a memory artifact.

    Returns:
        ArtifactRef holding the PR numbers and hashes in first-seen order
        with duplicates removed, the matching reference labels, and the
        first repo slug found (None when there is none).
    """
    # dict.fromkeys drops duplicates while keeping first-seen order.
    unique_prs = list(
        dict.fromkeys(int(found.group(1)) for found in _PR_RE.finditer(content))
    )
    unique_hashes = list(
        dict.fromkeys(found.group(1) for found in _HASH_RE.finditer(content))
    )

    repo_found = _REPO_RE.search(content)
    repo_slug = None if repo_found is None else repo_found.group(1)

    return ArtifactRef(
        pr_numbers=unique_prs,
        commit_hashes=unique_hashes,
        references=_build_references(unique_prs, unique_hashes),
        repo=repo_slug,
    )
110
+
111
+
112
def extract_refs_from_hits(
    hits: list[SearchHit],
) -> list[tuple[SearchHit, ArtifactRef]]:
    """Pair every search hit with the references found in its content.

    Args:
        hits: List of SearchHit objects from memory search.

    Returns:
        List of (SearchHit, ArtifactRef) tuples in the same order as input.
    """
    paired: list[tuple[SearchHit, ArtifactRef]] = []
    for hit in hits:
        paired.append((hit, extract_refs(hit.content)))
    return paired
124
+
125
+
126
def collect_all_refs(hits: list[SearchHit]) -> ArtifactRef:
    """Merge the references of all search hits into one ArtifactRef.

    Args:
        hits: List of SearchHit objects from memory search.

    Returns:
        Single ArtifactRef with the PRs and hashes of every hit in
        first-seen order, duplicates removed. Its repo is the first
        non-None repo found, or None if no hit has one.
    """
    # Dicts serve as insertion-ordered sets: re-adding a key keeps its slot.
    seen_prs: dict[int, None] = {}
    seen_hashes: dict[str, None] = {}
    repo_slug: str | None = None

    for hit in hits:
        extracted = extract_refs(hit.content)
        seen_prs.update(dict.fromkeys(extracted.pr_numbers))
        seen_hashes.update(dict.fromkeys(extracted.commit_hashes))
        if repo_slug is None:
            repo_slug = extracted.repo

    merged_prs = list(seen_prs)
    merged_hashes = list(seen_hashes)

    return ArtifactRef(
        pr_numbers=merged_prs,
        commit_hashes=merged_hashes,
        references=_build_references(merged_prs, merged_hashes),
        repo=repo_slug,
    )
157
+
158
+
159
def extract_refs_by_note(hits: list[SearchHit]) -> list[NoteRefs]:
    """Build one NoteRefs per hit from the references in its content.

    Args:
        hits: List of SearchHit objects from memory search.

    Returns:
        List of NoteRefs, one per hit and in input order, with references
        as formatted strings.
        Example: [NoteRefs(note_id="11", references=["pr #123", "commit qdsf"])]
    """
    return [
        NoteRefs(note_id=hit.note_id, references=extract_refs(hit.content).references)
        for hit in hits
    ]
@@ -0,0 +1,117 @@
1
+ """Reference parser for PR and commit identifiers.
2
+
3
+ Provides regex-based parsing of various PR and commit reference formats
4
+ without LLM interpretation. Fails fast on ambiguous inputs.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from typing import ClassVar
11
+
12
+ from avos_cli.models.diff import DiffReferenceType, ParsedReference
13
+
14
+
15
class ReferenceParser:
    """Parses raw reference strings into structured ParsedReference objects.

    Parsing is purely regex-based. PR patterns are tried before commit
    patterns, and within each family the patterns are tried in the order
    they are listed; the first pattern that matches decides the result.

    Supports formats:
        - PR: "PR #1245", "pr #1245", "PR#1245", "#1245", "org/repo#1245"
        - Commit: "Commit 8c3a1b2", "commit abc1234", bare SHA (7-40 hex chars)
        - URLs: "https://github.com/org/repo/pull/123", ".../commit/abc1234"
    """

    _PR_PATTERNS: ClassVar[list[re.Pattern[str]]] = [
        # GitHub PR URL: https://github.com/org/repo/pull/123
        re.compile(
            r"(?:https?://)?github\.com/(?P<repo>[^/]+/[^/]+)/pull/(?P<num>\d+)",
            re.IGNORECASE,
        ),
        # org/repo#123 or github.com/org/repo#123
        re.compile(
            r"(?:github\.com/)?(?P<repo>[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+)#(?P<num>\d+)",
            re.IGNORECASE,
        ),
        # PR #123 or PR#123 or pr #123
        re.compile(r"\bpr\s*#\s*(?P<num>\d+)", re.IGNORECASE),
        # Bare #123 (assumes PR in context)
        re.compile(r"^#(?P<num>\d+)(?:\s|$)"),
    ]

    # The trailing \b on the first two patterns makes the SHA a whole token.
    # Without it, a token longer than 40 hex chars (or one with a non-hex
    # suffix) would be silently cut to a prefix and reported as a different,
    # valid-looking commit id.
    _COMMIT_PATTERNS: ClassVar[list[re.Pattern[str]]] = [
        # GitHub commit URL: https://github.com/org/repo/commit/abc1234
        re.compile(
            r"(?:https?://)?github\.com/(?P<repo>[^/]+/[^/]+)/commit/(?P<sha>[a-f0-9]{7,40})\b",
            re.IGNORECASE,
        ),
        # Commit abc1234 or commit: abc1234
        re.compile(r"\bcommit:?\s*(?P<sha>[a-f0-9]{7,40})\b", re.IGNORECASE),
        # Bare SHA (7-40 hex chars, must be whole token)
        re.compile(r"^(?P<sha>[a-f0-9]{7,40})$", re.IGNORECASE),
    ]

    def parse(self, raw: str, default_repo: str | None) -> ParsedReference | None:
        """Parse a single raw reference string.

        Args:
            raw: The raw reference string to parse.
            default_repo: Default repository slug if not specified in reference.
                After ``avos connect``, use :func:`~avos_cli.config.manager.connected_repo_slug`.

        Returns:
            ParsedReference if successfully parsed, None otherwise. None is
            returned for empty or whitespace-only input, for a PR number
            that is not positive, and when no pattern matches.
        """
        text = raw.strip() if raw else ""
        if not text:
            return None

        # Try PR patterns first
        for pattern in self._PR_PATTERNS:
            match = pattern.search(text)
            if match is None:
                continue
            num_str = match.group("num")
            if int(num_str) <= 0:
                # A matched but non-positive PR number is rejected outright;
                # the commit patterns are deliberately not consulted.
                return None
            return ParsedReference(
                reference_type=DiffReferenceType.PR,
                raw_id=num_str,
                # Patterns without a "repo" group fall back to the default.
                repo_slug=match.groupdict().get("repo") or default_repo,
            )

        # Try commit patterns
        for pattern in self._COMMIT_PATTERNS:
            match = pattern.search(text)
            if match is None:
                continue
            return ParsedReference(
                reference_type=DiffReferenceType.COMMIT,
                # SHAs are matched case-insensitively and stored lowercase.
                raw_id=match.group("sha").lower(),
                repo_slug=match.groupdict().get("repo") or default_repo,
            )

        return None

    def parse_all(
        self, raw_list: list[str], default_repo: str | None
    ) -> list[ParsedReference]:
        """Parse multiple raw reference strings.

        Args:
            raw_list: List of raw reference strings to parse.
            default_repo: Default repository slug if not specified.
                After ``avos connect``, use :func:`~avos_cli.config.manager.connected_repo_slug`.

        Returns:
            List of successfully parsed references in input order (invalid
            ones are skipped).
        """
        candidates = (self.parse(raw, default_repo) for raw in raw_list)
        return [ref for ref in candidates if ref is not None]
@@ -0,0 +1 @@
1
+ """Shared service modules for AVOS CLI."""