git-aware-coding-agent 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- avos_cli/__init__.py +3 -0
- avos_cli/agents/avos_ask_agent.md +47 -0
- avos_cli/agents/avos_ask_agent_JSON_converter.md +78 -0
- avos_cli/agents/avos_hisotry_agent_JSON_converter.md +92 -0
- avos_cli/agents/avos_history_agent.md +58 -0
- avos_cli/agents/git_diff_agent.md +63 -0
- avos_cli/artifacts/__init__.py +17 -0
- avos_cli/artifacts/base.py +47 -0
- avos_cli/artifacts/commit_builder.py +35 -0
- avos_cli/artifacts/doc_builder.py +30 -0
- avos_cli/artifacts/issue_builder.py +37 -0
- avos_cli/artifacts/pr_builder.py +50 -0
- avos_cli/cli/__init__.py +1 -0
- avos_cli/cli/main.py +504 -0
- avos_cli/commands/__init__.py +1 -0
- avos_cli/commands/ask.py +541 -0
- avos_cli/commands/connect.py +363 -0
- avos_cli/commands/history.py +549 -0
- avos_cli/commands/hook_install.py +260 -0
- avos_cli/commands/hook_sync.py +231 -0
- avos_cli/commands/ingest.py +506 -0
- avos_cli/commands/ingest_pr.py +239 -0
- avos_cli/config/__init__.py +1 -0
- avos_cli/config/hash_store.py +93 -0
- avos_cli/config/lock.py +122 -0
- avos_cli/config/manager.py +180 -0
- avos_cli/config/state.py +90 -0
- avos_cli/exceptions.py +272 -0
- avos_cli/models/__init__.py +58 -0
- avos_cli/models/api.py +75 -0
- avos_cli/models/artifacts.py +99 -0
- avos_cli/models/config.py +56 -0
- avos_cli/models/diff.py +117 -0
- avos_cli/models/query.py +234 -0
- avos_cli/parsers/__init__.py +21 -0
- avos_cli/parsers/artifact_ref_extractor.py +173 -0
- avos_cli/parsers/reference_parser.py +117 -0
- avos_cli/services/__init__.py +1 -0
- avos_cli/services/chronology_service.py +68 -0
- avos_cli/services/citation_validator.py +134 -0
- avos_cli/services/context_budget_service.py +104 -0
- avos_cli/services/diff_resolver.py +398 -0
- avos_cli/services/diff_summary_service.py +141 -0
- avos_cli/services/git_client.py +351 -0
- avos_cli/services/github_client.py +443 -0
- avos_cli/services/llm_client.py +312 -0
- avos_cli/services/memory_client.py +323 -0
- avos_cli/services/query_fallback_formatter.py +108 -0
- avos_cli/services/reply_output_service.py +341 -0
- avos_cli/services/sanitization_service.py +218 -0
- avos_cli/utils/__init__.py +1 -0
- avos_cli/utils/dotenv_load.py +50 -0
- avos_cli/utils/hashing.py +22 -0
- avos_cli/utils/logger.py +77 -0
- avos_cli/utils/output.py +232 -0
- avos_cli/utils/sanitization_diagnostics.py +81 -0
- avos_cli/utils/time_helpers.py +56 -0
- git_aware_coding_agent-1.0.0.dist-info/METADATA +390 -0
- git_aware_coding_agent-1.0.0.dist-info/RECORD +62 -0
- git_aware_coding_agent-1.0.0.dist-info/WHEEL +4 -0
- git_aware_coding_agent-1.0.0.dist-info/entry_points.txt +2 -0
- git_aware_coding_agent-1.0.0.dist-info/licenses/LICENSE +201 -0
avos_cli/models/diff.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Diff pipeline models for PR and commit reference resolution.
|
|
2
|
+
|
|
3
|
+
Defines Pydantic models for the git diff extraction pipeline:
|
|
4
|
+
parsed references, resolved references, deduplication plan items,
|
|
5
|
+
and final diff results.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from enum import Enum
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, ConfigDict
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DiffReferenceType(str, Enum):
    """Discriminator for the two kinds of diff references the pipeline handles."""

    # A pull-request reference (e.g. "PR #1245").
    PR = "pr"
    # A single-commit reference (short or full SHA).
    COMMIT = "commit"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DedupDecision(str, Enum):
    """Outcome of deduplication for a single resolved reference."""

    # The reference survives deduplication.
    KEEP = "keep"
    # The reference is dropped because a PR already covers it.
    SUPPRESS_COVERED_BY_PR = "suppress_covered_by_pr"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DiffStatus(str, Enum):
    """Resolution status of a single diff extraction."""

    # A diff was successfully produced.
    RESOLVED = "resolved"
    # The reference could not be resolved to a diff.
    UNRESOLVED = "unresolved"
    # The reference was deliberately skipped (e.g. covered by a PR).
    SUPPRESSED = "suppressed"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ParsedReference(BaseModel):
    """A parsed PR or commit reference from raw input.

    Produced by the reference parser before any network resolution happens.

    Args:
        reference_type: Whether this is a PR or commit reference.
        raw_id: The raw identifier (PR number or short/full SHA).
        repo_slug: Repository slug 'org/repo', or None if ambiguous.
    """

    # Frozen: instances are immutable (and hashable) after construction.
    model_config = ConfigDict(frozen=True)

    reference_type: DiffReferenceType
    raw_id: str  # PR number kept as a string, or a possibly-short commit SHA.
    repo_slug: str | None  # None when no repo could be determined from input.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ResolvedReference(BaseModel):
    """A fully resolved reference with canonical identifiers.

    For PRs: includes the list of commit SHAs contained in the PR.
    For commits: includes the expanded full SHA.

    Args:
        reference_type: Whether this is a PR or commit reference.
        canonical_id: Canonical display ID (e.g., 'PR #1245' or full SHA).
        repo_slug: Repository slug 'org/repo'.
        pr_number: PR number (for PR references).
        full_sha: Full 40-char commit SHA (for commit references).
        commit_shas: List of commit SHAs contained in this PR (for PR refs).
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    reference_type: DiffReferenceType
    canonical_id: str
    repo_slug: str
    pr_number: int | None = None  # Populated for PR references only.
    full_sha: str | None = None  # Populated for commit references only.
    # Pydantic copies field defaults per instance, so a literal [] default is
    # safe here (unlike a mutable default argument on a plain function).
    commit_shas: list[str] = []
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class DedupPlanItem(BaseModel):
    """A deduplication plan item with decision and reasoning.

    Args:
        reference: The resolved reference being evaluated.
        decision: Keep or suppress this reference.
        covered_by_pr: PR number that covers this commit (if suppressed).
        reason: Human-readable reason for the decision.
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    reference: ResolvedReference
    decision: DedupDecision
    covered_by_pr: int | None = None  # Only set when decision is a suppression.
    reason: str | None = None  # Free-text explanation for diagnostics/display.
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
class DiffResult(BaseModel):
    """Final result of diff extraction for a single reference.

    Args:
        reference_type: Whether this is a PR or commit reference.
        canonical_id: Canonical display ID (e.g., 'PR #1245' or full SHA).
        repo: Repository slug 'org/repo'.
        diff_text: The unified diff text, or None if unresolved/suppressed.
        status: Resolution status (resolved, unresolved, suppressed).
        suppressed_reason: Reason for suppression (e.g., 'covered_by_pr:1245').
        error_message: Error message if unresolved.
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    reference_type: DiffReferenceType
    canonical_id: str
    repo: str
    diff_text: str | None = None  # None unless status is RESOLVED.
    status: DiffStatus
    suppressed_reason: str | None = None  # Only set when status is SUPPRESSED.
    error_message: str | None = None  # Only set when status is UNRESOLVED.
|
avos_cli/models/query.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""Query pipeline internal contracts for Sprint 3 (AVOS-012..015).
|
|
2
|
+
|
|
3
|
+
Defines frozen Pydantic models for the query synthesis pipeline:
|
|
4
|
+
retrieval, sanitization, budget packing, citation grounding,
|
|
5
|
+
chronology, synthesis request/response, and result envelopes.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from enum import Enum
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, ConfigDict
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class QueryMode(str, Enum):
    """Selects which synthesis pipeline handles a query: ask or history."""

    # Free-form question answering.
    ASK = "ask"
    # Chronological timeline reconstruction.
    HISTORY = "history"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FallbackReason(str, Enum):
    """Why the pipeline fell back from synthesis to raw search results."""

    # The LLM could not be used at all.
    LLM_UNAVAILABLE = "llm_unavailable"
    # Citation grounding validation failed.
    GROUNDING_FAILED = "grounding_failed"
    # A safety check blocked the synthesized output.
    SAFETY_BLOCK = "safety_block"
    # The context budget left nothing usable.
    BUDGET_EXHAUSTED = "budget_exhausted"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class GroundingStatus(str, Enum):
    """Outcome of validating a citation against retrieved artifacts."""

    # The citation was verified against an artifact.
    GROUNDED = "grounded"
    # The citation could not be verified and was dropped.
    DROPPED_UNVERIFIABLE = "dropped_unverifiable"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ReferenceType(str, Enum):
    """Kind of evidence reference, used for display purposes."""

    NOTE_ID = "note_id"  # Memory note identifier.
    PR = "pr"  # Pull request.
    ISSUE = "issue"  # Issue tracker item.
    COMMIT = "commit"  # Git commit.
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class RetrievedArtifact(BaseModel):
    """A single artifact returned from Memory API search.

    Args:
        note_id: Unique identifier from Memory API.
        content: Full text content of the note.
        created_at: ISO 8601 creation timestamp.
        rank: Relevance rank (1 = best match).
        source_type: Classified artifact type (e.g. raw_pr_thread).
        display_ref: Optional human-friendly label (e.g. PR #101).
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    note_id: str
    content: str
    created_at: str  # Kept as an ISO 8601 string; not parsed to datetime here.
    rank: int  # Lower is better; 1 is the best match.
    source_type: str | None = None
    display_ref: str | None = None
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class SanitizedArtifact(BaseModel):
    """An artifact after sanitization/redaction processing.

    Carries the same fields as RetrievedArtifact plus redaction audit metadata.

    Args:
        note_id: Unique identifier from Memory API.
        content: Sanitized text content (secrets/PII redacted).
        created_at: ISO 8601 creation timestamp.
        rank: Relevance rank.
        source_type: Classified artifact type.
        display_ref: Optional human-friendly label.
        redaction_applied: Whether any redaction was performed.
        redaction_types: List of redaction categories applied.
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    note_id: str
    content: str
    created_at: str
    rank: int
    source_type: str | None = None
    display_ref: str | None = None
    redaction_applied: bool = False
    # Pydantic copies field defaults per instance, so the mutable [] default
    # is safe here (unlike a mutable default argument on a plain function).
    redaction_types: list[str] = []
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class SanitizationResult(BaseModel):
    """Aggregate result of sanitization across all artifacts.

    Args:
        artifacts: List of sanitized artifacts.
        redaction_applied: Whether any redaction occurred across the set.
        redaction_types: Union of all redaction categories applied.
        confidence_score: Sanitization confidence (0-100).
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    artifacts: list[SanitizedArtifact]
    redaction_applied: bool  # True if any single artifact was redacted.
    redaction_types: list[str]  # De-duplicated union across artifacts.
    confidence_score: int  # 0-100 per the docstring above.
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class BudgetResult(BaseModel):
    """Result of context-budget packing.

    Args:
        included: Artifacts selected for synthesis (within budget).
        excluded: Artifacts cut due to budget constraints.
        truncation_flags: Map of note_id -> was_truncated.
        included_count: Number of included artifacts.
        excluded_count: Number of excluded artifacts.
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    included: list[SanitizedArtifact]
    excluded: list[SanitizedArtifact]
    # Keyed by note_id; True means the artifact's content was truncated.
    truncation_flags: dict[str, bool]
    included_count: int  # Convenience count; mirrors len(included).
    excluded_count: int  # Convenience count; mirrors len(excluded).
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class GroundedCitation(BaseModel):
    """A citation validated against retrieved artifacts.

    Args:
        note_id: The artifact note_id this citation references.
        display_label: Human-friendly label (e.g. PR #101, Issue #42).
        reference_type: Category of the reference.
        grounding_status: Whether the citation is grounded or dropped.
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    note_id: str
    display_label: str
    reference_type: ReferenceType
    grounding_status: GroundingStatus
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class TimelineEvent(BaseModel):
    """A classified event in a chronological history timeline.

    Args:
        timestamp: ISO 8601 timestamp of the event.
        event_class: Classification (Introduction, Expansion, Bug Fix, etc.).
        summary: Brief description of the event.
        supporting_refs: List of note_ids supporting this event.
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    timestamp: str  # ISO 8601 string; not parsed to datetime here.
    event_class: str  # Free-form classification label, not an enum.
    summary: str
    # Pydantic copies field defaults per instance, so [] is safe here.
    supporting_refs: list[str] = []
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class SynthesisRequest(BaseModel):
    """Request payload for LLM synthesis.

    Args:
        mode: ask or history pipeline selector.
        query: User's question or subject.
        provider: LLM provider name.
        model: LLM model identifier.
        prompt_template_version: Version tag for prompt template.
        artifacts: Packed, sanitized artifacts for context.
        budget_meta: Optional budget metadata for diagnostics.
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    mode: QueryMode
    query: str
    provider: str
    model: str
    prompt_template_version: str
    artifacts: list[SanitizedArtifact]
    # Loosely typed diagnostics bag; values are intentionally opaque here.
    budget_meta: dict[str, object] | None = None
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class SynthesisResponse(BaseModel):
    """Response from LLM synthesis.

    Args:
        answer_text: The synthesized answer or timeline narrative.
        evidence_refs: Grounded citation references.
        timeline_events: Classified timeline events (history mode).
        warnings: Any warnings (truncation, partial grounding, etc.).
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    answer_text: str
    # Pydantic copies field defaults per instance, so the mutable [] defaults
    # below are safe (unlike mutable default arguments on plain functions).
    evidence_refs: list[GroundedCitation] = []
    timeline_events: list[TimelineEvent] = []  # Empty in ask mode.
    warnings: list[str] = []
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
class QueryResultEnvelope(BaseModel):
    """Final result envelope returned to CLI layer.

    Args:
        mode: ask or history.
        answer: Synthesized answer text (ask mode).
        timeline: Chronological events (history mode).
        citations: Grounded citations for evidence display.
        fallback_used: Whether fallback was triggered.
        warnings: Any warnings to display.
        fallback_reason: Categorized reason if fallback was used.
    """

    # Frozen: instances are immutable after construction.
    model_config = ConfigDict(frozen=True)

    mode: QueryMode
    answer: str | None = None  # Populated in ask mode.
    timeline: list[TimelineEvent] | None = None  # Populated in history mode.
    citations: list[GroundedCitation]
    fallback_used: bool
    # Pydantic copies field defaults per instance, so [] is safe here.
    warnings: list[str] = []
    fallback_reason: FallbackReason | None = None  # Set iff fallback_used.
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Parsers for reference extraction and normalization."""
|
|
2
|
+
|
|
3
|
+
from avos_cli.parsers.artifact_ref_extractor import (
|
|
4
|
+
ArtifactRef,
|
|
5
|
+
NoteRefs,
|
|
6
|
+
collect_all_refs,
|
|
7
|
+
extract_refs,
|
|
8
|
+
extract_refs_by_note,
|
|
9
|
+
extract_refs_from_hits,
|
|
10
|
+
)
|
|
11
|
+
from avos_cli.parsers.reference_parser import ReferenceParser
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"ArtifactRef",
|
|
15
|
+
"NoteRefs",
|
|
16
|
+
"ReferenceParser",
|
|
17
|
+
"collect_all_refs",
|
|
18
|
+
"extract_refs",
|
|
19
|
+
"extract_refs_by_note",
|
|
20
|
+
"extract_refs_from_hits",
|
|
21
|
+
]
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""Extract PR numbers and commit hashes from memory search hit content.
|
|
2
|
+
|
|
3
|
+
This module provides utilities to parse structured tags from artifact content
|
|
4
|
+
returned by the Avos Memory API. The tags follow the format established by
|
|
5
|
+
the ingest builders (pr_builder.py, commit_builder.py):
|
|
6
|
+
|
|
7
|
+
[pr: #42]
|
|
8
|
+
[hash: abc1234...]
|
|
9
|
+
[repo: org/repo]
|
|
10
|
+
|
|
11
|
+
Functions:
|
|
12
|
+
extract_refs: Extract refs from a single content string.
|
|
13
|
+
extract_refs_from_hits: Map extraction over a list of SearchHit objects.
|
|
14
|
+
collect_all_refs: Aggregate unique refs across all hits.
|
|
15
|
+
extract_refs_by_note: Extract refs grouped by note_id as unified string arrays.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
from typing import TYPE_CHECKING
|
|
22
|
+
|
|
23
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from avos_cli.models.api import SearchHit
|
|
27
|
+
|
|
28
|
+
# Compiled regex patterns for tag extraction (shared across the codebase).
# Tags follow the ingest-builder format documented in the module docstring.
# Matches "[pr: #42]" tags; captures the PR number digits.
_PR_RE = re.compile(r"\[pr:\s*#(\d+)\]", re.IGNORECASE)
# Matches "[hash: abc1234]" tags; captures the hex commit hash.
_HASH_RE = re.compile(r"\[hash:\s*([a-f0-9]+)\]", re.IGNORECASE)
# Matches "[repo: org/repo]" tags; captures the "owner/name" slug.
_REPO_RE = re.compile(r"\[repo:\s*([a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+)\]", re.IGNORECASE)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _build_references(pr_numbers: list[int], commit_hashes: list[str]) -> list[str]:
|
|
35
|
+
"""Build unified reference strings from PR numbers and commit hashes.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
pr_numbers: List of PR numbers.
|
|
39
|
+
commit_hashes: List of commit hashes.
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
List of formatted reference strings like ["pr #42", "commit abc1234"].
|
|
43
|
+
"""
|
|
44
|
+
refs: list[str] = []
|
|
45
|
+
for pr in pr_numbers:
|
|
46
|
+
refs.append(f"pr #{pr}")
|
|
47
|
+
for h in commit_hashes:
|
|
48
|
+
refs.append(f"commit {h}")
|
|
49
|
+
return refs
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ArtifactRef(BaseModel):
|
|
53
|
+
"""Structured references extracted from a memory artifact.
|
|
54
|
+
|
|
55
|
+
Attributes:
|
|
56
|
+
pr_numbers: List of unique PR numbers found in the content.
|
|
57
|
+
commit_hashes: List of unique commit hashes found in the content.
|
|
58
|
+
references: Unified list of reference strings (e.g., ["pr #42", "commit abc1234"]).
|
|
59
|
+
repo: Repository slug (owner/name) if found, else None.
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
model_config = ConfigDict(frozen=True)
|
|
63
|
+
|
|
64
|
+
pr_numbers: list[int] = Field(default_factory=list)
|
|
65
|
+
commit_hashes: list[str] = Field(default_factory=list)
|
|
66
|
+
references: list[str] = Field(default_factory=list)
|
|
67
|
+
repo: str | None = None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class NoteRefs(BaseModel):
|
|
71
|
+
"""Per-note reference storage.
|
|
72
|
+
|
|
73
|
+
Attributes:
|
|
74
|
+
note_id: The note identifier from the memory artifact.
|
|
75
|
+
references: Unified list of reference strings (e.g., ["pr #42", "commit abc1234"]).
|
|
76
|
+
"""
|
|
77
|
+
|
|
78
|
+
model_config = ConfigDict(frozen=True)
|
|
79
|
+
|
|
80
|
+
note_id: str
|
|
81
|
+
references: list[str] = Field(default_factory=list)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def extract_refs(content: str) -> ArtifactRef:
    """Extract PR numbers, commit hashes, and repo from content string.

    Args:
        content: Raw text content from a memory artifact.

    Returns:
        ArtifactRef with extracted references (deduplicated, first-seen order).
    """
    # dict.fromkeys removes duplicates while preserving first-seen order.
    unique_prs = list(dict.fromkeys(int(num) for num in _PR_RE.findall(content)))
    unique_hashes = list(dict.fromkeys(_HASH_RE.findall(content)))

    # Only the first repo tag is used; extra occurrences are ignored.
    repo_hit = _REPO_RE.search(content)

    return ArtifactRef(
        pr_numbers=unique_prs,
        commit_hashes=unique_hashes,
        references=_build_references(unique_prs, unique_hashes),
        repo=repo_hit.group(1) if repo_hit else None,
    )
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def extract_refs_from_hits(
    hits: list[SearchHit],
) -> list[tuple[SearchHit, ArtifactRef]]:
    """Extract refs from each SearchHit, preserving hit association.

    Args:
        hits: List of SearchHit objects from memory search.

    Returns:
        List of (SearchHit, ArtifactRef) tuples in the same order as input.
    """
    pairs: list[tuple[SearchHit, ArtifactRef]] = []
    for hit in hits:
        pairs.append((hit, extract_refs(hit.content)))
    return pairs
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def collect_all_refs(hits: list[SearchHit]) -> ArtifactRef:
    """Aggregate unique refs across all search hits.

    Args:
        hits: List of SearchHit objects from memory search.

    Returns:
        Single ArtifactRef with deduplicated PRs and hashes from all hits.
        Uses the first non-None repo found.
    """
    per_hit = [extract_refs(hit.content) for hit in hits]

    # Flatten then dedupe; dict.fromkeys preserves first-seen order.
    merged_prs = list(dict.fromkeys(pr for ref in per_hit for pr in ref.pr_numbers))
    merged_hashes = list(
        dict.fromkeys(sha for ref in per_hit for sha in ref.commit_hashes)
    )
    repo = next((ref.repo for ref in per_hit if ref.repo is not None), None)

    return ArtifactRef(
        pr_numbers=merged_prs,
        commit_hashes=merged_hashes,
        references=_build_references(merged_prs, merged_hashes),
        repo=repo,
    )
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def extract_refs_by_note(hits: list[SearchHit]) -> list[NoteRefs]:
    """Extract refs grouped by note_id as unified string arrays.

    Args:
        hits: List of SearchHit objects from memory search.

    Returns:
        List of NoteRefs, one per hit, with references as formatted strings.
        Example: [NoteRefs(note_id="11", references=["pr #123", "commit qdsf"])]
    """
    return [
        NoteRefs(note_id=hit.note_id, references=extract_refs(hit.content).references)
        for hit in hits
    ]
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""Reference parser for PR and commit identifiers.
|
|
2
|
+
|
|
3
|
+
Provides regex-based parsing of various PR and commit reference formats
|
|
4
|
+
without LLM interpretation. Fails fast on ambiguous inputs.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from typing import ClassVar
|
|
11
|
+
|
|
12
|
+
from avos_cli.models.diff import DiffReferenceType, ParsedReference
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ReferenceParser:
    """Parses raw reference strings into structured ParsedReference objects.

    Supports formats:
        - PR: "PR #1245", "pr #1245", "PR#1245", "#1245", "org/repo#1245"
        - Commit: "Commit 8c3a1b2", "commit abc123", bare SHA (7-40 hex chars)
        - URLs: "https://github.com/org/repo/pull/123", ".../commit/abc123"

    Pure regex matching; no LLM interpretation. Unparseable input yields None.
    """

    # Tried in order; the first matching pattern wins.
    _PR_PATTERNS: ClassVar[list[re.Pattern[str]]] = [
        # GitHub PR URL: https://github.com/org/repo/pull/123
        re.compile(
            r"(?:https?://)?github\.com/(?P<repo>[^/]+/[^/]+)/pull/(?P<num>\d+)",
            re.IGNORECASE,
        ),
        # org/repo#123 or github.com/org/repo#123
        re.compile(
            r"(?:github\.com/)?(?P<repo>[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+)#(?P<num>\d+)",
            re.IGNORECASE,
        ),
        # PR #123 or PR#123 or pr #123
        re.compile(r"\bpr\s*#\s*(?P<num>\d+)", re.IGNORECASE),
        # Bare #123 (assumes PR in context)
        re.compile(r"^#(?P<num>\d+)(?:\s|$)"),
    ]

    _COMMIT_PATTERNS: ClassVar[list[re.Pattern[str]]] = [
        # GitHub commit URL: https://github.com/org/repo/commit/abc123
        re.compile(
            r"(?:https?://)?github\.com/(?P<repo>[^/]+/[^/]+)/commit/(?P<sha>[a-f0-9]{7,40})",
            re.IGNORECASE,
        ),
        # Commit abc123 or commit: abc123
        re.compile(r"\bcommit:?\s*(?P<sha>[a-f0-9]{7,40})", re.IGNORECASE),
        # Bare SHA (7-40 hex chars, must be whole token)
        re.compile(r"^(?P<sha>[a-f0-9]{7,40})$", re.IGNORECASE),
    ]

    def parse(self, raw: str, default_repo: str | None) -> ParsedReference | None:
        """Parse a single raw reference string.

        Args:
            raw: The raw reference string to parse.
            default_repo: Default repository slug if not specified in reference.
                After ``avos connect``, use :func:`~avos_cli.config.manager.connected_repo_slug`.

        Returns:
            ParsedReference if successfully parsed, None otherwise.
        """
        if not raw or not raw.strip():
            return None

        text = raw.strip()

        # PR formats take precedence over commit formats.
        for pr_pattern in self._PR_PATTERNS:
            pr_match = pr_pattern.search(text)
            if pr_match is None:
                continue
            number_text = pr_match.group("num")
            if int(number_text) <= 0:
                # A zero PR number is invalid; fail fast rather than falling
                # through to the commit patterns.
                return None
            return ParsedReference(
                reference_type=DiffReferenceType.PR,
                raw_id=number_text,
                repo_slug=pr_match.groupdict().get("repo") or default_repo,
            )

        for commit_pattern in self._COMMIT_PATTERNS:
            commit_match = commit_pattern.search(text)
            if commit_match is None:
                continue
            return ParsedReference(
                reference_type=DiffReferenceType.COMMIT,
                raw_id=commit_match.group("sha").lower(),
                repo_slug=commit_match.groupdict().get("repo") or default_repo,
            )

        return None

    def parse_all(
        self, raw_list: list[str], default_repo: str | None
    ) -> list[ParsedReference]:
        """Parse multiple raw reference strings.

        Args:
            raw_list: List of raw reference strings to parse.
            default_repo: Default repository slug if not specified.
                After ``avos connect``, use :func:`~avos_cli.config.manager.connected_repo_slug`.

        Returns:
            List of successfully parsed references (invalid ones are skipped).
        """
        return [
            parsed
            for parsed in (self.parse(raw, default_repo) for raw in raw_list)
            if parsed is not None
        ]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Shared service modules for AVOS CLI."""
|