git-aware-coding-agent 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. avos_cli/__init__.py +3 -0
  2. avos_cli/agents/avos_ask_agent.md +47 -0
  3. avos_cli/agents/avos_ask_agent_JSON_converter.md +78 -0
  4. avos_cli/agents/avos_hisotry_agent_JSON_converter.md +92 -0
  5. avos_cli/agents/avos_history_agent.md +58 -0
  6. avos_cli/agents/git_diff_agent.md +63 -0
  7. avos_cli/artifacts/__init__.py +17 -0
  8. avos_cli/artifacts/base.py +47 -0
  9. avos_cli/artifacts/commit_builder.py +35 -0
  10. avos_cli/artifacts/doc_builder.py +30 -0
  11. avos_cli/artifacts/issue_builder.py +37 -0
  12. avos_cli/artifacts/pr_builder.py +50 -0
  13. avos_cli/cli/__init__.py +1 -0
  14. avos_cli/cli/main.py +504 -0
  15. avos_cli/commands/__init__.py +1 -0
  16. avos_cli/commands/ask.py +541 -0
  17. avos_cli/commands/connect.py +363 -0
  18. avos_cli/commands/history.py +549 -0
  19. avos_cli/commands/hook_install.py +260 -0
  20. avos_cli/commands/hook_sync.py +231 -0
  21. avos_cli/commands/ingest.py +506 -0
  22. avos_cli/commands/ingest_pr.py +239 -0
  23. avos_cli/config/__init__.py +1 -0
  24. avos_cli/config/hash_store.py +93 -0
  25. avos_cli/config/lock.py +122 -0
  26. avos_cli/config/manager.py +180 -0
  27. avos_cli/config/state.py +90 -0
  28. avos_cli/exceptions.py +272 -0
  29. avos_cli/models/__init__.py +58 -0
  30. avos_cli/models/api.py +75 -0
  31. avos_cli/models/artifacts.py +99 -0
  32. avos_cli/models/config.py +56 -0
  33. avos_cli/models/diff.py +117 -0
  34. avos_cli/models/query.py +234 -0
  35. avos_cli/parsers/__init__.py +21 -0
  36. avos_cli/parsers/artifact_ref_extractor.py +173 -0
  37. avos_cli/parsers/reference_parser.py +117 -0
  38. avos_cli/services/__init__.py +1 -0
  39. avos_cli/services/chronology_service.py +68 -0
  40. avos_cli/services/citation_validator.py +134 -0
  41. avos_cli/services/context_budget_service.py +104 -0
  42. avos_cli/services/diff_resolver.py +398 -0
  43. avos_cli/services/diff_summary_service.py +141 -0
  44. avos_cli/services/git_client.py +351 -0
  45. avos_cli/services/github_client.py +443 -0
  46. avos_cli/services/llm_client.py +312 -0
  47. avos_cli/services/memory_client.py +323 -0
  48. avos_cli/services/query_fallback_formatter.py +108 -0
  49. avos_cli/services/reply_output_service.py +341 -0
  50. avos_cli/services/sanitization_service.py +218 -0
  51. avos_cli/utils/__init__.py +1 -0
  52. avos_cli/utils/dotenv_load.py +50 -0
  53. avos_cli/utils/hashing.py +22 -0
  54. avos_cli/utils/logger.py +77 -0
  55. avos_cli/utils/output.py +232 -0
  56. avos_cli/utils/sanitization_diagnostics.py +81 -0
  57. avos_cli/utils/time_helpers.py +56 -0
  58. git_aware_coding_agent-1.0.0.dist-info/METADATA +390 -0
  59. git_aware_coding_agent-1.0.0.dist-info/RECORD +62 -0
  60. git_aware_coding_agent-1.0.0.dist-info/WHEEL +4 -0
  61. git_aware_coding_agent-1.0.0.dist-info/entry_points.txt +2 -0
  62. git_aware_coding_agent-1.0.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,506 @@
1
+ """Ingest command orchestrator for AVOS CLI.
2
+
3
+ Implements the `avos ingest org/repo --since Nd` flow: fetches PRs,
4
+ issues, commits, and docs, builds artifacts, deduplicates via content
5
+ hash, and stores in Avos Memory. Supports partial failure (exit 3).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import glob as globmod
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+ from avos_cli.artifacts.commit_builder import CommitBuilder
16
+ from avos_cli.artifacts.doc_builder import DocBuilder
17
+ from avos_cli.artifacts.issue_builder import IssueBuilder
18
+ from avos_cli.artifacts.pr_builder import PRThreadBuilder
19
+ from avos_cli.config.hash_store import IngestHashStore
20
+ from avos_cli.config.lock import IngestLockManager
21
+ from avos_cli.config.manager import load_config
22
+ from avos_cli.exceptions import (
23
+ AvosError,
24
+ ConfigurationNotInitializedError,
25
+ IngestLockError,
26
+ )
27
+ from avos_cli.models.artifacts import (
28
+ CommitArtifact,
29
+ DocArtifact,
30
+ IssueArtifact,
31
+ PRArtifact,
32
+ )
33
+ from avos_cli.services.git_client import GitClient
34
+ from avos_cli.services.github_client import GitHubClient
35
+ from avos_cli.services.memory_client import AvosMemoryClient
36
+ from avos_cli.utils.logger import get_logger
37
+ from avos_cli.utils.output import print_error, print_info, print_json, print_success, render_table
38
+ from avos_cli.utils.time_helpers import days_ago
39
+
40
+ _log = get_logger("commands.ingest")
41
+
42
+ _DOC_GLOBS = [
43
+ "README*",
44
+ "docs/**/*.md",
45
+ "adr/**/*.md",
46
+ "**/*ADR*.md",
47
+ ]
48
+
49
+
50
+ _EXIT_PRECEDENCE = {2: 4, 3: 3, 1: 2, 0: 1}
51
+
52
+
53
+ def resolve_exit_code(*codes: int) -> int:
54
+ """Return the highest-precedence exit code (2 > 3 > 1 > 0)."""
55
+ if not codes:
56
+ return 0
57
+ return max(codes, key=lambda c: _EXIT_PRECEDENCE.get(c, 0))
58
+
59
+
60
@dataclass
class IngestStageResult:
    """Per-stage counters collected while running one ingest stage.

    Attributes:
        processed: Total items attempted.
        stored: Items successfully stored in Avos Memory.
        skipped: Items skipped due to deduplication.
        failed: Items that failed to store.
        hard_failure: True if an upstream/external error caused total stage failure.
    """

    processed: int = 0
    stored: int = 0
    skipped: int = 0
    failed: int = 0
    hard_failure: bool = False

    @property
    def has_failures(self) -> bool:
        """Whether at least one item failed in this stage."""
        return bool(self.failed)

    @property
    def exit_code(self) -> int:
        """Per-stage exit code: 2 if hard external, 3 if partial, else 0."""
        if self.hard_failure:
            return 2
        return 3 if self.failed else 0
90
+
91
+
92
class IngestOrchestrator:
    """Orchestrates the `avos ingest` command.

    Pipeline per stage: fetch -> build artifact -> check hash -> store.
    Exit codes: 0=success, 1=precondition, 2=hard external, 3=partial.

    Args:
        memory_client: Avos Memory API client.
        github_client: GitHub REST API client.
        git_client: Local git operations wrapper.
        hash_store: Content hash store for deduplication.
        lock_manager: Ingest lock manager.
        repo_root: Path to the repository root.
    """

    def __init__(
        self,
        memory_client: AvosMemoryClient,
        github_client: GitHubClient,
        git_client: GitClient,
        hash_store: IngestHashStore,
        lock_manager: IngestLockManager,
        repo_root: Path,
    ) -> None:
        self._memory = memory_client
        self._github = github_client
        self._git = git_client
        self._hash_store = hash_store
        self._lock = lock_manager
        self._repo_root = repo_root
        self._pr_builder = PRThreadBuilder()
        self._issue_builder = IssueBuilder()
        self._commit_builder = CommitBuilder()
        self._doc_builder = DocBuilder()
        # Fix: previously this flag was created only inside run(), so any
        # helper consulting it earlier would raise AttributeError. Default
        # to human output until run() sets the caller's choice.
        self._json_output = False

    def run(self, repo_slug: str, since_days: int, json_output: bool = False) -> int:
        """Execute the ingest flow.

        Args:
            repo_slug: Repository identifier in 'org/repo' format.
            since_days: Number of days to look back.
            json_output: If True, emit JSON output instead of human UI.

        Returns:
            Exit code: 0, 1, 2, or 3.
        """
        self._json_output = json_output

        if not self._validate_slug(repo_slug):
            self._emit_error("REPOSITORY_CONTEXT_ERROR", "Invalid repo slug. Expected 'org/repo'.")
            return 1

        owner, repo = repo_slug.split("/", 1)

        try:
            config = load_config(self._repo_root)
        except ConfigurationNotInitializedError as e:
            self._emit_error("CONFIG_NOT_INITIALIZED", str(e), hint="Run 'avos connect org/repo' first.")
            return 1
        except AvosError as e:
            self._emit_error(e.code, str(e))
            return 1

        try:
            self._lock.acquire()
        except IngestLockError as e:
            self._emit_error("INGEST_LOCK_CONFLICT", str(e))
            return 1

        try:
            return self._run_pipeline(owner, repo, repo_slug, config.memory_id, since_days)
        finally:
            # Always release the ingest lock, even on unexpected errors.
            self._lock.release()

    def _emit_error(
        self, code: str, message: str, hint: str | None = None, retryable: bool = False
    ) -> None:
        """Emit error in JSON or human format based on mode."""
        if self._json_output:
            print_json(
                success=False,
                data=None,
                error={"code": code, "message": message, "hint": hint, "retryable": retryable},
            )
        else:
            print_error(f"[{code}] {message}")

    def _progress(self, message: str) -> None:
        """Print a human progress line unless JSON output mode is active.

        Fix: per-item progress was previously printed unconditionally,
        which interleaved plain text with the final JSON document on
        stdout in --json mode and broke machine-readable output.
        """
        if not self._json_output:
            print_info(message)

    def _store_if_new(
        self,
        *,
        memory_id: str,
        text: str,
        content_hash: str,
        kind: str,
        ref: str,
        result: IngestStageResult,
    ) -> None:
        """Dedupe by content hash, store in Avos Memory, update stage counters.

        Args:
            memory_id: Target memory identifier.
            text: Rendered artifact text to store.
            content_hash: Deduplication hash for the artifact.
            kind: Artifact kind label recorded in the hash store.
            ref: Human-readable reference (PR/issue number, commit hash, path).
            result: Stage counters to increment (skipped or stored).
        """
        if self._hash_store.contains(content_hash):
            result.skipped += 1
            return
        self._memory.add_memory(memory_id=memory_id, content=text)
        self._hash_store.add(content_hash, kind, ref)
        result.stored += 1

    def _run_pipeline(
        self, owner: str, repo: str, repo_slug: str, memory_id: str, since_days: int
    ) -> int:
        """Run the 4-stage ingest pipeline inside the lock."""
        since_date = days_ago(since_days).isoformat()
        results: list[IngestStageResult] = []

        self._progress(f"Ingesting {repo_slug} (last {since_days} days)")
        self._progress("[Stage 1/4: PRs]")
        results.append(self._ingest_prs(owner, repo, since_date, memory_id))

        self._progress("[Stage 2/4: Issues]")
        results.append(self._ingest_issues(owner, repo, since_date, memory_id))

        self._progress("[Stage 3/4: Commits]")
        results.append(self._ingest_commits(repo_slug, since_date, memory_id))

        self._progress("[Stage 4/4: Docs]")
        results.append(self._ingest_docs(repo_slug, memory_id))

        # Persist dedupe hashes even on partial failure so retries skip
        # everything already stored.
        self._hash_store.save()
        self._print_summary(results)

        return resolve_exit_code(*(r.exit_code for r in results))

    def _ingest_prs(
        self, owner: str, repo: str, since_date: str, memory_id: str
    ) -> IngestStageResult:
        """Fetch PRs, build artifacts, dedupe, and store."""
        result = IngestStageResult()
        try:
            pr_list = self._github.list_pull_requests(owner, repo, since_date=since_date)
        except AvosError as e:
            # Listing failed entirely: hard external failure for this stage.
            _log.error("Failed to fetch PR list: %s", e)
            result.failed += 1
            result.hard_failure = True
            return result

        total = len(pr_list)
        for idx, pr_summary in enumerate(pr_list, 1):
            result.processed += 1
            self._progress(f"  PR {idx}/{total}: #{pr_summary.get('number', '?')}")
            try:
                pr_detail = self._github.get_pr_details(owner, repo, pr_summary["number"])
                artifact = self._build_pr_artifact(repo, owner, pr_detail)
                self._store_if_new(
                    memory_id=memory_id,
                    text=self._pr_builder.build(artifact),
                    content_hash=self._pr_builder.content_hash(artifact),
                    kind="pr",
                    ref=str(pr_summary["number"]),
                    result=result,
                )
            except Exception as e:
                # One bad PR must not abort the stage (partial failure, exit 3).
                _log.error("Failed to ingest PR #%s: %s", pr_summary.get("number"), e)
                result.failed += 1

        return result

    def _ingest_issues(
        self, owner: str, repo: str, since_date: str, memory_id: str
    ) -> IngestStageResult:
        """Fetch issues, build artifacts, dedupe, and store."""
        result = IngestStageResult()
        try:
            issue_list = self._github.list_issues(owner, repo, since_date=since_date)
        except AvosError as e:
            _log.error("Failed to fetch issue list: %s", e)
            result.failed += 1
            result.hard_failure = True
            return result

        total = len(issue_list)
        for idx, issue_summary in enumerate(issue_list, 1):
            result.processed += 1
            self._progress(f"  Issue {idx}/{total}: #{issue_summary.get('number', '?')}")
            try:
                issue_detail = self._github.get_issue_details(
                    owner, repo, issue_summary["number"]
                )
                artifact = self._build_issue_artifact(repo, owner, issue_detail)
                self._store_if_new(
                    memory_id=memory_id,
                    text=self._issue_builder.build(artifact),
                    content_hash=self._issue_builder.content_hash(artifact),
                    kind="issue",
                    ref=str(issue_summary["number"]),
                    result=result,
                )
            except Exception as e:
                _log.error("Failed to ingest issue #%s: %s", issue_summary.get("number"), e)
                result.failed += 1

        return result

    def _ingest_commits(
        self, repo_slug: str, since_date: str, memory_id: str
    ) -> IngestStageResult:
        """Fetch commits from local git, build artifacts, dedupe, and store."""
        result = IngestStageResult()
        try:
            commits = self._git.commit_log(self._repo_root, since_date=since_date)
        except AvosError as e:
            _log.error("Failed to fetch commit log: %s", e)
            result.failed += 1
            result.hard_failure = True
            return result

        total = len(commits)
        for idx, commit_data in enumerate(commits, 1):
            result.processed += 1
            short_hash = str(commit_data.get("hash", "?"))[:8]
            self._progress(f"  Commit {idx}/{total}: {short_hash}")
            try:
                artifact = CommitArtifact(
                    repo=repo_slug,
                    hash=commit_data["hash"],
                    message=commit_data["message"],
                    author=commit_data["author"],
                    date=commit_data["date"],
                )
                self._store_if_new(
                    memory_id=memory_id,
                    text=self._commit_builder.build(artifact),
                    content_hash=self._commit_builder.content_hash(artifact),
                    kind="commit",
                    ref=commit_data["hash"],
                    result=result,
                )
            except Exception as e:
                _log.error("Failed to ingest commit %s: %s", commit_data.get("hash"), e)
                result.failed += 1

        return result

    def _ingest_docs(self, repo_slug: str, memory_id: str) -> IngestStageResult:
        """Discover and ingest local documentation files."""
        result = IngestStageResult()
        doc_paths = self._discover_docs()

        total = len(doc_paths)
        for idx, doc_path in enumerate(doc_paths, 1):
            result.processed += 1
            self._progress(f"  Doc {idx}/{total}: {doc_path.name}")
            try:
                content = doc_path.read_text(encoding="utf-8")
                rel_path = str(doc_path.relative_to(self._repo_root))
                content_type = self._classify_doc(rel_path)

                artifact = DocArtifact(
                    repo=repo_slug,
                    path=rel_path,
                    content_type=content_type,
                    content=content,
                )
                self._store_if_new(
                    memory_id=memory_id,
                    text=self._doc_builder.build(artifact),
                    content_hash=self._doc_builder.content_hash(artifact),
                    kind="doc",
                    ref=rel_path,
                    result=result,
                )
            except Exception as e:
                _log.error("Failed to ingest doc %s: %s", doc_path, e)
                result.failed += 1

        return result

    def _discover_docs(self) -> list[Path]:
        """Find documentation files using the module-level glob patterns.

        Returns:
            Sorted, de-duplicated list of matching files, excluding
            anything under the .avos directory.
        """
        found: set[Path] = set()
        for pattern in _DOC_GLOBS:
            matches = globmod.glob(str(self._repo_root / pattern), recursive=True)
            for m in matches:
                p = Path(m)
                if p.is_file() and not self._is_in_avos_dir(p):
                    found.add(p)
        return sorted(found)

    def _is_in_avos_dir(self, path: Path) -> bool:
        """Check if a path is inside the .avos directory."""
        try:
            # relative_to raises ValueError when path is outside .avos.
            path.relative_to(self._repo_root / ".avos")
            return True
        except ValueError:
            return False

    def _build_pr_artifact(
        self, repo: str, owner: str, pr_detail: dict[str, Any]
    ) -> PRArtifact:
        """Transform GitHub PR detail dict into a PRArtifact.

        NOTE(review): assumes the GitHub REST response shape (keys like
        'files', 'comments', 'reviews', 'user.login') — verify against
        GitHubClient.get_pr_details.
        """
        files = [f["filename"] for f in pr_detail.get("files", [])]
        comments = pr_detail.get("comments", [])
        reviews = pr_detail.get("reviews", [])
        discussion_parts: list[str] = []
        for c in comments:
            user = c.get("user", {}).get("login", "unknown")
            discussion_parts.append(f"{user}: {c.get('body', '')}")
        for r in reviews:
            user = r.get("user", {}).get("login", "unknown")
            discussion_parts.append(f"{user} ({r.get('state', '')}): {r.get('body', '')}")

        return PRArtifact(
            repo=f"{owner}/{repo}",
            pr_number=pr_detail["number"],
            title=pr_detail.get("title", ""),
            author=pr_detail.get("user", {}).get("login", "unknown"),
            merged_date=pr_detail.get("merged_at"),
            files=files,
            description=pr_detail.get("body"),
            discussion="\n".join(discussion_parts) if discussion_parts else None,
        )

    def _build_issue_artifact(
        self, repo: str, owner: str, issue_data: dict[str, Any]
    ) -> IssueArtifact:
        """Transform GitHub issue dict into an IssueArtifact.

        Defensive: labels/comments may be missing or mistyped in the
        upstream payload, so non-list values collapse to empty lists.
        """
        raw_labels = issue_data.get("labels", [])
        labels = (
            [lbl["name"] for lbl in raw_labels if isinstance(lbl, dict) and "name" in lbl]
            if isinstance(raw_labels, list)
            else []
        )
        raw_comments = issue_data.get("comments", [])
        comments = (
            [
                f"{c.get('user', {}).get('login', 'unknown')}: {c.get('body', '')}"
                for c in raw_comments
                if isinstance(c, dict)
            ]
            if isinstance(raw_comments, list)
            else []
        )
        return IssueArtifact(
            repo=f"{owner}/{repo}",
            issue_number=issue_data["number"],
            title=issue_data.get("title", ""),
            labels=labels,
            body=issue_data.get("body"),
            comments=comments,
        )

    @staticmethod
    def _classify_doc(rel_path: str) -> str:
        """Classify a document by its path (readme > adr > design > other)."""
        lower = rel_path.lower()
        if "readme" in lower:
            return "readme"
        if "adr" in lower:
            return "adr"
        if "design" in lower:
            return "design_doc"
        return "documentation"

    @staticmethod
    def _validate_slug(slug: str) -> bool:
        """Return True if slug has non-empty 'org' and 'repo' parts."""
        if not slug or "/" not in slug:
            return False
        parts = slug.split("/", 1)
        return bool(parts[0]) and bool(parts[1])

    def _print_summary(self, results: list[IngestStageResult]) -> None:
        """Print a summary of all ingest stages as a Rich table or JSON.

        Args:
            results: Exactly four stage results, in pipeline order
                (PRs, Issues, Commits, Docs) — zip(strict=True) enforces this.
        """
        stage_names = ["PRs", "Issues", "Commits", "Docs"]
        total_stored = 0
        total_skipped = 0
        total_failed = 0

        stage_data = {}
        rows: list[list[str]] = []
        for name, r in zip(stage_names, results, strict=True):
            rows.append([name, str(r.stored), str(r.skipped), str(r.failed)])
            stage_data[name.lower()] = {"stored": r.stored, "skipped": r.skipped, "failed": r.failed}
            total_stored += r.stored
            total_skipped += r.skipped
            total_failed += r.failed

        if self._json_output:
            print_json(
                success=total_failed == 0,
                data={
                    "prs_ingested": stage_data.get("prs", {}).get("stored", 0),
                    "issues_ingested": stage_data.get("issues", {}).get("stored", 0),
                    "commits_ingested": stage_data.get("commits", {}).get("stored", 0),
                    "docs_ingested": stage_data.get("docs", {}).get("stored", 0),
                    "skipped_duplicates": total_skipped,
                    "failed": total_failed,
                    "stages": stage_data,
                },
                error={"code": "PARTIAL_FAILURE", "message": f"{total_failed} items failed"} if total_failed > 0 else None,
            )
            return

        if total_failed > 0:
            title = (
                f"Ingest Completed with Errors: "
                f"{total_stored} stored, {total_skipped} skipped, {total_failed} failed"
            )
        else:
            title = f"Ingest Complete: {total_stored} stored, {total_skipped} skipped"

        render_table(
            title,
            [("Stage", "bold"), ("Stored", "success"), ("Skipped", "dim"), ("Failed", "error")],
            rows,
        )

        if total_failed > 0:
            print_error(
                f"Total: {total_stored} stored, {total_skipped} skipped, {total_failed} failed"
            )
        else:
            print_success(
                f"Total: {total_stored} stored, {total_skipped} skipped"
            )