acc-context-compaction 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. acc_context_compaction-0.1.0/PKG-INFO +20 -0
  2. acc_context_compaction-0.1.0/README.md +17 -0
  3. acc_context_compaction-0.1.0/acc/__init__.py +1 -0
  4. acc_context_compaction-0.1.0/acc/api/__init__.py +1 -0
  5. acc_context_compaction-0.1.0/acc/api/routes.py +13 -0
  6. acc_context_compaction-0.1.0/acc/compaction/__init__.py +1 -0
  7. acc_context_compaction-0.1.0/acc/compaction/_formatter_deprecated.py +43 -0
  8. acc_context_compaction-0.1.0/acc/compaction/_parsers_deprecated.py +53 -0
  9. acc_context_compaction-0.1.0/acc/compaction/dedup_cache.py +117 -0
  10. acc_context_compaction-0.1.0/acc/compaction/parsers/__init__.py +58 -0
  11. acc_context_compaction-0.1.0/acc/compaction/parsers/base.py +46 -0
  12. acc_context_compaction-0.1.0/acc/compaction/parsers/build_parser.py +122 -0
  13. acc_context_compaction-0.1.0/acc/compaction/parsers/compiler_parser.py +141 -0
  14. acc_context_compaction-0.1.0/acc/compaction/parsers/git_diff_parser.py +291 -0
  15. acc_context_compaction-0.1.0/acc/compaction/parsers/linter_parser.py +187 -0
  16. acc_context_compaction-0.1.0/acc/compaction/parsers/pytest_parser.py +114 -0
  17. acc_context_compaction-0.1.0/acc/compaction/proxy.py +21 -0
  18. acc_context_compaction-0.1.0/acc/compaction/slicer.py +357 -0
  19. acc_context_compaction-0.1.0/acc/core/__init__.py +1 -0
  20. acc_context_compaction-0.1.0/acc/core/config.py +11 -0
  21. acc_context_compaction-0.1.0/acc/core/llm.py +63 -0
  22. acc_context_compaction-0.1.0/acc/core/logger.py +31 -0
  23. acc_context_compaction-0.1.0/acc/evals/harness.py +98 -0
  24. acc_context_compaction-0.1.0/acc/filters/__init__.py +2 -0
  25. acc_context_compaction-0.1.0/acc/filters/dedup.py +30 -0
  26. acc_context_compaction-0.1.0/acc/filters/head_tail.py +15 -0
  27. acc_context_compaction-0.1.0/acc/filters/noise.py +47 -0
  28. acc_context_compaction-0.1.0/acc/filters/pipeline.py +60 -0
  29. acc_context_compaction-0.1.0/acc/filters/profile_manager.py +21 -0
  30. acc_context_compaction-0.1.0/acc/filters/strip_ansi.py +7 -0
  31. acc_context_compaction-0.1.0/acc/mcp/__init__.py +1 -0
  32. acc_context_compaction-0.1.0/acc/mcp/server.py +122 -0
  33. acc_context_compaction-0.1.0/acc/mcp/tools_cli.py +13 -0
  34. acc_context_compaction-0.1.0/acc/mcp/tools_compaction.py +44 -0
  35. acc_context_compaction-0.1.0/acc/mcp/tools_memory.py +34 -0
  36. acc_context_compaction-0.1.0/acc/mcp/tools_repo.py +8 -0
  37. acc_context_compaction-0.1.0/acc/memory/db.py +25 -0
  38. acc_context_compaction-0.1.0/acc/memory/models.py +24 -0
  39. acc_context_compaction-0.1.0/acc/memory/repositories.py +103 -0
  40. acc_context_compaction-0.1.0/acc/repo/analyzer.py +69 -0
  41. acc_context_compaction-0.1.0/acc/repo/compressor.py +70 -0
  42. acc_context_compaction-0.1.0/acc/repo/ranker.py +23 -0
  43. acc_context_compaction-0.1.0/acc/schemas/__init__.py +1 -0
  44. acc_context_compaction-0.1.0/acc/schemas/common.py +5 -0
  45. acc_context_compaction-0.1.0/acc/schemas/compaction.py +10 -0
  46. acc_context_compaction-0.1.0/acc/schemas/memory.py +20 -0
  47. acc_context_compaction-0.1.0/acc/services/__init__.py +1 -0
  48. acc_context_compaction-0.1.0/acc/services/compaction_service.py +66 -0
  49. acc_context_compaction-0.1.0/acc/services/memory_service.py +11 -0
  50. acc_context_compaction-0.1.0/acc/structured/conversation.py +113 -0
  51. acc_context_compaction-0.1.0/acc/structured/json_minifier.py +33 -0
  52. acc_context_compaction-0.1.0/acc/structured/python_ast.py +63 -0
  53. acc_context_compaction-0.1.0/acc_context_compaction.egg-info/PKG-INFO +20 -0
  54. acc_context_compaction-0.1.0/acc_context_compaction.egg-info/SOURCES.txt +69 -0
  55. acc_context_compaction-0.1.0/acc_context_compaction.egg-info/dependency_links.txt +1 -0
  56. acc_context_compaction-0.1.0/acc_context_compaction.egg-info/entry_points.txt +3 -0
  57. acc_context_compaction-0.1.0/acc_context_compaction.egg-info/requires.txt +17 -0
  58. acc_context_compaction-0.1.0/acc_context_compaction.egg-info/top_level.txt +1 -0
  59. acc_context_compaction-0.1.0/pyproject.toml +33 -0
  60. acc_context_compaction-0.1.0/setup.cfg +4 -0
  61. acc_context_compaction-0.1.0/tests/test_conversation.py +27 -0
  62. acc_context_compaction-0.1.0/tests/test_dedup_cache.py +152 -0
  63. acc_context_compaction-0.1.0/tests/test_failsafe.py +34 -0
  64. acc_context_compaction-0.1.0/tests/test_filters.py +54 -0
  65. acc_context_compaction-0.1.0/tests/test_memory.py +34 -0
  66. acc_context_compaction-0.1.0/tests/test_parser_plugins.py +243 -0
  67. acc_context_compaction-0.1.0/tests/test_parsers.py +13 -0
  68. acc_context_compaction-0.1.0/tests/test_repo.py +66 -0
  69. acc_context_compaction-0.1.0/tests/test_slicer.py +129 -0
  70. acc_context_compaction-0.1.0/tests/test_structured.py +43 -0
  71. acc_context_compaction-0.1.0/tests/test_temporal.py +59 -0
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.4
2
+ Name: acc-context-compaction
3
+ Version: 0.1.0
4
+ Summary: Automatically Context Compaction framework
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: mcp
7
+ Requires-Dist: typer
8
+ Requires-Dist: fastapi
9
+ Requires-Dist: sqlmodel
10
+ Requires-Dist: alembic
11
+ Requires-Dist: pydantic-settings
12
+ Requires-Dist: httpx
13
+ Requires-Dist: tiktoken
14
+ Requires-Dist: pyyaml
15
+ Provides-Extra: slicer
16
+ Requires-Dist: tree-sitter==0.21.3; extra == "slicer"
17
+ Requires-Dist: tree-sitter-languages>=1.10; extra == "slicer"
18
+ Provides-Extra: dev
19
+ Requires-Dist: pytest; extra == "dev"
20
+ Requires-Dist: pytest-asyncio; extra == "dev"
@@ -0,0 +1,17 @@
1
+ # ACC - Automatically Context Compaction
2
+
3
+ ## Project Overview
4
+ ACC is a context compaction framework designed to drastically reduce token consumption and improve context retention for AI IDEs and agents. It contains three main modules:
5
+
6
+ 1. **`acc_cli`**: A Python-based CLI proxy that intelligently filters, deduplicates, and compresses verbose terminal outputs (such as massive git logs or test runner failures) before sending them to the LLM.
7
+ 2. **`acc_memory`**: A FastAPI- and SQLModel-powered (SQLAlchemy-based) memory backend for durable, cross-session context retention. It extracts semantic facts from conversations and stores them.
8
+ 3. **`acc_mcp`**: A standard Model Context Protocol (MCP) server that seamlessly exposes both the CLI proxy and the memory backend tools to modern IDEs like Cursor and Codex.
9
+
10
+ ## Key Features
11
+ - **Token Efficiency:** Reduces terminal-output token usage by 50-80% through deterministic heuristic filtering, without relying on expensive LLM summaries.
12
+ - **Tee Failsafe Strategy:** When logs are aggressively truncated, the full raw output is safely dumped to a local temporary file, and a footnote is provided so the AI can retrieve the rest of the file if needed.
13
+ - **Fact-Based Memory:** Converts passive conversational history into an atomic, queryable knowledge graph to prevent context amnesia across large repositories.
14
+ - **Smart Git Parsers:** Contains built-in interceptors for `git status`, `git diff`, and `git log` to strip out noise and condense the output format specifically for LLM ingestion.
15
+
16
+ ## Architecture & Integration
17
+ ACC is designed as a standalone toolchain. Once the MCP server is mounted in your IDE, the AI is instructed (via system prompts) to execute all shell commands through the `acc.cli_run` tool rather than running raw terminal processes.
@@ -0,0 +1 @@
1
+ # Empty __init__.py
@@ -0,0 +1 @@
1
+ # Empty __init__.py
@@ -0,0 +1,13 @@
1
+ from fastapi import FastAPI
2
+ from acc.schemas.memory import MemoryCreate, MemorySearch
3
+ from acc.services.memory_service import save_memory, search_memory
4
+
5
+ app = FastAPI(title="ACC API")
6
+
7
@app.post("/memory/save")
def api_memory_save(data: MemoryCreate):
    """Handle POST /memory/save by delegating the payload to save_memory()."""
    result = save_memory(data)
    return result
10
+
11
@app.post("/memory/search")
def api_memory_search(data: MemorySearch):
    """Handle POST /memory/search by delegating the query to search_memory()."""
    result = search_memory(data)
    return result
@@ -0,0 +1 @@
1
+ # Empty __init__.py
@@ -0,0 +1,43 @@
1
+ from typing import List, Dict
2
+
3
def format_pytest(parsed: Dict, raw_lines: List[str]) -> str:
    """Render a parsed pytest result as a short plain-text report.

    ``parsed`` must contain a ``failures`` list whose items have ``file``,
    ``test`` and ``reason`` keys; a truthy ``summary`` value, when present,
    is emitted first. At most 20 failures are listed. ``raw_lines`` is
    accepted for signature compatibility and is not read.
    """
    summary = parsed.get("summary")
    report = [f"PYTEST SUMMARY: {summary}"] if summary else []

    failures = parsed["failures"]
    if not failures:
        report.append("No explicit failing tests detected in parsed output.")
        return "\n".join(report)

    report.append("FAILING TESTS:")
    report.extend(
        f"- {item['file']}:: {item['test']} — {item['reason']}"
        for item in failures[:20]
    )
    return "\n".join(report)
14
+
15
def format_git_status(parsed: Dict, raw_lines: List[str]) -> str:
    """Render parsed ``git status`` data as a plain-text summary.

    ``parsed`` must provide ``modified`` and ``untracked`` lists. Each
    non-empty list gets a heading followed by up to 20 entries; when both
    are empty a "clean" line is emitted instead. ``raw_lines`` is not read.
    """
    changed = parsed["modified"]
    new_files = parsed["untracked"]

    report = ["GIT STATUS SUMMARY:"]
    sections = (("Modified Files:", changed), ("Untracked Files:", new_files))
    for heading, entries in sections:
        if entries:
            report.append(heading)
            report.extend(f" {entry}" for entry in entries[:20])

    if not (changed or new_files):
        report.append("Working tree clean.")
    return "\n".join(report)
26
+
27
def format_git_log(parsed: Dict, raw_lines: List[str]) -> str:
    """Render up to 20 parsed commits as ``- <hash> <message>`` lines.

    ``parsed`` must provide a ``commits`` list of dicts with ``hash`` and
    ``message`` keys. ``raw_lines`` is not read.
    """
    entries = [
        f"- {commit['hash']} {commit['message']}"
        for commit in parsed["commits"][:20]
    ]
    return "\n".join(["GIT LOG SUMMARY:", *entries])
32
+
33
def format_git_diff(parsed: Dict, raw_lines: List[str]) -> str:
    """Render parsed ``git diff`` data as a plain-text summary.

    ``parsed`` must provide ``files_changed`` (raw ``diff --git`` header
    lines) and ``hunks`` (raw ``@@`` header lines). Up to 10 of each are
    listed; file headers are rewritten to ``<old> -> <new>``.
    ``raw_lines`` is not read.
    """
    report = ["GIT DIFF SUMMARY:"]

    headers = parsed["files_changed"]
    if headers:
        report.append("Files Changed:")
        for header in headers[:10]:
            paths = header.replace("diff --git a/", "")
            paths = paths.replace(" b/", " -> ")
            report.append(f" {paths}")

    hunk_headers = parsed["hunks"]
    if hunk_headers:
        report.append("Hunks modified:")
        report.extend(f" {hunk}" for hunk in hunk_headers[:10])

    return "\n".join(report)
@@ -0,0 +1,53 @@
1
+ import re
2
+ from typing import List, Dict
3
+
4
# Legacy form accepted by this parser: "FAILED path::test [reason]".
PYTEST_FAIL_RE = re.compile(r"^FAILED\s+(.+?)::(.+?)\s+\[([^\]]+)\]")

# pytest's "short test summary info" form: "FAILED path::test - reason".
# The " - reason" part is optional because pytest omits it when the failure
# carries no message.
_PYTEST_SHORT_SUMMARY_RE = re.compile(
    r"^FAILED\s+(.+?)::(.+?)(?:\s+-\s+(.*?))?\s*$"
)


def parse_pytest(lines: List[str]) -> Dict:
    """Extract the summary line and failing tests from pytest output.

    Args:
        lines: Output lines, one string per line.

    Returns:
        A dict with:
          * ``summary`` - the last line containing both " failed," and
            " passed" (stripped), or ``None`` if no such line exists.
          * ``failures`` - one ``{"file", "test", "reason"}`` dict per
            ``FAILED`` line, in input order. ``reason`` is an empty string
            when the line carries none.
    """
    failures = []
    summary_line = None
    for line in lines:
        if " failed," in line and " passed" in line:
            summary_line = line.strip()
        # Try the bracketed form first so previously recognised lines keep
        # producing exactly the same groups.
        m = PYTEST_FAIL_RE.match(line) or _PYTEST_SHORT_SUMMARY_RE.match(line)
        if m:
            file_path, test_name, reason = m.groups()
            failures.append({
                "file": file_path,
                "test": test_name,
                "reason": reason or "",
            })
    return {"summary": summary_line, "failures": failures}
21
+
22
def parse_git_status(lines: List[str]) -> Dict:
    """Split ``git status -s`` lines into modified and untracked entries.

    The first two characters of each line are taken as the status code and
    everything from the fourth character on as the path. Lines shorter than
    three characters are ignored. Entries with code ``??`` go to
    ``untracked`` (path only); all others go to ``modified`` as
    ``"<code> <path>"``.
    """
    changed = []
    new_files = []
    for entry in lines:
        if len(entry) < 3:
            continue
        code, path = entry[:2], entry[3:].strip()
        if code == "??":
            new_files.append(path)
            continue
        changed.append(f"{code.strip()} {path}")
    return {"modified": changed, "untracked": new_files}
35
+
36
def parse_git_log(lines: List[str]) -> Dict:
    """Parse one-line-per-commit log output into hash/message pairs.

    A line is accepted when it contains a space and the text before the
    first space is at least seven characters long; that text becomes
    ``hash`` and the stripped remainder becomes ``message``.
    """
    commits = []
    for entry in lines:
        head, sep, tail = entry.partition(" ")
        if sep and len(head) >= 7:
            commits.append({"hash": head, "message": tail.strip()})
    return {"commits": commits}
43
+
44
def parse_git_diff(lines: List[str]) -> Dict:
    """Collect file headers and hunk headers from unified diff output.

    Lines starting with ``diff --git`` are gathered (stripped) under
    ``files_changed``; lines starting with ``@@ `` are gathered (stripped)
    under ``hunks``. Every other line is discarded.
    """
    file_headers = []
    hunk_headers = []
    for entry in lines:
        if entry.startswith("diff --git"):
            target = file_headers
        elif entry.startswith("@@ "):
            target = hunk_headers
        else:
            continue
        target.append(entry.strip())
    return {"files_changed": file_headers, "hunks": hunk_headers}
@@ -0,0 +1,117 @@
1
+ """
2
+ Session-scoped deduplication cache.
3
+
4
+ Prevents re-processing identical outputs across multiple MCP tool calls
5
+ within the same server session. Uses O(1) fingerprinting based on
6
+ (byte_length, hash(first_256_chars), hash(last_256_chars)).
7
+
8
+ Lifetime: process-scoped. Dies with the MCP server process.
9
+ Never persisted to disk — stale cache is worse than cache miss.
10
+ """
11
+
12
+ from dataclasses import dataclass
13
+ from typing import Optional
14
+
15
+
16
@dataclass(frozen=True)
class Fingerprint:
    """Hashable, immutable key identifying one output for deduplication."""

    # Length of the fingerprinted content.
    byte_length: int
    # Hash derived from the leading part of the content.
    prefix_hash: int
    # Hash derived from the trailing part of the content.
    suffix_hash: int
22
+
23
+
24
class DedupCache:
    """
    In-memory record of outputs and files already seen.

    Maps each Fingerprint to the turn number on which it was first stored.
    Nothing is written to disk.

    Usage:
        cache = get_session_cache()
        cache.next_turn()  # call at start of each tool invocation
        suppressed = cache.check(raw_output)
        if suppressed:
            return suppressed  # identical output seen before
    """

    def __init__(self):
        self._turn: int = 0
        self._cache: dict[Fingerprint, int] = {}  # fingerprint -> turn first stored

    def next_turn(self):
        """Advance the turn counter by one."""
        self._turn = self._turn + 1

    @property
    def current_turn(self) -> int:
        """Current value of the turn counter."""
        return self._turn

    def check(self, raw: str) -> Optional[str]:
        """
        Look up ``raw`` by fingerprint, recording it when it is new.

        Returns None for empty or whitespace-only input and for output not
        seen before; otherwise returns a suppression message naming the
        turn on which the matching fingerprint was first stored.
        """
        if not (raw and raw.strip()):
            return None

        key = self._fingerprint(raw)
        first_seen = self._cache.get(key)
        if first_seen is None:
            self._cache[key] = self._turn
            return None
        return (
            f"[Output identical to turn #{first_seen} — "
            f"suppressed ({key.byte_length:,} bytes)]"
        )

    def check_file(self, file_path: str, size: int, mtime_ns: int) -> Optional[str]:
        """
        Look up a file by its stat metadata, recording it when it is new.

        The key is built from ``size``, ``hash(file_path)`` and
        ``hash(mtime_ns)``; file contents are never read. Returns a
        suppression message when the same key was stored before, None
        otherwise.
        """
        key = Fingerprint(
            byte_length=size,
            prefix_hash=hash(file_path),
            suffix_hash=hash(mtime_ns),
        )
        if key not in self._cache:
            self._cache[key] = self._turn
            return None

        first_seen = self._cache[key]
        return (
            f"[File unchanged since turn #{first_seen} — "
            f"suppressed ({size:,} bytes)]"
        )

    def _fingerprint(self, text: str) -> Fingerprint:
        """Build a key from len(text) and hashes of its first/last 256 chars."""
        head = text[:256]
        tail = text[-256:] if len(text) > 256 else text
        return Fingerprint(
            byte_length=len(text),
            prefix_hash=hash(head),
            suffix_hash=hash(tail),
        )

    def clear(self):
        """Forget every stored fingerprint and reset the turn counter to 0."""
        self._turn = 0
        self._cache.clear()

    @property
    def size(self) -> int:
        """How many fingerprints are currently stored."""
        return len(self._cache)
108
+
109
+
110
# Single shared DedupCache, created at import time and kept for the life of
# the process. Every caller of get_session_cache() receives this instance.
_session_cache = DedupCache()


def get_session_cache() -> DedupCache:
    """Return the module-level DedupCache instance (always the same object)."""
    return _session_cache
@@ -0,0 +1,58 @@
1
+ """
2
+ Auto-discovery module for parser plugins.
3
+
4
+ Scans this directory for modules containing BaseParser subclasses,
5
+ instantiates them, and builds a registry keyed by tool name.
6
+ """
7
+
8
+ import importlib
9
+ import pkgutil
10
+ from pathlib import Path
11
+ from typing import Optional
12
+
13
+ from acc.compaction.parsers.base import BaseParser
14
+
15
+ _registry: list[BaseParser] = []
16
+ _initialized = False
17
+
18
+
19
def _discover_parsers():
    """Import every module in this package and register its parser classes.

    Runs at most once per process: after a successful pass `_initialized`
    is set and later calls return immediately.

    A class is registered when it is a concrete (non-abstract) subclass of
    BaseParser, is not BaseParser itself, and has a non-empty `tool_names`.
    Each class is instantiated once even if it is visible in several modules
    or under several names. Instances are committed to `_registry` only after
    every module has been processed, so an import error (which propagates to
    the caller) leaves the registry untouched and a retry cannot duplicate
    entries.
    """
    global _initialized
    if _initialized:
        return

    import inspect  # local import: only needed for the one-time discovery pass

    package_dir = Path(__file__).parent
    found: list[BaseParser] = []
    # Seed with anything already registered so it is never added twice.
    seen: set[type] = {type(parser) for parser in _registry}

    for _finder, module_name, _is_pkg in pkgutil.iter_modules([str(package_dir)]):
        if module_name in ("base", "__init__"):
            continue
        module = importlib.import_module(f"acc.compaction.parsers.{module_name}")
        for attr_name in dir(module):
            attr = getattr(module, attr_name)
            if not (isinstance(attr, type) and issubclass(attr, BaseParser)):
                continue
            if attr is BaseParser or attr in seen:
                continue
            # Abstract classes cannot be instantiated; empty tool_names means
            # the class handles no command.
            if inspect.isabstract(attr) or not attr.tool_names:
                continue
            seen.add(attr)
            found.append(attr())

    # Mutate in place: other functions hold a reference to this list object.
    _registry.extend(found)
    _initialized = True
41
+
42
+
43
def get_parser(command: str) -> Optional[BaseParser]:
    """
    Return the first registered parser whose can_handle(command) is true.

    Triggers parser discovery first. Returns None when no parser accepts
    the command.
    """
    _discover_parsers()
    matches = (candidate for candidate in _registry if candidate.can_handle(command))
    return next(matches, None)
53
+
54
+
55
def get_all_parsers() -> list[BaseParser]:
    """Run discovery, then return a new list holding every registered parser."""
    _discover_parsers()
    return [*_registry]
@@ -0,0 +1,46 @@
1
+ """Base class for all ACC output parsers."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from acc.core.logger import log
5
+
6
+
7
class BaseParser(ABC):
    """
    Common interface for tool-specific output parsers.

    A subclass lists the command names it accepts in `tool_names`,
    implements `parse()` to return a compressed form of the output, and
    can call `fallback()` to log a warning and hand back the raw output.
    """

    tool_names: list[str] = []

    def can_handle(self, command: str) -> bool:
        """Return True when the normalised command name is in `tool_names`.

        Normalisation lower-cases the command, removes a trailing ".exe",
        and keeps only the part after the last "/" and then the last "\\".
        """
        name = command.lower().removesuffix(".exe")
        # Keep only the final path component (e.g. /usr/bin/gcc -> gcc).
        name = name.rpartition("/")[2].rpartition("\\")[2]
        return name in self.tool_names

    @abstractmethod
    def parse(self, raw_output: str, **kwargs) -> str:
        """
        Compress `raw_output` and return the result as a string.

        Implementations are expected to call self.fallback() on internal
        failure instead of raising.
        """

    def fallback(self, raw_output: str, reason: str = "unknown") -> str:
        """Log a warning with the parser name, reason and length; return input as-is."""
        details = {
            "parser": type(self).__name__,
            "reason": reason,
            "raw_length": len(raw_output),
        }
        log.warning("Parser fallback triggered", extra=details)
        return raw_output
@@ -0,0 +1,122 @@
1
+ """Parser for build system output: make, cmake, ninja, cargo, gradle, mvn."""
2
+
3
+ import re
4
+ from acc.compaction.parsers.base import BaseParser
5
+
6
+ # Lines to drop from build output
7
+ _DROP_PATTERNS = (
8
+ "Entering directory",
9
+ "Leaving directory",
10
+ "make[",
11
+ "Nothing to be done",
12
+ "is up to date",
13
+ "Compiling ",
14
+ "Linking ",
15
+ "Building ",
16
+ "Scanning dependencies",
17
+ "Built target ",
18
+ "Installing ",
19
+ "-- ", # cmake status messages
20
+ "[ ", # progress percentages like [ 5%]
21
+ "[ ",
22
+ "UP-TO-DATE",
23
+ "NO-SOURCE",
24
+ "Downloading ",
25
+ "Download ",
26
+ "> Task :", # gradle task lines
27
+ )
28
+
29
+ _ERROR_PATTERNS = (
30
+ "error:",
31
+ "Error:",
32
+ "ERROR:",
33
+ "FAILED",
34
+ "FAILURE",
35
+ "BUILD FAILED",
36
+ "BUILD FAILURE",
37
+ "fatal:",
38
+ "undefined reference",
39
+ "cannot find",
40
+ "not found",
41
+ "No rule to make target",
42
+ "*** ", # make error marker
43
+ )
44
+
45
+
46
class BuildParser(BaseParser):
    """Compress build-tool output into a one-line success note or an error digest."""

    tool_names = ["make", "cmake", "ninja", "cargo", "gradle", "mvn", "maven"]

    def parse(self, raw_output: str, **kwargs) -> str:
        """Return the compressed output, or the raw output if parsing raises.

        Extra keyword arguments are accepted to match BaseParser.parse and
        are ignored.
        """
        try:
            return self._parse_impl(raw_output)
        except Exception as e:
            return self.fallback(raw_output, reason=str(e))

    @staticmethod
    def _is_noise(stripped: str) -> bool:
        """Return True when a stripped line starts with any drop pattern.

        Each pattern is tried both verbatim and with its own leading
        whitespace removed, because the line has already been stripped.
        """
        return any(
            stripped.startswith(p) or stripped.startswith(p.lstrip())
            for p in _DROP_PATTERNS
        )

    def _parse_impl(self, raw_output: str) -> str:
        """Classify the build as OK or failed and format accordingly."""
        lines = raw_output.split("\n")

        # A build counts as failed when any of the last 50 lines contains
        # an error pattern.
        has_failure = any(
            any(ep in line for ep in _ERROR_PATTERNS)
            for line in lines[-50:]
        )

        if not has_failure:
            # Successful build — report only how many non-blank lines were hidden.
            total_lines = sum(1 for line in lines if line.strip())
            return f"[BUILD OK] Completed successfully ({total_lines} output lines suppressed)"

        return self._extract_failure(lines)

    def _extract_failure(self, lines: list[str]) -> str:
        """Build the failure digest: header, context of first error, error list."""
        error_lines = []
        first_error = None
        total_lines = len(lines)
        dropped = 0

        # First pass: count noise lines and collect every error line.
        for i, line in enumerate(lines):
            stripped = line.strip()
            if not stripped:
                continue

            if self._is_noise(stripped):
                dropped += 1
                continue

            if any(ep in stripped for ep in _ERROR_PATTERNS):
                if first_error is None:
                    first_error = i
                error_lines.append(stripped)

        # Second pass: context around the first error (3 lines before it,
        # the error line itself, and 7 lines after), with the same noise
        # filter as the first pass.
        context_lines = []
        if first_error is not None:
            start = max(0, first_error - 3)
            end = min(len(lines), first_error + 8)
            for raw_line in lines[start:end]:
                stripped = raw_line.strip()
                if stripped and not self._is_noise(stripped):
                    context_lines.append(stripped)

        result = [
            f"[BUILD FAILED] {len(error_lines)} error(s) in {total_lines} output lines "
            f"({dropped} noise lines suppressed)"
        ]

        if context_lines:
            result.append("ERROR CONTEXT:")
            result.extend(f" {c}" for c in context_lines[:20])

        # With more than one error, list them all (capped at 15).
        if len(error_lines) > 1:
            result.append(f"ALL ERRORS ({len(error_lines)}):")
            result.extend(f" {e}" for e in error_lines[:15])
            if len(error_lines) > 15:
                result.append(f" ... and {len(error_lines) - 15} more")

        return "\n".join(result)
@@ -0,0 +1,141 @@
1
+ """Parser for compiler output: gcc, g++, clang, clang++, cross-compilers."""
2
+
3
+ import json
4
+ import re
5
+ from acc.compaction.parsers.base import BaseParser
6
+
7
+ # Matches: file:line:col: severity: message
8
+ _GCC_DIAG_RE = re.compile(
9
+ r"^(.+?):(\d+):(\d+):\s+(error|warning|fatal error):\s+(.+)$"
10
+ )
11
+
12
+ # Lines to drop entirely
13
+ _DROP_PREFIXES = (
14
+ "In file included from",
15
+ " from",
16
+ "In member function",
17
+ "In function",
18
+ "In instantiation of",
19
+ " required from",
20
+ " required by",
21
+ )
22
+
23
+
24
class CompilerParser(BaseParser):
    """Compress compiler diagnostics into a deduplicated error/warning digest."""

    tool_names = [
        "gcc", "g++", "cc", "c++",
        "clang", "clang++",
        "arm-none-eabi-gcc", "arm-none-eabi-g++",
        "aarch64-linux-gnu-gcc",
        "x86_64-linux-gnu-gcc",
    ]

    # Maximum number of error entries printed by _format_output.
    _MAX_ERRORS_SHOWN = 30

    def parse(self, raw_output: str, **kwargs) -> str:
        """Return the compressed output, or the raw output if parsing raises.

        Extra keyword arguments are accepted to match BaseParser.parse and
        are ignored.
        """
        try:
            return self._parse_impl(raw_output)
        except Exception as e:
            return self.fallback(raw_output, reason=str(e))

    def _parse_impl(self, raw_output: str) -> str:
        """Try JSON diagnostics first when the output looks like JSON, else text."""
        stripped = raw_output.strip()
        if stripped.startswith("[") or stripped.startswith("{"):
            try:
                return self._parse_json(stripped)
            except (json.JSONDecodeError, KeyError, TypeError):
                # Not usable JSON diagnostics; treat it as plain text.
                pass

        return self._parse_text(raw_output)

    def _parse_json(self, raw_output: str) -> str:
        """Format diagnostics given as a JSON list, or a dict with `diagnostics`."""
        data = json.loads(raw_output)
        if isinstance(data, dict):
            data = data.get("diagnostics", [data])
        if not isinstance(data, list):
            data = [data]

        errors = []
        warnings = []
        for diag in data:
            severity = diag.get("severity", "").lower()
            msg = diag.get("message", "")
            loc = diag.get("location", {})
            file_path = loc.get("file", "?")
            line = loc.get("line", "?")
            entry = f"{file_path}:{line}: {msg}"
            if "error" in severity:
                errors.append(entry)
            elif "warning" in severity:
                warnings.append(entry)

        return self._format_output(errors, warnings)

    def _parse_text(self, raw_output: str) -> str:
        """Format `file:line:col: severity: message` diagnostics from text.

        Errors with an identical message are collapsed into the first
        occurrence, suffixed with `(×N)` when N > 1. Warnings are kept as-is.
        """
        warnings = []
        # Keyed by the full message text so the count can always be found
        # again, even when the message itself contains ": ".
        first_entry: dict[str, str] = {}
        counts: dict[str, int] = {}

        for line in raw_output.split("\n"):
            stripped = line.strip()

            # Skip include chains, carets/gutters and notes.
            if stripped.startswith(_DROP_PREFIXES):
                continue
            if stripped.startswith(("^", "|")):
                continue
            if stripped.startswith("note:") or ": note:" in stripped:
                continue

            m = _GCC_DIAG_RE.match(stripped)
            if not m:
                continue

            file_path, lineno, col, severity, message = m.groups()
            entry = f"{file_path}:{lineno}:{col}: {message}"

            if severity in ("error", "fatal error"):
                counts[message] = counts.get(message, 0) + 1
                first_entry.setdefault(message, entry)
            elif severity == "warning":
                warnings.append(entry)

        # dicts preserve insertion order, so errors stay in first-seen order.
        final_errors = [
            f"{entry} (×{counts[message]})" if counts[message] > 1 else entry
            for message, entry in first_entry.items()
        ]

        return self._format_output(final_errors, warnings)

    def _format_output(self, errors: list[str], warnings: list[str]) -> str:
        """Render the header line plus capped ERRORS and WARNINGS sections."""
        result = [f"[BUILD] {len(errors)} error(s), {len(warnings)} warning(s)"]

        if errors:
            result.append("ERRORS:")
            shown = errors[:self._MAX_ERRORS_SHOWN]
            result.extend(f" {e}" for e in shown)
            hidden = len(errors) - len(shown)
            if hidden > 0:
                # Make the truncation visible instead of silently dropping entries.
                result.append(f" ... and {hidden} more")

        if warnings:
            if len(warnings) > 10:
                result.append(
                    f"WARNINGS: {len(warnings)} total (showing first 5)"
                )
                result.extend(f" {w}" for w in warnings[:5])
            else:
                result.append("WARNINGS:")
                result.extend(f" {w}" for w in warnings)

        return "\n".join(result)