acc-context-compaction 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- acc_context_compaction-0.1.0/PKG-INFO +20 -0
- acc_context_compaction-0.1.0/README.md +17 -0
- acc_context_compaction-0.1.0/acc/__init__.py +1 -0
- acc_context_compaction-0.1.0/acc/api/__init__.py +1 -0
- acc_context_compaction-0.1.0/acc/api/routes.py +13 -0
- acc_context_compaction-0.1.0/acc/compaction/__init__.py +1 -0
- acc_context_compaction-0.1.0/acc/compaction/_formatter_deprecated.py +43 -0
- acc_context_compaction-0.1.0/acc/compaction/_parsers_deprecated.py +53 -0
- acc_context_compaction-0.1.0/acc/compaction/dedup_cache.py +117 -0
- acc_context_compaction-0.1.0/acc/compaction/parsers/__init__.py +58 -0
- acc_context_compaction-0.1.0/acc/compaction/parsers/base.py +46 -0
- acc_context_compaction-0.1.0/acc/compaction/parsers/build_parser.py +122 -0
- acc_context_compaction-0.1.0/acc/compaction/parsers/compiler_parser.py +141 -0
- acc_context_compaction-0.1.0/acc/compaction/parsers/git_diff_parser.py +291 -0
- acc_context_compaction-0.1.0/acc/compaction/parsers/linter_parser.py +187 -0
- acc_context_compaction-0.1.0/acc/compaction/parsers/pytest_parser.py +114 -0
- acc_context_compaction-0.1.0/acc/compaction/proxy.py +21 -0
- acc_context_compaction-0.1.0/acc/compaction/slicer.py +357 -0
- acc_context_compaction-0.1.0/acc/core/__init__.py +1 -0
- acc_context_compaction-0.1.0/acc/core/config.py +11 -0
- acc_context_compaction-0.1.0/acc/core/llm.py +63 -0
- acc_context_compaction-0.1.0/acc/core/logger.py +31 -0
- acc_context_compaction-0.1.0/acc/evals/harness.py +98 -0
- acc_context_compaction-0.1.0/acc/filters/__init__.py +2 -0
- acc_context_compaction-0.1.0/acc/filters/dedup.py +30 -0
- acc_context_compaction-0.1.0/acc/filters/head_tail.py +15 -0
- acc_context_compaction-0.1.0/acc/filters/noise.py +47 -0
- acc_context_compaction-0.1.0/acc/filters/pipeline.py +60 -0
- acc_context_compaction-0.1.0/acc/filters/profile_manager.py +21 -0
- acc_context_compaction-0.1.0/acc/filters/strip_ansi.py +7 -0
- acc_context_compaction-0.1.0/acc/mcp/__init__.py +1 -0
- acc_context_compaction-0.1.0/acc/mcp/server.py +122 -0
- acc_context_compaction-0.1.0/acc/mcp/tools_cli.py +13 -0
- acc_context_compaction-0.1.0/acc/mcp/tools_compaction.py +44 -0
- acc_context_compaction-0.1.0/acc/mcp/tools_memory.py +34 -0
- acc_context_compaction-0.1.0/acc/mcp/tools_repo.py +8 -0
- acc_context_compaction-0.1.0/acc/memory/db.py +25 -0
- acc_context_compaction-0.1.0/acc/memory/models.py +24 -0
- acc_context_compaction-0.1.0/acc/memory/repositories.py +103 -0
- acc_context_compaction-0.1.0/acc/repo/analyzer.py +69 -0
- acc_context_compaction-0.1.0/acc/repo/compressor.py +70 -0
- acc_context_compaction-0.1.0/acc/repo/ranker.py +23 -0
- acc_context_compaction-0.1.0/acc/schemas/__init__.py +1 -0
- acc_context_compaction-0.1.0/acc/schemas/common.py +5 -0
- acc_context_compaction-0.1.0/acc/schemas/compaction.py +10 -0
- acc_context_compaction-0.1.0/acc/schemas/memory.py +20 -0
- acc_context_compaction-0.1.0/acc/services/__init__.py +1 -0
- acc_context_compaction-0.1.0/acc/services/compaction_service.py +66 -0
- acc_context_compaction-0.1.0/acc/services/memory_service.py +11 -0
- acc_context_compaction-0.1.0/acc/structured/conversation.py +113 -0
- acc_context_compaction-0.1.0/acc/structured/json_minifier.py +33 -0
- acc_context_compaction-0.1.0/acc/structured/python_ast.py +63 -0
- acc_context_compaction-0.1.0/acc_context_compaction.egg-info/PKG-INFO +20 -0
- acc_context_compaction-0.1.0/acc_context_compaction.egg-info/SOURCES.txt +69 -0
- acc_context_compaction-0.1.0/acc_context_compaction.egg-info/dependency_links.txt +1 -0
- acc_context_compaction-0.1.0/acc_context_compaction.egg-info/entry_points.txt +3 -0
- acc_context_compaction-0.1.0/acc_context_compaction.egg-info/requires.txt +17 -0
- acc_context_compaction-0.1.0/acc_context_compaction.egg-info/top_level.txt +1 -0
- acc_context_compaction-0.1.0/pyproject.toml +33 -0
- acc_context_compaction-0.1.0/setup.cfg +4 -0
- acc_context_compaction-0.1.0/tests/test_conversation.py +27 -0
- acc_context_compaction-0.1.0/tests/test_dedup_cache.py +152 -0
- acc_context_compaction-0.1.0/tests/test_failsafe.py +34 -0
- acc_context_compaction-0.1.0/tests/test_filters.py +54 -0
- acc_context_compaction-0.1.0/tests/test_memory.py +34 -0
- acc_context_compaction-0.1.0/tests/test_parser_plugins.py +243 -0
- acc_context_compaction-0.1.0/tests/test_parsers.py +13 -0
- acc_context_compaction-0.1.0/tests/test_repo.py +66 -0
- acc_context_compaction-0.1.0/tests/test_slicer.py +129 -0
- acc_context_compaction-0.1.0/tests/test_structured.py +43 -0
- acc_context_compaction-0.1.0/tests/test_temporal.py +59 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: acc-context-compaction
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Automatically Context Compaction framework
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: mcp
|
|
7
|
+
Requires-Dist: typer
|
|
8
|
+
Requires-Dist: fastapi
|
|
9
|
+
Requires-Dist: sqlmodel
|
|
10
|
+
Requires-Dist: alembic
|
|
11
|
+
Requires-Dist: pydantic-settings
|
|
12
|
+
Requires-Dist: httpx
|
|
13
|
+
Requires-Dist: tiktoken
|
|
14
|
+
Requires-Dist: pyyaml
|
|
15
|
+
Provides-Extra: slicer
|
|
16
|
+
Requires-Dist: tree-sitter==0.21.3; extra == "slicer"
|
|
17
|
+
Requires-Dist: tree-sitter-languages>=1.10; extra == "slicer"
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest; extra == "dev"
|
|
20
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# ACC - Automatically Context Compaction
|
|
2
|
+
|
|
3
|
+
## Project Overview
|
|
4
|
+
ACC is a context compaction framework designed to drastically reduce token consumption and improve context retention for AI IDEs and agents. It contains three main modules:
|
|
5
|
+
|
|
6
|
+
1. **`acc_cli`**: A Python-based CLI proxy that intelligently filters, deduplicates, and compresses verbose terminal outputs (such as massive git logs or test runner failures) before sending them to the LLM.
|
|
7
|
+
2. **`acc_memory`**: A FastAPI and SQLAlchemy-powered memory backend for durable, cross-session context retention. It extracts semantic facts from conversations and stores them.
|
|
8
|
+
3. **`acc_mcp`**: A standard Model Context Protocol (MCP) server that seamlessly exposes both the CLI proxy and the memory backend tools to modern IDEs like Cursor and Codex.
|
|
9
|
+
|
|
10
|
+
## Key Features
|
|
11
|
+
- **Token Efficiency:** Reduces terminal-output token usage by 50-80% using deterministic heuristic filtering, without relying on expensive LLM summaries.
|
|
12
|
+
- **Tee Failsafe Strategy:** When logs are aggressively truncated, the full raw output is safely dumped to a local temporary file, and a footnote is provided so the AI can retrieve the rest of the file if needed.
|
|
13
|
+
- **Fact-Based Memory:** Converts passive conversational history into an atomic, queryable knowledge graph to prevent context amnesia across large repositories.
|
|
14
|
+
- **Smart Git Parsers:** Contains built-in interceptors for `git status`, `git diff`, and `git log` to strip out noise and condense the output format specifically for LLM ingestion.
|
|
15
|
+
|
|
16
|
+
## Architecture & Integration
|
|
17
|
+
ACC is designed as a standalone toolchain. Once the MCP server is mounted in your IDE, the AI is instructed (via system prompts) to execute all shell commands through the `acc.cli_run` tool rather than running raw terminal processes.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Empty __init__.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Empty __init__.py
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from fastapi import FastAPI
|
|
2
|
+
from acc.schemas.memory import MemoryCreate, MemorySearch
|
|
3
|
+
from acc.services.memory_service import save_memory, search_memory
|
|
4
|
+
|
|
5
|
+
app = FastAPI(title="ACC API")
|
|
6
|
+
|
|
7
|
+
@app.post("/memory/save")
def api_memory_save(data: MemoryCreate):
    """Handle ``POST /memory/save``.

    Forwards the validated request body to ``save_memory`` and returns
    whatever that service call produces.
    """
    saved = save_memory(data)
    return saved
|
|
10
|
+
|
|
11
|
+
@app.post("/memory/search")
def api_memory_search(data: MemorySearch):
    """Handle ``POST /memory/search``.

    Forwards the validated search request to ``search_memory`` and returns
    whatever that service call produces.
    """
    matches = search_memory(data)
    return matches
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Empty __init__.py
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from typing import List, Dict
|
|
2
|
+
|
|
3
|
+
def format_pytest(parsed: Dict, raw_lines: List[str]) -> str:
    """Render a parsed pytest result as a short plain-text report.

    Args:
        parsed: Mapping with an optional ``"summary"`` entry and a required
            ``"failures"`` list whose items carry ``"file"``, ``"test"`` and
            ``"reason"`` keys.
        raw_lines: Not used by this formatter.

    Returns:
        The report lines joined with newlines. At most the first 20
        failures are listed.
    """
    report = []

    summary = parsed.get("summary")
    if summary:
        report.append(f"PYTEST SUMMARY: {summary}")

    failures = parsed["failures"]
    if not failures:
        report.append("No explicit failing tests detected in parsed output.")
        return "\n".join(report)

    report.append("FAILING TESTS:")
    report.extend(
        f"- {item['file']}:: {item['test']} — {item['reason']}"
        for item in failures[:20]
    )
    return "\n".join(report)
|
|
14
|
+
|
|
15
|
+
def format_git_status(parsed: Dict, raw_lines: List[str]) -> str:
    """Render parsed ``git status`` data as a plain-text summary.

    Args:
        parsed: Mapping with ``"modified"`` and ``"untracked"`` lists.
        raw_lines: Not used by this formatter.

    Returns:
        A header followed by up to 20 entries per non-empty section, or a
        "Working tree clean." line when both sections are empty.
    """
    modified = parsed["modified"]
    untracked = parsed["untracked"]

    out = ["GIT STATUS SUMMARY:"]
    sections = (("Modified Files:", modified), ("Untracked Files:", untracked))
    for heading, entries in sections:
        if entries:
            out.append(heading)
            out += [f" {path}" for path in entries[:20]]

    if not (modified or untracked):
        out.append("Working tree clean.")
    return "\n".join(out)
|
|
26
|
+
|
|
27
|
+
def format_git_log(parsed: Dict, raw_lines: List[str]) -> str:
    """Render parsed ``git log`` data as a bullet list.

    Args:
        parsed: Mapping with a ``"commits"`` list of dicts carrying
            ``"hash"`` and ``"message"`` keys.
        raw_lines: Not used by this formatter.

    Returns:
        A header line followed by one bullet per commit (first 20 only).
    """
    bullets = [
        f"- {commit['hash']} {commit['message']}"
        for commit in parsed["commits"][:20]
    ]
    return "\n".join(["GIT LOG SUMMARY:", *bullets])
|
|
32
|
+
|
|
33
|
+
def format_git_diff(parsed: Dict, raw_lines: List[str]) -> str:
    """Render parsed ``git diff`` data as a plain-text summary.

    Args:
        parsed: Mapping with ``"files_changed"`` (``diff --git`` header
            lines) and ``"hunks"`` (hunk header lines).
        raw_lines: Not used by this formatter.

    Returns:
        A header followed by up to 10 changed files and up to 10 hunk
        headers; empty sections are omitted.
    """
    out = ["GIT DIFF SUMMARY:"]

    changed = parsed["files_changed"]
    if changed:
        out.append("Files Changed:")
        for header in changed[:10]:
            # "diff --git a/x b/y" is shown as "x -> y".
            pretty = header.replace("diff --git a/", "").replace(" b/", " -> ")
            out.append(f" {pretty}")

    hunks = parsed["hunks"]
    if hunks:
        out.append("Hunks modified:")
        out.extend(f" {hunk}" for hunk in hunks[:10])

    return "\n".join(out)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import List, Dict
|
|
3
|
+
|
|
4
|
+
# Bracketed form: "FAILED path::test [reason]".
PYTEST_FAIL_RE = re.compile(r"^FAILED\s+(.+?)::(.+?)\s+\[([^\]]+)\]")
# Form printed in pytest's "short test summary info" section:
# "FAILED path::test - reason".
_PYTEST_SHORT_FAIL_RE = re.compile(r"^FAILED\s+(.+?)::(.+?)\s+-\s+(.*)$")
# Closing tally that reports failures with or without a "passed" count,
# e.g. "=== 1 failed in 0.12s ===".
_PYTEST_TALLY_RE = re.compile(r"\b\d+ failed\b.*\bin [\d.]+\s?s")


def parse_pytest(lines: List[str]) -> Dict:
    """Extract the result tally and the failing tests from pytest output.

    Args:
        lines: Output lines of a pytest run.

    Returns:
        ``{"summary": <last tally line or None>, "failures": [...]}`` where
        each failure is a dict with ``"file"``, ``"test"`` and ``"reason"``.
    """
    failures = []
    summary_line = None
    for line in lines:
        # The second test covers runs with no passing test, where the tally
        # carries no "passed" count at all.
        if (" failed," in line and " passed" in line) or _PYTEST_TALLY_RE.search(line):
            summary_line = line.strip()

        # The bracketed form is tried first so lines it already matched keep
        # producing exactly the same fields.
        m = PYTEST_FAIL_RE.match(line)
        if m is None:
            m = _PYTEST_SHORT_FAIL_RE.match(line.rstrip())
        if m:
            file_path, test_name, reason = m.groups()
            failures.append({
                "file": file_path,
                "test": test_name,
                "reason": reason,
            })
    return {"summary": summary_line, "failures": failures}
|
|
21
|
+
|
|
22
|
+
def parse_git_status(lines: List[str]) -> Dict:
    """Split short-format ``git status -s`` lines into modified and untracked.

    Each line is read as a two-character status code, one separator
    character, then the path. Lines shorter than three characters are
    ignored.

    Returns:
        ``{"modified": ["<code> <path>", ...], "untracked": ["<path>", ...]}``
    """
    modified, untracked = [], []
    for entry in lines:
        if len(entry) >= 3:
            code = entry[:2]
            path = entry[3:].strip()
            if code == "??":
                untracked.append(path)
            else:
                modified.append(f"{code.strip()} {path}")
    return {"modified": modified, "untracked": untracked}
|
|
35
|
+
|
|
36
|
+
def parse_git_log(lines: List[str]) -> Dict:
    """Parse one-line ``git log`` entries into hash/message pairs.

    A line counts as a commit when it contains a space and the text before
    the first space is at least seven characters long.

    Returns:
        ``{"commits": [{"hash": ..., "message": ...}, ...]}``
    """
    commits = []
    for entry in lines:
        sha, sep, subject = entry.partition(" ")
        if sep and len(sha) >= 7:
            commits.append({"hash": sha, "message": subject.strip()})
    return {"commits": commits}
|
|
43
|
+
|
|
44
|
+
def parse_git_diff(lines: List[str]) -> Dict:
    """Collect the file headers and hunk headers of a unified git diff.

    Only lines starting with ``diff --git`` or ``@@ `` are kept (stripped);
    everything else, including the changed content itself, is discarded.

    Returns:
        ``{"files_changed": [...], "hunks": [...]}``
    """
    files_changed = []
    hunks = []
    for entry in lines:
        if entry.startswith("diff --git"):
            bucket = files_changed
        elif entry.startswith("@@ "):
            bucket = hunks
        else:
            continue
        bucket.append(entry.strip())
    return {"files_changed": files_changed, "hunks": hunks}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Session-scoped deduplication cache.
|
|
3
|
+
|
|
4
|
+
Prevents re-processing identical outputs across multiple MCP tool calls
|
|
5
|
+
within the same server session. Uses O(1) fingerprinting based on
|
|
6
|
+
(byte_length, hash(first_256_chars), hash(last_256_chars)).
|
|
7
|
+
|
|
8
|
+
Lifetime: process-scoped. Dies with the MCP server process.
|
|
9
|
+
Never persisted to disk — stale cache is worse than cache miss.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class Fingerprint:
    """Hashable, immutable key identifying one output for deduplication.

    Frozen so instances can be used as dictionary keys.
    """

    # Length of the fingerprinted output (or the file size for file entries).
    byte_length: int
    # Hash of the first 256 characters.
    prefix_hash: int
    # Hash of the last 256 characters.
    suffix_hash: int
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DedupCache:
    """
    In-memory record of outputs already returned during this process.

    Each entry maps a fingerprint to the turn number on which it was first
    seen. Nothing is written to disk.

    Usage:
        cache = get_session_cache()
        cache.next_turn()  # call at start of each tool invocation
        suppressed = cache.check(raw_output)
        if suppressed:
            return suppressed  # identical output seen before

    NOTE(review): a text fingerprint covers only the length plus the first
    and last 256 characters, so two outputs of equal length that differ only
    in the middle are treated as identical. This matches the documented
    design; confirm the trade-off is acceptable.
    """

    def __init__(self):
        self._turn: int = 0
        self._cache: dict[Fingerprint, int] = {}  # fingerprint -> turn number

    def next_turn(self):
        """Advance the turn counter by one. Call at the start of each tool invocation."""
        self._turn = self._turn + 1

    @property
    def current_turn(self) -> int:
        """Current value of the turn counter."""
        return self._turn

    def check(self, raw: str) -> Optional[str]:
        """
        Report whether this exact output was already seen in this session.

        Empty or whitespace-only input is never cached and yields None.
        A first sighting is recorded under the current turn and yields None;
        a repeat yields a suppression message naming the original turn.
        """
        if not (raw and raw.strip()):
            return None

        key = self._fingerprint(raw)
        seen_at = self._cache.get(key)
        if seen_at is None:
            self._cache[key] = self._turn
            return None
        return (
            f"[Output identical to turn #{seen_at} — "
            f"suppressed ({key.byte_length:,} bytes)]"
        )

    def check_file(self, file_path: str, size: int, mtime_ns: int) -> Optional[str]:
        """
        Report whether a file with the same path, size and mtime was already seen.

        Works purely from the supplied stat values; the file is not opened.
        A first sighting is recorded under the current turn and yields None;
        a repeat yields a suppression message naming the original turn.
        """
        # File entries share the cache with text entries: the path and mtime
        # hashes stand in for the prefix and suffix hashes.
        key = Fingerprint(
            byte_length=size,
            prefix_hash=hash(file_path),
            suffix_hash=hash(mtime_ns),
        )
        seen_at = self._cache.get(key)
        if seen_at is None:
            self._cache[key] = self._turn
            return None
        return (
            f"[File unchanged since turn #{seen_at} — "
            f"suppressed ({size:,} bytes)]"
        )

    def _fingerprint(self, text: str) -> Fingerprint:
        """Build a fingerprint from the text's length and its two 256-char ends."""
        head = text[:256]
        # For text of 256 characters or fewer this slice is the whole text.
        tail = text[-256:]
        return Fingerprint(
            byte_length=len(text),
            prefix_hash=hash(head),
            suffix_hash=hash(tail),
        )

    def clear(self):
        """Drop every entry and reset the turn counter to zero."""
        self._cache.clear()
        self._turn = 0

    @property
    def size(self) -> int:
        """Number of fingerprints currently stored."""
        return len(self._cache)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# Single shared instance, created when this module is imported and kept for
# as long as the process runs. Every caller of get_session_cache() gets it.
_session_cache = DedupCache()


def get_session_cache() -> DedupCache:
    """Return the module-level ``DedupCache`` instance shared by all callers."""
    return _session_cache
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Auto-discovery module for parser plugins.
|
|
3
|
+
|
|
4
|
+
Scans this directory for modules containing BaseParser subclasses,
|
|
5
|
+
instantiates them, and builds a registry keyed by tool name.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import importlib
|
|
9
|
+
import pkgutil
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
from acc.compaction.parsers.base import BaseParser
|
|
14
|
+
|
|
15
|
+
_registry: list[BaseParser] = []
|
|
16
|
+
_initialized = False
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _discover_parsers():
    """Import every module in this package and register its parser classes.

    Runs once; later calls return immediately. A class is registered when it
    is a concrete ``BaseParser`` subclass defined in the scanned module
    itself and declares a non-empty ``tool_names``.

    Instances are collected in a local list and published to ``_registry``
    only after the whole scan succeeds. If an import or constructor raises,
    the registry is left untouched, so a retry cannot register duplicates.
    """
    global _initialized
    if _initialized:
        return

    import inspect  # local import: only needed for this one-off scan

    package_dir = Path(__file__).parent
    discovered: list[BaseParser] = []
    for _finder, module_name, _is_pkg in pkgutil.iter_modules([str(package_dir)]):
        if module_name in ("base", "__init__"):
            continue
        module = importlib.import_module(f"acc.compaction.parsers.{module_name}")
        for attr_name in dir(module):
            attr = getattr(module, attr_name)
            if not (isinstance(attr, type) and issubclass(attr, BaseParser)):
                continue
            if attr is BaseParser or not attr.tool_names:
                continue  # skip the base class and parsers handling nothing
            if attr.__module__ != module.__name__:
                # Imported from elsewhere; it is registered when its own
                # module is scanned, so skipping avoids a duplicate.
                continue
            if inspect.isabstract(attr):
                continue  # instantiating it would raise TypeError
            discovered.append(attr())

    _registry.extend(discovered)
    _initialized = True
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_parser(command: str) -> Optional[BaseParser]:
    """
    Return the first registered parser whose ``can_handle(command)`` is true.

    Triggers parser discovery on first use. Returns None when no registered
    parser accepts the command.
    """
    _discover_parsers()
    return next(
        (candidate for candidate in _registry if candidate.can_handle(command)),
        None,
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_all_parsers() -> list[BaseParser]:
    """Return a new list holding every registered parser (discovery runs first)."""
    _discover_parsers()
    return _registry[:]
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Base class for all ACC output parsers."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from acc.core.logger import log
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class BaseParser(ABC):
    """
    Common interface for tool-specific output parsers.

    A subclass lists the command names it accepts in `tool_names` and
    implements `parse()`. `fallback()` is provided here so every parser can
    hand back the raw output when it cannot compress it.
    """

    tool_names: list[str] = []

    def can_handle(self, command: str) -> bool:
        """Return True when the normalised command name is in `tool_names`.

        Normalisation lowercases the command, removes a trailing ".exe" and
        keeps only the part after the last "/" and then the last "\\".
        """
        name = command.lower().removesuffix(".exe")
        # Reduce a path to its final component (e.g. /usr/bin/gcc -> gcc).
        name = name.rpartition("/")[2]
        name = name.rpartition("\\")[2]
        return name in self.tool_names

    @abstractmethod
    def parse(self, raw_output: str, **kwargs) -> str:
        """
        Compress the raw tool output and return the result as a string.

        Implementations are expected not to raise: on an internal failure
        they should return self.fallback(...) instead.
        """

    def fallback(self, raw_output: str, reason: str = "unknown") -> str:
        """Log a warning describing the failure and return `raw_output` unchanged."""
        details = {
            "parser": self.__class__.__name__,
            "reason": reason,
            "raw_length": len(raw_output),
        }
        log.warning("Parser fallback triggered", extra=details)
        return raw_output
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Parser for build system output: make, cmake, ninja, cargo, gradle, mvn."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from acc.compaction.parsers.base import BaseParser
|
|
5
|
+
|
|
6
|
+
# Lines to drop from build output
|
|
7
|
+
_DROP_PATTERNS = (
|
|
8
|
+
"Entering directory",
|
|
9
|
+
"Leaving directory",
|
|
10
|
+
"make[",
|
|
11
|
+
"Nothing to be done",
|
|
12
|
+
"is up to date",
|
|
13
|
+
"Compiling ",
|
|
14
|
+
"Linking ",
|
|
15
|
+
"Building ",
|
|
16
|
+
"Scanning dependencies",
|
|
17
|
+
"Built target ",
|
|
18
|
+
"Installing ",
|
|
19
|
+
"-- ", # cmake status messages
|
|
20
|
+
"[ ", # progress percentages like [ 5%]
|
|
21
|
+
"[ ",
|
|
22
|
+
"UP-TO-DATE",
|
|
23
|
+
"NO-SOURCE",
|
|
24
|
+
"Downloading ",
|
|
25
|
+
"Download ",
|
|
26
|
+
"> Task :", # gradle task lines
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
_ERROR_PATTERNS = (
|
|
30
|
+
"error:",
|
|
31
|
+
"Error:",
|
|
32
|
+
"ERROR:",
|
|
33
|
+
"FAILED",
|
|
34
|
+
"FAILURE",
|
|
35
|
+
"BUILD FAILED",
|
|
36
|
+
"BUILD FAILURE",
|
|
37
|
+
"fatal:",
|
|
38
|
+
"undefined reference",
|
|
39
|
+
"cannot find",
|
|
40
|
+
"not found",
|
|
41
|
+
"No rule to make target",
|
|
42
|
+
"*** ", # make error marker
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class BuildParser(BaseParser):
    """Compress build-tool output into a one-line OK or a short failure report."""

    tool_names = ["make", "cmake", "ninja", "cargo", "gradle", "mvn", "maven"]

    def parse(self, raw_output: str, **kwargs) -> str:
        """Return the compressed build output, or the raw output on any failure.

        ``**kwargs`` is accepted (and ignored) so the signature matches
        ``BaseParser.parse`` and callers may pass options uniformly.
        """
        try:
            return self._parse_impl(raw_output)
        except Exception as e:
            return self.fallback(raw_output, reason=str(e))

    @staticmethod
    def _is_noise(stripped: str) -> bool:
        """True when the stripped line starts with one of ``_DROP_PATTERNS``."""
        return stripped.startswith(tuple(_DROP_PATTERNS))

    @staticmethod
    def _is_error(stripped: str) -> bool:
        """True when the stripped line contains one of ``_ERROR_PATTERNS``."""
        return any(ep in stripped for ep in _ERROR_PATTERNS)

    def _parse_impl(self, raw_output: str) -> str:
        """Classify the build as OK or failed and format accordingly."""
        lines = raw_output.split("\n")

        # Only the last 50 lines decide success or failure. Noise lines are
        # excluded with the same rule the failure report uses; otherwise a
        # status line such as cmake's "-- Looking for X - not found" would
        # flag a successful build as failed with zero reported errors.
        has_failure = any(
            self._is_error(stripped) and not self._is_noise(stripped)
            for stripped in (line.strip() for line in lines[-50:])
        )

        if not has_failure:
            # Successful build: report only how many non-blank lines were hidden.
            total_lines = sum(1 for line in lines if line.strip())
            return f"[BUILD OK] Completed successfully ({total_lines} output lines suppressed)"

        return self._extract_failure(lines)

    def _extract_failure(self, lines: list[str]) -> str:
        """Build the failure report: header, context of the first error, error list."""
        error_lines = []
        context_lines = []
        total_lines = len(lines)
        dropped = 0

        # First pass: count noise lines and collect every error line with its index.
        error_indices = []
        for i, line in enumerate(lines):
            stripped = line.strip()
            if not stripped:
                continue

            if self._is_noise(stripped):
                dropped += 1
                continue

            if self._is_error(stripped):
                error_indices.append(i)
                error_lines.append(stripped)

        # Second pass: context around the first error, from 3 lines before
        # it to 7 lines after it, skipping blank and noise lines.
        if error_indices:
            first_error = error_indices[0]
            start = max(0, first_error - 3)
            end = min(len(lines), first_error + 8)
            for i in range(start, end):
                stripped = lines[i].strip()
                if stripped and not self._is_noise(stripped):
                    context_lines.append(stripped)

        result = [
            f"[BUILD FAILED] {len(error_lines)} error(s) in {total_lines} output lines "
            f"({dropped} noise lines suppressed)"
        ]

        if context_lines:
            result.append("ERROR CONTEXT:")
            result.extend(f" {c}" for c in context_lines[:20])

        # With more than one error, list them all (capped at 15).
        if len(error_lines) > 1:
            result.append(f"ALL ERRORS ({len(error_lines)}):")
            result.extend(f" {e}" for e in error_lines[:15])
            if len(error_lines) > 15:
                result.append(f" ... and {len(error_lines) - 15} more")

        return "\n".join(result)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"""Parser for compiler output: gcc, g++, clang, clang++, cross-compilers."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
from acc.compaction.parsers.base import BaseParser
|
|
6
|
+
|
|
7
|
+
# Matches: file:line:col: severity: message
|
|
8
|
+
_GCC_DIAG_RE = re.compile(
|
|
9
|
+
r"^(.+?):(\d+):(\d+):\s+(error|warning|fatal error):\s+(.+)$"
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
# Lines to drop entirely
|
|
13
|
+
_DROP_PREFIXES = (
|
|
14
|
+
"In file included from",
|
|
15
|
+
" from",
|
|
16
|
+
"In member function",
|
|
17
|
+
"In function",
|
|
18
|
+
"In instantiation of",
|
|
19
|
+
" required from",
|
|
20
|
+
" required by",
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CompilerParser(BaseParser):
    """Compress gcc/clang-style compiler output into error and warning lists."""

    tool_names = [
        "gcc", "g++", "cc", "c++",
        "clang", "clang++",
        "arm-none-eabi-gcc", "arm-none-eabi-g++",
        "aarch64-linux-gnu-gcc",
        "x86_64-linux-gnu-gcc",
    ]

    # Substrings marking an error line that carries no file:line:col prefix,
    # e.g. "gcc: fatal error: no input files",
    # "collect2: error: ld returned 1 exit status" or a linker complaint.
    _UNLOCATED_ERROR_MARKERS = (
        ": error:",
        ": fatal error:",
        "undefined reference to",
        "multiple definition of",
    )

    def parse(self, raw_output: str, **kwargs) -> str:
        """Return the compressed diagnostics, or the raw output on any failure.

        ``**kwargs`` is accepted (and ignored) so the signature matches
        ``BaseParser.parse`` and callers may pass options uniformly.
        """
        try:
            return self._parse_impl(raw_output)
        except Exception as e:
            return self.fallback(raw_output, reason=str(e))

    def _parse_impl(self, raw_output: str) -> str:
        """Try JSON diagnostics first, then fall back to text parsing."""
        stripped = raw_output.strip()
        if stripped.startswith("[") or stripped.startswith("{"):
            try:
                return self._parse_json(stripped)
            except (json.JSONDecodeError, KeyError, TypeError):
                pass  # not usable JSON diagnostics; treat as plain text

        return self._parse_text(raw_output)

    def _parse_json(self, raw_output: str) -> str:
        """Format diagnostics given as a JSON list, or a dict with "diagnostics"."""
        data = json.loads(raw_output)
        if isinstance(data, dict):
            data = data.get("diagnostics", [data])
        if not isinstance(data, list):
            data = [data]

        errors = []
        warnings = []
        for diag in data:
            severity = diag.get("severity", "").lower()
            msg = diag.get("message", "")
            loc = diag.get("location", {})
            file_path = loc.get("file", "?")
            line = loc.get("line", "?")
            entry = f"{file_path}:{line}: {msg}"
            if "error" in severity:
                errors.append(entry)
            elif "warning" in severity:
                warnings.append(entry)

        return self._format_output(errors, warnings)

    def _parse_text(self, raw_output: str) -> str:
        """Extract errors and warnings from plain-text compiler output.

        Errors with an identical message are reported once, at their first
        location, with a "(×N)" suffix. Error lines without a file:line:col
        prefix (compiler driver and linker messages) are kept verbatim.
        """
        # Each error is stored as (display entry, dedup key). The key is
        # kept explicitly because it cannot be recovered from the entry
        # when the message itself contains ": ".
        errors: list[tuple[str, str]] = []
        warnings = []
        seen_errors: dict[str, int] = {}  # dedup key -> occurrence count

        def record_error(entry: str, key: str) -> None:
            if key in seen_errors:
                seen_errors[key] += 1
            else:
                seen_errors[key] = 1
                errors.append((entry, key))

        for line in raw_output.split("\n"):
            stripped = line.strip()

            # Skip include chains, source excerpts/carets and notes.
            if any(stripped.startswith(p) for p in _DROP_PREFIXES):
                continue
            if stripped.startswith("^") or stripped.startswith("|"):
                continue
            if stripped.startswith("note:") or ": note:" in stripped:
                continue

            m = _GCC_DIAG_RE.match(stripped)
            if m:
                file_path, lineno, col, severity, message = m.groups()
                entry = f"{file_path}:{lineno}:{col}: {message}"
                if severity in ("error", "fatal error"):
                    record_error(entry, message)
                elif severity == "warning":
                    warnings.append(entry)
            elif any(marker in stripped for marker in self._UNLOCATED_ERROR_MARKERS):
                # Without this a failed link or driver error would be
                # reported as "0 error(s)".
                record_error(stripped, stripped)

        final_errors = [
            f"{entry} (×{seen_errors[key]})" if seen_errors[key] > 1 else entry
            for entry, key in errors
        ]

        return self._format_output(final_errors, warnings)

    def _format_output(self, errors: list[str], warnings: list[str]) -> str:
        """Render the count header, up to 30 errors, and the warnings.

        Ten or fewer warnings are all listed; above that only the first 5.
        """
        result = [f"[BUILD] {len(errors)} error(s), {len(warnings)} warning(s)"]

        if errors:
            result.append("ERRORS:")
            result.extend(f" {e}" for e in errors[:30])

        if warnings:
            if len(warnings) > 10:
                result.append(f"WARNINGS: {len(warnings)} total (showing first 5)")
                shown = warnings[:5]
            else:
                result.append("WARNINGS:")
                shown = warnings
            result.extend(f" {w}" for w in shown)

        return "\n".join(result)
|