headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/transforms/log_compressor.py
@@ -0,0 +1,529 @@
"""Log/build output compressor for test and compiler output.

This module compresses build and test output which can be 10,000+ lines
with only 5-10 actual errors. Typical compression: 10-50x.

Supported formats:
- pytest output
- npm/yarn output
- cargo/rustc output
- make/gcc output
- generic log format (ERROR, WARN, INFO)

Compression Strategy:
1. Detect log format (pytest, npm, cargo, etc.)
2. Extract all ERROR/FAIL lines with context
3. Extract first stack trace completely
4. Deduplicate repeated warnings
5. Summarize: [247 INFO lines, 12 WARN lines omitted]

Key Patterns to Preserve:
- First error (often root cause)
- Last error (sometimes the real failure)
- Stack traces
- Exit codes
- Test summary lines
"""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from enum import Enum


class LogFormat(Enum):
    """Detected log format."""

    PYTEST = "pytest"
    NPM = "npm"
    CARGO = "cargo"
    MAKE = "make"
    JEST = "jest"
    GENERIC = "generic"


class LogLevel(Enum):
    """Log level for categorization."""

    ERROR = "error"
    FAIL = "fail"
    WARN = "warn"
    INFO = "info"
    DEBUG = "debug"
    TRACE = "trace"
    UNKNOWN = "unknown"


@dataclass(eq=False)
class LogLine:
    """A single log line with metadata."""

    line_number: int
    content: str
    level: LogLevel = LogLevel.UNKNOWN
    is_stack_trace: bool = False
    is_summary: bool = False
    score: float = 0.0

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, LogLine):
            return NotImplemented
        return self.line_number == other.line_number

    def __hash__(self) -> int:
        return hash(self.line_number)
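# --- Editor's illustrative sketch, not part of the wheel: LogLine opts out of
# dataclass-generated equality (eq=False) and keys __eq__/__hash__ on
# line_number alone, so the set() dedupe used later treats re-selected copies
# of the same line as a single entry.
_dup_a = LogLine(line_number=7, content="ERROR: boom")
_dup_b = LogLine(line_number=7, content="ERROR: boom")
assert _dup_a == _dup_b and len({_dup_a, _dup_b}) == 1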


@dataclass
class LogCompressorConfig:
    """Configuration for log compression."""

    # Error handling
    max_errors: int = 10
    error_context_lines: int = 3
    keep_first_error: bool = True
    keep_last_error: bool = True

    # Stack trace handling
    max_stack_traces: int = 3
    stack_trace_max_lines: int = 20

    # Warning handling
    max_warnings: int = 5
    dedupe_warnings: bool = True

    # Summary handling
    keep_summary_lines: bool = True

    # Global limits
    max_total_lines: int = 100

    # CCR integration
    enable_ccr: bool = True
    min_lines_for_ccr: int = 50
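# --- Editor's illustrative sketch, not part of the wheel: a tighter
# configuration such as a CI pipeline might use, keeping only errors and the
# summary. Field names are the real ones above; the values are arbitrary.
_ci_config = LogCompressorConfig(
    max_errors=5,
    max_warnings=0,
    max_total_lines=40,
    enable_ccr=False,
)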


class LogCompressor:
    """Compresses build/test log output.

    Example:
        >>> compressor = LogCompressor()
        >>> result = compressor.compress(pytest_output)
        >>> print(result.compressed)  # Just errors + summary
    """

    # Format detection patterns
    _FORMAT_PATTERNS = {
        LogFormat.PYTEST: [
            re.compile(r"^={3,} (FAILURES|ERRORS|test session|short test summary)"),
            re.compile(r"^(PASSED|FAILED|ERROR|SKIPPED)\s+\["),
            re.compile(r"^collected \d+ items?"),
        ],
        LogFormat.NPM: [
            re.compile(r"^npm (ERR!|WARN|info|http)"),
            re.compile(r"^(>|added|removed) .+ packages?"),
        ],
        LogFormat.CARGO: [
            re.compile(r"^\s*(Compiling|Finished|Running|error\[E\d+\])"),
            re.compile(r"^warning: .+"),
        ],
        LogFormat.JEST: [
            re.compile(r"^(PASS|FAIL)\s+.+\.test\.(js|ts)"),
            re.compile(r"^Test Suites:"),
        ],
        LogFormat.MAKE: [
            re.compile(r"^make(\[\d+\])?: "),
            re.compile(r"^(gcc|g\+\+|clang).*-o "),
        ],
    }

    # Level detection patterns
    _LEVEL_PATTERNS = {
        LogLevel.ERROR: re.compile(r"\b(ERROR|error|Error|FATAL|fatal|Fatal|CRITICAL|critical)\b"),
        LogLevel.FAIL: re.compile(r"\b(FAIL|FAILED|fail|failed|Fail|Failed)\b"),
        LogLevel.WARN: re.compile(r"\b(WARN|WARNING|warn|warning|Warn|Warning)\b"),
        LogLevel.INFO: re.compile(r"\b(INFO|info|Info)\b"),
        LogLevel.DEBUG: re.compile(r"\b(DEBUG|debug|Debug)\b"),
        LogLevel.TRACE: re.compile(r"\b(TRACE|trace|Trace)\b"),
    }

    # Stack trace patterns
    _STACK_TRACE_PATTERNS = [
        re.compile(r"^\s*Traceback \(most recent call last\)"),
        re.compile(r'^\s*File ".+", line \d+'),
        re.compile(r"^\s*at .+\(.+:\d+:\d+\)"),  # JS stack trace
        re.compile(r"^\s+at [\w.$]+\("),  # Java stack trace
        re.compile(r"^\s*--> .+:\d+:\d+"),  # Rust error
        re.compile(r"^\s*\d+:\s+0x[0-9a-f]+"),  # Go stack trace
    ]

    # Summary line patterns
    _SUMMARY_PATTERNS = [
        re.compile(r"^={3,}"),  # pytest separators
        re.compile(r"^-{3,}"),
        re.compile(r"^\d+ (passed|failed|skipped|error|warning)"),
        re.compile(r"^(Tests?|Suites?):?\s+\d+"),
        re.compile(r"^(TOTAL|Total|Summary)"),
        re.compile(r"^(Build|Compile|Test).*(succeeded|failed|complete)"),
    ]

    def __init__(self, config: LogCompressorConfig | None = None):
        """Initialize log compressor.

        Args:
            config: Compression configuration.
        """
        self.config = config or LogCompressorConfig()

    def compress(self, content: str, context: str = "") -> LogCompressionResult:
        """Compress log output.

        Args:
            content: Raw log output.
            context: User query context (unused for now).

        Returns:
            LogCompressionResult with compressed output and metadata.
        """
        lines = content.split("\n")

        if len(lines) < self.config.min_lines_for_ccr:
            return LogCompressionResult(
                compressed=content,
                original=content,
                original_line_count=len(lines),
                compressed_line_count=len(lines),
                format_detected=LogFormat.GENERIC,
                compression_ratio=1.0,
            )

        # Detect format
        log_format = self._detect_format(lines)

        # Parse and categorize lines
        log_lines = self._parse_lines(lines)

        # Select important lines
        selected = self._select_lines(log_lines)

        # Format output with summaries
        compressed, stats = self._format_output(selected, log_lines)

        ratio = len(compressed) / max(len(content), 1)

        # Store in CCR if significant compression
        cache_key = None
        if self.config.enable_ccr and ratio < 0.5:
            cache_key = self._store_in_ccr(content, compressed, len(lines))
            if cache_key:
                compressed += f"\n[{len(lines)} lines compressed. hash={cache_key}]"

        return LogCompressionResult(
            compressed=compressed,
            original=content,
            original_line_count=len(lines),
            compressed_line_count=len(selected),
            format_detected=log_format,
            compression_ratio=ratio,
            cache_key=cache_key,
            stats=stats,
        )
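    # Editor's note (illustrative, not part of the wheel): on a 1,000-line
    # pytest log that boils down to ~40 kept lines, the character ratio is
    # typically well under 0.5, so the CCR branch stores the original and,
    # if the store is available, the output gains a trailing marker, e.g.
    #     result = LogCompressor().compress(big_pytest_log)  # hypothetical input
    #     result.compressed  # "...\n[1000 lines compressed. hash=abc123]"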

    def _detect_format(self, lines: list[str]) -> LogFormat:
        """Detect the log format."""
        sample = lines[:100]  # Check first 100 lines

        format_scores: dict[LogFormat, int] = {}
        for log_format, patterns in self._FORMAT_PATTERNS.items():
            score = 0
            for line in sample:
                for pattern in patterns:
                    if pattern.search(line):
                        score += 1
                        break
            if score > 0:
                format_scores[log_format] = score

        if not format_scores:
            return LogFormat.GENERIC

        return max(format_scores, key=lambda k: format_scores[k])
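    # Worked example (editor's note): in a sample containing
    #     "collected 12 items" and "npm WARN deprecated pkg@1.0.0",
    # PYTEST and NPM each score 1; max() resolves the tie by the insertion
    # order of format_scores (PYTEST is checked first), so a confident call
    # needs more matching lines from one format than the others.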

    def _parse_lines(self, lines: list[str]) -> list[LogLine]:
        """Parse lines and categorize by level."""
        log_lines: list[LogLine] = []
        in_stack_trace = False
        stack_trace_lines = 0

        for i, line in enumerate(lines):
            log_line = LogLine(line_number=i, content=line)

            # Detect level
            for level, pattern in self._LEVEL_PATTERNS.items():
                if pattern.search(line):
                    log_line.level = level
                    break

            # Detect stack trace
            for pattern in self._STACK_TRACE_PATTERNS:
                if pattern.search(line):
                    in_stack_trace = True
                    stack_trace_lines = 0
                    break

            if in_stack_trace:
                log_line.is_stack_trace = True
                stack_trace_lines += 1
                # End stack trace after max lines or empty line
                if stack_trace_lines > self.config.stack_trace_max_lines or not line.strip():
                    in_stack_trace = False

            # Detect summary lines
            for pattern in self._SUMMARY_PATTERNS:
                if pattern.search(line):
                    log_line.is_summary = True
                    break

            # Score line by importance
            log_line.score = self._score_line(log_line)

            log_lines.append(log_line)

        return log_lines

    def _score_line(self, log_line: LogLine) -> float:
        """Score a line by importance."""
        score = 0.0

        # Level-based scoring
        level_scores = {
            LogLevel.ERROR: 1.0,
            LogLevel.FAIL: 1.0,
            LogLevel.WARN: 0.5,
            LogLevel.INFO: 0.1,
            LogLevel.DEBUG: 0.05,
            LogLevel.TRACE: 0.02,
            LogLevel.UNKNOWN: 0.1,
        }
        score += level_scores.get(log_line.level, 0.1)

        # Boost stack traces
        if log_line.is_stack_trace:
            score += 0.3

        # Boost summary lines
        if log_line.is_summary:
            score += 0.4

        return min(1.0, score)
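    # Worked example (editor's note): an ERROR line (1.0) inside a stack
    # trace (+0.3) would sum to 1.3 but is clamped to 1.0; a WARN summary
    # line scores 0.5 + 0.4 = 0.9; a bare INFO line scores 0.1.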

    def _select_lines(self, log_lines: list[LogLine]) -> list[LogLine]:
        """Select important lines to keep."""
        selected: list[LogLine] = []

        # Group by category
        errors: list[LogLine] = []
        fails: list[LogLine] = []
        warnings: list[LogLine] = []
        stack_traces: list[list[LogLine]] = []
        summaries: list[LogLine] = []
        current_stack: list[LogLine] = []

        for log_line in log_lines:
            if log_line.level == LogLevel.ERROR:
                errors.append(log_line)
            elif log_line.level == LogLevel.FAIL:
                fails.append(log_line)
            elif log_line.level == LogLevel.WARN:
                warnings.append(log_line)

            if log_line.is_stack_trace:
                current_stack.append(log_line)
            elif current_stack:
                stack_traces.append(current_stack)
                current_stack = []

            if log_line.is_summary:
                summaries.append(log_line)

        if current_stack:
            stack_traces.append(current_stack)

        # Select errors (first, last, highest scoring)
        if errors:
            selected_errors = self._select_with_first_last(errors, self.config.max_errors)
            selected.extend(selected_errors)

        # Select fails
        if fails:
            selected_fails = self._select_with_first_last(fails, self.config.max_errors)
            selected.extend(selected_fails)

        # Select warnings (dedupe if configured)
        if warnings:
            if self.config.dedupe_warnings:
                warnings = self._dedupe_similar(warnings)
            selected.extend(warnings[: self.config.max_warnings])

        # Select stack traces
        for stack in stack_traces[: self.config.max_stack_traces]:
            selected.extend(stack[: self.config.stack_trace_max_lines])

        # Always include summary lines
        if self.config.keep_summary_lines:
            selected.extend(summaries)

        # Add context lines around errors
        selected = self._add_context(log_lines, selected)

        # Sort by line number and dedupe
        selected = sorted(set(selected), key=lambda x: x.line_number)

        # Limit total lines
        if len(selected) > self.config.max_total_lines:
            # Keep most important lines
            selected = sorted(selected, key=lambda x: x.score, reverse=True)
            selected = selected[: self.config.max_total_lines]
            selected = sorted(selected, key=lambda x: x.line_number)

        return selected

    def _select_with_first_last(self, lines: list[LogLine], max_count: int) -> list[LogLine]:
        """Select lines keeping first and last."""
        if len(lines) <= max_count:
            return lines

        selected: list[LogLine] = []

        if self.config.keep_first_error and lines:
            selected.append(lines[0])

        if self.config.keep_last_error and lines and lines[-1] not in selected:
            selected.append(lines[-1])

        # Fill remaining with highest scoring
        remaining = max_count - len(selected)
        if remaining > 0:
            candidates = [line for line in lines if line not in selected]
            candidates = sorted(candidates, key=lambda x: x.score, reverse=True)
            selected.extend(candidates[:remaining])

        return selected
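    # Worked example (editor's note): with max_count=3 and five error lines
    # e0..e4, e0 (first) and e4 (last) are pinned, and the one remaining slot
    # goes to the highest-scoring of e1..e3.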

    def _dedupe_similar(self, lines: list[LogLine]) -> list[LogLine]:
        """Remove duplicate/similar lines."""
        seen_patterns: set[str] = set()
        deduped: list[LogLine] = []

        for line in lines:
            # Normalize: remove numbers, paths for comparison
            normalized = re.sub(r"\d+", "N", line.content)
            normalized = re.sub(r"/[\w/]+/", "/PATH/", normalized)
            normalized = re.sub(r"0x[0-9a-f]+", "ADDR", normalized)

            if normalized not in seen_patterns:
                seen_patterns.add(normalized)
                deduped.append(line)

        return deduped
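    # Worked example (editor's note): both
    #     "WARN retry 3 of 5 for /tmp/build/job7/log"
    #     "WARN retry 4 of 5 for /tmp/build/job9/log"
    # normalize to "WARN retry N of N for /PATH/log", so only the first
    # occurrence is kept.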

    def _add_context(self, all_lines: list[LogLine], selected: list[LogLine]) -> list[LogLine]:
        """Add context lines around selected lines."""
        selected_indices = {line.line_number for line in selected}
        context_indices: set[int] = set()

        for idx in selected_indices:
            # Add lines before
            for i in range(max(0, idx - self.config.error_context_lines), idx):
                context_indices.add(i)
            # Add lines after
            for i in range(
                idx + 1,
                min(len(all_lines), idx + self.config.error_context_lines + 1),
            ):
                context_indices.add(i)

        # Add context lines to selected
        for idx in context_indices:
            if idx not in selected_indices and idx < len(all_lines):
                selected.append(all_lines[idx])

        return selected

    def _format_output(
        self, selected: list[LogLine], all_lines: list[LogLine]
    ) -> tuple[str, dict[str, int]]:
        """Format selected lines with summary stats."""
        # Count categories
        stats: dict[str, int] = {
            "errors": sum(1 for line in all_lines if line.level == LogLevel.ERROR),
            "fails": sum(1 for line in all_lines if line.level == LogLevel.FAIL),
            "warnings": sum(1 for line in all_lines if line.level == LogLevel.WARN),
            "info": sum(1 for line in all_lines if line.level == LogLevel.INFO),
            "total": len(all_lines),
            "selected": len(selected),
        }

        # Build output
        output_lines = [line.content for line in selected]

        # Add summary of omitted lines
        omitted = len(all_lines) - len(selected)
        if omitted > 0:
            summary_parts = []
            for level_name, count in [
                ("ERROR", stats["errors"]),
                ("FAIL", stats["fails"]),
                ("WARN", stats["warnings"]),
                ("INFO", stats["info"]),
            ]:
                if count > 0:
                    summary_parts.append(f"{count} {level_name}")

            if summary_parts:
                summary = f"[{omitted} lines omitted: {', '.join(summary_parts)}]"
                output_lines.append(summary)

        return "\n".join(output_lines), stats
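    # Worked example (editor's note): shrinking a 300-line log to 25 kept
    # lines might append "[275 lines omitted: 3 ERROR, 12 WARN, 240 INFO]".
    # Note the per-level counts are totals over the whole log, not counts of
    # the omitted lines alone.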

    def _store_in_ccr(self, original: str, compressed: str, original_count: int) -> str | None:
        """Store original in CCR for later retrieval."""
        try:
            from ..cache.compression_store import get_compression_store

            store = get_compression_store()
            return store.store(
                original,
                compressed,
                original_item_count=original_count,
            )
        except ImportError:
            return None
        except Exception:
            return None


@dataclass
class LogCompressionResult:
    """Result of log compression."""

    compressed: str
    original: str
    original_line_count: int
    compressed_line_count: int
    format_detected: LogFormat
    compression_ratio: float
    cache_key: str | None = None
    stats: dict[str, int] = field(default_factory=dict)

    @property
    def tokens_saved_estimate(self) -> int:
        """Estimate tokens saved (rough: 1 token per 4 chars)."""
        chars_saved = len(self.original) - len(self.compressed)
        return max(0, chars_saved // 4)

    @property
    def lines_omitted(self) -> int:
        """Number of lines omitted."""
        return self.original_line_count - self.compressed_line_count
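
# --- Editor's illustrative usage sketch, not part of the wheel. The input is
# synthetic; any log of at least min_lines_for_ccr (50) lines goes through the
# full detect/parse/select/format pipeline. If the CCR store cannot be
# imported, _store_in_ccr silently returns None and compression still works.
if __name__ == "__main__":
    noisy_log = "\n".join(
        [f"INFO step {i} ok" for i in range(80)]
        + ["ERROR build failed: missing symbol", "1 failed, 79 passed"]
    )
    result = LogCompressor().compress(noisy_log)
    print(result.format_detected, round(result.compression_ratio, 3))
    print(f"{result.lines_omitted} lines omitted, "
          f"~{result.tokens_saved_estimate} tokens saved")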