codetool-shell 0.1.1__py3-none-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codetool_shell/__init__.py +11 -0
- codetool_shell/api.py +59 -0
- codetool_shell/bin/windows-arm64/codetool-shell-rust.exe +0 -0
- codetool_shell/filters/__init__.py +14 -0
- codetool_shell/filters/build_compiler/__init__.py +7 -0
- codetool_shell/filters/build_compiler/detector.py +412 -0
- codetool_shell/filters/build_compiler/reducer.py +166 -0
- codetool_shell/filters/build_compiler/summary.py +617 -0
- codetool_shell/filters/ci_job_log/__init__.py +7 -0
- codetool_shell/filters/ci_job_log/detector.py +64 -0
- codetool_shell/filters/ci_job_log/reducer.py +99 -0
- codetool_shell/filters/ci_job_log/summary.py +243 -0
- codetool_shell/filters/diff/__init__.py +7 -0
- codetool_shell/filters/diff/detector.py +136 -0
- codetool_shell/filters/diff/reducer.py +308 -0
- codetool_shell/filters/generic_log/__init__.py +7 -0
- codetool_shell/filters/generic_log/detector.py +175 -0
- codetool_shell/filters/generic_log/reducer.py +99 -0
- codetool_shell/filters/generic_log/summary.py +161 -0
- codetool_shell/filters/git.py +514 -0
- codetool_shell/filters/html_cleanup/__init__.py +7 -0
- codetool_shell/filters/html_cleanup/detector.py +136 -0
- codetool_shell/filters/html_cleanup/reducer.py +27 -0
- codetool_shell/filters/html_cleanup/summary.py +422 -0
- codetool_shell/filters/json_payload/__init__.py +7 -0
- codetool_shell/filters/json_payload/detector.py +62 -0
- codetool_shell/filters/json_payload/reducer.py +81 -0
- codetool_shell/filters/json_payload/summary.py +233 -0
- codetool_shell/filters/listing/__init__.py +7 -0
- codetool_shell/filters/listing/detector.py +294 -0
- codetool_shell/filters/listing/reducer.py +30 -0
- codetool_shell/filters/log_template/__init__.py +7 -0
- codetool_shell/filters/log_template/constants.py +76 -0
- codetool_shell/filters/log_template/detector.py +331 -0
- codetool_shell/filters/log_template/reducer.py +78 -0
- codetool_shell/filters/log_template/template.py +280 -0
- codetool_shell/filters/log_template/types.py +21 -0
- codetool_shell/filters/opaque_payload/__init__.py +7 -0
- codetool_shell/filters/opaque_payload/detector.py +563 -0
- codetool_shell/filters/opaque_payload/reducer.py +142 -0
- codetool_shell/filters/opaque_payload/summary.py +61 -0
- codetool_shell/filters/package_manager/__init__.py +7 -0
- codetool_shell/filters/package_manager/detector.py +220 -0
- codetool_shell/filters/package_manager/reducer.py +110 -0
- codetool_shell/filters/package_manager/summary.py +172 -0
- codetool_shell/filters/pipeline.py +65 -0
- codetool_shell/filters/rg.py +250 -0
- codetool_shell/filters/system_output/__init__.py +7 -0
- codetool_shell/filters/system_output/detector.py +600 -0
- codetool_shell/filters/system_output/reducer.py +331 -0
- codetool_shell/filters/system_output/summary.py +164 -0
- codetool_shell/filters/table/__init__.py +7 -0
- codetool_shell/filters/table/detector.py +244 -0
- codetool_shell/filters/table/reducer.py +57 -0
- codetool_shell/filters/table/summary.py +37 -0
- codetool_shell/filters/test_runner/__init__.py +7 -0
- codetool_shell/filters/test_runner/ansi.py +80 -0
- codetool_shell/filters/test_runner/detector.py +409 -0
- codetool_shell/filters/test_runner/reducer.py +288 -0
- codetool_shell/filters/test_runner/summary.py +449 -0
- codetool_shell/filters/text.py +38 -0
- codetool_shell/filters/traceback/__init__.py +7 -0
- codetool_shell/filters/traceback/detector.py +209 -0
- codetool_shell/filters/traceback/reducer.py +141 -0
- codetool_shell/filters/traceback/summary.py +122 -0
- codetool_shell/filters/tree.py +59 -0
- codetool_shell/py.typed +0 -0
- codetool_shell/python_backend.py +38 -0
- codetool_shell/rust_backend.py +254 -0
- codetool_shell-0.1.1.dist-info/METADATA +152 -0
- codetool_shell-0.1.1.dist-info/RECORD +72 -0
- codetool_shell-0.1.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""Reduce unified diff output while preserving semantic change lines."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from ..text import choose_smaller_lines, join_preserving_final_newline, split_preserving_final_newline
|
|
8
|
+
from .detector import find_diff_blocks, hunk_prefix_columns, is_hunk_header
|
|
9
|
+
|
|
10
|
+
# File basenames that `_is_lockfile_path` treats as dependency lockfiles.
_LOCKFILE_NAMES = frozenset(
    (
        "Cargo.lock",
        "uv.lock",
        "package-lock.json",
        "npm-shrinkwrap.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "poetry.lock",
        "Pipfile.lock",
        "composer.lock",
        "Gemfile.lock",
        "go.sum",
    )
)

# Shape of the "… N context line(s)" note this module writes for skipped context.
_CONTEXT_SUMMARY_RE = re.compile(r"^… (?P<count>\d+) context lines?$")

# Shape of the "… N binary patch payload line(s)" note this module writes.
_BINARY_PAYLOAD_SUMMARY_RE = re.compile(r"^… (?P<count>\d+) binary patch payload lines?$")

# Characters ignored when two hunk sides are compared for whitespace-only changes.
_ASCII_WHITESPACE = frozenset(" \t\n\r\f\v")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def compress_diff_output(text: str) -> str:
    """Compress each diff block found in ``text`` and pass other lines through.

    ``text`` is returned unchanged when ``find_diff_blocks`` reports no
    blocks. Otherwise every reported range is replaced by whatever
    ``choose_smaller_lines`` selects between the original block and its
    compressed form, and the result is rebuilt with
    ``join_preserving_final_newline`` using the flag obtained from the split.
    """
    source_lines, had_final_newline = split_preserving_final_newline(text)
    diff_ranges = find_diff_blocks(source_lines)
    if not diff_ranges:
        return text

    result: list[str] = []
    consumed = 0
    for span in diff_ranges:
        # Lines between the previous block and this one are copied verbatim.
        result += source_lines[consumed : span.start]
        original = source_lines[span.start : span.end]
        result += choose_smaller_lines(original, _compress_diff_block(original))
        consumed = span.end
    # Trailing lines after the last block are copied verbatim as well.
    result += source_lines[consumed:]

    return join_preserving_final_newline(result, had_final_newline)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _compress_diff_block(block: list[str]) -> list[str]:
    """Return a reduced copy of one diff block.

    Walks ``block`` once and:

    * keeps every hunk header and replaces the hunk body that follows it with
      the output of ``_compress_hunk_body``;
    * drops lines that start with ``index ``;
    * remembers the most recent non-empty path reported by
      ``_path_from_metadata_line`` and hands it to the hunk compressor;
    * keeps ``literal ``/``delta `` lines but replaces the lines after them,
      up to the next line accepted by ``_is_binary_payload_end``, with a
      single count note;
    * copies every other line unchanged.
    """
    compressed: list[str] = []
    active_path = ""
    total = len(block)
    pos = 0

    while pos < total:
        current = block[pos]
        pos += 1

        if is_hunk_header(current):
            compressed.append(current)
            columns = hunk_prefix_columns(current)
            body_start = pos
            while pos < total and _is_hunk_body_line(block[pos], columns):
                pos += 1
            compressed.extend(
                _compress_hunk_body(block[body_start:pos], active_path, columns)
            )
            continue

        if current.startswith("index "):
            # These lines are omitted from the output entirely.
            continue

        detected = _path_from_metadata_line(current)
        if detected:
            active_path = detected

        compressed.append(current)
        if current.startswith(("literal ", "delta ")):
            payload_start = pos
            while pos < total and not _is_binary_payload_end(block[pos]):
                pos += 1
            skipped = pos - payload_start
            if skipped:
                suffix = "" if skipped == 1 else "s"
                compressed.append(f"… {skipped} binary patch payload line{suffix}")

    return compressed
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _compress_hunk_body(
    body: list[str],
    current_path: str,
    prefix_columns: int,
) -> list[str]:
    """Reduce the body lines of a single hunk.

    Two cases collapse the whole body into one summary line followed by any
    ``\\ No newline`` markers collected by ``_hunk_stats``:

    * ``current_path`` is a lockfile and the hunk changes at least 20 lines;
    * the hunk is whitespace-only according to its stats.

    Otherwise every line with ``+`` or ``-`` in its first ``prefix_columns``
    characters, and every ``\\ No newline`` marker, is kept. Each run of other
    lines is replaced by one ``… N context line(s)`` note. Notes of that shape
    that are already present in ``body`` are folded into the running count.
    """
    stats = _hunk_stats(body, prefix_columns)

    summary_kind = None
    if _is_lockfile_path(current_path) and stats.changed_lines >= 20:
        summary_kind = "lockfile"
    elif stats.is_whitespace_only:
        summary_kind = "whitespace-only"
    if summary_kind is not None:
        return [stats.summary(summary_kind), *stats.no_newline_markers]

    def context_note(count: int) -> list[str]:
        # Zero pending lines produce no note at all.
        if not count:
            return []
        suffix = "" if count == 1 else "s"
        return [f"… {count} context line{suffix}"]

    kept: list[str] = []
    pending = 0
    for entry in body:
        already_summarized = _context_summary_count(entry)
        if already_summarized is not None:
            pending += already_summarized
            continue

        gutter = entry[:prefix_columns]
        is_plain_context = (
            not entry.startswith("\\ No newline")
            and "+" not in gutter
            and "-" not in gutter
        )
        if is_plain_context:
            pending += 1
            continue

        kept.extend(context_note(pending))
        pending = 0
        kept.append(entry)

    kept.extend(context_note(pending))
    return kept
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class _HunkStats:
    """Counts and reconstructed text for one hunk body.

    All constructor arguments are keyword-only and stored unchanged:

    * ``added`` / ``deleted`` / ``context`` -- line counts;
    * ``old_text`` / ``new_text`` -- the two sides compared by
      ``is_whitespace_only``;
    * ``can_check_whitespace_only`` -- gate for that comparison;
    * ``no_newline_markers`` -- marker lines to re-emit after a summary.
    """

    def __init__(
        self,
        *,
        added: int,
        deleted: int,
        context: int,
        old_text: str,
        new_text: str,
        can_check_whitespace_only: bool,
        no_newline_markers: list[str],
    ) -> None:
        self.added, self.deleted, self.context = added, deleted, context
        self.old_text, self.new_text = old_text, new_text
        self.can_check_whitespace_only = can_check_whitespace_only
        self.no_newline_markers = no_newline_markers

    @property
    def changed_lines(self) -> int:
        """Number of added plus deleted lines."""
        total = self.added
        total += self.deleted
        return total

    @property
    def is_whitespace_only(self) -> bool:
        """Whether both sides are equal once ASCII whitespace is removed.

        Always ``False`` when the check is disabled or when the hunk has no
        additions or no deletions.
        """
        comparable = self.can_check_whitespace_only and self.added and self.deleted
        if not comparable:
            return False
        before = _remove_ascii_whitespace(self.old_text)
        after = _remove_ascii_whitespace(self.new_text)
        return before == after

    def summary(self, kind: str) -> str:
        """Return the one-line replacement text for an omitted ``kind`` hunk."""
        counts = f"+{self.added} -{self.deleted} context={self.context}"
        return f"… {kind} hunk omitted: " + counts
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _hunk_stats(body: list[str], prefix_columns: int) -> _HunkStats:
    """Tally one hunk body and rebuild its old and new sides.

    The first ``prefix_columns`` characters of each line are its markers:
    ``+`` counts the line as added (new side), ``-`` as deleted (old side),
    and a line with neither is context and lands on both sides. Existing
    ``… N context line(s)`` notes add ``N`` to the context count and disable
    the whitespace-only check, since their text is no longer available.
    ``\\ No newline`` markers are collected separately and not counted.
    The whitespace-only check is only enabled for ``prefix_columns == 1``.
    """
    added_count = deleted_count = context_count = 0
    old_side: list[str] = []
    new_side: list[str] = []
    markers: list[str] = []
    whitespace_checkable = prefix_columns == 1

    for entry in body:
        folded = _context_summary_count(entry)
        if folded is not None:
            context_count += folded
            whitespace_checkable = False
            continue

        if entry.startswith("\\ No newline"):
            markers.append(entry)
            continue

        gutter, payload = entry[:prefix_columns], entry[prefix_columns:]
        is_addition = "+" in gutter
        is_deletion = "-" in gutter
        if is_addition:
            added_count += 1
            new_side.append(payload)
        if is_deletion:
            deleted_count += 1
            old_side.append(payload)
        if not (is_addition or is_deletion):
            context_count += 1
            old_side.append(payload)
            new_side.append(payload)

    return _HunkStats(
        added=added_count,
        deleted=deleted_count,
        context=context_count,
        old_text="\n".join(old_side),
        new_text="\n".join(new_side),
        can_check_whitespace_only=whitespace_checkable,
        no_newline_markers=markers,
    )
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _is_hunk_body_line(line: str, prefix_columns: int) -> bool:
    """Return whether ``line`` can belong to a hunk body.

    Context-summary notes and ``\\ No newline`` markers always qualify. Any
    other line must be at least ``prefix_columns`` long and its first
    ``prefix_columns`` characters must all be a space, ``+`` or ``-``.
    """
    if _context_summary_count(line) is not None:
        return True
    if line.startswith("\\ No newline"):
        return True
    if len(line) < prefix_columns:
        return False
    gutter = line[:prefix_columns]
    return set(gutter) <= set(" +-")
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def _context_summary_count(line: str) -> int | None:
    """Return the count from a ``… N context line(s)`` note, else ``None``."""
    found = _CONTEXT_SUMMARY_RE.match(line)
    return None if found is None else int(found["count"])
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _remove_ascii_whitespace(value: str) -> str:
    """Return ``value`` without any character listed in ``_ASCII_WHITESPACE``."""
    kept = [ch for ch in value if ch not in _ASCII_WHITESPACE]
    return "".join(kept)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _path_from_metadata_line(line: str) -> str:
    """Extract a file path from a diff metadata line, or return ``""``.

    ``diff --git`` lines are split on whitespace; the fourth token is
    preferred and the third is the fallback when the fourth cleans to an
    empty string. A ``diff --git`` line with fewer than four tokens yields
    ``""``. For the other recognised prefixes the remainder of the line is
    cleaned with ``_clean_diff_path``.
    """
    if line.startswith("diff --git "):
        tokens = line.split()
        if len(tokens) < 4:
            # No other recognised prefix can match a "diff --git " line.
            return ""
        new_side = _clean_diff_path(tokens[3])
        return new_side if new_side else _clean_diff_path(tokens[2])

    rest_of_line_prefixes = (
        "diff --cc ",
        "diff --combined ",
        "Index: ",
        "--- ",
        "+++ ",
        "rename to ",
        "copy to ",
    )
    for marker in rest_of_line_prefixes:
        if line.startswith(marker):
            return _clean_diff_path(line[len(marker) :])
    return ""
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _clean_diff_path(raw_path: str) -> str:
    """Normalise a path taken from a diff header.

    Surrounding whitespace, everything from the first tab onward, and
    surrounding double quotes are removed. ``/dev/null`` becomes ``""``.
    A leading ``a/``, ``b/``, ``c/``, ``i/`` or ``w/`` is stripped.
    """
    before_tab, _, _ = raw_path.strip().partition("\t")
    cleaned = before_tab.strip().strip('"')
    if cleaned == "/dev/null":
        return ""
    # The one-character slice is "/" only when a second character exists.
    if cleaned[1:2] == "/" and cleaned[0] in "abciw":
        return cleaned[2:]
    return cleaned
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _is_lockfile_path(path: str) -> bool:
    """Return whether the final component of ``path`` is a known lockfile name.

    Backslashes are treated as path separators before the last component
    is taken.
    """
    _, _, basename = path.replace("\\", "/").rpartition("/")
    return basename in _LOCKFILE_NAMES
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _is_binary_payload_end(line: str) -> bool:
    """Return whether ``line`` terminates a run of binary patch payload lines.

    A payload run ends at a hunk header, at any diff metadata line, or at an
    existing ``… N binary patch payload line(s)`` note.
    """
    for probe in (is_hunk_header, _is_diff_metadata_line):
        verdict = probe(line)
        if verdict:
            return verdict
    return _BINARY_PAYLOAD_SUMMARY_RE.match(line) is not None
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _is_diff_metadata_line(line: str) -> bool:
    """Return whether ``line`` starts with one of the known diff header prefixes."""
    file_headers = (
        "diff --git ",
        "diff --cc ",
        "diff --combined ",
        "Index: ",
        "--- ",
        "+++ ",
        "index ",
    )
    mode_changes = (
        "new file mode ",
        "deleted file mode ",
        "old mode ",
        "new mode ",
    )
    renames_and_copies = (
        "similarity index ",
        "dissimilarity index ",
        "rename from ",
        "rename to ",
        "copy from ",
        "copy to ",
    )
    binary_markers = (
        "Binary files ",
        "GIT binary patch",
        "literal ",
        "delta ",
    )
    known_prefixes = file_headers + mode_changes + renames_and_copies + binary_markers
    return line.startswith(known_prefixes)
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
"""Detect conservative generic application log shapes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import Counter
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
from .summary import (
|
|
9
|
+
ParsedLogLine,
|
|
10
|
+
is_important_line,
|
|
11
|
+
is_low_severity,
|
|
12
|
+
normalized_message_pattern,
|
|
13
|
+
parse_log_line,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
class GenericLog:
    """Immutable result returned by ``detect_generic_log`` on a positive match."""

    # One entry per input line, in input order; ``None`` where
    # ``parse_log_line`` returned ``None`` for that line.
    parsed_lines: tuple[ParsedLogLine | None, ...]

    # ``(level, normalized message pattern)`` pairs that the detector counted
    # at least three times among low-severity, non-important lines.
    repeated_low_patterns: frozenset[tuple[str, str]]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Fewer non-blank input lines than this and detection gives up.
_MIN_NONBLANK_LINES = 12

# Fewer non-blank lines recognised by `parse_log_line` than this and detection gives up.
_MIN_PREFIXED_LINES = 8

# Minimum combined occurrences of the repeated low-severity patterns.
_MIN_REPEATED_LOW_LINES = 5
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def detect_generic_log(lines: list[str]) -> GenericLog | None:
    """Return a ``GenericLog`` when ``lines`` strongly resemble an app log.

    ``None`` is returned when any of these holds:

    * ``_has_other_filter_ownership`` claims the input;
    * there are fewer than ``_MIN_NONBLANK_LINES`` non-blank lines;
    * fewer than ``_MIN_PREFIXED_LINES`` non-blank lines parse, or they make
      up less than 65% of the non-blank lines;
    * the patterns seen at least three times among low-severity,
      non-important lines cover fewer than ``_MIN_REPEATED_LOW_LINES`` lines.
    """
    if _has_other_filter_ownership(lines):
        return None

    nonblank_total = sum(1 for raw in lines if raw.strip())
    if nonblank_total < _MIN_NONBLANK_LINES:
        return None

    parsed_lines = tuple(parse_log_line(raw) for raw in lines)
    recognized = sum(
        1
        for raw, parsed in zip(lines, parsed_lines)
        if raw.strip() and parsed is not None
    )
    if recognized < _MIN_PREFIXED_LINES:
        return None
    if recognized * 100 < nonblank_total * 65:
        return None

    tallies: Counter[tuple[str, str]] = Counter()
    for raw, parsed in zip(lines, parsed_lines):
        if parsed is None:
            continue
        if not is_low_severity(parsed) or is_important_line(raw, parsed):
            continue
        key = (parsed.level, normalized_message_pattern(parsed))
        tallies[key] += 1

    repeated = frozenset(key for key, hits in tallies.items() if hits >= 3)
    covered = sum(tallies[key] for key in repeated)
    if covered < _MIN_REPEATED_LOW_LINES:
        return None

    return GenericLog(parsed_lines=parsed_lines, repeated_low_patterns=repeated)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _has_other_filter_ownership(lines: list[str]) -> bool:
    """Return whether ``lines`` should be left to a different filter.

    Input with no non-blank line counts as owned elsewhere. Otherwise the
    lower-cased text is searched for a fixed set of marker phrases, and then
    the stripped non-blank lines are run through the shape checks in order:
    JSON lines, test runner, CI log, table, path-location output, and
    build/package output.
    """
    lowered = "\n".join(lines).lower()
    nonblank = [trimmed for trimmed in (raw.strip() for raw in lines) if trimmed]
    if not nonblank:
        return True

    explicit_markers = (
        "test output:",
        "ci log:",
        "python traceback:",
        "traceback (most recent call last)",
        "test session starts",
        "short test summary info",
        "diff --git ",
    )
    for marker in explicit_markers:
        if marker in lowered:
            return True

    # Evaluated lazily and in order, so later checks are skipped after a hit.
    shape_checks = (
        lambda: _looks_like_json_lines(nonblank),
        lambda: _looks_like_test_runner(nonblank, lowered),
        lambda: _looks_like_ci_log(nonblank),
        lambda: _looks_like_table(nonblank),
        lambda: _looks_like_path_location_output(nonblank),
        lambda: _looks_like_build_or_package_output(nonblank, lowered),
    )
    return any(check() for check in shape_checks)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _looks_like_json_lines(lines: list[str]) -> bool:
    """Return whether at least 70% of ``lines`` look like one-line JSON objects.

    A line counts when it starts with ``{``, ends with ``}`` and contains a
    ``:``. No parsing is attempted. The visible caller passes stripped,
    non-blank lines.

    An empty list returns ``False``. Without the guard the ratio test would
    be ``0 >= 0`` and report a match for input that contains nothing.
    """
    if not lines:
        return False
    json_like = sum(
        1 for line in lines if line.startswith("{") and line.endswith("}") and ":" in line
    )
    return json_like * 100 >= len(lines) * 70
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _looks_like_test_runner(lines: list[str], lower: str) -> bool:
    """Return whether the input carries a test-runner signature.

    Any one of these is enough:

    * ``lower`` contains ``pytest``;
    * a line starts with ``Test Files``, ``Tests``, ``FAIL `` or
      ``test result:``;
    * a line starts with ``Running `` and contains `` tests using ``;
    * some line starts with ``Ran `` and contains `` tests in `` while some
      line (possibly another one) is exactly ``OK`` or starts with
      ``FAILED (``.
    """
    if "pytest" in lower:
        return True

    saw_ran_count = False
    saw_verdict = False
    for entry in lines:
        if entry.startswith(("Test Files", "Tests", "FAIL ", "test result:")):
            return True
        if entry.startswith("Running ") and " tests using " in entry:
            return True
        if entry.startswith("Ran ") and " tests in " in entry:
            saw_ran_count = True
        if entry == "OK" or entry.startswith("FAILED ("):
            saw_verdict = True
    # The count line and the verdict line only match as a pair.
    return saw_ran_count and saw_verdict
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _looks_like_ci_log(lines: list[str]) -> bool:
    """Return whether any line looks like CI job output.

    A line qualifies when it starts with ``##[``, ``::group::``,
    ``::endgroup::``, ``::error`` or ``::warning``, or when it contains a tab,
    a ``T`` and the two characters ``Z `` all at once.
    """
    command_prefixes = ("##[", "::group::", "::endgroup::", "::error", "::warning")
    for entry in lines:
        if entry.startswith(command_prefixes):
            return True
        if all(token in entry for token in ("\t", "T", "Z ")):
            return True
    return False
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _looks_like_table(lines: list[str]) -> bool:
    """Return whether ``lines`` look like a pipe-delimited table.

    At least three lines must contain ``|``. In addition, either those lines
    make up 55% or more of the input, or at least one line consists solely of
    ``-``, ``+`` and ``|`` characters after stripping.
    """
    rows_with_pipe = len([row for row in lines if "|" in row])
    if rows_with_pipe < 3:
        return False
    if rows_with_pipe * 100 >= len(lines) * 55:
        return True
    rule_chars = {"-", "+", "|"}
    return any(set(row.strip()).issubset(rule_chars) for row in lines)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _looks_like_path_location_output(lines: list[str]) -> bool:
    """Return whether path-location lines dominate ``lines``.

    Requires at least two lines accepted by ``_looks_like_path_location_line``
    and for them to make up 45% or more of the input.
    """
    hits = len([entry for entry in lines if _looks_like_path_location_line(entry)])
    if hits < 2:
        return False
    return hits * 100 >= len(lines) * 45
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _looks_like_path_location_line(line: str) -> bool:
    """Return whether ``line`` has a ``path:number`` style prefix.

    The line is split on ``:`` and needs at least three fields. One of the
    second and third fields must be all digits, and the first field must
    contain ``/`` or ``.``.
    """
    location, *rest = line.split(":")
    if len(rest) < 2:
        return False
    if not (rest[0].isdigit() or rest[1].isdigit()):
        return False
    return "/" in location or "." in location
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _looks_like_build_or_package_output(lines: list[str], lower: str) -> bool:
    """Return whether the input looks like build or package-manager output.

    True only when ``lower`` contains one of the marker phrases below and
    ``parse_log_line`` recognises none of ``lines``. The lines are not parsed
    at all when no marker is present.
    """
    build_markers = (
        "compiling ",
        "checking ",
        "finished ",
        "could not compile",
        "error[",
        "warning[",
        "downloading ",
        "downloaded ",
        "installing collected packages",
        "successfully installed",
        "added ",
        "packages audited",
    )
    if not any(marker in lower for marker in build_markers):
        return False
    for entry in lines:
        # A single parseable log line is enough to rule this shape out.
        if parse_log_line(entry) is not None:
            return False
    return True
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Conservative reducer for generic application logs."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections import Counter
|
|
6
|
+
|
|
7
|
+
from ..text import join_preserving_final_newline, score, split_preserving_final_newline
|
|
8
|
+
from .detector import GenericLog, detect_generic_log
|
|
9
|
+
from .summary import (
|
|
10
|
+
is_important_line,
|
|
11
|
+
is_low_severity,
|
|
12
|
+
normalize_selected_line,
|
|
13
|
+
normalized_message_pattern,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# How many leading and trailing non-blank lines `_reduce_lines` never omits.
_CONTEXT_LINES = 2
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def compress_generic_log_output(text: str) -> str:
    """Return a reduced form of ``text`` when it is a generic app log.

    ``text`` is returned unchanged when ``detect_generic_log`` finds no
    signal, when the reduction has fewer than three lines, or when the
    reduced text does not get a strictly lower ``score`` than the original.
    """
    source_lines, had_final_newline = split_preserving_final_newline(text)
    detection = detect_generic_log(source_lines)
    if detection is None:
        return text

    reduced = _reduce_lines(source_lines, detection)
    if len(reduced) >= 3:
        rendered = join_preserving_final_newline(reduced, had_final_newline)
        if score(rendered) < score(text):
            return rendered
    return text
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _reduce_lines(lines: list[str], signal: GenericLog) -> list[str]:
    """Build the reduced line list for a detected generic log.

    The output starts with a fixed banner line. Blank lines are skipped.
    A non-blank line is omitted when it is low severity, its
    ``(level, pattern)`` pair is one of the repeated patterns in ``signal``,
    it is not among the first or last ``_CONTEXT_LINES`` non-blank lines, and
    ``is_important_line`` does not flag it. Each run of omitted lines is
    replaced by the notes from ``_format_omitted``. Kept lines go through
    ``normalize_selected_line``, and adjacent duplicates are dropped at the
    end.
    """
    nonblank = [pos for pos, raw in enumerate(lines) if raw.strip()]
    protected = set(nonblank[:_CONTEXT_LINES]) | set(nonblank[-_CONTEXT_LINES:])
    kept = ["generic log: application summary"]
    pending: Counter[tuple[str, str]] = Counter()

    for pos in nonblank:
        raw = lines[pos]
        parsed = signal.parsed_lines[pos]

        key: tuple[str, str] | None = None
        if parsed is not None and is_low_severity(parsed):
            key = (parsed.level, normalized_message_pattern(parsed))

        if (
            key is not None
            and key in signal.repeated_low_patterns
            and pos not in protected
            and not is_important_line(raw, parsed)
        ):
            pending[key] += 1
            continue

        # A kept line closes the current run of omitted lines.
        if pending:
            kept.extend(_format_omitted(pending))
            pending.clear()
        kept.append(normalize_selected_line(raw))

    if pending:
        kept.extend(_format_omitted(pending))
    return _drop_adjacent_duplicates(kept)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _format_omitted(omitted: Counter[tuple[str, str]]) -> list[str]:
    """Render notes for one run of omitted lines.

    The three most common ``(level, pattern)`` pairs each get their own note.
    Any further pairs are combined into a single trailing note that gives
    their total line count and how many patterns they span.
    """
    ranked = omitted.most_common()
    top, rest = ranked[:3], ranked[3:]

    notes = [
        f"… {count} repeated {level} line{'' if count == 1 else 's'} omitted: {pattern}"
        for (level, pattern), count in top
    ]
    if rest:
        remaining_count = sum(count for _, count in rest)
        suffix = "" if remaining_count == 1 else "s"
        notes.append(
            f"… {remaining_count} repeated low-severity line{suffix} omitted "
            f"across {len(rest)} more patterns"
        )
    return notes
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _drop_adjacent_duplicates(lines: list[str]) -> list[str]:
    """Return a new list in which runs of equal neighbours are collapsed to one.

    Equal lines that are not next to each other are all kept.
    """
    deduped = lines[:1]
    for candidate in lines[1:]:
        if candidate != deduped[-1]:
            deduped.append(candidate)
    return deduped
|