codetool-shell 0.1.1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codetool_shell/__init__.py +11 -0
- codetool_shell/api.py +59 -0
- codetool_shell/bin/windows-x86_64/codetool-shell-rust.exe +0 -0
- codetool_shell/filters/__init__.py +14 -0
- codetool_shell/filters/build_compiler/__init__.py +7 -0
- codetool_shell/filters/build_compiler/detector.py +412 -0
- codetool_shell/filters/build_compiler/reducer.py +166 -0
- codetool_shell/filters/build_compiler/summary.py +617 -0
- codetool_shell/filters/ci_job_log/__init__.py +7 -0
- codetool_shell/filters/ci_job_log/detector.py +64 -0
- codetool_shell/filters/ci_job_log/reducer.py +99 -0
- codetool_shell/filters/ci_job_log/summary.py +243 -0
- codetool_shell/filters/diff/__init__.py +7 -0
- codetool_shell/filters/diff/detector.py +136 -0
- codetool_shell/filters/diff/reducer.py +308 -0
- codetool_shell/filters/generic_log/__init__.py +7 -0
- codetool_shell/filters/generic_log/detector.py +175 -0
- codetool_shell/filters/generic_log/reducer.py +99 -0
- codetool_shell/filters/generic_log/summary.py +161 -0
- codetool_shell/filters/git.py +514 -0
- codetool_shell/filters/html_cleanup/__init__.py +7 -0
- codetool_shell/filters/html_cleanup/detector.py +136 -0
- codetool_shell/filters/html_cleanup/reducer.py +27 -0
- codetool_shell/filters/html_cleanup/summary.py +422 -0
- codetool_shell/filters/json_payload/__init__.py +7 -0
- codetool_shell/filters/json_payload/detector.py +62 -0
- codetool_shell/filters/json_payload/reducer.py +81 -0
- codetool_shell/filters/json_payload/summary.py +233 -0
- codetool_shell/filters/listing/__init__.py +7 -0
- codetool_shell/filters/listing/detector.py +294 -0
- codetool_shell/filters/listing/reducer.py +30 -0
- codetool_shell/filters/log_template/__init__.py +7 -0
- codetool_shell/filters/log_template/constants.py +76 -0
- codetool_shell/filters/log_template/detector.py +331 -0
- codetool_shell/filters/log_template/reducer.py +78 -0
- codetool_shell/filters/log_template/template.py +280 -0
- codetool_shell/filters/log_template/types.py +21 -0
- codetool_shell/filters/opaque_payload/__init__.py +7 -0
- codetool_shell/filters/opaque_payload/detector.py +563 -0
- codetool_shell/filters/opaque_payload/reducer.py +142 -0
- codetool_shell/filters/opaque_payload/summary.py +61 -0
- codetool_shell/filters/package_manager/__init__.py +7 -0
- codetool_shell/filters/package_manager/detector.py +220 -0
- codetool_shell/filters/package_manager/reducer.py +110 -0
- codetool_shell/filters/package_manager/summary.py +172 -0
- codetool_shell/filters/pipeline.py +65 -0
- codetool_shell/filters/rg.py +250 -0
- codetool_shell/filters/system_output/__init__.py +7 -0
- codetool_shell/filters/system_output/detector.py +600 -0
- codetool_shell/filters/system_output/reducer.py +331 -0
- codetool_shell/filters/system_output/summary.py +164 -0
- codetool_shell/filters/table/__init__.py +7 -0
- codetool_shell/filters/table/detector.py +244 -0
- codetool_shell/filters/table/reducer.py +57 -0
- codetool_shell/filters/table/summary.py +37 -0
- codetool_shell/filters/test_runner/__init__.py +7 -0
- codetool_shell/filters/test_runner/ansi.py +80 -0
- codetool_shell/filters/test_runner/detector.py +409 -0
- codetool_shell/filters/test_runner/reducer.py +288 -0
- codetool_shell/filters/test_runner/summary.py +449 -0
- codetool_shell/filters/text.py +38 -0
- codetool_shell/filters/traceback/__init__.py +7 -0
- codetool_shell/filters/traceback/detector.py +209 -0
- codetool_shell/filters/traceback/reducer.py +141 -0
- codetool_shell/filters/traceback/summary.py +122 -0
- codetool_shell/filters/tree.py +59 -0
- codetool_shell/py.typed +0 -0
- codetool_shell/python_backend.py +38 -0
- codetool_shell/rust_backend.py +254 -0
- codetool_shell-0.1.1.dist-info/METADATA +152 -0
- codetool_shell-0.1.1.dist-info/RECORD +72 -0
- codetool_shell-0.1.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
"""Template construction and scalar normalization helpers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .constants import (
|
|
6
|
+
_KNOWN_LEVELS,
|
|
7
|
+
_MAX_CAPTURED_VALUES,
|
|
8
|
+
_MAX_VALUE_LEN,
|
|
9
|
+
_OPERATIONAL_WORDS,
|
|
10
|
+
_SECRET_KEY_PARTS,
|
|
11
|
+
)
|
|
12
|
+
from .types import LineTemplate
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def template_for_line(line: str) -> LineTemplate | None:
    """Turn one line into a ``LineTemplate``, or ``None`` if it does not qualify.

    A line qualifies only when it is log-like, carries at least one
    ``key=value`` token, or reads as stable operational text. Every token is
    then normalized; the line is rejected if any token is rejected, if the
    number of captured values is outside ``1.._MAX_CAPTURED_VALUES``, if any
    captured value is not safely printable, or if too little static text is
    left once placeholders are removed.
    """
    words = line.strip().split()
    if not words:
        return None

    log_like = _is_log_like(words)
    has_key_value = any(_parse_key_value_token(word) is not None for word in words)
    stable_text = _is_stable_text_candidate(words)
    if not log_like and not has_key_value and not stable_text:
        return None

    # Bare numbers/ids are only replaced when the whole line looks structured.
    standalone_ok = log_like or stable_text

    pieces: list[str] = []
    captured_values: list[str] = []
    for word in words:
        outcome = _normalize_token(word, allow_standalone=standalone_ok)
        if outcome is None:
            return None
        pieces.append(outcome[0])
        captured_values.extend(outcome[1])

    if len(captured_values) < 1 or len(captured_values) > _MAX_CAPTURED_VALUES:
        return None
    if not all(_is_safe_printable_value(item) for item in captured_values):
        return None

    template = " ".join(pieces)
    if _has_enough_static_text(template):
        return LineTemplate(template=template, values=tuple(captured_values))
    return None
|
|
48
|
+
|
|
49
|
+
def _normalize_token(
    token: str, *, allow_standalone: bool
) -> tuple[str, tuple[str, ...]] | None:
    """Normalize a single whitespace-delimited token.

    Returns ``(normalized_token, captured_values)``; the token is returned
    unchanged with no captures when nothing in it is variable. Returns
    ``None`` when the token is a ``key=value`` pair whose key looks secret or
    whose value cannot be normalized.
    """
    prefix, body, suffix = _split_wrapping_punctuation(token)
    if not body:
        return token, ()

    pair = _parse_key_value_core(body)
    if pair is not None:
        name, raw = pair
        if _is_secret_key(name):
            return None
        result = _normalize_value(raw, key=name)
        if result is None:
            return None
        placeholder, original = result
        return f"{prefix}{name}={placeholder}{suffix}", (original,)

    # Standalone token: choose at most one placeholder, in priority order.
    marker = None
    captured = body
    unit = ""
    if _looks_like_timestamp(body):
        marker = "<ts>"
    elif _looks_like_ipv4(body):
        marker = "<ip>"
    elif allow_standalone:
        split = _split_number_with_unit(body)
        if split is not None:
            captured, unit = split
            marker = "<num>"
        elif _looks_like_id_value(body):
            marker = "<id>"

    if marker is None:
        return token, ()
    return f"{prefix}{marker}{unit}{suffix}", (captured,)
|
|
81
|
+
|
|
82
|
+
def _normalize_value(value: str, *, key: str) -> tuple[str, str] | None:
    """Pick the placeholder for the value side of a ``key=value`` token.

    Returns ``(placeholder, captured_text)``, or ``None`` when the value is
    not safely printable or matches none of the recognized shapes. Shapes
    are tried in order: timestamp, IPv4, number with optional unit,
    identifier, short scalar.
    """
    if not _is_safe_printable_value(value):
        return None

    if _looks_like_timestamp(value):
        return "<ts>", value
    if _looks_like_ipv4(value):
        return "<ip>", value

    split = _split_number_with_unit(value)
    if split is not None:
        return f"<num>{split[1]}", split[0]

    # An id-ish key relaxes the shape requirements on the value.
    if (_is_id_key(key) and _is_safe_id_value(value)) or _looks_like_id_value(value):
        return "<id>", value

    return ("<value>", value) if _is_safe_scalar_value(value) else None
|
|
102
|
+
|
|
103
|
+
def _split_wrapping_punctuation(token: str) -> tuple[str, str, str]:
|
|
104
|
+
start = 0
|
|
105
|
+
end = len(token)
|
|
106
|
+
while start < end and token[start] in "([{":
|
|
107
|
+
start += 1
|
|
108
|
+
while end > start and token[end - 1] in ".,;)]}":
|
|
109
|
+
end -= 1
|
|
110
|
+
return token[:start], token[start:end], token[end:]
|
|
111
|
+
|
|
112
|
+
def _parse_key_value_token(token: str) -> tuple[str, str] | None:
    """Parse ``key=value`` out of *token* after dropping wrapping punctuation."""
    core = _split_wrapping_punctuation(token)[1]
    return _parse_key_value_core(core)
|
|
115
|
+
|
|
116
|
+
def _parse_key_value_core(core: str) -> tuple[str, str] | None:
    """Split *core* at its first ``=`` into ``(key, value)``.

    Returns ``None`` when there is no ``=``, when either side is empty, or
    when the key fails ``_is_valid_key``.
    """
    key, separator, value = core.partition("=")
    if not separator or not key or not value:
        return None
    if not _is_valid_key(key):
        return None
    return key, value
|
|
123
|
+
|
|
124
|
+
def _is_valid_key(key: str) -> bool:
|
|
125
|
+
if len(key) > 40 or not (key[0].isalpha() or key[0] == "_"):
|
|
126
|
+
return False
|
|
127
|
+
return all(char.isalnum() or char in "_.-" for char in key)
|
|
128
|
+
|
|
129
|
+
def _is_secret_key(key: str) -> bool:
    """Return whether *key* contains any fragment from ``_SECRET_KEY_PARTS``.

    The key is lowercased and ``-`` / ``.`` are mapped to ``_`` before the
    substring comparison.
    """
    canonical = key.lower().translate(str.maketrans("-.", "__"))
    for fragment in _SECRET_KEY_PARTS:
        if fragment in canonical:
            return True
    return False
|
|
132
|
+
|
|
133
|
+
def _is_id_key(key: str) -> bool:
|
|
134
|
+
normalized = key.lower()
|
|
135
|
+
return any(
|
|
136
|
+
part in normalized
|
|
137
|
+
for part in ("id", "uuid", "hash", "sha", "trace", "span", "request", "job", "task")
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
def _is_log_like(tokens: list[str]) -> bool:
    """Return whether the leading tokens look like a log-line prefix.

    Recognized prefixes are: a timestamp (alone, or followed by a known
    level), a known level that is bracketed or ends in ``:`` / ``-``, or a
    ``level=<known level>`` pair as the first token.
    """
    if not tokens:
        return False

    head = tokens[0]
    unbracketed = head.strip("[]")

    if _looks_like_timestamp(unbracketed):
        if len(tokens) == 1:
            return True
        return _normalize_level(tokens[1].strip("[]:=-")) is not None

    if _normalize_level(unbracketed.strip(":=-")) is not None:
        bracketed = head.startswith("[")
        if bracketed or head.endswith((": ", ":", "-")) or len(tokens) > 1:
            # A bare level word followed by more text is not treated as a prefix.
            return bracketed or head.endswith((":", "-"))

    pair = _parse_key_value_core(head)
    if pair is not None:
        key, value = pair
        return key.lower() == "level" and _normalize_level(value) is not None
    return False
|
|
160
|
+
|
|
161
|
+
def _normalize_level(value: str) -> str | None:
    """Return the canonical upper-case level for *value*, or ``None``.

    ``WARNING`` is folded into ``WARN`` before the lookup in
    ``_KNOWN_LEVELS``.
    """
    candidate = value.upper()
    candidate = "WARN" if candidate == "WARNING" else candidate
    return candidate if candidate in _KNOWN_LEVELS else None
|
|
168
|
+
|
|
169
|
+
def _is_stable_text_candidate(tokens: list[str]) -> bool:
    """Return whether *tokens* read as short operational text.

    This requires at least four tokens, a joined length of at most 160
    characters, no trailing period, and at least one word (punctuation
    stripped, lowercased) that appears in ``_OPERATIONAL_WORDS``.
    """
    if len(tokens) < 4:
        return False

    joined = " ".join(tokens)
    if len(joined) > 160 or joined.endswith("."):
        return False

    words = set()
    for token in tokens:
        words.add(token.strip(".,:;()[]{}").lower())
    return not words.isdisjoint(_OPERATIONAL_WORDS)
|
|
177
|
+
|
|
178
|
+
def _has_enough_static_text(template: str) -> bool:
|
|
179
|
+
static = template
|
|
180
|
+
for placeholder in ("<num>", "<id>", "<ip>", "<ts>", "<value>"):
|
|
181
|
+
static = static.replace(placeholder, "")
|
|
182
|
+
static_nonspace = "".join(char for char in static if not char.isspace())
|
|
183
|
+
alpha_count = sum(1 for char in static_nonspace if char.isalpha())
|
|
184
|
+
return len(static_nonspace) >= 12 and alpha_count >= 6
|
|
185
|
+
|
|
186
|
+
def _is_safe_printable_value(value: str) -> bool:
    """Return whether *value* is non-empty, bounded, and plainly printable.

    Each character must have a code point in 33..126 and must not be one of
    ``< > { } [ ] / \\``; the length must not exceed ``_MAX_VALUE_LEN``.
    """
    if not value or len(value) > _MAX_VALUE_LEN:
        return False
    for char in value:
        if not 33 <= ord(char) <= 126:
            return False
        if char in "<>{}[]/\\":
            return False
    return True
|
|
192
|
+
|
|
193
|
+
def _is_safe_scalar_value(value: str) -> bool:
|
|
194
|
+
return (
|
|
195
|
+
len(value) <= 24
|
|
196
|
+
and any(char.isalpha() for char in value)
|
|
197
|
+
and all(char.isalnum() or char in "_.-" for char in value)
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
def _is_safe_id_value(value: str) -> bool:
    """Return whether *value* uses only identifier-safe characters.

    It must be no longer than ``_MAX_VALUE_LEN``, contain at least one
    alphanumeric character, and otherwise use only ``_``, ``.``, ``:`` or
    ``-``.
    """
    if len(value) > _MAX_VALUE_LEN:
        return False
    seen_alnum = False
    for char in value:
        if char.isalnum():
            seen_alnum = True
        elif char not in "_.:-":
            return False
    return seen_alnum
|
|
206
|
+
|
|
207
|
+
def _looks_like_id_value(value: str) -> bool:
    """Return whether *value* has the shape of a generated identifier.

    The value must pass ``_is_safe_id_value`` and mix letters with digits.
    With a ``-`` or ``_`` separator, 6 or more characters suffice; otherwise
    it must be a hexadecimal string of 12 or more characters.
    """
    if not _is_safe_id_value(value):
        return False

    mixes_letters_and_digits = any(char.isalpha() for char in value) and any(
        char.isdigit() for char in value
    )
    if not mixes_letters_and_digits:
        return False

    if "-" in value or "_" in value:
        return len(value) >= 6
    return len(value) >= 12 and set(value).issubset("0123456789abcdefABCDEF")
|
|
218
|
+
|
|
219
|
+
def _split_number_with_unit(value: str) -> tuple[str, str] | None:
|
|
220
|
+
if not value:
|
|
221
|
+
return None
|
|
222
|
+
index = 0
|
|
223
|
+
if value[index] in "+-":
|
|
224
|
+
index += 1
|
|
225
|
+
if index >= len(value):
|
|
226
|
+
return None
|
|
227
|
+
|
|
228
|
+
digit_count = 0
|
|
229
|
+
while index < len(value) and value[index].isdigit():
|
|
230
|
+
index += 1
|
|
231
|
+
digit_count += 1
|
|
232
|
+
if index < len(value) and value[index] == ".":
|
|
233
|
+
dot_index = index
|
|
234
|
+
index += 1
|
|
235
|
+
fractional_digits = 0
|
|
236
|
+
while index < len(value) and value[index].isdigit():
|
|
237
|
+
index += 1
|
|
238
|
+
fractional_digits += 1
|
|
239
|
+
if fractional_digits == 0:
|
|
240
|
+
index = dot_index
|
|
241
|
+
if digit_count == 0:
|
|
242
|
+
return None
|
|
243
|
+
|
|
244
|
+
number = value[:index]
|
|
245
|
+
suffix = value[index:]
|
|
246
|
+
if suffix and (
|
|
247
|
+
len(suffix) > 8
|
|
248
|
+
or not all(char.isalpha() or char in "%µ" for char in suffix)
|
|
249
|
+
):
|
|
250
|
+
return None
|
|
251
|
+
return number, suffix
|
|
252
|
+
|
|
253
|
+
def _looks_like_timestamp(value: str) -> bool:
|
|
254
|
+
if len(value) < 19:
|
|
255
|
+
return False
|
|
256
|
+
if not (
|
|
257
|
+
value[4] == "-"
|
|
258
|
+
and value[7] == "-"
|
|
259
|
+
and value[10] in "T "
|
|
260
|
+
and value[13] == ":"
|
|
261
|
+
and value[16] == ":"
|
|
262
|
+
):
|
|
263
|
+
return False
|
|
264
|
+
for index in (0, 1, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18):
|
|
265
|
+
if not value[index].isdigit():
|
|
266
|
+
return False
|
|
267
|
+
return all(char.isdigit() or char in ".,Zz+-:" for char in value[19:])
|
|
268
|
+
|
|
269
|
+
def _looks_like_ipv4(value: str) -> bool:
|
|
270
|
+
parts = value.split(".")
|
|
271
|
+
if len(parts) != 4:
|
|
272
|
+
return False
|
|
273
|
+
for part in parts:
|
|
274
|
+
if not part.isdigit():
|
|
275
|
+
return False
|
|
276
|
+
number = int(part)
|
|
277
|
+
if number > 255:
|
|
278
|
+
return False
|
|
279
|
+
return True
|
|
280
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Shared types for repeated log-template compaction."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True)
class LineTemplate:
    """Immutable pairing of a normalized line template with its captured values.

    Attributes:
        template: The line text with variable parts replaced by placeholders.
        values: The original text captured for the placeholders.
    """

    template: str
    values: tuple[str, ...]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass(frozen=True)
class LogTemplateSignal:
    """Immutable result of detecting repeated templates in log-like output.

    Attributes:
        line_templates: A sequence of ``LineTemplate`` entries, with ``None``
            allowed as an entry.
        repeated_templates: The set of template strings flagged as repeated.
    """

    line_templates: tuple[LineTemplate | None, ...]
    repeated_templates: frozenset[str]
|