codetool-shell 0.1.1__py3-none-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. codetool_shell/__init__.py +11 -0
  2. codetool_shell/api.py +59 -0
  3. codetool_shell/bin/windows-arm64/codetool-shell-rust.exe +0 -0
  4. codetool_shell/filters/__init__.py +14 -0
  5. codetool_shell/filters/build_compiler/__init__.py +7 -0
  6. codetool_shell/filters/build_compiler/detector.py +412 -0
  7. codetool_shell/filters/build_compiler/reducer.py +166 -0
  8. codetool_shell/filters/build_compiler/summary.py +617 -0
  9. codetool_shell/filters/ci_job_log/__init__.py +7 -0
  10. codetool_shell/filters/ci_job_log/detector.py +64 -0
  11. codetool_shell/filters/ci_job_log/reducer.py +99 -0
  12. codetool_shell/filters/ci_job_log/summary.py +243 -0
  13. codetool_shell/filters/diff/__init__.py +7 -0
  14. codetool_shell/filters/diff/detector.py +136 -0
  15. codetool_shell/filters/diff/reducer.py +308 -0
  16. codetool_shell/filters/generic_log/__init__.py +7 -0
  17. codetool_shell/filters/generic_log/detector.py +175 -0
  18. codetool_shell/filters/generic_log/reducer.py +99 -0
  19. codetool_shell/filters/generic_log/summary.py +161 -0
  20. codetool_shell/filters/git.py +514 -0
  21. codetool_shell/filters/html_cleanup/__init__.py +7 -0
  22. codetool_shell/filters/html_cleanup/detector.py +136 -0
  23. codetool_shell/filters/html_cleanup/reducer.py +27 -0
  24. codetool_shell/filters/html_cleanup/summary.py +422 -0
  25. codetool_shell/filters/json_payload/__init__.py +7 -0
  26. codetool_shell/filters/json_payload/detector.py +62 -0
  27. codetool_shell/filters/json_payload/reducer.py +81 -0
  28. codetool_shell/filters/json_payload/summary.py +233 -0
  29. codetool_shell/filters/listing/__init__.py +7 -0
  30. codetool_shell/filters/listing/detector.py +294 -0
  31. codetool_shell/filters/listing/reducer.py +30 -0
  32. codetool_shell/filters/log_template/__init__.py +7 -0
  33. codetool_shell/filters/log_template/constants.py +76 -0
  34. codetool_shell/filters/log_template/detector.py +331 -0
  35. codetool_shell/filters/log_template/reducer.py +78 -0
  36. codetool_shell/filters/log_template/template.py +280 -0
  37. codetool_shell/filters/log_template/types.py +21 -0
  38. codetool_shell/filters/opaque_payload/__init__.py +7 -0
  39. codetool_shell/filters/opaque_payload/detector.py +563 -0
  40. codetool_shell/filters/opaque_payload/reducer.py +142 -0
  41. codetool_shell/filters/opaque_payload/summary.py +61 -0
  42. codetool_shell/filters/package_manager/__init__.py +7 -0
  43. codetool_shell/filters/package_manager/detector.py +220 -0
  44. codetool_shell/filters/package_manager/reducer.py +110 -0
  45. codetool_shell/filters/package_manager/summary.py +172 -0
  46. codetool_shell/filters/pipeline.py +65 -0
  47. codetool_shell/filters/rg.py +250 -0
  48. codetool_shell/filters/system_output/__init__.py +7 -0
  49. codetool_shell/filters/system_output/detector.py +600 -0
  50. codetool_shell/filters/system_output/reducer.py +331 -0
  51. codetool_shell/filters/system_output/summary.py +164 -0
  52. codetool_shell/filters/table/__init__.py +7 -0
  53. codetool_shell/filters/table/detector.py +244 -0
  54. codetool_shell/filters/table/reducer.py +57 -0
  55. codetool_shell/filters/table/summary.py +37 -0
  56. codetool_shell/filters/test_runner/__init__.py +7 -0
  57. codetool_shell/filters/test_runner/ansi.py +80 -0
  58. codetool_shell/filters/test_runner/detector.py +409 -0
  59. codetool_shell/filters/test_runner/reducer.py +288 -0
  60. codetool_shell/filters/test_runner/summary.py +449 -0
  61. codetool_shell/filters/text.py +38 -0
  62. codetool_shell/filters/traceback/__init__.py +7 -0
  63. codetool_shell/filters/traceback/detector.py +209 -0
  64. codetool_shell/filters/traceback/reducer.py +141 -0
  65. codetool_shell/filters/traceback/summary.py +122 -0
  66. codetool_shell/filters/tree.py +59 -0
  67. codetool_shell/py.typed +0 -0
  68. codetool_shell/python_backend.py +38 -0
  69. codetool_shell/rust_backend.py +254 -0
  70. codetool_shell-0.1.1.dist-info/METADATA +152 -0
  71. codetool_shell-0.1.1.dist-info/RECORD +72 -0
  72. codetool_shell-0.1.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,280 @@
1
+ """Template construction and scalar normalization helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .constants import (
6
+ _KNOWN_LEVELS,
7
+ _MAX_CAPTURED_VALUES,
8
+ _MAX_VALUE_LEN,
9
+ _OPERATIONAL_WORDS,
10
+ _SECRET_KEY_PARTS,
11
+ )
12
+ from .types import LineTemplate
13
+
14
+
15
def template_for_line(line: str) -> LineTemplate | None:
    """Reduce one output line to a template plus its captured values.

    Returns ``None`` when the line is blank; when it is neither log-like,
    key=value bearing, nor stable operational text; when any token is
    rejected by ``_normalize_token``; when the number of captured values is
    outside ``1.._MAX_CAPTURED_VALUES``; when a captured value is not safe
    printable text; or when too little static text is left in the template.
    """
    words = line.split()
    if not words:
        return None

    looks_logged = _is_log_like(words)
    carries_pairs = any(_parse_key_value_token(word) is not None for word in words)
    reads_stable = _is_stable_text_candidate(words)
    if not (looks_logged or carries_pairs or reads_stable):
        return None

    # Bare numbers and ids are only replaced on lines classified as log-like
    # or stable text; a line that merely carries key=value pairs keeps them.
    standalone_ok = looks_logged or reads_stable
    pieces: list[str] = []
    captures: list[str] = []
    for word in words:
        outcome = _normalize_token(word, allow_standalone=standalone_ok)
        if outcome is None:
            return None
        piece, found = outcome
        pieces.append(piece)
        captures.extend(found)

    if len(captures) < 1 or len(captures) > _MAX_CAPTURED_VALUES:
        return None
    if not all(_is_safe_printable_value(item) for item in captures):
        return None

    rendered = " ".join(pieces)
    if _has_enough_static_text(rendered):
        return LineTemplate(template=rendered, values=tuple(captures))
    return None
48
+
49
def _normalize_token(
    token: str, *, allow_standalone: bool
) -> tuple[str, tuple[str, ...]] | None:
    """Replace the variable part of a single token with a placeholder.

    Returns ``(token_text, captured_values)``. The token is returned
    unchanged with no captures when nothing in it is recognised. ``None``
    means the token must not be templated at all: its key looks secret or
    its value could not be normalized.
    """
    leading, core, trailing = _split_wrapping_punctuation(token)
    untouched = (token, ())
    if not core:
        return untouched

    pair = _parse_key_value_core(core)
    if pair is not None:
        key, raw_value = pair
        if _is_secret_key(key):
            return None
        outcome = _normalize_value(raw_value, key=key)
        if outcome is None:
            return None
        replacement, kept = outcome
        return f"{leading}{key}={replacement}{trailing}", (kept,)

    if _looks_like_timestamp(core):
        return f"{leading}<ts>{trailing}", (core,)
    if _looks_like_ipv4(core):
        return f"{leading}<ip>{trailing}", (core,)

    # Bare numbers and ids are only replaced when the caller allows it.
    if not allow_standalone:
        return untouched

    split = _split_number_with_unit(core)
    if split is not None:
        digits, suffix = split
        return f"{leading}<num>{suffix}{trailing}", (digits,)
    if _looks_like_id_value(core):
        return f"{leading}<id>{trailing}", (core,)
    return untouched
81
+
82
def _normalize_value(value: str, *, key: str) -> tuple[str, str] | None:
    """Classify the value side of a ``key=value`` token.

    Returns ``(placeholder, captured_text)``, or ``None`` when the value is
    not safe printable text or matches none of the known shapes. Checks run
    in order: timestamp, IPv4, number with unit, id, short scalar.
    """
    if not _is_safe_printable_value(value):
        return None

    if _looks_like_timestamp(value):
        return "<ts>", value
    if _looks_like_ipv4(value):
        return "<ip>", value

    split = _split_number_with_unit(value)
    if split is not None:
        digits, suffix = split
        return f"<num>{suffix}", digits

    # An id-like key relaxes the check: any safe id value counts as an id.
    keyed_id = _is_id_key(key) and _is_safe_id_value(value)
    if keyed_id or _looks_like_id_value(value):
        return "<id>", value

    return ("<value>", value) if _is_safe_scalar_value(value) else None
102
+
103
+ def _split_wrapping_punctuation(token: str) -> tuple[str, str, str]:
104
+ start = 0
105
+ end = len(token)
106
+ while start < end and token[start] in "([{":
107
+ start += 1
108
+ while end > start and token[end - 1] in ".,;)]}":
109
+ end -= 1
110
+ return token[:start], token[start:end], token[end:]
111
+
112
def _parse_key_value_token(token: str) -> tuple[str, str] | None:
    """Parse ``key=value`` from a token after dropping wrapping punctuation."""
    core = _split_wrapping_punctuation(token)[1]
    return _parse_key_value_core(core)
115
+
116
def _parse_key_value_core(core: str) -> tuple[str, str] | None:
    """Split ``core`` at its first ``=`` into ``(key, value)``.

    Returns ``None`` when there is no ``=``, when either side is empty, or
    when the key fails ``_is_valid_key``.
    """
    key, separator, value = core.partition("=")
    if not separator:
        return None
    if key and value and _is_valid_key(key):
        return key, value
    return None
123
+
124
+ def _is_valid_key(key: str) -> bool:
125
+ if len(key) > 40 or not (key[0].isalpha() or key[0] == "_"):
126
+ return False
127
+ return all(char.isalnum() or char in "_.-" for char in key)
128
+
129
def _is_secret_key(key: str) -> bool:
    """Return True when the key contains any fragment in ``_SECRET_KEY_PARTS``.

    The key is lower-cased and ``-`` / ``.`` are folded to ``_`` before the
    substring comparison.
    """
    folded = key.lower().translate(str.maketrans("-.", "__"))
    for fragment in _SECRET_KEY_PARTS:
        if fragment in folded:
            return True
    return False
132
+
133
+ def _is_id_key(key: str) -> bool:
134
+ normalized = key.lower()
135
+ return any(
136
+ part in normalized
137
+ for part in ("id", "uuid", "hash", "sha", "trace", "span", "request", "job", "task")
138
+ )
139
+
140
def _is_log_like(tokens: list[str]) -> bool:
    """Decide from the leading tokens whether a line looks like a log record.

    A line qualifies in three ways: it starts with a timestamp, alone or
    followed by a known level; it starts with a known level that is
    bracketed or ends in ``:`` / ``-``; or it starts with ``level=<LEVEL>``.
    """
    if not tokens:
        return False

    head = tokens[0]
    unbracketed = head.strip("[]")

    if _looks_like_timestamp(unbracketed):
        if len(tokens) == 1:
            return True
        following = tokens[1].strip("[]:=-")
        return _normalize_level(following) is not None

    bracketed = head.startswith("[")
    if _normalize_level(unbracketed.strip(":=-")) is not None:
        # NOTE(review): the guard admits any multi-token line, but the result
        # still requires a bracket or a ':' / '-' suffix. Kept as written.
        if bracketed or head.endswith((": ", ":", "-")) or len(tokens) > 1:
            return bracketed or head.endswith((":","-"))

    pair = _parse_key_value_core(head)
    if pair is None:
        return False
    key, value = pair
    return key.lower() == "level" and _normalize_level(value) is not None
160
+
161
def _normalize_level(value: str) -> str | None:
    """Return the upper-cased level name if it is in ``_KNOWN_LEVELS``.

    ``WARNING`` is folded to ``WARN`` before the lookup. Unknown names give
    ``None``.
    """
    candidate = value.upper()
    canonical = "WARN" if candidate == "WARNING" else candidate
    return canonical if canonical in _KNOWN_LEVELS else None
168
+
169
def _is_stable_text_candidate(tokens: list[str]) -> bool:
    """Return True for short operational text worth templating.

    The line needs at least four tokens, at most 160 characters once
    re-joined with single spaces, no trailing full stop, and at least one
    word (punctuation-trimmed, lower-cased) in ``_OPERATIONAL_WORDS``.
    """
    if len(tokens) < 4:
        return False

    joined = " ".join(tokens)
    if len(joined) > 160 or joined.endswith("."):
        return False

    return any(
        token.strip(".,:;()[]{}").lower() in _OPERATIONAL_WORDS for token in tokens
    )
177
+
178
+ def _has_enough_static_text(template: str) -> bool:
179
+ static = template
180
+ for placeholder in ("<num>", "<id>", "<ip>", "<ts>", "<value>"):
181
+ static = static.replace(placeholder, "")
182
+ static_nonspace = "".join(char for char in static if not char.isspace())
183
+ alpha_count = sum(1 for char in static_nonspace if char.isalpha())
184
+ return len(static_nonspace) >= 12 and alpha_count >= 6
185
+
186
def _is_safe_printable_value(value: str) -> bool:
    """Return True for a non-empty, bounded, visible-ASCII value.

    Every character must have a code point in 33..126 and must not be one
    of ``< > { } [ ] / \\``. The length may not exceed ``_MAX_VALUE_LEN``.
    """
    if not value or len(value) > _MAX_VALUE_LEN:
        return False

    forbidden = "<>{}[]/\\"
    for char in value:
        if not 33 <= ord(char) <= 126:
            return False
        if char in forbidden:
            return False
    return True
192
+
193
+ def _is_safe_scalar_value(value: str) -> bool:
194
+ return (
195
+ len(value) <= 24
196
+ and any(char.isalpha() for char in value)
197
+ and all(char.isalnum() or char in "_.-" for char in value)
198
+ )
199
+
200
def _is_safe_id_value(value: str) -> bool:
    """Return True when ``value`` uses only id-safe characters.

    The value may be at most ``_MAX_VALUE_LEN`` characters, must contain at
    least one alphanumeric, and may otherwise use only ``_``, ``.``, ``:``
    and ``-``.
    """
    if len(value) > _MAX_VALUE_LEN:
        return False

    saw_alnum = False
    for char in value:
        if char.isalnum():
            saw_alnum = True
        elif char not in "_.:-":
            return False
    return saw_alnum
206
+
207
def _looks_like_id_value(value: str) -> bool:
    """Return True when a bare value looks like an identifier.

    The value must pass ``_is_safe_id_value`` and mix letters with digits.
    It then needs either a ``-`` / ``_`` and at least 6 characters, or to
    be all hexadecimal with at least 12 characters.
    """
    if not _is_safe_id_value(value):
        return False

    mixes_letters_and_digits = any(map(str.isalpha, value)) and any(
        map(str.isdigit, value)
    )
    if not mixes_letters_and_digits:
        return False

    if "-" in value or "_" in value:
        return len(value) >= 6

    if len(value) < 12:
        return False
    hex_chars = set("0123456789abcdefABCDEF")
    return hex_chars.issuperset(value)
218
+
219
+ def _split_number_with_unit(value: str) -> tuple[str, str] | None:
220
+ if not value:
221
+ return None
222
+ index = 0
223
+ if value[index] in "+-":
224
+ index += 1
225
+ if index >= len(value):
226
+ return None
227
+
228
+ digit_count = 0
229
+ while index < len(value) and value[index].isdigit():
230
+ index += 1
231
+ digit_count += 1
232
+ if index < len(value) and value[index] == ".":
233
+ dot_index = index
234
+ index += 1
235
+ fractional_digits = 0
236
+ while index < len(value) and value[index].isdigit():
237
+ index += 1
238
+ fractional_digits += 1
239
+ if fractional_digits == 0:
240
+ index = dot_index
241
+ if digit_count == 0:
242
+ return None
243
+
244
+ number = value[:index]
245
+ suffix = value[index:]
246
+ if suffix and (
247
+ len(suffix) > 8
248
+ or not all(char.isalpha() or char in "%µ" for char in suffix)
249
+ ):
250
+ return None
251
+ return number, suffix
252
+
253
+ def _looks_like_timestamp(value: str) -> bool:
254
+ if len(value) < 19:
255
+ return False
256
+ if not (
257
+ value[4] == "-"
258
+ and value[7] == "-"
259
+ and value[10] in "T "
260
+ and value[13] == ":"
261
+ and value[16] == ":"
262
+ ):
263
+ return False
264
+ for index in (0, 1, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18):
265
+ if not value[index].isdigit():
266
+ return False
267
+ return all(char.isdigit() or char in ".,Zz+-:" for char in value[19:])
268
+
269
+ def _looks_like_ipv4(value: str) -> bool:
270
+ parts = value.split(".")
271
+ if len(parts) != 4:
272
+ return False
273
+ for part in parts:
274
+ if not part.isdigit():
275
+ return False
276
+ number = int(part)
277
+ if number > 255:
278
+ return False
279
+ return True
280
+
@@ -0,0 +1,21 @@
1
+ """Shared types for repeated log-template compaction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass(frozen=True)
class LineTemplate:
    """A line reduced to a stable template plus captured scalar values.

    Instances are immutable (``frozen=True``).
    """

    # Line text with its variable parts replaced by placeholders.
    template: str
    # The captured values, kept as strings.
    # NOTE(review): presumably in the order they appear in the line; confirm.
    values: tuple[str, ...]
14
+
15
+
16
@dataclass(frozen=True)
class LogTemplateSignal:
    """Detected repeated templates in a log-like output.

    Instances are immutable (``frozen=True``).
    """

    # Templates for the analysed lines; an entry may be None.
    # NOTE(review): presumably one entry per input line, None where the line
    # produced no template; confirm against the detector.
    line_templates: tuple[LineTemplate | None, ...]
    # Template strings treated as repeated.
    # NOTE(review): the repetition threshold is not visible here; confirm.
    repeated_templates: frozenset[str]
@@ -0,0 +1,7 @@
1
+ """Opaque blob and long payload compression."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .reducer import compress_opaque_payload_output
6
+
7
+ __all__ = ["compress_opaque_payload_output"]