codetool-shell 0.1.1__py3-none-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codetool_shell/__init__.py +11 -0
- codetool_shell/api.py +59 -0
- codetool_shell/bin/windows-arm64/codetool-shell-rust.exe +0 -0
- codetool_shell/filters/__init__.py +14 -0
- codetool_shell/filters/build_compiler/__init__.py +7 -0
- codetool_shell/filters/build_compiler/detector.py +412 -0
- codetool_shell/filters/build_compiler/reducer.py +166 -0
- codetool_shell/filters/build_compiler/summary.py +617 -0
- codetool_shell/filters/ci_job_log/__init__.py +7 -0
- codetool_shell/filters/ci_job_log/detector.py +64 -0
- codetool_shell/filters/ci_job_log/reducer.py +99 -0
- codetool_shell/filters/ci_job_log/summary.py +243 -0
- codetool_shell/filters/diff/__init__.py +7 -0
- codetool_shell/filters/diff/detector.py +136 -0
- codetool_shell/filters/diff/reducer.py +308 -0
- codetool_shell/filters/generic_log/__init__.py +7 -0
- codetool_shell/filters/generic_log/detector.py +175 -0
- codetool_shell/filters/generic_log/reducer.py +99 -0
- codetool_shell/filters/generic_log/summary.py +161 -0
- codetool_shell/filters/git.py +514 -0
- codetool_shell/filters/html_cleanup/__init__.py +7 -0
- codetool_shell/filters/html_cleanup/detector.py +136 -0
- codetool_shell/filters/html_cleanup/reducer.py +27 -0
- codetool_shell/filters/html_cleanup/summary.py +422 -0
- codetool_shell/filters/json_payload/__init__.py +7 -0
- codetool_shell/filters/json_payload/detector.py +62 -0
- codetool_shell/filters/json_payload/reducer.py +81 -0
- codetool_shell/filters/json_payload/summary.py +233 -0
- codetool_shell/filters/listing/__init__.py +7 -0
- codetool_shell/filters/listing/detector.py +294 -0
- codetool_shell/filters/listing/reducer.py +30 -0
- codetool_shell/filters/log_template/__init__.py +7 -0
- codetool_shell/filters/log_template/constants.py +76 -0
- codetool_shell/filters/log_template/detector.py +331 -0
- codetool_shell/filters/log_template/reducer.py +78 -0
- codetool_shell/filters/log_template/template.py +280 -0
- codetool_shell/filters/log_template/types.py +21 -0
- codetool_shell/filters/opaque_payload/__init__.py +7 -0
- codetool_shell/filters/opaque_payload/detector.py +563 -0
- codetool_shell/filters/opaque_payload/reducer.py +142 -0
- codetool_shell/filters/opaque_payload/summary.py +61 -0
- codetool_shell/filters/package_manager/__init__.py +7 -0
- codetool_shell/filters/package_manager/detector.py +220 -0
- codetool_shell/filters/package_manager/reducer.py +110 -0
- codetool_shell/filters/package_manager/summary.py +172 -0
- codetool_shell/filters/pipeline.py +65 -0
- codetool_shell/filters/rg.py +250 -0
- codetool_shell/filters/system_output/__init__.py +7 -0
- codetool_shell/filters/system_output/detector.py +600 -0
- codetool_shell/filters/system_output/reducer.py +331 -0
- codetool_shell/filters/system_output/summary.py +164 -0
- codetool_shell/filters/table/__init__.py +7 -0
- codetool_shell/filters/table/detector.py +244 -0
- codetool_shell/filters/table/reducer.py +57 -0
- codetool_shell/filters/table/summary.py +37 -0
- codetool_shell/filters/test_runner/__init__.py +7 -0
- codetool_shell/filters/test_runner/ansi.py +80 -0
- codetool_shell/filters/test_runner/detector.py +409 -0
- codetool_shell/filters/test_runner/reducer.py +288 -0
- codetool_shell/filters/test_runner/summary.py +449 -0
- codetool_shell/filters/text.py +38 -0
- codetool_shell/filters/traceback/__init__.py +7 -0
- codetool_shell/filters/traceback/detector.py +209 -0
- codetool_shell/filters/traceback/reducer.py +141 -0
- codetool_shell/filters/traceback/summary.py +122 -0
- codetool_shell/filters/tree.py +59 -0
- codetool_shell/py.typed +0 -0
- codetool_shell/python_backend.py +38 -0
- codetool_shell/rust_backend.py +254 -0
- codetool_shell-0.1.1.dist-info/METADATA +152 -0
- codetool_shell-0.1.1.dist-info/RECORD +72 -0
- codetool_shell-0.1.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Reduce JSON/JSONL payload output."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from ..text import join_preserving_final_newline, score, split_preserving_final_newline
|
|
6
|
+
from .detector import parse_jsonl_payload, parse_whole_json_payload
|
|
7
|
+
from .summary import summarize_json_array, summarize_jsonl_records
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def compress_json_payload_output(text: str) -> str:
    """Return the lowest-scoring rendering of a JSON or JSONL payload.

    ``text`` is first tried as one whole JSON document; when that parse
    yields nothing it is tried as JSONL. Every candidate (minified source,
    record summary) competes against the untouched input through
    ``_choose_smaller``, so the input itself is returned whenever no
    candidate scores lower or the text is neither JSON nor JSONL.
    """

    document = parse_whole_json_payload(text)
    if document is not None:
        keep_newline = text.endswith("\n")
        # Minify the raw source rather than re-serialising the parsed value.
        compact = _with_final_newline(_minify_json_source(text.strip()), keep_newline)
        winner = _choose_smaller(text, compact)
        array_summary = summarize_json_array(document, keep_newline)
        if array_summary is not None:
            winner = _choose_smaller(winner, array_summary)
        return winner

    lines, keep_newline = split_preserving_final_newline(text)
    payload = parse_jsonl_payload(lines)
    if payload is None:
        return text

    compact_lines = []
    for line in lines:
        trimmed = line.strip()
        # Whitespace-only lines collapse to empty strings but keep their slot.
        compact_lines.append(_minify_json_source(trimmed) if trimmed else "")

    winner = _choose_smaller(
        text,
        join_preserving_final_newline(compact_lines, keep_newline),
    )
    record_summary = summarize_jsonl_records(payload.records, keep_newline)
    if record_summary is not None:
        winner = _choose_smaller(winner, record_summary)
    return winner
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _choose_smaller(current: str, candidate: str) -> str:
    """Return ``candidate`` only if it scores strictly lower than ``current``."""
    # Ties keep ``current`` so an equal-cost rewrite never replaces the incumbent.
    return candidate if score(candidate) < score(current) else current
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _with_final_newline(text: str, final_newline: bool) -> str:
|
|
50
|
+
if final_newline:
|
|
51
|
+
return f"{text}\n"
|
|
52
|
+
return text
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _minify_json_source(source: str) -> str:
|
|
56
|
+
"""Remove insignificant JSON whitespace while preserving raw string content."""
|
|
57
|
+
|
|
58
|
+
output: list[str] = []
|
|
59
|
+
in_string = False
|
|
60
|
+
escaped = False
|
|
61
|
+
|
|
62
|
+
for char in source:
|
|
63
|
+
if in_string:
|
|
64
|
+
output.append(char)
|
|
65
|
+
if escaped:
|
|
66
|
+
escaped = False
|
|
67
|
+
elif char == "\\":
|
|
68
|
+
escaped = True
|
|
69
|
+
elif char == '"':
|
|
70
|
+
in_string = False
|
|
71
|
+
continue
|
|
72
|
+
|
|
73
|
+
if char == '"':
|
|
74
|
+
in_string = True
|
|
75
|
+
output.append(char)
|
|
76
|
+
elif char in " \t\r\n":
|
|
77
|
+
continue
|
|
78
|
+
else:
|
|
79
|
+
output.append(char)
|
|
80
|
+
|
|
81
|
+
return "".join(output)
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""Summaries for large JSON object-record collections."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from ..text import join_preserving_final_newline
|
|
10
|
+
|
|
11
|
+
# Collections with fewer records than this are returned unsummarized.
_MIN_SUMMARY_ROWS = 10
# Number of leading and trailing records that are always kept.
_HEAD_ROWS = 3
_TAIL_ROWS = 3
# Lower-cased key names whose values are inspected as a status.
_STATUS_KEYS = {"status", "result", "outcome"}
# Lower-cased, stripped string values accepted as a healthy status.
_OK_WORDS = {
    "ok",
    "success",
    "successful",
    "pass",
    "passed",
    "true",
    "done",
    "complete",
    "completed",
    "healthy",
    "0",
}
# Whole-word, case-insensitive alert vocabulary.
_ALERT_RE = re.compile(
    r"\b(?:error|errors|warn|warning|warnings|fail|failed|failure|fatal|critical|exception|traceback)\b",
    re.IGNORECASE,
)
# Path-like token at the start of the text or after whitespace / a pipe.
_PATH_RE = re.compile(
    r"(^|[\s|])(?:\.{0,2}/|/|~/|[A-Za-z]:[\\/]|[A-Za-z0-9_.@+-]+/[A-Za-z0-9_.@+/\-]+)"
)
# Hyphenated 8-4-4-4-12 hexadecimal identifier.
_UUID_RE = re.compile(
    r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b"
)
# Stand-alone run of at least 12 hexadecimal characters.
_LONG_HEX_RE = re.compile(r"\b[0-9a-fA-F]{12,}\b")
# Upper-case prefix, hyphen, at least two digits (e.g. "ABC-123").
_ISSUE_ID_RE = re.compile(r"\b[A-Z]{2,}-\d{2,}\b")
# Whole value made of 8+ identifier characters (letters, digits, "_", ".", "-").
_ID_VALUE_RE = re.compile(r"^[A-Za-z0-9_.-]{8,}$")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def summarize_json_array(value: Any, final_newline: bool) -> str | None:
    """Build a summary for a top-level JSON array made only of objects.

    Returns ``None`` when ``value`` is not a list, has fewer than
    ``_MIN_SUMMARY_ROWS`` items, or contains any non-dict item.
    """

    long_enough_list = isinstance(value, list) and len(value) >= _MIN_SUMMARY_ROWS
    if not long_enough_list:
        return None
    for row in value:
        if not isinstance(row, dict):
            return None

    labels = {
        "header_prefix": "json array",
        "count_label": "rows",
        "omitted_label": "JSON rows",
    }
    return _summarize_records(records=value, final_newline=final_newline, **labels)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def summarize_jsonl_records(
    records: tuple[dict[str, Any], ...],
    final_newline: bool,
) -> str | None:
    """Build a summary for JSONL object records.

    Returns ``None`` when there are fewer than ``_MIN_SUMMARY_ROWS`` records.
    """

    if len(records) < _MIN_SUMMARY_ROWS:
        return None

    labels = {
        "header_prefix": "jsonl",
        "count_label": "records",
        "omitted_label": "JSONL records",
    }
    # The shared summarizer expects a list, so the tuple is copied.
    return _summarize_records(records=list(records), final_newline=final_newline, **labels)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _summarize_records(
    *,
    header_prefix: str,
    count_label: str,
    omitted_label: str,
    records: list[dict[str, Any]],
    final_newline: bool,
) -> str:
    """Render a header line, the selected records, and omission markers.

    Selected records are emitted in their original order as compact JSON;
    each run of skipped records becomes a single ``… N <label> omitted`` line.
    """

    key_list = ",".join(_schema_keys(records))
    chosen = _select_record_indices(records)
    # The "jsonl" header has no colon after the prefix; every other prefix does.
    separator = "" if header_prefix == "jsonl" else ":"
    rendered = [f"{header_prefix}{separator} {count_label}={len(records)} keys={key_list}"]

    next_unseen = 0
    for position in chosen:
        gap = position - next_unseen
        if gap > 0:
            rendered.append(f"… {gap} {omitted_label} omitted")
        rendered.append(_format_record(records[position]))
        next_unseen = position + 1

    remaining = len(records) - next_unseen
    if remaining > 0:
        rendered.append(f"… {remaining} {omitted_label} omitted")

    return join_preserving_final_newline(rendered, final_newline)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _schema_keys(records: list[dict[str, Any]]) -> list[str]:
|
|
107
|
+
keys: list[str] = []
|
|
108
|
+
seen: set[str] = set()
|
|
109
|
+
for record in records:
|
|
110
|
+
for key in record:
|
|
111
|
+
if key not in seen:
|
|
112
|
+
seen.add(key)
|
|
113
|
+
keys.append(key)
|
|
114
|
+
return keys
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _select_record_indices(records: list[dict[str, Any]]) -> list[int]:
    """Return the sorted indices of records to show in a summary.

    The first ``_HEAD_ROWS`` and last ``_TAIL_ROWS`` records are always kept,
    plus every record that ``_is_salient_record`` flags when compared with
    the key set of the first record.
    """

    count = len(records)
    head = range(min(_HEAD_ROWS, count))
    # Starting the tail at or after ``_HEAD_ROWS`` keeps it clear of the head.
    tail = range(max(_HEAD_ROWS, count - _TAIL_ROWS), count)
    chosen = {*head, *tail}

    reference_keys = set(records[0]) if records else set()
    for position, record in enumerate(records):
        if _is_salient_record(record, reference_keys):
            chosen.add(position)

    return sorted(chosen)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _is_salient_record(record: dict[str, Any], baseline_keys: set[str]) -> bool:
|
|
131
|
+
return (
|
|
132
|
+
set(record) != baseline_keys
|
|
133
|
+
or _has_non_ok_status(record)
|
|
134
|
+
or _has_named_id_field(record)
|
|
135
|
+
or any(_is_interesting_text(value) for value in _iter_string_values(record))
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _format_record(record: dict[str, Any]) -> str:
|
|
140
|
+
return json.dumps(record, ensure_ascii=False, separators=(",", ":"))
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _iter_string_values(value: Any) -> list[str]:
|
|
144
|
+
if isinstance(value, str):
|
|
145
|
+
return [value]
|
|
146
|
+
if isinstance(value, dict):
|
|
147
|
+
values: list[str] = []
|
|
148
|
+
for child in value.values():
|
|
149
|
+
values.extend(_iter_string_values(child))
|
|
150
|
+
return values
|
|
151
|
+
if isinstance(value, list):
|
|
152
|
+
values = []
|
|
153
|
+
for child in value:
|
|
154
|
+
values.extend(_iter_string_values(child))
|
|
155
|
+
return values
|
|
156
|
+
return []
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _is_interesting_text(value: str) -> bool:
|
|
160
|
+
return (
|
|
161
|
+
bool(_ALERT_RE.search(value))
|
|
162
|
+
or "://" in value
|
|
163
|
+
or bool(_PATH_RE.search(value))
|
|
164
|
+
or bool(_UUID_RE.search(value))
|
|
165
|
+
or bool(_LONG_HEX_RE.search(value))
|
|
166
|
+
or bool(_ISSUE_ID_RE.search(value))
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _has_named_id_field(value: Any) -> bool:
|
|
171
|
+
if isinstance(value, dict):
|
|
172
|
+
for key, child in value.items():
|
|
173
|
+
if _is_named_id_key(key) and _is_named_id_value(child):
|
|
174
|
+
return True
|
|
175
|
+
if _has_named_id_field(child):
|
|
176
|
+
return True
|
|
177
|
+
elif isinstance(value, list):
|
|
178
|
+
return any(_has_named_id_field(child) for child in value)
|
|
179
|
+
return False
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _is_named_id_key(key: str) -> bool:
|
|
183
|
+
lower = key.lower()
|
|
184
|
+
return (
|
|
185
|
+
lower == "id"
|
|
186
|
+
or lower.endswith("_id")
|
|
187
|
+
or lower.endswith("-id")
|
|
188
|
+
or "uuid" in lower
|
|
189
|
+
or "hash" in lower
|
|
190
|
+
or lower in {"sha", "sha1", "sha256", "sha512"}
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _is_named_id_value(value: Any) -> bool:
|
|
195
|
+
if not isinstance(value, str):
|
|
196
|
+
return False
|
|
197
|
+
stripped = value.strip()
|
|
198
|
+
return (
|
|
199
|
+
len(stripped) >= 8
|
|
200
|
+
and not stripped.isdigit()
|
|
201
|
+
and bool(_ID_VALUE_RE.fullmatch(stripped))
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _has_non_ok_status(value: Any) -> bool:
|
|
206
|
+
if isinstance(value, dict):
|
|
207
|
+
for key, child in value.items():
|
|
208
|
+
lower = key.lower()
|
|
209
|
+
if lower in _STATUS_KEYS and not _is_ok_status_value(lower, child):
|
|
210
|
+
return True
|
|
211
|
+
if _has_non_ok_status(child):
|
|
212
|
+
return True
|
|
213
|
+
elif isinstance(value, list):
|
|
214
|
+
return any(_has_non_ok_status(child) for child in value)
|
|
215
|
+
return False
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def _is_ok_status_value(key: str, value: Any) -> bool:
|
|
219
|
+
if isinstance(value, bool):
|
|
220
|
+
return value
|
|
221
|
+
if isinstance(value, int | float) and not isinstance(value, bool):
|
|
222
|
+
if key == "status":
|
|
223
|
+
return value == 0 or 200 <= value < 300
|
|
224
|
+
return value == 0
|
|
225
|
+
if isinstance(value, str):
|
|
226
|
+
normalized = value.strip().lower()
|
|
227
|
+
if normalized in _OK_WORDS:
|
|
228
|
+
return True
|
|
229
|
+
if key == "status" and normalized.isdigit():
|
|
230
|
+
status = int(normalized)
|
|
231
|
+
return 200 <= status < 300
|
|
232
|
+
return False
|
|
233
|
+
return False
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
"""Conservative parsers for file/tree/listing output."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class ListingEntry:
    """One path taken from listing output, plus an optional display suffix."""

    # Path text with any trailing directory separator already removed.
    path: str
    # "/" marks a directory; the empty default means no marker.
    suffix: str = ""
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True)
class ListingParse:
    """Result of a successful listing parse, ready to be rendered as a tree."""

    # Parsed path entries in output order.
    entries: tuple[ListingEntry, ...]
    # Footer lines such as "3 directories, 7 files", kept verbatim.
    summary_lines: tuple[str, ...] = ()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Inputs with fewer lines than this are not treated as a plain path list.
_MIN_PATH_LIST_LINES = 4
# Footer shapes: "N directories[, M files]" or "N files" (singular forms too).
_TREE_SUMMARY_RE = re.compile(
    r"^(?:\d+ director(?:y|ies)(?:, \d+ files?)?|\d+ files?)$"
)
# "<path>:<line>[:<col>]:<text>" with 1-based numbers, as printed by grep-like tools.
_RG_STYLE_RE = re.compile(r"^(.+?):([1-9]\d*)(?::([1-9]\d*))?:(.*)$")
# Leading file-type character and nine permission characters, then whitespace.
_PERMISSION_ROW_RE = re.compile(r"^[bcdlps-][rwxStTs-]{9}\s+")
# Final path component of the form name.ext, optionally followed by "/".
_FILE_EXTENSION_RE = re.compile(r"(^|/)[A-Za-z0-9_.@+-]+\.[A-Za-z0-9][A-Za-z0-9_.+-]*/?$")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def parse_listing_lines(lines: list[str]) -> ListingParse | None:
    """Return the first non-empty parse of ``lines``, or ``None``.

    The recognisers run in a fixed order: plain path list, ``tree``-style
    output, then ``ls -R``-style output.
    """

    strategies = (_parse_path_list, _parse_tree_output, _parse_ls_r_output)
    # The generator is lazy, so later recognisers only run if earlier ones fail.
    attempts = (strategy(lines) for strategy in strategies)
    return next(
        (result for result in attempts if result is not None and result.entries),
        None,
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _parse_path_list(lines: list[str]) -> ListingParse | None:
    """Parse output that is just one path per line.

    Returns ``None`` if there are fewer than ``_MIN_PATH_LIST_LINES`` lines,
    any line is empty, or any line fails ``_is_safe_path_line``.
    """

    if len(lines) < _MIN_PATH_LIST_LINES:
        return None
    if not all(lines):
        # One empty line disqualifies the whole input.
        return None
    if not all(_is_safe_path_line(line) for line in lines):
        return None

    return ListingParse(tuple(_entry_from_path(line) for line in lines))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _parse_tree_output(lines: list[str]) -> ListingParse | None:
    """Parse ``tree``-style output into full paths.

    Returns ``None`` when fewer than three non-blank lines or fewer than
    three branch lines are present, when a non-branch line is not an
    acceptable root, or when a branch is indented too deep or has an unsafe
    label. Blank lines are skipped; summary footer lines are collected
    separately.
    """

    non_blank = [line for line in lines if line.strip()]
    if len(non_blank) < 3:
        return None

    root_prefix: str | None = None
    branch_count = 0
    entries: list[ListingEntry] = []
    summary_lines: list[str] = []
    # Labels of the ancestors of the branch being processed, outermost first.
    stack: list[str] = []

    for raw_line in lines:
        stripped = raw_line.strip()
        if not stripped:
            continue

        if _is_tree_summary_line(stripped):
            summary_lines.append(stripped)
            continue

        # The raw line is parsed because depth comes from the marker's column.
        branch = _parse_tree_branch_line(raw_line)
        if branch is None:
            # A non-branch line is only tolerated as a root before any branch.
            if branch_count == 0 and root_prefix is None and _is_safe_tree_root(stripped):
                # "." and "./" add no prefix to the generated paths.
                root_prefix = None if stripped in {".", "./"} else stripped
                continue
            return None

        depth, label = branch
        # A branch may be at most one level deeper than the current stack.
        if depth > len(stack) or not _is_safe_tree_label(label):
            return None

        branch_count += 1
        clean_label, suffix = _split_directory_suffix(label)
        # Drop ancestors that are not above this branch's depth.
        stack = stack[:depth]
        path_parts = []
        if root_prefix is not None:
            path_parts.append(root_prefix)
        path_parts.extend(stack)
        path_parts.append(clean_label)
        # NOTE(review): a root with a trailing "/" gives "root//child" here — confirm format_tree tolerates it.
        entries.append(ListingEntry("/".join(path_parts), suffix))
        # Every label is pushed; a file's entry is cut off by the next sibling's slice.
        stack.append(clean_label)

    if branch_count < 3:
        return None
    return ListingParse(tuple(_dedupe_entries(entries)), tuple(summary_lines))
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _parse_ls_r_output(lines: list[str]) -> ListingParse | None:
    """Parse ``ls -R``-style output (``dir:`` headers followed by child names).

    Returns ``None`` when fewer than four non-blank lines, fewer than two
    headers or fewer than two children are present, when any non-blank line
    has surrounding whitespace, when a header or child is unsafe, when a
    child precedes the first header, or when nothing in the input looks
    path-like.
    """

    if len([line for line in lines if line.strip()]) < 4:
        return None

    # Directory named by the most recent "header:" line.
    current_dir: str | None = None
    headers: list[str] = []
    child_names: list[str] = []
    entries: list[ListingEntry] = []

    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue
        # Any leading or trailing whitespace disqualifies the whole input.
        if raw_line != line:
            return None

        if line.endswith(":"):
            header = line[:-1]
            if not _is_safe_ls_header(header):
                return None
            current_dir = header
            headers.append(header)
            # The current directory itself is not listed as an entry.
            if header not in {".", "./"}:
                entries.append(ListingEntry(header, "/"))
            continue

        # Children are only valid once a header has been seen.
        if current_dir is None or not _is_safe_ls_child(line):
            return None

        child_names.append(line)
        child, suffix = _split_directory_suffix(line)
        entries.append(ListingEntry(_join_listing_path(current_dir, child), suffix))

    if len(headers) < 2 or len(child_names) < 2:
        return None
    if not _has_strong_ls_r_signal(headers, child_names):
        return None

    return ListingParse(tuple(_dedupe_entries(entries)))
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _is_safe_path_line(line: str) -> bool:
|
|
146
|
+
if not line or line.strip() != line:
|
|
147
|
+
return False
|
|
148
|
+
if line in {".", "./"}:
|
|
149
|
+
return True
|
|
150
|
+
if line in {"..", "../"}:
|
|
151
|
+
return False
|
|
152
|
+
if len(line) > 300:
|
|
153
|
+
return False
|
|
154
|
+
if any(char in line for char in "\t|"):
|
|
155
|
+
return False
|
|
156
|
+
if "://" in line or " " in line:
|
|
157
|
+
return False
|
|
158
|
+
if _PERMISSION_ROW_RE.match(line) or _is_rg_style_line(line):
|
|
159
|
+
return False
|
|
160
|
+
if ":" in line and not re.match(r"^[A-Za-z]:[\\/]", line):
|
|
161
|
+
return False
|
|
162
|
+
if line.startswith(("Traceback", "File ", "at ", "├", "└", "|--", "`--")):
|
|
163
|
+
return False
|
|
164
|
+
return _looks_like_path(line)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _looks_like_path(value: str) -> bool:
|
|
168
|
+
if value in {".", "./"}:
|
|
169
|
+
return True
|
|
170
|
+
if value.startswith(("./", "../", "/", "~/", "~\\")):
|
|
171
|
+
return True
|
|
172
|
+
if "/" in value or "\\" in value:
|
|
173
|
+
return True
|
|
174
|
+
return _FILE_EXTENSION_RE.search(value) is not None
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def _is_rg_style_line(line: str) -> bool:
    """Tell whether ``line`` is a ``path:line[:col]:text`` match line."""
    parsed = _RG_STYLE_RE.match(line)
    if parsed is None:
        return False
    # The text before the first line number must itself look like a path.
    return _looks_like_path(parsed.group(1))
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _entry_from_path(path: str) -> ListingEntry:
    """Build a ``ListingEntry`` from ``path``, moving a trailing separator into the suffix."""
    return ListingEntry(*_split_directory_suffix(path))
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _split_directory_suffix(path: str) -> tuple[str, str]:
|
|
188
|
+
if path not in {"/", "\\"} and path.endswith(("/", "\\")):
|
|
189
|
+
return path.rstrip("/\\"), "/"
|
|
190
|
+
return path, ""
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _is_tree_summary_line(line: str) -> bool:
    """Tell whether ``line`` is a footer such as ``"3 directories, 7 files"``."""
    return bool(_TREE_SUMMARY_RE.match(line))
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _parse_tree_branch_line(line: str) -> tuple[int, str] | None:
|
|
198
|
+
for marker in ("├── ", "└── ", "|-- ", "`-- "):
|
|
199
|
+
index = line.find(marker)
|
|
200
|
+
if index < 0:
|
|
201
|
+
continue
|
|
202
|
+
prefix = line[:index]
|
|
203
|
+
if index % 4 != 0 or not _is_tree_prefix(prefix):
|
|
204
|
+
return None
|
|
205
|
+
label = line[index + len(marker) :].strip()
|
|
206
|
+
if not label:
|
|
207
|
+
return None
|
|
208
|
+
return index // 4, label
|
|
209
|
+
return None
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _is_tree_prefix(prefix: str) -> bool:
|
|
213
|
+
return all(
|
|
214
|
+
chunk in {"│ ", "| ", " "}
|
|
215
|
+
for chunk in (prefix[index : index + 4] for index in range(0, len(prefix), 4))
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _is_safe_tree_root(value: str) -> bool:
|
|
220
|
+
if value in {".", "./"}:
|
|
221
|
+
return True
|
|
222
|
+
if any(char in value for char in "\t|:"):
|
|
223
|
+
return False
|
|
224
|
+
if "://" in value or " " in value:
|
|
225
|
+
return False
|
|
226
|
+
return bool(re.match(r"^[A-Za-z0-9_.@+~/-]+$", value))
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _is_safe_tree_label(label: str) -> bool:
|
|
230
|
+
if not label or label in {".", ".."}:
|
|
231
|
+
return False
|
|
232
|
+
if label.startswith("[") or " -> " in label:
|
|
233
|
+
return False
|
|
234
|
+
if any(char in label for char in "\t:"):
|
|
235
|
+
return False
|
|
236
|
+
if _PERMISSION_ROW_RE.match(label):
|
|
237
|
+
return False
|
|
238
|
+
return True
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _is_safe_ls_header(header: str) -> bool:
|
|
242
|
+
if not header or header in {"..", "../"}:
|
|
243
|
+
return False
|
|
244
|
+
if header.strip() != header:
|
|
245
|
+
return False
|
|
246
|
+
if any(char in header for char in "\t|:"):
|
|
247
|
+
return False
|
|
248
|
+
if "://" in header or " " in header:
|
|
249
|
+
return False
|
|
250
|
+
return bool(re.match(r"^[A-Za-z0-9_.@+~/-]+$", header))
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _is_safe_ls_child(child: str) -> bool:
|
|
254
|
+
if child in {".", ".."} or child.startswith("total "):
|
|
255
|
+
return False
|
|
256
|
+
if child.strip() != child:
|
|
257
|
+
return False
|
|
258
|
+
if _PERMISSION_ROW_RE.match(child):
|
|
259
|
+
return False
|
|
260
|
+
clean_child = child[:-1] if child.endswith("/") else child
|
|
261
|
+
if any(char in clean_child for char in "\t|:/\\"):
|
|
262
|
+
return False
|
|
263
|
+
if "://" in clean_child or " " in clean_child:
|
|
264
|
+
return False
|
|
265
|
+
return bool(re.match(r"^[A-Za-z0-9_.@+-]+$", clean_child))
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def _has_strong_ls_r_signal(headers: list[str], child_names: list[str]) -> bool:
|
|
269
|
+
return any(
|
|
270
|
+
header in {".", "./"}
|
|
271
|
+
or header.startswith(("./", "../", "/", "~/"))
|
|
272
|
+
or "/" in header
|
|
273
|
+
or "\\" in header
|
|
274
|
+
or _looks_like_path(header)
|
|
275
|
+
for header in headers
|
|
276
|
+
) or any(_looks_like_path(child) for child in child_names)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _join_listing_path(parent: str, child: str) -> str:
|
|
280
|
+
if parent in {".", "./"}:
|
|
281
|
+
return child
|
|
282
|
+
return f"{parent.rstrip('/')}/{child}"
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _dedupe_entries(entries: list[ListingEntry]) -> list[ListingEntry]:
|
|
286
|
+
seen: set[tuple[str, str]] = set()
|
|
287
|
+
output: list[ListingEntry] = []
|
|
288
|
+
for entry in entries:
|
|
289
|
+
key = (entry.path, entry.suffix)
|
|
290
|
+
if key in seen:
|
|
291
|
+
continue
|
|
292
|
+
seen.add(key)
|
|
293
|
+
output.append(entry)
|
|
294
|
+
return output
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Reduce safe file/tree/listing output to compact path trees."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from ..text import join_preserving_final_newline, score, split_preserving_final_newline
|
|
6
|
+
from ..tree import TreeRow, format_tree
|
|
7
|
+
from .detector import parse_listing_lines
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def compress_listing_output(text: str) -> str:
    """Render recognised listing output as a compact tree when that scores lower.

    ``text`` is returned unchanged when it is not a recognised listing, the
    tree renders empty, the result would have fewer than two lines, or the
    result does not score strictly lower than the input.
    """

    lines, keep_newline = split_preserving_final_newline(text)
    listing = parse_listing_lines(lines)
    if listing is None:
        return text

    rendered = format_tree(
        [TreeRow(entry.path, suffix=entry.suffix) for entry in listing.entries]
    )
    if not rendered:
        return text

    # Summary footer lines go first, followed by the rendered tree.
    output_lines = list(listing.summary_lines) + rendered.splitlines()
    if len(output_lines) < 2:
        return text

    candidate = join_preserving_final_newline(output_lines, keep_newline)
    return candidate if score(candidate) < score(text) else text
|