leancontext 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,152 @@
1
+ """Protocol-aware message reduction — the gateway/wire surface.
2
+
3
+ This is how LeanContext plugs into gateways (LiteLLM), SDK client wrappers, and proxies
4
+ *without* the structure-blindness that hurts wire-level compressors: the chat
5
+ protocols already tag tool outputs (OpenAI ``role="tool"``; Anthropic
6
+ ``tool_result`` blocks), so we can find and reduce exactly those — and nothing
7
+ else. We never touch system/user/assistant instruction text. Fail-open throughout.
8
+
9
+ Cache-safety: reductions are deterministic and content-addressed, so the same tool
10
+ output always serialises to the same bytes → the provider prompt-cache keeps hitting.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import Any
16
+
17
+ from .core import reduce_text
18
+
19
+
20
def detect_format(messages: list) -> str:
    """Guess which chat protocol ``messages`` follows.

    Messages are scanned in order and the first recognisable marker wins:
    a list-valued ``parts`` key means ``"gemini"``, a ``tool``/``function``
    role means ``"openai"``, and a ``tool_result`` block inside a list-valued
    ``content`` means ``"anthropic"``. Entries that are not dicts are ignored.
    ``"openai"`` is the fallback when nothing matches.
    """
    for message in messages:
        if isinstance(message, dict):
            if isinstance(message.get("parts"), list):
                return "gemini"
            if message.get("role") in ("tool", "function"):
                return "openai"
            blocks = message.get("content")
            if isinstance(blocks, list) and any(
                isinstance(b, dict) and b.get("type") == "tool_result" for b in blocks
            ):
                return "anthropic"
    return "openai"
35
+
36
+
37
def _reduce_str(text: Any, opts: dict) -> Any:
    """Reduce ``text`` with ``reduce_text`` when it is a string.

    Non-string values are handed back untouched. For strings, ``opts`` is
    forwarded as keyword arguments and the ``.text`` attribute of the result
    is returned.
    """
    if isinstance(text, str):
        return reduce_text(text, **opts).text
    return text
41
+
42
+
43
+ # --- OpenAI / chat-completions format ----------------------------------------
44
+
45
def _reduce_openai_message(m: Any, opts: dict) -> Any:
    """Return ``m`` with its tool output reduced (OpenAI chat format).

    Only dict messages whose ``role`` is ``"tool"`` or ``"function"`` are
    considered. String content is reduced directly and list content is
    reduced part by part; in both cases a shallow copy is returned so ``m``
    is not mutated. Every other message is returned as-is.
    """
    is_tool_message = isinstance(m, dict) and m.get("role") in ("tool", "function")
    if not is_tool_message:
        return m
    payload = m.get("content")
    if isinstance(payload, str):
        return {**m, "content": _reduce_str(payload, opts)}
    if isinstance(payload, list):
        return {**m, "content": [_reduce_openai_part(item, opts) for item in payload]}
    return m
58
+
59
+
60
def _reduce_openai_part(part: Any, opts: dict) -> Any:
    """Reduce a single content part of an OpenAI tool message.

    Only dict parts typed ``"text"`` or ``"output_text"`` whose ``text`` is a
    string are rewritten (as a shallow copy). Anything else passes through.
    """
    if not isinstance(part, dict):
        return part
    if part.get("type") not in ("text", "output_text"):
        return part
    body = part.get("text")
    if not isinstance(body, str):
        return part
    return {**part, "text": _reduce_str(body, opts)}
70
+
71
+
72
+ # --- Anthropic messages format -----------------------------------------------
73
+
74
def _reduce_anthropic_message(m: Any, opts: dict) -> Any:
    """Return ``m`` with its ``tool_result`` blocks reduced (Anthropic format).

    Only dict messages with list-valued ``content`` are inspected. A
    ``tool_result`` block is rewritten when its own ``content`` is a string
    (reduced directly) or a list (each entry goes through
    ``_reduce_anthropic_textblock``). If no block qualifies the original
    message object is returned; otherwise a shallow copy with the rebuilt
    block list is returned.
    """
    if not isinstance(m, dict):
        return m
    blocks = m.get("content")
    if not isinstance(blocks, list):
        return m

    rebuilt: list = []
    touched = False
    for item in blocks:
        is_tool_result = isinstance(item, dict) and item.get("type") == "tool_result"
        inner = item.get("content") if is_tool_result else None
        if isinstance(inner, str):
            rebuilt.append({**item, "content": _reduce_str(inner, opts)})
            touched = True
        elif isinstance(inner, list):
            rebuilt.append(
                {**item, "content": [_reduce_anthropic_textblock(x, opts) for x in inner]}
            )
            touched = True
        else:
            rebuilt.append(item)

    return {**m, "content": rebuilt} if touched else m
102
+
103
+
104
def _reduce_anthropic_textblock(x: Any, opts: dict) -> Any:
    """Reduce one entry of a ``tool_result`` content list.

    Only dict entries typed ``"text"`` with a string ``text`` are rewritten
    (as a shallow copy). Everything else is returned unchanged.
    """
    if not (isinstance(x, dict) and x.get("type") == "text"):
        return x
    body = x.get("text")
    if isinstance(body, str):
        return {**x, "text": _reduce_str(body, opts)}
    return x
110
+
111
+
112
+ # --- Gemini format -----------------------------------------------------------
113
+ # Gemini uses `contents` -> `parts`, where a tool result is a `functionResponse`
114
+ # part whose `response` is a dict. We reduce the large string values inside that
115
+ # dict, keeping the dict shape Gemini requires. Typed SDK objects (non-dict)
116
+ # pass through untouched.
117
+
118
def _reduce_gemini_message(content: Any, opts: dict) -> Any:
    """Return ``content`` with its ``functionResponse`` payloads reduced.

    Only dicts with a list-valued ``parts`` key are inspected. For each part
    whose ``functionResponse`` is a dict holding a dict ``response``, the
    string values directly inside that ``response`` are reduced; other values
    and the dict shape are kept. If no part qualifies the original object is
    returned, otherwise a shallow copy with the rebuilt parts list.
    """
    parts = content.get("parts") if isinstance(content, dict) else None
    if not isinstance(parts, list):
        return content

    rebuilt: list = []
    touched = False
    for part in parts:
        call = part.get("functionResponse") if isinstance(part, dict) else None
        payload = call.get("response") if isinstance(call, dict) else None
        if not isinstance(payload, dict):
            rebuilt.append(part)
            continue
        slim = {}
        for key, value in payload.items():
            slim[key] = _reduce_str(value, opts) if isinstance(value, str) else value
        rebuilt.append({**part, "functionResponse": {**call, "response": slim}})
        touched = True

    return {**content, "parts": rebuilt} if touched else content
134
+
135
+
136
+ # --- public ------------------------------------------------------------------
137
+
138
def reduce_messages(messages: Any, *, fmt: str = "auto", **opts) -> Any:
    """Build a new message list in which tool outputs have been reduced.

    ``fmt`` selects the protocol; ``"auto"`` asks ``detect_format``. The
    values ``"anthropic"`` and ``"gemini"`` pick the matching per-message
    reducer and every other value uses the OpenAI one. ``opts`` is passed
    through to the per-message reducers. A ``messages`` value that is not a
    list is returned untouched.
    """
    if not isinstance(messages, list):
        return messages

    protocol = fmt if fmt != "auto" else detect_format(messages)
    if protocol == "gemini":
        return [_reduce_gemini_message(item, opts) for item in messages]
    if protocol == "anthropic":
        return [_reduce_anthropic_message(item, opts) for item in messages]
    return [_reduce_openai_message(item, opts) for item in messages]
leancontext/paging.py ADDED
@@ -0,0 +1,104 @@
1
+ """Paging: drop aged tool outputs from the wire, keep them retrievable.
2
+
3
+ Reducing shrinks each payload; paging goes further by removing old payloads from
4
+ context once the agent has moved on. The output is replaced with a small reference
5
+ (a few tens of tokens) and the original is stored, so the agent can fetch it back
6
+ with the expand tool when it needs the detail again.
7
+
8
+ Refs are content hashes, so they're deterministic. The store is in-memory by
9
+ default, or disk-backed (set ``root``) for retrieval across processes.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import re
16
+
17
+ from .tokens import content_ref, count_tokens
18
+
19
# URI scheme used when a reference is rendered, e.g. ``lc://<id>``.
REF_SCHEME = "lc"
# Finds a reference anywhere in a string and captures the 6-40 character
# lowercase-hex id that follows the scheme.
_REF_RE = re.compile(REF_SCHEME + r"://([0-9a-f]{6,40})")
21
+
22
+
23
class ContentStore:
    """Maps a content hash → original content. In-memory, or disk-backed if ``root`` set.

    With a truthy ``root`` every entry lives in ``<root>/<ref>.txt`` (the
    directory is created on construction); otherwise entries are kept in a
    dict for the lifetime of the object.
    """

    def __init__(self, root: str | None = None):
        self.root = root
        self._mem: dict[str, str] = {}
        if self.root:
            os.makedirs(self.root, exist_ok=True)

    def _path(self, ref: str) -> str:
        """Return the file path used for ``ref`` in a disk-backed store."""
        return os.path.join(self.root, f"{ref}.txt")  # type: ignore[arg-type]

    def put(self, content: str) -> str:
        """Store ``content`` and return the ref computed by ``content_ref``."""
        ref = content_ref(content)
        if self.root:
            # newline="" switches off newline translation so the stored text
            # comes back unchanged (CR/LF sequences included).
            with open(self._path(ref), "w", encoding="utf-8", newline="") as fh:
                fh.write(content)
        else:
            self._mem[ref] = content
        return ref

    def get(self, ref: str) -> str | None:
        """Return the content stored under ``ref``, or ``None`` if unavailable.

        For a disk-backed store ``None`` is also returned when ``ref`` would
        resolve to a file outside ``root`` (refs may come straight from an
        agent tool call), when the file cannot be opened, or when it cannot
        be decoded as UTF-8.
        """
        if not self.root:
            return self._mem.get(ref)
        try:
            path = self._path(ref)
            # Refuse refs such as "../x" or "sub/x": the resolved file must
            # sit directly inside root.
            if os.path.dirname(os.path.abspath(path)) != os.path.abspath(self.root):
                return None
            with open(path, encoding="utf-8", newline="") as fh:
                return fh.read()
        except (OSError, ValueError):
            # ValueError covers refs with an embedded NUL and undecodable files.
            return None
52
+
53
+
54
# Module-level in-memory store used whenever a caller does not pass ``using``.
_DEFAULT_STORE: ContentStore = ContentStore()
55
+
56
+
57
def _normalize(ref: str) -> str:
    """Extract the bare id from ``ref``.

    When an ``lc://<id>`` reference occurs anywhere in ``ref`` its id is
    returned; otherwise ``ref`` is returned with surrounding whitespace
    stripped.
    """
    found = _REF_RE.search(ref)
    if found is None:
        return ref.strip()
    return found.group(1)
60
+
61
+
62
def store(content: str, using: ContentStore | None = None) -> str:
    """Save ``content`` in ``using`` (or the default store) and return its ref id."""
    backend = using or _DEFAULT_STORE
    return backend.put(content)
65
+
66
+
67
def expand(ref: str, using: ContentStore | None = None) -> str | None:
    """Look up the original content for ``ref`` ('lc://<id>' or a bare id).

    The lookup goes to ``using`` when given, else to the default store, and
    yields whatever that store's ``get`` returns for the normalised id.
    """
    backend = using or _DEFAULT_STORE
    ref_id = _normalize(ref)
    return backend.get(ref_id)
70
+
71
+
72
def reference_line(content: str, summary: str | None = None,
                   using: ContentStore | None = None) -> str:
    """Store ``content`` and build the one-line reference that replaces it.

    The line carries the ``lc://`` ref, the token count of ``content`` and a
    hint to call ``leancontext_expand``; a non-empty ``summary`` is appended
    after a dash.
    """
    ref_id = store(content, using=using)
    size = count_tokens(content)
    suffix = ""
    if summary:
        suffix = f" — {summary}"
    return f"[{REF_SCHEME}://{ref_id} · {size} tokens · call leancontext_expand to view{suffix}]"
79
+
80
+
81
def page(content: str, *, summary: str | None = None,
         using: ContentStore | None = None) -> str:
    """Replace aged content with an expandable reference line.

    Thin keyword-only wrapper around ``reference_line``.
    """
    collapsed = reference_line(content, summary=summary, using=using)
    return collapsed
85
+
86
+
87
#: Tool spec to expose ``expand`` to an agent (OpenAI/Anthropic/MCP-compatible shape).
#: It declares one required string argument, ``ref``.
EXPAND_TOOL_SPEC = dict(
    name="leancontext_expand",
    description=(
        "Retrieve the full original content for a LeanContext reference id"
        " (format: lc://<id>) that was collapsed to save tokens."
    ),
    input_schema=dict(
        type="object",
        properties=dict(
            ref=dict(
                type="string",
                description="The reference id, e.g. 'lc://a1b2c3d4' or the bare id.",
            ),
        ),
        required=["ref"],
    ),
)
leancontext/py.typed ADDED
File without changes
@@ -0,0 +1,36 @@
1
+ """Typed reducers.
2
+
3
+ Each reducer module exposes a ``REDUCER`` (kind, detector, reduce function,
4
+ priority). ``REGISTRY`` is the ordered list the core iterates for detection and
5
+ dispatch, so adding a reducer means adding one module and listing it here.
6
+ """
7
+
8
+ from .base import Reducer
9
+ from .diff import REDUCER as _diff
10
+ from .diff import reduce_diff
11
+ from .html import REDUCER as _html
12
+ from .html import reduce_html
13
+ from .json_data import REDUCER as _json
14
+ from .json_data import reduce_json
15
+ from .logs import REDUCER as _logs
16
+ from .logs import reduce_logs
17
+ from .stacktrace import REDUCER as _stacktrace
18
+ from .stacktrace import reduce_stacktrace
19
+ from .table import REDUCER as _table
20
+ from .table import reduce_table
21
+
22
+ # Detection runs in priority order (lowest first): json, stacktrace, diff, html, log.
23
REGISTRY: list[Reducer] = [_json, _stacktrace, _diff, _html, _logs, _table]
# Stable in-place sort by ascending priority; ties keep the listing order above.
REGISTRY.sort(key=lambda reducer: reducer.priority)
26
+
27
# Public names of the package: the registration type, the ordered registry,
# and one reduce function per reducer module.
__all__ = [
    "Reducer", "REGISTRY",
    "reduce_logs", "reduce_json", "reduce_diff",
    "reduce_stacktrace", "reduce_html", "reduce_table",
]
@@ -0,0 +1,19 @@
1
+ """The shape every reducer registers with.
2
+
3
+ A reducer bundles three things: the kind name, a detector that says whether a
4
+ payload is this kind, and the reduce function. Detection priority is explicit
5
+ (lower runs first), so the order is clear and stable.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Callable
11
+ from dataclasses import dataclass
12
+
13
+
14
@dataclass(frozen=True)
class Reducer:
    """Immutable registration record for one reducer."""

    # Name of the payload kind this reducer handles.
    kind: str
    # Predicate: does the given text look like this kind?
    detect: Callable[[str], bool]
    # Reduction: takes the text, returns (reduced text, list of notes).
    reduce: Callable[[str], tuple[str, list[str]]]
    # Detection order; lower values run first.
    priority: int
@@ -0,0 +1,54 @@
1
+ """Diff reducer.
2
+
3
+ Keeps every change line (``+``/``-``), hunk header (``@@``) and file header verbatim
4
+ — those are the signal. Collapses long runs of unchanged context lines to the first
5
+ and last line plus a count. Deterministic; value-preserving for all changes.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+
12
+ from .base import Reducer
13
+
14
# Unified-diff hunk header (e.g. ``@@ -12,7 +12,9 @@``) at the start of any line.
_DIFF_HUNK = re.compile(r"(?m)^@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@")

# A line starting with any of these prefixes is always emitted verbatim.
_KEEP_PREFIXES = (
    "+",
    "-",
    "@@",
    "diff ",
    "index ",
    "new file",
    "deleted file",
    "rename ",
    "similarity ",
    "copy ",
    "old mode",
    "new mode",
)


def reduce_diff(text: str) -> tuple[str, list[str]]:
    """Collapse runs of non-change lines in a diff.

    Lines starting with one of ``_KEEP_PREFIXES`` are kept as they are. Every
    maximal run of other lines is kept whole when it has at most three lines;
    longer runs are replaced by their first line, a marker with the number of
    omitted lines, and their last line.

    Returns the reduced text (joined with ``\\n``) and a one-element list of
    notes reporting the line counts before and after.
    """
    source = text.splitlines()

    def collapsed(run: list[str]) -> list[str]:
        # Short runs are cheap enough to keep in full.
        if len(run) <= 3:
            return list(run)
        return [run[0], f" ⟪… {len(run) - 2} unchanged lines⟫", run[-1]]

    result: list[str] = []
    pending: list[str] = []
    for line in source:
        if line.startswith(_KEEP_PREFIXES):
            result += collapsed(pending)
            pending = []
            result.append(line)
        else:
            pending.append(line)
    result += collapsed(pending)

    summary = (
        "kept all change/header lines; collapsed unchanged context "
        f"({len(source)}→{len(result)} lines)"
    )
    return "\n".join(result), [summary]
48
+
49
+
50
def _detect(text: str) -> bool:
    """Return True if ``text`` starts with ``diff --git`` or contains a hunk header."""
    if text.lstrip().startswith("diff --git"):
        return True
    return _DIFF_HUNK.search(text) is not None
52
+
53
+
54
# Registration record for the diff reducer.
REDUCER = Reducer(kind="diff", detect=_detect, reduce=reduce_diff, priority=30)
@@ -0,0 +1,64 @@
1
+ """HTML reducer — for web-fetch / scraped tool outputs.
2
+
3
+ Strips tags, scripts, styles and collapses whitespace, keeping the visible text and
4
+ the links (URLs are signal, so they're preserved). Stdlib only, deterministic.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from html.parser import HTMLParser
11
+
12
+ from .base import Reducer
13
+
14
# Elements whose text content is never emitted.
_SKIP = {"script", "style", "noscript", "svg", "head", "template"}


class _Extract(HTMLParser):
    """Collects visible text chunks and ``<a href>`` targets while parsing."""

    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self.parts: list[str] = []  # stripped, non-empty text chunks in document order
        self.links: list[str] = []  # href values in document order (may repeat)
        self._skip = 0  # nesting depth inside _SKIP elements

    def handle_starttag(self, tag, attrs):
        if tag in _SKIP:
            self._skip += 1
        if tag == "a":
            for key, val in attrs:
                if key == "href" and val:
                    self.links.append(val)

    def handle_endtag(self, tag):
        if tag in _SKIP and self._skip > 0:
            self._skip -= 1

    def handle_data(self, data):
        if self._skip == 0:
            text = data.strip()
            if text:
                self.parts.append(text)


def reduce_html(text: str) -> tuple[str, list[str]]:
    """Strip markup from ``text``, keeping visible text and link targets.

    Text chunks are joined with newlines and runs of spaces/tabs are
    collapsed to one space. If any links were found, a ``Links:`` line with
    the de-duplicated hrefs (first-seen order) is appended after a blank line.

    Returns the reduced text and a one-element list of notes.
    """
    parser = _Extract()
    parser.feed(text)
    # feed() may hold back trailing input (for instance text ending in an
    # unterminated "&..."); close() forces it through the handlers so the end
    # of a truncated page is not silently lost.
    parser.close()
    body = re.sub(r"[ \t]+", " ", "\n".join(parser.parts)).strip()
    links = list(dict.fromkeys(parser.links))

    out = body
    if links:
        out += "\n\nLinks: " + " ".join(links)
    notes = [f"stripped HTML tags/scripts/styles; kept visible text + {len(links)} links"]
    return out, notes
54
+
55
+
56
def _detect(text: str) -> bool:
    """Heuristically decide whether ``text`` is HTML.

    True when a doctype or ``<html`` tag appears in the first 512 characters
    (after leading whitespace), or when the text starts with ``<`` and holds
    at least five closing-tag openers (``</``).
    """
    body = text.lstrip()
    prefix = body[:512].lower()
    if any(marker in prefix for marker in ("<!doctype html", "<html")):
        return True
    if not body.startswith("<"):
        return False
    return text.lower().count("</") >= 5
62
+
63
+
64
# Registration record for the HTML reducer.
REDUCER = Reducer(kind="html", detect=_detect, reduce=reduce_html, priority=40)
@@ -0,0 +1,61 @@
1
+ """JSON / RAG reducer.
2
+
3
+ The dominant waste in JSON tool output is *repeated keys*: a list of 200 records
4
+ re-states every field name 200 times. We factor the schema out once and emit the
5
+ values columnar. All values are preserved, so this is near-lossless.
6
+
7
+ Falls back to whitespace-stripped (minified) JSON when the payload isn't a record
8
+ list — still a real saving on pretty-printed output, with zero information loss.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ from typing import Any
15
+
16
+ from .base import Reducer
17
+
18
+
19
def _find_records(data: Any) -> list[dict] | None:
    """Locate a homogeneous-ish list of dicts at the top level or one level down.

    Returns ``data`` itself when it is a non-empty list of dicts, else the
    first value of a top-level dict that is such a list, else ``None``.
    """
    if isinstance(data, list) and data and all(isinstance(x, dict) for x in data):
        return data
    if isinstance(data, dict):
        for value in data.values():
            if isinstance(value, list) and value and all(isinstance(x, dict) for x in value):
                return value
    return None


def _fmt(value: Any) -> str:
    """Render one cell: strings as-is, everything else as minified JSON."""
    if isinstance(value, str):
        return value
    return json.dumps(value, separators=(",", ":"), ensure_ascii=False)


def reduce_json(text: str) -> tuple[str, list[str]]:
    """Reduce a JSON document.

    When a list of at least three records is found, the field names are
    emitted once (``fields: a | b``) followed by one ``|``-separated row per
    record; a field missing from a record becomes an empty cell. If that list
    was nested inside a top-level object, the object's remaining keys are
    emitted first on an ``other keys:`` line as minified JSON so they are not
    dropped. Any other payload is returned as minified JSON.

    Returns the reduced text and a list of notes. Raises whatever
    ``json.loads`` raises when ``text`` is not valid JSON.
    """
    data = json.loads(text)
    records = _find_records(data)

    if records is not None and len(records) >= 3:
        keys = list(dict.fromkeys(k for row in records for k in row))
        out_lines: list[str] = []
        notes = [f"columnar: {len(records)} records × {len(keys)} fields, keys factored out once"]

        if isinstance(data, dict):
            # The record list came from one key of a wrapper object; keep its
            # siblings (totals, cursors, ...) instead of discarding them.
            rest = {k: v for k, v in data.items() if v is not records}
            if rest:
                out_lines.append(
                    "other keys: " + json.dumps(rest, separators=(",", ":"), ensure_ascii=False)
                )
                notes.append(f"{len(rest)} sibling keys kept as minified json")

        out_lines.append("fields: " + " | ".join(keys))
        out_lines.extend(" | ".join(_fmt(row.get(k, "")) for k in keys) for row in records)
        return "\n".join(out_lines), notes

    compact = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
    return compact, ["minified json (indentation/whitespace removed, lossless)"]
49
+
50
+
51
def _detect(text: str) -> bool:
    """Return True when ``text`` parses as JSON and starts with ``[`` or ``{``."""
    first = text.lstrip()[:1]
    if first not in "[{":
        return False
    try:
        json.loads(text)
    except Exception:
        # Any parse failure simply means "not JSON".
        return False
    return True
59
+
60
+
61
# Registration record for the JSON reducer.
REDUCER = Reducer(kind="json", detect=_detect, reduce=reduce_json, priority=10)
@@ -0,0 +1,91 @@
1
+ """Collapse repetitive log lines.
2
+
3
+ Near-identical lines collapse to one representative plus a count, while every
4
+ error/anomaly line and every one-off pattern is kept as-is.
5
+
6
+ To decide "near-identical", we mask the volatile parts of a line (timestamps, ips,
7
+ uuids, hex, numbers, quoted strings) into a template. Lines that share a template
8
+ are the same event with different values, so we keep one and count the rest.
9
+ Templates seen only once, or carrying a severity keyword, are kept verbatim, since
10
+ the rare line is usually the one that matters.
11
+
12
+ Deterministic: first-seen order is preserved, so the same input gives the same output.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import re
18
+
19
+ from .base import Reducer
20
+
21
# A line "looks like a log line" when it starts with an ISO-style date or a
# level keyword (optionally preceded by "[").
_LOG_HINT = re.compile(
    r"(?im)^\s*(?:\d{4}-\d{2}-\d{2}[T ]|\[?(?:INFO|DEBUG|WARN|WARNING|ERROR|FATAL|TRACE|CRITICAL)\b)"
)
# Keywords that mark a line as an error/anomaly (case-insensitive, whole word).
_SEVERITY = re.compile(r"(?i)\b(ERROR|FATAL|CRITICAL|EXCEPTION|PANIC|TRACEBACK|WARN|WARNING)\b")

# Order matters: more specific patterns first so they win before the generic
# number mask consumes their digits.
_MASKS = (
    (re.compile(r"\d{4}-\d{2}-\d{2}[T ][\d:.,]+(?:Z|[+-]\d{2}:?\d{2})?"), "§ts"),
    (re.compile(r"\b\d{1,3}(?:\.\d{1,3}){3}\b"), "§ip"),
    (re.compile(r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b"), "§uuid"),
    (re.compile(r"0x[0-9a-fA-F]+"), "§hex"),
    (re.compile(r'"[^"]*"'), "§s"),
    (re.compile(r"\b\d+(?:\.\d+)?\b"), "§n"),
)


def _template(line: str) -> str:
    """Mask the volatile parts of ``line`` and return the stripped template."""
    t = line
    for rx, repl in _MASKS:
        t = rx.sub(repl, t)
    return t.strip()


def reduce_logs(text: str) -> tuple[str, list[str]]:
    """Collapse log lines that share a template.

    Blank lines are dropped. The remaining lines are grouped by
    ``(has severity keyword, template)``, so a line carrying a severity
    keyword is never folded into a group of ordinary lines — this matters
    when the keyword sits inside a quoted string, which the template masks.
    Each group is emitted once, at the position of its first line, using
    that first line as representative:

    * severity group: the line, plus ``⟪×N⟫`` when it occurred N > 1 times;
    * ordinary group seen once: the line verbatim;
    * ordinary group seen N > 1 times: the line plus ``⟪×N similar⟫``.

    Returns the reduced text and a one-element list of notes.
    """
    lines = text.splitlines()
    # (is_severity, template) -> [representative line, count]; dicts keep
    # insertion order, which gives first-seen output order.
    groups: dict[tuple[bool, str], list] = {}

    for line in lines:
        if not line.strip():
            continue
        key = (bool(_SEVERITY.search(line)), _template(line))
        entry = groups.get(key)
        if entry is None:
            groups[key] = [line, 1]
        else:
            entry[1] += 1

    out: list[str] = []
    kept_verbatim = 0
    for (is_sev, _), (line, count) in groups.items():
        if is_sev:
            kept_verbatim += 1
            out.append(line if count == 1 else f"{line} ⟪×{count}⟫")
        elif count == 1:
            kept_verbatim += 1
            out.append(line)
        else:
            out.append(f"{line} ⟪×{count} similar⟫")

    notes = [
        f"{len(groups)} unique patterns from {len(lines)} lines; "
        f"{kept_verbatim} anomaly/unique lines kept verbatim"
    ]
    return "\n".join(out), notes
81
+
82
+
83
def _detect(text: str) -> bool:
    """Return True when ``text`` looks like a log.

    Needs at least five lines, of which at least three — and at least 30% —
    match ``_LOG_HINT``.
    """
    rows = text.splitlines()
    total = len(rows)
    if total < 5:
        return False
    matched = len([row for row in rows if _LOG_HINT.match(row)])
    threshold = max(3, total * 0.3)
    return matched >= threshold
89
+
90
+
91
# Registration record for the log reducer.
REDUCER = Reducer(kind="log", detect=_detect, reduce=reduce_logs, priority=50)
@@ -0,0 +1,59 @@
1
+ """Stack-trace reducer.
2
+
3
+ The exception (last line) and the boundary frames are the signal; the deep middle
4
+ of the call stack is usually noise. Keeps the header, the first frame, the last two
5
+ frames, and the full exception/tail verbatim; collapses the middle with a count.
6
+ Raises on non-tracebacks → core falls back to passthrough (fail open).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from .base import Reducer
12
+
13
# Number of frames kept at the start and at the end of a collapsed traceback.
_KEEP_HEAD = 1
_KEEP_TAIL = 2


def reduce_stacktrace(text: str) -> tuple[str, list[str]]:
    """Collapse the middle frames of a Python traceback.

    A frame starts at each line whose stripped form begins with ``File "``.
    Everything before the first frame is the header; the non-indented lines
    trailing the last frame are the exception tail. Header and tail are kept
    in full. Tracebacks with at most ``_KEEP_HEAD + _KEEP_TAIL + 1`` frames
    keep every frame; longer ones keep the first ``_KEEP_HEAD`` and last
    ``_KEEP_TAIL`` frames around a marker line counting the hidden frames.

    Returns the reduced text and a one-element list of notes stating how many
    frames were actually kept.

    Raises:
        ValueError: if ``text`` contains no ``File "`` line.
    """
    lines = text.splitlines()
    file_idx = [i for i, ln in enumerate(lines) if ln.lstrip().startswith('File "')]
    if not file_idx:
        raise ValueError("not a python traceback")

    header = lines[: file_idx[0]]
    # Each frame runs from its File line up to the next File line (or the end).
    bounds = file_idx + [len(lines)]
    frames = [lines[start:stop] for start, stop in zip(bounds, bounds[1:])]

    # Peel the trailing non-indented lines off the last frame: that's the exception.
    last = frames[-1]
    split = len(last)
    for m in range(1, len(last)):
        if last[m].strip() and not last[m].startswith((" ", "\t")):
            split = m
            break
    tail = last[split:]
    frames[-1] = last[:split]

    out = list(header)
    if len(frames) <= _KEEP_HEAD + _KEEP_TAIL + 1:
        # Hiding a single frame behind a marker line would save nothing.
        kept = len(frames)
        for fr in frames:
            out.extend(fr)
    else:
        kept = _KEEP_HEAD + _KEEP_TAIL
        for fr in frames[:_KEEP_HEAD]:
            out.extend(fr)
        out.append(f" ⟪… {len(frames) - kept} stack frames hidden⟫")
        for fr in frames[-_KEEP_TAIL:]:
            out.extend(fr)
    out.extend(tail)

    notes = [f"kept {kept} of {len(frames)} frames + exception"]
    return "\n".join(out), notes
53
+
54
+
55
def _detect(text: str) -> bool:
    """Return True if ``text`` contains the standard Python traceback header."""
    return text.find("Traceback (most recent call last)") != -1
57
+
58
+
59
# Registration record for the stack-trace reducer.
REDUCER = Reducer(kind="stacktrace", detect=_detect, reduce=reduce_stacktrace, priority=20)
@@ -0,0 +1,32 @@
1
+ """Whitespace-aligned table reducer.
2
+
3
+ Command-line tools (kubectl, docker, ps, ls -l, df) pad columns with runs of
4
+ spaces so they line up. That padding is pure tokens. We collapse each run of two
5
+ or more spaces to a single space and trim line ends. Every value is kept; only
6
+ the alignment is dropped, so this is lossless for the data.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import re
12
+
13
+ from .base import Reducer
14
+
15
# A run of two or more spaces/tabs, i.e. column padding.
_GAP = re.compile(r"[ \t]{2,}")


def reduce_table(text: str) -> tuple[str, list[str]]:
    """Squeeze column padding out of whitespace-aligned output.

    On every line, each run of two or more spaces/tabs becomes one space and
    trailing whitespace is removed. Returns the re-joined text and a
    one-element list of notes.
    """
    squeezed = []
    for row in text.splitlines():
        squeezed.append(_GAP.sub(" ", row).rstrip())
    return "\n".join(squeezed), ["collapsed column padding; values preserved"]
21
+
22
+
23
def _detect(text: str) -> bool:
    """Return True when ``text`` looks like a whitespace-aligned table.

    Considers non-blank lines only and needs at least three of them. A line
    counts as columnar when it holds two or more padded gaps; at least three
    lines — and at least 60% — must be columnar.
    """
    rows = [row for row in text.splitlines() if row.strip()]
    if len(rows) < 3:
        return False
    # A line looks columnar when it has at least two padded gaps (3+ columns).
    columnar = len([row for row in rows if len(_GAP.findall(row)) >= 2])
    threshold = max(3, len(rows) * 0.6)
    return columnar >= threshold
30
+
31
+
32
# Registration record for the table reducer.
REDUCER = Reducer(kind="table", detect=_detect, reduce=reduce_table, priority=60)