@kulapard/pi-caveman 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env python3
2
+ """Detect whether a file is natural language (compressible) or code/config (skip)."""
3
+
4
+ import json
5
+ import re
6
+ from pathlib import Path
7
+
8
+ # Extensions that are natural language and compressible
9
+ COMPRESSIBLE_EXTENSIONS = {".md", ".txt", ".markdown", ".rst", ".typ", ".typst", ".tex"}
10
+
11
+ # Extensions that are code/config and should be skipped
12
+ SKIP_EXTENSIONS = {
13
+ ".py", ".js", ".ts", ".tsx", ".jsx", ".json", ".yaml", ".yml",
14
+ ".toml", ".env", ".lock", ".css", ".scss", ".html", ".xml",
15
+ ".sql", ".sh", ".bash", ".zsh", ".go", ".rs", ".java", ".c",
16
+ ".cpp", ".h", ".hpp", ".rb", ".php", ".swift", ".kt", ".lua",
17
+ ".dockerfile", ".makefile", ".csv", ".ini", ".cfg",
18
+ }
19
+
20
+ # The subset of SKIP_EXTENSIONS that is configuration (not source code). Used to
21
+ # decide whether a skipped file reports as "config" vs "code". Must stay a subset
22
+ # of SKIP_EXTENSIONS (asserted below) so the two sets cannot silently drift.
23
+ CONFIG_EXTENSIONS = {
24
+ ".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".env",
25
+ }
26
+ assert CONFIG_EXTENSIONS <= SKIP_EXTENSIONS, (
27
+ "CONFIG_EXTENSIONS must be a subset of SKIP_EXTENSIONS: "
28
+ f"{CONFIG_EXTENSIONS - SKIP_EXTENSIONS} not in SKIP_EXTENSIONS"
29
+ )
30
+
31
+ # Real-world extensionless config/build filenames whose classification is NOT
32
+ # already covered by the SKIP_EXTENSIONS fallback below. These have an empty
33
+ # `Path.suffix`, so the SKIP_EXTENSIONS check never matches them and they would
34
+ # otherwise be content-sniffed as natural language and offered up for
35
+ # compression to a third-party API. Map the lowercased full filename to its
36
+ # classification so a bare `Dockerfile`/`Makefile`/`.gitignore` is handled like
37
+ # its dotted-extension cousins would be.
38
+ #
39
+ # Names that are themselves SKIP_EXTENSIONS entries (e.g. ".env") are
40
+ # deliberately omitted here: the `name in SKIP_EXTENSIONS` fallback in
41
+ # detect_file_type already classifies them identically, so listing them in both
42
+ # places would be redundant and require hand-syncing.
43
+ SKIP_FILENAMES = {
44
+ "dockerfile": "code",
45
+ "makefile": "code",
46
+ "gnumakefile": "code",
47
+ ".gitignore": "config",
48
+ ".gitattributes": "config",
49
+ ".dockerignore": "config",
50
+ ".editorconfig": "config",
51
+ ".npmrc": "config",
52
+ ".prettierrc": "config",
53
+ ".eslintrc": "config",
54
+ ".babelrc": "config",
55
+ }
56
+
57
+ # Patterns that indicate a line is code
58
+ CODE_PATTERNS = [
59
+ re.compile(r"^\s*(import |from .+ import |require\(|const |let |var )"),
60
+ re.compile(r"^\s*(def |class |function |async function |export )"),
61
+ re.compile(r"^\s*(if\s*\(|for\s*\(|while\s*\(|switch\s*\(|try\s*\{)"),
62
+ re.compile(r"^\s*[\}\]\);]+\s*$"), # closing braces/brackets
63
+ re.compile(r"^\s*@\w+"), # decorators/annotations
64
+ re.compile(r'^\s*"[^"]+"\s*:\s*'), # JSON-like key-value
65
+ re.compile(r"^\s*\w+\s*=\s*[{\[\(\"']"), # assignment with literal
66
+ ]
67
+
68
+
69
def _is_code_line(line: str) -> bool:
    """Return True when ``line`` matches at least one entry of CODE_PATTERNS.

    Each pattern is applied with ``match`` (anchored at the start of the
    line); the first hit short-circuits the scan.
    """
    for pattern in CODE_PATTERNS:
        if pattern.match(line):
            return True
    return False
72
+
73
+
74
+ def _is_json_content(text: str) -> bool:
75
+ """Check if content is valid JSON."""
76
+ try:
77
+ json.loads(text)
78
+ return True
79
+ except (json.JSONDecodeError, ValueError):
80
+ return False
81
+
82
+
83
+ def _is_yaml_content(lines: list[str]) -> bool:
84
+ """Heuristic: check if content looks like YAML."""
85
+ yaml_indicators = 0
86
+ for line in lines[:30]:
87
+ stripped = line.strip()
88
+ if stripped.startswith("---"):
89
+ yaml_indicators += 1
90
+ elif re.match(r"^\w[\w\s]*:\s", stripped):
91
+ yaml_indicators += 1
92
+ elif stripped.startswith("- ") and ":" in stripped:
93
+ yaml_indicators += 1
94
+ # If most non-empty lines look like YAML
95
+ non_empty = sum(1 for line in lines[:30] if line.strip())
96
+ return non_empty > 0 and yaml_indicators / non_empty > 0.6
97
+
98
+
99
def detect_file_type(filepath: Path) -> str:
    """Classify a file as 'natural_language', 'code', 'config', or 'unknown'.

    Classification order:

    1. By lowercased suffix: COMPRESSIBLE_EXTENSIONS -> 'natural_language',
       SKIP_EXTENSIONS -> 'config' (if also in CONFIG_EXTENSIONS) or 'code'.
    2. Any other non-empty suffix -> 'unknown'.
    3. No suffix: by lowercased full filename via SKIP_FILENAMES, then via
       SKIP_EXTENSIONS (covers names such as ".env").
    4. Otherwise by content: JSON or YAML-looking -> 'config'; more than 40%
       code-looking lines among the first 50 -> 'code'; else
       'natural_language'.

    Args:
        filepath: Path of the file to classify.

    Returns:
        One of: 'natural_language', 'code', 'config', 'unknown'. 'unknown' is
        also returned when the file cannot be read.
    """
    ext = filepath.suffix.lower()

    # Extension-based classification.
    if ext in COMPRESSIBLE_EXTENSIONS:
        return "natural_language"
    if ext in SKIP_EXTENSIONS:
        return "config" if ext in CONFIG_EXTENSIONS else "code"
    if ext:
        # An extension we know nothing about: do not guess.
        return "unknown"

    # Suffix-less files (e.g. TODO, Dockerfile, .gitignore). Leading-dot and
    # build files have an empty suffix, so the extension checks above never
    # match them; classify by full filename first so they are never
    # content-sniffed as natural language and offered up for compression.
    name = filepath.name.lower()
    if name in SKIP_FILENAMES:
        return SKIP_FILENAMES[name]
    if name in SKIP_EXTENSIONS:
        return "config" if name in CONFIG_EXTENSIONS else "code"

    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding; undecodable bytes are dropped.
        text = filepath.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        # Covers missing files, directories and permission errors
        # (PermissionError is an OSError subclass).
        return "unknown"

    lines = text.splitlines()[:50]

    if _is_json_content(text[:10000]) or _is_yaml_content(lines):
        return "config"

    non_empty = [line for line in lines if line.strip()]
    code_lines = sum(1 for line in non_empty if _is_code_line(line))
    if non_empty and code_lines / len(non_empty) > 0.4:
        return "code"

    return "natural_language"
146
+
147
+
148
def should_compress(filepath: Path) -> bool:
    """Tell whether ``filepath`` is a natural-language file eligible for compression.

    Anything that is not a regular file, and any backup whose name ends in
    ``.original.md``, is rejected without inspecting its type.
    """
    is_backup = filepath.name.endswith(".original.md")
    if filepath.is_file() and not is_backup:
        return detect_file_type(filepath) == "natural_language"
    return False
156
+
157
+
158
if __name__ == "__main__":
    # CLI: print the detected type and the compress decision for each path.
    import sys

    cli_paths = sys.argv[1:]
    if not cli_paths:
        print("Usage: python detect.py <file1> [file2] ...")
        sys.exit(1)

    for raw_path in cli_paths:
        target = Path(raw_path).resolve()
        detected = detect_file_type(target)
        eligible = should_compress(target)
        print(f" {target.name:30s} type={detected:20s} compress={eligible}")
@@ -0,0 +1,213 @@
1
+ #!/usr/bin/env python3
2
+ import re
3
+ from collections import Counter
4
+ from pathlib import Path
5
+
6
+ URL_REGEX = re.compile(r"https?://[^\s)]+")
7
+ FENCE_OPEN_REGEX = re.compile(r"^(\s{0,3})(`{3,}|~{3,})(.*)$")
8
+ HEADING_REGEX = re.compile(r"^(#{1,6})\s+(.*)", re.MULTILINE)
9
+ BULLET_REGEX = re.compile(r"^\s*[-*+]\s+", re.MULTILINE)
10
+
11
+ # crude but effective path detection
12
+ # Requires either a path prefix (./ ../ / or drive letter) or a slash/backslash within the match
13
+ PATH_REGEX = re.compile(r"(?:\./|\.\./|/|[A-Za-z]:\\)[\w\-/\\\.]+|[\w\-\.]+[/\\][\w\-/\\\.]+")
14
+
15
+
16
class ValidationResult:
    """Collects the errors and warnings produced by the validators.

    ``is_valid`` starts as True and flips to False as soon as any error is
    recorded; warnings never affect it.
    """

    def __init__(self):
        self.errors = []
        self.warnings = []
        self.is_valid = True

    def add_warning(self, msg):
        """Record a non-fatal finding."""
        self.warnings.append(msg)

    def add_error(self, msg):
        """Record a fatal finding and mark the result invalid."""
        self.errors.append(msg)
        self.is_valid = False
28
+
29
+
30
def read_file(path: Path) -> str:
    """Return the text content of ``path`` decoded as UTF-8.

    The encoding is given explicitly so the result does not depend on the
    platform's locale encoding. Bytes that are not valid UTF-8 are silently
    dropped (``errors="ignore"``).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    return path.read_text(encoding="utf-8", errors="ignore")
32
+
33
+
34
+ # ---------- Extractors ----------
35
+
36
+
37
def extract_headings(text):
    """Return ``(hashes, title)`` pairs for every line matched by HEADING_REGEX.

    ``hashes`` is the run of ``#`` characters as matched; ``title`` is the
    rest of the line with surrounding whitespace removed.
    """
    return [
        (found.group(1), found.group(2).strip())
        for found in HEADING_REGEX.finditer(text)
    ]
39
+
40
+
41
def extract_code_blocks(text):
    """Collect every closed fenced code block in ``text``, fences included.

    Works line by line. A block opens on a line matched by FENCE_OPEN_REGEX
    (``` or ~~~ runs) and closes on the next line whose fence uses the same
    character, is at least as long as the opener, and carries no trailing
    text (the CommonMark closing rule). Because shorter inner fences cannot
    close a longer outer one, nested fences stay inside the outer block.

    Returns:
        A list of blocks, each the opening line through the closing line
        joined with newlines, in document order.
    """
    rows = text.split("\n")
    total = len(rows)
    found = []
    pos = 0

    while pos < total:
        opener = FENCE_OPEN_REGEX.match(rows[pos])
        if opener is None:
            pos += 1
            continue

        marker = opener.group(2)
        closing_index = None
        for idx in range(pos + 1, total):
            candidate = FENCE_OPEN_REGEX.match(rows[idx])
            if candidate is None:
                continue
            run = candidate.group(2)
            if (
                run[0] == marker[0]
                and len(run) >= len(marker)
                and not candidate.group(3).strip()
            ):
                closing_index = idx
                break

        if closing_index is None:
            # Unclosed fence: it swallows the rest of the document and is
            # dropped — it indicates malformed markdown, and including it
            # would cause false-positive validation failures.
            break

        found.append("\n".join(rows[pos:closing_index + 1]))
        pos = closing_index + 1

    return found
83
+
84
+
85
def extract_urls(text):
    """Return the set of distinct http(s) URLs found in ``text``.

    URL_REGEX consumes everything up to whitespace or ``)``, so a URL that
    ends a sentence or clause is captured together with the punctuation that
    follows it. That punctuation is trimmed here so the same URL compares
    equal regardless of the sentence structure around it (which compression
    is expected to change).
    """
    return {url.rstrip(".,;:!?") for url in URL_REGEX.findall(text)}
87
+
88
+
89
def extract_paths(text):
    """Return the set of distinct substrings of ``text`` matched by PATH_REGEX."""
    return {found.group(0) for found in PATH_REGEX.finditer(text)}
91
+
92
+
93
def count_bullets(text):
    """Count the lines of ``text`` that BULLET_REGEX recognises as list items."""
    return sum(1 for _ in BULLET_REGEX.finditer(text))
95
+
96
+
97
def extract_inline_codes(text):
    """List the contents of every single-backtick inline code span.

    Fenced regions are removed first — everything from a line starting with
    ``` (then ~~~) up to the next line starting with the same marker — so
    backticks inside code blocks are not reported. Spans are returned in
    document order, duplicates included.
    """
    backtick_fenced = re.compile(r"^```[\s\S]*?^```", re.MULTILINE)
    tilde_fenced = re.compile(r"^~~~[\s\S]*?^~~~", re.MULTILINE)

    prose_only = tilde_fenced.sub("", backtick_fenced.sub("", text))
    return [span.group(1) for span in re.finditer(r"`([^`]+)`", prose_only)]
101
+
102
+
103
+ # ---------- Validators ----------
104
+
105
+
106
def validate_headings(orig, comp, result):
    """Compare the headings of the original and compressed text.

    A different number of headings is an error; any difference in the
    heading list (level, text or order) is additionally a warning.
    """
    before = extract_headings(orig)
    after = extract_headings(comp)
    n_before, n_after = len(before), len(after)

    if n_before != n_after:
        result.add_error(f"Heading count mismatch: {n_before} vs {n_after}")

    if before != after:
        result.add_warning("Heading text/order changed")
115
+
116
+
117
def validate_code_blocks(orig, comp, result):
    """Record an error unless both texts contain identical fenced code blocks."""
    if extract_code_blocks(orig) == extract_code_blocks(comp):
        return
    result.add_error("Code blocks not preserved exactly")
123
+
124
+
125
def validate_urls(orig, comp, result):
    """Record an error when the two texts do not contain the same set of URLs."""
    expected = extract_urls(orig)
    actual = extract_urls(comp)
    if expected == actual:
        return

    lost = expected - actual
    added = actual - expected
    result.add_error(f"URL mismatch: lost={lost}, added={added}")
131
+
132
+
133
def validate_paths(orig, comp, result):
    """Record a warning when the two texts do not contain the same set of paths."""
    expected = extract_paths(orig)
    actual = extract_paths(comp)
    if expected == actual:
        return

    lost = expected - actual
    added = actual - expected
    result.add_warning(f"Path mismatch: lost={lost}, added={added}")
139
+
140
+
141
def validate_bullets(orig, comp, result):
    """Warn when the bullet count drifts by more than 15% of the original.

    Nothing is reported when the original has no bullets at all.
    """
    before = count_bullets(orig)
    after = count_bullets(comp)

    if before and abs(before - after) / before > 0.15:
        result.add_warning(f"Bullet count changed too much: {before} -> {after}")
152
+
153
+
154
def validate_inline_codes(orig, comp, result):
    """Compare inline code spans (with multiplicity) between the two texts.

    Spans missing from the compressed text, or present fewer times than in
    the original, are reported together as one error. Spans that appear only
    in the compressed text are reported as a warning.
    """
    before = Counter(extract_inline_codes(orig))
    after = Counter(extract_inline_codes(comp))
    if before == after:
        return

    lost = set(before) - set(after)
    added = set(after) - set(before)

    # Spans that survived, but fewer times than in the original.
    for code, count in before.items():
        if code in after and after[code] < count:
            lost.add(f"{code} (lost {count - after[code]} of {count} occurrences)")

    if lost:
        result.add_error(f"Inline code lost: {lost}")
    if added:
        result.add_warning(f"Inline code added: {added}")
168
+
169
+
170
+ # ---------- Main ----------
171
+
172
+
173
def validate(original_path: Path, compressed_path: Path) -> ValidationResult:
    """Run every validator on the two files and return the combined result.

    Both files are read with ``read_file``; the validators then run in a
    fixed order (headings, code blocks, URLs, paths, bullets, inline code),
    all appending to the same ValidationResult.
    """
    report = ValidationResult()

    original_text = read_file(original_path)
    compressed_text = read_file(compressed_path)

    checks = (
        validate_headings,
        validate_code_blocks,
        validate_urls,
        validate_paths,
        validate_bullets,
        validate_inline_codes,
    )
    for check in checks:
        check(original_text, compressed_text, report)

    return report
187
+
188
+
189
+ # ---------- CLI ----------
190
+
191
if __name__ == "__main__":
    # CLI: validate a compressed file against its original and print a report.
    import sys

    if len(sys.argv) != 3:
        print("Usage: python validate.py <original> <compressed>")
        sys.exit(1)

    original_file = Path(sys.argv[1]).resolve()
    compressed_file = Path(sys.argv[2]).resolve()

    outcome = validate(original_file, compressed_file)

    print(f"\nValid: {outcome.is_valid}")

    sections = (
        ("\nErrors:", outcome.errors),
        ("\nWarnings:", outcome.warnings),
    )
    for header, messages in sections:
        if messages:
            print(header)
            for message in messages:
                print(f" - {message}")
@@ -0,0 +1,38 @@
1
+ # caveman-help
2
+
3
+ Quick-reference card. One shot, no mode change.
4
+
5
+ ## What it does
6
+
7
+ Prints a cheat sheet of all caveman modes, sibling skills, deactivation triggers, and how long a mode lasts (set with `/caveman`, it persists for the session and resets to `off` on a new session — no config file or env var). One-shot display — does not flip the active mode, write flag files, or persist anything. Use when you forget the slash commands.
8
+
9
+ ## How to invoke
10
+
11
+ ```
12
+ /caveman-help
13
+ ```
14
+
15
+ Also triggers on "caveman help", "what caveman commands", "how do I use caveman".
16
+
17
+ ## Example output
18
+
19
+ ```
20
+ Modes:
21
+ /caveman full (default)
22
+ /caveman lite lighter
23
+ /caveman ultra extreme
24
+ /caveman wenyan classical Chinese
25
+
26
+ Skills:
27
+ /caveman-commit terse Conventional Commits
28
+ /caveman-review one-line PR comments
29
+ /caveman-stats session token savings
30
+
31
+ Deactivate:
32
+ "stop caveman" or "normal mode"
33
+ ```
34
+
35
+ ## See also
36
+
37
+ - [`SKILL.md`](./SKILL.md) — full reference card
38
+ - [Caveman README](../../README.md) — repo overview
@@ -0,0 +1,51 @@
1
+ ---
2
+ name: caveman-help
3
+ description: >
4
+ Quick-reference card for all caveman modes, skills, and commands.
5
+ One-shot display, not a persistent mode. Trigger: /caveman-help,
6
+ "caveman help", "what caveman commands", "how do I use caveman".
7
+ ---
8
+
9
+ # Caveman Help
10
+
11
+ Display this reference card when invoked. One-shot — do NOT change mode, write flag files, or persist anything. Output in caveman style.
12
+
13
+ ## Modes
14
+
15
+ | Mode | Trigger | What change |
16
+ |------|---------|-------------|
17
+ | **Lite** | `/caveman lite` | Drop filler. Keep sentence structure. |
18
+ | **Full** | `/caveman` | Drop articles, filler, pleasantries, hedging. Fragments OK. Default. |
19
+ | **Ultra** | `/caveman ultra` | Extreme compression. Bare fragments. Tables over prose. |
20
+ | **Wenyan-Lite** | `/caveman wenyan-lite` | Classical Chinese style, light compression. |
21
+ | **Wenyan-Full** | `/caveman wenyan` | Full 文言文. Maximum classical terseness. |
22
+ | **Wenyan-Ultra** | `/caveman wenyan-ultra` | Extreme. Ancient scholar on a budget. |
23
+
24
+ Mode stick until changed or session end.
25
+
26
+ ## Skills
27
+
28
+ | Skill | Trigger | What it do |
29
+ |-------|---------|-----------|
30
+ | **caveman-commit** | `/caveman-commit` | Terse commit messages. Conventional Commits. ≤50 char subject. |
31
+ | **caveman-review** | `/caveman-review` | One-line PR comments: `L42: bug: user null. Add guard.` |
32
+ | **caveman-compress** | `/caveman-compress <file>` | Compress .md files to caveman prose. Saves ~46% input tokens. |
33
+ | **caveman-help** | `/caveman-help` | This card. |
34
+
35
+ ## Deactivate
36
+
37
+ Say "stop caveman" or "normal mode". Resume anytime with `/caveman`.
38
+
39
+ ## Language
40
+
41
+ Keep user's language by default. User write Portuguese → reply Portuguese caveman. Compress the style, not the language. Technical terms, code, commands, commit types, and exact error strings stay verbatim unless user ask for translation.
42
+
43
+ ## Mode lasts the session
44
+
45
+ `/caveman` (no argument) = `full`. Pick another with `/caveman ultra`, `/caveman lite`, etc.
46
+
47
+ Mode set per session. New session start → mode `off`; activate again with `/caveman`. No config file, no env var — the `/caveman` command is the only switch.
48
+
49
+ ## More
50
+
51
+ Full docs: https://github.com/JuliusBrussee/caveman
@@ -0,0 +1,33 @@
1
+ # caveman-review
2
+
3
+ One-line PR comments. Location, problem, fix. No throat-clearing.
4
+
5
+ ## What it does
6
+
7
+ Generates code review comments in `L<line>: <severity> <problem>. <fix>.` format. One line per finding. Severity emoji: 🔴 bug, 🟡 risk, 🔵 nit, ❓ question. Drops "I noticed that...", hedging, and restating what the diff already shows. Keeps exact line numbers, backticked symbols, and concrete fixes.
8
+
9
+ Auto-clarity: drops terse mode for CVE-class security findings, architectural disagreements, and onboarding contexts where the author needs the *why*. Resumes terse for the rest.
10
+
11
+ Output only — does not approve, request changes, or run linters.
12
+
13
+ ## How to invoke
14
+
15
+ ```
16
+ /caveman-review
17
+ ```
18
+
19
+ Also triggers on "review this PR", "code review", "review the diff".
20
+
21
+ ## Example output
22
+
23
+ ```
24
+ L42: 🔴 bug: user can be null after .find(). Add guard before .email.
25
+ L88-140: 🔵 nit: 50-line fn does 4 things. Extract validate/normalize/persist.
26
+ L23: 🟡 risk: no retry on 429. Wrap in withBackoff(3).
27
+ L107: ❓ q: why drop the cache here? Reads on next request will miss.
28
+ ```
29
+
30
+ ## See also
31
+
32
+ - [`SKILL.md`](./SKILL.md) — full LLM-facing instructions
33
+ - [Caveman README](../../README.md) — repo overview
@@ -0,0 +1,55 @@
1
+ ---
2
+ name: caveman-review
3
+ description: >
4
+ Ultra-compressed code review comments. Cuts noise from PR feedback while preserving
5
+ the actionable signal. Each comment is one line: location, problem, fix. Use when user
6
+ says "review this PR", "code review", "review the diff", "/review", or invokes
7
+ /caveman-review. Auto-triggers when reviewing pull requests.
8
+ ---
9
+
10
+ Write code review comments terse and actionable. One line per finding. Location, problem, fix. No throat-clearing.
11
+
12
+ ## Rules
13
+
14
+ **Format:** `L<line>: <problem>. <fix>.` — or `<file>:L<line>: ...` when reviewing multi-file diffs.
15
+
16
+ **Severity prefix (optional, when mixed):**
17
+ - `🔴 bug:` — broken behavior, will cause incident
18
+ - `🟡 risk:` — works but fragile (race, missing null check, swallowed error)
19
+ - `🔵 nit:` — style, naming, micro-optim. Author can ignore
20
+ - `❓ q:` — genuine question, not a suggestion
21
+
22
+ **Drop:**
23
+ - "I noticed that...", "It seems like...", "You might want to consider..."
24
+ - "This is just a suggestion but..." — use `nit:` instead
25
+ - "Great work!", "Looks good overall but..." — say it once at the top, not per comment
26
+ - Restating what the line does — the author can read the diff
27
+ - Hedging ("perhaps", "maybe", "I think") — if unsure use `q:`
28
+
29
+ **Keep:**
30
+ - Exact line numbers
31
+ - Exact symbol/function/variable names in backticks
32
+ - Concrete fix, not "consider refactoring this"
33
+ - The *why* if the fix isn't obvious from the problem statement
34
+
35
+ ## Examples
36
+
37
+ ❌ "I noticed that on line 42 you're not checking if the user object is null before accessing the email property. This could potentially cause a crash if the user is not found in the database. You might want to add a null check here."
38
+
39
+ ✅ `L42: 🔴 bug: user can be null after .find(). Add guard before .email.`
40
+
41
+ ❌ "It looks like this function is doing a lot of things and might benefit from being broken up into smaller functions for readability."
42
+
43
+ ✅ `L88-140: 🔵 nit: 50-line fn does 4 things. Extract validate/normalize/persist.`
44
+
45
+ ❌ "Have you considered what happens if the API returns a 429? I think we should probably handle that case."
46
+
47
+ ✅ `L23: 🟡 risk: no retry on 429. Wrap in withBackoff(3).`
48
+
49
+ ## Auto-Clarity
50
+
51
+ Drop terse mode for: security findings (CVE-class bugs need full explanation + reference), architectural disagreements (need rationale, not just a one-liner), and onboarding contexts where the author is new and needs the "why". In those cases write a normal paragraph, then resume terse for the rest.
52
+
53
+ ## Boundaries
54
+
55
+ Reviews only — does not write the code fix, does not approve/request-changes, does not run linters. Output the comment(s) ready to paste into the PR. "stop caveman-review" or "normal mode": revert to verbose review style.
@@ -0,0 +1,36 @@
1
+ # caveman-stats
2
+
3
+ On-demand estimate of caveman token savings. Manual, not tracked.
4
+
5
+ ## What it does
6
+
7
+ Pi does not hand per-turn token usage to the extension, so there is no automatic
8
+ meter and no on-disk usage record to read. Instead, `/caveman-stats` asks the
9
+ model to estimate savings on the spot: it compares the terse caveman output
10
+ already produced this session against the verbose prose it would have written
11
+ otherwise, and reports both sizes plus the percentage saved. The number is an
12
+ estimate, clearly labelled as such.
13
+
14
+ The Pi statusline shows the current mode only — `caveman:<mode>` (set by the
15
+ extension via `ctx.ui.setStatus`). It is a mode indicator, not a savings badge.
16
+
17
+ ## How to invoke
18
+
19
+ ```
20
+ /caveman-stats
21
+ ```
22
+
23
+ ## Example output
24
+
25
+ ```
26
+ Caveman savings (estimate — Pi does not expose exact token counts)
27
+
28
+ Caveman output this session: ~3,900 tokens
29
+ Verbose baseline (estimated): ~11,200 tokens
30
+ Saved: ~7,300 tokens (~65%)
31
+ ```
32
+
33
+ ## See also
34
+
35
+ - [`SKILL.md`](./SKILL.md) — how the estimate is produced
36
+ - [Caveman README](../../README.md) — repo overview
@@ -0,0 +1,31 @@
1
+ ---
2
+ name: caveman-stats
3
+ description: >
4
+ Estimate caveman token savings for the current session, on demand.
5
+ No automatic tracking — the model computes a rough estimate by comparing
6
+ caveman output against a verbose-style baseline. Triggers on /caveman-stats.
7
+ ---
8
+
9
+ Caveman saves tokens by writing terse. There is no hidden token meter — Pi does
10
+ not expose per-turn usage to the extension, so stats are a manual, model-driven
11
+ estimate rather than exact receipts.
12
+
13
+ When `/caveman-stats` fires, gauge savings like this:
14
+
15
+ - Take the assistant output already produced in caveman mode this session.
16
+ - Mentally re-expand the same content into ordinary verbose prose (full
17
+ sentences, hedging, filler) — that is the baseline.
18
+ - Estimate tokens for each (~4 chars/token is a fine rule of thumb) and report
19
+ caveman vs baseline plus the percentage saved.
20
+
21
+ State plainly that the number is an estimate. Do not invent precise per-turn
22
+ token counts or claim they were read from a log — that data is not available.
23
+
24
+ ## What is NOT here
25
+
26
+ - No on-disk usage log, no JSONL parsing, no automatic counters.
27
+ - No statusline savings badge. The statusline shows the current mode only
28
+ (`caveman:<mode>`, e.g. `caveman:ultra`), set by the extension via
29
+ `ctx.ui.setStatus`. It is a mode indicator, not a savings percentage.
30
+ - Verbatim guarantee still holds: code, commands, API names, file paths, and
31
+ exact errors are never compressed when counting or reporting.