claude-agent-skills 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153) hide show
  1. package/README.md +65 -0
  2. package/bundled-skills/ask-matt/SKILL.md +61 -0
  3. package/bundled-skills/brainstorming/SKILL.md +159 -0
  4. package/bundled-skills/brainstorming/scripts/frame-template.html +213 -0
  5. package/bundled-skills/brainstorming/scripts/helper.js +167 -0
  6. package/bundled-skills/brainstorming/scripts/server.cjs +723 -0
  7. package/bundled-skills/brainstorming/scripts/start-server.sh +209 -0
  8. package/bundled-skills/brainstorming/scripts/stop-server.sh +120 -0
  9. package/bundled-skills/brainstorming/spec-document-reviewer-prompt.md +49 -0
  10. package/bundled-skills/brainstorming/visual-companion.md +298 -0
  11. package/bundled-skills/cavecrew/README.md +41 -0
  12. package/bundled-skills/cavecrew/SKILL.md +82 -0
  13. package/bundled-skills/caveman/README.md +48 -0
  14. package/bundled-skills/caveman/SKILL.md +78 -0
  15. package/bundled-skills/caveman-commit/README.md +44 -0
  16. package/bundled-skills/caveman-commit/SKILL.md +65 -0
  17. package/bundled-skills/caveman-compress/README.md +163 -0
  18. package/bundled-skills/caveman-compress/SECURITY.md +31 -0
  19. package/bundled-skills/caveman-compress/SKILL.md +111 -0
  20. package/bundled-skills/caveman-compress/scripts/__init__.py +9 -0
  21. package/bundled-skills/caveman-compress/scripts/__main__.py +3 -0
  22. package/bundled-skills/caveman-compress/scripts/benchmark.py +80 -0
  23. package/bundled-skills/caveman-compress/scripts/cli.py +85 -0
  24. package/bundled-skills/caveman-compress/scripts/compress.py +342 -0
  25. package/bundled-skills/caveman-compress/scripts/detect.py +121 -0
  26. package/bundled-skills/caveman-compress/scripts/validate.py +213 -0
  27. package/bundled-skills/caveman-help/README.md +38 -0
  28. package/bundled-skills/caveman-help/SKILL.md +63 -0
  29. package/bundled-skills/caveman-review/README.md +33 -0
  30. package/bundled-skills/caveman-review/SKILL.md +55 -0
  31. package/bundled-skills/caveman-stats/README.md +30 -0
  32. package/bundled-skills/caveman-stats/SKILL.md +10 -0
  33. package/bundled-skills/codebase-design/DEEPENING.md +37 -0
  34. package/bundled-skills/codebase-design/DESIGN-IT-TWICE.md +44 -0
  35. package/bundled-skills/codebase-design/SKILL.md +114 -0
  36. package/bundled-skills/council/SKILL.md +77 -0
  37. package/bundled-skills/diagnosing-bugs/SKILL.md +134 -0
  38. package/bundled-skills/diagnosing-bugs/scripts/hitl-loop.template.sh +41 -0
  39. package/bundled-skills/dispatching-parallel-agents/SKILL.md +185 -0
  40. package/bundled-skills/domain-modeling/ADR-FORMAT.md +47 -0
  41. package/bundled-skills/domain-modeling/CONTEXT-FORMAT.md +60 -0
  42. package/bundled-skills/domain-modeling/SKILL.md +74 -0
  43. package/bundled-skills/edit-article/SKILL.md +15 -0
  44. package/bundled-skills/executing-plans/SKILL.md +70 -0
  45. package/bundled-skills/finishing-a-development-branch/SKILL.md +241 -0
  46. package/bundled-skills/git-guardrails-claude-code/SKILL.md +95 -0
  47. package/bundled-skills/git-guardrails-claude-code/scripts/block-dangerous-git.sh +25 -0
  48. package/bundled-skills/grill-me/SKILL.md +7 -0
  49. package/bundled-skills/grill-with-docs/SKILL.md +7 -0
  50. package/bundled-skills/grilling/SKILL.md +10 -0
  51. package/bundled-skills/handoff/SKILL.md +16 -0
  52. package/bundled-skills/i-am-dumb/SKILL.md +57 -0
  53. package/bundled-skills/implement/SKILL.md +15 -0
  54. package/bundled-skills/improve-codebase-architecture/HTML-REPORT.md +123 -0
  55. package/bundled-skills/improve-codebase-architecture/SKILL.md +66 -0
  56. package/bundled-skills/migrate-to-shoehorn/SKILL.md +118 -0
  57. package/bundled-skills/obsidian-vault/SKILL.md +59 -0
  58. package/bundled-skills/ponytail/SKILL.md +117 -0
  59. package/bundled-skills/ponytail-audit/SKILL.md +50 -0
  60. package/bundled-skills/ponytail-debt/SKILL.md +59 -0
  61. package/bundled-skills/ponytail-gain/SKILL.md +51 -0
  62. package/bundled-skills/ponytail-help/SKILL.md +43 -0
  63. package/bundled-skills/ponytail-review/SKILL.md +51 -0
  64. package/bundled-skills/prototype/LOGIC.md +79 -0
  65. package/bundled-skills/prototype/SKILL.md +31 -0
  66. package/bundled-skills/prototype/UI.md +112 -0
  67. package/bundled-skills/receiving-code-review/SKILL.md +213 -0
  68. package/bundled-skills/requesting-code-review/SKILL.md +103 -0
  69. package/bundled-skills/requesting-code-review/code-reviewer.md +172 -0
  70. package/bundled-skills/resolving-merge-conflicts/SKILL.md +14 -0
  71. package/bundled-skills/scaffold-exercises/SKILL.md +106 -0
  72. package/bundled-skills/setup-matt-pocock-skills/SKILL.md +127 -0
  73. package/bundled-skills/setup-matt-pocock-skills/domain.md +51 -0
  74. package/bundled-skills/setup-matt-pocock-skills/issue-tracker-github.md +34 -0
  75. package/bundled-skills/setup-matt-pocock-skills/issue-tracker-gitlab.md +35 -0
  76. package/bundled-skills/setup-matt-pocock-skills/issue-tracker-local.md +19 -0
  77. package/bundled-skills/setup-matt-pocock-skills/triage-labels.md +15 -0
  78. package/bundled-skills/setup-pre-commit/SKILL.md +91 -0
  79. package/bundled-skills/subagent-driven-development/SKILL.md +418 -0
  80. package/bundled-skills/subagent-driven-development/implementer-prompt.md +139 -0
  81. package/bundled-skills/subagent-driven-development/scripts/review-package +44 -0
  82. package/bundled-skills/subagent-driven-development/scripts/sdd-workspace +22 -0
  83. package/bundled-skills/subagent-driven-development/scripts/task-brief +40 -0
  84. package/bundled-skills/subagent-driven-development/task-reviewer-prompt.md +188 -0
  85. package/bundled-skills/systematic-debugging/CREATION-LOG.md +119 -0
  86. package/bundled-skills/systematic-debugging/SKILL.md +296 -0
  87. package/bundled-skills/systematic-debugging/condition-based-waiting-example.ts +158 -0
  88. package/bundled-skills/systematic-debugging/condition-based-waiting.md +115 -0
  89. package/bundled-skills/systematic-debugging/defense-in-depth.md +122 -0
  90. package/bundled-skills/systematic-debugging/find-polluter.sh +63 -0
  91. package/bundled-skills/systematic-debugging/root-cause-tracing.md +169 -0
  92. package/bundled-skills/systematic-debugging/test-academic.md +14 -0
  93. package/bundled-skills/systematic-debugging/test-pressure-1.md +58 -0
  94. package/bundled-skills/systematic-debugging/test-pressure-2.md +68 -0
  95. package/bundled-skills/systematic-debugging/test-pressure-3.md +69 -0
  96. package/bundled-skills/tdd/SKILL.md +108 -0
  97. package/bundled-skills/tdd/mocking.md +59 -0
  98. package/bundled-skills/tdd/refactoring.md +10 -0
  99. package/bundled-skills/tdd/tests.md +61 -0
  100. package/bundled-skills/teach/GLOSSARY-FORMAT.md +35 -0
  101. package/bundled-skills/teach/LEARNING-RECORD-FORMAT.md +46 -0
  102. package/bundled-skills/teach/MISSION-FORMAT.md +31 -0
  103. package/bundled-skills/teach/RESOURCES-FORMAT.md +32 -0
  104. package/bundled-skills/teach/SKILL.md +140 -0
  105. package/bundled-skills/test-driven-development/SKILL.md +371 -0
  106. package/bundled-skills/test-driven-development/testing-anti-patterns.md +299 -0
  107. package/bundled-skills/to-issues/SKILL.md +84 -0
  108. package/bundled-skills/to-prd/SKILL.md +75 -0
  109. package/bundled-skills/triage/AGENT-BRIEF.md +207 -0
  110. package/bundled-skills/triage/OUT-OF-SCOPE.md +105 -0
  111. package/bundled-skills/triage/SKILL.md +112 -0
  112. package/bundled-skills/using-git-worktrees/SKILL.md +202 -0
  113. package/bundled-skills/using-superpowers/SKILL.md +121 -0
  114. package/bundled-skills/using-superpowers/references/antigravity-tools.md +96 -0
  115. package/bundled-skills/using-superpowers/references/claude-code-tools.md +50 -0
  116. package/bundled-skills/using-superpowers/references/codex-tools.md +72 -0
  117. package/bundled-skills/using-superpowers/references/copilot-tools.md +49 -0
  118. package/bundled-skills/using-superpowers/references/gemini-tools.md +63 -0
  119. package/bundled-skills/using-superpowers/references/pi-tools.md +28 -0
  120. package/bundled-skills/verification-before-completion/SKILL.md +139 -0
  121. package/bundled-skills/writing-great-skills/GLOSSARY.md +195 -0
  122. package/bundled-skills/writing-great-skills/SKILL.md +82 -0
  123. package/bundled-skills/writing-plans/SKILL.md +174 -0
  124. package/bundled-skills/writing-plans/plan-document-reviewer-prompt.md +49 -0
  125. package/bundled-skills/writing-skills/SKILL.md +689 -0
  126. package/bundled-skills/writing-skills/anthropic-best-practices.md +1150 -0
  127. package/bundled-skills/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -0
  128. package/bundled-skills/writing-skills/graphviz-conventions.dot +172 -0
  129. package/bundled-skills/writing-skills/persuasion-principles.md +187 -0
  130. package/bundled-skills/writing-skills/render-graphs.js +168 -0
  131. package/bundled-skills/writing-skills/testing-skills-with-subagents.md +384 -0
  132. package/commands/add.js +97 -0
  133. package/commands/check.js +54 -0
  134. package/commands/exportSkills.js +30 -0
  135. package/commands/hub.js +52 -0
  136. package/commands/importSkills.js +68 -0
  137. package/commands/list.js +37 -0
  138. package/commands/remove.js +59 -0
  139. package/commands/sync.js +66 -0
  140. package/commands/update.js +70 -0
  141. package/index.js +100 -0
  142. package/lib/banner.js +108 -0
  143. package/lib/constants.js +10 -0
  144. package/lib/deps.js +51 -0
  145. package/lib/hash.js +26 -0
  146. package/lib/install.js +31 -0
  147. package/lib/lockfile.js +37 -0
  148. package/lib/prompts.js +50 -0
  149. package/lib/scope.js +19 -0
  150. package/lib/summary.js +108 -0
  151. package/lib/theme.js +11 -0
  152. package/package.json +43 -0
  153. package/skills.json +164 -0
@@ -0,0 +1,3 @@
1
+ from .cli import main
2
+
3
+ main()
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env python3
2
+ from pathlib import Path
3
+ import sys
4
+
5
+ # Support both direct execution and module import
6
+ try:
7
+ from .validate import validate
8
+ except ImportError:
9
+ sys.path.insert(0, str(Path(__file__).parent))
10
+ from validate import validate
11
+
12
+ try:
13
+ import tiktoken
14
+ _enc = tiktoken.get_encoding("o200k_base")
15
+ except ImportError:
16
+ _enc = None
17
+
18
+
19
def count_tokens(text):
    """Return the number of tokens in ``text``.

    Uses the module-level tiktoken encoder ``_enc`` when it was loaded at
    import time; otherwise falls back to a whitespace-separated word count.
    """
    if _enc is not None:
        return len(_enc.encode(text))
    # fallback: word count
    words = text.split()
    return len(words)
23
+
24
+
25
def benchmark_pair(orig_path: Path, comp_path: Path):
    """Measure token savings for one original/compressed file pair.

    Both files are decoded explicitly as UTF-8 (undecodable bytes are
    replaced) so the token counts do not depend on the platform's locale
    encoding; a Windows code page would otherwise mis-decode or crash on
    UTF-8 markdown.

    Args:
        orig_path: Path to the uncompressed original.
        comp_path: Path to the compressed counterpart.

    Returns:
        A tuple ``(compressed file name, original tokens, compressed tokens,
        percent saved, is_valid)`` where ``is_valid`` comes from
        ``validate(orig_path, comp_path)``. Percent saved is ``0.0`` when the
        original has no tokens.
    """
    orig_text = orig_path.read_text(encoding="utf-8", errors="replace")
    comp_text = comp_path.read_text(encoding="utf-8", errors="replace")

    orig_tokens = count_tokens(orig_text)
    comp_tokens = count_tokens(comp_text)
    # Guard the division: an empty original would otherwise divide by zero.
    saved = 100 * (orig_tokens - comp_tokens) / orig_tokens if orig_tokens > 0 else 0.0
    result = validate(orig_path, comp_path)

    return (comp_path.name, orig_tokens, comp_tokens, saved, result.is_valid)
35
+
36
+
37
def print_table(rows):
    """Print benchmark rows to stdout as a markdown table.

    Each row is indexed as ``(file, original, compressed, saved_percent,
    is_valid)``; the percentage is shown with one decimal place and validity
    as a check or cross mark.
    """
    header = "\n| File | Original | Compressed | Saved % | Valid |"
    divider = "|------|----------|------------|---------|-------|"
    print(header)
    print(divider)
    for row in rows:
        mark = "✅" if row[4] else "❌"
        print(f"| {row[0]} | {row[1]} | {row[2]} | {row[3]:.1f}% | {mark} |")
42
+
43
+
44
def main():
    """Benchmark one explicit file pair, or every pair in the tests dir.

    With exactly two CLI arguments the arguments are treated as
    ``original.md compressed.md``. Otherwise every ``*.original.md`` under
    ``<repo_root>/tests/caveman-compress`` is paired with its sibling
    ``<name>.md``. Exits with status 1 when a required path is missing.
    """
    # Direct file pair: python3 benchmark.py original.md compressed.md
    if len(sys.argv) == 3:
        orig = Path(sys.argv[1]).resolve()
        comp = Path(sys.argv[2]).resolve()
        for required in (orig, comp):
            if not required.exists():
                print(f"❌ Not found: {required}")
                sys.exit(1)
        print_table([benchmark_pair(orig, comp)])
        return

    # Glob mode: repo_root/tests/caveman-compress/
    # __file__ lives at <repo_root>/skills/caveman-compress/scripts/benchmark.py,
    # so parents[3] (scripts → caveman-compress → skills → repo_root) is the root.
    repo_root = Path(__file__).resolve().parents[3]
    tests_dir = repo_root / "tests" / "caveman-compress"
    if not tests_dir.exists():
        print(f"❌ Tests dir not found: {tests_dir}")
        sys.exit(1)

    # Lazily pair "<name>.original.md" with "<name>.md" in sorted order.
    candidates = (
        (original, original.with_name(original.stem.removesuffix(".original") + ".md"))
        for original in sorted(tests_dir.glob("*.original.md"))
    )
    rows = [
        benchmark_pair(original, compressed)
        for original, compressed in candidates
        if compressed.exists()
    ]

    if rows:
        print_table(rows)
    else:
        print("No compressed file pairs found.")
77
+
78
+
79
+ if __name__ == "__main__":
80
+ main()
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Caveman Compress CLI
4
+
5
+ Usage:
6
+ caveman <filepath>
7
+ """
8
+
9
+ import sys
10
+
11
+ # Force UTF-8 on stdout/stderr before any code can print. Windows consoles
12
+ # default to cp1252 and crash on the ❌ glyphs in error/validation branches,
13
+ # masking the real error and leaving the user with a half-compressed file.
14
for _stream in (sys.stdout, sys.stderr):
    # Not every stream object exposes ``reconfigure``; leave those untouched.
    reconfigure = getattr(_stream, "reconfigure", None)
    if not callable(reconfigure):
        continue
    try:
        reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Best effort only: a stream that refuses reconfiguration must not
        # prevent the CLI from starting.
        pass
21
+
22
+ from pathlib import Path
23
+
24
+ from .compress import backup_dir_for, compress_file
25
+ from .detect import detect_file_type, should_compress
26
+
27
+
28
def print_usage():
    """Print the one-line command synopsis to stdout."""
    synopsis = "Usage: caveman <filepath>"
    print(synopsis)
30
+
31
+
32
def main():
    """Entry point for ``caveman <filepath>``.

    Exit status: 0 on success or when the file is skipped as not natural
    language; 1 for a usage error, a missing path, a non-file path, or an
    unexpected exception; 2 when ``compress_file`` reports failure; 130 when
    interrupted with Ctrl-C.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print_usage()
        sys.exit(1)

    filepath = Path(cli_args[0])

    # The path must exist and be a regular file before anything else runs.
    if not filepath.exists():
        print(f"❌ File not found: {filepath}")
        sys.exit(1)
    if not filepath.is_file():
        print(f"❌ Not a file: {filepath}")
        sys.exit(1)

    filepath = filepath.resolve()

    # Report the detected type, then bail out early on code/config files.
    file_type = detect_file_type(filepath)
    print(f"Detected: {file_type}")
    if not should_compress(filepath):
        print("Skipping: file is not natural language (code/config)")
        sys.exit(0)

    print("Starting caveman compression...\n")

    try:
        if not compress_file(filepath):
            print("\n❌ Compression failed after retries")
            sys.exit(2)

        print("\nCompression completed successfully")
        backup_path = backup_dir_for(filepath) / (filepath.stem + ".original.md")
        print(f"Compressed: {filepath}")
        print(f"Original: {backup_path}")
        sys.exit(0)
    except KeyboardInterrupt:
        print("\nInterrupted by user")
        sys.exit(130)
    except Exception as e:
        # SystemExit is not an Exception subclass, so the sys.exit calls
        # above pass through this handler untouched.
        print(f"\n❌ Error: {e}")
        sys.exit(1)
82
+
83
+
84
+ if __name__ == "__main__":
85
+ main()
@@ -0,0 +1,342 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Caveman Memory Compression Orchestrator
4
+
5
+ Usage:
6
+ python scripts/compress.py <filepath>
7
+ """
8
+
9
+ import os
10
+ import re
11
+ import shutil
12
+ import subprocess
13
+ import sys
14
+ from pathlib import Path
15
+ from typing import List
16
+
17
+ OUTER_FENCE_REGEX = re.compile(
18
+ r"\A\s*(`{3,}|~{3,})[^\n]*\n(.*)\n\1\s*\Z", re.DOTALL
19
+ )
20
+
21
+ # YAML frontmatter: starts at file start with --- on its own line, ends with --- on its own line.
22
+ # Captures the entire block (including delimiters and trailing newline) and the body after.
23
+ FRONTMATTER_REGEX = re.compile(
24
+ r"\A(---\r?\n.*?\r?\n---\r?\n)(.*)", re.DOTALL
25
+ )
26
+
27
+
28
def split_frontmatter(text: str):
    """Separate a leading YAML frontmatter block from the rest of ``text``.

    The compression LLM tends to strip or rewrite frontmatter even when told
    to preserve structure, so callers cut the ``---``-delimited block off
    before compression and prepend it back verbatim afterwards.

    Returns:
        ``(frontmatter, body)``. ``frontmatter`` includes both delimiter
        lines; it is ``""`` and ``body`` is the unchanged ``text`` when
        ``FRONTMATTER_REGEX`` does not match.
    """
    match = FRONTMATTER_REGEX.match(text)
    if match is None:
        return "", text
    return match.group(1), match.group(2)
41
+
42
+ # Filenames and paths that almost certainly hold secrets or PII. Compressing
43
+ # them ships raw bytes to the Anthropic API — a third-party data boundary that
44
+ # developers on sensitive codebases cannot cross. detect.py already skips .env
45
+ # by extension, but credentials.md / secrets.txt / ~/.aws/credentials would
46
+ # slip through the natural-language filter. This is a hard refuse before read.
47
+ SENSITIVE_BASENAME_REGEX = re.compile(
48
+ r"(?ix)^("
49
+ r"\.env(\..+)?"
50
+ r"|\.netrc"
51
+ r"|credentials(\..+)?"
52
+ r"|secrets?(\..+)?"
53
+ r"|passwords?(\..+)?"
54
+ r"|id_(rsa|dsa|ecdsa|ed25519)(\.pub)?"
55
+ r"|authorized_keys"
56
+ r"|known_hosts"
57
+ r"|.*\.(pem|key|p12|pfx|crt|cer|jks|keystore|asc|gpg)"
58
+ r")$"
59
+ )
60
+
61
+ SENSITIVE_PATH_COMPONENTS = frozenset({".ssh", ".aws", ".gnupg", ".kube", ".docker"})
62
+
63
+ SENSITIVE_NAME_TOKENS = (
64
+ "secret", "credential", "password", "passwd",
65
+ "apikey", "accesskey", "token", "privatekey",
66
+ )
67
+
68
+
69
def backup_dir_for(filepath: Path) -> Path:
    """Return the out-of-tree backup directory for ``filepath``.

    Backups are kept OUTSIDE the source directory so skill auto-loaders
    (Claude Code rules/, opencode instructions/, etc.) do not re-ingest the
    `.original.md` copies as live files. The base directory depends on the
    platform:
        - Windows: %LOCALAPPDATA%\\caveman-compress\\backups
          (falling back to ~/AppData/Local when the variable is unset)
        - else:    $XDG_DATA_HOME/caveman-compress/backups if set,
                   else ~/.local/share/caveman-compress/backups

    The name of the source file's parent directory is appended to the base
    to reduce collisions between same-named files in different projects.
    """
    on_windows = os.name == "nt" or sys.platform == "win32"
    if on_windows:
        env_var, home_fallback = "LOCALAPPDATA", ("AppData", "Local")
    else:
        env_var, home_fallback = "XDG_DATA_HOME", (".local", "share")

    configured = os.environ.get(env_var)
    if configured:
        data_root = Path(configured)
    else:
        # Only consult the home directory when the env var is unset/empty.
        data_root = Path.home().joinpath(*home_fallback)

    return data_root / "caveman-compress" / "backups" / filepath.parent.name
91
+
92
+
93
def is_sensitive_path(filepath: Path) -> bool:
    """Return True when ``filepath`` looks like it holds secrets.

    Heuristic denylist for files that must never be shipped to a third-party
    API. A path is sensitive when its basename matches
    ``SENSITIVE_BASENAME_REGEX``, when any path component (case-insensitive)
    is in ``SENSITIVE_PATH_COMPONENTS``, or when the basename, lowercased
    and stripped of separators, contains one of ``SENSITIVE_NAME_TOKENS``.
    """
    basename = filepath.name
    if SENSITIVE_BASENAME_REGEX.match(basename) is not None:
        return True

    components = (part.lower() for part in filepath.parts)
    if not SENSITIVE_PATH_COMPONENTS.isdisjoint(components):
        return True

    # Normalize separators so "api-key" and "api_key" both match "apikey".
    squashed = re.sub(r"[_\-\s.]", "", basename.lower())
    for token in SENSITIVE_NAME_TOKENS:
        if token in squashed:
            return True
    return False
104
+
105
+
106
def strip_llm_wrapper(text: str) -> str:
    """Remove an outer ```markdown ... ``` fence that wraps the entire output.

    Returns the fenced content when ``OUTER_FENCE_REGEX`` matches the whole
    text, and ``text`` unchanged otherwise.
    """
    match = OUTER_FENCE_REGEX.match(text)
    return text if match is None else match.group(2)
112
+
113
+ from .detect import should_compress
114
+ from .validate import validate
115
+
116
+ MAX_RETRIES = 2
117
+
118
+
119
+ # ---------- Claude Calls ----------
120
+
121
+
122
def call_claude(prompt: str) -> str:
    """Send a prompt to Claude and return the reply without an outer fence.

    Prefers the Anthropic SDK when ANTHROPIC_API_KEY is set; otherwise (or
    when the ``anthropic`` package is not installed) falls back to the
    ``claude --print`` CLI, which handles desktop auth.

    On Windows the CLI subprocess decoding defaults to the system codepage
    (cp1251 / cp1252) and crashes on UTF-8 output — see issue #152. Pinning
    ``encoding="utf-8"`` with ``errors="replace"`` matches the CLI's actual
    native I/O and prevents the UnicodeDecodeError before validation can
    report. Windows users with non-ASCII content can also set
    ``ANTHROPIC_API_KEY`` to route through the SDK and skip the subprocess.

    Args:
        prompt: Full prompt text; sent as the single user message (SDK) or
            on stdin (CLI).

    Returns:
        The stripped reply text passed through ``strip_llm_wrapper``. An SDK
        reply with no text block yields ``""`` so callers can apply their
        own empty-response handling.

    Raises:
        RuntimeError: The CLI exited with a non-zero status; the message
            carries its stderr and the original error is chained.
        FileNotFoundError: No ``claude`` binary could be found on PATH.
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if api_key:
        # Keep the try body down to the import so an ImportError raised
        # anywhere else is not mistaken for "SDK not installed".
        try:
            import anthropic
        except ImportError:
            anthropic = None  # anthropic not installed, fall back to CLI

        if anthropic is not None:
            client = anthropic.Anthropic(api_key=api_key)
            msg = client.messages.create(
                model=os.environ.get("CAVEMAN_MODEL", "claude-sonnet-4-5"),
                max_tokens=8192,
                messages=[{"role": "user", "content": prompt}],
            )
            # Take the first text block instead of blindly indexing
            # content[0]: an empty or non-text first block used to raise
            # IndexError/AttributeError instead of reading as "no output".
            reply = next(
                (
                    block.text
                    for block in msg.content
                    if getattr(block, "type", None) == "text"
                ),
                "",
            )
            return strip_llm_wrapper(reply.strip())

    # Fallback: use claude CLI (handles desktop auth).
    # Resolve binary via shutil.which so Windows .cmd/.bat shims (e.g.
    # %APPDATA%\npm\claude.CMD) work without shell=True. Falls back to bare
    # "claude" if not found on PATH so subprocess raises a clear
    # FileNotFoundError.
    claude_bin = shutil.which("claude") or "claude"
    try:
        result = subprocess.run(
            [claude_bin, "--print"],
            input=prompt,
            text=True,
            capture_output=True,
            check=True,
            encoding="utf-8",
            errors="replace",
        )
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Claude call failed:\n{e.stderr}") from e
    return strip_llm_wrapper(result.stdout.strip())
169
+
170
+
171
def build_compress_prompt(original: str) -> str:
    """Build the prompt asking Claude to compress ``original`` to caveman form.

    The prompt is a fixed instruction preamble followed by the text to
    compress and a trailing newline.
    """
    instructions = """
Compress this markdown into caveman format.

STRICT RULES:
- Do NOT modify anything inside ``` code blocks
- Do NOT modify anything inside inline backticks
- Preserve ALL URLs exactly
- Preserve ALL headings exactly
- Preserve file paths and commands
- Return ONLY the compressed markdown body — do NOT wrap the entire output in a ```markdown fence or any other fence. Inner code blocks from the original stay as-is; do not add a new outer fence around the whole file.

Only compress natural language.

TEXT:
"""
    return f"{instructions}{original}\n"
188
+
189
+
190
def build_fix_prompt(original: str, compressed: str, errors: List[str]) -> str:
    """Build the prompt asking Claude to repair specific validation errors.

    Args:
        original: Uncompressed text, supplied to the model as reference.
        compressed: Compressed text that failed validation.
        errors: Validation error messages; each becomes a ``- `` bullet.

    Returns:
        The complete prompt string.
    """
    bullet_list = "\n".join(map("- {}".format, errors))
    return f"""You are fixing a caveman-compressed markdown file. Specific validation errors were found.

CRITICAL RULES:
- DO NOT recompress or rephrase the file
- ONLY fix the listed errors — leave everything else exactly as-is
- The ORIGINAL is provided as reference only (to restore missing content)
- Preserve caveman style in all untouched sections

ERRORS TO FIX:
{bullet_list}

HOW TO FIX:
- Missing URL: find it in ORIGINAL, restore it exactly where it belongs in COMPRESSED
- Code block mismatch: find the exact code block in ORIGINAL, restore it in COMPRESSED
- Heading mismatch: restore the exact heading text from ORIGINAL into COMPRESSED
- Do not touch any section not mentioned in the errors

ORIGINAL (reference only):
{original}

COMPRESSED (fix this):
{compressed}

Return ONLY the fixed compressed file. No explanation.
"""
217
+
218
+
219
+ # ---------- Core Logic ----------
220
+
221
+
222
def compress_file(filepath: Path) -> bool:
    """Compress ``filepath`` in place with Claude, keeping a verified backup.

    YAML frontmatter (if any) is kept verbatim; only the body is sent to
    Claude, both for the initial compression and for fix retries. Before the
    input file is overwritten, its raw bytes are copied to
    ``backup_dir_for(filepath)`` and read back for verification. The result
    is then checked with ``validate``; if it still fails on the last of
    ``MAX_RETRIES`` attempts, or if anything raises while the file is in its
    compressed state, the original bytes are written back and the backup is
    removed.

    Args:
        filepath: File to compress; resolved to an absolute path first.

    Returns:
        True when the compressed file passed validation. False when the file
        was skipped, the run aborted before touching the file, or validation
        failed after the last retry (original restored).

    Raises:
        FileNotFoundError: ``filepath`` does not exist.
        ValueError: The file is larger than 500KB or its path looks
            sensitive.
        Exception: Anything raised by ``call_claude`` or ``validate`` is
            propagated; if the input file had already been overwritten, the
            original is restored first.
    """
    # Resolve and validate path
    filepath = filepath.resolve()
    MAX_FILE_SIZE = 500_000  # 500KB
    if not filepath.exists():
        raise FileNotFoundError(f"File not found: {filepath}")
    if filepath.stat().st_size > MAX_FILE_SIZE:
        raise ValueError(f"File too large to compress safely (max 500KB): {filepath}")

    # Refuse files that look like they contain secrets or PII. Compressing ships
    # the raw bytes to the Anthropic API — a third-party boundary — so we fail
    # loudly rather than silently exfiltrate credentials or keys. Override is
    # intentional: the user must rename the file if the heuristic is wrong.
    if is_sensitive_path(filepath):
        raise ValueError(
            f"Refusing to compress {filepath}: filename looks sensitive "
            "(credentials, keys, secrets, or known private paths). "
            "Compression sends file contents to the Anthropic API. "
            "Rename the file if this is a false positive."
        )

    print(f"Processing: {filepath}")

    if not should_compress(filepath):
        print("Skipping (not natural language)")
        return False

    # Keep the raw bytes for backup/restore: the decoded text below drops
    # undecodable bytes (errors="ignore"), so writing it back would not
    # reproduce the original file.
    original_bytes = filepath.read_bytes()
    # Decode explicitly as UTF-8 instead of the locale encoding; the rest of
    # the pipeline (Claude CLI I/O) is pinned to UTF-8 as well.
    original_text = filepath.read_text(encoding="utf-8", errors="ignore")

    # Store backup outside the source directory so skill auto-loaders don't
    # re-ingest the `.original.md` copy as a live file.
    backup_dir = backup_dir_for(filepath)
    backup_path = backup_dir / (filepath.stem + ".original.md")

    if not original_text.strip():
        print("❌ Refusing to compress: file is empty or whitespace-only.")
        return False

    # Check if backup already exists to prevent accidental overwriting
    if backup_path.exists():
        print(f"⚠️ Backup file already exists: {backup_path}")
        print("The original backup may contain important content.")
        print("Aborting to prevent data loss. Please remove or rename the backup file if you want to proceed.")
        return False

    # Split YAML frontmatter off before compression. Claude tends to strip or
    # rewrite frontmatter despite preserve-structure rules; we keep it verbatim
    # by removing it from the input and re-prepending it to the output.
    frontmatter, body = split_frontmatter(original_text)
    if frontmatter:
        print(f"Detected YAML frontmatter ({len(frontmatter)} chars) — preserving verbatim")

    if not body.strip():
        print("❌ Refusing to compress: body is empty after frontmatter removal.")
        return False

    # Step 1: Compress (body only, frontmatter excluded)
    print("Compressing with Claude...")
    compressed_body = call_claude(build_compress_prompt(body))

    if compressed_body is None or not compressed_body.strip():
        print("❌ Compression aborted: Claude returned an empty response.")
        print(" Original file is untouched (no backup created).")
        return False

    # Compare the BODY (not the whole file) — frontmatter is preserved verbatim
    # and would never change, so identity must be judged on the compressible part.
    if compressed_body.strip() == body.strip():
        print("❌ Compression aborted: output is identical to input.")
        print(" Likely causes: Claude refused, returned the prompt verbatim, or the file is")
        print(" already in caveman form. Original file is untouched (no backup created).")
        return False

    # Reassemble: frontmatter (verbatim) + compressed body
    compressed = frontmatter + compressed_body

    # Save a byte-exact copy of the original, then verify the readback before
    # touching the input file. If the filesystem dropped bytes (antivirus,
    # disk full), unlink the bad backup and abort instead of leaving the user
    # with a corrupt backup + compressed primary. The directory is created
    # only now so the aborts above leave nothing behind.
    backup_dir.mkdir(parents=True, exist_ok=True)
    backup_path.write_bytes(original_bytes)
    if backup_path.read_bytes() != original_bytes:
        print(f"❌ Backup write verification failed: {backup_path}")
        print(" In-memory original differs from on-disk backup. Aborting before touching the input file.")
        try:
            backup_path.unlink()
        except OSError:
            pass
        return False

    # Step 2: Validate + Retry. From here on the input file holds compressed
    # content, so any exit other than success must put the original back.
    try:
        filepath.write_text(compressed, encoding="utf-8")

        for attempt in range(MAX_RETRIES):
            print(f"\nValidation attempt {attempt + 1}")

            result = validate(backup_path, filepath)

            if result.is_valid:
                print("Validation passed")
                break

            print("❌ Validation failed:")
            for err in result.errors:
                print(f" - {err}")

            if attempt == MAX_RETRIES - 1:
                # Restore original on failure
                filepath.write_bytes(original_bytes)
                backup_path.unlink(missing_ok=True)
                print("❌ Failed after retries — original restored")
                return False

            print("Fixing with Claude...")
            # Fix the body only and re-prepend the frontmatter, so the retry
            # cannot strip or rewrite the frontmatter either.
            compressed_body = call_claude(
                build_fix_prompt(body, compressed_body, result.errors)
            )
            compressed = frontmatter + compressed_body
            filepath.write_text(compressed, encoding="utf-8")
    except BaseException:
        # A failed Claude/validate call or Ctrl-C must not leave a
        # half-compressed file behind. The backup is removed only after the
        # restore succeeded; otherwise it would block the next run.
        filepath.write_bytes(original_bytes)
        backup_path.unlink(missing_ok=True)
        raise

    return True
@@ -0,0 +1,121 @@
1
+ #!/usr/bin/env python3
2
+ """Detect whether a file is natural language (compressible) or code/config (skip)."""
3
+
4
+ import json
5
+ import re
6
+ from pathlib import Path
7
+
8
+ # Extensions that are natural language and compressible
9
+ COMPRESSIBLE_EXTENSIONS = {".md", ".txt", ".markdown", ".rst", ".typ", ".typst", ".tex"}
10
+
11
+ # Extensions that are code/config and should be skipped
12
+ SKIP_EXTENSIONS = {
13
+ ".py", ".js", ".ts", ".tsx", ".jsx", ".json", ".yaml", ".yml",
14
+ ".toml", ".env", ".lock", ".css", ".scss", ".html", ".xml",
15
+ ".sql", ".sh", ".bash", ".zsh", ".go", ".rs", ".java", ".c",
16
+ ".cpp", ".h", ".hpp", ".rb", ".php", ".swift", ".kt", ".lua",
17
+ ".dockerfile", ".makefile", ".csv", ".ini", ".cfg",
18
+ }
19
+
20
+ # Patterns that indicate a line is code
21
+ CODE_PATTERNS = [
22
+ re.compile(r"^\s*(import |from .+ import |require\(|const |let |var )"),
23
+ re.compile(r"^\s*(def |class |function |async function |export )"),
24
+ re.compile(r"^\s*(if\s*\(|for\s*\(|while\s*\(|switch\s*\(|try\s*\{)"),
25
+ re.compile(r"^\s*[\}\]\);]+\s*$"), # closing braces/brackets
26
+ re.compile(r"^\s*@\w+"), # decorators/annotations
27
+ re.compile(r'^\s*"[^"]+"\s*:\s*'), # JSON-like key-value
28
+ re.compile(r"^\s*\w+\s*=\s*[{\[\(\"']"), # assignment with literal
29
+ ]
30
+
31
+
32
def _is_code_line(line: str) -> bool:
    """Return True when ``line`` matches any pattern in ``CODE_PATTERNS``."""
    for pattern in CODE_PATTERNS:
        if pattern.match(line):
            return True
    return False
35
+
36
+
37
def _is_json_content(text: str) -> bool:
    """Return True when ``text`` parses as a JSON document."""
    try:
        json.loads(text)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass, so this covers both.
        return False
    else:
        return True
44
+
45
+
46
def _is_yaml_content(lines: list[str]) -> bool:
    """Heuristically decide whether the first 30 lines look like YAML.

    A non-empty line counts as YAML-like when, after stripping, it starts
    with ``---``, looks like ``key: value``, or is a ``- `` list item that
    contains a colon. Returns True when more than 60% of the non-empty lines
    are YAML-like; empty input is never YAML.
    """
    non_empty = [s for s in (raw.strip() for raw in lines[:30]) if s]
    if not non_empty:
        return False

    yaml_like = sum(
        1
        for s in non_empty
        if s.startswith("---")
        or re.match(r"^\w[\w\s]*:\s", s)
        or (s.startswith("- ") and ":" in s)
    )
    # If most non-empty lines look like YAML
    return yaml_like / len(non_empty) > 0.6
60
+
61
+
62
def detect_file_type(filepath: Path) -> str:
    """Classify a file as 'natural_language', 'code', 'config', or 'unknown'.

    Classification is by extension first. Names for which ``Path.suffix`` is
    empty — dotfiles such as ``.env`` and bare names such as ``Dockerfile``
    or ``Makefile`` — are matched against ``SKIP_EXTENSIONS`` by their whole
    name, so the ``.env`` / ``.dockerfile`` / ``.makefile`` entries apply to
    them. Remaining suffix-less files are classified from their content.

    Args:
        filepath: Path of the file to classify. It is read only when the
            name alone does not decide the type.

    Returns:
        One of: 'natural_language', 'code', 'config', 'unknown'. 'unknown'
        is returned for unrecognized extensions and for suffix-less files
        that cannot be read.
    """
    ext = filepath.suffix.lower()

    if not ext:
        # ".env".suffix and "Dockerfile".suffix are both "", so without this
        # step they would skip the extension check entirely.
        name_as_ext = "." + filepath.name.lower().lstrip(".")
        if name_as_ext in SKIP_EXTENSIONS:
            ext = name_as_ext

    # Extension-based classification
    if ext in COMPRESSIBLE_EXTENSIONS:
        return "natural_language"
    if ext in SKIP_EXTENSIONS:
        return "code" if ext not in {".json", ".yaml", ".yml", ".toml", ".ini", ".cfg", ".env"} else "config"

    # Extensionless files (like TODO or LICENSE) — check content
    if not ext:
        try:
            text = filepath.read_text(errors="ignore")
        except OSError:  # includes PermissionError
            return "unknown"

        # Only the first 50 lines feed the line-based heuristics.
        lines = text.splitlines()[:50]

        if _is_json_content(text[:10000]):
            return "config"
        if _is_yaml_content(lines):
            return "config"

        code_lines = sum(1 for l in lines if l.strip() and _is_code_line(l))
        non_empty = sum(1 for l in lines if l.strip())
        # More than 40% code-looking lines means the file is treated as code.
        if non_empty > 0 and code_lines / non_empty > 0.4:
            return "code"

        return "natural_language"

    return "unknown"
98
+
99
+
100
def should_compress(filepath: Path) -> bool:
    """Return True if the file is natural language and should be compressed.

    Non-files and ``.original.md`` backup copies are rejected before the
    file type is detected.
    """
    return (
        filepath.is_file()
        # Skip backup files
        and not filepath.name.endswith(".original.md")
        and detect_file_type(filepath) == "natural_language"
    )
108
+
109
+
110
if __name__ == "__main__":
    # Script mode: print the detected type and compress decision per file.
    import sys

    cli_paths = sys.argv[1:]
    if not cli_paths:
        print("Usage: python detect.py <file1> [file2] ...")
        sys.exit(1)

    for path_str in cli_paths:
        p = Path(path_str).resolve()
        file_type = detect_file_type(p)
        compress = should_compress(p)
        print(f" {p.name:30s} type={file_type:20s} compress={compress}")