@kulapard/pi-caveman 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +12 -9
- package/CHANGELOG.md +71 -0
- package/README.md +10 -20
- package/extensions/caveman-core.ts +1 -1
- package/extensions/caveman.ts +8 -9
- package/package.json +4 -7
- package/skills/caveman/SKILL.md +1 -1
- package/skills/caveman-compress/README.md +10 -31
- package/skills/caveman-compress/SKILL.md +15 -18
- package/skills/caveman-help/SKILL.md +3 -2
- package/skills/caveman-compress/SECURITY.md +0 -31
- package/skills/caveman-compress/scripts/__init__.py +0 -9
- package/skills/caveman-compress/scripts/__main__.py +0 -3
- package/skills/caveman-compress/scripts/benchmark.py +0 -80
- package/skills/caveman-compress/scripts/cli.py +0 -85
- package/skills/caveman-compress/scripts/compress.py +0 -341
- package/skills/caveman-compress/scripts/detect.py +0 -169
- package/skills/caveman-compress/scripts/validate.py +0 -213
|
@@ -1,213 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
import re
|
|
3
|
-
from collections import Counter
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
# http/https URLs; a match stops at whitespace or a closing parenthesis so
# markdown links of the form [text](url) do not swallow the ")".
URL_REGEX = re.compile(r"https?://[^\s)]+")

# A fence line: up to three leading whitespace characters, a run of three or
# more backticks or tildes, then the rest of the line (info string).
FENCE_OPEN_REGEX = re.compile(r"^(\s{0,3})(`{3,}|~{3,})(.*)$")

# ATX headings: group 1 is the run of 1-6 "#" characters, group 2 the title.
HEADING_REGEX = re.compile(r"^(#{1,6})\s+(.*)", flags=re.MULTILINE)

# Start of a bullet item using "-", "*" or "+" as the marker.
BULLET_REGEX = re.compile(r"^\s*[-*+]\s+", flags=re.MULTILINE)

# Crude but effective path detection.
# A match needs either a path prefix (./, ../, / or a drive letter followed by
# a backslash) or a slash/backslash somewhere inside the matched text.
PATH_REGEX = re.compile(r"(?:\./|\.\./|/|[A-Za-z]:\\)[\w\-/\\\.]+|[\w\-\.]+[/\\][\w\-/\\\.]+")
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class ValidationResult:
    """Collects the outcome of a validation run.

    Attributes:
        is_valid: True until the first error is recorded.
        errors: Error messages, in the order they were added.
        warnings: Warning messages, in the order they were added.
    """

    def __init__(self):
        self.errors: list = []
        self.warnings: list = []
        self.is_valid: bool = True

    def add_error(self, msg):
        """Record an error message and mark the result as invalid."""
        self.errors.append(msg)
        self.is_valid = False

    def add_warning(self, msg):
        """Record a warning message; validity is left untouched."""
        self.warnings.append(msg)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def read_file(path: Path) -> str:
    """Return the text content of *path*, decoded as UTF-8.

    The encoding is pinned to UTF-8 so the result does not depend on the
    platform's locale encoding. Bytes that cannot be decoded are dropped
    (``errors="ignore"``) rather than raising.

    Args:
        path: File to read.

    Returns:
        The decoded file content.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    return path.read_text(encoding="utf-8", errors="ignore")
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# ---------- Extractors ----------
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def extract_headings(text):
    """Return ``(marker, title)`` pairs for every heading matched in *text*.

    ``marker`` is the literal run of ``#`` characters captured by
    HEADING_REGEX (not an integer level); ``title`` has surrounding
    whitespace stripped. Pairs are returned in document order.
    """
    headings = []
    for found in HEADING_REGEX.finditer(text):
        marker, title = found.groups()
        headings.append((marker, title.strip()))
    return headings
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def extract_code_blocks(text):
    """Collect every closed fenced code block in *text*, scanning line by line.

    Both backtick and tilde fences are recognised. A block is closed by a
    fence line that uses the same character as the opener, is at least as
    long, and carries nothing but whitespace after the fence. Shorter fences
    inside a longer one are therefore treated as block content, which lets an
    outer 4-backtick block wrap inner 3-backtick content.

    Returns:
        A list of blocks, each being the opening fence line, the content
        lines and the closing fence line joined with ``"\\n"``.
    """
    found = []
    current = None  # lines of the block being collected; None when outside a fence
    marker = ""     # fence run (e.g. "```" or "~~~~") that opened the current block

    for line in text.split("\n"):
        hit = FENCE_OPEN_REGEX.match(line)

        if current is None:
            # Outside a block: only a fence line is interesting.
            if hit:
                marker = hit.group(2)
                current = [line]
            continue

        # Inside a block: every line belongs to it, including the closer.
        current.append(line)
        if hit is None:
            continue

        candidate = hit.group(2)
        closes = (
            candidate[0] == marker[0]
            and len(candidate) >= len(marker)
            and not hit.group(3).strip()
        )
        if closes:
            found.append("\n".join(current))
            current = None

    # A fence still open at end of input is dropped on purpose: it signals
    # malformed markdown, and reporting it would produce false-positive
    # validation failures.
    return found
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def extract_urls(text):
    """Return the set of distinct URLs matched by URL_REGEX in *text*."""
    return {found.group(0) for found in URL_REGEX.finditer(text)}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def extract_paths(text):
    """Return the set of distinct path-like strings matched by PATH_REGEX in *text*."""
    return {found.group(0) for found in PATH_REGEX.finditer(text)}
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def count_bullets(text):
    """Return how many bullet markers BULLET_REGEX finds in *text*."""
    return sum(1 for _ in BULLET_REGEX.finditer(text))
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def extract_inline_codes(text):
    """Return the contents of inline code spans found outside fenced blocks.

    Fenced blocks are removed with a line-based scan before inline spans are
    searched. The scan follows the same fence rules as the fenced-block
    extractor in this module: a fence may be indented by up to three
    whitespace characters, uses three or more backticks or tildes, and is
    closed only by a fence of the same character that is at least as long and
    has nothing but whitespace after it. This prevents content nested inside
    a longer fence (for example a 3-backtick block wrapped in a 4-backtick
    block) from being reported as inline code.

    Args:
        text: Markdown source.

    Returns:
        A list with the text of each inline code span, in document order,
        duplicates included.
    """
    fence_regex = re.compile(r"^\s{0,3}(`{3,}|~{3,})(.*)$")

    outside = []   # lines that are not part of a closed fenced block
    pending = []   # lines of the fenced block currently being scanned
    opener = None  # fence run that opened the current block, or None

    for line in text.split("\n"):
        fence = fence_regex.match(line)

        if opener is None:
            if fence:
                opener = fence.group(1)
                pending = [line]
            else:
                outside.append(line)
            continue

        pending.append(line)
        if (
            fence
            and fence.group(1)[0] == opener[0]
            and len(fence.group(1)) >= len(opener)
            and not fence.group(2).strip()
        ):
            # Block closed: discard its lines entirely.
            opener = None
            pending = []

    if opener is not None:
        # A fence that never closes is not stripped, so its text still takes
        # part in the inline search (unterminated fences were never removed).
        outside.extend(pending)

    return re.findall(r"`([^`]+)`", "\n".join(outside))
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
# ---------- Validators ----------
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def validate_headings(orig, comp, result):
    """Compare the headings of the original and compressed text.

    A differing number of headings is recorded as an error. Any difference
    at all (count, marker, title or order) is recorded as a warning, so a
    count mismatch produces both the error and the warning.
    """
    before = extract_headings(orig)
    after = extract_headings(comp)

    if before == after:
        return

    n_before, n_after = len(before), len(after)
    if n_before != n_after:
        result.add_error(f"Heading count mismatch: {n_before} vs {n_after}")

    result.add_warning("Heading text/order changed")
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def validate_code_blocks(orig, comp, result):
    """Record an error unless both texts contain the same fenced blocks, in the same order."""
    if extract_code_blocks(orig) == extract_code_blocks(comp):
        return
    result.add_error("Code blocks not preserved exactly")
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def validate_urls(orig, comp, result):
    """Record an error when the set of URLs differs between the two texts.

    The message lists URLs missing from the compressed text (``lost``) and
    URLs that only appear in the compressed text (``added``).
    """
    before = extract_urls(orig)
    after = extract_urls(comp)

    lost = before - after
    added = after - before
    if lost or added:
        result.add_error(f"URL mismatch: lost={lost}, added={added}")
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def validate_paths(orig, comp, result):
    """Record a warning when the set of path-like strings differs between the two texts.

    The message lists paths missing from the compressed text (``lost``) and
    paths that only appear in the compressed text (``added``).
    """
    before = extract_paths(orig)
    after = extract_paths(comp)

    lost = before - after
    added = after - before
    if lost or added:
        result.add_warning(f"Path mismatch: lost={lost}, added={added}")
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def validate_bullets(orig, comp, result):
    """Warn when the bullet count drifts by more than 15% of the original count.

    Nothing is reported when the original text contains no bullets, since
    the relative change is undefined in that case.
    """
    before = count_bullets(orig)
    after = count_bullets(comp)

    # Short-circuit on a zero baseline to avoid dividing by zero.
    if before and abs(before - after) / before > 0.15:
        result.add_warning(f"Bullet count changed too much: {before} -> {after}")
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def validate_inline_codes(orig, comp, result):
    """Compare inline code spans, including how often each one occurs.

    Spans that disappear entirely, or occur fewer times in the compressed
    text, are recorded together as one error. Spans that appear only in the
    compressed text are recorded as one warning.
    """
    before = Counter(extract_inline_codes(orig))
    after = Counter(extract_inline_codes(comp))

    if before == after:
        return

    missing = set(before) - set(after)
    extra = set(after) - set(before)

    # Spans still present but with fewer occurrences also count as lost.
    for snippet in before:
        had = before[snippet]
        if snippet in after and after[snippet] < had:
            missing.add(f"{snippet} (lost {had - after[snippet]} of {had} occurrences)")

    if missing:
        result.add_error(f"Inline code lost: {missing}")
    if extra:
        result.add_warning(f"Inline code added: {extra}")
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
# ---------- Main ----------
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
def validate(original_path: Path, compressed_path: Path) -> ValidationResult:
    """Run every validator on the two files and return the collected result.

    Args:
        original_path: Path of the original file.
        compressed_path: Path of the compressed file.

    Returns:
        A ValidationResult holding all errors and warnings that were found.
    """
    report = ValidationResult()

    original_text = read_file(original_path)
    compressed_text = read_file(compressed_path)

    # Validators run in this fixed order, which determines message order.
    checks = (
        validate_headings,
        validate_code_blocks,
        validate_urls,
        validate_paths,
        validate_bullets,
        validate_inline_codes,
    )
    for check in checks:
        check(original_text, compressed_text, report)

    return report
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
# ---------- CLI ----------
|
|
190
|
-
|
|
191
|
-
if __name__ == "__main__":
    import sys

    # Exactly two positional arguments are expected: original and compressed.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python validate.py <original> <compressed>")
        sys.exit(1)

    original_file, compressed_file = (Path(arg).resolve() for arg in cli_args)
    outcome = validate(original_file, compressed_file)

    print(f"\nValid: {outcome.is_valid}")

    # Errors are listed before warnings; empty sections are omitted.
    sections = (
        ("\nErrors:", outcome.errors),
        ("\nWarnings:", outcome.warnings),
    )
    for title, messages in sections:
        if not messages:
            continue
        print(title)
        for message in messages:
            print(f" - {message}")
|