@tw93/waza 3.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +206 -0
  3. package/package.json +35 -0
  4. package/rules/anti-patterns.md +38 -0
  5. package/rules/chinese.md +18 -0
  6. package/rules/durable-context.md +27 -0
  7. package/rules/english.md +14 -0
  8. package/scripts/build_metadata.py +360 -0
  9. package/scripts/check_routing_drift.py +82 -0
  10. package/scripts/dispatcher-template.md +43 -0
  11. package/scripts/dispatcher.md +53 -0
  12. package/scripts/package-skill.sh +71 -0
  13. package/scripts/packaging_filter.py +55 -0
  14. package/scripts/setup-rule.sh +109 -0
  15. package/scripts/setup-statusline.sh +127 -0
  16. package/scripts/skill_checks.py +483 -0
  17. package/scripts/skill_frontmatter.py +110 -0
  18. package/scripts/statusline.sh +321 -0
  19. package/scripts/validate_package.py +66 -0
  20. package/scripts/verify_skills.py +100 -0
  21. package/skills/RESOLVER.md +91 -0
  22. package/skills/check/SKILL.md +338 -0
  23. package/skills/check/agents/reviewer-architecture.md +39 -0
  24. package/skills/check/agents/reviewer-security.md +39 -0
  25. package/skills/check/references/persona-catalog.md +56 -0
  26. package/skills/check/references/project-context.md +107 -0
  27. package/skills/check/references/public-reply.md +14 -0
  28. package/skills/check/scripts/audit_signals.py +485 -0
  29. package/skills/check/scripts/run-tests.sh +19 -0
  30. package/skills/design/SKILL.md +134 -0
  31. package/skills/design/references/design-aesthetic-quality.md +67 -0
  32. package/skills/design/references/design-data-viz.md +34 -0
  33. package/skills/design/references/design-reference.md +278 -0
  34. package/skills/design/references/design-tokens.md +53 -0
  35. package/skills/design/references/design-traps.md +43 -0
  36. package/skills/health/SKILL.md +231 -0
  37. package/skills/health/agents/inspector-context.md +119 -0
  38. package/skills/health/agents/inspector-control.md +84 -0
  39. package/skills/health/agents/inspector-maintainability.md +55 -0
  40. package/skills/health/scripts/check-agent-context.sh +5 -0
  41. package/skills/health/scripts/check-doc-refs.sh +8 -0
  42. package/skills/health/scripts/check-maintainability.sh +8 -0
  43. package/skills/health/scripts/check-verifier-output.sh +5 -0
  44. package/skills/health/scripts/check_agent_context.py +407 -0
  45. package/skills/health/scripts/check_doc_refs.py +110 -0
  46. package/skills/health/scripts/check_maintainability.py +629 -0
  47. package/skills/health/scripts/check_verifier_output.py +116 -0
  48. package/skills/health/scripts/collect-data.sh +760 -0
  49. package/skills/hunt/SKILL.md +197 -0
  50. package/skills/hunt/references/failure-patterns.md +75 -0
  51. package/skills/hunt/references/ime-unicode.md +58 -0
  52. package/skills/hunt/references/logging-techniques.md +72 -0
  53. package/skills/hunt/references/rendering-debug.md +34 -0
  54. package/skills/learn/SKILL.md +128 -0
  55. package/skills/read/SKILL.md +108 -0
  56. package/skills/read/references/read-methods.md +110 -0
  57. package/skills/read/references/save-paths.md +33 -0
  58. package/skills/read/scripts/fetch.sh +105 -0
  59. package/skills/read/scripts/fetch_feishu.py +246 -0
  60. package/skills/read/scripts/fetch_local.py +218 -0
  61. package/skills/read/scripts/fetch_weixin.py +107 -0
  62. package/skills/think/SKILL.md +155 -0
  63. package/skills/write/SKILL.md +129 -0
  64. package/skills/write/references/write-en.md +197 -0
  65. package/skills/write/references/write-zh-bilingual.md +60 -0
  66. package/skills/write/references/write-zh-prose.md +48 -0
  67. package/skills/write/references/write-zh-release-notes.md +38 -0
  68. package/skills/write/references/write-zh.md +645 -0
@@ -0,0 +1,483 @@
1
+ """Validation checks for Waza skills.
2
+
3
+ Each function takes the repository root (and pre-discovered skill metadata
4
+ where useful) and either prints `ok:` lines or calls `fail()`. No side effects
5
+ beyond stdout/stderr. Driver lives in `verify_skills.py`.
6
+
7
+ Split out of verify_skills.py so the check functions can be imported and
8
+ exercised by pytest unit tests without invoking the argparse driver.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import re
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ from skill_frontmatter import fail, parse_frontmatter, parse_when_to_use_keywords
19
+
20
+
21
# Relative `references/...`, `agents/...` or `scripts/...` paths that are not
# preceded by `/` or `.`; used by check_references to find per-skill file refs.
REF_PATTERN = re.compile(r'(?<![/.])\b(?:references|agents|scripts)/[\w/.-]+\b')
# Script paths written after a closing brace (`}/scripts/<path>`); the captured
# group is the part after `scripts/`.
SCRIPT_VAR_PATTERN = re.compile(r'\}/scripts/([\w/.-]+)')
# Inline markdown link `[text](destination)`; group 1 is the destination.
LINK_RE = re.compile(r'\[[^\]]*\]\(([^)]+)\)')
# Link destinations starting with these schemes are not checked on disk.
URL_PREFIXES = ("http://", "https://", "mailto:", "ftp://", "tel:", "data:")
# A line made only of whitespace, pipes, colons and dashes (table separator
# candidate; check_table_pipes additionally requires `---` and `|`).
SEP_RE = re.compile(r'^[\s|:\-]+$')
# Absolute paths under a specific user's home directory (`/Users/<name>/`,
# `/home/<name>/`).
PERSONAL_PATH_PATTERN = re.compile(r'/(?:Users|home)/[A-Za-z0-9._-]+/')
# `skills/<skill>/SKILL.md` references; group 1 is the skill name.
SKILL_REF_RE = re.compile(r'skills/([a-z][a-z0-9_-]*)/SKILL\.md')

# Skills that must carry a "## Durable Context Preflight" section.
DURABLE_CONTEXT_SKILLS = {"think", "check", "hunt", "design", "write", "health"}

# Exact instruction line that every SKILL.md must contain.
NINJA_PREFIX = "Prefix your first line with 🥷 inline, not as its own paragraph."

# Attribution strings that indicate AI co-authorship leaked into tracked files.
ATTRIBUTION_PATTERNS = (
    "Co-Authored-By: Claude",
    "Co-authored-by: Cursor",
    "noreply@anthropic.com",
    "cursoragent@cursor.com",
)
40
+
41
+
42
def pipe_count(s: str) -> int:
    """Count the ``|`` characters in ``s`` that act as table cell delimiters.

    A pipe is ignored when it is backslash-escaped or sits between a pair of
    backticks. A backslash always consumes the character that follows it, so an
    escaped backtick does not toggle the in-code state.
    """
    total = 0
    inside_code = False
    chars = iter(s)
    for ch in chars:
        if ch == "\\":
            # Drop the escaped character; a trailing backslash has nothing
            # left to consume and is simply ignored.
            next(chars, None)
            continue
        if ch == "`":
            inside_code = not inside_code
        elif ch == "|" and not inside_code:
            total += 1
    return total
54
+
55
+
56
def check_skill_files(root: Path):
    """Validate every ``skills/*/SKILL.md`` and collect its routing metadata.

    For each skill file this checks that the frontmatter ``name`` equals the
    directory name, that the file contains the exact ``NINJA_PREFIX`` line and
    that ``dispatch_intent`` is non-empty. The first violation ends the run
    through ``fail()``; each passing file prints an ``ok:`` line.

    Returns:
        A tuple ``(skill_files, skill_descriptions, skill_keywords)``: the
        sorted SKILL.md paths, a map of skill directory name to description,
        and a map of skill directory name to its ``when_to_use`` keyword set.
    """
    skill_files = sorted((root / "skills").glob("*/SKILL.md"))
    if not skill_files:
        fail("NO SKILLS FOUND: expected skills/*/SKILL.md")
    skill_descriptions: dict[str, str] = {}
    skill_keywords: dict[str, set[str]] = {}
    for path in skill_files:
        skill_dir = path.parent.name
        fields = parse_frontmatter(path)
        if fields["name"] != skill_dir:
            fail(f"NAME MISMATCH: {path} frontmatter name={fields['name']} dir={skill_dir}")
        # NINJA_PREFIX contains an emoji, so the file must be decoded as UTF-8
        # explicitly; a locale default such as cp1252 would turn it into
        # mojibake and report a false "missing prefix".
        if NINJA_PREFIX not in path.read_text(encoding="utf-8"):
            fail(
                f"MISSING NINJA PREFIX INSTRUCTION: {path}\n"
                f" Every SKILL.md must carry this exact line:\n"
                f" {NINJA_PREFIX}"
            )
        if not fields["dispatch_intent"]:
            fail(
                f"MISSING dispatch_intent: in {path}\n"
                f" Every skill needs a dispatch_intent line. It feeds the dispatcher "
                f"routing table emitted by scripts/build_metadata.py."
            )
        skill_descriptions[skill_dir] = fields["description"]
        skill_keywords[skill_dir] = parse_when_to_use_keywords(fields["when_to_use"])
        print(f"ok: {path.as_posix()}")
    return skill_files, skill_descriptions, skill_keywords
83
+
84
+
85
def check_marketplace(root: Path, expected_version: str, skill_names: set[str], skill_descriptions: dict[str, str]):
    """Validate marketplace.json shape:

    - One bundle entry: name == "waza", source == "./".
    - Per-skill entries: name == "waza-<skill>", source == "./skills/<skill>".
    - All versions in marketplace march in lock-step with the top-level VERSION
      file. Source of truth is VERSION; per-skill SKILL.md no longer carries a
      version field (codegen + this check guarantee marketplace stays correct).
    - Each per-skill description must start with the SKILL.md description.

    A missing, unparsable or non-object marketplace file, and entries whose
    ``name`` or ``description`` is not a string, are reported through
    ``fail()`` like every other violation instead of surfacing as a traceback.
    """
    market_path = root / ".claude-plugin" / "marketplace.json"
    if not market_path.exists():
        fail(f"MISSING MARKETPLACE: expected {market_path}")
    try:
        marketplace = json.loads(market_path.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        fail(f"INVALID MARKETPLACE: {market_path} is not valid JSON: {exc}")
    if not isinstance(marketplace, dict):
        fail("INVALID MARKETPLACE: top level must be an object")
    plugins = marketplace.get("plugins")
    if not isinstance(plugins, list):
        fail("INVALID MARKETPLACE: plugins must be a list")

    market_versions: dict[str, str] = {}
    market_descriptions: dict[str, str] = {}
    seen_names: set[str] = set()
    bundle_version = ""
    for entry in plugins:
        if not isinstance(entry, dict):
            fail("INVALID MARKETPLACE: plugin entry must be an object")
        name = entry.get("name")
        version = entry.get("version")
        source = entry.get("source")
        raw_description = entry.get("description")
        # A non-string description is treated as absent so it is reported as
        # MISSING DESCRIPTION below rather than crashing on .strip().
        description = raw_description.strip().strip('"') if isinstance(raw_description, str) else ""
        if not name or not version:
            fail("INVALID MARKETPLACE: every plugin needs name and version")
        if not isinstance(name, str):
            fail(f"INVALID MARKETPLACE: plugin name must be a string, got {name!r}")
        if not description:
            fail(f"MISSING DESCRIPTION: marketplace plugin {name}")
        if name in seen_names:
            fail(f"DUPLICATE MARKETPLACE ENTRY: {name}")
        seen_names.add(name)

        if name == "waza":
            if source != "./":
                fail(f"WRONG BUNDLE SOURCE: source={source!r} expected='./'")
            bundle_version = version
            continue

        if not name.startswith("waza-"):
            fail(
                f"INVALID PLUGIN NAME: {name!r} must be 'waza' (bundle) or "
                f"'waza-<skill>' (per-skill entry)"
            )
        skill_name = name.removeprefix("waza-")
        if not skill_name:
            fail(
                f"INVALID PLUGIN NAME: {name!r} has an empty <skill> suffix; "
                f"per-skill entries must be named 'waza-<skill>' with a non-empty skill name"
            )
        expected_source = f"./skills/{skill_name}"
        if source != expected_source:
            fail(f"WRONG SOURCE: {name} source={source!r} expected={expected_source!r}")
        market_versions[skill_name] = version
        market_descriptions[skill_name] = description

    if "waza" not in seen_names:
        fail(
            "MISSING BUNDLE ENTRY: marketplace.json must include a 'waza' bundle entry "
            "(name=\"waza\", source=\"./\") so /plugin install waza@waza registers "
            "all skills under the waza namespace"
        )

    # After these two set checks the marketplace entries and the skill
    # directories are known to match one-to-one, so the lookups below are safe.
    missing_from_market = sorted(skill_names - set(market_versions))
    if missing_from_market:
        fail("NOT IN MARKETPLACE: " + ", ".join(missing_from_market))
    extra_in_market = sorted(set(market_versions) - skill_names)
    if extra_in_market:
        fail("MISSING SKILL DIRECTORY: " + ", ".join(extra_in_market))

    for skill in sorted(skill_names):
        market_version = market_versions[skill]
        if market_version != expected_version:
            fail(
                f"VERSION DRIFT: marketplace waza-{skill} version={market_version!r} "
                f"!= VERSION file {expected_version!r}.\n"
                f" All marketplace entries march in lock-step. "
                f"Update .claude-plugin/marketplace.json to match VERSION."
            )
        if not market_descriptions[skill].startswith(skill_descriptions[skill]):
            fail(
                f"DESCRIPTION MISMATCH: {skill}\n"
                f" SKILL.md: {skill_descriptions[skill]}\n"
                f" marketplace: {market_descriptions[skill]}\n"
                f" marketplace description must start with the SKILL.md description"
            )
        print(f"ok: marketplace waza-{skill} pinned to {market_version}")

    if bundle_version and bundle_version != expected_version:
        fail(
            f"VERSION DRIFT: waza bundle version={bundle_version!r} "
            f"!= VERSION file {expected_version!r}.\n"
            f" Update the 'waza' entry in .claude-plugin/marketplace.json to match VERSION."
        )
    print(f"ok: all versions in lock-step with VERSION={expected_version}")
181
+
182
+
183
def check_references(root: Path, skill_files: list[Path]):
    """Verify that every file a SKILL.md refers to exists inside its skill directory.

    References are collected from the whole file text with ``REF_PATTERN``
    (``references/``, ``agents/``, ``scripts/`` paths) plus script paths
    matched by ``SCRIPT_VAR_PATTERN``. Each one is resolved against
    ``skills/<skill>/``; the first missing target ends the run via ``fail()``.
    """
    for path in skill_files:
        skill_dir = path.parent.name
        # Skill docs contain non-ASCII text, so decode as UTF-8 explicitly
        # instead of relying on the platform locale.
        text = path.read_text(encoding="utf-8")
        refs = set(REF_PATTERN.findall(text))
        refs |= {"scripts/" + s for s in SCRIPT_VAR_PATTERN.findall(text)}
        for ref in sorted(refs):
            expected = root / "skills" / skill_dir / ref
            if not expected.exists():
                fail(f"BROKEN REFERENCE: {path} references {ref} but file does not exist")
            print(f"ok: reference {skill_dir}/{ref}")
194
+
195
+
196
def check_description_conformance(skill_descriptions: dict[str, str]):
    """Enforce the shape of every skill description.

    A description (after trimming whitespace and surrounding double quotes)
    must be 40 to 500 characters long, must not open with an article or
    pronoun, and must contain both a 'Use when' cue and a 'Not for' exclusion
    (case-insensitive). Skills are checked in name order; the first violation
    ends the run via ``fail()`` and each passing skill prints an ``ok:`` line.
    """
    weak_openers = ("the", "a", "an", "this", "it")
    for skill in sorted(skill_descriptions):
        clean = skill_descriptions[skill].strip().strip('"')
        length = len(clean)
        if length < 40:
            fail(f"DESCRIPTION TOO SHORT: {skill} ({length} chars); need >=40 for reliable resolver matching")
        if length > 500:
            fail(f"DESCRIPTION TOO LONG: {skill} ({length} chars); trim to <=500 to keep the resolver index light")

        words = clean.split()
        opener = words[0].lower() if words else ""
        if opener in weak_openers:
            fail(
                f"DESCRIPTION STARTS WITH ARTICLE: {skill}\n"
                f" Start with a verb or action phrase (third-person). Got: {clean[:60]!r}"
            )

        lowered = clean.lower()
        if "use when" not in lowered:
            fail(
                f"DESCRIPTION MISSING USE-WHEN CUE: {skill}\n"
                f" Description must include a 'Use when ...' trigger phrase because "
                f"some agent runtimes see description before when_to_use. Got: {clean[:120]!r}"
            )
        if "not for" not in lowered:
            fail(
                f"DESCRIPTION MISSING EXCLUSION CLAUSE: {skill}\n"
                f" Must contain a 'Not for ...' clause so the resolver learns when NOT to fire. Got: {clean[:120]!r}"
            )
        print(f"ok: description {skill} ({length} chars)")
227
+
228
+
229
def check_durable_context_and_paths(root: Path, skill_files: list[Path]):
    """Durable context rules must stay portable and evidence-bound.

    Each skill in DURABLE_CONTEXT_SKILLS links to rules/durable-context.md for the
    shared preamble (when to read, read order, type mapping) and then adds
    skill-specific guidance with current-state override evidence. The shared
    rules file itself is checked once for the "raw transcripts" guard.

    Every skill file is also rejected if it contains a personal home-directory
    path (``PERSONAL_PATH_PATTERN``). Skills outside DURABLE_CONTEXT_SKILLS may
    omit the preflight section, but if they include it the same section checks
    apply. The first violation ends the run via ``fail()``.
    """
    rules_path = root / "rules" / "durable-context.md"
    if not rules_path.exists():
        fail(
            f"MISSING SHARED RULE: {rules_path}\n"
            f" Durable context preamble must live at rules/durable-context.md."
        )
    # Decode as UTF-8 explicitly; the docs are not guaranteed to be readable
    # under the platform's locale encoding.
    rules_text = rules_path.read_text(encoding="utf-8").lower()
    if "raw transcripts" not in rules_text:
        fail(
            f"SHARED RULE MAY OVERREAD: {rules_path}\n"
            f" rules/durable-context.md must forbid reading raw transcripts by default."
        )
    print("ok: rules/durable-context.md forbids raw transcripts")

    for path in skill_files:
        skill = path.parent.name
        text = path.read_text(encoding="utf-8")
        if PERSONAL_PATH_PATTERN.search(text):
            fail(
                f"PERSONAL ABSOLUTE PATH IN SKILL: {path}\n"
                f" Skill docs must not hard-code personal home-directory paths. "
                f"Use user-provided paths, project-relative paths, or resolver commands instead."
            )

        has_section = "## Durable Context Preflight" in text
        if skill in DURABLE_CONTEXT_SKILLS and not has_section:
            fail(
                f"MISSING DURABLE CONTEXT PREFLIGHT: {path}\n"
                f" This skill must explain how to consume optional memory/preview context."
            )
        if not has_section:
            continue

        # Isolate the section body: everything after the heading up to the
        # next level-2 heading (or end of file).
        section = text.split("## Durable Context Preflight", 1)[1]
        section = section.split("\n## ", 1)[0]
        section_lower = section.lower()
        if "rules/durable-context.md" not in section:
            fail(
                f"DURABLE CONTEXT MISSING SHARED REFERENCE: {path}\n"
                f" Section must link to rules/durable-context.md for the shared preamble."
            )
        if "current" not in section_lower or "override" not in section_lower:
            fail(
                f"DURABLE CONTEXT NOT EVIDENCE-BOUND: {path}\n"
                f" Skill-specific paragraph must name what current state overrides memory."
            )
        print(f"ok: durable context preflight for {skill}")
284
+
285
+
286
def check_resolver(root: Path, skill_names: set[str]):
    """Every skill must be referenced from skills/RESOLVER.md.

    Keeps the human-readable index in lock-step with the SKILL.md descriptions
    the model actually sees. The check runs both ways: each known skill needs a
    ``skills/<skill>/SKILL.md`` mention, and every such mention in the resolver
    must name a known skill.

    Returns:
        The path to ``skills/RESOLVER.md``.
    """
    resolver_path = root / "skills" / "RESOLVER.md"
    if not resolver_path.exists():
        fail(f"MISSING RESOLVER: expected {resolver_path}")
    # Decode as UTF-8 explicitly rather than with the platform locale.
    resolver_text = resolver_path.read_text(encoding="utf-8")
    for skill in sorted(skill_names):
        token = f"skills/{skill}/SKILL.md"
        if token not in resolver_text:
            fail(
                f"RESOLVER GAP: {skill} has no entry in {resolver_path}\n"
                f" Add a row to a triggers table that references {token!r}."
            )
        print(f"ok: resolver entry for {skill}")

    referenced_skills = set(SKILL_REF_RE.findall(resolver_text))
    stale = sorted(referenced_skills - skill_names)
    if stale:
        fail(f"RESOLVER REFERENCES MISSING SKILL: {', '.join(stale)}")
    print("ok: resolver has no stale skill references")
    return resolver_path
311
+
312
+
313
def collect_all_md(root: Path, skill_names: set[str], resolver_path: Path) -> list[Path]:
    """Build the list of markdown files that the link and table checks scan.

    The list starts with ``resolver_path``. For each skill in name order it
    then adds ``skills/<skill>/SKILL.md`` (whether or not it exists) followed
    by the sorted ``*.md`` files found recursively under that skill's
    ``references`` and ``agents`` directories, when those directories exist.
    """
    collected: list[Path] = [resolver_path]
    for name in sorted(skill_names):
        base = root / "skills" / name
        collected.append(base / "SKILL.md")
        doc_dirs = [base / "references", base / "agents"]
        for folder in doc_dirs:
            if not folder.is_dir():
                continue
            collected += sorted(folder.rglob("*.md"))
    return collected
323
+
324
+
325
# Optional link title after the destination: [text](path "Title") or 'Title'.
_LINK_TITLE_RE = re.compile(r'''\s+(?:"[^"]*"|'[^']*')$''')


def check_markdown_links(root: Path, all_md: list[Path]):
    """Fail on relative markdown links whose target file does not exist.

    Each existing file in ``all_md`` is scanned line by line. Lines inside
    triple-backtick fences are skipped. Links that are empty, start with ``#``
    or ``/``, start with a known URL scheme, or contain ``://`` are ignored.
    For the rest, the optional quoted link title, the fragment and the query
    string are removed and the remainder is resolved relative to the
    containing file. Paths in ``all_md`` that do not exist are skipped.
    """
    for path in all_md:
        if not path.exists():
            continue
        in_code = False
        # Decode as UTF-8 explicitly; the docs contain non-ASCII text that a
        # locale default encoding may not be able to read.
        for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1):
            if line.lstrip().startswith("```"):
                in_code = not in_code
                continue
            if in_code:
                continue
            for m in LINK_RE.finditer(line):
                raw = m.group(1).strip()
                if not raw or raw.startswith(("#", "/")):
                    continue
                if raw.startswith(URL_PREFIXES) or "://" in raw:
                    continue
                # A link title is not part of the path; leaving it in made
                # every titled link look broken.
                destination = _LINK_TITLE_RE.sub("", raw)
                target = destination.split("#", 1)[0].split("?", 1)[0]
                if target and not (path.parent / target).resolve().exists():
                    fail(f"BROKEN MARKDOWN LINK: {path}:{lineno} -> {raw}")
        print(f"ok: markdown links {path.relative_to(root)}")
346
+
347
+
348
# Unescaped | in data cells breaks GitHub rendering (#35).
def check_table_pipes(root: Path, all_md: list[Path]):
    """Fail when a table data row has more cell delimiters than its separator row.

    For each existing file in ``all_md``, a separator row (a line of only
    whitespace, pipes, colons and dashes that contains ``---`` and ``|``)
    records its delimiter count. Every following line that starts with ``|``
    is compared against that count using ``pipe_count``, which ignores escaped
    pipes and pipes inside backticks. Any other line, and any code fence, ends
    the current table. Header rows above the separator are not checked.
    """
    for path in all_md:
        if not path.exists():
            continue
        in_fence = False
        sep_pipes = None  # delimiter count of the current table, if inside one
        # Decode as UTF-8 explicitly rather than with the platform locale.
        for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1):
            stripped = line.strip()
            if stripped.startswith("```"):
                in_fence = not in_fence
                sep_pipes = None
                continue
            if in_fence:
                sep_pipes = None
                continue
            if SEP_RE.match(stripped) and "---" in stripped and "|" in stripped:
                sep_pipes = pipe_count(stripped)
                continue
            if sep_pipes is not None and stripped.startswith("|"):
                if pipe_count(stripped) > sep_pipes:
                    fail(
                        f"UNESCAPED PIPE IN TABLE: {path}:{lineno}\n"
                        f" Use '\\|' or wrap the cell text in backticks."
                    )
                continue
            sep_pipes = None
        print(f"ok: table pipes {path.relative_to(root)}")
376
+
377
+
378
def check_no_root_skill(root: Path):
    """Reject a SKILL.md placed directly in the repository root.

    A root SKILL.md would make `npx skills add tw93/Waza` stop scanning nested
    skills, so the direct coding install path would expose only `/waza`. Claude
    Desktop's single-root SKILL.md is generated by scripts/package-skill.sh
    during release packaging.
    """
    if (root / "SKILL.md").exists():
        fail("ROOT SKILL DISALLOWED: generate the Desktop dispatcher during packaging instead")
    print("ok: no root SKILL.md")
388
+
389
+
390
def check_rules_files_present(root: Path):
    """Confirm the shared rule files under ``rules/`` exist.

    These live outside ``skills/``, so the per-skill reference check does not
    cover them. Files are checked in list order and the first missing one ends
    the run via ``fail()``.
    """
    required = ["english.md", "chinese.md", "anti-patterns.md", "durable-context.md"]
    rules_dir = root / "rules"
    for candidate in (rules_dir / filename for filename in required):
        if not candidate.exists():
            fail(f"MISSING RULE FILE: {candidate}")
    listing = ", ".join(required)
    print(f"ok: rules/ files present ({listing})")
399
+
400
+
401
def check_readme_install_command(root: Path):
    """README must show the default install command users can copy-paste.

    Fails via ``fail()`` when ``README.md`` is missing from ``root`` or does
    not contain the exact install command string.
    """
    readme = root / "README.md"
    if not readme.exists():
        fail(f"MISSING README.md at {readme}")
    # Decode as UTF-8 explicitly so a README with non-ASCII text cannot raise
    # UnicodeDecodeError under a non-UTF-8 locale.
    text = readme.read_text(encoding="utf-8")
    expected = "npx skills add tw93/Waza -a claude-code -g -y"
    if expected not in text:
        fail(
            f"README INSTALL COMMAND: README.md must include {expected!r}\n"
            f" Waza's public install path depends on this exact string."
        )
    print("ok: README installs nested skills")
414
+
415
+
416
def check_english_coaching_guard(root: Path):
    """rules/english.md must keep two failure-mode guards intact:
    (1) silence on Chinese-only messages, (2) silence when English is fine.
    These guards were added after real misfires; do not let them rot.

    The check looks for two literal marker phrases and reports every missing
    one in a single ``fail()`` message.
    """
    english_rule = root / "rules" / "english.md"
    if not english_rule.exists():
        fail(f"MISSING {english_rule}")
    # Decode as UTF-8 explicitly rather than with the platform locale.
    text = english_rule.read_text(encoding="utf-8")
    missing = []
    if "Chinese-only messages" not in text:
        missing.append("'Chinese-only messages'")
    if "already-natural English, stay silent" not in text:
        missing.append("'already-natural English, stay silent'")
    if missing:
        fail(
            "ENGLISH COACHING GUARD: rules/english.md must suppress no-op output. "
            f"Missing markers: {', '.join(missing)}"
        )
    print("ok: English Coaching guard")
435
+
436
+
437
def check_attribution_leak(root: Path):
    """Scan ``*.sh`` and ``*.json`` files under ``root`` for AI-attribution strings.

    The scan walks the directory tree recursively and skips anything inside a
    ``.git`` directory. Markdown is excluded because rules/anti-patterns.md and
    similar docs may describe these strings as patterns to avoid. This module
    owns the pattern list; it is a ``.py`` file and so is not matched by the
    scanned suffixes, and the explicit self-path comparison is kept only as a
    defensive guard.

    Files that cannot be read or are not valid UTF-8 are skipped. The first
    match ends the run via ``fail()``.
    """
    self_path = Path(__file__).resolve()
    for suffix in (".sh", ".json"):
        for path in root.rglob(f"*{suffix}"):
            if ".git" in path.parts:
                continue
            try:
                if path.resolve() == self_path:
                    continue
            except OSError:
                continue
            try:
                # Decode as UTF-8 explicitly: with a locale default encoding a
                # valid UTF-8 file could raise UnicodeDecodeError and be
                # silently skipped, hiding a leak.
                text = path.read_text(encoding="utf-8")
            except (UnicodeDecodeError, OSError):
                continue
            for pat in ATTRIBUTION_PATTERNS:
                if pat in text:
                    fail(
                        f"ATTRIBUTION LEAK: {path.relative_to(root)} contains {pat!r}"
                    )
    print("ok: no attribution leak")
462
+
463
+
464
def check_trigger_overlap(skill_keywords: dict[str, set[str]]):
    """Flag skill pairs whose trigger keyword sets overlap too much.

    For every unordered pair of skills the Jaccard index (shared keywords
    divided by combined keywords) is computed; a value of 0.5 or more means at
    least half of the combined keywords are shared. Pairs with no keywords at
    all are skipped. Every offending pair is reported on stderr before the
    function exits with status 1, so one run shows all overlaps.
    """
    ordered = sorted(skill_keywords)
    pairs = [
        (first, second)
        for position, first in enumerate(ordered)
        for second in ordered[position + 1:]
    ]
    flagged = False
    for first, second in pairs:
        shared = skill_keywords[first] & skill_keywords[second]
        combined = skill_keywords[first] | skill_keywords[second]
        if not combined:
            continue
        jaccard = len(shared) / len(combined)
        if jaccard < 0.5:
            continue
        print(
            f"TRIGGER OVERLAP: {first} vs {second} jaccard={jaccard:.2f} shared={sorted(shared)}",
            file=sys.stderr,
        )
        flagged = True
    if flagged:
        raise SystemExit(1)
    print("ok: trigger keyword overlap below threshold")
@@ -0,0 +1,110 @@
1
+ """Frontmatter parser for Waza skill files.
2
+
3
+ Used by both `verify_skills.py` (validation pipeline) and `build_metadata.py`
4
+ (codegen). Kept dependency-free (stdlib only) so first-run install does not
5
+ require pip.
6
+
7
+ Waza frontmatter is intentionally tiny: top-level scalars `name`,
8
+ `description`, `when_to_use`, `dispatch_intent`. The legacy `metadata.version`
9
+ field is rejected by the verifier (single source of truth is the top-level
10
+ VERSION file).
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import ast
16
+ import sys
17
+ from pathlib import Path
18
+ from typing import NoReturn
19
+
20
+
21
def fail(message: str) -> NoReturn:
    """Write ``message`` to stderr and abort by raising ``SystemExit(1)``."""
    sys.stderr.write(f"{message}\n")
    raise SystemExit(1)
24
+
25
+
26
def parse_frontmatter(path: Path) -> dict:
    """Parse the ``---``-delimited frontmatter at the top of a skill file.

    Only the top-level scalars ``name``, ``description``, ``when_to_use`` and
    ``dispatch_intent`` are read; other top-level keys are ignored. A
    ``metadata:`` mapping is tolerated only so that a legacy
    ``metadata.version`` can be detected and rejected. Quoted values are
    decoded with ``ast.literal_eval``; unquoted values may not contain ``': '``.

    Args:
        path: The SKILL.md file to read (decoded as UTF-8).

    Returns:
        A dict with the stripped ``name``, ``description``, ``when_to_use`` and
        ``dispatch_intent`` strings. The last two default to ``""``.

    Any malformed or missing required field ends the run via ``fail()``.
    """
    # Skill files contain non-ASCII text, so decode as UTF-8 explicitly
    # instead of relying on the platform's locale encoding.
    text = path.read_text(encoding="utf-8")
    lines = text.splitlines()
    if not lines or lines[0] != "---":
        fail(f"INVALID FRONTMATTER: {path} must start with ---")
    try:
        end = lines.index("---", 1)
    except ValueError:
        fail(f"INVALID FRONTMATTER: {path} missing closing ---")

    def parse_scalar(field: str, raw: str) -> str:
        """Return the string value of one ``key: value`` right-hand side."""
        value = raw.strip()
        if not value:
            fail(f"EMPTY FRONTMATTER VALUE: {path} field {field}")
        if value[0] in ("'", '"'):
            try:
                parsed = ast.literal_eval(value)
            except (SyntaxError, ValueError) as exc:
                fail(f"INVALID FRONTMATTER QUOTE: {path} field {field}: {exc}")
            if not isinstance(parsed, str):
                fail(f"INVALID FRONTMATTER VALUE: {path} field {field} must be a string")
            return parsed
        if ": " in value:
            fail(
                f"UNQUOTED FRONTMATTER COLON: {path} field {field}\n"
                f" Quote values containing ': ' so the metadata contract stays unambiguous."
            )
        return value

    fields: dict[str, str] = {}
    in_metadata = False
    for raw_line in lines[1:end]:
        if not raw_line.strip():
            continue
        if raw_line.startswith(" "):
            # Indented lines are only legal as children of `metadata:`.
            if not in_metadata:
                fail(f"INVALID FRONTMATTER INDENT: {path}: {raw_line!r}")
            key, sep, raw_value = raw_line.strip().partition(":")
            if not sep:
                fail(f"INVALID FRONTMATTER LINE: {path}: {raw_line!r}")
            if key == "version":
                fields["version"] = parse_scalar("metadata.version", raw_value)
            continue

        in_metadata = False
        key, sep, raw_value = raw_line.partition(":")
        if not sep:
            fail(f"INVALID FRONTMATTER LINE: {path}: {raw_line!r}")
        if key == "metadata":
            if raw_value.strip():
                fail(f"INVALID FRONTMATTER METADATA: {path} metadata must be a mapping")
            in_metadata = True
        elif key in {"name", "description", "when_to_use", "dispatch_intent"}:
            fields[key] = parse_scalar(key, raw_value)

    name = fields.get("name")
    description = fields.get("description")
    when_to_use = fields.get("when_to_use", "")
    dispatch_intent = fields.get("dispatch_intent", "")

    if not name or not name.strip():
        fail(f"MISSING name: in {path}")
    if not description or not description.strip():
        fail(f"MISSING description: in {path}")

    # metadata.version was removed from per-skill frontmatter in favor of the
    # top-level VERSION file (single source of truth). If a SKILL.md still
    # carries a version field, reject it so the duplication does not return.
    if "version" in fields:
        fail(
            f"STALE metadata.version: {path} still declares a per-skill version. "
            f"Source of truth is the top-level VERSION file; remove the metadata "
            f"block from frontmatter."
        )

    return {
        "name": name.strip(),
        "description": description.strip(),
        "when_to_use": when_to_use.strip(),
        "dispatch_intent": dispatch_intent.strip(),
    }
107
+
108
+
109
def parse_when_to_use_keywords(when_to_use: str) -> set[str]:
    """Split a comma-separated ``when_to_use`` value into lowercase keywords.

    Each piece is stripped of surrounding whitespace; empty pieces are dropped.
    """
    keywords: set[str] = set()
    for piece in when_to_use.split(","):
        token = piece.strip()
        if token:
            keywords.add(token.lower())
    return keywords