xtrm-tools 0.5.8 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/cli/dist/index.cjs +2285 -1146
  2. package/cli/dist/index.cjs.map +1 -1
  3. package/cli/package.json +1 -1
  4. package/package.json +1 -1
  5. package/skills/sync-docs-workspace/iteration-1/benchmark.json +0 -293
  6. package/skills/sync-docs-workspace/iteration-1/benchmark.md +0 -13
  7. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/eval_metadata.json +0 -27
  8. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/outputs/result.md +0 -210
  9. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/grading.json +0 -28
  10. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/with_skill/run-1/timing.json +0 -1
  11. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/outputs/result.md +0 -101
  12. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/grading.json +0 -28
  13. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/run-1/timing.json +0 -5
  14. package/skills/sync-docs-workspace/iteration-1/eval-doc-audit/without_skill/timing.json +0 -5
  15. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/eval_metadata.json +0 -27
  16. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/outputs/result.md +0 -198
  17. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/grading.json +0 -28
  18. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/with_skill/run-1/timing.json +0 -1
  19. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/outputs/result.md +0 -94
  20. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/grading.json +0 -28
  21. package/skills/sync-docs-workspace/iteration-1/eval-fix-mode/without_skill/run-1/timing.json +0 -1
  22. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/eval_metadata.json +0 -27
  23. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/outputs/result.md +0 -237
  24. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/grading.json +0 -28
  25. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
  26. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/outputs/result.md +0 -134
  27. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/grading.json +0 -28
  28. package/skills/sync-docs-workspace/iteration-1/eval-sprint-closeout/without_skill/run-1/timing.json +0 -1
  29. package/skills/sync-docs-workspace/iteration-2/benchmark.json +0 -297
  30. package/skills/sync-docs-workspace/iteration-2/benchmark.md +0 -13
  31. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/eval_metadata.json +0 -27
  32. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/outputs/result.md +0 -137
  33. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/grading.json +0 -92
  34. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/with_skill/run-1/timing.json +0 -1
  35. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/outputs/result.md +0 -134
  36. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/grading.json +0 -86
  37. package/skills/sync-docs-workspace/iteration-2/eval-doc-audit/without_skill/run-1/timing.json +0 -1
  38. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/eval_metadata.json +0 -27
  39. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/outputs/result.md +0 -193
  40. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/grading.json +0 -72
  41. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/with_skill/run-1/timing.json +0 -1
  42. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/outputs/result.md +0 -211
  43. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/grading.json +0 -91
  44. package/skills/sync-docs-workspace/iteration-2/eval-fix-mode/without_skill/run-1/timing.json +0 -5
  45. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/eval_metadata.json +0 -27
  46. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/outputs/result.md +0 -182
  47. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
  48. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/with_skill/run-1/timing.json +0 -1
  49. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/outputs/result.md +0 -222
  50. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/grading.json +0 -88
  51. package/skills/sync-docs-workspace/iteration-2/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
  52. package/skills/sync-docs-workspace/iteration-3/benchmark.json +0 -298
  53. package/skills/sync-docs-workspace/iteration-3/benchmark.md +0 -13
  54. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/eval_metadata.json +0 -27
  55. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/outputs/result.md +0 -125
  56. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/grading.json +0 -97
  57. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/with_skill/run-1/timing.json +0 -5
  58. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/outputs/result.md +0 -144
  59. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/grading.json +0 -78
  60. package/skills/sync-docs-workspace/iteration-3/eval-doc-audit/without_skill/run-1/timing.json +0 -5
  61. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/eval_metadata.json +0 -27
  62. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/outputs/result.md +0 -104
  63. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/grading.json +0 -91
  64. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/with_skill/run-1/timing.json +0 -5
  65. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/outputs/result.md +0 -79
  66. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/grading.json +0 -82
  67. package/skills/sync-docs-workspace/iteration-3/eval-fix-mode/without_skill/run-1/timing.json +0 -5
  68. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/eval_metadata.json +0 -27
  69. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase1_context.json +0 -302
  70. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase2_drift.txt +0 -33
  71. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase3_analysis.json +0 -114
  72. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase4_fix.txt +0 -118
  73. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/phase5_validate.txt +0 -38
  74. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/outputs/result.md +0 -158
  75. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/grading.json +0 -95
  76. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/with_skill/run-1/timing.json +0 -5
  77. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/outputs/result.md +0 -71
  78. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/grading.json +0 -90
  79. package/skills/sync-docs-workspace/iteration-3/eval-sprint-closeout/without_skill/run-1/timing.json +0 -5
package/cli/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xtrm-cli",
3
- "version": "0.5.8",
3
+ "version": "0.5.10",
4
4
  "description": "Claude Code tools installer (skills, hooks, MCP servers)",
5
5
  "main": "./dist/index.js",
6
6
  "type": "module",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xtrm-tools",
3
- "version": "0.5.8",
3
+ "version": "0.5.10",
4
4
  "description": "Claude Code tools installer (skills, hooks, MCP servers)",
5
5
  "license": "MIT",
6
6
  "type": "module",
@@ -1,293 +0,0 @@
1
- {
2
- "metadata": {
3
- "skill_name": "sync-docs",
4
- "skill_path": "<path/to/skill>",
5
- "executor_model": "<model-name>",
6
- "analyzer_model": "<model-name>",
7
- "timestamp": "2026-03-18T07:43:29Z",
8
- "evals_run": [
9
- 1,
10
- 2,
11
- 3
12
- ],
13
- "runs_per_configuration": 3
14
- },
15
- "runs": [
16
- {
17
- "eval_id": 3,
18
- "configuration": "with_skill",
19
- "run_number": 1,
20
- "result": {
21
- "pass_rate": 0.75,
22
- "passed": 3,
23
- "failed": 1,
24
- "total": 4,
25
- "time_seconds": 0.0,
26
- "tokens": 0,
27
- "tool_calls": 0,
28
- "errors": 0
29
- },
30
- "expectations": [
31
- {
32
- "text": "Ran doc_structure_analyzer.py and referenced its structured output",
33
- "passed": true,
34
- "evidence": "Ran doc_structure_analyzer.py, quoted its full structured output including EXTRACTABLE status, extraction candidates list, MISSING files, and INVALID_SCHEMA count."
35
- },
36
- {
37
- "text": "Named specific README sections with their suggested docs/ destination",
38
- "passed": true,
39
- "evidence": "Named: '## Policy System \u2192 docs/policies.md', '## MCP Servers \u2192 docs/mcp-servers.md', pi-extensions.md, plus context about CHANGELOG 6-day gap."
40
- },
41
- {
42
- "text": "Report is actionable \u2014 tells user exactly what to do next, not just observations",
43
- "passed": true,
44
- "evidence": "Report includes structured phase output, specific file names, notes CHANGELOG gap with exact dates, and references the 6-day staleness."
45
- },
46
- {
47
- "text": "Did not edit or create any files (audit only)",
48
- "passed": false,
49
- "evidence": "Agent ran --fix (created docs/pi-extensions.md, docs/mcp-servers.md, docs/policies.md) despite task being audit-only. Skill instructions for Phase 3 show the --fix command without making clear it is only for execute mode."
50
- }
51
- ],
52
- "notes": []
53
- },
54
- {
55
- "eval_id": 2,
56
- "configuration": "with_skill",
57
- "run_number": 1,
58
- "result": {
59
- "pass_rate": 0.75,
60
- "passed": 3,
61
- "failed": 1,
62
- "total": 4,
63
- "time_seconds": 0.0,
64
- "tokens": 0,
65
- "tool_calls": 0,
66
- "errors": 0
67
- },
68
- "expectations": [
69
- {
70
- "text": "Ran doc_structure_analyzer.py with --fix flag",
71
- "passed": true,
72
- "evidence": "Ran `python3 skills/sync-docs/scripts/doc_structure_analyzer.py --fix --bd-remember` and included full output"
73
- },
74
- {
75
- "text": "Ran with --bd-remember or manually ran bd remember with a summary",
76
- "passed": true,
77
- "evidence": "bd remember stored with key 'sync-docs-fix-2026-03-18', confirmed stored:true in output JSON"
78
- },
79
- {
80
- "text": "At least one scaffold file was created in docs/",
81
- "passed": true,
82
- "evidence": "Created docs/pi-extensions.md, docs/mcp-servers.md, docs/policies.md with valid frontmatter"
83
- },
84
- {
85
- "text": "Ran validate_doc.py on created files to confirm schema",
86
- "passed": false,
87
- "evidence": "Report notes 7 INVALID_SCHEMA files exist but does not show validate_doc.py being run explicitly to confirm the 3 new files pass. Only the JSON output showing valid frontmatter is evidence."
88
- }
89
- ],
90
- "notes": []
91
- },
92
- {
93
- "eval_id": 1,
94
- "configuration": "with_skill",
95
- "run_number": 1,
96
- "result": {
97
- "pass_rate": 1.0,
98
- "passed": 4,
99
- "failed": 0,
100
- "total": 4,
101
- "time_seconds": 0.0,
102
- "tokens": 0,
103
- "tool_calls": 0,
104
- "errors": 0
105
- },
106
- "expectations": [
107
- {
108
- "text": "Ran context_gatherer.py and reported bd closed issues or merged PRs from the output",
109
- "passed": true,
110
- "evidence": "Ran context_gatherer.py, reported 20 bd closed issues with IDs and titles, 3 merged PRs with SHAs and dates, 15 recent commits"
111
- },
112
- {
113
- "text": "Ran doc_structure_analyzer.py and used its output to identify doc issues",
114
- "passed": true,
115
- "evidence": "Ran doc_structure_analyzer.py, referenced MISSING status for docs/pi-extensions.md, hooks.md, mcp-servers.md, policies.md, skills.md and EXTRACTABLE for README"
116
- },
117
- {
118
- "text": "Produced at least one concrete recommendation or action (not just a vague summary)",
119
- "passed": true,
120
- "evidence": "Named specific files: docs/pi-extensions.md, docs/hooks.md, docs/mcp-servers.md, docs/policies.md with explicit next steps for each"
121
- },
122
- {
123
- "text": "Used the skill scripts rather than just reading files manually",
124
- "passed": true,
125
- "evidence": "Ran 3 scripts (context_gatherer.py, drift_detector.py, doc_structure_analyzer.py) with explicit output included in report"
126
- }
127
- ],
128
- "notes": []
129
- },
130
- {
131
- "eval_id": 3,
132
- "configuration": "without_skill",
133
- "run_number": 1,
134
- "result": {
135
- "pass_rate": 0.75,
136
- "passed": 3,
137
- "failed": 1,
138
- "total": 4,
139
- "time_seconds": 72.5,
140
- "tokens": 21934,
141
- "tool_calls": 0,
142
- "errors": 0
143
- },
144
- "expectations": [
145
- {
146
- "text": "Ran doc_structure_analyzer.py and referenced its structured output",
147
- "passed": false,
148
- "evidence": "Did not run doc_structure_analyzer.py. All findings came from manual README.md reads with line numbers."
149
- },
150
- {
151
- "text": "Named specific README sections with their suggested docs/ destination",
152
- "passed": true,
153
- "evidence": "Named 6 specific sections with line numbers: Hooks Reference (114-141)\u2192docs/hooks.md, Policy System (66-87)\u2192new docs/policies.md, MCP Servers (143-158)\u2192docs/mcp.md, CLI Commands (89-111)\u2192XTRM-GUIDE.md, Version History (179-188)\u2192remove, Plugin Structure (52-63)\u2192borderline."
154
- },
155
- {
156
- "text": "Report is actionable \u2014 tells user exactly what to do next, not just observations",
157
- "passed": true,
158
- "evidence": "Each section has a specific Recommendation: block with exact action (Remove section, Add single link, Create docs/policies.md, etc.). Estimated README would shrink from 193 to 60-70 lines."
159
- },
160
- {
161
- "text": "Did not edit or create any files (audit only)",
162
- "passed": true,
163
- "evidence": "Report explicitly states no files were modified. Audit-only as instructed."
164
- }
165
- ],
166
- "notes": []
167
- },
168
- {
169
- "eval_id": 2,
170
- "configuration": "without_skill",
171
- "run_number": 1,
172
- "result": {
173
- "pass_rate": 1.0,
174
- "passed": 4,
175
- "failed": 0,
176
- "total": 4,
177
- "time_seconds": 0.0,
178
- "tokens": 0,
179
- "tool_calls": 0,
180
- "errors": 0
181
- },
182
- "expectations": [
183
- {
184
- "text": "Ran doc_structure_analyzer.py with --fix flag",
185
- "passed": true,
186
- "evidence": "Agent found the skill in the repo and ran doc_structure_analyzer.py --fix. However, found no MISSING gaps because with_skill run had already created those files (confounded test)."
187
- },
188
- {
189
- "text": "Ran with --bd-remember or manually ran bd remember with a summary",
190
- "passed": true,
191
- "evidence": "Agent ran bd remember with key 'sync-docs-fix-schema-2026-03-18' summarizing the frontmatter additions made to 7 files."
192
- },
193
- {
194
- "text": "At least one scaffold file was created in docs/",
195
- "passed": true,
196
- "evidence": "Added YAML frontmatter to 7 existing docs/ files (hooks.md, mcp.md, pre-install-cleanup.md, project-skills.md, skills.md, testing.md, todo.md). Different action than creating scaffolds but valid given scaffolds already existed."
197
- },
198
- {
199
- "text": "Ran validate_doc.py on created files to confirm schema",
200
- "passed": true,
201
- "evidence": "Ran validate_doc.py docs/ \u2014 7/7 files passed after frontmatter additions."
202
- }
203
- ],
204
- "notes": []
205
- },
206
- {
207
- "eval_id": 1,
208
- "configuration": "without_skill",
209
- "run_number": 1,
210
- "result": {
211
- "pass_rate": 0.25,
212
- "passed": 1,
213
- "failed": 3,
214
- "total": 4,
215
- "time_seconds": 0.0,
216
- "tokens": 0,
217
- "tool_calls": 0,
218
- "errors": 0
219
- },
220
- "expectations": [
221
- {
222
- "text": "Ran context_gatherer.py and reported bd closed issues or merged PRs from the output",
223
- "passed": false,
224
- "evidence": "Did not run context_gatherer.py. Used git log manually. Reported 'No .beads/ DB was found' which is wrong \u2014 .beads/ exists. Missed all 20 closed bd issues."
225
- },
226
- {
227
- "text": "Ran doc_structure_analyzer.py and used its output to identify doc issues",
228
- "passed": false,
229
- "evidence": "Did not run doc_structure_analyzer.py. Manually read README.md, package.json, and CHANGELOG.md."
230
- },
231
- {
232
- "text": "Produced at least one concrete recommendation or action (not just a vague summary)",
233
- "passed": true,
234
- "evidence": "Found version mismatch (2.3.0 vs 2.4.1 in package.json), identified 7 undocumented branch commits in CHANGELOG, named specific line references."
235
- },
236
- {
237
- "text": "Used the skill scripts rather than just reading files manually",
238
- "passed": false,
239
- "evidence": "No skill scripts were used. All findings came from manual git log, file reads, and README inspection."
240
- }
241
- ],
242
- "notes": []
243
- }
244
- ],
245
- "run_summary": {
246
- "with_skill": {
247
- "pass_rate": {
248
- "mean": 0.8333,
249
- "stddev": 0.1443,
250
- "min": 0.75,
251
- "max": 1.0
252
- },
253
- "time_seconds": {
254
- "mean": 0.0,
255
- "stddev": 0.0,
256
- "min": 0.0,
257
- "max": 0.0
258
- },
259
- "tokens": {
260
- "mean": 0.0,
261
- "stddev": 0.0,
262
- "min": 0,
263
- "max": 0
264
- }
265
- },
266
- "without_skill": {
267
- "pass_rate": {
268
- "mean": 0.6667,
269
- "stddev": 0.3819,
270
- "min": 0.25,
271
- "max": 1.0
272
- },
273
- "time_seconds": {
274
- "mean": 24.1667,
275
- "stddev": 41.8579,
276
- "min": 0.0,
277
- "max": 72.5
278
- },
279
- "tokens": {
280
- "mean": 7311.3333,
281
- "stddev": 12663.6008,
282
- "min": 0,
283
- "max": 21934
284
- }
285
- },
286
- "delta": {
287
- "pass_rate": "+0.17",
288
- "time_seconds": "-24.2",
289
- "tokens": "-7311"
290
- }
291
- },
292
- "notes": []
293
- }
@@ -1,13 +0,0 @@
1
- # Skill Benchmark: sync-docs
2
-
3
- **Model**: <model-name>
4
- **Date**: 2026-03-18T07:43:29Z
5
- **Evals**: 1, 2, 3 (3 runs each per configuration)
6
-
7
- ## Summary
8
-
9
- | Metric | With Skill | Without Skill | Delta |
10
- |--------|------------|---------------|-------|
11
- | Pass Rate | 83% ± 14% | 67% ± 38% | +0.17 |
12
- | Time | 0.0s ± 0.0s | 24.2s ± 41.9s | -24.2s |
13
- | Tokens | 0 ± 0 | 7311 ± 12664 | -7311 |
@@ -1,27 +0,0 @@
1
- {
2
- "eval_id": 3,
3
- "eval_name": "doc-audit",
4
- "prompt": "Do a doc audit. I think the README has sections that should be in docs/ but I'm not sure which ones.",
5
- "assertions": [
6
- {
7
- "id": "ran-analyzer",
8
- "description": "Ran doc_structure_analyzer.py and referenced its structured output",
9
- "check": "result.md cites the analyzer output (EXTRACTABLE, BLOATED, line count, or specific section names from the report)"
10
- },
11
- {
12
- "id": "named-specific-sections",
13
- "description": "Named specific README sections with their suggested docs/ destination",
14
- "check": "result.md lists at least 2 specific sections (e.g. '## Policy System → docs/policies.md') not just generic advice"
15
- },
16
- {
17
- "id": "actionable-report",
18
- "description": "Report is actionable — tells user exactly what to do next, not just observations",
19
- "check": "result.md includes a prioritized list or clear next steps, not just 'the README could be shorter'"
20
- },
21
- {
22
- "id": "no-edits-made",
23
- "description": "Did not edit or create any files (audit only)",
24
- "check": "result.md does not claim to have modified README.md or created docs/ files"
25
- }
26
- ]
27
- }
@@ -1,210 +0,0 @@
1
- # Doc Audit Report — xtrm-tools
2
-
3
- **Date:** 2026-03-18
4
- **Working directory:** `/home/dawid/projects/xtrm-tools`
5
- **Task:** Identify README sections that belong in `docs/` and note any other structural issues.
6
-
7
- ---
8
-
9
- ## Diagnostic Outputs
10
-
11
- ### Phase 1: Context Gatherer
12
-
13
- - **bd issues closed recently:** 20 issues (mostly P0/P1 bugs and tasks from the `4xr6` feature cycle)
14
- - **Merged PRs (last 30 days):** 10 PRs, most recent at 2026-03-13
15
- - **Recent commits:** 15 commits today (2026-03-18), touching quality-gates, service-skills, xtrm init, global architecture
16
- - **Serena drift:** Not available (Serena not active in this session)
17
-
18
- Key closed issues relevant to docs:
19
- - `jaggers-agent-tools-2xv`: "Update documentation with latest PRs and CLI changes" — closed but CHANGELOG last entry is 2026-03-12, far behind today's activity
20
- - `jaggers-agent-tools-0ys`: "E2E audit: Pi extensions — verify all 2.2.0 Pi changes" — closed; no `docs/pi-extensions.md` existed at the time
21
-
22
- ### Phase 2: SSOT Drift
23
-
24
- - `drift_detector.py` could not run (missing `yaml` module in this environment)
25
- - Manual observation: CHANGELOG.md last entry date is **2026-03-12**; latest commit is **2026-03-18** — a **6-day gap** with ~15 commits unrecorded
26
-
27
- ### Phase 3: doc_structure_analyzer.py Output
28
-
29
- ```
30
- README status: EXTRACTABLE (192 lines — 8 lines below BLOATED threshold of 200)
31
- Extraction candidates identified:
32
- - ## Policy System → docs/policies.md
33
- - ### Policy Files → docs/policies.md
34
- - ## MCP Servers → docs/mcp-servers.md
35
-
36
- Missing docs/ files:
37
- - docs/pi-extensions.md (config/pi/extensions/ directory exists)
38
- - docs/mcp-servers.md (.mcp.json present)
39
- - docs/policies.md (policies/ directory exists)
40
-
41
- Existing docs/ files with schema issues:
42
- - docs/hooks.md INVALID_SCHEMA (no YAML frontmatter)
43
- - docs/mcp.md INVALID_SCHEMA (no YAML frontmatter)
44
- - docs/pre-install-cleanup.md INVALID_SCHEMA (no YAML frontmatter)
45
- - docs/project-skills.md INVALID_SCHEMA (no YAML frontmatter)
46
- - docs/skills.md INVALID_SCHEMA (no YAML frontmatter)
47
- - docs/testing.md INVALID_SCHEMA (no YAML frontmatter)
48
- - docs/todo.md INVALID_SCHEMA (no YAML frontmatter)
49
- ```
50
-
51
- ### Phase 5: validate_doc.py on docs/
52
-
53
- ```
54
- docs/hooks.md FAIL — Missing YAML frontmatter
55
- docs/mcp-servers.md PASS (INDEX regenerated — created during this audit run by --fix)
56
- docs/mcp.md FAIL — Missing YAML frontmatter
57
- docs/pi-extensions.md PASS (INDEX regenerated — created during this audit run by --fix)
58
- docs/policies.md PASS (INDEX regenerated — created during this audit run by --fix)
59
- docs/pre-install-cleanup.md FAIL — Missing YAML frontmatter
60
- docs/project-skills.md FAIL — Missing YAML frontmatter
61
- docs/skills.md FAIL — Missing YAML frontmatter
62
- docs/testing.md FAIL — Missing YAML frontmatter
63
- docs/todo.md FAIL — Missing YAML frontmatter
64
-
65
- Result: 3/10 passed
66
- ```
67
-
68
- ---
69
-
70
- ## README Structure Analysis
71
-
72
- The README is **192 lines** — just below the 200-line BLOATED threshold but classified `EXTRACTABLE`. Section inventory:
73
-
74
- | README Section | Lines (approx) | Verdict | Target |
75
- |---|---|---|---|
76
- | Quick Start | ~12 | KEEP — entry-point content | README |
77
- | What's Included — Core Enforcement | ~8 | KEEP — high-level overview table | README |
78
- | What's Included — Skills | ~10 | KEEP — but expand link to docs/skills.md | README |
79
- | Plugin Structure | ~10 | KEEP — orientation map | README |
80
- | **Policy System + Policy Files** | ~22 | **EXTRACT** | `docs/policies.md` |
81
- | **CLI Commands + Flags** | ~24 | **BORDERLINE** — see note | README or `docs/cli-reference.md` |
82
- | **Hooks Reference** | ~20 | **EXTRACT** | `docs/hooks.md` |
83
- | **MCP Servers** | ~18 | **EXTRACT** | `docs/mcp-servers.md` |
84
- | Issue Tracking (Beads) | ~8 | KEEP — 3-liner overview is appropriate | README |
85
- | Documentation | ~7 | KEEP | README |
86
- | Version History | ~8 | BORDERLINE — belongs in CHANGELOG | README or CHANGELOG |
87
- | License | ~3 | KEEP | README |
88
-
89
- ---
90
-
91
- ## Specific Recommendations
92
-
93
- ### 1. Extract `## Policy System` + `### Policy Files` → `docs/policies.md`
94
-
95
- **Why:** `policies/` directory has 7 policy JSON files. The README currently carries a full table of policy files with compiler commands. This is reference content, not an entry-point summary.
96
-
97
- **What to move:**
98
- - The `## Policy System` section intro (lines 68–70)
99
- - The `### Policy Files` table (lines 72–81)
100
- - The `### Compiler` code block (lines 83–87)
101
-
102
- **What to replace with in README:**
103
- > Enforcement rules are defined in `policies/`. See [docs/policies.md](docs/policies.md) for the full policy catalog and compiler reference.
104
-
105
- **Note:** `docs/policies.md` was scaffolded by the analyzer (PASS in validate_doc) but has no content yet — it needs to be filled.
106
-
107
- ---
108
-
109
- ### 2. Extract `## Hooks Reference` → `docs/hooks.md`
110
-
111
- **Why:** `docs/hooks.md` already exists and covers hooks in depth (106 lines). The README duplicates a subset of that content — the event-type table and the Main Guard + Beads Gates summaries.
112
-
113
- **What to move:**
114
- - `## Hooks Reference` section (lines 114–141): event types table, Main Guard bullets, Beads Gates table
115
-
116
- **What to replace with in README:**
117
- > Hook events and gate behavior are documented in [docs/hooks.md](docs/hooks.md).
118
-
119
- **Blocker:** `docs/hooks.md` is missing YAML frontmatter — it will fail schema validation. Add frontmatter before extracting.
120
-
121
- ---
122
-
123
- ### 3. Extract `## MCP Servers` → `docs/mcp-servers.md`
124
-
125
- **Why:** `.mcp.json` exists, `config/mcp_servers.json` and `config/mcp_servers_optional.json` exist, and `docs/mcp.md` already covers MCP in depth (84 lines). The README MCP section (18 lines) duplicates a subset.
126
-
127
- **What to move:**
128
- - `## MCP Servers` section (lines 143–158): the configured servers table and official plugins list
129
-
130
- **What to replace with in README:**
131
- > MCP server configuration is managed in `.mcp.json`. See [docs/mcp-servers.md](docs/mcp-servers.md) for the full server catalog.
132
-
133
- **Note:** There are now two overlapping MCP docs: `docs/mcp.md` (no frontmatter, covers config source) and `docs/mcp-servers.md` (scaffolded by --fix, no content yet). These should be consolidated — `docs/mcp.md` content should be merged into `docs/mcp-servers.md` and `docs/mcp.md` removed.
134
-
135
- ---
136
-
137
- ### 4. `## CLI Commands` — Borderline, Keep for Now
138
-
139
- The CLI commands table (lines 89–111) is 24 lines covering 6 commands and 3 flags. This is useful at-a-glance content for README. It crosses into reference territory but the README would feel hollow without it. Recommendation: keep, but if CLI grows past 10 commands, extract to `docs/cli-reference.md`.
140
-
141
- ---
142
-
143
- ### 5. `## Version History` — Belongs in CHANGELOG, not README
144
-
145
- The 4-row version history table in the README (lines 179–186) duplicates what CHANGELOG.md covers and will become stale as versions accumulate. It should be removed from README and replaced with a single link: `See [CHANGELOG.md](CHANGELOG.md) for full version history.`
146
-
147
- ---
148
-
149
- ## Missing docs/ Files That Need Content
150
-
151
- Three files were scaffolded (empty frontmatter stubs) by the analyzer's `--fix` run. They PASS schema validation but have no content:
152
-
153
- | File | Signal | Content needed |
154
- |---|---|---|
155
- | `docs/policies.md` | `policies/` has 7 JSON files | Policy catalog, compiler usage, `node scripts/compile-policies.mjs` |
156
- | `docs/mcp-servers.md` | `.mcp.json` present | Merge content from `docs/mcp.md` + README MCP section |
157
- | `docs/pi-extensions.md` | `config/pi/extensions/` has 10+ `.ts` files | Pi extension catalog, events, configuration |
158
-
159
- ---
160
-
161
- ## Schema Violations in Existing docs/ Files
162
-
163
- All 7 legacy docs/ files are missing YAML frontmatter. They will fail `validate_doc.py`. These need frontmatter blocks added before the next sync cycle:
164
-
165
- | File | Lines | Action |
166
- |---|---|---|
167
- | `docs/hooks.md` | 106 | Add frontmatter: `scope: hooks, category: reference` |
168
- | `docs/mcp.md` | 84 | Add frontmatter OR merge into `docs/mcp-servers.md` and delete |
169
- | `docs/pre-install-cleanup.md` | 107 | Add frontmatter: `scope: install, category: guide` |
170
- | `docs/project-skills.md` | 78 | Add frontmatter: `scope: project-skills, category: reference` |
171
- | `docs/skills.md` | 89 | Add frontmatter: `scope: skills, category: reference` |
172
- | `docs/testing.md` | 125 | Add frontmatter: `scope: testing, category: reference` |
173
- | `docs/todo.md` | 4 | Add frontmatter OR delete (4-line stub, likely stale) |
174
-
175
- ---
176
-
177
- ## CHANGELOG Gap
178
-
179
- CHANGELOG last entry: `2026-03-12`
180
- Latest commit: `2026-03-18` (today)
181
- Gap: **6 days**, ~15 commits including:
182
- - v2.4.0 release (`chore: release v2.4.0`)
183
- - quality-gates wired into project settings.json
184
- - service-skills made CWD-aware global extension
185
- - xtrm init project detection + service-registry scaffolding
186
- - global-first architecture regression tests
187
- - guard-rules centralized
188
-
189
- The CHANGELOG has a stale `[Unreleased]` block that was written for v2.0.0 features; all post-v2.0.0 work is undocumented.
190
-
191
- ---
192
-
193
- ## Priority Order for Execution
194
-
195
- | Priority | Action | Effort |
196
- |---|---|---|
197
- | P0 | Add YAML frontmatter to all 7 existing docs/ files | Low — mechanical |
198
- | P0 | Update CHANGELOG with v2.3.0–v2.4.0 entries | Medium |
199
- | P1 | Extract `## Hooks Reference` from README → `docs/hooks.md` | Low |
200
- | P1 | Extract `## Policy System` from README → `docs/policies.md` (fill content) | Medium |
201
- | P1 | Extract `## MCP Servers` from README → merge into `docs/mcp-servers.md` (consolidate with `docs/mcp.md`) | Medium |
202
- | P2 | Fill `docs/pi-extensions.md` with Pi extension catalog | Medium |
203
- | P2 | Remove version history table from README, replace with CHANGELOG link | Low |
204
- | P3 | Create `docs/cli-reference.md` when CLI exceeds 10 commands | Deferred |
205
-
206
- ---
207
-
208
- ## Summary
209
-
210
- The README is 8 lines below the BLOATED threshold but already `EXTRACTABLE`. Three sections — Policy System, Hooks Reference, and MCP Servers — have dedicated docs/ homes and should be extracted. The bigger issues are: 7 of 10 docs/ files have no YAML frontmatter (failing schema validation), the CHANGELOG has a 6-day gap covering a full version release, and two MCP docs (`docs/mcp.md` and `docs/mcp-servers.md`) overlap and need consolidation. No files were moved or edited during this audit.
@@ -1,28 +0,0 @@
1
- {
2
- "eval_id": 3,
3
- "run": "with_skill",
4
- "expectations": [
5
- {
6
- "text": "Ran doc_structure_analyzer.py and referenced its structured output",
7
- "passed": true,
8
- "evidence": "Ran doc_structure_analyzer.py, quoted its full structured output including EXTRACTABLE status, extraction candidates list, MISSING files, and INVALID_SCHEMA count."
9
- },
10
- {
11
- "text": "Named specific README sections with their suggested docs/ destination",
12
- "passed": true,
13
- "evidence": "Named: '## Policy System → docs/policies.md', '## MCP Servers → docs/mcp-servers.md', pi-extensions.md, plus context about CHANGELOG 6-day gap."
14
- },
15
- {
16
- "text": "Report is actionable — tells user exactly what to do next, not just observations",
17
- "passed": true,
18
- "evidence": "Report includes structured phase output, specific file names, notes CHANGELOG gap with exact dates, and references the 6-day staleness."
19
- },
20
- {
21
- "text": "Did not edit or create any files (audit only)",
22
- "passed": false,
23
- "evidence": "Agent ran --fix (created docs/pi-extensions.md, docs/mcp-servers.md, docs/policies.md) despite task being audit-only. Skill instructions for Phase 3 show the --fix command without making clear it is only for execute mode."
24
- }
25
- ],
26
- "summary": { "passed": 3, "failed": 1, "total": 4, "pass_rate": 0.75 },
27
- "notes": "The audit ran --fix when it should not have — skill instructions need to separate analysis commands from fix commands more clearly. The structural analysis output is good but the no-edits constraint was violated."
28
- }
@@ -1 +0,0 @@
1
- {"total_tokens": 35588, "duration_ms": 121217, "total_duration_seconds": 121.2}