xtrm-tools 0.7.17 → 0.7.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.xtrm/config/hooks.json +2 -0
- package/.xtrm/config/instructions/agents-top.md +2 -1
- package/.xtrm/registry.json +429 -712
- package/.xtrm/skills/default/creating-service-skills/scripts/bootstrap.py +82 -156
- package/.xtrm/skills/default/creating-service-skills/scripts/scaffolder.py +73 -121
- package/.xtrm/skills/default/hook-development/references/patterns.md +1 -1
- package/.xtrm/skills/default/last30days/scripts/test-v1-vs-v2.sh +2 -2
- package/.xtrm/skills/default/planning/SKILL.md +75 -29
- package/.xtrm/skills/default/releasing/SKILL.md +163 -57
- package/.xtrm/skills/default/security-pipeline/SKILL.md +192 -0
- package/.xtrm/skills/default/security-pipeline/scripts/security-bootstrap.sh +294 -0
- package/.xtrm/skills/default/security-pipeline/templates/.githooks/pre-push.template +39 -0
- package/.xtrm/skills/default/security-pipeline/templates/.github/workflows/gitleaks.yml +33 -0
- package/.xtrm/skills/default/security-pipeline/templates/.github/workflows/osv-scanner.yml +33 -0
- package/.xtrm/skills/default/security-pipeline/templates/.github/workflows/semgrep.yml +41 -0
- package/.xtrm/skills/default/security-pipeline/templates/.gitleaks.toml +44 -0
- package/.xtrm/skills/default/security-pipeline/templates/.pre-commit-config.yaml +67 -0
- package/.xtrm/skills/default/security-pipeline/templates/.semgrepignore +46 -0
- package/.xtrm/skills/default/security-pipeline/templates/scripts/security-scan.sh +57 -0
- package/.xtrm/skills/default/security-pipeline/templates/scripts/semgrep-diff.sh +68 -0
- package/.xtrm/skills/default/session-close-report/SKILL.md +167 -6
- package/.xtrm/skills/default/sync-docs/SKILL.md +1 -1
- package/.xtrm/skills/default/update-xt/SKILL.md +270 -4
- package/.xtrm/skills/default/updating-service-skills/scripts/drift_detector.py +22 -0
- package/.xtrm/skills/default/using-script-specialists/SKILL.md +7 -5
- package/.xtrm/skills/default/using-specialists/SKILL.md +13 -12
- package/.xtrm/skills/default/using-specialists-auto/SKILL.md +137 -0
- package/.xtrm/skills/default/using-specialists-v2/SKILL.md +14 -21
- package/.xtrm/skills/default/using-specialists-v3/SKILL.md +533 -21
- package/.xtrm/skills/default/vaultctl/SKILL.md +2 -2
- package/CHANGELOG.md +87 -3
- package/cli/dist/index.cjs +12429 -3769
- package/cli/dist/index.cjs.map +1 -1
- package/cli/package.json +9 -3
- package/package.json +27 -7
- package/packages/pi-extensions/package.json +1 -1
- package/.xtrm/skills/default/planning/evals/evals.json +0 -19
- package/.xtrm/skills/default/quality-gates/evals/evals.json +0 -181
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/FINAL-EVAL-SUMMARY.md +0 -75
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/edge-case-auto-fix-verification/with_skill/outputs/response.md +0 -59
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/edge-case-mixed-language-project/with_skill/outputs/response.md +0 -60
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/eval-summary.md +0 -105
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/partial-install-python-only/with_skill/outputs/response.md +0 -93
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/python-refactor-request/with_skill/outputs/response.md +0 -104
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/quality-gate-error-fix/with_skill/outputs/response.md +0 -74
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/should-not-trigger-general-chat/with_skill/outputs/response.md +0 -18
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/should-not-trigger-math-question/with_skill/outputs/response.md +0 -18
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/should-not-trigger-unrelated-coding/with_skill/outputs/response.md +0 -56
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/tdd-guard-blocking-confusion/with_skill/outputs/response.md +0 -67
- package/.xtrm/skills/default/quality-gates/workspace/iteration-1/typescript-feature-with-tests/with_skill/outputs/response.md +0 -97
- package/.xtrm/skills/default/sync-docs/evals/evals.json +0 -89
- package/.xtrm/skills/default/test-planning/evals/evals.json +0 -23
- package/.xtrm/skills/default/using-specialists/SKILL.safe.md +0 -1082
- package/.xtrm/skills/default/using-specialists/SKILL.ultra.md +0 -1082
- package/.xtrm/skills/default/using-specialists/evals/evals.json +0 -68
- package/.xtrm/skills/default/using-specialists-v3/evals/evals.json +0 -89
- package/packages/pi-extensions/.serena/project.yml +0 -130
package/cli/package.json
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "xtrm-cli",
|
|
3
|
-
"version": "0.7.17",
|
|
3
|
+
"private": true,
|
|
4
|
+
"version": "0.7.19",
|
|
4
5
|
"description": "Claude Code tools installer (skills, hooks, MCP servers)",
|
|
5
6
|
"main": "./dist/index.js",
|
|
6
7
|
"type": "module",
|
|
@@ -36,14 +37,14 @@
|
|
|
36
37
|
"ora": "^9.3.0",
|
|
37
38
|
"project": "^0.1.6",
|
|
38
39
|
"prompts": "^2.4.2",
|
|
39
|
-
"
|
|
40
|
+
"yaml": "^2.4.2",
|
|
40
41
|
"zod": "^4.3.6"
|
|
41
42
|
},
|
|
42
43
|
"devDependencies": {
|
|
43
44
|
"@types/fs-extra": "^11.0.4",
|
|
44
45
|
"@types/node": "^25.3.0",
|
|
46
|
+
"@types/prompts": "^2.4.9",
|
|
45
47
|
"fast-check": "^4.6.0",
|
|
46
|
-
"tdd-guard-vitest": "^0.1.6",
|
|
47
48
|
"tsup": "^8.5.1",
|
|
48
49
|
"tsx": "^4.21.0",
|
|
49
50
|
"typescript": "^5.9.3",
|
|
@@ -51,5 +52,10 @@
|
|
|
51
52
|
},
|
|
52
53
|
"engines": {
|
|
53
54
|
"node": ">=20.0.0"
|
|
55
|
+
},
|
|
56
|
+
"pnpm": {
|
|
57
|
+
"overrides": {
|
|
58
|
+
"vite": "8.0.12"
|
|
59
|
+
}
|
|
54
60
|
}
|
|
55
61
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "xtrm-tools",
|
|
3
|
-
"version": "0.7.17",
|
|
3
|
+
"version": "0.7.19",
|
|
4
4
|
"description": "Claude Code tools installer (skills, hooks, MCP servers)",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -32,7 +32,10 @@
|
|
|
32
32
|
"!packages/pi-extensions/extensions/**/.pi/**",
|
|
33
33
|
"!.xtrm/extensions/**/.pi/**",
|
|
34
34
|
"!.xtrm/**/test_*.py",
|
|
35
|
-
"!.xtrm/**/tests/**"
|
|
35
|
+
"!.xtrm/**/tests/**",
|
|
36
|
+
"!.xtrm/skills/default/**/evals/**",
|
|
37
|
+
"!.xtrm/skills/default/**/workspace/iteration-*/**",
|
|
38
|
+
"!packages/*/.serena/**"
|
|
36
39
|
],
|
|
37
40
|
"publishConfig": {
|
|
38
41
|
"access": "public"
|
|
@@ -56,25 +59,37 @@
|
|
|
56
59
|
"start": "node cli/dist/index.cjs",
|
|
57
60
|
"lint": "echo 'No linting configured'",
|
|
58
61
|
"test": "npm test --workspace cli",
|
|
62
|
+
"check:package": "node scripts/assert-no-self-dependency.mjs",
|
|
59
63
|
"version": "npm run sync:cli-version && git add cli/package.json packages/pi-extensions/package.json",
|
|
60
|
-
"
|
|
64
|
+
"check:skills-ownership": "node scripts/check-skills-ownership.mjs",
|
|
65
|
+
"check:specialists-vendor": "node scripts/verify-specialists-vendor.mjs",
|
|
66
|
+
"check:asset-contract": "node scripts/verify-asset-contract.mjs",
|
|
67
|
+
"check:layout-guards": "node scripts/check-layout-guards.mjs",
|
|
68
|
+
"check:gitnexus-no-counter": "node scripts/check-gitnexus-no-counter.mjs",
|
|
69
|
+
"check:skills-symlinks": "node scripts/check-skills-symlinks.mjs",
|
|
70
|
+
"prepublishOnly": "npm run sync:cli-version && npm run check:package && npm run check:skills-ownership && node scripts/vendor-specialists-skills.mjs --specialists-package specialists --specialists-ref master && npm run gen-registry && npm run check:registry-pack-parity && npm run check:payload-hygiene && npm run check:specialists-vendor && npm run check:layout-guards && npm run check:gitnexus-no-counter && npm run check:skills-symlinks && npm run build",
|
|
61
71
|
"release": "npm publish --tag latest",
|
|
62
72
|
"release:pi-extensions": "npm publish --workspace @jaggerxtrm/pi-extensions --tag latest --access public",
|
|
63
73
|
"release:all": "npm run release && npm run release:pi-extensions",
|
|
64
|
-
"gen-registry": "node scripts/gen-registry.mjs"
|
|
74
|
+
"gen-registry": "node scripts/gen-registry.mjs",
|
|
75
|
+
"check:registry-pack-parity": "node scripts/check-registry-pack-parity.mjs",
|
|
76
|
+
"check:payload-hygiene": "node scripts/check-payload-hygiene.mjs"
|
|
65
77
|
},
|
|
66
78
|
"engines": {
|
|
67
79
|
"node": ">=20.0.0"
|
|
68
80
|
},
|
|
69
81
|
"dependencies": {
|
|
70
|
-
"@artale/pi-procs": "^1.1.0",
|
|
71
82
|
"comment-json": "^4.2.3",
|
|
72
83
|
"conf": "^15.1.0",
|
|
73
84
|
"dotenv": "^17.3.1",
|
|
74
85
|
"fs-extra": "^11.2.0",
|
|
75
86
|
"kleur": "^4.1.5",
|
|
76
|
-
"prompts": "^2.4.2"
|
|
77
|
-
|
|
87
|
+
"prompts": "^2.4.2"
|
|
88
|
+
},
|
|
89
|
+
"overrides": {
|
|
90
|
+
"fast-uri": "3.1.2",
|
|
91
|
+
"@aws-sdk/xml-builder": "3.972.22",
|
|
92
|
+
"vite": "8.0.12"
|
|
78
93
|
},
|
|
79
94
|
"pi": {
|
|
80
95
|
"extensions": [
|
|
@@ -88,5 +103,10 @@
|
|
|
88
103
|
"npm:@aliou/pi-guardrails",
|
|
89
104
|
"npm:@aliou/pi-processes"
|
|
90
105
|
]
|
|
106
|
+
},
|
|
107
|
+
"pnpm": {
|
|
108
|
+
"overrides": {
|
|
109
|
+
"vite": "8.0.12"
|
|
110
|
+
}
|
|
91
111
|
}
|
|
92
112
|
}
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"skill_name": "planning",
|
|
3
|
-
"evals": [
|
|
4
|
-
{
|
|
5
|
-
"id": 1,
|
|
6
|
-
"eval_name": "docs-list-command",
|
|
7
|
-
"prompt": "Plan the implementation of the `xtrm docs list` command (xtrm-vwp0). The command should list all project docs with metadata, support filtering, table output, and JSON mode. It needs to be a subcommand of the existing `xtrm docs` CLI group in cli/src/. There's already a partially-implemented docs.ts somewhere. Break this into a proper phased issue board.",
|
|
8
|
-
"expected_output": "An epic with phased child tasks (scaffold/core/integration), each with rich descriptions containing context, what to build, AC, and approach notes. test-planning invoked after issue board created. Dependencies wired between phases.",
|
|
9
|
-
"files": []
|
|
10
|
-
},
|
|
11
|
-
{
|
|
12
|
-
"id": 2,
|
|
13
|
-
"eval_name": "docs-crosscheck-command",
|
|
14
|
-
"prompt": "Plan the implementation of the `xtrm docs cross-check` command (xtrm-uc0e). This validates docs against PRs and bd issues — detects stale docs, coverage gaps, open issue references. Uses gh CLI for GitHub data. Needs to be a subcommand of `xtrm docs`. Break this into a well-structured bd issue board with proper phasing.",
|
|
15
|
-
"expected_output": "An epic with phased child tasks covering: GitHub data fetching (boundary layer), cross-check logic (core layer), CLI command wiring (shell layer). test-planning invoked. High-quality issue descriptions that another agent could work from independently.",
|
|
16
|
-
"files": []
|
|
17
|
-
}
|
|
18
|
-
]
|
|
19
|
-
}
|
|
@@ -1,181 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"skill_name": "using-quality-gates",
|
|
3
|
-
"evals": [
|
|
4
|
-
{
|
|
5
|
-
"id": 1,
|
|
6
|
-
"name": "typescript-feature-with-tests",
|
|
7
|
-
"prompt": "Add a new authentication middleware to src/auth.ts that validates JWT tokens. The middleware should check the Authorization header, verify the token, and attach the user to the request object.",
|
|
8
|
-
"expected_output": "Skill explains TDD workflow: write failing test first, then implement. References tdd-guard-vitest or tdd-guard-jest setup. After implementation, mentions TS quality gate will run ESLint/Prettier checks.",
|
|
9
|
-
"expectations": [
|
|
10
|
-
"Mentions writing a failing test before implementation",
|
|
11
|
-
"References TDD Guard blocking mechanism",
|
|
12
|
-
"Mentions TypeScript quality gate runs after edit",
|
|
13
|
-
"Provides actionable next steps"
|
|
14
|
-
]
|
|
15
|
-
},
|
|
16
|
-
{
|
|
17
|
-
"id": 2,
|
|
18
|
-
"name": "python-refactor-request",
|
|
19
|
-
"prompt": "Refactor the database connection pooling in db/connection.py to use async/await. Current implementation is blocking and causing performance issues.",
|
|
20
|
-
"expected_output": "Skill explains TDD + Python quality workflow: write failing test for async behavior, implement, then ruff/mypy will validate. Mentions auto-fix capabilities.",
|
|
21
|
-
"expectations": [
|
|
22
|
-
"Mentions writing tests first (TDD Guard)",
|
|
23
|
-
"References Python quality gate (ruff + mypy)",
|
|
24
|
-
"Mentions auto-fix for linting issues",
|
|
25
|
-
"Explains the post-edit validation flow"
|
|
26
|
-
]
|
|
27
|
-
},
|
|
28
|
-
{
|
|
29
|
-
"id": 3,
|
|
30
|
-
"name": "quality-gate-error-fix",
|
|
31
|
-
"prompt": "I'm getting blocked by the quality gate with TypeScript errors. Here's the error: 'Type string is not assignable to type number'. How do I fix this?",
|
|
32
|
-
"expected_output": "Skill explains quality gate error handling: read errors, apply auto-fix if available, manually fix type errors, gate re-runs automatically.",
|
|
33
|
-
"expectations": [
|
|
34
|
-
"Explains how to read quality gate errors",
|
|
35
|
-
"Mentions auto-fix capability",
|
|
36
|
-
"Explains manual fix process for type errors",
|
|
37
|
-
"Notes gate re-runs on next edit"
|
|
38
|
-
]
|
|
39
|
-
},
|
|
40
|
-
{
|
|
41
|
-
"id": 4,
|
|
42
|
-
"name": "partial-install-python-only",
|
|
43
|
-
"prompt": "I'm working on a Python-only project with pytest. What quality tools should I install?",
|
|
44
|
-
"expected_output": "Skill recommends Python-specific setup: tdd-guard-pytest for TDD, ruff + mypy for quality gate. Explains partial install workflow.",
|
|
45
|
-
"expectations": [
|
|
46
|
-
"Recommends tdd-guard-pytest",
|
|
47
|
-
"Recommends ruff and mypy",
|
|
48
|
-
"Explains Python-only workflow",
|
|
49
|
-
"Does not mention TypeScript tools"
|
|
50
|
-
]
|
|
51
|
-
},
|
|
52
|
-
{
|
|
53
|
-
"id": 5,
|
|
54
|
-
"name": "tdd-guard-blocking-confusion",
|
|
55
|
-
"prompt": "Why am I getting 'No failing test found' when I try to edit src/service.ts? I just want to add a logging statement.",
|
|
56
|
-
"expected_output": "Skill explains TDD Guard purpose: enforce test-first development. Even small changes require a failing test. Suggests writing a test that verifies the logging behavior.",
|
|
57
|
-
"expectations": [
|
|
58
|
-
"Explains TDD Guard blocks all implementation",
|
|
59
|
-
"Clarifies test-first requirement",
|
|
60
|
-
"Suggests writing appropriate test",
|
|
61
|
-
"Does not suggest bypassing the guard"
|
|
62
|
-
]
|
|
63
|
-
},
|
|
64
|
-
{
|
|
65
|
-
"id": 6,
|
|
66
|
-
"name": "eslint-not-found-error",
|
|
67
|
-
"prompt": "The quality gate says 'ESLint not found' but I'm editing a TypeScript file. What do I do?",
|
|
68
|
-
"expected_output": "Skill explains ESLint is required for TS quality gate. Provides install command (npm install --save-dev eslint) or how to disable in hook-config.json.",
|
|
69
|
-
"expectations": [
|
|
70
|
-
"Explains ESLint is required dependency",
|
|
71
|
-
"Provides npm install command",
|
|
72
|
-
"Mentions hook-config.json disable option",
|
|
73
|
-
"Clear troubleshooting steps"
|
|
74
|
-
]
|
|
75
|
-
},
|
|
76
|
-
{
|
|
77
|
-
"id": 7,
|
|
78
|
-
"name": "full-workflow-question",
|
|
79
|
-
"prompt": "Walk me through the complete workflow for adding a new feature to this TypeScript project.",
|
|
80
|
-
"expected_output": "Skill explains full pipeline: 1) Write failing test, 2) TDD Guard allows implementation, 3) Implement feature, 4) TS quality gate validates, 5) Fix any issues, 6) Commit.",
|
|
81
|
-
"expectations": [
|
|
82
|
-
"Lists all steps in order",
|
|
83
|
-
"Explains TDD Guard role",
|
|
84
|
-
"Explains quality gate role",
|
|
85
|
-
"Mentions auto-fix capabilities",
|
|
86
|
-
"Includes commit step"
|
|
87
|
-
]
|
|
88
|
-
},
|
|
89
|
-
{
|
|
90
|
-
"id": 8,
|
|
91
|
-
"name": "documentation-edit-exception",
|
|
92
|
-
"prompt": "I need to update the README.md with new API documentation. Will the quality gates block me?",
|
|
93
|
-
"expected_output": "Skill explains quality gates only apply to code files (.ts, .js, .py, etc.), not documentation. README edits proceed without TDD or linting checks.",
|
|
94
|
-
"expectations": [
|
|
95
|
-
"Clarifies documentation is exempt",
|
|
96
|
-
"Lists code file extensions that trigger gates",
|
|
97
|
-
"Explains why docs are exempt",
|
|
98
|
-
"No TDD requirement for docs"
|
|
99
|
-
]
|
|
100
|
-
},
|
|
101
|
-
{
|
|
102
|
-
"id": 9,
|
|
103
|
-
"name": "mypy-type-errors",
|
|
104
|
-
"prompt": "Mypy is reporting 15 type errors in my Python code. Should I fix all of them before continuing?",
|
|
105
|
-
"expected_output": "Skill explains exit code 2 means blocking - all errors must be fixed. Suggests using mypy's output to prioritize, fix incrementally, gate re-runs on each edit.",
|
|
106
|
-
"expectations": [
|
|
107
|
-
"Confirms all blocking errors must be fixed",
|
|
108
|
-
"Explains exit code 2 behavior",
|
|
109
|
-
"Suggests incremental fix approach",
|
|
110
|
-
"Notes gate re-runs automatically"
|
|
111
|
-
]
|
|
112
|
-
},
|
|
113
|
-
{
|
|
114
|
-
"id": 10,
|
|
115
|
-
"name": "vitest-reporter-setup",
|
|
116
|
-
"prompt": "How do I set up the Vitest test reporter for TDD Guard?",
|
|
117
|
-
"expected_output": "Skill explains tdd-guard-vitest installation and vitest.config.ts configuration with VitestReporter and project root path.",
|
|
118
|
-
"expectations": [
|
|
119
|
-
"Mentions tdd-guard-vitest package",
|
|
120
|
-
"Shows vitest.config.ts configuration",
|
|
121
|
-
"Explains project root path requirement",
|
|
122
|
-
"Clear setup steps"
|
|
123
|
-
]
|
|
124
|
-
},
|
|
125
|
-
{
|
|
126
|
-
"id": 11,
|
|
127
|
-
"name": "should-not-trigger-general-chat",
|
|
128
|
-
"prompt": "What's the weather like today?",
|
|
129
|
-
"expected_output": "Skill should NOT trigger - this is general chat, not a code quality workflow question.",
|
|
130
|
-
"expectations": [
|
|
131
|
-
"Skill does not activate",
|
|
132
|
-
"Standard Claude response"
|
|
133
|
-
]
|
|
134
|
-
},
|
|
135
|
-
{
|
|
136
|
-
"id": 12,
|
|
137
|
-
"name": "should-not-trigger-unrelated-coding",
|
|
138
|
-
"prompt": "Write a Python script to scrape data from example.com and save it to CSV.",
|
|
139
|
-
"expected_output": "Skill should NOT trigger strongly - this is a general coding task without quality gate context. May mention TDD as best practice but full skill not needed.",
|
|
140
|
-
"expectations": [
|
|
141
|
-
"Minimal or no skill activation",
|
|
142
|
-
"Focus on task completion",
|
|
143
|
-
"May mention testing as best practice"
|
|
144
|
-
]
|
|
145
|
-
},
|
|
146
|
-
{
|
|
147
|
-
"id": 13,
|
|
148
|
-
"name": "should-not-trigger-math-question",
|
|
149
|
-
"prompt": "What's the time complexity of binary search?",
|
|
150
|
-
"expected_output": "Skill should NOT trigger - this is a CS theory question, not about quality gates.",
|
|
151
|
-
"expectations": [
|
|
152
|
-
"Skill does not activate",
|
|
153
|
-
"Standard Claude response"
|
|
154
|
-
]
|
|
155
|
-
},
|
|
156
|
-
{
|
|
157
|
-
"id": 14,
|
|
158
|
-
"name": "edge-case-mixed-language-project",
|
|
159
|
-
"prompt": "I have a monorepo with both TypeScript backend and Python ML services. How do quality gates work?",
|
|
160
|
-
"expected_output": "Skill explains both gates can coexist: TS quality gate for backend files, PY quality gate for ML services. TDD Guard works with both via appropriate reporters.",
|
|
161
|
-
"expectations": [
|
|
162
|
-
"Explains coexistence of both gates",
|
|
163
|
-
"File-type-based routing",
|
|
164
|
-
"TDD Guard works with both",
|
|
165
|
-
"Separate reporters per language"
|
|
166
|
-
]
|
|
167
|
-
},
|
|
168
|
-
{
|
|
169
|
-
"id": 15,
|
|
170
|
-
"name": "edge-case-auto-fix-verification",
|
|
171
|
-
"prompt": "The quality gate said it auto-fixed 3 issues but I still have 2 errors. What happened?",
|
|
172
|
-
"expected_output": "Skill explains auto-fix handles fixable issues (formatting, simple lint), but type errors and complex issues require manual fixes. Shows how to identify remaining issues.",
|
|
173
|
-
"expectations": [
|
|
174
|
-
"Explains auto-fix limitations",
|
|
175
|
-
"Distinguishes fixable vs manual issues",
|
|
176
|
-
"Type errors require manual fix",
|
|
177
|
-
"How to read remaining errors"
|
|
178
|
-
]
|
|
179
|
-
}
|
|
180
|
-
]
|
|
181
|
-
}
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
# Using Quality Gates — Skill Creator Evals (Iteration 1) — COMPLETE
|
|
2
|
-
|
|
3
|
-
## All 10 Test Cases Evaluated
|
|
4
|
-
|
|
5
|
-
### Should-Trigger Eval Results
|
|
6
|
-
|
|
7
|
-
| ID | Name | Expectations Met | Notes |
|
|
8
|
-
|----|------|------------------|-------|
|
|
9
|
-
| 1 | typescript-feature-with-tests | ✅ 4/4 | Full TDD + TS workflow |
|
|
10
|
-
| 2 | python-refactor-request | ✅ 4/4 | Async refactor + PY gate |
|
|
11
|
-
| 3 | quality-gate-error-fix | ✅ 4/4 | Error handling explained |
|
|
12
|
-
| 4 | partial-install-python-only | ✅ 4/4 | Python-only, no TS mentions |
|
|
13
|
-
| 5 | tdd-guard-blocking-confusion | ✅ 4/4 | TDD philosophy explained |
|
|
14
|
-
| 14 | edge-case-mixed-language-project | ✅ 4/4 | Coexistence explained |
|
|
15
|
-
| 15 | edge-case-auto-fix-verification | ✅ 4/4 | Auto-fix limits clarified |
|
|
16
|
-
|
|
17
|
-
### Should-NOT-Trigger Eval Results
|
|
18
|
-
|
|
19
|
-
| ID | Name | Result | Notes |
|
|
20
|
-
|----|------|--------|-------|
|
|
21
|
-
| 11 | should-not-trigger-general-chat | ✅ Pass | Skill correctly silent |
|
|
22
|
-
| 12 | should-not-trigger-unrelated-coding | ✅ Pass | Minimal mode applied |
|
|
23
|
-
| 13 | should-not-trigger-math-question | ✅ Pass | Skill correctly silent |
|
|
24
|
-
|
|
25
|
-
---
|
|
26
|
-
|
|
27
|
-
## Overall Assessment
|
|
28
|
-
|
|
29
|
-
### Pass Rate: 10/10 (100%) ✅
|
|
30
|
-
|
|
31
|
-
**Iteration 1 Fix Applied:**
|
|
32
|
-
- Added "Response Modes" section to SKILL.md
|
|
33
|
-
- Full Workflow Mode for feature/refactor work
|
|
34
|
-
- Minimal Mode for general coding tasks
|
|
35
|
-
- Eval 12 re-run verified fix works
|
|
36
|
-
|
|
37
|
-
---
|
|
38
|
-
|
|
39
|
-
## Skill Files Created
|
|
40
|
-
|
|
41
|
-
```
|
|
42
|
-
project-skills/quality-gates/
|
|
43
|
-
├── README.md # User documentation
|
|
44
|
-
├── evals/
|
|
45
|
-
│ └── evals.json # 15 test cases defined
|
|
46
|
-
├── .claude/
|
|
47
|
-
│ └── skills/
|
|
48
|
-
│ └── using-quality-gates/
|
|
49
|
-
│ └── SKILL.md # Main skill file (with Response Modes)
|
|
50
|
-
└── workspace/
|
|
51
|
-
└── iteration-1/
|
|
52
|
-
├── [10 eval directories with outputs]
|
|
53
|
-
└── FINAL-EVAL-SUMMARY.md
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
---
|
|
57
|
-
|
|
58
|
-
## Ready to Ship
|
|
59
|
-
|
|
60
|
-
The skill is complete and passes all evals. Key features:
|
|
61
|
-
|
|
62
|
-
1. **Unified workflow** — TDD Guard + TS/PY Quality Gates in one skill
|
|
63
|
-
2. **Progressive disclosure** — Full vs Minimal mode based on context
|
|
64
|
-
3. **Language routing** — TypeScript vs Python handled correctly
|
|
65
|
-
4. **Partial install support** — Works with any combination of gates
|
|
66
|
-
5. **Troubleshooting** — Clear guidance for common errors
|
|
67
|
-
|
|
68
|
-
---
|
|
69
|
-
|
|
70
|
-
## Next Steps
|
|
71
|
-
|
|
72
|
-
1. ✅ Skill created and validated
|
|
73
|
-
2. ⏳ Test installation: `xtrm install project quality-gates`
|
|
74
|
-
3. ⏳ Update xtrm CLI to include quality-gates in project list
|
|
75
|
-
4. ⏳ Decide: Deprecate individual skills or keep as legacy options
|
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
# Eval 15: edge-case-auto-fix-verification
|
|
2
|
-
|
|
3
|
-
**Prompt:** The quality gate said it auto-fixed 3 issues but I still have 2 errors. What happened?
|
|
4
|
-
|
|
5
|
-
**Skill Applied:** using-quality-gates
|
|
6
|
-
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
## Response (with skill active)
|
|
10
|
-
|
|
11
|
-
This is expected behavior. Here's what happened:
|
|
12
|
-
|
|
13
|
-
### Auto-Fix Limitations
|
|
14
|
-
|
|
15
|
-
**Auto-fix handles:**
|
|
16
|
-
- ✅ Prettier formatting issues (indentation, quotes, semicolons)
|
|
17
|
-
- ✅ ESLint stylistic rules (unused vars, prefer const, etc.)
|
|
18
|
-
- ✅ Ruff linting issues (imports, style, simple fixes)
|
|
19
|
-
|
|
20
|
-
**Auto-fix CANNOT handle:**
|
|
21
|
-
- ❌ TypeScript type errors (wrong types, missing properties)
|
|
22
|
-
- ❌ Mypy type errors (type mismatches)
|
|
23
|
-
- ❌ Complex logic errors
|
|
24
|
-
- ❌ Missing imports or undefined variables
|
|
25
|
-
|
|
26
|
-
### Reading the Output
|
|
27
|
-
|
|
28
|
-
```
|
|
29
|
-
[WARN] Auto-fix applied: 3 issues fixed
|
|
30
|
-
[ERROR] TypeScript compilation failed:
|
|
31
|
-
src/user.ts:42:5 - error TS2322: Type 'string' is not assignable to type 'number'
|
|
32
|
-
[ERROR] ESLint found 1 issue that could not be auto-fixed:
|
|
33
|
-
src/user.ts:28:3 - Missing return type on function
|
|
34
|
-
[BLOCK] 2 issues remain - fix before continuing
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
### How to Fix Remaining Issues
|
|
38
|
-
|
|
39
|
-
1. **Type errors** — Add proper types or convert values:
|
|
40
|
-
```typescript
|
|
41
|
-
// Fix: convert string to number
|
|
42
|
-
const userId: number = Number(inputId);
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
2. **Missing return types** — Add explicit type annotations:
|
|
46
|
-
```typescript
|
|
47
|
-
// Fix: add return type
|
|
48
|
-
function getUser(id: number): User { ... }
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
3. **Save the file** — Quality Gate re-runs automatically on next edit
|
|
52
|
-
|
|
53
|
-
### Why This Design?
|
|
54
|
-
|
|
55
|
-
Auto-fix is conservative — it only applies fixes that are guaranteed safe. Type errors require human judgment to fix correctly.
|
|
56
|
-
|
|
57
|
-
---
|
|
58
|
-
|
|
59
|
-
**Result:** ✅ Explains auto-fix limitations, distinguishes fixable vs manual issues
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
# Eval 14: edge-case-mixed-language-project
|
|
2
|
-
|
|
3
|
-
**Prompt:** I have a monorepo with both TypeScript backend and Python ML services. How do quality gates work?
|
|
4
|
-
|
|
5
|
-
**Skill Applied:** using-quality-gates
|
|
6
|
-
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
## Response (with skill active)
|
|
10
|
-
|
|
11
|
-
In a **mixed-language monorepo**, Quality Gates work independently per file type:
|
|
12
|
-
|
|
13
|
-
### File-Type Routing
|
|
14
|
-
|
|
15
|
-
| File Extension | Active Gate |
|
|
16
|
-
|----------------|-------------|
|
|
17
|
-
| `.ts`, `.tsx`, `.js`, `.jsx` | TypeScript Quality Gate |
|
|
18
|
-
| `.py` | Python Quality Gate |
|
|
19
|
-
| `.go`, `.rs`, `.php` | TDD Guard only (no language-specific quality gate) |
|
|
20
|
-
| `.md`, `.txt`, `.json` | No gate (documentation/config) |
|
|
21
|
-
|
|
22
|
-
### TDD Guard (All Code Files)
|
|
23
|
-
|
|
24
|
-
TDD Guard applies to **all code files** regardless of language:
|
|
25
|
-
- TypeScript backend tests → `tdd-guard-jest` or `tdd-guard-vitest`
|
|
26
|
-
- Python ML tests → `tdd-guard-pytest`
|
|
27
|
-
- Both reporters can coexist in the same project
|
|
28
|
-
|
|
29
|
-
### Quality Gates (Language-Specific)
|
|
30
|
-
|
|
31
|
-
**TypeScript Backend** (`packages/backend/`):
|
|
32
|
-
```bash
|
|
33
|
-
npm install --save-dev typescript eslint prettier
|
|
34
|
-
# PostToolUse hook runs: tsc + eslint + prettier
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
**Python ML Services** (`packages/ml-service/`):
|
|
38
|
-
```bash
|
|
39
|
-
pip install ruff mypy
|
|
40
|
-
# PostToolUse hook runs: ruff check + ruff format + mypy
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
### Configuration
|
|
44
|
-
|
|
45
|
-
Each language has separate config:
|
|
46
|
-
- TS: `.claude/hooks/hook-config.json`
|
|
47
|
-
- Python: Environment variables (`CLAUDE_HOOKS_RUFF_ENABLED`, etc.)
|
|
48
|
-
|
|
49
|
-
### Your Workflow
|
|
50
|
-
|
|
51
|
-
```
|
|
52
|
-
Edit TypeScript file → TDD Guard check → TS Quality Gate
|
|
53
|
-
Edit Python file → TDD Guard check → PY Quality Gate
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
Both gates coexist peacefully — each only activates for its file types.
|
|
57
|
-
|
|
58
|
-
---
|
|
59
|
-
|
|
60
|
-
**Result:** ✅ Explains coexistence, file-type routing, separate reporters
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
# Using Quality Gates — Skill Creator Evals (Iteration 1)
|
|
2
|
-
|
|
3
|
-
## Test Cases Run: 5 of 15
|
|
4
|
-
|
|
5
|
-
### Eval Results Summary
|
|
6
|
-
|
|
7
|
-
| ID | Name | Status | Notes |
|
|
8
|
-
|----|------|--------|-------|
|
|
9
|
-
| 1 | typescript-feature-with-tests | ✅ Complete | Full TDD + TS quality gate workflow explained |
|
|
10
|
-
| 2 | python-refactor-request | ✅ Complete | Async refactor with Python quality gate |
|
|
11
|
-
| 3 | quality-gate-error-fix | ✅ Complete | Error handling and auto-fix explanation |
|
|
12
|
-
| 4 | partial-install-python-only | ✅ Complete | Python-only setup, no TS tools mentioned |
|
|
13
|
-
| 5 | tdd-guard-blocking-confusion | ✅ Complete | Explains TDD philosophy, provides options |
|
|
14
|
-
|
|
15
|
-
---
|
|
16
|
-
|
|
17
|
-
## Evaluation Criteria (from evals/evals.json)
|
|
18
|
-
|
|
19
|
-
### Eval 1: typescript-feature-with-tests
|
|
20
|
-
**Expectations:**
|
|
21
|
-
- [ ] Mentions writing a failing test before implementation
|
|
22
|
-
- [ ] References TDD Guard blocking mechanism
|
|
23
|
-
- [ ] Mentions TypeScript quality gate runs after edit
|
|
24
|
-
- [ ] Provides actionable next steps
|
|
25
|
-
|
|
26
|
-
**Result:** ✅ All expectations met
|
|
27
|
-
|
|
28
|
-
---
|
|
29
|
-
|
|
30
|
-
### Eval 2: python-refactor-request
|
|
31
|
-
**Expectations:**
|
|
32
|
-
- [ ] Mentions writing tests first (TDD Guard)
|
|
33
|
-
- [ ] References Python quality gate (ruff + mypy)
|
|
34
|
-
- [ ] Mentions auto-fix for linting issues
|
|
35
|
-
- [ ] Explains the post-edit validation flow
|
|
36
|
-
|
|
37
|
-
**Result:** ✅ All expectations met
|
|
38
|
-
|
|
39
|
-
---
|
|
40
|
-
|
|
41
|
-
### Eval 3: quality-gate-error-fix
|
|
42
|
-
**Expectations:**
|
|
43
|
-
- [ ] Explains how to read quality gate errors
|
|
44
|
-
- [ ] Mentions auto-fix capability
|
|
45
|
-
- [ ] Explains manual fix process for type errors
|
|
46
|
-
- [ ] Notes gate re-runs on next edit
|
|
47
|
-
|
|
48
|
-
**Result:** ✅ All expectations met
|
|
49
|
-
|
|
50
|
-
---
|
|
51
|
-
|
|
52
|
-
### Eval 4: partial-install-python-only
|
|
53
|
-
**Expectations:**
|
|
54
|
-
- [ ] Recommends tdd-guard-pytest
|
|
55
|
-
- [ ] Recommends ruff and mypy
|
|
56
|
-
- [ ] Explains Python-only workflow
|
|
57
|
-
- [ ] Does not mention TypeScript tools
|
|
58
|
-
|
|
59
|
-
**Result:** ✅ All expectations met
|
|
60
|
-
|
|
61
|
-
---
|
|
62
|
-
|
|
63
|
-
### Eval 5: tdd-guard-blocking-confusion
|
|
64
|
-
**Expectations:**
|
|
65
|
-
- [ ] Explains TDD Guard blocks all implementation
|
|
66
|
-
- [ ] Clarifies test-first requirement
|
|
67
|
-
- [ ] Suggests writing appropriate test
|
|
68
|
-
- [ ] Does not suggest bypassing the guard
|
|
69
|
-
|
|
70
|
-
**Result:** ✅ All expectations met
|
|
71
|
-
|
|
72
|
-
---
|
|
73
|
-
|
|
74
|
-
## Observations
|
|
75
|
-
|
|
76
|
-
### Strengths
|
|
77
|
-
1. **Consistent workflow explanation** — All responses follow the same TDD → implement → quality gate pattern
|
|
78
|
-
2. **Language-specific routing** — Python vs TypeScript handled correctly
|
|
79
|
-
3. **Actionable guidance** — Each response includes concrete commands and code examples
|
|
80
|
-
4. **Partial install handling** — Python-only response doesn't mention TS tools
|
|
81
|
-
|
|
82
|
-
### Potential Improvements
|
|
83
|
-
1. **Length** — Responses are detailed but could be overwhelming for simple questions
|
|
84
|
-
2. **Trigger specificity** — Skill might trigger on general coding questions (needs eval for should-not-trigger cases)
|
|
85
|
-
3. **Visual diagram** — The workflow diagram in SKILL.md is helpful but not referenced in responses
|
|
86
|
-
|
|
87
|
-
### Next Steps
|
|
88
|
-
1. Run should-not-trigger evals (11, 12, 13) to verify skill doesn't over-trigger
|
|
89
|
-
2. Run edge case evals (14, 15) for mixed-language and auto-fix scenarios
|
|
90
|
-
3. Based on feedback, potentially trim response length for simpler queries
|
|
91
|
-
4. Consider adding explicit "when NOT to use this skill" guidance
|
|
92
|
-
|
|
93
|
-
---
|
|
94
|
-
|
|
95
|
-
## Files Created
|
|
96
|
-
|
|
97
|
-
```
|
|
98
|
-
workspace/iteration-1/
|
|
99
|
-
├── typescript-feature-with-tests/with_skill/outputs/response.md
|
|
100
|
-
├── python-refactor-request/with_skill/outputs/response.md
|
|
101
|
-
├── quality-gate-error-fix/with_skill/outputs/response.md
|
|
102
|
-
├── partial-install-python-only/with_skill/outputs/response.md
|
|
103
|
-
├── tdd-guard-blocking-confusion/with_skill/outputs/response.md
|
|
104
|
-
└── eval-summary.md
|
|
105
|
-
```
|