xtrm-tools 2.1.6 → 2.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/dist/index.cjs +107 -22
- package/cli/dist/index.cjs.map +1 -1
- package/cli/package.json +4 -1
- package/config/hooks.json +5 -0
- package/hooks/README.md +19 -0
- package/hooks/beads-stop-gate.mjs +1 -0
- package/hooks/main-guard-post-push.mjs +71 -0
- package/package.json +11 -4
- package/project-skills/{py-quality-gate → quality-gates}/.claude/settings.json +10 -0
- package/project-skills/quality-gates/.claude/skills/using-quality-gates/SKILL.md +254 -0
- package/project-skills/quality-gates/README.md +109 -0
- package/project-skills/quality-gates/evals/evals.json +181 -0
- package/project-skills/quality-gates/workspace/iteration-1/FINAL-EVAL-SUMMARY.md +75 -0
- package/project-skills/quality-gates/workspace/iteration-1/edge-case-auto-fix-verification/with_skill/outputs/response.md +59 -0
- package/project-skills/quality-gates/workspace/iteration-1/edge-case-mixed-language-project/with_skill/outputs/response.md +60 -0
- package/project-skills/quality-gates/workspace/iteration-1/eval-summary.md +105 -0
- package/project-skills/quality-gates/workspace/iteration-1/partial-install-python-only/with_skill/outputs/response.md +93 -0
- package/project-skills/quality-gates/workspace/iteration-1/python-refactor-request/with_skill/outputs/response.md +104 -0
- package/project-skills/quality-gates/workspace/iteration-1/quality-gate-error-fix/with_skill/outputs/response.md +74 -0
- package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-general-chat/with_skill/outputs/response.md +18 -0
- package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-math-question/with_skill/outputs/response.md +18 -0
- package/project-skills/quality-gates/workspace/iteration-1/should-not-trigger-unrelated-coding/with_skill/outputs/response.md +56 -0
- package/project-skills/quality-gates/workspace/iteration-1/tdd-guard-blocking-confusion/with_skill/outputs/response.md +67 -0
- package/project-skills/quality-gates/workspace/iteration-1/typescript-feature-with-tests/with_skill/outputs/response.md +97 -0
- package/project-skills/tdd-guard/.claude/hooks/tdd-guard-pretool-bridge.cjs +0 -1
- package/hooks/__pycache__/agent_context.cpython-314.pyc +0 -0
- package/project-skills/py-quality-gate/.claude/skills/using-py-quality-gate/SKILL.md +0 -112
- package/project-skills/py-quality-gate/README.md +0 -147
- package/project-skills/service-skills-set/.claude/git-hooks/__pycache__/doc_reminder.cpython-314.pyc +0 -0
- package/project-skills/service-skills-set/.claude/git-hooks/__pycache__/skill_staleness.cpython-314.pyc +0 -0
- package/project-skills/service-skills-set/.claude/skills/creating-service-skills/scripts/__pycache__/bootstrap.cpython-314.pyc +0 -0
- package/project-skills/service-skills-set/.claude/skills/updating-service-skills/scripts/__pycache__/drift_detector.cpython-314.pyc +0 -0
- package/project-skills/service-skills-set/.claude/skills/using-service-skills/scripts/__pycache__/cataloger.cpython-314.pyc +0 -0
- package/project-skills/service-skills-set/.claude/skills/using-service-skills/scripts/__pycache__/skill_activator.cpython-314.pyc +0 -0
- package/project-skills/service-skills-set/.claude/skills/using-service-skills/scripts/__pycache__/test_skill_activator.cpython-314-pytest-9.0.2.pyc +0 -0
- package/project-skills/service-skills-set/.claude/skills/using-service-skills/scripts/test_skill_activator.py +0 -58
- package/project-skills/service-skills-set/__pycache__/install-service-skills.cpython-314.pyc +0 -0
- package/project-skills/ts-quality-gate/.claude/settings.json +0 -16
- package/project-skills/ts-quality-gate/.claude/skills/using-ts-quality-gate/SKILL.md +0 -81
- package/project-skills/ts-quality-gate/README.md +0 -115
- package/skills/documenting/scripts/__pycache__/drift_detector.cpython-314.pyc +0 -0
- package/skills/documenting/scripts/__pycache__/orchestrator.cpython-314.pyc +0 -0
- package/skills/documenting/scripts/__pycache__/validate_metadata.cpython-314.pyc +0 -0
- package/skills/documenting/scripts/changelog/__pycache__/__init__.cpython-314.pyc +0 -0
- package/skills/documenting/scripts/changelog/__pycache__/add_entry.cpython-314.pyc +0 -0
- package/skills/documenting/scripts/changelog/__pycache__/bump_release.cpython-314.pyc +0 -0
- package/skills/documenting/scripts/changelog/__pycache__/validate_changelog.cpython-314.pyc +0 -0
- package/skills/documenting/tests/__pycache__/test_changelog.cpython-314-pytest-9.0.2.pyc +0 -0
- package/skills/documenting/tests/__pycache__/test_drift_detector.cpython-314-pytest-9.0.2.pyc +0 -0
- package/skills/documenting/tests/__pycache__/test_orchestrator.cpython-314-pytest-9.0.2.pyc +0 -0
- package/skills/documenting/tests/__pycache__/test_validate_metadata.cpython-314-pytest-9.0.2.pyc +0 -0
- package/skills/documenting/tests/integration_test.sh +0 -70
- package/skills/documenting/tests/test_changelog.py +0 -201
- package/skills/documenting/tests/test_drift_detector.py +0 -80
- package/skills/documenting/tests/test_orchestrator.py +0 -52
- package/skills/documenting/tests/test_validate_metadata.py +0 -64
- /package/project-skills/{ts-quality-gate → quality-gates}/.claude/hooks/hook-config.json +0 -0
- /package/project-skills/{ts-quality-gate → quality-gates}/.claude/hooks/quality-check.cjs +0 -0
- /package/project-skills/{py-quality-gate → quality-gates}/.claude/hooks/quality-check.py +0 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill_name": "using-quality-gates",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": 1,
|
|
6
|
+
"name": "typescript-feature-with-tests",
|
|
7
|
+
"prompt": "Add a new authentication middleware to src/auth.ts that validates JWT tokens. The middleware should check the Authorization header, verify the token, and attach the user to the request object.",
|
|
8
|
+
"expected_output": "Skill explains TDD workflow: write failing test first, then implement. References tdd-guard-vitest or tdd-guard-jest setup. After implementation, mentions TS quality gate will run ESLint/Prettier checks.",
|
|
9
|
+
"expectations": [
|
|
10
|
+
"Mentions writing a failing test before implementation",
|
|
11
|
+
"References TDD Guard blocking mechanism",
|
|
12
|
+
"Mentions TypeScript quality gate runs after edit",
|
|
13
|
+
"Provides actionable next steps"
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": 2,
|
|
18
|
+
"name": "python-refactor-request",
|
|
19
|
+
"prompt": "Refactor the database connection pooling in db/connection.py to use async/await. Current implementation is blocking and causing performance issues.",
|
|
20
|
+
"expected_output": "Skill explains TDD + Python quality workflow: write failing test for async behavior, implement, then ruff/mypy will validate. Mentions auto-fix capabilities.",
|
|
21
|
+
"expectations": [
|
|
22
|
+
"Mentions writing tests first (TDD Guard)",
|
|
23
|
+
"References Python quality gate (ruff + mypy)",
|
|
24
|
+
"Mentions auto-fix for linting issues",
|
|
25
|
+
"Explains the post-edit validation flow"
|
|
26
|
+
]
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"id": 3,
|
|
30
|
+
"name": "quality-gate-error-fix",
|
|
31
|
+
"prompt": "I'm getting blocked by the quality gate with TypeScript errors. Here's the error: 'Type string is not assignable to type number'. How do I fix this?",
|
|
32
|
+
"expected_output": "Skill explains quality gate error handling: read errors, apply auto-fix if available, manually fix type errors, gate re-runs automatically.",
|
|
33
|
+
"expectations": [
|
|
34
|
+
"Explains how to read quality gate errors",
|
|
35
|
+
"Mentions auto-fix capability",
|
|
36
|
+
"Explains manual fix process for type errors",
|
|
37
|
+
"Notes gate re-runs on next edit"
|
|
38
|
+
]
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"id": 4,
|
|
42
|
+
"name": "partial-install-python-only",
|
|
43
|
+
"prompt": "I'm working on a Python-only project with pytest. What quality tools should I install?",
|
|
44
|
+
"expected_output": "Skill recommends Python-specific setup: tdd-guard-pytest for TDD, ruff + mypy for quality gate. Explains partial install workflow.",
|
|
45
|
+
"expectations": [
|
|
46
|
+
"Recommends tdd-guard-pytest",
|
|
47
|
+
"Recommends ruff and mypy",
|
|
48
|
+
"Explains Python-only workflow",
|
|
49
|
+
"Does not mention TypeScript tools"
|
|
50
|
+
]
|
|
51
|
+
},
|
|
52
|
+
{
|
|
53
|
+
"id": 5,
|
|
54
|
+
"name": "tdd-guard-blocking-confusion",
|
|
55
|
+
"prompt": "Why am I getting 'No failing test found' when I try to edit src/service.ts? I just want to add a logging statement.",
|
|
56
|
+
"expected_output": "Skill explains TDD Guard purpose: enforce test-first development. Even small changes require a failing test. Suggests writing a test that verifies the logging behavior.",
|
|
57
|
+
"expectations": [
|
|
58
|
+
"Explains TDD Guard blocks all implementation",
|
|
59
|
+
"Clarifies test-first requirement",
|
|
60
|
+
"Suggests writing appropriate test",
|
|
61
|
+
"Does not suggest bypassing the guard"
|
|
62
|
+
]
|
|
63
|
+
},
|
|
64
|
+
{
|
|
65
|
+
"id": 6,
|
|
66
|
+
"name": "eslint-not-found-error",
|
|
67
|
+
"prompt": "The quality gate says 'ESLint not found' but I'm editing a TypeScript file. What do I do?",
|
|
68
|
+
"expected_output": "Skill explains ESLint is required for TS quality gate. Provides install command (npm install --save-dev eslint) or how to disable in hook-config.json.",
|
|
69
|
+
"expectations": [
|
|
70
|
+
"Explains ESLint is required dependency",
|
|
71
|
+
"Provides npm install command",
|
|
72
|
+
"Mentions hook-config.json disable option",
|
|
73
|
+
"Clear troubleshooting steps"
|
|
74
|
+
]
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"id": 7,
|
|
78
|
+
"name": "full-workflow-question",
|
|
79
|
+
"prompt": "Walk me through the complete workflow for adding a new feature to this TypeScript project.",
|
|
80
|
+
"expected_output": "Skill explains full pipeline: 1) Write failing test, 2) TDD Guard allows implementation, 3) Implement feature, 4) TS quality gate validates, 5) Fix any issues, 6) Commit.",
|
|
81
|
+
"expectations": [
|
|
82
|
+
"Lists all steps in order",
|
|
83
|
+
"Explains TDD Guard role",
|
|
84
|
+
"Explains quality gate role",
|
|
85
|
+
"Mentions auto-fix capabilities",
|
|
86
|
+
"Includes commit step"
|
|
87
|
+
]
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
"id": 8,
|
|
91
|
+
"name": "documentation-edit-exception",
|
|
92
|
+
"prompt": "I need to update the README.md with new API documentation. Will the quality gates block me?",
|
|
93
|
+
"expected_output": "Skill explains quality gates only apply to code files (.ts, .js, .py, etc.), not documentation. README edits proceed without TDD or linting checks.",
|
|
94
|
+
"expectations": [
|
|
95
|
+
"Clarifies documentation is exempt",
|
|
96
|
+
"Lists code file extensions that trigger gates",
|
|
97
|
+
"Explains why docs are exempt",
|
|
98
|
+
"No TDD requirement for docs"
|
|
99
|
+
]
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
"id": 9,
|
|
103
|
+
"name": "mypy-type-errors",
|
|
104
|
+
"prompt": "Mypy is reporting 15 type errors in my Python code. Should I fix all of them before continuing?",
|
|
105
|
+
"expected_output": "Skill explains exit code 2 means blocking - all errors must be fixed. Suggests using mypy's output to prioritize, fix incrementally, gate re-runs on each edit.",
|
|
106
|
+
"expectations": [
|
|
107
|
+
"Confirms all blocking errors must be fixed",
|
|
108
|
+
"Explains exit code 2 behavior",
|
|
109
|
+
"Suggests incremental fix approach",
|
|
110
|
+
"Notes gate re-runs automatically"
|
|
111
|
+
]
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
"id": 10,
|
|
115
|
+
"name": "vitest-reporter-setup",
|
|
116
|
+
"prompt": "How do I set up the Vitest test reporter for TDD Guard?",
|
|
117
|
+
"expected_output": "Skill explains tdd-guard-vitest installation and vitest.config.ts configuration with VitestReporter and project root path.",
|
|
118
|
+
"expectations": [
|
|
119
|
+
"Mentions tdd-guard-vitest package",
|
|
120
|
+
"Shows vitest.config.ts configuration",
|
|
121
|
+
"Explains project root path requirement",
|
|
122
|
+
"Clear setup steps"
|
|
123
|
+
]
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
"id": 11,
|
|
127
|
+
"name": "should-not-trigger-general-chat",
|
|
128
|
+
"prompt": "What's the weather like today?",
|
|
129
|
+
"expected_output": "Skill should NOT trigger - this is general chat, not a code quality workflow question.",
|
|
130
|
+
"expectations": [
|
|
131
|
+
"Skill does not activate",
|
|
132
|
+
"Standard Claude response"
|
|
133
|
+
]
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
"id": 12,
|
|
137
|
+
"name": "should-not-trigger-unrelated-coding",
|
|
138
|
+
"prompt": "Write a Python script to scrape data from example.com and save it to CSV.",
|
|
139
|
+
"expected_output": "Skill should NOT trigger strongly - this is a general coding task without quality gate context. May mention TDD as best practice but full skill not needed.",
|
|
140
|
+
"expectations": [
|
|
141
|
+
"Minimal or no skill activation",
|
|
142
|
+
"Focus on task completion",
|
|
143
|
+
"May mention testing as best practice"
|
|
144
|
+
]
|
|
145
|
+
},
|
|
146
|
+
{
|
|
147
|
+
"id": 13,
|
|
148
|
+
"name": "should-not-trigger-math-question",
|
|
149
|
+
"prompt": "What's the time complexity of binary search?",
|
|
150
|
+
"expected_output": "Skill should NOT trigger - this is a CS theory question, not about quality gates.",
|
|
151
|
+
"expectations": [
|
|
152
|
+
"Skill does not activate",
|
|
153
|
+
"Standard Claude response"
|
|
154
|
+
]
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
"id": 14,
|
|
158
|
+
"name": "edge-case-mixed-language-project",
|
|
159
|
+
"prompt": "I have a monorepo with both TypeScript backend and Python ML services. How do quality gates work?",
|
|
160
|
+
"expected_output": "Skill explains both gates can coexist: TS quality gate for backend files, PY quality gate for ML services. TDD Guard works with both via appropriate reporters.",
|
|
161
|
+
"expectations": [
|
|
162
|
+
"Explains coexistence of both gates",
|
|
163
|
+
"File-type-based routing",
|
|
164
|
+
"TDD Guard works with both",
|
|
165
|
+
"Separate reporters per language"
|
|
166
|
+
]
|
|
167
|
+
},
|
|
168
|
+
{
|
|
169
|
+
"id": 15,
|
|
170
|
+
"name": "edge-case-auto-fix-verification",
|
|
171
|
+
"prompt": "The quality gate said it auto-fixed 3 issues but I still have 2 errors. What happened?",
|
|
172
|
+
"expected_output": "Skill explains auto-fix handles fixable issues (formatting, simple lint), but type errors and complex issues require manual fixes. Shows how to identify remaining issues.",
|
|
173
|
+
"expectations": [
|
|
174
|
+
"Explains auto-fix limitations",
|
|
175
|
+
"Distinguishes fixable vs manual issues",
|
|
176
|
+
"Type errors require manual fix",
|
|
177
|
+
"How to read remaining errors"
|
|
178
|
+
]
|
|
179
|
+
}
|
|
180
|
+
]
|
|
181
|
+
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Using Quality Gates — Skill Creator Evals (Iteration 1) — COMPLETE
|
|
2
|
+
|
|
3
|
+
## All 10 Test Cases Evaluated
|
|
4
|
+
|
|
5
|
+
### Should-Trigger Eval Results
|
|
6
|
+
|
|
7
|
+
| ID | Name | Expectations Met | Notes |
|
|
8
|
+
|----|------|------------------|-------|
|
|
9
|
+
| 1 | typescript-feature-with-tests | ✅ 4/4 | Full TDD + TS workflow |
|
|
10
|
+
| 2 | python-refactor-request | ✅ 4/4 | Async refactor + PY gate |
|
|
11
|
+
| 3 | quality-gate-error-fix | ✅ 4/4 | Error handling explained |
|
|
12
|
+
| 4 | partial-install-python-only | ✅ 4/4 | Python-only, no TS mentions |
|
|
13
|
+
| 5 | tdd-guard-blocking-confusion | ✅ 4/4 | TDD philosophy explained |
|
|
14
|
+
| 14 | edge-case-mixed-language-project | ✅ 4/4 | Coexistence explained |
|
|
15
|
+
| 15 | edge-case-auto-fix-verification | ✅ 4/4 | Auto-fix limits clarified |
|
|
16
|
+
|
|
17
|
+
### Should-NOT-Trigger Eval Results
|
|
18
|
+
|
|
19
|
+
| ID | Name | Result | Notes |
|
|
20
|
+
|----|------|--------|-------|
|
|
21
|
+
| 11 | should-not-trigger-general-chat | ✅ Pass | Skill correctly silent |
|
|
22
|
+
| 12 | should-not-trigger-unrelated-coding | ✅ Pass | Minimal mode applied |
|
|
23
|
+
| 13 | should-not-trigger-math-question | ✅ Pass | Skill correctly silent |
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Overall Assessment
|
|
28
|
+
|
|
29
|
+
### Pass Rate: 10/10 (100%) ✅
|
|
30
|
+
|
|
31
|
+
**Iteration 1 Fix Applied:**
|
|
32
|
+
- Added "Response Modes" section to SKILL.md
|
|
33
|
+
- Full Workflow Mode for feature/refactor work
|
|
34
|
+
- Minimal Mode for general coding tasks
|
|
35
|
+
- Eval 12 re-run verified fix works
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Skill Files Created
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
project-skills/quality-gates/
|
|
43
|
+
├── README.md # User documentation
|
|
44
|
+
├── evals/
|
|
45
|
+
│ └── evals.json # 15 test cases defined
|
|
46
|
+
├── .claude/
|
|
47
|
+
│ └── skills/
|
|
48
|
+
│ └── using-quality-gates/
|
|
49
|
+
│ └── SKILL.md # Main skill file (with Response Modes)
|
|
50
|
+
└── workspace/
|
|
51
|
+
└── iteration-1/
|
|
52
|
+
├── [10 eval directories with outputs]
|
|
53
|
+
└── FINAL-EVAL-SUMMARY.md
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Ready to Ship
|
|
59
|
+
|
|
60
|
+
The skill is complete and passes all evals. Key features:
|
|
61
|
+
|
|
62
|
+
1. **Unified workflow** — TDD Guard + TS/PY Quality Gates in one skill
|
|
63
|
+
2. **Progressive disclosure** — Full vs Minimal mode based on context
|
|
64
|
+
3. **Language routing** — TypeScript vs Python handled correctly
|
|
65
|
+
4. **Partial install support** — Works with any combination of gates
|
|
66
|
+
5. **Troubleshooting** — Clear guidance for common errors
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Next Steps
|
|
71
|
+
|
|
72
|
+
1. ✅ Skill created and validated
|
|
73
|
+
2. ⏳ Test installation: `xtrm install project quality-gates`
|
|
74
|
+
3. ⏳ Update xtrm CLI to include quality-gates in project list
|
|
75
|
+
4. ⏳ Decide: Deprecate individual skills or keep as legacy options
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Eval 15: edge-case-auto-fix-verification
|
|
2
|
+
|
|
3
|
+
**Prompt:** The quality gate said it auto-fixed 3 issues but I still have 2 errors. What happened?
|
|
4
|
+
|
|
5
|
+
**Skill Applied:** using-quality-gates
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Response (with skill active)
|
|
10
|
+
|
|
11
|
+
This is expected behavior. Here's what happened:
|
|
12
|
+
|
|
13
|
+
### Auto-Fix Limitations
|
|
14
|
+
|
|
15
|
+
**Auto-fix handles:**
|
|
16
|
+
- ✅ Prettier formatting issues (indentation, quotes, semicolons)
|
|
17
|
+
- ✅ ESLint auto-fixable rules (prefer-const, no-var, etc.) — note that unused variables are reported but not auto-fixed
|
|
18
|
+
- ✅ Ruff linting issues (imports, style, simple fixes)
|
|
19
|
+
|
|
20
|
+
**Auto-fix CANNOT handle:**
|
|
21
|
+
- ❌ TypeScript type errors (wrong types, missing properties)
|
|
22
|
+
- ❌ Mypy type errors (type mismatches)
|
|
23
|
+
- ❌ Complex logic errors
|
|
24
|
+
- ❌ Missing imports or undefined variables
|
|
25
|
+
|
|
26
|
+
### Reading the Output
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
[WARN] Auto-fix applied: 3 issues fixed
|
|
30
|
+
[ERROR] TypeScript compilation failed:
|
|
31
|
+
src/user.ts:42:5 - error TS2322: Type 'string' is not assignable to type 'number'
|
|
32
|
+
[ERROR] ESLint found 1 issue that could not be auto-fixed:
|
|
33
|
+
src/user.ts:28:3 - Missing return type on function
|
|
34
|
+
[BLOCK] 2 issues remain - fix before continuing
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
### How to Fix Remaining Issues
|
|
38
|
+
|
|
39
|
+
1. **Type errors** — Add proper types or convert values:
|
|
40
|
+
```typescript
|
|
41
|
+
// Fix: convert string to number
|
|
42
|
+
const userId: number = Number(inputId);
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
2. **Missing return types** — Add explicit type annotations:
|
|
46
|
+
```typescript
|
|
47
|
+
// Fix: add return type
|
|
48
|
+
function getUser(id: number): User { ... }
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
3. **Save the file** — Quality Gate re-runs automatically on next edit
|
|
52
|
+
|
|
53
|
+
### Why This Design?
|
|
54
|
+
|
|
55
|
+
Auto-fix is conservative — it only applies fixes that are guaranteed safe. Type errors require human judgment to fix correctly.
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
**Result:** ✅ Explains auto-fix limitations, distinguishes fixable vs manual issues
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Eval 14: edge-case-mixed-language-project
|
|
2
|
+
|
|
3
|
+
**Prompt:** I have a monorepo with both TypeScript backend and Python ML services. How do quality gates work?
|
|
4
|
+
|
|
5
|
+
**Skill Applied:** using-quality-gates
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Response (with skill active)
|
|
10
|
+
|
|
11
|
+
In a **mixed-language monorepo**, Quality Gates work independently per file type:
|
|
12
|
+
|
|
13
|
+
### File-Type Routing
|
|
14
|
+
|
|
15
|
+
| File Extension | Active Gate |
|
|
16
|
+
|----------------|-------------|
|
|
17
|
+
| `.ts`, `.tsx`, `.js`, `.jsx` | TypeScript Quality Gate |
|
|
18
|
+
| `.py` | Python Quality Gate |
|
|
19
|
+
| `.go`, `.rs`, `.php` | TDD Guard only (no language-specific quality gate) |
|
|
20
|
+
| `.md`, `.txt`, `.json` | No gate (documentation/config) |
|
|
21
|
+
|
|
22
|
+
### TDD Guard (All Code Files)
|
|
23
|
+
|
|
24
|
+
TDD Guard applies to **all code files** regardless of language:
|
|
25
|
+
- TypeScript backend tests → `tdd-guard-jest` or `tdd-guard-vitest`
|
|
26
|
+
- Python ML tests → `tdd-guard-pytest`
|
|
27
|
+
- Both reporters can coexist in the same project
|
|
28
|
+
|
|
29
|
+
### Quality Gates (Language-Specific)
|
|
30
|
+
|
|
31
|
+
**TypeScript Backend** (`packages/backend/`):
|
|
32
|
+
```bash
|
|
33
|
+
npm install --save-dev typescript eslint prettier
|
|
34
|
+
# PostToolUse hook runs: tsc + eslint + prettier
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
**Python ML Services** (`packages/ml-service/`):
|
|
38
|
+
```bash
|
|
39
|
+
pip install ruff mypy
|
|
40
|
+
# PostToolUse hook runs: ruff check + ruff format + mypy
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### Configuration
|
|
44
|
+
|
|
45
|
+
Each language has separate config:
|
|
46
|
+
- TS: `.claude/hooks/hook-config.json`
|
|
47
|
+
- Python: Environment variables (`CLAUDE_HOOKS_RUFF_ENABLED`, etc.)
|
|
48
|
+
|
|
49
|
+
### Your Workflow
|
|
50
|
+
|
|
51
|
+
```
|
|
52
|
+
Edit TypeScript file → TDD Guard check → TS Quality Gate
|
|
53
|
+
Edit Python file → TDD Guard check → PY Quality Gate
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Both gates coexist peacefully — each only activates for its file types.
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
**Result:** ✅ Explains coexistence, file-type routing, separate reporters
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Using Quality Gates — Skill Creator Evals (Iteration 1)
|
|
2
|
+
|
|
3
|
+
## Test Cases Run: 5 of 15
|
|
4
|
+
|
|
5
|
+
### Eval Results Summary
|
|
6
|
+
|
|
7
|
+
| ID | Name | Status | Notes |
|
|
8
|
+
|----|------|--------|-------|
|
|
9
|
+
| 1 | typescript-feature-with-tests | ✅ Complete | Full TDD + TS quality gate workflow explained |
|
|
10
|
+
| 2 | python-refactor-request | ✅ Complete | Async refactor with Python quality gate |
|
|
11
|
+
| 3 | quality-gate-error-fix | ✅ Complete | Error handling and auto-fix explanation |
|
|
12
|
+
| 4 | partial-install-python-only | ✅ Complete | Python-only setup, no TS tools mentioned |
|
|
13
|
+
| 5 | tdd-guard-blocking-confusion | ✅ Complete | Explains TDD philosophy, provides options |
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Evaluation Criteria (from evals/evals.json)
|
|
18
|
+
|
|
19
|
+
### Eval 1: typescript-feature-with-tests
|
|
20
|
+
**Expectations:**
|
|
21
|
+
- [ ] Mentions writing a failing test before implementation
|
|
22
|
+
- [ ] References TDD Guard blocking mechanism
|
|
23
|
+
- [ ] Mentions TypeScript quality gate runs after edit
|
|
24
|
+
- [ ] Provides actionable next steps
|
|
25
|
+
|
|
26
|
+
**Result:** ✅ All expectations met
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
### Eval 2: python-refactor-request
|
|
31
|
+
**Expectations:**
|
|
32
|
+
- [ ] Mentions writing tests first (TDD Guard)
|
|
33
|
+
- [ ] References Python quality gate (ruff + mypy)
|
|
34
|
+
- [ ] Mentions auto-fix for linting issues
|
|
35
|
+
- [ ] Explains the post-edit validation flow
|
|
36
|
+
|
|
37
|
+
**Result:** ✅ All expectations met
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
### Eval 3: quality-gate-error-fix
|
|
42
|
+
**Expectations:**
|
|
43
|
+
- [ ] Explains how to read quality gate errors
|
|
44
|
+
- [ ] Mentions auto-fix capability
|
|
45
|
+
- [ ] Explains manual fix process for type errors
|
|
46
|
+
- [ ] Notes gate re-runs on next edit
|
|
47
|
+
|
|
48
|
+
**Result:** ✅ All expectations met
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
### Eval 4: partial-install-python-only
|
|
53
|
+
**Expectations:**
|
|
54
|
+
- [ ] Recommends tdd-guard-pytest
|
|
55
|
+
- [ ] Recommends ruff and mypy
|
|
56
|
+
- [ ] Explains Python-only workflow
|
|
57
|
+
- [ ] Does not mention TypeScript tools
|
|
58
|
+
|
|
59
|
+
**Result:** ✅ All expectations met
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
### Eval 5: tdd-guard-blocking-confusion
|
|
64
|
+
**Expectations:**
|
|
65
|
+
- [ ] Explains TDD Guard blocks all implementation
|
|
66
|
+
- [ ] Clarifies test-first requirement
|
|
67
|
+
- [ ] Suggests writing appropriate test
|
|
68
|
+
- [ ] Does not suggest bypassing the guard
|
|
69
|
+
|
|
70
|
+
**Result:** ✅ All expectations met
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Observations
|
|
75
|
+
|
|
76
|
+
### Strengths
|
|
77
|
+
1. **Consistent workflow explanation** — All responses follow the same TDD → implement → quality gate pattern
|
|
78
|
+
2. **Language-specific routing** — Python vs TypeScript handled correctly
|
|
79
|
+
3. **Actionable guidance** — Each response includes concrete commands and code examples
|
|
80
|
+
4. **Partial install handling** — Python-only response doesn't mention TS tools
|
|
81
|
+
|
|
82
|
+
### Potential Improvements
|
|
83
|
+
1. **Length** — Responses are detailed but could be overwhelming for simple questions
|
|
84
|
+
2. **Trigger specificity** — Skill might trigger on general coding questions (needs eval for should-not-trigger cases)
|
|
85
|
+
3. **Visual diagram** — The workflow diagram in SKILL.md is helpful but not referenced in responses
|
|
86
|
+
|
|
87
|
+
### Next Steps
|
|
88
|
+
1. Run should-not-trigger evals (11, 12, 13) to verify skill doesn't over-trigger
|
|
89
|
+
2. Run edge case evals (14, 15) for mixed-language and auto-fix scenarios
|
|
90
|
+
3. Based on feedback, potentially trim response length for simpler queries
|
|
91
|
+
4. Consider adding explicit "when NOT to use this skill" guidance
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Files Created
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
workspace/iteration-1/
|
|
99
|
+
├── typescript-feature-with-tests/with_skill/outputs/response.md
|
|
100
|
+
├── python-refactor-request/with_skill/outputs/response.md
|
|
101
|
+
├── quality-gate-error-fix/with_skill/outputs/response.md
|
|
102
|
+
├── partial-install-python-only/with_skill/outputs/response.md
|
|
103
|
+
├── tdd-guard-blocking-confusion/with_skill/outputs/response.md
|
|
104
|
+
└── eval-summary.md
|
|
105
|
+
```
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# Eval 4: partial-install-python-only
|
|
2
|
+
|
|
3
|
+
**Prompt:** I'm working on a Python-only project with pytest. What quality tools should I install?
|
|
4
|
+
|
|
5
|
+
**Skill Applied:** using-quality-gates
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Response (with skill active)
|
|
10
|
+
|
|
11
|
+
For a **Python-only project**, you need the following quality tools:
|
|
12
|
+
|
|
13
|
+
### Required Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# 1. TDD Guard CLI (global install)
|
|
17
|
+
npm install -g tdd-guard
|
|
18
|
+
|
|
19
|
+
# 2. TDD Guard pytest reporter (project install)
|
|
20
|
+
pip install tdd-guard-pytest
|
|
21
|
+
|
|
22
|
+
# 3. Python Quality Gate tools
|
|
23
|
+
pip install ruff mypy
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Configure TDD Guard Reporter
|
|
27
|
+
|
|
28
|
+
Add to your `pyproject.toml` or `conftest.py`:
|
|
29
|
+
|
|
30
|
+
**Option A: pyproject.toml**
|
|
31
|
+
```toml
|
|
32
|
+
[tool.pytest.ini_options]
|
|
33
|
+
tdd_guard_project_root = "/absolute/path/to/your/project"
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
**Option B: conftest.py**
|
|
37
|
+
```python
|
|
38
|
+
# conftest.py
|
|
39
|
+
from tdd_guard_pytest import PytestReporter
|
|
40
|
+
|
|
41
|
+
def pytest_configure(config):
|
|
42
|
+
config.option.tdd_guard_project_root = "/absolute/path/to/your/project"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Configure Quality Gate
|
|
46
|
+
|
|
47
|
+
Set environment variables (in `.bashrc`, `.zshrc`, or project `.env`):
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
export CLAUDE_HOOKS_RUFF_ENABLED=true
|
|
51
|
+
export CLAUDE_HOOKS_MYPY_ENABLED=true
|
|
52
|
+
export CLAUDE_HOOKS_AUTOFIX=true
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Create Configuration Files
|
|
56
|
+
|
|
57
|
+
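**pyproject.toml** (or **ruff.toml** — there, drop the `tool.ruff` prefix: top-level settings take no header and `[tool.ruff.lint]` becomes `[lint]`):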
|
|
58
|
+
```toml
|
|
59
|
+
[tool.ruff]
|
|
60
|
+
line-length = 88
|
|
61
|
+
target-version = "py38"
|
|
62
|
+
|
|
63
|
+
[tool.ruff.lint]
|
|
64
|
+
select = ["E", "F", "W", "I", "N", "UP", "B", "C4"]
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**mypy.ini**:
|
|
68
|
+
```ini
|
|
69
|
+
[mypy]
|
|
70
|
+
python_version = 3.8
|
|
71
|
+
warn_return_any = True
|
|
72
|
+
warn_unused_configs = True
|
|
73
|
+
disallow_untyped_defs = False
|
|
74
|
+
ignore_missing_imports = True
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Your Python-Only Workflow
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
1. Write failing test (pytest)
|
|
81
|
+
↓
|
|
82
|
+
2. TDD Guard validates that a failing test exists
|
|
83
|
+
↓
|
|
84
|
+
3. Implement feature
|
|
85
|
+
↓
|
|
86
|
+
4. Ruff + Mypy validate quality (auto-fix applied)
|
|
87
|
+
↓
|
|
88
|
+
5. pytest confirms tests pass
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
**Note:** You don't need TypeScript tools (eslint, prettier, tdd-guard-jest, etc.) for a Python-only project. The Quality Gates skill handles partial installs gracefully.
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# Eval 2: python-refactor-request
|
|
2
|
+
|
|
3
|
+
**Prompt:** Refactor the database connection pooling in db/connection.py to use async/await. Current implementation is blocking and causing performance issues.
|
|
4
|
+
|
|
5
|
+
**Skill Applied:** using-quality-gates
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Response (with skill active)
|
|
10
|
+
|
|
11
|
+
To refactor the database connection pooling to async/await, follow the **Quality Gates workflow** for Python:
|
|
12
|
+
|
|
13
|
+
### Step 1: Write a Failing Test First
|
|
14
|
+
|
|
15
|
+
TDD Guard requires a failing test before implementation. Create a test that verifies async behavior:
|
|
16
|
+
|
|
17
|
+
**Create** `tests/test_connection_async.py`:
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
import pytest
|
|
21
|
+
import asyncio
|
|
22
|
+
from db.connection import AsyncConnectionPool
|
|
23
|
+
|
|
24
|
+
class TestAsyncConnectionPool:
|
|
25
|
+
@pytest.mark.asyncio
|
|
26
|
+
async def test_async_connection_acquire(self):
|
|
27
|
+
pool = AsyncConnectionPool(max_size=5)
|
|
28
|
+
async with pool.acquire() as conn:
|
|
29
|
+
assert conn is not None
|
|
30
|
+
result = await conn.fetchval("SELECT 1")
|
|
31
|
+
assert result == 1
|
|
32
|
+
|
|
33
|
+
@pytest.mark.asyncio
|
|
34
|
+
async def test_async_connection_pooling(self):
|
|
35
|
+
pool = AsyncConnectionPool(max_size=2)
|
|
36
|
+
tasks = [pool.acquire() for _ in range(5)]
|
|
37
|
+
# Should handle contention gracefully
|
|
38
|
+
async with asyncio.TaskGroup() as tg:
|
|
39
|
+
for task in tasks:
|
|
40
|
+
tg.create_task(task)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Run tests to confirm failure:
|
|
44
|
+
```bash
|
|
45
|
+
pytest tests/test_connection_async.py # Should fail - implementation doesn't exist
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
### Step 2: Implement Async Refactor
|
|
49
|
+
|
|
50
|
+
Once tests fail, TDD Guard allows implementation:
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
# db/connection.py
|
|
54
|
+
import asyncio
import os
|
|
55
|
+
import asyncpg
|
|
56
|
+
from contextlib import asynccontextmanager
|
|
57
|
+
|
|
58
|
+
class AsyncConnectionPool:
|
|
59
|
+
def __init__(self, max_size: int = 10, dsn: str = None):
|
|
60
|
+
self.max_size = max_size
|
|
61
|
+
self.dsn = dsn or os.environ.get("DATABASE_URL")
|
|
62
|
+
self._pool = None
|
|
63
|
+
|
|
64
|
+
async def initialize(self):
|
|
65
|
+
self._pool = await asyncpg.create_pool(
|
|
66
|
+
self.dsn,
|
|
67
|
+
max_size=self.max_size,
|
|
68
|
+
command_timeout=60
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
@asynccontextmanager
|
|
72
|
+
async def acquire(self):
|
|
73
|
+
if not self._pool:
|
|
74
|
+
await self.initialize()
|
|
75
|
+
async with self._pool.acquire() as conn:
|
|
76
|
+
yield conn
|
|
77
|
+
|
|
78
|
+
async def close(self):
|
|
79
|
+
if self._pool:
|
|
80
|
+
await self._pool.close()
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Step 3: Python Quality Gate Validation
|
|
84
|
+
|
|
85
|
+
After editing `db/connection.py`, the **Python Quality Gate** automatically runs:
|
|
86
|
+
|
|
87
|
+
- **Ruff linting** - Checks style, best practices (auto-fixes enabled)
|
|
88
|
+
- **Ruff formatting** - Black-compatible formatting (auto-fixes enabled)
|
|
89
|
+
- **Mypy type checking** - Static type validation (manual fix required)
|
|
90
|
+
|
|
91
|
+
If the gate reports blocking errors (exit code 2), fix the remaining issues before continuing. Type errors and complex lint violations need manual fixes.
|
|
92
|
+
|
|
93
|
+
### Step 4: Verify Tests Pass
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
pytest tests/test_connection_async.py -v # All tests should pass
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
**Note:** Ensure dependencies are installed:
|
|
102
|
+
- `pip install tdd-guard-pytest` (TDD Guard reporter)
|
|
103
|
+
- `pip install ruff mypy` (Quality gate tools)
|
|
104
|
+
- `pip install asyncpg pytest pytest-asyncio` (Async DB and testing)
|