skill-lab 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skill_lab-0.1.0/LICENSE +21 -0
- skill_lab-0.1.0/PKG-INFO +257 -0
- skill_lab-0.1.0/README.md +220 -0
- skill_lab-0.1.0/pyproject.toml +128 -0
- skill_lab-0.1.0/setup.cfg +4 -0
- skill_lab-0.1.0/src/skill_lab/__init__.py +8 -0
- skill_lab-0.1.0/src/skill_lab/__main__.py +6 -0
- skill_lab-0.1.0/src/skill_lab/checks/__init__.py +5 -0
- skill_lab-0.1.0/src/skill_lab/checks/base.py +110 -0
- skill_lab-0.1.0/src/skill_lab/checks/static/__init__.py +5 -0
- skill_lab-0.1.0/src/skill_lab/checks/static/content.py +154 -0
- skill_lab-0.1.0/src/skill_lab/checks/static/description.py +146 -0
- skill_lab-0.1.0/src/skill_lab/checks/static/frontmatter.py +166 -0
- skill_lab-0.1.0/src/skill_lab/checks/static/naming.py +126 -0
- skill_lab-0.1.0/src/skill_lab/checks/static/structure.py +173 -0
- skill_lab-0.1.0/src/skill_lab/cli.py +503 -0
- skill_lab-0.1.0/src/skill_lab/core/__init__.py +41 -0
- skill_lab-0.1.0/src/skill_lab/core/exceptions.py +226 -0
- skill_lab-0.1.0/src/skill_lab/core/models.py +348 -0
- skill_lab-0.1.0/src/skill_lab/core/registry.py +63 -0
- skill_lab-0.1.0/src/skill_lab/core/scoring.py +204 -0
- skill_lab-0.1.0/src/skill_lab/core/utils.py +140 -0
- skill_lab-0.1.0/src/skill_lab/evaluators/__init__.py +6 -0
- skill_lab-0.1.0/src/skill_lab/evaluators/static_evaluator.py +146 -0
- skill_lab-0.1.0/src/skill_lab/evaluators/trace_evaluator.py +119 -0
- skill_lab-0.1.0/src/skill_lab/parsers/__init__.py +6 -0
- skill_lab-0.1.0/src/skill_lab/parsers/skill_parser.py +181 -0
- skill_lab-0.1.0/src/skill_lab/parsers/trace_parser.py +134 -0
- skill_lab-0.1.0/src/skill_lab/reporters/__init__.py +6 -0
- skill_lab-0.1.0/src/skill_lab/reporters/console_reporter.py +187 -0
- skill_lab-0.1.0/src/skill_lab/reporters/json_reporter.py +60 -0
- skill_lab-0.1.0/src/skill_lab/runtimes/__init__.py +11 -0
- skill_lab-0.1.0/src/skill_lab/runtimes/base.py +66 -0
- skill_lab-0.1.0/src/skill_lab/runtimes/claude_runtime.py +169 -0
- skill_lab-0.1.0/src/skill_lab/runtimes/codex_runtime.py +112 -0
- skill_lab-0.1.0/src/skill_lab/tracechecks/__init__.py +14 -0
- skill_lab-0.1.0/src/skill_lab/tracechecks/handlers/__init__.py +20 -0
- skill_lab-0.1.0/src/skill_lab/tracechecks/handlers/base.py +91 -0
- skill_lab-0.1.0/src/skill_lab/tracechecks/handlers/command_presence.py +53 -0
- skill_lab-0.1.0/src/skill_lab/tracechecks/handlers/efficiency.py +60 -0
- skill_lab-0.1.0/src/skill_lab/tracechecks/handlers/event_sequence.py +83 -0
- skill_lab-0.1.0/src/skill_lab/tracechecks/handlers/file_creation.py +52 -0
- skill_lab-0.1.0/src/skill_lab/tracechecks/handlers/loop_detection.py +67 -0
- skill_lab-0.1.0/src/skill_lab/tracechecks/registry.py +107 -0
- skill_lab-0.1.0/src/skill_lab/tracechecks/trace_check_loader.py +86 -0
- skill_lab-0.1.0/src/skill_lab/triggers/__init__.py +12 -0
- skill_lab-0.1.0/src/skill_lab/triggers/test_loader.py +233 -0
- skill_lab-0.1.0/src/skill_lab/triggers/trace_analyzer.py +160 -0
- skill_lab-0.1.0/src/skill_lab/triggers/trigger_evaluator.py +261 -0
- skill_lab-0.1.0/src/skill_lab.egg-info/PKG-INFO +257 -0
- skill_lab-0.1.0/src/skill_lab.egg-info/SOURCES.txt +62 -0
- skill_lab-0.1.0/src/skill_lab.egg-info/dependency_links.txt +1 -0
- skill_lab-0.1.0/src/skill_lab.egg-info/entry_points.txt +2 -0
- skill_lab-0.1.0/src/skill_lab.egg-info/requires.txt +9 -0
- skill_lab-0.1.0/src/skill_lab.egg-info/top_level.txt +1 -0
- skill_lab-0.1.0/tests/test_checks.py +206 -0
- skill_lab-0.1.0/tests/test_cli.py +82 -0
- skill_lab-0.1.0/tests/test_evaluator.py +90 -0
- skill_lab-0.1.0/tests/test_parsers.py +137 -0
- skill_lab-0.1.0/tests/test_scoring.py +126 -0
- skill_lab-0.1.0/tests/test_trace_check_loader.py +95 -0
- skill_lab-0.1.0/tests/test_trace_evaluator.py +138 -0
- skill_lab-0.1.0/tests/test_trace_handlers.py +181 -0
- skill_lab-0.1.0/tests/test_triggers.py +254 -0
skill_lab-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 8ddieHu0314
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
skill_lab-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skill-lab
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python CLI tool for evaluating agent skills through static analysis and quality checks
|
|
5
|
+
Author-email: Eddie Hu <eddiehu0314@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/8ddieHu0314/Skill-Lab
|
|
8
|
+
Project-URL: Documentation, https://github.com/8ddieHu0314/Skill-Lab#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/8ddieHu0314/Skill-Lab
|
|
10
|
+
Project-URL: Issues, https://github.com/8ddieHu0314/Skill-Lab/issues
|
|
11
|
+
Project-URL: Releases, https://github.com/8ddieHu0314/Skill-Lab/releases
|
|
12
|
+
Keywords: agent,skills,evaluation,cli,static-analysis,quality,SKILL.md,ai-agents
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Environment :: Console
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
22
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
23
|
+
Classifier: Topic :: Software Development :: Testing
|
|
24
|
+
Classifier: Typing :: Typed
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: typer>=0.9.0
|
|
29
|
+
Requires-Dist: rich>=13.0.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
34
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
35
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# Skill Lab
|
|
39
|
+
|
|
40
|
+
[PyPI version](https://badge.fury.io/py/skill-lab)
|
|
41
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
42
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
43
|
+
[Tests](https://github.com/8ddieHu0314/Skill-Lab/actions/workflows/test.yml)
|
|
44
|
+
|
|
45
|
+
A Python CLI tool for evaluating agent skills through static analysis and quality checks.
|
|
46
|
+
|
|
47
|
+
## Features
|
|
48
|
+
|
|
49
|
+
- **SKILL.md Parsing**: Parse YAML frontmatter and markdown body from skill definitions
|
|
50
|
+
- **18 Static Checks**: Comprehensive checks across 4 dimensions
|
|
51
|
+
- Structure: File existence, folder organization, frontmatter validation
|
|
52
|
+
- Naming: Format, directory matching
|
|
53
|
+
- Description: Length, trigger information
|
|
54
|
+
- Content: Examples, line budget, reference depth
|
|
55
|
+
- **Quality Scoring**: Weighted 0-100 score based on check results
|
|
56
|
+
- **Multiple Output Formats**: Console (rich formatting) and JSON
|
|
57
|
+
- **Trace Evaluation**: Analyze execution traces against defined checks
|
|
58
|
+
- **Trigger Testing**: Verify skill activation with different prompt types
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
# From PyPI
|
|
64
|
+
pip install skill-lab
|
|
65
|
+
|
|
66
|
+
# From source
|
|
67
|
+
pip install -e .
|
|
68
|
+
|
|
69
|
+
# With development dependencies
|
|
70
|
+
pip install -e ".[dev]"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Quick Start
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Evaluate a skill
|
|
77
|
+
sklab evaluate ./my-skill
|
|
78
|
+
|
|
79
|
+
# Quick validation (pass/fail)
|
|
80
|
+
sklab validate ./my-skill
|
|
81
|
+
|
|
82
|
+
# List available checks
|
|
83
|
+
sklab list-checks
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Usage
|
|
87
|
+
|
|
88
|
+
### Evaluate a Skill
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# Console output (default)
|
|
92
|
+
sklab evaluate ./my-skill
|
|
93
|
+
|
|
94
|
+
# JSON output
|
|
95
|
+
sklab evaluate ./my-skill --format json
|
|
96
|
+
|
|
97
|
+
# Save to file
|
|
98
|
+
sklab evaluate ./my-skill --output report.json
|
|
99
|
+
|
|
100
|
+
# Verbose (show all checks, not just failures)
|
|
101
|
+
sklab evaluate ./my-skill --verbose
|
|
102
|
+
|
|
103
|
+
# Spec-only (skip quality suggestions)
|
|
104
|
+
sklab evaluate ./my-skill --spec-only
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### Quick Validation
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Returns exit code 0 if valid, 1 if invalid
|
|
111
|
+
sklab validate ./my-skill
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### List Available Checks
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# List all checks
|
|
118
|
+
sklab list-checks
|
|
119
|
+
|
|
120
|
+
# Filter by dimension
|
|
121
|
+
sklab list-checks --dimension structure
|
|
122
|
+
|
|
123
|
+
# Show only spec-required checks
|
|
124
|
+
sklab list-checks --spec-only
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
### Test Triggers
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
# Run trigger tests
|
|
131
|
+
sklab test-triggers ./my-skill
|
|
132
|
+
|
|
133
|
+
# Filter by trigger type
|
|
134
|
+
sklab test-triggers ./my-skill --type explicit
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Evaluate Traces
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# Evaluate an execution trace
|
|
141
|
+
sklab eval-trace ./my-skill --trace ./trace.jsonl
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Check Categories
|
|
145
|
+
|
|
146
|
+
### Structure Checks
|
|
147
|
+
| Check ID | Severity | Description |
|
|
148
|
+
|----------|----------|-------------|
|
|
149
|
+
| `structure.skill-md-exists` | ERROR | SKILL.md file exists |
|
|
150
|
+
| `structure.valid-frontmatter` | ERROR | YAML frontmatter is parseable |
|
|
151
|
+
| `frontmatter.compatibility-length` | ERROR | Compatibility under 500 chars |
|
|
152
|
+
| `frontmatter.metadata-format` | ERROR | Metadata is string-to-string map |
|
|
153
|
+
| `structure.scripts-valid` | WARNING | /scripts contains valid files |
|
|
154
|
+
| `structure.references-valid` | WARNING | /references contains valid files |
|
|
155
|
+
|
|
156
|
+
### Naming Checks
|
|
157
|
+
| Check ID | Severity | Description |
|
|
158
|
+
|----------|----------|-------------|
|
|
159
|
+
| `naming.required` | ERROR | Name field is present |
|
|
160
|
+
| `naming.format` | ERROR | Lowercase, hyphens only, max 64 chars |
|
|
161
|
+
| `naming.matches-directory` | ERROR | Name matches parent directory |
|
|
162
|
+
|
|
163
|
+
### Description Checks
|
|
164
|
+
| Check ID | Severity | Description |
|
|
165
|
+
|----------|----------|-------------|
|
|
166
|
+
| `description.required` | ERROR | Description field is present |
|
|
167
|
+
| `description.not-empty` | ERROR | Description is not empty |
|
|
168
|
+
| `description.max-length` | ERROR | Max 1024 characters |
|
|
169
|
+
| `description.includes-triggers` | INFO | Describes when to use |
|
|
170
|
+
|
|
171
|
+
### Content Checks
|
|
172
|
+
| Check ID | Severity | Description |
|
|
173
|
+
|----------|----------|-------------|
|
|
174
|
+
| `content.body-not-empty` | WARNING | Body has content (min 50 chars) |
|
|
175
|
+
| `content.line-budget` | WARNING | Under 500 lines |
|
|
176
|
+
| `content.has-examples` | INFO | Contains code examples |
|
|
177
|
+
| `content.reference-depth` | WARNING | References max 1 level deep |
|
|
178
|
+
|
|
179
|
+
## Output Format (JSON)
|
|
180
|
+
|
|
181
|
+
```json
|
|
182
|
+
{
|
|
183
|
+
"skill_path": "/path/to/skill",
|
|
184
|
+
"skill_name": "my-skill",
|
|
185
|
+
"timestamp": "2026-01-25T14:30:00Z",
|
|
186
|
+
"duration_ms": 45.3,
|
|
187
|
+
"quality_score": 87.5,
|
|
188
|
+
"overall_pass": true,
|
|
189
|
+
"checks_run": 18,
|
|
190
|
+
"checks_passed": 16,
|
|
191
|
+
"checks_failed": 2,
|
|
192
|
+
"results": [...],
|
|
193
|
+
"summary": {
|
|
194
|
+
"by_severity": {...},
|
|
195
|
+
"by_dimension": {...}
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
## Development
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
# Install dev dependencies
|
|
204
|
+
pip install -e ".[dev]"
|
|
205
|
+
|
|
206
|
+
# Run tests
|
|
207
|
+
pytest tests/ -v
|
|
208
|
+
|
|
209
|
+
# Run with coverage
|
|
210
|
+
pytest tests/ --cov=skill_lab
|
|
211
|
+
|
|
212
|
+
# Type checking
|
|
213
|
+
mypy src/
|
|
214
|
+
|
|
215
|
+
# Linting
|
|
216
|
+
ruff check src/
|
|
217
|
+
|
|
218
|
+
# Format code
|
|
219
|
+
ruff format src/
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
## Project Structure
|
|
223
|
+
|
|
224
|
+
```
|
|
225
|
+
sklab/
|
|
226
|
+
├── src/skill_lab/
|
|
227
|
+
│ ├── cli.py # CLI interface (Typer)
|
|
228
|
+
│ ├── core/
|
|
229
|
+
│ │ ├── models.py # Data models
|
|
230
|
+
│ │ ├── registry.py # Check registration
|
|
231
|
+
│ │ └── scoring.py # Quality scoring
|
|
232
|
+
│ ├── parsers/
|
|
233
|
+
│ │ └── skill_parser.py # SKILL.md parsing
|
|
234
|
+
│ ├── checks/
|
|
235
|
+
│ │ ├── base.py # Base check class
|
|
236
|
+
│ │ └── static/ # Static checks
|
|
237
|
+
│ ├── evaluators/
|
|
238
|
+
│ │ ├── static_evaluator.py # Static analysis
|
|
239
|
+
│ │ └── trace_evaluator.py # Trace evaluation
|
|
240
|
+
│ ├── triggers/
|
|
241
|
+
│ │ └── trigger_evaluator.py # Trigger testing
|
|
242
|
+
│ └── reporters/
|
|
243
|
+
│ ├── json_reporter.py
|
|
244
|
+
│ └── console_reporter.py
|
|
245
|
+
├── tests/
|
|
246
|
+
├── docs/
|
|
247
|
+
└── examples/
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
## Related
|
|
251
|
+
|
|
252
|
+
- [Agent Skills Specification](https://agentskills.io/specification) - The specification this tool validates against
|
|
253
|
+
- [SkillsMP](https://skillsmp.com) - Marketplace for agent skills
|
|
254
|
+
|
|
255
|
+
## License
|
|
256
|
+
|
|
257
|
+
MIT
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# Skill Lab
|
|
2
|
+
|
|
3
|
+
[PyPI version](https://badge.fury.io/py/skill-lab)
|
|
4
|
+
[Python 3.10+](https://www.python.org/downloads/)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
[Tests](https://github.com/8ddieHu0314/Skill-Lab/actions/workflows/test.yml)
|
|
7
|
+
|
|
8
|
+
A Python CLI tool for evaluating agent skills through static analysis and quality checks.
|
|
9
|
+
|
|
10
|
+
## Features
|
|
11
|
+
|
|
12
|
+
- **SKILL.md Parsing**: Parse YAML frontmatter and markdown body from skill definitions
|
|
13
|
+
- **18 Static Checks**: Comprehensive checks across 4 dimensions
|
|
14
|
+
- Structure: File existence, folder organization, frontmatter validation
|
|
15
|
+
- Naming: Format, directory matching
|
|
16
|
+
- Description: Length, trigger information
|
|
17
|
+
- Content: Examples, line budget, reference depth
|
|
18
|
+
- **Quality Scoring**: Weighted 0-100 score based on check results
|
|
19
|
+
- **Multiple Output Formats**: Console (rich formatting) and JSON
|
|
20
|
+
- **Trace Evaluation**: Analyze execution traces against defined checks
|
|
21
|
+
- **Trigger Testing**: Verify skill activation with different prompt types
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# From PyPI
|
|
27
|
+
pip install skill-lab
|
|
28
|
+
|
|
29
|
+
# From source
|
|
30
|
+
pip install -e .
|
|
31
|
+
|
|
32
|
+
# With development dependencies
|
|
33
|
+
pip install -e ".[dev]"
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Evaluate a skill
|
|
40
|
+
sklab evaluate ./my-skill
|
|
41
|
+
|
|
42
|
+
# Quick validation (pass/fail)
|
|
43
|
+
sklab validate ./my-skill
|
|
44
|
+
|
|
45
|
+
# List available checks
|
|
46
|
+
sklab list-checks
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Usage
|
|
50
|
+
|
|
51
|
+
### Evaluate a Skill
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
# Console output (default)
|
|
55
|
+
sklab evaluate ./my-skill
|
|
56
|
+
|
|
57
|
+
# JSON output
|
|
58
|
+
sklab evaluate ./my-skill --format json
|
|
59
|
+
|
|
60
|
+
# Save to file
|
|
61
|
+
sklab evaluate ./my-skill --output report.json
|
|
62
|
+
|
|
63
|
+
# Verbose (show all checks, not just failures)
|
|
64
|
+
sklab evaluate ./my-skill --verbose
|
|
65
|
+
|
|
66
|
+
# Spec-only (skip quality suggestions)
|
|
67
|
+
sklab evaluate ./my-skill --spec-only
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Quick Validation
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
# Returns exit code 0 if valid, 1 if invalid
|
|
74
|
+
sklab validate ./my-skill
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### List Available Checks
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# List all checks
|
|
81
|
+
sklab list-checks
|
|
82
|
+
|
|
83
|
+
# Filter by dimension
|
|
84
|
+
sklab list-checks --dimension structure
|
|
85
|
+
|
|
86
|
+
# Show only spec-required checks
|
|
87
|
+
sklab list-checks --spec-only
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Test Triggers
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
# Run trigger tests
|
|
94
|
+
sklab test-triggers ./my-skill
|
|
95
|
+
|
|
96
|
+
# Filter by trigger type
|
|
97
|
+
sklab test-triggers ./my-skill --type explicit
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Evaluate Traces
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
# Evaluate an execution trace
|
|
104
|
+
sklab eval-trace ./my-skill --trace ./trace.jsonl
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Check Categories
|
|
108
|
+
|
|
109
|
+
### Structure Checks
|
|
110
|
+
| Check ID | Severity | Description |
|
|
111
|
+
|----------|----------|-------------|
|
|
112
|
+
| `structure.skill-md-exists` | ERROR | SKILL.md file exists |
|
|
113
|
+
| `structure.valid-frontmatter` | ERROR | YAML frontmatter is parseable |
|
|
114
|
+
| `frontmatter.compatibility-length` | ERROR | Compatibility under 500 chars |
|
|
115
|
+
| `frontmatter.metadata-format` | ERROR | Metadata is string-to-string map |
|
|
116
|
+
| `structure.scripts-valid` | WARNING | /scripts contains valid files |
|
|
117
|
+
| `structure.references-valid` | WARNING | /references contains valid files |
|
|
118
|
+
|
|
119
|
+
### Naming Checks
|
|
120
|
+
| Check ID | Severity | Description |
|
|
121
|
+
|----------|----------|-------------|
|
|
122
|
+
| `naming.required` | ERROR | Name field is present |
|
|
123
|
+
| `naming.format` | ERROR | Lowercase, hyphens only, max 64 chars |
|
|
124
|
+
| `naming.matches-directory` | ERROR | Name matches parent directory |
|
|
125
|
+
|
|
126
|
+
### Description Checks
|
|
127
|
+
| Check ID | Severity | Description |
|
|
128
|
+
|----------|----------|-------------|
|
|
129
|
+
| `description.required` | ERROR | Description field is present |
|
|
130
|
+
| `description.not-empty` | ERROR | Description is not empty |
|
|
131
|
+
| `description.max-length` | ERROR | Max 1024 characters |
|
|
132
|
+
| `description.includes-triggers` | INFO | Describes when to use |
|
|
133
|
+
|
|
134
|
+
### Content Checks
|
|
135
|
+
| Check ID | Severity | Description |
|
|
136
|
+
|----------|----------|-------------|
|
|
137
|
+
| `content.body-not-empty` | WARNING | Body has content (min 50 chars) |
|
|
138
|
+
| `content.line-budget` | WARNING | Under 500 lines |
|
|
139
|
+
| `content.has-examples` | INFO | Contains code examples |
|
|
140
|
+
| `content.reference-depth` | WARNING | References max 1 level deep |
|
|
141
|
+
|
|
142
|
+
## Output Format (JSON)
|
|
143
|
+
|
|
144
|
+
```json
|
|
145
|
+
{
|
|
146
|
+
"skill_path": "/path/to/skill",
|
|
147
|
+
"skill_name": "my-skill",
|
|
148
|
+
"timestamp": "2026-01-25T14:30:00Z",
|
|
149
|
+
"duration_ms": 45.3,
|
|
150
|
+
"quality_score": 87.5,
|
|
151
|
+
"overall_pass": true,
|
|
152
|
+
"checks_run": 18,
|
|
153
|
+
"checks_passed": 16,
|
|
154
|
+
"checks_failed": 2,
|
|
155
|
+
"results": [...],
|
|
156
|
+
"summary": {
|
|
157
|
+
"by_severity": {...},
|
|
158
|
+
"by_dimension": {...}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Development
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
# Install dev dependencies
|
|
167
|
+
pip install -e ".[dev]"
|
|
168
|
+
|
|
169
|
+
# Run tests
|
|
170
|
+
pytest tests/ -v
|
|
171
|
+
|
|
172
|
+
# Run with coverage
|
|
173
|
+
pytest tests/ --cov=skill_lab
|
|
174
|
+
|
|
175
|
+
# Type checking
|
|
176
|
+
mypy src/
|
|
177
|
+
|
|
178
|
+
# Linting
|
|
179
|
+
ruff check src/
|
|
180
|
+
|
|
181
|
+
# Format code
|
|
182
|
+
ruff format src/
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## Project Structure
|
|
186
|
+
|
|
187
|
+
```
|
|
188
|
+
sklab/
|
|
189
|
+
├── src/skill_lab/
|
|
190
|
+
│ ├── cli.py # CLI interface (Typer)
|
|
191
|
+
│ ├── core/
|
|
192
|
+
│ │ ├── models.py # Data models
|
|
193
|
+
│ │ ├── registry.py # Check registration
|
|
194
|
+
│ │ └── scoring.py # Quality scoring
|
|
195
|
+
│ ├── parsers/
|
|
196
|
+
│ │ └── skill_parser.py # SKILL.md parsing
|
|
197
|
+
│ ├── checks/
|
|
198
|
+
│ │ ├── base.py # Base check class
|
|
199
|
+
│ │ └── static/ # Static checks
|
|
200
|
+
│ ├── evaluators/
|
|
201
|
+
│ │ ├── static_evaluator.py # Static analysis
|
|
202
|
+
│ │ └── trace_evaluator.py # Trace evaluation
|
|
203
|
+
│ ├── triggers/
|
|
204
|
+
│ │ └── trigger_evaluator.py # Trigger testing
|
|
205
|
+
│ └── reporters/
|
|
206
|
+
│ ├── json_reporter.py
|
|
207
|
+
│ └── console_reporter.py
|
|
208
|
+
├── tests/
|
|
209
|
+
├── docs/
|
|
210
|
+
└── examples/
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## Related
|
|
214
|
+
|
|
215
|
+
- [Agent Skills Specification](https://agentskills.io/specification) - The specification this tool validates against
|
|
216
|
+
- [SkillsMP](https://skillsmp.com) - Marketplace for agent skills
|
|
217
|
+
|
|
218
|
+
## License
|
|
219
|
+
|
|
220
|
+
MIT
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "skill-lab"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "A Python CLI tool for evaluating agent skills through static analysis and quality checks"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
requires-python = ">=3.10"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Eddie Hu", email = "eddiehu0314@gmail.com" }
|
|
10
|
+
]
|
|
11
|
+
keywords = [
|
|
12
|
+
"agent",
|
|
13
|
+
"skills",
|
|
14
|
+
"evaluation",
|
|
15
|
+
"cli",
|
|
16
|
+
"static-analysis",
|
|
17
|
+
"quality",
|
|
18
|
+
"SKILL.md",
|
|
19
|
+
"ai-agents",
|
|
20
|
+
]
|
|
21
|
+
classifiers = [
|
|
22
|
+
"Development Status :: 4 - Beta",
|
|
23
|
+
"Environment :: Console",
|
|
24
|
+
"Intended Audience :: Developers",
|
|
25
|
+
"Operating System :: OS Independent",
|
|
26
|
+
"Programming Language :: Python :: 3",
|
|
27
|
+
"Programming Language :: Python :: 3.10",
|
|
28
|
+
"Programming Language :: Python :: 3.11",
|
|
29
|
+
"Programming Language :: Python :: 3.12",
|
|
30
|
+
"Programming Language :: Python :: 3.13",
|
|
31
|
+
"Topic :: Software Development :: Quality Assurance",
|
|
32
|
+
"Topic :: Software Development :: Testing",
|
|
33
|
+
"Typing :: Typed",
|
|
34
|
+
]
|
|
35
|
+
dependencies = [
|
|
36
|
+
"typer>=0.9.0",
|
|
37
|
+
"rich>=13.0.0",
|
|
38
|
+
"pyyaml>=6.0",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[project.optional-dependencies]
|
|
42
|
+
dev = [
|
|
43
|
+
"pytest>=7.0",
|
|
44
|
+
"pytest-cov>=4.0",
|
|
45
|
+
"mypy>=1.0",
|
|
46
|
+
"ruff>=0.1.0",
|
|
47
|
+
]
|
|
48
|
+
|
|
49
|
+
[project.scripts]
|
|
50
|
+
sklab = "skill_lab.cli:app"
|
|
51
|
+
|
|
52
|
+
[project.urls]
|
|
53
|
+
Homepage = "https://github.com/8ddieHu0314/Skill-Lab"
|
|
54
|
+
Documentation = "https://github.com/8ddieHu0314/Skill-Lab#readme"
|
|
55
|
+
Repository = "https://github.com/8ddieHu0314/Skill-Lab"
|
|
56
|
+
Issues = "https://github.com/8ddieHu0314/Skill-Lab/issues"
|
|
57
|
+
Releases = "https://github.com/8ddieHu0314/Skill-Lab/releases"
|
|
58
|
+
|
|
59
|
+
[build-system]
|
|
60
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
61
|
+
build-backend = "setuptools.build_meta"
|
|
62
|
+
|
|
63
|
+
[tool.setuptools.packages.find]
|
|
64
|
+
where = ["src"]
|
|
65
|
+
|
|
66
|
+
# Ruff - Linting and formatting
|
|
67
|
+
[tool.ruff]
|
|
68
|
+
line-length = 100
|
|
69
|
+
target-version = "py310"
|
|
70
|
+
src = ["src"]
|
|
71
|
+
|
|
72
|
+
[tool.ruff.lint]
|
|
73
|
+
select = [
|
|
74
|
+
"E", # pycodestyle errors
|
|
75
|
+
"F", # pyflakes
|
|
76
|
+
"I", # isort
|
|
77
|
+
"W", # pycodestyle warnings
|
|
78
|
+
"UP", # pyupgrade
|
|
79
|
+
"B", # flake8-bugbear
|
|
80
|
+
"SIM", # flake8-simplify
|
|
81
|
+
]
|
|
82
|
+
ignore = [
|
|
83
|
+
"E501", # line too long (handled by formatter)
|
|
84
|
+
]
|
|
85
|
+
|
|
86
|
+
[tool.ruff.lint.isort]
|
|
87
|
+
known-first-party = ["skill_lab"]
|
|
88
|
+
|
|
89
|
+
# MyPy - Type checking
|
|
90
|
+
[tool.mypy]
|
|
91
|
+
python_version = "3.10"
|
|
92
|
+
strict = true
|
|
93
|
+
warn_return_any = true
|
|
94
|
+
warn_unused_ignores = true
|
|
95
|
+
disallow_untyped_defs = true
|
|
96
|
+
disallow_incomplete_defs = true
|
|
97
|
+
check_untyped_defs = true
|
|
98
|
+
no_implicit_optional = true
|
|
99
|
+
show_error_codes = true
|
|
100
|
+
|
|
101
|
+
[[tool.mypy.overrides]]
|
|
102
|
+
module = ["yaml.*"]
|
|
103
|
+
ignore_missing_imports = true
|
|
104
|
+
|
|
105
|
+
# Pytest - Testing
|
|
106
|
+
[tool.pytest.ini_options]
|
|
107
|
+
testpaths = ["tests"]
|
|
108
|
+
python_files = "test_*.py"
|
|
109
|
+
python_functions = "test_*"
|
|
110
|
+
addopts = "-v --tb=short"
|
|
111
|
+
filterwarnings = [
|
|
112
|
+
"ignore::DeprecationWarning",
|
|
113
|
+
]
|
|
114
|
+
|
|
115
|
+
# Coverage
|
|
116
|
+
[tool.coverage.run]
|
|
117
|
+
source = ["src/skill_lab"]
|
|
118
|
+
branch = true
|
|
119
|
+
|
|
120
|
+
[tool.coverage.report]
|
|
121
|
+
exclude_lines = [
|
|
122
|
+
"pragma: no cover",
|
|
123
|
+
"def __repr__",
|
|
124
|
+
"raise NotImplementedError",
|
|
125
|
+
"if TYPE_CHECKING:",
|
|
126
|
+
"if __name__ == .__main__.:",
|
|
127
|
+
]
|
|
128
|
+
show_missing = true
|