opencode-skills-collection 3.0.45 → 3.0.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundled-skills/.antigravity-install-manifest.json +10 -1
- package/bundled-skills/2slides-ppt-generator/SKILL.md +1 -1
- package/bundled-skills/2slides-ppt-generator/scripts/create_pdf_slides.py +2 -1
- package/bundled-skills/2slides-ppt-generator/scripts/generate_narration.py +2 -1
- package/bundled-skills/2slides-ppt-generator/scripts/generate_slides.py +13 -7
- package/bundled-skills/android-dev/references/hybrid.md +7 -4
- package/bundled-skills/android-dev/references/react-native.md +5 -2
- package/bundled-skills/atlas-contract/SKILL.md +4 -4
- package/bundled-skills/atlas-ledger/SKILL.md +10 -7
- package/bundled-skills/bun-development/SKILL.md +1 -1
- package/bundled-skills/cloud-penetration-testing/SKILL.md +1 -1
- package/bundled-skills/codebase-to-wordpress-converter/SKILL.md +1 -0
- package/bundled-skills/docs/integrations/jetski-cortex.md +3 -3
- package/bundled-skills/docs/integrations/jetski-gemini-loader/README.md +1 -1
- package/bundled-skills/docs/maintainers/repo-growth-seo.md +3 -3
- package/bundled-skills/docs/maintainers/skills-update-guide.md +1 -1
- package/bundled-skills/docs/users/bundles.md +1 -1
- package/bundled-skills/docs/users/claude-code-skills.md +1 -1
- package/bundled-skills/docs/users/gemini-cli-skills.md +1 -1
- package/bundled-skills/docs/users/getting-started.md +1 -1
- package/bundled-skills/docs/users/kiro-integration.md +1 -1
- package/bundled-skills/docs/users/usage.md +4 -4
- package/bundled-skills/docs/users/visual-guide.md +4 -4
- package/bundled-skills/dos-verify-done-claims/SKILL.md +173 -0
- package/bundled-skills/ecl-harness-engineer/LICENSE +21 -0
- package/bundled-skills/ecl-harness-engineer/SKILL.md +714 -0
- package/bundled-skills/ecl-harness-engineer/agents/analyzer.md +119 -0
- package/bundled-skills/ecl-harness-engineer/agents/auditor.md +212 -0
- package/bundled-skills/ecl-harness-engineer/agents/creator-config.md +343 -0
- package/bundled-skills/ecl-harness-engineer/agents/creator-docs.md +201 -0
- package/bundled-skills/ecl-harness-engineer/agents/creator-linters.md +123 -0
- package/bundled-skills/ecl-harness-engineer/references/adapters/adapter-schema.md +204 -0
- package/bundled-skills/ecl-harness-engineer/references/adapters/generic.md +156 -0
- package/bundled-skills/ecl-harness-engineer/references/adapters/go.md +212 -0
- package/bundled-skills/ecl-harness-engineer/references/adapters/java.md +205 -0
- package/bundled-skills/ecl-harness-engineer/references/adapters/python.md +225 -0
- package/bundled-skills/ecl-harness-engineer/references/adapters/rust.md +220 -0
- package/bundled-skills/ecl-harness-engineer/references/adapters/typescript.md +245 -0
- package/bundled-skills/ecl-harness-engineer/references/architecture-diagrams.md +420 -0
- package/bundled-skills/ecl-harness-engineer/references/audit-templates.md +649 -0
- package/bundled-skills/ecl-harness-engineer/references/capability-registry.md +485 -0
- package/bundled-skills/ecl-harness-engineer/references/darwin-eval-prompts.md +373 -0
- package/bundled-skills/ecl-harness-engineer/references/documentation-templates.md +741 -0
- package/bundled-skills/ecl-harness-engineer/references/durability-patterns.md +423 -0
- package/bundled-skills/ecl-harness-engineer/references/ecl-harness.md +1431 -0
- package/bundled-skills/ecl-harness-engineer/references/environment-config-guide.md +534 -0
- package/bundled-skills/ecl-harness-engineer/references/environment-detection-guide.md +751 -0
- package/bundled-skills/ecl-harness-engineer/references/eval-templates.md +377 -0
- package/bundled-skills/ecl-harness-engineer/references/gc-templates.md +798 -0
- package/bundled-skills/ecl-harness-engineer/references/greenfield-templates.md +1385 -0
- package/bundled-skills/ecl-harness-engineer/references/linter-templates.md +448 -0
- package/bundled-skills/ecl-harness-engineer/references/observability-templates.md +315 -0
- package/bundled-skills/environment-setup-guide/SKILL.md +2 -2
- package/bundled-skills/evolution/SKILL.md +1 -1
- package/bundled-skills/gitops-workflow/SKILL.md +1 -1
- package/bundled-skills/linkerd-patterns/SKILL.md +1 -1
- package/bundled-skills/loki-mode/examples/todo-app-generated/frontend/package-lock.json +504 -1317
- package/bundled-skills/loki-mode/examples/todo-app-generated/frontend/package.json +2 -2
- package/bundled-skills/lovable-cleanup/SKILL.md +416 -0
- package/bundled-skills/monopoly/SKILL.md +397 -0
- package/bundled-skills/monopoly/patterns/SKILL.md +331 -0
- package/bundled-skills/monopoly/scale-benchmarks/SKILL.md +174 -0
- package/bundled-skills/monopoly/security-checklist/SKILL.md +69 -0
- package/bundled-skills/monopoly/tech-matrix/SKILL.md +268 -0
- package/bundled-skills/pagespeed-enhancer/SKILL.md +579 -0
- package/bundled-skills/polis-protocol/SKILL.md +6 -3
- package/bundled-skills/unship/SKILL.md +11 -5
- package/bundled-skills/uv-package-manager/resources/implementation-playbook.md +1 -1
- package/bundled-skills/varlock/SKILL.md +2 -2
- package/package.json +1 -1
- package/skills_index.json +204 -4
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
# Eval Templates
|
|
2
|
+
|
|
3
|
+
Templates for evaluation/benchmark infrastructure.
|
|
4
|
+
|
|
5
|
+
Advanced profile only. Load this reference only when the user explicitly requests agent
|
|
6
|
+
evaluation, regression benchmarks, skill scoring, or an eval framework. Do not create
|
|
7
|
+
`harness/eval` as part of the default core harness.
|
|
8
|
+
|
|
9
|
+
## Eval Task JSON Schema
|
|
10
|
+
|
|
11
|
+
```json
|
|
12
|
+
{
|
|
13
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
14
|
+
"type": "object",
|
|
15
|
+
"required": ["id", "category", "difficulty", "prompt"],
|
|
16
|
+
"properties": {
|
|
17
|
+
"id": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"description": "Unique task identifier, e.g., 'file_ops_001'"
|
|
20
|
+
},
|
|
21
|
+
"category": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"enum": ["file_ops", "code_gen", "debugging", "refactoring"],
|
|
24
|
+
"description": "Task category for grouping"
|
|
25
|
+
},
|
|
26
|
+
"difficulty": {
|
|
27
|
+
"type": "string",
|
|
28
|
+
"enum": ["easy", "medium", "hard"],
|
|
29
|
+
"description": "Task difficulty level"
|
|
30
|
+
},
|
|
31
|
+
"description": {
|
|
32
|
+
"type": "string",
|
|
33
|
+
"description": "Human-readable description of what this tests"
|
|
34
|
+
},
|
|
35
|
+
"prompt": {
|
|
36
|
+
"type": "string",
|
|
37
|
+
"description": "The user prompt to send to the agent"
|
|
38
|
+
},
|
|
39
|
+
"init_files": {
|
|
40
|
+
"type": "object",
|
|
41
|
+
"additionalProperties": { "type": "string" },
|
|
42
|
+
"description": "Files to create before running (path -> content)"
|
|
43
|
+
},
|
|
44
|
+
"expected_files": {
|
|
45
|
+
"type": "object",
|
|
46
|
+
"additionalProperties": { "$ref": "#/definitions/FileExpectation" },
|
|
47
|
+
"description": "Expected file states after execution"
|
|
48
|
+
},
|
|
49
|
+
"expected_commands": {
|
|
50
|
+
"type": "array",
|
|
51
|
+
"items": { "type": "string" },
|
|
52
|
+
"description": "Commands that should have been executed"
|
|
53
|
+
},
|
|
54
|
+
"max_turns": {
|
|
55
|
+
"type": "integer",
|
|
56
|
+
"description": "Maximum conversation turns allowed"
|
|
57
|
+
},
|
|
58
|
+
"max_tokens": {
|
|
59
|
+
"type": "integer",
|
|
60
|
+
"description": "Maximum tokens allowed"
|
|
61
|
+
},
|
|
62
|
+
"timeout": {
|
|
63
|
+
"type": "string",
|
|
64
|
+
"description": "Timeout duration, e.g., '30s', '2m'"
|
|
65
|
+
},
|
|
66
|
+
"tags": {
|
|
67
|
+
"type": "array",
|
|
68
|
+
"items": { "type": "string" },
|
|
69
|
+
"description": "Tags for filtering"
|
|
70
|
+
}
|
|
71
|
+
},
|
|
72
|
+
"definitions": {
|
|
73
|
+
"FileExpectation": {
|
|
74
|
+
"type": "object",
|
|
75
|
+
"properties": {
|
|
76
|
+
"must_exist": { "type": "boolean" },
|
|
77
|
+
"must_not_exist": { "type": "boolean" },
|
|
78
|
+
"must_contain": {
|
|
79
|
+
"type": "array",
|
|
80
|
+
"items": { "type": "string" },
|
|
81
|
+
"description": "Regex patterns that must match"
|
|
82
|
+
},
|
|
83
|
+
"must_not_contain": {
|
|
84
|
+
"type": "array",
|
|
85
|
+
"items": { "type": "string" },
|
|
86
|
+
"description": "Regex patterns that must not match"
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Example Eval Tasks
|
|
95
|
+
|
|
96
|
+
### File Operations
|
|
97
|
+
|
|
98
|
+
```json
|
|
99
|
+
{
|
|
100
|
+
"id": "file_ops_001",
|
|
101
|
+
"category": "file_ops",
|
|
102
|
+
"difficulty": "easy",
|
|
103
|
+
"description": "Create a simple Go hello world",
|
|
104
|
+
"prompt": "Create hello.go with a main function that prints 'Hello, World!'",
|
|
105
|
+
"expected_files": {
|
|
106
|
+
"hello.go": {
|
|
107
|
+
"must_exist": true,
|
|
108
|
+
"must_contain": ["package main", "func main", "Hello, World"]
|
|
109
|
+
}
|
|
110
|
+
},
|
|
111
|
+
"max_turns": 3,
|
|
112
|
+
"timeout": "30s",
|
|
113
|
+
"tags": ["go", "create"]
|
|
114
|
+
}
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Code Generation
|
|
118
|
+
|
|
119
|
+
```json
|
|
120
|
+
{
|
|
121
|
+
"id": "code_gen_001",
|
|
122
|
+
"category": "code_gen",
|
|
123
|
+
"difficulty": "medium",
|
|
124
|
+
"description": "Generate function with error handling",
|
|
125
|
+
"prompt": "Create a Go function ReadJSON that reads a JSON file into a struct",
|
|
126
|
+
"expected_files": {
|
|
127
|
+
"json_reader.go": {
|
|
128
|
+
"must_exist": true,
|
|
129
|
+
"must_contain": ["func ReadJSON", "json.Unmarshal", "error"]
|
|
130
|
+
}
|
|
131
|
+
},
|
|
132
|
+
"max_turns": 5,
|
|
133
|
+
"timeout": "60s",
|
|
134
|
+
"tags": ["go", "function", "json"]
|
|
135
|
+
}
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Debugging
|
|
139
|
+
|
|
140
|
+
```json
|
|
141
|
+
{
|
|
142
|
+
"id": "debug_001",
|
|
143
|
+
"category": "debugging",
|
|
144
|
+
"difficulty": "medium",
|
|
145
|
+
"description": "Fix nil pointer dereference",
|
|
146
|
+
"prompt": "Fix the nil pointer bug in buggy.go",
|
|
147
|
+
"init_files": {
|
|
148
|
+
"buggy.go": "package main\n\nfunc main() {\n\tvar s *string\n\tprintln(*s)\n}"
|
|
149
|
+
},
|
|
150
|
+
"expected_files": {
|
|
151
|
+
"buggy.go": {
|
|
152
|
+
"must_exist": true,
|
|
153
|
+
"must_not_contain": ["println\\(\\*s\\)"]
|
|
154
|
+
}
|
|
155
|
+
},
|
|
156
|
+
"max_turns": 4,
|
|
157
|
+
"timeout": "45s",
|
|
158
|
+
"tags": ["go", "nil-check", "bug"]
|
|
159
|
+
}
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Refactoring
|
|
163
|
+
|
|
164
|
+
```json
|
|
165
|
+
{
|
|
166
|
+
"id": "refactor_001",
|
|
167
|
+
"category": "refactoring",
|
|
168
|
+
"difficulty": "medium",
|
|
169
|
+
"description": "Extract duplicated validation logic",
|
|
170
|
+
"prompt": "Extract the duplicated validation in handler.go into a validate function",
|
|
171
|
+
"init_files": {
|
|
172
|
+
"handler.go": "package main\n\nimport \"fmt\"\n\nfunc handleA(s string) error {\n\tif s == \"\" { return fmt.Errorf(\"empty\") }\n\tif len(s) > 100 { return fmt.Errorf(\"too long\") }\n\treturn nil\n}\n\nfunc handleB(s string) error {\n\tif s == \"\" { return fmt.Errorf(\"empty\") }\n\tif len(s) > 100 { return fmt.Errorf(\"too long\") }\n\treturn nil\n}"
|
|
173
|
+
},
|
|
174
|
+
"expected_files": {
|
|
175
|
+
"handler.go": {
|
|
176
|
+
"must_exist": true,
|
|
177
|
+
"must_contain": ["func validate"]
|
|
178
|
+
}
|
|
179
|
+
},
|
|
180
|
+
"max_turns": 5,
|
|
181
|
+
"timeout": "60s",
|
|
182
|
+
"tags": ["go", "extract-function"]
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
## Eval Framework Code
|
|
187
|
+
|
|
188
|
+
```go
|
|
189
|
+
// harness/eval/framework.go
|
|
190
|
+
package eval
|
|
191
|
+
|
|
192
|
+
import (
|
|
193
|
+
"encoding/json"
|
|
194
|
+
"os"
|
|
195
|
+
"path/filepath"
|
|
196
|
+
"regexp"
|
|
197
|
+
"time"
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
// EvalTask describes a single evaluation task as stored in a dataset JSON
// file.
//
// In JSON, "timeout" is written as a duration string such as "30s" or "2m";
// see UnmarshalJSON. All other fields map directly through their tags.
type EvalTask struct {
	ID               string                     `json:"id"`
	Category         string                     `json:"category"`
	Difficulty       string                     `json:"difficulty"`
	Description      string                     `json:"description,omitempty"`
	Prompt           string                     `json:"prompt"`
	InitFiles        map[string]string          `json:"init_files,omitempty"`
	ExpectedFiles    map[string]FileExpectation `json:"expected_files,omitempty"`
	ExpectedCommands []string                   `json:"expected_commands,omitempty"`
	MaxTurns         int                        `json:"max_turns,omitempty"`
	MaxTokens        int                        `json:"max_tokens,omitempty"`
	Timeout          time.Duration              `json:"timeout,omitempty"`
	Tags             []string                   `json:"tags,omitempty"`
}

// UnmarshalJSON decodes a task whose "timeout" is either a duration string
// accepted by time.ParseDuration ("30s", "2m") or an integer number of
// nanoseconds, which is what encoding/json emits when marshalling a
// time.Duration. A missing, null, or empty-string timeout leaves Timeout
// untouched.
//
// Without this method the default decoder rejects the string form, because
// time.Duration is an int64 underneath.
func (t *EvalTask) UnmarshalJSON(data []byte) error {
	// plainTask has EvalTask's fields but none of its methods, so decoding
	// into it does not recurse back into this function.
	type plainTask EvalTask
	aux := struct {
		*plainTask
		// Declared at the outer level so it shadows plainTask.Timeout and
		// captures the raw JSON value for manual parsing.
		Timeout json.RawMessage `json:"timeout"`
	}{plainTask: (*plainTask)(t)}

	if err := json.Unmarshal(data, &aux); err != nil {
		return err
	}

	raw := aux.Timeout
	if len(raw) == 0 || string(raw) == "null" {
		return nil
	}

	if raw[0] != '"' {
		// Numeric form: nanoseconds, as produced by marshalling EvalTask.
		var ns int64
		if err := json.Unmarshal(raw, &ns); err != nil {
			return err
		}
		t.Timeout = time.Duration(ns)
		return nil
	}

	var text string
	if err := json.Unmarshal(raw, &text); err != nil {
		return err
	}
	if text == "" {
		return nil
	}
	d, err := time.ParseDuration(text)
	if err != nil {
		return err
	}
	t.Timeout = d
	return nil
}

// FileExpectation lists the conditions a file must satisfy after a task has
// run. MustContain and MustNotContain hold regular expressions that are
// matched against the file's content.
type FileExpectation struct {
	MustExist      bool     `json:"must_exist"`
	MustNotExist   bool     `json:"must_not_exist,omitempty"`
	MustContain    []string `json:"must_contain,omitempty"`
	MustNotContain []string `json:"must_not_contain,omitempty"`
}
|
|
221
|
+
|
|
222
|
+
// Score holds the numeric grading of one eval run: three named component
// scores, an overall score, and optional per-metric details and free-form
// notes.
type Score struct {
	Correctness float64 `json:"correctness"`
	Efficiency  float64 `json:"efficiency"`
	Style       float64 `json:"style"`
	Overall     float64 `json:"overall"`

	// Optional extras; both are omitted from JSON when empty.
	Details map[string]float64 `json:"details,omitempty"`
	Notes   []string           `json:"notes,omitempty"`
}
|
|
230
|
+
|
|
231
|
+
type EvalResult struct {
|
|
232
|
+
TaskID string `json:"task_id"`
|
|
233
|
+
Success bool `json:"success"`
|
|
234
|
+
Score Score `json:"score"`
|
|
235
|
+
Duration time.Duration `json:"duration"`
|
|
236
|
+
Error string `json:"error,omitempty"`
|
|
237
|
+
Timestamp time.Time `json:"timestamp"`
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
// LoadTask reads an eval task from JSON
|
|
241
|
+
func LoadTask(path string) (*EvalTask, error) {
|
|
242
|
+
data, err := os.ReadFile(path)
|
|
243
|
+
if err != nil {
|
|
244
|
+
return nil, err
|
|
245
|
+
}
|
|
246
|
+
var task EvalTask
|
|
247
|
+
return &task, json.Unmarshal(data, &task)
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
// LoadTasksFromDir loads all tasks from a directory
|
|
251
|
+
func LoadTasksFromDir(dir string) ([]EvalTask, error) {
|
|
252
|
+
var tasks []EvalTask
|
|
253
|
+
filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
|
|
254
|
+
if err != nil || info.IsDir() || filepath.Ext(path) != ".json" {
|
|
255
|
+
return nil
|
|
256
|
+
}
|
|
257
|
+
task, err := LoadTask(path)
|
|
258
|
+
if err == nil {
|
|
259
|
+
tasks = append(tasks, *task)
|
|
260
|
+
}
|
|
261
|
+
return nil
|
|
262
|
+
})
|
|
263
|
+
return tasks, nil
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
// ValidateFile checks if a file meets expectations
|
|
267
|
+
func ValidateFile(path string, exp FileExpectation) (bool, []string) {
|
|
268
|
+
var issues []string
|
|
269
|
+
|
|
270
|
+
_, err := os.Stat(path)
|
|
271
|
+
exists := err == nil
|
|
272
|
+
|
|
273
|
+
if exp.MustExist && !exists {
|
|
274
|
+
issues = append(issues, "file must exist")
|
|
275
|
+
}
|
|
276
|
+
if exp.MustNotExist && exists {
|
|
277
|
+
issues = append(issues, "file must not exist")
|
|
278
|
+
}
|
|
279
|
+
if !exists {
|
|
280
|
+
return len(issues) == 0, issues
|
|
281
|
+
}
|
|
282
|
+
|
|
283
|
+
content, err := os.ReadFile(path)
|
|
284
|
+
if err != nil {
|
|
285
|
+
issues = append(issues, "cannot read file")
|
|
286
|
+
return false, issues
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
for _, pattern := range exp.MustContain {
|
|
290
|
+
if matched, _ := regexp.Match(pattern, content); !matched {
|
|
291
|
+
issues = append(issues, "missing: "+pattern)
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
for _, pattern := range exp.MustNotContain {
|
|
295
|
+
if matched, _ := regexp.Match(pattern, content); matched {
|
|
296
|
+
issues = append(issues, "forbidden: "+pattern)
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
return len(issues) == 0, issues
|
|
301
|
+
}
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
## CLI Integration
|
|
305
|
+
|
|
306
|
+
```go
|
|
307
|
+
// cmd/eval/eval.go
|
|
308
|
+
package eval
|
|
309
|
+
|
|
310
|
+
import (
|
|
311
|
+
"fmt"
|
|
312
|
+
evalfw "your-module/harness/eval"
|
|
313
|
+
"github.com/spf13/cobra"
|
|
314
|
+
)
|
|
315
|
+
|
|
316
|
+
func NewEvalCommand() *cobra.Command {
|
|
317
|
+
cmd := &cobra.Command{
|
|
318
|
+
Use: "eval",
|
|
319
|
+
Short: "Run evaluation benchmarks",
|
|
320
|
+
}
|
|
321
|
+
cmd.AddCommand(newRunCmd())
|
|
322
|
+
cmd.AddCommand(newListCmd())
|
|
323
|
+
return cmd
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
func newRunCmd() *cobra.Command {
|
|
327
|
+
var category, taskID, output string
|
|
328
|
+
cmd := &cobra.Command{
|
|
329
|
+
Use: "run",
|
|
330
|
+
Short: "Run eval tasks",
|
|
331
|
+
RunE: func(cmd *cobra.Command, args []string) error {
|
|
332
|
+
tasks, _ := evalfw.LoadTasksFromDir("harness/eval/datasets")
|
|
333
|
+
fmt.Printf("Running %d tasks...\n", len(tasks))
|
|
334
|
+
// Execute tasks and report
|
|
335
|
+
return nil
|
|
336
|
+
},
|
|
337
|
+
}
|
|
338
|
+
cmd.Flags().StringVar(&category, "category", "", "Filter by category")
|
|
339
|
+
cmd.Flags().StringVar(&taskID, "task", "", "Run specific task")
|
|
340
|
+
cmd.Flags().StringVar(&output, "output", "", "Export results to file")
|
|
341
|
+
return cmd
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
func newListCmd() *cobra.Command {
|
|
345
|
+
return &cobra.Command{
|
|
346
|
+
Use: "list",
|
|
347
|
+
Short: "List available eval tasks",
|
|
348
|
+
RunE: func(cmd *cobra.Command, args []string) error {
|
|
349
|
+
tasks, _ := evalfw.LoadTasksFromDir("harness/eval/datasets")
|
|
350
|
+
fmt.Printf("%-20s %-15s %-10s\n", "ID", "Category", "Difficulty")
|
|
351
|
+
for _, t := range tasks {
|
|
352
|
+
fmt.Printf("%-20s %-15s %-10s\n", t.ID, t.Category, t.Difficulty)
|
|
353
|
+
}
|
|
354
|
+
return nil
|
|
355
|
+
},
|
|
356
|
+
}
|
|
357
|
+
}
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
## Report Format
|
|
361
|
+
|
|
362
|
+
```go
|
|
363
|
+
type EvalReport struct {
|
|
364
|
+
StartTime time.Time `json:"start_time"`
|
|
365
|
+
EndTime time.Time `json:"end_time"`
|
|
366
|
+
Results []EvalResult `json:"results"`
|
|
367
|
+
Summary EvalSummary `json:"summary"`
|
|
368
|
+
}
|
|
369
|
+
|
|
370
|
+
// EvalSummary aggregates a run's results: task counts, pass rate, and average
// score.
type EvalSummary struct {
	// Task counts.
	TotalTasks  int `json:"total_tasks"`
	PassedTasks int `json:"passed_tasks"`
	FailedTasks int `json:"failed_tasks"`

	// Aggregate metrics.
	PassRate     float64 `json:"pass_rate"`
	AverageScore float64 `json:"average_score"`
}
|
|
377
|
+
```
|