npm - opencode-skills-collection - Versions diffs - 3.0.45 → 3.0.47 - Mend

opencode-skills-collection 3.0.45 → 3.0.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

package/bundled-skills/ecl-harness-engineer/references/eval-templates.md ADDED Viewed

@@ -0,0 +1,377 @@
+# Eval Templates
+Templates for evaluation/benchmark infrastructure.
+Advanced profile only. Load this reference only when the user explicitly requests agent
+evaluation, regression benchmarks, skill scoring, or an eval framework. Do not create
+`harness/eval` as part of the default core harness.
+## Eval Task JSON Schema
+```json
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "required": ["id", "category", "difficulty", "prompt"],
+  "properties": {
+    "id": {
+      "type": "string",
+      "description": "Unique task identifier, e.g., 'file_ops_001'"
+    },
+    "category": {
+      "type": "string",
+      "enum": ["file_ops", "code_gen", "debugging", "refactoring"],
+      "description": "Task category for grouping"
+    },
+    "difficulty": {
+      "type": "string",
+      "enum": ["easy", "medium", "hard"],
+      "description": "Task difficulty level"
+    },
+    "description": {
+      "type": "string",
+      "description": "Human-readable description of what this tests"
+    },
+    "prompt": {
+      "type": "string",
+      "description": "The user prompt to send to the agent"
+    },
+    "init_files": {
+      "type": "object",
+      "additionalProperties": { "type": "string" },
+      "description": "Files to create before running (path -> content)"
+    },
+    "expected_files": {
+      "type": "object",
+      "additionalProperties": { "$ref": "#/definitions/FileExpectation" },
+      "description": "Expected file states after execution"
+    },
+    "expected_commands": {
+      "type": "array",
+      "items": { "type": "string" },
+      "description": "Commands that should have been executed"
+    },
+    "max_turns": {
+      "type": "integer",
+      "description": "Maximum conversation turns allowed"
+    },
+    "max_tokens": {
+      "type": "integer",
+      "description": "Maximum tokens allowed"
+    },
+    "timeout": {
+      "type": "string",
+      "description": "Timeout duration, e.g., '30s', '2m'"
+    },
+    "tags": {
+      "type": "array",
+      "items": { "type": "string" },
+      "description": "Tags for filtering"
+    }
+  },
+  "definitions": {
+    "FileExpectation": {
+      "type": "object",
+      "properties": {
+        "must_exist": { "type": "boolean" },
+        "must_not_exist": { "type": "boolean" },
+        "must_contain": {
+          "type": "array",
+          "items": { "type": "string" },
+          "description": "Regex patterns that must match"
+        },
+        "must_not_contain": {
+          "type": "array",
+          "items": { "type": "string" },
+          "description": "Regex patterns that must not match"
+        }
+      }
+    }
+  }
+}
+```
+## Example Eval Tasks
+### File Operations
+```json
+{
+  "id": "file_ops_001",
+  "category": "file_ops",
+  "difficulty": "easy",
+  "description": "Create a simple Go hello world",
+  "prompt": "Create hello.go with a main function that prints 'Hello, World!'",
+  "expected_files": {
+    "hello.go": {
+      "must_exist": true,
+      "must_contain": ["package main", "func main", "Hello, World"]
+    }
+  },
+  "max_turns": 3,
+  "timeout": "30s",
+  "tags": ["go", "create"]
+}
+```
+### Code Generation
+```json
+{
+  "id": "code_gen_001",
+  "category": "code_gen",
+  "difficulty": "medium",
+  "description": "Generate function with error handling",
+  "prompt": "Create a Go function ReadJSON that reads a JSON file into a struct",
+  "expected_files": {
+    "json_reader.go": {
+      "must_exist": true,
+      "must_contain": ["func ReadJSON", "json.Unmarshal", "error"]
+    }
+  },
+  "max_turns": 5,
+  "timeout": "60s",
+  "tags": ["go", "function", "json"]
+}
+```
+### Debugging
+```json
+{
+  "id": "debug_001",
+  "category": "debugging",
+  "difficulty": "medium",
+  "description": "Fix nil pointer dereference",
+  "prompt": "Fix the nil pointer bug in buggy.go",
+  "init_files": {
+    "buggy.go": "package main\n\nfunc main() {\n\tvar s *string\n\tprintln(*s)\n}"
+  },
+  "expected_files": {
+    "buggy.go": {
+      "must_exist": true,
+      "must_not_contain": ["println\\(\\*s\\)"]
+    }
+  },
+  "max_turns": 4,
+  "timeout": "45s",
+  "tags": ["go", "nil-check", "bug"]
+}
+```
+### Refactoring
+```json
+{
+  "id": "refactor_001",
+  "category": "refactoring",
+  "difficulty": "medium",
+  "description": "Extract duplicated validation logic",
+  "prompt": "Extract the duplicated validation in handler.go into a validate function",
+  "init_files": {
+    "handler.go": "package main\n\nfunc handleA(s string) error {\n\tif s == \"\" { return fmt.Errorf(\"empty\") }\n\tif len(s) > 100 { return fmt.Errorf(\"too long\") }\n\treturn nil\n}\n\nfunc handleB(s string) error {\n\tif s == \"\" { return fmt.Errorf(\"empty\") }\n\tif len(s) > 100 { return fmt.Errorf(\"too long\") }\n\treturn nil\n}"
+  },
+  "expected_files": {
+    "handler.go": {
+      "must_exist": true,
+      "must_contain": ["func validate"]
+    }
+  },
+  "max_turns": 5,
+  "timeout": "60s",
+  "tags": ["go", "extract-function"]
+}
+```
+## Eval Framework Code
+```go
+// harness/eval/framework.go
+package eval
+import (
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"regexp"
+	"time"
+)
+// EvalTask describes a single evaluation task as stored in a dataset JSON
+// file. Its JSON keys mirror the eval task schema: "id", "category",
+// "difficulty" and "prompt" are required there, every other key is optional.
+type EvalTask struct {
+	// ID is the unique task identifier, e.g. "file_ops_001".
+	ID string `json:"id"`
+	// Category groups tasks (file_ops, code_gen, debugging, refactoring).
+	Category string `json:"category"`
+	// Difficulty is one of easy, medium or hard.
+	Difficulty string `json:"difficulty"`
+	// Description is a human-readable summary of what the task tests.
+	Description string `json:"description,omitempty"`
+	// Prompt is the user prompt sent to the agent.
+	Prompt string `json:"prompt"`
+	// InitFiles maps a path to the content to create before the run.
+	InitFiles map[string]string `json:"init_files,omitempty"`
+	// ExpectedFiles maps a path to the state expected after execution.
+	ExpectedFiles map[string]FileExpectation `json:"expected_files,omitempty"`
+	// ExpectedCommands lists commands that should have been executed.
+	ExpectedCommands []string `json:"expected_commands,omitempty"`
+	// MaxTurns is the maximum number of conversation turns allowed.
+	MaxTurns int `json:"max_turns,omitempty"`
+	// MaxTokens is the maximum number of tokens allowed.
+	MaxTokens int `json:"max_tokens,omitempty"`
+	// Timeout is written in JSON as a duration string such as "30s" or "2m";
+	// see UnmarshalJSON for the accepted forms.
+	Timeout time.Duration `json:"timeout,omitempty"`
+	// Tags are free-form labels used for filtering.
+	Tags []string `json:"tags,omitempty"`
+}
+// UnmarshalJSON decodes a task, accepting "timeout" either as a duration
+// string understood by time.ParseDuration ("30s", "2m") — the form the schema
+// and the example tasks use — or as an integer number of nanoseconds, which is
+// what encoding/json emits for a time.Duration. Without this method the
+// string form fails to decode into the int64-backed Duration field.
+//
+// A missing or null "timeout" leaves the field untouched. Marshalling is
+// unchanged and still writes the timeout as integer nanoseconds.
+func (t *EvalTask) UnmarshalJSON(data []byte) error {
+	// Alias has EvalTask's fields but none of its methods, so decoding into it
+	// does not recurse back into this function.
+	type Alias EvalTask
+	aux := struct {
+		*Alias
+		// Timeout shadows Alias.Timeout so the raw JSON value can be
+		// inspected before it is converted.
+		Timeout json.RawMessage `json:"timeout,omitempty"`
+	}{Alias: (*Alias)(t)}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		return err
+	}
+	raw := aux.Timeout
+	if len(raw) == 0 || string(raw) == "null" {
+		return nil
+	}
+	if raw[0] == '"' {
+		var text string
+		if err := json.Unmarshal(raw, &text); err != nil {
+			return err
+		}
+		d, err := time.ParseDuration(text)
+		if err != nil {
+			return err
+		}
+		t.Timeout = d
+		return nil
+	}
+	var ns int64
+	if err := json.Unmarshal(raw, &ns); err != nil {
+		return err
+	}
+	t.Timeout = time.Duration(ns)
+	return nil
+}
+// FileExpectation lists the checks applied to one file after a task has run.
+// Patterns are regular expressions, not literal substrings.
+type FileExpectation struct {
+	MustExist      bool     `json:"must_exist"` // the file has to be present
+	MustNotExist   bool     `json:"must_not_exist,omitempty"` // the file has to be absent
+	MustContain    []string `json:"must_contain,omitempty"` // regex patterns that must match the content
+	MustNotContain []string `json:"must_not_contain,omitempty"` // regex patterns that must not match the content
+}
+// Score holds the numeric grading of one eval run, serialised as JSON.
+// NOTE(review): nothing in this file computes these values; the scale of each
+// component and how Overall is derived from the others should be confirmed
+// against the scoring code.
+type Score struct {
+	Correctness float64            `json:"correctness"`
+	Efficiency  float64            `json:"efficiency"`
+	Style       float64            `json:"style"`
+	Overall     float64            `json:"overall"`
+	Details     map[string]float64 `json:"details,omitempty"` // presumably named sub-scores — confirm against the scorer
+	Notes       []string           `json:"notes,omitempty"`
+}
+// EvalResult records the outcome of running one eval task.
+// NOTE(review): no producer is visible in this file; field meanings below are
+// inferred from names and tags and should be confirmed against the runner.
+type EvalResult struct {
+	TaskID    string        `json:"task_id"` // presumably the EvalTask.ID this result belongs to
+	Success   bool          `json:"success"`
+	Score     Score         `json:"score"`
+	Duration  time.Duration `json:"duration"` // encoded by encoding/json as integer nanoseconds
+	Error     string        `json:"error,omitempty"`
+	Timestamp time.Time     `json:"timestamp"`
+}
+// LoadTask reads the JSON file at path and decodes it into an EvalTask.
+//
+// On failure it returns a nil task and the error. A read failure is returned
+// as reported by os.ReadFile; a decode failure is wrapped in an *os.PathError
+// so the offending file is named in the message while the underlying JSON
+// error stays reachable through errors.Is / errors.As.
+func LoadTask(path string) (*EvalTask, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return nil, err
+	}
+	var task EvalTask
+	if err := json.Unmarshal(data, &task); err != nil {
+		// Never hand back a half-populated task together with an error.
+		return nil, &os.PathError{Op: "decode eval task", Path: path, Err: err}
+	}
+	return &task, nil
+}
+// LoadTasksFromDir walks dir recursively and loads every non-directory entry
+// whose extension is ".json" as an EvalTask, in the order filepath.Walk
+// visits them.
+//
+// The first error — from walking the tree (for example, dir does not exist)
+// or from loading a task file — stops the walk and is returned with a nil
+// slice. Skipping such files silently would make a broken dataset look like
+// an empty one.
+func LoadTasksFromDir(dir string) ([]EvalTask, error) {
+	var tasks []EvalTask
+	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			// info may be nil here, so bail out before touching it.
+			return err
+		}
+		if info.IsDir() || filepath.Ext(path) != ".json" {
+			return nil
+		}
+		task, err := LoadTask(path)
+		if err != nil {
+			return err
+		}
+		tasks = append(tasks, *task)
+		return nil
+	})
+	if err != nil {
+		return nil, err
+	}
+	return tasks, nil
+}
+// ValidateFile checks the file at path against exp.
+//
+// It returns true when every expectation holds, together with the list of
+// violations found (nil when there are none). Existence is checked first; the
+// content patterns are evaluated only when the file exists and can be read.
+// Each entry of MustContain and MustNotContain is compiled as a regular
+// expression; a pattern that does not compile is reported as an issue rather
+// than being counted as "no match", which would let a broken
+// must_not_contain rule pass unnoticed.
+func ValidateFile(path string, exp FileExpectation) (bool, []string) {
+	var issues []string
+	_, err := os.Stat(path)
+	if err != nil && !os.IsNotExist(err) {
+		// The file may well exist (e.g. permission denied); do not treat this
+		// as absence, which would satisfy MustNotExist by accident.
+		issues = append(issues, "cannot stat file")
+		return false, issues
+	}
+	exists := err == nil
+	if exp.MustExist && !exists {
+		issues = append(issues, "file must exist")
+	}
+	if exp.MustNotExist && exists {
+		issues = append(issues, "file must not exist")
+	}
+	if !exists {
+		return len(issues) == 0, issues
+	}
+	content, err := os.ReadFile(path)
+	if err != nil {
+		issues = append(issues, "cannot read file")
+		return false, issues
+	}
+	for _, pattern := range exp.MustContain {
+		re, err := regexp.Compile(pattern)
+		if err != nil {
+			issues = append(issues, "invalid pattern: "+pattern)
+			continue
+		}
+		if !re.Match(content) {
+			issues = append(issues, "missing: "+pattern)
+		}
+	}
+	for _, pattern := range exp.MustNotContain {
+		re, err := regexp.Compile(pattern)
+		if err != nil {
+			issues = append(issues, "invalid pattern: "+pattern)
+			continue
+		}
+		if re.Match(content) {
+			issues = append(issues, "forbidden: "+pattern)
+		}
+	}
+	return len(issues) == 0, issues
+}
+```
+## CLI Integration
+```go
+// cmd/eval/eval.go
+package eval
+import (
+	"fmt"
+	evalfw "your-module/harness/eval"
+	"github.com/spf13/cobra"
+)
+// NewEvalCommand builds the "eval" parent command and attaches its "run" and
+// "list" subcommands.
+func NewEvalCommand() *cobra.Command {
+	root := &cobra.Command{Use: "eval", Short: "Run evaluation benchmarks"}
+	root.AddCommand(newRunCmd(), newListCmd())
+	return root
+}
+// newRunCmd builds the "run" subcommand. It loads the tasks under
+// harness/eval/datasets, narrows them with --category and --task when those
+// flags are set, and reports how many tasks were selected. A load failure is
+// returned to cobra instead of being discarded.
+//
+// Task execution and result export are still placeholders, so --output is
+// parsed but not consumed yet.
+func newRunCmd() *cobra.Command {
+	var category, taskID, output string
+	cmd := &cobra.Command{
+		Use:   "run",
+		Short: "Run eval tasks",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			tasks, err := evalfw.LoadTasksFromDir("harness/eval/datasets")
+			if err != nil {
+				return fmt.Errorf("loading eval tasks: %w", err)
+			}
+			// An empty flag value means "no filter" for that dimension.
+			selected := make([]evalfw.EvalTask, 0, len(tasks))
+			for _, t := range tasks {
+				if category != "" && t.Category != category {
+					continue
+				}
+				if taskID != "" && t.ID != taskID {
+					continue
+				}
+				selected = append(selected, t)
+			}
+			fmt.Printf("Running %d tasks...\n", len(selected))
+			// Execute tasks and report
+			return nil
+		},
+	}
+	cmd.Flags().StringVar(&category, "category", "", "Filter by category")
+	cmd.Flags().StringVar(&taskID, "task", "", "Run specific task")
+	cmd.Flags().StringVar(&output, "output", "", "Export results to file")
+	return cmd
+}
+// newListCmd builds the "list" subcommand, which prints a table of ID,
+// category and difficulty for every task under harness/eval/datasets. A load
+// failure is returned to cobra rather than printing an empty table.
+func newListCmd() *cobra.Command {
+	return &cobra.Command{
+		Use:   "list",
+		Short: "List available eval tasks",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			tasks, err := evalfw.LoadTasksFromDir("harness/eval/datasets")
+			if err != nil {
+				return fmt.Errorf("loading eval tasks: %w", err)
+			}
+			fmt.Printf("%-20s %-15s %-10s\n", "ID", "Category", "Difficulty")
+			for _, t := range tasks {
+				fmt.Printf("%-20s %-15s %-10s\n", t.ID, t.Category, t.Difficulty)
+			}
+			return nil
+		},
+	}
+}
+```
+## Report Format
+```go
+// EvalReport is the JSON document for one eval run: the per-task results plus
+// an aggregate summary.
+// NOTE(review): no code in this file fills it in; presumably StartTime and
+// EndTime bracket the whole run — confirm against the runner.
+type EvalReport struct {
+	StartTime time.Time     `json:"start_time"`
+	EndTime   time.Time     `json:"end_time"`
+	Results   []EvalResult  `json:"results"`
+	Summary   EvalSummary   `json:"summary"`
+}
+// EvalSummary holds the aggregate counters of an eval run.
+// NOTE(review): how the values are computed is not shown here. Presumably
+// PassRate is PassedTasks/TotalTasks and AverageScore averages Score.Overall;
+// confirm the formulas and whether PassRate is a fraction or a percentage.
+type EvalSummary struct {
+	TotalTasks   int     `json:"total_tasks"`
+	PassedTasks  int     `json:"passed_tasks"`
+	FailedTasks  int     `json:"failed_tasks"`
+	PassRate     float64 `json:"pass_rate"`
+	AverageScore float64 `json:"average_score"`
+}
+```