@gokulkrishh/skills 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -0
- package/index.js +16 -0
- package/package.json +27 -0
- package/skills/code-review/SKILL.md +94 -0
- package/skills/code-review/evals/evals.json +39 -0
- package/skills/commit/SKILL.md +112 -0
- package/skills/commit/evals/evals.json +38 -0
- package/skills/model-council/SKILL.md +87 -0
- package/skills/model-council/evals/evals.json +38 -0
package/README.md
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Skills
|
|
2
|
+
|
|
3
|
+
A collection of custom [Agent Skills](https://agentskills.io) for AI coding agents. Works with Claude Code, GitHub Copilot, Cursor, Cline, and more.
|
|
4
|
+
|
|
5
|
+
## Skills
|
|
6
|
+
|
|
7
|
+
| Skill | Command | Description |
|
|
8
|
+
| ------------------------------------------------ | ---------- | ----------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
9
|
+
| [Model Council](./skills/model-council/SKILL.md) | `/council` | 4-agent debate system inspired by Grok 4.20's council. Agents (Captain, Scholar, Logician, Contrarian) analyze, debate, and synthesize answers. |
|
|
10
|
+
| [Code Review](./skills/code-review/SKILL.md) | `/review` | Code review for security (OWASP), performance, accessibility, and code quality with severity-based findings. |
|
|
11
|
+
| [Commit](./skills/commit/SKILL.md) | `/commit` | Analyzes changes and creates a conventional commit with a clear title and description. |
|
|
12
|
+
|
|
13
|
+
## Install
|
|
14
|
+
|
|
15
|
+
### Via [skills.sh](https://skills.sh)
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
npx skills add https://github.com/gokulkrishh/skills
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Via Claude Code (Commands)
|
|
22
|
+
|
|
23
|
+
Clone this repo and the commands are available via `.claude/commands/`:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
/council Should I use RSC or client-side rendering for my app?
|
|
27
|
+
/review https://github.com/user/repo/pull/42
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
### As npm Package
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
npm install @gokulkrishh/skills
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
```js
|
|
37
|
+
import { getSkill } from '@gokulkrishh/skills'
|
|
38
|
+
|
|
39
|
+
const prompt = getSkill('model-council')
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Creating a Skill
|
|
43
|
+
|
|
44
|
+
1. Create a new directory in `skills/` with a `SKILL.md` file
|
|
45
|
+
2. Add YAML frontmatter with `name` and `description` ([spec](https://agentskills.io/specification))
|
|
46
|
+
3. Add a command file in `.claude/commands/` for Claude Code
|
|
47
|
+
4. Register it in `index.js`
|
|
48
|
+
|
|
49
|
+
## License
|
|
50
|
+
|
|
51
|
+
MIT
|
package/index.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { readFileSync } from 'fs'
|
|
2
|
+
import { join, dirname } from 'path'
|
|
3
|
+
import { fileURLToPath } from 'url'
|
|
4
|
+
|
|
5
|
+
const __dirname = dirname(fileURLToPath(import.meta.url))
|
|
6
|
+
|
|
7
|
+
/**
 * Registry of the skills bundled with this package.
 *
 * Maps each public skill name to the location of its SKILL.md file,
 * expressed relative to the package root (the directory of this module).
 */
export const skills = {
  'model-council': 'skills/model-council/SKILL.md',
  'code-review': 'skills/code-review/SKILL.md',
  'commit': 'skills/commit/SKILL.md',
}

/**
 * Returns the raw SKILL.md content (YAML frontmatter + Markdown prompt)
 * of a registered skill.
 *
 * @param {string} name - A key of {@link skills}, e.g. `'model-council'`.
 * @returns {string} The UTF-8 decoded content of the skill's SKILL.md.
 * @throws {Error} If `name` is not a registered skill, or if the file
 *   cannot be read (the error from `readFileSync` is propagated).
 */
export function getSkill(name) {
  // Only names present in the registry are resolved. An own-property check
  // (rather than `name in skills`) rejects inherited keys such as
  // `constructor`, and keeps inputs like `../../x` from ever reaching the
  // filesystem path.
  if (!Object.prototype.hasOwnProperty.call(skills, name)) {
    const available = Object.keys(skills).join(', ')
    throw new Error(`Unknown skill "${String(name)}". Available skills: ${available}`)
  }

  // `join` normalizes the forward slashes in the registered path for the
  // current platform.
  return readFileSync(join(__dirname, skills[name]), 'utf-8')
}
|
package/package.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@gokulkrishh/skills",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Agent Skills for AI coding agents - model council, code review, and more",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "index.js",
|
|
7
|
+
"files": [
|
|
8
|
+
"skills/",
|
|
9
|
+
"index.js"
|
|
10
|
+
],
|
|
11
|
+
"keywords": [
|
|
12
|
+
"agent-skills",
|
|
13
|
+
"claude-code",
|
|
14
|
+
"copilot",
|
|
15
|
+
"cursor",
|
|
16
|
+
"model-council",
|
|
17
|
+
"code-review",
|
|
18
|
+
"multi-agent",
|
|
19
|
+
"ai-skills"
|
|
20
|
+
],
|
|
21
|
+
"author": "Gokulakrishnan Kalaikovan",
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"repository": {
|
|
24
|
+
"type": "git",
|
|
25
|
+
"url": "https://github.com/gokulkrishh/skills"
|
|
26
|
+
}
|
|
27
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: code-review
|
|
3
|
+
description: Reviews code for security (OWASP), performance, accessibility, and quality. Outputs findings by severity (Critical, Warning, Suggestion, Good) with a verdict. Use when reviewing code, diffs, or PRs.
|
|
4
|
+
license: MIT
|
|
5
|
+
metadata:
|
|
6
|
+
author: gokulkrishh
|
|
7
|
+
version: '0.1.0'
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Code Review
|
|
11
|
+
|
|
12
|
+
A code review skill that checks for web security vulnerabilities, performance issues, accessibility problems, and code quality — then delivers structured feedback with severity levels.
|
|
13
|
+
|
|
14
|
+
## Instructions
|
|
15
|
+
|
|
16
|
+
**STEP 1 — GATHER CHANGES**
|
|
17
|
+
|
|
18
|
+
Determine what to review:
|
|
19
|
+
|
|
20
|
+
- If a GitHub PR URL is provided: fetch the PR diff and context
|
|
21
|
+
- If a branch name is provided: diff it against the main branch
|
|
22
|
+
- If file paths are provided: read those files directly
|
|
23
|
+
- If nothing is provided: check for staged changes, then fall back to unstaged changes
|
|
24
|
+
|
|
25
|
+
**STEP 2 — ANALYZE**
|
|
26
|
+
|
|
27
|
+
Review the changes across these dimensions:
|
|
28
|
+
|
|
29
|
+
**Security (OWASP Top 10):**
|
|
30
|
+
|
|
31
|
+
- Injection flaws (SQL, XSS, command injection)
|
|
32
|
+
- Broken authentication / authorization
|
|
33
|
+
- Sensitive data exposure (hardcoded secrets, API keys, tokens)
|
|
34
|
+
- Insecure dependencies (check for known CVEs if possible)
|
|
35
|
+
- CSRF, open redirects (including client-side navigation to unvalidated URLs), unsafe deserialization
|
|
36
|
+
|
|
37
|
+
**Performance:**
|
|
38
|
+
|
|
39
|
+
- Unnecessary re-renders (React: missing memo, unstable refs in deps, missing key props in lists)
|
|
40
|
+
- Bundle size impact (large imports, tree-shaking issues)
|
|
41
|
+
- N+1 queries, unoptimized loops, missing pagination
|
|
42
|
+
- Memory leaks (event listeners, subscriptions not cleaned up)
|
|
43
|
+
- Lazy loading opportunities
|
|
44
|
+
|
|
45
|
+
**Accessibility:**
|
|
46
|
+
|
|
47
|
+
- Missing ARIA attributes, roles, or labels
|
|
48
|
+
- Keyboard navigation issues
|
|
49
|
+
- Color contrast, focus management
|
|
50
|
+
- Semantic HTML usage
|
|
51
|
+
|
|
52
|
+
**Code Quality:**
|
|
53
|
+
|
|
54
|
+
- Type safety issues (TypeScript)
|
|
55
|
+
- Error handling gaps
|
|
56
|
+
- Dead code, unused imports
|
|
57
|
+
- Naming clarity, readability
|
|
58
|
+
- Test coverage for new logic
|
|
59
|
+
|
|
60
|
+
**STEP 3 — REPORT**
|
|
61
|
+
|
|
62
|
+
Output the review in this format:
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
## Code Review
|
|
66
|
+
|
|
67
|
+
### Summary
|
|
68
|
+
[1-2 sentence overview of the changes and overall assessment]
|
|
69
|
+
|
|
70
|
+
### Findings
|
|
71
|
+
|
|
72
|
+
#### 🔴 Critical
|
|
73
|
+
[Issues that MUST be fixed before merging — security vulnerabilities, data loss risks, breaking bugs]
|
|
74
|
+
|
|
75
|
+
#### 🟡 Warning
|
|
76
|
+
[Issues that SHOULD be fixed — performance problems, accessibility gaps, potential bugs]
|
|
77
|
+
|
|
78
|
+
#### 🔵 Suggestion
|
|
79
|
+
[Nice-to-haves — code style improvements, minor optimizations, better patterns]
|
|
80
|
+
|
|
81
|
+
#### ✅ Good
|
|
82
|
+
[Things done well worth calling out — encouragement for good patterns]
|
|
83
|
+
|
|
84
|
+
### Verdict: [APPROVE | REQUEST CHANGES | NEEDS DISCUSSION]
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Rules
|
|
88
|
+
|
|
89
|
+
- Every finding must reference the specific file and line number. If code is provided inline (not from a file), reference the relevant line within the snippet
|
|
90
|
+
- Include a concrete fix or code suggestion for each Critical and Warning item
|
|
91
|
+
- If there are no findings in a severity category, omit that category
|
|
92
|
+
- Be direct and specific — no vague feedback like "consider improving this"
|
|
93
|
+
- If the diff is clean with no issues, say so and approve
|
|
94
|
+
- For security findings, explain the attack vector briefly so the author understands the risk
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill_name": "code-review",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": 1,
|
|
6
|
+
"prompt": "Review this Express endpoint: app.get('/user', (req, res) => { const id = req.query.id; db.query('SELECT * FROM users WHERE id = ' + id); })",
|
|
7
|
+
"expected_output": "A review that catches the SQL injection vulnerability as Critical, with a fix suggestion using parameterized queries.",
|
|
8
|
+
"assertions": [
|
|
9
|
+
"SQL injection is flagged as Critical",
|
|
10
|
+
"The attack vector is briefly explained",
|
|
11
|
+
"A concrete fix using parameterized queries is suggested",
|
|
12
|
+
"The vulnerable line within the provided snippet is referenced (the code is inline, so no file path is expected)",
|
|
13
|
+
"A verdict of REQUEST CHANGES is given"
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": 2,
|
|
18
|
+
"prompt": "Review my changes on the current branch",
|
|
19
|
+
"expected_output": "The skill gathers the diff from the current branch, analyzes it, and outputs structured findings with severity levels.",
|
|
20
|
+
"assertions": [
|
|
21
|
+
"The review includes a Summary section",
|
|
22
|
+
"Findings are organized by severity (Critical, Warning, Suggestion, Good)",
|
|
23
|
+
"A Verdict is given (APPROVE, REQUEST CHANGES, or NEEDS DISCUSSION)",
|
|
24
|
+
"Empty severity categories are omitted"
|
|
25
|
+
]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"id": 3,
|
|
29
|
+
"prompt": "Review this React component: function UserList({ users }) { return <div>{users.map(u => <div onClick={() => window.location = u.website}>{u.name}</div>)}</div> }",
|
|
30
|
+
"expected_output": "Catches accessibility issues (no semantic HTML, no keyboard handling), the missing key prop on mapped elements, and the security risk of navigating to an unvalidated URL (open redirect, or XSS via a javascript: URL).",
|
|
31
|
+
"assertions": [
|
|
32
|
+
"Missing semantic HTML is flagged (div instead of ul/li or button)",
|
|
33
|
+
"Keyboard accessibility issue is identified (onClick without keyboard handler)",
|
|
34
|
+
"The unvalidated URL redirect is flagged as a security concern",
|
|
35
|
+
"Missing key prop on mapped elements is noted"
|
|
36
|
+
]
|
|
37
|
+
}
|
|
38
|
+
]
|
|
39
|
+
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: commit
|
|
3
|
+
description: Generates a conventional commit (conventionalcommits.org) with a clear title and description based on staged or unstaged changes. Analyzes the diff to determine the type, scope, and whether it's a breaking change. Use when the user wants to commit code changes.
|
|
4
|
+
license: MIT
|
|
5
|
+
metadata:
|
|
6
|
+
author: gokulkrishh
|
|
7
|
+
version: '0.1.0'
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Commit
|
|
11
|
+
|
|
12
|
+
Analyzes code changes and creates a [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) compliant git commit.
|
|
13
|
+
|
|
14
|
+
## Instructions
|
|
15
|
+
|
|
16
|
+
**STEP 1 — GATHER CHANGES**
|
|
17
|
+
|
|
18
|
+
- Run `git status` to see what files have changed
|
|
19
|
+
- Run `git diff --staged` to see staged changes
|
|
20
|
+
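- If nothing is staged, run `git diff` to review the unstaged changes, then stage the relevant files individually by path (never `git add .` or `git add -A`, and never files that look like secrets)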
|
|
21
|
+
- Run `git log --oneline -5` to understand the repo's commit style
|
|
22
|
+
|
|
23
|
+
**STEP 2 — ANALYZE**
|
|
24
|
+
|
|
25
|
+
Determine:
|
|
26
|
+
|
|
27
|
+
- The **type** of change (see types below)
|
|
28
|
+
- The **scope** — which module, component, or area is affected (always include when one can be reasonably determined)
|
|
29
|
+
- Whether this is a **breaking change**
|
|
30
|
+
- The "why" behind the change, not just the "what"
|
|
31
|
+
|
|
32
|
+
**STEP 3 — COMMIT**
|
|
33
|
+
|
|
34
|
+
Create the commit following the [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) specification:
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
<type>(<scope>): <description>
|
|
38
|
+
|
|
39
|
+
[optional body]
|
|
40
|
+
|
|
41
|
+
[optional footer(s)]
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Format
|
|
45
|
+
|
|
46
|
+
- **Title line:** `<type>(<scope>): <description>` — under 70 characters
|
|
47
|
+
- **Body:** Explain why the change was made, with brief context on what changed. 1-3 lines. Separate from title with a blank line. Always include a body for non-trivial commits.
|
|
48
|
+
- **Breaking changes:** Add `BREAKING CHANGE:` in the footer, or `!` after the type/scope (e.g. `feat!:` or `feat(api)!:`)
|
|
49
|
+
|
|
50
|
+
### Types
|
|
51
|
+
|
|
52
|
+
- `feat` — new feature (correlates with MINOR in SemVer)
|
|
53
|
+
- `fix` — bug fix (correlates with PATCH in SemVer)
|
|
54
|
+
- `docs` — documentation only
|
|
55
|
+
- `style` — formatting, whitespace, semicolons (no code change)
|
|
56
|
+
- `refactor` — code change that neither fixes a bug nor adds a feature
|
|
57
|
+
- `perf` — performance improvement
|
|
58
|
+
- `test` — adding or updating tests
|
|
59
|
+
- `build` — build system or external dependencies
|
|
60
|
+
- `ci` — CI configuration and scripts
|
|
61
|
+
- `chore` — other changes that don't modify src or test files
|
|
62
|
+
|
|
63
|
+
### Examples
|
|
64
|
+
|
|
65
|
+
Simple:
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
feat(auth): add OAuth2 login flow
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
With body:
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
fix(parser): handle empty input without crashing
|
|
75
|
+
|
|
76
|
+
Previously the parser would throw a NullPointerException when given
|
|
77
|
+
an empty string. Now it returns an empty result set.
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Breaking change:
|
|
81
|
+
|
|
82
|
+
```
|
|
83
|
+
feat(api)!: remove deprecated /users endpoint
|
|
84
|
+
|
|
85
|
+
BREAKING CHANGE: The /users endpoint has been removed.
|
|
86
|
+
Use /v2/users instead.
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
**STEP 4 — SUMMARY**
|
|
90
|
+
|
|
91
|
+
After committing, show a summary:
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
Committed: <commit hash>
|
|
95
|
+
Branch: <branch name>
|
|
96
|
+
Files: <number of files changed>
|
|
97
|
+
|
|
98
|
+
<commit title>
|
|
99
|
+
|
|
100
|
+
<commit body>
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Rules
|
|
104
|
+
|
|
105
|
+
- Title must be under 70 characters
|
|
106
|
+
- Title should be imperative mood ("add feature" not "added feature")
|
|
107
|
+
- Description should be lowercase, no period at the end
|
|
108
|
+
- Body should explain why the change was made, not just restate the title
|
|
109
|
+
- Do not commit files that look like secrets (.env, credentials, tokens)
|
|
110
|
+
- If there are no changes to commit, say so and stop
|
|
111
|
+
- Stage specific files, not `git add .` or `git add -A`
|
|
112
|
+
- Use `!` or `BREAKING CHANGE:` footer for breaking changes
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill_name": "commit",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": 1,
|
|
6
|
+
"prompt": "I just added a new login page component, commit it for me",
|
|
7
|
+
"expected_output": "A conventional commit with type feat, a scope related to auth/login, imperative mood title under 70 chars, and a body explaining why.",
|
|
8
|
+
"assertions": [
|
|
9
|
+
"The commit message starts with a valid type (feat, fix, docs, style, refactor, perf, test, build, ci, chore)",
|
|
10
|
+
"The title line is under 70 characters",
|
|
11
|
+
"The title uses imperative mood",
|
|
12
|
+
"The description is lowercase with no period at the end",
|
|
13
|
+
"A post-commit summary is shown with commit hash, branch, and file count"
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": 2,
|
|
18
|
+
"prompt": "commit my changes",
|
|
19
|
+
"expected_output": "The skill analyzes the diff, determines the correct type and scope, and creates a well-formed conventional commit without needing more context from the user.",
|
|
20
|
+
"assertions": [
|
|
21
|
+
"The commit message follows <type>(<scope>): <description> format",
|
|
22
|
+
"The type matches the nature of the actual changes in the diff",
|
|
23
|
+
"The body explains what changed and why",
|
|
24
|
+
"Specific files are staged (not git add . or git add -A)"
|
|
25
|
+
]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"id": 3,
|
|
29
|
+
"prompt": "I renamed the API endpoint from /users to /v2/accounts, this is a breaking change. commit please",
|
|
30
|
+
"expected_output": "A conventional commit with breaking change indicator (! or BREAKING CHANGE footer).",
|
|
31
|
+
"assertions": [
|
|
32
|
+
"The commit includes a breaking change indicator (! after type/scope or BREAKING CHANGE: in footer)",
|
|
33
|
+
"The title mentions the API change",
|
|
34
|
+
"The body or footer explains what broke and what to use instead"
|
|
35
|
+
]
|
|
36
|
+
}
|
|
37
|
+
]
|
|
38
|
+
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: model-council
|
|
3
|
+
description: Multi-agent debate system for complex queries. Four agents (Scholar, Logician, Contrarian, Captain) analyze independently, debate, and synthesize a consensus. Use for deeper analysis, second opinions, or stress-testing decisions.
|
|
4
|
+
license: MIT
|
|
5
|
+
metadata:
|
|
6
|
+
author: gokulkrishh
|
|
7
|
+
version: '0.1.0'
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Model Council
|
|
11
|
+
|
|
12
|
+
A multi-agent debate system inspired by Grok 4.20's council architecture. Four specialized agents deliberate on your query, challenge each other's reasoning, and produce a synthesized consensus answer.
|
|
13
|
+
|
|
14
|
+
## Architecture
|
|
15
|
+
|
|
16
|
+
Four agents with distinct roles:
|
|
17
|
+
|
|
18
|
+
1. **Captain (Coordinator)** — Decomposes the task, sets the agenda, resolves conflicts, and synthesizes the final answer.
|
|
19
|
+
2. **Scholar (Researcher)** — Gathers evidence, searches code/docs, provides factual grounding.
|
|
20
|
+
3. **Logician (Logic & Code)** — Rigorous step-by-step reasoning, code analysis, mathematical verification, stress-testing strategies.
|
|
21
|
+
4. **Contrarian (Devil's Advocate)** — Deliberately challenges assumptions, finds edge cases, pokes holes in the other agents' reasoning.
|
|
22
|
+
|
|
23
|
+
## Instructions
|
|
24
|
+
|
|
25
|
+
Follow these phases strictly:
|
|
26
|
+
|
|
27
|
+
**PHASE 1 — INDEPENDENT ANALYSIS**
|
|
28
|
+
|
|
29
|
+
Simulate three independent agent perspectives on the user's query:
|
|
30
|
+
|
|
31
|
+
**Scholar (Research & Facts):**
|
|
32
|
+
|
|
33
|
+
- Search the codebase, documentation, or use web search for relevant context
|
|
34
|
+
- Present factual findings, prior art, and evidence. When the user presents multiple options, evaluate each one individually
|
|
35
|
+
- Be thorough and cite specific files/lines when applicable
|
|
36
|
+
|
|
37
|
+
**Logician (Logic & Code):**
|
|
38
|
+
|
|
39
|
+
- Break down the problem step-by-step
|
|
40
|
+
- Propose a concrete solution with reasoning
|
|
41
|
+
- Consider performance, maintainability, and correctness
|
|
42
|
+
- Write code snippets if applicable
|
|
43
|
+
|
|
44
|
+
**Contrarian (Devil's Advocate):**
|
|
45
|
+
|
|
46
|
+
- Challenge the other agents' assumptions
|
|
47
|
+
- Identify edge cases, failure modes, and risks
|
|
48
|
+
- Suggest alternative approaches the others may have missed
|
|
49
|
+
- Be constructively critical — not dismissive
|
|
50
|
+
|
|
51
|
+
**PHASE 2 — DEBATE**
|
|
52
|
+
|
|
53
|
+
Have each agent respond to the others' Phase 1 analysis:
|
|
54
|
+
|
|
55
|
+
- Scholar: Verify or refute claims made by Logician and Contrarian
|
|
56
|
+
- Logician: Address Contrarian's objections, strengthen or revise the solution
|
|
57
|
+
- Contrarian: Final challenge — are there still unaddressed risks?
|
|
58
|
+
|
|
59
|
+
**PHASE 3 — SYNTHESIS (Captain)**
|
|
60
|
+
|
|
61
|
+
As the Captain (Coordinator), synthesize the debate using this exact template:
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
## Council Verdict
|
|
65
|
+
|
|
66
|
+
**Confidence:** [High | Medium | Low]
|
|
67
|
+
|
|
68
|
+
### Consensus Answer
|
|
69
|
+
[The synthesized answer incorporating the strongest arguments from all agents]
|
|
70
|
+
|
|
71
|
+
### Key Insights
|
|
72
|
+
- [Non-obvious or surprising points that emerged from the debate — not just restatements of known facts]
|
|
73
|
+
|
|
74
|
+
### Dissenting Points
|
|
75
|
+
- [Any unresolved disagreements or risks flagged by the Contrarian]
|
|
76
|
+
|
|
77
|
+
### Recommended Action
|
|
78
|
+
[Clear, actionable next steps]
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Rules
|
|
82
|
+
|
|
83
|
+
- Each agent must stay in character throughout
|
|
84
|
+
- The Contrarian MUST disagree with at least one aspect of the other agents' reasoning
|
|
85
|
+
- The Captain must acknowledge dissent, not just override it
|
|
86
|
+
- If the query is simple and doesn't benefit from debate, you MUST skip all phases, state that it doesn't warrant a council debate, and answer directly
|
|
87
|
+
- Use the actual tools available (file reading, searching, web search, etc.) during the Scholar phase — don't just theorize
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill_name": "model-council",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": 1,
|
|
6
|
+
"prompt": "Should I use Next.js App Router or Remix for a new SaaS dashboard?",
|
|
7
|
+
"expected_output": "A full council debate with all 3 phases (independent analysis, debate, synthesis) and a Council Verdict with confidence level.",
|
|
8
|
+
"assertions": [
|
|
9
|
+
"All three agents (Scholar, Logician, Contrarian) provide independent analysis in Phase 1",
|
|
10
|
+
"The Contrarian disagrees with at least one aspect of the other agents' reasoning",
|
|
11
|
+
"Phase 2 shows agents responding to each other's points",
|
|
12
|
+
"The Council Verdict includes a confidence level (High, Medium, or Low)",
|
|
13
|
+
"Dissenting points are acknowledged in the final synthesis"
|
|
14
|
+
]
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": 2,
|
|
18
|
+
"prompt": "What is 2 + 2?",
|
|
19
|
+
"expected_output": "The skill recognizes this is too simple for a full council debate and answers directly.",
|
|
20
|
+
"assertions": [
|
|
21
|
+
"The skill identifies the query as too simple for a full debate",
|
|
22
|
+
"A direct answer is given without running all 3 phases"
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"id": 3,
|
|
27
|
+
"prompt": "We're choosing between PostgreSQL, MongoDB, and DynamoDB for a real-time analytics platform processing 10M events/day. What should we use?",
|
|
28
|
+
"expected_output": "A thorough council debate with the Scholar researching each database's strengths, the Logician analyzing trade-offs, and the Contrarian challenging assumptions.",
|
|
29
|
+
"assertions": [
|
|
30
|
+
"The Scholar provides factual evidence about each database option",
|
|
31
|
+
"The Logician breaks down the decision with structured reasoning",
|
|
32
|
+
"The Contrarian challenges at least one assumption",
|
|
33
|
+
"The final verdict includes a clear recommended action",
|
|
34
|
+
"Key insights section surfaces non-obvious points from the debate"
|
|
35
|
+
]
|
|
36
|
+
}
|
|
37
|
+
]
|
|
38
|
+
}
|