oh-my-customcode 0.64.0 → 0.64.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/README.md +3 -3
  2. package/dist/cli/index.js +1 -1
  3. package/dist/index.js +1 -1
  4. package/package.json +1 -1
  5. package/templates/.claude/agents/arch-documenter.md +3 -0
  6. package/templates/.claude/agents/arch-speckit-agent.md +5 -0
  7. package/templates/.claude/agents/be-django-expert.md +1 -0
  8. package/templates/.claude/agents/be-express-expert.md +1 -0
  9. package/templates/.claude/agents/be-fastapi-expert.md +1 -0
  10. package/templates/.claude/agents/be-go-backend-expert.md +1 -0
  11. package/templates/.claude/agents/be-nestjs-expert.md +1 -0
  12. package/templates/.claude/agents/be-springboot-expert.md +1 -0
  13. package/templates/.claude/agents/db-alembic-expert.md +1 -0
  14. package/templates/.claude/agents/db-postgres-expert.md +1 -0
  15. package/templates/.claude/agents/db-redis-expert.md +1 -0
  16. package/templates/.claude/agents/db-supabase-expert.md +1 -0
  17. package/templates/.claude/agents/de-airflow-expert.md +1 -0
  18. package/templates/.claude/agents/de-dbt-expert.md +1 -0
  19. package/templates/.claude/agents/de-kafka-expert.md +1 -0
  20. package/templates/.claude/agents/de-pipeline-expert.md +1 -0
  21. package/templates/.claude/agents/de-snowflake-expert.md +1 -0
  22. package/templates/.claude/agents/de-spark-expert.md +1 -0
  23. package/templates/.claude/agents/fe-design-expert.md +6 -0
  24. package/templates/.claude/agents/fe-flutter-agent.md +1 -0
  25. package/templates/.claude/agents/fe-svelte-agent.md +1 -0
  26. package/templates/.claude/agents/fe-vercel-agent.md +1 -0
  27. package/templates/.claude/agents/fe-vuejs-agent.md +1 -0
  28. package/templates/.claude/agents/infra-aws-expert.md +1 -0
  29. package/templates/.claude/agents/infra-docker-expert.md +1 -0
  30. package/templates/.claude/agents/lang-golang-expert.md +1 -0
  31. package/templates/.claude/agents/lang-java21-expert.md +1 -0
  32. package/templates/.claude/agents/lang-kotlin-expert.md +1 -0
  33. package/templates/.claude/agents/lang-python-expert.md +1 -0
  34. package/templates/.claude/agents/lang-rust-expert.md +1 -0
  35. package/templates/.claude/agents/lang-typescript-expert.md +1 -0
  36. package/templates/.claude/agents/mgr-claude-code-bible.md +2 -0
  37. package/templates/.claude/agents/mgr-creator.md +2 -0
  38. package/templates/.claude/agents/mgr-gitnerd.md +5 -0
  39. package/templates/.claude/agents/mgr-sauron.md +2 -0
  40. package/templates/.claude/agents/mgr-supplier.md +6 -0
  41. package/templates/.claude/agents/mgr-updater.md +5 -0
  42. package/templates/.claude/agents/qa-engineer.md +4 -0
  43. package/templates/.claude/agents/qa-planner.md +3 -0
  44. package/templates/.claude/agents/qa-writer.md +6 -0
  45. package/templates/.claude/agents/sec-codeql-expert.md +1 -0
  46. package/templates/.claude/agents/sys-memory-keeper.md +5 -0
  47. package/templates/.claude/agents/sys-naggy.md +6 -0
  48. package/templates/.claude/agents/tool-bun-expert.md +1 -0
  49. package/templates/.claude/agents/tool-npm-expert.md +1 -0
  50. package/templates/.claude/agents/tool-optimizer.md +4 -0
  51. package/templates/.claude/skills/evaluator-optimizer/SKILL.md +4 -0
  52. package/templates/.claude/skills/harness-eval/SKILL.md +95 -0
  53. package/templates/CLAUDE.md +2 -1
  54. package/templates/manifest.json +2 -2
package/README.md CHANGED
@@ -13,7 +13,7 @@
13
13
 
14
14
  **[한국어 문서 (Korean)](./README_ko.md)**
15
15
 
16
- 46 agents. 97 skills. 21 rules. One command.
16
+ 46 agents. 98 skills. 21 rules. One command.
17
17
 
18
18
  ```bash
19
19
  npm install -g oh-my-customcode && cd your-project && omcustom init
@@ -146,7 +146,7 @@ Each agent declares its tools, model, memory scope, and limitations in YAML fron
146
146
 
147
147
  ---
148
148
 
149
- ### Skills (97)
149
+ ### Skills (98)
150
150
 
151
151
  | Category | Count | Includes |
152
152
  |----------|-------|----------|
@@ -282,7 +282,7 @@ your-project/
282
282
  ├── CLAUDE.md # Entry point
283
283
  ├── .claude/
284
284
  │ ├── agents/ # 46 agent definitions
285
- │ ├── skills/ # 97 skill modules
285
+ │ ├── skills/ # 98 skill modules
286
286
  │ ├── rules/ # 21 governance rules (R000-R021)
287
287
  │ ├── hooks/ # 15 lifecycle hook scripts
288
288
  │ ├── schemas/ # Tool input validation schemas
package/dist/cli/index.js CHANGED
@@ -9325,7 +9325,7 @@ var init_package = __esm(() => {
9325
9325
  workspaces: [
9326
9326
  "packages/*"
9327
9327
  ],
9328
- version: "0.64.0",
9328
+ version: "0.64.2",
9329
9329
  description: "Batteries-included agent harness for Claude Code",
9330
9330
  type: "module",
9331
9331
  bin: {
package/dist/index.js CHANGED
@@ -1674,7 +1674,7 @@ var package_default = {
1674
1674
  workspaces: [
1675
1675
  "packages/*"
1676
1676
  ],
1677
- version: "0.64.0",
1677
+ version: "0.64.2",
1678
1678
  description: "Batteries-included agent harness for Claude Code",
1679
1679
  type: "module",
1680
1680
  bin: {
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "workspaces": [
4
4
  "packages/*"
5
5
  ],
6
- "version": "0.64.0",
6
+ "version": "0.64.2",
7
7
  "description": "Batteries-included agent harness for Claude Code",
8
8
  "type": "module",
9
9
  "bin": {
@@ -14,6 +14,9 @@ tools:
14
14
  - Edit
15
15
  - Grep
16
16
  - Glob
17
+ maxTurns: 20
18
+ disallowedTools: [Bash]
19
+ permissionMode: bypassPermissions
17
20
  ---
18
21
 
19
22
  You handle software architecture documentation: system design docs, API specs, ADRs, and technical doc maintenance.
@@ -12,6 +12,11 @@ tools:
12
12
  - Grep
13
13
  - Glob
14
14
  - Bash
15
+ maxTurns: 20
16
+ limitations:
17
+ - "cannot execute code"
18
+ - "cannot deploy infrastructure"
19
+ permissionMode: bypassPermissions
15
20
  ---
16
21
 
17
22
  You are a Spec-Driven Development agent that transforms requirements into executable specifications.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Django developer specialized in building production-ready Python web applications following best practices and modern patterns.
@@ -12,6 +12,7 @@ tools:
12
12
  - Grep
13
13
  - Glob
14
14
  - Bash
15
+ permissionMode: bypassPermissions
15
16
  ---
16
17
 
17
18
  You are an expert Express.js developer for production-ready Node.js APIs following security best practices and 12-factor app principles.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert FastAPI developer specialized in building high-performance async Python APIs following best practices and modern patterns.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Go backend developer specialized in building production-ready services following Uber style guide and standard project layout.
@@ -12,6 +12,7 @@ tools:
12
12
  - Grep
13
13
  - Glob
14
14
  - Bash
15
+ permissionMode: bypassPermissions
15
16
  ---
16
17
 
17
18
  You are an expert NestJS developer for scalable Node.js applications using TypeScript with enterprise-grade patterns.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Spring Boot developer for enterprise-grade Java/Kotlin applications. Focused on Spring Boot 3.5.x with Java 21.
@@ -23,6 +23,7 @@ limitations:
23
23
  - "cannot apply migrations directly to production databases"
24
24
  - "cannot resolve application-level data backfill logic without domain context"
25
25
  - "cannot detect rename intent without git diff context or explicit user instruction"
26
+ permissionMode: bypassPermissions
26
27
  ---
27
28
 
28
29
  # db-alembic-expert
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert PostgreSQL DBA specialized in designing, optimizing, and maintaining pure PostgreSQL databases in production.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Redis developer specialized in high-performance caching, in-memory data architectures, and real-time messaging systems.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert in Supabase and PostgreSQL for performant, secure database-driven applications.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Apache Airflow developer for production-ready DAGs following official best practices.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert dbt developer for analytics engineering, SQL modeling, and data transformation.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Apache Kafka developer for event streaming architectures with high throughput and reliability.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert data pipeline architect for robust, scalable data pipelines integrating multiple tools with data quality assurance.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Snowflake developer for cloud data warehouse design, query optimization, and scalable data platforms.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Apache Spark developer for performant distributed data processing using PySpark and Scala.
@@ -9,6 +9,12 @@ skills:
9
9
  - impeccable-design
10
10
  - web-design-guidelines
11
11
  tools: [Read, Write, Edit, Grep, Glob, Bash]
12
+ maxTurns: 20
13
+ disallowedTools: [Bash]
14
+ limitations:
15
+ - "cannot modify backend code"
16
+ - "cannot execute shell commands"
17
+ permissionMode: bypassPermissions
12
18
  source:
13
19
  type: external
14
20
  origin: github
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Flutter developer following official documentation and Dart best practices.
@@ -15,6 +15,7 @@ tools:
15
15
  - Grep
16
16
  - Glob
17
17
  - Bash
18
+ permissionMode: bypassPermissions
18
19
  ---
19
20
 
20
21
  You are an expert Svelte developer following official documentation and compiler-based reactivity patterns.
@@ -17,6 +17,7 @@ tools:
17
17
  - Grep
18
18
  - Glob
19
19
  - Bash
20
+ permissionMode: bypassPermissions
20
21
  ---
21
22
 
22
23
  You are a frontend specialist for React/Next.js projects with Vercel deployment capabilities.
@@ -15,6 +15,7 @@ tools:
15
15
  - Grep
16
16
  - Glob
17
17
  - Bash
18
+ permissionMode: bypassPermissions
18
19
  ---
19
20
 
20
21
  You are an expert Vue.js (Vue 3) developer following official documentation and best practices.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert AWS cloud architect specialized in designing and implementing scalable, secure, and cost-effective cloud infrastructure following AWS Well-Architected Framework.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Docker engineer specialized in building optimized container images and managing containerized applications following official best practices.
@@ -15,6 +15,7 @@ tools:
15
15
  - Grep
16
16
  - Glob
17
17
  - Bash
18
+ permissionMode: bypassPermissions
18
19
  ---
19
20
 
20
21
  You are an expert Go developer specialized in writing idiomatic, performant, and maintainable Go code following official best practices.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Java 21 developer for modern Java features including Virtual Threads, Pattern Matching, Record Patterns, and Sequenced Collections.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Kotlin developer specialized in writing idiomatic, concise, and safe Kotlin code following JetBrains official conventions.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Python developer specialized in writing Pythonic, clean, and maintainable code following PEP 8 and The Zen of Python.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert Rust developer specialized in writing safe, performant, and idiomatic Rust code following official guidelines and community best practices.
@@ -14,6 +14,7 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are an expert TypeScript developer specialized in writing type-safe, maintainable, and scalable TypeScript code following industry best practices.
@@ -5,6 +5,7 @@ model: sonnet
5
5
  domain: universal
6
6
  memory: project
7
7
  effort: medium
8
+ maxTurns: 20
8
9
  skills:
9
10
  - claude-code-bible
10
11
  tools:
@@ -12,6 +13,7 @@ tools:
12
13
  - Write
13
14
  - Grep
14
15
  - Bash
16
+ permissionMode: bypassPermissions
15
17
  ---
16
18
 
17
19
  You are the authoritative source of truth for Claude Code specifications. You fetch official documentation from code.claude.com and validate the project against official specs.
@@ -14,6 +14,8 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ maxTurns: 25
18
+ permissionMode: bypassPermissions
17
19
  ---
18
20
 
19
21
  You are an agent creation specialist following R006 (MUST-agent-design.md) rules.
@@ -5,6 +5,10 @@ model: sonnet
5
5
  domain: universal
6
6
  memory: project
7
7
  effort: medium
8
+ maxTurns: 20
9
+ limitations:
10
+ - "cannot modify source code"
11
+ - "cannot create agents"
8
12
  tools:
9
13
  - Read
10
14
  - Write
@@ -12,6 +16,7 @@ tools:
12
16
  - Grep
13
17
  - Glob
14
18
  - Bash
19
+ permissionMode: bypassPermissions
15
20
  ---
16
21
 
17
22
  You are a Git operations specialist following GitHub flow best practices.
@@ -14,6 +14,8 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ maxTurns: 25
18
+ permissionMode: bypassPermissions
17
19
  ---
18
20
 
19
21
  You are an automated verification specialist that executes the mandatory R017 verification process, acting as the "all-seeing eye" that ensures system integrity through comprehensive multi-round verification.
@@ -5,12 +5,18 @@ model: haiku
5
5
  domain: universal
6
6
  memory: local
7
7
  effort: low
8
+ maxTurns: 10
9
+ limitations:
10
+ - "cannot modify agent files"
11
+ - "cannot create new agents"
12
+ disallowedTools: [Bash, Write, Edit]
8
13
  skills:
9
14
  - audit-agents
10
15
  tools:
11
16
  - Read
12
17
  - Grep
13
18
  - Glob
19
+ permissionMode: default
14
20
  ---
15
21
 
16
22
  You are a dependency validation specialist ensuring agents have all required skills and guides properly linked.
@@ -5,6 +5,10 @@ model: sonnet
5
5
  domain: universal
6
6
  memory: project
7
7
  effort: medium
8
+ maxTurns: 20
9
+ limitations:
10
+ - "cannot create new agents"
11
+ - "cannot modify rules"
8
12
  skills:
9
13
  - update-external
10
14
  - update-docs
@@ -15,6 +19,7 @@ tools:
15
19
  - Grep
16
20
  - Glob
17
21
  - Bash
22
+ permissionMode: bypassPermissions
18
23
  ---
19
24
 
20
25
  You are an external source synchronization specialist keeping external components up-to-date.
@@ -5,6 +5,9 @@ model: sonnet
5
5
  domain: universal
6
6
  memory: project
7
7
  effort: medium
8
+ maxTurns: 20
9
+ limitations:
10
+ - "cannot modify source code in production branches"
8
11
  tools:
9
12
  - Read
10
13
  - Write
@@ -12,6 +15,7 @@ tools:
12
15
  - Grep
13
16
  - Glob
14
17
  - Bash
18
+ permissionMode: bypassPermissions
15
19
  ---
16
20
 
17
21
  You are a QA execution specialist that runs tests, identifies defects, and validates software quality.
@@ -5,6 +5,8 @@ model: sonnet
5
5
  domain: universal
6
6
  memory: project
7
7
  effort: high
8
+ maxTurns: 20
9
+ disallowedTools: [Bash]
8
10
  limitations:
9
11
  - "cannot execute tests"
10
12
  - "cannot modify code"
@@ -14,6 +16,7 @@ tools:
14
16
  - Edit
15
17
  - Grep
16
18
  - Glob
19
+ permissionMode: bypassPermissions
17
20
  ---
18
21
 
19
22
  You are a QA planning specialist creating comprehensive test strategies from requirements.
@@ -5,12 +5,18 @@ model: sonnet
5
5
  domain: universal
6
6
  memory: project
7
7
  effort: medium
8
+ maxTurns: 20
9
+ limitations:
10
+ - "cannot execute tests"
11
+ - "cannot modify source code"
12
+ disallowedTools: [Bash]
8
13
  tools:
9
14
  - Read
10
15
  - Write
11
16
  - Edit
12
17
  - Grep
13
18
  - Glob
19
+ permissionMode: bypassPermissions
14
20
  ---
15
21
 
16
22
  You are a QA documentation specialist transforming test plans into detailed, executable test cases and reports.
@@ -14,6 +14,7 @@ tools:
14
14
  - Write
15
15
  - Grep
16
16
  - Bash
17
+ permissionMode: bypassPermissions
17
18
  ---
18
19
 
19
20
  You are a security-focused code analyst specializing in CodeQL-based vulnerability detection and assessment.
@@ -16,6 +16,11 @@ tools:
16
16
  - Grep
17
17
  - Glob
18
18
  - Bash
19
+ maxTurns: 15
20
+ limitations:
21
+ - "cannot modify source code"
22
+ - "cannot execute tests"
23
+ permissionMode: bypassPermissions
19
24
  ---
20
25
 
21
26
  You are a session memory management specialist ensuring context survives across session compactions using claude-mem.
@@ -5,11 +5,17 @@ model: sonnet
5
5
  domain: universal
6
6
  memory: local
7
7
  effort: low
8
+ maxTurns: 10
9
+ limitations:
10
+ - "cannot modify project files"
11
+ - "cannot execute external commands"
12
+ disallowedTools: [Bash]
8
13
  tools:
9
14
  - Read
10
15
  - Write
11
16
  - Edit
12
17
  - Grep
18
+ permissionMode: bypassPermissions
13
19
  ---
14
20
 
15
21
  You are a task management specialist that proactively manages TODO items and reminds users of pending tasks.
@@ -11,6 +11,7 @@ tools:
11
11
  - Edit
12
12
  - Grep
13
13
  - Bash
14
+ permissionMode: bypassPermissions
14
15
  ---
15
16
 
16
17
  You are an expert Bun runtime developer for high-performance JavaScript/TypeScript applications.
@@ -15,6 +15,7 @@ tools:
15
15
  - Edit
16
16
  - Grep
17
17
  - Bash
18
+ permissionMode: bypassPermissions
18
19
  ---
19
20
 
20
21
  You manage npm package publishing, versioning, and registry operations.
@@ -14,6 +14,10 @@ tools:
14
14
  - Grep
15
15
  - Glob
16
16
  - Bash
17
+ maxTurns: 20
18
+ limitations:
19
+ - "cannot modify source code"
20
+ permissionMode: bypassPermissions
17
21
  ---
18
22
 
19
23
  You analyze and optimize application bundles, detect performance issues, and provide actionable recommendations.
@@ -363,3 +363,7 @@ evaluator-optimizer:
363
363
  Weight ordering (originality > craft > functionality) follows Anthropic's anti-slop principle: functionality is table stakes, but originality and craft distinguish quality output from generic AI generation.
364
364
 
365
365
  Integration: Works with [impeccable-design](/skills/impeccable-design) skill for design language enforcement.
366
+
367
+ ### Harness Eval Preset
368
+
369
+ The `harness-eval` skill provides a structured 15-task SE benchmark rubric that can be used as a preset for the evaluator-optimizer pipeline. When invoked via `/omcustom:harness-eval`, the harness rubric dimensions (Test Coverage 30%, Architecture Design 25%, Error Handling 25%, Extensibility 20%) are loaded as the sprint contract criteria.
@@ -0,0 +1,95 @@
1
+ ---
2
+ name: harness-eval
3
+ description: Structured SE task evaluation using 15 benchmark definitions from claude-code-harness research
4
+ scope: harness
5
+ user-invocable: true
6
+ argument-hint: "[--preset all|quick] [--task task-name]"
7
+ effort: high
8
+ version: 1.0.0
9
+ ---
10
+
11
+ # Harness Eval — Structured SE Task Benchmark
12
+
13
+ ## Purpose
14
+
15
+ Evaluate agent quality using 15 structured software engineering task definitions with quantitative scoring. Based on research from [revfactory/claude-code-harness](https://github.com/revfactory/claude-code-harness), which demonstrated a 60% improvement (49.5 → 79.3 points) through structured pre-configuration.
16
+
17
+ ## Usage
18
+
19
+ ```
20
+ /omcustom:harness-eval # Run all 15 benchmarks
21
+ /omcustom:harness-eval --preset quick # Run top 5 high-impact benchmarks
22
+ /omcustom:harness-eval --task api-design # Run specific task benchmark
23
+ ```
24
+
25
+ ## Quality Dimensions
26
+
27
+ | Dimension | Weight | Description |
28
+ |-----------|--------|-------------|
29
+ | Test Coverage | 30% | Unit test count, edge case coverage, assertion quality |
30
+ | Architecture Design | 25% | Separation of concerns, dependency management, scalability |
31
+ | Error Handling | 25% | Input validation, error propagation, recovery strategies |
32
+ | Extensibility | 20% | Plugin points, configuration flexibility, API surface |
33
+
34
+ ## 15 SE Task Benchmark Suite
35
+
36
+ | # | Task | Category | Key Evaluation Criteria |
37
+ |---|------|----------|------------------------|
38
+ | 1 | API Design | Architecture | RESTful conventions, versioning, error responses |
39
+ | 2 | Data Modeling | Architecture | Schema normalization, relationships, indexing |
40
+ | 3 | Authentication Flow | Security | Token management, session handling, OWASP compliance |
41
+ | 4 | Test Suite Creation | Quality | Coverage breadth, assertion quality, edge cases |
42
+ | 5 | Error Handler | Reliability | Error classification, recovery, user feedback |
43
+ | 6 | Logging System | Observability | Structured logging, levels, correlation IDs |
44
+ | 7 | Configuration Manager | Operations | Env-based config, validation, secrets handling |
45
+ | 8 | CLI Tool | UX | Argument parsing, help text, exit codes |
46
+ | 9 | Database Migration | Data | Reversibility, data preservation, zero-downtime |
47
+ | 10 | Cache Layer | Performance | Invalidation strategy, TTL, cache-aside pattern |
48
+ | 11 | Queue Consumer | Reliability | Idempotency, retry logic, dead letter handling |
49
+ | 12 | Middleware Chain | Architecture | Composability, ordering, short-circuiting |
50
+ | 13 | File Processor | I/O | Streaming, error recovery, format validation |
51
+ | 14 | Webhook Handler | Integration | Signature verification, retry tolerance, idempotency |
52
+ | 15 | Rate Limiter | Security | Algorithm choice, distributed state, fairness |
53
+
54
+ ## Scoring Rubric
55
+
56
+ Each task is scored 0-100 on each of the 4 quality dimensions; the task score is the weighted sum:
57
+
58
+ ```
59
+ Score = (test_coverage × 0.30) + (architecture × 0.25) + (error_handling × 0.25) + (extensibility × 0.20)
60
+ ```
61
+
62
+ ### Score Thresholds
63
+
64
+ | Score Range | Grade | Interpretation |
65
+ |-------------|-------|----------------|
66
+ | 80-100 | A | Production-ready, well-structured |
67
+ | 60-79 | B | Functional with minor gaps |
68
+ | 40-59 | C | Works but needs improvement |
69
+ | 0-39 | D | Significant structural issues |
70
+
71
+ ## Presets
72
+
73
+ ### `all` (default)
74
+ Run all 15 tasks. Full evaluation ~45 minutes.
75
+
76
+ ### `quick`
77
+ Run top 5 high-impact tasks (1, 3, 4, 5, 12). Quick evaluation ~15 minutes.
78
+
79
+ ## Integration with evaluator-optimizer
80
+
81
+ This skill provides preset rubrics for the evaluator-optimizer pipeline:
82
+
83
+ ```
84
+ /omcustom:harness-eval → loads rubric → evaluator-optimizer executes → scoring → report
85
+ ```
86
+
87
+ The evaluator-optimizer skill's `pre_negotiation` phase accepts harness-eval rubric dimensions as sprint contract criteria.
88
+
89
+ ## Output
90
+
91
+ Results are saved to `.claude/outputs/sessions/{YYYY-MM-DD}/harness-eval-{HHmmss}.md` with per-task scores and an aggregate grade.
92
+
93
+ ## Attribution
94
+
95
+ Evaluation framework based on research by [revfactory/claude-code-harness](https://github.com/revfactory/claude-code-harness). Adapted for oh-my-customcode's evaluator-optimizer pipeline with permission.
@@ -101,6 +101,7 @@ oh-my-customcode로 구동됩니다.
101
101
  | `/omcustom:update-external` | 외부 소스에서 에이전트 업데이트 |
102
102
  | `/omcustom:audit-agents` | 에이전트 의존성 감사 |
103
103
  | `/omcustom:fix-refs` | 깨진 참조 수정 |
104
+ | `/omcustom:harness-eval` | 15 SE task 구조적 벤치마크 평가 |
104
105
  | `/omcustom:auto-improve` | 개선 사항 자동 적용 워크플로우 |
105
106
  | `/omcustom:improve-report` | eval-core 기반 개선 현황 리포트 |
106
107
  | `/omcustom-takeover` | 기존 에이전트/스킬에서 canonical spec 추출 |
@@ -138,7 +139,7 @@ project/
138
139
  +-- CLAUDE.md # 진입점
139
140
  +-- .claude/
140
141
  | +-- agents/ # 서브에이전트 정의 (46 파일)
141
- | +-- skills/ # 스킬 (97 디렉토리)
142
+ | +-- skills/ # 스킬 (98 디렉토리)
142
143
  | +-- rules/ # 전역 규칙 (R000-R021)
143
144
  | +-- hooks/ # 훅 스크립트 (보안, 검증, HUD)
144
145
  | +-- contexts/ # 컨텍스트 파일 (ecomode)
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "0.64.0",
2
+ "version": "0.64.2",
3
3
  "lastUpdated": "2026-03-24T00:00:00.000Z",
4
4
  "components": [
5
5
  {
@@ -18,7 +18,7 @@
18
18
  "name": "skills",
19
19
  "path": ".claude/skills",
20
20
  "description": "Reusable skill modules (includes slash commands)",
21
- "files": 97
21
+ "files": 98
22
22
  },
23
23
  {
24
24
  "name": "guides",