zenkit 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +63 -0
- package/LICENSE +21 -0
- package/README.md +242 -0
- package/agents/backend-architect.md +19 -0
- package/agents/frontend-architect.md +19 -0
- package/agents/implementation-auditor.md +19 -0
- package/agents/product-manager.md +19 -0
- package/agents/qa-test-engineer.md +19 -0
- package/agents/security-specialist.md +19 -0
- package/agents/system-architect.md +19 -0
- package/agents/technical-writer.md +19 -0
- package/agents/ux-engineer.md +19 -0
- package/benchmark/feature-specs/cli-tool.json +58 -0
- package/benchmark/feature-specs/handoff-system.json +69 -0
- package/benchmark/feature-specs/protocol-completeness.json +85 -0
- package/benchmark/feature-specs/schema-validator-baseline.json +93 -0
- package/benchmark/feature-specs/schema-validator-playground.json +92 -0
- package/benchmark/feature-specs/self-audit.json +76 -0
- package/benchmark/fixtures/valid-handoff.json +13 -0
- package/benchmark/scripts/compare.ts +172 -0
- package/benchmark/scripts/report.ts +102 -0
- package/benchmark/scripts/run-all.ts +125 -0
- package/benchmark/scripts/run.ts +595 -0
- package/benchmark/scripts/visualize.ts +120 -0
- package/bin/zenkit.js +24 -0
- package/commands/audit.md +28 -0
- package/commands/build.md +26 -0
- package/commands/checkpoint.md +28 -0
- package/commands/handoff.md +28 -0
- package/commands/plan.md +27 -0
- package/commands/refactor.md +27 -0
- package/commands/ship.md +28 -0
- package/commands/spec.md +26 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +174 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +765 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +121 -0
- package/dist/index.js.map +1 -0
- package/dist/schemas/audit.schema.json +63 -0
- package/dist/schemas/benchmark.schema.json +118 -0
- package/dist/schemas/checkpoint.schema.json +64 -0
- package/dist/schemas/feature-spec.schema.json +76 -0
- package/dist/schemas/handoff.schema.json +78 -0
- package/dist/schemas/schemas/audit.schema.json +63 -0
- package/dist/schemas/schemas/benchmark.schema.json +118 -0
- package/dist/schemas/schemas/checkpoint.schema.json +64 -0
- package/dist/schemas/schemas/feature-spec.schema.json +76 -0
- package/dist/schemas/schemas/handoff.schema.json +78 -0
- package/dist/schemas/schemas/task.schema.json +69 -0
- package/dist/schemas/task.schema.json +69 -0
- package/docs/agent-contract.md +36 -0
- package/docs/architecture.md +88 -0
- package/docs/benchmarking.md +51 -0
- package/docs/command-model.md +43 -0
- package/docs/philosophy.md +35 -0
- package/docs/roadmap.md +43 -0
- package/docs/self-audit.md +29 -0
- package/hooks/post-change.md +30 -0
- package/hooks/pre-change.md +27 -0
- package/hooks/pre-ship.md +30 -0
- package/package.json +92 -0
- package/rubrics/architectural-alignment.md +26 -0
- package/rubrics/execution-quality.md +26 -0
- package/rubrics/verbosity-score.md +26 -0
- package/schemas/audit.schema.json +63 -0
- package/schemas/benchmark.schema.json +118 -0
- package/schemas/checkpoint.schema.json +64 -0
- package/schemas/feature-spec.schema.json +76 -0
- package/schemas/handoff.schema.json +78 -0
- package/schemas/task.schema.json +69 -0
- package/skills/architecture-review.md +17 -0
- package/skills/backend-change.md +17 -0
- package/skills/bug-triage.md +17 -0
- package/skills/frontend-change.md +17 -0
- package/skills/prompt-pruning.md +17 -0
- package/skills/release-check.md +17 -0
- package/skills/security-review.md +17 -0
- package/templates/agent.template.md +18 -0
- package/templates/command.template.md +21 -0
- package/templates/skill.template.md +15 -0
- package/templates/task.template.md +19 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ZenKit Benchmark Runner — All Specs
|
|
3
|
+
*
|
|
4
|
+
* Runs the benchmark against every feature spec in benchmark/feature-specs/
|
|
5
|
+
* and produces a combined summary.
|
|
6
|
+
*
|
|
7
|
+
* Usage: npx tsx benchmark/scripts/run-all.ts
|
|
8
|
+
*/
|
|
9
|
+
import { execSync } from 'child_process'
|
|
10
|
+
import fs from 'fs'
|
|
11
|
+
import path from 'path'
|
|
12
|
+
|
|
13
|
+
// Package root: two directory levels above the directory containing this script.
const ROOT = path.resolve(__dirname, '..', '..')

// Directory that is scanned for feature spec JSON files.
const specsDir = path.join(ROOT, 'benchmark', 'feature-specs')
|
|
15
|
+
|
|
16
|
+
/**
 * Combined outcome of one run over all feature specs.
 *
 * Serialized as-is to `benchmark/results/summary.json` and rendered as a
 * table in `benchmark/results/summary.md`.
 */
interface Summary {
  /** Number of feature specs selected for the run. */
  total: number
  /** Specs whose result status was `pass`. */
  passed: number
  /** Specs whose status was neither `pass` nor `partial`, plus specs that threw. */
  failed: number
  /** Specs whose result status was `partial`. */
  partial: number
  /** One entry per spec, in the order the specs were executed. */
  results: {
    /** File name of the feature spec. */
    spec: string
    /** Status from the result file, or `error` when the run threw. */
    status: string
    /** Criteria tally formatted as `passed/total`. */
    criteria: string
    /** Check tally across all stages formatted as `passed/run`. */
    checks: string
    /** Duration taken from the result file; `0` when the run threw. */
    duration_ms: number
  }[]
  /** ISO-8601 timestamp captured when the summary object was created. */
  generated_at: string
}
|
|
30
|
+
|
|
31
|
+
/**
 * Runs the benchmark for every non-baseline feature spec and writes a
 * combined report.
 *
 * For each `*.json` file in `specsDir` whose name does not contain
 * "baseline", this synchronously runs `benchmark/scripts/run.ts` on it, then
 * reads `benchmark/results/<feature_id>-live.json` (the file the runner is
 * expected to generate) and folds its status and check counts into a
 * `Summary`. Any exception while running a spec or parsing its result is
 * recorded as an `error` entry and counted as a failure.
 *
 * Side effects: prints progress to stdout, writes
 * `benchmark/results/summary.json` and `benchmark/results/summary.md`, and
 * finally terminates the process with exit code 1 when at least one spec
 * failed or errored, 0 otherwise (partial results do not fail the run).
 */
function main() {
  const resultsDir = path.join(ROOT, 'benchmark/results')

  const specs = fs.readdirSync(specsDir)
    .filter(f => f.endsWith('.json') && !f.includes('baseline'))
    .sort()

  console.log('ZenKit Benchmark Runner — All Specs')
  console.log('====================================\n')
  console.log(`Found ${specs.length} feature specs\n`)

  const summary: Summary = {
    total: specs.length,
    passed: 0,
    failed: 0,
    partial: 0,
    results: [],
    generated_at: new Date().toISOString(),
  }

  for (const spec of specs) {
    const specPath = path.join('benchmark/feature-specs', spec)
    try {
      // The path is quoted so a spec file name containing spaces reaches
      // run.ts as a single argument. The captured stdout is not needed: the
      // outcome is read from the result file below.
      execSync(`npx tsx benchmark/scripts/run.ts "${specPath}"`, {
        cwd: ROOT,
        encoding: 'utf-8',
        timeout: 30000,
      })

      // Parse the result from the generated file
      const specData = JSON.parse(fs.readFileSync(path.join(specsDir, spec), 'utf-8'))
      const resultPath = path.join(resultsDir, `${specData.feature_id}-live.json`)
      const result = JSON.parse(fs.readFileSync(resultPath, 'utf-8'))

      // Reject malformed result files here so they are reported as an error
      // for this spec instead of crashing the report generation later.
      if (typeof result.status !== 'string' || !Array.isArray(result.stages)) {
        throw new Error(`Malformed result file: ${resultPath}`)
      }

      const totalChecks = result.stages.reduce(
        (sum: number, stage: { checks_run: number }) => sum + stage.checks_run,
        0,
      )
      const totalPassed = result.stages.reduce(
        (sum: number, stage: { checks_passed: number }) => sum + stage.checks_passed,
        0,
      )

      const entry = {
        spec: spec,
        status: result.status,
        criteria: `${result.validation_summary.criteria_passed}/${result.validation_summary.total_criteria}`,
        checks: `${totalPassed}/${totalChecks}`,
        duration_ms: result.duration_ms,
      }
      summary.results.push(entry)

      // Any status other than pass/partial counts as a failure.
      if (result.status === 'pass') summary.passed++
      else if (result.status === 'partial') summary.partial++
      else summary.failed++

      const icon = result.status === 'pass' ? 'PASS' : result.status === 'partial' ? 'PARTIAL' : 'FAIL'
      console.log(`  [${icon}] ${specData.name} — criteria ${entry.criteria}, checks ${entry.checks}`)
    } catch (err) {
      // A runner crash, timeout, or unreadable/malformed result file is
      // reported as an error row and contributes to the failure count.
      summary.failed++
      summary.results.push({
        spec: spec,
        status: 'error',
        criteria: '0/0',
        checks: '0/0',
        duration_ms: 0,
      })
      console.log(`  [ERROR] ${spec} — ${err}`)
    }
  }

  console.log(`\n${'='.repeat(50)}`)
  console.log(`Total:   ${summary.total} specs`)
  console.log(`Passed:  ${summary.passed}`)
  console.log(`Failed:  ${summary.failed}`)
  console.log(`Partial: ${summary.partial}`)

  // The results directory may not exist yet (for example when every spec
  // errored before producing a result file), so create it before writing.
  fs.mkdirSync(resultsDir, { recursive: true })

  // Write summary
  const summaryPath = path.join(resultsDir, 'summary.json')
  fs.writeFileSync(summaryPath, JSON.stringify(summary, null, 2))
  console.log(`\nSummary: ${summaryPath}`)

  // Write markdown summary
  const lines = [
    '# Benchmark Summary',
    '',
    `Generated: ${summary.generated_at}`,
    '',
    `| Spec | Status | Criteria | Checks | Duration |`,
    `|------|--------|----------|--------|----------|`,
  ]
  for (const r of summary.results) {
    lines.push(`| ${r.spec} | ${r.status.toUpperCase()} | ${r.criteria} | ${r.checks} | ${r.duration_ms}ms |`)
  }
  lines.push('', `**${summary.passed}/${summary.total} passed**`)

  const mdPath = path.join(resultsDir, 'summary.md')
  fs.writeFileSync(mdPath, lines.join('\n'))
  console.log(`Report:  ${mdPath}`)

  // Non-zero exit code when at least one spec failed or errored.
  process.exit(summary.failed > 0 ? 1 : 0)
}
|
|
124
|
+
|
|
125
|
+
// Script entry point: run all specs; main() ends the process via process.exit.
main()
|