zenkit 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/CONTRIBUTING.md +63 -0
  2. package/LICENSE +21 -0
  3. package/README.md +242 -0
  4. package/agents/backend-architect.md +19 -0
  5. package/agents/frontend-architect.md +19 -0
  6. package/agents/implementation-auditor.md +19 -0
  7. package/agents/product-manager.md +19 -0
  8. package/agents/qa-test-engineer.md +19 -0
  9. package/agents/security-specialist.md +19 -0
  10. package/agents/system-architect.md +19 -0
  11. package/agents/technical-writer.md +19 -0
  12. package/agents/ux-engineer.md +19 -0
  13. package/benchmark/feature-specs/cli-tool.json +58 -0
  14. package/benchmark/feature-specs/handoff-system.json +69 -0
  15. package/benchmark/feature-specs/protocol-completeness.json +85 -0
  16. package/benchmark/feature-specs/schema-validator-baseline.json +93 -0
  17. package/benchmark/feature-specs/schema-validator-playground.json +92 -0
  18. package/benchmark/feature-specs/self-audit.json +76 -0
  19. package/benchmark/fixtures/valid-handoff.json +13 -0
  20. package/benchmark/scripts/compare.ts +172 -0
  21. package/benchmark/scripts/report.ts +102 -0
  22. package/benchmark/scripts/run-all.ts +125 -0
  23. package/benchmark/scripts/run.ts +595 -0
  24. package/benchmark/scripts/visualize.ts +120 -0
  25. package/bin/zenkit.js +24 -0
  26. package/commands/audit.md +28 -0
  27. package/commands/build.md +26 -0
  28. package/commands/checkpoint.md +28 -0
  29. package/commands/handoff.md +28 -0
  30. package/commands/plan.md +27 -0
  31. package/commands/refactor.md +27 -0
  32. package/commands/ship.md +28 -0
  33. package/commands/spec.md +26 -0
  34. package/dist/cli.d.ts +2 -0
  35. package/dist/cli.d.ts.map +1 -0
  36. package/dist/cli.js +174 -0
  37. package/dist/cli.js.map +1 -0
  38. package/dist/index.d.ts +765 -0
  39. package/dist/index.d.ts.map +1 -0
  40. package/dist/index.js +121 -0
  41. package/dist/index.js.map +1 -0
  42. package/dist/schemas/audit.schema.json +63 -0
  43. package/dist/schemas/benchmark.schema.json +118 -0
  44. package/dist/schemas/checkpoint.schema.json +64 -0
  45. package/dist/schemas/feature-spec.schema.json +76 -0
  46. package/dist/schemas/handoff.schema.json +78 -0
  47. package/dist/schemas/schemas/audit.schema.json +63 -0
  48. package/dist/schemas/schemas/benchmark.schema.json +118 -0
  49. package/dist/schemas/schemas/checkpoint.schema.json +64 -0
  50. package/dist/schemas/schemas/feature-spec.schema.json +76 -0
  51. package/dist/schemas/schemas/handoff.schema.json +78 -0
  52. package/dist/schemas/schemas/task.schema.json +69 -0
  53. package/dist/schemas/task.schema.json +69 -0
  54. package/docs/agent-contract.md +36 -0
  55. package/docs/architecture.md +88 -0
  56. package/docs/benchmarking.md +51 -0
  57. package/docs/command-model.md +43 -0
  58. package/docs/philosophy.md +35 -0
  59. package/docs/roadmap.md +43 -0
  60. package/docs/self-audit.md +29 -0
  61. package/hooks/post-change.md +30 -0
  62. package/hooks/pre-change.md +27 -0
  63. package/hooks/pre-ship.md +30 -0
  64. package/package.json +92 -0
  65. package/rubrics/architectural-alignment.md +26 -0
  66. package/rubrics/execution-quality.md +26 -0
  67. package/rubrics/verbosity-score.md +26 -0
  68. package/schemas/audit.schema.json +63 -0
  69. package/schemas/benchmark.schema.json +118 -0
  70. package/schemas/checkpoint.schema.json +64 -0
  71. package/schemas/feature-spec.schema.json +76 -0
  72. package/schemas/handoff.schema.json +78 -0
  73. package/schemas/task.schema.json +69 -0
  74. package/skills/architecture-review.md +17 -0
  75. package/skills/backend-change.md +17 -0
  76. package/skills/bug-triage.md +17 -0
  77. package/skills/frontend-change.md +17 -0
  78. package/skills/prompt-pruning.md +17 -0
  79. package/skills/release-check.md +17 -0
  80. package/skills/security-review.md +17 -0
  81. package/templates/agent.template.md +18 -0
  82. package/templates/command.template.md +21 -0
  83. package/templates/skill.template.md +15 -0
  84. package/templates/task.template.md +19 -0
@@ -0,0 +1,85 @@
1
+ {
2
+ "feature_id": "pc-001",
3
+ "name": "Protocol Completeness",
4
+ "description": "Verifies the complete ZenKit protocol layer exists: all commands, schemas, skills, hooks, agents, rubrics, templates, and documentation.",
5
+ "mode": "zenkit",
6
+ "acceptance_criteria": [
7
+ {
8
+ "id": "pc-1",
9
+ "description": "All 8 commands exist",
10
+ "verification": { "type": "file_exists", "path": "commands/ship.md" }
11
+ },
12
+ {
13
+ "id": "pc-2",
14
+ "description": "All 6 schemas compile",
15
+ "verification": { "type": "schema_count", "expected": 6 }
16
+ },
17
+ {
18
+ "id": "pc-3",
19
+ "description": "Architecture documentation exists",
20
+ "verification": { "type": "file_exists", "path": "docs/architecture.md" }
21
+ },
22
+ {
23
+ "id": "pc-4",
24
+ "description": "Philosophy documentation exists",
25
+ "verification": { "type": "file_exists", "path": "docs/philosophy.md" }
26
+ },
27
+ {
28
+ "id": "pc-5",
29
+ "description": "Benchmark system documentation exists",
30
+ "verification": { "type": "file_exists", "path": "docs/benchmarking.md" }
31
+ },
32
+ {
33
+ "id": "pc-6",
34
+ "description": "Pre-change hook exists",
35
+ "verification": { "type": "file_exists", "path": "hooks/pre-change.md" }
36
+ },
37
+ {
38
+ "id": "pc-7",
39
+ "description": "At least one rubric defines a scoring scale",
40
+ "verification": { "type": "file_contains", "path": "rubrics/execution-quality.md", "pattern": "Score" }
41
+ },
42
+ {
43
+ "id": "pc-8",
44
+ "description": "Agent template exists for extending the protocol",
45
+ "verification": { "type": "file_exists", "path": "templates/agent.template.md" }
46
+ },
47
+ {
48
+ "id": "pc-9",
49
+ "description": "README documents the project structure",
50
+ "verification": { "type": "file_contains", "path": "README.md", "pattern": "commands/" }
51
+ },
52
+ {
53
+ "id": "pc-10",
54
+ "description": "Schemas use consistent draft version",
55
+ "verification": { "type": "schemas_consistent" }
56
+ }
57
+ ],
58
+ "constraints": [
59
+ "Protocol artifacts are plain files (markdown + JSON), no compiled dependencies"
60
+ ],
61
+ "expected_files": [
62
+ "commands/spec.md",
63
+ "commands/plan.md",
64
+ "commands/build.md",
65
+ "commands/audit.md",
66
+ "commands/refactor.md",
67
+ "commands/handoff.md",
68
+ "commands/checkpoint.md",
69
+ "commands/ship.md",
70
+ "schemas/handoff.schema.json",
71
+ "schemas/task.schema.json",
72
+ "schemas/audit.schema.json",
73
+ "schemas/checkpoint.schema.json",
74
+ "schemas/benchmark.schema.json",
75
+ "docs/architecture.md",
76
+ "docs/philosophy.md",
77
+ "docs/benchmarking.md"
78
+ ],
79
+ "assigned_commands": ["audit"],
80
+ "estimated_complexity": "low",
81
+ "limitations": [
82
+ "Verifies file existence and key content, not comprehensive content quality",
83
+ "Does not evaluate whether individual commands or agents are well-written"
84
+ ]
85
+ }
@@ -0,0 +1,93 @@
1
+ {
2
+ "feature_id": "svp-001-baseline",
3
+ "name": "Schema Validator Playground (Baseline)",
4
+ "description": "Same feature as svp-001, but executed without ZenKit workflow structure. This spec represents an unstructured approach for comparison.",
5
+ "mode": "baseline",
6
+ "acceptance_criteria": [
7
+ {
8
+ "id": "ac-1",
9
+ "description": "Schema selector component exists and exports SchemaSelector",
10
+ "verification": {
11
+ "type": "file_contains",
12
+ "path": "src/components/playground/SchemaSelector.tsx",
13
+ "pattern": "export function SchemaSelector"
14
+ }
15
+ },
16
+ {
17
+ "id": "ac-2",
18
+ "description": "JSON editor component exists and accepts value/onChange props",
19
+ "verification": {
20
+ "type": "file_contains",
21
+ "path": "src/components/playground/JsonEditor.tsx",
22
+ "pattern": "export function JsonEditor"
23
+ }
24
+ },
25
+ {
26
+ "id": "ac-3",
27
+ "description": "Validation results component displays errors with paths",
28
+ "verification": {
29
+ "type": "file_contains",
30
+ "path": "src/components/playground/ValidationResults.tsx",
31
+ "pattern": "err.path"
32
+ }
33
+ },
34
+ {
35
+ "id": "ac-4",
36
+ "description": "Playground page wires schema selection, editing, and validation together",
37
+ "verification": {
38
+ "type": "file_contains",
39
+ "path": "src/app/playground/page.tsx",
40
+ "pattern": "validateAgainstSchema"
41
+ }
42
+ },
43
+ {
44
+ "id": "ac-5",
45
+ "description": "All 6 ZenKit schemas are registered and compilable",
46
+ "verification": {
47
+ "type": "schema_count",
48
+ "expected": 6
49
+ }
50
+ },
51
+ {
52
+ "id": "ac-6",
53
+ "description": "Example data exists and validates for each schema",
54
+ "verification": {
55
+ "type": "examples_valid"
56
+ }
57
+ },
58
+ {
59
+ "id": "ac-7",
60
+ "description": "Unit tests exist and cover schema validation",
61
+ "verification": {
62
+ "type": "file_exists",
63
+ "path": "src/lib/__tests__/schemas.test.ts"
64
+ }
65
+ },
66
+ {
67
+ "id": "ac-8",
68
+ "description": "All schemas use consistent draft-07 format",
69
+ "verification": {
70
+ "type": "schemas_consistent"
71
+ }
72
+ }
73
+ ],
74
+ "constraints": [
75
+ "Must work entirely client-side with no backend API calls",
76
+ "Must use the same validation library (Ajv) as core tooling"
77
+ ],
78
+ "expected_files": [
79
+ "src/app/playground/page.tsx",
80
+ "src/components/playground/SchemaSelector.tsx",
81
+ "src/components/playground/JsonEditor.tsx",
82
+ "src/components/playground/ValidationResults.tsx",
83
+ "src/lib/schemas.ts",
84
+ "src/lib/__tests__/schemas.test.ts"
85
+ ],
86
+ "assigned_commands": [],
87
+ "estimated_complexity": "medium",
88
+ "limitations": [
89
+ "This baseline spec verifies the same deliverables as the zenkit run — it differs in workflow metadata only",
90
+ "The comparison is illustrative: both specs validate the same codebase, so pass/fail will match",
91
+ "A meaningful baseline comparison requires running the actual implementation twice with different workflows"
92
+ ]
93
+ }
@@ -0,0 +1,92 @@
1
+ {
2
+ "feature_id": "svp-001",
3
+ "name": "Schema Validator Playground",
4
+ "description": "An interactive web-based tool for validating JSON data against ZenKit schemas.",
5
+ "mode": "zenkit",
6
+ "acceptance_criteria": [
7
+ {
8
+ "id": "ac-1",
9
+ "description": "Schema selector component exists and exports SchemaSelector",
10
+ "verification": {
11
+ "type": "file_contains",
12
+ "path": "src/components/playground/SchemaSelector.tsx",
13
+ "pattern": "export function SchemaSelector"
14
+ }
15
+ },
16
+ {
17
+ "id": "ac-2",
18
+ "description": "JSON editor component exists and accepts value/onChange props",
19
+ "verification": {
20
+ "type": "file_contains",
21
+ "path": "src/components/playground/JsonEditor.tsx",
22
+ "pattern": "export function JsonEditor"
23
+ }
24
+ },
25
+ {
26
+ "id": "ac-3",
27
+ "description": "Validation results component displays errors with paths",
28
+ "verification": {
29
+ "type": "file_contains",
30
+ "path": "src/components/playground/ValidationResults.tsx",
31
+ "pattern": "err.path"
32
+ }
33
+ },
34
+ {
35
+ "id": "ac-4",
36
+ "description": "Playground page wires schema selection, editing, and validation together",
37
+ "verification": {
38
+ "type": "file_contains",
39
+ "path": "src/app/playground/page.tsx",
40
+ "pattern": "validateAgainstSchema"
41
+ }
42
+ },
43
+ {
44
+ "id": "ac-5",
45
+ "description": "All 6 ZenKit schemas are registered and compilable",
46
+ "verification": {
47
+ "type": "schema_count",
48
+ "expected": 6
49
+ }
50
+ },
51
+ {
52
+ "id": "ac-6",
53
+ "description": "Example data exists and validates for each schema",
54
+ "verification": {
55
+ "type": "examples_valid"
56
+ }
57
+ },
58
+ {
59
+ "id": "ac-7",
60
+ "description": "Unit tests exist and cover schema validation",
61
+ "verification": {
62
+ "type": "file_exists",
63
+ "path": "src/lib/__tests__/schemas.test.ts"
64
+ }
65
+ },
66
+ {
67
+ "id": "ac-8",
68
+ "description": "All schemas use consistent draft-07 format",
69
+ "verification": {
70
+ "type": "schemas_consistent"
71
+ }
72
+ }
73
+ ],
74
+ "constraints": [
75
+ "Must work entirely client-side with no backend API calls",
76
+ "Must use the same validation library (Ajv) as core tooling"
77
+ ],
78
+ "expected_files": [
79
+ "src/app/playground/page.tsx",
80
+ "src/components/playground/SchemaSelector.tsx",
81
+ "src/components/playground/JsonEditor.tsx",
82
+ "src/components/playground/ValidationResults.tsx",
83
+ "src/lib/schemas.ts",
84
+ "src/lib/__tests__/schemas.test.ts"
85
+ ],
86
+ "assigned_commands": ["spec", "plan", "build", "audit", "checkpoint", "ship"],
87
+ "estimated_complexity": "medium",
88
+ "limitations": [
89
+ "Acceptance criteria verify code structure and schema validity, not runtime UI behavior",
90
+ "UI interaction testing (click flows, keyboard navigation) requires browser automation not included here"
91
+ ]
92
+ }
@@ -0,0 +1,76 @@
1
+ {
2
+ "feature_id": "sa-001",
3
+ "name": "ZenKit Self-Audit",
4
+ "description": "ZenKit auditing its own repository structure, schema validity, test coverage, and documentation completeness.",
5
+ "mode": "zenkit",
6
+ "acceptance_criteria": [
7
+ {
8
+ "id": "sa-1",
9
+ "description": "All JSON schemas compile without errors",
10
+ "verification": { "type": "schema_count", "expected": 6 }
11
+ },
12
+ {
13
+ "id": "sa-2",
14
+ "description": "All schemas use consistent JSON Schema draft",
15
+ "verification": { "type": "schemas_consistent" }
16
+ },
17
+ {
18
+ "id": "sa-3",
19
+ "description": "Benchmark fixtures validate against their schemas",
20
+ "verification": { "type": "examples_valid" }
21
+ },
22
+ {
23
+ "id": "sa-4",
24
+ "description": "Self-audit documentation exists",
25
+ "verification": { "type": "file_exists", "path": "docs/self-audit.md" }
26
+ },
27
+ {
28
+ "id": "sa-5",
29
+ "description": "Self-audit doc addresses circular validation safeguards",
30
+ "verification": { "type": "file_contains", "path": "docs/self-audit.md", "pattern": "circular" }
31
+ },
32
+ {
33
+ "id": "sa-6",
34
+ "description": "README links to self-audit documentation",
35
+ "verification": { "type": "file_contains", "path": "README.md", "pattern": "self-audit" }
36
+ },
37
+ {
38
+ "id": "sa-7",
39
+ "description": "Landing page includes self-audit section",
40
+ "verification": { "type": "file_contains", "path": "src/components/SelfAudit.tsx", "pattern": "self-certification" }
41
+ },
42
+ {
43
+ "id": "sa-8",
44
+ "description": "Benchmark results include uncertainty notes",
45
+ "verification": { "type": "file_contains", "path": "schemas/benchmark.schema.json", "pattern": "uncertainty" }
46
+ },
47
+ {
48
+ "id": "sa-9",
49
+ "description": "Schema validation script passes",
50
+ "verification": { "type": "test_passes", "command": "npx tsx src/lib/validate-schemas.ts" }
51
+ },
52
+ {
53
+ "id": "sa-10",
54
+ "description": "Package is publishable (package.json private flag is false)",
55
+ "verification": { "type": "json_path_equals", "path": "package.json", "json_path": "private", "equals": false }
56
+ }
57
+ ],
58
+ "constraints": [
59
+ "Must use ZenKit's own benchmark runner for verification",
60
+ "Must not claim self-audit proves correctness"
61
+ ],
62
+ "expected_files": [
63
+ "docs/self-audit.md",
64
+ "schemas/benchmark.schema.json",
65
+ "src/components/SelfAudit.tsx",
66
+ "benchmark/scripts/run.ts"
67
+ ],
68
+ "assigned_commands": ["audit", "checkpoint"],
69
+ "estimated_complexity": "low",
70
+ "limitations": [
71
+ "Self-audit checks structure and content presence, not semantic correctness",
72
+ "Cannot verify whether safeguards are actually followed in practice",
73
+ "A system auditing itself has inherent blind spots — this is documented, not eliminated",
74
+ "test_passes runs the configured command (here, the schema validation script) — a slow but real check. json_path_equals checks a single value."
75
+ ]
76
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "context": "Test fixture: minimal valid handoff for benchmark validation.",
3
+ "assumptions": ["This is a test fixture"],
4
+ "constraints": [],
5
+ "decision": "Use minimal data for fast validation.",
6
+ "deliverable": {
7
+ "type": "artifact",
8
+ "description": "Test fixture for schema validation"
9
+ },
10
+ "risks": [],
11
+ "open_questions": [],
12
+ "next_agent": "qa-test-engineer"
13
+ }
@@ -0,0 +1,172 @@
1
+ /**
2
+ * ZenKit Benchmark Comparison
3
+ *
4
+ * Compares a zenkit-mode benchmark result against a baseline-mode result.
5
+ * Produces a structured comparison artifact.
6
+ *
7
+ * Usage: npx tsx benchmark/scripts/compare.ts [zenkit-result] [baseline-result]
8
+ */
9
+ import fs from 'fs'
10
+ import path from 'path'
11
+
12
+ interface ComparisonResult {
13
+ comparison_id: string
14
+ version: string
15
+ zenkit_result: string
16
+ baseline_result: string
17
+ generated_at: string
18
+ data_source: 'illustrative' | 'measured'
19
+ summary: {
20
+ zenkit_status: string
21
+ baseline_status: string
22
+ zenkit_criteria_passed: number
23
+ baseline_criteria_passed: number
24
+ zenkit_total_checks: number
25
+ baseline_total_checks: number
26
+ zenkit_duration_ms: number
27
+ baseline_duration_ms: number
28
+ }
29
+ structural_differences: {
30
+ category: string
31
+ zenkit: string
32
+ baseline: string
33
+ note: string
34
+ }[]
35
+ telemetry_comparison: {
36
+ metric: string
37
+ zenkit: string
38
+ baseline: string
39
+ source: string
40
+ }[]
41
+ interpretation: string[]
42
+ caveats: string[]
43
+ }
44
+
45
+ function main() {
46
+ const zenkitPath = process.argv[2] || 'benchmark/results/svp-001-live.json'
47
+ const baselinePath = process.argv[3] || 'benchmark/results/svp-001-baseline-live.json'
48
+
49
+ const zk = JSON.parse(fs.readFileSync(path.resolve(zenkitPath), 'utf-8'))
50
+ const bl = JSON.parse(fs.readFileSync(path.resolve(baselinePath), 'utf-8'))
51
+
52
+ const zkChecks = (zk.stages || []).reduce((s: number, st: any) => s + st.checks_run, 0)
53
+ const blChecks = (bl.stages || []).reduce((s: number, st: any) => s + st.checks_run, 0)
54
+
55
+ const comparison: ComparisonResult = {
56
+ comparison_id: `cmp-${Date.now()}`,
57
+ version: '0.2.0',
58
+ zenkit_result: zenkitPath,
59
+ baseline_result: baselinePath,
60
+ generated_at: new Date().toISOString(),
61
+ data_source: 'illustrative',
62
+ summary: {
63
+ zenkit_status: zk.status,
64
+ baseline_status: bl.status,
65
+ zenkit_criteria_passed: zk.validation_summary.criteria_passed,
66
+ baseline_criteria_passed: bl.validation_summary.criteria_passed,
67
+ zenkit_total_checks: zkChecks,
68
+ baseline_total_checks: blChecks,
69
+ zenkit_duration_ms: zk.duration_ms,
70
+ baseline_duration_ms: bl.duration_ms,
71
+ },
72
+ structural_differences: [
73
+ {
74
+ category: 'Workflow structure',
75
+ zenkit: `${zk.stages?.length || 0} stages with ${zkChecks} checks`,
76
+ baseline: `${bl.stages?.length || 0} stages with ${blChecks} checks`,
77
+ note: 'ZenKit enforces staged validation; baseline runs the same checks without workflow metadata',
78
+ },
79
+ {
80
+ category: 'Uncertainty tracking',
81
+ zenkit: `${zk.uncertainty?.length || 0} uncertainty notes, ${zk.limitations?.length || 0} limitations`,
82
+ baseline: `${bl.uncertainty?.length || 0} uncertainty notes, ${bl.limitations?.length || 0} limitations`,
83
+ note: 'Both specs can declare uncertainty; ZenKit workflow structure makes this a first-class requirement',
84
+ },
85
+ {
86
+ category: 'Handoff contracts',
87
+ zenkit: 'Handoff schema enforced between stages',
88
+ baseline: 'No handoff contracts — context transfer is implicit',
89
+ note: 'This structural difference matters most in multi-agent workflows, not single-feature benchmarks',
90
+ },
91
+ {
92
+ category: 'Acceptance criteria format',
93
+ zenkit: 'Machine-verifiable criteria with verification steps',
94
+ baseline: 'Same criteria format (shared spec structure)',
95
+ note: 'The verification format is the same — the difference is that ZenKit mandates it',
96
+ },
97
+ ],
98
+ telemetry_comparison: [
99
+ {
100
+ metric: 'Estimated tokens',
101
+ zenkit: `~${zk.telemetry?.estimated?.tokens?.toLocaleString() || 'N/A'}`,
102
+ baseline: `~${bl.telemetry?.estimated?.tokens?.toLocaleString() || 'N/A'}`,
103
+ source: 'estimated',
104
+ },
105
+ {
106
+ metric: 'Estimated cost',
107
+ zenkit: `~$${zk.telemetry?.estimated?.cost_usd?.toFixed(2) || 'N/A'}`,
108
+ baseline: `~$${bl.telemetry?.estimated?.cost_usd?.toFixed(2) || 'N/A'}`,
109
+ source: 'estimated',
110
+ },
111
+ ],
112
+ interpretation: [
113
+ 'In this comparison, both modes verify the same codebase and produce the same pass/fail results.',
114
+ 'The structural difference is in workflow metadata: ZenKit runs enforce staged validation, explicit uncertainty, and handoff contracts.',
115
+ 'A meaningful cost/quality comparison requires running the actual implementation process twice — once with ZenKit structure, once without — and measuring drift, retries, and rework.',
116
+ 'This comparison demonstrates the architecture for such measurement, not the measurement itself.',
117
+ ],
118
+ caveats: [
119
+ 'This is illustrative data. Both benchmark runs validate the same already-built feature.',
120
+ 'Pass/fail parity is expected — the value of ZenKit structure shows in multi-step, multi-agent workflows where drift compounds.',
121
+ 'Real comparison data requires A/B workflow execution, which is outside the scope of this static benchmark.',
122
+ ],
123
+ }
124
+
125
+ // Write JSON
126
+ const outPath = path.resolve('benchmark/results/comparison-svp-001.json')
127
+ fs.writeFileSync(outPath, JSON.stringify(comparison, null, 2))
128
+
129
+ // Write markdown
130
+ const lines = [
131
+ '# Benchmark Comparison: ZenKit vs Baseline',
132
+ '',
133
+ `> **Data source: ${comparison.data_source}** — This comparison demonstrates the measurement architecture, not a measured result.`,
134
+ '',
135
+ '## Summary',
136
+ '',
137
+ '| Metric | ZenKit | Baseline |',
138
+ '|--------|--------|----------|',
139
+ `| Status | ${comparison.summary.zenkit_status} | ${comparison.summary.baseline_status} |`,
140
+ `| Criteria passed | ${comparison.summary.zenkit_criteria_passed} | ${comparison.summary.baseline_criteria_passed} |`,
141
+ `| Total checks | ${comparison.summary.zenkit_total_checks} | ${comparison.summary.baseline_total_checks} |`,
142
+ `| Duration | ${comparison.summary.zenkit_duration_ms}ms | ${comparison.summary.baseline_duration_ms}ms |`,
143
+ '',
144
+ '## Structural Differences',
145
+ '',
146
+ ]
147
+
148
+ for (const d of comparison.structural_differences) {
149
+ lines.push(`### ${d.category}`)
150
+ lines.push(`- **ZenKit:** ${d.zenkit}`)
151
+ lines.push(`- **Baseline:** ${d.baseline}`)
152
+ lines.push(`- *${d.note}*`)
153
+ lines.push('')
154
+ }
155
+
156
+ lines.push('## Interpretation', '')
157
+ for (const i of comparison.interpretation) lines.push(`- ${i}`)
158
+
159
+ lines.push('', '## Caveats', '')
160
+ for (const c of comparison.caveats) lines.push(`- ${c}`)
161
+
162
+ lines.push('', '---', '', '*Generated by ZenKit Benchmark Comparison v0.2*')
163
+
164
+ const mdPath = path.resolve('benchmark/results/comparison-svp-001.md')
165
+ fs.writeFileSync(mdPath, lines.join('\n'))
166
+
167
+ console.log(lines.join('\n'))
168
+ console.log(`\nJSON: ${outPath}`)
169
+ console.log(`Markdown: ${mdPath}`)
170
+ }
171
+
172
+ main()
@@ -0,0 +1,102 @@
1
+ /**
2
+ * ZenKit Benchmark Report Generator
3
+ *
4
+ * Reads a benchmark result JSON and produces a human-readable markdown report.
5
+ * Clearly labels what was validated vs. estimated.
6
+ *
7
+ * Usage: npx tsx benchmark/scripts/report.ts [result-path]
8
+ */
9
+ import fs from 'fs'
10
+ import path from 'path'
11
+
12
+ function main() {
13
+ const resultPath = process.argv[2] || 'benchmark/results/svp-001-live.json'
14
+ const resolved = path.resolve(resultPath)
15
+
16
+ if (!fs.existsSync(resolved)) {
17
+ console.error(`Result file not found: ${resolved}`)
18
+ process.exit(1)
19
+ }
20
+
21
+ const r = JSON.parse(fs.readFileSync(resolved, 'utf-8'))
22
+
23
+ const lines: string[] = [
24
+ `# Benchmark Report: ${r.task_name}`,
25
+ '',
26
+ `| Field | Value |`,
27
+ `|-------|-------|`,
28
+ `| Benchmark ID | \`${r.benchmark_id}\` |`,
29
+ `| Version | ${r.version} |`,
30
+ `| Mode | ${r.mode} |`,
31
+ `| Status | **${r.status.toUpperCase()}** |`,
32
+ `| Duration | ${r.duration_ms}ms |`,
33
+ `| Started | ${r.started_at} |`,
34
+ `| Completed | ${r.completed_at} |`,
35
+ '',
36
+ '## Acceptance Criteria',
37
+ '',
38
+ `${r.validation_summary.criteria_passed}/${r.validation_summary.total_criteria} criteria passed.`,
39
+ '',
40
+ '| ID | Status | Description | Evidence |',
41
+ '|----|--------|-------------|----------|',
42
+ ]
43
+
44
+ for (const c of r.acceptance_criteria_results || []) {
45
+ const icon = c.status === 'pass' ? 'PASS' : 'FAIL'
46
+ lines.push(`| ${c.id} | ${icon} | ${c.description} | ${c.evidence} |`)
47
+ }
48
+
49
+ // Stages
50
+ lines.push('', '## Stages', '')
51
+ for (const s of r.stages || []) {
52
+ lines.push(`- **[${s.status.toUpperCase()}]** ${s.name} — ${s.checks_passed}/${s.checks_run} checks`)
53
+ if (s.details) {
54
+ for (const d of s.details) {
55
+ lines.push(` - ${d}`)
56
+ }
57
+ }
58
+ }
59
+
60
+ // Files
61
+ if (r.files_missing?.length > 0) {
62
+ lines.push('', '## Missing Files', '')
63
+ for (const f of r.files_missing) lines.push(`- \`${f}\``)
64
+ }
65
+
66
+ // Telemetry
67
+ lines.push('', '## Telemetry', '')
68
+ if (r.telemetry?.estimated) {
69
+ lines.push(`- **Estimated tokens:** ~${r.telemetry.estimated.tokens.toLocaleString()}`)
70
+ lines.push(`- **Estimated cost:** ~$${r.telemetry.estimated.cost_usd.toFixed(2)}`)
71
+ lines.push(`- **Basis:** ${r.telemetry.estimated.basis}`)
72
+ }
73
+ if (r.telemetry?.actual) {
74
+ lines.push(`- **Actual tokens:** ${r.telemetry.actual.tokens.toLocaleString()}`)
75
+ lines.push(`- **Actual cost:** $${r.telemetry.actual.cost_usd.toFixed(2)}`)
76
+ } else {
77
+ lines.push(`- **Actual telemetry:** Not available — no API instrumentation in this run`)
78
+ }
79
+
80
+ // Uncertainty
81
+ if (r.uncertainty?.length > 0) {
82
+ lines.push('', '## What This Benchmark Does NOT Prove', '')
83
+ for (const u of r.uncertainty) lines.push(`- ${u}`)
84
+ }
85
+
86
+ // Limitations
87
+ if (r.limitations?.length > 0) {
88
+ lines.push('', '## Limitations', '')
89
+ for (const l of r.limitations) lines.push(`- ${l}`)
90
+ }
91
+
92
+ lines.push('', '---', '', '*Generated by ZenKit Benchmark Reporter v0.2*')
93
+
94
+ const report = lines.join('\n')
95
+ const mdPath = resolved.replace(/\.json$/, '.md')
96
+ fs.writeFileSync(mdPath, report)
97
+
98
+ console.log(report)
99
+ console.log(`\nReport written to: ${mdPath}`)
100
+ }
101
+
102
+ main()