zenkit 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +63 -0
- package/LICENSE +21 -0
- package/README.md +242 -0
- package/agents/backend-architect.md +19 -0
- package/agents/frontend-architect.md +19 -0
- package/agents/implementation-auditor.md +19 -0
- package/agents/product-manager.md +19 -0
- package/agents/qa-test-engineer.md +19 -0
- package/agents/security-specialist.md +19 -0
- package/agents/system-architect.md +19 -0
- package/agents/technical-writer.md +19 -0
- package/agents/ux-engineer.md +19 -0
- package/benchmark/feature-specs/cli-tool.json +58 -0
- package/benchmark/feature-specs/handoff-system.json +69 -0
- package/benchmark/feature-specs/protocol-completeness.json +85 -0
- package/benchmark/feature-specs/schema-validator-baseline.json +93 -0
- package/benchmark/feature-specs/schema-validator-playground.json +92 -0
- package/benchmark/feature-specs/self-audit.json +76 -0
- package/benchmark/fixtures/valid-handoff.json +13 -0
- package/benchmark/scripts/compare.ts +172 -0
- package/benchmark/scripts/report.ts +102 -0
- package/benchmark/scripts/run-all.ts +125 -0
- package/benchmark/scripts/run.ts +595 -0
- package/benchmark/scripts/visualize.ts +120 -0
- package/bin/zenkit.js +24 -0
- package/commands/audit.md +28 -0
- package/commands/build.md +26 -0
- package/commands/checkpoint.md +28 -0
- package/commands/handoff.md +28 -0
- package/commands/plan.md +27 -0
- package/commands/refactor.md +27 -0
- package/commands/ship.md +28 -0
- package/commands/spec.md +26 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +174 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +765 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +121 -0
- package/dist/index.js.map +1 -0
- package/dist/schemas/audit.schema.json +63 -0
- package/dist/schemas/benchmark.schema.json +118 -0
- package/dist/schemas/checkpoint.schema.json +64 -0
- package/dist/schemas/feature-spec.schema.json +76 -0
- package/dist/schemas/handoff.schema.json +78 -0
- package/dist/schemas/schemas/audit.schema.json +63 -0
- package/dist/schemas/schemas/benchmark.schema.json +118 -0
- package/dist/schemas/schemas/checkpoint.schema.json +64 -0
- package/dist/schemas/schemas/feature-spec.schema.json +76 -0
- package/dist/schemas/schemas/handoff.schema.json +78 -0
- package/dist/schemas/schemas/task.schema.json +69 -0
- package/dist/schemas/task.schema.json +69 -0
- package/docs/agent-contract.md +36 -0
- package/docs/architecture.md +88 -0
- package/docs/benchmarking.md +51 -0
- package/docs/command-model.md +43 -0
- package/docs/philosophy.md +35 -0
- package/docs/roadmap.md +43 -0
- package/docs/self-audit.md +29 -0
- package/hooks/post-change.md +30 -0
- package/hooks/pre-change.md +27 -0
- package/hooks/pre-ship.md +30 -0
- package/package.json +92 -0
- package/rubrics/architectural-alignment.md +26 -0
- package/rubrics/execution-quality.md +26 -0
- package/rubrics/verbosity-score.md +26 -0
- package/schemas/audit.schema.json +63 -0
- package/schemas/benchmark.schema.json +118 -0
- package/schemas/checkpoint.schema.json +64 -0
- package/schemas/feature-spec.schema.json +76 -0
- package/schemas/handoff.schema.json +78 -0
- package/schemas/task.schema.json +69 -0
- package/skills/architecture-review.md +17 -0
- package/skills/backend-change.md +17 -0
- package/skills/bug-triage.md +17 -0
- package/skills/frontend-change.md +17 -0
- package/skills/prompt-pruning.md +17 -0
- package/skills/release-check.md +17 -0
- package/skills/security-review.md +17 -0
- package/templates/agent.template.md +18 -0
- package/templates/command.template.md +21 -0
- package/templates/skill.template.md +15 -0
- package/templates/task.template.md +19 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://zenkit.dev/schemas/benchmark.schema.json",
|
|
4
|
+
"title": "ZenKit Benchmark Result",
|
|
5
|
+
"description": "Structured output from a ZenKit benchmark run with acceptance criteria verification.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["benchmark_id", "version", "mode", "task_name", "started_at", "completed_at", "status", "validation_summary", "acceptance_criteria_results"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"benchmark_id": { "type": "string" },
|
|
10
|
+
"version": { "type": "string" },
|
|
11
|
+
"mode": {
|
|
12
|
+
"type": "string",
|
|
13
|
+
"enum": ["zenkit", "baseline"],
|
|
14
|
+
"description": "Whether this run used ZenKit workflow structure or an unstructured baseline."
|
|
15
|
+
},
|
|
16
|
+
"task_name": { "type": "string" },
|
|
17
|
+
"feature_spec": { "type": "string" },
|
|
18
|
+
"started_at": { "type": "string", "format": "date-time" },
|
|
19
|
+
"completed_at": { "type": "string", "format": "date-time" },
|
|
20
|
+
"duration_ms": { "type": "integer", "minimum": 0 },
|
|
21
|
+
"status": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"enum": ["pass", "fail", "partial"]
|
|
24
|
+
},
|
|
25
|
+
"expected_files": {
|
|
26
|
+
"type": "array",
|
|
27
|
+
"items": { "type": "string" }
|
|
28
|
+
},
|
|
29
|
+
"files_found": {
|
|
30
|
+
"type": "array",
|
|
31
|
+
"items": { "type": "string" }
|
|
32
|
+
},
|
|
33
|
+
"files_missing": {
|
|
34
|
+
"type": "array",
|
|
35
|
+
"items": { "type": "string" }
|
|
36
|
+
},
|
|
37
|
+
"acceptance_criteria_results": {
|
|
38
|
+
"type": "array",
|
|
39
|
+
"items": {
|
|
40
|
+
"type": "object",
|
|
41
|
+
"required": ["id", "description", "status", "evidence", "verification_type"],
|
|
42
|
+
"properties": {
|
|
43
|
+
"id": { "type": "string" },
|
|
44
|
+
"description": { "type": "string" },
|
|
45
|
+
"status": { "type": "string", "enum": ["pass", "fail"] },
|
|
46
|
+
"evidence": { "type": "string", "description": "What was actually checked and found." },
|
|
47
|
+
"verification_type": { "type": "string" }
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"stages": {
|
|
52
|
+
"type": "array",
|
|
53
|
+
"items": {
|
|
54
|
+
"type": "object",
|
|
55
|
+
"required": ["name", "status", "checks_run", "checks_passed"],
|
|
56
|
+
"properties": {
|
|
57
|
+
"name": { "type": "string" },
|
|
58
|
+
"status": { "type": "string", "enum": ["pass", "fail", "skipped"] },
|
|
59
|
+
"duration_ms": { "type": "integer" },
|
|
60
|
+
"checks_run": { "type": "integer", "minimum": 0 },
|
|
61
|
+
"checks_passed": { "type": "integer", "minimum": 0 },
|
|
62
|
+
"details": { "type": "array", "items": { "type": "string" } }
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
},
|
|
66
|
+
"validation_summary": {
|
|
67
|
+
"type": "object",
|
|
68
|
+
"required": ["total_criteria", "criteria_passed", "criteria_failed"],
|
|
69
|
+
"properties": {
|
|
70
|
+
"total_criteria": { "type": "integer" },
|
|
71
|
+
"criteria_passed": { "type": "integer" },
|
|
72
|
+
"criteria_failed": { "type": "integer" },
|
|
73
|
+
"schemas_valid": { "type": "boolean" },
|
|
74
|
+
"examples_valid": { "type": "boolean" }
|
|
75
|
+
}
|
|
76
|
+
},
|
|
77
|
+
"telemetry": {
|
|
78
|
+
"type": "object",
|
|
79
|
+
"required": ["estimated"],
|
|
80
|
+
"properties": {
|
|
81
|
+
"estimated": {
|
|
82
|
+
"type": "object",
|
|
83
|
+
"required": ["tokens", "cost_usd", "basis"],
|
|
84
|
+
"properties": {
|
|
85
|
+
"tokens": { "type": "integer" },
|
|
86
|
+
"cost_usd": { "type": "number" },
|
|
87
|
+
"basis": { "type": "string", "description": "How the estimate was calculated." }
|
|
88
|
+
}
|
|
89
|
+
},
|
|
90
|
+
"actual": {
|
|
91
|
+
"oneOf": [
|
|
92
|
+
{ "type": "null" },
|
|
93
|
+
{
|
|
94
|
+
"type": "object",
|
|
95
|
+
"required": ["tokens", "cost_usd"],
|
|
96
|
+
"properties": {
|
|
97
|
+
"tokens": { "type": "integer" },
|
|
98
|
+
"cost_usd": { "type": "number" }
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
],
|
|
102
|
+
"description": "Null when no real telemetry is available. Never fabricated."
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
},
|
|
106
|
+
"uncertainty": {
|
|
107
|
+
"type": "array",
|
|
108
|
+
"items": { "type": "string" },
|
|
109
|
+
"description": "What this benchmark does NOT prove."
|
|
110
|
+
},
|
|
111
|
+
"limitations": {
|
|
112
|
+
"type": "array",
|
|
113
|
+
"items": { "type": "string" },
|
|
114
|
+
"description": "Inherited from the feature spec — scope boundaries of the verification."
|
|
115
|
+
}
|
|
116
|
+
},
|
|
117
|
+
"additionalProperties": false
|
|
118
|
+
}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://zenkit.dev/schemas/checkpoint.schema.json",
|
|
4
|
+
"title": "ZenKit Checkpoint",
|
|
5
|
+
"description": "A snapshot of workflow state at a specific point in execution.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["checkpoint_id", "task_id", "status", "timestamp"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"checkpoint_id": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"pattern": "^chk-[a-z0-9-]+$"
|
|
12
|
+
},
|
|
13
|
+
"task_id": {
|
|
14
|
+
"type": "string"
|
|
15
|
+
},
|
|
16
|
+
"timestamp": {
|
|
17
|
+
"type": "string",
|
|
18
|
+
"format": "date-time"
|
|
19
|
+
},
|
|
20
|
+
"status": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"enum": ["snapshot", "gate", "rollback_point"]
|
|
23
|
+
},
|
|
24
|
+
"stage": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"enum": ["plan", "build", "audit", "ship"]
|
|
27
|
+
},
|
|
28
|
+
"state": {
|
|
29
|
+
"type": "object",
|
|
30
|
+
"properties": {
|
|
31
|
+
"files_changed": {
|
|
32
|
+
"type": "array",
|
|
33
|
+
"items": { "type": "string" }
|
|
34
|
+
},
|
|
35
|
+
"tests_passing": { "type": "boolean" },
|
|
36
|
+
"lint_passing": { "type": "boolean" },
|
|
37
|
+
"git_ref": { "type": "string" },
|
|
38
|
+
"notes": { "type": "string" }
|
|
39
|
+
}
|
|
40
|
+
},
|
|
41
|
+
"gate_conditions": {
|
|
42
|
+
"type": "array",
|
|
43
|
+
"items": {
|
|
44
|
+
"type": "object",
|
|
45
|
+
"properties": {
|
|
46
|
+
"condition": { "type": "string" },
|
|
47
|
+
"met": { "type": "boolean" }
|
|
48
|
+
},
|
|
49
|
+
"required": ["condition", "met"]
|
|
50
|
+
}
|
|
51
|
+
},
|
|
52
|
+
"metadata": {
|
|
53
|
+
"type": "object",
|
|
54
|
+
"properties": {
|
|
55
|
+
"agent": { "type": "string" },
|
|
56
|
+
"command": { "type": "string" },
|
|
57
|
+
"iteration": { "type": "integer" },
|
|
58
|
+
"tokens_used": { "type": "integer" },
|
|
59
|
+
"cost_estimate_usd": { "type": "number" }
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
},
|
|
63
|
+
"additionalProperties": false
|
|
64
|
+
}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://zenkit.dev/schemas/feature-spec.schema.json",
|
|
4
|
+
"title": "ZenKit Feature Spec",
|
|
5
|
+
"description": "Machine-readable feature specification for ZenKit benchmarks.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["feature_id", "name", "description", "mode", "acceptance_criteria", "constraints", "expected_files", "limitations"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"feature_id": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"pattern": "^[a-z0-9-]+$"
|
|
12
|
+
},
|
|
13
|
+
"name": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"minLength": 1
|
|
16
|
+
},
|
|
17
|
+
"description": {
|
|
18
|
+
"type": "string"
|
|
19
|
+
},
|
|
20
|
+
"mode": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"enum": ["zenkit", "baseline"]
|
|
23
|
+
},
|
|
24
|
+
"acceptance_criteria": {
|
|
25
|
+
"type": "array",
|
|
26
|
+
"minItems": 1,
|
|
27
|
+
"items": {
|
|
28
|
+
"type": "object",
|
|
29
|
+
"required": ["id", "description", "verification"],
|
|
30
|
+
"properties": {
|
|
31
|
+
"id": { "type": "string" },
|
|
32
|
+
"description": { "type": "string" },
|
|
33
|
+
"verification": {
|
|
34
|
+
"type": "object",
|
|
35
|
+
"required": ["type"],
|
|
36
|
+
"properties": {
|
|
37
|
+
"type": {
|
|
38
|
+
"type": "string",
|
|
39
|
+
"enum": ["file_exists", "file_contains", "schema_count", "examples_valid", "schemas_consistent", "test_passes", "json_path_equals"]
|
|
40
|
+
},
|
|
41
|
+
"path": { "type": "string" },
|
|
42
|
+
"pattern": { "type": "string" },
|
|
43
|
+
"command": { "type": "string", "description": "Shell command to run for test_passes verification." },
|
|
44
|
+
"json_path": { "type": "string", "description": "Dot-separated path into JSON for json_path_equals." },
|
|
45
|
+
"equals": { "description": "Expected value for json_path_equals." },
|
|
46
|
+
"expected": { "type": "integer" }
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
},
|
|
52
|
+
"constraints": {
|
|
53
|
+
"type": "array",
|
|
54
|
+
"items": { "type": "string" }
|
|
55
|
+
},
|
|
56
|
+
"expected_files": {
|
|
57
|
+
"type": "array",
|
|
58
|
+
"items": { "type": "string" }
|
|
59
|
+
},
|
|
60
|
+
"assigned_commands": {
|
|
61
|
+
"type": "array",
|
|
62
|
+
"items": { "type": "string" }
|
|
63
|
+
},
|
|
64
|
+
"estimated_complexity": {
|
|
65
|
+
"type": "string",
|
|
66
|
+
"enum": ["low", "medium", "high"]
|
|
67
|
+
},
|
|
68
|
+
"limitations": {
|
|
69
|
+
"type": "array",
|
|
70
|
+
"minItems": 1,
|
|
71
|
+
"items": { "type": "string" },
|
|
72
|
+
"description": "What this spec does NOT verify. Required — specs must be honest about scope."
|
|
73
|
+
}
|
|
74
|
+
},
|
|
75
|
+
"additionalProperties": false
|
|
76
|
+
}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://zenkit.dev/schemas/handoff.schema.json",
|
|
4
|
+
"title": "ZenKit Handoff Contract",
|
|
5
|
+
"description": "Structured handoff between agents or workflow stages in ZenKit.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["context", "assumptions", "decision", "deliverable", "next_agent"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"context": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "What is the current situation? What has happened so far?"
|
|
12
|
+
},
|
|
13
|
+
"assumptions": {
|
|
14
|
+
"type": "array",
|
|
15
|
+
"items": { "type": "string" },
|
|
16
|
+
"description": "Explicit assumptions made during this stage."
|
|
17
|
+
},
|
|
18
|
+
"constraints": {
|
|
19
|
+
"type": "array",
|
|
20
|
+
"items": { "type": "string" },
|
|
21
|
+
"description": "Hard constraints that bound this work."
|
|
22
|
+
},
|
|
23
|
+
"decision": {
|
|
24
|
+
"type": "string",
|
|
25
|
+
"description": "What was decided and why."
|
|
26
|
+
},
|
|
27
|
+
"deliverable": {
|
|
28
|
+
"type": "object",
|
|
29
|
+
"properties": {
|
|
30
|
+
"type": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"enum": ["code", "document", "schema", "plan", "review", "test", "artifact"]
|
|
33
|
+
},
|
|
34
|
+
"description": { "type": "string" },
|
|
35
|
+
"files_changed": {
|
|
36
|
+
"type": "array",
|
|
37
|
+
"items": { "type": "string" }
|
|
38
|
+
},
|
|
39
|
+
"validation_status": {
|
|
40
|
+
"type": "string",
|
|
41
|
+
"enum": ["passed", "failed", "partial", "untested"]
|
|
42
|
+
}
|
|
43
|
+
},
|
|
44
|
+
"required": ["type", "description"]
|
|
45
|
+
},
|
|
46
|
+
"risks": {
|
|
47
|
+
"type": "array",
|
|
48
|
+
"items": {
|
|
49
|
+
"type": "object",
|
|
50
|
+
"properties": {
|
|
51
|
+
"description": { "type": "string" },
|
|
52
|
+
"severity": { "type": "string", "enum": ["low", "medium", "high", "critical"] },
|
|
53
|
+
"mitigation": { "type": "string" }
|
|
54
|
+
},
|
|
55
|
+
"required": ["description", "severity"]
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
"open_questions": {
|
|
59
|
+
"type": "array",
|
|
60
|
+
"items": { "type": "string" },
|
|
61
|
+
"description": "Unresolved questions that the next agent should address."
|
|
62
|
+
},
|
|
63
|
+
"next_agent": {
|
|
64
|
+
"type": "string",
|
|
65
|
+
"description": "The agent or role that should receive this handoff."
|
|
66
|
+
},
|
|
67
|
+
"metadata": {
|
|
68
|
+
"type": "object",
|
|
69
|
+
"properties": {
|
|
70
|
+
"timestamp": { "type": "string", "format": "date-time" },
|
|
71
|
+
"source_agent": { "type": "string" },
|
|
72
|
+
"command": { "type": "string" },
|
|
73
|
+
"iteration": { "type": "integer", "minimum": 0 }
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
},
|
|
77
|
+
"additionalProperties": false
|
|
78
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://zenkit.dev/schemas/task.schema.json",
|
|
4
|
+
"title": "ZenKit Task",
|
|
5
|
+
"description": "A discrete unit of work within a ZenKit workflow.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["id", "name", "status", "command"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"id": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"pattern": "^[a-z0-9-]+$"
|
|
12
|
+
},
|
|
13
|
+
"name": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"minLength": 1,
|
|
16
|
+
"maxLength": 200
|
|
17
|
+
},
|
|
18
|
+
"description": {
|
|
19
|
+
"type": "string"
|
|
20
|
+
},
|
|
21
|
+
"command": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"enum": ["plan", "build", "audit", "refactor", "spec", "handoff", "checkpoint", "ship"]
|
|
24
|
+
},
|
|
25
|
+
"status": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"enum": ["pending", "in_progress", "blocked", "completed", "failed", "skipped"]
|
|
28
|
+
},
|
|
29
|
+
"context": {
|
|
30
|
+
"type": "string"
|
|
31
|
+
},
|
|
32
|
+
"assumptions": {
|
|
33
|
+
"type": "array",
|
|
34
|
+
"items": { "type": "string" }
|
|
35
|
+
},
|
|
36
|
+
"constraints": {
|
|
37
|
+
"type": "array",
|
|
38
|
+
"items": { "type": "string" }
|
|
39
|
+
},
|
|
40
|
+
"acceptance_criteria": {
|
|
41
|
+
"type": "array",
|
|
42
|
+
"items": { "type": "string" }
|
|
43
|
+
},
|
|
44
|
+
"files_affected": {
|
|
45
|
+
"type": "array",
|
|
46
|
+
"items": { "type": "string" }
|
|
47
|
+
},
|
|
48
|
+
"assigned_agent": {
|
|
49
|
+
"type": "string"
|
|
50
|
+
},
|
|
51
|
+
"parent_task": {
|
|
52
|
+
"type": "string"
|
|
53
|
+
},
|
|
54
|
+
"dependencies": {
|
|
55
|
+
"type": "array",
|
|
56
|
+
"items": { "type": "string" }
|
|
57
|
+
},
|
|
58
|
+
"metadata": {
|
|
59
|
+
"type": "object",
|
|
60
|
+
"properties": {
|
|
61
|
+
"created_at": { "type": "string", "format": "date-time" },
|
|
62
|
+
"updated_at": { "type": "string", "format": "date-time" },
|
|
63
|
+
"estimated_tokens": { "type": "integer", "minimum": 0 },
|
|
64
|
+
"actual_tokens": { "type": "integer", "minimum": 0 }
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
},
|
|
68
|
+
"additionalProperties": false
|
|
69
|
+
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://zenkit.dev/schemas/task.schema.json",
|
|
4
|
+
"title": "ZenKit Task",
|
|
5
|
+
"description": "A discrete unit of work within a ZenKit workflow.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["id", "name", "status", "command"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"id": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"pattern": "^[a-z0-9-]+$"
|
|
12
|
+
},
|
|
13
|
+
"name": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"minLength": 1,
|
|
16
|
+
"maxLength": 200
|
|
17
|
+
},
|
|
18
|
+
"description": {
|
|
19
|
+
"type": "string"
|
|
20
|
+
},
|
|
21
|
+
"command": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"enum": ["plan", "build", "audit", "refactor", "spec", "handoff", "checkpoint", "ship"]
|
|
24
|
+
},
|
|
25
|
+
"status": {
|
|
26
|
+
"type": "string",
|
|
27
|
+
"enum": ["pending", "in_progress", "blocked", "completed", "failed", "skipped"]
|
|
28
|
+
},
|
|
29
|
+
"context": {
|
|
30
|
+
"type": "string"
|
|
31
|
+
},
|
|
32
|
+
"assumptions": {
|
|
33
|
+
"type": "array",
|
|
34
|
+
"items": { "type": "string" }
|
|
35
|
+
},
|
|
36
|
+
"constraints": {
|
|
37
|
+
"type": "array",
|
|
38
|
+
"items": { "type": "string" }
|
|
39
|
+
},
|
|
40
|
+
"acceptance_criteria": {
|
|
41
|
+
"type": "array",
|
|
42
|
+
"items": { "type": "string" }
|
|
43
|
+
},
|
|
44
|
+
"files_affected": {
|
|
45
|
+
"type": "array",
|
|
46
|
+
"items": { "type": "string" }
|
|
47
|
+
},
|
|
48
|
+
"assigned_agent": {
|
|
49
|
+
"type": "string"
|
|
50
|
+
},
|
|
51
|
+
"parent_task": {
|
|
52
|
+
"type": "string"
|
|
53
|
+
},
|
|
54
|
+
"dependencies": {
|
|
55
|
+
"type": "array",
|
|
56
|
+
"items": { "type": "string" }
|
|
57
|
+
},
|
|
58
|
+
"metadata": {
|
|
59
|
+
"type": "object",
|
|
60
|
+
"properties": {
|
|
61
|
+
"created_at": { "type": "string", "format": "date-time" },
|
|
62
|
+
"updated_at": { "type": "string", "format": "date-time" },
|
|
63
|
+
"estimated_tokens": { "type": "integer", "minimum": 0 },
|
|
64
|
+
"actual_tokens": { "type": "integer", "minimum": 0 }
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
},
|
|
68
|
+
"additionalProperties": false
|
|
69
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Agent Contracts
|
|
2
|
+
|
|
3
|
+
Agents in ZenKit are bounded workers, not autonomous assistants. Each has explicit responsibility, defined inputs, and required outputs.
|
|
4
|
+
|
|
5
|
+
## Definition format
|
|
6
|
+
|
|
7
|
+
Each agent specifies: name, responsibility (one sentence), input/output schemas, allowed skills, and constraints. See `agents/*.md` for all 9 definitions.
|
|
8
|
+
|
|
9
|
+
## Bounded responsibility
|
|
10
|
+
|
|
11
|
+
Each agent owns one stage. It does not redo earlier work or pre-empt later work. Schemas enforce this — an agent cannot produce output shaped for a different stage without violating its contract.
|
|
12
|
+
|
|
13
|
+
## Default handoff chain
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
product-manager → system-architect → backend / frontend (parallel)
|
|
17
|
+
→ qa → security → auditor → writer → ship
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
This is a default, not a requirement. Teams can remove, reorder, or add agents.
|
|
21
|
+
|
|
22
|
+
## What agents must NOT do
|
|
23
|
+
|
|
24
|
+
- Assume context not in the input.
|
|
25
|
+
- Skip open questions — silence is a contract violation.
|
|
26
|
+
- Modify upstream artifacts — flag problems and hand off.
|
|
27
|
+
- Claim certainty without evidence.
|
|
28
|
+
- Invoke skills outside their allowed list.
|
|
29
|
+
|
|
30
|
+
## Adding custom agents
|
|
31
|
+
|
|
32
|
+
1. Define in `agents/` with name, responsibility, boundaries, handoff targets.
|
|
33
|
+
2. Place in the handoff chain at the appropriate position.
|
|
34
|
+
3. Ensure schema compatibility with upstream output and downstream input.
|
|
35
|
+
|
|
36
|
+
Custom agents follow the same contracts as built-in agents.
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# ZenKit Architecture
|
|
2
|
+
|
|
3
|
+
ZenKit is built from six primitive categories that compose into workflows. There is no runtime -- only schemas, commands, and conventions that tools and agents agree to follow.
|
|
4
|
+
|
|
5
|
+
## The Six Primitives
|
|
6
|
+
|
|
7
|
+
### Commands
|
|
8
|
+
|
|
9
|
+
The verbs of the system. Each command (`spec`, `plan`, `build`, `audit`, `refactor`, `handoff`, `checkpoint`, `ship`) takes structured input and produces structured output. Commands are stateless -- they read input, produce output, and exit.
|
|
10
|
+
|
|
11
|
+
### Schemas
|
|
12
|
+
|
|
13
|
+
JSON schemas that define the shape of every input and output. Schemas are the source of truth. If an agent produces output that does not validate against its schema, the output is rejected. No exceptions.
|
|
14
|
+
|
|
15
|
+
### Skills
|
|
16
|
+
|
|
17
|
+
Reusable capabilities that agents can invoke during command execution. A skill is a scoped unit of work -- "generate SQL migration," "run lint," "estimate complexity." Skills are composable and can be shared across agents.
|
|
18
|
+
|
|
19
|
+
### Hooks
|
|
20
|
+
|
|
21
|
+
Lifecycle callbacks that fire before or after a command runs. Hooks handle side effects: logging, notifications, file cleanup, environment validation. They keep commands pure by moving side effects to the edges.
|
|
22
|
+
|
|
23
|
+
### Checkpoints
|
|
24
|
+
|
|
25
|
+
Snapshots of workflow state at a given point. A checkpoint captures the current context, completed steps, and pending work. Checkpoints enable resumption -- if a workflow fails at step 4, you restart from checkpoint 3, not from zero.
|
|
26
|
+
|
|
27
|
+
### Rubrics
|
|
28
|
+
|
|
29
|
+
Evaluation criteria that the audit system uses to grade deliverables. A rubric defines what "done" looks like for a given command or workflow stage. Rubrics are explicit, versioned, and diffable.
|
|
30
|
+
|
|
31
|
+
## Workflow Composition
|
|
32
|
+
|
|
33
|
+
Primitives compose into workflows through chaining. The output of one command becomes the input of the next. The standard lifecycle is:
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
plan --> build --> audit --> ship
|
|
37
|
+
|
|
38
|
+
+---> checkpoint (at any stage)
|
|
39
|
+
+---> refactor (loops back to build)
|
|
40
|
+
+---> handoff (transfers between agents)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### The Lifecycle
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
+---------+ +---------+ +---------+ +---------+
|
|
47
|
+
| PLAN |---->| BUILD |---->| AUDIT |---->| SHIP |
|
|
48
|
+
+---------+ +---------+ +---------+ +---------+
|
|
49
|
+
| | | |
|
|
50
|
+
v v v v
|
|
51
|
+
checkpoint checkpoint checkpoint checkpoint
|
|
52
|
+
^ |
|
|
53
|
+
| (fail) |
|
|
54
|
+
+---------------+
|
|
55
|
+
refactor loop
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**Plan**: Define what will be built, how, and why. Produces a structured plan with scope, dependencies, and risk assessment.
|
|
59
|
+
|
|
60
|
+
**Build**: Execute the plan. Produce code, configurations, migrations, or whatever the spec requires. Output includes the deliverable plus metadata about decisions made during building.
|
|
61
|
+
|
|
62
|
+
**Audit**: Validate the build output against the spec and rubrics. The audit checks schema compliance, runs tests if available, and flags gaps. Audit failures feed back into a refactor cycle.
|
|
63
|
+
|
|
64
|
+
**Ship**: Package and release the validated output. Ship only runs after audit passes. It produces a manifest of what was shipped and where.
|
|
65
|
+
|
|
66
|
+
## The Handoff Contract
|
|
67
|
+
|
|
68
|
+
Agents do not share memory. They share documents. When one agent finishes its work, it produces a handoff artifact that contains everything the next agent needs: context, deliverables, assumptions, constraints, and open questions.
|
|
69
|
+
|
|
70
|
+
The handoff contract ensures that agent A does not need to know how agent B works. It only needs to produce output that matches the handoff schema. This is what makes agents replaceable -- swap the implementation, keep the contract.
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
Agent A Agent B
|
|
74
|
+
| |
|
|
75
|
+
+--- handoff artifact ---->|
|
|
76
|
+
| (schema-validated) |
|
|
77
|
+
| +---> continues work
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Handoff artifacts are versioned and stored alongside the code they describe. They are first-class project artifacts, not ephemeral messages.
|
|
81
|
+
|
|
82
|
+
## The CLI
|
|
83
|
+
|
|
84
|
+
The `zenkit` CLI provides the executable interface to the protocol: `validate` checks data against schemas, `benchmark` runs feature specs, `audit` produces structured audit reports, `init` scaffolds ZenKit into a project, and `status` shows project health. The CLI is a thin wrapper — it delegates to the same scripts and schemas that compose the protocol layer.
|
|
85
|
+
|
|
86
|
+
## What Is Not Here
|
|
87
|
+
|
|
88
|
+
There is no central orchestrator. There is no agent registry service. There is no shared memory bus. Coordination happens through files, schemas, and explicit contracts. This is intentional — ZenKit works with any AI provider, any CI system, and any language.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Benchmarking
|
|
2
|
+
|
|
3
|
+
ZenKit benchmarks verify acceptance criteria against the actual implementation.
|
|
4
|
+
|
|
5
|
+
## What the runner checks
|
|
6
|
+
|
|
7
|
+
The benchmark runner executes four stages per feature spec:
|
|
8
|
+
|
|
9
|
+
1. **Spec validation.** The spec must validate against `feature-spec.schema.json`, which requires at least one acceptance criterion and at least one declared limitation.
|
|
10
|
+
2. **Schema compilation.** All JSON schemas compile. All use the same draft version.
|
|
11
|
+
3. **Build verification.** Every file in `expected_files` exists.
|
|
12
|
+
4. **Acceptance criteria audit.** Each criterion runs a typed verification step.
|
|
13
|
+
|
|
14
|
+
## Verification types
|
|
15
|
+
|
|
16
|
+
| Type | What it checks |
|
|
17
|
+
|------|---------------|
|
|
18
|
+
| `file_exists` | File is present |
|
|
19
|
+
| `file_contains` | File contains a specific string |
|
|
20
|
+
| `schema_count` | Expected number of schemas compile |
|
|
21
|
+
| `examples_valid` | Fixture data validates against schemas |
|
|
22
|
+
| `schemas_consistent` | All schemas use the same draft |
|
|
23
|
+
| `test_passes` | Shell command exits with code 0 |
|
|
24
|
+
| `json_path_equals` | Value at a dot-separated path in a JSON file equals the expected value |
|
|
25
|
+
|
|
26
|
+
Each check produces `evidence` — a string describing what was found. Results are `pass` or `fail`.
|
|
27
|
+
|
|
28
|
+
## Telemetry
|
|
29
|
+
|
|
30
|
+
- **Estimated:** Token and cost figures from a heuristic (documented in the `basis` field).
|
|
31
|
+
- **Actual:** Null when no API instrumentation is available. Never fabricated.
|
|
32
|
+
|
|
33
|
+
## CLI
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
zenkit benchmark [spec] # Single spec
|
|
37
|
+
zenkit benchmark:all # All specs
|
|
38
|
+
zenkit benchmark:report [result] # Markdown report
|
|
39
|
+
zenkit benchmark:compare [z] [b] # ZenKit vs baseline
|
|
40
|
+
zenkit audit # Run all benchmarks + produce audit report
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Comparison mode
|
|
44
|
+
|
|
45
|
+
Feature specs include a `mode` field: `zenkit` or `baseline`. Current comparison data is **illustrative** — both runs verify the same codebase. Real comparison requires A/B workflow execution.
|
|
46
|
+
|
|
47
|
+
## Limitations
|
|
48
|
+
|
|
49
|
+
- Acceptance criteria verify code structure and schema validity, not runtime UI behavior (except when a `test_passes` criterion runs an end-to-end test command).
|
|
50
|
+
- Token estimates are heuristics, not measurements.
|
|
51
|
+
- Self-audit benchmarks give structure to introspection but do not replace independent review.
|