@moreih29/nexus-core 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/README.md +3 -3
  2. package/agents/architect/body.md +7 -6
  3. package/agents/designer/body.md +3 -3
  4. package/agents/engineer/body.md +8 -8
  5. package/agents/postdoc/body.md +4 -4
  6. package/agents/researcher/body.md +4 -4
  7. package/agents/reviewer/body.md +2 -2
  8. package/agents/strategist/body.md +4 -4
  9. package/agents/tester/body.md +2 -2
  10. package/agents/writer/body.md +1 -1
  11. package/conformance/README.md +125 -0
  12. package/conformance/scenarios/full-plan-cycle.json +132 -0
  13. package/conformance/scenarios/task-deps-ordering.json +83 -0
  14. package/conformance/schema/fixture.schema.json +224 -0
  15. package/conformance/state-schemas/agent-tracker.schema.json +58 -0
  16. package/conformance/state-schemas/history.schema.json +124 -0
  17. package/conformance/state-schemas/plan.schema.json +72 -0
  18. package/conformance/state-schemas/runtime.schema.json +25 -0
  19. package/conformance/state-schemas/tasks.schema.json +93 -0
  20. package/conformance/tools/plan-decide.json +70 -0
  21. package/conformance/tools/plan-start.json +67 -0
  22. package/conformance/tools/task-add.json +73 -0
  23. package/conformance/tools/task-close.json +98 -0
  24. package/docs/behavioral-contracts.md +145 -0
  25. package/docs/consumer-implementation-guide.md +844 -0
  26. package/docs/nexus-layout.md +234 -0
  27. package/docs/nexus-state-overview.md +185 -0
  28. package/docs/nexus-tools-contract.md +427 -0
  29. package/manifest.json +126 -113
  30. package/package.json +5 -1
  31. package/schema/common.schema.json +0 -4
  32. package/schema/skill.schema.json +16 -1
  33. package/schema/vocabulary.schema.json +14 -9
  34. package/skills/nx-init/body.md +6 -9
  35. package/skills/nx-init/meta.yml +1 -0
  36. package/skills/nx-plan/body.md +14 -11
  37. package/skills/nx-plan/meta.yml +3 -0
  38. package/skills/nx-run/body.md +4 -4
  39. package/skills/nx-run/meta.yml +3 -0
  40. package/skills/nx-setup/body.md +9 -9
  41. package/skills/nx-setup/meta.yml +1 -0
  42. package/skills/nx-sync/meta.yml +1 -0
  43. package/vocabulary/capabilities.yml +58 -25
@@ -0,0 +1,67 @@
1
+ [
2
+ {
3
+ "test_id": "plan_start_happy_path",
4
+ "description": "plan_start creates plan.json with correct structure when given a topic, issues, and research_summary",
5
+ "precondition": {
6
+ "state_files": {
7
+ ".nexus/state/plan.json": null
8
+ }
9
+ },
10
+ "action": {
11
+ "tool": "plan_start",
12
+ "params": {
13
+ "topic": "Introduce conformance test fixtures",
14
+ "issues": [
15
+ "What fixture format best supports harness-neutral assertions?",
16
+ "Which tools should be covered in the MVP?"
17
+ ],
18
+ "research_summary": "Reviewed existing state-schemas and claude-nexus MCP tool implementations. Conformance fixtures should be declarative JSON with JSONPath assertions."
19
+ }
20
+ },
21
+ "postcondition": {
22
+ "return_value": {
23
+ "$.created": true,
24
+ "$.plan_id": { "type": "number", "min": 1 },
25
+ "$.topic": "Introduce conformance test fixtures",
26
+ "$.issueCount": 2
27
+ },
28
+ "state_files": {
29
+ ".nexus/state/plan.json": {
30
+ "$.id": { "type": "number", "min": 1 },
31
+ "$.topic": "Introduce conformance test fixtures",
32
+ "$.issues.length": 2,
33
+ "$.issues[0].id": 1,
34
+ "$.issues[0].title": "What fixture format best supports harness-neutral assertions?",
35
+ "$.issues[0].status": "pending",
36
+ "$.issues[1].id": 2,
37
+ "$.issues[1].title": "Which tools should be covered in the MVP?",
38
+ "$.issues[1].status": "pending",
39
+ "$.research_summary": { "type": "string", "minLength": 10 },
40
+ "$.created_at": { "type": "iso8601" }
41
+ }
42
+ }
43
+ }
44
+ },
45
+ {
46
+ "test_id": "plan_start_missing_research_summary_error",
47
+ "description": "plan_start rejects the invocation when research_summary is omitted — the field is required to enforce that research happened before planning",
48
+ "precondition": {
49
+ "state_files": {
50
+ ".nexus/state/plan.json": null
51
+ }
52
+ },
53
+ "action": {
54
+ "tool": "plan_start",
55
+ "params": {
56
+ "topic": "Some topic",
57
+ "issues": ["Issue A"]
58
+ }
59
+ },
60
+ "postcondition": {
61
+ "error": true,
62
+ "state_files": {
63
+ ".nexus/state/plan.json": null
64
+ }
65
+ }
66
+ }
67
+ ]
@@ -0,0 +1,73 @@
1
+ [
2
+ {
3
+ "test_id": "task_add_happy_path",
4
+ "description": "task_add creates tasks.json with the new task when no tasks.json existed, assigning id=1 and status=pending",
5
+ "precondition": {
6
+ "state_files": {
7
+ ".nexus/state/tasks.json": null
8
+ }
9
+ },
10
+ "action": {
11
+ "tool": "task_add",
12
+ "params": {
13
+ "title": "Create fixture.schema.json",
14
+ "context": "Define the JSON Schema that validates conformance test fixture files",
15
+ "deps": [],
16
+ "approach": "Write schema incrementally, validating against draft/2020-12 spec",
17
+ "acceptance": "All 6 fixture files validate against the schema without errors",
18
+ "risk": "JSONPath assertion syntax may need iteration to be harness-implementable"
19
+ }
20
+ },
21
+ "postcondition": {
22
+ "return_value": {
23
+ "$.task.id": 1,
24
+ "$.task.title": "Create fixture.schema.json",
25
+ "$.task.status": "pending",
26
+ "$.task.deps.length": 0,
27
+ "$.task.created_at": { "type": "iso8601" }
28
+ },
29
+ "state_files": {
30
+ ".nexus/state/tasks.json": {
31
+ "$.tasks.length": 1,
32
+ "$.tasks[0].id": 1,
33
+ "$.tasks[0].title": "Create fixture.schema.json",
34
+ "$.tasks[0].status": "pending",
35
+ "$.tasks[0].deps.length": 0
36
+ }
37
+ }
38
+ }
39
+ },
40
+ {
41
+ "test_id": "task_add_with_goal",
42
+ "description": "task_add sets the goal field on tasks.json when the goal parameter is provided on the first call",
43
+ "precondition": {
44
+ "state_files": {
45
+ ".nexus/state/tasks.json": null
46
+ }
47
+ },
48
+ "action": {
49
+ "tool": "task_add",
50
+ "params": {
51
+ "title": "Write conformance tool fixtures",
52
+ "context": "Create the 4 per-tool fixture files covering happy path and error cases",
53
+ "deps": [],
54
+ "goal": "Ship MVP conformance fixtures for nexus-core v0.2.0",
55
+ "decisions": ["Cover plan_start, plan_decide, task_add, task_close"]
56
+ }
57
+ },
58
+ "postcondition": {
59
+ "return_value": {
60
+ "$.task.id": 1,
61
+ "$.task.title": "Write conformance tool fixtures"
62
+ },
63
+ "state_files": {
64
+ ".nexus/state/tasks.json": {
65
+ "$.goal": "Ship MVP conformance fixtures for nexus-core v0.2.0",
66
+ "$.decisions.length": 1,
67
+ "$.decisions[0]": "Cover plan_start, plan_decide, task_add, task_close",
68
+ "$.tasks.length": 1
69
+ }
70
+ }
71
+ }
72
+ }
73
+ ]
@@ -0,0 +1,98 @@
1
+ [
2
+ {
3
+ "test_id": "task_close_happy_path",
4
+ "description": "task_close archives plan and tasks into history.json then deletes plan.json and tasks.json",
5
+ "precondition": {
6
+ "state_files": {
7
+ ".nexus/state/plan.json": {
8
+ "id": 1,
9
+ "topic": "Introduce conformance test fixtures",
10
+ "issues": [
11
+ {
12
+ "id": 1,
13
+ "title": "What fixture format?",
14
+ "status": "decided",
15
+ "decision": "Declarative JSON with JSONPath assertions"
16
+ },
17
+ {
18
+ "id": 2,
19
+ "title": "Which tools to cover?",
20
+ "status": "decided",
21
+ "decision": "plan_start, plan_decide, task_add, task_close"
22
+ }
23
+ ],
24
+ "research_summary": "Reviewed existing state-schemas and tool implementations.",
25
+ "created_at": "2026-04-12T00:00:00.000Z"
26
+ },
27
+ ".nexus/state/tasks.json": {
28
+ "goal": "Ship MVP conformance fixtures",
29
+ "decisions": ["Declarative JSON with JSONPath assertions"],
30
+ "tasks": [
31
+ {
32
+ "id": 1,
33
+ "title": "Create fixture.schema.json",
34
+ "context": "Define the JSON Schema for fixture files",
35
+ "status": "completed",
36
+ "deps": [],
37
+ "created_at": "2026-04-12T00:00:00.000Z"
38
+ }
39
+ ]
40
+ }
41
+ }
42
+ },
43
+ "action": {
44
+ "tool": "task_close",
45
+ "params": {}
46
+ },
47
+ "postcondition": {
48
+ "return_value": {
49
+ "$.closed": true,
50
+ "$.archived.plan": true,
51
+ "$.archived.decisions": { "type": "number", "min": 1 },
52
+ "$.archived.tasks": 1,
53
+ "$.total_cycles": { "type": "number", "min": 1 }
54
+ },
55
+ "state_files": {
56
+ ".nexus/state/plan.json": null,
57
+ ".nexus/state/tasks.json": null,
58
+ ".nexus/history.json": {
59
+ "$.cycles.length": { "type": "number", "min": 1 },
60
+ "$.cycles[-1].plan.topic": "Introduce conformance test fixtures",
61
+ "$.cycles[-1].tasks.length": 1,
62
+ "$.cycles[-1].completed_at": { "type": "iso8601" }
63
+ }
64
+ }
65
+ }
66
+ },
67
+ {
68
+ "test_id": "task_close_no_plan_or_tasks",
69
+ "description": "task_close still creates a history cycle with null plan and empty tasks when neither plan.json nor tasks.json exists",
70
+ "precondition": {
71
+ "state_files": {
72
+ ".nexus/state/plan.json": null,
73
+ ".nexus/state/tasks.json": null
74
+ }
75
+ },
76
+ "action": {
77
+ "tool": "task_close",
78
+ "params": {}
79
+ },
80
+ "postcondition": {
81
+ "return_value": {
82
+ "$.closed": true,
83
+ "$.archived.plan": false,
84
+ "$.archived.tasks": 0,
85
+ "$.total_cycles": { "type": "number", "min": 1 }
86
+ },
87
+ "state_files": {
88
+ ".nexus/state/plan.json": null,
89
+ ".nexus/state/tasks.json": null,
90
+ ".nexus/history.json": {
91
+ "$.cycles.length": { "type": "number", "min": 1 },
92
+ "$.cycles[-1].plan": null,
93
+ "$.cycles[-1].tasks.length": 0
94
+ }
95
+ }
96
+ }
97
+ }
98
+ ]
@@ -0,0 +1,145 @@
1
+ # Behavioral Contracts
2
+
3
+ This document formalizes the behavioral contracts that all Nexus harnesses must implement. These contracts define state machines, coordination rules, and semantic boundaries that are harness-neutral — they describe *what* must happen, not *how* any specific harness implements it.
4
+
5
+ ---
6
+
7
+ ## 1. Task Lifecycle State Machine
8
+
9
+ Tasks transition through the following states:
10
+
11
+ ```
12
+ pending → in_progress → completed
13
+ ↑ |
14
+ └── (reopen) ──┘
15
+ ```
16
+
17
+ **States.**
18
+
19
+ | State | Meaning |
20
+ |---|---|
21
+ | `pending` | Task is waiting to begin: either it has not yet been assigned, or at least one of its dependencies has not completed. |
22
+ | `in_progress` | Task is actively being worked on by an assigned agent. |
23
+ | `completed` | Task has finished and its output is available. |
24
+
25
+ **Reopen.** A `completed` task may be transitioned back to `pending` via `task_update`. This is used when review or downstream work reveals that the task's output is insufficient and it must be reworked.
26
+
27
+ **No `blocked` state.** There is no explicit `blocked` state. A task that cannot proceed because a dependency has not completed remains in `pending`. Harnesses compute readiness from dependency status rather than relying on an explicit blocked marker.
28
+
29
+ **Readiness computation.** A task is ready to begin when both conditions hold:
30
+ 1. Its status is `pending`.
31
+ 2. Every task listed in its `deps` field has status `completed`.
32
+
33
+ ---
34
+
35
+ ## 2. Plan Lifecycle State Machine
36
+
37
+ Plan issues transition through the following states:
38
+
39
+ ```
40
+ pending → decided
41
+ ↑ |
42
+ └─────────┘ (reopen)
43
+ ```
44
+
45
+ **States.**
46
+
47
+ | State | Meaning |
48
+ |---|---|
49
+ | `pending` | Issue is open and no decision has been recorded. |
50
+ | `decided` | A decision has been recorded for this issue. |
51
+
52
+ **Reopen.** A `decided` issue may be transitioned back to `pending` via `plan_update` with `action: reopen`. On reopen, the `decision` field for that issue is deleted. The issue returns to open discussion.
53
+
54
+ **Plan complete signal.** A plan is considered complete when all issues within it have status `decided`. This signals that the plan phase is finished and execution may begin.
55
+
56
+ ---
57
+
58
+ ## 3. Resume Tier and Owner Reuse Policy Coordination
59
+
60
+ ### Resume Tiers
61
+
62
+ Each agent role carries a resume tier that governs whether a prior agent session may be reused for a new task assignment, or whether a fresh spawn is required. The three tiers are defined in `vocabulary/resume-tiers.yml`.
63
+
64
+ **`ephemeral`.** The agent is always spawned fresh. No prior session is carried forward. Used for roles where independence from prior context is essential to correctness (e.g., verification roles).
65
+
66
+ **`bounded`.** The agent may resume a prior session only when all of the following conditions hold: (a) the same owner identity is assigned, (b) the target files or artifacts are the same, and (c) no intervening edits have occurred to those targets since the prior session. If any condition is not met, a fresh spawn is used. Agent instructions for bounded-tier agents must include a directive to re-read the target files at the start of each resumed session to ensure current state is reflected.
67
+
68
+ **`persistent`.** The agent resumes by default within the same run session. Cross-task reuse is allowed. Used for roles where accumulated context is the primary asset (e.g., analysis and design roles).
69
+
70
+ ### Owner Reuse Policy Override
71
+
72
+ The `owner_reuse_policy` field in `tasks.json` allows per-task override of the default resume-tier behavior.
73
+
74
+ | Value | Effect |
75
+ |---|---|
76
+ | `fresh` | Force a fresh spawn regardless of resume tier. |
77
+ | `resume_if_same_artifact` | Apply bounded-tier behavior: resume only if same artifact, same owner, no intervening edits. |
78
+ | `resume` | Force resume regardless of resume tier, if a prior session is available. |
79
+
80
+ When `owner_reuse_policy` is absent, the agent's default resume tier governs.
81
+
82
+ ### Resume Gating
83
+
84
+ Before attempting to resume a prior agent session, the harness must verify that its resume mechanism is available for the current context. If the mechanism is unavailable, the harness must fall back to a fresh spawn silently, without surfacing an error to the user. Resume gating is a harness-level concern; nexus-core specifies only that fallback must occur.
85
+
86
+ ---
87
+
88
+ ## 4. Permission Model
89
+
90
+ **Lead.** The Lead agent may invoke all skills, call all tools available to the harness, spawn subagents, and record plan decisions. Lead is the only role that may initiate a new plan or run cycle.
91
+
92
+ **Subagents.** Each subagent role has a defined set of capabilities that restrict which tools it may call. Capabilities are declared in the agent's `meta.yml` using the capability abstraction defined in `vocabulary/capabilities.yml`. A subagent may not call tools outside its declared capability set.
93
+
94
+ **Gate enforcement.** The mechanism by which capability gates are enforced is harness-specific. nexus-core specifies the semantics — which capabilities a role holds — but not the enforcement implementation. Harnesses are responsible for translating capability declarations into their native access-control mechanism.
95
+
96
+ **Capability override rule (additive-only).** A consumer's effective capability set for any agent is computed as:
97
+
98
+ ```
99
+ effective_capabilities(agent) = canonical_capabilities(agent) ∪ consumer_additions(agent)
100
+ ```
101
+
102
+ `canonical_capabilities` is the `capabilities` array in `agents/{id}/meta.yml` — the nexus-core canonical definition. `consumer_additions` is a harness-local set of additional capabilities the consumer chooses to apply (format and storage are consumer decisions). Consumers may **add** capabilities but **must not remove** canonical ones. Removing a canonical capability (e.g., removing `no_file_edit` from an agent that canonically carries it) would violate the nexus-core design intent and is forbidden. The union is idempotent — if nexus-core later adds a capability that a consumer already applied locally, the overlap is harmless.
103
+
104
+ ---
105
+
106
+ ## 5. Session Boundary Semantics
107
+
108
+ A **session** begins when the harness launches and ends when the harness closes or the user explicitly terminates it. A session may contain one or more plan/run cycles.
109
+
110
+ A **cycle** consists of exactly one `plan.json` lifecycle and one `tasks.json` lifecycle. A cycle begins when a new plan is created and ends with `task_close`, which archives the cycle's plan and task records into `history.json`.
111
+
112
+ ```
113
+ Session
114
+ └── Cycle 1: plan.json + tasks.json → task_close → history.json
115
+ └── Cycle 2: plan.json + tasks.json → task_close → history.json
116
+ └── ...
117
+ ```
118
+
119
+ Session state (the `state/` directory) persists across cycles within a single session. When a new cycle begins within the same session, `plan.json` and `tasks.json` are replaced; other session state (e.g., agent registrations) may persist or be reset depending on harness policy.
120
+
121
+ Session end discards all remaining session state that has not been promoted to project-scoped storage.
122
+
123
+ ---
124
+
125
+ ## 6. `manual_only` Contract
126
+
127
+ A skill declared with `manual_only: true` in its `meta.yml` must not be auto-invoked by the language model as a result of natural-language inference.
128
+
129
+ **Activation constraint.** Only an explicit user-initiated trigger may activate a `manual_only` skill. Valid explicit triggers are: a slash command typed by the user, or a bracket tag typed by the user (e.g., `[plan]`). Inference from conversational context does not qualify as an explicit trigger.
130
+
131
+ **Consumer harness obligation.** Consumer harnesses that implement auto-invocation detection — where the language model may activate skills based on recognized patterns in user messages — must filter `manual_only` skills out of the skill activation list exposed to the language model. A `manual_only` skill must not appear as a candidate for automatic activation under any circumstances.
132
+
133
+ This contract ensures that high-consequence or structurally significant skills are only invoked when the user has expressed deliberate intent.
134
+
135
+ ---
136
+
137
+ ## 7. Natural-Language Trigger Boundary
138
+
139
+ Natural-language trigger detection is **consumer-owned**. nexus-core does not define, distribute, or maintain natural-language pattern lists for any skill or tag.
140
+
141
+ **Canonical trigger form.** The authoritative trigger for every tag is the explicit bracket form defined in `vocabulary/tags.yml` (e.g., `[plan]`, `[run]`, `[sync]`). This is the form nexus-core specifies. All other activation forms are consumer extensions.
142
+
143
+ **Consumer responsibility.** Each consumer harness independently defines the natural-language patterns it recognizes as equivalent to an explicit trigger, tests those patterns, and maintains them over time. nexus-core provides no shared pattern library and makes no guarantees about pattern compatibility across harnesses.
144
+
145
+ **Divergence is acceptable.** Different consumer harnesses may recognize different natural-language phrasings for the same underlying skill. This divergence is explicitly acceptable. Harnesses must not assume that another harness's pattern set matches their own.