npm - agent-bober - Versions diffs - 0.11.6 → 0.12.0 - Mend

agent-bober 0.11.6 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/CHANGELOG.md +98 -0
package/README.md +12 -6
package/agents/bober-evaluator.md +38 -0
package/agents/bober-generator.md +54 -0
package/agents/bober-planner.md +256 -34
package/dist/cli/commands/eval.js +6 -6
package/dist/cli/commands/eval.js.map +1 -1
package/dist/cli/commands/init.js +46 -2
package/dist/cli/commands/init.js.map +1 -1
package/dist/cli/commands/plan.d.ts +12 -0
package/dist/cli/commands/plan.d.ts.map +1 -1
package/dist/cli/commands/plan.js +232 -37
package/dist/cli/commands/plan.js.map +1 -1
package/dist/cli/commands/run.js +2 -2
package/dist/cli/commands/run.js.map +1 -1
package/dist/cli/commands/sprint.d.ts.map +1 -1
package/dist/cli/commands/sprint.js +8 -8
package/dist/cli/commands/sprint.js.map +1 -1
package/dist/cli/index.js +23 -2
package/dist/cli/index.js.map +1 -1
package/dist/config/schema.d.ts +40 -40
package/dist/contracts/eval-result.d.ts +38 -38
package/dist/contracts/index.d.ts +2 -2
package/dist/contracts/index.d.ts.map +1 -1
package/dist/contracts/index.js +8 -4
package/dist/contracts/index.js.map +1 -1
package/dist/contracts/spec.d.ts +335 -40
package/dist/contracts/spec.d.ts.map +1 -1
package/dist/contracts/spec.js +210 -18
package/dist/contracts/spec.js.map +1 -1
package/dist/contracts/sprint-contract.d.ts +155 -88
package/dist/contracts/sprint-contract.d.ts.map +1 -1
package/dist/contracts/sprint-contract.js +176 -29
package/dist/contracts/sprint-contract.js.map +1 -1
package/dist/evaluators/builtin/api-check.js +1 -1
package/dist/evaluators/builtin/api-check.js.map +1 -1
package/dist/index.d.ts +2 -2
package/dist/index.d.ts.map +1 -1
package/dist/index.js +2 -2
package/dist/index.js.map +1 -1
package/dist/mcp/tools/contracts.js +2 -2
package/dist/mcp/tools/contracts.js.map +1 -1
package/dist/mcp/tools/eval.js +8 -8
package/dist/mcp/tools/eval.js.map +1 -1
package/dist/mcp/tools/plan.d.ts.map +1 -1
package/dist/mcp/tools/plan.js +40 -14
package/dist/mcp/tools/plan.js.map +1 -1
package/dist/mcp/tools/sprint.d.ts.map +1 -1
package/dist/mcp/tools/sprint.js +11 -11
package/dist/mcp/tools/sprint.js.map +1 -1
package/dist/orchestrator/context-handoff.d.ts +484 -224
package/dist/orchestrator/context-handoff.d.ts.map +1 -1
package/dist/orchestrator/context-handoff.js +32 -12
package/dist/orchestrator/context-handoff.js.map +1 -1
package/dist/orchestrator/curator-agent.d.ts.map +1 -1
package/dist/orchestrator/curator-agent.js +4 -4
package/dist/orchestrator/curator-agent.js.map +1 -1
package/dist/orchestrator/evaluator-agent.js +2 -2
package/dist/orchestrator/evaluator-agent.js.map +1 -1
package/dist/orchestrator/generator-agent.js +3 -3
package/dist/orchestrator/generator-agent.js.map +1 -1
package/dist/orchestrator/model-resolver.js +2 -2
package/dist/orchestrator/model-resolver.js.map +1 -1
package/dist/orchestrator/pipeline.d.ts +7 -0
package/dist/orchestrator/pipeline.d.ts.map +1 -1
package/dist/orchestrator/pipeline.js +67 -28
package/dist/orchestrator/pipeline.js.map +1 -1
package/dist/orchestrator/planner-agent.d.ts +21 -1
package/dist/orchestrator/planner-agent.d.ts.map +1 -1
package/dist/orchestrator/planner-agent.js +11 -2
package/dist/orchestrator/planner-agent.js.map +1 -1
package/dist/state/history.d.ts.map +1 -1
package/dist/state/history.js +3 -3
package/dist/state/history.js.map +1 -1
package/dist/state/plan-state.js +1 -1
package/dist/state/plan-state.js.map +1 -1
package/dist/state/sprint-state.d.ts +9 -2
package/dist/state/sprint-state.d.ts.map +1 -1
package/dist/state/sprint-state.js +25 -11
package/dist/state/sprint-state.js.map +1 -1
package/package.json +2 -1
package/scripts/migrate-specs.mjs +127 -0
package/scripts/sync-skills.mjs +96 -0
package/skills/bober.plan/SKILL.md +41 -0
package/skills/bober.plan/references/spec-schema.md +31 -4
package/skills/bober.run/SKILL.md +41 -7
package/skills/bober.sprint/SKILL.md +6 -259

package/skills/bober.plan/references/spec-schema.md CHANGED Viewed

@@ -24,10 +24,34 @@ PlanSpec files are stored at: `.bober/specs/<specId>.json`
   "description": "string (required, 2-3 sentences)",
   "mode": "string (required, one of: greenfield, brownfield)",
   "preset": "string (optional, e.g.: nextjs, react-vite, solidity, anchor, api-node, python-api)",
-  "status": "string (required, one of: planned, in-progress, completed, archived)",
+  "status": "string (required, one of: draft, needs-clarification, ready, in-progress, completed, abandoned)",
+  "ambiguityScore": "number (optional, 0-10) — planner's self-rated ambiguity. >= 7 forces status='needs-clarification'.",
+  "clarificationQuestions": [
+    {
+      "questionId": "string (required, e.g. 'Q1')",
+      "category": "string (required, one of: scope | user-personas | data-model | tech-constraints | design-ux | integrations | non-functional | error-handling | integration-risk | pattern-conflict | regression-risk | other)",
+      "question": "string (required, ends in '?')",
+      "options": [
+        { "label": "string (e.g. 'A')", "description": "string" }
+      ],
+      "recommendation": "string (optional, planner's suggested answer based on codebase evidence)",
+      "ambiguityWeight": "number (optional, 0-10, how much this question contributes to overall ambiguity)"
+    }
+  ],
+  "resolvedClarifications": [
+    {
+      "questionId": "string (required, matches a clarificationQuestions entry)",
+      "answer": "string (required, free-form)",
+      "resolvedAt": "string (required, ISO-8601)",
+      "resolvedBy": "string (required, one of: user | planner)"
+    }
+  ],
   "assumptions": [
-    "string — each assumption the planner is making"
+    "string — each assumption the planner is making, ideally with codebase evidence"
   ],
   "outOfScope": [
@@ -90,7 +114,10 @@ PlanSpec files are stored at: `.bober/specs/<specId>.json`
 | `description` | 2-3 sentence summary of the feature and its user value. |
 | `mode` | Must match the `project.mode` in `bober.config.json` (`greenfield` or `brownfield`). |
 | `preset` | Must match the `project.preset` in `bober.config.json`, if set (e.g., `nextjs`, `solidity`, `anchor`). |
-| `status` | Lifecycle state: `planned` (not started), `in-progress` (sprints running), `completed` (all sprints done), `archived` (abandoned or superseded). |
+| `status` | Lifecycle state. `draft`: planner emitted a complete plan, no sprints run yet. `needs-clarification`: planner refused to fully decompose; user must answer the open `clarificationQuestions` before sprints can run. `ready`: clarifications resolved, pipeline may proceed. `in-progress`: at least one sprint has started. `completed`: all sprints done. `abandoned`: planner or user dropped this spec. |
+| `ambiguityScore` | Planner's self-rating 0-10. If `>= 7`, the planner MUST set `status: "needs-clarification"` and emit at least one `clarificationQuestions` entry — the pipeline will refuse to run sprints from such a spec. |
+| `clarificationQuestions` | Open questions awaiting user answers. Each unresolved entry blocks sprint execution. |
+| `resolvedClarifications` | Question/answer history (both autonomous self-answers and user inputs via `npx agent-bober plan answer`). |
 ### Features Array
@@ -155,7 +182,7 @@ Bad criteria:
   "description": "A complete user authentication system supporting email/password registration and login, with session management and protected routes. This enables the application to identify users and restrict access to authorized content.",
   "mode": "greenfield",
   "preset": "react-vite",
-  "status": "planned",
+  "status": "draft",
   "assumptions": [
     "The application does not currently have any authentication system",
     "PostgreSQL is the database, as configured in the project",

package/skills/bober.run/SKILL.md CHANGED Viewed

@@ -91,9 +91,18 @@ If it fails, report the missing prerequisites and stop.
 ### 1c. Check for Existing Plans
-List all spec files in `.bober/specs/`. For each spec, read only the **first 5 lines** of the JSON file. The `status` field is near the top — if it says `"completed"`, skip that spec entirely (do not load its contracts or read further).
+List all spec files in `.bober/specs/`. For each spec, read only the **first 10 lines** of the JSON file. The `status` field is near the top — apply the following triage:
-Collect only the specs where `status` is NOT `"completed"` (i.e. `"planned"` or `"in-progress"`). These are the **actionable specs**.
+- `"completed"` or `"abandoned"` → skip entirely (do not load its contracts).
+- `"needs-clarification"` → **BLOCK and surface to user.** Read the spec's `clarificationQuestions` array and print each one with its category, options, and recommendation. Tell the user to resolve via:
+  - `npx agent-bober plan answer <specId>` (interactive walkthrough), or
+  - `npx agent-bober plan answer <specId> <questionId> "<answer>"` (one-shot per question), or
+  - Edit `.bober/specs/<specId>.json` directly and flip `status` to `"ready"` after answering.
+  Do NOT include this spec in the actionable set. Do NOT proceed past the planning phase if this is the only spec — the user must answer first.
+- `"draft"`, `"ready"`, `"in-progress"` → this is an **actionable spec**, collect it.
+Collect only the actionable specs.
 - **No actionable specs + user provided a new task:** Create a new plan (go to Step 2).
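 - **No actionable specs + no task provided:** Tell the user all plans are complete, unless a spec was blocked as `"needs-clarification"` — in that case remind them to answer its open questions. Stop.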
@@ -166,10 +175,34 @@ When done, respond with EXACTLY this JSON structure (no other text):
 **After the planner subagent returns:**
-1. Parse the planner's response to extract `specId` and `contractIds`.
-2. Read `.bober/specs/<specId>.json` to verify it was created.
-3. Read each contract file in `.bober/contracts/` to verify they exist.
-4. Print the plan summary:
+1. Parse the planner's response. Inspect the `status` field FIRST.
+2. **If `status` is `"needs-clarification"`:** The planner refused to fully decompose the request. STOP the pipeline. Read `.bober/specs/<specId>.json` and print the open `clarificationQuestions` to the user:
+   ```
+   === PLAN BLOCKED — NEEDS CLARIFICATION ===
+   Spec: <specId>
+   Title: <title>
+   Ambiguity score: <N>/10
+   Open questions:
+     Q1 [<category>]: <question>
+       A) <option> — <description>
+       B) <option> — <description>
+       💡 Suggested: <recommendation if any>
+     Q2 [<category>]: <question>
+     ...
+   Resolve via one of:
+     npx agent-bober plan answer <specId>                            (interactive)
+     npx agent-bober plan answer <specId> Q1 "<your answer>"         (one-shot)
+     Or edit .bober/specs/<specId>.json directly and flip status to "ready".
+   ```
+   Do NOT proceed to Step 3. Do NOT spawn the generator subagent. The user must resolve the questions before the pipeline can continue. Exit cleanly.
+3. **If `status` is `"draft"` or `"ready"`:** Extract `specId` and `contractIds`. Read `.bober/specs/<specId>.json` to verify it was created. Read each contract file in `.bober/contracts/` to verify they exist. Print the plan summary:
    ```
    === PLAN CREATED ===
    Spec: <specId>
@@ -179,7 +212,8 @@ When done, respond with EXACTLY this JSON structure (no other text):
    2. <Sprint 2 title>
    ...
    ```
-5. If the planner subagent failed or returned an error, report it and stop the pipeline.
+4. If the planner subagent failed or returned an error, report it and stop the pipeline.
 ---

package/skills/bober.sprint/SKILL.md CHANGED Viewed

@@ -30,9 +30,13 @@ Also read `.bober/principles.md` if it exists. You will include the principles t
 ## Step 1: Identify the Target Sprint
-**Find the active PlanSpec.** List all specs in `.bober/specs/`. For each spec, read only the **first 5 lines** — the `status` field is near the top. Skip any spec where `status` is `"completed"`. From the remaining specs, pick the most recent one (sort by `createdAt` descending).
+**Find the active PlanSpec.** List all specs in `.bober/specs/`. For each spec, read only the **first 10 lines** — the `status` field is near the top. Apply this triage:
-If all specs are `completed` and no sprint number was provided, tell the user all plans are complete.
+- `"completed"` or `"abandoned"` → skip entirely.
+- `"needs-clarification"` → BLOCK this spec from sprint execution. Print the open `clarificationQuestions` from the spec and tell the user to resolve via `npx agent-bober plan answer <specId>` (interactive) or `npx agent-bober plan answer <specId> <questionId> "<answer>"` (one-shot). Do NOT spawn the generator. Do NOT pick this spec as the active one. If it's the only spec, exit.
+- `"draft"`, `"ready"`, `"in-progress"` → eligible. From the eligible specs, pick the most recent one (sort by `createdAt` descending).
+If all specs are `completed`/`abandoned` and no sprint number was provided, tell the user all plans are complete. If the only remaining spec is `needs-clarification`, exit with the clarification message.
 **If a sprint number was provided as an argument:**
 - Find the contract for that sprint number: `.bober/contracts/sprint-<specId>-<N>.json`
@@ -463,260 +467,3 @@ Read `pipeline.contextReset` from config:
 After completing this phase, suggest the following next steps to the user:
 - `/bober-eval` — Evaluate the current sprint output independently
 - `/bober-sprint` — Execute the next sprint in the plan
----
-<!-- Reference: contract-schema.md -->
-# SprintContract JSON Schema
-This document defines the complete schema for SprintContract documents. Sprint contracts are the binding agreement between the Planner, Generator, and Evaluator for a single sprint.
-## Location
-SprintContract files are stored at: `.bober/contracts/<contractId>.json`
-## Naming Convention
-- `contractId` format: `sprint-<specId>-<sprint-number>`
-- Example: `sprint-spec-20260326-user-auth-1`
-- Sprint numbers are 1-indexed (first sprint is 1, not 0)
-## Full Schema
-```json
-{
-  "contractId": "string (required)",
-  "specId": "string (required, references parent PlanSpec)",
-  "sprintNumber": "number (required, 1-indexed)",
-  "title": "string (required, concise sprint title)",
-  "description": "string (required, what this sprint delivers)",
-  "status": "string (required, one of: proposed, in-progress, completed, needs-rework)",
-  "createdAt": "string (required, ISO-8601)",
-  "updatedAt": "string (required, ISO-8601)",
-  "completedAt": "string (optional, ISO-8601, set when status becomes completed)",
-  "dependsOn": [
-    "string — contractId references for sprints that must complete before this one"
-  ],
-  "features": [
-    "string — featureId references from the parent PlanSpec"
-  ],
-  "successCriteria": [
-    {
-      "criterionId": "string (required, format: sc-<sprint>-<index>)",
-      "description": "string (required, specific testable criterion)",
-      "verificationMethod": "string (required, one of: manual, typecheck, lint, unit-test, playwright, api-check, build, custom)",
-      "required": "boolean (required, true = must pass for sprint to pass)",
-      "customCommand": "string (optional, command to run for custom verification)"
-    }
-  ],
-  "generatorNotes": "string (required, guidance for the Generator agent)",
-  "evaluatorNotes": "string (required, guidance for the Evaluator agent)",
-  "estimatedFiles": [
-    "string — file paths expected to be created or modified"
-  ],
-  "estimatedDuration": "string (required, one of: small, medium, large)",
-  "iterationHistory": [
-    {
-      "iteration": "number",
-      "evalId": "string — reference to EvalResult",
-      "result": "string (pass | fail)",
-      "timestamp": "string (ISO-8601)"
-    }
-  ],
-  "lastEvalId": "string (optional, reference to most recent EvalResult)"
-}
-```
-## Field Descriptions
-### Core Fields
-| Field | Description |
-|-------|-------------|
-| `contractId` | Unique identifier. Generated by the Planner. Never changes. |
-| `specId` | Reference to the parent PlanSpec. Used to load broader context. |
-| `sprintNumber` | Position in the sprint sequence. 1-indexed. |
-| `title` | Concise description of what this sprint delivers. Should start with a verb: "Implement...", "Add...", "Create...". |
-| `description` | 2-4 sentences describing the sprint's deliverables and scope. |
-| `status` | Lifecycle state. See Status Transitions below. |
-### Status Transitions
-```
-proposed → in-progress → completed
-                ↓
-          needs-rework → in-progress → completed
-```
-- `proposed`: Created by the Planner. Not yet started or reviewed.
-- `in-progress`: Contract negotiated and Generator is working on it.
-- `completed`: All required success criteria passed evaluation.
-- `needs-rework`: Failed evaluation after maximum iterations. Requires human intervention or plan revision.
-### Dependencies
-| Field | Description |
-|-------|-------------|
-| `dependsOn` | Array of `contractId` values that must have status `completed` before this sprint can start. Empty array for the first sprint. |
-| `features` | Array of `featureId` values from the parent PlanSpec that this sprint implements (partially or fully). |
-### Success Criteria
-Each success criterion is a single testable statement that the Evaluator checks independently.
-| Field | Description |
-|-------|-------------|
-| `criterionId` | Unique within the contract. Format: `sc-<sprintNumber>-<index>` (1-indexed). |
-| `description` | Specific, testable criterion. Must describe observable behavior or measurable outcome. |
-| `verificationMethod` | How the Evaluator should verify this criterion. |
-| `required` | If `true`, this criterion MUST pass for the sprint to pass. If `false`, it is advisory. |
-| `customCommand` | Only for `verificationMethod: "custom"`. The command the Evaluator should run. |
-### Verification Methods
-| Method | What the Evaluator Does |
-|--------|------------------------|
-| `manual` | Reads source code and assesses whether the criterion is met based on code inspection and logic tracing. |
-| `typecheck` | Runs the configured typecheck command. Criterion passes if zero type errors. |
-| `lint` | Runs the configured lint command. Criterion passes if zero lint errors (warnings OK). |
-| `unit-test` | Runs the configured test command. Criterion passes if all tests pass. |
-| `playwright` | Runs Playwright E2E tests. Criterion passes if all relevant E2E tests pass. |
-| `api-check` | Tests specific API endpoints using curl or similar. Criterion passes if responses match expectations. |
-| `build` | Runs the configured build command. Criterion passes if build succeeds with exit code 0. |
-| `custom` | Runs `customCommand` and interprets the result. Exit code 0 = pass. |
-### Agent Notes
-| Field | Description |
-|-------|-------------|
-| `generatorNotes` | Free-form guidance for the Generator. Should include: key files to examine for patterns, known gotchas, suggested implementation order, references to similar existing code. |
-| `evaluatorNotes` | Free-form guidance for the Evaluator. Should include: specific things to test, edge cases to check, how to verify UI criteria, expected API response shapes. |
-### Estimates
-| Field | Description |
-|-------|-------------|
-| `estimatedFiles` | Array of file paths the Generator is expected to create or modify. This is advisory -- the Generator may touch additional files if needed. The Evaluator uses this to check for unexpected changes. |
-| `estimatedDuration` | Relative size estimate: `small` (30-60 min), `medium` (1-3 hours), `large` (3-5 hours). |
-### Iteration History
-| Field | Description |
-|-------|-------------|
-| `iterationHistory` | Array of past evaluation attempts. Appended after each evaluation. |
-| `lastEvalId` | Reference to the most recent EvalResult. Updated after each evaluation. |
-## Complete Example
-```json
-{
-  "contractId": "sprint-spec-20260326-user-auth-1",
-  "specId": "spec-20260326-user-auth",
-  "sprintNumber": 1,
-  "title": "Implement user registration with form and API",
-  "description": "Create the user registration flow end-to-end: a React registration form with email, password, and confirm-password fields; an Express API endpoint that validates input and creates a user record in PostgreSQL with a bcrypt-hashed password; and basic form validation on both client and server.",
-  "status": "proposed",
-  "createdAt": "2026-03-26T10:00:00Z",
-  "updatedAt": "2026-03-26T10:00:00Z",
-  "completedAt": null,
-  "dependsOn": [],
-  "features": ["feat-1"],
-  "successCriteria": [
-    {
-      "criterionId": "sc-1-1",
-      "description": "The project builds successfully with zero errors.",
-      "verificationMethod": "build",
-      "required": true
-    },
-    {
-      "criterionId": "sc-1-2",
-      "description": "TypeScript compilation produces zero type errors.",
-      "verificationMethod": "typecheck",
-      "required": true
-    },
-    {
-      "criterionId": "sc-1-3",
-      "description": "A registration form component exists at the /register route with email, password, and confirm-password input fields, each with an associated label.",
-      "verificationMethod": "manual",
-      "required": true
-    },
-    {
-      "criterionId": "sc-1-4",
-      "description": "POST /api/auth/register accepts { email, password } and returns 201 with { id, email } on success.",
-      "verificationMethod": "api-check",
-      "required": true
-    },
-    {
-      "criterionId": "sc-1-5",
-      "description": "POST /api/auth/register returns 400 with an error message when email is already registered.",
-      "verificationMethod": "api-check",
-      "required": true
-    },
-    {
-      "criterionId": "sc-1-6",
-      "description": "The password is stored as a bcrypt hash in the database, never in plain text.",
-      "verificationMethod": "manual",
-      "required": true
-    },
-    {
-      "criterionId": "sc-1-7",
-      "description": "Client-side validation shows an error when password is shorter than 8 characters before form submission.",
-      "verificationMethod": "manual",
-      "required": true
-    },
-    {
-      "criterionId": "sc-1-8",
-      "description": "ESLint reports zero errors on all new and modified files.",
-      "verificationMethod": "lint",
-      "required": false
-    }
-  ],
-  "generatorNotes": "Look at existing route definitions in src/routes/ for the Express routing pattern. The project uses Prisma -- check prisma/schema.prisma for the existing schema and add a User model. Use bcrypt (already in package.json) for password hashing. For the React form, follow the pattern in src/components/ -- the project uses controlled components with useState. The registration form should be at src/pages/Register.tsx and the route added to src/App.tsx.",
-  "evaluatorNotes": "For sc-1-3: Read the Register component source and verify it renders three labeled input fields. For sc-1-4 and sc-1-5: Start the dev server and use curl to test the endpoint. For sc-1-6: Read the route handler code and verify bcrypt.hash is called before database insertion. For sc-1-7: Read the form component code and verify client-side validation logic exists for password length.",
-  "estimatedFiles": [
-    "prisma/schema.prisma",
-    "src/routes/auth.ts",
-    "src/pages/Register.tsx",
-    "src/App.tsx"
-  ],
-  "estimatedDuration": "medium",
-  "iterationHistory": [],
-  "lastEvalId": null
-}
-```
-## Writing Good Success Criteria
-### Do
-- Start with an observable action or state: "The form displays...", "The API returns...", "The database contains..."
-- Include specific values: "returns 201", "displays 'Invalid email'", "at least 8 characters"
-- Map each criterion to exactly one verification method
-- Include at least one `build` criterion and one functional criterion per sprint
-- Write criteria the Evaluator can verify without guessing
-### Do Not
-- Use subjective language: "looks good", "works well", "clean code"
-- Combine multiple checks in one criterion (split them)
-- Reference internal implementation details unless checking them IS the criterion
-- Write criteria that require human visual judgment (unless verification method is `manual` and the check is code-inspectable)
-- Assume the Evaluator has context beyond the contract and handoff documents