npm - agent-bober - Versions diffs - 0.1.0 - Mend

agent-bober 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (212) hide show

package/.claude-plugin/plugin.json +9 -0
package/LICENSE +21 -0
package/README.md +495 -0
package/agents/bober-evaluator.md +323 -0
package/agents/bober-generator.md +245 -0
package/agents/bober-planner.md +248 -0
package/dist/cli/commands/eval.d.ts +6 -0
package/dist/cli/commands/eval.d.ts.map +1 -0
package/dist/cli/commands/eval.js +129 -0
package/dist/cli/commands/eval.js.map +1 -0
package/dist/cli/commands/init.d.ts +5 -0
package/dist/cli/commands/init.d.ts.map +1 -0
package/dist/cli/commands/init.js +547 -0
package/dist/cli/commands/init.js.map +1 -0
package/dist/cli/commands/plan.d.ts +5 -0
package/dist/cli/commands/plan.d.ts.map +1 -0
package/dist/cli/commands/plan.js +87 -0
package/dist/cli/commands/plan.js.map +1 -0
package/dist/cli/commands/run.d.ts +5 -0
package/dist/cli/commands/run.d.ts.map +1 -0
package/dist/cli/commands/run.js +120 -0
package/dist/cli/commands/run.js.map +1 -0
package/dist/cli/commands/sprint.d.ts +6 -0
package/dist/cli/commands/sprint.d.ts.map +1 -0
package/dist/cli/commands/sprint.js +206 -0
package/dist/cli/commands/sprint.js.map +1 -0
package/dist/cli/index.d.ts +3 -0
package/dist/cli/index.d.ts.map +1 -0
package/dist/cli/index.js +124 -0
package/dist/cli/index.js.map +1 -0
package/dist/config/defaults.d.ts +15 -0
package/dist/config/defaults.d.ts.map +1 -0
package/dist/config/defaults.js +226 -0
package/dist/config/defaults.js.map +1 -0
package/dist/config/index.d.ts +4 -0
package/dist/config/index.d.ts.map +1 -0
package/dist/config/index.js +8 -0
package/dist/config/index.js.map +1 -0
package/dist/config/loader.d.ts +18 -0
package/dist/config/loader.d.ts.map +1 -0
package/dist/config/loader.js +189 -0
package/dist/config/loader.js.map +1 -0
package/dist/config/schema.d.ts +904 -0
package/dist/config/schema.d.ts.map +1 -0
package/dist/config/schema.js +181 -0
package/dist/config/schema.js.map +1 -0
package/dist/contracts/eval-result.d.ts +205 -0
package/dist/contracts/eval-result.d.ts.map +1 -0
package/dist/contracts/eval-result.js +87 -0
package/dist/contracts/eval-result.js.map +1 -0
package/dist/contracts/index.d.ts +4 -0
package/dist/contracts/index.d.ts.map +1 -0
package/dist/contracts/index.js +16 -0
package/dist/contracts/index.js.map +1 -0
package/dist/contracts/spec.d.ts +101 -0
package/dist/contracts/spec.d.ts.map +1 -0
package/dist/contracts/spec.js +51 -0
package/dist/contracts/spec.js.map +1 -0
package/dist/contracts/sprint-contract.d.ts +141 -0
package/dist/contracts/sprint-contract.d.ts.map +1 -0
package/dist/contracts/sprint-contract.js +80 -0
package/dist/contracts/sprint-contract.js.map +1 -0
package/dist/evaluators/builtin/api-check.d.ts +13 -0
package/dist/evaluators/builtin/api-check.d.ts.map +1 -0
package/dist/evaluators/builtin/api-check.js +152 -0
package/dist/evaluators/builtin/api-check.js.map +1 -0
package/dist/evaluators/builtin/build-check.d.ts +17 -0
package/dist/evaluators/builtin/build-check.d.ts.map +1 -0
package/dist/evaluators/builtin/build-check.js +155 -0
package/dist/evaluators/builtin/build-check.js.map +1 -0
package/dist/evaluators/builtin/command-runner.d.ts +26 -0
package/dist/evaluators/builtin/command-runner.d.ts.map +1 -0
package/dist/evaluators/builtin/command-runner.js +114 -0
package/dist/evaluators/builtin/command-runner.js.map +1 -0
package/dist/evaluators/builtin/lint.d.ts +17 -0
package/dist/evaluators/builtin/lint.d.ts.map +1 -0
package/dist/evaluators/builtin/lint.js +264 -0
package/dist/evaluators/builtin/lint.js.map +1 -0
package/dist/evaluators/builtin/playwright.d.ts +16 -0
package/dist/evaluators/builtin/playwright.d.ts.map +1 -0
package/dist/evaluators/builtin/playwright.js +238 -0
package/dist/evaluators/builtin/playwright.js.map +1 -0
package/dist/evaluators/builtin/typescript-check.d.ts +12 -0
package/dist/evaluators/builtin/typescript-check.d.ts.map +1 -0
package/dist/evaluators/builtin/typescript-check.js +155 -0
package/dist/evaluators/builtin/typescript-check.js.map +1 -0
package/dist/evaluators/builtin/unit-test.d.ts +18 -0
package/dist/evaluators/builtin/unit-test.d.ts.map +1 -0
package/dist/evaluators/builtin/unit-test.js +279 -0
package/dist/evaluators/builtin/unit-test.js.map +1 -0
package/dist/evaluators/index.d.ts +11 -0
package/dist/evaluators/index.d.ts.map +1 -0
package/dist/evaluators/index.js +13 -0
package/dist/evaluators/index.js.map +1 -0
package/dist/evaluators/plugin-interface.d.ts +50 -0
package/dist/evaluators/plugin-interface.d.ts.map +1 -0
package/dist/evaluators/plugin-interface.js +2 -0
package/dist/evaluators/plugin-interface.js.map +1 -0
package/dist/evaluators/plugin-loader.d.ts +18 -0
package/dist/evaluators/plugin-loader.d.ts.map +1 -0
package/dist/evaluators/plugin-loader.js +107 -0
package/dist/evaluators/plugin-loader.js.map +1 -0
package/dist/evaluators/registry.d.ts +78 -0
package/dist/evaluators/registry.d.ts.map +1 -0
package/dist/evaluators/registry.js +238 -0
package/dist/evaluators/registry.js.map +1 -0
package/dist/index.d.ts +17 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +22 -0
package/dist/index.js.map +1 -0
package/dist/orchestrator/context-handoff.d.ts +543 -0
package/dist/orchestrator/context-handoff.d.ts.map +1 -0
package/dist/orchestrator/context-handoff.js +133 -0
package/dist/orchestrator/context-handoff.js.map +1 -0
package/dist/orchestrator/evaluator-agent.d.ts +15 -0
package/dist/orchestrator/evaluator-agent.d.ts.map +1 -0
package/dist/orchestrator/evaluator-agent.js +233 -0
package/dist/orchestrator/evaluator-agent.js.map +1 -0
package/dist/orchestrator/generator-agent.d.ts +16 -0
package/dist/orchestrator/generator-agent.d.ts.map +1 -0
package/dist/orchestrator/generator-agent.js +147 -0
package/dist/orchestrator/generator-agent.js.map +1 -0
package/dist/orchestrator/pipeline.d.ts +24 -0
package/dist/orchestrator/pipeline.d.ts.map +1 -0
package/dist/orchestrator/pipeline.js +290 -0
package/dist/orchestrator/pipeline.js.map +1 -0
package/dist/orchestrator/planner-agent.d.ts +10 -0
package/dist/orchestrator/planner-agent.d.ts.map +1 -0
package/dist/orchestrator/planner-agent.js +187 -0
package/dist/orchestrator/planner-agent.js.map +1 -0
package/dist/state/helpers.d.ts +5 -0
package/dist/state/helpers.d.ts.map +1 -0
package/dist/state/helpers.js +8 -0
package/dist/state/helpers.js.map +1 -0
package/dist/state/history.d.ts +39 -0
package/dist/state/history.d.ts.map +1 -0
package/dist/state/history.js +162 -0
package/dist/state/history.js.map +1 -0
package/dist/state/index.d.ts +8 -0
package/dist/state/index.d.ts.map +1 -0
package/dist/state/index.js +22 -0
package/dist/state/index.js.map +1 -0
package/dist/state/plan-state.d.ts +21 -0
package/dist/state/plan-state.d.ts.map +1 -0
package/dist/state/plan-state.js +108 -0
package/dist/state/plan-state.js.map +1 -0
package/dist/state/sprint-state.d.ts +20 -0
package/dist/state/sprint-state.d.ts.map +1 -0
package/dist/state/sprint-state.js +98 -0
package/dist/state/sprint-state.js.map +1 -0
package/dist/utils/fs.d.ts +31 -0
package/dist/utils/fs.d.ts.map +1 -0
package/dist/utils/fs.js +67 -0
package/dist/utils/fs.js.map +1 -0
package/dist/utils/git.d.ts +35 -0
package/dist/utils/git.d.ts.map +1 -0
package/dist/utils/git.js +84 -0
package/dist/utils/git.js.map +1 -0
package/dist/utils/index.d.ts +4 -0
package/dist/utils/index.d.ts.map +1 -0
package/dist/utils/index.js +4 -0
package/dist/utils/index.js.map +1 -0
package/dist/utils/logger.d.ts +45 -0
package/dist/utils/logger.d.ts.map +1 -0
package/dist/utils/logger.js +73 -0
package/dist/utils/logger.js.map +1 -0
package/hooks/hooks.json +10 -0
package/package.json +67 -0
package/scripts/detect-stack.sh +287 -0
package/scripts/init-project.sh +206 -0
package/scripts/run-eval.sh +175 -0
package/skills/bober.anchor/SKILL.md +365 -0
package/skills/bober.anchor/references/anchor-guide.md +567 -0
package/skills/bober.brownfield/SKILL.md +422 -0
package/skills/bober.brownfield/references/codebase-analysis.md +304 -0
package/skills/bober.eval/SKILL.md +235 -0
package/skills/bober.eval/references/eval-strategies.md +407 -0
package/skills/bober.eval/references/feedback-format.md +182 -0
package/skills/bober.plan/SKILL.md +244 -0
package/skills/bober.plan/references/clarification-guide.md +124 -0
package/skills/bober.plan/references/spec-schema.md +253 -0
package/skills/bober.react/SKILL.md +330 -0
package/skills/bober.react/references/react-scaffold.md +344 -0
package/skills/bober.run/SKILL.md +303 -0
package/skills/bober.solidity/SKILL.md +416 -0
package/skills/bober.solidity/references/solidity-guide.md +487 -0
package/skills/bober.sprint/SKILL.md +280 -0
package/skills/bober.sprint/references/contract-schema.md +251 -0
package/templates/base/CLAUDE.md +20 -0
package/templates/base/bober.config.json +35 -0
package/templates/brownfield/CLAUDE.md +34 -0
package/templates/brownfield/bober.config.json +37 -0
package/templates/presets/anchor/CLAUDE.md +163 -0
package/templates/presets/anchor/bober.config.json +9 -0
package/templates/presets/api-node/CLAUDE.md +153 -0
package/templates/presets/api-node/bober.config.json +10 -0
package/templates/presets/nextjs/CLAUDE.md +82 -0
package/templates/presets/nextjs/bober.config.json +14 -0
package/templates/presets/python-api/CLAUDE.md +202 -0
package/templates/presets/python-api/bober.config.json +9 -0
package/templates/presets/react-vite/CLAUDE.md +71 -0
package/templates/presets/react-vite/bober.config.json +53 -0
package/templates/presets/react-vite/scaffold/package.json +45 -0
package/templates/presets/react-vite/scaffold/server/index.ts +38 -0
package/templates/presets/react-vite/scaffold/server/tsconfig.json +24 -0
package/templates/presets/react-vite/scaffold/src/App.tsx +37 -0
package/templates/presets/react-vite/scaffold/src/index.html +12 -0
package/templates/presets/react-vite/scaffold/src/main.tsx +12 -0
package/templates/presets/react-vite/scaffold/tsconfig.json +27 -0
package/templates/presets/react-vite/scaffold/vite.config.ts +34 -0
package/templates/presets/solidity/CLAUDE.md +106 -0
package/templates/presets/solidity/bober.config.json +9 -0

package/skills/bober.brownfield/references/codebase-analysis.md ADDED Viewed

@@ -0,0 +1,304 @@
+# Codebase Analysis Methodology
+This document describes how to perform a thorough analysis of an existing codebase before planning brownfield changes. A complete analysis helps prevent regressions, keeps new code consistent with existing patterns, and makes it possible to size sprint contracts correctly.
+## Analysis Phases
+### Phase 1: Surface-Level Survey (5 minutes)
+Get the big picture without reading any code.
+**1. File structure survey:**
+```
+Use Glob with broad patterns to understand the layout:
+  src/**/*
+  app/**/*
+  server/**/*
+  lib/**/*
+  tests/**/*
+  e2e/**/*
+```
+Questions to answer:
+- Is this a monorepo or single project?
+- What is the top-level organization? (feature folders, layer folders, hybrid)
+- How many source files are there? (rough scale: tens, hundreds, thousands)
+- Where do tests live? (co-located, separate directory, both)
+**2. Package/dependency analysis:**
+Read `package.json` (or equivalent) and categorize dependencies:
+- Framework (React, Vue, Angular, Express, Fastify, etc.)
+- ORM/database (Prisma, Drizzle, TypeORM, Mongoose, etc.)
+- State management (Redux, Zustand, MobX, Recoil, etc.)
+- UI library (shadcn, Material UI, Chakra, Ant Design, etc.)
+- Testing (vitest, jest, mocha, playwright, cypress, etc.)
+- Build tools (vite, webpack, esbuild, turbopack, etc.)
+- Utilities (lodash, date-fns, zod, etc.)
+**3. Configuration file scan:**
+Check for and read:
+- `tsconfig.json` / `jsconfig.json` — Compiler settings, path aliases, strict mode
+- `vite.config.ts` / `next.config.js` / `webpack.config.js` — Build configuration
+- `eslint.config.js` / `.eslintrc.*` / `biome.json` — Linting rules
+- `tailwind.config.ts` — CSS configuration
+- `prisma/schema.prisma` / `drizzle.config.ts` — Database configuration
+- `.env.example` — Environment variables (reveals integrations and services)
+- `Dockerfile` / `docker-compose.yml` — Container configuration
+- `.github/workflows/*.yml` — CI/CD pipeline
+### Phase 2: Architecture Mapping (10 minutes)
+Understand how the system is organized and how data flows.
+**1. Entry points:**
+Identify the application's entry points:
+- Frontend: `main.tsx`, `App.tsx`, `pages/_app.tsx`, `app/layout.tsx`
+- Backend: `server/index.ts`, `src/app.ts`, `main.py`
+- CLI: `bin/`, `cli/`
+Read each entry point to understand the boot sequence: what middleware is loaded, what routes are registered, what providers wrap the app.
+**2. Routing map:**
+Frontend routes:
+```
+Use Grep to find route definitions:
+  Pattern: "path.*:.*/" or "Route.*path" or "<Route" (React Router)
+  Pattern: "app/" directory structure (Next.js App Router)
+  Pattern: "pages/" directory structure (Next.js Pages Router)
+```
+Backend routes:
+```
+Use Grep to find API route definitions:
+  Pattern: "app\.(get|post|put|delete|patch)" (Express)
+  Pattern: "router\.(get|post|put|delete|patch)" (Express Router)
+  Pattern: "@(Get|Post|Put|Delete|Patch)" (NestJS decorators)
+  Pattern: "@app\.(get|post|put|delete|patch)" (FastAPI)
+```
+Produce a route table:
+```
+Frontend Routes:
+  /              -> pages/Home.tsx
+  /login         -> pages/Login.tsx
+  /dashboard     -> pages/Dashboard.tsx (protected)
+  /settings      -> pages/Settings.tsx (protected)
+Backend Routes:
+  GET    /api/users        -> routes/users.ts:getUsers
+  POST   /api/users        -> routes/users.ts:createUser
+  GET    /api/users/:id    -> routes/users.ts:getUser
+  PUT    /api/users/:id    -> routes/users.ts:updateUser
+  DELETE /api/users/:id    -> routes/users.ts:deleteUser
+  POST   /api/auth/login   -> routes/auth.ts:login
+  POST   /api/auth/logout  -> routes/auth.ts:logout
+```
+**3. Database schema map:**
+Read the ORM schema and produce an entity relationship summary:
+```
+Models:
+  User:        id, email, passwordHash, name, createdAt, updatedAt
+  Post:        id, title, content, authorId -> User, createdAt, updatedAt
+  Comment:     id, content, postId -> Post, authorId -> User, createdAt
+Relationships:
+  User 1:N Post   (author)
+  User 1:N Comment (author)
+  Post 1:N Comment
+```
+**4. Middleware/interceptor chain:**
+For backend apps, trace the middleware chain:
+```
+Request -> cors -> helmet -> bodyParser -> authMiddleware -> routeHandler -> errorHandler -> Response
+```
+For frontend apps, trace the provider chain:
+```
+<StrictMode>
+  <QueryClientProvider>
+    <AuthProvider>
+      <ThemeProvider>
+        <RouterProvider>
+          <App />
+```
+### Phase 3: Pattern Extraction (10 minutes)
+Read 3-5 representative files of each type to extract patterns.
+**1. Component patterns (frontend):**
+Read several components and note:
+- Function declaration style: `function Component()` or `const Component = () =>`
+- Props typing: `interface Props {}` or `type Props = {}` or inline
+- State management: useState, useReducer, store hook
+- Data fetching: useEffect + fetch, React Query, SWR, server components
+- Styling: className strings, CSS modules, styled-components, Tailwind
+- File structure: imports, types, component, exports (in what order?)
+**2. Route handler patterns (backend):**
+Read several route handlers and note:
+- Handler style: direct function, controller class, handler + service pattern
+- Request validation: Zod, Joi, class-validator, manual
+- Response format: JSON shape, status codes, error format
+- Error handling: try/catch, error middleware, Either/Result pattern
+- Database access: direct ORM calls or through a service layer?
+**3. Test patterns:**
+Read several test files and note:
+- Test structure: describe/it, test(), or BDD-style
+- Assertion library: expect (vitest/jest), assert, chai
+- Mocking approach: vi.mock, jest.mock, manual mocks
+- Test data: factories, fixtures, inline objects
+- Setup/teardown: beforeEach/afterEach patterns
+**4. Import conventions:**
+Note:
+- Absolute imports (`@/lib/utils`) vs relative (`../../lib/utils`)
+- Barrel imports (`from '@/components'`) vs direct (`from '@/components/Button'`)
+- Type imports: `import type { X }` vs `import { X }`
+- Import ordering: external first, then internal? Alphabetical?
+### Phase 4: Health Assessment (5 minutes)
+Assess the current health of the codebase.
+**1. Test coverage:**
+```bash
+# Count test files
+find src -name "*.test.*" | wc -l
+find tests -name "*.test.*" 2>/dev/null | wc -l
+# Count source files (to calculate ratio)
+find src -name "*.ts" -not -name "*.test.*" -not -name "*.d.ts" | wc -l
+# Run tests to get current status
+npm test 2>&1 | tail -20
+```
+**2. Type safety:**
+```bash
+# Check for any existing type errors
+npx tsc --noEmit 2>&1 | tail -20
+# Check for `any` usage (indicates weak typing)
+grep -r ": any" src/ --include="*.ts" --include="*.tsx" | wc -l
+```
+**3. Code quality indicators:**
+```bash
+# Check for TODO/FIXME/HACK comments
+grep -r "TODO\|FIXME\|HACK\|XXX" src/ --include="*.ts" --include="*.tsx" | wc -l
+# Check for console.log statements
+grep -r "console\.log" src/ --include="*.ts" --include="*.tsx" | wc -l
+# Check linting status
+npm run lint 2>&1 | tail -10
+```
+**4. Git health:**
+```bash
+# Recent activity (who's working on what)
+git log --oneline --since="2 weeks ago" | head -20
+# Files with most recent changes (hot spots)
+git log --name-only --since="1 month ago" --pretty=format: | sort | uniq -c | sort -rn | head -20
+# Check for uncommitted changes
+git status --porcelain
+```
+### Phase 5: Risk Map
+Combine the analysis into a risk assessment:
+**High-risk areas** (modify with extreme caution):
+- Files imported by >10 other files (high coupling)
+- Files with no test coverage
+- Files with recent high churn (many recent commits)
+- Shared utilities and middleware
+- Database schema (migrations affect everything)
+- Authentication/authorization code
+**Medium-risk areas** (modify carefully with tests):
+- Components used on multiple pages
+- API route handlers with complex business logic
+- Configuration files
+- Shared types/interfaces
+**Low-risk areas** (safe to modify):
+- Isolated page components
+- New files that don't modify existing code
+- Test files
+- Documentation
+## Output Format
+The codebase analysis should produce a structured summary that is saved to `.bober/codebase-analysis.json` (or included in the PlanSpec's `techNotes.existingPatterns`) and referenced by all sprint contracts:
+```json
+{
+  "timestamp": "<ISO-8601>",
+  "commit": "<git commit hash>",
+  "techStack": {
+    "language": "TypeScript 5.x",
+    "frontend": "React 18, Vite, React Router v6",
+    "backend": "Express.js",
+    "database": "PostgreSQL via Prisma",
+    "styling": "Tailwind CSS + shadcn/ui",
+    "testing": "Vitest (unit), Playwright (E2E)",
+    "cicd": "GitHub Actions"
+  },
+  "architecture": {
+    "pattern": "feature-based with shared lib/",
+    "frontendRoutes": 8,
+    "backendEndpoints": 15,
+    "dbModels": 5
+  },
+  "health": {
+    "testFiles": 23,
+    "sourceFiles": 67,
+    "testCoverageRatio": 0.34,
+    "typeErrors": 0,
+    "lintErrors": 3,
+    "todoComments": 12,
+    "anyUsage": 4
+  },
+  "patterns": {
+    "componentStyle": "Arrow function components with Props interface",
+    "stateManagement": "Zustand for global state, useState for local",
+    "dataFetching": "TanStack Query with custom hooks in src/hooks/",
+    "apiCalls": "Fetch wrapper in src/lib/api.ts",
+    "errorHandling": "Error boundaries + toast notifications",
+    "testStyle": "describe/it blocks with @testing-library/react",
+    "importStyle": "Absolute imports with @/ prefix, type imports separated"
+  },
+  "highRiskFiles": [
+    "src/lib/api.ts (imported by 23 files)",
+    "src/middleware/auth.ts (all protected routes depend on this)",
+    "prisma/schema.prisma (database schema)"
+  ]
+}
+```
+## Tips for Effective Analysis
+1. **Read the README first.** It often explains the architecture and setup process.
+2. **Check CLAUDE.md or CONTRIBUTING.md.** These may have explicit instructions about code patterns.
+3. **Look at recent PRs** (if accessible) to understand the team's expectations.
+4. **Do not analyze every file.** Sample 3-5 representative files per category. If the first 3 components all use the same pattern, you can assume the rest do too.
+5. **Pay attention to the `.gitignore`.** It tells you what's generated vs. authored.
+6. **Check for a monorepo tool.** `turbo.json`, `nx.json`, `pnpm-workspace.yaml`, `lerna.json` indicate monorepo structure.
+7. **Look for a design system.** Check `src/components/ui/` or similar. If a design system exists, all new UI must use it.

package/skills/bober.eval/SKILL.md ADDED Viewed

@@ -0,0 +1,235 @@
+---
+name: bober.eval
+description: Run an independent evaluation of the current sprint state against its contract, producing structured pass/fail feedback.
+argument-hint: "[contract-id]"
+---
+# bober.eval — Standalone Evaluation Skill
+You are running the **bober.eval** skill. Your job is to independently evaluate the current state of a sprint implementation against its contract and produce structured feedback. This skill can be run at any time, independently of the sprint execution loop.
+## When to Use This Skill
+- **During development:** To check your progress against criteria before running the full sprint loop
+- **After manual changes:** When you have fixed something the Generator produced and want to re-evaluate
+- **For debugging:** To understand exactly what is passing and failing in a sprint
+- **As a standalone QA check:** To evaluate any codebase state against a sprint contract
+## Step 1: Identify the Target Contract
+**If a contract ID was provided as an argument:**
+- Load the contract from `.bober/contracts/<contractId>.json`
+- Verify it exists
+**If no contract ID was provided:**
+- Load the most recent PlanSpec from `.bober/specs/`
+- Find the most recent sprint contract with status `in-progress` or `needs-rework`
+- If no contract is `in-progress` or `needs-rework`, find the first `proposed` contract
+- If all are `completed`, tell the user there is nothing to evaluate
+Read the contract and its parent PlanSpec.
+## Step 2: Load Configuration
+Read `bober.config.json` and extract:
+- `evaluator.strategies`: The configured evaluation strategies
+- `evaluator.model`: The model to use (informational)
+- `commands`: The project commands for build, test, lint, typecheck
+## Step 3: Pre-Flight Checks
+Before running evaluation strategies, verify the environment:
+1. **Check if dependencies are installed:**
+   ```bash
+   # Check for installed dependencies (varies by stack)
+   # Node.js: ls node_modules/.package-lock.json 2>/dev/null
+   # Rust/Anchor: check target/ directory
+   # Solidity/Hardhat: ls node_modules/.package-lock.json 2>/dev/null
+   # Solidity/Foundry: check lib/ directory
+   # Python: check venv or .venv
+   ```
+   If dependencies are not installed, run the configured install command first.
+2. **Check the current git branch:**
+   ```bash
+   git branch --show-current
+   ```
+   Note the branch for the evaluation report.
+3. **Check for uncommitted changes:**
+   ```bash
+   git status --porcelain
+   ```
+   Note any uncommitted changes in the report. The evaluation should still proceed, but this is important context.
+## Step 4: Execute Evaluation Strategies
+Run each strategy configured in `evaluator.strategies` from the config. Execute them in this order for fastest feedback on failures:
+### Priority 1: Build/Compile Verification
+```bash
+# Use commands.build from config (varies by stack)
+# e.g., npm run build, anchor build, forge build, cargo build, etc.
+```
+- Record the full output
+- If the build fails, most other checks are unreliable -- still run them, but note in the report that their results may be affected by the build failure
+### Priority 2: Type Checking / Static Analysis
+```bash
+# Use commands.typecheck from config (varies by stack)
+# e.g., npx tsc --noEmit, cargo clippy, solhint, mypy, etc.
+```
+- Record every type error with file path and line number
+- Count total errors
+### Priority 3: Linting
+```bash
+# Use commands.lint from config (varies by stack)
+# e.g., npm run lint, solhint, clippy, ruff, etc.
+```
+- Record every lint error (ignore warnings unless they indicate real problems)
+- Count total errors
+### Priority 4: Unit Tests
+```bash
+# Use commands.test from config (varies by stack)
+# e.g., npm test, anchor test, forge test, pytest, etc.
+```
+- Record which tests passed and which failed
+- For failures, record the test name, expected vs actual output, and file location
+- Check if any pre-existing tests broke (regression)
+### Priority 5: E2E Tests (Playwright)
+```bash
+# Only run if configured and installed
+npx playwright test 2>&1
+```
+- If Playwright is not installed, mark as `skipped` (not `failed`)
+- Record which tests passed and failed
+- Note if screenshots are available
+### Priority 6: API Checks
+- If the contract has API-related success criteria, start the dev server and test endpoints:
+  ```bash
+  # Start dev server in background
+  # Test endpoints with curl
+  curl -s -w "\n%{http_code}" http://localhost:<port>/api/<endpoint>
+  ```
+- Record response status codes and body shapes
+### Priority 7: Custom Strategies
+- For each strategy with `type: "custom"`, execute the command from the strategy's `config` field
+- Record the output and exit code
+**For each strategy, record:**
+```json
+{
+  "strategy": "<type>",
+  "required": true,
+  "result": "pass | fail | skipped",
+  "exitCode": 0,
+  "output": "<relevant output>",
+  "errorCount": 0,
+  "details": "<explanation>"
+}
+```
+## Step 5: Verify Success Criteria
+Go through EVERY success criterion in the contract, one by one.
+For each criterion:
+1. **Read the criterion and its verification method**
+2. **Gather evidence:**
+   - For `build`/`typecheck`/`lint`/`unit-test`/`playwright`: Use the strategy results from Step 4
+   - For `manual`: Read the relevant source files. Trace the code path. Verify the described behavior exists in the code.
+   - For `api-check`: Test the specific endpoint described in the criterion
+   - For `custom`: Run the custom command
+3. **Make a judgment: pass, fail, or skipped**
+4. **Record evidence supporting the judgment**
+**Judgment rules:**
+- `pass`: You have concrete evidence the criterion is met
+- `fail`: You have concrete evidence the criterion is NOT met, or you cannot find evidence that it IS met
+- `skipped`: The verification method cannot be executed (e.g., Playwright not installed)
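+**A criterion marked `required: true` MUST have a definitive pass or fail. It cannot be skipped: if its verification method cannot be executed, mark it `fail` and record what could not be verified.**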
+## Step 6: Check for Regressions
+Beyond the contract criteria, check for broader regressions:
+1. **Pre-existing test count:** If you can determine how many tests existed before the sprint, compare to the current count. Fewer passing tests = regression.
+2. **Build stability:** Does the full project build, not just the new code?
+3. **Unexpected file changes:** Use `git diff --stat` to see all changed files. Flag any files changed that are NOT in the contract's `estimatedFiles`.
+## Step 7: Produce the EvalResult
+Generate the structured evaluation result following the schema in `skills/bober.eval/references/feedback-format.md`.
+**Overall result determination:**
+- **PASS:** ALL required strategies passed AND ALL required criteria passed AND no critical regressions
+- **FAIL:** ANY required strategy failed OR ANY required criterion failed OR critical regression found
+Save the EvalResult to `.bober/eval-results/eval-<contractId>-<iteration>.json`.
+If this is the first evaluation for this contract, iteration = 1. Otherwise, read the contract's `iterationHistory` to determine the next iteration number.
+Append to `.bober/history.jsonl`:
+```json
+{"event":"eval-completed","contractId":"...","evalId":"...","result":"pass|fail","timestamp":"..."}
+```
+## Step 8: Output Report
+Present results in a clear, human-readable format:
+```
+## Evaluation Report: <sprint title>
+**Contract:** <contractId>
+**Iteration:** <N>
+**Result:** PASS / FAIL
+**Branch:** <current branch>
+**Uncommitted changes:** yes/no
+### Strategy Results
+| Strategy | Required | Result |
+|----------|----------|--------|
+| build    | yes      | PASS   |
+| typecheck| yes      | PASS   |
+| lint     | yes      | FAIL (3 errors) |
+| unit-test| yes      | PASS (12/12 tests) |
+### Success Criteria
+| ID | Description | Required | Result |
+|----|-------------|----------|--------|
+| sc-1-1 | Project builds successfully | yes | PASS |
+| sc-1-2 | Registration form exists at /register | yes | PASS |
+| sc-1-3 | API returns 201 on valid registration | yes | FAIL |
+...
+### Failures (if any)
+**sc-1-3: API returns 201 on valid registration**
+- What failed: POST /api/auth/register returns 500 instead of 201
+- Where: src/routes/auth.ts:42
+- Evidence: `curl -X POST http://localhost:3000/api/auth/register -H "Content-Type: application/json" -d '{"email":"test@test.com","password":"password123"}'` returned 500 with error "relation users does not exist"
+- Expected: 201 with `{ id, email }` response body
+- Root cause: The database migration has not been run. The users table does not exist.
+### Regressions (if any)
+- <description>
+### Summary
+<2-3 sentence summary>
+```
+## Anti-Leniency Reminders
+- If a criterion says "the form displays an error message" and you can only verify the validation logic exists in code but cannot confirm the message renders, mark it as **fail** with a note about what you could not verify.
+- If the build has warnings that look like potential runtime errors (e.g., unused imports of things that should be used), flag them even if the build technically passes.
+- If a test passes but the test itself is trivial (e.g., `expect(true).toBe(true)`), note this in the report. A passing trivial test does not satisfy a functional criterion.
+- If the Generator's self-report says something works but you find evidence it does not, trust your evidence over the report.