@markus-global/cli 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/start.js +16 -6
- package/dist/commands/start.js.map +1 -1
- package/dist/markus.mjs +926 -818
- package/dist/web-ui/assets/index-CvTg0RPT.css +1 -0
- package/dist/web-ui/assets/index-DJ4hiBa1.js +61 -0
- package/dist/web-ui/index.html +2 -2
- package/package.json +1 -1
- package/templates/roles/agent-father/ROLE.md +15 -1
- package/templates/roles/developer/ROLE.md +61 -16
- package/templates/roles/devops/ROLE.md +26 -4
- package/templates/roles/project-manager/ROLE.md +59 -0
- package/templates/roles/qa-engineer/ROLE.md +36 -8
- package/templates/roles/research-assistant/ROLE.md +29 -2
- package/templates/roles/reviewer/ROLE.md +14 -7
- package/templates/roles/team-factory/ROLE.md +18 -1
- package/templates/skills/agent-building/SKILL.md +4 -2
- package/templates/skills/team-building/SKILL.md +25 -5
- package/templates/teams/content-team/ANNOUNCEMENT.md +20 -6
- package/templates/teams/content-team/NORMS.md +42 -14
- package/templates/teams/content-team/team.json +31 -6
- package/templates/teams/dev-squad/ANNOUNCEMENT.md +17 -6
- package/templates/teams/dev-squad/NORMS.md +60 -20
- package/templates/teams/dev-squad/team.json +38 -7
- package/templates/teams/engineering-pod/ANNOUNCEMENT.md +26 -0
- package/templates/teams/engineering-pod/NORMS.md +78 -0
- package/templates/teams/engineering-pod/team.json +50 -0
- package/templates/teams/research-lab/ANNOUNCEMENT.md +25 -0
- package/templates/teams/research-lab/NORMS.md +88 -0
- package/templates/teams/research-lab/team.json +43 -0
- package/templates/teams/startup-team/ANNOUNCEMENT.md +20 -7
- package/templates/teams/startup-team/NORMS.md +57 -19
- package/templates/teams/startup-team/team.json +24 -8
- package/dist/web-ui/assets/index-Bcc58A3R.css +0 -1
- package/dist/web-ui/assets/index-DuLIQUDd.js +0 -61
|
@@ -2,17 +2,42 @@
|
|
|
2
2
|
"type": "team",
|
|
3
3
|
"name": "content-team",
|
|
4
4
|
"displayName": "Content Creation Team",
|
|
5
|
-
"version": "
|
|
6
|
-
"description": "A
|
|
5
|
+
"version": "2.0.0",
|
|
6
|
+
"description": "A research-backed content team with a structured pipeline: Research → Brief → Draft → Edit → Publish. Features parallel writing with independent workstreams, fact-checking via subagent research, and editorial review gates. Ideal for documentation projects, marketing campaigns, technical writing, and content operations.",
|
|
7
7
|
"author": "Markus Team",
|
|
8
8
|
"category": "productivity",
|
|
9
|
-
"tags": ["content", "writing", "marketing", "documentation", "creative"],
|
|
9
|
+
"tags": ["content", "writing", "marketing", "documentation", "creative", "editorial"],
|
|
10
10
|
"icon": "edit",
|
|
11
11
|
"team": {
|
|
12
12
|
"members": [
|
|
13
|
-
{
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
{
|
|
14
|
+
"name": "Editor-in-Chief",
|
|
15
|
+
"role": "manager",
|
|
16
|
+
"roleName": "project-manager",
|
|
17
|
+
"count": 1,
|
|
18
|
+
"skills": ["markus-project-cli", "self-evolution"]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"name": "Senior Writer",
|
|
22
|
+
"role": "worker",
|
|
23
|
+
"roleName": "content-writer",
|
|
24
|
+
"count": 1,
|
|
25
|
+
"skills": ["self-evolution"]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"name": "Technical Writer",
|
|
29
|
+
"role": "worker",
|
|
30
|
+
"roleName": "tech-writer",
|
|
31
|
+
"count": 1,
|
|
32
|
+
"skills": ["self-evolution"]
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"name": "Research Analyst",
|
|
36
|
+
"role": "worker",
|
|
37
|
+
"roleName": "research-assistant",
|
|
38
|
+
"count": 1,
|
|
39
|
+
"skills": ["self-evolution"]
|
|
40
|
+
}
|
|
16
41
|
]
|
|
17
42
|
}
|
|
18
43
|
}
|
|
@@ -1,11 +1,22 @@
|
|
|
1
1
|
# Development Squad
|
|
2
2
|
|
|
3
|
-
Welcome to the Development Squad. We
|
|
3
|
+
Welcome to the Development Squad. We deliver production-grade software through structured, parallel workflows.
|
|
4
|
+
|
|
5
|
+
## How We Work
|
|
6
|
+
1. **Tech Lead** decomposes requirements into tasks with clear ownership boundaries and dependencies.
|
|
7
|
+
2. **Developers** implement in parallel using isolated worktrees — no file conflicts.
|
|
8
|
+
3. **Code Reviewer** validates every change against spec and quality standards.
|
|
9
|
+
4. **QA Engineer** runs validation for tasks requiring integration/regression testing.
|
|
4
10
|
|
|
5
11
|
## Current Focus
|
|
6
|
-
- Awaiting
|
|
12
|
+
- Awaiting project assignment. Once a project is onboarded, the Tech Lead will analyze the codebase, define module ownership, and create the first task batch.
|
|
7
13
|
|
|
8
|
-
##
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
14
|
+
## Key Capabilities
|
|
15
|
+
- **Worktree isolation**: Each task runs in its own git worktree. Parallel work without conflicts.
|
|
16
|
+
- **Subagent analysis**: Use `spawn_subagent` for deep dives — codebase analysis, refactoring plans, test generation.
|
|
17
|
+
- **Background execution**: Long builds and test suites run in background with automatic notifications.
|
|
18
|
+
- **Dependency-aware scheduling**: Tasks with `blockedBy` are auto-scheduled when dependencies complete.
|
|
19
|
+
- **Two-stage review**: Code review for correctness, QA for functional validation.
|
|
20
|
+
|
|
21
|
+
## Getting Started
|
|
22
|
+
All work flows through the task system. Submit requirements, and the Tech Lead handles the rest.
|
|
@@ -1,22 +1,62 @@
|
|
|
1
1
|
# Development Squad — Working Norms
|
|
2
2
|
|
|
3
|
-
##
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
-
|
|
16
|
-
-
|
|
17
|
-
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
-
|
|
22
|
-
-
|
|
3
|
+
## Workflow Phases
|
|
4
|
+
|
|
5
|
+
### 1. Plan (Tech Lead)
|
|
6
|
+
- Decompose requirements into tasks with clear acceptance criteria.
|
|
7
|
+
- Define **file/module ownership** per developer — each task must specify which directories or modules are in scope. Overlap causes merge conflicts.
|
|
8
|
+
- Set task dependencies via `blockedBy` — a task that needs another's API or schema should depend on it.
|
|
9
|
+
- Use `spawn_subagent` for deep architecture analysis before committing to a plan: codebase exploration, dependency audits, risk assessment.
|
|
10
|
+
- Target **5–6 tasks per developer** per sprint cycle. Too few = idle time. Too many = context switching overhead.
|
|
11
|
+
|
|
12
|
+
### 2. Implement (Developers)
|
|
13
|
+
- Each developer works in an **isolated worktree** — the system creates `task/<id>` branches automatically.
|
|
14
|
+
- Write tests first (TDD) for new features. For bug fixes, write a failing test that reproduces the issue before fixing.
|
|
15
|
+
- Use `spawn_subagent` for focused subtasks: generating boilerplate, analyzing a complex function, researching an API. This keeps your main context clean.
|
|
16
|
+
- Run `background_exec` for test suites and builds — you'll be notified when they complete.
|
|
17
|
+
- Use `subtask_create` to track progress within a task. Complete subtasks as you go.
|
|
18
|
+
- Submit via `task_submit_review` when done. Include a summary of changes in task notes.
|
|
19
|
+
|
|
20
|
+
### 3. Review & Merge (Code Reviewer)
|
|
21
|
+
- **Stage 1 — Spec compliance**: Does the implementation match the task's acceptance criteria? Are edge cases handled?
|
|
22
|
+
- **Stage 2 — Code quality**: Architecture alignment, naming, error handling, test coverage, performance concerns.
|
|
23
|
+
- Use `spawn_subagent` to deeply analyze complex changes without polluting your review context.
|
|
24
|
+
- Leave structured notes via `task_note` — every review produces a trail.
|
|
25
|
+
- **On approval**: Merge the task branch via `shell_execute` (`git merge` or `gh pr create` + `gh pr merge`), then complete the task.
|
|
26
|
+
- **On merge conflict**: Reject the task with conflict details. The developer resolves conflicts in their worktree and re-submits for review.
|
|
27
|
+
- **On rejection**: Task returns to `in_progress` with specific change requests.
|
|
28
|
+
|
|
29
|
+
### 4. Validate (QA Engineer)
|
|
30
|
+
- For tasks marked with QA requirements, run integration and regression tests.
|
|
31
|
+
- Verify functional correctness, edge cases, and cross-browser/cross-platform behavior.
|
|
32
|
+
- Report bugs as new tasks with `blockedBy` referencing the original task.
|
|
33
|
+
- Use `background_exec` for long-running test suites.
|
|
34
|
+
|
|
35
|
+
## File Ownership Rules
|
|
36
|
+
|
|
37
|
+
This is the most important rule for parallel development:
|
|
38
|
+
- **Each developer owns different directories/modules.** Overlap = conflicts.
|
|
39
|
+
- Tech Lead defines ownership in the task description. Example: "Backend Dev owns `src/api/` and `src/models/`. Frontend Dev owns `src/components/` and `src/pages/`."
|
|
40
|
+
- Shared files (e.g., types, configs) should be changed in a dependency task that others `blockedBy`.
|
|
41
|
+
- If you must edit a file outside your scope, coordinate via `agent_send_message` first.
|
|
42
|
+
|
|
43
|
+
## Communication Protocols
|
|
44
|
+
|
|
45
|
+
- **Status broadcasts**: Use `agent_broadcast_status` when starting/finishing a task.
|
|
46
|
+
- **Blocking issues**: Message the Tech Lead immediately via `agent_send_message`. Don't wait for heartbeat.
|
|
47
|
+
- **Interface contracts**: When one developer's API is needed by another, publish the interface as a `deliverable_create` (type: "convention") before implementing.
|
|
48
|
+
- **Review requests**: Submit via `task_submit_review`. The reviewer is notified automatically.
|
|
49
|
+
|
|
50
|
+
## Quality Standards
|
|
51
|
+
|
|
52
|
+
- All new code must have test coverage. No exceptions for production paths.
|
|
53
|
+
- Follow existing code conventions — use `spawn_subagent` to analyze the project's patterns if unsure.
|
|
54
|
+
- Commits must be focused (one logical change) and well-described.
|
|
55
|
+
- Security-sensitive changes (auth, crypto, input validation) require explicit review notes.
|
|
56
|
+
- Performance-critical paths should include benchmark data in task notes.
|
|
57
|
+
|
|
58
|
+
## Knowledge Capture
|
|
59
|
+
|
|
60
|
+
- Document non-obvious decisions as `deliverable_create` (type: "architecture_decision").
|
|
61
|
+
- Save reusable patterns and gotchas via `memory_save` with appropriate tags.
|
|
62
|
+
- After completing a complex task, record lessons learned during the self-evolution reflection.
|
|
@@ -2,18 +2,49 @@
|
|
|
2
2
|
"type": "team",
|
|
3
3
|
"name": "dev-squad",
|
|
4
4
|
"displayName": "Development Squad",
|
|
5
|
-
"version": "
|
|
6
|
-
"description": "A
|
|
5
|
+
"version": "2.0.0",
|
|
6
|
+
"description": "A high-performance development team that follows a structured Plan → Implement → Review → Validate workflow. Features parallel implementation with worktree isolation, two-stage review (code + QA), subagent-driven deep analysis, and dependency-aware task scheduling. Suitable for feature sprints, refactoring campaigns, and production-grade software delivery.",
|
|
7
7
|
"author": "Markus Team",
|
|
8
8
|
"category": "development",
|
|
9
|
-
"tags": ["development", "agile", "sprint", "feature", "software"],
|
|
9
|
+
"tags": ["development", "agile", "sprint", "feature", "software", "tdd", "code-review"],
|
|
10
10
|
"icon": "users",
|
|
11
11
|
"team": {
|
|
12
12
|
"members": [
|
|
13
|
-
{
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
{
|
|
14
|
+
"name": "Tech Lead",
|
|
15
|
+
"role": "manager",
|
|
16
|
+
"roleName": "project-manager",
|
|
17
|
+
"count": 1,
|
|
18
|
+
"skills": ["markus-project-cli", "team-building"]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"name": "Backend Developer",
|
|
22
|
+
"role": "worker",
|
|
23
|
+
"roleName": "developer",
|
|
24
|
+
"count": 1,
|
|
25
|
+
"skills": ["self-evolution"]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"name": "Frontend Developer",
|
|
29
|
+
"role": "worker",
|
|
30
|
+
"roleName": "developer",
|
|
31
|
+
"count": 1,
|
|
32
|
+
"skills": ["chrome-devtools", "self-evolution"]
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"name": "Code Reviewer",
|
|
36
|
+
"role": "worker",
|
|
37
|
+
"roleName": "reviewer",
|
|
38
|
+
"count": 1,
|
|
39
|
+
"skills": ["self-evolution"]
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"name": "QA Engineer",
|
|
43
|
+
"role": "worker",
|
|
44
|
+
"roleName": "qa-engineer",
|
|
45
|
+
"count": 1,
|
|
46
|
+
"skills": ["self-evolution"]
|
|
47
|
+
}
|
|
17
48
|
]
|
|
18
49
|
}
|
|
19
50
|
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Engineering Pod
|
|
2
|
+
|
|
3
|
+
Full-stack engineering team for complex, multi-layer software projects.
|
|
4
|
+
|
|
5
|
+
## Team Structure
|
|
6
|
+
- **Architect** — Designs systems, defines ownership, coordinates integration
|
|
7
|
+
- **Backend Engineer** — APIs, business logic, database, server-side testing
|
|
8
|
+
- **Frontend Engineer** — UI components, state management, browser testing
|
|
9
|
+
- **Infra Engineer** — CI/CD, deployment, monitoring, infrastructure-as-code
|
|
10
|
+
- **Senior Reviewer** — Two-pass code review (contract compliance + quality)
|
|
11
|
+
|
|
12
|
+
## How We Work
|
|
13
|
+
1. Architect produces a design brief with component breakdown and API contracts
|
|
14
|
+
2. Engineers implement in parallel — each in their own worktree, owning separate directories
|
|
15
|
+
3. Architect verifies integration against published contracts
|
|
16
|
+
4. Senior Reviewer validates quality across all layers
|
|
17
|
+
5. Infra Engineer deploys and verifies
|
|
18
|
+
|
|
19
|
+
## Key Principles
|
|
20
|
+
- **Parallel by design**: Layer isolation prevents conflicts. Clear domain ownership matrix.
|
|
21
|
+
- **Contract-first**: API shapes are agreed before implementation starts.
|
|
22
|
+
- **Dependency-aware**: Task graph ensures correct execution order.
|
|
23
|
+
- **Deep analysis**: `spawn_subagent` for architecture exploration, security review, and research.
|
|
24
|
+
|
|
25
|
+
## Current Focus
|
|
26
|
+
Awaiting project assignment. The Architect will analyze the codebase and produce the initial design brief.
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Engineering Pod — Working Norms
|
|
2
|
+
|
|
3
|
+
## Architecture: Design → Contract → Implement → Integrate → Review → Deploy
|
|
4
|
+
|
|
5
|
+
### Phase 1: Design (Architect)
|
|
6
|
+
- Analyze the requirement and codebase using `spawn_subagent` for targeted exploration.
|
|
7
|
+
- Produce an architecture brief as a `deliverable_create` (type: "architecture_decision") covering:
|
|
8
|
+
- Component breakdown and ownership mapping
|
|
9
|
+
- API contracts between layers (backend ↔ frontend, services ↔ infra)
|
|
10
|
+
- Data flow and state management strategy
|
|
11
|
+
- Risk assessment and mitigation plan
|
|
12
|
+
- Create tasks with explicit **layer ownership** and **dependency graph**:
|
|
13
|
+
- Shared types/interfaces → `blockedBy: []` (first)
|
|
14
|
+
- Backend API → `blockedBy: [shared-types-task]`
|
|
15
|
+
- Frontend UI → `blockedBy: [shared-types-task]`
|
|
16
|
+
- Infra/deploy → `blockedBy: [backend-task]`
|
|
17
|
+
- Integration test → `blockedBy: [backend-task, frontend-task]`
|
|
18
|
+
|
|
19
|
+
### Phase 2: Contract (All Engineers)
|
|
20
|
+
- Before implementation, agree on interface contracts:
|
|
21
|
+
- API schemas (request/response shapes, status codes, error formats)
|
|
22
|
+
- Component props and event signatures
|
|
23
|
+
- Configuration and environment variable naming
|
|
24
|
+
- Publish contracts as `deliverable_create` (type: "convention"). All parties reference these during implementation.
|
|
25
|
+
|
|
26
|
+
### Phase 3: Implement (Engineers, Parallel)
|
|
27
|
+
- Each engineer works in a **dedicated worktree** on their layer. No cross-layer file edits without coordination.
|
|
28
|
+
- **Backend Engineer**: API endpoints, business logic, database schema, server-side tests.
|
|
29
|
+
- **Frontend Engineer**: Components, pages, state management, client-side tests. Use `chrome-devtools` skill for browser debugging.
|
|
30
|
+
- **Infra Engineer**: CI/CD pipelines, deployment configs, monitoring, infrastructure-as-code.
|
|
31
|
+
- Use `spawn_subagent` for isolated analysis tasks — don't let research pollute your implementation context.
|
|
32
|
+
- Run tests in `background_exec` to stay productive while suites execute.
|
|
33
|
+
|
|
34
|
+
### Phase 4: Integrate (Architect coordinates)
|
|
35
|
+
- Architect verifies that layer implementations satisfy the published contracts.
|
|
36
|
+
- Use `spawn_subagent` to diff each layer's output against the contract deliverables.
|
|
37
|
+
- Create integration test tasks if not already done. These validate cross-layer communication.
|
|
38
|
+
|
|
39
|
+
### Phase 5: Review & Merge (Senior Reviewer)
|
|
40
|
+
- Review each layer independently, then the integration points.
|
|
41
|
+
- Two-pass review:
|
|
42
|
+
1. **Contract compliance**: Do implementations match published API contracts?
|
|
43
|
+
2. **Quality and security**: Error handling, input validation, performance, test coverage.
|
|
44
|
+
- Use `spawn_subagent` for deep security analysis on auth/payment/data-handling code.
|
|
45
|
+
- **On approval**: Merge each task branch via `shell_execute`:
|
|
46
|
+
- Local: `cd <repo> && git checkout <base_branch> && git merge <task_branch> --no-ff`
|
|
47
|
+
- Or via GitHub: `gh pr create` then `gh pr merge`
|
|
48
|
+
- **On merge conflict**: Reject the task with conflict details — the engineer resolves in their worktree and re-submits.
|
|
49
|
+
- Merge order matters: merge dependency tasks first, following the task graph (shared types → backend and frontend → infra → integration).
|
|
50
|
+
|
|
51
|
+
### Phase 6: Deploy (Infra Engineer)
|
|
52
|
+
- Verify all task branches are merged to the target branch.
|
|
53
|
+
- Run deployment pipeline via `background_exec`.
|
|
54
|
+
- Verify deployment health via smoke tests.
|
|
55
|
+
|
|
56
|
+
## Domain Ownership Matrix
|
|
57
|
+
|
|
58
|
+
| Engineer | Primary Scope | Shared (coordinate first) |
|
|
59
|
+
|----------|--------------|--------------------------|
|
|
60
|
+
| Backend | `src/api/`, `src/services/`, `src/models/`, `src/db/` | `src/types/`, `package.json` |
|
|
61
|
+
| Frontend | `src/components/`, `src/pages/`, `src/hooks/`, `src/styles/` | `src/types/`, `package.json` |
|
|
62
|
+
| Infra | `infra/`, `deploy/`, `.github/`, `Dockerfile`, CI configs | `package.json`, env configs |
|
|
63
|
+
|
|
64
|
+
Edit anything in the "Shared" column only after notifying the team via `agent_send_message`.
|
|
65
|
+
|
|
66
|
+
## Communication Protocols
|
|
67
|
+
|
|
68
|
+
- **Contract changes**: Broadcast to all via `agent_send_message` before modifying any published contract.
|
|
69
|
+
- **Blocking dependencies**: If you need another layer's work, check if the dependency task is complete. If not, message that engineer directly.
|
|
70
|
+
- **Integration issues**: Create a task with `blockedBy` referencing both layers involved. Assign to the Architect for triage.
|
|
71
|
+
|
|
72
|
+
## Quality Gates
|
|
73
|
+
|
|
74
|
+
- Every implementation task must include tests covering the happy path and at least one error path.
|
|
75
|
+
- API endpoints must handle malformed input gracefully (400, not 500).
|
|
76
|
+
- Frontend components must handle loading, error, and empty states.
|
|
77
|
+
- Infra changes must be idempotent and rollback-safe.
|
|
78
|
+
- Security-critical code (auth, permissions, data access) requires explicit review notes.
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "team",
|
|
3
|
+
"name": "engineering-pod",
|
|
4
|
+
"displayName": "Engineering Pod",
|
|
5
|
+
"version": "1.0.0",
|
|
6
|
+
"description": "A full-featured engineering pod with an architect who designs, multiple developers who implement in parallel across layers (backend, frontend, infrastructure), a dedicated reviewer, and a DevOps engineer. Designed for complex multi-layer projects requiring cross-cutting coordination, API contracts, and production deployment. Each member owns a clear domain to prevent file conflicts.",
|
|
7
|
+
"author": "Markus Team",
|
|
8
|
+
"category": "development",
|
|
9
|
+
"tags": ["engineering", "full-stack", "architecture", "devops", "deployment", "multi-layer"],
|
|
10
|
+
"icon": "server",
|
|
11
|
+
"team": {
|
|
12
|
+
"members": [
|
|
13
|
+
{
|
|
14
|
+
"name": "Architect",
|
|
15
|
+
"role": "manager",
|
|
16
|
+
"roleName": "project-manager",
|
|
17
|
+
"count": 1,
|
|
18
|
+
"skills": ["markus-project-cli", "team-building", "self-evolution"]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"name": "Backend Engineer",
|
|
22
|
+
"role": "worker",
|
|
23
|
+
"roleName": "developer",
|
|
24
|
+
"count": 1,
|
|
25
|
+
"skills": ["self-evolution"]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"name": "Frontend Engineer",
|
|
29
|
+
"role": "worker",
|
|
30
|
+
"roleName": "developer",
|
|
31
|
+
"count": 1,
|
|
32
|
+
"skills": ["chrome-devtools", "self-evolution"]
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"name": "Infra Engineer",
|
|
36
|
+
"role": "worker",
|
|
37
|
+
"roleName": "devops",
|
|
38
|
+
"count": 1,
|
|
39
|
+
"skills": ["self-evolution"]
|
|
40
|
+
},
|
|
41
|
+
{
|
|
42
|
+
"name": "Senior Reviewer",
|
|
43
|
+
"role": "worker",
|
|
44
|
+
"roleName": "reviewer",
|
|
45
|
+
"count": 1,
|
|
46
|
+
"skills": ["self-evolution"]
|
|
47
|
+
}
|
|
48
|
+
]
|
|
49
|
+
}
|
|
50
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Research Lab
|
|
2
|
+
|
|
3
|
+
A team for investigating complex problems through parallel, adversarial research.
|
|
4
|
+
|
|
5
|
+
## Team Structure
|
|
6
|
+
- **Research Lead** — Frames questions, assigns hypotheses, synthesizes consensus
|
|
7
|
+
- **2 Researchers** and a **Synthesizer** — Each researcher investigates a different angle, then cross-examines peers' findings; the Synthesizer consolidates the results
|
|
8
|
+
|
|
9
|
+
## How We Work
|
|
10
|
+
1. Lead frames the problem and assigns competing hypotheses to researchers
|
|
11
|
+
2. Researchers investigate in parallel — different angles, same question
|
|
12
|
+
3. Researchers challenge each other's findings (adversarial review)
|
|
13
|
+
4. Synthesizer consolidates evidence into a conclusion with confidence levels; Lead reviews and approves it
|
|
14
|
+
|
|
15
|
+
## Best For
|
|
16
|
+
- **Root cause debugging**: Multiple hypotheses tested in parallel
|
|
17
|
+
- **Technology evaluation**: Each researcher champions a different option
|
|
18
|
+
- **Security audits**: Divide by attack surface, cross-verify findings
|
|
19
|
+
- **Architecture exploration**: Map a codebase from multiple perspectives
|
|
20
|
+
|
|
21
|
+
## Key Principle
|
|
22
|
+
The hypothesis that survives cross-examination is most likely correct. Adversarial challenge eliminates anchoring bias.
|
|
23
|
+
|
|
24
|
+
## Current Focus
|
|
25
|
+
Awaiting research question. The Lead will frame the investigation and assign angles once a question is submitted.
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# Research Lab — Working Norms
|
|
2
|
+
|
|
3
|
+
## Methodology: Frame → Investigate → Challenge → Synthesize
|
|
4
|
+
|
|
5
|
+
### Phase 1: Frame (Research Lead)
|
|
6
|
+
- Define the research question precisely. Vague questions produce vague answers.
|
|
7
|
+
- Set **success criteria** upfront: what "done" looks like, what evidence would confirm or disprove each hypothesis.
|
|
8
|
+
- Identify **competing hypotheses** or **angles of investigation** — assign each researcher a different one.
|
|
9
|
+
- Create tasks with clear scope: "Investigate hypothesis X by examining Y, looking for evidence of Z."
|
|
10
|
+
- Set dependencies via `blockedBy` so later phases do not start until prerequisites are satisfied.
|
|
11
|
+
|
|
12
|
+
### Phase 2: Investigate (Researchers, Parallel)
|
|
13
|
+
- Each researcher independently explores their assigned angle.
|
|
14
|
+
- Use `spawn_subagent` for deep dives into specific files, logs, or codebases without losing your investigation context.
|
|
15
|
+
- Use `web_search` and `web_fetch` for external research — documentation, papers, vendor comparisons. Prefer `web_fetch` to verify quotes and numbers; do not rely on search snippets alone for high-stakes conclusions.
|
|
16
|
+
- Record all findings as `deliverable_create` artifacts with evidence:
|
|
17
|
+
- Code snippets, log excerpts, benchmark data, documentation references
|
|
18
|
+
- Confidence level (High/Medium/Low) with reasoning
|
|
19
|
+
- Explicitly note what you did NOT find (negative evidence matters)
|
|
20
|
+
- **Do not anchor on your first finding.** Actively look for disconfirming evidence.
|
|
21
|
+
- Run `memory_search` at the start of each investigation thread to avoid redoing prior work.
|
|
22
|
+
|
|
23
|
+
### Phase 3: Challenge (All Researchers)
|
|
24
|
+
- After initial investigation, researchers **review each other's findings** via `agent_send_message`.
|
|
25
|
+
- The goal is adversarial: each researcher tries to find weaknesses in others' conclusions.
|
|
26
|
+
- Ask: "What would have to be true for this finding to be wrong?"
|
|
27
|
+
- Prefer specific questions over rubber-stamp agreement. Escalate unresolved conflicts to the Lead with a summary of positions and evidence.
|
|
28
|
+
- Update your deliverables based on challenges received. Strengthen or retract claims.
|
|
29
|
+
|
|
30
|
+
### Phase 4: Synthesize (Synthesizer + Research Lead)
|
|
31
|
+
- Synthesizer collects all deliverables and cross-examination results.
|
|
32
|
+
- Use `spawn_subagent` to systematically compare findings across researchers.
|
|
33
|
+
- Produce a synthesis deliverable (`deliverable_create`) that includes:
|
|
34
|
+
1. **Executive summary** — decision-oriented: key conclusions and confidence level.
|
|
35
|
+
2. **Methodology** — scope, sources consulted, tools used, limitations.
|
|
36
|
+
3. **Findings** — organized by theme or question, each tied to cited evidence.
|
|
37
|
+
4. **Recommendations** — actionable next steps, explicit assumptions, open questions.
|
|
38
|
+
- If no consensus emerges, the synthesis should say so honestly and recommend further investigation.
|
|
39
|
+
- Research Lead reviews and approves the final synthesis.
|
|
40
|
+
|
|
41
|
+
## Competing Hypotheses Protocol
|
|
42
|
+
|
|
43
|
+
For ambiguous investigations (unclear root cause, conflicting sources, multiple plausible explanations):
|
|
44
|
+
|
|
45
|
+
1. Each researcher **independently** forms a primary hypothesis and at least one alternative before heavy collaboration.
|
|
46
|
+
2. Each researcher tests their own hypothesis by gathering evidence, without first anchoring on another researcher's conclusion.
|
|
47
|
+
3. Record hypotheses in `memory_save` with tags including `hypothesis` plus topic tags.
|
|
48
|
+
4. Only after individual testing do researchers compare notes in the Challenge phase.
|
|
49
|
+
|
|
50
|
+
## Investigation Playbooks
|
|
51
|
+
|
|
52
|
+
### Debugging / Root Cause Analysis
|
|
53
|
+
- Assign researchers to different hypotheses: "race condition" vs "data corruption" vs "configuration issue."
|
|
54
|
+
- Each researcher must produce **reproduction steps** or explain why reproduction is not possible.
|
|
55
|
+
- Share raw evidence (stack traces, logs, diffs) in task notes so others can verify.
|
|
56
|
+
|
|
57
|
+
### Technology Evaluation
|
|
58
|
+
- Assign each researcher a different technology to evaluate against the same criteria.
|
|
59
|
+
- Criteria must be defined in the framing phase: performance, ecosystem, learning curve, cost, security.
|
|
60
|
+
- Each researcher writes a balanced assessment — strengths AND weaknesses.
|
|
61
|
+
- The Synthesizer produces a comparison matrix from individual assessments.
|
|
62
|
+
|
|
63
|
+
### Security Audit
|
|
64
|
+
- Divide the codebase by attack surface: authentication, authorization, input handling, data storage, network.
|
|
65
|
+
- Each researcher focuses on one surface using the OWASP framework.
|
|
66
|
+
- Findings must include severity, exploitability, and recommended remediation.
|
|
67
|
+
- Cross-challenge phase: can one researcher exploit a path that another declared safe?
|
|
68
|
+
|
|
69
|
+
## Evidence Standards
|
|
70
|
+
|
|
71
|
+
- **Every non-trivial claim** must cite specific sources (URL, file path, log line, or deliverable reference).
|
|
72
|
+
- Assign a **confidence level** to major conclusions and state what would change that rating.
|
|
73
|
+
- Distinguish **facts** (directly supported) from **inference** (reasonable interpretation) from **speculation** (unsupported).
|
|
74
|
+
- A finding without evidence is an opinion, not research.
|
|
75
|
+
- Negative results are valuable — "I investigated X and found no evidence of Y" is a useful finding.
|
|
76
|
+
|
|
77
|
+
## Knowledge Accumulation
|
|
78
|
+
|
|
79
|
+
- Run `memory_search` at the start of new investigation threads to avoid redoing work.
|
|
80
|
+
- `memory_save` all durable insights: methods tried, dead ends, key citations, resolved disagreements, takeaways.
|
|
81
|
+
- Use consistent tagging (topic, project, phase, `hypothesis` when applicable) for fast retrieval.
|
|
82
|
+
|
|
83
|
+
## Communication
|
|
84
|
+
|
|
85
|
+
- **Share early, share raw**: Post intermediate findings to task notes. Don't wait for polished conclusions.
|
|
86
|
+
- **Cite your sources**: Every claim links to a file, URL, log line, or benchmark.
|
|
87
|
+
- **Disagree constructively**: "I found evidence that contradicts X because..." not "X is wrong."
|
|
88
|
+
- **Track confidence**: Use explicit levels (High/Medium/Low) and update as evidence changes.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "team",
|
|
3
|
+
"name": "research-lab",
|
|
4
|
+
"displayName": "Research Lab",
|
|
5
|
+
"version": "1.1.0",
|
|
6
|
+
"description": "Structured research team for investigating complex problems: debugging with competing hypotheses, technology evaluation, security audits, and deep analysis. Multiple researchers explore different angles in parallel, challenge each other's findings, and a synthesizer produces high-quality deliverables. Uses subagents for deep dives, web tools for evidence, memory for knowledge accumulation, and deliverables for traceable outputs.",
|
|
7
|
+
"author": "Markus Team",
|
|
8
|
+
"category": "research",
|
|
9
|
+
"tags": ["research", "investigation", "debugging", "analysis", "competing-hypotheses", "audit"],
|
|
10
|
+
"icon": "search",
|
|
11
|
+
"team": {
|
|
12
|
+
"members": [
|
|
13
|
+
{
|
|
14
|
+
"name": "Research Lead",
|
|
15
|
+
"role": "manager",
|
|
16
|
+
"roleName": "project-manager",
|
|
17
|
+
"count": 1,
|
|
18
|
+
"skills": ["markus-project-cli", "self-evolution"]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"name": "Researcher Alpha",
|
|
22
|
+
"role": "worker",
|
|
23
|
+
"roleName": "research-assistant",
|
|
24
|
+
"count": 1,
|
|
25
|
+
"skills": ["self-evolution"]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"name": "Researcher Beta",
|
|
29
|
+
"role": "worker",
|
|
30
|
+
"roleName": "research-assistant",
|
|
31
|
+
"count": 1,
|
|
32
|
+
"skills": ["self-evolution"]
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"name": "Synthesizer",
|
|
36
|
+
"role": "worker",
|
|
37
|
+
"roleName": "content-writer",
|
|
38
|
+
"count": 1,
|
|
39
|
+
"skills": ["self-evolution"]
|
|
40
|
+
}
|
|
41
|
+
]
|
|
42
|
+
}
|
|
43
|
+
}
|
|
@@ -1,11 +1,24 @@
|
|
|
1
1
|
# Startup All-in-One Team
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Lean team built for speed. Ship MVPs, measure results, iterate.
|
|
4
4
|
|
|
5
|
-
##
|
|
6
|
-
-
|
|
5
|
+
## Team Structure
|
|
6
|
+
- **Product Manager** — Strategy, priorities, hypothesis framing, release coordination
|
|
7
|
+
- **2 Full-Stack Developers** — Rapid feature development, parallel implementation with worktree isolation
|
|
8
|
+
- **Growth Lead** — Marketing, analytics, user acquisition, experiment measurement
|
|
9
|
+
|
|
10
|
+
## How We Work
|
|
11
|
+
1. PM frames opportunities as testable hypotheses with clear metrics
|
|
12
|
+
2. Developers build MVPs in parallel using isolated worktrees
|
|
13
|
+
3. Ship immediately — don't batch releases
|
|
14
|
+
4. Growth Lead measures results against hypothesis criteria
|
|
15
|
+
5. Feed learnings into the next cycle
|
|
7
16
|
|
|
8
|
-
## Operating
|
|
9
|
-
-
|
|
10
|
-
-
|
|
11
|
-
-
|
|
17
|
+
## Operating Principles
|
|
18
|
+
- **Speed over perfection** for experiments. Quality for core product.
|
|
19
|
+
- **Async communication.** Message, don't wait.
|
|
20
|
+
- **Small scope.** If a task takes more than a day, break it down.
|
|
21
|
+
- **Evidence-driven.** Every decision references data or user feedback.
|
|
22
|
+
|
|
23
|
+
## Current Focus
|
|
24
|
+
Awaiting project assignment. The PM will frame initial hypotheses once a product is onboarded.
|