feed-the-machine 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +170 -170
- package/bin/generate-manifest.mjs +463 -463
- package/bin/install.mjs +491 -491
- package/docs/HOOKS.md +243 -243
- package/docs/INBOX.md +233 -233
- package/ftm/SKILL.md +122 -122
- package/ftm-audit/SKILL.md +623 -541
- package/ftm-audit/references/protocols/PROJECT-PATTERNS.md +91 -91
- package/ftm-audit/references/protocols/RUNTIME-WIRING.md +66 -66
- package/ftm-audit/references/protocols/WIRING-CONTRACTS.md +135 -135
- package/ftm-audit/references/strategies/AUTO-FIX-STRATEGIES.md +69 -69
- package/ftm-audit/references/templates/REPORT-FORMAT.md +96 -96
- package/ftm-audit/scripts/run-knip.sh +23 -23
- package/ftm-audit.yml +2 -2
- package/ftm-brainstorm/SKILL.md +498 -498
- package/ftm-brainstorm/evals/evals.json +100 -100
- package/ftm-brainstorm/evals/promptfoo.yaml +109 -109
- package/ftm-brainstorm/references/agent-prompts.md +224 -224
- package/ftm-brainstorm/references/plan-template.md +121 -121
- package/ftm-brainstorm.yml +2 -2
- package/ftm-browse/SKILL.md +454 -454
- package/ftm-browse/daemon/browser-manager.ts +206 -206
- package/ftm-browse/daemon/bun.lock +30 -30
- package/ftm-browse/daemon/cli.ts +347 -347
- package/ftm-browse/daemon/commands.ts +410 -410
- package/ftm-browse/daemon/main.ts +357 -357
- package/ftm-browse/daemon/package.json +17 -17
- package/ftm-browse/daemon/server.ts +189 -189
- package/ftm-browse/daemon/snapshot.ts +519 -519
- package/ftm-browse/daemon/tsconfig.json +22 -22
- package/ftm-browse.yml +4 -4
- package/ftm-capture/SKILL.md +370 -370
- package/ftm-capture.yml +4 -4
- package/ftm-codex-gate/SKILL.md +361 -361
- package/ftm-codex-gate.yml +2 -2
- package/ftm-config/SKILL.md +345 -345
- package/ftm-config.default.yml +82 -80
- package/ftm-config.yml +2 -2
- package/ftm-council/SKILL.md +416 -416
- package/ftm-council/references/prompts/CLAUDE-INVESTIGATION.md +60 -60
- package/ftm-council/references/prompts/CODEX-INVESTIGATION.md +58 -58
- package/ftm-council/references/prompts/GEMINI-INVESTIGATION.md +58 -58
- package/ftm-council/references/prompts/REBUTTAL-TEMPLATE.md +57 -57
- package/ftm-council/references/protocols/PREREQUISITES.md +47 -47
- package/ftm-council/references/protocols/STEP-0-FRAMING.md +46 -46
- package/ftm-council.yml +2 -2
- package/ftm-dashboard/SKILL.md +163 -163
- package/ftm-dashboard.yml +4 -4
- package/ftm-debug/SKILL.md +1037 -1037
- package/ftm-debug/references/phases/PHASE-0-INTAKE.md +58 -58
- package/ftm-debug/references/phases/PHASE-1-TRIAGE.md +46 -46
- package/ftm-debug/references/phases/PHASE-2-WAR-ROOM-AGENTS.md +279 -279
- package/ftm-debug/references/phases/PHASE-3-TO-6-EXECUTION.md +436 -436
- package/ftm-debug/references/protocols/BLACKBOARD.md +86 -86
- package/ftm-debug/references/protocols/EDGE-CASES.md +103 -103
- package/ftm-debug.yml +2 -2
- package/ftm-diagram/SKILL.md +277 -277
- package/ftm-diagram.yml +2 -2
- package/ftm-executor/SKILL.md +777 -767
- package/ftm-executor/references/STYLE-TEMPLATE.md +73 -73
- package/ftm-executor/references/phases/PHASE-0-VERIFICATION.md +62 -62
- package/ftm-executor/references/phases/PHASE-2-AGENT-ASSEMBLY.md +34 -34
- package/ftm-executor/references/phases/PHASE-3-WORKTREES.md +38 -38
- package/ftm-executor/references/phases/PHASE-4-5-AUDIT.md +72 -72
- package/ftm-executor/references/phases/PHASE-4-DISPATCH.md +66 -66
- package/ftm-executor/references/phases/PHASE-5-5-CODEX-GATE.md +73 -73
- package/ftm-executor/references/protocols/DOCUMENTATION-BOOTSTRAP.md +36 -36
- package/ftm-executor/references/protocols/MODEL-PROFILE.md +59 -44
- package/ftm-executor/references/protocols/PROGRESS-TRACKING.md +66 -66
- package/ftm-executor/runtime/ftm-runtime.mjs +252 -252
- package/ftm-executor/runtime/package.json +8 -8
- package/ftm-executor.yml +2 -2
- package/ftm-git/SKILL.md +441 -441
- package/ftm-git/evals/evals.json +26 -26
- package/ftm-git/evals/promptfoo.yaml +75 -75
- package/ftm-git/hooks/post-commit-experience.sh +92 -92
- package/ftm-git/references/patterns/SECRET-PATTERNS.md +104 -104
- package/ftm-git/references/protocols/REMEDIATION.md +139 -139
- package/ftm-git/scripts/pre-commit-secrets.sh +110 -110
- package/ftm-git.yml +2 -2
- package/ftm-inbox/backend/adapters/_retry.py +64 -64
- package/ftm-inbox/backend/adapters/base.py +230 -230
- package/ftm-inbox/backend/adapters/freshservice.py +104 -104
- package/ftm-inbox/backend/adapters/gmail.py +125 -125
- package/ftm-inbox/backend/adapters/jira.py +136 -136
- package/ftm-inbox/backend/adapters/registry.py +192 -192
- package/ftm-inbox/backend/adapters/slack.py +110 -110
- package/ftm-inbox/backend/db/connection.py +54 -54
- package/ftm-inbox/backend/db/schema.py +78 -78
- package/ftm-inbox/backend/executor/__init__.py +7 -7
- package/ftm-inbox/backend/executor/engine.py +149 -149
- package/ftm-inbox/backend/executor/step_runner.py +98 -98
- package/ftm-inbox/backend/main.py +103 -103
- package/ftm-inbox/backend/models/__init__.py +1 -1
- package/ftm-inbox/backend/models/unified_task.py +36 -36
- package/ftm-inbox/backend/planner/__init__.py +6 -6
- package/ftm-inbox/backend/planner/generator.py +127 -127
- package/ftm-inbox/backend/planner/schema.py +34 -34
- package/ftm-inbox/backend/requirements.txt +5 -5
- package/ftm-inbox/backend/routes/execute.py +186 -186
- package/ftm-inbox/backend/routes/health.py +52 -52
- package/ftm-inbox/backend/routes/inbox.py +68 -68
- package/ftm-inbox/backend/routes/plan.py +271 -271
- package/ftm-inbox/bin/launchagent.mjs +91 -91
- package/ftm-inbox/bin/setup.mjs +188 -188
- package/ftm-inbox/bin/start.sh +10 -10
- package/ftm-inbox/bin/status.sh +17 -17
- package/ftm-inbox/bin/stop.sh +8 -8
- package/ftm-inbox/config.example.yml +55 -55
- package/ftm-inbox/package-lock.json +2898 -2898
- package/ftm-inbox/package.json +26 -26
- package/ftm-inbox/postcss.config.js +6 -6
- package/ftm-inbox/src/app.css +199 -199
- package/ftm-inbox/src/app.html +18 -18
- package/ftm-inbox/src/lib/api.ts +166 -166
- package/ftm-inbox/src/lib/components/ExecutionLog.svelte +81 -81
- package/ftm-inbox/src/lib/components/InboxFeed.svelte +143 -143
- package/ftm-inbox/src/lib/components/PlanStep.svelte +271 -271
- package/ftm-inbox/src/lib/components/PlanView.svelte +206 -206
- package/ftm-inbox/src/lib/components/StreamPanel.svelte +99 -99
- package/ftm-inbox/src/lib/components/TaskCard.svelte +190 -190
- package/ftm-inbox/src/lib/components/ui/EmptyState.svelte +63 -63
- package/ftm-inbox/src/lib/components/ui/KawaiiCard.svelte +86 -86
- package/ftm-inbox/src/lib/components/ui/PillButton.svelte +106 -106
- package/ftm-inbox/src/lib/components/ui/StatusBadge.svelte +67 -67
- package/ftm-inbox/src/lib/components/ui/StreamDrawer.svelte +149 -149
- package/ftm-inbox/src/lib/components/ui/ThemeToggle.svelte +80 -80
- package/ftm-inbox/src/lib/theme.ts +47 -47
- package/ftm-inbox/src/routes/+layout.svelte +76 -76
- package/ftm-inbox/src/routes/+page.svelte +401 -401
- package/ftm-inbox/svelte.config.js +12 -12
- package/ftm-inbox/tailwind.config.ts +63 -63
- package/ftm-inbox/tsconfig.json +13 -13
- package/ftm-inbox/vite.config.ts +6 -6
- package/ftm-intent/SKILL.md +241 -241
- package/ftm-intent.yml +2 -2
- package/ftm-manifest.json +3794 -3794
- package/ftm-map/SKILL.md +291 -291
- package/ftm-map/scripts/db.py +712 -712
- package/ftm-map/scripts/index.py +415 -415
- package/ftm-map/scripts/parser.py +224 -224
- package/ftm-map/scripts/queries/go-tags.scm +20 -20
- package/ftm-map/scripts/queries/javascript-tags.scm +35 -35
- package/ftm-map/scripts/queries/python-tags.scm +31 -31
- package/ftm-map/scripts/queries/ruby-tags.scm +19 -19
- package/ftm-map/scripts/queries/rust-tags.scm +37 -37
- package/ftm-map/scripts/queries/typescript-tags.scm +41 -41
- package/ftm-map/scripts/query.py +301 -301
- package/ftm-map/scripts/ranker.py +377 -377
- package/ftm-map/scripts/requirements.txt +5 -5
- package/ftm-map/scripts/setup-hooks.sh +27 -27
- package/ftm-map/scripts/setup.sh +56 -56
- package/ftm-map/scripts/test_db.py +364 -364
- package/ftm-map/scripts/test_parser.py +174 -174
- package/ftm-map/scripts/test_query.py +183 -183
- package/ftm-map/scripts/test_ranker.py +199 -199
- package/ftm-map/scripts/views.py +591 -591
- package/ftm-map.yml +2 -2
- package/ftm-mind/SKILL.md +1943 -1943
- package/ftm-mind/evals/promptfoo.yaml +142 -142
- package/ftm-mind/references/blackboard-schema.md +328 -328
- package/ftm-mind/references/complexity-guide.md +110 -110
- package/ftm-mind/references/event-registry.md +319 -319
- package/ftm-mind/references/mcp-inventory.md +296 -296
- package/ftm-mind/references/protocols/COMPLEXITY-SIZING.md +72 -72
- package/ftm-mind/references/protocols/MCP-HEURISTICS.md +32 -32
- package/ftm-mind/references/protocols/PLAN-APPROVAL.md +80 -80
- package/ftm-mind/references/reflexion-protocol.md +249 -249
- package/ftm-mind/references/routing/SCENARIOS.md +22 -22
- package/ftm-mind/references/routing-scenarios.md +35 -35
- package/ftm-mind.yml +2 -2
- package/ftm-pause/SKILL.md +395 -395
- package/ftm-pause/references/protocols/SKILL-RESTORE-PROTOCOLS.md +186 -186
- package/ftm-pause/references/protocols/VALIDATION.md +80 -80
- package/ftm-pause.yml +2 -2
- package/ftm-researcher/SKILL.md +275 -275
- package/ftm-researcher/evals/agent-diversity.yaml +17 -17
- package/ftm-researcher/evals/synthesis-quality.yaml +12 -12
- package/ftm-researcher/evals/trigger-accuracy.yaml +39 -39
- package/ftm-researcher/references/adaptive-search.md +116 -116
- package/ftm-researcher/references/agent-prompts.md +193 -193
- package/ftm-researcher/references/council-integration.md +193 -193
- package/ftm-researcher/references/output-format.md +203 -203
- package/ftm-researcher/references/synthesis-pipeline.md +165 -165
- package/ftm-researcher/scripts/score_credibility.py +234 -234
- package/ftm-researcher/scripts/validate_research.py +92 -92
- package/ftm-researcher.yml +2 -2
- package/ftm-resume/SKILL.md +518 -518
- package/ftm-resume/references/protocols/VALIDATION.md +172 -172
- package/ftm-resume.yml +2 -2
- package/ftm-retro/SKILL.md +380 -380
- package/ftm-retro/references/protocols/SCORING-RUBRICS.md +89 -89
- package/ftm-retro/references/templates/REPORT-FORMAT.md +109 -109
- package/ftm-retro.yml +2 -2
- package/ftm-routine/SKILL.md +170 -170
- package/ftm-routine.yml +4 -4
- package/ftm-state/blackboard/capabilities.json +5 -5
- package/ftm-state/blackboard/capabilities.schema.json +27 -27
- package/ftm-state/blackboard/context.json +23 -23
- package/ftm-state/blackboard/experiences/index.json +9 -9
- package/ftm-state/blackboard/patterns.json +6 -6
- package/ftm-state/schemas/context.schema.json +130 -130
- package/ftm-state/schemas/experience-index.schema.json +77 -77
- package/ftm-state/schemas/experience.schema.json +78 -78
- package/ftm-state/schemas/patterns.schema.json +44 -44
- package/ftm-upgrade/SKILL.md +194 -194
- package/ftm-upgrade/scripts/check-version.sh +76 -76
- package/ftm-upgrade/scripts/upgrade.sh +143 -143
- package/ftm-upgrade.yml +2 -2
- package/ftm-verify.yml +2 -2
- package/ftm.yml +2 -2
- package/hooks/ftm-blackboard-enforcer.sh +93 -93
- package/hooks/ftm-discovery-reminder.sh +90 -90
- package/hooks/ftm-drafts-gate.sh +61 -61
- package/hooks/ftm-event-logger.mjs +107 -107
- package/hooks/ftm-map-autodetect.sh +79 -79
- package/hooks/ftm-pending-sync-check.sh +22 -22
- package/hooks/ftm-plan-gate.sh +92 -92
- package/hooks/ftm-post-commit-trigger.sh +57 -57
- package/hooks/settings-template.json +81 -81
- package/install.sh +363 -363
- package/package.json +84 -84
- package/uninstall.sh +25 -25
|
@@ -1,100 +1,100 @@
|
|
|
1
|
-
{
|
|
2
|
-
"skill_name": "ftm-brainstorm",
|
|
3
|
-
"evals": [
|
|
4
|
-
{
|
|
5
|
-
"id": 0,
|
|
6
|
-
"name": "fresh-idea-intake",
|
|
7
|
-
"prompt": "I'm thinking about building an app that helps people find study buddies at their university. Like Tinder but for studying.",
|
|
8
|
-
"expected_output": "Phase 0 repo scan launched in background, 1-2 intake questions, hard STOP",
|
|
9
|
-
"files": [],
|
|
10
|
-
"assertions": [
|
|
11
|
-
{"name": "one_or_two_questions", "description": "Asks 1-2 questions max, not a wall of questions"},
|
|
12
|
-
{"name": "no_research_sprint_turn1", "description": "Does NOT dispatch research agents on the very first turn — intake only"},
|
|
13
|
-
{"name": "hard_stop", "description": "Ends with a question and waits — does NOT proceed to synthesize or generate suggestions"},
|
|
14
|
-
{"name": "repo_scan_launched", "description": "Mentions or silently launches a repo/codebase scan agent in background"}
|
|
15
|
-
]
|
|
16
|
-
},
|
|
17
|
-
{
|
|
18
|
-
"id": 1,
|
|
19
|
-
"name": "fresh-idea-turn2-research",
|
|
20
|
-
"prompt": "It's for college students who want to find people in the same classes to study with. Matching based on courses, study style, and schedule availability.",
|
|
21
|
-
"expected_output": "First research sprint dispatched (3 agents), synthesized suggestions with citations, challenge, 1-2 questions, STOP",
|
|
22
|
-
"files": [],
|
|
23
|
-
"multi_turn_context": "Turn 2. Turn 1: user said 'building study buddy app like Tinder for studying', skill asked intake questions, user now responds with details.",
|
|
24
|
-
"assertions": [
|
|
25
|
-
{"name": "three_agents_dispatched", "description": "Dispatches 3 parallel research agents (web, github, competitive) — not fewer"},
|
|
26
|
-
{"name": "real_citations", "description": "At least 3 unique URLs to real repos/articles/products in the synthesis"},
|
|
27
|
-
{"name": "suggestions_with_evidence", "description": "Presents numbered suggestions (3-5) with real-world evidence backing each one"},
|
|
28
|
-
{"name": "recommendation_labeled", "description": "Suggestion #1 is labeled RECOMMENDED with rationale"},
|
|
29
|
-
{"name": "challenge_present", "description": "Includes at least one challenge/pushback after suggestions"},
|
|
30
|
-
{"name": "ends_with_question", "description": "Ends with 1-2 targeted questions to drive next research sprint"},
|
|
31
|
-
{"name": "hard_stop", "description": "Does NOT continue past the questions — waits for user response"},
|
|
32
|
-
{"name": "depth_is_broad", "description": "Research queries are landscape-level (major approaches, who's done this) not implementation-specific"}
|
|
33
|
-
]
|
|
34
|
-
},
|
|
35
|
-
{
|
|
36
|
-
"id": 2,
|
|
37
|
-
"name": "turn3-deeper-research",
|
|
38
|
-
"prompt": "I like option 2 — the React Native approach with Firebase. But I'm worried about the matching algorithm complexity. Also we need to handle the cold-start problem when a new university joins.",
|
|
39
|
-
"expected_output": "Second research sprint (deeper, focused on RN+Firebase+matching), new suggestions building on prior, new challenges, new questions",
|
|
40
|
-
"files": [],
|
|
41
|
-
"multi_turn_context": "Turn 3. Prior turns: user described study buddy app, first research sprint found 5 approaches, user now picks one and raises two specific concerns.",
|
|
42
|
-
"assertions": [
|
|
43
|
-
{"name": "three_agents_dispatched", "description": "Dispatches 3 parallel research agents again — every turn gets a research sprint"},
|
|
44
|
-
{"name": "research_is_deeper", "description": "Search queries target matching algorithms and cold-start specifically, NOT broad 'study buddy app' landscape again"},
|
|
45
|
-
{"name": "builds_on_prior", "description": "References prior turn's findings — does not re-present the same 5 approaches"},
|
|
46
|
-
{"name": "new_citations", "description": "At least 2 URLs not seen in prior turns — fresh research, not recycled"},
|
|
47
|
-
{"name": "addresses_user_concerns", "description": "Suggestions specifically address matching algorithm complexity AND cold-start problem"},
|
|
48
|
-
{"name": "challenge_present", "description": "Challenges the user on at least one assumption or pushes back on scope"},
|
|
49
|
-
{"name": "ends_with_question", "description": "Ends with 1-2 questions that unlock the NEXT research vector"},
|
|
50
|
-
{"name": "hard_stop", "description": "Does NOT continue past the questions"}
|
|
51
|
-
]
|
|
52
|
-
},
|
|
53
|
-
{
|
|
54
|
-
"id": 3,
|
|
55
|
-
"name": "brain-dump-intake",
|
|
56
|
-
"prompt": "help me build all the suggestions in this chat: [brain dump about eng-buddy]",
|
|
57
|
-
"expected_output": "Path B structured extraction with repo context, confirmation gate, no research yet",
|
|
58
|
-
"files": ["brain-dump-input.md"],
|
|
59
|
-
"assertions": [
|
|
60
|
-
{"name": "decided_section", "description": "Contains a 'Decided' or 'Decisions already made' section"},
|
|
61
|
-
{"name": "open_questions_section", "description": "Contains an 'Open questions' or 'Gaps' section"},
|
|
62
|
-
{"name": "no_basic_questions", "description": "Does NOT ask basic 'what are you building?' questions already answered by the paste"},
|
|
63
|
-
{"name": "confirmation_gate", "description": "Ends with a confirmation question before proceeding to research"},
|
|
64
|
-
{"name": "no_research_sprint", "description": "Does NOT dispatch research agents or present suggestions on this turn"},
|
|
65
|
-
{"name": "hard_stop", "description": "Stops after asking for confirmation — does not proceed"}
|
|
66
|
-
]
|
|
67
|
-
},
|
|
68
|
-
{
|
|
69
|
-
"id": 4,
|
|
70
|
-
"name": "brain-dump-turn2-research",
|
|
71
|
-
"prompt": "Yeah that looks right, go ahead and research it",
|
|
72
|
-
"expected_output": "First research sprint in brain dump mode: novelty map, suggestions with citations, challenges",
|
|
73
|
-
"files": ["brain-dump-input.md"],
|
|
74
|
-
"multi_turn_context": "Turn 2 of brain dump. Turn 1: user pasted eng-buddy brain dump, skill extracted structured summary, user now confirms.",
|
|
75
|
-
"assertions": [
|
|
76
|
-
{"name": "three_agents_dispatched", "description": "Dispatches 3 parallel research agents searching for specific brain dump claims"},
|
|
77
|
-
{"name": "novelty_map_present", "description": "Contains a Novelty Map table with solved/partially solved/novel verdicts"},
|
|
78
|
-
{"name": "real_citations", "description": "At least 5 unique URLs to real repos/articles/products"},
|
|
79
|
-
{"name": "brain_dump_claims_researched", "description": "Agents searched for specific architectural claims from the dump, not just broad topic searches"},
|
|
80
|
-
{"name": "challenge_present", "description": "At least one challenge/pushback raised"},
|
|
81
|
-
{"name": "ends_with_question", "description": "Ends with 1-2 targeted questions"},
|
|
82
|
-
{"name": "hard_stop", "description": "Does NOT proceed past questions"}
|
|
83
|
-
]
|
|
84
|
-
},
|
|
85
|
-
{
|
|
86
|
-
"id": 5,
|
|
87
|
-
"name": "phase3-gate",
|
|
88
|
-
"prompt": "Ok I think I know what I want. Let's turn this into a plan.",
|
|
89
|
-
"expected_output": "Vision summary for approval, NOT the full plan yet",
|
|
90
|
-
"files": [],
|
|
91
|
-
"multi_turn_context": "Turn 6+ of brainstorming. Previous turns explored study-buddy app, settled on React Native + Firebase, matching algorithm, cold-start solution. User now wants to move to planning.",
|
|
92
|
-
"assertions": [
|
|
93
|
-
{"name": "vision_summary", "description": "Presents a brief 'here's what we've landed on' summary before generating the full plan"},
|
|
94
|
-
{"name": "approval_gate", "description": "Asks for explicit confirmation before proceeding to full plan generation"},
|
|
95
|
-
{"name": "does_not_dump_full_plan", "description": "Does NOT generate the entire task breakdown, agent assignments, and wave structure in this message"},
|
|
96
|
-
{"name": "references_plan_template", "description": "Reads or references references/plan-template.md for plan generation"}
|
|
97
|
-
]
|
|
98
|
-
}
|
|
99
|
-
]
|
|
100
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"skill_name": "ftm-brainstorm",
|
|
3
|
+
"evals": [
|
|
4
|
+
{
|
|
5
|
+
"id": 0,
|
|
6
|
+
"name": "fresh-idea-intake",
|
|
7
|
+
"prompt": "I'm thinking about building an app that helps people find study buddies at their university. Like Tinder but for studying.",
|
|
8
|
+
"expected_output": "Phase 0 repo scan launched in background, 1-2 intake questions, hard STOP",
|
|
9
|
+
"files": [],
|
|
10
|
+
"assertions": [
|
|
11
|
+
{"name": "one_or_two_questions", "description": "Asks 1-2 questions max, not a wall of questions"},
|
|
12
|
+
{"name": "no_research_sprint_turn1", "description": "Does NOT dispatch research agents on the very first turn — intake only"},
|
|
13
|
+
{"name": "hard_stop", "description": "Ends with a question and waits — does NOT proceed to synthesize or generate suggestions"},
|
|
14
|
+
{"name": "repo_scan_launched", "description": "Mentions or silently launches a repo/codebase scan agent in background"}
|
|
15
|
+
]
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"id": 1,
|
|
19
|
+
"name": "fresh-idea-turn2-research",
|
|
20
|
+
"prompt": "It's for college students who want to find people in the same classes to study with. Matching based on courses, study style, and schedule availability.",
|
|
21
|
+
"expected_output": "First research sprint dispatched (3 agents), synthesized suggestions with citations, challenge, 1-2 questions, STOP",
|
|
22
|
+
"files": [],
|
|
23
|
+
"multi_turn_context": "Turn 2. Turn 1: user said 'building study buddy app like Tinder for studying', skill asked intake questions, user now responds with details.",
|
|
24
|
+
"assertions": [
|
|
25
|
+
{"name": "three_agents_dispatched", "description": "Dispatches 3 parallel research agents (web, github, competitive) — not fewer"},
|
|
26
|
+
{"name": "real_citations", "description": "At least 3 unique URLs to real repos/articles/products in the synthesis"},
|
|
27
|
+
{"name": "suggestions_with_evidence", "description": "Presents numbered suggestions (3-5) with real-world evidence backing each one"},
|
|
28
|
+
{"name": "recommendation_labeled", "description": "Suggestion #1 is labeled RECOMMENDED with rationale"},
|
|
29
|
+
{"name": "challenge_present", "description": "Includes at least one challenge/pushback after suggestions"},
|
|
30
|
+
{"name": "ends_with_question", "description": "Ends with 1-2 targeted questions to drive next research sprint"},
|
|
31
|
+
{"name": "hard_stop", "description": "Does NOT continue past the questions — waits for user response"},
|
|
32
|
+
{"name": "depth_is_broad", "description": "Research queries are landscape-level (major approaches, who's done this) not implementation-specific"}
|
|
33
|
+
]
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"id": 2,
|
|
37
|
+
"name": "turn3-deeper-research",
|
|
38
|
+
"prompt": "I like option 2 — the React Native approach with Firebase. But I'm worried about the matching algorithm complexity. Also we need to handle the cold-start problem when a new university joins.",
|
|
39
|
+
"expected_output": "Second research sprint (deeper, focused on RN+Firebase+matching), new suggestions building on prior, new challenges, new questions",
|
|
40
|
+
"files": [],
|
|
41
|
+
"multi_turn_context": "Turn 3. Prior turns: user described study buddy app, first research sprint found 5 approaches, user now picks one and raises two specific concerns.",
|
|
42
|
+
"assertions": [
|
|
43
|
+
{"name": "three_agents_dispatched", "description": "Dispatches 3 parallel research agents again — every turn gets a research sprint"},
|
|
44
|
+
{"name": "research_is_deeper", "description": "Search queries target matching algorithms and cold-start specifically, NOT broad 'study buddy app' landscape again"},
|
|
45
|
+
{"name": "builds_on_prior", "description": "References prior turn's findings — does not re-present the same 5 approaches"},
|
|
46
|
+
{"name": "new_citations", "description": "At least 2 URLs not seen in prior turns — fresh research, not recycled"},
|
|
47
|
+
{"name": "addresses_user_concerns", "description": "Suggestions specifically address matching algorithm complexity AND cold-start problem"},
|
|
48
|
+
{"name": "challenge_present", "description": "Challenges the user on at least one assumption or pushes back on scope"},
|
|
49
|
+
{"name": "ends_with_question", "description": "Ends with 1-2 questions that unlock the NEXT research vector"},
|
|
50
|
+
{"name": "hard_stop", "description": "Does NOT continue past the questions"}
|
|
51
|
+
]
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"id": 3,
|
|
55
|
+
"name": "brain-dump-intake",
|
|
56
|
+
"prompt": "help me build all the suggestions in this chat: [brain dump about eng-buddy]",
|
|
57
|
+
"expected_output": "Path B structured extraction with repo context, confirmation gate, no research yet",
|
|
58
|
+
"files": ["brain-dump-input.md"],
|
|
59
|
+
"assertions": [
|
|
60
|
+
{"name": "decided_section", "description": "Contains a 'Decided' or 'Decisions already made' section"},
|
|
61
|
+
{"name": "open_questions_section", "description": "Contains an 'Open questions' or 'Gaps' section"},
|
|
62
|
+
{"name": "no_basic_questions", "description": "Does NOT ask basic 'what are you building?' questions already answered by the paste"},
|
|
63
|
+
{"name": "confirmation_gate", "description": "Ends with a confirmation question before proceeding to research"},
|
|
64
|
+
{"name": "no_research_sprint", "description": "Does NOT dispatch research agents or present suggestions on this turn"},
|
|
65
|
+
{"name": "hard_stop", "description": "Stops after asking for confirmation — does not proceed"}
|
|
66
|
+
]
|
|
67
|
+
},
|
|
68
|
+
{
|
|
69
|
+
"id": 4,
|
|
70
|
+
"name": "brain-dump-turn2-research",
|
|
71
|
+
"prompt": "Yeah that looks right, go ahead and research it",
|
|
72
|
+
"expected_output": "First research sprint in brain dump mode: novelty map, suggestions with citations, challenges",
|
|
73
|
+
"files": ["brain-dump-input.md"],
|
|
74
|
+
"multi_turn_context": "Turn 2 of brain dump. Turn 1: user pasted eng-buddy brain dump, skill extracted structured summary, user now confirms.",
|
|
75
|
+
"assertions": [
|
|
76
|
+
{"name": "three_agents_dispatched", "description": "Dispatches 3 parallel research agents searching for specific brain dump claims"},
|
|
77
|
+
{"name": "novelty_map_present", "description": "Contains a Novelty Map table with solved/partially solved/novel verdicts"},
|
|
78
|
+
{"name": "real_citations", "description": "At least 5 unique URLs to real repos/articles/products"},
|
|
79
|
+
{"name": "brain_dump_claims_researched", "description": "Agents searched for specific architectural claims from the dump, not just broad topic searches"},
|
|
80
|
+
{"name": "challenge_present", "description": "At least one challenge/pushback raised"},
|
|
81
|
+
{"name": "ends_with_question", "description": "Ends with 1-2 targeted questions"},
|
|
82
|
+
{"name": "hard_stop", "description": "Does NOT proceed past questions"}
|
|
83
|
+
]
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
"id": 5,
|
|
87
|
+
"name": "phase3-gate",
|
|
88
|
+
"prompt": "Ok I think I know what I want. Let's turn this into a plan.",
|
|
89
|
+
"expected_output": "Vision summary for approval, NOT the full plan yet",
|
|
90
|
+
"files": [],
|
|
91
|
+
"multi_turn_context": "Turn 6+ of brainstorming. Previous turns explored study-buddy app, settled on React Native + Firebase, matching algorithm, cold-start solution. User now wants to move to planning.",
|
|
92
|
+
"assertions": [
|
|
93
|
+
{"name": "vision_summary", "description": "Presents a brief 'here's what we've landed on' summary before generating the full plan"},
|
|
94
|
+
{"name": "approval_gate", "description": "Asks for explicit confirmation before proceeding to full plan generation"},
|
|
95
|
+
{"name": "does_not_dump_full_plan", "description": "Does NOT generate the entire task breakdown, agent assignments, and wave structure in this message"},
|
|
96
|
+
{"name": "references_plan_template", "description": "Reads or references references/plan-template.md for plan generation"}
|
|
97
|
+
]
|
|
98
|
+
}
|
|
99
|
+
]
|
|
100
|
+
}
|
|
@@ -1,109 +1,109 @@
|
|
|
1
|
-
description: "ftm-brainstorm behavior across multi-turn brainstorming sessions"
|
|
2
|
-
|
|
3
|
-
prompts:
|
|
4
|
-
- "{{input}}"
|
|
5
|
-
|
|
6
|
-
providers:
|
|
7
|
-
- id: "exec:claude --print"
|
|
8
|
-
label: "claude-code"
|
|
9
|
-
|
|
10
|
-
defaultTest:
|
|
11
|
-
options:
|
|
12
|
-
transformVars: "vars"
|
|
13
|
-
|
|
14
|
-
tests:
|
|
15
|
-
# Eval 0: fresh-idea-intake
|
|
16
|
-
- description: "fresh idea intake — asks 1-2 questions, no research sprint, hard stop"
|
|
17
|
-
vars:
|
|
18
|
-
input: "I'm thinking about building an app that helps people find study buddies at their university. Like Tinder but for studying."
|
|
19
|
-
assert:
|
|
20
|
-
- type: regex
|
|
21
|
-
value: "\\?"
|
|
22
|
-
description: "Response must contain at least one question"
|
|
23
|
-
- type: not-contains
|
|
24
|
-
value: "sprint"
|
|
25
|
-
description: "Does not dispatch research sprint on first turn"
|
|
26
|
-
- type: not-contains
|
|
27
|
-
value: "dispatch"
|
|
28
|
-
description: "Does not dispatch agents on first turn"
|
|
29
|
-
|
|
30
|
-
# Eval 1: fresh-idea-turn2-research
|
|
31
|
-
- description: "turn 2 — dispatches research agents, citations, labeled recommendation, challenge, ends with question"
|
|
32
|
-
vars:
|
|
33
|
-
input: "It's for college students who want to find people in the same classes to study with. Matching based on courses, study style, and schedule availability."
|
|
34
|
-
assert:
|
|
35
|
-
- type: regex
|
|
36
|
-
value: "https?://"
|
|
37
|
-
description: "Response includes at least one URL citation"
|
|
38
|
-
- type: regex
|
|
39
|
-
value: "RECOMMENDED|recommended|#1"
|
|
40
|
-
description: "At least one suggestion is labeled as recommended"
|
|
41
|
-
- type: regex
|
|
42
|
-
value: "\\?"
|
|
43
|
-
description: "Ends with a question"
|
|
44
|
-
|
|
45
|
-
# Eval 2: turn3-deeper-research
|
|
46
|
-
- description: "turn 3 — deeper research on specific concerns, builds on prior, fresh citations, challenge"
|
|
47
|
-
vars:
|
|
48
|
-
input: "I like option 2 — the React Native approach with Firebase. But I'm worried about the matching algorithm complexity. Also we need to handle the cold-start problem when a new university joins."
|
|
49
|
-
assert:
|
|
50
|
-
- type: regex
|
|
51
|
-
value: "matching|algorithm|cold.start"
|
|
52
|
-
description: "Response addresses the specific concerns raised"
|
|
53
|
-
- type: regex
|
|
54
|
-
value: "https?://"
|
|
55
|
-
description: "Response includes at least one URL citation"
|
|
56
|
-
- type: regex
|
|
57
|
-
value: "\\?"
|
|
58
|
-
description: "Ends with a question"
|
|
59
|
-
|
|
60
|
-
# Eval 3: brain-dump-intake
|
|
61
|
-
- description: "brain dump intake — extracts structure, confirmation gate, no research yet"
|
|
62
|
-
vars:
|
|
63
|
-
input: "help me build all the suggestions in this chat: [brain dump about eng-buddy]"
|
|
64
|
-
assert:
|
|
65
|
-
- type: regex
|
|
66
|
-
value: "Decided|decided|Decision|decision"
|
|
67
|
-
description: "Contains a 'Decided' section"
|
|
68
|
-
- type: regex
|
|
69
|
-
value: "open question|Open question|gap|Gap"
|
|
70
|
-
description: "Contains an open questions or gaps section"
|
|
71
|
-
- type: regex
|
|
72
|
-
value: "\\?"
|
|
73
|
-
description: "Ends with a confirmation question"
|
|
74
|
-
- type: not-contains
|
|
75
|
-
value: "dispatch"
|
|
76
|
-
description: "Does not dispatch research agents on intake turn"
|
|
77
|
-
|
|
78
|
-
# Eval 4: brain-dump-turn2-research
|
|
79
|
-
- description: "brain dump turn 2 — novelty map, research citations, challenge, question"
|
|
80
|
-
vars:
|
|
81
|
-
input: "Yeah that looks right, go ahead and research it"
|
|
82
|
-
assert:
|
|
83
|
-
- type: regex
|
|
84
|
-
value: "Novelty|novelty|novel|Novel"
|
|
85
|
-
description: "Contains a Novelty Map"
|
|
86
|
-
- type: regex
|
|
87
|
-
value: "https?://"
|
|
88
|
-
description: "Response includes at least one URL citation"
|
|
89
|
-
- type: regex
|
|
90
|
-
value: "\\?"
|
|
91
|
-
description: "Ends with a question"
|
|
92
|
-
|
|
93
|
-
# Eval 5: phase3-gate
|
|
94
|
-
- description: "phase 3 gate — vision summary and approval before generating full plan"
|
|
95
|
-
vars:
|
|
96
|
-
input: "Ok I think I know what I want. Let's turn this into a plan."
|
|
97
|
-
assert:
|
|
98
|
-
- type: regex
|
|
99
|
-
value: "summary|Summary|we've landed|landed on|here's what"
|
|
100
|
-
description: "Presents a vision summary before generating the full plan"
|
|
101
|
-
- type: regex
|
|
102
|
-
value: "confirm|Confirm|proceed|Proceed|ready|Ready|go ahead|approve"
|
|
103
|
-
description: "Asks for confirmation before proceeding to full plan"
|
|
104
|
-
- type: not-contains
|
|
105
|
-
value: "Wave 1"
|
|
106
|
-
description: "Does not dump the full plan structure prematurely"
|
|
107
|
-
- type: not-contains
|
|
108
|
-
value: "Wave 2"
|
|
109
|
-
description: "Does not dump the full plan structure prematurely"
|
|
1
|
+
description: "ftm-brainstorm behavior across multi-turn brainstorming sessions"
|
|
2
|
+
|
|
3
|
+
prompts:
|
|
4
|
+
- "{{input}}"
|
|
5
|
+
|
|
6
|
+
providers:
|
|
7
|
+
- id: "exec:claude --print"
|
|
8
|
+
label: "claude-code"
|
|
9
|
+
|
|
10
|
+
defaultTest:
|
|
11
|
+
options:
|
|
12
|
+
transformVars: "vars"
|
|
13
|
+
|
|
14
|
+
tests:
|
|
15
|
+
# Eval 0: fresh-idea-intake
|
|
16
|
+
- description: "fresh idea intake — asks 1-2 questions, no research sprint, hard stop"
|
|
17
|
+
vars:
|
|
18
|
+
input: "I'm thinking about building an app that helps people find study buddies at their university. Like Tinder but for studying."
|
|
19
|
+
assert:
|
|
20
|
+
- type: regex
|
|
21
|
+
value: "\\?"
|
|
22
|
+
description: "Response must contain at least one question"
|
|
23
|
+
- type: not-contains
|
|
24
|
+
value: "sprint"
|
|
25
|
+
description: "Does not dispatch research sprint on first turn"
|
|
26
|
+
- type: not-contains
|
|
27
|
+
value: "dispatch"
|
|
28
|
+
description: "Does not dispatch agents on first turn"
|
|
29
|
+
|
|
30
|
+
# Eval 1: fresh-idea-turn2-research
|
|
31
|
+
- description: "turn 2 — dispatches research agents, citations, labeled recommendation, challenge, ends with question"
|
|
32
|
+
vars:
|
|
33
|
+
input: "It's for college students who want to find people in the same classes to study with. Matching based on courses, study style, and schedule availability."
|
|
34
|
+
assert:
|
|
35
|
+
- type: regex
|
|
36
|
+
value: "https?://"
|
|
37
|
+
description: "Response includes at least one URL citation"
|
|
38
|
+
- type: regex
|
|
39
|
+
value: "RECOMMENDED|recommended|#1"
|
|
40
|
+
description: "At least one suggestion is labeled as recommended"
|
|
41
|
+
- type: regex
|
|
42
|
+
value: "\\?"
|
|
43
|
+
description: "Ends with a question"
|
|
44
|
+
|
|
45
|
+
# Eval 2: turn3-deeper-research
|
|
46
|
+
- description: "turn 3 — deeper research on specific concerns, builds on prior, fresh citations, challenge"
|
|
47
|
+
vars:
|
|
48
|
+
input: "I like option 2 — the React Native approach with Firebase. But I'm worried about the matching algorithm complexity. Also we need to handle the cold-start problem when a new university joins."
|
|
49
|
+
assert:
|
|
50
|
+
- type: regex
|
|
51
|
+
value: "matching|algorithm|cold.start"
|
|
52
|
+
description: "Response addresses the specific concerns raised"
|
|
53
|
+
- type: regex
|
|
54
|
+
value: "https?://"
|
|
55
|
+
description: "Response includes at least one URL citation"
|
|
56
|
+
- type: regex
|
|
57
|
+
value: "\\?"
|
|
58
|
+
description: "Ends with a question"
|
|
59
|
+
|
|
60
|
+
# Eval 3: brain-dump-intake
|
|
61
|
+
- description: "brain dump intake — extracts structure, confirmation gate, no research yet"
|
|
62
|
+
vars:
|
|
63
|
+
input: "help me build all the suggestions in this chat: [brain dump about eng-buddy]"
|
|
64
|
+
assert:
|
|
65
|
+
- type: regex
|
|
66
|
+
value: "Decided|decided|Decision|decision"
|
|
67
|
+
description: "Contains a 'Decided' section"
|
|
68
|
+
- type: regex
|
|
69
|
+
value: "open question|Open question|gap|Gap"
|
|
70
|
+
description: "Contains an open questions or gaps section"
|
|
71
|
+
- type: regex
|
|
72
|
+
value: "\\?"
|
|
73
|
+
description: "Ends with a confirmation question"
|
|
74
|
+
- type: not-contains
|
|
75
|
+
value: "dispatch"
|
|
76
|
+
description: "Does not dispatch research agents on intake turn"
|
|
77
|
+
|
|
78
|
+
# Eval 4: brain-dump-turn2-research
|
|
79
|
+
- description: "brain dump turn 2 — novelty map, research citations, challenge, question"
|
|
80
|
+
vars:
|
|
81
|
+
input: "Yeah that looks right, go ahead and research it"
|
|
82
|
+
assert:
|
|
83
|
+
- type: regex
|
|
84
|
+
value: "Novelty|novelty|novel|Novel"
|
|
85
|
+
description: "Contains a Novelty Map"
|
|
86
|
+
- type: regex
|
|
87
|
+
value: "https?://"
|
|
88
|
+
description: "Response includes at least one URL citation"
|
|
89
|
+
- type: regex
|
|
90
|
+
value: "\\?"
|
|
91
|
+
description: "Ends with a question"
|
|
92
|
+
|
|
93
|
+
# Eval 5: phase3-gate
|
|
94
|
+
- description: "phase 3 gate — vision summary and approval before generating full plan"
|
|
95
|
+
vars:
|
|
96
|
+
input: "Ok I think I know what I want. Let's turn this into a plan."
|
|
97
|
+
assert:
|
|
98
|
+
- type: regex
|
|
99
|
+
value: "summary|Summary|we've landed|landed on|here's what"
|
|
100
|
+
description: "Presents a vision summary before generating the full plan"
|
|
101
|
+
- type: regex
|
|
102
|
+
value: "confirm|Confirm|proceed|Proceed|ready|Ready|go ahead|approve"
|
|
103
|
+
description: "Asks for confirmation before proceeding to full plan"
|
|
104
|
+
- type: not-contains
|
|
105
|
+
value: "Wave 1"
|
|
106
|
+
description: "Does not dump the full plan structure prematurely"
|
|
107
|
+
- type: not-contains
|
|
108
|
+
value: "Wave 2"
|
|
109
|
+
description: "Does not dump the full plan structure prematurely"
|