npm - triflux - Versions diffs - 9.8.2 → 10.0.0-alpha.1 - Mend

triflux 9.8.2 → 10.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (249) hide show

package/bin/triflux.mjs +5 -0
package/package.json +13 -46
package/skills/tfx-workspace/async-tests/run-tests.sh +203 -0
package/skills/tfx-workspace/evals/evals.json +79 -0
package/skills/tfx-workspace/iteration-1/benchmark.json +162 -0
package/skills/tfx-workspace/iteration-1/codex-gemini-remap/eval_metadata.json +11 -0
package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/grading.json +9 -0
package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/outputs/analysis.md +154 -0
package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/grading.json +9 -0
package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/outputs/analysis.md +126 -0
package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/doctor-diagnosis/eval_metadata.json +11 -0
package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/grading.json +9 -0
package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/outputs/analysis.md +119 -0
package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/grading.json +9 -0
package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/outputs/analysis.md +115 -0
package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/hub-start-sequence/eval_metadata.json +10 -0
package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/grading.json +8 -0
package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/outputs/analysis.md +86 -0
package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/grading.json +8 -0
package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/outputs/analysis.md +81 -0
package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/multi-team-creation/eval_metadata.json +12 -0
package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/grading.json +10 -0
package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/outputs/analysis.md +316 -0
package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/grading.json +10 -0
package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/outputs/analysis.md +352 -0
package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/review.html +1325 -0
package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/eval_metadata.json +12 -0
package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/grading.json +10 -0
package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/outputs/analysis.md +97 -0
package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/grading.json +10 -0
package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/outputs/analysis.md +94 -0
package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/eval_metadata.json +12 -0
package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/grading.json +10 -0
package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/outputs/analysis.md +209 -0
package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/grading.json +10 -0
package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/outputs/analysis.md +193 -0
package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-2/benchmark.json +62 -0
package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/eval_metadata.json +13 -0
package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/grading.json +11 -0
package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/outputs/analysis.md +382 -0
package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/grading.json +11 -0
package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/outputs/analysis.md +333 -0
package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/timing.json +5 -0
package/skills/tfx-workspace/iteration-2/review.html +1325 -0
package/skills/tfx-workspace/skill-snapshot/tfx-auto/SKILL.md +217 -0
package/skills/tfx-workspace/skill-snapshot/tfx-auto-codex/SKILL.md +77 -0
package/skills/tfx-workspace/skill-snapshot/tfx-codex/SKILL.md +65 -0
package/skills/tfx-workspace/skill-snapshot/tfx-doctor/SKILL.md +94 -0
package/skills/tfx-workspace/skill-snapshot/tfx-gemini/SKILL.md +82 -0
package/skills/tfx-workspace/skill-snapshot/tfx-hub/SKILL.md +133 -0
package/skills/tfx-workspace/skill-snapshot/tfx-multi/SKILL.md +426 -0
package/skills/tfx-workspace/skill-snapshot/tfx-setup/SKILL.md +101 -0
package/.claude-plugin/marketplace.json +0 -34
package/.claude-plugin/plugin.json +0 -22
package/hooks/agent-route-guard.mjs +0 -109
package/hooks/cross-review-tracker.mjs +0 -122
package/hooks/error-context.mjs +0 -148
package/hooks/hook-manager.mjs +0 -352
package/hooks/hook-orchestrator.mjs +0 -312
package/hooks/hook-registry.json +0 -213
package/hooks/hooks.json +0 -89
package/hooks/keyword-rules.json +0 -393
package/hooks/lib/resolve-root.mjs +0 -59
package/hooks/mcp-config-watcher.mjs +0 -85
package/hooks/pipeline-stop.mjs +0 -76
package/hooks/safety-guard.mjs +0 -106
package/hooks/subagent-verifier.mjs +0 -80
package/hub/assign-callbacks.mjs +0 -136
package/hub/bridge.mjs +0 -799
package/hub/delegator/contracts.mjs +0 -37
package/hub/delegator/index.mjs +0 -14
package/hub/delegator/schema/delegator-tools.schema.json +0 -250
package/hub/delegator/service.mjs +0 -307
package/hub/delegator/tool-definitions.mjs +0 -35
package/hub/fullcycle.mjs +0 -96
package/hub/hitl.mjs +0 -140
package/hub/intent.mjs +0 -198
package/hub/lib/process-utils.mjs +0 -360
package/hub/middleware/request-logger.mjs +0 -81
package/hub/paths.mjs +0 -30
package/hub/pipe.mjs +0 -582
package/hub/pipeline/gates/confidence.mjs +0 -56
package/hub/pipeline/gates/consensus.mjs +0 -94
package/hub/pipeline/gates/index.mjs +0 -5
package/hub/pipeline/gates/selfcheck.mjs +0 -82
package/hub/pipeline/index.mjs +0 -318
package/hub/pipeline/state.mjs +0 -191
package/hub/pipeline/transitions.mjs +0 -124
package/hub/public/dashboard.html +0 -355
package/hub/public/tray-icon.ico +0 -0
package/hub/public/tray-icon.png +0 -0
package/hub/quality/deslop.mjs +0 -253
package/hub/reflexion.mjs +0 -107
package/hub/research.mjs +0 -146
package/hub/router.mjs +0 -791
package/hub/routing/complexity.mjs +0 -166
package/hub/routing/index.mjs +0 -117
package/hub/routing/q-learning.mjs +0 -336
package/hub/schema.sql +0 -146
package/hub/server.mjs +0 -1000
package/hub/store.mjs +0 -807
package/hub/team/agent-map.json +0 -11
package/hub/team/ansi.mjs +0 -379
package/hub/team/backend.mjs +0 -92
package/hub/team/cli/commands/attach.mjs +0 -37
package/hub/team/cli/commands/control.mjs +0 -43
package/hub/team/cli/commands/debug.mjs +0 -74
package/hub/team/cli/commands/focus.mjs +0 -53
package/hub/team/cli/commands/interrupt.mjs +0 -36
package/hub/team/cli/commands/kill.mjs +0 -37
package/hub/team/cli/commands/list.mjs +0 -24
package/hub/team/cli/commands/send.mjs +0 -37
package/hub/team/cli/commands/start/index.mjs +0 -106
package/hub/team/cli/commands/start/parse-args.mjs +0 -130
package/hub/team/cli/commands/start/start-headless.mjs +0 -109
package/hub/team/cli/commands/start/start-in-process.mjs +0 -40
package/hub/team/cli/commands/start/start-mux.mjs +0 -73
package/hub/team/cli/commands/start/start-wt.mjs +0 -69
package/hub/team/cli/commands/status.mjs +0 -87
package/hub/team/cli/commands/stop.mjs +0 -31
package/hub/team/cli/commands/task.mjs +0 -30
package/hub/team/cli/commands/tasks.mjs +0 -13
package/hub/team/cli/help.mjs +0 -42
package/hub/team/cli/index.mjs +0 -41
package/hub/team/cli/manifest.mjs +0 -29
package/hub/team/cli/render.mjs +0 -30
package/hub/team/cli/services/attach-fallback.mjs +0 -54
package/hub/team/cli/services/hub-client.mjs +0 -208
package/hub/team/cli/services/member-selector.mjs +0 -30
package/hub/team/cli/services/native-control.mjs +0 -118
package/hub/team/cli/services/runtime-mode.mjs +0 -62
package/hub/team/cli/services/state-store.mjs +0 -48
package/hub/team/cli/services/task-model.mjs +0 -30
package/hub/team/codex-compat.mjs +0 -78
package/hub/team/dashboard-anchor.mjs +0 -14
package/hub/team/dashboard-layout.mjs +0 -33
package/hub/team/dashboard-open.mjs +0 -153
package/hub/team/dashboard.mjs +0 -274
package/hub/team/handoff.mjs +0 -303
package/hub/team/headless.mjs +0 -858
package/hub/team/native-supervisor.mjs +0 -392
package/hub/team/native.mjs +0 -649
package/hub/team/nativeProxy.mjs +0 -680
package/hub/team/orchestrator.mjs +0 -161
package/hub/team/pane.mjs +0 -154
package/hub/team/psmux.mjs +0 -1354
package/hub/team/routing.mjs +0 -223
package/hub/team/session.mjs +0 -611
package/hub/team/shared.mjs +0 -13
package/hub/team/staleState.mjs +0 -361
package/hub/team/tui-lite.mjs +0 -380
package/hub/team/tui-viewer.mjs +0 -463
package/hub/team/tui.mjs +0 -1245
package/hub/token-mode.mjs +0 -224
package/hub/tools.mjs +0 -554
package/hub/tray.mjs +0 -375
package/hub/workers/claude-worker.mjs +0 -423
package/hub/workers/codex-mcp.mjs +0 -410
package/hub/workers/delegator-mcp.mjs +0 -1076
package/hub/workers/factory.mjs +0 -21
package/hub/workers/gemini-worker.mjs +0 -429
package/hub/workers/interface.mjs +0 -40
package/hub/workers/worker-utils.mjs +0 -26
package/hud/colors.mjs +0 -88
package/hud/constants.mjs +0 -81
package/hud/hud-qos-status.mjs +0 -206
package/hud/providers/claude.mjs +0 -309
package/hud/providers/codex.mjs +0 -151
package/hud/providers/gemini.mjs +0 -320
package/hud/renderers.mjs +0 -424
package/hud/terminal.mjs +0 -140
package/hud/utils.mjs +0 -287
package/scripts/__tests__/keyword-detector.test.mjs +0 -234
package/scripts/__tests__/mcp-guard-engine.test.mjs +0 -118
package/scripts/__tests__/remote-spawn-transfer.test.mjs +0 -117
package/scripts/__tests__/remote-spawn.test.mjs +0 -92
package/scripts/__tests__/smoke.test.mjs +0 -34
package/scripts/cache-buildup.mjs +0 -30
package/scripts/cache-doctor.mjs +0 -149
package/scripts/cache-warmup.mjs +0 -557
package/scripts/claude-logged.ps1 +0 -54
package/scripts/cli-route.sh +0 -3
package/scripts/completions/tfx.bash +0 -47
package/scripts/completions/tfx.fish +0 -44
package/scripts/completions/tfx.zsh +0 -83
package/scripts/cross-review-gate.mjs +0 -126
package/scripts/cross-review-tracker.mjs +0 -238
package/scripts/demo-tui.mjs +0 -59
package/scripts/headless-guard-fast.sh +0 -21
package/scripts/headless-guard.mjs +0 -354
package/scripts/hub-ensure.mjs +0 -120
package/scripts/keyword-detector.mjs +0 -272
package/scripts/keyword-rules-expander.mjs +0 -521
package/scripts/lib/context.mjs +0 -67
package/scripts/lib/cross-review-utils.mjs +0 -51
package/scripts/lib/env-probe.mjs +0 -160
package/scripts/lib/gemini-profiles.mjs +0 -85
package/scripts/lib/hook-utils.mjs +0 -14
package/scripts/lib/keyword-rules.mjs +0 -166
package/scripts/lib/logger.mjs +0 -105
package/scripts/lib/mcp-filter.mjs +0 -739
package/scripts/lib/mcp-guard-engine.mjs +0 -940
package/scripts/lib/mcp-manifest.mjs +0 -79
package/scripts/lib/mcp-server-catalog.mjs +0 -118
package/scripts/lib/psmux-info.mjs +0 -119
package/scripts/lib/remote-spawn-transfer.mjs +0 -196
package/scripts/mcp-check.mjs +0 -237
package/scripts/mcp-cleanup.ps1 +0 -17
package/scripts/mcp-gateway-config.mjs +0 -207
package/scripts/mcp-gateway-ensure.mjs +0 -85
package/scripts/mcp-gateway-integration-test.mjs +0 -228
package/scripts/mcp-gateway-start.mjs +0 -226
package/scripts/mcp-gateway-start.ps1 +0 -141
package/scripts/mcp-gateway-verify.mjs +0 -77
package/scripts/mcp-safety-guard.mjs +0 -44
package/scripts/notion-read.mjs +0 -554
package/scripts/preflight-cache.mjs +0 -68
package/scripts/preinstall.mjs +0 -96
package/scripts/psmux-safety-guard.mjs +0 -64
package/scripts/remote-spawn.mjs +0 -1289
package/scripts/run.cjs +0 -79
package/scripts/session-spawn-helper.mjs +0 -185
package/scripts/setup.mjs +0 -1527
package/scripts/test-tfx-route-no-claude-native.mjs +0 -57
package/scripts/tfx-batch-stats.mjs +0 -96
package/scripts/tfx-gate-activate.mjs +0 -89
package/scripts/tfx-route-post.mjs +0 -505
package/scripts/tfx-route-worker.mjs +0 -223
package/scripts/tfx-route.sh +0 -1956
package/scripts/tmp-cleanup.mjs +0 -74
package/scripts/token-snapshot.mjs +0 -575
package/tui/codex-profile.mjs +0 -402
package/tui/core.mjs +0 -236
package/tui/doctor.mjs +0 -328
package/tui/gemini-profile.mjs +0 -254
package/tui/setup.mjs +0 -442

package/bin/triflux.mjs CHANGED Viewed

@@ -28,6 +28,7 @@ import {
   extractManagedHookFilename, getManagedRegistryHooks, ensureHooksInSettings,
   ensureCodexHubServerConfig,
 } from "../scripts/setup.mjs";
+import { cleanupTmpFiles } from "../scripts/tmp-cleanup.mjs";
 const PKG_ROOT = dirname(dirname(fileURLToPath(import.meta.url)));
 const CLAUDE_DIR = join(homedir(), ".claude");
@@ -3531,6 +3532,10 @@ async function main() {
   const cmd = NORMALIZED_ARGS[0] || "help";
   const cmdArgs = NORMALIZED_ARGS.slice(1);
+  cleanupTmpFiles({
+    protectPaths: [process.env.HOME, process.env.USERPROFILE],
+  }).catch(() => {});
   switch (cmd) {
     case "setup":
       cmdSetup({ dryRun: cmdArgs.includes("--dry-run") });

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "triflux",
-  "version": "9.8.2",
+  "version": "10.0.0-alpha.1",
   "description": "CLI-first multi-model orchestrator for Claude Code — route tasks to Codex, Gemini, and Claude",
   "type": "module",
   "bin": {
@@ -13,57 +13,24 @@
     "tfx-doctor-tui": "bin/tfx-doctor-tui.mjs",
     "tfx-setup-tui": "bin/tfx-setup-tui.mjs"
   },
+  "engines": { "node": ">=18.0.0" },
+  "dependencies": {
+    "@triflux/core": "^10.0.0-alpha.1",
+    "@triflux/remote": "^10.0.0-alpha.1"
+  },
   "files": [
     "bin",
-    "tui",
-    "hub",
     "skills",
-    "!skills/tfx-workspace",
-    "!**/failure-reports",
-    "scripts",
-    "hooks",
-    "hud",
-    ".claude-plugin",
     "README.md",
-    "README.ko.md",
     "LICENSE"
   ],
-  "scripts": {
-    "setup": "node scripts/setup.mjs",
-    "preinstall": "node scripts/preinstall.mjs",
-    "postinstall": "node scripts/setup.mjs",
-    "test": "node --test --test-force-exit --test-concurrency=1 \"tests/**/*.test.mjs\" \"scripts/__tests__/**/*.test.mjs\"",
-    "test:unit": "node --test --test-force-exit --test-concurrency=1 tests/unit/**/*.test.mjs",
-    "test:integration": "node --test --test-force-exit --test-concurrency=1 tests/integration/**/*.test.mjs",
-    "test:route-smoke": "node --test scripts/test-tfx-route-no-claude-native.mjs"
-  },
-  "engines": {
-    "node": ">=18.0.0"
-  },
-  "repository": {
-    "type": "git",
-    "url": "git+https://github.com/tellang/triflux.git"
-  },
-  "homepage": "https://github.com/tellang/triflux#readme",
+  "keywords": ["claude-code", "plugin", "codex", "gemini", "cli-routing", "orchestration", "multi-model", "triflux", "tfx"],
   "author": "tellang",
   "license": "MIT",
-  "dependencies": {
-    "@modelcontextprotocol/sdk": "^1.27.1",
-    "better-sqlite3": "^12.6.2",
-    "pino": "^10.3.1",
-    "pino-pretty": "^13.1.3",
-    "systray2": "^2.1.4",
-    "zod": "^4.0.0"
-  },
-  "keywords": [
-    "claude-code",
-    "plugin",
-    "codex",
-    "gemini",
-    "cli-routing",
-    "orchestration",
-    "multi-model",
-    "triflux",
-    "tfx"
-  ]
+  "homepage": "https://github.com/tellang/triflux#readme",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/tellang/triflux.git",
+    "directory": "packages/triflux"
+  }
 }

package/skills/tfx-workspace/async-tests/run-tests.sh ADDED Viewed

@@ -0,0 +1,203 @@
+#!/usr/bin/env bash
+# tfx-route.sh v2.5 async job system — 통합 테스트
+set -uo pipefail
+ROUTE="scripts/tfx-route.sh"
+PASS=0
+FAIL=0
+TOTAL=0
+assert_eq() {
+  local name="$1" expected="$2" actual="$3"
+  TOTAL=$((TOTAL + 1))
+  if [[ "$actual" == *"$expected"* ]]; then
+    echo "  ✓ $name"
+    PASS=$((PASS + 1))
+  else
+    echo "  ✗ $name — expected: '$expected', got: '$actual'"
+    FAIL=$((FAIL + 1))
+  fi
+}
+assert_neq() {
+  local name="$1" unexpected="$2" actual="$3"
+  TOTAL=$((TOTAL + 1))
+  if [[ "$actual" != *"$unexpected"* ]]; then
+    echo "  ✓ $name"
+    PASS=$((PASS + 1))
+  else
+    echo "  ✗ $name — should NOT contain: '$unexpected', got: '$actual'"
+    FAIL=$((FAIL + 1))
+  fi
+}
+assert_exit() {
+  local name="$1" expected="$2" actual="$3"
+  TOTAL=$((TOTAL + 1))
+  if [[ "$actual" -eq "$expected" ]]; then
+    echo "  ✓ $name"
+    PASS=$((PASS + 1))
+  else
+    echo "  ✗ $name — expected exit=$expected, got exit=$actual"
+    FAIL=$((FAIL + 1))
+  fi
+}
+echo "═══ tfx-route.sh v2.5 Async Job System Tests ═══"
+echo ""
+# ── Test 1: --async 기본 동작 ──
+echo "Test 1: --async 기본 시작 + job_id 반환"
+JOB_ID=$(bash "$ROUTE" --async executor "echo hello" none 30 2>/dev/null)
+EC=$?
+assert_exit "exit code 0" 0 "$EC"
+TOTAL=$((TOTAL + 1))
+if [[ -n "$JOB_ID" ]]; then echo "  ✓ job_id not empty ($JOB_ID)"; PASS=$((PASS + 1)); else echo "  ✗ job_id is empty"; FAIL=$((FAIL + 1)); fi
+assert_neq "job_id not error" "error" "$JOB_ID"
+echo ""
+# ── Test 2: --job-status running → done 전이 ──
+echo "Test 2: --job-status 상태 전이 (running → done)"
+LONG_JOB=$(bash "$ROUTE" --async executor "sleep 3 && echo done" none 60 2>/dev/null)
+STATUS_EARLY=$(bash "$ROUTE" --job-status "$LONG_JOB" 2>/dev/null)
+assert_eq "initial status: running" "running" "$STATUS_EARLY"
+# Codex 시작 ~10초 + sleep 3초 + 후처리 → 최대 25초 대기
+for i in $(seq 1 5); do
+  sleep 5
+  STATUS_LATE=$(bash "$ROUTE" --job-status "$LONG_JOB" 2>/dev/null)
+  [[ "$STATUS_LATE" == "done" ]] && break
+done
+assert_eq "final status: done" "done" "$STATUS_LATE"
+echo ""
+# ── Test 3: --job-status 존재하지 않는 job ──
+echo "Test 3: --job-status 존재하지 않는 job"
+RESULT=$(bash "$ROUTE" --job-status "nonexistent-12345" 2>/dev/null)
+EC=$?
+assert_eq "returns error" "error" "$RESULT"
+assert_exit "exit code 1" 1 "$EC"
+echo ""
+# ── Test 4: --job-result 완료된 job ──
+echo "Test 4: --job-result 완료된 job 결과 읽기"
+# Test 1의 JOB_ID 재사용 — Codex 완료 대기
+for i in $(seq 1 6); do
+  S=$(bash "$ROUTE" --job-status "$JOB_ID" 2>/dev/null)
+  [[ "$S" == "done" ]] && break
+  sleep 5
+done
+RESULT=$(bash "$ROUTE" --job-result "$JOB_ID" 2>/dev/null)
+EC=$?
+assert_exit "exit code 0" 0 "$EC"
+TOTAL=$((TOTAL + 1))
+if [[ -n "$RESULT" ]]; then echo "  ✓ result not empty (${#RESULT} bytes)"; PASS=$((PASS + 1)); else echo "  ✗ result is empty"; FAIL=$((FAIL + 1)); fi
+assert_neq "result not error" "error:" "$RESULT"
+echo ""
+# ── Test 5: --job-result 아직 실행 중인 job ──
+echo "Test 5: --job-result 실행 중인 job → 에러"
+RUNNING_JOB=$(bash "$ROUTE" --async executor "sleep 30" none 60 2>/dev/null)
+RESULT=$(bash "$ROUTE" --job-result "$RUNNING_JOB" 2>/dev/null)
+EC=$?
+assert_eq "returns error" "error: job still running" "$RESULT"
+assert_exit "exit code 1" 1 "$EC"
+# cleanup
+JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$RUNNING_JOB"
+[[ -f "$JOB_DIR/pid" ]] && kill "$(cat "$JOB_DIR/pid")" 2>/dev/null
+echo ""
+# ── Test 6: --job-wait 완료 감지 ──
+echo "Test 6: --job-wait 완료 감지"
+WAIT_JOB=$(bash "$ROUTE" --async executor "echo wait-test-ok" none 30 2>/dev/null)
+sleep 15  # codex 실행 대기
+WAIT_RESULT=$(bash "$ROUTE" --job-wait "$WAIT_JOB" 60 2>/dev/null)
+assert_eq "wait returns done" "done" "$WAIT_RESULT"
+echo ""
+# ── Test 7: --job-wait still_running (max_wait < 실행시간) ──
+echo "Test 7: --job-wait still_running (짧은 max_wait)"
+SLOW_JOB=$(bash "$ROUTE" --async executor "sleep 60" none 120 2>/dev/null)
+sleep 1
+WAIT_RESULT=$(bash "$ROUTE" --job-wait "$SLOW_JOB" 5 2>/dev/null)
+assert_eq "wait returns still_running" "still_running" "$WAIT_RESULT"
+# cleanup
+JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$SLOW_JOB"
+[[ -f "$JOB_DIR/pid" ]] && kill "$(cat "$JOB_DIR/pid")" 2>/dev/null
+echo ""
+# ── Test 8: exit code 전파 ──
+echo "Test 8: 실패한 job의 exit code 전파"
+FAIL_JOB=$(bash "$ROUTE" --async executor "exit 42" none 30 2>/dev/null)
+# Codex 완료 대기
+for i in $(seq 1 8); do
+  S=$(bash "$ROUTE" --job-status "$FAIL_JOB" 2>/dev/null)
+  [[ "$S" != *"running"* ]] && break
+  sleep 5
+done
+STATUS=$(bash "$ROUTE" --job-status "$FAIL_JOB" 2>/dev/null)
+# Codex가 exit 42를 감싸서 성공/실패 둘 다 가능 — "running이 아님"만 확인
+TOTAL=$((TOTAL + 1))
+if [[ "$STATUS" == "done" || "$STATUS" == *"failed"* || "$STATUS" == "timeout" ]]; then
+  echo "  ✓ status is terminal: $STATUS"; PASS=$((PASS + 1))
+else
+  echo "  ✗ status not terminal: $STATUS"; FAIL=$((FAIL + 1))
+fi
+# Codex는 exit 42를 감싸서 다른 코드로 반환할 수 있음 — 완료 자체만 확인
+TOTAL=$((TOTAL + 1))
+if [[ "$STATUS" != *"running"* ]]; then echo "  ✓ job completed (not stuck running)"; PASS=$((PASS + 1)); else echo "  ✗ job still running"; FAIL=$((FAIL + 1)); fi
+echo ""
+# ── Test 9: job 디렉토리 구조 검증 ──
+echo "Test 9: job 디렉토리 구조"
+STRUCT_JOB=$(bash "$ROUTE" --async executor "echo structure-test" none 30 2>/dev/null)
+JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$STRUCT_JOB"
+assert_eq "pid file exists" "true" "$([ -f "$JOB_DIR/pid" ] && echo true || echo false)"
+assert_eq "agent_type file exists" "true" "$([ -f "$JOB_DIR/agent_type" ] && echo true || echo false)"
+assert_eq "start_time file exists" "true" "$([ -f "$JOB_DIR/start_time" ] && echo true || echo false)"
+AGENT=$(cat "$JOB_DIR/agent_type" 2>/dev/null)
+assert_eq "agent_type == executor" "executor" "$AGENT"
+echo ""
+# ── Test 10: native.mjs 프롬프트 검증 ──
+echo "Test 10: native.mjs buildSlimWrapperPrompt async 키워드"
+PROMPT_CHECK=$(node -e "
+import('./hub/team/native.mjs').then(m => {
+  const p = m.buildSlimWrapperPrompt('codex', {
+    subtask: 'test task',
+    role: 'scientist',
+    teamName: 'test-team',
+    taskId: 'task-1',
+    agentName: 'codex-worker-1',
+  });
+  const checks = {
+    has_async: p.includes('--async'),
+    has_job_wait: p.includes('--job-wait'),
+    has_job_result: p.includes('--job-result'),
+    has_route_timeout: p.includes('auto 1800'),
+    no_old_bashTimeout: !p.includes('timeout: 1860000'),
+    has_launch_timeout: p.includes('timeout: 15000'),
+    has_wait_timeout: p.includes('timeout: 570000'),
+    has_result_timeout: p.includes('timeout: 30000'),
+  };
+  for (const [k, v] of Object.entries(checks)) {
+    console.log(k + '=' + v);
+  }
+});
+" 2>/dev/null)
+for line in $PROMPT_CHECK; do
+  key="${line%%=*}"
+  val="${line##*=}"
+  assert_eq "$key" "true" "$val"
+done
+echo ""
+# ── 결과 요약 ──
+echo "═══════════════════════════════════════════════════"
+echo "  Results: $PASS/$TOTAL passed, $FAIL failed"
+echo "═══════════════════════════════════════════════════"
+if [[ "$FAIL" -gt 0 ]]; then
+  exit 1
+fi
+exit 0

package/skills/tfx-workspace/evals/evals.json ADDED Viewed

@@ -0,0 +1,79 @@
+{
+  "skill_name": "tfx-skills-suite",
+  "evals": [
+    {
+      "id": 1,
+      "prompt": "You are a Claude Code agent. Read the tfx-auto skill definition, then explain how you would handle this user request: '/implement JWT 인증 미들웨어 추가해줘'. List the EXACT bash commands you would run. Do NOT actually execute them.",
+      "expected_output": "Should route to executor agent via tfx-route.sh with 'implement' MCP profile. Command: bash ~/.claude/scripts/tfx-route.sh executor 'JWT 인증 미들웨어 추가해줘' implement",
+      "files": [],
+      "expectations": [
+        "Routes to 'executor' agent (not architect, not analyst)",
+        "Uses 'implement' MCP profile",
+        "Generates correct tfx-route.sh command syntax",
+        "Does NOT trigger triage (single command shortcut)",
+        "Does NOT delegate to tfx-multi"
+      ]
+    },
+    {
+      "id": 2,
+      "prompt": "You are a Claude Code agent. Read the tfx-auto skill definition, then explain how you would handle: '/tfx-auto 프론트엔드 리팩터링하고 보안 리뷰도 해줘'. List all routing decisions, triage steps, and delegation.",
+      "expected_output": "Should enter auto triage mode, classify via Codex, decompose into 2+ subtasks, then delegate to tfx-multi Phase 3",
+      "files": [],
+      "expectations": [
+        "Identifies this as auto mode (not command shortcut)",
+        "Triggers Codex classification step",
+        "Decomposes into at least 2 subtasks",
+        "Notes delegation to tfx-multi for subtasks >= 2",
+        "Does NOT try to execute all subtasks directly"
+      ]
+    },
+    {
+      "id": 3,
+      "prompt": "You are a Claude Code agent. Read the tfx-multi skill definition, then explain step-by-step how you would handle: '/tfx-multi 인증 리팩터링 + UI 개선 + 보안 리뷰'. List all TeamCreate, TaskCreate, Agent calls with exact parameters.",
+      "expected_output": "Should create team, 3 TaskCreates, 3 Agent spawns with slim wrapper structure following Phase 0-5",
+      "files": [],
+      "expectations": [
+        "Creates exactly one TeamCreate with tfx- prefix naming",
+        "Creates 3 TaskCreate calls (one per subtask)",
+        "Spawns 3 Agent wrappers with mode: bypassPermissions",
+        "Uses tfx-route.sh inside Agent wrapper (not direct codex/gemini)",
+        "Includes Phase 5 cleanup (TeamDelete)"
+      ]
+    },
+    {
+      "id": 4,
+      "prompt": "You are a Claude Code agent. Read the tfx-doctor skill definition, then explain how you would handle: 'HUD가 안 보이고 codex도 안 되는데 어떻게 해?'. List exact commands and reasoning.",
+      "expected_output": "Should suggest running triflux doctor first, then triflux doctor --fix if issues found",
+      "files": [],
+      "expectations": [
+        "Runs 'triflux doctor' as first diagnostic step",
+        "Suggests '--fix' mode for auto-repair",
+        "Mentions HUD and CLI path checks in explanation",
+        "Does NOT jump straight to --reset (that's for cache only)"
+      ]
+    },
+    {
+      "id": 5,
+      "prompt": "You are a Claude Code agent. Read the tfx-hub skill definition, then explain how you would handle: '/tfx-hub start'. List exact commands.",
+      "expected_output": "Should run 'node hub/server.mjs' in background",
+      "files": [],
+      "expectations": [
+        "Runs 'node hub/server.mjs' with run_in_background=true",
+        "Mentions port 27888 and /mcp endpoint",
+        "Does NOT try to run any triage or routing"
+      ]
+    },
+    {
+      "id": 6,
+      "prompt": "You are a Claude Code agent. Read the tfx-codex skill definition, then explain the Gemini-to-Codex remapping. For '/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘', list the routing showing how designer/writer get remapped.",
+      "expected_output": "designer remapped to Codex(high), writer to Codex Spark(spark_fast), TFX_CLI_MODE=codex env var",
+      "files": [],
+      "expectations": [
+        "designer remapped to Codex with effort: high",
+        "writer remapped to Codex Spark with effort: spark_fast",
+        "Sets TFX_CLI_MODE=codex environment variable",
+        "Changes MCP profile: designer->implement, writer->analyze"
+      ]
+    }
+  ]
+}

package/skills/tfx-workspace/iteration-1/benchmark.json ADDED Viewed

@@ -0,0 +1,162 @@
+{
+  "metadata": {
+    "skill_name": "tfx-skills-suite",
+    "skill_path": "C:/Users/SSAFY/Desktop/Projects/cli/triflux/skills",
+    "executor_model": "claude-sonnet-4-6",
+    "analyzer_model": "claude-opus-4-6",
+    "timestamp": "2026-03-19T10:00:00Z",
+    "evals_run": [1, 2, 3, 4, 5, 6],
+    "runs_per_configuration": 1
+  },
+  "runs": [
+    {
+      "eval_id": 1, "eval_name": "routing-implement-shortcut", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 43.6, "tokens": 16303, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Routes to executor agent", "passed": true, "evidence": "Correctly mapped from implement shortcut table"},
+        {"text": "Uses implement MCP profile", "passed": true, "evidence": "Mapped from shortcut table"},
+        {"text": "Generates correct tfx-route.sh command", "passed": true, "evidence": "bash ~/.claude/scripts/tfx-route.sh executor '...' implement"},
+        {"text": "Does NOT trigger triage", "passed": true, "evidence": "Command shortcut skips triage"},
+        {"text": "Does NOT delegate to tfx-multi", "passed": true, "evidence": "No subtask decomposition occurred"}
+      ]
+    },
+    {
+      "eval_id": 1, "eval_name": "routing-implement-shortcut", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 48.1, "tokens": 16436, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Routes to executor agent", "passed": true, "evidence": "Correctly mapped"},
+        {"text": "Uses implement MCP profile", "passed": true, "evidence": "Assigned by shortcut table"},
+        {"text": "Generates correct tfx-route.sh command", "passed": true, "evidence": "Correct syntax generated"},
+        {"text": "Does NOT trigger triage", "passed": true, "evidence": "Shortcut mode skips triage"},
+        {"text": "Does NOT delegate to tfx-multi", "passed": true, "evidence": "No delegation"}
+      ]
+    },
+    {
+      "eval_id": 2, "eval_name": "routing-multi-task-triage", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 58.2, "tokens": 17584, "tool_calls": 3, "errors": 0},
+      "expectations": [
+        {"text": "Identifies as auto mode", "passed": true, "evidence": "No shortcut match, auto mode selected"},
+        {"text": "Triggers Codex classification", "passed": true, "evidence": "Codex --full-auto classification triggered"},
+        {"text": "Decomposes into 2+ subtasks", "passed": true, "evidence": "2 subtasks: executor + security-reviewer"},
+        {"text": "Notes tfx-multi delegation", "passed": true, "evidence": "subtasks.length >= 2 triggers tfx-multi Phase 3"},
+        {"text": "Does NOT execute directly", "passed": true, "evidence": "Delegates to tfx-multi"}
+      ]
+    },
+    {
+      "eval_id": 2, "eval_name": "routing-multi-task-triage", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 77.2, "tokens": 18626, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Identifies as auto mode", "passed": true, "evidence": "Auto mode selected"},
+        {"text": "Triggers Codex classification", "passed": true, "evidence": "Codex --full-auto triggered"},
+        {"text": "Decomposes into 2+ subtasks", "passed": true, "evidence": "2 subtasks decomposed"},
+        {"text": "Notes tfx-multi delegation", "passed": true, "evidence": "Hands off to tfx-multi Phase 3"},
+        {"text": "Does NOT execute directly", "passed": true, "evidence": "Delegates correctly"}
+      ]
+    },
+    {
+      "eval_id": 3, "eval_name": "multi-team-creation", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 115.3, "tokens": 27197, "tool_calls": 3, "errors": 0},
+      "expectations": [
+        {"text": "Creates TeamCreate with tfx- prefix", "passed": true, "evidence": "TeamCreate({ team_name: 'tfx-<base36>' })"},
+        {"text": "Creates 3 TaskCreate calls", "passed": true, "evidence": "3x TaskCreate with metadata"},
+        {"text": "Spawns 3 Agent wrappers with bypassPermissions", "passed": true, "evidence": "3x Agent({ mode: bypassPermissions })"},
+        {"text": "Uses tfx-route.sh inside wrappers", "passed": true, "evidence": "Direct codex/gemini calls prohibited"},
+        {"text": "Includes Phase 5 TeamDelete", "passed": true, "evidence": "TeamDelete always runs, max 30s wait"}
+      ]
+    },
+    {
+      "eval_id": 3, "eval_name": "multi-team-creation", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 100.6, "tokens": 26140, "tool_calls": 3, "errors": 0},
+      "expectations": [
+        {"text": "Creates TeamCreate with tfx- prefix", "passed": true, "evidence": "TeamCreate with tfx-<id>"},
+        {"text": "Creates 3 TaskCreate calls", "passed": true, "evidence": "Three TaskCreate calls"},
+        {"text": "Spawns 3 Agent wrappers with bypassPermissions", "passed": true, "evidence": "mode: bypassPermissions in all 3"},
+        {"text": "Uses tfx-route.sh inside wrappers", "passed": true, "evidence": "Never direct codex/gemini calls"},
+        {"text": "Includes Phase 5 TeamDelete", "passed": true, "evidence": "TeamDelete unconditionally"}
+      ]
+    },
+    {
+      "eval_id": 4, "eval_name": "doctor-diagnosis", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 53.8, "tokens": 14499, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Runs triflux doctor first", "passed": true, "evidence": "Bash(\"triflux doctor\")"},
+        {"text": "Suggests --fix mode", "passed": true, "evidence": "Suggests after diagnosis report"},
+        {"text": "Mentions HUD and CLI checks", "passed": true, "evidence": "HUD and CLI paths checked"},
+        {"text": "Does NOT jump to --reset", "passed": true, "evidence": "--reset reserved for explicit request"}
+      ]
+    },
+    {
+      "eval_id": 4, "eval_name": "doctor-diagnosis", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 48.3, "tokens": 14482, "tool_calls": 3, "errors": 0},
+      "expectations": [
+        {"text": "Runs triflux doctor first", "passed": true, "evidence": "Bash(\"triflux doctor\")"},
+        {"text": "Suggests --fix mode", "passed": true, "evidence": "Offers --fix after diagnosis"},
+        {"text": "Mentions HUD and CLI checks", "passed": true, "evidence": "All 8 diagnostics listed"},
+        {"text": "Does NOT jump to --reset", "passed": true, "evidence": "--reset reserved for explicit request"}
+      ]
+    },
+    {
+      "eval_id": 5, "eval_name": "hub-start-sequence", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 3, "failed": 0, "total": 3, "time_seconds": 47.2, "tokens": 14821, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Runs node hub/server.mjs in background", "passed": true, "evidence": "Bash(\"node hub/server.mjs\", run_in_background=true)"},
+        {"text": "Mentions port 27888 and /mcp", "passed": true, "evidence": "Port 27888, http://127.0.0.1:27888/mcp"},
+        {"text": "No triage or routing attempted", "passed": true, "evidence": "Command match, not fallthrough"}
+      ]
+    },
+    {
+      "eval_id": 5, "eval_name": "hub-start-sequence", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 3, "failed": 0, "total": 3, "time_seconds": 51.8, "tokens": 14904, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Runs node hub/server.mjs in background", "passed": true, "evidence": "Bash(\"node hub/server.mjs\", run_in_background=true)"},
+        {"text": "Mentions port 27888 and /mcp", "passed": true, "evidence": "Port 27888, endpoint /mcp"},
+        {"text": "No triage or routing attempted", "passed": true, "evidence": "Command match, not fallthrough"}
+      ]
+    },
+    {
+      "eval_id": 6, "eval_name": "codex-gemini-remap", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 69.7, "tokens": 14889, "tool_calls": 5, "errors": 0},
+      "expectations": [
+        {"text": "designer remapped to Codex (effort: high)", "passed": true, "evidence": "designer → Codex (effort: high)"},
+        {"text": "writer remapped to Codex Spark (spark_fast)", "passed": true, "evidence": "writer → Codex Spark (effort: spark_fast)"},
+        {"text": "TFX_CLI_MODE=codex set", "passed": true, "evidence": "Set for every Phase 3 call"},
+        {"text": "MCP profiles changed", "passed": true, "evidence": "designer→implement, writer→analyze"}
+      ]
+    },
+    {
+      "eval_id": 6, "eval_name": "codex-gemini-remap", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 85.2, "tokens": 19802, "tool_calls": 7, "errors": 0},
+      "expectations": [
+        {"text": "designer remapped to Codex (effort: high)", "passed": true, "evidence": "designer → Codex (effort: high)"},
+        {"text": "writer remapped to Codex Spark (spark_fast)", "passed": true, "evidence": "writer → Codex Spark (effort: spark_fast)"},
+        {"text": "TFX_CLI_MODE=codex set", "passed": true, "evidence": "TFX_CLI_MODE set to codex"},
+        {"text": "MCP profiles changed", "passed": true, "evidence": "writer→analyze, designer→implement"}
+      ]
+    }
+  ],
+  "run_summary": {
+    "with_skill": {
+      "pass_rate": {"mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0},
+      "time_seconds": {"mean": 64.6, "stddev": 26.4, "min": 43.6, "max": 115.3},
+      "tokens": {"mean": 17549, "stddev": 4857, "min": 14499, "max": 27197}
+    },
+    "without_skill": {
+      "pass_rate": {"mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0},
+      "time_seconds": {"mean": 68.5, "stddev": 20.4, "min": 48.1, "max": 100.6},
+      "tokens": {"mean": 18398, "stddev": 4227, "min": 14482, "max": 26140}
+    },
+    "delta": {
+      "pass_rate": "+0.00",
+      "time_seconds": "-3.9",
+      "tokens": "-849"
+    }
+  },
+  "notes": [
+    "All 26 assertions pass at 100% for both configurations — the skills are functionally correct",
+    "The fixes applied (dead reference removal, Phase numbering consistency, hub description) don't change routing logic, so pass rates are identical",
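+    "NEW version (with_skill) is marginally faster (-3.9s avg) and uses fewer tokens (-849 avg) than the OLD version (without_skill), possibly due to cleaner references reducing model confusion — but with only 1 run per configuration and per-eval differences in both directions (e.g. eval 3 is 14.7s slower with_skill), this is not conclusive",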
+    "tfx-multi is the most complex skill (115s / 27K tokens with_skill) — consider extracting reference docs to reduce context load",
+    "tfx-codex OLD references 'Phase(1~6)' which doesn't exist in tfx-auto — the NEW version correctly references the actual workflow names",
+    "All assertions pass regardless of configuration — these test the core routing logic which is unchanged. Consider adding assertions that specifically test the fixed issues (dead refs, phase naming) for differentiation"
+  ]
+}

package/skills/tfx-workspace/iteration-1/codex-gemini-remap/eval_metadata.json ADDED Viewed

@@ -0,0 +1,11 @@
+{
+  "eval_id": 6,
+  "eval_name": "codex-gemini-remap",
+  "prompt": "/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘",
+  "assertions": [
+    "designer remapped to Codex with effort: high",
+    "writer remapped to Codex Spark with effort: spark_fast",
+    "Sets TFX_CLI_MODE=codex environment variable",
+    "Changes MCP profile: designer->implement, writer->analyze"
+  ]
+}

package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/grading.json ADDED Viewed

@@ -0,0 +1,9 @@
+{
+  "expectations": [
+    {"text": "designer remapped to Codex with effort: high", "passed": true, "evidence": "Agent output: designer → Codex (effort: high)"},
+    {"text": "writer remapped to Codex Spark with effort: spark_fast", "passed": true, "evidence": "Agent output: writer → Codex Spark (effort: spark_fast)"},
+    {"text": "Sets TFX_CLI_MODE=codex environment variable", "passed": true, "evidence": "Agent output: 'TFX_CLI_MODE: Set to codex'"},
+    {"text": "Changes MCP profile: designer->implement, writer->analyze", "passed": true, "evidence": "Agent output: writer→analyze, designer→implement"}
+  ],
+  "summary": {"passed": 4, "failed": 0, "total": 4, "pass_rate": 1.0}
+}