triflux 9.8.3 → 10.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. package/package.json +13 -52
  2. package/skills/tfx-workspace/async-tests/run-tests.sh +203 -0
  3. package/skills/tfx-workspace/evals/evals.json +79 -0
  4. package/skills/tfx-workspace/iteration-1/benchmark.json +162 -0
  5. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/eval_metadata.json +11 -0
  6. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/grading.json +9 -0
  7. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/outputs/analysis.md +154 -0
  8. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/timing.json +5 -0
  9. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/grading.json +9 -0
  10. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/outputs/analysis.md +126 -0
  11. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/timing.json +5 -0
  12. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/eval_metadata.json +11 -0
  13. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/grading.json +9 -0
  14. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/outputs/analysis.md +119 -0
  15. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/timing.json +5 -0
  16. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/grading.json +9 -0
  17. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/outputs/analysis.md +115 -0
  18. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/timing.json +5 -0
  19. package/skills/tfx-workspace/iteration-1/hub-start-sequence/eval_metadata.json +10 -0
  20. package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/grading.json +8 -0
  21. package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/outputs/analysis.md +86 -0
  22. package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/timing.json +5 -0
  23. package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/grading.json +8 -0
  24. package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/outputs/analysis.md +81 -0
  25. package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/timing.json +5 -0
  26. package/skills/tfx-workspace/iteration-1/multi-team-creation/eval_metadata.json +12 -0
  27. package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/grading.json +10 -0
  28. package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/outputs/analysis.md +316 -0
  29. package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/timing.json +5 -0
  30. package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/grading.json +10 -0
  31. package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/outputs/analysis.md +352 -0
  32. package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/timing.json +5 -0
  33. package/skills/tfx-workspace/iteration-1/review.html +1325 -0
  34. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/eval_metadata.json +12 -0
  35. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/grading.json +10 -0
  36. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/outputs/analysis.md +97 -0
  37. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/timing.json +5 -0
  38. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/grading.json +10 -0
  39. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/outputs/analysis.md +94 -0
  40. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/timing.json +5 -0
  41. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/eval_metadata.json +12 -0
  42. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/grading.json +10 -0
  43. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/outputs/analysis.md +209 -0
  44. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/timing.json +5 -0
  45. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/grading.json +10 -0
  46. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/outputs/analysis.md +193 -0
  47. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/timing.json +5 -0
  48. package/skills/tfx-workspace/iteration-2/benchmark.json +62 -0
  49. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/eval_metadata.json +13 -0
  50. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/grading.json +11 -0
  51. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/outputs/analysis.md +382 -0
  52. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/timing.json +5 -0
  53. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/grading.json +11 -0
  54. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/outputs/analysis.md +333 -0
  55. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/timing.json +5 -0
  56. package/skills/tfx-workspace/iteration-2/review.html +1325 -0
  57. package/skills/tfx-workspace/skill-snapshot/tfx-auto/SKILL.md +217 -0
  58. package/skills/tfx-workspace/skill-snapshot/tfx-auto-codex/SKILL.md +77 -0
  59. package/skills/tfx-workspace/skill-snapshot/tfx-codex/SKILL.md +65 -0
  60. package/skills/tfx-workspace/skill-snapshot/tfx-doctor/SKILL.md +94 -0
  61. package/skills/tfx-workspace/skill-snapshot/tfx-gemini/SKILL.md +82 -0
  62. package/skills/tfx-workspace/skill-snapshot/tfx-hub/SKILL.md +133 -0
  63. package/skills/tfx-workspace/skill-snapshot/tfx-multi/SKILL.md +426 -0
  64. package/skills/tfx-workspace/skill-snapshot/tfx-setup/SKILL.md +101 -0
  65. package/.claude-plugin/marketplace.json +0 -34
  66. package/.claude-plugin/plugin.json +0 -22
  67. package/hooks/agent-route-guard.mjs +0 -109
  68. package/hooks/cross-review-tracker.mjs +0 -122
  69. package/hooks/error-context.mjs +0 -148
  70. package/hooks/hook-manager.mjs +0 -352
  71. package/hooks/hook-orchestrator.mjs +0 -312
  72. package/hooks/hook-registry.json +0 -213
  73. package/hooks/hooks.json +0 -89
  74. package/hooks/keyword-rules.json +0 -393
  75. package/hooks/lib/resolve-root.mjs +0 -59
  76. package/hooks/mcp-config-watcher.mjs +0 -85
  77. package/hooks/pipeline-stop.mjs +0 -76
  78. package/hooks/safety-guard.mjs +0 -106
  79. package/hooks/subagent-verifier.mjs +0 -80
  80. package/hub/assign-callbacks.mjs +0 -133
  81. package/hub/bridge.mjs +0 -799
  82. package/hub/delegator/contracts.mjs +0 -37
  83. package/hub/delegator/index.mjs +0 -14
  84. package/hub/delegator/schema/delegator-tools.schema.json +0 -250
  85. package/hub/delegator/service.mjs +0 -307
  86. package/hub/delegator/tool-definitions.mjs +0 -35
  87. package/hub/fullcycle.mjs +0 -96
  88. package/hub/hitl.mjs +0 -143
  89. package/hub/intent.mjs +0 -193
  90. package/hub/lib/process-utils.mjs +0 -361
  91. package/hub/middleware/request-logger.mjs +0 -81
  92. package/hub/paths.mjs +0 -30
  93. package/hub/pipe.mjs +0 -579
  94. package/hub/pipeline/gates/confidence.mjs +0 -56
  95. package/hub/pipeline/gates/consensus.mjs +0 -94
  96. package/hub/pipeline/gates/index.mjs +0 -5
  97. package/hub/pipeline/gates/selfcheck.mjs +0 -82
  98. package/hub/pipeline/index.mjs +0 -318
  99. package/hub/pipeline/state.mjs +0 -191
  100. package/hub/pipeline/transitions.mjs +0 -124
  101. package/hub/platform.mjs +0 -186
  102. package/hub/public/dashboard.html +0 -355
  103. package/hub/public/tray-icon.ico +0 -0
  104. package/hub/public/tray-icon.png +0 -0
  105. package/hub/quality/deslop.mjs +0 -253
  106. package/hub/reflexion.mjs +0 -107
  107. package/hub/research.mjs +0 -146
  108. package/hub/router.mjs +0 -791
  109. package/hub/routing/complexity.mjs +0 -166
  110. package/hub/routing/index.mjs +0 -117
  111. package/hub/routing/q-learning.mjs +0 -336
  112. package/hub/schema.sql +0 -146
  113. package/hub/server.mjs +0 -1112
  114. package/hub/state.mjs +0 -245
  115. package/hub/store-adapter.mjs +0 -614
  116. package/hub/store.mjs +0 -820
  117. package/hub/team/agent-map.json +0 -11
  118. package/hub/team/ansi.mjs +0 -379
  119. package/hub/team/backend.mjs +0 -92
  120. package/hub/team/cli/commands/attach.mjs +0 -37
  121. package/hub/team/cli/commands/control.mjs +0 -43
  122. package/hub/team/cli/commands/debug.mjs +0 -74
  123. package/hub/team/cli/commands/focus.mjs +0 -53
  124. package/hub/team/cli/commands/interrupt.mjs +0 -36
  125. package/hub/team/cli/commands/kill.mjs +0 -37
  126. package/hub/team/cli/commands/list.mjs +0 -24
  127. package/hub/team/cli/commands/send.mjs +0 -37
  128. package/hub/team/cli/commands/start/index.mjs +0 -106
  129. package/hub/team/cli/commands/start/parse-args.mjs +0 -130
  130. package/hub/team/cli/commands/start/start-headless.mjs +0 -109
  131. package/hub/team/cli/commands/start/start-in-process.mjs +0 -40
  132. package/hub/team/cli/commands/start/start-mux.mjs +0 -73
  133. package/hub/team/cli/commands/start/start-wt.mjs +0 -69
  134. package/hub/team/cli/commands/status.mjs +0 -87
  135. package/hub/team/cli/commands/stop.mjs +0 -31
  136. package/hub/team/cli/commands/task.mjs +0 -30
  137. package/hub/team/cli/commands/tasks.mjs +0 -13
  138. package/hub/team/cli/help.mjs +0 -42
  139. package/hub/team/cli/index.mjs +0 -41
  140. package/hub/team/cli/manifest.mjs +0 -29
  141. package/hub/team/cli/render.mjs +0 -30
  142. package/hub/team/cli/services/attach-fallback.mjs +0 -54
  143. package/hub/team/cli/services/hub-client.mjs +0 -208
  144. package/hub/team/cli/services/member-selector.mjs +0 -30
  145. package/hub/team/cli/services/native-control.mjs +0 -118
  146. package/hub/team/cli/services/runtime-mode.mjs +0 -62
  147. package/hub/team/cli/services/state-store.mjs +0 -48
  148. package/hub/team/cli/services/task-model.mjs +0 -30
  149. package/hub/team/codex-compat.mjs +0 -78
  150. package/hub/team/dashboard-anchor.mjs +0 -14
  151. package/hub/team/dashboard-layout.mjs +0 -33
  152. package/hub/team/dashboard-open.mjs +0 -153
  153. package/hub/team/dashboard.mjs +0 -274
  154. package/hub/team/handoff.mjs +0 -303
  155. package/hub/team/headless.mjs +0 -1090
  156. package/hub/team/native-supervisor.mjs +0 -392
  157. package/hub/team/native.mjs +0 -649
  158. package/hub/team/nativeProxy.mjs +0 -681
  159. package/hub/team/orchestrator.mjs +0 -161
  160. package/hub/team/pane.mjs +0 -154
  161. package/hub/team/psmux.mjs +0 -1354
  162. package/hub/team/routing.mjs +0 -223
  163. package/hub/team/session.mjs +0 -611
  164. package/hub/team/shared.mjs +0 -13
  165. package/hub/team/staleState.mjs +0 -361
  166. package/hub/team/tui-lite.mjs +0 -380
  167. package/hub/team/tui-viewer.mjs +0 -463
  168. package/hub/team/tui.mjs +0 -1245
  169. package/hub/token-mode.mjs +0 -224
  170. package/hub/tools.mjs +0 -554
  171. package/hub/tray.mjs +0 -376
  172. package/hub/workers/claude-worker.mjs +0 -475
  173. package/hub/workers/codex-mcp.mjs +0 -504
  174. package/hub/workers/delegator-mcp.mjs +0 -1076
  175. package/hub/workers/factory.mjs +0 -21
  176. package/hub/workers/gemini-worker.mjs +0 -373
  177. package/hub/workers/interface.mjs +0 -52
  178. package/hub/workers/worker-utils.mjs +0 -104
  179. package/hud/colors.mjs +0 -88
  180. package/hud/constants.mjs +0 -81
  181. package/hud/hud-qos-status.mjs +0 -206
  182. package/hud/providers/claude.mjs +0 -309
  183. package/hud/providers/codex.mjs +0 -151
  184. package/hud/providers/gemini.mjs +0 -320
  185. package/hud/renderers.mjs +0 -424
  186. package/hud/terminal.mjs +0 -140
  187. package/hud/utils.mjs +0 -287
  188. package/scripts/__tests__/keyword-detector.test.mjs +0 -234
  189. package/scripts/__tests__/mcp-guard-engine.test.mjs +0 -118
  190. package/scripts/__tests__/remote-spawn-transfer.test.mjs +0 -117
  191. package/scripts/__tests__/remote-spawn.test.mjs +0 -92
  192. package/scripts/__tests__/smoke.test.mjs +0 -34
  193. package/scripts/cache-buildup.mjs +0 -30
  194. package/scripts/cache-doctor.mjs +0 -149
  195. package/scripts/cache-warmup.mjs +0 -557
  196. package/scripts/claude-logged.ps1 +0 -54
  197. package/scripts/cli-route.sh +0 -3
  198. package/scripts/completions/tfx.bash +0 -47
  199. package/scripts/completions/tfx.fish +0 -44
  200. package/scripts/completions/tfx.zsh +0 -83
  201. package/scripts/cross-review-gate.mjs +0 -126
  202. package/scripts/cross-review-tracker.mjs +0 -238
  203. package/scripts/demo-tui.mjs +0 -59
  204. package/scripts/headless-guard-fast.sh +0 -21
  205. package/scripts/headless-guard.mjs +0 -360
  206. package/scripts/hub-ensure.mjs +0 -120
  207. package/scripts/keyword-detector.mjs +0 -272
  208. package/scripts/keyword-rules-expander.mjs +0 -521
  209. package/scripts/lib/context.mjs +0 -67
  210. package/scripts/lib/cross-review-utils.mjs +0 -51
  211. package/scripts/lib/env-probe.mjs +0 -160
  212. package/scripts/lib/gemini-profiles.mjs +0 -85
  213. package/scripts/lib/hook-utils.mjs +0 -14
  214. package/scripts/lib/keyword-rules.mjs +0 -166
  215. package/scripts/lib/logger.mjs +0 -105
  216. package/scripts/lib/mcp-filter.mjs +0 -739
  217. package/scripts/lib/mcp-guard-engine.mjs +0 -940
  218. package/scripts/lib/mcp-manifest.mjs +0 -79
  219. package/scripts/lib/mcp-server-catalog.mjs +0 -118
  220. package/scripts/lib/psmux-info.mjs +0 -119
  221. package/scripts/lib/remote-spawn-transfer.mjs +0 -196
  222. package/scripts/mcp-check.mjs +0 -237
  223. package/scripts/mcp-cleanup.ps1 +0 -17
  224. package/scripts/mcp-gateway-config.mjs +0 -207
  225. package/scripts/mcp-gateway-ensure.mjs +0 -85
  226. package/scripts/mcp-gateway-integration-test.mjs +0 -228
  227. package/scripts/mcp-gateway-start.mjs +0 -226
  228. package/scripts/mcp-gateway-start.ps1 +0 -141
  229. package/scripts/mcp-gateway-verify.mjs +0 -77
  230. package/scripts/mcp-safety-guard.mjs +0 -44
  231. package/scripts/notion-read.mjs +0 -554
  232. package/scripts/preflight-cache.mjs +0 -68
  233. package/scripts/preinstall.mjs +0 -96
  234. package/scripts/psmux-safety-guard.mjs +0 -64
  235. package/scripts/remote-spawn.mjs +0 -1289
  236. package/scripts/run.cjs +0 -79
  237. package/scripts/session-spawn-helper.mjs +0 -185
  238. package/scripts/setup.mjs +0 -838
  239. package/scripts/test-tfx-route-no-claude-native.mjs +0 -57
  240. package/scripts/tfx-batch-stats.mjs +0 -96
  241. package/scripts/tfx-gate-activate.mjs +0 -89
  242. package/scripts/tfx-route-post.mjs +0 -505
  243. package/scripts/tfx-route-worker.mjs +0 -223
  244. package/scripts/tfx-route.sh +0 -1956
  245. package/scripts/tmp-cleanup.mjs +0 -103
  246. package/scripts/token-snapshot.mjs +0 -575
  247. package/tui/codex-profile.mjs +0 -402
  248. package/tui/core.mjs +0 -236
  249. package/tui/doctor.mjs +0 -328
  250. package/tui/gemini-profile.mjs +0 -254
  251. package/tui/setup.mjs +0 -442
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "triflux",
3
- "version": "9.8.3",
3
+ "version": "10.0.0-alpha.1",
4
4
  "description": "CLI-first multi-model orchestrator for Claude Code — route tasks to Codex, Gemini, and Claude",
5
5
  "type": "module",
6
6
  "bin": {
@@ -13,63 +13,24 @@
13
13
  "tfx-doctor-tui": "bin/tfx-doctor-tui.mjs",
14
14
  "tfx-setup-tui": "bin/tfx-setup-tui.mjs"
15
15
  },
16
+ "engines": { "node": ">=18.0.0" },
17
+ "dependencies": {
18
+ "@triflux/core": "^10.0.0-alpha.1",
19
+ "@triflux/remote": "^10.0.0-alpha.1"
20
+ },
16
21
  "files": [
17
22
  "bin",
18
- "tui",
19
- "hub",
20
23
  "skills",
21
- "!skills/tfx-workspace",
22
- "!**/failure-reports",
23
- "scripts",
24
- "hooks",
25
- "hud",
26
- ".claude-plugin",
27
24
  "README.md",
28
- "README.ko.md",
29
25
  "LICENSE"
30
26
  ],
31
- "scripts": {
32
- "setup": "node scripts/setup.mjs",
33
- "preinstall": "node scripts/preinstall.mjs",
34
- "postinstall": "node scripts/setup.mjs",
35
- "lint": "biome check .",
36
- "lint:fix": "biome check --fix .",
37
- "health": "npm test && npm run lint",
38
- "test": "node --test --test-force-exit --test-concurrency=1 \"tests/**/*.test.mjs\" \"scripts/__tests__/**/*.test.mjs\"",
39
- "test:unit": "node --test --test-force-exit --test-concurrency=1 tests/unit/**/*.test.mjs",
40
- "test:integration": "node --test --test-force-exit --test-concurrency=1 tests/integration/**/*.test.mjs",
41
- "test:route-smoke": "node --test scripts/test-tfx-route-no-claude-native.mjs"
42
- },
43
- "engines": {
44
- "node": ">=18.0.0"
45
- },
46
- "repository": {
47
- "type": "git",
48
- "url": "git+https://github.com/tellang/triflux.git"
49
- },
50
- "homepage": "https://github.com/tellang/triflux#readme",
27
+ "keywords": ["claude-code", "plugin", "codex", "gemini", "cli-routing", "orchestration", "multi-model", "triflux", "tfx"],
51
28
  "author": "tellang",
52
29
  "license": "MIT",
53
- "dependencies": {
54
- "@modelcontextprotocol/sdk": "^1.27.1",
55
- "better-sqlite3": "^12.6.2",
56
- "pino": "^10.3.1",
57
- "pino-pretty": "^13.1.3",
58
- "systray2": "^2.1.4",
59
- "zod": "^4.0.0"
60
- },
61
- "devDependencies": {
62
- "@biomejs/biome": "^2.0.0"
63
- },
64
- "keywords": [
65
- "claude-code",
66
- "plugin",
67
- "codex",
68
- "gemini",
69
- "cli-routing",
70
- "orchestration",
71
- "multi-model",
72
- "triflux",
73
- "tfx"
74
- ]
30
+ "homepage": "https://github.com/tellang/triflux#readme",
31
+ "repository": {
32
+ "type": "git",
33
+ "url": "git+https://github.com/tellang/triflux.git",
34
+ "directory": "packages/triflux"
35
+ }
75
36
  }
@@ -0,0 +1,203 @@
1
+ #!/usr/bin/env bash
2
+ # tfx-route.sh v2.5 async job system — 통합 테스트
3
+ set -uo pipefail
4
+
5
+ ROUTE="scripts/tfx-route.sh"
6
+ PASS=0
7
+ FAIL=0
8
+ TOTAL=0
9
+
10
+ assert_eq() {
11
+ local name="$1" expected="$2" actual="$3"
12
+ TOTAL=$((TOTAL + 1))
13
+ if [[ "$actual" == *"$expected"* ]]; then
14
+ echo " ✓ $name"
15
+ PASS=$((PASS + 1))
16
+ else
17
+ echo " ✗ $name — expected: '$expected', got: '$actual'"
18
+ FAIL=$((FAIL + 1))
19
+ fi
20
+ }
21
+
22
+ assert_neq() {
23
+ local name="$1" unexpected="$2" actual="$3"
24
+ TOTAL=$((TOTAL + 1))
25
+ if [[ "$actual" != *"$unexpected"* ]]; then
26
+ echo " ✓ $name"
27
+ PASS=$((PASS + 1))
28
+ else
29
+ echo " ✗ $name — should NOT contain: '$unexpected', got: '$actual'"
30
+ FAIL=$((FAIL + 1))
31
+ fi
32
+ }
33
+
34
+ assert_exit() {
35
+ local name="$1" expected="$2" actual="$3"
36
+ TOTAL=$((TOTAL + 1))
37
+ if [[ "$actual" -eq "$expected" ]]; then
38
+ echo " ✓ $name"
39
+ PASS=$((PASS + 1))
40
+ else
41
+ echo " ✗ $name — expected exit=$expected, got exit=$actual"
42
+ FAIL=$((FAIL + 1))
43
+ fi
44
+ }
45
+
46
+ echo "═══ tfx-route.sh v2.5 Async Job System Tests ═══"
47
+ echo ""
48
+
49
+ # ── Test 1: --async 기본 동작 ──
50
+ echo "Test 1: --async 기본 시작 + job_id 반환"
51
+ JOB_ID=$(bash "$ROUTE" --async executor "echo hello" none 30 2>/dev/null)
52
+ EC=$?
53
+ assert_exit "exit code 0" 0 "$EC"
54
+ TOTAL=$((TOTAL + 1))
55
+ if [[ -n "$JOB_ID" ]]; then echo " ✓ job_id not empty ($JOB_ID)"; PASS=$((PASS + 1)); else echo " ✗ job_id is empty"; FAIL=$((FAIL + 1)); fi
56
+ assert_neq "job_id not error" "error" "$JOB_ID"
57
+ echo ""
58
+
59
+ # ── Test 2: --job-status running → done 전이 ──
60
+ echo "Test 2: --job-status 상태 전이 (running → done)"
61
+ LONG_JOB=$(bash "$ROUTE" --async executor "sleep 3 && echo done" none 60 2>/dev/null)
62
+ STATUS_EARLY=$(bash "$ROUTE" --job-status "$LONG_JOB" 2>/dev/null)
63
+ assert_eq "initial status: running" "running" "$STATUS_EARLY"
64
+
65
+ # Codex 시작 ~10초 + sleep 3초 + 후처리 → 최대 25초 대기
66
+ for i in $(seq 1 5); do
67
+ sleep 5
68
+ STATUS_LATE=$(bash "$ROUTE" --job-status "$LONG_JOB" 2>/dev/null)
69
+ [[ "$STATUS_LATE" == "done" ]] && break
70
+ done
71
+ assert_eq "final status: done" "done" "$STATUS_LATE"
72
+ echo ""
73
+
74
+ # ── Test 3: --job-status 존재하지 않는 job ──
75
+ echo "Test 3: --job-status 존재하지 않는 job"
76
+ RESULT=$(bash "$ROUTE" --job-status "nonexistent-12345" 2>/dev/null)
77
+ EC=$?
78
+ assert_eq "returns error" "error" "$RESULT"
79
+ assert_exit "exit code 1" 1 "$EC"
80
+ echo ""
81
+
82
+ # ── Test 4: --job-result 완료된 job ──
83
+ echo "Test 4: --job-result 완료된 job 결과 읽기"
84
+ # Test 1의 JOB_ID 재사용 — Codex 완료 대기
85
+ for i in $(seq 1 6); do
86
+ S=$(bash "$ROUTE" --job-status "$JOB_ID" 2>/dev/null)
87
+ [[ "$S" == "done" ]] && break
88
+ sleep 5
89
+ done
90
+ RESULT=$(bash "$ROUTE" --job-result "$JOB_ID" 2>/dev/null)
91
+ EC=$?
92
+ assert_exit "exit code 0" 0 "$EC"
93
+ TOTAL=$((TOTAL + 1))
94
+ if [[ -n "$RESULT" ]]; then echo " ✓ result not empty (${#RESULT} bytes)"; PASS=$((PASS + 1)); else echo " ✗ result is empty"; FAIL=$((FAIL + 1)); fi
95
+ assert_neq "result not error" "error:" "$RESULT"
96
+ echo ""
97
+
98
+ # ── Test 5: --job-result 아직 실행 중인 job ──
99
+ echo "Test 5: --job-result 실행 중인 job → 에러"
100
+ RUNNING_JOB=$(bash "$ROUTE" --async executor "sleep 30" none 60 2>/dev/null)
101
+ RESULT=$(bash "$ROUTE" --job-result "$RUNNING_JOB" 2>/dev/null)
102
+ EC=$?
103
+ assert_eq "returns error" "error: job still running" "$RESULT"
104
+ assert_exit "exit code 1" 1 "$EC"
105
+ # cleanup
106
+ JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$RUNNING_JOB"
107
+ [[ -f "$JOB_DIR/pid" ]] && kill "$(cat "$JOB_DIR/pid")" 2>/dev/null
108
+ echo ""
109
+
110
+ # ── Test 6: --job-wait 완료 감지 ──
111
+ echo "Test 6: --job-wait 완료 감지"
112
+ WAIT_JOB=$(bash "$ROUTE" --async executor "echo wait-test-ok" none 30 2>/dev/null)
113
+ sleep 15 # codex 실행 대기
114
+ WAIT_RESULT=$(bash "$ROUTE" --job-wait "$WAIT_JOB" 60 2>/dev/null)
115
+ assert_eq "wait returns done" "done" "$WAIT_RESULT"
116
+ echo ""
117
+
118
+ # ── Test 7: --job-wait still_running (max_wait < 실행시간) ──
119
+ echo "Test 7: --job-wait still_running (짧은 max_wait)"
120
+ SLOW_JOB=$(bash "$ROUTE" --async executor "sleep 60" none 120 2>/dev/null)
121
+ sleep 1
122
+ WAIT_RESULT=$(bash "$ROUTE" --job-wait "$SLOW_JOB" 5 2>/dev/null)
123
+ assert_eq "wait returns still_running" "still_running" "$WAIT_RESULT"
124
+ # cleanup
125
+ JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$SLOW_JOB"
126
+ [[ -f "$JOB_DIR/pid" ]] && kill "$(cat "$JOB_DIR/pid")" 2>/dev/null
127
+ echo ""
128
+
129
+ # ── Test 8: exit code 전파 ──
130
+ echo "Test 8: 실패한 job의 exit code 전파"
131
+ FAIL_JOB=$(bash "$ROUTE" --async executor "exit 42" none 30 2>/dev/null)
132
+ # Codex 완료 대기
133
+ for i in $(seq 1 8); do
134
+ S=$(bash "$ROUTE" --job-status "$FAIL_JOB" 2>/dev/null)
135
+ [[ "$S" != *"running"* ]] && break
136
+ sleep 5
137
+ done
138
+ STATUS=$(bash "$ROUTE" --job-status "$FAIL_JOB" 2>/dev/null)
139
+ # Codex가 exit 42를 감싸서 성공/실패 둘 다 가능 — "running이 아님"만 확인
140
+ TOTAL=$((TOTAL + 1))
141
+ if [[ "$STATUS" == "done" || "$STATUS" == *"failed"* || "$STATUS" == "timeout" ]]; then
142
+ echo " ✓ status is terminal: $STATUS"; PASS=$((PASS + 1))
143
+ else
144
+ echo " ✗ status not terminal: $STATUS"; FAIL=$((FAIL + 1))
145
+ fi
146
+ # Codex는 exit 42를 감싸서 다른 코드로 반환할 수 있음 — 완료 자체만 확인
147
+ TOTAL=$((TOTAL + 1))
148
+ if [[ "$STATUS" != *"running"* ]]; then echo " ✓ job completed (not stuck running)"; PASS=$((PASS + 1)); else echo " ✗ job still running"; FAIL=$((FAIL + 1)); fi
149
+ echo ""
150
+
151
+ # ── Test 9: job 디렉토리 구조 검증 ──
152
+ echo "Test 9: job 디렉토리 구조"
153
+ STRUCT_JOB=$(bash "$ROUTE" --async executor "echo structure-test" none 30 2>/dev/null)
154
+ JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$STRUCT_JOB"
155
+ assert_eq "pid file exists" "true" "$([ -f "$JOB_DIR/pid" ] && echo true || echo false)"
156
+ assert_eq "agent_type file exists" "true" "$([ -f "$JOB_DIR/agent_type" ] && echo true || echo false)"
157
+ assert_eq "start_time file exists" "true" "$([ -f "$JOB_DIR/start_time" ] && echo true || echo false)"
158
+ AGENT=$(cat "$JOB_DIR/agent_type" 2>/dev/null)
159
+ assert_eq "agent_type == executor" "executor" "$AGENT"
160
+ echo ""
161
+
162
+ # ── Test 10: native.mjs 프롬프트 검증 ──
163
+ echo "Test 10: native.mjs buildSlimWrapperPrompt async 키워드"
164
+ PROMPT_CHECK=$(node -e "
165
+ import('./hub/team/native.mjs').then(m => {
166
+ const p = m.buildSlimWrapperPrompt('codex', {
167
+ subtask: 'test task',
168
+ role: 'scientist',
169
+ teamName: 'test-team',
170
+ taskId: 'task-1',
171
+ agentName: 'codex-worker-1',
172
+ });
173
+ const checks = {
174
+ has_async: p.includes('--async'),
175
+ has_job_wait: p.includes('--job-wait'),
176
+ has_job_result: p.includes('--job-result'),
177
+ has_route_timeout: p.includes('auto 1800'),
178
+ no_old_bashTimeout: !p.includes('timeout: 1860000'),
179
+ has_launch_timeout: p.includes('timeout: 15000'),
180
+ has_wait_timeout: p.includes('timeout: 570000'),
181
+ has_result_timeout: p.includes('timeout: 30000'),
182
+ };
183
+ for (const [k, v] of Object.entries(checks)) {
184
+ console.log(k + '=' + v);
185
+ }
186
+ });
187
+ " 2>/dev/null)
188
+ for line in $PROMPT_CHECK; do
189
+ key="${line%%=*}"
190
+ val="${line##*=}"
191
+ assert_eq "$key" "true" "$val"
192
+ done
193
+ echo ""
194
+
195
+ # ── 결과 요약 ──
196
+ echo "═══════════════════════════════════════════════════"
197
+ echo " Results: $PASS/$TOTAL passed, $FAIL failed"
198
+ echo "═══════════════════════════════════════════════════"
199
+
200
+ if [[ "$FAIL" -gt 0 ]]; then
201
+ exit 1
202
+ fi
203
+ exit 0
@@ -0,0 +1,79 @@
1
+ {
2
+ "skill_name": "tfx-skills-suite",
3
+ "evals": [
4
+ {
5
+ "id": 1,
6
+ "prompt": "You are a Claude Code agent. Read the tfx-auto skill definition, then explain how you would handle this user request: '/implement JWT 인증 미들웨어 추가해줘'. List the EXACT bash commands you would run. Do NOT actually execute them.",
7
+ "expected_output": "Should route to executor agent via tfx-route.sh with 'implement' MCP profile. Command: bash ~/.claude/scripts/tfx-route.sh executor 'JWT 인증 미들웨어 추가해줘' implement",
8
+ "files": [],
9
+ "expectations": [
10
+ "Routes to 'executor' agent (not architect, not analyst)",
11
+ "Uses 'implement' MCP profile",
12
+ "Generates correct tfx-route.sh command syntax",
13
+ "Does NOT trigger triage (single command shortcut)",
14
+ "Does NOT delegate to tfx-multi"
15
+ ]
16
+ },
17
+ {
18
+ "id": 2,
19
+ "prompt": "You are a Claude Code agent. Read the tfx-auto skill definition, then explain how you would handle: '/tfx-auto 프론트엔드 리팩터링하고 보안 리뷰도 해줘'. List all routing decisions, triage steps, and delegation.",
20
+ "expected_output": "Should enter auto triage mode, classify via Codex, decompose into 2+ subtasks, then delegate to tfx-multi Phase 3",
21
+ "files": [],
22
+ "expectations": [
23
+ "Identifies this as auto mode (not command shortcut)",
24
+ "Triggers Codex classification step",
25
+ "Decomposes into at least 2 subtasks",
26
+ "Notes delegation to tfx-multi for subtasks >= 2",
27
+ "Does NOT try to execute all subtasks directly"
28
+ ]
29
+ },
30
+ {
31
+ "id": 3,
32
+ "prompt": "You are a Claude Code agent. Read the tfx-multi skill definition, then explain step-by-step how you would handle: '/tfx-multi 인증 리팩터링 + UI 개선 + 보안 리뷰'. List all TeamCreate, TaskCreate, Agent calls with exact parameters.",
33
+ "expected_output": "Should create team, 3 TaskCreates, 3 Agent spawns with slim wrapper structure following Phase 0-5",
34
+ "files": [],
35
+ "expectations": [
36
+ "Creates exactly one TeamCreate with tfx- prefix naming",
37
+ "Creates 3 TaskCreate calls (one per subtask)",
38
+ "Spawns 3 Agent wrappers with mode: bypassPermissions",
39
+ "Uses tfx-route.sh inside Agent wrapper (not direct codex/gemini)",
40
+ "Includes Phase 5 cleanup (TeamDelete)"
41
+ ]
42
+ },
43
+ {
44
+ "id": 4,
45
+ "prompt": "You are a Claude Code agent. Read the tfx-doctor skill definition, then explain how you would handle: 'HUD가 안 보이고 codex도 안 되는데 어떻게 해?'. List exact commands and reasoning.",
46
+ "expected_output": "Should suggest running triflux doctor first, then triflux doctor --fix if issues found",
47
+ "files": [],
48
+ "expectations": [
49
+ "Runs 'triflux doctor' as first diagnostic step",
50
+ "Suggests '--fix' mode for auto-repair",
51
+ "Mentions HUD and CLI path checks in explanation",
52
+ "Does NOT jump straight to --reset (that's for cache only)"
53
+ ]
54
+ },
55
+ {
56
+ "id": 5,
57
+ "prompt": "You are a Claude Code agent. Read the tfx-hub skill definition, then explain how you would handle: '/tfx-hub start'. List exact commands.",
58
+ "expected_output": "Should run 'node hub/server.mjs' in background",
59
+ "files": [],
60
+ "expectations": [
61
+ "Runs 'node hub/server.mjs' with run_in_background=true",
62
+ "Mentions port 27888 and /mcp endpoint",
63
+ "Does NOT try to run any triage or routing"
64
+ ]
65
+ },
66
+ {
67
+ "id": 6,
68
+ "prompt": "You are a Claude Code agent. Read the tfx-codex skill definition, then explain the Gemini-to-Codex remapping. For '/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘', list the routing showing how designer/writer get remapped.",
69
+ "expected_output": "designer remapped to Codex(high), writer to Codex Spark(spark_fast), TFX_CLI_MODE=codex env var",
70
+ "files": [],
71
+ "expectations": [
72
+ "designer remapped to Codex with effort: high",
73
+ "writer remapped to Codex Spark with effort: spark_fast",
74
+ "Sets TFX_CLI_MODE=codex environment variable",
75
+ "Changes MCP profile: designer->implement, writer->analyze"
76
+ ]
77
+ }
78
+ ]
79
+ }
@@ -0,0 +1,162 @@
1
+ {
2
+ "metadata": {
3
+ "skill_name": "tfx-skills-suite",
4
+ "skill_path": "C:/Users/SSAFY/Desktop/Projects/cli/triflux/skills",
5
+ "executor_model": "claude-sonnet-4-6",
6
+ "analyzer_model": "claude-opus-4-6",
7
+ "timestamp": "2026-03-19T10:00:00Z",
8
+ "evals_run": [1, 2, 3, 4, 5, 6],
9
+ "runs_per_configuration": 1
10
+ },
11
+ "runs": [
12
+ {
13
+ "eval_id": 1, "eval_name": "routing-implement-shortcut", "configuration": "with_skill", "run_number": 1,
14
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 43.6, "tokens": 16303, "tool_calls": 4, "errors": 0},
15
+ "expectations": [
16
+ {"text": "Routes to executor agent", "passed": true, "evidence": "Correctly mapped from implement shortcut table"},
17
+ {"text": "Uses implement MCP profile", "passed": true, "evidence": "Mapped from shortcut table"},
18
+ {"text": "Generates correct tfx-route.sh command", "passed": true, "evidence": "bash ~/.claude/scripts/tfx-route.sh executor '...' implement"},
19
+ {"text": "Does NOT trigger triage", "passed": true, "evidence": "Command shortcut skips triage"},
20
+ {"text": "Does NOT delegate to tfx-multi", "passed": true, "evidence": "No subtask decomposition occurred"}
21
+ ]
22
+ },
23
+ {
24
+ "eval_id": 1, "eval_name": "routing-implement-shortcut", "configuration": "without_skill", "run_number": 1,
25
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 48.1, "tokens": 16436, "tool_calls": 4, "errors": 0},
26
+ "expectations": [
27
+ {"text": "Routes to executor agent", "passed": true, "evidence": "Correctly mapped"},
28
+ {"text": "Uses implement MCP profile", "passed": true, "evidence": "Assigned by shortcut table"},
29
+ {"text": "Generates correct tfx-route.sh command", "passed": true, "evidence": "Correct syntax generated"},
30
+ {"text": "Does NOT trigger triage", "passed": true, "evidence": "Shortcut mode skips triage"},
31
+ {"text": "Does NOT delegate to tfx-multi", "passed": true, "evidence": "No delegation"}
32
+ ]
33
+ },
34
+ {
35
+ "eval_id": 2, "eval_name": "routing-multi-task-triage", "configuration": "with_skill", "run_number": 1,
36
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 58.2, "tokens": 17584, "tool_calls": 3, "errors": 0},
37
+ "expectations": [
38
+ {"text": "Identifies as auto mode", "passed": true, "evidence": "No shortcut match, auto mode selected"},
39
+ {"text": "Triggers Codex classification", "passed": true, "evidence": "Codex --full-auto classification triggered"},
40
+ {"text": "Decomposes into 2+ subtasks", "passed": true, "evidence": "2 subtasks: executor + security-reviewer"},
41
+ {"text": "Notes tfx-multi delegation", "passed": true, "evidence": "subtasks.length >= 2 triggers tfx-multi Phase 3"},
42
+ {"text": "Does NOT execute directly", "passed": true, "evidence": "Delegates to tfx-multi"}
43
+ ]
44
+ },
45
+ {
46
+ "eval_id": 2, "eval_name": "routing-multi-task-triage", "configuration": "without_skill", "run_number": 1,
47
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 77.2, "tokens": 18626, "tool_calls": 4, "errors": 0},
48
+ "expectations": [
49
+ {"text": "Identifies as auto mode", "passed": true, "evidence": "Auto mode selected"},
50
+ {"text": "Triggers Codex classification", "passed": true, "evidence": "Codex --full-auto triggered"},
51
+ {"text": "Decomposes into 2+ subtasks", "passed": true, "evidence": "2 subtasks decomposed"},
52
+ {"text": "Notes tfx-multi delegation", "passed": true, "evidence": "Hands off to tfx-multi Phase 3"},
53
+ {"text": "Does NOT execute directly", "passed": true, "evidence": "Delegates correctly"}
54
+ ]
55
+ },
56
+ {
57
+ "eval_id": 3, "eval_name": "multi-team-creation", "configuration": "with_skill", "run_number": 1,
58
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 115.3, "tokens": 27197, "tool_calls": 3, "errors": 0},
59
+ "expectations": [
60
+ {"text": "Creates TeamCreate with tfx- prefix", "passed": true, "evidence": "TeamCreate({ team_name: 'tfx-<base36>' })"},
61
+ {"text": "Creates 3 TaskCreate calls", "passed": true, "evidence": "3x TaskCreate with metadata"},
62
+ {"text": "Spawns 3 Agent wrappers with bypassPermissions", "passed": true, "evidence": "3x Agent({ mode: bypassPermissions })"},
63
+ {"text": "Uses tfx-route.sh inside wrappers", "passed": true, "evidence": "Direct codex/gemini calls prohibited"},
64
+ {"text": "Includes Phase 5 TeamDelete", "passed": true, "evidence": "TeamDelete always runs, max 30s wait"}
65
+ ]
66
+ },
67
+ {
68
+ "eval_id": 3, "eval_name": "multi-team-creation", "configuration": "without_skill", "run_number": 1,
69
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 100.6, "tokens": 26140, "tool_calls": 3, "errors": 0},
70
+ "expectations": [
71
+ {"text": "Creates TeamCreate with tfx- prefix", "passed": true, "evidence": "TeamCreate with tfx-<id>"},
72
+ {"text": "Creates 3 TaskCreate calls", "passed": true, "evidence": "Three TaskCreate calls"},
73
+ {"text": "Spawns 3 Agent wrappers with bypassPermissions", "passed": true, "evidence": "mode: bypassPermissions in all 3"},
74
+ {"text": "Uses tfx-route.sh inside wrappers", "passed": true, "evidence": "Never direct codex/gemini calls"},
75
+ {"text": "Includes Phase 5 TeamDelete", "passed": true, "evidence": "TeamDelete unconditionally"}
76
+ ]
77
+ },
78
+ {
79
+ "eval_id": 4, "eval_name": "doctor-diagnosis", "configuration": "with_skill", "run_number": 1,
80
+ "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 53.8, "tokens": 14499, "tool_calls": 4, "errors": 0},
81
+ "expectations": [
82
+ {"text": "Runs triflux doctor first", "passed": true, "evidence": "Bash(\"triflux doctor\")"},
83
+ {"text": "Suggests --fix mode", "passed": true, "evidence": "Suggests after diagnosis report"},
84
+ {"text": "Mentions HUD and CLI checks", "passed": true, "evidence": "HUD and CLI paths checked"},
85
+ {"text": "Does NOT jump to --reset", "passed": true, "evidence": "--reset reserved for explicit request"}
86
+ ]
87
+ },
88
+ {
89
+ "eval_id": 4, "eval_name": "doctor-diagnosis", "configuration": "without_skill", "run_number": 1,
90
+ "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 48.3, "tokens": 14482, "tool_calls": 3, "errors": 0},
91
+ "expectations": [
92
+ {"text": "Runs triflux doctor first", "passed": true, "evidence": "Bash(\"triflux doctor\")"},
93
+ {"text": "Suggests --fix mode", "passed": true, "evidence": "Offers --fix after diagnosis"},
94
+ {"text": "Mentions HUD and CLI checks", "passed": true, "evidence": "All 8 diagnostics listed"},
95
+ {"text": "Does NOT jump to --reset", "passed": true, "evidence": "--reset reserved for explicit request"}
96
+ ]
97
+ },
98
+ {
99
+ "eval_id": 5, "eval_name": "hub-start-sequence", "configuration": "with_skill", "run_number": 1,
100
+ "result": {"pass_rate": 1.0, "passed": 3, "failed": 0, "total": 3, "time_seconds": 47.2, "tokens": 14821, "tool_calls": 4, "errors": 0},
101
+ "expectations": [
102
+ {"text": "Runs node hub/server.mjs in background", "passed": true, "evidence": "Bash(\"node hub/server.mjs\", run_in_background=true)"},
103
+ {"text": "Mentions port 27888 and /mcp", "passed": true, "evidence": "Port 27888, http://127.0.0.1:27888/mcp"},
104
+ {"text": "No triage or routing attempted", "passed": true, "evidence": "Command match, not fallthrough"}
105
+ ]
106
+ },
107
+ {
108
+ "eval_id": 5, "eval_name": "hub-start-sequence", "configuration": "without_skill", "run_number": 1,
109
+ "result": {"pass_rate": 1.0, "passed": 3, "failed": 0, "total": 3, "time_seconds": 51.8, "tokens": 14904, "tool_calls": 4, "errors": 0},
110
+ "expectations": [
111
+ {"text": "Runs node hub/server.mjs in background", "passed": true, "evidence": "Bash(\"node hub/server.mjs\", run_in_background=true)"},
112
+ {"text": "Mentions port 27888 and /mcp", "passed": true, "evidence": "Port 27888, endpoint /mcp"},
113
+ {"text": "No triage or routing attempted", "passed": true, "evidence": "Command match, not fallthrough"}
114
+ ]
115
+ },
116
+ {
117
+ "eval_id": 6, "eval_name": "codex-gemini-remap", "configuration": "with_skill", "run_number": 1,
118
+ "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 69.7, "tokens": 14889, "tool_calls": 5, "errors": 0},
119
+ "expectations": [
120
+ {"text": "designer remapped to Codex (effort: high)", "passed": true, "evidence": "designer → Codex (effort: high)"},
121
+ {"text": "writer remapped to Codex Spark (spark_fast)", "passed": true, "evidence": "writer → Codex Spark (effort: spark_fast)"},
122
+ {"text": "TFX_CLI_MODE=codex set", "passed": true, "evidence": "Set for every Phase 3 call"},
123
+ {"text": "MCP profiles changed", "passed": true, "evidence": "designer→implement, writer→analyze"}
124
+ ]
125
+ },
126
+ {
127
+ "eval_id": 6, "eval_name": "codex-gemini-remap", "configuration": "without_skill", "run_number": 1,
128
+ "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 85.2, "tokens": 19802, "tool_calls": 7, "errors": 0},
129
+ "expectations": [
130
+ {"text": "designer remapped to Codex (effort: high)", "passed": true, "evidence": "designer → Codex (effort: high)"},
131
+ {"text": "writer remapped to Codex Spark (spark_fast)", "passed": true, "evidence": "writer → Codex Spark (effort: spark_fast)"},
132
+ {"text": "TFX_CLI_MODE=codex set", "passed": true, "evidence": "TFX_CLI_MODE set to codex"},
133
+ {"text": "MCP profiles changed", "passed": true, "evidence": "writer→analyze, designer→implement"}
134
+ ]
135
+ }
136
+ ],
137
+ "run_summary": {
138
+ "with_skill": {
139
+ "pass_rate": {"mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0},
140
+ "time_seconds": {"mean": 64.6, "stddev": 26.4, "min": 43.6, "max": 115.3},
141
+ "tokens": {"mean": 17549, "stddev": 4867, "min": 14499, "max": 27197}
142
+ },
143
+ "without_skill": {
144
+ "pass_rate": {"mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0},
145
+ "time_seconds": {"mean": 68.5, "stddev": 22.3, "min": 48.1, "max": 100.6},
146
+ "tokens": {"mean": 18398, "stddev": 4321, "min": 14482, "max": 26140}
147
+ },
148
+ "delta": {
149
+ "pass_rate": "+0.00",
150
+ "time_seconds": "-3.9",
151
+ "tokens": "-849"
152
+ }
153
+ },
154
+ "notes": [
155
+ "All 26 assertions pass at 100% for both configurations — the skills are functionally correct",
156
+ "The fixes applied (dead reference removal, Phase numbering consistency, hub description) don't change routing logic, so pass rates are identical",
157
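+ "NEW version (with_skill) is marginally faster (-3.9s avg) and uses fewer tokens (-849 avg) than the OLD version (without_skill), possibly due to cleaner references reducing model confusion; with only 1 run per configuration and a time stddev above 20s, this difference is within noise",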
158
+ "tfx-multi is the most complex skill (115s / 27K tokens with_skill) — consider extracting reference docs to reduce context load",
159
+ "tfx-codex OLD references 'Phase(1~6)' which doesn't exist in tfx-auto — the NEW version correctly references the actual workflow names",
160
+ "All assertions pass regardless of configuration — these test the core routing logic which is unchanged. Consider adding assertions that specifically test the fixed issues (dead refs, phase naming) for differentiation"
161
+ ]
162
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "eval_id": 6,
3
+ "eval_name": "codex-gemini-remap",
4
+ "prompt": "/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘",
5
+ "assertions": [
6
+ "designer remapped to Codex with effort: high",
7
+ "writer remapped to Codex Spark with effort: spark_fast",
8
+ "Sets TFX_CLI_MODE=codex environment variable",
9
+ "Changes MCP profile: designer->implement, writer->analyze"
10
+ ]
11
+ }
@@ -0,0 +1,9 @@
1
+ {
2
+ "expectations": [
3
+ {"text": "designer remapped to Codex with effort: high", "passed": true, "evidence": "Agent output: designer → Codex (effort: high)"},
4
+ {"text": "writer remapped to Codex Spark with effort: spark_fast", "passed": true, "evidence": "Agent output: writer → Codex Spark (effort: spark_fast)"},
5
+ {"text": "Sets TFX_CLI_MODE=codex environment variable", "passed": true, "evidence": "Agent output: 'TFX_CLI_MODE: Set to codex'"},
6
+ {"text": "Changes MCP profile: designer->implement, writer->analyze", "passed": true, "evidence": "Agent output: writer→analyze, designer→implement"}
7
+ ],
8
+ "summary": {"passed": 4, "failed": 0, "total": 4, "pass_rate": 1.0}
9
+ }