triflux 9.8.2 → 10.0.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (249)
  1. package/bin/triflux.mjs +5 -0
  2. package/package.json +13 -46
  3. package/skills/tfx-workspace/async-tests/run-tests.sh +203 -0
  4. package/skills/tfx-workspace/evals/evals.json +79 -0
  5. package/skills/tfx-workspace/iteration-1/benchmark.json +162 -0
  6. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/eval_metadata.json +11 -0
  7. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/grading.json +9 -0
  8. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/outputs/analysis.md +154 -0
  9. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/timing.json +5 -0
  10. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/grading.json +9 -0
  11. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/outputs/analysis.md +126 -0
  12. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/timing.json +5 -0
  13. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/eval_metadata.json +11 -0
  14. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/grading.json +9 -0
  15. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/outputs/analysis.md +119 -0
  16. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/timing.json +5 -0
  17. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/grading.json +9 -0
  18. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/outputs/analysis.md +115 -0
  19. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/timing.json +5 -0
  20. package/skills/tfx-workspace/iteration-1/hub-start-sequence/eval_metadata.json +10 -0
  21. package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/grading.json +8 -0
  22. package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/outputs/analysis.md +86 -0
  23. package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/timing.json +5 -0
  24. package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/grading.json +8 -0
  25. package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/outputs/analysis.md +81 -0
  26. package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/timing.json +5 -0
  27. package/skills/tfx-workspace/iteration-1/multi-team-creation/eval_metadata.json +12 -0
  28. package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/grading.json +10 -0
  29. package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/outputs/analysis.md +316 -0
  30. package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/timing.json +5 -0
  31. package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/grading.json +10 -0
  32. package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/outputs/analysis.md +352 -0
  33. package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/timing.json +5 -0
  34. package/skills/tfx-workspace/iteration-1/review.html +1325 -0
  35. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/eval_metadata.json +12 -0
  36. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/grading.json +10 -0
  37. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/outputs/analysis.md +97 -0
  38. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/timing.json +5 -0
  39. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/grading.json +10 -0
  40. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/outputs/analysis.md +94 -0
  41. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/timing.json +5 -0
  42. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/eval_metadata.json +12 -0
  43. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/grading.json +10 -0
  44. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/outputs/analysis.md +209 -0
  45. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/timing.json +5 -0
  46. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/grading.json +10 -0
  47. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/outputs/analysis.md +193 -0
  48. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/timing.json +5 -0
  49. package/skills/tfx-workspace/iteration-2/benchmark.json +62 -0
  50. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/eval_metadata.json +13 -0
  51. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/grading.json +11 -0
  52. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/outputs/analysis.md +382 -0
  53. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/timing.json +5 -0
  54. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/grading.json +11 -0
  55. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/outputs/analysis.md +333 -0
  56. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/timing.json +5 -0
  57. package/skills/tfx-workspace/iteration-2/review.html +1325 -0
  58. package/skills/tfx-workspace/skill-snapshot/tfx-auto/SKILL.md +217 -0
  59. package/skills/tfx-workspace/skill-snapshot/tfx-auto-codex/SKILL.md +77 -0
  60. package/skills/tfx-workspace/skill-snapshot/tfx-codex/SKILL.md +65 -0
  61. package/skills/tfx-workspace/skill-snapshot/tfx-doctor/SKILL.md +94 -0
  62. package/skills/tfx-workspace/skill-snapshot/tfx-gemini/SKILL.md +82 -0
  63. package/skills/tfx-workspace/skill-snapshot/tfx-hub/SKILL.md +133 -0
  64. package/skills/tfx-workspace/skill-snapshot/tfx-multi/SKILL.md +426 -0
  65. package/skills/tfx-workspace/skill-snapshot/tfx-setup/SKILL.md +101 -0
  66. package/.claude-plugin/marketplace.json +0 -34
  67. package/.claude-plugin/plugin.json +0 -22
  68. package/hooks/agent-route-guard.mjs +0 -109
  69. package/hooks/cross-review-tracker.mjs +0 -122
  70. package/hooks/error-context.mjs +0 -148
  71. package/hooks/hook-manager.mjs +0 -352
  72. package/hooks/hook-orchestrator.mjs +0 -312
  73. package/hooks/hook-registry.json +0 -213
  74. package/hooks/hooks.json +0 -89
  75. package/hooks/keyword-rules.json +0 -393
  76. package/hooks/lib/resolve-root.mjs +0 -59
  77. package/hooks/mcp-config-watcher.mjs +0 -85
  78. package/hooks/pipeline-stop.mjs +0 -76
  79. package/hooks/safety-guard.mjs +0 -106
  80. package/hooks/subagent-verifier.mjs +0 -80
  81. package/hub/assign-callbacks.mjs +0 -136
  82. package/hub/bridge.mjs +0 -799
  83. package/hub/delegator/contracts.mjs +0 -37
  84. package/hub/delegator/index.mjs +0 -14
  85. package/hub/delegator/schema/delegator-tools.schema.json +0 -250
  86. package/hub/delegator/service.mjs +0 -307
  87. package/hub/delegator/tool-definitions.mjs +0 -35
  88. package/hub/fullcycle.mjs +0 -96
  89. package/hub/hitl.mjs +0 -140
  90. package/hub/intent.mjs +0 -198
  91. package/hub/lib/process-utils.mjs +0 -360
  92. package/hub/middleware/request-logger.mjs +0 -81
  93. package/hub/paths.mjs +0 -30
  94. package/hub/pipe.mjs +0 -582
  95. package/hub/pipeline/gates/confidence.mjs +0 -56
  96. package/hub/pipeline/gates/consensus.mjs +0 -94
  97. package/hub/pipeline/gates/index.mjs +0 -5
  98. package/hub/pipeline/gates/selfcheck.mjs +0 -82
  99. package/hub/pipeline/index.mjs +0 -318
  100. package/hub/pipeline/state.mjs +0 -191
  101. package/hub/pipeline/transitions.mjs +0 -124
  102. package/hub/public/dashboard.html +0 -355
  103. package/hub/public/tray-icon.ico +0 -0
  104. package/hub/public/tray-icon.png +0 -0
  105. package/hub/quality/deslop.mjs +0 -253
  106. package/hub/reflexion.mjs +0 -107
  107. package/hub/research.mjs +0 -146
  108. package/hub/router.mjs +0 -791
  109. package/hub/routing/complexity.mjs +0 -166
  110. package/hub/routing/index.mjs +0 -117
  111. package/hub/routing/q-learning.mjs +0 -336
  112. package/hub/schema.sql +0 -146
  113. package/hub/server.mjs +0 -1000
  114. package/hub/store.mjs +0 -807
  115. package/hub/team/agent-map.json +0 -11
  116. package/hub/team/ansi.mjs +0 -379
  117. package/hub/team/backend.mjs +0 -92
  118. package/hub/team/cli/commands/attach.mjs +0 -37
  119. package/hub/team/cli/commands/control.mjs +0 -43
  120. package/hub/team/cli/commands/debug.mjs +0 -74
  121. package/hub/team/cli/commands/focus.mjs +0 -53
  122. package/hub/team/cli/commands/interrupt.mjs +0 -36
  123. package/hub/team/cli/commands/kill.mjs +0 -37
  124. package/hub/team/cli/commands/list.mjs +0 -24
  125. package/hub/team/cli/commands/send.mjs +0 -37
  126. package/hub/team/cli/commands/start/index.mjs +0 -106
  127. package/hub/team/cli/commands/start/parse-args.mjs +0 -130
  128. package/hub/team/cli/commands/start/start-headless.mjs +0 -109
  129. package/hub/team/cli/commands/start/start-in-process.mjs +0 -40
  130. package/hub/team/cli/commands/start/start-mux.mjs +0 -73
  131. package/hub/team/cli/commands/start/start-wt.mjs +0 -69
  132. package/hub/team/cli/commands/status.mjs +0 -87
  133. package/hub/team/cli/commands/stop.mjs +0 -31
  134. package/hub/team/cli/commands/task.mjs +0 -30
  135. package/hub/team/cli/commands/tasks.mjs +0 -13
  136. package/hub/team/cli/help.mjs +0 -42
  137. package/hub/team/cli/index.mjs +0 -41
  138. package/hub/team/cli/manifest.mjs +0 -29
  139. package/hub/team/cli/render.mjs +0 -30
  140. package/hub/team/cli/services/attach-fallback.mjs +0 -54
  141. package/hub/team/cli/services/hub-client.mjs +0 -208
  142. package/hub/team/cli/services/member-selector.mjs +0 -30
  143. package/hub/team/cli/services/native-control.mjs +0 -118
  144. package/hub/team/cli/services/runtime-mode.mjs +0 -62
  145. package/hub/team/cli/services/state-store.mjs +0 -48
  146. package/hub/team/cli/services/task-model.mjs +0 -30
  147. package/hub/team/codex-compat.mjs +0 -78
  148. package/hub/team/dashboard-anchor.mjs +0 -14
  149. package/hub/team/dashboard-layout.mjs +0 -33
  150. package/hub/team/dashboard-open.mjs +0 -153
  151. package/hub/team/dashboard.mjs +0 -274
  152. package/hub/team/handoff.mjs +0 -303
  153. package/hub/team/headless.mjs +0 -858
  154. package/hub/team/native-supervisor.mjs +0 -392
  155. package/hub/team/native.mjs +0 -649
  156. package/hub/team/nativeProxy.mjs +0 -680
  157. package/hub/team/orchestrator.mjs +0 -161
  158. package/hub/team/pane.mjs +0 -154
  159. package/hub/team/psmux.mjs +0 -1354
  160. package/hub/team/routing.mjs +0 -223
  161. package/hub/team/session.mjs +0 -611
  162. package/hub/team/shared.mjs +0 -13
  163. package/hub/team/staleState.mjs +0 -361
  164. package/hub/team/tui-lite.mjs +0 -380
  165. package/hub/team/tui-viewer.mjs +0 -463
  166. package/hub/team/tui.mjs +0 -1245
  167. package/hub/token-mode.mjs +0 -224
  168. package/hub/tools.mjs +0 -554
  169. package/hub/tray.mjs +0 -375
  170. package/hub/workers/claude-worker.mjs +0 -423
  171. package/hub/workers/codex-mcp.mjs +0 -410
  172. package/hub/workers/delegator-mcp.mjs +0 -1076
  173. package/hub/workers/factory.mjs +0 -21
  174. package/hub/workers/gemini-worker.mjs +0 -429
  175. package/hub/workers/interface.mjs +0 -40
  176. package/hub/workers/worker-utils.mjs +0 -26
  177. package/hud/colors.mjs +0 -88
  178. package/hud/constants.mjs +0 -81
  179. package/hud/hud-qos-status.mjs +0 -206
  180. package/hud/providers/claude.mjs +0 -309
  181. package/hud/providers/codex.mjs +0 -151
  182. package/hud/providers/gemini.mjs +0 -320
  183. package/hud/renderers.mjs +0 -424
  184. package/hud/terminal.mjs +0 -140
  185. package/hud/utils.mjs +0 -287
  186. package/scripts/__tests__/keyword-detector.test.mjs +0 -234
  187. package/scripts/__tests__/mcp-guard-engine.test.mjs +0 -118
  188. package/scripts/__tests__/remote-spawn-transfer.test.mjs +0 -117
  189. package/scripts/__tests__/remote-spawn.test.mjs +0 -92
  190. package/scripts/__tests__/smoke.test.mjs +0 -34
  191. package/scripts/cache-buildup.mjs +0 -30
  192. package/scripts/cache-doctor.mjs +0 -149
  193. package/scripts/cache-warmup.mjs +0 -557
  194. package/scripts/claude-logged.ps1 +0 -54
  195. package/scripts/cli-route.sh +0 -3
  196. package/scripts/completions/tfx.bash +0 -47
  197. package/scripts/completions/tfx.fish +0 -44
  198. package/scripts/completions/tfx.zsh +0 -83
  199. package/scripts/cross-review-gate.mjs +0 -126
  200. package/scripts/cross-review-tracker.mjs +0 -238
  201. package/scripts/demo-tui.mjs +0 -59
  202. package/scripts/headless-guard-fast.sh +0 -21
  203. package/scripts/headless-guard.mjs +0 -354
  204. package/scripts/hub-ensure.mjs +0 -120
  205. package/scripts/keyword-detector.mjs +0 -272
  206. package/scripts/keyword-rules-expander.mjs +0 -521
  207. package/scripts/lib/context.mjs +0 -67
  208. package/scripts/lib/cross-review-utils.mjs +0 -51
  209. package/scripts/lib/env-probe.mjs +0 -160
  210. package/scripts/lib/gemini-profiles.mjs +0 -85
  211. package/scripts/lib/hook-utils.mjs +0 -14
  212. package/scripts/lib/keyword-rules.mjs +0 -166
  213. package/scripts/lib/logger.mjs +0 -105
  214. package/scripts/lib/mcp-filter.mjs +0 -739
  215. package/scripts/lib/mcp-guard-engine.mjs +0 -940
  216. package/scripts/lib/mcp-manifest.mjs +0 -79
  217. package/scripts/lib/mcp-server-catalog.mjs +0 -118
  218. package/scripts/lib/psmux-info.mjs +0 -119
  219. package/scripts/lib/remote-spawn-transfer.mjs +0 -196
  220. package/scripts/mcp-check.mjs +0 -237
  221. package/scripts/mcp-cleanup.ps1 +0 -17
  222. package/scripts/mcp-gateway-config.mjs +0 -207
  223. package/scripts/mcp-gateway-ensure.mjs +0 -85
  224. package/scripts/mcp-gateway-integration-test.mjs +0 -228
  225. package/scripts/mcp-gateway-start.mjs +0 -226
  226. package/scripts/mcp-gateway-start.ps1 +0 -141
  227. package/scripts/mcp-gateway-verify.mjs +0 -77
  228. package/scripts/mcp-safety-guard.mjs +0 -44
  229. package/scripts/notion-read.mjs +0 -554
  230. package/scripts/preflight-cache.mjs +0 -68
  231. package/scripts/preinstall.mjs +0 -96
  232. package/scripts/psmux-safety-guard.mjs +0 -64
  233. package/scripts/remote-spawn.mjs +0 -1289
  234. package/scripts/run.cjs +0 -79
  235. package/scripts/session-spawn-helper.mjs +0 -185
  236. package/scripts/setup.mjs +0 -1527
  237. package/scripts/test-tfx-route-no-claude-native.mjs +0 -57
  238. package/scripts/tfx-batch-stats.mjs +0 -96
  239. package/scripts/tfx-gate-activate.mjs +0 -89
  240. package/scripts/tfx-route-post.mjs +0 -505
  241. package/scripts/tfx-route-worker.mjs +0 -223
  242. package/scripts/tfx-route.sh +0 -1956
  243. package/scripts/tmp-cleanup.mjs +0 -74
  244. package/scripts/token-snapshot.mjs +0 -575
  245. package/tui/codex-profile.mjs +0 -402
  246. package/tui/core.mjs +0 -236
  247. package/tui/doctor.mjs +0 -328
  248. package/tui/gemini-profile.mjs +0 -254
  249. package/tui/setup.mjs +0 -442
package/bin/triflux.mjs CHANGED
@@ -28,6 +28,7 @@ import {
28
28
  extractManagedHookFilename, getManagedRegistryHooks, ensureHooksInSettings,
29
29
  ensureCodexHubServerConfig,
30
30
  } from "../scripts/setup.mjs";
31
+ import { cleanupTmpFiles } from "../scripts/tmp-cleanup.mjs";
31
32
 
32
33
  const PKG_ROOT = dirname(dirname(fileURLToPath(import.meta.url)));
33
34
  const CLAUDE_DIR = join(homedir(), ".claude");
@@ -3531,6 +3532,10 @@ async function main() {
3531
3532
  const cmd = NORMALIZED_ARGS[0] || "help";
3532
3533
  const cmdArgs = NORMALIZED_ARGS.slice(1);
3533
3534
 
3535
+ cleanupTmpFiles({
3536
+ protectPaths: [process.env.HOME, process.env.USERPROFILE],
3537
+ }).catch(() => {});
3538
+
3534
3539
  switch (cmd) {
3535
3540
  case "setup":
3536
3541
  cmdSetup({ dryRun: cmdArgs.includes("--dry-run") });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "triflux",
3
- "version": "9.8.2",
3
+ "version": "10.0.0-alpha.1",
4
4
  "description": "CLI-first multi-model orchestrator for Claude Code — route tasks to Codex, Gemini, and Claude",
5
5
  "type": "module",
6
6
  "bin": {
@@ -13,57 +13,24 @@
13
13
  "tfx-doctor-tui": "bin/tfx-doctor-tui.mjs",
14
14
  "tfx-setup-tui": "bin/tfx-setup-tui.mjs"
15
15
  },
16
+ "engines": { "node": ">=18.0.0" },
17
+ "dependencies": {
18
+ "@triflux/core": "^10.0.0-alpha.1",
19
+ "@triflux/remote": "^10.0.0-alpha.1"
20
+ },
16
21
  "files": [
17
22
  "bin",
18
- "tui",
19
- "hub",
20
23
  "skills",
21
- "!skills/tfx-workspace",
22
- "!**/failure-reports",
23
- "scripts",
24
- "hooks",
25
- "hud",
26
- ".claude-plugin",
27
24
  "README.md",
28
- "README.ko.md",
29
25
  "LICENSE"
30
26
  ],
31
- "scripts": {
32
- "setup": "node scripts/setup.mjs",
33
- "preinstall": "node scripts/preinstall.mjs",
34
- "postinstall": "node scripts/setup.mjs",
35
- "test": "node --test --test-force-exit --test-concurrency=1 \"tests/**/*.test.mjs\" \"scripts/__tests__/**/*.test.mjs\"",
36
- "test:unit": "node --test --test-force-exit --test-concurrency=1 tests/unit/**/*.test.mjs",
37
- "test:integration": "node --test --test-force-exit --test-concurrency=1 tests/integration/**/*.test.mjs",
38
- "test:route-smoke": "node --test scripts/test-tfx-route-no-claude-native.mjs"
39
- },
40
- "engines": {
41
- "node": ">=18.0.0"
42
- },
43
- "repository": {
44
- "type": "git",
45
- "url": "git+https://github.com/tellang/triflux.git"
46
- },
47
- "homepage": "https://github.com/tellang/triflux#readme",
27
+ "keywords": ["claude-code", "plugin", "codex", "gemini", "cli-routing", "orchestration", "multi-model", "triflux", "tfx"],
48
28
  "author": "tellang",
49
29
  "license": "MIT",
50
- "dependencies": {
51
- "@modelcontextprotocol/sdk": "^1.27.1",
52
- "better-sqlite3": "^12.6.2",
53
- "pino": "^10.3.1",
54
- "pino-pretty": "^13.1.3",
55
- "systray2": "^2.1.4",
56
- "zod": "^4.0.0"
57
- },
58
- "keywords": [
59
- "claude-code",
60
- "plugin",
61
- "codex",
62
- "gemini",
63
- "cli-routing",
64
- "orchestration",
65
- "multi-model",
66
- "triflux",
67
- "tfx"
68
- ]
30
+ "homepage": "https://github.com/tellang/triflux#readme",
31
+ "repository": {
32
+ "type": "git",
33
+ "url": "git+https://github.com/tellang/triflux.git",
34
+ "directory": "packages/triflux"
35
+ }
69
36
  }
package/skills/tfx-workspace/async-tests/run-tests.sh ADDED
@@ -0,0 +1,203 @@
1
+ #!/usr/bin/env bash
2
+ # tfx-route.sh v2.5 async job system — 통합 테스트
3
+ set -uo pipefail
4
+
5
+ ROUTE="scripts/tfx-route.sh"
6
+ PASS=0
7
+ FAIL=0
8
+ TOTAL=0
9
+
10
+ assert_eq() {
11
+ local name="$1" expected="$2" actual="$3"
12
+ TOTAL=$((TOTAL + 1))
13
+ if [[ "$actual" == *"$expected"* ]]; then
14
+ echo " ✓ $name"
15
+ PASS=$((PASS + 1))
16
+ else
17
+ echo " ✗ $name — expected: '$expected', got: '$actual'"
18
+ FAIL=$((FAIL + 1))
19
+ fi
20
+ }
21
+
22
+ assert_neq() {
23
+ local name="$1" unexpected="$2" actual="$3"
24
+ TOTAL=$((TOTAL + 1))
25
+ if [[ "$actual" != *"$unexpected"* ]]; then
26
+ echo " ✓ $name"
27
+ PASS=$((PASS + 1))
28
+ else
29
+ echo " ✗ $name — should NOT contain: '$unexpected', got: '$actual'"
30
+ FAIL=$((FAIL + 1))
31
+ fi
32
+ }
33
+
34
+ assert_exit() {
35
+ local name="$1" expected="$2" actual="$3"
36
+ TOTAL=$((TOTAL + 1))
37
+ if [[ "$actual" -eq "$expected" ]]; then
38
+ echo " ✓ $name"
39
+ PASS=$((PASS + 1))
40
+ else
41
+ echo " ✗ $name — expected exit=$expected, got exit=$actual"
42
+ FAIL=$((FAIL + 1))
43
+ fi
44
+ }
45
+
46
+ echo "═══ tfx-route.sh v2.5 Async Job System Tests ═══"
47
+ echo ""
48
+
49
+ # ── Test 1: --async 기본 동작 ──
50
+ echo "Test 1: --async 기본 시작 + job_id 반환"
51
+ JOB_ID=$(bash "$ROUTE" --async executor "echo hello" none 30 2>/dev/null)
52
+ EC=$?
53
+ assert_exit "exit code 0" 0 "$EC"
54
+ TOTAL=$((TOTAL + 1))
55
+ if [[ -n "$JOB_ID" ]]; then echo " ✓ job_id not empty ($JOB_ID)"; PASS=$((PASS + 1)); else echo " ✗ job_id is empty"; FAIL=$((FAIL + 1)); fi
56
+ assert_neq "job_id not error" "error" "$JOB_ID"
57
+ echo ""
58
+
59
+ # ── Test 2: --job-status running → done 전이 ──
60
+ echo "Test 2: --job-status 상태 전이 (running → done)"
61
+ LONG_JOB=$(bash "$ROUTE" --async executor "sleep 3 && echo done" none 60 2>/dev/null)
62
+ STATUS_EARLY=$(bash "$ROUTE" --job-status "$LONG_JOB" 2>/dev/null)
63
+ assert_eq "initial status: running" "running" "$STATUS_EARLY"
64
+
65
+ # Codex 시작 ~10초 + sleep 3초 + 후처리 → 최대 25초 대기
66
+ for i in $(seq 1 5); do
67
+ sleep 5
68
+ STATUS_LATE=$(bash "$ROUTE" --job-status "$LONG_JOB" 2>/dev/null)
69
+ [[ "$STATUS_LATE" == "done" ]] && break
70
+ done
71
+ assert_eq "final status: done" "done" "$STATUS_LATE"
72
+ echo ""
73
+
74
+ # ── Test 3: --job-status 존재하지 않는 job ──
75
+ echo "Test 3: --job-status 존재하지 않는 job"
76
+ RESULT=$(bash "$ROUTE" --job-status "nonexistent-12345" 2>/dev/null)
77
+ EC=$?
78
+ assert_eq "returns error" "error" "$RESULT"
79
+ assert_exit "exit code 1" 1 "$EC"
80
+ echo ""
81
+
82
+ # ── Test 4: --job-result 완료된 job ──
83
+ echo "Test 4: --job-result 완료된 job 결과 읽기"
84
+ # Test 1의 JOB_ID 재사용 — Codex 완료 대기
85
+ for i in $(seq 1 6); do
86
+ S=$(bash "$ROUTE" --job-status "$JOB_ID" 2>/dev/null)
87
+ [[ "$S" == "done" ]] && break
88
+ sleep 5
89
+ done
90
+ RESULT=$(bash "$ROUTE" --job-result "$JOB_ID" 2>/dev/null)
91
+ EC=$?
92
+ assert_exit "exit code 0" 0 "$EC"
93
+ TOTAL=$((TOTAL + 1))
94
+ if [[ -n "$RESULT" ]]; then echo " ✓ result not empty (${#RESULT} bytes)"; PASS=$((PASS + 1)); else echo " ✗ result is empty"; FAIL=$((FAIL + 1)); fi
95
+ assert_neq "result not error" "error:" "$RESULT"
96
+ echo ""
97
+
98
+ # ── Test 5: --job-result 아직 실행 중인 job ──
99
+ echo "Test 5: --job-result 실행 중인 job → 에러"
100
+ RUNNING_JOB=$(bash "$ROUTE" --async executor "sleep 30" none 60 2>/dev/null)
101
+ RESULT=$(bash "$ROUTE" --job-result "$RUNNING_JOB" 2>/dev/null)
102
+ EC=$?
103
+ assert_eq "returns error" "error: job still running" "$RESULT"
104
+ assert_exit "exit code 1" 1 "$EC"
105
+ # cleanup
106
+ JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$RUNNING_JOB"
107
+ [[ -f "$JOB_DIR/pid" ]] && kill "$(cat "$JOB_DIR/pid")" 2>/dev/null
108
+ echo ""
109
+
110
+ # ── Test 6: --job-wait 완료 감지 ──
111
+ echo "Test 6: --job-wait 완료 감지"
112
+ WAIT_JOB=$(bash "$ROUTE" --async executor "echo wait-test-ok" none 30 2>/dev/null)
113
+ sleep 15 # codex 실행 대기
114
+ WAIT_RESULT=$(bash "$ROUTE" --job-wait "$WAIT_JOB" 60 2>/dev/null)
115
+ assert_eq "wait returns done" "done" "$WAIT_RESULT"
116
+ echo ""
117
+
118
+ # ── Test 7: --job-wait still_running (max_wait < 실행시간) ──
119
+ echo "Test 7: --job-wait still_running (짧은 max_wait)"
120
+ SLOW_JOB=$(bash "$ROUTE" --async executor "sleep 60" none 120 2>/dev/null)
121
+ sleep 1
122
+ WAIT_RESULT=$(bash "$ROUTE" --job-wait "$SLOW_JOB" 5 2>/dev/null)
123
+ assert_eq "wait returns still_running" "still_running" "$WAIT_RESULT"
124
+ # cleanup
125
+ JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$SLOW_JOB"
126
+ [[ -f "$JOB_DIR/pid" ]] && kill "$(cat "$JOB_DIR/pid")" 2>/dev/null
127
+ echo ""
128
+
129
+ # ── Test 8: exit code 전파 ──
130
+ echo "Test 8: 실패한 job의 exit code 전파"
131
+ FAIL_JOB=$(bash "$ROUTE" --async executor "exit 42" none 30 2>/dev/null)
132
+ # Codex 완료 대기
133
+ for i in $(seq 1 8); do
134
+ S=$(bash "$ROUTE" --job-status "$FAIL_JOB" 2>/dev/null)
135
+ [[ "$S" != *"running"* ]] && break
136
+ sleep 5
137
+ done
138
+ STATUS=$(bash "$ROUTE" --job-status "$FAIL_JOB" 2>/dev/null)
139
+ # Codex가 exit 42를 감싸서 성공/실패 둘 다 가능 — "running이 아님"만 확인
140
+ TOTAL=$((TOTAL + 1))
141
+ if [[ "$STATUS" == "done" || "$STATUS" == *"failed"* || "$STATUS" == "timeout" ]]; then
142
+ echo " ✓ status is terminal: $STATUS"; PASS=$((PASS + 1))
143
+ else
144
+ echo " ✗ status not terminal: $STATUS"; FAIL=$((FAIL + 1))
145
+ fi
146
+ # Codex는 exit 42를 감싸서 다른 코드로 반환할 수 있음 — 완료 자체만 확인
147
+ TOTAL=$((TOTAL + 1))
148
+ if [[ "$STATUS" != *"running"* ]]; then echo " ✓ job completed (not stuck running)"; PASS=$((PASS + 1)); else echo " ✗ job still running"; FAIL=$((FAIL + 1)); fi
149
+ echo ""
150
+
151
+ # ── Test 9: job 디렉토리 구조 검증 ──
152
+ echo "Test 9: job 디렉토리 구조"
153
+ STRUCT_JOB=$(bash "$ROUTE" --async executor "echo structure-test" none 30 2>/dev/null)
154
+ JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$STRUCT_JOB"
155
+ assert_eq "pid file exists" "true" "$([ -f "$JOB_DIR/pid" ] && echo true || echo false)"
156
+ assert_eq "agent_type file exists" "true" "$([ -f "$JOB_DIR/agent_type" ] && echo true || echo false)"
157
+ assert_eq "start_time file exists" "true" "$([ -f "$JOB_DIR/start_time" ] && echo true || echo false)"
158
+ AGENT=$(cat "$JOB_DIR/agent_type" 2>/dev/null)
159
+ assert_eq "agent_type == executor" "executor" "$AGENT"
160
+ echo ""
161
+
162
+ # ── Test 10: native.mjs 프롬프트 검증 ──
163
+ echo "Test 10: native.mjs buildSlimWrapperPrompt async 키워드"
164
+ PROMPT_CHECK=$(node -e "
165
+ import('./hub/team/native.mjs').then(m => {
166
+ const p = m.buildSlimWrapperPrompt('codex', {
167
+ subtask: 'test task',
168
+ role: 'scientist',
169
+ teamName: 'test-team',
170
+ taskId: 'task-1',
171
+ agentName: 'codex-worker-1',
172
+ });
173
+ const checks = {
174
+ has_async: p.includes('--async'),
175
+ has_job_wait: p.includes('--job-wait'),
176
+ has_job_result: p.includes('--job-result'),
177
+ has_route_timeout: p.includes('auto 1800'),
178
+ no_old_bashTimeout: !p.includes('timeout: 1860000'),
179
+ has_launch_timeout: p.includes('timeout: 15000'),
180
+ has_wait_timeout: p.includes('timeout: 570000'),
181
+ has_result_timeout: p.includes('timeout: 30000'),
182
+ };
183
+ for (const [k, v] of Object.entries(checks)) {
184
+ console.log(k + '=' + v);
185
+ }
186
+ });
187
+ " 2>/dev/null)
188
+ for line in $PROMPT_CHECK; do
189
+ key="${line%%=*}"
190
+ val="${line##*=}"
191
+ assert_eq "$key" "true" "$val"
192
+ done
193
+ echo ""
194
+
195
+ # ── 결과 요약 ──
196
+ echo "═══════════════════════════════════════════════════"
197
+ echo " Results: $PASS/$TOTAL passed, $FAIL failed"
198
+ echo "═══════════════════════════════════════════════════"
199
+
200
+ if [[ "$FAIL" -gt 0 ]]; then
201
+ exit 1
202
+ fi
203
+ exit 0
package/skills/tfx-workspace/evals/evals.json ADDED
@@ -0,0 +1,79 @@
1
+ {
2
+ "skill_name": "tfx-skills-suite",
3
+ "evals": [
4
+ {
5
+ "id": 1,
6
+ "prompt": "You are a Claude Code agent. Read the tfx-auto skill definition, then explain how you would handle this user request: '/implement JWT 인증 미들웨어 추가해줘'. List the EXACT bash commands you would run. Do NOT actually execute them.",
7
+ "expected_output": "Should route to executor agent via tfx-route.sh with 'implement' MCP profile. Command: bash ~/.claude/scripts/tfx-route.sh executor 'JWT 인증 미들웨어 추가해줘' implement",
8
+ "files": [],
9
+ "expectations": [
10
+ "Routes to 'executor' agent (not architect, not analyst)",
11
+ "Uses 'implement' MCP profile",
12
+ "Generates correct tfx-route.sh command syntax",
13
+ "Does NOT trigger triage (single command shortcut)",
14
+ "Does NOT delegate to tfx-multi"
15
+ ]
16
+ },
17
+ {
18
+ "id": 2,
19
+ "prompt": "You are a Claude Code agent. Read the tfx-auto skill definition, then explain how you would handle: '/tfx-auto 프론트엔드 리팩터링하고 보안 리뷰도 해줘'. List all routing decisions, triage steps, and delegation.",
20
+ "expected_output": "Should enter auto triage mode, classify via Codex, decompose into 2+ subtasks, then delegate to tfx-multi Phase 3",
21
+ "files": [],
22
+ "expectations": [
23
+ "Identifies this as auto mode (not command shortcut)",
24
+ "Triggers Codex classification step",
25
+ "Decomposes into at least 2 subtasks",
26
+ "Notes delegation to tfx-multi for subtasks >= 2",
27
+ "Does NOT try to execute all subtasks directly"
28
+ ]
29
+ },
30
+ {
31
+ "id": 3,
32
+ "prompt": "You are a Claude Code agent. Read the tfx-multi skill definition, then explain step-by-step how you would handle: '/tfx-multi 인증 리팩터링 + UI 개선 + 보안 리뷰'. List all TeamCreate, TaskCreate, Agent calls with exact parameters.",
33
+ "expected_output": "Should create team, 3 TaskCreates, 3 Agent spawns with slim wrapper structure following Phase 0-5",
34
+ "files": [],
35
+ "expectations": [
36
+ "Creates exactly one TeamCreate with tfx- prefix naming",
37
+ "Creates 3 TaskCreate calls (one per subtask)",
38
+ "Spawns 3 Agent wrappers with mode: bypassPermissions",
39
+ "Uses tfx-route.sh inside Agent wrapper (not direct codex/gemini)",
40
+ "Includes Phase 5 cleanup (TeamDelete)"
41
+ ]
42
+ },
43
+ {
44
+ "id": 4,
45
+ "prompt": "You are a Claude Code agent. Read the tfx-doctor skill definition, then explain how you would handle: 'HUD가 안 보이고 codex도 안 되는데 어떻게 해?'. List exact commands and reasoning.",
46
+ "expected_output": "Should suggest running triflux doctor first, then triflux doctor --fix if issues found",
47
+ "files": [],
48
+ "expectations": [
49
+ "Runs 'triflux doctor' as first diagnostic step",
50
+ "Suggests '--fix' mode for auto-repair",
51
+ "Mentions HUD and CLI path checks in explanation",
52
+ "Does NOT jump straight to --reset (that's for cache only)"
53
+ ]
54
+ },
55
+ {
56
+ "id": 5,
57
+ "prompt": "You are a Claude Code agent. Read the tfx-hub skill definition, then explain how you would handle: '/tfx-hub start'. List exact commands.",
58
+ "expected_output": "Should run 'node hub/server.mjs' in background",
59
+ "files": [],
60
+ "expectations": [
61
+ "Runs 'node hub/server.mjs' with run_in_background=true",
62
+ "Mentions port 27888 and /mcp endpoint",
63
+ "Does NOT try to run any triage or routing"
64
+ ]
65
+ },
66
+ {
67
+ "id": 6,
68
+ "prompt": "You are a Claude Code agent. Read the tfx-codex skill definition, then explain the Gemini-to-Codex remapping. For '/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘', list the routing showing how designer/writer get remapped.",
69
+ "expected_output": "designer remapped to Codex(high), writer to Codex Spark(spark_fast), TFX_CLI_MODE=codex env var",
70
+ "files": [],
71
+ "expectations": [
72
+ "designer remapped to Codex with effort: high",
73
+ "writer remapped to Codex Spark with effort: spark_fast",
74
+ "Sets TFX_CLI_MODE=codex environment variable",
75
+ "Changes MCP profile: designer->implement, writer->analyze"
76
+ ]
77
+ }
78
+ ]
79
+ }
@@ -0,0 +1,162 @@
1
+ {
2
+ "metadata": {
3
+ "skill_name": "tfx-skills-suite",
4
+ "skill_path": "C:/Users/SSAFY/Desktop/Projects/cli/triflux/skills",
5
+ "executor_model": "claude-sonnet-4-6",
6
+ "analyzer_model": "claude-opus-4-6",
7
+ "timestamp": "2026-03-19T10:00:00Z",
8
+ "evals_run": [1, 2, 3, 4, 5, 6],
9
+ "runs_per_configuration": 1
10
+ },
11
+ "runs": [
12
+ {
13
+ "eval_id": 1, "eval_name": "routing-implement-shortcut", "configuration": "with_skill", "run_number": 1,
14
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 43.6, "tokens": 16303, "tool_calls": 4, "errors": 0},
15
+ "expectations": [
16
+ {"text": "Routes to executor agent", "passed": true, "evidence": "Correctly mapped from implement shortcut table"},
17
+ {"text": "Uses implement MCP profile", "passed": true, "evidence": "Mapped from shortcut table"},
18
+ {"text": "Generates correct tfx-route.sh command", "passed": true, "evidence": "bash ~/.claude/scripts/tfx-route.sh executor '...' implement"},
19
+ {"text": "Does NOT trigger triage", "passed": true, "evidence": "Command shortcut skips triage"},
20
+ {"text": "Does NOT delegate to tfx-multi", "passed": true, "evidence": "No subtask decomposition occurred"}
21
+ ]
22
+ },
23
+ {
24
+ "eval_id": 1, "eval_name": "routing-implement-shortcut", "configuration": "without_skill", "run_number": 1,
25
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 48.1, "tokens": 16436, "tool_calls": 4, "errors": 0},
26
+ "expectations": [
27
+ {"text": "Routes to executor agent", "passed": true, "evidence": "Correctly mapped"},
28
+ {"text": "Uses implement MCP profile", "passed": true, "evidence": "Assigned by shortcut table"},
29
+ {"text": "Generates correct tfx-route.sh command", "passed": true, "evidence": "Correct syntax generated"},
30
+ {"text": "Does NOT trigger triage", "passed": true, "evidence": "Shortcut mode skips triage"},
31
+ {"text": "Does NOT delegate to tfx-multi", "passed": true, "evidence": "No delegation"}
32
+ ]
33
+ },
34
+ {
35
+ "eval_id": 2, "eval_name": "routing-multi-task-triage", "configuration": "with_skill", "run_number": 1,
36
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 58.2, "tokens": 17584, "tool_calls": 3, "errors": 0},
37
+ "expectations": [
38
+ {"text": "Identifies as auto mode", "passed": true, "evidence": "No shortcut match, auto mode selected"},
39
+ {"text": "Triggers Codex classification", "passed": true, "evidence": "Codex --full-auto classification triggered"},
40
+ {"text": "Decomposes into 2+ subtasks", "passed": true, "evidence": "2 subtasks: executor + security-reviewer"},
41
+ {"text": "Notes tfx-multi delegation", "passed": true, "evidence": "subtasks.length >= 2 triggers tfx-multi Phase 3"},
42
+ {"text": "Does NOT execute directly", "passed": true, "evidence": "Delegates to tfx-multi"}
43
+ ]
44
+ },
45
+ {
46
+ "eval_id": 2, "eval_name": "routing-multi-task-triage", "configuration": "without_skill", "run_number": 1,
47
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 77.2, "tokens": 18626, "tool_calls": 4, "errors": 0},
48
+ "expectations": [
49
+ {"text": "Identifies as auto mode", "passed": true, "evidence": "Auto mode selected"},
50
+ {"text": "Triggers Codex classification", "passed": true, "evidence": "Codex --full-auto triggered"},
51
+ {"text": "Decomposes into 2+ subtasks", "passed": true, "evidence": "2 subtasks decomposed"},
52
+ {"text": "Notes tfx-multi delegation", "passed": true, "evidence": "Hands off to tfx-multi Phase 3"},
53
+ {"text": "Does NOT execute directly", "passed": true, "evidence": "Delegates correctly"}
54
+ ]
55
+ },
56
+ {
57
+ "eval_id": 3, "eval_name": "multi-team-creation", "configuration": "with_skill", "run_number": 1,
58
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 115.3, "tokens": 27197, "tool_calls": 3, "errors": 0},
59
+ "expectations": [
60
+ {"text": "Creates TeamCreate with tfx- prefix", "passed": true, "evidence": "TeamCreate({ team_name: 'tfx-<base36>' })"},
61
+ {"text": "Creates 3 TaskCreate calls", "passed": true, "evidence": "3x TaskCreate with metadata"},
62
+ {"text": "Spawns 3 Agent wrappers with bypassPermissions", "passed": true, "evidence": "3x Agent({ mode: bypassPermissions })"},
63
+ {"text": "Uses tfx-route.sh inside wrappers", "passed": true, "evidence": "Direct codex/gemini calls prohibited"},
64
+ {"text": "Includes Phase 5 TeamDelete", "passed": true, "evidence": "TeamDelete always runs, max 30s wait"}
65
+ ]
66
+ },
67
+ {
68
+ "eval_id": 3, "eval_name": "multi-team-creation", "configuration": "without_skill", "run_number": 1,
69
+ "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 100.6, "tokens": 26140, "tool_calls": 3, "errors": 0},
70
+ "expectations": [
71
+ {"text": "Creates TeamCreate with tfx- prefix", "passed": true, "evidence": "TeamCreate with tfx-<id>"},
72
+ {"text": "Creates 3 TaskCreate calls", "passed": true, "evidence": "Three TaskCreate calls"},
73
+ {"text": "Spawns 3 Agent wrappers with bypassPermissions", "passed": true, "evidence": "mode: bypassPermissions in all 3"},
74
+ {"text": "Uses tfx-route.sh inside wrappers", "passed": true, "evidence": "Never direct codex/gemini calls"},
75
+ {"text": "Includes Phase 5 TeamDelete", "passed": true, "evidence": "TeamDelete unconditionally"}
76
+ ]
77
+ },
78
+ {
79
+ "eval_id": 4, "eval_name": "doctor-diagnosis", "configuration": "with_skill", "run_number": 1,
80
+ "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 53.8, "tokens": 14499, "tool_calls": 4, "errors": 0},
81
+ "expectations": [
82
+ {"text": "Runs triflux doctor first", "passed": true, "evidence": "Bash(\"triflux doctor\")"},
83
+ {"text": "Suggests --fix mode", "passed": true, "evidence": "Suggests after diagnosis report"},
84
+ {"text": "Mentions HUD and CLI checks", "passed": true, "evidence": "HUD and CLI paths checked"},
85
+ {"text": "Does NOT jump to --reset", "passed": true, "evidence": "--reset reserved for explicit request"}
86
+ ]
87
+ },
88
+ {
89
+ "eval_id": 4, "eval_name": "doctor-diagnosis", "configuration": "without_skill", "run_number": 1,
90
+ "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 48.3, "tokens": 14482, "tool_calls": 3, "errors": 0},
91
+ "expectations": [
92
+ {"text": "Runs triflux doctor first", "passed": true, "evidence": "Bash(\"triflux doctor\")"},
93
+ {"text": "Suggests --fix mode", "passed": true, "evidence": "Offers --fix after diagnosis"},
94
+ {"text": "Mentions HUD and CLI checks", "passed": true, "evidence": "All 8 diagnostics listed"},
95
+ {"text": "Does NOT jump to --reset", "passed": true, "evidence": "--reset reserved for explicit request"}
96
+ ]
97
+ },
98
+ {
99
+ "eval_id": 5, "eval_name": "hub-start-sequence", "configuration": "with_skill", "run_number": 1,
100
+ "result": {"pass_rate": 1.0, "passed": 3, "failed": 0, "total": 3, "time_seconds": 47.2, "tokens": 14821, "tool_calls": 4, "errors": 0},
101
+ "expectations": [
102
+ {"text": "Runs node hub/server.mjs in background", "passed": true, "evidence": "Bash(\"node hub/server.mjs\", run_in_background=true)"},
103
+ {"text": "Mentions port 27888 and /mcp", "passed": true, "evidence": "Port 27888, http://127.0.0.1:27888/mcp"},
104
+ {"text": "No triage or routing attempted", "passed": true, "evidence": "Command match, not fallthrough"}
105
+ ]
106
+ },
107
+ {
108
+ "eval_id": 5, "eval_name": "hub-start-sequence", "configuration": "without_skill", "run_number": 1,
109
+ "result": {"pass_rate": 1.0, "passed": 3, "failed": 0, "total": 3, "time_seconds": 51.8, "tokens": 14904, "tool_calls": 4, "errors": 0},
110
+ "expectations": [
111
+ {"text": "Runs node hub/server.mjs in background", "passed": true, "evidence": "Bash(\"node hub/server.mjs\", run_in_background=true)"},
112
+ {"text": "Mentions port 27888 and /mcp", "passed": true, "evidence": "Port 27888, endpoint /mcp"},
113
+ {"text": "No triage or routing attempted", "passed": true, "evidence": "Command match, not fallthrough"}
114
+ ]
115
+ },
116
+ {
117
+ "eval_id": 6, "eval_name": "codex-gemini-remap", "configuration": "with_skill", "run_number": 1,
118
+ "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 69.7, "tokens": 14889, "tool_calls": 5, "errors": 0},
119
+ "expectations": [
120
+ {"text": "designer remapped to Codex (effort: high)", "passed": true, "evidence": "designer → Codex (effort: high)"},
121
+ {"text": "writer remapped to Codex Spark (spark_fast)", "passed": true, "evidence": "writer → Codex Spark (effort: spark_fast)"},
122
+ {"text": "TFX_CLI_MODE=codex set", "passed": true, "evidence": "Set for every Phase 3 call"},
123
+ {"text": "MCP profiles changed", "passed": true, "evidence": "designer→implement, writer→analyze"}
124
+ ]
125
+ },
126
+ {
127
+ "eval_id": 6, "eval_name": "codex-gemini-remap", "configuration": "without_skill", "run_number": 1,
128
+ "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 85.2, "tokens": 19802, "tool_calls": 7, "errors": 0},
129
+ "expectations": [
130
+ {"text": "designer remapped to Codex (effort: high)", "passed": true, "evidence": "designer → Codex (effort: high)"},
131
+ {"text": "writer remapped to Codex Spark (spark_fast)", "passed": true, "evidence": "writer → Codex Spark (effort: spark_fast)"},
132
+ {"text": "TFX_CLI_MODE=codex set", "passed": true, "evidence": "TFX_CLI_MODE set to codex"},
133
+ {"text": "MCP profiles changed", "passed": true, "evidence": "writer→analyze, designer→implement"}
134
+ ]
135
+ }
136
+ ],
137
+ "run_summary": {
138
+ "with_skill": {
139
+ "pass_rate": {"mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0},
140
+ "time_seconds": {"mean": 64.6, "stddev": 26.4, "min": 43.6, "max": 115.3},
141
+ "tokens": {"mean": 17549, "stddev": 4857, "min": 14499, "max": 27197}
142
+ },
143
+ "without_skill": {
144
+ "pass_rate": {"mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0},
145
+ "time_seconds": {"mean": 68.5, "stddev": 20.4, "min": 48.1, "max": 100.6},
146
+ "tokens": {"mean": 18398, "stddev": 4227, "min": 14482, "max": 26140}
147
+ },
148
+ "delta": {
149
+ "pass_rate": "+0.00",
150
+ "time_seconds": "-3.9",
151
+ "tokens": "-849"
152
+ }
153
+ },
154
+ "notes": [
155
+ "All 26 assertions pass at 100% for both configurations — the skills are functionally correct",
156
+ "The fixes applied (dead reference removal, Phase numbering consistency, hub description) don't change routing logic, so pass rates are identical",
157
+ "NEW version is marginally faster (-3.9s avg) and uses fewer tokens (-849 avg), likely due to cleaner references reducing model confusion",
158
+ "tfx-multi is the most complex skill (115s / 27K tokens with_skill) — consider extracting reference docs to reduce context load",
159
+ "tfx-codex OLD references 'Phase(1~6)' which doesn't exist in tfx-auto — the NEW version correctly references the actual workflow names",
160
+ "All assertions pass regardless of configuration — these test the core routing logic which is unchanged. Consider adding assertions that specifically test the fixed issues (dead refs, phase naming) for differentiation"
161
+ ]
162
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "eval_id": 6,
3
+ "eval_name": "codex-gemini-remap",
4
+ "prompt": "/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘",
5
+ "assertions": [
6
+ "designer remapped to Codex with effort: high",
7
+ "writer remapped to Codex Spark with effort: spark_fast",
8
+ "Sets TFX_CLI_MODE=codex environment variable",
9
+ "Changes MCP profile: designer->implement, writer->analyze"
10
+ ]
11
+ }
@@ -0,0 +1,9 @@
1
+ {
2
+ "expectations": [
3
+ {"text": "designer remapped to Codex with effort: high", "passed": true, "evidence": "Agent output: designer → Codex (effort: high)"},
4
+ {"text": "writer remapped to Codex Spark with effort: spark_fast", "passed": true, "evidence": "Agent output: writer → Codex Spark (effort: spark_fast)"},
5
+ {"text": "Sets TFX_CLI_MODE=codex environment variable", "passed": true, "evidence": "Agent output: 'TFX_CLI_MODE: Set to codex'"},
6
+ {"text": "Changes MCP profile: designer->implement, writer->analyze", "passed": true, "evidence": "Agent output: writer→analyze, designer→implement"}
7
+ ],
8
+ "summary": {"passed": 4, "failed": 0, "total": 4, "pass_rate": 1.0}
9
+ }