triflux 10.9.19 → 10.9.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/CLAUDE.md +212 -0
  2. package/hub/lib/bash-path.mjs +73 -0
  3. package/hub/team/dashboard-open.mjs +1 -68
  4. package/hub/team/native-supervisor.mjs +9 -2
  5. package/hub/team/psmux.mjs +5 -13
  6. package/hub/team/session.mjs +6 -26
  7. package/hub/team/swarm-hypervisor.mjs +205 -27
  8. package/hub/team/synapse-http.mjs +1 -0
  9. package/hub/team/tui-core.mjs +292 -0
  10. package/hub/team/tui-lite.mjs +20 -154
  11. package/hub/team/tui-synapse.mjs +213 -0
  12. package/hub/team/tui-widgets.mjs +262 -0
  13. package/hub/team/tui.mjs +159 -255
  14. package/hub/workers/delegator-mcp.mjs +2 -2
  15. package/package.json +21 -62
  16. package/references/hosts.json +46 -0
  17. package/scripts/__tests__/keyword-detector.test.mjs +4 -4
  18. package/scripts/cross-review-gate.mjs +13 -0
  19. package/scripts/remote-spawn.mjs +11 -46
  20. package/scripts/session-spawn-helper.mjs +8 -21
  21. package/scripts/test-tfx-route-no-claude-native.mjs +4 -2
  22. package/scripts/tfx-route.sh +13 -0
  23. package/skills/tfx-deep-interview/SKILL.md +6 -6
  24. package/skills/tfx-deep-interview/SKILL.md.tmpl +6 -6
  25. package/skills/tfx-index/SKILL.md +1 -1
  26. package/skills/tfx-index/SKILL.md.tmpl +1 -1
  27. package/skills/tfx-interview/SKILL.md +9 -9
  28. package/skills/tfx-interview/SKILL.md.tmpl +9 -9
  29. package/skills/tfx-plan/SKILL.md +1 -1
  30. package/skills/tfx-plan/SKILL.md.tmpl +1 -1
  31. package/skills/tfx-research/SKILL.md +1 -1
  32. package/skills/tfx-research/SKILL.md.tmpl +1 -1
  33. package/skills/tfx-workspace/async-tests/run-tests.sh +203 -0
  34. package/skills/tfx-workspace/evals/evals.json +79 -0
  35. package/skills/tfx-workspace/iteration-1/benchmark.json +524 -0
  36. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/eval_metadata.json +11 -0
  37. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/grading.json +25 -0
  38. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/outputs/analysis.md +154 -0
  39. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/timing.json +5 -0
  40. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/grading.json +25 -0
  41. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/outputs/analysis.md +126 -0
  42. package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/timing.json +5 -0
  43. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/eval_metadata.json +11 -0
  44. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/grading.json +25 -0
  45. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/outputs/analysis.md +119 -0
  46. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/timing.json +5 -0
  47. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/grading.json +25 -0
  48. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/outputs/analysis.md +115 -0
  49. package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/timing.json +5 -0
  50. package/skills/tfx-workspace/iteration-1/hub-start-sequence/eval_metadata.json +10 -0
  51. package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/grading.json +20 -0
  52. package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/outputs/analysis.md +86 -0
  53. package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/timing.json +5 -0
  54. package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/grading.json +20 -0
  55. package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/outputs/analysis.md +81 -0
  56. package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/timing.json +5 -0
  57. package/skills/tfx-workspace/iteration-1/multi-team-creation/eval_metadata.json +12 -0
  58. package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/grading.json +30 -0
  59. package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/outputs/analysis.md +316 -0
  60. package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/timing.json +5 -0
  61. package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/grading.json +30 -0
  62. package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/outputs/analysis.md +352 -0
  63. package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/timing.json +5 -0
  64. package/skills/tfx-workspace/iteration-1/review.html +1325 -0
  65. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/eval_metadata.json +12 -0
  66. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/grading.json +30 -0
  67. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/outputs/analysis.md +97 -0
  68. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/timing.json +5 -0
  69. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/grading.json +30 -0
  70. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/outputs/analysis.md +94 -0
  71. package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/timing.json +5 -0
  72. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/eval_metadata.json +12 -0
  73. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/grading.json +30 -0
  74. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/outputs/analysis.md +209 -0
  75. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/timing.json +5 -0
  76. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/grading.json +30 -0
  77. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/outputs/analysis.md +193 -0
  78. package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/timing.json +5 -0
  79. package/skills/tfx-workspace/iteration-2/benchmark.json +144 -0
  80. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/eval_metadata.json +13 -0
  81. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/grading.json +35 -0
  82. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/outputs/analysis.md +382 -0
  83. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/timing.json +5 -0
  84. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/grading.json +35 -0
  85. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/outputs/analysis.md +333 -0
  86. package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/timing.json +5 -0
  87. package/skills/tfx-workspace/iteration-2/review.html +1325 -0
  88. package/skills/tfx-workspace/skill-snapshot/tfx-auto/SKILL.md +217 -0
  89. package/skills/tfx-workspace/skill-snapshot/tfx-auto-codex/SKILL.md +77 -0
  90. package/skills/tfx-workspace/skill-snapshot/tfx-codex/SKILL.md +65 -0
  91. package/skills/tfx-workspace/skill-snapshot/tfx-doctor/SKILL.md +94 -0
  92. package/skills/tfx-workspace/skill-snapshot/tfx-gemini/SKILL.md +82 -0
  93. package/skills/tfx-workspace/skill-snapshot/tfx-hub/SKILL.md +133 -0
  94. package/skills/tfx-workspace/skill-snapshot/tfx-multi/SKILL.md +426 -0
  95. package/skills/tfx-workspace/skill-snapshot/tfx-setup/SKILL.md +101 -0
  96. package/.claude-plugin/marketplace.json +0 -34
  97. package/.claude-plugin/plugin.json +0 -22
  98. package/config/mcp-registry.json +0 -29
  99. package/scripts/__tests__/release-governance.test.mjs +0 -148
  100. package/scripts/release/bump-version.mjs +0 -77
  101. package/scripts/release/check-sync.mjs +0 -51
  102. package/scripts/release/lib.mjs +0 -303
  103. package/scripts/release/prepare.mjs +0 -85
  104. package/scripts/release/publish.mjs +0 -87
  105. package/scripts/release/verify.mjs +0 -81
  106. package/scripts/release/version-manifest.json +0 -26
  107. package/tui/codex-profile.mjs +0 -457
  108. package/tui/core.mjs +0 -266
  109. package/tui/doctor.mjs +0 -375
  110. package/tui/gemini-profile.mjs +0 -299
  111. package/tui/monitor-data.mjs +0 -152
  112. package/tui/monitor.mjs +0 -339
  113. package/tui/setup.mjs +0 -598
@@ -0,0 +1,193 @@
1
+ # Routing Analysis: `/tfx-auto 프론트엔드 리팩터링하고 보안 리뷰도 해줘`
2
+
3
+ ## 1. Mode Selection
4
+
5
+ **Selected mode: AUTO**
6
+
7
+ The input `/tfx-auto 프론트엔드 리팩터링하고 보안 리뷰도 해줘` uses the `tfx-auto` trigger directly with a free-form natural language task description. It does not match any command shortcut keyword (e.g., `implement`, `cleanup`, `analyze`), and it does not use the manual `N:agent_type` prefix syntax.
8
+
9
+ Per the SKILL.md mode table:
10
+
11
+ | Input pattern | Mode | Triage |
12
+ |---|---|---|
13
+ | `/tfx-auto "리팩터링 + UI"` | 자동 (auto) | Codex 분류 → Opus 분해 |
14
+
15
+ This request falls exactly into the **auto mode** pattern.
16
+
17
+ ---
18
+
19
+ ## 2. Triage Trigger
20
+
21
+ Triage **IS triggered** because the mode is auto (not a command shortcut, not manual).
22
+
23
+ The triage proceeds in two steps:
24
+
25
+ ### Step 1 — Codex Classification
26
+ ```
27
+ codex exec --full-auto --skip-git-repo-check
28
+ ```
29
+ Input: `"프론트엔드 리팩터링하고 보안 리뷰도 해줘"`
30
+
31
+ Expected output JSON:
32
+ ```json
33
+ {
34
+ "parts": [
35
+ { "description": "프론트엔드 리팩터링", "agent": "codex" },
36
+ { "description": "보안 리뷰", "agent": "codex" }
37
+ ]
38
+ }
39
+ ```
40
+
41
+ ### Step 2 — Opus Inline Decomposition
42
+ Opus receives the classified parts and decomposes them into a structured subtask graph:
43
+
44
+ ```json
45
+ {
46
+ "graph_type": "INDEPENDENT",
47
+ "subtasks": [
48
+ {
49
+ "id": "st-1",
50
+ "description": "프론트엔드 코드 리팩터링",
51
+ "scope": "frontend source files",
52
+ "agent": "executor",
53
+ "mcp_profile": "implement",
54
+ "depends_on": [],
55
+ "context_output": "refactor-summary",
56
+ "context_input": null
57
+ },
58
+ {
59
+ "id": "st-2",
60
+ "description": "보안 리뷰 수행",
61
+ "scope": "전체 코드베이스 또는 프론트엔드",
62
+ "agent": "security-reviewer",
63
+ "mcp_profile": "review",
64
+ "depends_on": [],
65
+ "context_output": "security-review-report",
66
+ "context_input": null
67
+ }
68
+ ]
69
+ }
70
+ ```
71
+
72
+ The two tasks ("리팩터링" and "보안 리뷰") are **semantically independent**: refactoring does not depend on the security review and vice versa, so `graph_type` resolves to `INDEPENDENT`.
73
+
74
+ If Codex classification fails, Opus performs both classification and decomposition directly (fallback path per SKILL.md §트리아지).
75
+
76
+ ---
77
+
78
+ ## 3. Task Decomposition into Subtasks
79
+
80
+ The request contains two distinct tasks:
81
+
82
+ | # | Description | Agent | MCP Profile |
83
+ |---|---|---|---|
84
+ | st-1 | 프론트엔드 리팩터링 | `executor` | `implement` |
85
+ | st-2 | 보안 리뷰 | `security-reviewer` | `review` |
86
+
87
+ Agent assignments follow the SKILL.md agent mapping table:
88
+ - Refactoring → `executor` → Codex, MCP: `implement`
89
+ - Security review → `security-reviewer` → Codex (review mode), MCP: `review`
90
+
91
+ ---
92
+
93
+ ## 4. Subtask Count >= 2 → Delegation to tfx-multi
94
+
95
+ **Subtask count = 2, which satisfies `>= 2`.**
96
+
97
+ Per SKILL.md §멀티 태스크 라우팅:
98
+
99
+ > 트리아지 결과 서브태스크가 2개 이상이면 tfx-multi Native Teams 모드로 자동 전환한다.
100
+
101
+ The skill **automatically delegates to tfx-multi Phase 3**, skipping tfx-multi's own Phase 2 (triage) since triage has already been completed by tfx-auto.
102
+
103
+ The handoff logic:
104
+ ```
105
+ if subtasks.length >= 2:
106
+ → tfx-multi Phase 3 실행 (트리아지 결과 재사용)
107
+ → TeamCreate → TaskCreate × N → Agent 래퍼 spawn (Phase 3a~3c)
108
+ → Phase 4 결과 수집 → Phase 5 정리
109
+ ```
110
+
111
+ ---
112
+
113
+ ## 5. Exact Sequence of Actions
114
+
115
+ ```
116
+ [Step 1] Mode detection
117
+ Input: "/tfx-auto 프론트엔드 리팩터링하고 보안 리뷰도 해줘"
118
+ → No command shortcut match
119
+ → No N:agent_type prefix
120
+ → Mode = AUTO, triage = ENABLED
121
+
122
+ [Step 2] Triage — Codex classification
123
+ codex exec --full-auto --skip-git-repo-check
124
+ Prompt: "프론트엔드 리팩터링하고 보안 리뷰도 해줘"
125
+ Output: { parts: [ {description: "프론트엔드 리팩터링", agent: "codex"},
126
+ {description: "보안 리뷰", agent: "codex"} ] }
127
+
128
+ [Step 3] Triage — Opus inline decomposition
129
+ Input: classified parts from Step 2
130
+ Output: {
131
+ graph_type: "INDEPENDENT",
132
+ subtasks: [
133
+ { id: "st-1", description: "프론트엔드 리팩터링", agent: "executor",
134
+ mcp_profile: "implement", depends_on: [] },
135
+ { id: "st-2", description: "보안 리뷰", agent: "security-reviewer",
136
+ mcp_profile: "review", depends_on: [] }
137
+ ]
138
+ }
139
+
140
+ [Step 4] Subtask count check
141
+ subtasks.length = 2 → >= 2 condition TRUE
142
+ → Delegate to tfx-multi Phase 3 (skip tfx-multi Phase 2)
143
+
144
+ [Step 5] tfx-multi Phase 3a — TeamCreate
145
+ Create a Native Teams session with the decomposed subtask list
146
+
147
+ [Step 6] tfx-multi Phase 3b — TaskCreate × 2
148
+ Task 1: "프론트엔드 리팩터링" → executor / implement
149
+ Task 2: "보안 리뷰" → security-reviewer / review
150
+
151
+ [Step 7] tfx-multi Phase 3c — Agent wrapper spawn (parallel, INDEPENDENT graph)
152
+ Bash("bash ~/.claude/scripts/tfx-route.sh executor '프론트엔드 리팩터링' implement",
153
+ run_in_background=true)
154
+ Bash("bash ~/.claude/scripts/tfx-route.sh security-reviewer '보안 리뷰' review",
155
+ run_in_background=true)
156
+ Both tasks run concurrently because graph_type = INDEPENDENT (no depends_on).
157
+
158
+ [Step 8] tfx-multi Phase 4 — Result collection
159
+ Await both background tasks.
160
+ Parse exit codes and extract OUTPUT sections.
161
+ On timeout (exit 124): use PARTIAL OUTPUT.
162
+ On failure (any other non-zero exit): Claude fallback → Agent(subagent_type="oh-my-claudecode:executor", model="sonnet")
163
+
164
+ [Step 9] tfx-multi Phase 5 — Cleanup & report
165
+ Produce final report in tfx-auto format:
166
+ ## tfx-auto 완료
167
+ **모드**: auto | **그래프**: INDEPENDENT | **레벨**: 0
168
+ | # | 서브태스크 | Agent | CLI | MCP | 레벨 | 상태 | 시간 |
169
+ |---|---|---|---|---|---|---|---|
170
+ | 1 | 프론트엔드 리팩터링 | executor | codex | implement | 0 | ✓ | Xs |
171
+ | 2 | 보안 리뷰 | security-reviewer | codex | review | 0 | ✓ | Ys |
172
+ ### 워커 1: 프론트엔드 리팩터링
173
+ (리팩터링 결과 요약)
174
+ ### 워커 2: 보안 리뷰
175
+ (보안 리뷰 결과 요약)
176
+ ### Token Savings Report
177
+ (node ~/.claude/scripts/token-snapshot.mjs report {session-id})
178
+ ```
179
+
180
+ ---
181
+
182
+ ## Summary
183
+
184
+ | Item | Value |
185
+ |---|---|
186
+ | Mode | AUTO |
187
+ | Triage triggered | Yes (Codex classification → Opus decomposition) |
188
+ | Graph type | INDEPENDENT |
189
+ | Subtask count | 2 |
190
+ | Delegation to tfx-multi | Yes (Phase 3 entry, skipping Phase 2) |
191
+ | Execution style | Parallel (both tasks run concurrently via run_in_background=true) |
192
+ | st-1 agent/MCP | executor / implement |
193
+ | st-2 agent/MCP | security-reviewer / review |
@@ -0,0 +1,5 @@
1
+ {
2
+ "total_tokens": 17584,
3
+ "duration_ms": 58178,
4
+ "total_duration_seconds": 58.2
5
+ }
@@ -0,0 +1,144 @@
1
+ {
2
+ "metadata": {
3
+ "skill_name": "tfx-multi-refactored",
4
+ "skill_path": "C:/Users/SSAFY/Desktop/Projects/cli/triflux/skills/tfx-multi",
5
+ "executor_model": "claude-sonnet-4-6",
6
+ "analyzer_model": "claude-opus-4-6",
7
+ "timestamp": "2026-03-19T11:00:00Z",
8
+ "evals_run": [3],
9
+ "runs_per_configuration": 1
10
+ },
11
+ "runs": [
12
+ {
13
+ "eval_id": 3,
14
+ "eval_name": "multi-team-creation-refactored",
15
+ "configuration": "with_skill",
16
+ "run_number": 1,
17
+ "result": {
18
+ "pass_rate": 1.0,
19
+ "passed": 6,
20
+ "failed": 0,
21
+ "total": 6,
22
+ "time_seconds": 120.6,
23
+ "tokens": 23431,
24
+ "tool_calls": 6,
25
+ "errors": 0
26
+ },
27
+ "expectations": [
28
+ {
29
+ "text": "Creates TeamCreate with tfx- prefix",
30
+ "passed": true,
31
+ "evidence": "TeamCreate({ team_name: 'tfx-<hex6>' })"
32
+ },
33
+ {
34
+ "text": "Creates 3 TaskCreate calls",
35
+ "passed": true,
36
+ "evidence": "3x TaskCreate"
37
+ },
38
+ {
39
+ "text": "Spawns 3 Agent wrappers with bypassPermissions",
40
+ "passed": true,
41
+ "evidence": "3x Agent({ mode: bypassPermissions })"
42
+ },
43
+ {
44
+ "text": "Uses tfx-route.sh inside wrappers",
45
+ "passed": true,
46
+ "evidence": "Direct CLI calls forbidden"
47
+ },
48
+ {
49
+ "text": "Includes Phase 5 TeamDelete",
50
+ "passed": true,
51
+ "evidence": "Always executed"
52
+ },
53
+ {
54
+ "text": "References agent-wrapper-rules.md",
55
+ "passed": true,
56
+ "evidence": "Provided interrupt protocol + timeout values"
57
+ }
58
+ ]
59
+ },
60
+ {
61
+ "eval_id": 3,
62
+ "eval_name": "multi-team-creation-refactored",
63
+ "configuration": "without_skill",
64
+ "run_number": 1,
65
+ "result": {
66
+ "pass_rate": 0.83,
67
+ "passed": 5,
68
+ "failed": 1,
69
+ "total": 6,
70
+ "time_seconds": 133.1,
71
+ "tokens": 27382,
72
+ "tool_calls": 4,
73
+ "errors": 0
74
+ },
75
+ "expectations": [
76
+ {
77
+ "text": "Creates TeamCreate with tfx- prefix",
78
+ "passed": true,
79
+ "evidence": "TeamCreate with tfx-<6chars>"
80
+ },
81
+ {
82
+ "text": "Creates 3 TaskCreate calls",
83
+ "passed": true,
84
+ "evidence": "3x TaskCreate"
85
+ },
86
+ {
87
+ "text": "Spawns 3 Agent wrappers with bypassPermissions",
88
+ "passed": true,
89
+ "evidence": "bypassPermissions in all"
90
+ },
91
+ {
92
+ "text": "Uses tfx-route.sh inside wrappers",
93
+ "passed": true,
94
+ "evidence": "Direct CLI prohibited"
95
+ },
96
+ {
97
+ "text": "Includes Phase 5 TeamDelete",
98
+ "passed": true,
99
+ "evidence": "Mandatory"
100
+ },
101
+ {
102
+ "text": "References agent-wrapper-rules.md",
103
+ "passed": false,
104
+ "evidence": "OLD has no reference file structure"
105
+ }
106
+ ]
107
+ }
108
+ ],
109
+ "run_summary": {
110
+ "with_skill": {
111
+ "pass_rate": { "mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0 },
112
+ "time_seconds": {
113
+ "mean": 120.6,
114
+ "stddev": 0.0,
115
+ "min": 120.6,
116
+ "max": 120.6
117
+ },
118
+ "tokens": { "mean": 23431, "stddev": 0, "min": 23431, "max": 23431 }
119
+ },
120
+ "without_skill": {
121
+ "pass_rate": { "mean": 0.83, "stddev": 0.0, "min": 0.83, "max": 0.83 },
122
+ "time_seconds": {
123
+ "mean": 133.1,
124
+ "stddev": 0.0,
125
+ "min": 133.1,
126
+ "max": 133.1
127
+ },
128
+ "tokens": { "mean": 27382, "stddev": 0, "min": 27382, "max": 27382 }
129
+ },
130
+ "delta": {
131
+ "pass_rate": "+0.17",
132
+ "time_seconds": "-12.5",
133
+ "tokens": "-3951"
134
+ }
135
+ },
136
+ "notes": [
137
+ "Refactored version (177 lines + 2 reference files) vs original (426 lines monolith)",
138
+ "Token savings: 3,951 fewer tokens (-14.4%) — the model loads less context upfront",
139
+ "Time savings: 12.5s faster (-9.4%) — despite reading 3 files vs 1",
140
+ "Progressive disclosure works: the model correctly loaded reference files only when relevant",
141
+ "Reference files provided additional value: interrupt protocol details, timeout values, technical reasoning for tfx-route.sh requirement",
142
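+ "Core routing accuracy is identical — all 5 functional assertions pass in both versions; the only old-version failure is the agent-wrapper-rules.md reference check, which applies to the new skill only"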
143
+ ]
144
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "eval_id": 3,
3
+ "eval_name": "multi-team-creation-refactored",
4
+ "prompt": "/tfx-multi 인증 리팩터링 + UI 개선 + 보안 리뷰",
5
+ "assertions": [
6
+ "Creates exactly one TeamCreate with tfx- prefix naming",
7
+ "Creates 3 TaskCreate calls (one per subtask)",
8
+ "Spawns 3 Agent wrappers with mode: bypassPermissions",
9
+ "Uses tfx-route.sh inside Agent wrapper (not direct codex/gemini)",
10
+ "Includes Phase 5 cleanup (TeamDelete)",
11
+ "References agent-wrapper-rules.md for detailed rules (new skill only)"
12
+ ]
13
+ }
@@ -0,0 +1,35 @@
1
+ {
2
+ "expectations": [
3
+ {
4
+ "text": "Creates exactly one TeamCreate with tfx- prefix naming",
5
+ "passed": true,
6
+ "evidence": "TeamCreate({ team_name: 'tfx-<6chars>' })"
7
+ },
8
+ {
9
+ "text": "Creates 3 TaskCreate calls (one per subtask)",
10
+ "passed": true,
11
+ "evidence": "3x TaskCreate with subject, description, metadata"
12
+ },
13
+ {
14
+ "text": "Spawns 3 Agent wrappers with mode: bypassPermissions",
15
+ "passed": true,
16
+ "evidence": "mode: bypassPermissions in all Agent calls"
17
+ },
18
+ {
19
+ "text": "Uses tfx-route.sh inside Agent wrapper (not direct codex/gemini)",
20
+ "passed": true,
21
+ "evidence": "Direct codex exec / gemini -y -p explicitly prohibited"
22
+ },
23
+ {
24
+ "text": "Includes Phase 5 cleanup (TeamDelete)",
25
+ "passed": true,
26
+ "evidence": "TeamDelete mandatory, 30s wait, force cleanup fallback"
27
+ },
28
+ {
29
+ "text": "References agent-wrapper-rules.md for detailed rules (new skill only)",
30
+ "passed": false,
31
+ "evidence": "OLD version has all rules inline in SKILL.md (426 lines), no reference file structure"
32
+ }
33
+ ],
34
+ "summary": { "passed": 5, "failed": 1, "total": 6, "pass_rate": 0.83 }
35
+ }