triflux 10.9.19 → 10.9.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +212 -0
- package/hub/lib/bash-path.mjs +73 -0
- package/hub/team/dashboard-open.mjs +1 -68
- package/hub/team/native-supervisor.mjs +9 -2
- package/hub/team/psmux.mjs +5 -13
- package/hub/team/session.mjs +6 -26
- package/hub/team/swarm-hypervisor.mjs +205 -27
- package/hub/team/synapse-http.mjs +1 -0
- package/hub/team/tui-core.mjs +292 -0
- package/hub/team/tui-lite.mjs +20 -154
- package/hub/team/tui-synapse.mjs +213 -0
- package/hub/team/tui-widgets.mjs +262 -0
- package/hub/team/tui.mjs +159 -255
- package/hub/workers/delegator-mcp.mjs +2 -2
- package/package.json +21 -62
- package/references/hosts.json +46 -0
- package/scripts/__tests__/keyword-detector.test.mjs +4 -4
- package/scripts/cross-review-gate.mjs +13 -0
- package/scripts/remote-spawn.mjs +11 -46
- package/scripts/session-spawn-helper.mjs +8 -21
- package/scripts/test-tfx-route-no-claude-native.mjs +4 -2
- package/scripts/tfx-route.sh +13 -0
- package/skills/tfx-deep-interview/SKILL.md +6 -6
- package/skills/tfx-deep-interview/SKILL.md.tmpl +6 -6
- package/skills/tfx-index/SKILL.md +1 -1
- package/skills/tfx-index/SKILL.md.tmpl +1 -1
- package/skills/tfx-interview/SKILL.md +9 -9
- package/skills/tfx-interview/SKILL.md.tmpl +9 -9
- package/skills/tfx-plan/SKILL.md +1 -1
- package/skills/tfx-plan/SKILL.md.tmpl +1 -1
- package/skills/tfx-research/SKILL.md +1 -1
- package/skills/tfx-research/SKILL.md.tmpl +1 -1
- package/skills/tfx-workspace/async-tests/run-tests.sh +203 -0
- package/skills/tfx-workspace/evals/evals.json +79 -0
- package/skills/tfx-workspace/iteration-1/benchmark.json +524 -0
- package/skills/tfx-workspace/iteration-1/codex-gemini-remap/eval_metadata.json +11 -0
- package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/grading.json +25 -0
- package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/outputs/analysis.md +154 -0
- package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/grading.json +25 -0
- package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/outputs/analysis.md +126 -0
- package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/doctor-diagnosis/eval_metadata.json +11 -0
- package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/grading.json +25 -0
- package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/outputs/analysis.md +119 -0
- package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/grading.json +25 -0
- package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/outputs/analysis.md +115 -0
- package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/hub-start-sequence/eval_metadata.json +10 -0
- package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/grading.json +20 -0
- package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/outputs/analysis.md +86 -0
- package/skills/tfx-workspace/iteration-1/hub-start-sequence/old_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/grading.json +20 -0
- package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/outputs/analysis.md +81 -0
- package/skills/tfx-workspace/iteration-1/hub-start-sequence/with_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/multi-team-creation/eval_metadata.json +12 -0
- package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/grading.json +30 -0
- package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/outputs/analysis.md +316 -0
- package/skills/tfx-workspace/iteration-1/multi-team-creation/old_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/grading.json +30 -0
- package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/outputs/analysis.md +352 -0
- package/skills/tfx-workspace/iteration-1/multi-team-creation/with_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/review.html +1325 -0
- package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/eval_metadata.json +12 -0
- package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/grading.json +30 -0
- package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/outputs/analysis.md +97 -0
- package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/old_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/grading.json +30 -0
- package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/outputs/analysis.md +94 -0
- package/skills/tfx-workspace/iteration-1/routing-implement-shortcut/with_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/eval_metadata.json +12 -0
- package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/grading.json +30 -0
- package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/outputs/analysis.md +209 -0
- package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/old_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/grading.json +30 -0
- package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/outputs/analysis.md +193 -0
- package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-2/benchmark.json +144 -0
- package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/eval_metadata.json +13 -0
- package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/grading.json +35 -0
- package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/outputs/analysis.md +382 -0
- package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/grading.json +35 -0
- package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/outputs/analysis.md +333 -0
- package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/with_skill/timing.json +5 -0
- package/skills/tfx-workspace/iteration-2/review.html +1325 -0
- package/skills/tfx-workspace/skill-snapshot/tfx-auto/SKILL.md +217 -0
- package/skills/tfx-workspace/skill-snapshot/tfx-auto-codex/SKILL.md +77 -0
- package/skills/tfx-workspace/skill-snapshot/tfx-codex/SKILL.md +65 -0
- package/skills/tfx-workspace/skill-snapshot/tfx-doctor/SKILL.md +94 -0
- package/skills/tfx-workspace/skill-snapshot/tfx-gemini/SKILL.md +82 -0
- package/skills/tfx-workspace/skill-snapshot/tfx-hub/SKILL.md +133 -0
- package/skills/tfx-workspace/skill-snapshot/tfx-multi/SKILL.md +426 -0
- package/skills/tfx-workspace/skill-snapshot/tfx-setup/SKILL.md +101 -0
- package/.claude-plugin/marketplace.json +0 -34
- package/.claude-plugin/plugin.json +0 -22
- package/config/mcp-registry.json +0 -29
- package/scripts/__tests__/release-governance.test.mjs +0 -148
- package/scripts/release/bump-version.mjs +0 -77
- package/scripts/release/check-sync.mjs +0 -51
- package/scripts/release/lib.mjs +0 -303
- package/scripts/release/prepare.mjs +0 -85
- package/scripts/release/publish.mjs +0 -87
- package/scripts/release/verify.mjs +0 -81
- package/scripts/release/version-manifest.json +0 -26
- package/tui/codex-profile.mjs +0 -457
- package/tui/core.mjs +0 -266
- package/tui/doctor.mjs +0 -375
- package/tui/gemini-profile.mjs +0 -299
- package/tui/monitor-data.mjs +0 -152
- package/tui/monitor.mjs +0 -339
- package/tui/setup.mjs +0 -598
package/skills/tfx-workspace/iteration-1/routing-multi-task-triage/with_skill/outputs/analysis.md
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
# Routing Analysis: `/tfx-auto 프론트엔드 리팩터링하고 보안 리뷰도 해줘`
|
|
2
|
+
|
|
3
|
+
## 1. Mode Selection
|
|
4
|
+
|
|
5
|
+
**Selected mode: AUTO**
|
|
6
|
+
|
|
7
|
+
The input `/tfx-auto 프론트엔드 리팩터링하고 보안 리뷰도 해줘` uses the `tfx-auto` trigger directly with a free-form natural language task description. It does not match any command shortcut keyword (e.g., `implement`, `cleanup`, `analyze`), and it does not use the manual `N:agent_type` prefix syntax.
|
|
8
|
+
|
|
9
|
+
Per the SKILL.md mode table:
|
|
10
|
+
|
|
11
|
+
| Input pattern | Mode | Triage |
|
|
12
|
+
|---|---|---|
|
|
13
|
+
| `/tfx-auto "리팩터링 + UI"` | 자동 (auto) | Codex 분류 → Opus 분해 |
|
|
14
|
+
|
|
15
|
+
This request falls exactly into the **auto mode** pattern.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## 2. Triage Trigger
|
|
20
|
+
|
|
21
|
+
Triage **IS triggered** because the mode is auto (not a command shortcut, not manual).
|
|
22
|
+
|
|
23
|
+
The triage proceeds in two steps:
|
|
24
|
+
|
|
25
|
+
### Step 1 — Codex Classification
|
|
26
|
+
```
|
|
27
|
+
codex exec --full-auto --skip-git-repo-check
|
|
28
|
+
```
|
|
29
|
+
Input: `"프론트엔드 리팩터링하고 보안 리뷰도 해줘"`
|
|
30
|
+
|
|
31
|
+
Expected output JSON:
|
|
32
|
+
```json
|
|
33
|
+
{
|
|
34
|
+
"parts": [
|
|
35
|
+
{ "description": "프론트엔드 리팩터링", "agent": "codex" },
|
|
36
|
+
{ "description": "보안 리뷰", "agent": "codex" }
|
|
37
|
+
]
|
|
38
|
+
}
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Step 2 — Opus Inline Decomposition
|
|
42
|
+
Opus receives the classified parts and decomposes them into a structured subtask graph:
|
|
43
|
+
|
|
44
|
+
```json
|
|
45
|
+
{
|
|
46
|
+
"graph_type": "INDEPENDENT",
|
|
47
|
+
"subtasks": [
|
|
48
|
+
{
|
|
49
|
+
"id": "st-1",
|
|
50
|
+
"description": "프론트엔드 코드 리팩터링",
|
|
51
|
+
"scope": "frontend source files",
|
|
52
|
+
"agent": "executor",
|
|
53
|
+
"mcp_profile": "implement",
|
|
54
|
+
"depends_on": [],
|
|
55
|
+
"context_output": "refactor-summary",
|
|
56
|
+
"context_input": null
|
|
57
|
+
},
|
|
58
|
+
{
|
|
59
|
+
"id": "st-2",
|
|
60
|
+
"description": "보안 리뷰 수행",
|
|
61
|
+
"scope": "전체 코드베이스 또는 프론트엔드",
|
|
62
|
+
"agent": "security-reviewer",
|
|
63
|
+
"mcp_profile": "review",
|
|
64
|
+
"depends_on": [],
|
|
65
|
+
"context_output": "security-review-report",
|
|
66
|
+
"context_input": null
|
|
67
|
+
}
|
|
68
|
+
]
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
The two tasks ("리팩터링" and "보안 리뷰") are **semantically independent**: refactoring does not depend on the security review and vice versa, so `graph_type` resolves to `INDEPENDENT`.
|
|
73
|
+
|
|
74
|
+
If Codex classification fails, Opus performs both classification and decomposition directly (fallback path per SKILL.md §트리아지).
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## 3. Task Decomposition into Subtasks
|
|
79
|
+
|
|
80
|
+
The request contains two distinct tasks:
|
|
81
|
+
|
|
82
|
+
| # | Description | Agent | MCP Profile |
|
|
83
|
+
|---|---|---|---|
|
|
84
|
+
| st-1 | 프론트엔드 리팩터링 | `executor` | `implement` |
|
|
85
|
+
| st-2 | 보안 리뷰 | `security-reviewer` | `review` |
|
|
86
|
+
|
|
87
|
+
Agent assignments follow the SKILL.md agent mapping table:
|
|
88
|
+
- Refactoring → `executor` → Codex, MCP: `implement`
|
|
89
|
+
- Security review → `security-reviewer` → Codex (review mode), MCP: `review`
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## 4. Subtask Count >= 2 → Delegation to tfx-multi
|
|
94
|
+
|
|
95
|
+
**Subtask count = 2, which satisfies `>= 2`.**
|
|
96
|
+
|
|
97
|
+
Per SKILL.md §멀티 태스크 라우팅:
|
|
98
|
+
|
|
99
|
+
> 트리아지 결과 서브태스크가 2개 이상이면 tfx-multi Native Teams 모드로 자동 전환한다.
|
|
100
|
+
|
|
101
|
+
The skill **automatically delegates to tfx-multi Phase 3**, skipping tfx-multi's own Phase 2 (triage) since triage has already been completed by tfx-auto.
|
|
102
|
+
|
|
103
|
+
The handoff logic:
|
|
104
|
+
```
|
|
105
|
+
if subtasks.length >= 2:
|
|
106
|
+
→ tfx-multi Phase 3 실행 (트리아지 결과 재사용)
|
|
107
|
+
→ TeamCreate → TaskCreate × N → Agent 래퍼 spawn (Phase 3a~3c)
|
|
108
|
+
→ Phase 4 결과 수집 → Phase 5 정리
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## 5. Exact Sequence of Actions
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
[Step 1] Mode detection
|
|
117
|
+
Input: "/tfx-auto 프론트엔드 리팩터링하고 보안 리뷰도 해줘"
|
|
118
|
+
→ No command shortcut match
|
|
119
|
+
→ No N:agent_type prefix
|
|
120
|
+
→ Mode = AUTO, triage = ENABLED
|
|
121
|
+
|
|
122
|
+
[Step 2] Triage — Codex classification
|
|
123
|
+
codex exec --full-auto --skip-git-repo-check
|
|
124
|
+
Prompt: "프론트엔드 리팩터링하고 보안 리뷰도 해줘"
|
|
125
|
+
Output: { parts: [ {description: "프론트엔드 리팩터링", agent: "codex"},
|
|
126
|
+
{description: "보안 리뷰", agent: "codex"} ] }
|
|
127
|
+
|
|
128
|
+
[Step 3] Triage — Opus inline decomposition
|
|
129
|
+
Input: classified parts from Step 2
|
|
130
|
+
Output: {
|
|
131
|
+
graph_type: "INDEPENDENT",
|
|
132
|
+
subtasks: [
|
|
133
|
+
{ id: "st-1", description: "프론트엔드 리팩터링", agent: "executor",
|
|
134
|
+
mcp_profile: "implement", depends_on: [] },
|
|
135
|
+
{ id: "st-2", description: "보안 리뷰", agent: "security-reviewer",
|
|
136
|
+
mcp_profile: "review", depends_on: [] }
|
|
137
|
+
]
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
[Step 4] Subtask count check
|
|
141
|
+
subtasks.length = 2 → >= 2 condition TRUE
|
|
142
|
+
→ Delegate to tfx-multi Phase 3 (skip tfx-multi Phase 2)
|
|
143
|
+
|
|
144
|
+
[Step 5] tfx-multi Phase 3a — TeamCreate
|
|
145
|
+
Create a Native Teams session with the decomposed subtask list
|
|
146
|
+
|
|
147
|
+
[Step 6] tfx-multi Phase 3b — TaskCreate × 2
|
|
148
|
+
Task 1: "프론트엔드 리팩터링" → executor / implement
|
|
149
|
+
Task 2: "보안 리뷰" → security-reviewer / review
|
|
150
|
+
|
|
151
|
+
[Step 7] tfx-multi Phase 3c — Agent wrapper spawn (parallel, INDEPENDENT graph)
|
|
152
|
+
Bash("bash ~/.claude/scripts/tfx-route.sh executor '프론트엔드 리팩터링' implement",
|
|
153
|
+
run_in_background=true)
|
|
154
|
+
Bash("bash ~/.claude/scripts/tfx-route.sh security-reviewer '보안 리뷰' review",
|
|
155
|
+
run_in_background=true)
|
|
156
|
+
Both tasks run concurrently because graph_type = INDEPENDENT (both subtasks have an empty depends_on list).
|
|
157
|
+
|
|
158
|
+
[Step 8] tfx-multi Phase 4 — Result collection
|
|
159
|
+
Await both background tasks.
|
|
160
|
+
Parse exit codes and extract OUTPUT sections.
|
|
161
|
+
On timeout (exit 124): use PARTIAL OUTPUT.
|
|
162
|
+
On failure (any other exit ≠ 0): Claude fallback → Agent(subagent_type="oh-my-claudecode:executor", model="sonnet")
|
|
163
|
+
|
|
164
|
+
[Step 9] tfx-multi Phase 5 — Cleanup & report
|
|
165
|
+
Produce final report in tfx-auto format:
|
|
166
|
+
## tfx-auto 완료
|
|
167
|
+
**모드**: auto | **그래프**: INDEPENDENT | **레벨**: 0
|
|
168
|
+
| # | 서브태스크 | Agent | CLI | MCP | 레벨 | 상태 | 시간 |
|
|
169
|
+
|---|---|---|---|---|---|---|---|
|
|
170
|
+
| 1 | 프론트엔드 리팩터링 | executor | codex | implement | 0 | ✓ | Xs |
|
|
171
|
+
| 2 | 보안 리뷰 | security-reviewer | codex | review | 0 | ✓ | Ys |
|
|
172
|
+
### 워커 1: 프론트엔드 리팩터링
|
|
173
|
+
(리팩터링 결과 요약)
|
|
174
|
+
### 워커 2: 보안 리뷰
|
|
175
|
+
(보안 리뷰 결과 요약)
|
|
176
|
+
### Token Savings Report
|
|
177
|
+
(node ~/.claude/scripts/token-snapshot.mjs report {session-id})
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
## Summary
|
|
183
|
+
|
|
184
|
+
| Item | Value |
|
|
185
|
+
|---|---|
|
|
186
|
+
| Mode | AUTO |
|
|
187
|
+
| Triage triggered | Yes (Codex classification → Opus decomposition) |
|
|
188
|
+
| Graph type | INDEPENDENT |
|
|
189
|
+
| Subtask count | 2 |
|
|
190
|
+
| Delegation to tfx-multi | Yes (Phase 3 entry, skipping Phase 2) |
|
|
191
|
+
| Execution style | Parallel (both tasks run concurrently via run_in_background=true) |
|
|
192
|
+
| st-1 agent/MCP | executor / implement |
|
|
193
|
+
| st-2 agent/MCP | security-reviewer / review |
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
{
|
|
2
|
+
"metadata": {
|
|
3
|
+
"skill_name": "tfx-multi-refactored",
|
|
4
|
+
"skill_path": "C:/Users/SSAFY/Desktop/Projects/cli/triflux/skills/tfx-multi",
|
|
5
|
+
"executor_model": "claude-sonnet-4-6",
|
|
6
|
+
"analyzer_model": "claude-opus-4-6",
|
|
7
|
+
"timestamp": "2026-03-19T11:00:00Z",
|
|
8
|
+
"evals_run": [3],
|
|
9
|
+
"runs_per_configuration": 1
|
|
10
|
+
},
|
|
11
|
+
"runs": [
|
|
12
|
+
{
|
|
13
|
+
"eval_id": 3,
|
|
14
|
+
"eval_name": "multi-team-creation-refactored",
|
|
15
|
+
"configuration": "with_skill",
|
|
16
|
+
"run_number": 1,
|
|
17
|
+
"result": {
|
|
18
|
+
"pass_rate": 1.0,
|
|
19
|
+
"passed": 6,
|
|
20
|
+
"failed": 0,
|
|
21
|
+
"total": 6,
|
|
22
|
+
"time_seconds": 120.6,
|
|
23
|
+
"tokens": 23431,
|
|
24
|
+
"tool_calls": 6,
|
|
25
|
+
"errors": 0
|
|
26
|
+
},
|
|
27
|
+
"expectations": [
|
|
28
|
+
{
|
|
29
|
+
"text": "Creates TeamCreate with tfx- prefix",
|
|
30
|
+
"passed": true,
|
|
31
|
+
"evidence": "TeamCreate({ team_name: 'tfx-<hex6>' })"
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"text": "Creates 3 TaskCreate calls",
|
|
35
|
+
"passed": true,
|
|
36
|
+
"evidence": "3x TaskCreate"
|
|
37
|
+
},
|
|
38
|
+
{
|
|
39
|
+
"text": "Spawns 3 Agent wrappers with bypassPermissions",
|
|
40
|
+
"passed": true,
|
|
41
|
+
"evidence": "3x Agent({ mode: bypassPermissions })"
|
|
42
|
+
},
|
|
43
|
+
{
|
|
44
|
+
"text": "Uses tfx-route.sh inside wrappers",
|
|
45
|
+
"passed": true,
|
|
46
|
+
"evidence": "Direct CLI calls forbidden"
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"text": "Includes Phase 5 TeamDelete",
|
|
50
|
+
"passed": true,
|
|
51
|
+
"evidence": "Always executed"
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"text": "References agent-wrapper-rules.md",
|
|
55
|
+
"passed": true,
|
|
56
|
+
"evidence": "Provided interrupt protocol + timeout values"
|
|
57
|
+
}
|
|
58
|
+
]
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
"eval_id": 3,
|
|
62
|
+
"eval_name": "multi-team-creation-refactored",
|
|
63
|
+
"configuration": "without_skill",
|
|
64
|
+
"run_number": 1,
|
|
65
|
+
"result": {
|
|
66
|
+
"pass_rate": 0.83,
|
|
67
|
+
"passed": 5,
|
|
68
|
+
"failed": 1,
|
|
69
|
+
"total": 6,
|
|
70
|
+
"time_seconds": 133.1,
|
|
71
|
+
"tokens": 27382,
|
|
72
|
+
"tool_calls": 4,
|
|
73
|
+
"errors": 0
|
|
74
|
+
},
|
|
75
|
+
"expectations": [
|
|
76
|
+
{
|
|
77
|
+
"text": "Creates TeamCreate with tfx- prefix",
|
|
78
|
+
"passed": true,
|
|
79
|
+
"evidence": "TeamCreate with tfx-<6chars>"
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"text": "Creates 3 TaskCreate calls",
|
|
83
|
+
"passed": true,
|
|
84
|
+
"evidence": "3x TaskCreate"
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"text": "Spawns 3 Agent wrappers with bypassPermissions",
|
|
88
|
+
"passed": true,
|
|
89
|
+
"evidence": "bypassPermissions in all"
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
"text": "Uses tfx-route.sh inside wrappers",
|
|
93
|
+
"passed": true,
|
|
94
|
+
"evidence": "Direct CLI prohibited"
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"text": "Includes Phase 5 TeamDelete",
|
|
98
|
+
"passed": true,
|
|
99
|
+
"evidence": "Mandatory"
|
|
100
|
+
},
|
|
101
|
+
{
|
|
102
|
+
"text": "References agent-wrapper-rules.md",
|
|
103
|
+
"passed": false,
|
|
104
|
+
"evidence": "OLD has no reference file structure"
|
|
105
|
+
}
|
|
106
|
+
]
|
|
107
|
+
}
|
|
108
|
+
],
|
|
109
|
+
"run_summary": {
|
|
110
|
+
"with_skill": {
|
|
111
|
+
"pass_rate": { "mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0 },
|
|
112
|
+
"time_seconds": {
|
|
113
|
+
"mean": 120.6,
|
|
114
|
+
"stddev": 0.0,
|
|
115
|
+
"min": 120.6,
|
|
116
|
+
"max": 120.6
|
|
117
|
+
},
|
|
118
|
+
"tokens": { "mean": 23431, "stddev": 0, "min": 23431, "max": 23431 }
|
|
119
|
+
},
|
|
120
|
+
"without_skill": {
|
|
121
|
+
"pass_rate": { "mean": 0.83, "stddev": 0.0, "min": 0.83, "max": 0.83 },
|
|
122
|
+
"time_seconds": {
|
|
123
|
+
"mean": 133.1,
|
|
124
|
+
"stddev": 0.0,
|
|
125
|
+
"min": 133.1,
|
|
126
|
+
"max": 133.1
|
|
127
|
+
},
|
|
128
|
+
"tokens": { "mean": 27382, "stddev": 0, "min": 27382, "max": 27382 }
|
|
129
|
+
},
|
|
130
|
+
"delta": {
|
|
131
|
+
"pass_rate": "+0.17",
|
|
132
|
+
"time_seconds": "-12.5",
|
|
133
|
+
"tokens": "-3951"
|
|
134
|
+
}
|
|
135
|
+
},
|
|
136
|
+
"notes": [
|
|
137
|
+
"Refactored version (177 lines + 2 reference files) vs original (426 lines monolith)",
|
|
138
|
+
"Token savings: 3,951 fewer tokens (-14.4%) — the model loads less context upfront",
|
|
139
|
+
"Time savings: 12.5s faster (-9.4%) — despite reading 3 files vs 1",
|
|
140
|
+
"Progressive disclosure works: the model correctly loaded reference files only when relevant",
|
|
141
|
+
"Reference files provided additional value: interrupt protocol details, timeout values, technical reasoning for tfx-route.sh requirement",
|
|
142
|
+
"Core routing accuracy is identical — all 5 functional assertions pass in both versions"
|
|
143
|
+
]
|
|
144
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"eval_id": 3,
|
|
3
|
+
"eval_name": "multi-team-creation-refactored",
|
|
4
|
+
"prompt": "/tfx-multi 인증 리팩터링 + UI 개선 + 보안 리뷰",
|
|
5
|
+
"assertions": [
|
|
6
|
+
"Creates exactly one TeamCreate with tfx- prefix naming",
|
|
7
|
+
"Creates 3 TaskCreate calls (one per subtask)",
|
|
8
|
+
"Spawns 3 Agent wrappers with mode: bypassPermissions",
|
|
9
|
+
"Uses tfx-route.sh inside Agent wrapper (not direct codex/gemini)",
|
|
10
|
+
"Includes Phase 5 cleanup (TeamDelete)",
|
|
11
|
+
"References agent-wrapper-rules.md for detailed rules (new skill only)"
|
|
12
|
+
]
|
|
13
|
+
}
|
package/skills/tfx-workspace/iteration-2/multi-team-creation-refactored/old_skill/grading.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"expectations": [
|
|
3
|
+
{
|
|
4
|
+
"text": "Creates exactly one TeamCreate with tfx- prefix naming",
|
|
5
|
+
"passed": true,
|
|
6
|
+
"evidence": "TeamCreate({ team_name: 'tfx-<6chars>' })"
|
|
7
|
+
},
|
|
8
|
+
{
|
|
9
|
+
"text": "Creates 3 TaskCreate calls (one per subtask)",
|
|
10
|
+
"passed": true,
|
|
11
|
+
"evidence": "3x TaskCreate with subject, description, metadata"
|
|
12
|
+
},
|
|
13
|
+
{
|
|
14
|
+
"text": "Spawns 3 Agent wrappers with mode: bypassPermissions",
|
|
15
|
+
"passed": true,
|
|
16
|
+
"evidence": "mode: bypassPermissions in all Agent calls"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"text": "Uses tfx-route.sh inside Agent wrapper (not direct codex/gemini)",
|
|
20
|
+
"passed": true,
|
|
21
|
+
"evidence": "Direct codex exec / gemini -y -p explicitly prohibited"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"text": "Includes Phase 5 cleanup (TeamDelete)",
|
|
25
|
+
"passed": true,
|
|
26
|
+
"evidence": "TeamDelete mandatory, 30s wait, force cleanup fallback"
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
"text": "References agent-wrapper-rules.md for detailed rules",
|
|
30
|
+
"passed": false,
|
|
31
|
+
"evidence": "OLD version has all rules inline in SKILL.md (426 lines), no reference file structure"
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
"summary": { "passed": 5, "failed": 1, "total": 6, "pass_rate": 0.83 }
|
|
35
|
+
}
|