npm - triflux - Versions diffs - 10.3.3 → 10.4.0 - Mend

triflux 10.3.3 → 10.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (130)

package/skills/tfx-workspace/iteration-1/benchmark.json ADDED Viewed

@@ -0,0 +1,162 @@
+{
+  "metadata": {
+    "skill_name": "tfx-skills-suite",
+    "skill_path": "C:/Users/SSAFY/Desktop/Projects/cli/triflux/skills",
+    "executor_model": "claude-sonnet-4-6",
+    "analyzer_model": "claude-opus-4-6",
+    "timestamp": "2026-03-19T10:00:00Z",
+    "evals_run": [1, 2, 3, 4, 5, 6],
+    "runs_per_configuration": 1
+  },
+  "runs": [
+    {
+      "eval_id": 1, "eval_name": "routing-implement-shortcut", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 43.6, "tokens": 16303, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Routes to executor agent", "passed": true, "evidence": "Correctly mapped from implement shortcut table"},
+        {"text": "Uses implement MCP profile", "passed": true, "evidence": "Mapped from shortcut table"},
+        {"text": "Generates correct tfx-route.sh command", "passed": true, "evidence": "bash ~/.claude/scripts/tfx-route.sh executor '...' implement"},
+        {"text": "Does NOT trigger triage", "passed": true, "evidence": "Command shortcut skips triage"},
+        {"text": "Does NOT delegate to tfx-multi", "passed": true, "evidence": "No subtask decomposition occurred"}
+      ]
+    },
+    {
+      "eval_id": 1, "eval_name": "routing-implement-shortcut", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 48.1, "tokens": 16436, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Routes to executor agent", "passed": true, "evidence": "Correctly mapped"},
+        {"text": "Uses implement MCP profile", "passed": true, "evidence": "Assigned by shortcut table"},
+        {"text": "Generates correct tfx-route.sh command", "passed": true, "evidence": "Correct syntax generated"},
+        {"text": "Does NOT trigger triage", "passed": true, "evidence": "Shortcut mode skips triage"},
+        {"text": "Does NOT delegate to tfx-multi", "passed": true, "evidence": "No delegation"}
+      ]
+    },
+    {
+      "eval_id": 2, "eval_name": "routing-multi-task-triage", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 58.2, "tokens": 17584, "tool_calls": 3, "errors": 0},
+      "expectations": [
+        {"text": "Identifies as auto mode", "passed": true, "evidence": "No shortcut match, auto mode selected"},
+        {"text": "Triggers Codex classification", "passed": true, "evidence": "Codex --full-auto classification triggered"},
+        {"text": "Decomposes into 2+ subtasks", "passed": true, "evidence": "2 subtasks: executor + security-reviewer"},
+        {"text": "Notes tfx-multi delegation", "passed": true, "evidence": "subtasks.length >= 2 triggers tfx-multi Phase 3"},
+        {"text": "Does NOT execute directly", "passed": true, "evidence": "Delegates to tfx-multi"}
+      ]
+    },
+    {
+      "eval_id": 2, "eval_name": "routing-multi-task-triage", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 77.2, "tokens": 18626, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Identifies as auto mode", "passed": true, "evidence": "Auto mode selected"},
+        {"text": "Triggers Codex classification", "passed": true, "evidence": "Codex --full-auto triggered"},
+        {"text": "Decomposes into 2+ subtasks", "passed": true, "evidence": "2 subtasks decomposed"},
+        {"text": "Notes tfx-multi delegation", "passed": true, "evidence": "Hands off to tfx-multi Phase 3"},
+        {"text": "Does NOT execute directly", "passed": true, "evidence": "Delegates correctly"}
+      ]
+    },
+    {
+      "eval_id": 3, "eval_name": "multi-team-creation", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 115.3, "tokens": 27197, "tool_calls": 3, "errors": 0},
+      "expectations": [
+        {"text": "Creates TeamCreate with tfx- prefix", "passed": true, "evidence": "TeamCreate({ team_name: 'tfx-<base36>' })"},
+        {"text": "Creates 3 TaskCreate calls", "passed": true, "evidence": "3x TaskCreate with metadata"},
+        {"text": "Spawns 3 Agent wrappers with bypassPermissions", "passed": true, "evidence": "3x Agent({ mode: bypassPermissions })"},
+        {"text": "Uses tfx-route.sh inside wrappers", "passed": true, "evidence": "Direct codex/gemini calls prohibited"},
+        {"text": "Includes Phase 5 TeamDelete", "passed": true, "evidence": "TeamDelete always runs, max 30s wait"}
+      ]
+    },
+    {
+      "eval_id": 3, "eval_name": "multi-team-creation", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 5, "failed": 0, "total": 5, "time_seconds": 100.6, "tokens": 26140, "tool_calls": 3, "errors": 0},
+      "expectations": [
+        {"text": "Creates TeamCreate with tfx- prefix", "passed": true, "evidence": "TeamCreate with tfx-<id>"},
+        {"text": "Creates 3 TaskCreate calls", "passed": true, "evidence": "Three TaskCreate calls"},
+        {"text": "Spawns 3 Agent wrappers with bypassPermissions", "passed": true, "evidence": "mode: bypassPermissions in all 3"},
+        {"text": "Uses tfx-route.sh inside wrappers", "passed": true, "evidence": "Never direct codex/gemini calls"},
+        {"text": "Includes Phase 5 TeamDelete", "passed": true, "evidence": "TeamDelete unconditionally"}
+      ]
+    },
+    {
+      "eval_id": 4, "eval_name": "doctor-diagnosis", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 53.8, "tokens": 14499, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Runs triflux doctor first", "passed": true, "evidence": "Bash(\"triflux doctor\")"},
+        {"text": "Suggests --fix mode", "passed": true, "evidence": "Suggests after diagnosis report"},
+        {"text": "Mentions HUD and CLI checks", "passed": true, "evidence": "HUD and CLI paths checked"},
+        {"text": "Does NOT jump to --reset", "passed": true, "evidence": "--reset reserved for explicit request"}
+      ]
+    },
+    {
+      "eval_id": 4, "eval_name": "doctor-diagnosis", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 48.3, "tokens": 14482, "tool_calls": 3, "errors": 0},
+      "expectations": [
+        {"text": "Runs triflux doctor first", "passed": true, "evidence": "Bash(\"triflux doctor\")"},
+        {"text": "Suggests --fix mode", "passed": true, "evidence": "Offers --fix after diagnosis"},
+        {"text": "Mentions HUD and CLI checks", "passed": true, "evidence": "All 8 diagnostics listed"},
+        {"text": "Does NOT jump to --reset", "passed": true, "evidence": "--reset reserved for explicit request"}
+      ]
+    },
+    {
+      "eval_id": 5, "eval_name": "hub-start-sequence", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 3, "failed": 0, "total": 3, "time_seconds": 47.2, "tokens": 14821, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Runs node hub/server.mjs in background", "passed": true, "evidence": "Bash(\"node hub/server.mjs\", run_in_background=true)"},
+        {"text": "Mentions port 27888 and /mcp", "passed": true, "evidence": "Port 27888, http://127.0.0.1:27888/mcp"},
+        {"text": "No triage or routing attempted", "passed": true, "evidence": "Command match, not fallthrough"}
+      ]
+    },
+    {
+      "eval_id": 5, "eval_name": "hub-start-sequence", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 3, "failed": 0, "total": 3, "time_seconds": 51.8, "tokens": 14904, "tool_calls": 4, "errors": 0},
+      "expectations": [
+        {"text": "Runs node hub/server.mjs in background", "passed": true, "evidence": "Bash(\"node hub/server.mjs\", run_in_background=true)"},
+        {"text": "Mentions port 27888 and /mcp", "passed": true, "evidence": "Port 27888, endpoint /mcp"},
+        {"text": "No triage or routing attempted", "passed": true, "evidence": "Command match, not fallthrough"}
+      ]
+    },
+    {
+      "eval_id": 6, "eval_name": "codex-gemini-remap", "configuration": "with_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 69.7, "tokens": 14889, "tool_calls": 5, "errors": 0},
+      "expectations": [
+        {"text": "designer remapped to Codex (effort: high)", "passed": true, "evidence": "designer → Codex (effort: high)"},
+        {"text": "writer remapped to Codex Spark (spark_fast)", "passed": true, "evidence": "writer → Codex Spark (effort: spark_fast)"},
+        {"text": "TFX_CLI_MODE=codex set", "passed": true, "evidence": "Set for every Phase 3 call"},
+        {"text": "MCP profiles changed", "passed": true, "evidence": "designer→implement, writer→analyze"}
+      ]
+    },
+    {
+      "eval_id": 6, "eval_name": "codex-gemini-remap", "configuration": "without_skill", "run_number": 1,
+      "result": {"pass_rate": 1.0, "passed": 4, "failed": 0, "total": 4, "time_seconds": 85.2, "tokens": 19802, "tool_calls": 7, "errors": 0},
+      "expectations": [
+        {"text": "designer remapped to Codex (effort: high)", "passed": true, "evidence": "designer → Codex (effort: high)"},
+        {"text": "writer remapped to Codex Spark (spark_fast)", "passed": true, "evidence": "writer → Codex Spark (effort: spark_fast)"},
+        {"text": "TFX_CLI_MODE=codex set", "passed": true, "evidence": "TFX_CLI_MODE set to codex"},
+        {"text": "MCP profiles changed", "passed": true, "evidence": "writer→analyze, designer→implement"}
+      ]
+    }
+  ],
+  "run_summary": {
+    "with_skill": {
+      "pass_rate": {"mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0},
+      "time_seconds": {"mean": 64.6, "stddev": 26.4, "min": 43.6, "max": 115.3},
+      "tokens": {"mean": 17549, "stddev": 4857, "min": 14499, "max": 27197}
+    },
+    "without_skill": {
+      "pass_rate": {"mean": 1.0, "stddev": 0.0, "min": 1.0, "max": 1.0},
+      "time_seconds": {"mean": 68.5, "stddev": 20.4, "min": 48.1, "max": 100.6},
+      "tokens": {"mean": 18398, "stddev": 4227, "min": 14482, "max": 26140}
+    },
+    "delta": {
+      "pass_rate": "+0.00",
+      "time_seconds": "-3.9",
+      "tokens": "-849"
+    }
+  },
+  "notes": [
+    "All 26 assertions pass at 100% for both configurations — the skills are functionally correct",
+    "The fixes applied (dead reference removal, Phase numbering consistency, hub description) don't change routing logic, so pass rates are identical",
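+    "NEW version (with_skill) is marginally faster (-3.9s avg) and uses fewer tokens (-849 avg), possibly due to cleaner references reducing model confusion; with only 1 run per configuration and a time stddev of 20-26s, this difference is within noise",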
+    "tfx-multi is the most complex skill (115s / 27K tokens with_skill) — consider extracting reference docs to reduce context load",
+    "The OLD tfx-codex skill (without_skill) references 'Phase(1~6)', which doesn't exist in tfx-auto — the NEW version (with_skill) correctly references the actual workflow names",
+    "All assertions pass regardless of configuration — these test the core routing logic which is unchanged. Consider adding assertions that specifically test the fixed issues (dead refs, phase naming) for differentiation"
+  ]
+}

package/skills/tfx-workspace/iteration-1/codex-gemini-remap/eval_metadata.json ADDED Viewed

@@ -0,0 +1,11 @@
+{
+  "eval_id": 6,
+  "eval_name": "codex-gemini-remap",
+  "prompt": "/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘",
+  "assertions": [
+    "designer remapped to Codex with effort: high",
+    "writer remapped to Codex Spark with effort: spark_fast",
+    "Sets TFX_CLI_MODE=codex environment variable",
+    "Changes MCP profile: designer->implement, writer->analyze"
+  ]
+}

package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/grading.json ADDED Viewed

@@ -0,0 +1,9 @@
+{
+  "expectations": [
+    {"text": "designer remapped to Codex with effort: high", "passed": true, "evidence": "Agent output: designer → Codex (effort: high)"},
+    {"text": "writer remapped to Codex Spark with effort: spark_fast", "passed": true, "evidence": "Agent output: writer → Codex Spark (effort: spark_fast)"},
+    {"text": "Sets TFX_CLI_MODE=codex environment variable", "passed": true, "evidence": "Agent output: 'TFX_CLI_MODE: Set to codex'"},
+    {"text": "Changes MCP profile: designer->implement, writer->analyze", "passed": true, "evidence": "Agent output: writer→analyze, designer→implement"}
+  ],
+  "summary": {"passed": 4, "failed": 0, "total": 4, "pass_rate": 1.0}
+}

package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/outputs/analysis.md ADDED Viewed

@@ -0,0 +1,154 @@
+# tfx-codex 라우팅 분석 — DRY RUN
+**요청**: `/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘`
+**분석 기준 SKILL**: `skills/tfx-workspace/skill-snapshot/tfx-codex/SKILL.md`
+---
+## 1. 에이전트 리매핑 테이블
+이 요청은 두 개의 독립 서브태스크로 분해됩니다:
+- 서브태스크 A: "API 문서를 작성" → **writer** 역할
+- 서브태스크 B: "디자인 가이드도 만들어줘" → **designer** 역할
+| 에이전트 | 원래 CLI (tfx-auto) | tfx-codex에서 | effort 파라미터 | MCP 프로필 |
+|----------|---------------------|---------------|-----------------|-----------|
+| **writer** | ~~Gemini~~ (`docs` MCP) | **Codex Spark** (effort: spark_fast) — 경량 문서 | `spark_fast` | `analyze` |
+| **designer** | ~~Gemini~~ (`docs` MCP) | **Codex** (effort: high) — UI 코드 생성 | `high` | `implement` |
+### 원본 tfx-auto 기준 (리매핑 전)
+`tfx-auto` SKILL.md의 에이전트 매핑 테이블에서:
+```
+| gemini / designer / writer | Gemini | docs |
+```
+즉, 원래 두 역할 모두 Gemini CLI + `docs` MCP로 실행됩니다.
+### tfx-codex 기준 (리매핑 후)
+`tfx-codex` SKILL.md의 에이전트 라우팅 테이블에서:
+```
+| designer | ~~Gemini~~ | Codex (effort: high) — UI 코드 생성     | implement |
+| writer   | ~~Gemini~~ | Codex Spark (effort: spark_fast) — 경량 문서 | analyze   |
+```
+---
+## 2. TFX_CLI_MODE 환경변수
+```
+TFX_CLI_MODE=codex
+```
+이 환경변수는 tfx-route.sh에 전달되어 Gemini 에이전트가 선택될 경우 Codex로 강제 교체하도록 지시합니다. Phase 2 트리아지에서 Codex 분류기가 `gemini`를 반환하더라도 이 값에 의해 `codex`로 교체됩니다.
+---
+## 3. Phase 2 트리아지 동작
+**자동 모드** (`/tfx-codex "API 문서를 작성하고 디자인 가이드도 만들어줘"`):
+1. **Codex 분류** (`--full-auto --skip-git-repo-check`):
+   - 입력 파싱 결과 예상 JSON:
+     ```json
+     {
+       "parts": [
+         { "description": "API 문서 작성", "agent": "gemini" },
+         { "description": "디자인 가이드 생성", "agent": "gemini" }
+       ]
+     }
+     ```
+   - `TFX_CLI_MODE=codex` 적용 → 두 항목 모두 `"gemini"` → **`"codex"`로 강제 교체**
+2. **Opus 인라인 분해** (강제 변환 이후):
+   - `writer` 역할: MCP 프로필 `analyze` 할당
+   - `designer` 역할: MCP 프로필 `implement` 할당
+   - 두 서브태스크는 독립적(INDEPENDENT), `graph_type: "INDEPENDENT"`
+3. **서브태스크 수 = 2** → tfx-multi Native Teams 모드로 자동 전환 (tfx-auto 규칙: 2개 이상 시 tfx-multi Phase 3)
+---
+## 4. 생성되는 Bash 커맨드 (서브태스크별)
+서브태스크가 2개이므로 tfx-multi Phase 3a(TeamCreate) → Phase 3b(TaskCreate) → Phase 3c(Agent 래퍼 spawn) 순서로 실행됩니다. 각 Agent 래퍼 내부에서 다음 Bash 커맨드가 실행됩니다:
+### 서브태스크 A — writer (API 문서 작성)
+```bash
+TFX_CLI_MODE=codex bash ~/.claude/scripts/tfx-route.sh writer 'API 문서를 작성해줘' analyze
+```
+- `writer` 에이전트: Codex Spark (`effort: spark_fast`) 로 실행
+- MCP 프로필: `analyze` (문서 기반 리서치+작성)
+- `run_in_background=true` (INDEPENDENT 병렬 실행)
+### 서브태스크 B — designer (디자인 가이드 생성)
+```bash
+TFX_CLI_MODE=codex bash ~/.claude/scripts/tfx-route.sh designer '디자인 가이드를 만들어줘' implement
+```
+- `designer` 에이전트: Codex (`effort: high`) 로 실행
+- MCP 프로필: `implement` (코드 기반 UI 작업)
+- `run_in_background=true` (INDEPENDENT 병렬 실행)
+> 두 서브태스크는 `depends_on` 없이 Level 0에서 병렬 실행됩니다.
+---
+## 5. MCP 프로필 변화 상세
+| 에이전트 | tfx-auto 원본 MCP | tfx-codex 변경 후 MCP | 변경 이유 |
+|----------|-------------------|----------------------|----------|
+| **writer** | `docs` | `analyze` | Gemini → Codex 전환 시 문서 리서치+작성에 적합한 `analyze` 프로필 사용 |
+| **designer** | `docs` | `implement` | Gemini → Codex 전환 시 UI 코드 생성에 적합한 `implement` 프로필 사용 |
+원래 `docs` MCP는 Gemini CLI의 웹 검색/문서 접근 기능을 전제로 설계되었습니다. Codex로 리매핑 시 각 역할의 실제 작업 성격에 맞는 프로필로 교체됩니다.
+---
+## 6. 워크플로우 레퍼런스
+**tfx-codex는 tfx-auto SKILL.md의 Phase 1~6 전체를 그대로 따릅니다.**
+```
+Phase 1: 입력 파싱 — 트리거 `/tfx-codex` 인식, 인자 추출
+Phase 2: 트리아지
+  - Codex 분류 실행 (TFX_CLI_MODE=codex)
+  - gemini 반환값 → codex 강제 교체
+  - Opus 인라인 분해 (writer→analyze MCP, designer→implement MCP)
+Phase 3: CLI 실행
+  - TFX_CLI_MODE=codex 환경변수 포함
+  - tfx-route.sh 호출
+  - 서브태스크 2개 → tfx-multi Phase 3 전환
+Phase 4: 결과 수집
+  - exit_code 0: === OUTPUT === 섹션 파싱
+  - exit_code 124: === PARTIAL OUTPUT === 사용
+  - 그 외: STDERR → Claude fallback
+Phase 5: 실패 처리
+  - 1차: Claude executor(sonnet) fallback
+  - 2차: 실패 보고 + 성공 결과만 종합
+Phase 6: 보고 형식 출력
+  - 모드/그래프/레벨/서브태스크 상태 테이블
+  - Token Savings Report
+```
+**핵심 차이점 요약**: Phase 2와 Phase 3에서만 동작이 달라집니다.
+- Phase 2: gemini 분류 결과를 codex로 강제 변환 + MCP 프로필 재할당
+- Phase 3: 모든 tfx-route.sh 호출에 `TFX_CLI_MODE=codex` 접두 추가
+---
+## 7. 요약
+이 요청(`/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘`)은 다음과 같이 처리됩니다:
+1. 두 서브태스크로 분해 (INDEPENDENT 그래프)
+2. 원래 Gemini로 라우팅될 `writer`와 `designer` 모두 Codex로 리매핑
+3. MCP 프로필: writer → `analyze`, designer → `implement` (원본 `docs`에서 변경)
+4. 서브태스크 2개이므로 tfx-multi Native Teams 모드로 자동 전환하여 병렬 실행
+5. 전체 Phase 1~6은 tfx-auto 워크플로우를 그대로 따름

package/skills/tfx-workspace/iteration-1/codex-gemini-remap/old_skill/timing.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "total_tokens": 19802,
+  "duration_ms": 85239,
+  "total_duration_seconds": 85.2
+}

package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/grading.json ADDED Viewed

@@ -0,0 +1,9 @@
+{
+  "expectations": [
+    {"text": "designer remapped to Codex with effort: high", "passed": true, "evidence": "Agent output: designer → Codex (effort: high)"},
+    {"text": "writer remapped to Codex Spark with effort: spark_fast", "passed": true, "evidence": "Agent output: writer → Codex Spark (effort: spark_fast)"},
+    {"text": "Sets TFX_CLI_MODE=codex environment variable", "passed": true, "evidence": "Agent output: 'TFX_CLI_MODE: Set to codex for every Phase 3 call'"},
+    {"text": "Changes MCP profile: designer->implement, writer->analyze", "passed": true, "evidence": "Agent output: writer→analyze, designer→implement (changed from docs)"}
+  ],
+  "summary": {"passed": 4, "failed": 0, "total": 4, "pass_rate": 1.0}
+}

package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/outputs/analysis.md ADDED Viewed

@@ -0,0 +1,126 @@
+# tfx-codex Routing Analysis — DRY RUN
+**User Request:** `/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘`
+**Skill Definition Source:** `skills/tfx-codex/SKILL.md`
+---
+## 1. Trigger Matching
+The command prefix `/tfx-codex` matches the skill trigger `tfx-codex` exactly.
+The skill is invoked with the argument: `"API 문서를 작성하고 디자인 가이드도 만들어줘"`
+---
+## 2. TFX_CLI_MODE Environment Variable
+```
+TFX_CLI_MODE=codex
+```
+This variable is set for every CLI execution in Phase 3. It forces `tfx-route.sh` to substitute
+any `gemini` classification with `codex`, ensuring Gemini CLI is never called.
+---
+## 3. Task Decomposition (Phase 2 Triage)
+The user request contains two distinct subtasks:
+| # | Subtask | Natural Agent Assignment | tfx-codex Override |
+|---|---------|-------------------------|--------------------|
+| 1 | API 문서를 작성 (Write API documentation) | **writer** (originally Gemini) | **Codex Spark** (effort: spark_fast) |
+| 2 | 디자인 가이드도 만들어줘 (Create design guide) | **designer** (originally Gemini) | **Codex** (effort: high) |
+During Phase 2, the Opus decomposition step detects that both subtasks would ordinarily route to
+Gemini-backed roles. The `TFX_CLI_MODE=codex` override forces:
+- Any `gemini` classification result → replaced with `codex`
+- `designer` and `writer` agent types → mapped to Codex with adjusted MCP profiles
+---
+## 4. Agent Remapping Table
+| 에이전트 | 원래 CLI | tfx-codex 매핑 | effort 플래그 |
+|----------|---------|---------------|--------------|
+| **designer** | ~~Gemini~~ | **Codex** | `effort: high` — UI/시각 코드 생성 |
+| **writer** | ~~Gemini~~ | **Codex Spark** | `effort: spark_fast` — 경량 문서 작성 |
+| executor, build-fixer, debugger | Codex | Codex | 변경 없음 |
+| architect, planner, critic, analyst | Codex | Codex | 변경 없음 |
+| code-reviewer, security-reviewer | Codex | Codex | 변경 없음 |
+| scientist, document-specialist | Codex | Codex | 변경 없음 |
+| explore | Claude Haiku | Claude Haiku | 변경 없음 |
+| verifier, test-engineer | Claude Sonnet | Claude Sonnet | 변경 없음 |
+---
+## 5. MCP Profile Changes for designer and writer
+| 에이전트 | 기본 MCP 프로필 | tfx-codex MCP 프로필 | 이유 |
+|----------|--------------|---------------------|------|
+| **designer** | (Gemini 전용 — 없음) | `implement` | 코드 기반 UI 작업으로 처리 |
+| **writer** | (Gemini 전용 — 없음) | `analyze` | 문서 기반 리서치 + 작성 워크플로우 |
+Both roles lose access to Gemini's multimodal/creative profile and are instead assigned
+Codex-compatible MCP profiles that match the nature of the work:
+- `implement` for designer: treats the design guide as a code artifact (e.g., CSS, component specs)
+- `analyze` for writer: treats API documentation as a research-and-summarize task
+---
+## 6. Exact Bash Commands Generated (Phase 3)
+### Subtask 1 — writer: API 문서 작성
+```bash
+TFX_CLI_MODE=codex bash ~/.claude/scripts/tfx-route.sh writer 'API 문서를 작성해줘' analyze
+```
+- Agent: `writer` → remapped to **Codex Spark** (`effort: spark_fast`)
+- MCP Profile: `analyze`
+- The `tfx-route.sh` script reads `TFX_CLI_MODE=codex` and substitutes the Gemini path with
+  a Codex Spark invocation.
+### Subtask 2 — designer: 디자인 가이드 작성
+```bash
+TFX_CLI_MODE=codex bash ~/.claude/scripts/tfx-route.sh designer '디자인 가이드를 만들어줘' implement
+```
+- Agent: `designer` → remapped to **Codex** (`effort: high`)
+- MCP Profile: `implement`
+- The `tfx-route.sh` script reads `TFX_CLI_MODE=codex` and substitutes the Gemini path with
+  a full-effort Codex invocation.
+---
+## 7. Workflow Reference — tfx-auto Phases Followed
+Per the skill definition: *"tfx-auto SKILL.md의 전체 워크플로우(커맨드 숏컷 → 트리아지 → 실행 → 결과 파싱 → 보고)를 그대로 따릅니다."*
+The exact same phase sequence as `tfx-auto` is executed:
+| Phase | Name | tfx-codex 특이사항 |
+|-------|------|--------------------|
+| Phase 1 | 커맨드 숏컷 파싱 | 동일 (`N:codex` 숏컷 지원) |
+| Phase 2 | 트리아지 (Opus 분해) | `gemini` 분류 결과를 `codex`로 강제 변환; designer/writer → Codex + MCP 재할당 |
+| Phase 3 | CLI 실행 | `TFX_CLI_MODE=codex` 환경변수 주입하여 `tfx-route.sh` 호출 |
+| Phase 4 | 결과 파싱 | 동일 |
+| Phase 5 | 보고 | 동일 |
+The only deviation from `tfx-auto` occurs in **Phase 2** (forced gemini→codex substitution)
+and **Phase 3** (environment variable injection). All other phases are identical.
+---
+## 8. Summary
+For the request `/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘`:
+- Two subtasks are identified: **writer** (API docs) and **designer** (design guide).
+- Both roles were originally mapped to **Gemini CLI** in the default `tfx-auto` routing.
+- `tfx-codex` remaps them: `writer` → Codex Spark (`analyze` MCP), `designer` → Codex high-effort (`implement` MCP).
+- `TFX_CLI_MODE=codex` is injected at Phase 3 for every `tfx-route.sh` call.
+- The full `tfx-auto` 5-phase workflow is followed with the two overrides noted above.
+- Gemini CLI is never invoked; no Gemini dependency exists.

package/skills/tfx-workspace/iteration-1/codex-gemini-remap/with_skill/timing.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "total_tokens": 14889,
+  "duration_ms": 69725,
+  "total_duration_seconds": 69.7
+}

package/skills/tfx-workspace/iteration-1/doctor-diagnosis/eval_metadata.json ADDED Viewed

@@ -0,0 +1,11 @@
+{
+  "eval_id": 4,
+  "eval_name": "doctor-diagnosis",
+  "prompt": "HUD가 안 보이고 codex도 안 되는데 어떻게 해?",
+  "assertions": [
+    "Runs 'triflux doctor' as first diagnostic step",
+    "Suggests '--fix' mode for auto-repair",
+    "Mentions HUD and CLI path checks in explanation",
+    "Does NOT jump straight to --reset (that's for cache only)"
+  ]
+}

package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/grading.json ADDED Viewed

@@ -0,0 +1,9 @@
+{
+  "expectations": [
+    {"text": "Runs 'triflux doctor' as first diagnostic step", "passed": true, "evidence": "Agent output: 'Exact command: Bash(\"triflux doctor\")'"},
+    {"text": "Suggests '--fix' mode for auto-repair", "passed": true, "evidence": "Agent output: 'after the diagnostic report, offer /tfx-doctor --fix as the next step'"},
+    {"text": "Mentions HUD and CLI path checks in explanation", "passed": true, "evidence": "Agent output: 'HUD installation and config, Codex/Gemini/Claude CLI paths — directly relevant'"},
+    {"text": "Does NOT jump straight to --reset (that's for cache only)", "passed": true, "evidence": "Agent output: '--reset is destructive and reserved for explicit cache-clear request'"}
+  ],
+  "summary": {"passed": 4, "failed": 0, "total": 4, "pass_rate": 1.0}
+}

package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/outputs/analysis.md ADDED Viewed

@@ -0,0 +1,119 @@
+# tfx-doctor Routing Analysis
+## User Request
+"HUD가 안 보이고 codex도 안 되는데 어떻게 해?"
+---
+## 1. Mode Selection
+**Mode used: default (진단만)**
+Rationale:
+- The user is reporting two symptoms: HUD not visible and Codex not working.
+- The request does not contain any explicit repair intent ("--fix"), cache reset request ("캐시 초기화", "리셋", "reset"), or destructive intent.
+- The skill's auto-suggest trigger list includes both "HUD가 안 보여" and "codex/gemini가 안 돼", which directly match the user's message.
+- Because no `--fix` or `--reset` flag is implied, the skill begins in read-only diagnostic mode.
+- After the diagnostic report is shown, `--fix` would be **suggested** to the user as a next step.
+---
+## 2. Exact Bash Command That Would Be Run
+```bash
+Bash("triflux doctor")
+```
+No additional flags. This is the read-only diagnostic path.
+---
+## 3. Diagnostics Checked
+The `triflux doctor` command (default mode) checks all of the following:
+| Diagnostic Item | Relevance to This Request |
+|----------------|--------------------------|
+| tfx-route.sh 설치 상태 | General health baseline |
+| HUD 설치 및 설정 상태 | **Directly relevant** — user reports HUD not visible |
+| Codex/Gemini/Claude CLI 경로 (크로스 셸) | **Directly relevant** — user reports Codex not working |
+| 스킬 설치 상태 | General health |
+| 플러그인 등록 상태 | General health |
+| MCP 인벤토리 캐시 | Could affect HUD and CLI display |
+| CLI 이슈 트래커 | May log the Codex failure |
+| 잔존 팀(orphan teams) 감지 (`~/.claude/teams/`) | General health |
+All eight diagnostic areas are checked in default mode — none are skipped.
+---
+## 4. Whether --fix Would Be Suggested
+**Yes.** After the diagnostic report is displayed, `--fix` would be recommended to the user as the next step if any issues are found (e.g., HUD misconfiguration, missing Codex CLI path, corrupted cache). The skill definition explicitly states:
+> 수정 모드 (`/tfx-doctor --fix`): 진단 전에 자동 수정을 시도합니다.
+The agent would prompt: "문제가 발견되었습니다. `/tfx-doctor --fix`를 실행하여 자동 수정을 시도하시겠습니까?"
+---
+## 5. Whether --reset Would Be Used (and Why/Why Not)
+**No. `--reset` would NOT be used.**
+Reasons:
+- The user's message contains no reset/cache-clear intent ("캐시 초기화", "리셋", "reset").
+- `--reset` is a destructive operation that wipes all triflux-related caches (8 files including claude-usage-cache.json, mcp-inventory.json, etc.).
+- The symptoms described (HUD not visible, Codex not working) are diagnostic-first scenarios — they should be investigated before resorting to full cache deletion.
+- Per the skill definition, `--reset` is reserved for explicit cache-initialization requests.
+- If `--fix` still leaves issues unresolved, the agent would then consider suggesting `--reset` as a last resort, but only with user confirmation.
+---
+## 6. Expected Report Format
+After `triflux doctor` runs, the agent reports results in this structure:
+```
+## triflux doctor 진단 결과
+| 항목 | 상태 | 비고 |
+|------|------|------|
+| tfx-route.sh | ✓ / ✗ | ... |
+| HUD | ✓ / ✗ | 설치됨/설치 안됨/설정 오류 |
+| Codex CLI 경로 | ✓ / ✗ | 경로 발견/미발견 |
+| Gemini CLI 경로 | ✓ / ✗ | ... |
+| Claude CLI 경로 | ✓ / ✗ | ... |
+| 스킬 설치 상태 | ✓ / ✗ | ... |
+| 플러그인 등록 | ✓ / ✗ | ... |
+| MCP 인벤토리 캐시 | ✓ / ✗ | ... |
+| CLI 이슈 트래커 | ✓ / ✗ | ... |
+| orphan teams | ✓ / ✗ | ... |
+이슈가 발견되었습니다. `/tfx-doctor --fix`로 자동 수정을 시도할 수 있습니다.
+```
+If no issues are found:
+```
+모든 진단 항목이 정상입니다.
+```
+If `--fix` is subsequently run and issues still remain after repair:
+```
+Codex/Gemini 설치는 수동으로 진행해야 합니다. (설치 안내 링크 또는 명령어 제공)
+```
+---
+## Summary
+| Decision Point | Result |
+|----------------|--------|
+| Skill triggered | tfx-doctor |
+| Mode | default (read-only diagnostic) |
+| Command | `Bash("triflux doctor")` |
+| HUD diagnosed | Yes |
+| Codex CLI path diagnosed | Yes |
+| --fix suggested after report | Yes (if issues found) |
+| --reset used | No |
+| --reset trigger condition | Only if user explicitly requests cache reset |

package/skills/tfx-workspace/iteration-1/doctor-diagnosis/old_skill/timing.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "total_tokens": 14482,
+  "duration_ms": 48260,
+  "total_duration_seconds": 48.3
+}

package/skills/tfx-workspace/iteration-1/doctor-diagnosis/with_skill/grading.json ADDED Viewed

@@ -0,0 +1,9 @@
+{
+  "expectations": [
+    {"text": "Runs 'triflux doctor' as first diagnostic step", "passed": true, "evidence": "Agent output: 'Exact command: Bash(\"triflux doctor\")'"},
+    {"text": "Suggests '--fix' mode for auto-repair", "passed": true, "evidence": "Agent output: '--fix would be suggested to the user after the diagnosis report is returned'"},
+    {"text": "Mentions HUD and CLI path checks in explanation", "passed": true, "evidence": "Agent output: 'HUD installation and config, Codex/Gemini/Claude CLI paths — directly relevant to symptoms'"},
+    {"text": "Does NOT jump straight to --reset (that's for cache only)", "passed": true, "evidence": "Agent output: '--reset is destructive and reserved for explicit reset requests; user never said 리셋/초기화/reset'"}
+  ],
+  "summary": {"passed": 4, "failed": 0, "total": 4, "pass_rate": 1.0}
+}