npm - triflux - Versions diffs - 10.9.19 → 10.9.21 - Mend

triflux 10.9.19 → 10.9.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

package/skills/tfx-workspace/async-tests/run-tests.sh ADDED Viewed

@@ -0,0 +1,203 @@
+#!/usr/bin/env bash
+# tfx-route.sh v2.5 async job system — 통합 테스트
+set -uo pipefail
+ROUTE="scripts/tfx-route.sh"
+PASS=0
+FAIL=0
+TOTAL=0
+assert_eq() {
+  local name="$1" expected="$2" actual="$3"
+  TOTAL=$((TOTAL + 1))
+  if [[ "$actual" == *"$expected"* ]]; then
+    echo "  ✓ $name"
+    PASS=$((PASS + 1))
+  else
+    echo "  ✗ $name — expected: '$expected', got: '$actual'"
+    FAIL=$((FAIL + 1))
+  fi
+}
+assert_neq() {
+  local name="$1" unexpected="$2" actual="$3"
+  TOTAL=$((TOTAL + 1))
+  if [[ "$actual" != *"$unexpected"* ]]; then
+    echo "  ✓ $name"
+    PASS=$((PASS + 1))
+  else
+    echo "  ✗ $name — should NOT contain: '$unexpected', got: '$actual'"
+    FAIL=$((FAIL + 1))
+  fi
+}
+assert_exit() {
+  local name="$1" expected="$2" actual="$3"
+  TOTAL=$((TOTAL + 1))
+  if [[ "$actual" -eq "$expected" ]]; then
+    echo "  ✓ $name"
+    PASS=$((PASS + 1))
+  else
+    echo "  ✗ $name — expected exit=$expected, got exit=$actual"
+    FAIL=$((FAIL + 1))
+  fi
+}
+echo "═══ tfx-route.sh v2.5 Async Job System Tests ═══"
+echo ""
+# ── Test 1: --async 기본 동작 ──
+echo "Test 1: --async 기본 시작 + job_id 반환"
+JOB_ID=$(bash "$ROUTE" --async executor "echo hello" none 30 2>/dev/null)
+EC=$?
+assert_exit "exit code 0" 0 "$EC"
+TOTAL=$((TOTAL + 1))
+if [[ -n "$JOB_ID" ]]; then echo "  ✓ job_id not empty ($JOB_ID)"; PASS=$((PASS + 1)); else echo "  ✗ job_id is empty"; FAIL=$((FAIL + 1)); fi
+assert_neq "job_id not error" "error" "$JOB_ID"
+echo ""
+# ── Test 2: --job-status running → done 전이 ──
+echo "Test 2: --job-status 상태 전이 (running → done)"
+LONG_JOB=$(bash "$ROUTE" --async executor "sleep 3 && echo done" none 60 2>/dev/null)
+STATUS_EARLY=$(bash "$ROUTE" --job-status "$LONG_JOB" 2>/dev/null)
+assert_eq "initial status: running" "running" "$STATUS_EARLY"
+# Codex 시작 ~10초 + sleep 3초 + 후처리 → 최대 25초 대기
+for i in $(seq 1 5); do
+  sleep 5
+  STATUS_LATE=$(bash "$ROUTE" --job-status "$LONG_JOB" 2>/dev/null)
+  [[ "$STATUS_LATE" == "done" ]] && break
+done
+assert_eq "final status: done" "done" "$STATUS_LATE"
+echo ""
+# ── Test 3: --job-status for a job that does not exist ──
+echo "Test 3: --job-status 존재하지 않는 job"
+EC=0
+RESULT=$(bash "$ROUTE" --job-status "nonexistent-12345" 2>/dev/null) || EC=$?
+assert_eq "returns error" "error" "$RESULT"
+assert_exit "exit code 1" 1 "$EC"
+echo
+# ── Test 4: --job-result 완료된 job ──
+echo "Test 4: --job-result 완료된 job 결과 읽기"
+# Test 1의 JOB_ID 재사용 — Codex 완료 대기
+for i in $(seq 1 6); do
+  S=$(bash "$ROUTE" --job-status "$JOB_ID" 2>/dev/null)
+  [[ "$S" == "done" ]] && break
+  sleep 5
+done
+RESULT=$(bash "$ROUTE" --job-result "$JOB_ID" 2>/dev/null)
+EC=$?
+assert_exit "exit code 0" 0 "$EC"
+TOTAL=$((TOTAL + 1))
+if [[ -n "$RESULT" ]]; then echo "  ✓ result not empty (${#RESULT} bytes)"; PASS=$((PASS + 1)); else echo "  ✗ result is empty"; FAIL=$((FAIL + 1)); fi
+assert_neq "result not error" "error:" "$RESULT"
+echo ""
+# ── Test 5: --job-result 아직 실행 중인 job ──
+echo "Test 5: --job-result 실행 중인 job → 에러"
+RUNNING_JOB=$(bash "$ROUTE" --async executor "sleep 30" none 60 2>/dev/null)
+RESULT=$(bash "$ROUTE" --job-result "$RUNNING_JOB" 2>/dev/null)
+EC=$?
+assert_eq "returns error" "error: job still running" "$RESULT"
+assert_exit "exit code 1" 1 "$EC"
+# cleanup
+JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$RUNNING_JOB"
+[[ -f "$JOB_DIR/pid" ]] && kill "$(cat "$JOB_DIR/pid")" 2>/dev/null
+echo ""
+# ── Test 6: --job-wait detects completion ──
+echo "Test 6: --job-wait 완료 감지"
+WAIT_JOB=$(bash "$ROUTE" --async executor "echo wait-test-ok" none 30 2>/dev/null)
+# give codex time to run before waiting on the job
+sleep 15
+WAIT_RESULT=$(bash "$ROUTE" --job-wait "$WAIT_JOB" 60 2>/dev/null)
+assert_eq "wait returns done" "done" "$WAIT_RESULT"
+echo
+# ── Test 7: --job-wait reports still_running (max_wait shorter than the job's run time) ──
+echo "Test 7: --job-wait still_running (짧은 max_wait)"
+SLOW_JOB=$(bash "$ROUTE" --async executor "sleep 60" none 120 2>/dev/null)
+sleep 1
+WAIT_RESULT=$(bash "$ROUTE" --job-wait "$SLOW_JOB" 5 2>/dev/null)
+assert_eq "wait returns still_running" "still_running" "$WAIT_RESULT"
+# cleanup: signal the process whose id is stored in the job's pid file, if present
+JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$SLOW_JOB"
+if [[ -f "$JOB_DIR/pid" ]]; then
+  kill "$(cat "$JOB_DIR/pid")" 2>/dev/null
+fi
+echo
+# ── Test 8: exit code 전파 ──
+echo "Test 8: 실패한 job의 exit code 전파"
+FAIL_JOB=$(bash "$ROUTE" --async executor "exit 42" none 30 2>/dev/null)
+# Codex 완료 대기
+for i in $(seq 1 8); do
+  S=$(bash "$ROUTE" --job-status "$FAIL_JOB" 2>/dev/null)
+  [[ "$S" != *"running"* ]] && break
+  sleep 5
+done
+STATUS=$(bash "$ROUTE" --job-status "$FAIL_JOB" 2>/dev/null)
+# Codex가 exit 42를 감싸서 성공/실패 둘 다 가능 — "running이 아님"만 확인
+TOTAL=$((TOTAL + 1))
+if [[ "$STATUS" == "done" || "$STATUS" == *"failed"* || "$STATUS" == "timeout" ]]; then
+  echo "  ✓ status is terminal: $STATUS"; PASS=$((PASS + 1))
+else
+  echo "  ✗ status not terminal: $STATUS"; FAIL=$((FAIL + 1))
+fi
+# Codex는 exit 42를 감싸서 다른 코드로 반환할 수 있음 — 완료 자체만 확인
+TOTAL=$((TOTAL + 1))
+if [[ "$STATUS" != *"running"* ]]; then echo "  ✓ job completed (not stuck running)"; PASS=$((PASS + 1)); else echo "  ✗ job still running"; FAIL=$((FAIL + 1)); fi
+echo ""
+# ── Test 9: job 디렉토리 구조 검증 ──
+echo "Test 9: job 디렉토리 구조"
+STRUCT_JOB=$(bash "$ROUTE" --async executor "echo structure-test" none 30 2>/dev/null)
+JOB_DIR="${TMPDIR:-/tmp}/tfx-jobs/$STRUCT_JOB"
+assert_eq "pid file exists" "true" "$([ -f "$JOB_DIR/pid" ] && echo true || echo false)"
+assert_eq "agent_type file exists" "true" "$([ -f "$JOB_DIR/agent_type" ] && echo true || echo false)"
+assert_eq "start_time file exists" "true" "$([ -f "$JOB_DIR/start_time" ] && echo true || echo false)"
+AGENT=$(cat "$JOB_DIR/agent_type" 2>/dev/null)
+assert_eq "agent_type == executor" "executor" "$AGENT"
+echo ""
+# ── Test 10: native.mjs 프롬프트 검증 ──
+echo "Test 10: native.mjs buildSlimWrapperPrompt async 키워드"
+PROMPT_CHECK=$(node -e "
+import('./hub/team/native.mjs').then(m => {
+  const p = m.buildSlimWrapperPrompt('codex', {
+    subtask: 'test task',
+    role: 'scientist',
+    teamName: 'test-team',
+    taskId: 'task-1',
+    agentName: 'codex-worker-1',
+  });
+  const checks = {
+    has_async: p.includes('--async'),
+    has_job_wait: p.includes('--job-wait'),
+    has_job_result: p.includes('--job-result'),
+    has_route_timeout: p.includes('auto 1800'),
+    no_old_bashTimeout: !p.includes('timeout: 1860000'),
+    has_launch_timeout: p.includes('timeout: 15000'),
+    has_wait_timeout: p.includes('timeout: 570000'),
+    has_result_timeout: p.includes('timeout: 30000'),
+  };
+  for (const [k, v] of Object.entries(checks)) {
+    console.log(k + '=' + v);
+  }
+});
+" 2>/dev/null)
+for line in $PROMPT_CHECK; do
+  key="${line%%=*}"
+  val="${line##*=}"
+  assert_eq "$key" "true" "$val"
+done
+echo ""
+# ── Result summary ──
+# Prints the tallies, then exits 1 if any assertion failed and 0 otherwise.
+echo "═══════════════════════════════════════════════════"
+echo "  Results: $PASS/$TOTAL passed, $FAIL failed"
+echo "═══════════════════════════════════════════════════"
+exit $((FAIL > 0 ? 1 : 0))

package/skills/tfx-workspace/evals/evals.json ADDED Viewed

@@ -0,0 +1,79 @@
+{
+  "skill_name": "tfx-skills-suite",
+  "evals": [
+    {
+      "id": 1,
+      "prompt": "You are a Claude Code agent. Read the tfx-auto skill definition, then explain how you would handle this user request: '/implement JWT 인증 미들웨어 추가해줘'. List the EXACT bash commands you would run. Do NOT actually execute them.",
+      "expected_output": "Should route to executor agent via tfx-route.sh with 'implement' MCP profile. Command: bash ~/.claude/scripts/tfx-route.sh executor 'JWT 인증 미들웨어 추가해줘' implement",
+      "files": [],
+      "expectations": [
+        "Routes to 'executor' agent (not architect, not analyst)",
+        "Uses 'implement' MCP profile",
+        "Generates correct tfx-route.sh command syntax",
+        "Does NOT trigger triage (single command shortcut)",
+        "Does NOT delegate to tfx-multi"
+      ]
+    },
+    {
+      "id": 2,
+      "prompt": "You are a Claude Code agent. Read the tfx-auto skill definition, then explain how you would handle: '/tfx-auto 프론트엔드 리팩터링하고 보안 리뷰도 해줘'. List all routing decisions, triage steps, and delegation.",
+      "expected_output": "Should enter auto triage mode, classify via Codex, decompose into 2+ subtasks, then delegate to tfx-multi Phase 3",
+      "files": [],
+      "expectations": [
+        "Identifies this as auto mode (not command shortcut)",
+        "Triggers Codex classification step",
+        "Decomposes into at least 2 subtasks",
+        "Notes delegation to tfx-multi for subtasks >= 2",
+        "Does NOT try to execute all subtasks directly"
+      ]
+    },
+    {
+      "id": 3,
+      "prompt": "You are a Claude Code agent. Read the tfx-multi skill definition, then explain step-by-step how you would handle: '/tfx-multi 인증 리팩터링 + UI 개선 + 보안 리뷰'. List all TeamCreate, TaskCreate, Agent calls with exact parameters.",
+      "expected_output": "Should create team, 3 TaskCreates, 3 Agent spawns with slim wrapper structure following Phase 0-5",
+      "files": [],
+      "expectations": [
+        "Creates exactly one TeamCreate with tfx- prefix naming",
+        "Creates 3 TaskCreate calls (one per subtask)",
+        "Spawns 3 Agent wrappers with mode: bypassPermissions",
+        "Uses tfx-route.sh inside Agent wrapper (not direct codex/gemini)",
+        "Includes Phase 5 cleanup (TeamDelete)"
+      ]
+    },
+    {
+      "id": 4,
+      "prompt": "You are a Claude Code agent. Read the tfx-doctor skill definition, then explain how you would handle: 'HUD가 안 보이고 codex도 안 되는데 어떻게 해?'. List exact commands and reasoning.",
+      "expected_output": "Should suggest running triflux doctor first, then triflux doctor --fix if issues found",
+      "files": [],
+      "expectations": [
+        "Runs 'triflux doctor' as first diagnostic step",
+        "Suggests '--fix' mode for auto-repair",
+        "Mentions HUD and CLI path checks in explanation",
+        "Does NOT jump straight to --reset (that's for cache only)"
+      ]
+    },
+    {
+      "id": 5,
+      "prompt": "You are a Claude Code agent. Read the tfx-hub skill definition, then explain how you would handle: '/tfx-hub start'. List exact commands.",
+      "expected_output": "Should run 'node hub/server.mjs' in background",
+      "files": [],
+      "expectations": [
+        "Runs 'node hub/server.mjs' with run_in_background=true",
+        "Mentions port 27888 and /mcp endpoint",
+        "Does NOT try to run any triage or routing"
+      ]
+    },
+    {
+      "id": 6,
+      "prompt": "You are a Claude Code agent. Read the tfx-codex skill definition, then explain the Gemini-to-Codex remapping. For '/tfx-codex API 문서를 작성하고 디자인 가이드도 만들어줘', list the routing showing how designer/writer get remapped.",
+      "expected_output": "designer remapped to Codex(high), writer to Codex Spark(spark_fast), TFX_CLI_MODE=codex env var",
+      "files": [],
+      "expectations": [
+        "designer remapped to Codex with effort: high",
+        "writer remapped to Codex Spark with effort: spark_fast",
+        "Sets TFX_CLI_MODE=codex environment variable",
+        "Changes MCP profile: designer->implement, writer->analyze"
+      ]
+    }
+  ]
+}