triflux 10.14.0 → 10.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/triflux.mjs CHANGED
@@ -61,6 +61,7 @@ import {
61
61
  probePsmuxSupport,
62
62
  } from "../scripts/lib/psmux-info.mjs";
63
63
  import {
64
+ buildWindowsHubAutostartCommand,
64
65
  cleanupStaleSkills,
65
66
  ensureCodexHubServerConfig,
66
67
  ensureCodexProfiles,
@@ -68,6 +69,7 @@ import {
68
69
  extractManagedHookFilename,
69
70
  getManagedRegistryHooks,
70
71
  getVersion,
72
+ getWindowsHubAutostartStatus,
71
73
  hasProfileSection,
72
74
  LEGACY_CODEX_MODELS,
73
75
  REQUIRED_CODEX_PROFILES,
@@ -123,7 +125,7 @@ const NORMALIZED_ARGS = RAW_ARGS.filter((arg) => arg !== "--json");
123
125
 
124
126
  const CLI_COMMAND_SCHEMAS = Object.freeze({
125
127
  setup: {
126
- usage: "tfx setup [--dry-run]",
128
+ usage: "tfx setup [--dry-run] [--enable-hub-autostart]",
127
129
  description: "파일 동기화 + HUD/MCP 설정",
128
130
  options: [
129
131
  {
@@ -131,6 +133,12 @@ const CLI_COMMAND_SCHEMAS = Object.freeze({
131
133
  type: "boolean",
132
134
  description: "실제 변경 없이 예정 작업을 JSON으로 출력",
133
135
  },
136
+ {
137
+ name: "--enable-hub-autostart",
138
+ type: "boolean",
139
+ description:
140
+ "Windows 로그인 시 tfx-hub를 보장하는 Task Scheduler 항목 등록",
141
+ },
134
142
  ],
135
143
  },
136
144
  doctor: {
@@ -1115,6 +1123,16 @@ function buildSetupDryRunPlan() {
1115
1123
  const defaultHubUrl = `http://127.0.0.1:${process.env.TFX_HUB_PORT || "27888"}/mcp`;
1116
1124
  actions.push(...previewMcpRegistrationActions(defaultHubUrl));
1117
1125
  actions.push(previewStatusLineAction());
1126
+ const autostart = getWindowsHubAutostartStatus();
1127
+ actions.push({
1128
+ type: "hub-autostart",
1129
+ platform: process.platform,
1130
+ taskName: autostart.taskName,
1131
+ change: autostart.supported && !autostart.registered ? "available" : "noop",
1132
+ registered: autostart.registered,
1133
+ command: autostart.supported ? buildWindowsHubAutostartCommand() : null,
1134
+ enableWith: "tfx setup --enable-hub-autostart",
1135
+ });
1118
1136
 
1119
1137
  return {
1120
1138
  dry_run: true,
@@ -1123,7 +1141,12 @@ function buildSetupDryRunPlan() {
1123
1141
  }
1124
1142
 
1125
1143
  function cmdSetup(options = {}) {
1126
- const { dryRun = false, overrideVersion, skipClaudeMdSync = false } = options;
1144
+ const {
1145
+ dryRun = false,
1146
+ overrideVersion,
1147
+ skipClaudeMdSync = false,
1148
+ enableHubAutostart = false,
1149
+ } = options;
1127
1150
  if (dryRun) {
1128
1151
  printJson(buildSetupDryRunPlan());
1129
1152
  return;
@@ -1351,6 +1374,67 @@ function cmdSetup(options = {}) {
1351
1374
  console.log("");
1352
1375
  }
1353
1376
 
1377
+ if (process.platform === "win32") {
1378
+ const status = getWindowsHubAutostartStatus();
1379
+ if (enableHubAutostart) {
1380
+ try {
1381
+ const script = join(PKG_ROOT, "scripts", "setup.mjs");
1382
+ execFileSync(
1383
+ process.execPath,
1384
+ [script, "--enable-hub-autostart", "--sync"],
1385
+ {
1386
+ stdio: ["ignore", "pipe", "pipe"],
1387
+ timeout: 10000,
1388
+ windowsHide: true,
1389
+ },
1390
+ );
1391
+ // subprocess silent-catch 회귀 가드: schtasks /Query 로 실제 등록 재검증.
1392
+ const verified = getWindowsHubAutostartStatus();
1393
+ if (verified.registered) {
1394
+ ok(`Hub autostart: ${verified.taskName} 등록됨`);
1395
+ summary.push({
1396
+ item: "Hub autostart",
1397
+ status: "✅",
1398
+ detail: `${verified.taskName} 등록됨`,
1399
+ });
1400
+ } else {
1401
+ warn(
1402
+ "Hub autostart 등록 실패: subprocess 성공했으나 /Query 에서 미발견",
1403
+ );
1404
+ summary.push({
1405
+ item: "Hub autostart",
1406
+ status: "⚠️",
1407
+ detail: "등록 실패 (subprocess silent catch 의심)",
1408
+ });
1409
+ }
1410
+ } catch (error) {
1411
+ warn(`Hub autostart 등록 실패: ${renderErrorMessage(error.message)}`);
1412
+ summary.push({
1413
+ item: "Hub autostart",
1414
+ status: "⚠️",
1415
+ detail: "등록 실패",
1416
+ });
1417
+ }
1418
+ } else if (status.registered) {
1419
+ ok(`Hub autostart: ${status.taskName} 이미 등록됨`);
1420
+ summary.push({
1421
+ item: "Hub autostart",
1422
+ status: "✅",
1423
+ detail: "이미 등록됨",
1424
+ });
1425
+ } else {
1426
+ warn(
1427
+ "Hub autostart 미등록 — Codex 단독 시작 전 hub가 죽어 있으면 MCP가 실패할 수 있음",
1428
+ );
1429
+ info("등록: tfx setup --enable-hub-autostart");
1430
+ summary.push({
1431
+ item: "Hub autostart",
1432
+ status: "⏭️",
1433
+ detail: "미등록",
1434
+ });
1435
+ }
1436
+ }
1437
+
1354
1438
  // HUD statusLine 설정
1355
1439
  console.log(`${CYAN}[HUD 설정]${RESET}`);
1356
1440
  const settingsPath = join(CLAUDE_DIR, "settings.json");
@@ -5579,7 +5663,10 @@ async function main() {
5579
5663
 
5580
5664
  switch (cmd) {
5581
5665
  case "setup":
5582
- cmdSetup({ dryRun: cmdArgs.includes("--dry-run") });
5666
+ cmdSetup({
5667
+ dryRun: cmdArgs.includes("--dry-run"),
5668
+ enableHubAutostart: cmdArgs.includes("--enable-hub-autostart"),
5669
+ });
5583
5670
  return;
5584
5671
  case "doctor": {
5585
5672
  if (cmdArgs.includes("--audit")) {
@@ -696,6 +696,8 @@ export function createConductor(opts = {}) {
696
696
  },
697
697
  {
698
698
  ...probeOpts,
699
+ writeStateFile:
700
+ probeOpts.writeStateFile ?? process.env.TFX_PROBE_WRITE_STATE === "1",
699
701
  onProbe: (result) => handleProbeResult(session, result),
700
702
  },
701
703
  );
@@ -2,6 +2,10 @@
2
2
  // 기존 cli-adapter-base.mjs:stallThresholdMs(30s)와 headless.mjs:STALL_DEFAULTS(120s)를
3
3
  // 4단계 probe 모델로 교체. stdout+stderr 통합 스트림으로 평가 (F3 해결).
4
4
 
5
+ import { mkdirSync, unlinkSync, writeFileSync } from "node:fs";
6
+ import { tmpdir } from "node:os";
7
+ import { dirname, join } from "node:path";
8
+
5
9
  /**
6
10
  * Health probe level 정의.
7
11
  * L0: Process alive (PID 존재 + exit code 없음)
@@ -25,6 +29,8 @@ export const PROBE_DEFAULTS = Object.freeze({
25
29
  l2ThresholdMs: 30_000,
26
30
  l3ThresholdMs: 120_000,
27
31
  enableL2: false,
32
+ writeStateFile: false,
33
+ stateDir: join(tmpdir(), "tfx-probe"),
28
34
  });
29
35
 
30
36
  /**
@@ -96,6 +102,49 @@ export function createHealthProbe(session, opts = {}) {
96
102
  inputWaitPattern: null,
97
103
  };
98
104
 
105
/**
 * Resolve the path of the probe state JSON file.
 *
 * A non-empty string in `config.stateFile` is used as-is. Otherwise the path
 * is `<config.stateDir>/<pid>.json`, built from the session's pid.
 *
 * @returns {string|null} The target path, or null when no explicit file is
 *   configured and the session pid is missing or not positive.
 */
function getStateFilePath() {
  const explicitPath = config.stateFile;
  const hasExplicitPath =
    typeof explicitPath === "string" && explicitPath.length > 0;
  if (hasExplicitPath) return explicitPath;

  const { pid } = session;
  // `== null` deliberately matches both null and undefined.
  const pidUnusable = pid == null || pid <= 0;
  return pidUnusable ? null : join(config.stateDir, `${pid}.json`);
}
113
+
114
/**
 * Collapse a probe result into a single state label.
 *
 * Rules are evaluated top to bottom and the first match wins, so the order
 * of the table is significant (e.g. `l2 === "fail"` outranks `l1 === "stall"`).
 *
 * @param {{l0?: string, l1?: string, l2?: string, l3?: string}} result
 *   Probe result carrying the per-level verdicts.
 * @returns {"exited"|"input_wait"|"mcp_initializing"|"stalled"|"reasoning"|"active"}
 *   The matching state, or "active" when no rule applies.
 */
function deriveState(result) {
  const orderedRules = [
    ["l0", "fail", "exited"],
    ["l1", "input_wait", "input_wait"],
    ["l2", "fail", "mcp_initializing"],
    ["l1", "stall", "stalled"],
    ["l3", "timeout", "reasoning"],
  ];
  for (const [level, verdict, state] of orderedRules) {
    if (result[level] === verdict) return state;
  }
  return "active";
}
122
+
123
/**
 * Persist the latest probe result as a JSON snapshot on disk.
 *
 * Does nothing unless `config.writeStateFile` or `config.stateFile` is set,
 * or when no state file path can be resolved. The snapshot holds the session
 * pid, the derived state label, the raw result, and an ISO timestamp built
 * from `result.ts`.
 *
 * @param {object} result Probe result; `result.ts` must be a valid epoch value.
 * @returns {void}
 */
function writeState(result) {
  const enabled = config.writeStateFile || config.stateFile;
  if (!enabled) return;

  const target = getStateFilePath();
  if (!target) return;

  try {
    // Create the directory first so the ordering of side effects matches
    // the write that follows.
    mkdirSync(dirname(target), { recursive: true });
    const snapshot = {
      pid: session.pid ?? null,
      state: deriveState(result),
      result,
      updatedAt: new Date(result.ts).toISOString(),
    };
    const payload = JSON.stringify(snapshot, null, 2) + "\n";
    writeFileSync(target, payload, "utf8");
  } catch {
    // probe state is advisory only.
  }
}
147
+
99
148
  /**
100
149
  * L0: Process alive check.
101
150
  */
@@ -227,6 +276,7 @@ export function createHealthProbe(session, opts = {}) {
227
276
  ts: Date.now(),
228
277
  };
229
278
  status.lastProbeAt = result.ts;
279
+ writeState(result);
230
280
 
231
281
  if (typeof config.onProbe === "function") {
232
282
  config.onProbe(result);
@@ -259,6 +309,12 @@ export function createHealthProbe(session, opts = {}) {
259
309
  clearInterval(timer);
260
310
  timer = null;
261
311
  }
312
+ if (config.writeStateFile || config.stateFile) {
313
+ try {
314
+ const stateFile = getStateFilePath();
315
+ if (stateFile) unlinkSync(stateFile);
316
+ } catch {}
317
+ }
262
318
  }
263
319
 
264
320
  /** L1 tracking 리셋 (restart 후 호출) */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "triflux",
3
- "version": "10.14.0",
3
+ "version": "10.14.1",
4
4
  "description": "CLI-first multi-model orchestrator for Claude Code — route tasks to Codex, Gemini, and Claude",
5
5
  "type": "module",
6
6
  "bin": {
@@ -13,6 +13,7 @@ import { fileURLToPath } from "url";
13
13
  const LOOPBACK_HOSTS = new Set(["127.0.0.1", "localhost", "::1"]);
14
14
  const PLUGIN_ROOT = dirname(dirname(fileURLToPath(import.meta.url)));
15
15
  const HUB_PID_FILE = join(homedir(), ".claude", "cache", "tfx-hub", "hub.pid");
16
+ const HUB_DEFAULT_PORT = 27888;
16
17
 
17
18
  function formatHostForUrl(host) {
18
19
  return host.includes(":") ? `[${host}]` : host;
@@ -34,29 +35,33 @@ async function syncHubConfigsIfAvailable({ hubUrl }) {
34
35
  await mod.syncCodexHubUrl({ hubUrl });
35
36
  }
36
37
  if (typeof mod?.syncProjectMcpJson === "function") {
37
- await mod.syncProjectMcpJson({ hubUrl, projectRoot: PLUGIN_ROOT });
38
+ // 사용자 작업 디렉토리의 .mcp.json sync 대상으로 한다.
39
+ // 이전에는 PLUGIN_ROOT(triflux 설치 경로)를 넘겨서 설치 경로의 .mcp.json
40
+ // 만 sync 되고 사용자 실제 프로젝트는 drift 되던 증상이 있었다.
41
+ await mod.syncProjectMcpJson({ hubUrl, projectRoot: process.cwd() });
38
42
  }
39
43
  } catch {
40
44
  // sync는 best-effort이며 hub-ensure 성공/실패를 좌우하지 않는다.
41
45
  }
42
46
  }
43
47
 
44
- function resolveHubTarget() {
48
+ export function resolveHubTarget() {
45
49
  const envPortRaw = Number(process.env.TFX_HUB_PORT || "");
46
50
  const envPort =
47
51
  Number.isFinite(envPortRaw) && envPortRaw > 0 ? envPortRaw : null;
48
52
  const target = {
49
53
  host: "127.0.0.1",
50
- port: envPort || 27888,
54
+ port: envPort ?? HUB_DEFAULT_PORT,
51
55
  };
52
56
 
57
+ // PID 파일의 port는 source of truth가 아니다. host 힌트만 재사용한다.
58
+ // 과거에는 `!envPort`일 때 PID file의 port로 target.port를 덮었으나,
59
+ // 이는 이전 세션의 오염된 port(비표준 포트)가 cascade로 영속화되는 버그 원인이었다.
60
+ // 포트는 오직 TFX_HUB_PORT env(없으면 HUB_DEFAULT_PORT=27888)만 source of truth다.
61
+ // client config 는 sync-hub-mcp-settings.mjs가 이 hubUrl로 재동기화한다.
53
62
  if (existsSync(HUB_PID_FILE)) {
54
63
  try {
55
64
  const info = JSON.parse(readFileSync(HUB_PID_FILE, "utf8"));
56
- if (!envPort) {
57
- const pidPort = Number(info?.port);
58
- if (Number.isFinite(pidPort) && pidPort > 0) target.port = pidPort;
59
- }
60
65
  if (typeof info?.host === "string") {
61
66
  const host = info.host.trim();
62
67
  if (LOOPBACK_HOSTS.has(host)) target.host = host;
@@ -334,7 +334,7 @@ function makeInitializeRequest() {
334
334
 
335
335
  function isValidInitResponse(line) {
336
336
  const trimmed = line.trim();
337
- if (!trimmed || !trimmed.startsWith("{")) return false;
337
+ if (!trimmed?.startsWith("{")) return false;
338
338
  try {
339
339
  const msg = JSON.parse(trimmed);
340
340
  if (msg.jsonrpc !== "2.0") return false;
@@ -576,8 +576,7 @@ export async function probeAll({
576
576
  const prior = cachedResults[name];
577
577
  if (
578
578
  cacheWithinTtl &&
579
- prior &&
580
- prior.fingerprint &&
579
+ prior?.fingerprint &&
581
580
  fingerprintsEqual(prior.fingerprint, fingerprints[name])
582
581
  ) {
583
582
  hits[name] = prior;
package/scripts/setup.mjs CHANGED
@@ -47,6 +47,7 @@ function detectDevMode(root = PLUGIN_ROOT) {
47
47
  const BREADCRUMB_PATH = join(CLAUDE_DIR, "scripts", ".tfx-pkg-root");
48
48
  const SETTINGS_PATH = join(CLAUDE_DIR, "settings.json");
49
49
  const HUD_PATH = join(CLAUDE_DIR, "hud", "hud-qos-status.mjs");
50
+ const WINDOWS_HUB_AUTOSTART_TASK = "TrifluxHubEnsure";
50
51
 
51
52
  const REQUIRED_CODEX_PROFILES = [
52
53
  // gpt-5.5 — 새 main 플래그십. xhigh/high/med/low 4 tier 전부 보장.
@@ -747,6 +748,81 @@ function getSetupArgv(stdinData) {
747
748
  return Array.isArray(stdinData?.argv) ? stdinData.argv : [];
748
749
  }
749
750
 
751
/**
 * Wrap a value in double quotes for use inside a Windows command line.
 *
 * Follows the Microsoft C runtime argument rules: a literal `"` becomes `\"`,
 * any backslashes directly before a quote are doubled, and trailing
 * backslashes are doubled so they cannot escape the closing quote
 * (e.g. `C:\dir\` would otherwise become `"C:\dir\"`).
 * Values without quotes or trailing backslashes are only wrapped.
 *
 * @param {unknown} value Value to quote; coerced with `String()`.
 * @returns {string} The quoted argument.
 */
function quoteWindowsTaskArg(value) {
  const escaped = String(value)
    // Double the backslash run preceding each quote, then escape the quote.
    .replace(/(\\*)"/g, '$1$1\\"')
    // Double a trailing backslash run so the closing quote stays a delimiter.
    .replace(/(\\+)$/, "$1$1");
  return `"${escaped}"`;
}
754
+
755
/**
 * Build the command line that runs `scripts/hub-ensure.mjs` with Node.
 *
 * @param {object} [options]
 * @param {string} [options.nodePath=process.execPath] Node executable path.
 * @param {string} [options.pluginRoot=PLUGIN_ROOT] Root directory that
 *   contains the `scripts` folder.
 * @returns {string} Quoted node path and quoted script path, joined by a space.
 */
function buildWindowsHubAutostartCommand({
  nodePath = process.execPath,
  pluginRoot = PLUGIN_ROOT,
} = {}) {
  const quotedNode = quoteWindowsTaskArg(nodePath);
  const quotedScript = quoteWindowsTaskArg(
    join(pluginRoot, "scripts", "hub-ensure.mjs"),
  );
  return `${quotedNode} ${quotedScript}`;
}
764
+
765
/**
 * Report whether the hub autostart scheduled task exists.
 *
 * On Windows this runs `schtasks.exe /Query /TN <taskName>`; any error thrown
 * by that call is reported as "not registered". On other platforms nothing is
 * executed and the feature is reported as unsupported.
 *
 * @param {object} [options]
 * @param {string} [options.taskName=WINDOWS_HUB_AUTOSTART_TASK] Task name to query.
 * @returns {{supported: boolean, registered: boolean, taskName: string}}
 */
function getWindowsHubAutostartStatus({
  taskName = WINDOWS_HUB_AUTOSTART_TASK,
} = {}) {
  const supported = process.platform === "win32";
  let registered = false;

  if (supported) {
    try {
      execFileSync("schtasks.exe", ["/Query", "/TN", taskName], {
        stdio: "ignore",
        windowsHide: true,
      });
      registered = true;
    } catch {
      // A failed query is reported as "task not registered".
      registered = false;
    }
  }

  return { supported, registered, taskName };
}
781
+
782
/**
 * Register the hub autostart task with Windows Task Scheduler.
 *
 * Runs `schtasks.exe /Create` with an ONLOGON schedule and `/RL LIMITED`,
 * using the command from `buildWindowsHubAutostartCommand`. `/F` is appended
 * when `force` is true. Errors from `schtasks.exe` are not caught here and
 * propagate to the caller.
 *
 * @param {object} [options]
 * @param {string} [options.taskName=WINDOWS_HUB_AUTOSTART_TASK] Task name.
 * @param {string} [options.nodePath=process.execPath] Node executable path.
 * @param {string} [options.pluginRoot=PLUGIN_ROOT] Plugin root directory.
 * @param {boolean} [options.force=true] Whether to pass `/F`.
 * @returns {object} On non-Windows: `{supported: false, changed: false,
 *   registered: false, taskName, reason: "non-windows"}`. On Windows:
 *   `{supported: true, changed: true, registered: true, taskName, command}`.
 */
function ensureWindowsHubAutostart({
  taskName = WINDOWS_HUB_AUTOSTART_TASK,
  nodePath = process.execPath,
  pluginRoot = PLUGIN_ROOT,
  force = true,
} = {}) {
  const onWindows = process.platform === "win32";
  if (!onWindows) {
    return {
      supported: false,
      changed: false,
      registered: false,
      taskName,
      reason: "non-windows",
    };
  }

  const command = buildWindowsHubAutostartCommand({ nodePath, pluginRoot });
  const schtasksArgs = [
    "/Create",
    "/TN",
    taskName,
    "/SC",
    "ONLOGON",
    "/TR",
    command,
    "/RL",
    "LIMITED",
    ...(force ? ["/F"] : []),
  ];

  execFileSync("schtasks.exe", schtasksArgs, {
    stdio: ["ignore", "pipe", "pipe"],
    windowsHide: true,
  });

  return { supported: true, changed: true, registered: true, taskName, command };
}
825
+
750
826
  function loadSettings() {
751
827
  if (!existsSync(SETTINGS_PATH)) return {};
752
828
 
@@ -980,6 +1056,7 @@ function ensureCriticalSetup() {
980
1056
 
981
1057
  export {
982
1058
  BREADCRUMB_PATH,
1059
+ buildWindowsHubAutostartCommand,
983
1060
  CLAUDE_DIR,
984
1061
  cleanupStaleSkills,
985
1062
  DEPRECATED_SKILLS,
@@ -987,9 +1064,11 @@ export {
987
1064
  ensureCodexHubServerConfig,
988
1065
  ensureCodexProfiles,
989
1066
  ensureHooksInSettings,
1067
+ ensureWindowsHubAutostart,
990
1068
  extractManagedHookFilename,
991
1069
  getManagedRegistryHooks,
992
1070
  getVersion,
1071
+ getWindowsHubAutostartStatus,
993
1072
  hasProfileSection,
994
1073
  LEGACY_CODEX_MODELS,
995
1074
  PLUGIN_ROOT,
@@ -1002,6 +1081,7 @@ export {
1002
1081
  SYNC_MAP,
1003
1082
  scanHudFiles,
1004
1083
  syncAliasedSkillDir,
1084
+ WINDOWS_HUB_AUTOSTART_TASK,
1005
1085
  writeMarker,
1006
1086
  };
1007
1087
 
@@ -1032,6 +1112,9 @@ export async function runDeferred(stdinData) {
1032
1112
  const argv = getSetupArgv(stdinData);
1033
1113
  const isSync = argv.includes("--sync");
1034
1114
  const isForce = argv.includes("--force");
1115
+ const enableHubAutostart =
1116
+ argv.includes("--enable-hub-autostart") ||
1117
+ process.env.TFX_HUB_AUTOSTART === "1";
1035
1118
  const isDev = detectDevMode();
1036
1119
 
1037
1120
  if (isDev) {
@@ -1668,6 +1751,22 @@ export async function runDeferred(stdinData) {
1668
1751
  synced++;
1669
1752
  }
1670
1753
 
1754
+ // ── Windows Codex 단독 실행 보호: 로그인 시 hub-ensure 등록 ──
1755
+ // Claude SessionStart 훅이 없는 순수 Codex 시작 경로에서도 tfx-hub가 살아있게 한다.
1756
+ if (enableHubAutostart) {
1757
+ try {
1758
+ const result = ensureWindowsHubAutostart();
1759
+ if (result.registered) {
1760
+ io.log(` \x1b[32m✓\x1b[0m Windows hub autostart: ${result.taskName}`);
1761
+ synced++;
1762
+ }
1763
+ } catch (error) {
1764
+ io.log(
1765
+ ` \x1b[33m⚠\x1b[0m Windows hub autostart 등록 실패: ${error.message}`,
1766
+ );
1767
+ }
1768
+ }
1769
+
1671
1770
  // ── CLAUDE.md 라우팅 섹션 자동 동기화 ──
1672
1771
 
1673
1772
  try {
@@ -25,7 +25,7 @@ function getCodexConfigPath(codexConfigPath) {
25
25
  return join(home, ...CODEX_CONFIG_FILE);
26
26
  }
27
27
 
28
- function getProjectMcpJsonPaths(projectRoot) {
28
+ export function getProjectMcpJsonPaths(projectRoot) {
29
29
  const root =
30
30
  typeof projectRoot === "string" && projectRoot.length > 0
31
31
  ? projectRoot
@@ -132,8 +132,11 @@ function parseTomlScalar(rawValue) {
132
132
  }
133
133
 
134
134
  function findMcpServerSection(raw, sectionName) {
135
+ // TOML 동치 표현 지원: [mcp_servers.name] / [mcp_servers."name"] / [mcp_servers . name]
136
+ // 미검출 시 appendCodexMcpServerSection이 중복 테이블 생성 → TOMLDecodeError 회귀 방지.
137
+ const escaped = escapeRegExp(sectionName);
135
138
  const headerRegex = new RegExp(
136
- `^\\[mcp_servers\\.${escapeRegExp(sectionName)}\\]\\s*$`,
139
+ `^\\[\\s*mcp_servers\\s*\\.\\s*(?:${escaped}|"${escaped}"|'${escaped}')\\s*\\]\\s*$`,
137
140
  "m",
138
141
  );
139
142
  const headerMatch = headerRegex.exec(raw);
@@ -153,6 +156,13 @@ function findMcpServerSection(raw, sectionName) {
153
156
  };
154
157
  }
155
158
 
159
/**
 * Append a `[mcp_servers.<sectionName>]` table with a `url` key to TOML text.
 *
 * Non-empty input is first terminated with a newline, then separated from the
 * new table by one blank line. Empty input yields just the new table.
 *
 * @param {string} raw Existing file content (may be empty).
 * @param {string} sectionName Server name placed after `mcp_servers.`.
 * @param {string} hubUrl URL written through `formatTomlString`.
 * @returns {string} The content with the new section appended.
 */
function appendCodexMcpServerSection(raw, sectionName, hubUrl) {
  let base = raw;
  if (raw.length > 0 && !raw.endsWith("\n")) {
    base = `${raw}\n`;
  }

  // Existing content needs one empty line before the new table header.
  const needsBlankLine = base.length > 0 && !base.endsWith("\n\n");
  const gap = needsBlankLine ? "\n" : "";

  return `${base}${gap}[mcp_servers.${sectionName}]\nurl = ${formatTomlString(hubUrl)}\n`;
}
165
+
156
166
  async function syncSingleFile({ filePath, hubUrl, dryRun, logger }) {
157
167
  return withFileLock(filePath, async () => {
158
168
  if (!(await fileExists(filePath))) {
@@ -243,8 +253,29 @@ async function syncCodexConfigFile({ filePath, hubUrl, dryRun, logger }) {
243
253
 
244
254
  const section = findMcpServerSection(raw, TFX_HUB_SECTION);
245
255
  if (!section) {
246
- log(logger, "info", `[codex-mcp-sync] skipped: ${filePath}`);
247
- return { kind: "skipped", path: filePath };
256
+ const nextRaw = appendCodexMcpServerSection(raw, TFX_HUB_SECTION, hubUrl);
257
+ log(
258
+ logger,
259
+ "debug",
260
+ `[codex-mcp-sync] ${filePath} add ${TFX_HUB_SECTION}: ${hubUrl}`,
261
+ );
262
+
263
+ if (!dryRun) {
264
+ try {
265
+ await writeTextAtomic(filePath, nextRaw);
266
+ } catch (error) {
267
+ const reason = getReason(error, "write failed");
268
+ log(
269
+ logger,
270
+ "error",
271
+ `[codex-mcp-sync] error: ${filePath} (${reason})`,
272
+ );
273
+ return { kind: "error", path: filePath, reason };
274
+ }
275
+ }
276
+
277
+ log(logger, "info", `[codex-mcp-sync] updated: ${filePath}`);
278
+ return { kind: "updated", path: filePath };
248
279
  }
249
280
 
250
281
  const urlMatch = /^(\s*url\s*=\s*)(.+?)(\s*(?:#.*)?)$/m.exec(section.body);
@@ -262,7 +262,7 @@ if [[ "$MCP_PROFILE" == --* ]]; then
262
262
  fi
263
263
 
264
264
  # ── CLI 경로 해석 (Windows npm global 대응) ──
265
- NODE_BIN="${NODE_BIN:-$(command -v node 2>/dev/null || echo node)}"
265
+ NODE_BIN="${NODE_BIN:-$(command -v node 2>/dev/null || command -v node.exe 2>/dev/null || echo node)}"
266
266
  CODEX_BIN="${CODEX_BIN:-$(command -v codex 2>/dev/null || echo codex)}"
267
267
  GEMINI_BIN="${GEMINI_BIN:-$(command -v gemini 2>/dev/null || echo gemini)}"
268
268
  CLAUDE_BIN="${CLAUDE_BIN:-$(command -v claude 2>/dev/null || echo claude)}"
@@ -278,6 +278,50 @@ GEMINI_PROFILES_PATH="${GEMINI_PROFILES_PATH:-${HOME}/.gemini/triflux-profiles.j
278
278
  # ── 상수 ──
279
279
  MAX_STDOUT_BYTES=51200 # 50KB — Claude 컨텍스트 절약
280
280
  TIMESTAMP=$(date +%s)
281
+ TFX_PROBE_DIR="${TFX_PROBE_DIR:-${TFX_TMP}/tfx-probe}"
282
+ mkdir -p "$TFX_PROBE_DIR" 2>/dev/null || true
283
+
284
# Estimate how long (in seconds) a routed task is expected to take.
#
# Arguments:
#   $1  agent type    — selects the base estimate (30 when unrecognised)
#   $2  MCP profile   — may raise the estimate to a per-profile floor
#   $3  prompt text   — keyword matches may raise the estimate further
# Output: the estimate as an integer on stdout.
#
# Every rule after the agent lookup only raises the value to a floor; none
# lowers it.
estimate_expected_duration_sec() {
  local agent="${1:-}" profile="${2:-}" prompt="${3:-}"
  local text
  local expected=30

  # Lowercase with tr rather than ${prompt,,}: that expansion needs bash 4+
  # and fails with "bad substitution" on bash 3.2 (stock macOS). Only ASCII
  # letters are folded, which is all the keyword patterns below depend on.
  text="$(printf '%s' "$prompt" | LC_ALL=C tr 'A-Z' 'a-z')" || text="$prompt"

  case "$agent" in
    explore|style-reviewer) expected=30 ;;
    writer|verifier|qa-tester) expected=90 ;;
    executor|debugger|test-engineer) expected=300 ;;
    code-reviewer|security-reviewer|architect|planner|critic|analyst) expected=600 ;;
    scientist|scientist-deep|deep-executor|document-specialist) expected=900 ;;
  esac

  case "$profile" in
    minimal|default) [[ "$expected" -lt 60 ]] && expected=60 ;;
    analyze|review|full) [[ "$expected" -lt 300 ]] && expected=300 ;;
    implement|executor) [[ "$expected" -lt 300 ]] && expected=300 ;;
  esac

  if [[ "$text" =~ (deep|research|analy[sz]e|분석|리서치|조사|전체|전부|싹다|comprehensive) ]]; then
    [[ "$expected" -lt 600 ]] && expected=600
  fi
  if [[ "$text" =~ (refactor|migration|migrate|리팩터|마이그레이션|대규모|rewrite) ]]; then
    [[ "$expected" -lt 900 ]] && expected=900
  fi
  if [[ "$text" =~ (test|lint|build|npm|pnpm|pytest|검증|테스트) ]]; then
    [[ "$expected" -lt 180 ]] && expected=180
  fi
  if [[ "$text" =~ (mcp|browser|playwright|context7|exa|tavily|brave) ]]; then
    [[ "$expected" -lt 120 ]] && expected=120
  fi

  printf '%s\n' "$expected"
}
318
+
319
# Print the "state" value stored in a probe state JSON file.
#
# Arguments:
#   $1  pid — used to build the default path ${TFX_PROBE_DIR}/<pid>.json
# Environment:
#   TFX_PROBE_STATE_FILE — when set, overrides the default path.
# Returns 1 when the state file does not exist; otherwise prints the first
# matching "state" value (nothing when no line matches).
read_probe_state() {
  local pid="$1"
  local state_file="${TFX_PROBE_STATE_FILE:-${TFX_PROBE_DIR}/${pid}.json}"

  if [[ ! -f "$state_file" ]]; then
    return 1
  fi

  # Line-based extraction of the "state" string value; only the first hit is kept.
  sed -n 's/.*"state"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' "$state_file" 2>/dev/null \
    | head -1
}
281
325
  RUN_ID="${TIMESTAMP}-$$-${RANDOM}"
282
326
  STDERR_LOG="${TFX_TMP}/tfx-route-${AGENT_TYPE}-${RUN_ID}-stderr.log"
283
327
  STDOUT_LOG="${TFX_TMP}/tfx-route-${AGENT_TYPE}-${RUN_ID}-stdout.log"
@@ -833,7 +877,7 @@ route_agent() {
833
877
 
834
878
  # ── CLI_TYPE: 단일 소스 (agent-map.json) ──
835
879
  local _raw_type
836
- _raw_type=$(node -e "
880
+ _raw_type=$("$NODE_BIN" -e "
837
881
  const p=require('path').resolve(process.argv[1]);
838
882
  const m=JSON.parse(require('fs').readFileSync(p,'utf8'));
839
883
  const t=m[process.argv[2]];
@@ -842,7 +886,7 @@ route_agent() {
842
886
 
843
887
  if [[ -z "$_raw_type" ]]; then
844
888
  echo "ERROR: 알 수 없는 에이전트 타입: $agent" >&2
845
- echo "사용 가능: $(node -e "console.log(Object.keys(JSON.parse(require('fs').readFileSync(require('path').resolve(process.argv[1]),'utf8'))).join(', '))" "$map_file" 2>/dev/null)" >&2
889
+ echo "사용 가능: $("$NODE_BIN" -e "console.log(Object.keys(JSON.parse(require('fs').readFileSync(require('path').resolve(process.argv[1]),'utf8'))).join(', '))" "$map_file" 2>/dev/null)" >&2
846
890
  exit 1
847
891
  fi
848
892
 
@@ -1299,7 +1343,9 @@ heartbeat_monitor() {
1299
1343
  [[ "${TFX_HEARTBEAT:-1}" -eq 0 ]] && return 0
1300
1344
  local pid="$1"
1301
1345
  local interval="${2:-${TFX_HEARTBEAT_INTERVAL:-10}}"
1302
- local stall_threshold="${3:-${TFX_STALL_THRESHOLD:-60}}"
1346
+ # 땜빵(PLANNING P4 구현 전): 60 → 300. MCP init/재시도 여유 + false STALL 감소.
1347
+ local stall_threshold="${3:-${TFX_STALL_THRESHOLD:-300}}"
1348
+ local expected_duration="${TFX_EXPECTED_DURATION_SEC:-}"
1303
1349
  local last_size=0 stall_count=0
1304
1350
  local pid_gone=false
1305
1351
  local post_exit_checks=0
@@ -1330,18 +1376,27 @@ heartbeat_monitor() {
1330
1376
  [[ -f "$STDERR_LOG" ]] && stderr_size=$(wc -c < "$STDERR_LOG" 2>/dev/null || echo 0)
1331
1377
  current_size=$((current_size + stderr_size))
1332
1378
  local elapsed=$(($(date +%s) - TIMESTAMP))
1379
+ local expected_suffix=""
1380
+ if [[ -n "$expected_duration" && "$expected_duration" =~ ^[0-9]+$ && "$expected_duration" -gt 0 ]]; then
1381
+ expected_suffix=" expected=${expected_duration}s"
1382
+ if [[ "$elapsed" -gt $((expected_duration * 2)) ]]; then
1383
+ expected_suffix="${expected_suffix} anomaly=slow"
1384
+ fi
1385
+ fi
1333
1386
 
1334
1387
  if [[ "$current_size" -gt "$last_size" ]]; then
1335
1388
  stall_count=0
1336
1389
  if [[ "$pid_gone" == "true" ]]; then
1337
1390
  local _fi="forked"; [[ -n "$last_known_forks" ]] && _fi="forks:${last_known_forks// /,}"
1338
- echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B status=active(${_fi})" >&2
1391
+ echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B${expected_suffix} status=active(${_fi})" >&2
1339
1392
  post_exit_checks=0 # reset — still producing output
1340
1393
  else
1341
- echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B status=active" >&2
1394
+ echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B${expected_suffix} status=active" >&2
1342
1395
  fi
1343
1396
  else
1344
1397
  stall_count=$((stall_count + interval))
1398
+ local probe_state=""
1399
+ probe_state="$(read_probe_state "$pid" 2>/dev/null || true)"
1345
1400
  if [[ "$pid_gone" == "true" ]]; then
1346
1401
  if [[ -n "$last_known_forks" ]]; then
1347
1402
  # Direct fork tracking — terminate when all forks are dead
@@ -1350,26 +1405,30 @@ heartbeat_monitor() {
1350
1405
  kill -0 "$_fp" 2>/dev/null && _alive=true && break
1351
1406
  done
1352
1407
  if [[ "$_alive" == "false" ]]; then
1353
- echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B status=terminated(forks-exited)" >&2
1408
+ echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B${expected_suffix} status=terminated(forks-exited)" >&2
1354
1409
  break
1355
1410
  fi
1356
- echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B status=fork-idle(${last_known_forks// /,})" >&2
1411
+ echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B${expected_suffix} status=fork-idle(${last_known_forks// /,})" >&2
1357
1412
  else
1358
1413
  # Fallback: output-based drain (no fork PIDs found)
1359
1414
  post_exit_checks=$((post_exit_checks + 1))
1360
1415
  if [[ "$post_exit_checks" -ge "$max_post_exit_checks" ]]; then
1361
- echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B status=terminated(drain-done)" >&2
1416
+ echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B${expected_suffix} status=terminated(drain-done)" >&2
1362
1417
  break
1363
1418
  fi
1364
- echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B status=draining(${post_exit_checks}/${max_post_exit_checks})" >&2
1419
+ echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B${expected_suffix} status=draining(${post_exit_checks}/${max_post_exit_checks})" >&2
1365
1420
  fi
1421
+ elif [[ "$probe_state" =~ ^(mcp_initializing|input_wait)$ ]]; then
1422
+ stall_count=0
1423
+ echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B${expected_suffix} status=${probe_state}(probe-grace)" >&2
1366
1424
  elif [[ "$stall_count" -ge "$stall_threshold" ]]; then
1367
1425
  # STALL kill (#144/#66 regression guard): stall=threshold+grace 이상 지속 시 SIGTERM→SIGKILL.
1368
- # 기본 활성화. TFX_STALL_KILL=0 으로 opt-out. grace=30s (기본) SSE/MCP 정상 handshake 여유.
1369
- local kill_on_stall="${TFX_STALL_KILL:-1}"
1426
+ # 땜빵(PLANNING P4 구현 전): default 1 → 0. false kill >> true stuck 비용이 압도적이라
1427
+ # opt-in 으로 전환. debug 필요 시 TFX_STALL_KILL=1 로 명시 활성화. classify mode는 차기.
1428
+ local kill_on_stall="${TFX_STALL_KILL:-0}"
1370
1429
  local kill_grace="${TFX_STALL_KILL_GRACE:-30}"
1371
1430
  if [[ "$kill_on_stall" -eq 1 && "$stall_count" -ge $((stall_threshold + kill_grace)) ]]; then
1372
- echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B status=STALL_KILL stall=${stall_count}s — SIGTERM" >&2
1431
+ echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B${expected_suffix} status=STALL_KILL stall=${stall_count}s — SIGTERM" >&2
1373
1432
  # Snapshot child PIDs before SIGTERM — wrapper 가 SIGTERM 을 수용해 죽으면
1374
1433
  # 부모 소멸 후 taskkill /T 가 자식 트리를 탐색하지 못해 codex 자식이 orphan 으로 남는다.
1375
1434
  # 사용자 보고(2026-04-22): "tfx-route 래퍼 exit 이후에도 Codex 자식이 살아있음".
@@ -1417,9 +1476,9 @@ heartbeat_monitor() {
1417
1476
  fi
1418
1477
  break
1419
1478
  fi
1420
- echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B status=STALL stall=${stall_count}s" >&2
1479
+ echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B${expected_suffix} status=STALL stall=${stall_count}s" >&2
1421
1480
  else
1422
- echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B status=quiet stall=${stall_count}s" >&2
1481
+ echo "[tfx-heartbeat] pid=$pid elapsed=${elapsed}s output=${current_size}B${expected_suffix} status=quiet stall=${stall_count}s" >&2
1423
1482
  fi
1424
1483
  fi
1425
1484
  last_size=$current_size
@@ -1581,26 +1640,32 @@ _mcp_preflight_filter_dead() {
1581
1640
  CODEX_CONFIG_FLAGS=("${new_flags[@]}")
1582
1641
  echo "[tfx-route] MCP preflight: ${#dead_names[@]}개 dead MCP 제외 (${dead_list})" >&2
1583
1642
 
1584
- # #148: profile-allowed 전부 dead 인 all-dead 엣지케이스 조기 실패.
1585
- # allowed_pat _codex_config_swap fail-safe (#132) 의해 원본 config
1586
- # 전체를 유지 비필요 MCP 까지 전부 spawn 역효과.
1587
- # TFX_MCP_ALLOW_ALL_DEAD=1 명시적 opt-in MCP 없이 진행 (degraded).
1643
+ # #170 graceful degradation (회귀 fix):
1644
+ # all-dead default exec mode 자동 fallback. TFX_MCP_FAIL_ON_ALL_DEAD=1
1645
+ # 명시 opt-in 시만 #148 기존 동작 (early fail). TFX_MCP_ALLOW_ALL_DEAD=1 은 호환성
1646
+ # 유지 (alias for graceful default). transport auto 인 채로 run_codex_mcp 를
1647
+ # 호출하면 dead MCP 와 connect 시도 → stall → 본 fix 의 _TFX_MCP_DEGRADED=1 marker
1648
+ # 가 호출자 에서 transport=exec 강제 + MCP_HINT 자동 주입 skip 을 유발한다.
1588
1649
  local remaining_alive=0
1589
1650
  local rflag
1590
1651
  for rflag in "${CODEX_CONFIG_FLAGS[@]}"; do
1591
- if [[ "$rflag" =~ ^mcp_servers\.[^.]+\.enabled=true$ ]]; then
1652
+ # #153 + #170 P1: candidate 추출 정규식 (line 1607) 과 일관 — dotted server 이름
1653
+ # (e.g. mcp_servers.foo.bar.enabled=true) 도 alive 로 카운트한다. `[^.]+` 는 첫 dot
1654
+ # 에서 끊겨 dotted alive 만 남은 경우 false all-dead 판정 → 불필요 degraded.
1655
+ if [[ "$rflag" =~ ^mcp_servers\..+\.enabled=true$ ]]; then
1592
1656
  remaining_alive=$((remaining_alive + 1))
1593
1657
  fi
1594
1658
  done
1595
1659
 
1596
1660
  if [[ "$remaining_alive" -eq 0 ]]; then
1597
- if [[ "${TFX_MCP_ALLOW_ALL_DEAD:-0}" == "1" ]]; then
1598
- echo "[tfx-route] TFX_MCP_ALLOW_ALL_DEAD=1 MCP 없이 계속 진행 (degraded)" >&2
1599
- return 0
1661
+ if [[ "${TFX_MCP_FAIL_ON_ALL_DEAD:-0}" == "1" ]]; then
1662
+ echo "[tfx-route] 조기 실패: TFX_MCP_FAIL_ON_ALL_DEAD=1 + MCP 전부 dead Codex 호출 중단" >&2
1663
+ echo " 복구: (1) dead MCP 복구 (2) TFX_MCP_HEALTH_CHECK=0 preflight 비활성 (3) TFX_MCP_FAIL_ON_ALL_DEAD=0 graceful degradation" >&2
1664
+ return 78
1600
1665
  fi
1601
- echo "[tfx-route] 조기 실패: profile 에서 허용한 MCP 전부 dead — Codex 호출 중단" >&2
1602
- echo " 복구: (1) dead MCP 복구 (2) TFX_MCP_HEALTH_CHECK=0 preflight 비활성 (3) TFX_MCP_ALLOW_ALL_DEAD=1 MCP 없이 진행" >&2
1603
- return 78
1666
+ export _TFX_MCP_DEGRADED=1
1667
+ echo "[tfx-route] graceful degradation: MCP 전부 dead exec mode 자동 전환 (set TFX_MCP_FAIL_ON_ALL_DEAD=1 to revert to early-fail)" >&2
1668
+ return 0
1604
1669
  fi
1605
1670
  }
1606
1671
 
@@ -1922,6 +1987,9 @@ main() {
1922
1987
  TIMEOUT_SEC="$DEFAULT_TIMEOUT"
1923
1988
  fi
1924
1989
 
1990
+ TFX_EXPECTED_DURATION_SEC="${TFX_EXPECTED_DURATION_SEC:-$(estimate_expected_duration_sec "$AGENT_TYPE" "$MCP_PROFILE" "$PROMPT")}"
1991
+ export TFX_EXPECTED_DURATION_SEC
1992
+
1925
1993
  # 컨텍스트 파일 → 프롬프트에 주입
1926
1994
  if [[ -n "$CONTEXT_FILE" && -f "$CONTEXT_FILE" ]]; then
1927
1995
  local ctx_content
@@ -2046,6 +2114,19 @@ FALLBACK_EOF
2046
2114
  # swap 후 config override 플래그 클리어 — 제거된 서버에 override 보내면 "invalid transport" 에러
2047
2115
  CODEX_CONFIG_FLAGS=()
2048
2116
  CODEX_CONFIG_JSON="{}"
2117
+ # #170 graceful degradation: MCP 전부 dead 면 transport 무관 exec 강제.
2118
+ # _mcp_preflight_filter_dead 가 _TFX_MCP_DEGRADED=1 를 export 했으면 이미 stall 보장 안 됨.
2119
+ # 사용자가 TFX_CODEX_TRANSPORT=mcp 명시했더라도 dead MCP 와 connect 시도 = stall →
2120
+ # warning + exec 강제 (transport 명시는 사용자 의도지만 stall 회피가 우선).
2121
+ # MCP_HINT (e.g. "context7으로 조회하세요") 도 prompt 에서 제거 — degraded 환경에서
2122
+ # 모델이 사용 불가 도구를 시도하면 stall/실패 trigger.
2123
+ if [[ "${_TFX_MCP_DEGRADED:-0}" == "1" ]]; then
2124
+ if [[ "$TFX_CODEX_TRANSPORT" == "mcp" ]]; then
2125
+ echo "[tfx-route] WARNING: TFX_CODEX_TRANSPORT=mcp + all-MCP-dead → exec 강제 (stall 회피)" >&2
2126
+ fi
2127
+ TFX_CODEX_TRANSPORT="exec"
2128
+ FULL_PROMPT="$PROMPT"
2129
+ fi
2049
2130
  codex_transport_effective="exec"
2050
2131
  if [[ "$TFX_CODEX_TRANSPORT" != "exec" ]]; then
2051
2132
  run_codex_mcp "$FULL_PROMPT" "$use_tee" || exit_code=$?
@@ -2055,6 +2136,11 @@ FALLBACK_EOF
2055
2136
  # MCP 실패 → exec fallback. run_codex_exec는 < /dev/null 로 stdin 블록 회피 (line 1639).
2056
2137
  # 정책: codex/gemini 강건성 — MCP 가용 시 MCP, 실패 시 그래도 워커 자체는 굴러간다.
2057
2138
  echo "[tfx-route] Codex MCP 실패(exit=${exit_code}). exec fallback 시도." >&2
2139
+ local _sd
2140
+ _sd="$(_get_script_dir)"
2141
+ if [[ -f "$_sd/hub-ensure.mjs" ]]; then
2142
+ "$NODE_BIN" "$_sd/hub-ensure.mjs" >/dev/null 2>&1 || true
2143
+ fi
2058
2144
  exit_code=0
2059
2145
  run_codex_exec "$FULL_PROMPT" "$use_tee" || exit_code=$?
2060
2146
  codex_transport_effective="exec-fallback"