context-mode 1.0.121 → 1.0.122

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/.openclaw-plugin/openclaw.plugin.json +1 -1
  4. package/.openclaw-plugin/package.json +1 -1
  5. package/README.md +4 -4
  6. package/build/adapters/claude-code/hooks.d.ts +16 -1
  7. package/build/adapters/claude-code/hooks.js +16 -0
  8. package/build/adapters/claude-code/index.js +2 -11
  9. package/build/adapters/codex/hooks.d.ts +19 -0
  10. package/build/adapters/codex/hooks.js +22 -0
  11. package/build/adapters/codex/index.js +8 -1
  12. package/build/adapters/copilot-base.d.ts +17 -1
  13. package/build/adapters/copilot-base.js +18 -2
  14. package/build/adapters/cursor/hooks.d.ts +14 -1
  15. package/build/adapters/cursor/hooks.js +14 -0
  16. package/build/adapters/detect.d.ts +12 -2
  17. package/build/adapters/detect.js +70 -3
  18. package/build/adapters/gemini-cli/hooks.d.ts +16 -0
  19. package/build/adapters/gemini-cli/hooks.js +19 -0
  20. package/build/adapters/gemini-cli/index.js +4 -2
  21. package/build/adapters/kiro/hooks.d.ts +16 -1
  22. package/build/adapters/kiro/hooks.js +19 -0
  23. package/build/adapters/pi/extension.d.ts +9 -0
  24. package/build/adapters/pi/extension.js +47 -0
  25. package/build/adapters/qwen-code/hooks.d.ts +26 -0
  26. package/build/adapters/qwen-code/hooks.js +29 -0
  27. package/build/adapters/qwen-code/index.js +6 -0
  28. package/build/cli.js +26 -1
  29. package/build/executor.js +18 -3
  30. package/build/lifecycle.d.ts +15 -0
  31. package/build/lifecycle.js +24 -1
  32. package/build/runtime.js +34 -13
  33. package/build/session/extract.js +150 -48
  34. package/build/session/snapshot.js +46 -0
  35. package/cli.bundle.mjs +137 -136
  36. package/configs/codex/hooks.json +1 -1
  37. package/configs/cursor/hooks.json +1 -1
  38. package/configs/kiro/agent.json +1 -1
  39. package/hooks/core/routing.mjs +56 -1
  40. package/hooks/cursor/hooks.json +1 -1
  41. package/hooks/ensure-deps.mjs +22 -3
  42. package/hooks/hooks.json +9 -0
  43. package/hooks/routing-block.mjs +5 -0
  44. package/hooks/session-extract.bundle.mjs +2 -2
  45. package/hooks/session-snapshot.bundle.mjs +21 -20
  46. package/openclaw.plugin.json +1 -1
  47. package/package.json +3 -3
  48. package/scripts/heal-better-sqlite3.mjs +188 -10
  49. package/scripts/heal-installed-plugins.mjs +111 -0
  50. package/scripts/postinstall.mjs +18 -2
  51. package/server.bundle.mjs +111 -111
  52. package/start.mjs +14 -1
  53. package/.mcp.json +0 -8
@@ -22,5 +22,14 @@
22
22
  * a prior load.
23
23
  */
24
24
  export declare let _mcpBridgeReady: Promise<void>;
25
+ /**
26
+ * Returns true iff `argv` matches a Pi top-level short-circuit invocation
27
+ * (help or version). Only argv[0] is inspected — Pi's runCli only checks
28
+ * the first token, and subcommand-level `--help` (e.g. `pi stats --help`)
29
+ * still spins up a real session, so we must NOT skip bootstrap there.
30
+ *
31
+ * Exported for unit tests.
32
+ */
33
+ export declare function isPiShortCircuitArgv(argv: readonly string[]): boolean;
25
34
  /** Pi extension default export. Called once by Pi runtime with the extension API. */
26
35
  export default function piExtension(pi: any): void;
@@ -193,6 +193,41 @@ function handleCommandText(text, ctx) {
193
193
  }
194
194
  return { text };
195
195
  }
196
+ // ── Pi short-circuit argv detection (#534) ───────────────
197
+ //
198
+ // Pi's runtime loads every extension during module discovery, BEFORE its
199
+ // `runCli()` decides whether the invocation is a real session or a
200
+ // short-lived help / version print. Without this guard, even `pi --help`
201
+ // causes us to spawn `server.bundle.mjs` as a long-lived stdio child —
202
+ // which is then reparented to PID 1 the moment Pi's `--help` handler
203
+ // returns. The MCP SDK's StdioServerTransport CPU-spins on the half-closed
204
+ // pipe until the 30 s ppid poll catches up, accumulating multi-hour orphans
205
+ // (see issue #534, plus the historical #311 / #388 fixes that only addressed
206
+ // the *recovery* path — not the *prevention* path).
207
+ //
208
+ // Token set verified against the Pi 14.x source — specifically:
209
+ // refs/platforms/oh-my-pi/packages/coding-agent/src/cli.ts:runCli
210
+ //
211
+ // if (first === "--help" || first === "-h" || first === "--version"
212
+ // || first === "-v" || first === "help") { /* short-circuit */ }
213
+ //
214
+ // We mirror it exactly — no inferred flags, no `-V` (Pi uses lowercase `-v`),
215
+ // no `--no-help`. Anything else (including `pi stats --help`) routes through
216
+ // the normal launch path and the bridge bootstraps as usual.
217
+ const PI_SHORT_CIRCUIT_TOKENS = new Set(["--help", "-h", "--version", "-v", "help"]);
218
+ /**
219
+ * Returns true iff `argv` matches a Pi top-level short-circuit invocation
220
+ * (help or version). Only argv[0] is inspected — Pi's runCli only checks
221
+ * the first token, and subcommand-level `--help` (e.g. `pi stats --help`)
222
+ * still spins up a real session, so we must NOT skip bootstrap there.
223
+ *
224
+ * Exported for unit tests.
225
+ */
226
+ export function isPiShortCircuitArgv(argv) {
227
+ if (argv.length === 0)
228
+ return false;
229
+ return PI_SHORT_CIRCUIT_TOKENS.has(argv[0]);
230
+ }
196
231
  // ── Extension entry point ────────────────────────────────
197
232
  /** Pi extension default export. Called once by Pi runtime with the extension API. */
198
233
  export default function piExtension(pi) {
@@ -534,6 +569,18 @@ export default function piExtension(pi) {
534
569
  // Best-effort: a missing bundle or a spawn failure must NOT prevent
535
570
  // the rest of the extension (session capture, hooks, slash commands)
536
571
  // from initializing. We log to stderr and continue.
572
+ // Short-circuit guard (#534): skip the MCP bridge bootstrap for
573
+ // `pi --help` / `pi --version` / `pi help` and similar. Pi prints and
574
+ // exits within milliseconds, but the bridge child would otherwise live
575
+ // long enough to be reparented to PID 1, half-close stdin, and pin a CPU
576
+ // core via the MCP SDK's stdio loop. We use process.argv directly so the
577
+ // guard works for any caller that boots Pi with a short-circuit token,
578
+ // regardless of how the runtime wires its CLI parser.
579
+ const piArgv = process.argv.slice(2);
580
+ if (isPiShortCircuitArgv(piArgv)) {
581
+ _mcpBridgeReady = Promise.resolve();
582
+ return;
583
+ }
537
584
  const serverBundle = resolve(pluginRoot, "server.bundle.mjs");
538
585
  if (existsSync(serverBundle)) {
539
586
  _mcpBridgeReady = bootstrapMCPTools(pi, serverBundle).then((handle) => {
@@ -0,0 +1,26 @@
1
+ /**
2
+ * adapters/qwen-code/hooks — Qwen Code hook definitions.
3
+ *
4
+ * Qwen Code is a Gemini CLI fork (packages/core/src/tools/tool-names.ts —
5
+ * shares native names like `run_shell_command`, `read_file`). The hook wire
6
+ * protocol is JSON stdin / stdout, identical to Claude Code and Gemini CLI.
7
+ *
8
+ * Config: ~/.qwen/settings.json under "hooks" key.
9
+ */
10
+ /**
11
+ * Negative-lookahead matcher for external MCP tool namespaces on Qwen Code (#529).
12
+ *
13
+ * Qwen Code MCP wire shape: `mcp__<server>__<tool>` (shared with Gemini CLI
14
+ * upstream). Own context-mode MCP surfaces as both
15
+ * `mcp__plugin_context-mode_context-mode__ctx_*` (Claude shim path when users
16
+ * install via the Claude marketplace) and `mcp__context-mode__ctx_*` (Qwen
17
+ * canonical — see hooks/core/tool-naming.mjs). The negative lookahead
18
+ * `(?!.*context-mode)` excludes both variants from the external-MCP routing
19
+ * branch so context-mode's own tools (already wired by the explicit entries
20
+ * above this catch-all) are not double-routed.
21
+ *
22
+ * Without this matcher, large payloads from slack / telegram / gdrive / notion
23
+ * MCPs bypass the routing nudge and flood the model's context window —
24
+ * PostToolUse runs too late to keep the raw data out.
25
+ */
26
+ export declare const EXTERNAL_MCP_MATCHER_PATTERN = "mcp__(?!.*context-mode)";
@@ -0,0 +1,29 @@
1
+ /**
2
+ * adapters/qwen-code/hooks — Qwen Code hook definitions.
3
+ *
4
+ * Qwen Code is a Gemini CLI fork (packages/core/src/tools/tool-names.ts —
5
+ * shares native names like `run_shell_command`, `read_file`). The hook wire
6
+ * protocol is JSON stdin / stdout, identical to Claude Code and Gemini CLI.
7
+ *
8
+ * Config: ~/.qwen/settings.json under "hooks" key.
9
+ */
10
+ // ─────────────────────────────────────────────────────────
11
+ // External MCP routing matcher (#529)
12
+ // ─────────────────────────────────────────────────────────
13
+ /**
14
+ * Negative-lookahead matcher for external MCP tool namespaces on Qwen Code (#529).
15
+ *
16
+ * Qwen Code MCP wire shape: `mcp__<server>__<tool>` (shared with Gemini CLI
17
+ * upstream). Own context-mode MCP surfaces as both
18
+ * `mcp__plugin_context-mode_context-mode__ctx_*` (Claude shim path when users
19
+ * install via the Claude marketplace) and `mcp__context-mode__ctx_*` (Qwen
20
+ * canonical — see hooks/core/tool-naming.mjs). The negative lookahead
21
+ * `(?!.*context-mode)` excludes both variants from the external-MCP routing
22
+ * branch so context-mode's own tools (already wired by the explicit entries
23
+ * above this catch-all) are not double-routed.
24
+ *
25
+ * Without this matcher, large payloads from slack / telegram / gdrive / notion
26
+ * MCPs bypass the routing nudge and flood the model's context window —
27
+ * PostToolUse runs too late to keep the raw data out.
28
+ */
29
+ export const EXTERNAL_MCP_MATCHER_PATTERN = "mcp__(?!.*context-mode)";
@@ -16,6 +16,7 @@ import { readFileSync, writeFileSync, existsSync, } from "node:fs";
16
16
  import { resolve, join } from "node:path";
17
17
  import { homedir } from "node:os";
18
18
  import { ClaudeCodeBaseAdapter } from "../claude-code-base.js";
19
+ import { EXTERNAL_MCP_MATCHER_PATTERN } from "./hooks.js";
19
20
  import { buildNodeCommand, } from "../types.js";
20
21
  // ─────────────────────────────────────────────────────────
21
22
  // Adapter implementation
@@ -55,6 +56,9 @@ export class QwenCodeAdapter extends ClaudeCodeBaseAdapter {
55
56
  "mcp__plugin_context-mode_context-mode__ctx_execute",
56
57
  "mcp__plugin_context-mode_context-mode__ctx_execute_file",
57
58
  "mcp__plugin_context-mode_context-mode__ctx_batch_execute",
59
+ // External MCP catch-all (#529). Negative-lookahead excludes context-mode's
60
+ // own server segments so the explicit entries above are not double-routed.
61
+ EXTERNAL_MCP_MATCHER_PATTERN,
58
62
  ].join("|");
59
63
  return {
60
64
  PreToolUse: [
@@ -247,6 +251,8 @@ export class QwenCodeAdapter extends ClaudeCodeBaseAdapter {
247
251
  "mcp__plugin_context-mode_context-mode__ctx_execute",
248
252
  "mcp__plugin_context-mode_context-mode__ctx_execute_file",
249
253
  "mcp__plugin_context-mode_context-mode__ctx_batch_execute",
254
+ // External MCP catch-all (#529) — keep in sync with generateHookConfig above.
255
+ EXTERNAL_MCP_MATCHER_PATTERN,
250
256
  ].join("|"),
251
257
  },
252
258
  {
package/build/cli.js CHANGED
@@ -25,7 +25,7 @@ import { resolveClaudeConfigDir } from "./util/claude-config.js";
25
25
  // v1.0.119 — Issue #523 Layer 5 heal: post-bump assertion on .claude-plugin/plugin.json
26
26
  // mcpServers args. Single source of truth shared with start.mjs HEAL block + postinstall.
27
27
  // @ts-expect-error — JS module, no TS declarations
28
- import { healPluginJsonMcpServers } from "../scripts/heal-installed-plugins.mjs";
28
+ import { healPluginJsonMcpServers, healMcpJsonArgs } from "../scripts/heal-installed-plugins.mjs";
29
29
  // Private 16-LOC copy of browserOpenArgv. Canonical version lives in src/server.ts;
30
30
  // duplicated here so the cli bundle does not pull server.ts top-level boot side effects.
31
31
  // Keep in sync — pure data, no I/O.
@@ -804,6 +804,31 @@ async function upgrade() {
804
804
  const message = err instanceof Error ? err.message : String(err);
805
805
  throw new Error(`plugin.json drift check failed: ${message}`);
806
806
  }
807
+ // v1.0.122 — Issue #531 — Layer 6 heal: assert .mcp.json's
808
+ // mcpServers["context-mode"].args[0] is the literal ${CLAUDE_PLUGIN_ROOT}/start.mjs
809
+ // placeholder. Asymmetric-heal sibling of the plugin.json assertion above.
810
+ // cli.ts writes .mcp.json at ~line 829-845 with the placeholder, but never
811
+ // asserted the on-disk shape afterwards — if a future regression dropped
812
+ // the placeholder write or a parallel normalize baked in an absolute path,
813
+ // upgrade() would declare success on a poisoned tree. Belt-and-braces:
814
+ // first call cleans any drift; second call MUST return healed:[] or throw.
815
+ // Single source of truth shared with start.mjs HEAL block + postinstall.
816
+ try {
817
+ const pluginCacheRoot = resolve(resolveClaudeConfigDir(), "plugins", "cache");
818
+ const pluginKey = "context-mode@context-mode";
819
+ const firstPass = healMcpJsonArgs({ pluginRoot, pluginCacheRoot, pluginKey });
820
+ if (firstPass && firstPass.error) {
821
+ throw new Error(firstPass.error);
822
+ }
823
+ const secondPass = healMcpJsonArgs({ pluginRoot, pluginCacheRoot, pluginKey });
824
+ if (secondPass && Array.isArray(secondPass.healed) && secondPass.healed.length > 0) {
825
+ throw new Error(`.mcp.json drift: mcpServers.args still poisoned after first heal pass (healed=${secondPass.healed.join(",")})`);
826
+ }
827
+ }
828
+ catch (err) {
829
+ const message = err instanceof Error ? err.message : String(err);
830
+ throw new Error(`.mcp.json drift check failed: ${message}`);
831
+ }
807
832
  // v1.0.114 hotfix — marketplace post-pull assertion: clone (if
808
833
  // present) MUST be on newVersion. Mert's case showed marketplace
809
834
  // stuck at v1.0.89 — the sync block above swallowed that silently.
package/build/executor.js CHANGED
@@ -238,11 +238,11 @@ export class PolyglotExecutor {
238
238
  ? cmd.slice(1).map(a => a.replace(/\\/g, "/"))
239
239
  : cmd.slice(1);
240
240
  }
241
- const proc = spawn(spawnCmd, spawnArgs, {
241
+ // Common options shared by both spawn variants below.
242
+ const commonOpts = {
242
243
  cwd,
243
244
  stdio: ["ignore", "pipe", "pipe"],
244
245
  env: this.#buildSafeEnv(sandboxTmpDir),
245
- shell: needsShell,
246
246
  // On Unix, create a new process group so killTree can kill all children
247
247
  detached: !isWin,
248
248
  // Hide the spawned-process console window on Windows. Without this,
@@ -250,7 +250,22 @@ export class PolyglotExecutor {
250
250
  // leaving the MCP response empty and popping a Git Bash terminal over
251
251
  // the user's IDE. Issue #384.
252
252
  ...buildSpawnOptions(process.platform),
253
- });
253
+ };
254
+ // DEP0190 fix: when shell is true (Windows .cmd/.bat shims), pass a
255
+ // single command string instead of cmd + args array. Node.js warns
256
+ // that args are unsafely concatenated when shell:true is combined with
257
+ // the args-array form of spawn(). Collapsing to a string avoids the
258
+ // warning while preserving the same shell behavior.
259
+ let proc;
260
+ if (needsShell) {
261
+ const fullCmd = [spawnCmd, ...spawnArgs]
262
+ .map(a => /\s/.test(a) ? JSON.stringify(a) : a)
263
+ .join(" ");
264
+ proc = spawn(fullCmd, [], { ...commonOpts, shell: true });
265
+ }
266
+ else {
267
+ proc = spawn(spawnCmd, spawnArgs, { ...commonOpts, shell: false });
268
+ }
254
269
  let timedOut = false;
255
270
  let resolved = false;
256
271
  // Issue #406 — if the caller didn't pass a timeout we don't fire one.
@@ -44,6 +44,21 @@ export interface IsParentAliveDeps {
44
44
  * {@link defaultIsParentAlive} (captured once at module load).
45
45
  */
46
46
  export declare function makeDefaultIsParentAlive(deps?: IsParentAliveDeps): () => boolean;
47
+ /**
48
+ * Resolve the parent-liveness poll interval based on context (#534).
49
+ *
50
+ * When this process is the MCP bridge child spawned by the Pi adapter
51
+ * (`bootstrapMCPTools` in `src/adapters/pi/mcp-bridge.ts` sets
52
+ * `CONTEXT_MODE_BRIDGE_DEPTH=1` in the child env), we tighten the poll to
53
+ * 1 s. The Pi parent can disappear in under 50 ms (`pi --help` prints
54
+ * usage and returns), so the default 30 s window leaves a long-lived
55
+ * CPU-spinning orphan. For top-level MCP servers (depth 0 / absent) we
56
+ * keep the original 30 s cadence — the existing #311/#388 ppid + stdin
57
+ * recovery paths already cover Claude Code style hosts.
58
+ *
59
+ * Exported for unit-testing.
60
+ */
61
+ export declare function lifecycleGuardIntervalForEnv(env?: NodeJS.ProcessEnv): number;
47
62
  /**
48
63
  * Start the lifecycle guard. Returns a cleanup function.
49
64
  * Skipped automatically when stdin is a TTY (e.g. OpenCode ts-plugin).
@@ -71,12 +71,35 @@ export function makeDefaultIsParentAlive(deps = {}) {
71
71
  };
72
72
  }
73
73
  const defaultIsParentAlive = makeDefaultIsParentAlive();
74
+ /**
75
+ * Resolve the parent-liveness poll interval based on context (#534).
76
+ *
77
+ * When this process is the MCP bridge child spawned by the Pi adapter
78
+ * (`bootstrapMCPTools` in `src/adapters/pi/mcp-bridge.ts` sets
79
+ * `CONTEXT_MODE_BRIDGE_DEPTH=1` in the child env), we tighten the poll to
80
+ * 1 s. The Pi parent can disappear in under 50 ms (`pi --help` prints
81
+ * usage and returns), so the default 30 s window leaves a long-lived
82
+ * CPU-spinning orphan. For top-level MCP servers (depth 0 / absent) we
83
+ * keep the original 30 s cadence — the existing #311/#388 ppid + stdin
84
+ * recovery paths already cover Claude Code style hosts.
85
+ *
86
+ * Exported for unit-testing.
87
+ */
88
+ export function lifecycleGuardIntervalForEnv(env = process.env) {
89
+ const raw = env.CONTEXT_MODE_BRIDGE_DEPTH;
90
+ if (raw === undefined)
91
+ return 30_000;
92
+ const depth = Number.parseInt(raw, 10);
93
+ if (!Number.isFinite(depth) || depth <= 0)
94
+ return 30_000;
95
+ return 1000;
96
+ }
74
97
  /**
75
98
  * Start the lifecycle guard. Returns a cleanup function.
76
99
  * Skipped automatically when stdin is a TTY (e.g. OpenCode ts-plugin).
77
100
  */
78
101
  export function startLifecycleGuard(opts) {
79
- const interval = opts.checkIntervalMs ?? 30_000;
102
+ const interval = opts.checkIntervalMs ?? lifecycleGuardIntervalForEnv();
80
103
  const check = opts.isParentAlive ?? defaultIsParentAlive;
81
104
  let stopped = false;
82
105
  const shutdown = () => {
package/build/runtime.js CHANGED
@@ -60,11 +60,15 @@ function runnableExists(cmd) {
60
60
  // fallthrough can be slow). On POSIX, 1500ms is plenty for a real binary
61
61
  // and keeps cold detection of python3 → python → py under ~5s total (#454).
62
62
  try {
63
- execFileSync(cmd, ["--version"], {
64
- shell: isWindows,
65
- stdio: "pipe",
66
- timeout: isWindows ? 5000 : 1500,
67
- });
63
+ // DEP0190 fix: avoid args array with shell:true on Windows.
64
+ // Use execSync with a command string when shell is required;
65
+ // keep execFileSync (no shell) on POSIX.
66
+ if (isWindows) {
67
+ execSync(`"${cmd}" --version`, { stdio: "pipe", timeout: 5000 });
68
+ }
69
+ else {
70
+ execFileSync(cmd, ["--version"], { stdio: "pipe", timeout: 1500 });
71
+ }
68
72
  return true;
69
73
  }
70
74
  catch {
@@ -152,14 +156,31 @@ function resolveWindowsBash() {
152
156
  }
153
157
  function getVersion(cmd, args = ["--version"]) {
154
158
  try {
155
- return execFileSync(cmd, args, {
156
- encoding: "utf-8",
157
- shell: process.platform === "win32",
158
- stdio: ["pipe", "pipe", "pipe"],
159
- timeout: 5000,
160
- })
161
- .trim()
162
- .split(/\r?\n/)[0];
159
+ // DEP0190 fix: avoid args array with shell:true on Windows.
160
+ if (process.platform === "win32") {
161
+ // Hardening (PR #537 review): quote any cmd.exe metacharacter, not just
162
+ // whitespace. Current arg sources are internally controlled, but cheap
163
+ // defense-in-depth for future call sites.
164
+ const cmdStr = [cmd, ...args]
165
+ .map(a => /[\s"&|<>^()%!]/.test(a) ? JSON.stringify(a) : a)
166
+ .join(" ");
167
+ return execSync(cmdStr, {
168
+ encoding: "utf-8",
169
+ stdio: ["pipe", "pipe", "pipe"],
170
+ timeout: 5000,
171
+ })
172
+ .trim()
173
+ .split(/\r?\n/)[0];
174
+ }
175
+ else {
176
+ return execFileSync(cmd, args, {
177
+ encoding: "utf-8",
178
+ stdio: ["pipe", "pipe", "pipe"],
179
+ timeout: 5000,
180
+ })
181
+ .trim()
182
+ .split(/\r?\n/)[0];
183
+ }
163
184
  }
164
185
  catch {
165
186
  return "unknown";
@@ -704,17 +704,38 @@ function extractWorktree(input) {
704
704
  /**
705
705
  * Category 6: decision
706
706
  * User corrections / approach selections.
707
+ *
708
+ * Universal-rule detector (Hybrid C, issue #535):
709
+ * A decision message typically takes the structural shape
710
+ * "{negation/rejection} X {separator} Y" — across every human language.
711
+ *
712
+ * We treat the following as the structural shape:
713
+ * - contains a clause separator (ASCII `,` `;`, fullwidth `,` U+FF0C `;` U+FF1B,
714
+ * Japanese ideographic `、`, Arabic `،`), AND
715
+ * - codepoint length is in the corrective range (15..500), AND
716
+ * - the message is not a question (no cross-script `?`), AND
717
+ * - contains at least one alphabetic codepoint.
718
+ *
719
+ * The renderer prints the raw message back to the next LLM, so the gate
720
+ * only needs to be a coarse "looks like a correction" filter — the LLM
721
+ * handles fine-grained interpretation. No per-language keyword list.
707
722
  */
708
- const DECISION_PATTERNS = [
709
- /\b(don'?t|do not|never|always|instead|rather|prefer)\b/i,
710
- /\b(use|switch to|go with|pick|choose)\s+\w+\s+(instead|over|not)\b/i,
711
- /\b(no,?\s+(use|do|try|make))\b/i,
712
- // Turkish patterns
713
- /\b(hayır|hayir|evet|böyle|boyle|degil|değil|yerine|kullan)\b/i,
714
- ];
723
+ const CLAUSE_SEPARATOR_PATTERN = /[,;,;、،]/u;
724
+ const DECISION_MIN_CHARS = 15;
725
+ const DECISION_MAX_CHARS = 500;
726
+ function looksLikeDecision(trimmed) {
727
+ if (QUESTION_MARK_PATTERN.test(trimmed))
728
+ return false;
729
+ if (!ALPHABETIC_PATTERN.test(trimmed))
730
+ return false;
731
+ if (!CLAUSE_SEPARATOR_PATTERN.test(trimmed))
732
+ return false;
733
+ const codepointLength = [...trimmed].length;
734
+ return codepointLength >= DECISION_MIN_CHARS && codepointLength <= DECISION_MAX_CHARS;
735
+ }
715
736
  function extractUserDecision(message) {
716
- const isDecision = DECISION_PATTERNS.some(p => p.test(message));
717
- if (!isDecision)
737
+ const trimmed = message.trim();
738
+ if (!looksLikeDecision(trimmed))
718
739
  return [];
719
740
  return [{
720
741
  type: "decision",
@@ -726,16 +747,58 @@ function extractUserDecision(message) {
726
747
  /**
727
748
  * Category 7: role
728
749
  * Persona / behavioral directive patterns.
750
+ *
751
+ * Universal-rule detector (Hybrid C, issue #535):
752
+ * A persona/role statement is structurally a single non-question clause
753
+ * of moderate length containing more than one lexical token — e.g.
754
+ * "You are a senior engineer", "Tu es développeur",
755
+ * "あなたは経験豊富なエンジニアです", "Sen kıdemli mühendisisin".
756
+ *
757
+ * We treat the following as the structural shape:
758
+ * - codepoint length is in the persona range (8..120), AND
759
+ * - is not a question (no cross-script `?`), AND
760
+ * - is a single clause (no clause separator that would mark it as a
761
+ * decision), AND
762
+ * - carries enough lexical density: either two whitespace-separated
763
+ * runs of letters, OR a continuous Unicode-letter run of ≥6
764
+ * codepoints (a fallback for scripts without word spaces — Japanese,
765
+ * Chinese, Thai).
766
+ *
767
+ * The renderer prints the raw message back to the next LLM verbatim,
768
+ * so the gate only needs a coarse "looks like a persona statement"
769
+ * filter — no per-language keyword list.
729
770
  */
730
- const ROLE_PATTERNS = [
731
- /\b(act as|you are|behave like|pretend|role of|persona)\b/i,
732
- /\b(senior|staff|principal|lead)\s+(engineer|developer|architect)\b/i,
733
- // Turkish patterns
734
- /\b(gibi davran|rolünde|olarak çalış)\b/i,
735
- ];
771
+ // Lower bound accommodates information-dense scripts (Chinese, Japanese,
772
+ // Korean) where a complete persona sentence may use as few as 8 codepoints
773
+ // — e.g. "你是高级开发人员" (8 codepoints; the 7-codepoint "你是高级工程师" falls below the bound) — while still excluding bare single-token noise.
774
+ const ROLE_MIN_CHARS = 8;
775
+ const ROLE_MAX_CHARS = 120;
776
+ const TWO_LEXICAL_TOKENS_PATTERN = /\p{L}+\s+\p{L}+/u;
777
+ const CONTINUOUS_LETTER_RUN_PATTERN = /\p{L}{6,}/u;
778
+ function looksLikeRole(trimmed) {
779
+ // Role prompts are persona-prefix shaped: the FIRST SENTENCE declares the
780
+ // role (e.g. "You are a senior backend engineer. <long context...>").
781
+ // Apply the structural test to the first clause only — real-world role
782
+ // prompts often append context paragraphs that would blow the length cap
783
+ // if we tested the whole message. First-clause shape is the load-bearing
784
+ // signal across languages (English "You are X.", French "Tu es X.",
785
+ // Japanese "あなたは X です。" all parse the same way under a period split).
786
+ const firstClause = trimmed.split(/[.!\n。!]/u)[0].trim();
787
+ if (QUESTION_MARK_PATTERN.test(firstClause))
788
+ return false;
789
+ if (CLAUSE_SEPARATOR_PATTERN.test(firstClause))
790
+ return false;
791
+ if (!ALPHABETIC_PATTERN.test(firstClause))
792
+ return false;
793
+ const codepointLength = [...firstClause].length;
794
+ if (codepointLength < ROLE_MIN_CHARS || codepointLength > ROLE_MAX_CHARS)
795
+ return false;
796
+ return (TWO_LEXICAL_TOKENS_PATTERN.test(firstClause) ||
797
+ CONTINUOUS_LETTER_RUN_PATTERN.test(firstClause));
798
+ }
736
799
  function extractRole(message) {
737
- const isRole = ROLE_PATTERNS.some(p => p.test(message));
738
- if (!isRole)
800
+ const trimmed = message.trim();
801
+ if (!looksLikeRole(trimmed))
739
802
  return [];
740
803
  return [{
741
804
  type: "role",
@@ -747,50 +810,90 @@ function extractRole(message) {
747
810
  /**
748
811
  * Category 13: intent
749
812
  * Session mode classification from user messages.
813
+ *
814
+ * Universal-rule detector (Hybrid C, issue #535):
815
+ * investigate — message contains a question mark from any script:
816
+ * ASCII `?` U+003F, fullwidth `?` U+FF1F, Arabic `؟` U+061F,
817
+ * Spanish opening `¿` U+00BF.
818
+ * (Greek `;` U+037E and Armenian `՞` U+055E are excluded —
819
+ * Greek U+037E is canonically equivalent to the ASCII semicolon (U+003B), which
820
+ * would produce false positives across the corpus.)
821
+ *
822
+ * Structural / Unicode-aware — no per-language keyword list.
750
823
  */
751
- const INTENT_PATTERNS = [
752
- { mode: "investigate", pattern: /\b(why|how does|explain|understand|what is|analyze|debug|look into)\b/i },
753
- { mode: "implement", pattern: /\b(create|add|build|implement|write|make|develop|fix)\b/i },
754
- { mode: "discuss", pattern: /\b(think about|consider|should we|what if|pros and cons|opinion)\b/i },
755
- { mode: "review", pattern: /\b(review|check|audit|verify|test|validate)\b/i },
756
- ];
824
+ const QUESTION_MARK_PATTERN = /[??؟¿]/u;
825
+ /**
826
+ * "Imperative tone" structural heuristic for implement intent:
827
+ * - trimmed length < IMPERATIVE_MAX_CHARS codepoints (short directive,
828
+ * not a discursive paragraph)
829
+ * - contains no question mark from any script
830
+ * - contains at least one alphabetic codepoint (filters pure punctuation noise)
831
+ *
832
+ * `[...str]` walks Unicode codepoints so CJK / Indic scripts are measured
833
+ * fairly against the budget rather than penalised by UTF-16 unit count.
834
+ */
835
+ const ALPHABETIC_PATTERN = /\p{L}/u;
836
+ const IMPERATIVE_MAX_CHARS = 60;
837
+ function isImperativeTone(trimmed) {
838
+ if (QUESTION_MARK_PATTERN.test(trimmed))
839
+ return false;
840
+ if (!ALPHABETIC_PATTERN.test(trimmed))
841
+ return false;
842
+ const codepointLength = [...trimmed].length;
843
+ return codepointLength > 0 && codepointLength < IMPERATIVE_MAX_CHARS;
844
+ }
757
845
  function extractIntent(message) {
758
- const match = INTENT_PATTERNS.find(({ pattern }) => pattern.test(message));
759
- if (!match)
846
+ const trimmed = message.trim();
847
+ if (!trimmed)
848
+ return [];
849
+ let mode;
850
+ if (QUESTION_MARK_PATTERN.test(trimmed)) {
851
+ mode = "investigate";
852
+ }
853
+ else if (isImperativeTone(trimmed)) {
854
+ mode = "implement";
855
+ }
856
+ if (!mode)
760
857
  return [];
761
858
  return [{
762
859
  type: "intent",
763
860
  category: "intent",
764
- data: safeString(match.mode),
861
+ data: safeString(mode),
765
862
  priority: 4,
766
863
  }];
767
864
  }
768
865
  /**
769
866
  * Category 25: blocked-on
770
867
  * Detect when work is blocked on something, or when a blocker is resolved.
868
+ *
869
+ * Universal-rule detector (Hybrid C, issue #535):
870
+ * Programming-domain error markers are script-agnostic — they are
871
+ * emitted by tooling regardless of the user's spoken language. The
872
+ * words "Error", "Exception", "Traceback" stay in their original
873
+ * English form inside a Chinese / Arabic / Russian terminal log.
874
+ *
875
+ * blocker matches:
876
+ * - the literal "Error:" / "Exception:" / "Traceback" tokens, OR
877
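+ * - a Python-style frame line ("File ", `line:col`) — NOTE(review): BLOCKER_MARKERS_PATTERN has no alternative for this shape; only the "Traceback" token covers Python output — confirm intent, OR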
878
+ * - a JS / Java-style stack frame ("at <ident>(...)" with a
879
+ * `:line:col` suffix).
880
+ *
881
+ * blocker_resolved matches:
882
+ * - a Unicode check-mark glyph (✓ U+2713, ✔ U+2714, ✅ U+2705,
883
+ * ☑ U+2611, 🎉 U+1F389), OR
884
+ * - the structural marker "fixed: …" / "resolved: …" — these are
885
+ * programming-domain conventions (git log, PR titles, CHANGELOG
886
+ * entries) rather than natural-language phrases.
771
887
  */
772
- const BLOCKER_PATTERNS = [
773
- /\bblocked on\b/i,
774
- /\bwaiting for\b/i,
775
- /\bneed\s+\S+\s+before\b/i,
776
- /\bcan'?t proceed until\b/i,
777
- /\bdepends on\b/i,
778
- /\bblocked\b/i,
779
- // Turkish patterns
780
- /\bbekliyor\b/i,
781
- /\bbekliyorum\b/i,
782
- ];
783
- const BLOCKER_RESOLVED_PATTERNS = [
784
- /\bunblocked\b/i,
785
- /\bresolved\b/i,
786
- /\bgot the\s+\S+/i,
787
- /\bis ready now\b/i,
788
- /\bcan proceed\b/i,
789
- ];
888
+ const BLOCKER_MARKERS_PATTERN = /(?:\bError\s*:|\bException\s*:|\bTraceback\b|\bat\s+\S+\s*\([^)]*:\d+:\d+\))/u;
889
+ const BLOCKER_RESOLVED_CHECKMARK_PATTERN = /[✓✔✅☑🎉]/u;
890
+ const BLOCKER_RESOLVED_MARKER_PATTERN = /^\s*(?:fixed|resolved)\s*:/iu;
790
891
  function extractBlocker(message) {
791
892
  const events = [];
792
- // Check resolution first — if both match, resolution takes priority
793
- const isResolved = BLOCKER_RESOLVED_PATTERNS.some(p => p.test(message));
893
+ // Resolution takes precedence — if both shapes match, render the
894
+ // happier signal so the snapshot reflects the latest state.
895
+ const isResolved = BLOCKER_RESOLVED_CHECKMARK_PATTERN.test(message) ||
896
+ BLOCKER_RESOLVED_MARKER_PATTERN.test(message);
794
897
  if (isResolved) {
795
898
  events.push({
796
899
  type: "blocker_resolved",
@@ -800,8 +903,7 @@ function extractBlocker(message) {
800
903
  });
801
904
  return events;
802
905
  }
803
- const isBlocked = BLOCKER_PATTERNS.some(p => p.test(message));
804
- if (isBlocked) {
906
+ if (BLOCKER_MARKERS_PATTERN.test(message)) {
805
907
  events.push({
806
908
  type: "blocker",
807
909
  category: "blocked-on",