npm - @orchid-labs/pluxx - Versions diffs - 0.1.0 → 0.1.1 - Mend

@orchid-labs/pluxx 0.1.0 → 0.1.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (51)

package/README.md +100 -522
package/dist/cli/agent.d.ts +7 -0
package/dist/cli/agent.d.ts.map +1 -1
package/dist/cli/doctor.d.ts +1 -0
package/dist/cli/doctor.d.ts.map +1 -1
package/dist/cli/eval.d.ts +22 -0
package/dist/cli/eval.d.ts.map +1 -0
package/dist/cli/index.d.ts +19 -2
package/dist/cli/index.d.ts.map +1 -1
package/dist/cli/init-from-mcp.d.ts +17 -2
package/dist/cli/init-from-mcp.d.ts.map +1 -1
package/dist/cli/install.d.ts +2 -0
package/dist/cli/install.d.ts.map +1 -1
package/dist/cli/lint.d.ts +5 -1
package/dist/cli/lint.d.ts.map +1 -1
package/dist/cli/mcp-proxy.d.ts +10 -0
package/dist/cli/mcp-proxy.d.ts.map +1 -0
package/dist/cli/migrate.d.ts.map +1 -1
package/dist/cli/sync-from-mcp.d.ts.map +1 -1
package/dist/cli/test.d.ts +2 -0
package/dist/cli/test.d.ts.map +1 -1
package/dist/generators/claude-code/index.d.ts +2 -0
package/dist/generators/claude-code/index.d.ts.map +1 -1
package/dist/generators/codex/index.d.ts +1 -0
package/dist/generators/codex/index.d.ts.map +1 -1
package/dist/index.d.ts +1 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +99 -1
package/dist/mcp/introspect.d.ts +43 -1
package/dist/mcp/introspect.d.ts.map +1 -1
package/dist/permissions.d.ts.map +1 -1
package/dist/validation/platform-rules.d.ts +20 -0
package/dist/validation/platform-rules.d.ts.map +1 -1
package/package.json +2 -2
package/src/cli/agent.ts +459 -34
package/src/cli/doctor.ts +400 -1
package/src/cli/eval.ts +470 -0
package/src/cli/index.ts +633 -114
package/src/cli/init-from-mcp.ts +545 -41
package/src/cli/install.ts +166 -4
package/src/cli/lint.ts +56 -26
package/src/cli/mcp-proxy.ts +322 -0
package/src/cli/migrate.ts +256 -3
package/src/cli/sync-from-mcp.ts +23 -0
package/src/cli/test.ts +10 -2
package/src/generators/claude-code/index.ts +143 -0
package/src/generators/codex/index.ts +23 -0
package/src/index.ts +12 -1
package/src/mcp/introspect.ts +297 -24
package/src/permissions.ts +3 -1
package/src/validation/platform-rules.ts +121 -0

package/src/cli/agent.ts CHANGED

@@ -1,5 +1,6 @@
 import { existsSync } from 'fs'
-import { mkdir } from 'fs/promises'
+import { chmod, copyFile, mkdir, mkdtemp, readFile, rm } from 'fs/promises'
+import { homedir, tmpdir } from 'os'
 import { resolve } from 'path'
 import { spawn } from 'child_process'
 import { loadConfig } from '../config/load'
@@ -37,6 +38,8 @@ const AGENT_RUNNER_BINARIES: Record<AgentRunner, string> = {
   cursor: 'agent',
 }
+const CURSOR_RUNNER_BINARIES = ['agent', 'cursor-agent'] as const
 export interface AgentPreparePlannedFile {
   relativePath: string
   content: string
@@ -95,10 +98,17 @@ export interface AgentRunOptions {
   verify?: boolean
 }
+export interface AgentRunnerModelSummary {
+  value?: string
+  source: 'explicit' | 'default' | 'unknown'
+  display: string
+}
 export interface AgentRunSummary {
   pluginName: string
   kind: AgentPromptKind
   runner: AgentRunner
+  model: AgentRunnerModelSummary
   verify: boolean
   command: string[]
   commandDisplay: string
@@ -112,6 +122,7 @@ export interface AgentRunSummary {
 export interface AgentRunPlan extends AgentRunSummary {
   files: AgentPreparePlannedFile[]
+  prepareOptions?: AgentPrepareOptions
 }
 export interface AgentRunResult extends AgentRunSummary {
@@ -280,10 +291,6 @@ export async function planAgentRun(
     throw new Error('--attach is only supported for the opencode runner.')
   }
-  if (options.runner === 'codex' && options.model) {
-    throw new Error('--model is not yet supported for the codex runner in Pluxx. Use the default Codex CLI model selection for now.')
-  }
   const preparePlan = await planAgentPrepare(rootDir, prepareOptions)
   const promptPlan = await planAgentPrompt(rootDir, kind, { allowMissingContext: true })
   const promptPath = AGENT_PROMPT_PATHS[kind]
@@ -293,11 +300,13 @@ export async function planAgentRun(
     attach: options.attach,
     workspace: rootDir,
   })
+  const model = await resolveAgentRunnerModel(options.runner, options.model)
   return {
     pluginName: preparePlan.pluginName,
     kind,
     runner: options.runner,
+    model,
     verify,
     command,
     commandDisplay: command.map(shellQuote).join(' '),
@@ -307,6 +316,7 @@ export async function planAgentRun(
     updatedFiles: [...preparePlan.updatedFiles, ...promptPlan.updatedFiles],
     contextInputs: preparePlan.contextInputs,
     files: [...preparePlan.files, ...promptPlan.files],
+    prepareOptions,
   }
 }
@@ -317,22 +327,31 @@ export async function runAgentPlan(
     streamOutput?: boolean
   } = {},
 ): Promise<AgentRunResult> {
-  for (const file of plan.files) {
-    const filePath = resolve(rootDir, file.relativePath)
-    const parentDir = file.relativePath.split('/').slice(0, -1).join('/')
-    if (parentDir) {
-      await mkdir(resolve(rootDir, parentDir), { recursive: true })
-    }
-    await Bun.write(filePath, file.content)
-  }
+  const preparePlan = await planAgentPrepare(rootDir, plan.prepareOptions ?? {})
+  const promptPlan = await planAgentPrompt(rootDir, plan.kind, { allowMissingContext: true })
+  await writePlannedFiles(rootDir, [...preparePlan.files, ...promptPlan.files])
+  let createdFiles = [...preparePlan.createdFiles, ...promptPlan.createdFiles]
+  let updatedFiles = [...preparePlan.updatedFiles, ...promptPlan.updatedFiles]
+  let contextInputs = preparePlan.contextInputs
   await ensureRunnerAvailable(plan.runner)
   await ensureRunnerAuthenticated(plan.runner)
-  const runnerExitCode = await executeCommand(plan.command, rootDir, {
-    streamOutput: options.streamOutput === true,
-  })
+  const executionContext = await prepareRunnerExecution(plan.runner)
+  let runnerExitCode: number
+  try {
+    runnerExitCode = await executeCommand(plan.command, rootDir, {
+      streamOutput: options.streamOutput === true,
+      env: executionContext.env,
+    })
+  } finally {
+    await executionContext.cleanup?.()
+  }
   if (runnerExitCode === 0 && plan.kind === 'taxonomy') {
     await applyPersistedTaxonomy(rootDir)
+    const refreshedPack = await refreshAgentPack(rootDir, plan.prepareOptions ?? {})
+    createdFiles = mergeUnique(createdFiles, refreshedPack.createdFiles)
+    updatedFiles = mergeUnique(updatedFiles, refreshedPack.updatedFiles)
+    contextInputs = refreshedPack.contextInputs
   }
   const verification = runnerExitCode === 0 && plan.verify
     ? await runTestSuite({ rootDir })
@@ -340,12 +359,61 @@ export async function runAgentPlan(
   return {
     ...plan,
+    createdFiles,
+    updatedFiles,
+    contextInputs,
     ok: runnerExitCode === 0 && (verification?.ok ?? true),
     runnerExitCode,
     verification,
   }
 }
+async function refreshAgentPack(
+  rootDir: string,
+  prepareOptions: AgentPrepareOptions,
+): Promise<{
+  createdFiles: string[]
+  updatedFiles: string[]
+  contextInputs: string[]
+}> {
+  const preparePlan = await planAgentPrepare(rootDir, prepareOptions)
+  const promptPlans = await Promise.all(
+    AGENT_PROMPT_KINDS.map((kind) => planAgentPrompt(rootDir, kind, { allowMissingContext: true })),
+  )
+  const files = [
+    ...preparePlan.files,
+    ...promptPlans.flatMap((promptPlan) => promptPlan.files),
+  ]
+  await writePlannedFiles(rootDir, files)
+  return {
+    createdFiles: mergeUnique(
+      preparePlan.createdFiles,
+      promptPlans.flatMap((promptPlan) => promptPlan.createdFiles),
+    ),
+    updatedFiles: mergeUnique(
+      preparePlan.updatedFiles,
+      promptPlans.flatMap((promptPlan) => promptPlan.updatedFiles),
+    ),
+    contextInputs: preparePlan.contextInputs,
+  }
+}
+async function writePlannedFiles(rootDir: string, files: AgentPreparePlannedFile[]): Promise<void> {
+  for (const file of files) {
+    const filePath = resolve(rootDir, file.relativePath)
+    const parentDir = file.relativePath.split('/').slice(0, -1).join('/')
+    if (parentDir) {
+      await mkdir(resolve(rootDir, parentDir), { recursive: true })
+    }
+    await Bun.write(filePath, file.content)
+  }
+}
+function mergeUnique(existing: string[], next: string[]): string[] {
+  return [...new Set([...existing, ...next])]
+}
 function buildEditableFiles(metadata: McpScaffoldMetadata): AgentPlanFile[] {
   const files: AgentPlanFile[] = [{
     path: MCP_TAXONOMY_PATH,
@@ -418,6 +486,9 @@ function buildAgentContext(
   const serverEntry = Object.entries(config.mcp ?? {})[0]
   const [serverName, server] = serverEntry ?? ['unknown', undefined]
   const displayName = config.brand?.displayName ?? metadata.settings.displayName ?? config.name
+  const resourceByUri = new Map((metadata.resources ?? []).map((resource) => [resource.uri, resource]))
+  const resourceTemplateByUri = new Map((metadata.resourceTemplates ?? []).map((template) => [template.uriTemplate, template]))
+  const promptByName = new Map((metadata.prompts ?? []).map((prompt) => [prompt.name, prompt]))
   const lines = [
     '# Pluxx Agent Context',
     '',
@@ -435,16 +506,60 @@ function buildAgentContext(
     `- Transport: ${server?.transport ?? metadata.source.transport}`,
     `- Auth: ${describeAuth(server ?? metadata.source)}`,
     `- Tool count: ${metadata.tools.length}`,
+    `- Resource count: ${metadata.resources?.length ?? 0}`,
+    `- Prompt template count: ${metadata.prompts?.length ?? 0}`,
     '',
     '## Generated Skills',
     '',
   ]
   for (const skill of metadata.skills) {
+    const relatedResourceLabels = [
+      ...(skill.resourceUris ?? []).map((uri) => {
+        const resource = resourceByUri.get(uri)
+        return resource ? `\`${resource.name ?? resource.title ?? resource.uri}\`` : null
+      }),
+      ...(skill.resourceTemplateUris ?? []).map((uriTemplate) => {
+        const template = resourceTemplateByUri.get(uriTemplate)
+        return template ? `\`${template.name}\`` : null
+      }),
+    ].filter((label): label is string => Boolean(label))
+    const relatedPromptLabels = (skill.promptNames ?? [])
+      .map((name) => promptByName.get(name)?.name ?? name)
+      .map((name) => `\`${name}\``)
     lines.push(`### \`${skill.dirName}\``)
     lines.push('')
     lines.push(`- Title: ${skill.title}`)
     lines.push(`- Tools: ${skill.toolNames.join(', ') || 'none'}`)
+    if (relatedResourceLabels.length > 0) {
+      lines.push(`- Related resources: ${relatedResourceLabels.join(', ')}`)
+    }
+    if (relatedPromptLabels.length > 0) {
+      lines.push(`- Related prompt templates: ${relatedPromptLabels.join(', ')}`)
+    }
+    lines.push('')
+  }
+  if ((metadata.resources?.length ?? 0) > 0 || (metadata.resourceTemplates?.length ?? 0) > 0 || (metadata.prompts?.length ?? 0) > 0) {
+    lines.push('## MCP Discovery Surfaces')
+    lines.push('')
+    for (const resource of metadata.resources ?? []) {
+      const label = resource.name ?? resource.title ?? resource.uri
+      lines.push(`- Resource \`${label}\`: ${summarizeDiscoveryDescription(resource.description, `URI: ${resource.uri}`)}`)
+    }
+    for (const template of metadata.resourceTemplates ?? []) {
+      lines.push(`- Resource template \`${template.name}\`: ${summarizeDiscoveryDescription(template.description, `URI template: ${template.uriTemplate}`)}`)
+    }
+    for (const prompt of metadata.prompts ?? []) {
+      const args = prompt.arguments?.map((argument) => `\`${argument.name}\`${argument.required ? ' (required)' : ''}`).join(', ')
+      const trailing = args ? `Arguments: ${args}` : undefined
+      lines.push(`- Prompt \`${prompt.name}\`: ${summarizeDiscoveryDescription(prompt.description, trailing)}`)
+    }
     lines.push('')
   }
@@ -504,6 +619,9 @@ function buildAgentContext(
   lines.push('- Examples should be concrete and specific, not generic placeholders.')
   lines.push('- Weak MCP metadata (missing/generic tool descriptions) should be called out explicitly before publishing.')
   lines.push('- The wording should match the MCP product narrative, not just raw tool names.')
+  lines.push('- Use discovered MCP resources and prompt templates when they clarify the real product surface.')
+  lines.push('- Respect the per-skill resource and prompt-template associations in the metadata/context unless stronger discovery evidence shows they are wrong.')
+  lines.push('- Keep INSTRUCTIONS.md as concise routing guidance; do not dump raw vendor documentation into generated sections.')
   lines.push('')
   return `${lines.join('\n')}\n`
@@ -717,18 +835,28 @@ function buildAgentPrompt(
     `- Preserve all custom-note blocks between \`${PLUXX_CUSTOM_START}\` and \`${PLUXX_CUSTOM_END}\`.`,
     '- Do not change auth wiring or target-platform config.',
     '- Do not edit files under `dist/`.',
+    '- Treat discovered MCP resources, resource templates, and prompt templates as part of the product surface when they are present in the context and metadata.',
+    '- Treat per-skill related resources and prompt templates in the context as default evidence for workflow boundaries and examples unless stronger discovery evidence contradicts them.',
     '',
   ]
   if (kind === 'taxonomy') {
-    return `${sharedIntro.join('\n')}Your job:\n1. Treat \`${MCP_TAXONOMY_PATH}\` as the semantic source of truth for skill grouping and naming.\n2. Infer the MCP's real product surfaces and workflows.\n3. Merge, split, or rename generated skills so labels are product-facing, not lexical buckets.\n4. Update the taxonomy file first; Pluxx will re-render generated skills and commands from that taxonomy after the pass.\n5. Keep setup/onboarding, account-admin, and runtime workflows intentionally separated when appropriate.\n6. Eliminate misleading labels such as contact or people discovery when the tools do not actually perform direct lookup.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- each skill represents a real user workflow or product surface\n- skill names are product-shaped and avoid raw MCP tool/server identifiers when possible\n- setup/onboarding, account-admin, and runtime workflows are grouped intentionally\n- singleton skills are avoided unless they represent a real standalone user workflow\n- commands stay aligned with the chosen taxonomy\n`
+    return `${sharedIntro.join('\n')}Your job:\n1. Treat \`${MCP_TAXONOMY_PATH}\` as the semantic source of truth for skill grouping and naming.\n2. Infer the MCP's real product surfaces and workflows from tools, resources, resource templates, and prompt templates.\n3. Merge, split, or rename generated skills so labels are product-facing, not lexical buckets.\n4. Update the taxonomy file first; Pluxx will re-render generated skills and commands from that taxonomy after the pass.\n5. Keep setup/onboarding, account-admin, and runtime workflows intentionally separated when appropriate.\n6. Eliminate misleading labels such as contact or people discovery when the tools do not actually perform direct lookup.\n7. Use per-skill related resources and prompt templates as strong evidence for workflow shape, but correct them when broader discovery evidence shows a mismatch.\n8. Reject stale scaffold assumptions; if current files conflict with discovery context, prefer the discovery evidence and flag the mismatch.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- each skill represents a real user workflow or product surface\n- skill names are product-shaped and avoid raw MCP tool/server identifiers when possible\n- setup/onboarding, account-admin, and runtime workflows are grouped intentionally\n- singleton skills are avoided unless they represent a real standalone user workflow\n- commands stay aligned with the chosen taxonomy and avoid weak command UX\n- per-skill resource and prompt-template associations remain coherent with the chosen taxonomy\n- taxonomy decisions are grounded in current discovery context, not stale scaffold assumptions\n`
   }
   if (kind === 'instructions') {
-    return `${sharedIntro.join('\n')}Your job:\n1. Rewrite only the generated block in \`INSTRUCTIONS.md\`.\n2. Explain what the plugin is for, how the skills should be used, and which setup/admin/account/runtime boundaries matter.\n3. Keep wording aligned to the MCP's product narrative and branded language; avoid raw MCP server/tool identifiers except when technically required.\n4. Prefer the branded product name in user-facing copy; do not lead with internal MCP server identifiers.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- instructions are concise, actionable, and product-shaped\n- wording is branded and product-facing, not raw MCP-internal naming\n- auth/setup/admin caveats are explicit when relevant\n- raw MCP server identifiers are omitted unless operationally necessary\n- the file remains safe for future \`pluxx sync --from-mcp\`\n`
+    return `${sharedIntro.join('\n')}Your job:\n1. Rewrite only the generated block in \`INSTRUCTIONS.md\`.\n2. Explain what the plugin is for, how the skills should be used, and which setup/admin/account/runtime boundaries matter.\n3. Use discovered tools, resources, resource templates, and prompt templates to produce short routing guidance, not a raw documentation dump.\n4. Keep wording aligned to the MCP's product narrative and branded language; avoid raw MCP server/tool identifiers except when technically required.\n5. Prefer the branded product name in user-facing copy; do not lead with internal MCP server identifiers.\n6. Replace stale scaffold claims with current discovery-backed language and keep command examples operational, concrete, and copy-paste runnable.\n7. When a workflow already has related resources or prompt templates in the context, keep the wording and examples aligned to that surfaced workflow evidence.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- instructions are concise, actionable, and product-shaped\n- wording is branded and product-facing, not raw MCP-internal naming\n- auth/setup/admin caveats are explicit when relevant\n- raw MCP server identifiers are omitted unless operationally necessary\n- the generated section reads like routing guidance, not pasted vendor docs\n- command examples use strong command UX (clear intent, realistic args, and runnable shapes)\n- workflow guidance stays coherent with related resource and prompt-template evidence in the context\n- the file remains safe for future \`pluxx sync --from-mcp\`\n`
   }
-  return `${sharedIntro.join('\n')}Your job:\n1. Review the current scaffold critically.\n2. Call out weak skill groupings, missing setup guidance, vague examples, product/category mismatches, or weak MCP metadata signals.\n3. Separate scaffold quality findings from runtime-correctness findings.\n4. Propose only the highest-value changes needed to make the scaffold useful.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- findings are concrete and tied to files\n- scaffold quality gaps are distinguished from runtime correctness\n- suggested changes improve user-facing plugin quality\n- recommendations stay inside Pluxx-managed boundaries\n`
+  return `${sharedIntro.join('\n')}Your job:\n1. Review the current scaffold critically.\n2. Call out weak skill groupings, missing setup guidance, vague examples, product/category mismatches, raw documentation dumps, lexical skill names, stale scaffold assumptions, weak command UX, incoherent per-skill resource/prompt associations, or weak MCP metadata signals.\n3. Separate scaffold quality findings from runtime-correctness findings.\n4. Propose only the highest-value changes needed to make the scaffold useful.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- findings are concrete and tied to files\n- scaffold quality gaps are distinguished from runtime correctness\n- stale assumptions, incoherent per-skill discovery associations, and command-UX weaknesses are identified explicitly when present\n- suggested changes improve user-facing plugin quality\n- recommendations stay inside Pluxx-managed boundaries\n`
+}
+function summarizeDiscoveryDescription(description: string | undefined, trailing?: string): string {
+  const base = description
+    ?.replace(/\s+/g, ' ')
+    .trim()
+    .slice(0, 180)
+  return [base || 'Discovered during MCP introspection.', trailing].filter(Boolean).join(' ')
 }
 function buildAgentRunnerPrompt(kind: AgentPromptKind, promptPath: string): string {
@@ -769,12 +897,27 @@ function buildAgentRunnerCommand(
     if (options.model) {
       args.push('--model', options.model)
     }
-    args.push('--permission-mode', kind === 'review' ? 'plan' : 'acceptEdits', '-p', prompt)
+    args.push(
+      '--no-session-persistence',
+      '--verbose',
+      '--output-format',
+      'stream-json',
+      '--permission-mode',
+      kind === 'review' ? 'plan' : 'acceptEdits',
+      '-p',
+      prompt,
+    )
     return args
   }
   if (runner === 'codex') {
-    const args = [binary, 'exec']
+    // Codex headless edits can finish successfully and then stall during
+    // session persistence/finalization. Ephemeral mode keeps the non-interactive
+    // worker path stable for Pluxx agent/autopilot runs.
+    const args = [binary, 'exec', '--ephemeral', '--skip-git-repo-check']
+    if (options.model) {
+      args.push('--model', options.model)
+    }
     if (kind !== 'review') {
       args.push('--full-auto')
     }
@@ -809,12 +952,123 @@ function buildAgentRunnerCommand(
   return args
 }
+async function resolveAgentRunnerModel(
+  runner: AgentRunner,
+  explicitModel?: string,
+): Promise<AgentRunnerModelSummary> {
+  if (explicitModel) {
+    return {
+      value: explicitModel,
+      source: 'explicit',
+      display: `${explicitModel} (explicit)`,
+    }
+  }
+  const detectedModel = runner === 'codex'
+    ? await readCodexDefaultModel()
+    : runner === 'opencode'
+      ? await readOpenCodeDefaultModel()
+      : runner === 'claude'
+        ? await readClaudeDefaultModel()
+        : undefined
+  if (detectedModel) {
+    return {
+      value: detectedModel,
+      source: 'default',
+      display: `${detectedModel} (local default)`,
+    }
+  }
+  return {
+    source: 'unknown',
+    display: 'local default (CLI-managed)',
+  }
+}
+async function readCodexDefaultModel(): Promise<string | undefined> {
+  const codexHome = process.env.CODEX_HOME?.trim() || resolve(homedir(), '.codex')
+  return await readTomlStringValue(resolve(codexHome, 'config.toml'), 'model')
+}
+async function readOpenCodeDefaultModel(): Promise<string | undefined> {
+  const configHome = process.env.XDG_CONFIG_HOME?.trim() || resolve(homedir(), '.config')
+  const configPath = resolve(configHome, 'opencode', 'opencode.json')
+  const parsed = await readJsonFile(configPath)
+  if (!parsed || typeof parsed !== 'object') {
+    return undefined
+  }
+  if (typeof parsed.model === 'string' && parsed.model.trim()) {
+    return parsed.model.trim()
+  }
+  if (
+    typeof parsed.default_agent === 'string'
+    && parsed.agent
+    && typeof parsed.agent === 'object'
+    && parsed.default_agent in parsed.agent
+  ) {
+    const defaultAgent = parsed.agent[parsed.default_agent]
+    if (
+      defaultAgent
+      && typeof defaultAgent === 'object'
+      && 'model' in defaultAgent
+      && typeof defaultAgent.model === 'string'
+      && defaultAgent.model.trim()
+    ) {
+      return defaultAgent.model.trim()
+    }
+  }
+  return undefined
+}
+async function readClaudeDefaultModel(): Promise<string | undefined> {
+  for (const candidate of [
+    resolve(homedir(), '.claude', 'settings.json'),
+    resolve(homedir(), '.claude', 'settings.local.json'),
+    resolve(homedir(), '.claude.json'),
+  ]) {
+    const parsed = await readJsonFile(candidate)
+    if (!parsed || typeof parsed !== 'object') continue
+    for (const key of ['model', 'defaultModel', 'default_model']) {
+      if (key in parsed && typeof parsed[key] === 'string' && parsed[key].trim()) {
+        return parsed[key].trim()
+      }
+    }
+  }
+  return undefined
+}
+async function readTomlStringValue(filePath: string, key: string): Promise<string | undefined> {
+  try {
+    const raw = await readFile(filePath, 'utf8')
+    const match = raw.match(new RegExp(`^\\s*${key}\\s*=\\s*"([^"]+)"\\s*$`, 'm'))
+    return match?.[1]?.trim() || undefined
+  } catch {
+    return undefined
+  }
+}
+async function readJsonFile(filePath: string): Promise<Record<string, any> | undefined> {
+  try {
+    const raw = await readFile(filePath, 'utf8')
+    return JSON.parse(raw) as Record<string, any>
+  } catch {
+    return undefined
+  }
+}
 async function ensureRunnerAvailable(runner: AgentRunner): Promise<void> {
-  const binary = AGENT_RUNNER_BINARIES[runner]
-  const available = await commandExists(binary)
+  const binary = runner === 'cursor'
+    ? await resolveCursorBinary()
+    : AGENT_RUNNER_BINARIES[runner]
+  const available = binary ? await commandExists(binary) : false
   if (!available) {
     if (runner === 'cursor') {
-      throw new Error('The cursor runner requires the Cursor CLI `agent` binary on PATH. Install it with `curl https://cursor.com/install -fsS | bash` or choose a different runner.')
+      throw new Error('The cursor runner requires the Cursor CLI `agent` or `cursor-agent` binary on PATH. Install it with `curl https://cursor.com/install -fsS | bash` or choose a different runner.')
     }
     throw new Error(`The ${runner} runner is not available on PATH. Install \`${binary}\` or choose a different runner.`)
   }
@@ -827,12 +1081,23 @@ async function ensureRunnerAuthenticated(runner: AgentRunner): Promise<void> {
     return
   }
-  const isAuthenticated = await commandSucceeds(['agent', 'status'])
+  const binary = await resolveCursorBinary()
+  const isAuthenticated = binary ? await commandSucceeds([binary, 'status']) : false
   if (!isAuthenticated) {
-    throw new Error('Cursor CLI authentication is required. Run `agent login` (browser auth) or export `CURSOR_API_KEY` before running Pluxx with `--runner cursor`.')
+    throw new Error('Cursor CLI authentication is required. Run `agent login` (or `cursor-agent login`) or export `CURSOR_API_KEY` before running Pluxx with `--runner cursor`.')
   }
 }
+async function resolveCursorBinary(): Promise<string | undefined> {
+  for (const candidate of CURSOR_RUNNER_BINARIES) {
+    if (await commandExists(candidate)) {
+      return candidate
+    }
+  }
+  return undefined
+}
 async function commandExists(binary: string): Promise<boolean> {
   return await new Promise<boolean>((resolvePromise) => {
     const child = spawn('sh', ['-c', `command -v ${shellQuote(binary)} >/dev/null 2>&1`], {
@@ -860,25 +1125,185 @@ async function executeCommand(
   cwd: string,
   options: {
     streamOutput?: boolean
+    env?: NodeJS.ProcessEnv
   } = {},
 ): Promise<number> {
+  const runtimeCommand = [...command]
+  let codexOutputDir: string | null = null
+  let codexLastMessagePath: string | null = null
+  const isClaudeStreamJson = runtimeCommand[0] === 'claude'
+    && runtimeCommand.includes('--output-format')
+    && runtimeCommand.includes('stream-json')
+  if (runtimeCommand[0] === 'codex' && runtimeCommand[1] === 'exec') {
+    codexOutputDir = await mkdtemp(resolve(tmpdir(), 'pluxx-codex-output-'))
+    codexLastMessagePath = resolve(codexOutputDir, 'last-message.txt')
+    runtimeCommand.splice(2, 0, '--json', '--output-last-message', codexLastMessagePath)
+  }
   return await new Promise<number>((resolvePromise, reject) => {
-    const child = spawn(command[0], command.slice(1), {
+    const child = spawn(runtimeCommand[0], runtimeCommand.slice(1), {
       cwd,
       stdio: ['ignore', 'pipe', 'pipe'],
-      env: process.env,
+      env: options.env ?? process.env,
     })
-    if (options.streamOutput) {
-      child.stdout?.on('data', (chunk) => process.stdout.write(chunk))
-      child.stderr?.on('data', (chunk) => process.stderr.write(chunk))
+    let killedAfterFinalMessage = false
+    let sawFinalMessageAt: number | null = null
+    let codexStdoutBuffer = ''
+    let codexTurnCompleted = false
+    let codexTurnFailed = false
+    let claudeStdoutBuffer = ''
+    let claudeTurnCompleted = false
+    let claudeTurnFailed = false
+    const sentinelInterval = (codexLastMessagePath || isClaudeStreamJson)
+      ? setInterval(() => {
+        const sawCompletionSignal = codexTurnCompleted
+          || codexTurnFailed
+          || claudeTurnCompleted
+          || claudeTurnFailed
+          || (codexLastMessagePath ? existsSync(codexLastMessagePath) : false)
+        if (!sawCompletionSignal) return
+        if (sawFinalMessageAt == null) {
+          sawFinalMessageAt = Date.now()
+          return
+        }
+        if (!killedAfterFinalMessage && Date.now() - sawFinalMessageAt >= 1500) {
+          killedAfterFinalMessage = true
+          child.kill('SIGTERM')
+        }
+      }, 250)
+      : null
+    const finalize = async (result: number, error?: Error): Promise<void> => {
+      if (sentinelInterval) clearInterval(sentinelInterval)
+      if (codexOutputDir) {
+        await rm(codexOutputDir, { recursive: true, force: true })
+      }
+      if (error) {
+        reject(error)
+        return
+      }
+      resolvePromise(result)
     }
-    child.on('error', (error) => reject(error))
-    child.on('close', (code) => resolvePromise(code ?? 1))
+    child.stdout?.on('data', (chunk) => {
+      const text = chunk.toString()
+      if (codexLastMessagePath || isClaudeStreamJson) {
+        const buffer = codexLastMessagePath ? codexStdoutBuffer + text : claudeStdoutBuffer + text
+        const lines = buffer.split('\n')
+        const remainder = lines.pop() ?? ''
+        if (codexLastMessagePath) {
+          codexStdoutBuffer = remainder
+        } else {
+          claudeStdoutBuffer = remainder
+        }
+        for (const line of lines) {
+          const trimmed = line.trim()
+          if (!trimmed) continue
+          try {
+            const event = JSON.parse(trimmed) as { type?: string; subtype?: string; is_error?: boolean }
+            if (codexLastMessagePath) {
+              if (event.type === 'turn.completed') {
+                codexTurnCompleted = true
+              } else if (event.type === 'turn.failed' || event.type === 'error') {
+                codexTurnFailed = true
+              }
+            } else if (isClaudeStreamJson) {
+              if (event.type === 'result') {
+                if (event.is_error || event.subtype === 'error') {
+                  claudeTurnFailed = true
+                } else {
+                  claudeTurnCompleted = true
+                }
+              }
+            }
+          } catch {
+            // Ignore non-JSON lines. Codex still writes some human-readable output to stderr.
+          }
+        }
+      }
+      if (options.streamOutput) process.stdout.write(chunk)
+    })
+    child.stderr?.on('data', (chunk) => {
+      if (options.streamOutput) process.stderr.write(chunk)
+    })
+    child.on('error', (error) => {
+      void finalize(1, error)
+    })
+    child.on('close', (code) => {
+      const result = codexTurnFailed || claudeTurnFailed
+        ? 1
+        : (killedAfterFinalMessage || codexTurnCompleted || claudeTurnCompleted ? 0 : (code ?? 1))
+      void finalize(result)
+    })
   })
 }
+async function prepareRunnerExecution(runner: AgentRunner): Promise<{
+  env: NodeJS.ProcessEnv
+  cleanup?: () => Promise<void>
+}> {
+  if (runner === 'cursor') {
+    const cursorBinary = await resolveCursorBinary()
+    if (!cursorBinary || cursorBinary === AGENT_RUNNER_BINARIES.cursor) {
+      return { env: process.env }
+    }
+    const shimDir = await mkdtemp(resolve(tmpdir(), 'pluxx-cursor-bin-'))
+    const shimPath = resolve(shimDir, AGENT_RUNNER_BINARIES.cursor)
+    await Bun.write(
+      shimPath,
+      `#!/bin/sh\nexec ${shellQuote(cursorBinary)} "$@"\n`,
+    )
+    await chmod(shimPath, 0o755)
+    return {
+      env: {
+        ...process.env,
+        PATH: `${shimDir}:${process.env.PATH ?? ''}`,
+      },
+      cleanup: async () => {
+        await rm(shimDir, { recursive: true, force: true })
+      },
+    }
+  }
+  if (runner !== 'codex') {
+    return { env: process.env }
+  }
+  const currentCodexHome = process.env.CODEX_HOME?.trim() || resolve(homedir(), '.codex')
+  const isolatedCodexHome = await mkdtemp(resolve(tmpdir(), 'pluxx-codex-home-'))
+  await mkdir(resolve(isolatedCodexHome, 'memories'), { recursive: true })
+  for (const relativePath of ['auth.json', 'config.toml', 'hooks.json', 'installation_id']) {
+    const sourcePath = resolve(currentCodexHome, relativePath)
+    if (!existsSync(sourcePath)) continue
+    await copyFile(sourcePath, resolve(isolatedCodexHome, relativePath))
+  }
+  const rulesSourceDir = resolve(currentCodexHome, 'rules')
+  if (existsSync(rulesSourceDir)) {
+    const rulesTargetDir = resolve(isolatedCodexHome, 'rules')
+    await mkdir(rulesTargetDir, { recursive: true })
+    const defaultRulesPath = resolve(rulesSourceDir, 'default.rules')
+    if (existsSync(defaultRulesPath)) {
+      await copyFile(defaultRulesPath, resolve(rulesTargetDir, 'default.rules'))
+    }
+  }
+  return {
+    env: {
+      ...process.env,
+      CODEX_HOME: isolatedCodexHome,
+    },
+    cleanup: async () => {
+      await rm(isolatedCodexHome, { recursive: true, force: true })
+    },
+  }
+}
 function shellQuote(value: string): string {
   if (/^[A-Za-z0-9_/:=.,-]+$/.test(value)) {
     return value