@orchid-labs/pluxx 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +100 -522
  2. package/dist/cli/agent.d.ts +7 -0
  3. package/dist/cli/agent.d.ts.map +1 -1
  4. package/dist/cli/doctor.d.ts +1 -0
  5. package/dist/cli/doctor.d.ts.map +1 -1
  6. package/dist/cli/eval.d.ts +22 -0
  7. package/dist/cli/eval.d.ts.map +1 -0
  8. package/dist/cli/index.d.ts +19 -2
  9. package/dist/cli/index.d.ts.map +1 -1
  10. package/dist/cli/init-from-mcp.d.ts +17 -2
  11. package/dist/cli/init-from-mcp.d.ts.map +1 -1
  12. package/dist/cli/install.d.ts +2 -0
  13. package/dist/cli/install.d.ts.map +1 -1
  14. package/dist/cli/lint.d.ts +5 -1
  15. package/dist/cli/lint.d.ts.map +1 -1
  16. package/dist/cli/mcp-proxy.d.ts +10 -0
  17. package/dist/cli/mcp-proxy.d.ts.map +1 -0
  18. package/dist/cli/migrate.d.ts.map +1 -1
  19. package/dist/cli/sync-from-mcp.d.ts.map +1 -1
  20. package/dist/cli/test.d.ts +2 -0
  21. package/dist/cli/test.d.ts.map +1 -1
  22. package/dist/generators/claude-code/index.d.ts +2 -0
  23. package/dist/generators/claude-code/index.d.ts.map +1 -1
  24. package/dist/generators/codex/index.d.ts +1 -0
  25. package/dist/generators/codex/index.d.ts.map +1 -1
  26. package/dist/index.d.ts +1 -1
  27. package/dist/index.d.ts.map +1 -1
  28. package/dist/index.js +99 -1
  29. package/dist/mcp/introspect.d.ts +43 -1
  30. package/dist/mcp/introspect.d.ts.map +1 -1
  31. package/dist/permissions.d.ts.map +1 -1
  32. package/dist/validation/platform-rules.d.ts +20 -0
  33. package/dist/validation/platform-rules.d.ts.map +1 -1
  34. package/package.json +2 -2
  35. package/src/cli/agent.ts +459 -34
  36. package/src/cli/doctor.ts +400 -1
  37. package/src/cli/eval.ts +470 -0
  38. package/src/cli/index.ts +633 -114
  39. package/src/cli/init-from-mcp.ts +545 -41
  40. package/src/cli/install.ts +166 -4
  41. package/src/cli/lint.ts +56 -26
  42. package/src/cli/mcp-proxy.ts +322 -0
  43. package/src/cli/migrate.ts +256 -3
  44. package/src/cli/sync-from-mcp.ts +23 -0
  45. package/src/cli/test.ts +10 -2
  46. package/src/generators/claude-code/index.ts +143 -0
  47. package/src/generators/codex/index.ts +23 -0
  48. package/src/index.ts +12 -1
  49. package/src/mcp/introspect.ts +297 -24
  50. package/src/permissions.ts +3 -1
  51. package/src/validation/platform-rules.ts +121 -0
package/src/cli/agent.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { existsSync } from 'fs'
2
- import { mkdir } from 'fs/promises'
2
+ import { chmod, copyFile, mkdir, mkdtemp, readFile, rm } from 'fs/promises'
3
+ import { homedir, tmpdir } from 'os'
3
4
  import { resolve } from 'path'
4
5
  import { spawn } from 'child_process'
5
6
  import { loadConfig } from '../config/load'
@@ -37,6 +38,8 @@ const AGENT_RUNNER_BINARIES: Record<AgentRunner, string> = {
37
38
  cursor: 'agent',
38
39
  }
39
40
 
41
+ const CURSOR_RUNNER_BINARIES = ['agent', 'cursor-agent'] as const
42
+
40
43
  export interface AgentPreparePlannedFile {
41
44
  relativePath: string
42
45
  content: string
@@ -95,10 +98,17 @@ export interface AgentRunOptions {
95
98
  verify?: boolean
96
99
  }
97
100
 
101
+ export interface AgentRunnerModelSummary {
102
+ value?: string
103
+ source: 'explicit' | 'default' | 'unknown'
104
+ display: string
105
+ }
106
+
98
107
  export interface AgentRunSummary {
99
108
  pluginName: string
100
109
  kind: AgentPromptKind
101
110
  runner: AgentRunner
111
+ model: AgentRunnerModelSummary
102
112
  verify: boolean
103
113
  command: string[]
104
114
  commandDisplay: string
@@ -112,6 +122,7 @@ export interface AgentRunSummary {
112
122
 
113
123
  export interface AgentRunPlan extends AgentRunSummary {
114
124
  files: AgentPreparePlannedFile[]
125
+ prepareOptions?: AgentPrepareOptions
115
126
  }
116
127
 
117
128
  export interface AgentRunResult extends AgentRunSummary {
@@ -280,10 +291,6 @@ export async function planAgentRun(
280
291
  throw new Error('--attach is only supported for the opencode runner.')
281
292
  }
282
293
 
283
- if (options.runner === 'codex' && options.model) {
284
- throw new Error('--model is not yet supported for the codex runner in Pluxx. Use the default Codex CLI model selection for now.')
285
- }
286
-
287
294
  const preparePlan = await planAgentPrepare(rootDir, prepareOptions)
288
295
  const promptPlan = await planAgentPrompt(rootDir, kind, { allowMissingContext: true })
289
296
  const promptPath = AGENT_PROMPT_PATHS[kind]
@@ -293,11 +300,13 @@ export async function planAgentRun(
293
300
  attach: options.attach,
294
301
  workspace: rootDir,
295
302
  })
303
+ const model = await resolveAgentRunnerModel(options.runner, options.model)
296
304
 
297
305
  return {
298
306
  pluginName: preparePlan.pluginName,
299
307
  kind,
300
308
  runner: options.runner,
309
+ model,
301
310
  verify,
302
311
  command,
303
312
  commandDisplay: command.map(shellQuote).join(' '),
@@ -307,6 +316,7 @@ export async function planAgentRun(
307
316
  updatedFiles: [...preparePlan.updatedFiles, ...promptPlan.updatedFiles],
308
317
  contextInputs: preparePlan.contextInputs,
309
318
  files: [...preparePlan.files, ...promptPlan.files],
319
+ prepareOptions,
310
320
  }
311
321
  }
312
322
 
@@ -317,22 +327,31 @@ export async function runAgentPlan(
317
327
  streamOutput?: boolean
318
328
  } = {},
319
329
  ): Promise<AgentRunResult> {
320
- for (const file of plan.files) {
321
- const filePath = resolve(rootDir, file.relativePath)
322
- const parentDir = file.relativePath.split('/').slice(0, -1).join('/')
323
- if (parentDir) {
324
- await mkdir(resolve(rootDir, parentDir), { recursive: true })
325
- }
326
- await Bun.write(filePath, file.content)
327
- }
330
+ const preparePlan = await planAgentPrepare(rootDir, plan.prepareOptions ?? {})
331
+ const promptPlan = await planAgentPrompt(rootDir, plan.kind, { allowMissingContext: true })
332
+ await writePlannedFiles(rootDir, [...preparePlan.files, ...promptPlan.files])
333
+ let createdFiles = [...preparePlan.createdFiles, ...promptPlan.createdFiles]
334
+ let updatedFiles = [...preparePlan.updatedFiles, ...promptPlan.updatedFiles]
335
+ let contextInputs = preparePlan.contextInputs
328
336
 
329
337
  await ensureRunnerAvailable(plan.runner)
330
338
  await ensureRunnerAuthenticated(plan.runner)
331
- const runnerExitCode = await executeCommand(plan.command, rootDir, {
332
- streamOutput: options.streamOutput === true,
333
- })
339
+ const executionContext = await prepareRunnerExecution(plan.runner)
340
+ let runnerExitCode: number
341
+ try {
342
+ runnerExitCode = await executeCommand(plan.command, rootDir, {
343
+ streamOutput: options.streamOutput === true,
344
+ env: executionContext.env,
345
+ })
346
+ } finally {
347
+ await executionContext.cleanup?.()
348
+ }
334
349
  if (runnerExitCode === 0 && plan.kind === 'taxonomy') {
335
350
  await applyPersistedTaxonomy(rootDir)
351
+ const refreshedPack = await refreshAgentPack(rootDir, plan.prepareOptions ?? {})
352
+ createdFiles = mergeUnique(createdFiles, refreshedPack.createdFiles)
353
+ updatedFiles = mergeUnique(updatedFiles, refreshedPack.updatedFiles)
354
+ contextInputs = refreshedPack.contextInputs
336
355
  }
337
356
  const verification = runnerExitCode === 0 && plan.verify
338
357
  ? await runTestSuite({ rootDir })
@@ -340,12 +359,61 @@ export async function runAgentPlan(
340
359
 
341
360
  return {
342
361
  ...plan,
362
+ createdFiles,
363
+ updatedFiles,
364
+ contextInputs,
343
365
  ok: runnerExitCode === 0 && (verification?.ok ?? true),
344
366
  runnerExitCode,
345
367
  verification,
346
368
  }
347
369
  }
348
370
 
371
+ async function refreshAgentPack(
372
+ rootDir: string,
373
+ prepareOptions: AgentPrepareOptions,
374
+ ): Promise<{
375
+ createdFiles: string[]
376
+ updatedFiles: string[]
377
+ contextInputs: string[]
378
+ }> {
379
+ const preparePlan = await planAgentPrepare(rootDir, prepareOptions)
380
+ const promptPlans = await Promise.all(
381
+ AGENT_PROMPT_KINDS.map((kind) => planAgentPrompt(rootDir, kind, { allowMissingContext: true })),
382
+ )
383
+ const files = [
384
+ ...preparePlan.files,
385
+ ...promptPlans.flatMap((promptPlan) => promptPlan.files),
386
+ ]
387
+ await writePlannedFiles(rootDir, files)
388
+
389
+ return {
390
+ createdFiles: mergeUnique(
391
+ preparePlan.createdFiles,
392
+ promptPlans.flatMap((promptPlan) => promptPlan.createdFiles),
393
+ ),
394
+ updatedFiles: mergeUnique(
395
+ preparePlan.updatedFiles,
396
+ promptPlans.flatMap((promptPlan) => promptPlan.updatedFiles),
397
+ ),
398
+ contextInputs: preparePlan.contextInputs,
399
+ }
400
+ }
401
+
402
+ async function writePlannedFiles(rootDir: string, files: AgentPreparePlannedFile[]): Promise<void> {
403
+ for (const file of files) {
404
+ const filePath = resolve(rootDir, file.relativePath)
405
+ const parentDir = file.relativePath.split('/').slice(0, -1).join('/')
406
+ if (parentDir) {
407
+ await mkdir(resolve(rootDir, parentDir), { recursive: true })
408
+ }
409
+ await Bun.write(filePath, file.content)
410
+ }
411
+ }
412
+
413
+ function mergeUnique(existing: string[], next: string[]): string[] {
414
+ return [...new Set([...existing, ...next])]
415
+ }
416
+
349
417
  function buildEditableFiles(metadata: McpScaffoldMetadata): AgentPlanFile[] {
350
418
  const files: AgentPlanFile[] = [{
351
419
  path: MCP_TAXONOMY_PATH,
@@ -418,6 +486,9 @@ function buildAgentContext(
418
486
  const serverEntry = Object.entries(config.mcp ?? {})[0]
419
487
  const [serverName, server] = serverEntry ?? ['unknown', undefined]
420
488
  const displayName = config.brand?.displayName ?? metadata.settings.displayName ?? config.name
489
+ const resourceByUri = new Map((metadata.resources ?? []).map((resource) => [resource.uri, resource]))
490
+ const resourceTemplateByUri = new Map((metadata.resourceTemplates ?? []).map((template) => [template.uriTemplate, template]))
491
+ const promptByName = new Map((metadata.prompts ?? []).map((prompt) => [prompt.name, prompt]))
421
492
  const lines = [
422
493
  '# Pluxx Agent Context',
423
494
  '',
@@ -435,16 +506,60 @@ function buildAgentContext(
435
506
  `- Transport: ${server?.transport ?? metadata.source.transport}`,
436
507
  `- Auth: ${describeAuth(server ?? metadata.source)}`,
437
508
  `- Tool count: ${metadata.tools.length}`,
509
+ `- Resource count: ${metadata.resources?.length ?? 0}`,
510
+ `- Prompt template count: ${metadata.prompts?.length ?? 0}`,
438
511
  '',
439
512
  '## Generated Skills',
440
513
  '',
441
514
  ]
442
515
 
443
516
  for (const skill of metadata.skills) {
517
+ const relatedResourceLabels = [
518
+ ...(skill.resourceUris ?? []).map((uri) => {
519
+ const resource = resourceByUri.get(uri)
520
+ return resource ? `\`${resource.name ?? resource.title ?? resource.uri}\`` : null
521
+ }),
522
+ ...(skill.resourceTemplateUris ?? []).map((uriTemplate) => {
523
+ const template = resourceTemplateByUri.get(uriTemplate)
524
+ return template ? `\`${template.name}\`` : null
525
+ }),
526
+ ].filter((label): label is string => Boolean(label))
527
+ const relatedPromptLabels = (skill.promptNames ?? [])
528
+ .map((name) => promptByName.get(name)?.name ?? name)
529
+ .map((name) => `\`${name}\``)
530
+
444
531
  lines.push(`### \`${skill.dirName}\``)
445
532
  lines.push('')
446
533
  lines.push(`- Title: ${skill.title}`)
447
534
  lines.push(`- Tools: ${skill.toolNames.join(', ') || 'none'}`)
535
+ if (relatedResourceLabels.length > 0) {
536
+ lines.push(`- Related resources: ${relatedResourceLabels.join(', ')}`)
537
+ }
538
+ if (relatedPromptLabels.length > 0) {
539
+ lines.push(`- Related prompt templates: ${relatedPromptLabels.join(', ')}`)
540
+ }
541
+ lines.push('')
542
+ }
543
+
544
+ if ((metadata.resources?.length ?? 0) > 0 || (metadata.resourceTemplates?.length ?? 0) > 0 || (metadata.prompts?.length ?? 0) > 0) {
545
+ lines.push('## MCP Discovery Surfaces')
546
+ lines.push('')
547
+
548
+ for (const resource of metadata.resources ?? []) {
549
+ const label = resource.name ?? resource.title ?? resource.uri
550
+ lines.push(`- Resource \`${label}\`: ${summarizeDiscoveryDescription(resource.description, `URI: ${resource.uri}`)}`)
551
+ }
552
+
553
+ for (const template of metadata.resourceTemplates ?? []) {
554
+ lines.push(`- Resource template \`${template.name}\`: ${summarizeDiscoveryDescription(template.description, `URI template: ${template.uriTemplate}`)}`)
555
+ }
556
+
557
+ for (const prompt of metadata.prompts ?? []) {
558
+ const args = prompt.arguments?.map((argument) => `\`${argument.name}\`${argument.required ? ' (required)' : ''}`).join(', ')
559
+ const trailing = args ? `Arguments: ${args}` : undefined
560
+ lines.push(`- Prompt \`${prompt.name}\`: ${summarizeDiscoveryDescription(prompt.description, trailing)}`)
561
+ }
562
+
448
563
  lines.push('')
449
564
  }
450
565
 
@@ -504,6 +619,9 @@ function buildAgentContext(
504
619
  lines.push('- Examples should be concrete and specific, not generic placeholders.')
505
620
  lines.push('- Weak MCP metadata (missing/generic tool descriptions) should be called out explicitly before publishing.')
506
621
  lines.push('- The wording should match the MCP product narrative, not just raw tool names.')
622
+ lines.push('- Use discovered MCP resources and prompt templates when they clarify the real product surface.')
623
+ lines.push('- Respect the per-skill resource and prompt-template associations in the metadata/context unless stronger discovery evidence shows they are wrong.')
624
+ lines.push('- Keep INSTRUCTIONS.md as concise routing guidance; do not dump raw vendor documentation into generated sections.')
507
625
  lines.push('')
508
626
 
509
627
  return `${lines.join('\n')}\n`
@@ -717,18 +835,28 @@ function buildAgentPrompt(
717
835
  `- Preserve all custom-note blocks between \`${PLUXX_CUSTOM_START}\` and \`${PLUXX_CUSTOM_END}\`.`,
718
836
  '- Do not change auth wiring or target-platform config.',
719
837
  '- Do not edit files under `dist/`.',
838
+ '- Treat discovered MCP resources, resource templates, and prompt templates as part of the product surface when they are present in the context and metadata.',
839
+ '- Treat per-skill related resources and prompt templates in the context as default evidence for workflow boundaries and examples unless stronger discovery evidence contradicts them.',
720
840
  '',
721
841
  ]
722
842
 
723
843
  if (kind === 'taxonomy') {
724
- return `${sharedIntro.join('\n')}Your job:\n1. Treat \`${MCP_TAXONOMY_PATH}\` as the semantic source of truth for skill grouping and naming.\n2. Infer the MCP's real product surfaces and workflows.\n3. Merge, split, or rename generated skills so labels are product-facing, not lexical buckets.\n4. Update the taxonomy file first; Pluxx will re-render generated skills and commands from that taxonomy after the pass.\n5. Keep setup/onboarding, account-admin, and runtime workflows intentionally separated when appropriate.\n6. Eliminate misleading labels such as contact or people discovery when the tools do not actually perform direct lookup.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- each skill represents a real user workflow or product surface\n- skill names are product-shaped and avoid raw MCP tool/server identifiers when possible\n- setup/onboarding, account-admin, and runtime workflows are grouped intentionally\n- singleton skills are avoided unless they represent a real standalone user workflow\n- commands stay aligned with the chosen taxonomy\n`
844
+ return `${sharedIntro.join('\n')}Your job:\n1. Treat \`${MCP_TAXONOMY_PATH}\` as the semantic source of truth for skill grouping and naming.\n2. Infer the MCP's real product surfaces and workflows from tools, resources, resource templates, and prompt templates.\n3. Merge, split, or rename generated skills so labels are product-facing, not lexical buckets.\n4. Update the taxonomy file first; Pluxx will re-render generated skills and commands from that taxonomy after the pass.\n5. Keep setup/onboarding, account-admin, and runtime workflows intentionally separated when appropriate.\n6. Eliminate misleading labels such as contact or people discovery when the tools do not actually perform direct lookup.\n7. Use per-skill related resources and prompt templates as strong evidence for workflow shape, but correct them when broader discovery evidence shows a mismatch.\n8. Reject stale scaffold assumptions; if current files conflict with discovery context, prefer the discovery evidence and flag the mismatch.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- each skill represents a real user workflow or product surface\n- skill names are product-shaped and avoid raw MCP tool/server identifiers when possible\n- setup/onboarding, account-admin, and runtime workflows are grouped intentionally\n- singleton skills are avoided unless they represent a real standalone user workflow\n- commands stay aligned with the chosen taxonomy and avoid weak command UX\n- per-skill resource and prompt-template associations remain coherent with the chosen taxonomy\n- taxonomy decisions are grounded in current discovery context, not stale scaffold assumptions\n`
725
845
  }
726
846
 
727
847
  if (kind === 'instructions') {
728
- return `${sharedIntro.join('\n')}Your job:\n1. Rewrite only the generated block in \`INSTRUCTIONS.md\`.\n2. Explain what the plugin is for, how the skills should be used, and which setup/admin/account/runtime boundaries matter.\n3. Keep wording aligned to the MCP's product narrative and branded language; avoid raw MCP server/tool identifiers except when technically required.\n4. Prefer the branded product name in user-facing copy; do not lead with internal MCP server identifiers.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- instructions are concise, actionable, and product-shaped\n- wording is branded and product-facing, not raw MCP-internal naming\n- auth/setup/admin caveats are explicit when relevant\n- raw MCP server identifiers are omitted unless operationally necessary\n- the file remains safe for future \`pluxx sync --from-mcp\`\n`
848
+ return `${sharedIntro.join('\n')}Your job:\n1. Rewrite only the generated block in \`INSTRUCTIONS.md\`.\n2. Explain what the plugin is for, how the skills should be used, and which setup/admin/account/runtime boundaries matter.\n3. Use discovered tools, resources, resource templates, and prompt templates to produce short routing guidance, not a raw documentation dump.\n4. Keep wording aligned to the MCP's product narrative and branded language; avoid raw MCP server/tool identifiers except when technically required.\n5. Prefer the branded product name in user-facing copy; do not lead with internal MCP server identifiers.\n6. Replace stale scaffold claims with current discovery-backed language and keep command examples operational, concrete, and copy-paste runnable.\n7. When a workflow already has related resources or prompt templates in the context, keep the wording and examples aligned to that surfaced workflow evidence.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- instructions are concise, actionable, and product-shaped\n- wording is branded and product-facing, not raw MCP-internal naming\n- auth/setup/admin caveats are explicit when relevant\n- raw MCP server identifiers are omitted unless operationally necessary\n- the generated section reads like routing guidance, not pasted vendor docs\n- command examples use strong command UX (clear intent, realistic args, and runnable shapes)\n- workflow guidance stays coherent with related resource and prompt-template evidence in the context\n- the file remains safe for future \`pluxx sync --from-mcp\`\n`
729
849
  }
730
850
 
731
- return `${sharedIntro.join('\n')}Your job:\n1. Review the current scaffold critically.\n2. Call out weak skill groupings, missing setup guidance, vague examples, product/category mismatches, or weak MCP metadata signals.\n3. Separate scaffold quality findings from runtime-correctness findings.\n4. Propose only the highest-value changes needed to make the scaffold useful.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- findings are concrete and tied to files\n- scaffold quality gaps are distinguished from runtime correctness\n- suggested changes improve user-facing plugin quality\n- recommendations stay inside Pluxx-managed boundaries\n`
851
+ return `${sharedIntro.join('\n')}Your job:\n1. Review the current scaffold critically.\n2. Call out weak skill groupings, missing setup guidance, vague examples, product/category mismatches, raw documentation dumps, lexical skill names, stale scaffold assumptions, weak command UX, incoherent per-skill resource/prompt associations, or weak MCP metadata signals.\n3. Separate scaffold quality findings from runtime-correctness findings.\n4. Propose only the highest-value changes needed to make the scaffold useful.\n${buildPromptOverrideBlock(kind, input.overrides)}\nSuccess criteria:\n- findings are concrete and tied to files\n- scaffold quality gaps are distinguished from runtime correctness\n- stale assumptions, incoherent per-skill discovery associations, and command-UX weaknesses are identified explicitly when present\n- suggested changes improve user-facing plugin quality\n- recommendations stay inside Pluxx-managed boundaries\n`
852
+ }
853
+
854
+ function summarizeDiscoveryDescription(description: string | undefined, trailing?: string): string {
855
+ const base = description
856
+ ?.replace(/\s+/g, ' ')
857
+ .trim()
858
+ .slice(0, 180)
859
+ return [base || 'Discovered during MCP introspection.', trailing].filter(Boolean).join(' ')
732
860
  }
733
861
 
734
862
  function buildAgentRunnerPrompt(kind: AgentPromptKind, promptPath: string): string {
@@ -769,12 +897,27 @@ function buildAgentRunnerCommand(
769
897
  if (options.model) {
770
898
  args.push('--model', options.model)
771
899
  }
772
- args.push('--permission-mode', kind === 'review' ? 'plan' : 'acceptEdits', '-p', prompt)
900
+ args.push(
901
+ '--no-session-persistence',
902
+ '--verbose',
903
+ '--output-format',
904
+ 'stream-json',
905
+ '--permission-mode',
906
+ kind === 'review' ? 'plan' : 'acceptEdits',
907
+ '-p',
908
+ prompt,
909
+ )
773
910
  return args
774
911
  }
775
912
 
776
913
  if (runner === 'codex') {
777
- const args = [binary, 'exec']
914
+ // Codex headless edits can finish successfully and then stall during
915
+ // session persistence/finalization. Ephemeral mode keeps the non-interactive
916
+ // worker path stable for Pluxx agent/autopilot runs.
917
+ const args = [binary, 'exec', '--ephemeral', '--skip-git-repo-check']
918
+ if (options.model) {
919
+ args.push('--model', options.model)
920
+ }
778
921
  if (kind !== 'review') {
779
922
  args.push('--full-auto')
780
923
  }
@@ -809,12 +952,123 @@ function buildAgentRunnerCommand(
809
952
  return args
810
953
  }
811
954
 
955
+ async function resolveAgentRunnerModel(
956
+ runner: AgentRunner,
957
+ explicitModel?: string,
958
+ ): Promise<AgentRunnerModelSummary> {
959
+ if (explicitModel) {
960
+ return {
961
+ value: explicitModel,
962
+ source: 'explicit',
963
+ display: `${explicitModel} (explicit)`,
964
+ }
965
+ }
966
+
967
+ const detectedModel = runner === 'codex'
968
+ ? await readCodexDefaultModel()
969
+ : runner === 'opencode'
970
+ ? await readOpenCodeDefaultModel()
971
+ : runner === 'claude'
972
+ ? await readClaudeDefaultModel()
973
+ : undefined
974
+
975
+ if (detectedModel) {
976
+ return {
977
+ value: detectedModel,
978
+ source: 'default',
979
+ display: `${detectedModel} (local default)`,
980
+ }
981
+ }
982
+
983
+ return {
984
+ source: 'unknown',
985
+ display: 'local default (CLI-managed)',
986
+ }
987
+ }
988
+
989
+ async function readCodexDefaultModel(): Promise<string | undefined> {
990
+ const codexHome = process.env.CODEX_HOME?.trim() || resolve(homedir(), '.codex')
991
+ return await readTomlStringValue(resolve(codexHome, 'config.toml'), 'model')
992
+ }
993
+
994
+ async function readOpenCodeDefaultModel(): Promise<string | undefined> {
995
+ const configHome = process.env.XDG_CONFIG_HOME?.trim() || resolve(homedir(), '.config')
996
+ const configPath = resolve(configHome, 'opencode', 'opencode.json')
997
+ const parsed = await readJsonFile(configPath)
998
+ if (!parsed || typeof parsed !== 'object') {
999
+ return undefined
1000
+ }
1001
+
1002
+ if (typeof parsed.model === 'string' && parsed.model.trim()) {
1003
+ return parsed.model.trim()
1004
+ }
1005
+
1006
+ if (
1007
+ typeof parsed.default_agent === 'string'
1008
+ && parsed.agent
1009
+ && typeof parsed.agent === 'object'
1010
+ && parsed.default_agent in parsed.agent
1011
+ ) {
1012
+ const defaultAgent = parsed.agent[parsed.default_agent]
1013
+ if (
1014
+ defaultAgent
1015
+ && typeof defaultAgent === 'object'
1016
+ && 'model' in defaultAgent
1017
+ && typeof defaultAgent.model === 'string'
1018
+ && defaultAgent.model.trim()
1019
+ ) {
1020
+ return defaultAgent.model.trim()
1021
+ }
1022
+ }
1023
+
1024
+ return undefined
1025
+ }
1026
+
1027
+ async function readClaudeDefaultModel(): Promise<string | undefined> {
1028
+ for (const candidate of [
1029
+ resolve(homedir(), '.claude', 'settings.json'),
1030
+ resolve(homedir(), '.claude', 'settings.local.json'),
1031
+ resolve(homedir(), '.claude.json'),
1032
+ ]) {
1033
+ const parsed = await readJsonFile(candidate)
1034
+ if (!parsed || typeof parsed !== 'object') continue
1035
+ for (const key of ['model', 'defaultModel', 'default_model']) {
1036
+ if (key in parsed && typeof parsed[key] === 'string' && parsed[key].trim()) {
1037
+ return parsed[key].trim()
1038
+ }
1039
+ }
1040
+ }
1041
+
1042
+ return undefined
1043
+ }
1044
+
1045
+ async function readTomlStringValue(filePath: string, key: string): Promise<string | undefined> {
1046
+ try {
1047
+ const raw = await readFile(filePath, 'utf8')
1048
+ const match = raw.match(new RegExp(`^\\s*${key}\\s*=\\s*"([^"]+)"\\s*$`, 'm'))
1049
+ return match?.[1]?.trim() || undefined
1050
+ } catch {
1051
+ return undefined
1052
+ }
1053
+ }
1054
+
1055
+ async function readJsonFile(filePath: string): Promise<Record<string, any> | undefined> {
1056
+ try {
1057
+ const raw = await readFile(filePath, 'utf8')
1058
+ return JSON.parse(raw) as Record<string, any>
1059
+ } catch {
1060
+ return undefined
1061
+ }
1062
+ }
1063
+
812
1064
  async function ensureRunnerAvailable(runner: AgentRunner): Promise<void> {
813
- const binary = AGENT_RUNNER_BINARIES[runner]
814
- const available = await commandExists(binary)
1065
+ const binary = runner === 'cursor'
1066
+ ? await resolveCursorBinary()
1067
+ : AGENT_RUNNER_BINARIES[runner]
1068
+ const available = binary ? await commandExists(binary) : false
815
1069
  if (!available) {
816
1070
  if (runner === 'cursor') {
817
- throw new Error('The cursor runner requires the Cursor CLI `agent` binary on PATH. Install it with `curl https://cursor.com/install -fsS | bash` or choose a different runner.')
1071
+ throw new Error('The cursor runner requires the Cursor CLI `agent` or `cursor-agent` binary on PATH. Install it with `curl https://cursor.com/install -fsS | bash` or choose a different runner.')
818
1072
  }
819
1073
  throw new Error(`The ${runner} runner is not available on PATH. Install \`${binary}\` or choose a different runner.`)
820
1074
  }
@@ -827,12 +1081,23 @@ async function ensureRunnerAuthenticated(runner: AgentRunner): Promise<void> {
827
1081
  return
828
1082
  }
829
1083
 
830
- const isAuthenticated = await commandSucceeds(['agent', 'status'])
1084
+ const binary = await resolveCursorBinary()
1085
+ const isAuthenticated = binary ? await commandSucceeds([binary, 'status']) : false
831
1086
  if (!isAuthenticated) {
832
- throw new Error('Cursor CLI authentication is required. Run `agent login` (browser auth) or export `CURSOR_API_KEY` before running Pluxx with `--runner cursor`.')
1087
+ throw new Error('Cursor CLI authentication is required. Run `agent login` (or `cursor-agent login`) or export `CURSOR_API_KEY` before running Pluxx with `--runner cursor`.')
833
1088
  }
834
1089
  }
835
1090
 
1091
+ async function resolveCursorBinary(): Promise<string | undefined> {
1092
+ for (const candidate of CURSOR_RUNNER_BINARIES) {
1093
+ if (await commandExists(candidate)) {
1094
+ return candidate
1095
+ }
1096
+ }
1097
+
1098
+ return undefined
1099
+ }
1100
+
836
1101
  async function commandExists(binary: string): Promise<boolean> {
837
1102
  return await new Promise<boolean>((resolvePromise) => {
838
1103
  const child = spawn('sh', ['-c', `command -v ${shellQuote(binary)} >/dev/null 2>&1`], {
@@ -860,25 +1125,185 @@ async function executeCommand(
860
1125
  cwd: string,
861
1126
  options: {
862
1127
  streamOutput?: boolean
1128
+ env?: NodeJS.ProcessEnv
863
1129
  } = {},
864
1130
  ): Promise<number> {
1131
+ const runtimeCommand = [...command]
1132
+ let codexOutputDir: string | null = null
1133
+ let codexLastMessagePath: string | null = null
1134
+ const isClaudeStreamJson = runtimeCommand[0] === 'claude'
1135
+ && runtimeCommand.includes('--output-format')
1136
+ && runtimeCommand.includes('stream-json')
1137
+
1138
+ if (runtimeCommand[0] === 'codex' && runtimeCommand[1] === 'exec') {
1139
+ codexOutputDir = await mkdtemp(resolve(tmpdir(), 'pluxx-codex-output-'))
1140
+ codexLastMessagePath = resolve(codexOutputDir, 'last-message.txt')
1141
+ runtimeCommand.splice(2, 0, '--json', '--output-last-message', codexLastMessagePath)
1142
+ }
1143
+
865
1144
  return await new Promise<number>((resolvePromise, reject) => {
866
- const child = spawn(command[0], command.slice(1), {
1145
+ const child = spawn(runtimeCommand[0], runtimeCommand.slice(1), {
867
1146
  cwd,
868
1147
  stdio: ['ignore', 'pipe', 'pipe'],
869
- env: process.env,
1148
+ env: options.env ?? process.env,
870
1149
  })
871
-
872
- if (options.streamOutput) {
873
- child.stdout?.on('data', (chunk) => process.stdout.write(chunk))
874
- child.stderr?.on('data', (chunk) => process.stderr.write(chunk))
1150
+ let killedAfterFinalMessage = false
1151
+ let sawFinalMessageAt: number | null = null
1152
+ let codexStdoutBuffer = ''
1153
+ let codexTurnCompleted = false
1154
+ let codexTurnFailed = false
1155
+ let claudeStdoutBuffer = ''
1156
+ let claudeTurnCompleted = false
1157
+ let claudeTurnFailed = false
1158
+ const sentinelInterval = (codexLastMessagePath || isClaudeStreamJson)
1159
+ ? setInterval(() => {
1160
+ const sawCompletionSignal = codexTurnCompleted
1161
+ || codexTurnFailed
1162
+ || claudeTurnCompleted
1163
+ || claudeTurnFailed
1164
+ || (codexLastMessagePath ? existsSync(codexLastMessagePath) : false)
1165
+ if (!sawCompletionSignal) return
1166
+ if (sawFinalMessageAt == null) {
1167
+ sawFinalMessageAt = Date.now()
1168
+ return
1169
+ }
1170
+ if (!killedAfterFinalMessage && Date.now() - sawFinalMessageAt >= 1500) {
1171
+ killedAfterFinalMessage = true
1172
+ child.kill('SIGTERM')
1173
+ }
1174
+ }, 250)
1175
+ : null
1176
+
1177
+ const finalize = async (result: number, error?: Error): Promise<void> => {
1178
+ if (sentinelInterval) clearInterval(sentinelInterval)
1179
+ if (codexOutputDir) {
1180
+ await rm(codexOutputDir, { recursive: true, force: true })
1181
+ }
1182
+ if (error) {
1183
+ reject(error)
1184
+ return
1185
+ }
1186
+ resolvePromise(result)
875
1187
  }
876
1188
 
877
- child.on('error', (error) => reject(error))
878
- child.on('close', (code) => resolvePromise(code ?? 1))
1189
+ child.stdout?.on('data', (chunk) => {
1190
+ const text = chunk.toString()
1191
+ if (codexLastMessagePath || isClaudeStreamJson) {
1192
+ const buffer = codexLastMessagePath ? codexStdoutBuffer + text : claudeStdoutBuffer + text
1193
+ const lines = buffer.split('\n')
1194
+ const remainder = lines.pop() ?? ''
1195
+ if (codexLastMessagePath) {
1196
+ codexStdoutBuffer = remainder
1197
+ } else {
1198
+ claudeStdoutBuffer = remainder
1199
+ }
1200
+ for (const line of lines) {
1201
+ const trimmed = line.trim()
1202
+ if (!trimmed) continue
1203
+ try {
1204
+ const event = JSON.parse(trimmed) as { type?: string; subtype?: string; is_error?: boolean }
1205
+ if (codexLastMessagePath) {
1206
+ if (event.type === 'turn.completed') {
1207
+ codexTurnCompleted = true
1208
+ } else if (event.type === 'turn.failed' || event.type === 'error') {
1209
+ codexTurnFailed = true
1210
+ }
1211
+ } else if (isClaudeStreamJson) {
1212
+ if (event.type === 'result') {
1213
+ if (event.is_error || event.subtype === 'error') {
1214
+ claudeTurnFailed = true
1215
+ } else {
1216
+ claudeTurnCompleted = true
1217
+ }
1218
+ }
1219
+ }
1220
+ } catch {
1221
+ // Ignore non-JSON lines. Codex still writes some human-readable output to stderr.
1222
+ }
1223
+ }
1224
+ }
1225
+ if (options.streamOutput) process.stdout.write(chunk)
1226
+ })
1227
+ child.stderr?.on('data', (chunk) => {
1228
+ if (options.streamOutput) process.stderr.write(chunk)
1229
+ })
1230
+
1231
+ child.on('error', (error) => {
1232
+ void finalize(1, error)
1233
+ })
1234
+ child.on('close', (code) => {
1235
+ const result = codexTurnFailed || claudeTurnFailed
1236
+ ? 1
1237
+ : (killedAfterFinalMessage || codexTurnCompleted || claudeTurnCompleted ? 0 : (code ?? 1))
1238
+ void finalize(result)
1239
+ })
879
1240
  })
880
1241
  }
881
1242
 
1243
/**
 * Builds the environment (and optional teardown) that a runner subprocess
 * should be launched with.
 *
 * - `cursor`: when the Cursor binary found on PATH is not the canonical
 *   `AGENT_RUNNER_BINARIES.cursor` name, a temporary directory containing a
 *   small `sh` shim with the canonical name is created and prepended to PATH.
 *   The shim `exec`s the binary that was actually found, forwarding all
 *   arguments. If no Cursor binary is found, or the canonical one is present,
 *   the current environment is returned unchanged.
 * - `codex`: a throwaway `CODEX_HOME` is created containing an empty
 *   `memories/` directory plus copies of `auth.json`, `config.toml`,
 *   `hooks.json`, `installation_id` and `rules/default.rules` when those exist
 *   in the current Codex home (`$CODEX_HOME`, falling back to `~/.codex`).
 * - any other runner: the current environment is returned unchanged.
 *
 * @param runner The runner that is about to be executed.
 * @returns The environment for the subprocess and, when a temporary directory
 *          was created, a `cleanup` callback that removes it.
 * @throws Re-throws any filesystem error raised while populating the temporary
 *         directory, after removing that directory so nothing (notably a copy
 *         of `auth.json`) is left behind in the system temp dir.
 */
async function prepareRunnerExecution(runner: AgentRunner): Promise<{
  env: NodeJS.ProcessEnv
  cleanup?: () => Promise<void>
}> {
  if (runner === 'cursor') {
    const cursorBinary = await resolveCursorBinary()
    if (!cursorBinary || cursorBinary === AGENT_RUNNER_BINARIES.cursor) {
      return { env: process.env }
    }

    const shimDir = await mkdtemp(resolve(tmpdir(), 'pluxx-cursor-bin-'))
    try {
      const shimPath = resolve(shimDir, AGENT_RUNNER_BINARIES.cursor)
      await Bun.write(
        shimPath,
        `#!/bin/sh\nexec ${shellQuote(cursorBinary)} "$@"\n`,
      )
      await chmod(shimPath, 0o755)
    } catch (error) {
      // Do not leak the temp dir when the shim cannot be written; keep the
      // original error even if the removal itself fails.
      await rm(shimDir, { recursive: true, force: true }).catch(() => undefined)
      throw error
    }

    // An empty PATH entry means "current directory" to POSIX shells, so only
    // append the inherited PATH when there is one.
    const inheritedPath = process.env.PATH
    return {
      env: {
        ...process.env,
        PATH: inheritedPath ? `${shimDir}:${inheritedPath}` : shimDir,
      },
      cleanup: async () => {
        await rm(shimDir, { recursive: true, force: true })
      },
    }
  }

  if (runner !== 'codex') {
    return { env: process.env }
  }

  // `||` (not `??`) is deliberate: a blank or whitespace-only CODEX_HOME falls
  // back to the default location as well.
  const currentCodexHome = process.env.CODEX_HOME?.trim() || resolve(homedir(), '.codex')
  const isolatedCodexHome = await mkdtemp(resolve(tmpdir(), 'pluxx-codex-home-'))

  try {
    await mkdir(resolve(isolatedCodexHome, 'memories'), { recursive: true })

    for (const relativePath of ['auth.json', 'config.toml', 'hooks.json', 'installation_id']) {
      const sourcePath = resolve(currentCodexHome, relativePath)
      if (!existsSync(sourcePath)) continue
      await copyFile(sourcePath, resolve(isolatedCodexHome, relativePath))
    }

    const rulesSourceDir = resolve(currentCodexHome, 'rules')
    if (existsSync(rulesSourceDir)) {
      const rulesTargetDir = resolve(isolatedCodexHome, 'rules')
      await mkdir(rulesTargetDir, { recursive: true })
      const defaultRulesPath = resolve(rulesSourceDir, 'default.rules')
      if (existsSync(defaultRulesPath)) {
        await copyFile(defaultRulesPath, resolve(rulesTargetDir, 'default.rules'))
      }
    }
  } catch (error) {
    // A partially populated home may already hold a copy of auth.json; remove
    // it before surfacing the failure, since the caller never gets `cleanup`.
    await rm(isolatedCodexHome, { recursive: true, force: true }).catch(() => undefined)
    throw error
  }

  return {
    env: {
      ...process.env,
      CODEX_HOME: isolatedCodexHome,
    },
    cleanup: async () => {
      await rm(isolatedCodexHome, { recursive: true, force: true })
    },
  }
}
1306
+
882
1307
  function shellQuote(value: string): string {
883
1308
  if (/^[A-Za-z0-9_/:=.,-]+$/.test(value)) {
884
1309
  return value