@roleplay-sh/cli 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1024,11 +1024,11 @@ var init_init = __esm({
1024
1024
  envExample = `# Optional agent credentials used by your own HTTP/CLI target.
1025
1025
  AGENT_API_KEY=
1026
1026
 
1027
- # cloud workbench upload settings.
1028
- ROLEPLAY_CLOUD_URL=http://127.0.0.1:3000
1029
- ROLEPLAY_PROJECT_ID=proj_support
1027
+ # Workbench project settings. Create these after starting a Builder or Team trial.
1028
+ ROLEPLAY_CLOUD_URL=https://app.roleplay.sh
1029
+ ROLEPLAY_PROJECT_ID=
1030
1030
  ROLEPLAY_API_KEY=
1031
- ROLEPLAY_AGENT_NAME=support-agent-staging
1031
+ ROLEPLAY_AGENT_NAME=
1032
1032
 
1033
1033
  # Built-in social-engineering-core target. Set exactly one for CI.
1034
1034
  ROLEPLAY_TARGET_URL=http://localhost:3000/agent
@@ -1074,8 +1074,10 @@ ROLEPLAY_LLM_BASE_URL=
1074
1074
  this.log(`${chalk2.cyan("roleplay.sh")} initialized.`);
1075
1075
  this.log(chalk2.gray("Created .roleplay/config.json, scenarios, and runs directory."));
1076
1076
  this.log("\nNext steps:");
1077
- this.log(" roleplay run .roleplay/scenarios/refund-policy-edge-case.yml");
1078
- this.log(" roleplay report latest");
1077
+ this.log(" Start a 7-day Builder or Team trial: https://app.roleplay.sh/auth/create-workspace");
1078
+ this.log(" Add ROLEPLAY_PROJECT_ID, ROLEPLAY_API_KEY, and your LLM provider key to .env");
1079
+ this.log(" Smoke test install: roleplay run social-engineering-core --target mock --provider mock");
1080
+ this.log(" Real test: roleplay run social-engineering-core --target <agent-url> --provider openai");
1079
1081
  }
1080
1082
  };
1081
1083
  }
@@ -2434,240 +2436,6 @@ var init_engine = __esm({
2434
2436
  }
2435
2437
  });
2436
2438
 
2437
- // src/commands/run.ts
2438
- var run_exports = {};
2439
- __export(run_exports, {
2440
- RunCommand: () => RunCommand
2441
- });
2442
- import { Args as Args2, Flags as Flags3 } from "@oclif/core";
2443
- import { promises as fs6 } from "fs";
2444
- import { tmpdir } from "os";
2445
- import { join as join4 } from "path";
2446
- function resolveProviderFlags(flags, fallback) {
2447
- const sharedProvider = providerFrom(flags.provider ?? process.env.ROLEPLAY_LLM_PROVIDER, fallback);
2448
- const attackerProvider = providerFrom(flags["attacker-provider"] ?? process.env.ROLEPLAY_ATTACKER_PROVIDER, sharedProvider);
2449
- const judgeProvider = providerFrom(flags["judge-provider"] ?? process.env.ROLEPLAY_JUDGE_PROVIDER, sharedProvider);
2450
- return {
2451
- attackerProvider,
2452
- judgeProvider,
2453
- attackerModel: flags["attacker-model"] ?? process.env.ROLEPLAY_ATTACKER_MODEL ?? flags.model ?? process.env.ROLEPLAY_LLM_MODEL,
2454
- judgeModel: flags["judge-model"] ?? process.env.ROLEPLAY_JUDGE_MODEL ?? flags.model ?? process.env.ROLEPLAY_LLM_MODEL,
2455
- llmBaseUrl: flags["llm-base-url"] ?? process.env.ROLEPLAY_LLM_BASE_URL
2456
- };
2457
- }
2458
- function providerFrom(value, fallback) {
2459
- if (!value && !fallback) return void 0;
2460
- return normalizeProvider(value, fallback ?? "mock");
2461
- }
2462
- function resultNameFromPath(path) {
2463
- return path.replace(/^.*[\\/]/, "").replace(/\.ya?ml$/i, "");
2464
- }
2465
- function cloudAttackPackIdForScenario(scenarioName) {
2466
- if (scenarioName.includes("authority-impersonation")) return "pack_authority";
2467
- if (scenarioName.includes("urgency-pressure")) return "pack_urgency";
2468
- if (scenarioName.includes("policy-bypass")) return "pack_policy";
2469
- if (scenarioName.includes("indirect-prompt-injection")) return "pack_injection";
2470
- if (scenarioName.includes("data-exfiltration")) return "pack_exfiltration";
2471
- if (scenarioName.includes("tool-misuse")) return "pack_tools";
2472
- if (scenarioName.includes("auth-session-confusion")) return "pack_auth_session";
2473
- if (scenarioName.includes("memory-context-poisoning")) return "pack_memory_context";
2474
- return void 0;
2475
- }
2476
- var socialEngineeringCorePack, RunCommand;
2477
- var init_run = __esm({
2478
- "src/commands/run.ts"() {
2479
- "use strict";
2480
- init_engine();
2481
- init_scoring();
2482
- init_reporter();
2483
- init_output();
2484
- init_fs();
2485
- init_scenarios();
2486
- init_errors();
2487
- init_base();
2488
- init_client();
2489
- socialEngineeringCorePack = "social-engineering-core";
2490
- RunCommand = class _RunCommand extends BaseCommand {
2491
- static description = "Run a roleplay scenario or built-in attack pack.";
2492
- static args = {
2493
- scenario: Args2.string({ required: true })
2494
- };
2495
- static flags = {
2496
- target: Flags3.string({
2497
- description: 'HTTP target URL, or "mock" for local smoke tests. Defaults to ROLEPLAY_TARGET_URL.',
2498
- default: process.env.ROLEPLAY_TARGET_URL
2499
- }),
2500
- "target-command": Flags3.string({
2501
- description: "CLI target command for built-in attack packs. Defaults to ROLEPLAY_TARGET_COMMAND.",
2502
- default: process.env.ROLEPLAY_TARGET_COMMAND
2503
- }),
2504
- "max-turns": Flags3.integer(),
2505
- json: Flags3.boolean({ description: "Output JSON only." }),
2506
- out: Flags3.string({ default: ".roleplay/runs" }),
2507
- "fail-on": Flags3.string({ options: ["warning", "failed", "critical"], default: "failed" }),
2508
- provider: Flags3.string({
2509
- options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
2510
- description: "Shared attacker and judge provider. Defaults to ROLEPLAY_LLM_PROVIDER, openai for real attack-pack targets, or mock for smoke tests.",
2511
- default: process.env.ROLEPLAY_LLM_PROVIDER
2512
- }),
2513
- "attacker-provider": Flags3.string({
2514
- options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
2515
- description: "Provider for adaptive attacker turns. Defaults to ROLEPLAY_ATTACKER_PROVIDER or --provider.",
2516
- default: process.env.ROLEPLAY_ATTACKER_PROVIDER
2517
- }),
2518
- "judge-provider": Flags3.string({
2519
- options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
2520
- description: "Provider for transcript judging. Defaults to ROLEPLAY_JUDGE_PROVIDER or --provider.",
2521
- default: process.env.ROLEPLAY_JUDGE_PROVIDER
2522
- }),
2523
- model: Flags3.string({
2524
- description: "Shared LLM model. Defaults to ROLEPLAY_LLM_MODEL or provider defaults.",
2525
- default: process.env.ROLEPLAY_LLM_MODEL
2526
- }),
2527
- "attacker-model": Flags3.string({
2528
- description: "Model for adaptive attacker turns. Defaults to ROLEPLAY_ATTACKER_MODEL or --model.",
2529
- default: process.env.ROLEPLAY_ATTACKER_MODEL
2530
- }),
2531
- "judge-model": Flags3.string({
2532
- description: "Model for transcript judging. Defaults to ROLEPLAY_JUDGE_MODEL, scenario judge.model, or --model.",
2533
- default: process.env.ROLEPLAY_JUDGE_MODEL
2534
- }),
2535
- "llm-base-url": Flags3.string({
2536
- description: "Base URL for openai-compatible providers. Defaults to ROLEPLAY_LLM_BASE_URL.",
2537
- default: process.env.ROLEPLAY_LLM_BASE_URL
2538
- }),
2539
- yes: Flags3.boolean({ char: "y", description: "Allow local CLI target command execution." })
2540
- };
2541
- async run() {
2542
- const { args, flags } = await this.parse(_RunCommand);
2543
- if (args.scenario === socialEngineeringCorePack) {
2544
- await this.runSocialEngineeringCore(flags);
2545
- return;
2546
- }
2547
- if (flags.target || flags["target-command"]) {
2548
- throw new AppError({
2549
- code: "ATTACK_PACK_TARGET_UNSUPPORTED",
2550
- message: "--target and --target-command are only supported when running social-engineering-core.",
2551
- suggestion: "Use roleplay run social-engineering-core --target <url>, or pass a scenario path without target flags.",
2552
- exitCode: 2
2553
- });
2554
- }
2555
- const spinner = createSpinner("Running scenario", flags.json);
2556
- const providers = resolveProviderFlags(flags);
2557
- let result;
2558
- try {
2559
- result = await runScenario({
2560
- scenarioRef: args.scenario,
2561
- maxTurns: flags["max-turns"],
2562
- outDir: flags.out,
2563
- yes: flags.yes,
2564
- ...providers
2565
- });
2566
- spinner?.succeed("Scenario complete");
2567
- } catch (error) {
2568
- spinner?.fail("Scenario failed");
2569
- throw error;
2570
- }
2571
- if (flags.json) {
2572
- this.log(
2573
- JSON.stringify({
2574
- runId: result.runId,
2575
- scenario: result.scenario.name,
2576
- status: result.report.status,
2577
- score: result.report.score,
2578
- reportPath: result.paths.reportJsonPath,
2579
- markdownPath: result.paths.reportMarkdownPath
2580
- })
2581
- );
2582
- } else {
2583
- this.log(
2584
- terminalSummary({
2585
- report: result.report,
2586
- reportPath: result.paths.reportJsonPath,
2587
- markdownPath: result.paths.reportMarkdownPath
2588
- })
2589
- );
2590
- }
2591
- if (shouldFail(result.report.status, result.report.failures, flags["fail-on"])) {
2592
- process.exitCode = 1;
2593
- }
2594
- }
2595
- async runSocialEngineeringCore(flags) {
2596
- if (Boolean(flags.target) === Boolean(flags["target-command"])) {
2597
- throw new AppError({
2598
- code: "ATTACK_PACK_TARGET_REQUIRED",
2599
- message: "Provide exactly one target for social-engineering-core.",
2600
- suggestion: 'Use --target http://localhost:3000/agent, --target-command "node ./agent.js", ROLEPLAY_TARGET_URL, or ROLEPLAY_TARGET_COMMAND.',
2601
- exitCode: 2
2602
- });
2603
- }
2604
- const target = flags.target === "mock" ? { type: "mock" } : flags.target ? { type: "http", url: flags.target } : { type: "cli", command: flags["target-command"] };
2605
- const scenarioDir = await fs6.mkdtemp(join4(tmpdir(), "roleplay-social-engineering-core-"));
2606
- await ensureDir(scenarioDir);
2607
- const spinner = createSpinner("Running social-engineering-core", flags.json);
2608
- const providers = resolveProviderFlags(flags, target.type === "mock" ? "mock" : "openai");
2609
- try {
2610
- const files = [];
2611
- for (const content of attackPackTemplates(target)) {
2612
- const name = content.match(/^name:\s*(.+)$/m)?.[1] ?? `social-engineering-${files.length + 1}`;
2613
- const path = join4(scenarioDir, `${name}.yml`);
2614
- await fs6.writeFile(path, content, "utf8");
2615
- files.push(path);
2616
- }
2617
- const results = [];
2618
- for (const file of files) {
2619
- const result = await runScenario({
2620
- scenarioRef: file,
2621
- maxTurns: flags["max-turns"],
2622
- outDir: flags.out,
2623
- yes: flags.yes,
2624
- ...providers,
2625
- metadata: {
2626
- attackPackId: cloudAttackPackIdForScenario(resultNameFromPath(file)),
2627
- attackPackScenario: resultNameFromPath(file)
2628
- }
2629
- });
2630
- results.push({
2631
- runId: result.runId,
2632
- scenario: result.scenario.name,
2633
- status: result.report.status,
2634
- score: result.report.score,
2635
- failures: result.report.failures,
2636
- reportPath: result.paths.reportJsonPath,
2637
- markdownPath: result.paths.reportMarkdownPath
2638
- });
2639
- }
2640
- spinner?.succeed("Attack pack complete");
2641
- const failed = results.filter(
2642
- (result) => shouldFail(result.status, result.failures, flags["fail-on"])
2643
- );
2644
- if (flags.json) {
2645
- this.log(
2646
- JSON.stringify({
2647
- pack: socialEngineeringCorePack,
2648
- target: target.type,
2649
- total: results.length,
2650
- failed: failed.length,
2651
- results
2652
- })
2653
- );
2654
- } else {
2655
- this.log(
2656
- results.map((result) => `${result.status.toUpperCase()} ${result.score}/100 ${result.scenario} ${result.runId}`).join("\n")
2657
- );
2658
- }
2659
- if (failed.length) process.exitCode = 1;
2660
- } catch (error) {
2661
- spinner?.fail("Attack pack failed");
2662
- throw error;
2663
- } finally {
2664
- await fs6.rm(scenarioDir, { recursive: true, force: true });
2665
- }
2666
- }
2667
- };
2668
- }
2669
- });
2670
-
2671
2439
  // src/schemas/report.schema.ts
2672
2440
  import { z as z4 } from "zod";
2673
2441
  var requiredString, criterionResultSchema, failureSchema2, reportSchema;
@@ -2910,8 +2678,8 @@ var init_cloud_upload_schema = __esm({
2910
2678
  });
2911
2679
 
2912
2680
  // src/cloud/upload-client.ts
2913
- import { promises as fs7 } from "fs";
2914
- import { join as join5 } from "path";
2681
+ import { promises as fs6 } from "fs";
2682
+ import { join as join4 } from "path";
2915
2683
  function requireUploadApiKey(apiKey) {
2916
2684
  const normalized = apiKey?.trim();
2917
2685
  if (normalized) return normalized;
@@ -2932,12 +2700,42 @@ function requireUploadProjectId(projectId) {
2932
2700
  exitCode: 1
2933
2701
  });
2934
2702
  }
/**
 * Builds the error thrown when a real agent test is attempted without one of
 * the Workbench credentials. Both credential checks share the same message and
 * suggestion; only the error code differs.
 *
 * @param {string} code - Error code identifying which credential is missing.
 * @returns {AppError} Error with exit code 1.
 */
function missingRunCredentialError(code) {
  return new AppError({
    code,
    message: "A Builder or Team trial is required to run real agent tests.",
    suggestion: "Start a 7-day trial at https://app.roleplay.sh/auth/create-workspace, then set ROLEPLAY_PROJECT_ID and ROLEPLAY_API_KEY.",
    exitCode: 1
  });
}

/**
 * Returns the trimmed Workbench API key.
 *
 * @param {string | undefined} apiKey - Raw value from the flag or environment.
 * @returns {string} The key with surrounding whitespace removed.
 * @throws {AppError} WORKBENCH_API_KEY_REQUIRED when the key is missing or blank.
 */
function requireRunApiKey(apiKey) {
  const normalized = apiKey?.trim();
  if (normalized) return normalized;
  throw missingRunCredentialError("WORKBENCH_API_KEY_REQUIRED");
}

/**
 * Returns the trimmed Workbench project ID.
 *
 * @param {string | undefined} projectId - Raw value from the flag or environment.
 * @returns {string} The project ID with surrounding whitespace removed.
 * @throws {AppError} WORKBENCH_PROJECT_REQUIRED when the ID is missing or blank.
 */
function requireRunProjectId(projectId) {
  const normalized = projectId?.trim();
  if (normalized) return normalized;
  throw missingRunCredentialError("WORKBENCH_PROJECT_REQUIRED");
}
/**
 * Verifies the given Workbench credentials and requires run access.
 *
 * @param {object} input - Passed unchanged to verifyCloudCredentials.
 * @returns {Promise<object>} The verification result when entitlement.canRun is truthy.
 * @throws The error built by inactiveSubscriptionError() when run access is not granted.
 */
async function assertRunEntitlement(input) {
  const verification = await verifyCloudCredentials(input);
  const { canRun } = verification.entitlement;
  if (!canRun) {
    throw inactiveSubscriptionError();
  }
  return verification;
}
/**
 * Verifies the given Workbench credentials and requires upload access.
 *
 * @param {object} input - Passed unchanged to verifyCloudCredentials.
 * @returns {Promise<object>} The verification result when entitlement.canUpload is truthy.
 * @throws The error built by inactiveSubscriptionError() when upload access is not granted.
 */
async function assertUploadEntitlement(input) {
  const verification = await verifyCloudCredentials(input);
  const { canUpload } = verification.entitlement;
  if (!canUpload) {
    throw inactiveSubscriptionError();
  }
  return verification;
}
2935
2733
  async function buildUploadPayload(input) {
2936
2734
  const runDir = await resolveRunDir(input.run, input.runsDir);
2937
- const reportPath = join5(runDir, "report.json");
2938
- const transcriptPath = join5(runDir, "transcript.json");
2939
- const scenarioPath = join5(runDir, "scenario.yml");
2940
- const metadataPath = join5(runDir, "metadata.json");
2735
+ const reportPath = join4(runDir, "report.json");
2736
+ const transcriptPath = join4(runDir, "transcript.json");
2737
+ const scenarioPath = join4(runDir, "scenario.yml");
2738
+ const metadataPath = join4(runDir, "metadata.json");
2941
2739
  const includeFullEvidence = input.mode === "full_transcript_opt_in";
2942
2740
  const reportArtifact = await readJsonArtifact(reportPath);
2943
2741
  const report = reportSchema.parse(reportArtifact);
@@ -3070,7 +2868,7 @@ function parseCredentialVerification(body) {
3070
2868
  const candidate = body;
3071
2869
  const key = candidate?.key;
3072
2870
  const policy = candidate?.uploadPolicy;
3073
- if (candidate && typeof candidate === "object" && typeof candidate.projectId === "string" && candidate.authenticated === true && key && typeof key === "object" && typeof key.id === "string" && typeof key.name === "string" && typeof key.preview === "string" && typeof key.createdAt === "string" && policy && typeof policy === "object" && (policy.mode === "sanitized_findings" || policy.mode === "full_transcript_opt_in") && typeof policy.transcriptUpload === "boolean" && typeof policy.redactedSnippets === "boolean" && typeof policy.secretRedaction === "boolean" && Number.isInteger(policy.retentionDays) && policy.retentionDays > 0) {
2871
+ if (candidate && typeof candidate === "object" && typeof candidate.projectId === "string" && candidate.authenticated === true && key && typeof key === "object" && typeof key.id === "string" && typeof key.name === "string" && typeof key.preview === "string" && typeof key.createdAt === "string" && policy && typeof policy === "object" && candidate.entitlement && typeof candidate.entitlement === "object" && (candidate.entitlement.plan === "builder" || candidate.entitlement.plan === "team") && ["trialing", "active", "past_due", "canceled"].includes(String(candidate.entitlement.status)) && typeof candidate.entitlement.canRun === "boolean" && typeof candidate.entitlement.canUpload === "boolean" && (policy.mode === "sanitized_findings" || policy.mode === "full_transcript_opt_in") && typeof policy.transcriptUpload === "boolean" && typeof policy.redactedSnippets === "boolean" && typeof policy.secretRedaction === "boolean" && Number.isInteger(policy.retentionDays) && policy.retentionDays > 0) {
3074
2872
  return candidate;
3075
2873
  }
3076
2874
  throw new AppError({
@@ -3080,6 +2878,14 @@ function parseCredentialVerification(body) {
3080
2878
  exitCode: 1
3081
2879
  });
3082
2880
  }
/**
 * Creates (does not throw) the error reported when the workspace subscription
 * does not grant the requested access.
 *
 * @returns {AppError} Error with code WORKBENCH_SUBSCRIPTION_INACTIVE and exit code 1.
 */
function inactiveSubscriptionError() {
  const details = {
    code: "WORKBENCH_SUBSCRIPTION_INACTIVE",
    message: "Your workspace subscription is not active.",
    suggestion: "Open billing to start or resume Builder/Team access: https://app.roleplay.sh/billing",
    exitCode: 1
  };
  return new AppError(details);
}
3083
2889
  function assertUploadResponseMatchesPayload(response, payload) {
3084
2890
  if (response.projectId === payload.projectId && response.runId === payload.run.report.runId && response.mode === payload.mode) {
3085
2891
  return;
@@ -3112,14 +2918,14 @@ function isRelativeCloudPath(value) {
3112
2918
  return value.startsWith("/") && !value.startsWith("//");
3113
2919
  }
3114
2920
  async function readJsonArtifact(path) {
3115
- const contents = await fs7.readFile(path, "utf8");
2921
+ const contents = await fs6.readFile(path, "utf8");
3116
2922
  return JSON.parse(contents.replace(/^\uFEFF/, ""));
3117
2923
  }
3118
2924
  async function readOptionalJsonArtifact(path) {
3119
2925
  return pathExists(path).then((exists) => exists ? readJsonArtifact(path) : void 0);
3120
2926
  }
3121
2927
  async function readOptionalTextArtifact(path) {
3122
- return pathExists(path).then((exists) => exists ? fs7.readFile(path, "utf8") : void 0);
2928
+ return pathExists(path).then((exists) => exists ? fs6.readFile(path, "utf8") : void 0);
3123
2929
  }
3124
2930
  async function readRequiredTranscriptArtifact(path) {
3125
2931
  if (await pathExists(path)) return readJsonArtifact(path);
@@ -3143,10 +2949,280 @@ var init_upload_client = __esm({
3143
2949
  }
3144
2950
  });
3145
2951
 
3146
- // src/commands/upload.ts
3147
- var upload_exports = {};
3148
- __export(upload_exports, {
3149
- UploadCommand: () => UploadCommand
2952
+ // src/commands/run.ts
2953
+ var run_exports = {};
2954
+ __export(run_exports, {
2955
+ RunCommand: () => RunCommand
2956
+ });
2957
+ import { Args as Args2, Flags as Flags3 } from "@oclif/core";
2958
+ import { promises as fs7 } from "fs";
2959
+ import { tmpdir } from "os";
2960
+ import { join as join5 } from "path";
/**
 * Resolves attacker/judge providers, models, and the LLM base URL from parsed
 * CLI flags, falling back to ROLEPLAY_* environment variables.
 *
 * Role-specific values (flag, then role-specific env var) take precedence over
 * the shared --provider / --model values (flag, then shared env var).
 *
 * @param {object} flags - Parsed CLI flags.
 * @param {string} [fallback] - Provider used when no shared provider is given.
 * @returns {{attackerProvider: *, judgeProvider: *, attackerModel: *, judgeModel: *, llmBaseUrl: *}}
 */
function resolveProviderFlags(flags, fallback) {
  const env = process.env;
  const shared = providerFrom(flags.provider ?? env.ROLEPLAY_LLM_PROVIDER, fallback);
  const attackerProvider = providerFrom(flags["attacker-provider"] ?? env.ROLEPLAY_ATTACKER_PROVIDER, shared);
  const judgeProvider = providerFrom(flags["judge-provider"] ?? env.ROLEPLAY_JUDGE_PROVIDER, shared);

  // Shared model is the last resort for both roles.
  const sharedModel = flags.model ?? env.ROLEPLAY_LLM_MODEL;
  const attackerModel = flags["attacker-model"] ?? env.ROLEPLAY_ATTACKER_MODEL ?? sharedModel;
  const judgeModel = flags["judge-model"] ?? env.ROLEPLAY_JUDGE_MODEL ?? sharedModel;
  const llmBaseUrl = flags["llm-base-url"] ?? env.ROLEPLAY_LLM_BASE_URL;

  return { attackerProvider, judgeProvider, attackerModel, judgeModel, llmBaseUrl };
}
/**
 * Normalizes a provider name, or yields undefined when neither a value nor a
 * fallback was supplied.
 *
 * @param {string | undefined} value - Requested provider.
 * @param {string | undefined} fallback - Provider to fall back to; "mock" is
 *   passed to normalizeProvider when this is null or undefined.
 * @returns {*} Result of normalizeProvider, or undefined.
 */
function providerFrom(value, fallback) {
  if (value || fallback) {
    return normalizeProvider(value, fallback ?? "mock");
  }
  return undefined;
}
/**
 * Decides whether running a scenario needs a Workbench run entitlement.
 *
 * An entitlement is needed as soon as any part of the run is not "mock": the
 * target, an explicitly configured attacker provider, the judge type, or one
 * of the resolved providers.
 *
 * @param {object} scenario - Loaded scenario (reads target.type, attacker?.provider, judge.type).
 * @param {object} providers - Resolved providers, forwarded to providersContainRealProvider.
 * @returns {boolean}
 */
function scenarioRequiresRunEntitlement(scenario, providers) {
  if (scenario.target.type !== "mock") return true;

  const attackerProvider = scenario.attacker?.provider;
  if (attackerProvider !== undefined && attackerProvider !== "mock") return true;

  if (scenario.judge.type !== "mock") return true;

  return providersContainRealProvider(providers);
}
/**
 * Reports whether the attacker or judge provider is set to anything other
 * than "mock". An undefined provider does not count as real.
 *
 * @param {{attackerProvider?: string, judgeProvider?: string}} providers
 * @returns {boolean}
 */
function providersContainRealProvider(providers) {
  const isReal = (provider) => provider !== undefined && provider !== "mock";
  const { attackerProvider, judgeProvider } = providers;
  return isReal(attackerProvider) || isReal(judgeProvider);
}
/**
 * Derives a scenario name from a file path: drops everything up to the last
 * "/" or "\" and then a trailing .yml/.yaml extension (case-insensitive).
 *
 * @param {string} path - Scenario file path.
 * @returns {string} File name without directory and YAML extension.
 */
function resultNameFromPath(path) {
  const fileName = path.replace(/^.*[\\/]/, "");
  const withoutExtension = fileName.replace(/\.ya?ml$/i, "");
  return withoutExtension;
}
/**
 * Maps a built-in scenario name to its cloud attack-pack ID.
 *
 * The first marker (in the order listed) contained in the scenario name wins.
 *
 * @param {string} scenarioName - Scenario name, e.g. derived from its file name.
 * @returns {string | undefined} Pack ID, or undefined when no marker matches.
 */
function cloudAttackPackIdForScenario(scenarioName) {
  const packIdByMarker = [
    ["authority-impersonation", "pack_authority"],
    ["urgency-pressure", "pack_urgency"],
    ["policy-bypass", "pack_policy"],
    ["indirect-prompt-injection", "pack_injection"],
    ["data-exfiltration", "pack_exfiltration"],
    ["tool-misuse", "pack_tools"],
    ["auth-session-confusion", "pack_auth_session"],
    ["memory-context-poisoning", "pack_memory_context"]
  ];
  const match = packIdByMarker.find(([marker]) => scenarioName.includes(marker));
  return match?.[1];
}
var socialEngineeringCorePack, RunCommand;
// Lazy initializer for src/commands/run.ts: assigns the pack name and the
// RunCommand class after initializing the modules this command depends on.
var init_run = __esm({
  "src/commands/run.ts"() {
    "use strict";
    init_engine();
    init_run_store();
    init_scenario_schema();
    init_scoring();
    init_reporter();
    init_output();
    init_fs();
    init_scenarios();
    init_errors();
    init_base();
    init_client();
    init_upload_client();
    socialEngineeringCorePack = "social-engineering-core";
    /**
     * `roleplay run <scenario>`: runs a single scenario file, or the built-in
     * social-engineering-core attack pack when that name is given.
     *
     * Runs that involve a non-mock target, judge, or provider first verify a
     * Workbench run entitlement using --endpoint, --project and --api-key.
     * Sets process.exitCode to 1 when a result meets the --fail-on threshold.
     */
    RunCommand = class _RunCommand extends BaseCommand {
      static description = "Run a roleplay scenario or built-in attack pack.";
      static args = {
        scenario: Args2.string({ required: true })
      };
      static flags = {
        target: Flags3.string({
          description: 'HTTP target URL, or "mock" for local smoke tests. Defaults to ROLEPLAY_TARGET_URL.',
          default: process.env.ROLEPLAY_TARGET_URL
        }),
        "target-command": Flags3.string({
          description: "CLI target command for built-in attack packs. Defaults to ROLEPLAY_TARGET_COMMAND.",
          default: process.env.ROLEPLAY_TARGET_COMMAND
        }),
        "max-turns": Flags3.integer(),
        json: Flags3.boolean({ description: "Output JSON only." }),
        out: Flags3.string({ default: ".roleplay/runs" }),
        "fail-on": Flags3.string({ options: ["warning", "failed", "critical"], default: "failed" }),
        provider: Flags3.string({
          options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
          description: "Shared attacker and judge provider. Defaults to ROLEPLAY_LLM_PROVIDER, openai for real attack-pack targets, or mock for smoke tests.",
          default: process.env.ROLEPLAY_LLM_PROVIDER
        }),
        "attacker-provider": Flags3.string({
          options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
          description: "Provider for adaptive attacker turns. Defaults to ROLEPLAY_ATTACKER_PROVIDER or --provider.",
          default: process.env.ROLEPLAY_ATTACKER_PROVIDER
        }),
        "judge-provider": Flags3.string({
          options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
          description: "Provider for transcript judging. Defaults to ROLEPLAY_JUDGE_PROVIDER or --provider.",
          default: process.env.ROLEPLAY_JUDGE_PROVIDER
        }),
        model: Flags3.string({
          description: "Shared LLM model. Defaults to ROLEPLAY_LLM_MODEL or provider defaults.",
          default: process.env.ROLEPLAY_LLM_MODEL
        }),
        "attacker-model": Flags3.string({
          description: "Model for adaptive attacker turns. Defaults to ROLEPLAY_ATTACKER_MODEL or --model.",
          default: process.env.ROLEPLAY_ATTACKER_MODEL
        }),
        "judge-model": Flags3.string({
          description: "Model for transcript judging. Defaults to ROLEPLAY_JUDGE_MODEL, scenario judge.model, or --model.",
          default: process.env.ROLEPLAY_JUDGE_MODEL
        }),
        "llm-base-url": Flags3.string({
          description: "Base URL for openai-compatible providers. Defaults to ROLEPLAY_LLM_BASE_URL.",
          default: process.env.ROLEPLAY_LLM_BASE_URL
        }),
        // NOTE(review): the fallback is a loopback URL while the generated .env
        // template points ROLEPLAY_CLOUD_URL at https://app.roleplay.sh; confirm
        // whether the hosted URL should be the default here as well.
        endpoint: Flags3.string({
          description: "cloud workbench URL for real-run entitlement checks. Defaults to ROLEPLAY_CLOUD_URL.",
          default: process.env.ROLEPLAY_CLOUD_URL ?? "http://127.0.0.1:3000"
        }),
        project: Flags3.string({
          description: "cloud workbench project ID for real agent tests. Defaults to ROLEPLAY_PROJECT_ID.",
          default: process.env.ROLEPLAY_PROJECT_ID
        }),
        "api-key": Flags3.string({
          description: "cloud workbench API key for real agent tests. Defaults to ROLEPLAY_API_KEY.",
          default: process.env.ROLEPLAY_API_KEY
        }),
        yes: Flags3.boolean({ char: "y", description: "Allow local CLI target command execution." })
      };
      /**
       * Command entry point. Dispatches to the attack pack when the scenario
       * argument names it; otherwise runs the referenced scenario file.
       *
       * @throws {AppError} ATTACK_PACK_TARGET_UNSUPPORTED (exit code 2) when
       *   target flags are combined with a scenario path.
       */
      async run() {
        const { args, flags } = await this.parse(_RunCommand);
        if (args.scenario === socialEngineeringCorePack) {
          await this.runSocialEngineeringCore(flags);
          return;
        }
        if (flags.target || flags["target-command"]) {
          throw new AppError({
            code: "ATTACK_PACK_TARGET_UNSUPPORTED",
            message: "--target and --target-command are only supported when running social-engineering-core.",
            suggestion: "Use roleplay run social-engineering-core --target <url>, or pass a scenario path without target flags.",
            exitCode: 2
          });
        }
        const providers = resolveProviderFlags(flags);
        // The scenario is loaded up front only to decide whether an
        // entitlement check is needed; runScenario receives the original ref.
        const scenario = await loadScenarioFile(await resolveScenarioPath(args.scenario));
        if (scenarioRequiresRunEntitlement(scenario, providers)) {
          await assertRunEntitlement({
            endpoint: flags.endpoint,
            projectId: requireRunProjectId(flags.project),
            apiKey: requireRunApiKey(flags["api-key"])
          });
        }
        const spinner = createSpinner("Running scenario", flags.json);
        let result;
        try {
          result = await runScenario({
            scenarioRef: args.scenario,
            maxTurns: flags["max-turns"],
            outDir: flags.out,
            yes: flags.yes,
            ...providers
          });
          spinner?.succeed("Scenario complete");
        } catch (error) {
          spinner?.fail("Scenario failed");
          throw error;
        }
        if (flags.json) {
          this.log(
            JSON.stringify({
              runId: result.runId,
              scenario: result.scenario.name,
              status: result.report.status,
              score: result.report.score,
              reportPath: result.paths.reportJsonPath,
              markdownPath: result.paths.reportMarkdownPath
            })
          );
        } else {
          this.log(
            terminalSummary({
              report: result.report,
              reportPath: result.paths.reportJsonPath,
              markdownPath: result.paths.reportMarkdownPath
            })
          );
        }
        if (shouldFail(result.report.status, result.report.failures, flags["fail-on"])) {
          process.exitCode = 1;
        }
      }
      /**
       * Runs every scenario of the built-in social-engineering-core pack
       * against exactly one target (--target or --target-command).
       *
       * Scenario templates are written to a temporary directory that is
       * removed again whether the run succeeds or fails.
       *
       * @param {object} flags - Parsed command flags.
       * @throws {AppError} ATTACK_PACK_TARGET_REQUIRED (exit code 2) when zero
       *   or both target flags are set.
       */
      async runSocialEngineeringCore(flags) {
        if (Boolean(flags.target) === Boolean(flags["target-command"])) {
          throw new AppError({
            code: "ATTACK_PACK_TARGET_REQUIRED",
            message: "Provide exactly one target for social-engineering-core.",
            suggestion: 'Use --target http://localhost:3000/agent, --target-command "node ./agent.js", ROLEPLAY_TARGET_URL, or ROLEPLAY_TARGET_COMMAND.',
            exitCode: 2
          });
        }
        const target = flags.target === "mock" ? { type: "mock" } : flags.target ? { type: "http", url: flags.target } : { type: "cli", command: flags["target-command"] };
        const providers = resolveProviderFlags(flags, target.type === "mock" ? "mock" : "openai");
        // Check credentials and entitlement BEFORE creating the temp directory,
        // so a rejected run does not leave a directory behind.
        if (target.type !== "mock" || providersContainRealProvider(providers)) {
          await assertRunEntitlement({
            endpoint: flags.endpoint,
            projectId: requireRunProjectId(flags.project),
            apiKey: requireRunApiKey(flags["api-key"])
          });
        }
        const scenarioDir = await fs7.mkdtemp(join5(tmpdir(), "roleplay-social-engineering-core-"));
        let spinner;
        try {
          await ensureDir(scenarioDir);
          spinner = createSpinner("Running social-engineering-core", flags.json);
          const files = [];
          for (const content of attackPackTemplates(target)) {
            // Name the file after the template's `name:` line when present.
            const name = content.match(/^name:\s*(.+)$/m)?.[1] ?? `social-engineering-${files.length + 1}`;
            const path = join5(scenarioDir, `${name}.yml`);
            await fs7.writeFile(path, content, "utf8");
            files.push(path);
          }
          const results = [];
          for (const file of files) {
            const result = await runScenario({
              scenarioRef: file,
              maxTurns: flags["max-turns"],
              outDir: flags.out,
              yes: flags.yes,
              ...providers,
              metadata: {
                attackPackId: cloudAttackPackIdForScenario(resultNameFromPath(file)),
                attackPackScenario: resultNameFromPath(file)
              }
            });
            results.push({
              runId: result.runId,
              scenario: result.scenario.name,
              status: result.report.status,
              score: result.report.score,
              failures: result.report.failures,
              reportPath: result.paths.reportJsonPath,
              markdownPath: result.paths.reportMarkdownPath
            });
          }
          spinner?.succeed("Attack pack complete");
          const failed = results.filter(
            (result) => shouldFail(result.status, result.failures, flags["fail-on"])
          );
          if (flags.json) {
            this.log(
              JSON.stringify({
                pack: socialEngineeringCorePack,
                target: target.type,
                total: results.length,
                failed: failed.length,
                results
              })
            );
          } else {
            this.log(
              results.map((result) => `${result.status.toUpperCase()} ${result.score}/100 ${result.scenario} ${result.runId}`).join("\n")
            );
          }
          if (failed.length) process.exitCode = 1;
        } catch (error) {
          spinner?.fail("Attack pack failed");
          throw error;
        } finally {
          await fs7.rm(scenarioDir, { recursive: true, force: true });
        }
      }
    };
  }
});
3221
+
3222
+ // src/commands/upload.ts
3223
+ var upload_exports = {};
3224
+ __export(upload_exports, {
3225
+ UploadCommand: () => UploadCommand
3150
3226
  });
3151
3227
  import { Args as Args3, Flags as Flags4 } from "@oclif/core";
3152
3228
  import chalk4 from "chalk";
@@ -3180,12 +3256,7 @@ async function selectedUploadRunIds(run, runsDir) {
3180
3256
  }
3181
3257
  async function assertUploadPolicyAllowsMode(input) {
3182
3258
  if (input.mode !== "full_transcript_opt_in") return;
3183
- const verification = await verifyCloudCredentials({
3184
- endpoint: input.endpoint,
3185
- projectId: input.projectId,
3186
- apiKey: input.apiKey
3187
- });
3188
- if (verification.uploadPolicy.mode === "full_transcript_opt_in" && verification.uploadPolicy.transcriptUpload) {
3259
+ if (input.verification.uploadPolicy.mode === "full_transcript_opt_in" && input.verification.uploadPolicy.transcriptUpload) {
3189
3260
  return;
3190
3261
  }
3191
3262
  throw new AppError({
@@ -3261,11 +3332,15 @@ var init_upload = __esm({
3261
3332
  );
3262
3333
  try {
3263
3334
  const runIds = await selectedUploadRunIds(args.run, flags.out);
3264
- await assertUploadPolicyAllowsMode({
3335
+ const verification = await assertUploadEntitlement({
3265
3336
  endpoint: flags.endpoint,
3266
3337
  projectId,
3267
- apiKey,
3268
- mode
3338
+ apiKey
3339
+ });
3340
+ await assertUploadPolicyAllowsMode({
3341
+ projectId,
3342
+ mode,
3343
+ verification
3269
3344
  });
3270
3345
  if (args.run === "all") {
3271
3346
  const uploads = [];
@@ -3532,10 +3607,12 @@ async function checkCloudCredentials(cloudUrl, projectId, apiKey) {
3532
3607
  apiKey: normalizedApiKey
3533
3608
  });
3534
3609
  const policy = verification.uploadPolicy;
3610
+ const entitlement = verification.entitlement;
3611
+ const access2 = entitlement.canRun && entitlement.canUpload;
3535
3612
  return {
3536
3613
  name: "cloud workbench API key",
3537
- ok: true,
3538
- detail: `${verification.key.name} (${verification.key.preview}) can upload to ${verification.projectId} with ${policy.mode}, ${policy.retentionDays}d retention`
3614
+ ok: access2,
3615
+ detail: access2 ? `${verification.key.name} (${verification.key.preview}) can run and upload to ${verification.projectId} with ${policy.mode}, ${policy.retentionDays}d retention` : `subscription ${entitlement.status}; open billing to start or resume Builder/Team access`
3539
3616
  };
3540
3617
  } catch (error) {
3541
3618
  return {
@@ -3545,6 +3622,29 @@ async function checkCloudCredentials(cloudUrl, projectId, apiKey) {
3545
3622
  };
3546
3623
  }
3547
3624
  }
3625
+ function checkProviderKey(provider) {
3626
+ if (!provider || provider === "mock") {
3627
+ return {
3628
+ name: "LLM provider key",
3629
+ ok: true,
3630
+ detail: "mock provider is available for install smoke tests"
3631
+ };
3632
+ }
3633
+ const envName = providerKeyEnv(provider);
3634
+ const ok = Boolean(envName && process.env[envName]?.trim());
3635
+ return {
3636
+ name: "LLM provider key",
3637
+ ok,
3638
+ detail: ok ? `${envName} is configured for real adaptive runs` : `set ${envName ?? "ROLEPLAY_LLM_API_KEY"} before running real adaptive tests, or use --provider mock for smoke tests`
3639
+ };
3640
+ }
3641
+ function providerKeyEnv(provider) {
3642
+ if (provider === "openai") return "ROLEPLAY_OPENAI_API_KEY";
3643
+ if (provider === "anthropic") return "ROLEPLAY_ANTHROPIC_API_KEY";
3644
+ if (provider === "google") return "ROLEPLAY_GOOGLE_API_KEY";
3645
+ if (provider === "openai-compatible") return "ROLEPLAY_LLM_API_KEY";
3646
+ return void 0;
3647
+ }
3548
3648
  function cloudHealthDetail(body, endpoint) {
3549
3649
  const service = body.service ?? "cloud workbench";
3550
3650
  const privacy = body.privacy;
@@ -3587,6 +3687,11 @@ var init_doctor = __esm({
3587
3687
  "api-key": Flags8.string({
3588
3688
  description: "cloud workbench API key for credential verification. Defaults to ROLEPLAY_API_KEY.",
3589
3689
  default: process.env.ROLEPLAY_API_KEY
3690
+ }),
3691
+ provider: Flags8.string({
3692
+ options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
3693
+ description: "LLM provider to check for real adaptive runs. Defaults to ROLEPLAY_LLM_PROVIDER or openai.",
3694
+ default: process.env.ROLEPLAY_LLM_PROVIDER ?? "openai"
3590
3695
  })
3591
3696
  };
3592
3697
  async run() {
@@ -3601,6 +3706,7 @@ var init_doctor = __esm({
3601
3706
  checks.push(await checkCloudHealth(flags["cloud-url"]));
3602
3707
  if (flags.project || flags["api-key"]) {
3603
3708
  checks.push(await checkCloudCredentials(flags["cloud-url"], flags.project, flags["api-key"]));
3709
+ checks.push(checkProviderKey(flags.provider));
3604
3710
  }
3605
3711
  }
3606
3712
  if (flags.json) {
@@ -3867,13 +3973,14 @@ var HelpCommand = class extends Command2 {
3867
3973
  command: Args6.string({ required: false })
3868
3974
  };
3869
3975
  async run() {
3870
- this.log(`${chalk8.cyan("roleplay.sh")} - Test your AI agent before your users do.
3976
+ this.log(`${chalk8.cyan("roleplay.sh")} - Included CLI for Builder and Team workspaces.
3871
3977
 
3872
3978
  Usage:
3873
3979
  roleplay init
3874
3980
  roleplay scenario:create <name>
3875
3981
  roleplay run <scenario>
3876
- roleplay run social-engineering-core --target <url> --provider openai
3982
+ roleplay run social-engineering-core --target mock --provider mock
3983
+ roleplay run social-engineering-core --target <url> --provider openai --project <projectId>
3877
3984
  roleplay report latest|<runId> [--out .roleplay/runs]
3878
3985
  roleplay replay latest|<runId> [--out .roleplay/runs]
3879
3986
  roleplay upload latest|all --project <projectId>
@@ -3881,6 +3988,7 @@ Usage:
3881
3988
  roleplay doctor
3882
3989
  roleplay mcp
3883
3990
 
3991
+ Use mock mode for install smoke tests. Use a project API key for real agent tests.
3884
3992
  Use --json on commands for machine-readable output.`);
3885
3993
  }
3886
3994
  };