@roleplay-sh/cli 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -2
- package/RELEASE.md +4 -2
- package/dist/cli.js +374 -266
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +8 -8
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -1024,11 +1024,11 @@ var init_init = __esm({
|
|
|
1024
1024
|
envExample = `# Optional agent credentials used by your own HTTP/CLI target.
|
|
1025
1025
|
AGENT_API_KEY=
|
|
1026
1026
|
|
|
1027
|
-
#
|
|
1028
|
-
ROLEPLAY_CLOUD_URL=
|
|
1029
|
-
ROLEPLAY_PROJECT_ID=
|
|
1027
|
+
# Workbench project settings. Create these after starting a Builder or Team trial.
|
|
1028
|
+
ROLEPLAY_CLOUD_URL=https://app.roleplay.sh
|
|
1029
|
+
ROLEPLAY_PROJECT_ID=
|
|
1030
1030
|
ROLEPLAY_API_KEY=
|
|
1031
|
-
ROLEPLAY_AGENT_NAME=
|
|
1031
|
+
ROLEPLAY_AGENT_NAME=
|
|
1032
1032
|
|
|
1033
1033
|
# Built-in social-engineering-core target. Set exactly one for CI.
|
|
1034
1034
|
ROLEPLAY_TARGET_URL=http://localhost:3000/agent
|
|
@@ -1074,8 +1074,10 @@ ROLEPLAY_LLM_BASE_URL=
|
|
|
1074
1074
|
this.log(`${chalk2.cyan("roleplay.sh")} initialized.`);
|
|
1075
1075
|
this.log(chalk2.gray("Created .roleplay/config.json, scenarios, and runs directory."));
|
|
1076
1076
|
this.log("\nNext steps:");
|
|
1077
|
-
this.log("
|
|
1078
|
-
this.log("
|
|
1077
|
+
this.log(" Start a 7-day Builder or Team trial: https://app.roleplay.sh/auth/create-workspace");
|
|
1078
|
+
this.log(" Add ROLEPLAY_PROJECT_ID, ROLEPLAY_API_KEY, and your LLM provider key to .env");
|
|
1079
|
+
this.log(" Smoke test install: roleplay run social-engineering-core --target mock --provider mock");
|
|
1080
|
+
this.log(" Real test: roleplay run social-engineering-core --target <agent-url> --provider openai");
|
|
1079
1081
|
}
|
|
1080
1082
|
};
|
|
1081
1083
|
}
|
|
@@ -2434,240 +2436,6 @@ var init_engine = __esm({
|
|
|
2434
2436
|
}
|
|
2435
2437
|
});
|
|
2436
2438
|
|
|
2437
|
-
// src/commands/run.ts
|
|
2438
|
-
var run_exports = {};
|
|
2439
|
-
__export(run_exports, {
|
|
2440
|
-
RunCommand: () => RunCommand
|
|
2441
|
-
});
|
|
2442
|
-
import { Args as Args2, Flags as Flags3 } from "@oclif/core";
|
|
2443
|
-
import { promises as fs6 } from "fs";
|
|
2444
|
-
import { tmpdir } from "os";
|
|
2445
|
-
import { join as join4 } from "path";
|
|
2446
|
-
function resolveProviderFlags(flags, fallback) {
|
|
2447
|
-
const sharedProvider = providerFrom(flags.provider ?? process.env.ROLEPLAY_LLM_PROVIDER, fallback);
|
|
2448
|
-
const attackerProvider = providerFrom(flags["attacker-provider"] ?? process.env.ROLEPLAY_ATTACKER_PROVIDER, sharedProvider);
|
|
2449
|
-
const judgeProvider = providerFrom(flags["judge-provider"] ?? process.env.ROLEPLAY_JUDGE_PROVIDER, sharedProvider);
|
|
2450
|
-
return {
|
|
2451
|
-
attackerProvider,
|
|
2452
|
-
judgeProvider,
|
|
2453
|
-
attackerModel: flags["attacker-model"] ?? process.env.ROLEPLAY_ATTACKER_MODEL ?? flags.model ?? process.env.ROLEPLAY_LLM_MODEL,
|
|
2454
|
-
judgeModel: flags["judge-model"] ?? process.env.ROLEPLAY_JUDGE_MODEL ?? flags.model ?? process.env.ROLEPLAY_LLM_MODEL,
|
|
2455
|
-
llmBaseUrl: flags["llm-base-url"] ?? process.env.ROLEPLAY_LLM_BASE_URL
|
|
2456
|
-
};
|
|
2457
|
-
}
|
|
2458
|
-
function providerFrom(value, fallback) {
|
|
2459
|
-
if (!value && !fallback) return void 0;
|
|
2460
|
-
return normalizeProvider(value, fallback ?? "mock");
|
|
2461
|
-
}
|
|
2462
|
-
function resultNameFromPath(path) {
|
|
2463
|
-
return path.replace(/^.*[\\/]/, "").replace(/\.ya?ml$/i, "");
|
|
2464
|
-
}
|
|
2465
|
-
function cloudAttackPackIdForScenario(scenarioName) {
|
|
2466
|
-
if (scenarioName.includes("authority-impersonation")) return "pack_authority";
|
|
2467
|
-
if (scenarioName.includes("urgency-pressure")) return "pack_urgency";
|
|
2468
|
-
if (scenarioName.includes("policy-bypass")) return "pack_policy";
|
|
2469
|
-
if (scenarioName.includes("indirect-prompt-injection")) return "pack_injection";
|
|
2470
|
-
if (scenarioName.includes("data-exfiltration")) return "pack_exfiltration";
|
|
2471
|
-
if (scenarioName.includes("tool-misuse")) return "pack_tools";
|
|
2472
|
-
if (scenarioName.includes("auth-session-confusion")) return "pack_auth_session";
|
|
2473
|
-
if (scenarioName.includes("memory-context-poisoning")) return "pack_memory_context";
|
|
2474
|
-
return void 0;
|
|
2475
|
-
}
|
|
2476
|
-
var socialEngineeringCorePack, RunCommand;
|
|
2477
|
-
var init_run = __esm({
|
|
2478
|
-
"src/commands/run.ts"() {
|
|
2479
|
-
"use strict";
|
|
2480
|
-
init_engine();
|
|
2481
|
-
init_scoring();
|
|
2482
|
-
init_reporter();
|
|
2483
|
-
init_output();
|
|
2484
|
-
init_fs();
|
|
2485
|
-
init_scenarios();
|
|
2486
|
-
init_errors();
|
|
2487
|
-
init_base();
|
|
2488
|
-
init_client();
|
|
2489
|
-
socialEngineeringCorePack = "social-engineering-core";
|
|
2490
|
-
RunCommand = class _RunCommand extends BaseCommand {
|
|
2491
|
-
static description = "Run a roleplay scenario or built-in attack pack.";
|
|
2492
|
-
static args = {
|
|
2493
|
-
scenario: Args2.string({ required: true })
|
|
2494
|
-
};
|
|
2495
|
-
static flags = {
|
|
2496
|
-
target: Flags3.string({
|
|
2497
|
-
description: 'HTTP target URL, or "mock" for local smoke tests. Defaults to ROLEPLAY_TARGET_URL.',
|
|
2498
|
-
default: process.env.ROLEPLAY_TARGET_URL
|
|
2499
|
-
}),
|
|
2500
|
-
"target-command": Flags3.string({
|
|
2501
|
-
description: "CLI target command for built-in attack packs. Defaults to ROLEPLAY_TARGET_COMMAND.",
|
|
2502
|
-
default: process.env.ROLEPLAY_TARGET_COMMAND
|
|
2503
|
-
}),
|
|
2504
|
-
"max-turns": Flags3.integer(),
|
|
2505
|
-
json: Flags3.boolean({ description: "Output JSON only." }),
|
|
2506
|
-
out: Flags3.string({ default: ".roleplay/runs" }),
|
|
2507
|
-
"fail-on": Flags3.string({ options: ["warning", "failed", "critical"], default: "failed" }),
|
|
2508
|
-
provider: Flags3.string({
|
|
2509
|
-
options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
|
|
2510
|
-
description: "Shared attacker and judge provider. Defaults to ROLEPLAY_LLM_PROVIDER, openai for real attack-pack targets, or mock for smoke tests.",
|
|
2511
|
-
default: process.env.ROLEPLAY_LLM_PROVIDER
|
|
2512
|
-
}),
|
|
2513
|
-
"attacker-provider": Flags3.string({
|
|
2514
|
-
options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
|
|
2515
|
-
description: "Provider for adaptive attacker turns. Defaults to ROLEPLAY_ATTACKER_PROVIDER or --provider.",
|
|
2516
|
-
default: process.env.ROLEPLAY_ATTACKER_PROVIDER
|
|
2517
|
-
}),
|
|
2518
|
-
"judge-provider": Flags3.string({
|
|
2519
|
-
options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
|
|
2520
|
-
description: "Provider for transcript judging. Defaults to ROLEPLAY_JUDGE_PROVIDER or --provider.",
|
|
2521
|
-
default: process.env.ROLEPLAY_JUDGE_PROVIDER
|
|
2522
|
-
}),
|
|
2523
|
-
model: Flags3.string({
|
|
2524
|
-
description: "Shared LLM model. Defaults to ROLEPLAY_LLM_MODEL or provider defaults.",
|
|
2525
|
-
default: process.env.ROLEPLAY_LLM_MODEL
|
|
2526
|
-
}),
|
|
2527
|
-
"attacker-model": Flags3.string({
|
|
2528
|
-
description: "Model for adaptive attacker turns. Defaults to ROLEPLAY_ATTACKER_MODEL or --model.",
|
|
2529
|
-
default: process.env.ROLEPLAY_ATTACKER_MODEL
|
|
2530
|
-
}),
|
|
2531
|
-
"judge-model": Flags3.string({
|
|
2532
|
-
description: "Model for transcript judging. Defaults to ROLEPLAY_JUDGE_MODEL, scenario judge.model, or --model.",
|
|
2533
|
-
default: process.env.ROLEPLAY_JUDGE_MODEL
|
|
2534
|
-
}),
|
|
2535
|
-
"llm-base-url": Flags3.string({
|
|
2536
|
-
description: "Base URL for openai-compatible providers. Defaults to ROLEPLAY_LLM_BASE_URL.",
|
|
2537
|
-
default: process.env.ROLEPLAY_LLM_BASE_URL
|
|
2538
|
-
}),
|
|
2539
|
-
yes: Flags3.boolean({ char: "y", description: "Allow local CLI target command execution." })
|
|
2540
|
-
};
|
|
2541
|
-
async run() {
|
|
2542
|
-
const { args, flags } = await this.parse(_RunCommand);
|
|
2543
|
-
if (args.scenario === socialEngineeringCorePack) {
|
|
2544
|
-
await this.runSocialEngineeringCore(flags);
|
|
2545
|
-
return;
|
|
2546
|
-
}
|
|
2547
|
-
if (flags.target || flags["target-command"]) {
|
|
2548
|
-
throw new AppError({
|
|
2549
|
-
code: "ATTACK_PACK_TARGET_UNSUPPORTED",
|
|
2550
|
-
message: "--target and --target-command are only supported when running social-engineering-core.",
|
|
2551
|
-
suggestion: "Use roleplay run social-engineering-core --target <url>, or pass a scenario path without target flags.",
|
|
2552
|
-
exitCode: 2
|
|
2553
|
-
});
|
|
2554
|
-
}
|
|
2555
|
-
const spinner = createSpinner("Running scenario", flags.json);
|
|
2556
|
-
const providers = resolveProviderFlags(flags);
|
|
2557
|
-
let result;
|
|
2558
|
-
try {
|
|
2559
|
-
result = await runScenario({
|
|
2560
|
-
scenarioRef: args.scenario,
|
|
2561
|
-
maxTurns: flags["max-turns"],
|
|
2562
|
-
outDir: flags.out,
|
|
2563
|
-
yes: flags.yes,
|
|
2564
|
-
...providers
|
|
2565
|
-
});
|
|
2566
|
-
spinner?.succeed("Scenario complete");
|
|
2567
|
-
} catch (error) {
|
|
2568
|
-
spinner?.fail("Scenario failed");
|
|
2569
|
-
throw error;
|
|
2570
|
-
}
|
|
2571
|
-
if (flags.json) {
|
|
2572
|
-
this.log(
|
|
2573
|
-
JSON.stringify({
|
|
2574
|
-
runId: result.runId,
|
|
2575
|
-
scenario: result.scenario.name,
|
|
2576
|
-
status: result.report.status,
|
|
2577
|
-
score: result.report.score,
|
|
2578
|
-
reportPath: result.paths.reportJsonPath,
|
|
2579
|
-
markdownPath: result.paths.reportMarkdownPath
|
|
2580
|
-
})
|
|
2581
|
-
);
|
|
2582
|
-
} else {
|
|
2583
|
-
this.log(
|
|
2584
|
-
terminalSummary({
|
|
2585
|
-
report: result.report,
|
|
2586
|
-
reportPath: result.paths.reportJsonPath,
|
|
2587
|
-
markdownPath: result.paths.reportMarkdownPath
|
|
2588
|
-
})
|
|
2589
|
-
);
|
|
2590
|
-
}
|
|
2591
|
-
if (shouldFail(result.report.status, result.report.failures, flags["fail-on"])) {
|
|
2592
|
-
process.exitCode = 1;
|
|
2593
|
-
}
|
|
2594
|
-
}
|
|
2595
|
-
async runSocialEngineeringCore(flags) {
|
|
2596
|
-
if (Boolean(flags.target) === Boolean(flags["target-command"])) {
|
|
2597
|
-
throw new AppError({
|
|
2598
|
-
code: "ATTACK_PACK_TARGET_REQUIRED",
|
|
2599
|
-
message: "Provide exactly one target for social-engineering-core.",
|
|
2600
|
-
suggestion: 'Use --target http://localhost:3000/agent, --target-command "node ./agent.js", ROLEPLAY_TARGET_URL, or ROLEPLAY_TARGET_COMMAND.',
|
|
2601
|
-
exitCode: 2
|
|
2602
|
-
});
|
|
2603
|
-
}
|
|
2604
|
-
const target = flags.target === "mock" ? { type: "mock" } : flags.target ? { type: "http", url: flags.target } : { type: "cli", command: flags["target-command"] };
|
|
2605
|
-
const scenarioDir = await fs6.mkdtemp(join4(tmpdir(), "roleplay-social-engineering-core-"));
|
|
2606
|
-
await ensureDir(scenarioDir);
|
|
2607
|
-
const spinner = createSpinner("Running social-engineering-core", flags.json);
|
|
2608
|
-
const providers = resolveProviderFlags(flags, target.type === "mock" ? "mock" : "openai");
|
|
2609
|
-
try {
|
|
2610
|
-
const files = [];
|
|
2611
|
-
for (const content of attackPackTemplates(target)) {
|
|
2612
|
-
const name = content.match(/^name:\s*(.+)$/m)?.[1] ?? `social-engineering-${files.length + 1}`;
|
|
2613
|
-
const path = join4(scenarioDir, `${name}.yml`);
|
|
2614
|
-
await fs6.writeFile(path, content, "utf8");
|
|
2615
|
-
files.push(path);
|
|
2616
|
-
}
|
|
2617
|
-
const results = [];
|
|
2618
|
-
for (const file of files) {
|
|
2619
|
-
const result = await runScenario({
|
|
2620
|
-
scenarioRef: file,
|
|
2621
|
-
maxTurns: flags["max-turns"],
|
|
2622
|
-
outDir: flags.out,
|
|
2623
|
-
yes: flags.yes,
|
|
2624
|
-
...providers,
|
|
2625
|
-
metadata: {
|
|
2626
|
-
attackPackId: cloudAttackPackIdForScenario(resultNameFromPath(file)),
|
|
2627
|
-
attackPackScenario: resultNameFromPath(file)
|
|
2628
|
-
}
|
|
2629
|
-
});
|
|
2630
|
-
results.push({
|
|
2631
|
-
runId: result.runId,
|
|
2632
|
-
scenario: result.scenario.name,
|
|
2633
|
-
status: result.report.status,
|
|
2634
|
-
score: result.report.score,
|
|
2635
|
-
failures: result.report.failures,
|
|
2636
|
-
reportPath: result.paths.reportJsonPath,
|
|
2637
|
-
markdownPath: result.paths.reportMarkdownPath
|
|
2638
|
-
});
|
|
2639
|
-
}
|
|
2640
|
-
spinner?.succeed("Attack pack complete");
|
|
2641
|
-
const failed = results.filter(
|
|
2642
|
-
(result) => shouldFail(result.status, result.failures, flags["fail-on"])
|
|
2643
|
-
);
|
|
2644
|
-
if (flags.json) {
|
|
2645
|
-
this.log(
|
|
2646
|
-
JSON.stringify({
|
|
2647
|
-
pack: socialEngineeringCorePack,
|
|
2648
|
-
target: target.type,
|
|
2649
|
-
total: results.length,
|
|
2650
|
-
failed: failed.length,
|
|
2651
|
-
results
|
|
2652
|
-
})
|
|
2653
|
-
);
|
|
2654
|
-
} else {
|
|
2655
|
-
this.log(
|
|
2656
|
-
results.map((result) => `${result.status.toUpperCase()} ${result.score}/100 ${result.scenario} ${result.runId}`).join("\n")
|
|
2657
|
-
);
|
|
2658
|
-
}
|
|
2659
|
-
if (failed.length) process.exitCode = 1;
|
|
2660
|
-
} catch (error) {
|
|
2661
|
-
spinner?.fail("Attack pack failed");
|
|
2662
|
-
throw error;
|
|
2663
|
-
} finally {
|
|
2664
|
-
await fs6.rm(scenarioDir, { recursive: true, force: true });
|
|
2665
|
-
}
|
|
2666
|
-
}
|
|
2667
|
-
};
|
|
2668
|
-
}
|
|
2669
|
-
});
|
|
2670
|
-
|
|
2671
2439
|
// src/schemas/report.schema.ts
|
|
2672
2440
|
import { z as z4 } from "zod";
|
|
2673
2441
|
var requiredString, criterionResultSchema, failureSchema2, reportSchema;
|
|
@@ -2910,8 +2678,8 @@ var init_cloud_upload_schema = __esm({
|
|
|
2910
2678
|
});
|
|
2911
2679
|
|
|
2912
2680
|
// src/cloud/upload-client.ts
|
|
2913
|
-
import { promises as
|
|
2914
|
-
import { join as
|
|
2681
|
+
import { promises as fs6 } from "fs";
|
|
2682
|
+
import { join as join4 } from "path";
|
|
2915
2683
|
function requireUploadApiKey(apiKey) {
|
|
2916
2684
|
const normalized = apiKey?.trim();
|
|
2917
2685
|
if (normalized) return normalized;
|
|
@@ -2932,12 +2700,42 @@ function requireUploadProjectId(projectId) {
|
|
|
2932
2700
|
exitCode: 1
|
|
2933
2701
|
});
|
|
2934
2702
|
}
|
|
2703
|
+
/**
 * Ensure a Workbench API key is present before a real (non-mock) run.
 *
 * @param {string | null | undefined} apiKey - Raw key value; surrounding
 *   whitespace is ignored.
 * @returns {string} The trimmed key when it is non-empty.
 * @throws {AppError} Code `WORKBENCH_API_KEY_REQUIRED` (exit code 1) when the
 *   key is nullish, empty, or whitespace-only.
 */
function requireRunApiKey(apiKey) {
  const trimmedKey = apiKey?.trim();
  // Guard clause: anything falsy after trimming counts as "not provided".
  if (!trimmedKey) {
    throw new AppError({
      code: "WORKBENCH_API_KEY_REQUIRED",
      message: "A Builder or Team trial is required to run real agent tests.",
      suggestion: "Start a 7-day trial at https://app.roleplay.sh/auth/create-workspace, then set ROLEPLAY_PROJECT_ID and ROLEPLAY_API_KEY.",
      exitCode: 1
    });
  }
  return trimmedKey;
}
|
|
2713
|
+
/**
 * Ensure a Workbench project ID is present before a real (non-mock) run.
 *
 * @param {string | null | undefined} projectId - Raw project ID; surrounding
 *   whitespace is ignored.
 * @returns {string} The trimmed project ID when it is non-empty.
 * @throws {AppError} Code `WORKBENCH_PROJECT_REQUIRED` (exit code 1) when the
 *   ID is nullish, empty, or whitespace-only.
 */
function requireRunProjectId(projectId) {
  const trimmedId = projectId?.trim();
  // Guard clause: anything falsy after trimming counts as "not provided".
  if (!trimmedId) {
    throw new AppError({
      code: "WORKBENCH_PROJECT_REQUIRED",
      message: "A Builder or Team trial is required to run real agent tests.",
      suggestion: "Start a 7-day trial at https://app.roleplay.sh/auth/create-workspace, then set ROLEPLAY_PROJECT_ID and ROLEPLAY_API_KEY.",
      exitCode: 1
    });
  }
  return trimmedId;
}
|
|
2723
|
+
/**
 * Verify cloud credentials and require that the workspace may run tests.
 *
 * @param {object} input - Passed unchanged to `verifyCloudCredentials`.
 * @returns {Promise<object>} The verification result when its
 *   `entitlement.canRun` flag is truthy.
 * @throws The error built by `inactiveSubscriptionError()` when `canRun` is
 *   falsy; any rejection from `verifyCloudCredentials` propagates as-is.
 */
async function assertRunEntitlement(input) {
  const verification = await verifyCloudCredentials(input);
  const mayRun = verification.entitlement.canRun;
  if (!mayRun) {
    throw inactiveSubscriptionError();
  }
  return verification;
}
|
|
2728
|
+
/**
 * Verify cloud credentials and require that the workspace may upload runs.
 *
 * @param {object} input - Passed unchanged to `verifyCloudCredentials`.
 * @returns {Promise<object>} The verification result when its
 *   `entitlement.canUpload` flag is truthy.
 * @throws The error built by `inactiveSubscriptionError()` when `canUpload`
 *   is falsy; any rejection from `verifyCloudCredentials` propagates as-is.
 */
async function assertUploadEntitlement(input) {
  const verification = await verifyCloudCredentials(input);
  const mayUpload = verification.entitlement.canUpload;
  if (!mayUpload) {
    throw inactiveSubscriptionError();
  }
  return verification;
}
|
|
2935
2733
|
async function buildUploadPayload(input) {
|
|
2936
2734
|
const runDir = await resolveRunDir(input.run, input.runsDir);
|
|
2937
|
-
const reportPath =
|
|
2938
|
-
const transcriptPath =
|
|
2939
|
-
const scenarioPath =
|
|
2940
|
-
const metadataPath =
|
|
2735
|
+
const reportPath = join4(runDir, "report.json");
|
|
2736
|
+
const transcriptPath = join4(runDir, "transcript.json");
|
|
2737
|
+
const scenarioPath = join4(runDir, "scenario.yml");
|
|
2738
|
+
const metadataPath = join4(runDir, "metadata.json");
|
|
2941
2739
|
const includeFullEvidence = input.mode === "full_transcript_opt_in";
|
|
2942
2740
|
const reportArtifact = await readJsonArtifact(reportPath);
|
|
2943
2741
|
const report = reportSchema.parse(reportArtifact);
|
|
@@ -3070,7 +2868,7 @@ function parseCredentialVerification(body) {
|
|
|
3070
2868
|
const candidate = body;
|
|
3071
2869
|
const key = candidate?.key;
|
|
3072
2870
|
const policy = candidate?.uploadPolicy;
|
|
3073
|
-
if (candidate && typeof candidate === "object" && typeof candidate.projectId === "string" && candidate.authenticated === true && key && typeof key === "object" && typeof key.id === "string" && typeof key.name === "string" && typeof key.preview === "string" && typeof key.createdAt === "string" && policy && typeof policy === "object" && (policy.mode === "sanitized_findings" || policy.mode === "full_transcript_opt_in") && typeof policy.transcriptUpload === "boolean" && typeof policy.redactedSnippets === "boolean" && typeof policy.secretRedaction === "boolean" && Number.isInteger(policy.retentionDays) && policy.retentionDays > 0) {
|
|
2871
|
+
if (candidate && typeof candidate === "object" && typeof candidate.projectId === "string" && candidate.authenticated === true && key && typeof key === "object" && typeof key.id === "string" && typeof key.name === "string" && typeof key.preview === "string" && typeof key.createdAt === "string" && policy && typeof policy === "object" && candidate.entitlement && typeof candidate.entitlement === "object" && (candidate.entitlement.plan === "builder" || candidate.entitlement.plan === "team") && ["trialing", "active", "past_due", "canceled"].includes(String(candidate.entitlement.status)) && typeof candidate.entitlement.canRun === "boolean" && typeof candidate.entitlement.canUpload === "boolean" && (policy.mode === "sanitized_findings" || policy.mode === "full_transcript_opt_in") && typeof policy.transcriptUpload === "boolean" && typeof policy.redactedSnippets === "boolean" && typeof policy.secretRedaction === "boolean" && Number.isInteger(policy.retentionDays) && policy.retentionDays > 0) {
|
|
3074
2872
|
return candidate;
|
|
3075
2873
|
}
|
|
3076
2874
|
throw new AppError({
|
|
@@ -3080,6 +2878,14 @@ function parseCredentialVerification(body) {
|
|
|
3080
2878
|
exitCode: 1
|
|
3081
2879
|
});
|
|
3082
2880
|
}
|
|
2881
|
+
/**
 * Build (but do not throw) the error reported when the workspace
 * subscription does not permit the requested action.
 *
 * @returns {AppError} Error with code `WORKBENCH_SUBSCRIPTION_INACTIVE` and
 *   exit code 1; callers decide when to throw it.
 */
function inactiveSubscriptionError() {
  const details = {
    code: "WORKBENCH_SUBSCRIPTION_INACTIVE",
    message: "Your workspace subscription is not active.",
    suggestion: "Open billing to start or resume Builder/Team access: https://app.roleplay.sh/billing",
    exitCode: 1
  };
  return new AppError(details);
}
|
|
3083
2889
|
function assertUploadResponseMatchesPayload(response, payload) {
|
|
3084
2890
|
if (response.projectId === payload.projectId && response.runId === payload.run.report.runId && response.mode === payload.mode) {
|
|
3085
2891
|
return;
|
|
@@ -3112,14 +2918,14 @@ function isRelativeCloudPath(value) {
|
|
|
3112
2918
|
return value.startsWith("/") && !value.startsWith("//");
|
|
3113
2919
|
}
|
|
3114
2920
|
/**
 * Read a UTF-8 file and parse it as JSON.
 *
 * @param {string} path - Location of the JSON artifact.
 * @returns {Promise<unknown>} The parsed JSON value.
 * @throws Rejects with the file-system error when the file cannot be read,
 *   or a `SyntaxError` when the contents are not valid JSON.
 */
async function readJsonArtifact(path) {
  const rawText = await fs6.readFile(path, "utf8");
  // A leading byte-order mark would make JSON.parse fail, so drop it first.
  const withoutBom = rawText.replace(/^\uFEFF/, "");
  return JSON.parse(withoutBom);
}
|
|
3118
2924
|
/**
 * Read and parse a JSON artifact only when the file exists.
 *
 * @param {string} path - Location of the optional JSON artifact.
 * @returns {Promise<unknown | undefined>} The value from `readJsonArtifact`,
 *   or `undefined` when `pathExists` reports the file is absent.
 */
async function readOptionalJsonArtifact(path) {
  const isPresent = await pathExists(path);
  if (!isPresent) {
    return void 0;
  }
  return readJsonArtifact(path);
}
|
|
3121
2927
|
/**
 * Read a text artifact as UTF-8 only when the file exists.
 *
 * @param {string} path - Location of the optional text artifact.
 * @returns {Promise<string | undefined>} The file contents, or `undefined`
 *   when `pathExists` reports the file is absent.
 */
async function readOptionalTextArtifact(path) {
  const isPresent = await pathExists(path);
  if (!isPresent) {
    return void 0;
  }
  return fs6.readFile(path, "utf8");
}
|
|
3124
2930
|
async function readRequiredTranscriptArtifact(path) {
|
|
3125
2931
|
if (await pathExists(path)) return readJsonArtifact(path);
|
|
@@ -3143,10 +2949,280 @@ var init_upload_client = __esm({
|
|
|
3143
2949
|
}
|
|
3144
2950
|
});
|
|
3145
2951
|
|
|
3146
|
-
// src/commands/
|
|
3147
|
-
var
|
|
3148
|
-
__export(
|
|
3149
|
-
|
|
2952
|
+
// src/commands/run.ts
|
|
2953
|
+
var run_exports = {};
|
|
2954
|
+
__export(run_exports, {
|
|
2955
|
+
RunCommand: () => RunCommand
|
|
2956
|
+
});
|
|
2957
|
+
import { Args as Args2, Flags as Flags3 } from "@oclif/core";
|
|
2958
|
+
import { promises as fs7 } from "fs";
|
|
2959
|
+
import { tmpdir } from "os";
|
|
2960
|
+
import { join as join5 } from "path";
|
|
2961
|
+
/**
 * Resolve attacker/judge provider and model settings from CLI flags, with
 * `ROLEPLAY_*` environment variables as the next source.
 *
 * Precedence, as implemented:
 * - provider: role-specific flag, then role-specific env var, then the shared
 *   provider (`--provider` / `ROLEPLAY_LLM_PROVIDER` / `fallback`);
 * - model: role-specific flag, then role-specific env var, then `--model`,
 *   then `ROLEPLAY_LLM_MODEL`.
 *
 * @param {object} flags - Parsed CLI flags keyed by flag name.
 * @param {string} [fallback] - Shared provider used when neither the flag nor
 *   the environment supplies one.
 * @returns {{attackerProvider: *, judgeProvider: *, attackerModel: *, judgeModel: *, llmBaseUrl: *}}
 *   Resolved settings; entries stay `undefined` when nothing is configured.
 */
function resolveProviderFlags(flags, fallback) {
  const env = process.env;
  const sharedProvider = providerFrom(flags.provider ?? env.ROLEPLAY_LLM_PROVIDER, fallback);
  const attackerProvider = providerFrom(
    flags["attacker-provider"] ?? env.ROLEPLAY_ATTACKER_PROVIDER,
    sharedProvider
  );
  const judgeProvider = providerFrom(
    flags["judge-provider"] ?? env.ROLEPLAY_JUDGE_PROVIDER,
    sharedProvider
  );
  // Shared model is the last resort for both roles.
  const sharedModel = flags.model ?? env.ROLEPLAY_LLM_MODEL;
  const attackerModel = flags["attacker-model"] ?? env.ROLEPLAY_ATTACKER_MODEL ?? sharedModel;
  const judgeModel = flags["judge-model"] ?? env.ROLEPLAY_JUDGE_MODEL ?? sharedModel;
  const llmBaseUrl = flags["llm-base-url"] ?? env.ROLEPLAY_LLM_BASE_URL;
  return { attackerProvider, judgeProvider, attackerModel, judgeModel, llmBaseUrl };
}
|
|
2973
|
+
/**
 * Normalize a provider name, or report that none was configured.
 *
 * @param {string | undefined} value - Provider requested by flag or env.
 * @param {string | undefined} fallback - Provider to use when `value` is absent.
 * @returns {*} `undefined` when both arguments are falsy; otherwise the result
 *   of `normalizeProvider(value, fallback ?? "mock")`.
 */
function providerFrom(value, fallback) {
  const nothingConfigured = !value && !fallback;
  return nothingConfigured ? void 0 : normalizeProvider(value, fallback ?? "mock");
}
|
|
2977
|
+
/**
 * Decide whether running a scenario needs a cloud run-entitlement check.
 *
 * Returns true as soon as any part of the run is not a mock: the target, the
 * scenario's own attacker provider, the judge type, or a provider resolved
 * from flags/environment.
 *
 * @param {object} scenario - Loaded scenario; reads `target.type`,
 *   `attacker?.provider` and `judge.type`.
 * @param {object} providers - Resolved providers, checked by
 *   `providersContainRealProvider`.
 * @returns {boolean} Whether an entitlement check is required.
 */
function scenarioRequiresRunEntitlement(scenario, providers) {
  if (scenario.target.type !== "mock") {
    return true;
  }
  const scenarioAttacker = scenario.attacker?.provider;
  if (scenarioAttacker !== void 0 && scenarioAttacker !== "mock") {
    return true;
  }
  if (scenario.judge.type !== "mock") {
    return true;
  }
  return providersContainRealProvider(providers);
}
|
|
2980
|
+
/**
 * Report whether the attacker or judge provider is set to something other
 * than "mock".
 *
 * @param {{attackerProvider?: string, judgeProvider?: string}} providers -
 *   Resolved provider settings.
 * @returns {boolean} True when at least one provider is defined and not "mock".
 */
function providersContainRealProvider(providers) {
  const { attackerProvider, judgeProvider } = providers;
  // An unset provider does not count as real; only an explicit non-mock does.
  const isReal = (name) => name !== void 0 && name !== "mock";
  return isReal(attackerProvider) || isReal(judgeProvider);
}
|
|
2983
|
+
/**
 * Derive a scenario name from a file path: drop the directory part (either
 * `/` or `\` separators) and a trailing `.yml` / `.yaml` extension.
 *
 * @param {string} path - Path to a scenario file.
 * @returns {string} The bare file name without its YAML extension.
 */
function resultNameFromPath(path) {
  const fileName = path.replace(/^.*[\\/]/, "");
  return fileName.replace(/\.ya?ml$/i, "");
}
|
|
2986
|
+
/**
 * Map a scenario name to its cloud attack-pack ID by substring match.
 *
 * Entries are checked in the order listed, so the first matching fragment
 * wins when a name contains more than one.
 *
 * @param {string} scenarioName - Scenario name to classify.
 * @returns {string | undefined} The pack ID, or `undefined` when no fragment
 *   matches.
 */
function cloudAttackPackIdForScenario(scenarioName) {
  const packIdByFragment = [
    ["authority-impersonation", "pack_authority"],
    ["urgency-pressure", "pack_urgency"],
    ["policy-bypass", "pack_policy"],
    ["indirect-prompt-injection", "pack_injection"],
    ["data-exfiltration", "pack_exfiltration"],
    ["tool-misuse", "pack_tools"],
    ["auth-session-confusion", "pack_auth_session"],
    ["memory-context-poisoning", "pack_memory_context"]
  ];
  const match = packIdByFragment.find(([fragment]) => scenarioName.includes(fragment));
  return match ? match[1] : void 0;
}
|
|
2997
|
+
var socialEngineeringCorePack, RunCommand;
|
|
2998
|
+
var init_run = __esm({
|
|
2999
|
+
"src/commands/run.ts"() {
|
|
3000
|
+
"use strict";
|
|
3001
|
+
init_engine();
|
|
3002
|
+
init_run_store();
|
|
3003
|
+
init_scenario_schema();
|
|
3004
|
+
init_scoring();
|
|
3005
|
+
init_reporter();
|
|
3006
|
+
init_output();
|
|
3007
|
+
init_fs();
|
|
3008
|
+
init_scenarios();
|
|
3009
|
+
init_errors();
|
|
3010
|
+
init_base();
|
|
3011
|
+
init_client();
|
|
3012
|
+
init_upload_client();
|
|
3013
|
+
socialEngineeringCorePack = "social-engineering-core";
|
|
3014
|
+
RunCommand = class _RunCommand extends BaseCommand {
|
|
3015
|
+
static description = "Run a roleplay scenario or built-in attack pack.";
|
|
3016
|
+
static args = {
|
|
3017
|
+
scenario: Args2.string({ required: true })
|
|
3018
|
+
};
|
|
3019
|
+
static flags = {
|
|
3020
|
+
target: Flags3.string({
|
|
3021
|
+
description: 'HTTP target URL, or "mock" for local smoke tests. Defaults to ROLEPLAY_TARGET_URL.',
|
|
3022
|
+
default: process.env.ROLEPLAY_TARGET_URL
|
|
3023
|
+
}),
|
|
3024
|
+
"target-command": Flags3.string({
|
|
3025
|
+
description: "CLI target command for built-in attack packs. Defaults to ROLEPLAY_TARGET_COMMAND.",
|
|
3026
|
+
default: process.env.ROLEPLAY_TARGET_COMMAND
|
|
3027
|
+
}),
|
|
3028
|
+
"max-turns": Flags3.integer(),
|
|
3029
|
+
json: Flags3.boolean({ description: "Output JSON only." }),
|
|
3030
|
+
out: Flags3.string({ default: ".roleplay/runs" }),
|
|
3031
|
+
"fail-on": Flags3.string({ options: ["warning", "failed", "critical"], default: "failed" }),
|
|
3032
|
+
provider: Flags3.string({
|
|
3033
|
+
options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
|
|
3034
|
+
description: "Shared attacker and judge provider. Defaults to ROLEPLAY_LLM_PROVIDER, openai for real attack-pack targets, or mock for smoke tests.",
|
|
3035
|
+
default: process.env.ROLEPLAY_LLM_PROVIDER
|
|
3036
|
+
}),
|
|
3037
|
+
"attacker-provider": Flags3.string({
|
|
3038
|
+
options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
|
|
3039
|
+
description: "Provider for adaptive attacker turns. Defaults to ROLEPLAY_ATTACKER_PROVIDER or --provider.",
|
|
3040
|
+
default: process.env.ROLEPLAY_ATTACKER_PROVIDER
|
|
3041
|
+
}),
|
|
3042
|
+
"judge-provider": Flags3.string({
|
|
3043
|
+
options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
|
|
3044
|
+
description: "Provider for transcript judging. Defaults to ROLEPLAY_JUDGE_PROVIDER or --provider.",
|
|
3045
|
+
default: process.env.ROLEPLAY_JUDGE_PROVIDER
|
|
3046
|
+
}),
|
|
3047
|
+
model: Flags3.string({
|
|
3048
|
+
description: "Shared LLM model. Defaults to ROLEPLAY_LLM_MODEL or provider defaults.",
|
|
3049
|
+
default: process.env.ROLEPLAY_LLM_MODEL
|
|
3050
|
+
}),
|
|
3051
|
+
"attacker-model": Flags3.string({
|
|
3052
|
+
description: "Model for adaptive attacker turns. Defaults to ROLEPLAY_ATTACKER_MODEL or --model.",
|
|
3053
|
+
default: process.env.ROLEPLAY_ATTACKER_MODEL
|
|
3054
|
+
}),
|
|
3055
|
+
"judge-model": Flags3.string({
|
|
3056
|
+
description: "Model for transcript judging. Defaults to ROLEPLAY_JUDGE_MODEL, scenario judge.model, or --model.",
|
|
3057
|
+
default: process.env.ROLEPLAY_JUDGE_MODEL
|
|
3058
|
+
}),
|
|
3059
|
+
"llm-base-url": Flags3.string({
|
|
3060
|
+
description: "Base URL for openai-compatible providers. Defaults to ROLEPLAY_LLM_BASE_URL.",
|
|
3061
|
+
default: process.env.ROLEPLAY_LLM_BASE_URL
|
|
3062
|
+
}),
|
|
3063
|
+
endpoint: Flags3.string({
|
|
3064
|
+
description: "cloud workbench URL for real-run entitlement checks. Defaults to ROLEPLAY_CLOUD_URL.",
|
|
3065
|
+
default: process.env.ROLEPLAY_CLOUD_URL ?? "http://127.0.0.1:3000"
|
|
3066
|
+
}),
|
|
3067
|
+
project: Flags3.string({
|
|
3068
|
+
description: "cloud workbench project ID for real agent tests. Defaults to ROLEPLAY_PROJECT_ID.",
|
|
3069
|
+
default: process.env.ROLEPLAY_PROJECT_ID
|
|
3070
|
+
}),
|
|
3071
|
+
"api-key": Flags3.string({
|
|
3072
|
+
description: "cloud workbench API key for real agent tests. Defaults to ROLEPLAY_API_KEY.",
|
|
3073
|
+
default: process.env.ROLEPLAY_API_KEY
|
|
3074
|
+
}),
|
|
3075
|
+
yes: Flags3.boolean({ char: "y", description: "Allow local CLI target command execution." })
|
|
3076
|
+
};
|
|
3077
|
+
async run() {
|
|
3078
|
+
const { args, flags } = await this.parse(_RunCommand);
|
|
3079
|
+
if (args.scenario === socialEngineeringCorePack) {
|
|
3080
|
+
await this.runSocialEngineeringCore(flags);
|
|
3081
|
+
return;
|
|
3082
|
+
}
|
|
3083
|
+
if (flags.target || flags["target-command"]) {
|
|
3084
|
+
throw new AppError({
|
|
3085
|
+
code: "ATTACK_PACK_TARGET_UNSUPPORTED",
|
|
3086
|
+
message: "--target and --target-command are only supported when running social-engineering-core.",
|
|
3087
|
+
suggestion: "Use roleplay run social-engineering-core --target <url>, or pass a scenario path without target flags.",
|
|
3088
|
+
exitCode: 2
|
|
3089
|
+
});
|
|
3090
|
+
}
|
|
3091
|
+
const providers = resolveProviderFlags(flags);
|
|
3092
|
+
const scenario = await loadScenarioFile(await resolveScenarioPath(args.scenario));
|
|
3093
|
+
if (scenarioRequiresRunEntitlement(scenario, providers)) {
|
|
3094
|
+
await assertRunEntitlement({
|
|
3095
|
+
endpoint: flags.endpoint,
|
|
3096
|
+
projectId: requireRunProjectId(flags.project),
|
|
3097
|
+
apiKey: requireRunApiKey(flags["api-key"])
|
|
3098
|
+
});
|
|
3099
|
+
}
|
|
3100
|
+
const spinner = createSpinner("Running scenario", flags.json);
|
|
3101
|
+
let result;
|
|
3102
|
+
try {
|
|
3103
|
+
result = await runScenario({
|
|
3104
|
+
scenarioRef: args.scenario,
|
|
3105
|
+
maxTurns: flags["max-turns"],
|
|
3106
|
+
outDir: flags.out,
|
|
3107
|
+
yes: flags.yes,
|
|
3108
|
+
...providers
|
|
3109
|
+
});
|
|
3110
|
+
spinner?.succeed("Scenario complete");
|
|
3111
|
+
} catch (error) {
|
|
3112
|
+
spinner?.fail("Scenario failed");
|
|
3113
|
+
throw error;
|
|
3114
|
+
}
|
|
3115
|
+
if (flags.json) {
|
|
3116
|
+
this.log(
|
|
3117
|
+
JSON.stringify({
|
|
3118
|
+
runId: result.runId,
|
|
3119
|
+
scenario: result.scenario.name,
|
|
3120
|
+
status: result.report.status,
|
|
3121
|
+
score: result.report.score,
|
|
3122
|
+
reportPath: result.paths.reportJsonPath,
|
|
3123
|
+
markdownPath: result.paths.reportMarkdownPath
|
|
3124
|
+
})
|
|
3125
|
+
);
|
|
3126
|
+
} else {
|
|
3127
|
+
this.log(
|
|
3128
|
+
terminalSummary({
|
|
3129
|
+
report: result.report,
|
|
3130
|
+
reportPath: result.paths.reportJsonPath,
|
|
3131
|
+
markdownPath: result.paths.reportMarkdownPath
|
|
3132
|
+
})
|
|
3133
|
+
);
|
|
3134
|
+
}
|
|
3135
|
+
if (shouldFail(result.report.status, result.report.failures, flags["fail-on"])) {
|
|
3136
|
+
process.exitCode = 1;
|
|
3137
|
+
}
|
|
3138
|
+
}
|
|
3139
|
+
async runSocialEngineeringCore(flags) {
|
|
3140
|
+
if (Boolean(flags.target) === Boolean(flags["target-command"])) {
|
|
3141
|
+
throw new AppError({
|
|
3142
|
+
code: "ATTACK_PACK_TARGET_REQUIRED",
|
|
3143
|
+
message: "Provide exactly one target for social-engineering-core.",
|
|
3144
|
+
suggestion: 'Use --target http://localhost:3000/agent, --target-command "node ./agent.js", ROLEPLAY_TARGET_URL, or ROLEPLAY_TARGET_COMMAND.',
|
|
3145
|
+
exitCode: 2
|
|
3146
|
+
});
|
|
3147
|
+
}
|
|
3148
|
+
const target = flags.target === "mock" ? { type: "mock" } : flags.target ? { type: "http", url: flags.target } : { type: "cli", command: flags["target-command"] };
|
|
3149
|
+
const scenarioDir = await fs7.mkdtemp(join5(tmpdir(), "roleplay-social-engineering-core-"));
|
|
3150
|
+
await ensureDir(scenarioDir);
|
|
3151
|
+
const providers = resolveProviderFlags(flags, target.type === "mock" ? "mock" : "openai");
|
|
3152
|
+
if (target.type !== "mock" || providersContainRealProvider(providers)) {
|
|
3153
|
+
await assertRunEntitlement({
|
|
3154
|
+
endpoint: flags.endpoint,
|
|
3155
|
+
projectId: requireRunProjectId(flags.project),
|
|
3156
|
+
apiKey: requireRunApiKey(flags["api-key"])
|
|
3157
|
+
});
|
|
3158
|
+
}
|
|
3159
|
+
const spinner = createSpinner("Running social-engineering-core", flags.json);
|
|
3160
|
+
try {
|
|
3161
|
+
const files = [];
|
|
3162
|
+
for (const content of attackPackTemplates(target)) {
|
|
3163
|
+
const name = content.match(/^name:\s*(.+)$/m)?.[1] ?? `social-engineering-${files.length + 1}`;
|
|
3164
|
+
const path = join5(scenarioDir, `${name}.yml`);
|
|
3165
|
+
await fs7.writeFile(path, content, "utf8");
|
|
3166
|
+
files.push(path);
|
|
3167
|
+
}
|
|
3168
|
+
const results = [];
|
|
3169
|
+
for (const file of files) {
|
|
3170
|
+
const result = await runScenario({
|
|
3171
|
+
scenarioRef: file,
|
|
3172
|
+
maxTurns: flags["max-turns"],
|
|
3173
|
+
outDir: flags.out,
|
|
3174
|
+
yes: flags.yes,
|
|
3175
|
+
...providers,
|
|
3176
|
+
metadata: {
|
|
3177
|
+
attackPackId: cloudAttackPackIdForScenario(resultNameFromPath(file)),
|
|
3178
|
+
attackPackScenario: resultNameFromPath(file)
|
|
3179
|
+
}
|
|
3180
|
+
});
|
|
3181
|
+
results.push({
|
|
3182
|
+
runId: result.runId,
|
|
3183
|
+
scenario: result.scenario.name,
|
|
3184
|
+
status: result.report.status,
|
|
3185
|
+
score: result.report.score,
|
|
3186
|
+
failures: result.report.failures,
|
|
3187
|
+
reportPath: result.paths.reportJsonPath,
|
|
3188
|
+
markdownPath: result.paths.reportMarkdownPath
|
|
3189
|
+
});
|
|
3190
|
+
}
|
|
3191
|
+
spinner?.succeed("Attack pack complete");
|
|
3192
|
+
const failed = results.filter(
|
|
3193
|
+
(result) => shouldFail(result.status, result.failures, flags["fail-on"])
|
|
3194
|
+
);
|
|
3195
|
+
if (flags.json) {
|
|
3196
|
+
this.log(
|
|
3197
|
+
JSON.stringify({
|
|
3198
|
+
pack: socialEngineeringCorePack,
|
|
3199
|
+
target: target.type,
|
|
3200
|
+
total: results.length,
|
|
3201
|
+
failed: failed.length,
|
|
3202
|
+
results
|
|
3203
|
+
})
|
|
3204
|
+
);
|
|
3205
|
+
} else {
|
|
3206
|
+
this.log(
|
|
3207
|
+
results.map((result) => `${result.status.toUpperCase()} ${result.score}/100 ${result.scenario} ${result.runId}`).join("\n")
|
|
3208
|
+
);
|
|
3209
|
+
}
|
|
3210
|
+
if (failed.length) process.exitCode = 1;
|
|
3211
|
+
} catch (error) {
|
|
3212
|
+
spinner?.fail("Attack pack failed");
|
|
3213
|
+
throw error;
|
|
3214
|
+
} finally {
|
|
3215
|
+
await fs7.rm(scenarioDir, { recursive: true, force: true });
|
|
3216
|
+
}
|
|
3217
|
+
}
|
|
3218
|
+
};
|
|
3219
|
+
}
|
|
3220
|
+
});
|
|
3221
|
+
|
|
3222
|
+
// src/commands/upload.ts
|
|
3223
|
+
var upload_exports = {};
|
|
3224
|
+
__export(upload_exports, {
|
|
3225
|
+
UploadCommand: () => UploadCommand
|
|
3150
3226
|
});
|
|
3151
3227
|
import { Args as Args3, Flags as Flags4 } from "@oclif/core";
|
|
3152
3228
|
import chalk4 from "chalk";
|
|
@@ -3180,12 +3256,7 @@ async function selectedUploadRunIds(run, runsDir) {
|
|
|
3180
3256
|
}
|
|
3181
3257
|
async function assertUploadPolicyAllowsMode(input) {
|
|
3182
3258
|
if (input.mode !== "full_transcript_opt_in") return;
|
|
3183
|
-
|
|
3184
|
-
endpoint: input.endpoint,
|
|
3185
|
-
projectId: input.projectId,
|
|
3186
|
-
apiKey: input.apiKey
|
|
3187
|
-
});
|
|
3188
|
-
if (verification.uploadPolicy.mode === "full_transcript_opt_in" && verification.uploadPolicy.transcriptUpload) {
|
|
3259
|
+
if (input.verification.uploadPolicy.mode === "full_transcript_opt_in" && input.verification.uploadPolicy.transcriptUpload) {
|
|
3189
3260
|
return;
|
|
3190
3261
|
}
|
|
3191
3262
|
throw new AppError({
|
|
@@ -3261,11 +3332,15 @@ var init_upload = __esm({
|
|
|
3261
3332
|
);
|
|
3262
3333
|
try {
|
|
3263
3334
|
const runIds = await selectedUploadRunIds(args.run, flags.out);
|
|
3264
|
-
await
|
|
3335
|
+
const verification = await assertUploadEntitlement({
|
|
3265
3336
|
endpoint: flags.endpoint,
|
|
3266
3337
|
projectId,
|
|
3267
|
-
apiKey
|
|
3268
|
-
|
|
3338
|
+
apiKey
|
|
3339
|
+
});
|
|
3340
|
+
await assertUploadPolicyAllowsMode({
|
|
3341
|
+
projectId,
|
|
3342
|
+
mode,
|
|
3343
|
+
verification
|
|
3269
3344
|
});
|
|
3270
3345
|
if (args.run === "all") {
|
|
3271
3346
|
const uploads = [];
|
|
@@ -3532,10 +3607,12 @@ async function checkCloudCredentials(cloudUrl, projectId, apiKey) {
|
|
|
3532
3607
|
apiKey: normalizedApiKey
|
|
3533
3608
|
});
|
|
3534
3609
|
const policy = verification.uploadPolicy;
|
|
3610
|
+
const entitlement = verification.entitlement;
|
|
3611
|
+
const access2 = entitlement.canRun && entitlement.canUpload;
|
|
3535
3612
|
return {
|
|
3536
3613
|
name: "cloud workbench API key",
|
|
3537
|
-
ok:
|
|
3538
|
-
detail: `${verification.key.name} (${verification.key.preview}) can upload to ${verification.projectId} with ${policy.mode}, ${policy.retentionDays}d retention`
|
|
3614
|
+
ok: access2,
|
|
3615
|
+
detail: access2 ? `${verification.key.name} (${verification.key.preview}) can run and upload to ${verification.projectId} with ${policy.mode}, ${policy.retentionDays}d retention` : `subscription ${entitlement.status}; open billing to start or resume Builder/Team access`
|
|
3539
3616
|
};
|
|
3540
3617
|
} catch (error) {
|
|
3541
3618
|
return {
|
|
@@ -3545,6 +3622,29 @@ async function checkCloudCredentials(cloudUrl, projectId, apiKey) {
|
|
|
3545
3622
|
};
|
|
3546
3623
|
}
|
|
3547
3624
|
}
|
|
3625
|
+
/**
 * Doctor check: report whether the API key env var for the selected LLM
 * provider is set.
 *
 * A missing provider or the "mock" provider always passes. For any other
 * provider the env var name comes from `providerKeyEnv`; the check passes
 * only when that variable holds a non-blank value.
 *
 * @param {string | undefined} provider Provider id from the `--provider` flag.
 * @returns {{ name: string, ok: boolean, detail: string }} Check result.
 */
function checkProviderKey(provider) {
  const name = "LLM provider key";
  const usesMock = provider === "mock" || !provider;
  if (usesMock) {
    return {
      name,
      ok: true,
      detail: "mock provider is available for install smoke tests"
    };
  }
  const envName = providerKeyEnv(provider);
  // Whitespace-only values count as unset.
  const configuredValue = envName ? process.env[envName]?.trim() : void 0;
  if (configuredValue) {
    return {
      name,
      ok: true,
      detail: `${envName} is configured for real adaptive runs`
    };
  }
  return {
    name,
    ok: false,
    detail: `set ${envName ?? "ROLEPLAY_LLM_API_KEY"} before running real adaptive tests, or use --provider mock for smoke tests`
  };
}
|
|
3641
|
+
/**
 * Map an LLM provider id to the environment variable that carries its API key.
 *
 * @param {string | undefined} provider Provider id, matched exactly (case-sensitive).
 * @returns {string | undefined} Env var name, or `undefined` for any
 *   provider not listed here.
 */
function providerKeyEnv(provider) {
  switch (provider) {
    case "openai":
      return "ROLEPLAY_OPENAI_API_KEY";
    case "anthropic":
      return "ROLEPLAY_ANTHROPIC_API_KEY";
    case "google":
      return "ROLEPLAY_GOOGLE_API_KEY";
    case "openai-compatible":
      return "ROLEPLAY_LLM_API_KEY";
    default:
      return void 0;
  }
}
|
|
3548
3648
|
function cloudHealthDetail(body, endpoint) {
|
|
3549
3649
|
const service = body.service ?? "cloud workbench";
|
|
3550
3650
|
const privacy = body.privacy;
|
|
@@ -3587,6 +3687,11 @@ var init_doctor = __esm({
|
|
|
3587
3687
|
"api-key": Flags8.string({
|
|
3588
3688
|
description: "cloud workbench API key for credential verification. Defaults to ROLEPLAY_API_KEY.",
|
|
3589
3689
|
default: process.env.ROLEPLAY_API_KEY
|
|
3690
|
+
}),
|
|
3691
|
+
provider: Flags8.string({
|
|
3692
|
+
options: ["mock", "openai", "anthropic", "google", "openai-compatible"],
|
|
3693
|
+
description: "LLM provider to check for real adaptive runs. Defaults to ROLEPLAY_LLM_PROVIDER or openai.",
|
|
3694
|
+
default: process.env.ROLEPLAY_LLM_PROVIDER ?? "openai"
|
|
3590
3695
|
})
|
|
3591
3696
|
};
|
|
3592
3697
|
async run() {
|
|
@@ -3601,6 +3706,7 @@ var init_doctor = __esm({
|
|
|
3601
3706
|
checks.push(await checkCloudHealth(flags["cloud-url"]));
|
|
3602
3707
|
if (flags.project || flags["api-key"]) {
|
|
3603
3708
|
checks.push(await checkCloudCredentials(flags["cloud-url"], flags.project, flags["api-key"]));
|
|
3709
|
+
checks.push(checkProviderKey(flags.provider));
|
|
3604
3710
|
}
|
|
3605
3711
|
}
|
|
3606
3712
|
if (flags.json) {
|
|
@@ -3867,13 +3973,14 @@ var HelpCommand = class extends Command2 {
|
|
|
3867
3973
|
command: Args6.string({ required: false })
|
|
3868
3974
|
};
|
|
3869
3975
|
async run() {
|
|
3870
|
-
this.log(`${chalk8.cyan("roleplay.sh")} -
|
|
3976
|
+
this.log(`${chalk8.cyan("roleplay.sh")} - Included CLI for Builder and Team workspaces.
|
|
3871
3977
|
|
|
3872
3978
|
Usage:
|
|
3873
3979
|
roleplay init
|
|
3874
3980
|
roleplay scenario:create <name>
|
|
3875
3981
|
roleplay run <scenario>
|
|
3876
|
-
roleplay run social-engineering-core --target
|
|
3982
|
+
roleplay run social-engineering-core --target mock --provider mock
|
|
3983
|
+
roleplay run social-engineering-core --target <url> --provider openai --project <projectId>
|
|
3877
3984
|
roleplay report latest|<runId> [--out .roleplay/runs]
|
|
3878
3985
|
roleplay replay latest|<runId> [--out .roleplay/runs]
|
|
3879
3986
|
roleplay upload latest|all --project <projectId>
|
|
@@ -3881,6 +3988,7 @@ Usage:
|
|
|
3881
3988
|
roleplay doctor
|
|
3882
3989
|
roleplay mcp
|
|
3883
3990
|
|
|
3991
|
+
Use mock mode for install smoke tests. Use a project API key for real agent tests.
|
|
3884
3992
|
Use --json on commands for machine-readable output.`);
|
|
3885
3993
|
}
|
|
3886
3994
|
};
|