@roleplay-sh/cli 0.1.3 → 0.1.5
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/.env.example +1 -1
- package/CHANGELOG.md +9 -1
- package/README.md +4 -4
- package/RELEASE.md +3 -3
- package/SECURITY.md +1 -1
- package/dist/cli.js +35 -35
- package/dist/cli.js.map +1 -1
- package/package.json +2 -3
package/.env.example
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Optional agent credentials used by your own HTTP/CLI target.
|
|
2
2
|
AGENT_API_KEY=
|
|
3
3
|
|
|
4
|
-
#
|
|
4
|
+
# Cloud workbench upload settings. Requires a trial workspace and project API key.
|
|
5
5
|
ROLEPLAY_CLOUD_URL=https://app.roleplay.sh
|
|
6
6
|
ROLEPLAY_PROJECT_ID=
|
|
7
7
|
ROLEPLAY_API_KEY=
|
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,14 @@ All notable changes to roleplay.sh will be documented in this file.
|
|
|
4
4
|
|
|
5
5
|
This project follows semantic versioning after the public `0.1.0` release.
|
|
6
6
|
|
|
7
|
+
## 0.1.4 - Unreleased
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
|
|
11
|
+
- Updated CLI upload, doctor, and setup copy for the paid roleplay.sh cloud workbench.
|
|
12
|
+
- Clarified that production uploads require a Builder or Team trial, project API key, and sanitized upload policy.
|
|
13
|
+
- Kept public command syntax stable while preserving mock smoke tests and BYO provider usage for real runs.
|
|
14
|
+
|
|
7
15
|
## 0.1.3 - 2026-06-06
|
|
8
16
|
|
|
9
17
|
### Added
|
|
@@ -31,7 +39,7 @@ This project follows semantic versioning after the public `0.1.0` release.
|
|
|
31
39
|
- Dedicated public CLI package for local attack-pack execution.
|
|
32
40
|
- Built-in `social-engineering-core` attack pack.
|
|
33
41
|
- Local reports and replayable transcripts.
|
|
34
|
-
- Sanitized
|
|
42
|
+
- Sanitized cloud workbench upload support.
|
|
35
43
|
|
|
36
44
|
## 0.1.0 - 2026-05-17
|
|
37
45
|
|
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Social-engineering regression tests for AI agents.
|
|
4
4
|
|
|
5
|
-
`roleplay` runs adversarial roleplay scenarios against local, HTTP, CLI, or mock agents, records replayable evidence, and
|
|
5
|
+
`roleplay` runs adversarial roleplay scenarios against local, HTTP, CLI, or mock agents, records replayable evidence, and uploads sanitized findings to the roleplay.sh cloud workbench.
|
|
6
6
|
|
|
7
7
|
## Install
|
|
8
8
|
|
|
@@ -54,9 +54,9 @@ export ROLEPLAY_OPENAI_API_KEY="your-openai-key"
|
|
|
54
54
|
|
|
55
55
|
Supported providers are `openai`, `anthropic`, `google`, and `openai-compatible`. Use `--attacker-provider` and `--judge-provider` when you want different providers for adaptive attacker turns and transcript judging. Use `--target mock --provider mock` for deterministic local smoke tests.
|
|
56
56
|
|
|
57
|
-
## Upload Sanitized Findings To
|
|
57
|
+
## Upload Sanitized Findings To The Cloud Workbench
|
|
58
58
|
|
|
59
|
-
|
|
59
|
+
Start a Builder or Team trial at `https://app.roleplay.sh`, create a workspace project and API key, then run:
|
|
60
60
|
|
|
61
61
|
```bash
|
|
62
62
|
ROLEPLAY_CLOUD_URL=https://app.roleplay.sh \
|
|
@@ -73,7 +73,7 @@ Sanitized upload is the default. Full transcripts, raw scenario YAML, and local
|
|
|
73
73
|
- `roleplay run` runs a scenario file or built-in attack pack.
|
|
74
74
|
- `roleplay report` prints a saved run report.
|
|
75
75
|
- `roleplay replay` replays transcript evidence.
|
|
76
|
-
- `roleplay upload` uploads sanitized findings to
|
|
76
|
+
- `roleplay upload` uploads sanitized findings to the roleplay.sh cloud workbench.
|
|
77
77
|
- `roleplay list` lists local runs.
|
|
78
78
|
- `roleplay doctor` checks local and Cloud configuration.
|
|
79
79
|
- `roleplay mcp` exposes roleplay.sh through MCP.
|
package/RELEASE.md
CHANGED
|
@@ -29,8 +29,8 @@ The publish workflow uses GitHub OIDC and intentionally does not require an npm
|
|
|
29
29
|
Create a GitHub release or push a version tag:
|
|
30
30
|
|
|
31
31
|
```bash
|
|
32
|
-
git tag v0.1.
|
|
33
|
-
git push origin v0.1.
|
|
32
|
+
git tag v0.1.4
|
|
33
|
+
git push origin v0.1.4
|
|
34
34
|
```
|
|
35
35
|
|
|
36
36
|
The publish workflow runs checks and then publishes with:
|
|
@@ -58,7 +58,7 @@ export ROLEPLAY_OPENAI_API_KEY=<openai-key>
|
|
|
58
58
|
roleplay run social-engineering-core --target http://localhost:3000/agent --provider openai --max-turns 1 --fail-on critical
|
|
59
59
|
```
|
|
60
60
|
|
|
61
|
-
For
|
|
61
|
+
For cloud workbench upload verification, start a Builder or Team trial, create a project API key at `https://app.roleplay.sh`, and run:
|
|
62
62
|
|
|
63
63
|
```bash
|
|
64
64
|
ROLEPLAY_CLOUD_URL=https://app.roleplay.sh \
|
package/SECURITY.md
CHANGED
|
@@ -12,7 +12,7 @@ Do not include real API keys, customer data, private prompts, transcripts, or pr
|
|
|
12
12
|
|
|
13
13
|
## Data Handling
|
|
14
14
|
|
|
15
|
-
roleplay.sh stores runs locally under `.roleplay/runs`. Scenario files, hidden context, transcripts, and reports may contain sensitive information. Full transcripts stay local unless you explicitly upload them to
|
|
15
|
+
roleplay.sh stores runs locally under `.roleplay/runs`. Scenario files, hidden context, transcripts, and reports may contain sensitive information. Full transcripts stay local unless you explicitly upload them to the cloud workbench with full-transcript mode enabled in both the project policy and the CLI command.
|
|
16
16
|
|
|
17
17
|
## CLI Target Execution
|
|
18
18
|
|
package/dist/cli.js
CHANGED
|
@@ -1024,7 +1024,7 @@ var init_init = __esm({
|
|
|
1024
1024
|
envExample = `# Optional agent credentials used by your own HTTP/CLI target.
|
|
1025
1025
|
AGENT_API_KEY=
|
|
1026
1026
|
|
|
1027
|
-
#
|
|
1027
|
+
# cloud workbench upload settings.
|
|
1028
1028
|
ROLEPLAY_CLOUD_URL=http://127.0.0.1:3000
|
|
1029
1029
|
ROLEPLAY_PROJECT_ID=proj_support
|
|
1030
1030
|
ROLEPLAY_API_KEY=
|
|
@@ -2917,7 +2917,7 @@ function requireUploadApiKey(apiKey) {
|
|
|
2917
2917
|
if (normalized) return normalized;
|
|
2918
2918
|
throw new AppError({
|
|
2919
2919
|
code: "UPLOAD_API_KEY_REQUIRED",
|
|
2920
|
-
message: "ROLEPLAY_API_KEY or --api-key is required to upload to
|
|
2920
|
+
message: "ROLEPLAY_API_KEY or --api-key is required to upload to cloud workbench.",
|
|
2921
2921
|
suggestion: "Create or copy a project API key from CI & Uploads, then pass --api-key or set ROLEPLAY_API_KEY.",
|
|
2922
2922
|
exitCode: 1
|
|
2923
2923
|
});
|
|
@@ -2927,7 +2927,7 @@ function requireUploadProjectId(projectId) {
|
|
|
2927
2927
|
if (normalized) return normalized;
|
|
2928
2928
|
throw new AppError({
|
|
2929
2929
|
code: "UPLOAD_PROJECT_REQUIRED",
|
|
2930
|
-
message: "ROLEPLAY_PROJECT_ID or --project is required to upload to
|
|
2930
|
+
message: "ROLEPLAY_PROJECT_ID or --project is required to upload to cloud workbench.",
|
|
2931
2931
|
suggestion: "Copy the project ID from CI & Uploads, then pass --project or set ROLEPLAY_PROJECT_ID.",
|
|
2932
2932
|
exitCode: 1
|
|
2933
2933
|
});
|
|
@@ -2998,8 +2998,8 @@ async function uploadToCloud(input) {
|
|
|
2998
2998
|
} catch (error) {
|
|
2999
2999
|
throw new AppError({
|
|
3000
3000
|
code: "UPLOAD_FAILED",
|
|
3001
|
-
message: `Could not reach
|
|
3002
|
-
suggestion: "Check ROLEPLAY_CLOUD_URL, ROLEPLAY_API_KEY, and that
|
|
3001
|
+
message: `Could not reach cloud workbench at ${endpoint}.`,
|
|
3002
|
+
suggestion: "Check ROLEPLAY_CLOUD_URL, ROLEPLAY_API_KEY, and that cloud workbench is running.",
|
|
3003
3003
|
cause: error,
|
|
3004
3004
|
exitCode: 1
|
|
3005
3005
|
});
|
|
@@ -3009,7 +3009,7 @@ async function uploadToCloud(input) {
|
|
|
3009
3009
|
throw new AppError({
|
|
3010
3010
|
code: "UPLOAD_FAILED",
|
|
3011
3011
|
message: body && "error" in body && body.error ? body.error : `Cloud upload failed with HTTP ${response.status}.`,
|
|
3012
|
-
suggestion: "Check ROLEPLAY_CLOUD_URL, ROLEPLAY_API_KEY, and that
|
|
3012
|
+
suggestion: "Check ROLEPLAY_CLOUD_URL, ROLEPLAY_API_KEY, and that cloud workbench is running.",
|
|
3013
3013
|
exitCode: 1
|
|
3014
3014
|
});
|
|
3015
3015
|
}
|
|
@@ -3034,8 +3034,8 @@ async function verifyCloudCredentials(input) {
|
|
|
3034
3034
|
} catch (error) {
|
|
3035
3035
|
throw new AppError({
|
|
3036
3036
|
code: "UPLOAD_CREDENTIALS_FAILED",
|
|
3037
|
-
message: `Could not reach
|
|
3038
|
-
suggestion: "Check ROLEPLAY_CLOUD_URL, ROLEPLAY_PROJECT_ID, ROLEPLAY_API_KEY, and that
|
|
3037
|
+
message: `Could not reach cloud workbench at ${endpoint}.`,
|
|
3038
|
+
suggestion: "Check ROLEPLAY_CLOUD_URL, ROLEPLAY_PROJECT_ID, ROLEPLAY_API_KEY, and that cloud workbench is running.",
|
|
3039
3039
|
cause: error,
|
|
3040
3040
|
exitCode: 1
|
|
3041
3041
|
});
|
|
@@ -3045,7 +3045,7 @@ async function verifyCloudCredentials(input) {
|
|
|
3045
3045
|
throw new AppError({
|
|
3046
3046
|
code: "UPLOAD_CREDENTIALS_FAILED",
|
|
3047
3047
|
message: body && "error" in body && body.error ? body.error : `Cloud API key verification failed with HTTP ${response.status}.`,
|
|
3048
|
-
suggestion: "Check ROLEPLAY_CLOUD_URL, ROLEPLAY_PROJECT_ID, ROLEPLAY_API_KEY, and that
|
|
3048
|
+
suggestion: "Check ROLEPLAY_CLOUD_URL, ROLEPLAY_PROJECT_ID, ROLEPLAY_API_KEY, and that cloud workbench is running.",
|
|
3049
3049
|
exitCode: 1
|
|
3050
3050
|
});
|
|
3051
3051
|
}
|
|
@@ -3061,8 +3061,8 @@ function parseUploadResponse(body) {
|
|
|
3061
3061
|
}
|
|
3062
3062
|
throw new AppError({
|
|
3063
3063
|
code: "UPLOAD_RESPONSE_INVALID",
|
|
3064
|
-
message: "
|
|
3065
|
-
suggestion: "Check that ROLEPLAY_CLOUD_URL points to a compatible roleplay.sh
|
|
3064
|
+
message: "cloud workbench returned an invalid upload response.",
|
|
3065
|
+
suggestion: "Check that ROLEPLAY_CLOUD_URL points to a compatible roleplay.sh cloud workbench backend.",
|
|
3066
3066
|
exitCode: 1
|
|
3067
3067
|
});
|
|
3068
3068
|
}
|
|
@@ -3075,8 +3075,8 @@ function parseCredentialVerification(body) {
|
|
|
3075
3075
|
}
|
|
3076
3076
|
throw new AppError({
|
|
3077
3077
|
code: "UPLOAD_CREDENTIALS_INVALID",
|
|
3078
|
-
message: "
|
|
3079
|
-
suggestion: "Check that ROLEPLAY_CLOUD_URL points to a compatible roleplay.sh
|
|
3078
|
+
message: "cloud workbench returned an invalid API key verification response.",
|
|
3079
|
+
suggestion: "Check that ROLEPLAY_CLOUD_URL points to a compatible roleplay.sh cloud workbench backend.",
|
|
3080
3080
|
exitCode: 1
|
|
3081
3081
|
});
|
|
3082
3082
|
}
|
|
@@ -3086,8 +3086,8 @@ function assertUploadResponseMatchesPayload(response, payload) {
|
|
|
3086
3086
|
}
|
|
3087
3087
|
throw new AppError({
|
|
3088
3088
|
code: "UPLOAD_RESPONSE_INVALID",
|
|
3089
|
-
message: "
|
|
3090
|
-
suggestion: "Check that ROLEPLAY_CLOUD_URL points to a compatible roleplay.sh
|
|
3089
|
+
message: "cloud workbench upload response did not match the requested project, run, or mode.",
|
|
3090
|
+
suggestion: "Check that ROLEPLAY_CLOUD_URL points to a compatible roleplay.sh cloud workbench backend.",
|
|
3091
3091
|
exitCode: 1
|
|
3092
3092
|
});
|
|
3093
3093
|
}
|
|
@@ -3097,8 +3097,8 @@ function assertCredentialVerificationMatchesRequest(response, projectId) {
|
|
|
3097
3097
|
}
|
|
3098
3098
|
throw new AppError({
|
|
3099
3099
|
code: "UPLOAD_CREDENTIALS_INVALID",
|
|
3100
|
-
message: "
|
|
3101
|
-
suggestion: "Check that ROLEPLAY_CLOUD_URL points to a compatible roleplay.sh
|
|
3100
|
+
message: "cloud workbench API key verification response did not match the requested project.",
|
|
3101
|
+
suggestion: "Check that ROLEPLAY_CLOUD_URL points to a compatible roleplay.sh cloud workbench backend.",
|
|
3102
3102
|
exitCode: 1
|
|
3103
3103
|
});
|
|
3104
3104
|
}
|
|
@@ -3212,21 +3212,21 @@ var init_upload = __esm({
|
|
|
3212
3212
|
init_output();
|
|
3213
3213
|
init_base();
|
|
3214
3214
|
UploadCommand = class _UploadCommand extends BaseCommand {
|
|
3215
|
-
static description = "Upload one run or all local runs to roleplay.sh
|
|
3215
|
+
static description = "Upload one run or all local runs to roleplay.sh cloud workbench.";
|
|
3216
3216
|
static args = {
|
|
3217
3217
|
run: Args3.string({ required: false, default: "latest" })
|
|
3218
3218
|
};
|
|
3219
3219
|
static flags = {
|
|
3220
3220
|
endpoint: Flags4.string({
|
|
3221
|
-
description: "
|
|
3221
|
+
description: "cloud workbench URL.",
|
|
3222
3222
|
default: process.env.ROLEPLAY_CLOUD_URL ?? "http://127.0.0.1:3000"
|
|
3223
3223
|
}),
|
|
3224
3224
|
project: Flags4.string({
|
|
3225
|
-
description: "
|
|
3225
|
+
description: "cloud workbench project ID.",
|
|
3226
3226
|
default: process.env.ROLEPLAY_PROJECT_ID
|
|
3227
3227
|
}),
|
|
3228
3228
|
"api-key": Flags4.string({
|
|
3229
|
-
description: "
|
|
3229
|
+
description: "cloud workbench API key. Defaults to ROLEPLAY_API_KEY.",
|
|
3230
3230
|
default: process.env.ROLEPLAY_API_KEY
|
|
3231
3231
|
}),
|
|
3232
3232
|
mode: Flags4.string({
|
|
@@ -3302,7 +3302,7 @@ var init_upload = __esm({
|
|
|
3302
3302
|
this.log(JSON.stringify(result2));
|
|
3303
3303
|
return;
|
|
3304
3304
|
}
|
|
3305
|
-
this.log(`${chalk4.cyan("roleplay.sh
|
|
3305
|
+
this.log(`${chalk4.cyan("roleplay.sh cloud workbench")}
|
|
3306
3306
|
|
|
3307
3307
|
Project: ${result2.projectId}
|
|
3308
3308
|
Runs uploaded: ${result2.uploaded}
|
|
@@ -3333,7 +3333,7 @@ Mode: ${result2.mode}`);
|
|
|
3333
3333
|
this.log(JSON.stringify(result));
|
|
3334
3334
|
return;
|
|
3335
3335
|
}
|
|
3336
|
-
this.log(`${chalk4.cyan("roleplay.sh
|
|
3336
|
+
this.log(`${chalk4.cyan("roleplay.sh cloud workbench")}
|
|
3337
3337
|
|
|
3338
3338
|
Project: ${result.projectId}
|
|
3339
3339
|
Run: ${result.runId}
|
|
@@ -3497,19 +3497,19 @@ async function checkCloudHealth(cloudUrl) {
|
|
|
3497
3497
|
const body = await response.json().catch(() => void 0);
|
|
3498
3498
|
if (response.ok && body?.status === "ok") {
|
|
3499
3499
|
return {
|
|
3500
|
-
name: "
|
|
3500
|
+
name: "cloud workbench health",
|
|
3501
3501
|
ok: true,
|
|
3502
3502
|
detail: cloudHealthDetail(body, endpoint)
|
|
3503
3503
|
};
|
|
3504
3504
|
}
|
|
3505
3505
|
return {
|
|
3506
|
-
name: "
|
|
3506
|
+
name: "cloud workbench health",
|
|
3507
3507
|
ok: false,
|
|
3508
3508
|
detail: `HTTP ${response.status} from ${endpoint}`
|
|
3509
3509
|
};
|
|
3510
3510
|
} catch (error) {
|
|
3511
3511
|
return {
|
|
3512
|
-
name: "
|
|
3512
|
+
name: "cloud workbench health",
|
|
3513
3513
|
ok: false,
|
|
3514
3514
|
detail: error instanceof Error ? error.message : `Could not reach ${endpoint}`
|
|
3515
3515
|
};
|
|
@@ -3520,7 +3520,7 @@ async function checkCloudCredentials(cloudUrl, projectId, apiKey) {
|
|
|
3520
3520
|
const normalizedApiKey = apiKey?.trim();
|
|
3521
3521
|
if (!normalizedProjectId || !normalizedApiKey) {
|
|
3522
3522
|
return {
|
|
3523
|
-
name: "
|
|
3523
|
+
name: "cloud workbench API key",
|
|
3524
3524
|
ok: false,
|
|
3525
3525
|
detail: "ROLEPLAY_PROJECT_ID/--project and ROLEPLAY_API_KEY/--api-key are both required for credential verification"
|
|
3526
3526
|
};
|
|
@@ -3533,20 +3533,20 @@ async function checkCloudCredentials(cloudUrl, projectId, apiKey) {
|
|
|
3533
3533
|
});
|
|
3534
3534
|
const policy = verification.uploadPolicy;
|
|
3535
3535
|
return {
|
|
3536
|
-
name: "
|
|
3536
|
+
name: "cloud workbench API key",
|
|
3537
3537
|
ok: true,
|
|
3538
3538
|
detail: `${verification.key.name} (${verification.key.preview}) can upload to ${verification.projectId} with ${policy.mode}, ${policy.retentionDays}d retention`
|
|
3539
3539
|
};
|
|
3540
3540
|
} catch (error) {
|
|
3541
3541
|
return {
|
|
3542
|
-
name: "
|
|
3542
|
+
name: "cloud workbench API key",
|
|
3543
3543
|
ok: false,
|
|
3544
|
-
detail: error instanceof Error ? error.message : "Could not verify
|
|
3544
|
+
detail: error instanceof Error ? error.message : "Could not verify cloud workbench API key"
|
|
3545
3545
|
};
|
|
3546
3546
|
}
|
|
3547
3547
|
}
|
|
3548
3548
|
function cloudHealthDetail(body, endpoint) {
|
|
3549
|
-
const service = body.service ?? "
|
|
3549
|
+
const service = body.service ?? "cloud workbench";
|
|
3550
3550
|
const privacy = body.privacy;
|
|
3551
3551
|
if (!privacy) return `${service} at ${endpoint}`;
|
|
3552
3552
|
const mode = privacy.defaultUploadMode ?? (privacy.fullTranscriptUpload ? "full_transcript_opt_in" : "sanitized_findings");
|
|
@@ -3575,17 +3575,17 @@ var init_doctor = __esm({
|
|
|
3575
3575
|
static description = "Check local roleplay.sh setup.";
|
|
3576
3576
|
static flags = {
|
|
3577
3577
|
json: Flags8.boolean({ description: "Output JSON only." }),
|
|
3578
|
-
cloud: Flags8.boolean({ description: "Check
|
|
3578
|
+
cloud: Flags8.boolean({ description: "Check cloud workbench connectivity through /api/health." }),
|
|
3579
3579
|
"cloud-url": Flags8.string({
|
|
3580
|
-
description: "
|
|
3580
|
+
description: "cloud workbench base URL.",
|
|
3581
3581
|
default: process.env.ROLEPLAY_CLOUD_URL ?? "http://127.0.0.1:3000"
|
|
3582
3582
|
}),
|
|
3583
3583
|
project: Flags8.string({
|
|
3584
|
-
description: "
|
|
3584
|
+
description: "cloud workbench project ID for API-key verification. Defaults to ROLEPLAY_PROJECT_ID.",
|
|
3585
3585
|
default: process.env.ROLEPLAY_PROJECT_ID
|
|
3586
3586
|
}),
|
|
3587
3587
|
"api-key": Flags8.string({
|
|
3588
|
-
description: "
|
|
3588
|
+
description: "cloud workbench API key for credential verification. Defaults to ROLEPLAY_API_KEY.",
|
|
3589
3589
|
default: process.env.ROLEPLAY_API_KEY
|
|
3590
3590
|
})
|
|
3591
3591
|
};
|