keystone-cli 2.1.3 → 2.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +157 -33
- package/package.json +1 -1
- package/src/parser/agent-parser.test.ts +5 -5
- package/src/parser/schema.ts +0 -8
- package/src/runner/executors/dynamic-executor.ts +2 -8
- package/src/runner/executors/llm/tool-manager.ts +0 -1
- package/src/runner/executors/plan-executor.ts +0 -1
- package/src/runner/executors/request-executor.ts +2 -10
- package/src/runner/executors/script-executor.ts +0 -8
- package/src/runner/executors/shell-executor.ts +2 -65
- package/src/runner/executors/types.ts +0 -1
- package/src/runner/executors/verification_fixes.test.ts +0 -2
- package/src/runner/join-scheduling.test.ts +0 -2
- package/src/runner/mcp-client-audit.test.ts +1 -1
- package/src/runner/mcp-client.test.ts +0 -6
- package/src/runner/mcp-client.ts +2 -22
- package/src/runner/memoization-leak.test.ts +0 -1
- package/src/runner/memoization.test.ts +0 -1
- package/src/runner/optimization-runner.ts +0 -5
- package/src/runner/services/context-builder.ts +0 -4
- package/src/runner/shell-executor.test.ts +0 -8
- package/src/runner/standard-tools-integration.test.ts +0 -1
- package/src/runner/standard-tools.test.ts +1 -17
- package/src/runner/standard-tools.ts +2 -13
- package/src/runner/step-executor.test.ts +0 -4
- package/src/runner/workflow-runner.test.ts +0 -9
- package/src/runner/workflow-runner.ts +0 -3
- package/src/runner/workflow-subflows.test.ts +0 -1
- package/src/templates/basics/basic-inputs.yaml +0 -1
- package/src/templates/basics/basic-shell.yaml +0 -2
- package/src/templates/basics/full-feature-demo.yaml +1 -2
- package/src/templates/control-flow/fan-out-fan-in.yaml +0 -2
- package/src/templates/control-flow/idempotency-example.yaml +0 -1
- package/src/templates/control-flow/loop-parallel.yaml +1 -3
- package/src/templates/control-flow/retry-policy.yaml +0 -2
- package/src/templates/features/artifact-example.yaml +0 -2
- package/src/templates/features/llm-agent.yaml +62 -3
- package/src/templates/features/memory-service.yaml +0 -1
- package/src/templates/features/robust-automation.yaml +0 -2
- package/src/templates/features/script-example.yaml +0 -3
- package/src/templates/features/src/templates/features/downloaded/test-output/file1.txt +1 -0
- package/src/templates/features/src/templates/features/downloaded/test-output/file2.txt +1 -0
- package/src/templates/patterns/agent-handoff.yaml +0 -2
- package/src/templates/patterns/approval-process.yaml +1 -1
- package/src/templates/patterns/batch-processor.yaml +1 -2
- package/src/templates/patterns/composition-parent.yaml +0 -1
- package/src/templates/patterns/data-pipeline.yaml +0 -1
- package/src/templates/scaffolding/dev.yaml +0 -2
- package/src/templates/scaffolding/dynamic-decompose.yaml +0 -1
- package/src/templates/scaffolding/scaffold-feature.yaml +0 -1
package/README.md
CHANGED
|
@@ -447,7 +447,6 @@ Keystone supports several specialized step types:
|
|
|
447
447
|
- `maxMessageHistory`: Number (default `50`). Max messages to retain in history before truncation/summary.
|
|
448
448
|
- `contextStrategy`: `'truncate'|'summary'|'auto'` (default `truncate`). Summarizes older history into a system message when limits are exceeded.
|
|
449
449
|
- `qualityGate`: Optional reviewer config `{ agent, prompt?, provider?, model?, maxAttempts? }`. If review fails, the step is refined and re-run.
|
|
450
|
-
- `allowInsecure`: Boolean (default `false`). Set `true` to allow risky tool execution.
|
|
451
450
|
- `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow tools to access files outside of the current working directory.
|
|
452
451
|
- `handoff`: Optional engine tool definition that lets the LLM delegate work to an allowlisted external CLI with structured inputs.
|
|
453
452
|
- `plan`: Create a dynamic task list for orchestration.
|
|
@@ -456,8 +455,7 @@ Keystone supports several specialized step types:
|
|
|
456
455
|
- `prompt`: Optional override of the planning prompt.
|
|
457
456
|
- Plan steps accept the same LLM options as `llm`, including tools, handoffs, and `allowedHandoffs`.
|
|
458
457
|
- `request`: Make HTTP requests (GET, POST, etc.).
|
|
459
|
-
-
|
|
460
|
-
- Cross-origin redirects are blocked for non-GET/HEAD requests unless `allowInsecure: true`; on cross-origin redirects, non-essential headers are stripped.
|
|
458
|
+
- Cross-origin redirects are blocked for non-GET/HEAD requests; on cross-origin redirects, non-essential headers are stripped for security.
|
|
461
459
|
- `file`: Read, write, append, or patch files.
|
|
462
460
|
- `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow reading/writing files outside of the current working directory.
|
|
463
461
|
- `op: patch`: Apply a unified diff or search/replace blocks via `content`.
|
|
@@ -481,7 +479,7 @@ Keystone supports several specialized step types:
|
|
|
481
479
|
- `condition`: `'all'` (default), `'any'`, or a number.
|
|
482
480
|
- `target`: Reserved for future use; currently ignored.
|
|
483
481
|
- `blueprint`: Generate a structured system blueprint with an agent (persisted as an artifact).
|
|
484
|
-
- `script`: Run JavaScript in a sandboxed subprocess.
|
|
482
|
+
- `script`: Run JavaScript in a sandboxed subprocess.
|
|
485
483
|
- `sleep`: Pause execution for a specified duration or until a timestamp.
|
|
486
484
|
- `duration`: Milliseconds (number or expression).
|
|
487
485
|
- `until`: Date/time string (evaluated), parsed by `Date`.
|
|
@@ -728,8 +726,6 @@ When a step fails, the specified agent is invoked with the error details. The ag
|
|
|
728
726
|
```yaml
|
|
729
727
|
- id: list_files
|
|
730
728
|
type: shell
|
|
731
|
-
# Globbing (*) requires allowInsecure: true
|
|
732
|
-
allowInsecure: true
|
|
733
729
|
run: ls *.txt
|
|
734
730
|
# Post-process stdout into an array of filenames
|
|
735
731
|
transform: ${{ stdout.trim().split('\n') }}
|
|
@@ -755,7 +751,6 @@ Until `strategy.matrix` is wired end-to-end, use explicit `foreach` with an arra
|
|
|
755
751
|
{ node: 22, os: "ubuntu" },
|
|
756
752
|
{ node: 22, os: "macos" }
|
|
757
753
|
] }}
|
|
758
|
-
allowInsecure: true # Required for '=' in arguments
|
|
759
754
|
run: echo "node=${{ item.node }} os=${{ item.os }}"
|
|
760
755
|
```
|
|
761
756
|
|
|
@@ -763,7 +758,6 @@ Until `strategy.matrix` is wired end-to-end, use explicit `foreach` with an arra
|
|
|
763
758
|
```yaml
|
|
764
759
|
- id: calculate
|
|
765
760
|
type: script
|
|
766
|
-
allowInsecure: true
|
|
767
761
|
run: |
|
|
768
762
|
const data = steps.fetch_data.output;
|
|
769
763
|
return data.map(i => i.value * 2).reduce((a, b) => a + b, 0);
|
|
@@ -862,7 +856,6 @@ Upload outputs include `artifactPath` and `files` for downstream references.
|
|
|
862
856
|
- `message`: Commit message.
|
|
863
857
|
- `cwd`: Directory to run the git command in.
|
|
864
858
|
- `allowOutsideCwd`: Boolean (default `false`). Set `true` to allow operations outside the project root.
|
|
865
|
-
- `allowInsecure`: Boolean (default `false`). Set `true` to allow git commands that fail the security whitelist.
|
|
866
859
|
|
|
867
860
|
```yaml
|
|
868
861
|
- id: setup_feat
|
|
@@ -1014,7 +1007,7 @@ Keystone comes with a set of **Standard Tools** that can be enabled for any agen
|
|
|
1014
1007
|
- `list_files`: List files in a directory (arguments: `path`)
|
|
1015
1008
|
- `search_files`: Search for files by glob pattern (arguments: `pattern`, `dir`)
|
|
1016
1009
|
- `search_content`: Search for string or regex within files (arguments: `query`, `dir`, `pattern`)
|
|
1017
|
-
- `run_command`: Run a shell command (arguments: `command`, `dir`).
|
|
1010
|
+
- `run_command`: Run a shell command (arguments: `command`, `dir`).
|
|
1018
1011
|
- `ast_grep_search`: Search for structural code patterns using AST matching (arguments: `pattern`, `language`, `paths`). More precise than regex for code refactoring.
|
|
1019
1012
|
- `ast_grep_replace`: Replace structural code patterns using AST-aware rewriting (arguments: `pattern`, `rewrite`, `language`, `paths`). Safer than regex for code refactoring.
|
|
1020
1013
|
- `fetch`: Fetch content from a URL via GET request (arguments: `url`).
|
|
@@ -1231,40 +1224,171 @@ Input keys passed via `-i key=val` must be alphanumeric/underscore and cannot be
|
|
|
1231
1224
|
|
|
1232
1225
|
## <a id="security">🛡️ Security</a>
|
|
1233
1226
|
|
|
1234
|
-
###
|
|
1235
|
-
Keystone strictly enforces an allowlist of characters (`alphanumeric`, `whitespace`, and `_./:@,+=~-`) to prevent shell injection.
|
|
1227
|
+
### ⚠️ Security Warning
|
|
1236
1228
|
|
|
1237
|
-
|
|
1238
|
-
- **Denylist**: Commands like `rm`, `mkfs`, or `alias` are blocked via a configurable denylist in `config.yaml`, even if `allowInsecure: true` is set.
|
|
1239
|
-
- **Windows Support**: Keystone uses `cmd.exe /d /s /c` on Windows and `sh -c` on other platforms for consistent behavior.
|
|
1229
|
+
**Keystone workflows can execute arbitrary code on your system.** Always review and trust the source of workflows before running them. Think of YAML workflows like shell scripts - they have full access to your filesystem, environment variables, and network.
|
|
1240
1230
|
|
|
1241
|
-
|
|
1231
|
+
**Key Security Principles:**
|
|
1232
|
+
1. **Trust the Source**: Only run workflows from trusted sources (official templates, your team, verified repositories)
|
|
1233
|
+
2. **Review Before Running**: Read through workflow files, especially shell commands and file operations
|
|
1234
|
+
3. **Isolate Sensitive Operations**: Use separate environments for production credentials
|
|
1235
|
+
4. **Validate Inputs**: Use input schemas to constrain user-provided values
|
|
1236
|
+
5. **Mark Secrets**: Use `secret: true` on sensitive inputs for automatic redaction
|
|
1242
1237
|
|
|
1238
|
+
### Runtime Security Warnings
|
|
1239
|
+
|
|
1240
|
+
Keystone displays security warnings when running workflows:
|
|
1241
|
+
```
|
|
1242
|
+
⚠️ Security Warning: Only run workflows from trusted sources.
|
|
1243
|
+
Workflows can execute arbitrary shell commands and access your environment.
|
|
1244
|
+
```
|
|
1245
|
+
|
|
1246
|
+
You can suppress this warning in `.keystone/config.yaml` if needed:
|
|
1247
|
+
```yaml
|
|
1248
|
+
logging:
|
|
1249
|
+
suppress_security_warning: true
|
|
1250
|
+
```
|
|
1251
|
+
|
|
1252
|
+
### Shell Command Security
|
|
1253
|
+
|
|
1254
|
+
Keystone executes shell commands using `sh -c` (POSIX) or `cmd.exe /d /s /c` (Windows). While this provides flexibility, it also means workflows can run **any command** your user can run.
|
|
1255
|
+
|
|
1256
|
+
**Security Measures:**
|
|
1257
|
+
- **Command Denylist**: Dangerous commands like `rm -rf`, `dd`, `mkfs`, and `format` are blocked by default
|
|
1258
|
+
- **Escape Function**: Use `${{ escape(...) }}` when interpolating untrusted input into shell commands
|
|
1259
|
+
- **Review Commands**: Always inspect shell steps in workflows from untrusted sources
|
|
1260
|
+
|
|
1261
|
+
**Configurable Denylist:**
|
|
1262
|
+
Add or remove blocked commands in `.keystone/config.yaml`:
|
|
1263
|
+
```yaml
|
|
1264
|
+
shell:
|
|
1265
|
+
denylist:
|
|
1266
|
+
- rm # Block all rm commands
|
|
1267
|
+
- sudo # Block privilege escalation
|
|
1268
|
+
- curl -X # Block non-GET HTTP requests (optional)
|
|
1269
|
+
```
|
|
1270
|
+
|
|
1271
|
+
**Safe Command Example:**
|
|
1243
1272
|
```yaml
|
|
1244
|
-
- id:
|
|
1273
|
+
- id: safe_echo
|
|
1245
1274
|
type: shell
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1275
|
+
# Safe: escape() prevents injection
|
|
1276
|
+
run: echo ${{ escape(inputs.user_message) }}
|
|
1277
|
+
```
|
|
1278
|
+
|
|
1279
|
+
**Unsafe Command Example:**
|
|
1280
|
+
```yaml
|
|
1281
|
+
- id: unsafe_echo
|
|
1282
|
+
type: shell
|
|
1283
|
+
# UNSAFE: Could execute arbitrary code if user_message contains "; rm -rf /"
|
|
1284
|
+
run: echo ${{ inputs.user_message }}
|
|
1285
|
+
```
|
|
1286
|
+
|
|
1287
|
+
### HTTP Request Security
|
|
1288
|
+
|
|
1289
|
+
Request steps include SSRF (Server-Side Request Forgery) protection to prevent workflows from accessing internal network resources.
|
|
1290
|
+
|
|
1291
|
+
**Blocked by Default:**
|
|
1292
|
+
- `localhost` and `127.0.0.1`
|
|
1293
|
+
- Private IP ranges (`10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`)
|
|
1294
|
+
- Link-local addresses (`169.254.0.0/16`)
|
|
1295
|
+
- Cloud metadata endpoints (AWS, GCP, Azure, etc.)
|
|
1296
|
+
|
|
1297
|
+
**Note**: SSRF protection provides defense-in-depth but is **not foolproof** against DNS rebinding attacks. For high-security environments, use network-level isolation (firewalls, egress proxies).
|
|
1298
|
+
|
|
1299
|
+
**Example:**
|
|
1300
|
+
```yaml
|
|
1301
|
+
- id: fetch_data
|
|
1302
|
+
type: request
|
|
1303
|
+
url: https://api.example.com/data # OK: external HTTPS
|
|
1304
|
+
# url: http://localhost:8080/admin # BLOCKED: localhost access
|
|
1305
|
+
```
|
|
1306
|
+
|
|
1307
|
+
### File Access Security
|
|
1308
|
+
|
|
1309
|
+
File operations are restricted to the current working directory by default.
|
|
1310
|
+
|
|
1311
|
+
**Enable External Access:**
|
|
1312
|
+
```yaml
|
|
1313
|
+
- id: read_config
|
|
1314
|
+
type: file
|
|
1315
|
+
op: read
|
|
1316
|
+
path: /etc/app/config.yaml
|
|
1317
|
+
allowOutsideCwd: true # Required for paths outside project root
|
|
1249
1318
|
```
|
|
1250
1319
|
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1320
|
+
### Script Execution Security
|
|
1321
|
+
|
|
1322
|
+
Script steps run JavaScript in a subprocess. While this provides some isolation, it is **not a security sandbox**.
|
|
1323
|
+
|
|
1324
|
+
**Risk**: Scripts have full access to Node.js APIs and can perform any action.
|
|
1325
|
+
|
|
1326
|
+
**Example:**
|
|
1327
|
+
```yaml
|
|
1328
|
+
- id: calculate
|
|
1329
|
+
type: script
|
|
1330
|
+
run: |
|
|
1331
|
+
const data = steps.fetch_data.output;
|
|
1332
|
+
return data.map(i => i.value * 2).reduce((a, b) => a + b, 0);
|
|
1333
|
+
```
|
|
1334
|
+
|
|
1335
|
+
### Secret Management
|
|
1336
|
+
|
|
1337
|
+
Mark sensitive inputs as secrets to enable automatic redaction:
|
|
1338
|
+
|
|
1339
|
+
```yaml
|
|
1340
|
+
inputs:
|
|
1341
|
+
api_key:
|
|
1342
|
+
type: string
|
|
1343
|
+
secret: true # Redacted in logs, UI, and database
|
|
1344
|
+
|
|
1345
|
+
database_password:
|
|
1346
|
+
type: string
|
|
1347
|
+
secret: true
|
|
1348
|
+
|
|
1349
|
+
steps:
|
|
1350
|
+
- id: deploy
|
|
1351
|
+
type: shell
|
|
1352
|
+
# Secrets are available but redacted in output
|
|
1353
|
+
run: ./deploy.sh --api-key="${{ secrets.api_key }}"
|
|
1354
|
+
```
|
|
1355
|
+
|
|
1356
|
+
**Secret Redaction:**
|
|
1357
|
+
- Secrets are redacted from logs and step outputs
|
|
1358
|
+
- Stored encrypted at rest (when `redact_secrets_at_rest: true` in config)
|
|
1359
|
+
- May require re-entry when resuming workflows
|
|
1255
1360
|
|
|
1256
1361
|
### Expression Safety
|
|
1257
|
-
Expressions `${{ }}` are evaluated using a safe AST parser (`jsep`) which:
|
|
1258
|
-
- Prevents arbitrary code execution (no `eval` or `Function`).
|
|
1259
|
-
- Whitelists safe global objects (`Math`, `JSON`, `Date`, etc.).
|
|
1260
|
-
- Blocks access to sensitive properties (`constructor`, `__proto__`).
|
|
1261
|
-
- Enforces a maximum template length to prevent ReDoS attacks.
|
|
1262
1362
|
|
|
1263
|
-
|
|
1264
|
-
|
|
1363
|
+
Expressions `${{ }}` are evaluated using a safe AST parser that:
|
|
1364
|
+
- **Prevents arbitrary code execution** (no `eval` or `Function`)
|
|
1365
|
+
- **Whitelists safe globals** (`Math`, `JSON`, `Date`)
|
|
1366
|
+
- **Blocks dangerous properties** (`constructor`, `__proto__`, `prototype`)
|
|
1367
|
+
- **Enforces length limits** to prevent ReDoS attacks
|
|
1368
|
+
|
|
1369
|
+
**Safe:**
|
|
1370
|
+
```yaml
|
|
1371
|
+
${{ steps.build.status == 'success' ? '✅' : '❌' }}
|
|
1372
|
+
${{ Math.max(steps.test.outputs.score, 0) }}
|
|
1373
|
+
${{ JSON.stringify({ result: steps.data.output }) }}
|
|
1374
|
+
```
|
|
1375
|
+
|
|
1376
|
+
**Blocked:**
|
|
1377
|
+
```yaml
|
|
1378
|
+
${{ constructor.constructor('return process')().exit() }} # ❌ Blocked
|
|
1379
|
+
${{ __proto__.polluted = true }} # ❌ Blocked
|
|
1380
|
+
```
|
|
1381
|
+
|
|
1382
|
+
### Best Practices Summary
|
|
1265
1383
|
|
|
1266
|
-
|
|
1267
|
-
|
|
1384
|
+
1. ✅ **Review all workflows** before running, especially from external sources
|
|
1385
|
+
2. ✅ **Use `escape()`** when interpolating user input in shell commands
|
|
1386
|
+
3. ✅ **Mark secrets** with `secret: true` on inputs
|
|
1387
|
+
4. ✅ **Enable `allowOutsideCwd`** only when absolutely necessary
|
|
1388
|
+
5. ✅ **Use input validation** with JSON Schema to constrain values
|
|
1389
|
+
6. ✅ **Test in isolated environments** before running in production
|
|
1390
|
+
7. ✅ **Keep credentials in `.env`** files, never hardcode in workflows
|
|
1391
|
+
8. ✅ **Use network isolation** (firewalls) for high-security deployments
|
|
1268
1392
|
|
|
1269
1393
|
---
|
|
1270
1394
|
|
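package/package.json
CHANGED
(hunk not captured in this extract; +1 -1 per the file list above, presumably the `version` field bump from 2.1.3 to 2.1.5)
package/src/parser/agent-parser.test.ts
CHANGED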
|
@@ -97,11 +97,11 @@ Prompt`;
|
|
|
97
97
|
});
|
|
98
98
|
|
|
99
99
|
it('should parse the real keystone-architect.md template', () => {
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
100
|
+
const filePath = join(process.cwd(), 'src/templates/agents/keystone-architect.md');
|
|
101
|
+
const agent = parseAgent(filePath);
|
|
102
|
+
expect(agent.name).toBe('keystone-architect');
|
|
103
|
+
// Ensure the problematic expression is escaped/spaced
|
|
104
|
+
expect(agent.systemPrompt).toContain('${ { args.paramName } }');
|
|
105
105
|
});
|
|
106
106
|
});
|
|
107
107
|
|
package/src/parser/schema.ts
CHANGED
|
@@ -152,7 +152,6 @@ const ShellStepSchema = BaseStepSchema.extend({
|
|
|
152
152
|
dir: z.string().optional(),
|
|
153
153
|
env: z.record(z.string()).optional(),
|
|
154
154
|
allowOutsideCwd: z.boolean().optional(),
|
|
155
|
-
allowInsecure: z.boolean().optional(),
|
|
156
155
|
});
|
|
157
156
|
|
|
158
157
|
// Forward declaration for AgentToolSchema which depends on StepSchema
|
|
@@ -230,7 +229,6 @@ const LlmStepSchema = BaseStepSchema.extend({
|
|
|
230
229
|
.optional(),
|
|
231
230
|
useStandardTools: z.boolean().optional(),
|
|
232
231
|
allowOutsideCwd: z.boolean().optional(),
|
|
233
|
-
allowInsecure: z.boolean().optional(),
|
|
234
232
|
handoff: EngineHandoffSchema.optional(),
|
|
235
233
|
});
|
|
236
234
|
|
|
@@ -270,7 +268,6 @@ const PlanStepSchema = BaseStepSchema.extend({
|
|
|
270
268
|
.optional(),
|
|
271
269
|
useStandardTools: z.boolean().optional(),
|
|
272
270
|
allowOutsideCwd: z.boolean().optional(),
|
|
273
|
-
allowInsecure: z.boolean().optional(),
|
|
274
271
|
handoff: EngineHandoffSchema.optional(),
|
|
275
272
|
});
|
|
276
273
|
|
|
@@ -303,7 +300,6 @@ const RequestStepSchema = BaseStepSchema.extend({
|
|
|
303
300
|
method: z.enum(['GET', 'POST', 'PUT', 'PATCH', 'DELETE', 'HEAD']).default('GET'),
|
|
304
301
|
body: z.any().optional(),
|
|
305
302
|
headers: z.record(z.string()).optional(),
|
|
306
|
-
allowInsecure: z.boolean().optional(),
|
|
307
303
|
});
|
|
308
304
|
|
|
309
305
|
const HumanStepSchema = BaseStepSchema.extend({
|
|
@@ -323,7 +319,6 @@ const ScriptStepSchema = BaseStepSchema.extend({
|
|
|
323
319
|
type: z.literal('script'),
|
|
324
320
|
run: z.string(),
|
|
325
321
|
allowOutsideCwd: z.boolean().optional(),
|
|
326
|
-
allowInsecure: z.boolean().optional().default(false),
|
|
327
322
|
});
|
|
328
323
|
|
|
329
324
|
const EngineStepSchema = BaseStepSchema.extend({
|
|
@@ -406,7 +401,6 @@ const GitStepSchema = BaseStepSchema.extend({
|
|
|
406
401
|
cwd: z.string().optional(), // Working directory for the git command
|
|
407
402
|
env: z.record(z.string()).optional(),
|
|
408
403
|
allowOutsideCwd: z.boolean().optional(),
|
|
409
|
-
allowInsecure: z.boolean().optional(),
|
|
410
404
|
});
|
|
411
405
|
|
|
412
406
|
const WaitStepSchema = BaseStepSchema.extend({
|
|
@@ -442,7 +436,6 @@ const DynamicStepSchema = BaseStepSchema.extend({
|
|
|
442
436
|
.optional(), // Library of pre-defined step patterns
|
|
443
437
|
confirmPlan: z.boolean().optional().default(false), // Review and approve plan before execution
|
|
444
438
|
maxReplans: z.number().int().nonnegative().default(3), // Max automatic recovery attempts
|
|
445
|
-
allowInsecure: z.boolean().optional(), // Allow generated steps to use insecure commands (e.g. shell redirects)
|
|
446
439
|
});
|
|
447
440
|
|
|
448
441
|
// Note: `as any` casts are required here because of circular type references:
|
|
@@ -478,7 +471,6 @@ const EvalSchema = z.object({
|
|
|
478
471
|
agent: z.string().optional(),
|
|
479
472
|
prompt: z.string().optional(),
|
|
480
473
|
run: z.string().optional(), // for script scorer
|
|
481
|
-
allowInsecure: z.boolean().optional(),
|
|
482
474
|
allowSecrets: z.boolean().optional(),
|
|
483
475
|
});
|
|
484
476
|
|
package/src/runner/executors/dynamic-executor.ts
CHANGED
|
@@ -132,11 +132,7 @@ Return a JSON object with the steps array. Each step should be independently exe
|
|
|
132
132
|
/**
|
|
133
133
|
* Convert a generated step definition into an executable Step
|
|
134
134
|
*/
|
|
135
|
-
function convertToExecutableStep(
|
|
136
|
-
generated: GeneratedStep,
|
|
137
|
-
parentStepId: string,
|
|
138
|
-
allowInsecure?: boolean
|
|
139
|
-
): Step {
|
|
135
|
+
function convertToExecutableStep(generated: GeneratedStep, parentStepId: string): Step {
|
|
140
136
|
const baseProps = {
|
|
141
137
|
id: `${parentStepId}_${generated.id}`,
|
|
142
138
|
needs: generated.needs?.map((n) => `${parentStepId}_${n}`) || [],
|
|
@@ -157,7 +153,6 @@ function convertToExecutableStep(
|
|
|
157
153
|
...baseProps,
|
|
158
154
|
type: 'shell' as const,
|
|
159
155
|
run: generated.run || 'echo "No command specified"',
|
|
160
|
-
allowInsecure: allowInsecure ?? false,
|
|
161
156
|
};
|
|
162
157
|
|
|
163
158
|
case 'workflow':
|
|
@@ -181,7 +176,6 @@ function convertToExecutableStep(
|
|
|
181
176
|
return {
|
|
182
177
|
...baseProps,
|
|
183
178
|
type: 'request' as const,
|
|
184
|
-
allowInsecure: allowInsecure ?? false,
|
|
185
179
|
url: generated.path || '',
|
|
186
180
|
method: 'GET' as const,
|
|
187
181
|
};
|
|
@@ -585,7 +579,7 @@ async function handleExecutionPhase(
|
|
|
585
579
|
` ⚡ [${i + 1}/${state.generatedPlan.steps.length}] Executing step: ${genStep.name}`
|
|
586
580
|
);
|
|
587
581
|
|
|
588
|
-
const executableStep = convertToExecutableStep(genStep, step.id
|
|
582
|
+
const executableStep = convertToExecutableStep(genStep, step.id);
|
|
589
583
|
const stepContext = {
|
|
590
584
|
...dynamicContext,
|
|
591
585
|
steps: {
|
package/src/runner/executors/llm/tool-manager.ts
CHANGED
|
@@ -96,7 +96,6 @@ export class ToolManager {
|
|
|
96
96
|
async (args) => {
|
|
97
97
|
validateStandardToolSecurity(tool.name, args, {
|
|
98
98
|
allowOutsideCwd: step.allowOutsideCwd,
|
|
99
|
-
allowInsecure: step.allowInsecure,
|
|
100
99
|
});
|
|
101
100
|
if (tool.execution) {
|
|
102
101
|
// Standard tools usually have .execute method directly on them in STANDARD_TOOLS definition?
|
package/src/runner/executors/plan-executor.ts
CHANGED
|
@@ -80,7 +80,6 @@ export async function executePlanStep(
|
|
|
80
80
|
mcpServers: step.mcpServers,
|
|
81
81
|
useStandardTools: step.useStandardTools,
|
|
82
82
|
allowOutsideCwd: step.allowOutsideCwd,
|
|
83
|
-
allowInsecure: step.allowInsecure,
|
|
84
83
|
handoff: step.handoff,
|
|
85
84
|
outputSchema: step.outputSchema ?? DEFAULT_PLAN_OUTPUT_SCHEMA,
|
|
86
85
|
needs: [],
|
package/src/runner/executors/request-executor.ts
CHANGED
|
@@ -74,7 +74,7 @@ export async function executeRequestStep(
|
|
|
74
74
|
|
|
75
75
|
try {
|
|
76
76
|
// Validate URL to prevent SSRF
|
|
77
|
-
await validateRemoteUrl(url
|
|
77
|
+
await validateRemoteUrl(url);
|
|
78
78
|
|
|
79
79
|
// Evaluate headers
|
|
80
80
|
const headers: Record<string, string> = {};
|
|
@@ -171,7 +171,7 @@ export async function executeRequestStep(
|
|
|
171
171
|
}
|
|
172
172
|
|
|
173
173
|
const nextUrl = new URL(location, currentUrl).href;
|
|
174
|
-
await validateRemoteUrl(nextUrl
|
|
174
|
+
await validateRemoteUrl(nextUrl);
|
|
175
175
|
|
|
176
176
|
let nextMethod = currentMethod;
|
|
177
177
|
let nextBody = currentBody;
|
|
@@ -192,14 +192,6 @@ export async function executeRequestStep(
|
|
|
192
192
|
removeHeader('authorization');
|
|
193
193
|
removeHeader('proxy-authorization');
|
|
194
194
|
removeHeader('cookie');
|
|
195
|
-
if (!step.allowInsecure) {
|
|
196
|
-
if (nextMethod !== 'GET' && nextMethod !== 'HEAD') {
|
|
197
|
-
throw new Error(
|
|
198
|
-
`Cross-origin redirect blocked for ${nextMethod} request. Set allowInsecure to true to override.`
|
|
199
|
-
);
|
|
200
|
-
}
|
|
201
|
-
stripCrossOriginHeaders();
|
|
202
|
-
}
|
|
203
195
|
}
|
|
204
196
|
|
|
205
197
|
currentMethod = nextMethod;
|
package/src/runner/executors/script-executor.ts
CHANGED
|
@@ -14,14 +14,6 @@ export async function executeScriptStep(
|
|
|
14
14
|
logger: Logger,
|
|
15
15
|
options: { sandbox?: typeof SafeSandbox; abortSignal?: AbortSignal } = {}
|
|
16
16
|
): Promise<StepResult> {
|
|
17
|
-
if (!step.allowInsecure) {
|
|
18
|
-
return {
|
|
19
|
-
status: 'failed',
|
|
20
|
-
output: null,
|
|
21
|
-
error: 'Script execution is disabled by default. Set allowInsecure: true to run scripts.',
|
|
22
|
-
};
|
|
23
|
-
}
|
|
24
|
-
|
|
25
17
|
try {
|
|
26
18
|
const sandbox = options.sandbox || DefaultSandbox;
|
|
27
19
|
const result = await sandbox.execute(step.run, context as any, {
|
package/src/runner/executors/shell-executor.ts
CHANGED
|
@@ -75,34 +75,7 @@ export async function executeShellStep(
|
|
|
75
75
|
throw new Error('Shell step must have either "run" or "args"');
|
|
76
76
|
}
|
|
77
77
|
|
|
78
|
-
// Strict Mode Check: Detect unescaped expressions in the raw template
|
|
79
|
-
// We check if there are any ${{ }} blocks that don't start with escape(
|
|
80
|
-
const hasUnescapedExpr = (s: string) => {
|
|
81
|
-
// Finds ${{ ... }} blocks
|
|
82
|
-
const matches = s.match(/\${{.*?}}/g);
|
|
83
|
-
if (!matches) return false;
|
|
84
|
-
|
|
85
|
-
// Check if the expression is strictly wrapped in escape(...)
|
|
86
|
-
// Matches: ${{ escape(...) }} or ${{ escape( ... ) }}
|
|
87
|
-
// Does NOT match: ${{ "foo" + escape(...) }}
|
|
88
|
-
return matches.some((m) => {
|
|
89
|
-
const content = m.slice(3, -2).trim(); // Remove ${{ and }}
|
|
90
|
-
return !/^escape\s*\(.*\)$/.test(content);
|
|
91
|
-
});
|
|
92
|
-
};
|
|
93
|
-
|
|
94
|
-
if (!step.allowInsecure && hasUnescapedExpr(step.run)) {
|
|
95
|
-
throw new Error(
|
|
96
|
-
`Security Error: Shell command contains unescaped expressions which are vulnerable to injection.\nUse \${{ escape(...) }} to safely interpolate values, or set 'allowInsecure: true' if you trust the source.\nCommand template: ${step.run}`
|
|
97
|
-
);
|
|
98
|
-
}
|
|
99
|
-
|
|
100
78
|
const command = ExpressionEvaluator.evaluateString(step.run, context);
|
|
101
|
-
if (!step.allowInsecure && detectShellInjectionRisk(command)) {
|
|
102
|
-
throw new Error(
|
|
103
|
-
`Security Error: Evaluated command contains shell metacharacters that require 'allowInsecure: true'.\n Command: ${command.substring(0, 100)}${command.length > 100 ? '...' : ''}\n Metacharacters detected. Please use 'allowInsecure: true' if this is intended.`
|
|
104
|
-
);
|
|
105
|
-
}
|
|
106
79
|
|
|
107
80
|
const result = await executeShell(step, context, logger, abortSignal, command);
|
|
108
81
|
return formatShellResult(result, logger);
|
|
@@ -246,31 +219,7 @@ export async function executeShell(
|
|
|
246
219
|
// Evaluate the command string
|
|
247
220
|
const command = commandOverride ?? ExpressionEvaluator.evaluateString(step.run, context);
|
|
248
221
|
|
|
249
|
-
// Security Check: Enforce whitelist
|
|
250
|
-
// If we haven't enabled insecure mode, we MUST be able to use spawn (no shell)
|
|
251
|
-
// or the command must be strictly composed of safe characters.
|
|
252
|
-
if (!step.allowInsecure) {
|
|
253
|
-
if (detectShellInjectionRisk(command)) {
|
|
254
|
-
throw new Error(
|
|
255
|
-
`Security Error: Command execution blocked to prevent potential shell injection.\nCommand: "${command.substring(0, 100)}${
|
|
256
|
-
command.length > 100 ? '...' : ''
|
|
257
|
-
}"\nReason: Contains characters not in the strict whitelist (alphanumeric, whitespace, and _./:@,+=~-).\nThis protects against chaining malicious commands (e.g. '; rm -rf /'). It does NOT evaluate if the command itself is destructive.\nFix: either simplify your command or set 'allowInsecure: true' in your step definition if you trust the input.`
|
|
258
|
-
);
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
// Additional Check: Prevent Directory Traversal in Binary Path
|
|
262
|
-
// Even if it passes the whitelist, we don't want to allow 'cat ../../../etc/passwd'
|
|
263
|
-
// or executing '../../../../bin/malice'.
|
|
264
|
-
// We check for '..' characters which might indicate directory traversal.
|
|
265
|
-
if (command.includes('..') && (command.includes('/') || command.includes('\\'))) {
|
|
266
|
-
throw new Error(
|
|
267
|
-
`Security Error: Command blocked due to potential directory traversal ('..').\nCommand: "${command.substring(0, 100)}"\nTo allow relative paths outside the current directory, set 'allowInsecure: true'.`
|
|
268
|
-
);
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
|
|
272
222
|
// Security Check: Enforce Denylist (e.g. rm, mkfs, etc.)
|
|
273
|
-
// We check this even if allowInsecure is true, because these are explicitly banned by policy.
|
|
274
223
|
const config = ConfigLoader.load();
|
|
275
224
|
if (config.engines?.denylist && config.engines.denylist.length > 0) {
|
|
276
225
|
// Robust parsing to get the command binary
|
|
@@ -322,19 +271,8 @@ export async function executeShell(
|
|
|
322
271
|
const hostEnv = filterSensitiveEnv(Bun.env);
|
|
323
272
|
const mergedEnv = Object.keys(env).length > 0 ? { ...hostEnv, ...env } : hostEnv;
|
|
324
273
|
|
|
325
|
-
//
|
|
326
|
-
// We prefer direct spawn if possible, but fall back to shell if needed (e.g. for pipelines in insecure mode)
|
|
327
|
-
|
|
274
|
+
// Use 'sh -c' to execute the command
|
|
328
275
|
try {
|
|
329
|
-
// If we are in secure mode (allowInsecure: false), we KNOW the command is safe.
|
|
330
|
-
// However, it might still benefit from running directly via spawn to avoid even theoretical shell issues.
|
|
331
|
-
// But simplified splitting by space might break if we allowed quotes (which we don't in the whitelist).
|
|
332
|
-
|
|
333
|
-
// For now, if insecure is allowed, we use 'sh -c'.
|
|
334
|
-
// If secure (whitelist valid), we can also use 'sh -c' relatively safely, or split and spawn.
|
|
335
|
-
// Using 'sh -c' is robust for arguments. Since we validated the string against a strict whitelist,
|
|
336
|
-
// 'sh -c' shouldn't be able to do anything funky like variable expansion or subshells because appropriate chars are banned.
|
|
337
|
-
|
|
338
276
|
let stdoutString = '';
|
|
339
277
|
let stderrString = '';
|
|
340
278
|
let exitCode = 0;
|
|
@@ -343,8 +281,7 @@ export async function executeShell(
|
|
|
343
281
|
const maxOutputBytes = LIMITS.MAX_PROCESS_OUTPUT_BYTES;
|
|
344
282
|
|
|
345
283
|
// Use 'sh -c' (POSIX) or 'cmd.exe /d /s /c' (Windows)
|
|
346
|
-
//
|
|
347
|
-
// which prevents injection of metacharacters, quotes, escapes, etc.
|
|
284
|
+
// The denylist check above prevents dangerous commands like 'rm -rf'
|
|
348
285
|
const isWindows = process.platform === 'win32';
|
|
349
286
|
const shellCommand = isWindows ? 'cmd.exe' : 'sh';
|
|
350
287
|
const shellArgs = isWindows ? ['/d', '/s', '/c'] : ['-c'];
|
package/src/runner/executors/verification_fixes.test.ts
CHANGED
|
@@ -25,7 +25,6 @@ describe('Verification Fixes', () => {
|
|
|
25
25
|
id: 'test',
|
|
26
26
|
type: 'shell' as const,
|
|
27
27
|
run: 'cat ../secret.txt',
|
|
28
|
-
allowInsecure: false,
|
|
29
28
|
};
|
|
30
29
|
// It should throw BEFORE spawning
|
|
31
30
|
// The error message I added was "Directory Traversal" or similar
|
|
@@ -38,7 +37,6 @@ describe('Verification Fixes', () => {
|
|
|
38
37
|
id: 'test',
|
|
39
38
|
type: 'shell' as const,
|
|
40
39
|
run: '/bin/ls ../',
|
|
41
|
-
allowInsecure: false,
|
|
42
40
|
};
|
|
43
41
|
await expect(executeShell(step, mockContext)).rejects.toThrow('Command blocked');
|
|
44
42
|
});
|
package/src/runner/join-scheduling.test.ts
CHANGED
|
@@ -95,7 +95,6 @@ describe('Join Scheduling & Resume', () => {
|
|
|
95
95
|
if [ "$val" -lt "2" ]; then exit 1; else exit 0; fi
|
|
96
96
|
`,
|
|
97
97
|
retry: { count: 3 },
|
|
98
|
-
allowInsecure: true,
|
|
99
98
|
needs: [],
|
|
100
99
|
},
|
|
101
100
|
{
|
|
@@ -148,7 +147,6 @@ describe('Join Scheduling & Resume', () => {
|
|
|
148
147
|
if [ "$val" -lt "2" ]; then exit 1; else exit 0; fi
|
|
149
148
|
`,
|
|
150
149
|
retry: { count: 1 },
|
|
151
|
-
allowInsecure: true,
|
|
152
150
|
needs: [],
|
|
153
151
|
},
|
|
154
152
|
{
|
package/src/runner/mcp-client-audit.test.ts
CHANGED
|
@@ -139,7 +139,7 @@ describe('MCPClient SSRF Protection', () => {
|
|
|
139
139
|
'http://api.example.com/sse',
|
|
140
140
|
{},
|
|
141
141
|
100, // short timeout
|
|
142
|
-
{
|
|
142
|
+
{ }
|
|
143
143
|
);
|
|
144
144
|
// Should NOT throw SSRF error, but will throw timeout/connection error
|
|
145
145
|
await expect(promise).rejects.not.toThrow(/SSRF Protection/);
|
package/src/runner/mcp-client.test.ts
CHANGED
|
@@ -139,9 +139,7 @@ describe('MCPClient', () => {
|
|
|
139
139
|
return Promise.resolve(new Response(JSON.stringify({ ok: true })));
|
|
140
140
|
});
|
|
141
141
|
|
|
142
|
-
// Use allowInsecure for testing with localhost (fetch is mocked anyway)
|
|
143
142
|
const clientPromise = MCPClient.createRemote('http://localhost:8080/sse', {}, 60000, {
|
|
144
|
-
allowInsecure: true,
|
|
145
143
|
});
|
|
146
144
|
|
|
147
145
|
const client = await clientPromise;
|
|
@@ -188,9 +186,7 @@ describe('MCPClient', () => {
|
|
|
188
186
|
return Promise.resolve(new Response(JSON.stringify({ ok: true })));
|
|
189
187
|
});
|
|
190
188
|
|
|
191
|
-
// Use allowInsecure for testing with localhost (fetch is mocked anyway)
|
|
192
189
|
const client = await MCPClient.createRemote('http://localhost:8080/sse', {}, 60000, {
|
|
193
|
-
allowInsecure: true,
|
|
194
190
|
});
|
|
195
191
|
|
|
196
192
|
// We can't easily hook into onMessage without reaching into internals
|
|
@@ -234,9 +230,7 @@ describe('MCPClient', () => {
|
|
|
234
230
|
)
|
|
235
231
|
);
|
|
236
232
|
|
|
237
|
-
// Use allowInsecure for testing with localhost (fetch is mocked anyway)
|
|
238
233
|
const clientPromise = MCPClient.createRemote('http://localhost:8080/sse', {}, 60000, {
|
|
239
|
-
allowInsecure: true,
|
|
240
234
|
});
|
|
241
235
|
|
|
242
236
|
await expect(clientPromise).rejects.toThrow(/SSE connection failed: 500/);
|