@theokit/sdk 2.8.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.9.0
4
+
5
+ ### Minor Changes
6
+
7
+ - 4cbd107: V3-5 — make the eval-harness primitives usable without constructing a `SandboxBackend`. Both `provisionRepo` and `Scorers.verifyGate` now default to a `LocalSandbox` when no backend is passed; the explicit-sandbox path is unchanged.
8
+
9
+ - `provisionRepo` gains a one-argument overload `provisionRepo(opts)` (the sandbox defaults to a `LocalSandbox`, cloning into `<instanceId>` under the process's current working directory). The existing `provisionRepo(sandbox, opts)` form is unchanged. Pass an explicit `LocalSandbox({ workDir })` / Docker / E2B backend to control the workdir.
10
+ - `Scorers.verifyGate` — `VerifyGateOptions.sandbox` is now optional, defaulting to a `LocalSandbox` (workdir-independent: `verifyGate` always `cd`s to the explicit `repoDir`).
11
+
12
+ This lets a local `execFile`-based eval harness adopt these helpers without instantiating a backend it does not otherwise need. No new dependencies are added (the default reuses the already-public `LocalSandbox`).
13
+
3
14
  ## 2.8.0
4
15
 
5
16
  ### Minor Changes
package/dist/eval.cjs CHANGED
@@ -16193,6 +16193,118 @@ async function llmJudgeScore(options) {
16193
16193
  return parseScore(judgement.text, rubric);
16194
16194
  }
16195
16195
 
16196
+ // src/sandbox/types.ts
16197
+ var SandboxSecurityError = class extends Error {
16198
+ code = "sandbox_security";
16199
+ constructor(message) {
16200
+ super(message);
16201
+ this.name = "SandboxSecurityError";
16202
+ }
16203
+ };
16204
+ var SHELL_METACHARACTERS = /[;&|`$(){}]/;
16205
+ var SandboxBackend = class {
16206
+ config;
16207
+ constructor(config = {}) {
16208
+ this.config = {
16209
+ workDir: config.workDir ?? "/tmp",
16210
+ timeoutMs: config.timeoutMs ?? 3e4,
16211
+ maxOutputBytes: config.maxOutputBytes ?? 5 * 1024 * 1024
16212
+ };
16213
+ }
16214
+ async readFile(path) {
16215
+ const result = await this.execute(`cat ${this.shellEscape(path)}`);
16216
+ if (result.exitCode !== 0) {
16217
+ throw new Error(`readFile failed: ${result.stderr}`);
16218
+ }
16219
+ return result.stdout;
16220
+ }
16221
+ async writeFile(path, content) {
16222
+ await this.uploadFile(path, content);
16223
+ }
16224
+ async glob(pattern, cwd) {
16225
+ const dir = cwd ?? this.config.workDir ?? ".";
16226
+ const result = await this.execute(
16227
+ `find ${this.shellEscape(dir)} -name ${this.shellEscape(pattern)} -type f 2>/dev/null`
16228
+ );
16229
+ if (result.exitCode !== 0) return [];
16230
+ return result.stdout.trim().split("\n").filter(Boolean);
16231
+ }
16232
+ async grep(pattern, path) {
16233
+ const target = path ?? ".";
16234
+ const result = await this.execute(
16235
+ `grep -rn ${this.shellEscape(pattern)} ${this.shellEscape(target)} 2>/dev/null`
16236
+ );
16237
+ if (result.exitCode !== 0) return [];
16238
+ return result.stdout.trim().split("\n").filter(Boolean);
16239
+ }
16240
+ async listDir(path) {
16241
+ const result = await this.execute(`ls -1 ${this.shellEscape(path)}`);
16242
+ if (result.exitCode !== 0) return [];
16243
+ return result.stdout.trim().split("\n").filter(Boolean);
16244
+ }
16245
+ validateCommand(command) {
16246
+ if (SHELL_METACHARACTERS.test(command)) {
16247
+ throw new SandboxSecurityError(
16248
+ `Command contains shell metacharacters: ${command.slice(0, 80)}`
16249
+ );
16250
+ }
16251
+ }
16252
+ truncateOutput(output) {
16253
+ const max = this.config.maxOutputBytes ?? 5 * 1024 * 1024;
16254
+ if (Buffer.byteLength(output) > max) {
16255
+ return `${output.slice(0, max)}
16256
+ ...(truncated)`;
16257
+ }
16258
+ return output;
16259
+ }
16260
+ shellEscape(arg) {
16261
+ return shellEscapePosix(arg);
16262
+ }
16263
+ };
16264
+
16265
+ // src/sandbox/local-sandbox.ts
16266
+ var LocalSandbox = class extends SandboxBackend {
16267
+ constructor(config = {}) {
16268
+ super(config);
16269
+ }
16270
+ async execute(command, opts) {
16271
+ const timeout = opts?.timeoutMs ?? this.config.timeoutMs ?? 3e4;
16272
+ const max = this.config.maxOutputBytes ?? 5 * 1024 * 1024;
16273
+ return new Promise((resolve3) => {
16274
+ const child = child_process.execFile(
16275
+ "/bin/sh",
16276
+ ["-c", command],
16277
+ {
16278
+ cwd: this.config.workDir,
16279
+ timeout,
16280
+ maxBuffer: max,
16281
+ encoding: "utf-8"
16282
+ },
16283
+ (error, stdout, stderr) => {
16284
+ resolve3(this.buildResult(error, stdout ?? "", stderr ?? ""));
16285
+ }
16286
+ );
16287
+ child.on("error", () => {
16288
+ resolve3({ stdout: "", stderr: "spawn error", exitCode: 1, timedOut: false });
16289
+ });
16290
+ });
16291
+ }
16292
+ buildResult(error, stdout, stderr) {
16293
+ const timedOut = error !== null && "killed" in error && error.killed;
16294
+ return {
16295
+ stdout: this.truncateOutput(stdout),
16296
+ stderr: this.truncateOutput(stderr),
16297
+ exitCode: timedOut ? 124 : error ? 1 : 0,
16298
+ timedOut
16299
+ };
16300
+ }
16301
+ async uploadFile(path$1, content) {
16302
+ const fullPath = path$1.startsWith("/") ? path$1 : `${this.config.workDir}/${path$1}`;
16303
+ await promises.mkdir(path.dirname(fullPath), { recursive: true });
16304
+ await promises.writeFile(fullPath, content, "utf-8");
16305
+ }
16306
+ };
16307
+
16196
16308
  // src/scorers.ts
16197
16309
  var JSON_SHAPE_MAX_BYTES = 1e6;
16198
16310
  function makeStringScorer(name, caseSensitive, compare) {
@@ -16296,7 +16408,7 @@ var Scorers = {
16296
16408
  * that rejects shell metacharacters in `execute` is unsupported for this scorer.
16297
16409
  */
16298
16410
  verifyGate(opts) {
16299
- const { sandbox, repoDir, failToPass, passToPass, command } = opts;
16411
+ const { sandbox = new LocalSandbox(), repoDir, failToPass, passToPass, command } = opts;
16300
16412
  return {
16301
16413
  name: "verify-gate",
16302
16414
  score: async () => {