snapeval 3.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +55 -0
- package/dist/src/adapters/harness/copilot-sdk.d.ts +1 -0
- package/dist/src/adapters/harness/copilot-sdk.js +9 -0
- package/dist/src/adapters/harness/copilot-sdk.js.map +1 -1
- package/dist/src/adapters/harness/skill-blocker.d.ts +17 -0
- package/dist/src/adapters/harness/skill-blocker.js +47 -0
- package/dist/src/adapters/harness/skill-blocker.js.map +1 -0
- package/dist/src/engine/runner.js +1 -0
- package/dist/src/engine/runner.js.map +1 -1
- package/dist/src/types.d.ts +1 -1
- package/package.json +2 -1
- package/plugin.json +1 -1
- package/src/adapters/harness/copilot-sdk.ts +14 -0
- package/src/adapters/harness/skill-blocker.ts +61 -0
- package/src/engine/runner.ts +1 -0
- package/src/types.ts +1 -1
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "snapeval",
|
|
3
|
+
"version": "4.0.1",
|
|
4
|
+
"description": "Harness-agnostic eval runner for agentskills.io skills",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"snapeval": "./dist/bin/snapeval.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"test": "vitest run",
|
|
11
|
+
"test:watch": "vitest",
|
|
12
|
+
"test:e2e": "vitest run --config vitest.e2e.config.ts",
|
|
13
|
+
"build": "rm -rf dist && tsc && cp package.json dist/",
|
|
14
|
+
"dev": "tsx bin/snapeval.ts",
|
|
15
|
+
"prepublishOnly": "npm run build && npm test"
|
|
16
|
+
},
|
|
17
|
+
"keywords": [
|
|
18
|
+
"ai-skills",
|
|
19
|
+
"agentskills",
|
|
20
|
+
"evaluation",
|
|
21
|
+
"harness"
|
|
22
|
+
],
|
|
23
|
+
"author": "Matan Tsach",
|
|
24
|
+
"license": "MIT",
|
|
25
|
+
"repository": {
|
|
26
|
+
"type": "git",
|
|
27
|
+
"url": "https://github.com/matantsach/snapeval.git"
|
|
28
|
+
},
|
|
29
|
+
"homepage": "https://github.com/matantsach/snapeval",
|
|
30
|
+
"bugs": {
|
|
31
|
+
"url": "https://github.com/matantsach/snapeval/issues"
|
|
32
|
+
},
|
|
33
|
+
"files": [
|
|
34
|
+
"dist/src/",
|
|
35
|
+
"dist/bin/",
|
|
36
|
+
"dist/package.json",
|
|
37
|
+
"bin/",
|
|
38
|
+
"src/",
|
|
39
|
+
"plugin.json",
|
|
40
|
+
"skills/create-evals/SKILL.md",
|
|
41
|
+
"skills/run-evals/SKILL.md",
|
|
42
|
+
"scripts/"
|
|
43
|
+
],
|
|
44
|
+
"dependencies": {
|
|
45
|
+
"@github/copilot-sdk": "^0.2.0",
|
|
46
|
+
"chalk": "^5.4.1",
|
|
47
|
+
"commander": "^14.0.3"
|
|
48
|
+
},
|
|
49
|
+
"devDependencies": {
|
|
50
|
+
"@types/node": "^25.5.0",
|
|
51
|
+
"tsx": "^4.19.3",
|
|
52
|
+
"typescript": "^5.8.2",
|
|
53
|
+
"vitest": "^4.1.0"
|
|
54
|
+
}
|
|
55
|
+
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as fs from 'node:fs';
|
|
2
2
|
import * as path from 'node:path';
|
|
3
3
|
import { getClient } from '../copilot-sdk-client.js';
|
|
4
|
+
import { createSkillBlockingPermissionHandler, createSkillBlockingHook, } from './skill-blocker.js';
|
|
4
5
|
export class CopilotSDKHarness {
|
|
5
6
|
name = 'copilot-sdk';
|
|
6
7
|
async run(options) {
|
|
@@ -20,6 +21,14 @@ export class CopilotSDKHarness {
|
|
|
20
21
|
if (options.skillPath) {
|
|
21
22
|
sessionConfig.skillDirectories = [path.dirname(options.skillPath)];
|
|
22
23
|
}
|
|
24
|
+
// Skill blocking: replace approveAll with blocking handler + hook
|
|
25
|
+
if (options.blockedSkillPath) {
|
|
26
|
+
const blockedDir = path.dirname(options.blockedSkillPath);
|
|
27
|
+
sessionConfig.onPermissionRequest = createSkillBlockingPermissionHandler(blockedDir);
|
|
28
|
+
sessionConfig.hooks = {
|
|
29
|
+
onPreToolUse: createSkillBlockingHook(blockedDir),
|
|
30
|
+
};
|
|
31
|
+
}
|
|
23
32
|
const session = await client.createSession(sessionConfig);
|
|
24
33
|
try {
|
|
25
34
|
// Attach input files if provided
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"copilot-sdk.js","sourceRoot":"","sources":["../../../../src/adapters/harness/copilot-sdk.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;
|
|
1
|
+
{"version":3,"file":"copilot-sdk.js","sourceRoot":"","sources":["../../../../src/adapters/harness/copilot-sdk.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAElC,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AACrD,OAAO,EACL,oCAAoC,EACpC,uBAAuB,GACxB,MAAM,oBAAoB,CAAC;AAE5B,MAAM,OAAO,iBAAiB;IACnB,IAAI,GAAG,aAAa,CAAC;IAE9B,KAAK,CAAC,GAAG,CAAC,OAMT;QACC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,MAAM,SAAS,EAAE,CAAC;QAEjC,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAErD,wCAAwC;QACxC,MAAM,EAAE,UAAU,EAAE,GAAG,MAAM,MAAM,CAAC,qBAAqB,CAAC,CAAC;QAE3D,uBAAuB;QACvB,MAAM,aAAa,GAA4B;YAC7C,KAAK,EAAE,SAAS;YAChB,mBAAmB,EAAE,UAAU;YAC/B,gBAAgB,EAAE,OAAO,CAAC,SAAS;YACnC,gBAAgB,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE;SACrC,CAAC;QAEF,qEAAqE;QACrE,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,aAAa,CAAC,gBAAgB,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC;QACrE,CAAC;QAED,kEAAkE;QAClE,IAAI,OAAO,CAAC,gBAAgB,EAAE,CAAC;YAC7B,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,gBAAgB,CAAC,CAAC;YAC1D,aAAa,CAAC,mBAAmB,GAAG,oCAAoC,CAAC,UAAU,CAAC,CAAC;YACrF,aAAa,CAAC,KAAK,GAAG;gBACpB,YAAY,EAAE,uBAAuB,CAAC,UAAU,CAAC;aAClD,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;QAE1D,IAAI,CAAC;YACH,iCAAiC;YACjC,MAAM,WAAW,GAAgE,EAAE,CAAC;YACpF,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;gBAClB,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;oBACjC,oEAAoE;oBACpE,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;oBAC/D,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;oBAC5B,WAAW,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,WAAW,EAAE,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACnF,CAAC;YACH,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,WAAW,CACxC;gBACE,MAAM,EAAE,OAAO,CAAC,MAAM;gBACtB,GAAG,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;aACnD,EACD,OAAO,CACR,CAAC;YAEF,MAAM,GAAG,GAAG,QAAQ,EAAE,IAAI,EAAE,OAAO,IAAI,EAAE,CAAC;YAE1C,8CAA8C;Y
AC9C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,WAAW,EAAE,CAAC;YAC3C,MAAM,UAAU,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;YAE3C,+EAA+E;YAC/E,MAAM,WAAW,GAAG,CAAC,CAAC;YAEtB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,OAAO,CAAC;YAExC,OAAO;gBACL,GAAG,EAAE,GAAG,CAAC,IAAI,EAAE;gBACf,UAAU;gBACV,KAAK,EAAE,EAAE;gBACT,YAAY,EAAE,WAAW;gBACzB,WAAW,EAAE,UAAU;aACxB,CAAC;QACJ,CAAC;gBAAS,CAAC;YACT,MAAM,OAAO,CAAC,UAAU,EAAE,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW;QACf,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAED,SAAS,eAAe,CAAC,MAAa;IACpC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,cAAc;gBACjB,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,CAAC,IAAI,EAAE,OAAO,IAAI,EAAE,EAAE,CAAC,CAAC;gBAClD,MAAM;YACR,KAAK,mBAAmB;gBACtB,KAAK,CAAC,IAAI,CAAC,eAAe,KAAK,CAAC,IAAI,EAAE,OAAO,IAAI,EAAE,EAAE,CAAC,CAAC;gBACvD,MAAM;YACR,KAAK,sBAAsB;gBACzB,KAAK,CAAC,IAAI,CAAC,gBAAgB,KAAK,CAAC,IAAI,EAAE,QAAQ,IAAI,SAAS,IAAI,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAC,GAAG,CAAC,CAAC;gBAChH,MAAM;YACR,KAAK,yBAAyB;gBAC5B,KAAK,CAAC,IAAI,CAAC,eAAe,KAAK,CAAC,IAAI,EAAE,QAAQ,IAAI,SAAS,MAAM,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,IAAI,EAAE,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;gBACrH,MAAM;YACR,KAAK,eAAe;gBAClB,KAAK,CAAC,IAAI,CAAC,WAAW,KAAK,CAAC,IAAI,EAAE,IAAI,IAAI,SAAS,KAAK,KAAK,CAAC,IAAI,EAAE,IAAI,IAAI,EAAE,GAAG,CAAC,CAAC;gBACnF,MAAM;YACR,KAAK,eAAe;gBAClB,KAAK,CAAC,IAAI,CAAC,WAAW,KAAK,CAAC,IAAI,EAAE,OAAO,IAAI,EAAE,EAAE,CAAC,CAAC;gBACnD,MAAM;QACV,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,QAAQ,CAAC,GAAW,EAAE,GAAW;IACxC,OAAO,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC;AAC5D,CAAC"}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { PermissionHandler } from '@github/copilot-sdk';
|
|
2
|
+
type PreToolUseHookInput = {
|
|
3
|
+
toolName: string;
|
|
4
|
+
toolArgs: unknown;
|
|
5
|
+
timestamp: number;
|
|
6
|
+
cwd: string;
|
|
7
|
+
};
|
|
8
|
+
type PreToolUseHookOutput = {
|
|
9
|
+
permissionDecision?: 'allow' | 'deny' | 'ask';
|
|
10
|
+
permissionDecisionReason?: string;
|
|
11
|
+
};
|
|
12
|
+
type PreToolUseHandler = (input: PreToolUseHookInput, invocation: {
|
|
13
|
+
sessionId: string;
|
|
14
|
+
}) => PreToolUseHookOutput | void;
|
|
15
|
+
export declare function createSkillBlockingHook(blockedDir: string): PreToolUseHandler;
|
|
16
|
+
export declare function createSkillBlockingPermissionHandler(blockedDir: string): PermissionHandler;
|
|
17
|
+
export {};
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
/**
 * Returns `dir` as an absolute path with a trailing `path.sep`.
 *
 * Symlinks are resolved through `fs.realpathSync`; when that call throws
 * (for example because the directory does not exist) the purely lexical
 * `path.resolve` result is used instead.
 */
function resolveDir(dir) {
    try {
        return fs.realpathSync(dir) + path.sep;
    }
    catch {
        return path.resolve(dir) + path.sep;
    }
}
// A character that would extend the last path segment. If the text following
// a match is one of these, the match is a longer sibling name
// (e.g. "<dir>-backup"), not the blocked directory.
const PATH_NAME_CHAR = /[\p{L}\p{N}_.-]/u;
/**
 * True when `text` contains `dir` as a complete path prefix: the occurrence
 * must be followed by the end of the text or by a character that cannot
 * continue a file name (separator, quote, whitespace, ...).
 */
function mentionsDir(text, dir) {
    let at = text.indexOf(dir);
    while (at !== -1) {
        const next = text.charAt(at + dir.length);
        if (next === '' || !PATH_NAME_CHAR.test(next)) {
            return true;
        }
        at = text.indexOf(dir, at + 1);
    }
    return false;
}
/**
 * Builds the search strings for a blocked directory: the symlink-resolved and
 * the lexically resolved spelling, each without the trailing separator, and
 * each additionally in the form it takes inside JSON text (backslashes and
 * quotes escaped).
 */
function buildBlockedNeedles(blockedDir) {
    const needles = new Set();
    for (const withSep of [resolveDir(blockedDir), path.resolve(blockedDir) + path.sep]) {
        const bare = withSep.slice(0, -path.sep.length);
        if (bare === '') {
            continue;
        }
        needles.add(bare);
        // JSON.stringify wraps the value in quotes; strip them to get the escaped body.
        needles.add(JSON.stringify(bare).slice(1, -1));
    }
    return [...needles];
}
/**
 * Creates a pre-tool-use hook that denies any tool call whose arguments
 * mention the blocked skill directory.
 *
 * String arguments are searched as-is; anything else is searched in its
 * `JSON.stringify` form. A call is denied when the arguments reference the
 * directory itself or anything beneath it; sibling paths that merely share
 * the prefix (e.g. "<dir>-backup") are not affected.
 *
 * @param blockedDir Directory that tools must not reference.
 * @returns A handler returning `{ permissionDecision: 'deny', ... }` for
 *   blocked calls and `{}` otherwise.
 */
export function createSkillBlockingHook(blockedDir) {
    const needles = buildBlockedNeedles(blockedDir);
    return (input) => {
        const argsText = typeof input.toolArgs === 'string'
            ? input.toolArgs
            // JSON.stringify yields undefined for values such as functions.
            : JSON.stringify(input.toolArgs ?? '') ?? '';
        if (needles.some((needle) => mentionsDir(argsText, needle))) {
            return {
                permissionDecision: 'deny',
                permissionDecisionReason: `Blocked: tool "${input.toolName}" references blocked skill directory`,
            };
        }
        return {};
    };
}
|
|
27
|
+
/**
 * Creates a permission handler that refuses `read` requests targeting the
 * blocked skill directory and approves every other request.
 *
 * The requested path is canonicalised with `fs.realpathSync` before it is
 * compared, so it is matched against the resolved form of `blockedDir`.
 *
 * @param blockedDir Directory whose contents must not be read.
 * @returns A handler producing `{ kind: 'denied-by-rules', ... }` for blocked
 *   reads and `{ kind: 'approved' }` otherwise.
 */
export function createSkillBlockingPermissionHandler(blockedDir) {
    const blockedPrefix = resolveDir(blockedDir);
    // Same directory without the trailing separator, for the exact-match case.
    const blockedRoot = blockedPrefix.slice(0, -1);
    const isBlocked = (candidate) => {
        let real;
        try {
            real = fs.realpathSync(candidate);
        }
        catch {
            // realpathSync failed (e.g. the path does not exist), so it is not
            // treated as part of the blocked directory.
            return false;
        }
        return real === blockedRoot || real.startsWith(blockedPrefix);
    };
    return (request) => {
        const target = request.kind === 'read' ? request.path : undefined;
        if (typeof target === 'string' && isBlocked(target)) {
            return { kind: 'denied-by-rules', rules: [{ reason: 'skill-access-blocked' }] };
        }
        return { kind: 'approved' };
    };
}
|
|
47
|
+
//# sourceMappingURL=skill-blocker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"skill-blocker.js","sourceRoot":"","sources":["../../../../src/adapters/harness/skill-blocker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AASlC,SAAS,UAAU,CAAC,GAAW;IAC7B,IAAI,CAAC;QACH,OAAO,EAAE,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC;IACtC,CAAC;AACH,CAAC;AAED,MAAM,UAAU,uBAAuB,CACrC,UAAkB;IAElB,MAAM,QAAQ,GAAG,UAAU,CAAC,UAAU,CAAC,CAAC;IACxC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC;IAEhD,OAAO,CAAC,KAAK,EAAE,EAAE;QACf,MAAM,OAAO,GAAG,OAAO,KAAK,CAAC,QAAQ,KAAK,QAAQ;YAChD,CAAC,CAAC,KAAK,CAAC,QAAQ;YAChB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC;QAEzC,IAAI,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YACxD,OAAO;gBACL,kBAAkB,EAAE,MAAe;gBACnC,wBAAwB,EAAE,kBAAkB,KAAK,CAAC,QAAQ,sCAAsC;aACjG,CAAC;QACJ,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,oCAAoC,CAClD,UAAkB;IAElB,MAAM,QAAQ,GAAG,UAAU,CAAC,UAAU,CAAC,CAAC;IAExC,OAAO,CAAC,OAAO,EAAE,EAAE;QACjB,IAAI,OAAO,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAI,OAA8B,CAAC,IAAI,CAAC;YACtD,IAAI,OAAO,QAAQ,KAAK,QAAQ,EAAE,CAAC;gBACjC,IAAI,CAAC;oBACH,MAAM,YAAY,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,CAAC;oBAC/C,IAAI,YAAY,KAAK,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,YAAY,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;wBAChF,OAAO,EAAE,IAAI,EAAE,iBAAiB,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,sBAAsB,EAAE,CAAC,EAAE,CAAC;oBAClF,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,8CAA8C;gBAChD,CAAC;YACH,CAAC;QACH,CAAC;QACD,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;IAC9B,CAAC,CAAC;AACJ,CAAC"}
|
|
@@ -23,6 +23,7 @@ export async function runEval(evalCase, skillPath, evalDir, harness, oldSkillPat
|
|
|
23
23
|
}),
|
|
24
24
|
harness.run({
|
|
25
25
|
skillPath: oldSkillPath,
|
|
26
|
+
blockedSkillPath: skillPath,
|
|
26
27
|
prompt: evalCase.prompt,
|
|
27
28
|
files: evalCase.files,
|
|
28
29
|
outputDir: path.join(baselineDir, 'outputs'),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../../src/engine/runner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAYlC,SAAS,WAAW,CAAC,GAAW,EAAE,MAAwB;IACxD,MAAM,MAAM,GAAe,EAAE,YAAY,EAAE,MAAM,CAAC,YAAY,EAAE,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,CAAC;IAClG,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AACnF,CAAC;AAED,SAAS,WAAW,CAAC,GAAW,EAAE,MAAwB;IACxD,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,SAAS,EAAE,YAAY,CAAC,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC;IACtE,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACtB,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,gBAAgB,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IACxE,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,QAAkB,EAClB,SAAiB,EACjB,OAAe,EACf,OAAgB,EAChB,YAAqB;IAErB,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;IACtD,MAAM,eAAe,GAAG,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,eAAe,CAAC;IACrE,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,eAAe,CAAC,CAAC;IAExD,MAAM,CAAC,eAAe,EAAE,cAAc,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC;YACV,SAAS;YACT,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,SAAS,CAAC;SAC9C,CAAC;QACF,OAAO,CAAC,GAAG,CAAC;YACV,SAAS,EAAE,YAAY;YACvB,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,SAAS,CAAC;SAC7C,CAAC;KACH,CAAC,CAAC;IACH,WAAW,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;IAC3C,WAAW,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;IAC3C,WAAW,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IACzC,WAAW,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IAEzC,OAAO;QACL,MAAM,EAAE,QAAQ,CAAC,EAAE;QACnB,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,GAAG,QAAQ,CAAC,EAAE,EAAE;QACvC,KAAK,EAAE,QAAQ,CAAC,KAAK;QACrB,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,SAAS,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE;QACtC,YAAY,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE;KACzC,CAAC;AACJ,CAAC"}
|
|
1
|
+
{"version":3,"file":"runner.js","sourceRoot":"","sources":["../../../src/engine/runner.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,SAAS,CAAC;AAC9B,OAAO,KAAK,IAAI,MAAM,WAAW,CAAC;AAYlC,SAAS,WAAW,CAAC,GAAW,EAAE,MAAwB;IACxD,MAAM,MAAM,GAAe,EAAE,YAAY,EAAE,MAAM,CAAC,YAAY,EAAE,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,CAAC;IAClG,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,aAAa,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AACnF,CAAC;AAED,SAAS,WAAW,CAAC,GAAW,EAAE,MAAwB;IACxD,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,SAAS,EAAE,YAAY,CAAC,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC;IACtE,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QACtB,EAAE,CAAC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,gBAAgB,CAAC,EAAE,MAAM,CAAC,UAAU,CAAC,CAAC;IACxE,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,QAAkB,EAClB,SAAiB,EACjB,OAAe,EACf,OAAgB,EAChB,YAAqB;IAErB,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;IACtD,MAAM,eAAe,GAAG,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,eAAe,CAAC;IACrE,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,eAAe,CAAC,CAAC;IAExD,MAAM,CAAC,eAAe,EAAE,cAAc,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC;YACV,SAAS;YACT,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,SAAS,CAAC;SAC9C,CAAC;QACF,OAAO,CAAC,GAAG,CAAC;YACV,SAAS,EAAE,YAAY;YACvB,gBAAgB,EAAE,SAAS;YAC3B,MAAM,EAAE,QAAQ,CAAC,MAAM;YACvB,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,SAAS,CAAC;SAC7C,CAAC;KACH,CAAC,CAAC;IACH,WAAW,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;IAC3C,WAAW,CAAC,YAAY,EAAE,eAAe,CAAC,CAAC;IAC3C,WAAW,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IACzC,WAAW,CAAC,WAAW,EAAE,cAAc,CAAC,CAAC;IAEzC,OAAO;QACL,MAAM,EAAE,QAAQ,CAAC,EAAE;QACnB,IAAI,EAAE,QAAQ,CAAC,IAAI,IAAI,GAAG,QAAQ,CAAC,EAAE,EAAE;QACvC,KAAK,EAAE,QAAQ,CAAC,KAAK;QACrB,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,SAAS,EAAE,EAAE,MAAM,EAAE,eAAe,EAAE;QACtC,YAAY,EAAE,EAAE,MAAM,EAAE,cAAc,EAAE;KACzC,CAAC;AACJ,CAAC"}
|
package/dist/src/types.d.ts
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "snapeval",
|
|
3
|
-
"version": "3.0.0",
|
|
3
|
+
"version": "4.0.1",
|
|
4
4
|
"description": "Harness-agnostic eval runner for agentskills.io skills",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
"files": [
|
|
34
34
|
"dist/src/",
|
|
35
35
|
"dist/bin/",
|
|
36
|
+
"dist/package.json",
|
|
36
37
|
"bin/",
|
|
37
38
|
"src/",
|
|
38
39
|
"plugin.json",
|
package/plugin.json
CHANGED
|
@@ -2,12 +2,17 @@ import * as fs from 'node:fs';
|
|
|
2
2
|
import * as path from 'node:path';
|
|
3
3
|
import type { Harness, HarnessRunResult } from '../../types.js';
|
|
4
4
|
import { getClient } from '../copilot-sdk-client.js';
|
|
5
|
+
import {
|
|
6
|
+
createSkillBlockingPermissionHandler,
|
|
7
|
+
createSkillBlockingHook,
|
|
8
|
+
} from './skill-blocker.js';
|
|
5
9
|
|
|
6
10
|
export class CopilotSDKHarness implements Harness {
|
|
7
11
|
readonly name = 'copilot-sdk';
|
|
8
12
|
|
|
9
13
|
async run(options: {
|
|
10
14
|
skillPath?: string;
|
|
15
|
+
blockedSkillPath?: string;
|
|
11
16
|
prompt: string;
|
|
12
17
|
files?: string[];
|
|
13
18
|
outputDir: string;
|
|
@@ -33,6 +38,15 @@ export class CopilotSDKHarness implements Harness {
|
|
|
33
38
|
sessionConfig.skillDirectories = [path.dirname(options.skillPath)];
|
|
34
39
|
}
|
|
35
40
|
|
|
41
|
+
// Skill blocking: replace approveAll with blocking handler + hook
|
|
42
|
+
if (options.blockedSkillPath) {
|
|
43
|
+
const blockedDir = path.dirname(options.blockedSkillPath);
|
|
44
|
+
sessionConfig.onPermissionRequest = createSkillBlockingPermissionHandler(blockedDir);
|
|
45
|
+
sessionConfig.hooks = {
|
|
46
|
+
onPreToolUse: createSkillBlockingHook(blockedDir),
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
|
|
36
50
|
const session = await client.createSession(sessionConfig);
|
|
37
51
|
|
|
38
52
|
try {
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import * as fs from 'node:fs';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import type { PermissionHandler } from '@github/copilot-sdk';
|
|
4
|
+
|
|
5
|
+
// PreToolUseHandler is not re-exported from @github/copilot-sdk's public API,
|
|
6
|
+
// so we define the minimal type inline matching the SDK's internal definition.
|
|
7
|
+
type PreToolUseHookInput = { toolName: string; toolArgs: unknown; timestamp: number; cwd: string };
type PreToolUseHookOutput = { permissionDecision?: 'allow' | 'deny' | 'ask'; permissionDecisionReason?: string };
type PreToolUseHandler = (input: PreToolUseHookInput, invocation: { sessionId: string }) => PreToolUseHookOutput | void;

/**
 * Returns `dir` as an absolute path with a trailing `path.sep`.
 *
 * Symlinks are resolved through `fs.realpathSync`; when that call throws
 * (for example because the directory does not exist) the purely lexical
 * `path.resolve` result is used instead.
 */
function resolveDir(dir: string): string {
  try {
    return fs.realpathSync(dir) + path.sep;
  } catch {
    return path.resolve(dir) + path.sep;
  }
}

// A character that would extend the last path segment. If the text following
// a match is one of these, the match is a longer sibling name
// (e.g. "<dir>-backup"), not the blocked directory.
const PATH_NAME_CHAR = /[\p{L}\p{N}_.-]/u;

/**
 * True when `text` contains `dir` as a complete path prefix: the occurrence
 * must be followed by the end of the text or by a character that cannot
 * continue a file name (separator, quote, whitespace, ...).
 */
function mentionsDir(text: string, dir: string): boolean {
  let at = text.indexOf(dir);
  while (at !== -1) {
    const next = text.charAt(at + dir.length);
    if (next === '' || !PATH_NAME_CHAR.test(next)) {
      return true;
    }
    at = text.indexOf(dir, at + 1);
  }
  return false;
}

/**
 * Builds the search strings for a blocked directory: the symlink-resolved and
 * the lexically resolved spelling, each without the trailing separator, and
 * each additionally in the form it takes inside JSON text (backslashes and
 * quotes escaped).
 */
function buildBlockedNeedles(blockedDir: string): string[] {
  const needles = new Set<string>();
  for (const withSep of [resolveDir(blockedDir), path.resolve(blockedDir) + path.sep]) {
    const bare = withSep.slice(0, -path.sep.length);
    if (bare === '') {
      continue;
    }
    needles.add(bare);
    // JSON.stringify wraps the value in quotes; strip them to get the escaped body.
    needles.add(JSON.stringify(bare).slice(1, -1));
  }
  return [...needles];
}

/**
 * Creates a pre-tool-use hook that denies any tool call whose arguments
 * mention the blocked skill directory.
 *
 * String arguments are searched as-is; anything else is searched in its
 * `JSON.stringify` form. A call is denied when the arguments reference the
 * directory itself or anything beneath it; sibling paths that merely share
 * the prefix (e.g. "<dir>-backup") are not affected.
 *
 * @param blockedDir Directory that tools must not reference.
 * @returns A handler returning `{ permissionDecision: 'deny', ... }` for
 *   blocked calls and `{}` otherwise.
 */
export function createSkillBlockingHook(
  blockedDir: string,
): PreToolUseHandler {
  const needles = buildBlockedNeedles(blockedDir);

  return (input) => {
    const argsText = typeof input.toolArgs === 'string'
      ? input.toolArgs
      // JSON.stringify yields undefined for values such as functions.
      : (JSON.stringify(input.toolArgs ?? '') as string | undefined) ?? '';

    if (needles.some((needle) => mentionsDir(argsText, needle))) {
      return {
        permissionDecision: 'deny' as const,
        permissionDecisionReason: `Blocked: tool "${input.toolName}" references blocked skill directory`,
      };
    }
    return {};
  };
}
|
|
39
|
+
|
|
40
|
+
/**
 * Creates a permission handler that refuses `read` requests targeting the
 * blocked skill directory and approves every other request.
 *
 * The requested path is canonicalised with `fs.realpathSync` before it is
 * compared, so it is matched against the resolved form of `blockedDir`.
 *
 * @param blockedDir Directory whose contents must not be read.
 * @returns A handler producing `{ kind: 'denied-by-rules', ... }` for blocked
 *   reads and `{ kind: 'approved' }` otherwise.
 */
export function createSkillBlockingPermissionHandler(
  blockedDir: string,
): PermissionHandler {
  const blockedPrefix = resolveDir(blockedDir);
  // Same directory without the trailing separator, for the exact-match case.
  const blockedRoot = blockedPrefix.slice(0, -1);

  const isBlocked = (candidate: string): boolean => {
    let real: string;
    try {
      real = fs.realpathSync(candidate);
    } catch {
      // realpathSync failed (e.g. the path does not exist), so it is not
      // treated as part of the blocked directory.
      return false;
    }
    return real === blockedRoot || real.startsWith(blockedPrefix);
  };

  return (request) => {
    const target = request.kind === 'read'
      ? (request as { path?: unknown }).path
      : undefined;
    if (typeof target === 'string' && isBlocked(target)) {
      return { kind: 'denied-by-rules', rules: [{ reason: 'skill-access-blocked' }] };
    }
    return { kind: 'approved' };
  };
}
|
package/src/engine/runner.ts
CHANGED