evalify-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/publish.d.ts +1 -0
- package/dist/commands/publish.js +86 -0
- package/dist/commands/publish.js.map +1 -0
- package/dist/commands/pull.d.ts +1 -0
- package/dist/commands/pull.js +63 -0
- package/dist/commands/pull.js.map +1 -0
- package/dist/commands/search.d.ts +1 -0
- package/dist/commands/search.js +21 -0
- package/dist/commands/search.js.map +1 -0
- package/dist/commands/validate.d.ts +1 -0
- package/dist/commands/validate.js +179 -0
- package/dist/commands/validate.js.map +1 -0
- package/dist/format.d.ts +8 -0
- package/dist/format.js +32 -0
- package/dist/format.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +38 -0
- package/dist/index.js.map +1 -0
- package/dist/validator.d.ts +9 -0
- package/dist/validator.js +92 -0
- package/dist/validator.js.map +1 -0
- package/package.json +25 -0
- package/src/commands/publish.ts +100 -0
- package/src/commands/pull.ts +81 -0
- package/src/commands/search.ts +23 -0
- package/src/commands/validate.ts +200 -0
- package/src/format.ts +42 -0
- package/src/index.ts +45 -0
- package/src/validator.ts +112 -0
- package/tsconfig.json +17 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Locates an evals.json under `targetPath` (current directory when omitted),
 * validates it and prints a publish summary. Resolves once all output has been printed.
 */
export declare function publish(targetPath?: string): Promise<void>;
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import { getFramework } from "@evalify/frameworks";
|
|
5
|
+
import { header, success, info, dim, error, warn } from "../format.js";
|
|
6
|
+
import { validateEvalsJson } from "../validator.js";
|
|
7
|
+
/**
 * Resolve the evals file for a target path.
 *
 * A path that points at a regular file is returned unchanged. Otherwise the
 * path is treated as a folder and probed for `evals/evals.json` first, then
 * `evals.json`; the first accessible candidate wins.
 *
 * @param targetPath Path to an existing file or folder (fs.stat rejects otherwise).
 * @returns The path of the evals file, or null when no candidate is accessible.
 */
async function findEvalsFile(targetPath) {
    const targetInfo = await fs.stat(targetPath);
    if (!targetInfo.isFile()) {
        // Probe order matters: the nested evals/ folder takes precedence.
        const layouts = [["evals", "evals.json"], ["evals.json"]];
        for (const segments of layouts) {
            const probe = path.join(targetPath, ...segments);
            const reachable = await fs.access(probe).then(() => true, () => false);
            if (reachable) {
                return probe;
            }
        }
        return null;
    }
    return targetPath;
}
|
|
26
|
+
/**
 * Validate an evals file and print what would be published (dry run).
 *
 * Resolves `targetPath` (default ".") against the current working directory,
 * locates the evals file via findEvalsFile, validates its content with
 * validateEvalsJson and prints a summary. Nothing is sent to the registry.
 *
 * Every failure path (path missing, no evals.json, unreadable file, failed
 * validation) sets `process.exitCode = 1` so scripts and CI can detect it.
 *
 * @param targetPath Optional file or folder path; defaults to the current directory.
 * @returns A promise that resolves once all output has been printed.
 */
export async function publish(targetPath) {
    header();
    const shownPath = targetPath || ".";
    const resolvedPath = path.resolve(process.cwd(), shownPath);
    try {
        await fs.access(resolvedPath);
    }
    catch {
        error(`Path not found: ${shownPath}`);
        console.log();
        process.exitCode = 1;
        return;
    }
    info("Publishing eval criteria to registry...");
    console.log();
    const filePath = await findEvalsFile(resolvedPath);
    if (!filePath) {
        error("No evals.json found");
        dim("Looked in:");
        dim(` ${resolvedPath}/evals.json`);
        dim(` ${resolvedPath}/evals/evals.json`);
        console.log();
        process.exitCode = 1;
        return;
    }
    const relativeFile = path.relative(process.cwd(), filePath);
    success(`Found ${relativeFile}`);
    // Reading can still fail (permissions, file removed meanwhile); report it
    // instead of letting the rejection escape the command.
    let content;
    try {
        content = await fs.readFile(filePath, "utf-8");
    }
    catch (err) {
        error(`Failed to read file: ${err instanceof Error ? err.message : String(err)}`);
        console.log();
        process.exitCode = 1;
        return;
    }
    const result = validateEvalsJson(content);
    if (!result.valid) {
        console.log();
        error("Validation failed — cannot publish");
        for (const e of result.errors) {
            error(e);
        }
        console.log();
        process.exitCode = 1;
        return;
    }
    for (const w of result.warnings) {
        warn(w);
    }
    console.log();
    console.log(chalk.bold(" Publish summary:"));
    console.log();
    // Optional metadata: printed only when the validator's summary carries a value.
    const optionalFields = [
        ["Skill", "skill_name"],
        ["Name", "name"],
        ["Version", "version"],
        ["Description", "description"],
    ];
    for (const [label, key] of optionalFields) {
        if (result.summary[key]) {
            dim(`${label}: ${result.summary[key]}`);
        }
    }
    // Fall back to the raw format id when the framework registry has no entry for it.
    dim(`Format: ${getFramework(result.format)?.meta.name ?? result.format}`);
    dim(`Eval count: ${result.evalCount}`);
    dim(`File: ${relativeFile}`);
    console.log();
    warn("Dry run — publishing is not yet connected to the registry");
    success("File is valid and ready to publish");
    console.log();
}
|
|
86
|
+
//# sourceMappingURL=publish.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"publish.js","sourceRoot":"","sources":["../../src/commands/publish.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAEpD,KAAK,UAAU,aAAa,CAAC,UAAkB;IAC7C,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAEvC,IAAI,IAAI,CAAC,MAAM,EAAE;QAAE,OAAO,UAAU,CAAC;IAErC,MAAM,UAAU,GAAG;QACjB,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,OAAO,EAAE,YAAY,CAAC;QAC5C,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,CAAC;KACpC,CAAC;IAEF,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC3B,OAAO,SAAS,CAAC;QACnB,CAAC;QAAC,MAAM,CAAC;YACP,sBAAsB;QACxB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,UAAmB;IAC/C,MAAM,EAAE,CAAC;IAET,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,UAAU,IAAI,GAAG,CAAC,CAAC;IAEpE,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,KAAK,CAAC,mBAAmB,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO;IACT,CAAC;IAED,IAAI,CAAC,yCAAyC,CAAC,CAAC;IAChD,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,MAAM,QAAQ,GAAG,MAAM,aAAa,CAAC,YAAY,CAAC,CAAC;IAEnD,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,KAAK,CAAC,qBAAqB,CAAC,CAAC;QAC7B,GAAG,CAAC,YAAY,CAAC,CAAC;QAClB,GAAG,CAAC,KAAK,YAAY,aAAa,CAAC,CAAC;QACpC,GAAG,CAAC,KAAK,YAAY,mBAAmB,CAAC,CAAC;QAC1C,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO;IACT,CAAC;IAED,OAAO,CAAC,SAAS,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;IAE3D,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACrD,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAE1C,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QAClB,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,KAAK,CAAC,oCAAoC,CAAC,CAAC;QAC5C,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAC9B,KAAK,CAAC,CAAC,CAAC,CAAC;QACX,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,CAAC;QAC
d,OAAO;IACT,CAAC;IAED,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QAChC,IAAI,CAAC,CAAC,CAAC,CAAC;IACV,CAAC;IAED,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,CAAC;IAC9C,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC;QACjC,GAAG,CAAC,gBAAgB,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;IACtD,CAAC;IACD,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3B,GAAG,CAAC,gBAAgB,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAChD,CAAC;IACD,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,GAAG,CAAC,gBAAgB,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IACnD,CAAC;IACD,IAAI,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC;QAClC,GAAG,CAAC,gBAAgB,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,EAAE,CAAC,CAAC;IACvD,CAAC;IACD,GAAG,CAAC,gBAAgB,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,IAAI,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAC/E,GAAG,CAAC,gBAAgB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;IACxC,GAAG,CAAC,gBAAgB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;IAE9D,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,IAAI,CAAC,2DAA2D,CAAC,CAAC;IAClE,OAAO,CAAC,oCAAoC,CAAC,CAAC;IAC9C,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Fetches the eval criteria identified by `slug` from the registry and writes
 * them to `evals/<slug>/evals.json` under the current working directory.
 */
export declare function pull(slug: string): Promise<void>;
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import { header, success, info, dim, error } from "../format.js";
|
|
4
|
+
// Base URL of the registry API; pull() appends "/<slug>" to fetch one criteria pack.
const REGISTRY_URL = "https://evalify.sh/api/registry";
|
|
5
|
+
/**
 * Download one eval criteria pack from the registry into `evals/<slug>/evals.json`.
 *
 * Steps: validate the slug, GET `${REGISTRY_URL}/<slug>`, check the response
 * shape, then write a trimmed copy of the pack (slug, displayName, version,
 * description, domain, author, tags, evals) as pretty-printed JSON.
 *
 * Every failure path prints an error and sets `process.exitCode = 1`.
 *
 * @param slug Registry identifier of the pack; must resolve to a location inside `./evals`.
 * @returns A promise that resolves once all output has been printed.
 */
export async function pull(slug) {
    header();
    const fail = (message) => {
        error(message);
        console.log();
        process.exitCode = 1;
    };
    // Thrown values are not guaranteed to be Error instances.
    const describe = (err) => (err instanceof Error ? err.message : String(err));
    if (!slug) {
        fail("Missing slug argument");
        return;
    }
    // The slug becomes part of a filesystem path: refuse anything that would
    // resolve to the evals folder itself or escape it ("..", absolute paths).
    const evalsRoot = path.resolve(process.cwd(), "evals");
    const targetDir = path.resolve(evalsRoot, slug);
    const relativeTarget = path.relative(evalsRoot, targetDir);
    const escapesRoot = relativeTarget === "" ||
        relativeTarget === ".." ||
        relativeTarget.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativeTarget);
    if (escapesRoot) {
        fail(`Invalid slug: ${slug}`);
        return;
    }
    info(`Pulling eval criteria: ${slug}`);
    console.log();
    let pack;
    try {
        // Encode each path segment so characters such as "?" or "#" cannot alter the URL.
        const encodedSlug = slug.split("/").map(encodeURIComponent).join("/");
        const res = await fetch(`${REGISTRY_URL}/${encodedSlug}`);
        if (res.status === 404) {
            error(`Criteria not found: ${slug}`);
            dim(`Check the registry at https://evalify.sh`);
            console.log();
            process.exitCode = 1;
            return;
        }
        if (!res.ok) {
            fail(`Registry returned ${res.status}`);
            return;
        }
        pack = await res.json();
    }
    catch (err) {
        fail(`Failed to reach registry: ${describe(err)}`);
        return;
    }
    // Check the payload before touching the disk; a missing evals array would
    // otherwise throw after the file had already been written.
    if (pack === null || typeof pack !== "object" || !Array.isArray(pack.evals)) {
        fail("Registry returned an unexpected response (missing evals array)");
        return;
    }
    const targetFile = path.join(targetDir, "evals.json");
    try {
        await fs.mkdir(targetDir, { recursive: true });
        const output = {
            slug: pack.slug,
            displayName: pack.displayName,
            version: pack.version,
            description: pack.description,
            domain: pack.domain,
            author: pack.author,
            tags: pack.tags,
            evals: pack.evals,
        };
        await fs.writeFile(targetFile, JSON.stringify(output, null, 2) + "\n");
        const evalCount = pack.evals.length;
        success(`Pulled ${pack.displayName} v${pack.version}`);
        success(`Wrote ${evalCount} eval${evalCount !== 1 ? "s" : ""} to evals/${slug}/evals.json`);
        console.log();
        dim(`Author: ${pack.author}`);
        dim(`Domain: ${pack.domain}`);
        dim(`Location: ${targetFile}`);
        dim(`To validate: evalify validate evals/${slug}`);
    }
    catch (err) {
        error(`Failed to write file: ${describe(err)}`);
        process.exitCode = 1;
    }
    console.log();
}
|
|
63
|
+
//# sourceMappingURL=pull.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pull.js","sourceRoot":"","sources":["../../src/commands/pull.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAEjE,MAAM,YAAY,GAAG,iCAAiC,CAAC;AAEvD,MAAM,CAAC,KAAK,UAAU,IAAI,CAAC,IAAY;IACrC,MAAM,EAAE,CAAC;IAET,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,KAAK,CAAC,uBAAuB,CAAC,CAAC;QAC/B,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO;IACT,CAAC;IAED,IAAI,CAAC,0BAA0B,IAAI,EAAE,CAAC,CAAC;IACvC,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,IAAI,IASH,CAAC;IAEF,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,YAAY,IAAI,IAAI,EAAE,CAAC,CAAC;QACnD,IAAI,GAAG,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YACvB,KAAK,CAAC,uBAAuB,IAAI,EAAE,CAAC,CAAC;YACrC,GAAG,CAAC,0CAA0C,CAAC,CAAC;YAChD,OAAO,CAAC,GAAG,EAAE,CAAC;YACd,OAAO;QACT,CAAC;QACD,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,KAAK,CAAC,qBAAqB,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC;YACzC,OAAO,CAAC,GAAG,EAAE,CAAC;YACd,OAAO;QACT,CAAC;QACD,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;IAC1B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,KAAK,CAAC,6BAA8B,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;QAC7D,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO;IACT,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,YAAY,CAAC,CAAC;IAEtD,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAE/C,MAAM,MAAM,GAAG;YACb,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,KAAK,EAAE,IAAI,CAAC,KAAK;SAClB,CAAC;QAEF,MAAM,EAAE,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC;QAEvE,OAAO,CAAC,UAAU,IAAI,CAAC,WAAW,KAAK,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;QACvD,OAAO,CAAC,SAAS,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,aAAa,IAAI,aAAa,CAAC,CAAC;QAC5
G,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,GAAG,CAAC,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAChC,GAAG,CAAC,aAAa,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QAChC,GAAG,CAAC,aAAa,UAAU,EAAE,CAAC,CAAC;QAC/B,GAAG,CAAC,uCAAuC,IAAI,EAAE,CAAC,CAAC;IACrD,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,KAAK,CAAC,yBAA0B,GAAa,CAAC,OAAO,EAAE,CAAC,CAAC;IAC3D,CAAC;IAED,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Prints a results table for `query`. The visible implementation shows fixed
 * placeholder rows; registry search is not connected yet.
 */
export declare function search(query: string): Promise<void>;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
import { header, info, dim, table } from "../format.js";
|
|
3
|
+
/**
 * Print the search banner and a table of placeholder results.
 *
 * The query is only echoed back; the rows below are hard-coded because the
 * registry search is not connected yet.
 *
 * @param query Search text entered by the user.
 */
export async function search(query) {
    header();
    info(`Searching registry for: ${chalk.bold(query)}`);
    console.log();
    // Placeholder results for now
    const headings = ["Slug", "Description", "Evals"].map((title) => chalk.bold(title));
    const rules = [20, 35, 5].map((width) => chalk.dim("─".repeat(width)));
    const placeholders = [
        ["code-review", "Code review quality criteria", "12"],
        ["summarization", "Text summarization accuracy", "8"],
        ["safety-checks", "Safety and content policy evals", "24"],
    ];
    table([headings, rules, ...placeholders]);
    console.log();
    dim(`Showing placeholder results — registry search not yet connected`);
    dim(`Use: evalify pull <slug> to download criteria`);
    console.log();
}
|
|
21
|
+
//# sourceMappingURL=search.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../../src/commands/search.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,cAAc,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,MAAM,CAAC,KAAa;IACxC,MAAM,EAAE,CAAC;IACT,IAAI,CAAC,2BAA2B,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,8BAA8B;IAC9B,MAAM,OAAO,GAAG;QACd,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACpE,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,EAAE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,EAAE,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;QAChF,CAAC,aAAa,EAAE,8BAA8B,EAAE,IAAI,CAAC;QACrD,CAAC,eAAe,EAAE,6BAA6B,EAAE,GAAG,CAAC;QACrD,CAAC,eAAe,EAAE,iCAAiC,EAAE,IAAI,CAAC;KAC3D,CAAC;IAEF,KAAK,CAAC,OAAO,CAAC,CAAC;IACf,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,GAAG,CAAC,iEAAiE,CAAC,CAAC;IACvE,GAAG,CAAC,+CAA+C,CAAC,CAAC;IACrD,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Locates an evals.json under `targetPath` (current directory when omitted),
 * validates it, checks referenced companion files and prints errors, warnings and a summary.
 */
export declare function validate(targetPath?: string): Promise<void>;
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import { getFramework } from "@evalify/frameworks";
|
|
5
|
+
import { header, success, info, dim, error, warn } from "../format.js";
|
|
6
|
+
import { validateEvalsJson } from "../validator.js";
|
|
7
|
+
// Lower-case file extensions that scanCompanionFiles accepts as companion files.
const ALLOWED_EXTENSIONS = new Set([
    // data and documents
    ".json",
    ".md",
    ".txt",
    ".csv",
    ".yaml",
    ".yml",
    // source code and queries
    ".ts",
    ".tsx",
    ".js",
    ".jsx",
    ".py",
    ".sql",
    // markup, styles and configuration
    ".html",
    ".css",
    ".xml",
    ".toml",
]);
|
|
12
|
+
/**
 * Locate the evals file for a target path together with its root folder.
 *
 * For a regular file the root is the file's parent directory. For a folder the
 * candidates `evals/evals.json` and `evals.json` are probed in that order and
 * the folder itself is the root.
 *
 * @param targetPath Path to an existing file or folder (fs.stat rejects otherwise).
 * @returns `{ evalsPath, rootDir }`, or null when no candidate is accessible.
 */
async function findEvalsJson(targetPath) {
    const targetInfo = await fs.stat(targetPath);
    if (!targetInfo.isFile()) {
        // It's a directory — scan for evals.json, nested layout first
        const layouts = [["evals", "evals.json"], ["evals.json"]];
        for (const segments of layouts) {
            const probe = path.join(targetPath, ...segments);
            const reachable = await fs.access(probe).then(() => true, () => false);
            if (reachable) {
                return { evalsPath: probe, rootDir: targetPath };
            }
        }
        return null;
    }
    return { evalsPath: targetPath, rootDir: path.dirname(targetPath) };
}
|
|
33
|
+
/**
 * Collect companion files that live next to an evals file.
 *
 * Walks `rootDir` recursively, skipping dot-prefixed entries and the evals file
 * itself, and keeps regular files whose lower-cased extension is listed in
 * ALLOWED_EXTENSIONS.
 *
 * @param rootDir Folder to scan; returned paths are relative to it.
 * @param evalsPath Path of the evals file, excluded from the result.
 * @param maxDepth Deepest folder level to read; the root is level 0 (default 2).
 * @returns Relative paths of the companion files, in directory-listing order.
 */
async function scanCompanionFiles(rootDir, evalsPath, maxDepth = 2) {
    const evalsRelative = path.relative(rootDir, evalsPath);
    const collected = [];
    const visit = async (folder, level) => {
        if (level > maxDepth) {
            return;
        }
        const children = await fs.readdir(folder, { withFileTypes: true });
        for (const child of children) {
            const hidden = child.name.startsWith(".");
            if (hidden) {
                continue;
            }
            const absolute = path.join(folder, child.name);
            if (child.isDirectory()) {
                await visit(absolute, level + 1);
                continue;
            }
            if (!child.isFile()) {
                // Neither a folder nor a regular file: ignored.
                continue;
            }
            const relative = path.relative(rootDir, absolute);
            if (relative === evalsRelative) {
                continue;
            }
            const extension = path.extname(child.name).toLowerCase();
            if (ALLOWED_EXTENSIONS.has(extension)) {
                collected.push(relative);
            }
        }
    };
    await visit(rootDir, 0);
    return collected;
}
|
|
61
|
+
/**
 * Collect the distinct file references declared by the evals in a JSON document.
 *
 * The document is either an array of evals or an object with an `evals` array.
 * For every eval whose `files` property is an array, each non-empty string
 * entry is collected once, in first-seen order.
 *
 * Malformed input never throws: invalid JSON, a non-object top level, a
 * missing or non-array `evals`, and null or non-object eval items all
 * contribute nothing to the result.
 *
 * @param content Raw text of the evals file.
 * @returns De-duplicated list of referenced file paths (possibly empty).
 */
function extractFileRefs(content) {
    let parsed;
    try {
        parsed = JSON.parse(content);
    }
    catch {
        // Not valid JSON: there is nothing to extract.
        return [];
    }
    const evals = Array.isArray(parsed) ? parsed : parsed?.evals;
    if (!Array.isArray(evals)) {
        return [];
    }
    const refs = new Set();
    for (const item of evals) {
        const files = item?.files;
        if (!Array.isArray(files)) {
            continue;
        }
        for (const f of files) {
            if (typeof f === "string" && f.length > 0) {
                refs.add(f);
            }
        }
    }
    return [...refs];
}
|
|
75
|
+
/**
 * Validate an evals file and report the result on stdout.
 *
 * Resolves `targetPath` (default ".") against the current working directory,
 * locates the evals file via findEvalsJson, lists companion files when the
 * target is a folder, validates the content with validateEvalsJson, compares
 * referenced files against the companions, and prints errors, warnings and a
 * summary.
 *
 * A missing path, a missing evals.json and a failed validation all set
 * `process.exitCode = 1` so scripts and CI can detect the failure.
 *
 * @param targetPath Optional file or folder path; defaults to the current directory.
 * @returns A promise that resolves once all output has been printed.
 */
export async function validate(targetPath) {
    header();
    const resolvedPath = path.resolve(process.cwd(), targetPath || ".");
    const plural = (count) => (count !== 1 ? "s" : "");
    // One stat call both proves the path exists and tells file from folder.
    let stat;
    try {
        stat = await fs.stat(resolvedPath);
    }
    catch {
        error(`Path not found: ${targetPath || "."}`);
        console.log();
        process.exitCode = 1;
        return;
    }
    const isFolder = stat.isDirectory();
    if (isFolder) {
        info(`Scanning folder: ${path.basename(resolvedPath)}/`);
    }
    else {
        info(`Validating: ${path.basename(resolvedPath)}`);
    }
    const located = await findEvalsJson(resolvedPath);
    if (!located) {
        console.log();
        error("No evals.json found");
        dim("Looked in:");
        dim(` ${resolvedPath}/evals.json`);
        dim(` ${resolvedPath}/evals/evals.json`);
        console.log();
        process.exitCode = 1;
        return;
    }
    const { evalsPath, rootDir } = located;
    const relEvalsPath = path.relative(process.cwd(), evalsPath);
    success(`Found ${relEvalsPath}`);
    // Scan companion files if it's a folder
    const companions = isFolder ? await scanCompanionFiles(rootDir, evalsPath) : [];
    if (companions.length > 0) {
        success(`${companions.length} companion file${plural(companions.length)} found`);
        for (const c of companions) {
            dim(c);
        }
    }
    // Read and validate
    const content = await fs.readFile(evalsPath, "utf-8");
    const result = validateEvalsJson(content);
    // Check file references. Extraction parses the JSON again and can throw on
    // malformed input; in that case skip this section so the validator's report
    // below is still printed (presumably it lists the parse problem — confirm
    // against validator.js).
    let fileRefs = [];
    try {
        fileRefs = extractFileRefs(content);
    }
    catch {
        fileRefs = [];
    }
    if (fileRefs.length > 0) {
        console.log();
        // Companion paths use the OS separator while refs come from JSON and
        // typically use "/"; compare both in a normalized forward-slash form.
        const portable = (p) => path.posix.normalize(p.split(path.sep).join("/"));
        const companionSet = new Set(companions.map(portable));
        const present = [];
        const missing = [];
        for (const ref of fileRefs) {
            (companionSet.has(portable(ref)) ? present : missing).push(ref);
        }
        if (present.length > 0) {
            success(`${present.length} referenced file${plural(present.length)} present`);
        }
        if (missing.length > 0) {
            warn(`${missing.length} referenced file${plural(missing.length)} missing`);
            for (const m of missing) {
                dim(`${chalk.yellow("missing:")} ${m}`);
            }
        }
    }
    console.log();
    if (result.errors.length > 0) {
        console.log(chalk.bold.red(" Errors:"));
        for (const e of result.errors) {
            error(e);
        }
        console.log();
    }
    if (result.warnings.length > 0) {
        console.log(chalk.bold.yellow(" Warnings:"));
        for (const w of result.warnings) {
            warn(w);
        }
        console.log();
    }
    console.log(chalk.bold(" Summary:"));
    console.log();
    dim(`File: ${relEvalsPath}`);
    // Fall back to the raw format id when the framework registry has no entry for it.
    dim(`Format: ${getFramework(result.format)?.meta.name ?? result.format}`);
    dim(`Eval count: ${result.evalCount}`);
    if (result.summary["skill_name"])
        dim(`Skill: ${result.summary["skill_name"]}`);
    if (result.summary["name"])
        dim(`Name: ${result.summary["name"]}`);
    if (result.summary["version"])
        dim(`Version: ${result.summary["version"]}`);
    dim(`Valid: ${result.valid ? chalk.green("yes") : chalk.red("no")}`);
    console.log();
    if (result.valid) {
        success("Validation passed");
    }
    else {
        error("Validation failed");
        process.exitCode = 1;
    }
    console.log();
}
|
|
179
|
+
//# sourceMappingURL=validate.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validate.js","sourceRoot":"","sources":["../../src/commands/validate.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,MAAM,kBAAkB,CAAC;AAClC,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACvE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AAEpD,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC;IACjC,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM;IAC/C,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM;IAC3C,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO;CACjC,CAAC,CAAC;AAEH,KAAK,UAAU,aAAa,CAAC,UAAkB;IAC7C,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAEvC,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;QAClB,OAAO,EAAE,SAAS,EAAE,UAAU,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;IACtE,CAAC;IAED,yCAAyC;IACzC,MAAM,UAAU,GAAG;QACjB,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,OAAO,EAAE,YAAY,CAAC;QAC5C,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,YAAY,CAAC;KACpC,CAAC;IAEF,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC3B,OAAO,EAAE,SAAS,EAAE,SAAS,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;QACvD,CAAC;QAAC,MAAM,CAAC;YACP,YAAY;QACd,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,OAAe,EAAE,SAAiB,EAAE,QAAQ,GAAG,CAAC;IAChF,MAAM,UAAU,GAAa,EAAE,CAAC;IAChC,MAAM,aAAa,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAExD,KAAK,UAAU,IAAI,CAAC,GAAW,EAAE,KAAa;QAC5C,IAAI,KAAK,GAAG,QAAQ;YAAE,OAAO;QAC7B,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAE/D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,SAAS;YACzC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;YAC5C,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;YAEtD,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,MAAM,IAAI,CAAC,QAAQ,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;YAClC,CAAC;iBAAM,IAAI,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;gBAC1B,IAAI,YAAY,KAAK,aAAa;oBAAE,SAAS;gBAC7C,
MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;gBACnD,IAAI,kBAAkB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;oBAChC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;gBAChC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC;IACvB,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,SAAS,eAAe,CAAC,OAAe;IACtC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACnC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;IAClE,MAAM,IAAI,GAAgB,IAAI,GAAG,EAAE,CAAC;IAEpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YAC9B,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;gBAC3B,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;oBAAE,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACzD,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,IAAI,CAAC,CAAC;AACnB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,UAAmB;IAChD,MAAM,EAAE,CAAC;IAET,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,UAAU,IAAI,GAAG,CAAC,CAAC;IAEpE,uBAAuB;IACvB,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,KAAK,CAAC,mBAAmB,UAAU,IAAI,GAAG,EAAE,CAAC,CAAC;QAC9C,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO;IACT,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACzC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IAEpC,IAAI,QAAQ,EAAE,CAAC;QACb,IAAI,CAAC,oBAAoB,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC3D,CAAC;SAAM,CAAC;QACN,IAAI,CAAC,eAAe,IAAI,CAAC,QAAQ,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;IACrD,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,aAAa,CAAC,YAAY,CAAC,CAAC;IAEhD,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,KAAK,CAAC,qBAAqB,CAAC,CAAC;QAC7B,GAAG,CAAC,YAAY,CAAC,CAAC;QAClB,GAAG,CAAC,KAAK,YAAY,aAAa,CAAC,CAAC;QACpC,GAAG,CAAC,KAAK,YAAY,mBAAmB,CAAC,CAAC;QAC1C,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,OAAO;IACT,CAAC;IAED,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,GAAG,KAAK,CAAC;IACrC,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,SAAS,CAAC,CAAC;IAC7D,OAAO,CAAC,SAAS,YAAY,EAAE,CAAC,CAAC;IAEjC,
wCAAwC;IACxC,MAAM,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,MAAM,kBAAkB,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAChF,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,GAAG,UAAU,CAAC,MAAM,kBAAkB,UAAU,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;QAC1F,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;YAC3B,GAAG,CAAC,CAAC,CAAC,CAAC;QACT,CAAC;IACH,CAAC;IAED,oBAAoB;IACpB,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;IACtD,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAC;IAE1C,wBAAwB;IACxB,MAAM,QAAQ,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAC1C,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,EAAE,CAAC;QACd,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,IAAI,UAAU,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC7B,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACpB,CAAC;QACH,CAAC;QAED,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,mBAAmB,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;QACrF,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,IAAI,CAAC,GAAG,OAAO,CAAC,MAAM,mBAAmB,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;YACpF,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;gBACxB,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAC1C,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,IAAI,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC;QACzC,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;YAC9B,KAAK,CAAC,CAAC,CAAC,CAAC;QACX,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC;QAC9C,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YAChC,IAAI,CAAC,CAAC,CAAC,CAAC;QACV,CAAC;QACD,OAAO,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,KAAK
,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC;IACtC,OAAO,CAAC,GAAG,EAAE,CAAC;IACd,GAAG,CAAC,gBAAgB,YAAY,EAAE,CAAC,CAAC;IACpC,GAAG,CAAC,gBAAgB,YAAY,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,IAAI,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IAC/E,GAAG,CAAC,gBAAgB,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;IAExC,IAAI,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC;QAAE,GAAG,CAAC,gBAAgB,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;IACtF,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC;QAAE,GAAG,CAAC,gBAAgB,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IAC1E,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC;QAAE,GAAG,CAAC,gBAAgB,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;IAEhF,GAAG,CAAC,gBAAgB,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAE3E,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;QACjB,OAAO,CAAC,mBAAmB,CAAC,CAAC;IAC/B,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,mBAAmB,CAAC,CAAC;IAC7B,CAAC;IACD,OAAO,CAAC,GAAG,EAAE,CAAC;AAChB,CAAC"}
|
package/dist/format.d.ts
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/** CLI version string, shown in the banner and registered as the program version. */
export declare const VERSION = "0.1.0";
|
|
2
|
+
/** Prints the "evalify v<VERSION>" banner to stdout. */
export declare function header(): void;
|
|
3
|
+
/** Prints `msg` in green, prefixed with a check mark, to stdout. */
export declare function success(msg: string): void;
|
|
4
|
+
/** Prints `msg` in red, prefixed with a cross mark, to stdout (console.log, not stderr). */
export declare function error(msg: string): void;
|
|
5
|
+
/** Prints `msg` in yellow, prefixed with a warning sign, to stdout. */
export declare function warn(msg: string): void;
|
|
6
|
+
/** Prints `msg` in cyan, prefixed with an arrow, to stdout. */
export declare function info(msg: string): void;
|
|
7
|
+
/** Prints `msg` in dimmed text to stdout; used for secondary detail lines. */
export declare function dim(msg: string): void;
|
|
8
|
+
/** Prints `rows` as space-padded columns, one row per line; does nothing for an empty array. */
export declare function table(rows: string[][]): void;
|
package/dist/format.js
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
// CLI version string; header() prints it in the banner.
export const VERSION = "0.1.0";
|
|
3
|
+
/** Print the CLI banner: the tool name in bold cyan followed by the dimmed version. */
export function header() {
    const title = chalk.bold.cyan(`\nevalify`);
    const versionTag = chalk.dim(` v${VERSION}\n`);
    console.log(title + versionTag);
}
|
|
6
|
+
/** Print a green, check-marked status line. */
export function success(msg) {
    const line = chalk.green(` ✓ ${msg}`);
    console.log(line);
}
|
|
9
|
+
/** Print a red, cross-marked error line (written with console.log, i.e. to stdout). */
export function error(msg) {
    const line = chalk.red(` ✗ ${msg}`);
    console.log(line);
}
|
|
12
|
+
/** Print a yellow warning line. */
export function warn(msg) {
    const line = chalk.yellow(` ⚠ ${msg}`);
    console.log(line);
}
|
|
15
|
+
/** Print a cyan, arrow-prefixed informational line. */
export function info(msg) {
    const line = chalk.cyan(` → ${msg}`);
    console.log(line);
}
|
|
18
|
+
/** Print a dimmed detail line. */
export function dim(msg) {
    const line = chalk.dim(` ${msg}`);
    console.log(line);
}
|
|
21
|
+
/**
 * Print rows as left-aligned, space-padded columns, one row per line.
 *
 * Column widths are computed from the visible text of each cell: ANSI colour
 * sequences (such as those produced by chalk) are excluded from the
 * measurement, so styled header cells line up with plain data cells. Rows may
 * have different lengths and missing cells are treated as empty strings.
 *
 * @param rows Array of rows, each an array of cell strings.
 * @returns Nothing; does nothing when `rows` is empty.
 */
export function table(rows) {
    if (rows.length === 0)
        return;
    // Matches SGR escape sequences (colours, bold, dim, ...).
    const ansiSgr = /\u001b\[[0-9;]*m/g;
    const textOf = (cell) => String(cell ?? "");
    const visibleLength = (cell) => textOf(cell).replace(ansiSgr, "").length;
    const columnCount = Math.max(...rows.map((row) => row.length));
    const colWidths = Array.from({ length: columnCount }, (_, colIndex) => Math.max(...rows.map((row) => visibleLength(row[colIndex]))));
    for (const row of rows) {
        const line = row
            .map((cell, i) => textOf(cell) + " ".repeat(colWidths[i] - visibleLength(cell)))
            .join(" ");
        console.log(` ${line}`);
    }
}
|
|
32
|
+
//# sourceMappingURL=format.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"format.js","sourceRoot":"","sources":["../src/format.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAE1B,MAAM,CAAC,MAAM,OAAO,GAAG,OAAO,CAAC;AAE/B,MAAM,UAAU,MAAM;IACpB,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,KAAK,OAAO,IAAI,CAAC,CAAC,CAAC;AAC1E,CAAC;AAED,MAAM,UAAU,OAAO,CAAC,GAAW;IACjC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC,CAAC;AACzC,CAAC;AAED,MAAM,UAAU,KAAK,CAAC,GAAW;IAC/B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,UAAU,IAAI,CAAC,GAAW;IAC9B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,MAAM,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC,CAAC;AAC1C,CAAC;AAED,MAAM,UAAU,IAAI,CAAC,GAAW;IAC9B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC,CAAC;AACxC,CAAC;AAED,MAAM,UAAU,GAAG,CAAC,GAAW;IAC7B,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,UAAU,KAAK,CAAC,IAAgB;IACpC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO;IAE9B,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,EAAE,CAC5C,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAC7D,CAAC;IAEF,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,GAAG;aACb,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;aAC3C,IAAI,CAAC,IAAI,CAAC,CAAC;QACd,OAAO,CAAC,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC;IAC3B,CAAC;AACH,CAAC"}
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import { VERSION } from "./format.js";
|
|
4
|
+
import { pull } from "./commands/pull.js";
|
|
5
|
+
import { publish } from "./commands/publish.js";
|
|
6
|
+
import { search } from "./commands/search.js";
|
|
7
|
+
import { validate } from "./commands/validate.js";
|
|
8
|
+
// Root command: name, description and the string printed by `--version`.
const program = new Command();
program
    .name("evalify")
    .description("CLI tool for the Evalify eval criteria registry")
    .version(VERSION);
// pull <slug>: required registry slug.
program
    .command("pull <slug>")
    .description("Download eval criteria from the registry")
    .action(async (slug) => {
    await pull(slug);
});
// publish [path]: optional file or folder.
program
    .command("publish [path]")
    .description("Publish eval criteria from a file or skill folder")
    .action(async (targetPath) => {
    await publish(targetPath);
});
// search <query>: required search text.
program
    .command("search <query>")
    .description("Search the registry for eval criteria")
    .action(async (query) => {
    await search(query);
});
// validate [path]: optional file or folder.
program
    .command("validate [path]")
    .description("Validate evals.json from a file or skill folder")
    .action(async (targetPath) => {
    await validate(targetPath);
});
// Every action handler above is async, so the arguments are parsed with
// parseAsync(): it waits for the handler's promise, whereas parse() drops it.
// A rejected handler is reported and turned into a failing exit status.
program.parseAsync().catch((err) => {
    console.error(err);
    process.exitCode = 1;
});
|
|
38
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAEA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AACpC,OAAO,EAAE,OAAO,EAAE,MAAM,aAAa,CAAC;AACtC,OAAO,EAAE,IAAI,EAAE,MAAM,oBAAoB,CAAC;AAC1C,OAAO,EAAE,OAAO,EAAE,MAAM,uBAAuB,CAAC;AAChD,OAAO,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAC;AAC9C,OAAO,EAAE,QAAQ,EAAE,MAAM,wBAAwB,CAAC;AAElD,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,IAAI,CAAC,SAAS,CAAC;KACf,WAAW,CAAC,iDAAiD,CAAC;KAC9D,OAAO,CAAC,OAAO,CAAC,CAAC;AAEpB,OAAO;KACJ,OAAO,CAAC,aAAa,CAAC;KACtB,WAAW,CAAC,0CAA0C,CAAC;KACvD,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE;IAC7B,MAAM,IAAI,CAAC,IAAI,CAAC,CAAC;AACnB,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,gBAAgB,CAAC;KACzB,WAAW,CAAC,mDAAmD,CAAC;KAChE,MAAM,CAAC,KAAK,EAAE,UAAmB,EAAE,EAAE;IACpC,MAAM,OAAO,CAAC,UAAU,CAAC,CAAC;AAC5B,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,gBAAgB,CAAC;KACzB,WAAW,CAAC,uCAAuC,CAAC;KACpD,MAAM,CAAC,KAAK,EAAE,KAAa,EAAE,EAAE;IAC9B,MAAM,MAAM,CAAC,KAAK,CAAC,CAAC;AACtB,CAAC,CAAC,CAAC;AAEL,OAAO;KACJ,OAAO,CAAC,iBAAiB,CAAC;KAC1B,WAAW,CAAC,iDAAiD,CAAC;KAC9D,MAAM,CAAC,KAAK,EAAE,UAAmB,EAAE,EAAE;IACpC,MAAM,QAAQ,CAAC,UAAU,CAAC,CAAC;AAC7B,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,EAAE,CAAC"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { detectFramework } from "@evalify/frameworks";
|
|
2
|
+
/**
 * Validates the text of an evals file.
 *
 * Accepted top-level shapes are a JSON array of entries, or an object whose
 * `evals` property is such an array. For the object form, truthy
 * `skill_name`, `name`, `version` and `description` values are copied into
 * `summary`.
 *
 * @param {string} content Raw file contents.
 * @returns An object with `valid`, `format`, `errors`, `warnings`,
 *   `evalCount` and `summary`. Problems are reported through `errors` /
 *   `warnings`; malformed JSON does not throw.
 */
export function validateEvalsJson(content) {
    const result = {
        valid: true,
        format: "unknown",
        errors: [],
        warnings: [],
        evalCount: 0,
        summary: {},
    };
    let parsed;
    try {
        parsed = JSON.parse(content);
    }
    catch {
        result.valid = false;
        result.errors.push("Invalid JSON — could not parse file");
        return result;
    }
    // Accept both top-level array and { skill_name, evals: [...] } wrapper
    let evals;
    if (Array.isArray(parsed)) {
        evals = parsed;
    }
    else if (typeof parsed === "object" &&
        parsed !== null &&
        "evals" in parsed &&
        Array.isArray(parsed.evals)) {
        evals = parsed.evals;
        // Copy the optional metadata fields, keeping this key order.
        for (const key of ["skill_name", "name", "version", "description"]) {
            if (parsed[key])
                result.summary[key] = parsed[key];
        }
    }
    else {
        result.valid = false;
        result.errors.push("Expected a JSON array or an object with an \"evals\" array");
        return result;
    }
    result.evalCount = evals.length;
    if (evals.length === 0) {
        // An empty list is reported as a warning, not as an error.
        result.warnings.push("File contains zero eval entries");
        return result;
    }
    // Detect format using framework registry
    const framework = detectFramework(parsed);
    if (framework) {
        result.format = framework.meta.id;
    }
    else {
        result.warnings.push("Could not detect format — expected {prompt, expectations} or Anthropic Skill Creator format");
    }
    // Validate each entry
    for (let i = 0; i < evals.length; i++) {
        const entry = evals[i];
        // Arrays are typeof "object" too; reject them here so the message
        // names the real problem instead of reporting missing fields.
        if (typeof entry !== "object" || entry === null || Array.isArray(entry)) {
            result.errors.push(`Entry ${i}: not an object`);
            result.valid = false;
            continue;
        }
        if (!("prompt" in entry) || typeof entry.prompt !== "string") {
            result.errors.push(`Entry ${i}: missing or invalid "prompt" (expected string)`);
            result.valid = false;
        }
        if (!("expectations" in entry)) {
            if (result.format === "anthropic/skillcreator/v2" && "expected_output" in entry) {
                // Skill Creator two-phase workflow: expected_output first, expectations added later
                result.warnings.push(`Entry ${i}: has "expected_output" but no "expectations" — assertions may not have been added yet`);
            }
            else {
                result.errors.push(`Entry ${i}: missing "expectations"`);
                result.valid = false;
            }
        }
        else if (!Array.isArray(entry.expectations) &&
            typeof entry.expectations !== "string") {
            result.errors.push(`Entry ${i}: "expectations" should be a string or array`);
            result.valid = false;
        }
    }
    return result;
}
|
|
92
|
+
//# sourceMappingURL=validator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../src/validator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAWtD,MAAM,UAAU,iBAAiB,CAAC,OAAe;IAC/C,MAAM,MAAM,GAAqB;QAC/B,KAAK,EAAE,IAAI;QACX,MAAM,EAAE,SAAS;QACjB,MAAM,EAAE,EAAE;QACV,QAAQ,EAAE,EAAE;QACZ,SAAS,EAAE,CAAC;QACZ,OAAO,EAAE,EAAE;KACZ,CAAC;IAEF,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;QACrB,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,qCAAqC,CAAC,CAAC;QAC1D,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,uEAAuE;IACvE,IAAI,KAAgB,CAAC;IACrB,IAAI,YAAY,GAAG,KAAK,CAAC;IAEzB,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC1B,KAAK,GAAG,MAAM,CAAC;IACjB,CAAC;SAAM,IACL,OAAO,MAAM,KAAK,QAAQ;QAC1B,MAAM,KAAK,IAAI;QACf,OAAO,IAAI,MAAM;QACjB,KAAK,CAAC,OAAO,CAAE,MAAkC,CAAC,KAAK,CAAC,EACxD,CAAC;QACD,KAAK,GAAI,MAAkC,CAAC,KAAkB,CAAC;QAC/D,MAAM,IAAI,GAAG,MAAiC,CAAC;QAC/C,YAAY,GAAG,OAAO,IAAI,CAAC,UAAU,KAAK,QAAQ,CAAC;QACnD,IAAI,IAAI,CAAC,UAAU;YAAE,MAAM,CAAC,OAAO,CAAC,YAAY,CAAC,GAAG,IAAI,CAAC,UAAU,CAAC;QACpE,IAAI,IAAI,CAAC,IAAI;YAAE,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC;QAClD,IAAI,IAAI,CAAC,OAAO;YAAE,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC;QAC3D,IAAI,IAAI,CAAC,WAAW;YAAE,MAAM,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC;IACzE,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;QACrB,MAAM,CAAC,MAAM,CAAC,IAAI,CAChB,4DAA4D,CAC7D,CAAC;QACF,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,MAAM,CAAC,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC;IAEhC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACxD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,yCAAyC;IACzC,MAAM,SAAS,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;IAC1C,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,CAAC,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC;IACpC,CAAC;SAAM,CAAC;QACN,MAAM,CAAC,MAAM,GAAG,SAAS,CAAC;QAC1B,MAAM,CAAC,QAAQ,CAAC,IAAI,CAClB,6FAA6F,CAC9F,CAAC;IACJ,CAAC;IAED,sBAAsB;IACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAA
M,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACvB,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YAChD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;YAChD,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;YACrB,SAAS;QACX,CAAC;QAED,MAAM,CAAC,GAAG,KAAgC,CAAC;QAE3C,IAAI,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,IAAI,OAAO,CAAC,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;YACrD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,iDAAiD,CAAC,CAAC;YAChF,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;QACvB,CAAC;QAED,IAAI,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC,EAAE,CAAC;YAC3B,IAAI,MAAM,CAAC,MAAM,KAAK,2BAA2B,IAAI,iBAAiB,IAAI,CAAC,EAAE,CAAC;gBAC5E,oFAAoF;gBACpF,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,wFAAwF,CAAC,CAAC;YAC3H,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,0BAA0B,CAAC,CAAC;gBACzD,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;YACvB,CAAC;QACH,CAAC;aAAM,IACL,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC;YAC9B,OAAO,CAAC,CAAC,YAAY,KAAK,QAAQ,EAClC,CAAC;YACD,MAAM,CAAC,MAAM,CAAC,IAAI,CAChB,SAAS,CAAC,8CAA8C,CACzD,CAAC;YACF,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;QACvB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "evalify-cli",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "CLI tool for the Evalify eval criteria registry",
|
|
5
|
+
"homepage": "https://evalify.sh",
|
|
6
|
+
"repository": "https://github.com/AppVerse-cc/evalify",
|
|
7
|
+
"bin": {
|
|
8
|
+
"evalify": "./dist/index.js"
|
|
9
|
+
},
|
|
10
|
+
"scripts": {
|
|
11
|
+
"build": "tsc",
|
|
12
|
+
"dev": "tsc --watch",
|
|
13
|
+
"start": "node dist/index.js"
|
|
14
|
+
},
|
|
15
|
+
"dependencies": {
|
|
16
|
+
"@evalify/frameworks": "workspace:*",
|
|
17
|
+
"commander": "^12.1.0",
|
|
18
|
+
"chalk": "^5.4.1"
|
|
19
|
+
},
|
|
20
|
+
"devDependencies": {
|
|
21
|
+
"@types/node": "^22.0.0",
|
|
22
|
+
"typescript": "^5.7.0"
|
|
23
|
+
},
|
|
24
|
+
"type": "module"
|
|
25
|
+
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import { getFramework } from "@evalify/frameworks";
|
|
5
|
+
import { header, success, info, dim, error, warn } from "../format.js";
|
|
6
|
+
import { validateEvalsJson } from "../validator.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Resolves the evals file to publish for a user-supplied path.
 *
 * A path that is itself a file is returned unchanged. For any other path the
 * locations `<path>/evals/evals.json` and `<path>/evals.json` are probed in
 * that order.
 *
 * @param targetPath - Path to an existing file or directory.
 * @returns The first matching file path, or `null` when no candidate is a file.
 * @throws Propagates the `fs.stat` error when `targetPath` cannot be stat'ed.
 */
async function findEvalsFile(targetPath: string): Promise<string | null> {
  const stat = await fs.stat(targetPath);

  if (stat.isFile()) return targetPath;

  const candidates = [
    path.join(targetPath, "evals", "evals.json"),
    path.join(targetPath, "evals.json"),
  ];

  for (const candidate of candidates) {
    try {
      // Require a regular file: a directory that happens to be named
      // evals.json would otherwise be returned and fail later when read.
      const candidateStat = await fs.stat(candidate);
      if (candidateStat.isFile()) return candidate;
    } catch {
      // not found, continue
    }
  }

  return null;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Runs the `publish` command.
 *
 * Locates an evals file under `targetPath` (default: the current directory),
 * validates it and prints a publish summary. Nothing is uploaded: the command
 * ends with a dry-run notice.
 *
 * When the path is missing, no evals file is found, or validation fails, the
 * problem is printed and `process.exitCode` is set to 1 so that scripts and
 * CI can detect the failure.
 *
 * @param targetPath - Optional file or folder, resolved against the current
 *   working directory.
 */
export async function publish(targetPath?: string): Promise<void> {
  header();

  const resolvedPath = path.resolve(process.cwd(), targetPath || ".");

  try {
    await fs.access(resolvedPath);
  } catch {
    error(`Path not found: ${targetPath || "."}`);
    console.log();
    process.exitCode = 1;
    return;
  }

  info("Publishing eval criteria to registry...");
  console.log();

  const filePath = await findEvalsFile(resolvedPath);

  if (!filePath) {
    error("No evals.json found");
    dim("Looked in:");
    dim(` ${resolvedPath}/evals.json`);
    dim(` ${resolvedPath}/evals/evals.json`);
    console.log();
    process.exitCode = 1;
    return;
  }

  const relativeFilePath = path.relative(process.cwd(), filePath);
  success(`Found ${relativeFilePath}`);

  const content = await fs.readFile(filePath, "utf-8");
  const result = validateEvalsJson(content);

  if (!result.valid) {
    console.log();
    error("Validation failed — cannot publish");
    for (const e of result.errors) {
      error(e);
    }
    console.log();
    process.exitCode = 1;
    return;
  }

  // Warnings never block publishing; they are only shown.
  for (const w of result.warnings) {
    warn(w);
  }

  console.log();
  console.log(chalk.bold(" Publish summary:"));
  console.log();

  // Metadata lines are printed only for fields the validator copied over.
  if (result.summary["skill_name"]) {
    dim(`Skill: ${result.summary["skill_name"]}`);
  }
  if (result.summary["name"]) {
    dim(`Name: ${result.summary["name"]}`);
  }
  if (result.summary["version"]) {
    dim(`Version: ${result.summary["version"]}`);
  }
  if (result.summary["description"]) {
    dim(`Description: ${result.summary["description"]}`);
  }
  // Show the framework's display name when it is registered, else the raw id.
  dim(`Format: ${getFramework(result.format)?.meta.name ?? result.format}`);
  dim(`Eval count: ${result.evalCount}`);
  dim(`File: ${relativeFilePath}`);

  console.log();
  warn("Dry run — publishing is not yet connected to the registry");
  success("File is valid and ready to publish");
  console.log();
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import { header, success, info, dim, error } from "../format.js";
|
|
4
|
+
|
|
5
|
+
/** Base URL of the registry endpoint; `pull` requests `<REGISTRY_URL>/<slug>`. */
const REGISTRY_URL = "https://evalify.sh/api/registry";
|
|
6
|
+
|
|
7
|
+
/**
 * Runs the `pull` command: downloads one criteria pack from the registry and
 * writes it to `./evals/<slug>/evals.json`.
 *
 * Every failure (missing or invalid slug, 404, other HTTP error, network
 * error, unexpected response body, write error) is printed and sets
 * `process.exitCode` to 1. The function itself never rejects for these cases.
 *
 * @param slug - Registry identifier of the pack to download.
 */
export async function pull(slug: string): Promise<void> {
  /** Fields this command reads from the registry response. */
  type RegistryPack = {
    slug: string;
    displayName: string;
    version: string;
    description: string;
    domain: string;
    author: string;
    tags: string[];
    evals: { prompt: string; expectations: string[] }[];
  };

  /** Prints an error line followed by a blank line and marks the run as failed. */
  const fail = (message: string): void => {
    error(message);
    console.log();
    process.exitCode = 1;
  };

  header();

  if (!slug) {
    fail("Missing slug argument");
    return;
  }

  info(`Pulling eval criteria: ${slug}`);
  console.log();

  const evalsRoot = path.resolve(process.cwd(), "evals");
  const targetDir = path.resolve(evalsRoot, slug);
  const targetFile = path.join(targetDir, "evals.json");

  // The slug becomes a directory name: refuse values such as "../x" or an
  // absolute path that would place the download outside ./evals.
  if (!targetDir.startsWith(evalsRoot + path.sep)) {
    fail(`Invalid slug: ${slug}`);
    return;
  }

  // Encode each path segment so characters like "?" or "#" cannot change
  // the request URL; "/" separators are preserved.
  const encodedSlug = slug.split("/").map(encodeURIComponent).join("/");

  let pack: RegistryPack;

  try {
    const res = await fetch(`${REGISTRY_URL}/${encodedSlug}`);
    if (res.status === 404) {
      error(`Criteria not found: ${slug}`);
      dim(`Check the registry at https://evalify.sh`);
      console.log();
      process.exitCode = 1;
      return;
    }
    if (!res.ok) {
      fail(`Registry returned ${res.status}`);
      return;
    }

    const body: unknown = await res.json();
    // Only `evals` is checked here because its length is read below; without
    // the check a malformed body surfaced as a misleading write error.
    if (
      typeof body !== "object" ||
      body === null ||
      !Array.isArray((body as { evals?: unknown }).evals)
    ) {
      fail("Registry returned an unexpected response");
      return;
    }
    pack = body as RegistryPack;
  } catch (err) {
    fail(`Failed to reach registry: ${err instanceof Error ? err.message : String(err)}`);
    return;
  }

  try {
    await fs.mkdir(targetDir, { recursive: true });

    // Write only the known fields, in a fixed key order.
    const output = {
      slug: pack.slug,
      displayName: pack.displayName,
      version: pack.version,
      description: pack.description,
      domain: pack.domain,
      author: pack.author,
      tags: pack.tags,
      evals: pack.evals,
    };

    await fs.writeFile(targetFile, JSON.stringify(output, null, 2) + "\n");

    success(`Pulled ${pack.displayName} v${pack.version}`);
    success(`Wrote ${pack.evals.length} eval${pack.evals.length !== 1 ? "s" : ""} to evals/${slug}/evals.json`);
    console.log();
    dim(`Author: ${pack.author}`);
    dim(`Domain: ${pack.domain}`);
    dim(`Location: ${targetFile}`);
    dim(`To validate: evalify validate evals/${slug}`);
  } catch (err) {
    error(`Failed to write file: ${err instanceof Error ? err.message : String(err)}`);
    process.exitCode = 1;
  }

  console.log();
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
import { header, info, dim, table } from "../format.js";
|
|
3
|
+
|
|
4
|
+
/**
 * Runs the `search` command.
 *
 * Prints the query followed by a fixed table of placeholder rows; the query
 * is only echoed and does not filter the rows.
 *
 * @param query - Search text entered by the user.
 */
export async function search(query: string): Promise<void> {
  header();
  info(`Searching registry for: ${chalk.bold(query)}`);
  console.log();

  // Placeholder results for now
  const headerRow = [chalk.bold("Slug"), chalk.bold("Description"), chalk.bold("Evals")];
  const dividerRow = [20, 35, 5].map((width) => chalk.dim("─".repeat(width)));
  const placeholderRows = [
    ["code-review", "Code review quality criteria", "12"],
    ["summarization", "Text summarization accuracy", "8"],
    ["safety-checks", "Safety and content policy evals", "24"],
  ];

  table([headerRow, dividerRow, ...placeholderRows]);
  console.log();
  dim(`Showing placeholder results — registry search not yet connected`);
  dim(`Use: evalify pull <slug> to download criteria`);
  console.log();
}
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
import { getFramework } from "@evalify/frameworks";
|
|
5
|
+
import { header, success, info, dim, error, warn } from "../format.js";
|
|
6
|
+
import { validateEvalsJson } from "../validator.js";
|
|
7
|
+
|
|
8
|
+
/**
 * Lower-case file extensions (with leading dot) that the companion-file scan
 * accepts.
 */
const ALLOWED_EXTENSIONS = new Set([
  // data and documents
  ".json",
  ".md",
  ".txt",
  ".csv",
  ".yaml",
  ".yml",
  // source code
  ".ts",
  ".tsx",
  ".js",
  ".jsx",
  ".py",
  ".sql",
  // markup and config
  ".html",
  ".css",
  ".xml",
  ".toml",
]);
|
|
13
|
+
|
|
14
|
+
/**
 * Locates the evals file for a user-supplied path.
 *
 * - File path: returned as `evalsPath`, with its parent directory as `rootDir`.
 * - Any other path: `<path>/evals/evals.json` and `<path>/evals.json` are
 *   probed in that order, and `rootDir` is the given path.
 *
 * @param targetPath - Path to an existing file or directory.
 * @returns The evals file and its root directory, or `null` when no candidate
 *   is a file.
 * @throws Propagates the `fs.stat` error when `targetPath` cannot be stat'ed.
 */
async function findEvalsJson(targetPath: string): Promise<{ evalsPath: string; rootDir: string } | null> {
  const stat = await fs.stat(targetPath);

  if (stat.isFile()) {
    return { evalsPath: targetPath, rootDir: path.dirname(targetPath) };
  }

  // It's a directory — scan for evals.json
  const candidates = [
    path.join(targetPath, "evals", "evals.json"),
    path.join(targetPath, "evals.json"),
  ];

  for (const candidate of candidates) {
    try {
      // Require a regular file: a directory that happens to be named
      // evals.json would otherwise be returned and fail later when read.
      const candidateStat = await fs.stat(candidate);
      if (candidateStat.isFile()) {
        return { evalsPath: candidate, rootDir: targetPath };
      }
    } catch {
      // not found
    }
  }

  return null;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Collects files that sit alongside the evals file.
 *
 * Directories are read recursively starting at `rootDir` (depth 0) and down
 * to depth `maxDepth` inclusive. Entries whose name starts with "." are
 * skipped, as is the evals file itself. A file is kept only when its
 * lower-cased extension is in `ALLOWED_EXTENSIONS`.
 *
 * @param rootDir - Directory to scan.
 * @param evalsPath - Path of the evals file to leave out of the result.
 * @param maxDepth - Deepest directory level that is still read.
 * @returns Paths relative to `rootDir`, in directory-traversal order.
 */
async function scanCompanionFiles(rootDir: string, evalsPath: string, maxDepth = 2): Promise<string[]> {
  const evalsRelative = path.relative(rootDir, evalsPath);
  const collected: string[] = [];

  const visit = async (currentDir: string, level: number): Promise<void> => {
    if (level > maxDepth) return;

    for (const dirent of await fs.readdir(currentDir, { withFileTypes: true })) {
      // Hidden files and folders are ignored entirely.
      if (dirent.name.startsWith(".")) continue;

      const absolutePath = path.join(currentDir, dirent.name);

      if (dirent.isDirectory()) {
        await visit(absolutePath, level + 1);
        continue;
      }
      // Anything that is neither a directory nor a regular file is ignored.
      if (!dirent.isFile()) continue;

      const relativePath = path.relative(rootDir, absolutePath);
      if (relativePath === evalsRelative) continue;

      const extension = path.extname(dirent.name).toLowerCase();
      if (ALLOWED_EXTENSIONS.has(extension)) {
        collected.push(relativePath);
      }
    }
  };

  await visit(rootDir, 0);
  return collected;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Collects the distinct file references declared by eval entries.
 *
 * Reads the `files` array of every entry, taken from a top-level array or
 * from the `evals` array of a wrapper object, and keeps the non-empty string
 * items in first-seen order.
 *
 * Input that cannot be inspected (invalid JSON, an unexpected top-level
 * shape, entries that are not objects) yields no references instead of
 * throwing; reporting those problems is left to the validator.
 *
 * @param content - Raw text of the evals file.
 * @returns De-duplicated file references, possibly empty.
 */
function extractFileRefs(content: string): string[] {
  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    return [];
  }

  let evals: unknown;
  if (Array.isArray(parsed)) {
    evals = parsed;
  } else if (typeof parsed === "object" && parsed !== null) {
    evals = (parsed as Record<string, unknown>).evals;
  }
  if (!Array.isArray(evals)) return [];

  const refs = new Set<string>();

  for (const item of evals) {
    // Entries such as null or a bare number carry no `files` property.
    if (typeof item !== "object" || item === null) continue;

    const files = (item as Record<string, unknown>).files;
    if (!Array.isArray(files)) continue;

    for (const f of files) {
      if (typeof f === "string" && f.length > 0) refs.add(f);
    }
  }

  return [...refs];
}
|
|
83
|
+
|
|
84
|
+
/**
 * Runs the `validate` command.
 *
 * Locates an evals file under `targetPath` (default: the current directory),
 * lists companion files when the target is a folder, checks the files that
 * eval entries reference, and prints errors, warnings and a summary.
 *
 * When the path is missing, no evals file is found, or validation fails,
 * `process.exitCode` is set to 1 so that scripts and CI can detect the
 * failure.
 *
 * @param targetPath - Optional file or folder, resolved against the current
 *   working directory.
 */
export async function validate(targetPath?: string): Promise<void> {
  header();

  const resolvedPath = path.resolve(process.cwd(), targetPath || ".");

  // Check if path exists
  try {
    await fs.access(resolvedPath);
  } catch {
    error(`Path not found: ${targetPath || "."}`);
    console.log();
    process.exitCode = 1;
    return;
  }

  const stat = await fs.stat(resolvedPath);
  const isFolder = stat.isDirectory();

  if (isFolder) {
    info(`Scanning folder: ${path.basename(resolvedPath)}/`);
  } else {
    info(`Validating: ${path.basename(resolvedPath)}`);
  }

  const located = await findEvalsJson(resolvedPath);

  if (!located) {
    console.log();
    error("No evals.json found");
    dim("Looked in:");
    dim(` ${resolvedPath}/evals.json`);
    dim(` ${resolvedPath}/evals/evals.json`);
    console.log();
    process.exitCode = 1;
    return;
  }

  const { evalsPath, rootDir } = located;
  const relEvalsPath = path.relative(process.cwd(), evalsPath);
  success(`Found ${relEvalsPath}`);

  // Scan companion files if it's a folder
  const companions = isFolder ? await scanCompanionFiles(rootDir, evalsPath) : [];
  if (companions.length > 0) {
    success(`${companions.length} companion file${companions.length !== 1 ? "s" : ""} found`);
    for (const c of companions) {
      dim(c);
    }
  }

  // Read and validate
  const content = await fs.readFile(evalsPath, "utf-8");
  const result = validateEvalsJson(content);

  // Check file references. Extraction parses the file a second time; if that
  // throws (for example on invalid JSON) the check is skipped so the
  // validator's own error is still printed below.
  let fileRefs: string[];
  try {
    fileRefs = extractFileRefs(content);
  } catch {
    fileRefs = [];
  }

  if (fileRefs.length > 0) {
    console.log();
    // NOTE(review): for a single-file target no companions are scanned, so
    // every reference is reported as missing. Confirm whether that is intended.
    const companionSet = new Set(companions);
    const present: string[] = [];
    const missing: string[] = [];

    for (const ref of fileRefs) {
      // Normalise so "./a/b.csv" and "a/b.csv" (and "/" on Windows) match the
      // relative paths produced by the scan.
      if (companionSet.has(path.normalize(ref))) {
        present.push(ref);
      } else {
        missing.push(ref);
      }
    }

    if (present.length > 0) {
      success(`${present.length} referenced file${present.length !== 1 ? "s" : ""} present`);
    }
    if (missing.length > 0) {
      warn(`${missing.length} referenced file${missing.length !== 1 ? "s" : ""} missing`);
      for (const m of missing) {
        dim(`${chalk.yellow("missing:")} ${m}`);
      }
    }
  }

  console.log();

  if (result.errors.length > 0) {
    console.log(chalk.bold.red(" Errors:"));
    for (const e of result.errors) {
      error(e);
    }
    console.log();
  }

  if (result.warnings.length > 0) {
    console.log(chalk.bold.yellow(" Warnings:"));
    for (const w of result.warnings) {
      warn(w);
    }
    console.log();
  }

  console.log(chalk.bold(" Summary:"));
  console.log();
  dim(`File: ${relEvalsPath}`);
  // Show the framework's display name when it is registered, else the raw id.
  dim(`Format: ${getFramework(result.format)?.meta.name ?? result.format}`);
  dim(`Eval count: ${result.evalCount}`);

  if (result.summary["skill_name"]) dim(`Skill: ${result.summary["skill_name"]}`);
  if (result.summary["name"]) dim(`Name: ${result.summary["name"]}`);
  if (result.summary["version"]) dim(`Version: ${result.summary["version"]}`);

  dim(`Valid: ${result.valid ? chalk.green("yes") : chalk.red("no")}`);

  console.log();

  if (result.valid) {
    success("Validation passed");
  } else {
    error("Validation failed");
    process.exitCode = 1;
  }
  console.log();
}
|
package/src/format.ts
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
|
|
3
|
+
/** CLI version string: printed by `header()` and passed to commander's `.version()` in index.ts. */
export const VERSION = "0.1.0";
|
|
4
|
+
|
|
5
|
+
/** Prints the banner: the tool name in bold cyan followed by the dimmed version. */
export function header(): void {
  const title = chalk.bold.cyan(`\nevalify`);
  const versionTag = chalk.dim(` v${VERSION}\n`);
  console.log(title + versionTag);
}
|
|
8
|
+
|
|
9
|
+
/**
 * Prints a success line: a check mark and the message, in green.
 *
 * @param msg - Text to print.
 */
export function success(msg: string): void {
  const line = chalk.green(` ✓ ${msg}`);
  console.log(line);
}
|
|
12
|
+
|
|
13
|
+
/**
 * Prints an error line: a cross and the message, in red. Output goes through
 * `console.log`.
 *
 * @param msg - Text to print.
 */
export function error(msg: string): void {
  const line = chalk.red(` ✗ ${msg}`);
  console.log(line);
}
|
|
16
|
+
|
|
17
|
+
/**
 * Prints a warning line: a warning sign and the message, in yellow.
 *
 * @param msg - Text to print.
 */
export function warn(msg: string): void {
  const line = chalk.yellow(` ⚠ ${msg}`);
  console.log(line);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Prints an informational line: an arrow and the message, in cyan.
 *
 * @param msg - Text to print.
 */
export function info(msg: string): void {
  const line = chalk.cyan(` → ${msg}`);
  console.log(line);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Prints a secondary detail line in dimmed text.
 *
 * @param msg - Text to print.
 */
export function dim(msg: string): void {
  const line = chalk.dim(` ${msg}`);
  console.log(line);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Prints rows as left-aligned, space-padded columns, one `console.log` call
 * per row.
 *
 * Column widths are measured on the visible text: ANSI colour/style escape
 * sequences (the kind chalk produces) are ignored when measuring and padding,
 * so styled and plain cells line up. Rows may have different lengths; the
 * widest row decides the number of columns.
 *
 * @param rows - Table cells, one inner array per row. Nothing is printed for
 *   an empty list.
 */
export function table(rows: string[][]): void {
  if (rows.length === 0) return;

  // SGR sequences: ESC [ <digits and semicolons> m
  const ansiPattern = /\u001B\[[0-9;]*m/g;
  const visibleWidth = (cell: string): number => cell.replace(ansiPattern, "").length;

  const columnCount = Math.max(...rows.map((row) => row.length));
  const colWidths = Array.from({ length: columnCount }, (_, colIndex) =>
    Math.max(...rows.map((row) => visibleWidth(row[colIndex] ?? "")))
  );

  for (const row of rows) {
    const line = row
      // padEnd would count the escape bytes, so pad by the visible shortfall.
      .map((cell, i) => cell + " ".repeat(Math.max(0, colWidths[i] - visibleWidth(cell))))
      .join(" ");
    console.log(` ${line}`);
  }
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { Command } from "commander";
|
|
4
|
+
import { VERSION } from "./format.js";
|
|
5
|
+
import { pull } from "./commands/pull.js";
|
|
6
|
+
import { publish } from "./commands/publish.js";
|
|
7
|
+
import { search } from "./commands/search.js";
|
|
8
|
+
import { validate } from "./commands/validate.js";
|
|
9
|
+
|
|
10
|
+
// Root command: name, description and the string printed by `--version`.
const program = new Command();

program
  .name("evalify")
  .description("CLI tool for the Evalify eval criteria registry")
  .version(VERSION);

// pull <slug>: required registry slug.
program
  .command("pull <slug>")
  .description("Download eval criteria from the registry")
  .action(async (slug: string) => {
    await pull(slug);
  });

// publish [path]: optional file or folder.
program
  .command("publish [path]")
  .description("Publish eval criteria from a file or skill folder")
  .action(async (targetPath?: string) => {
    await publish(targetPath);
  });

// search <query>: required search text.
program
  .command("search <query>")
  .description("Search the registry for eval criteria")
  .action(async (query: string) => {
    await search(query);
  });

// validate [path]: optional file or folder.
program
  .command("validate [path]")
  .description("Validate evals.json from a file or skill folder")
  .action(async (targetPath?: string) => {
    await validate(targetPath);
  });

// Every action handler above is async, so the arguments are parsed with
// parseAsync(): it waits for the handler's promise, whereas parse() drops it.
// A rejected handler is reported and turned into a failing exit status.
program.parseAsync().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|
package/src/validator.ts
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { detectFramework } from "@evalify/frameworks";
|
|
2
|
+
|
|
3
|
+
/** Outcome of checking one evals file, as returned by `validateEvalsJson`. */
export interface ValidationResult {
  /** `false` as soon as at least one error has been recorded. */
  valid: boolean;
  /** Id of the detected framework, or `"unknown"` when none was detected. */
  format: string;
  /** Problems that make the file invalid. */
  errors: string[];
  /** Findings that do not make the file invalid. */
  warnings: string[];
  /** Number of entries in the evals array. */
  evalCount: number;
  /** Metadata copied from a wrapper object (`skill_name`, `name`, `version`, `description`). */
  summary: Record<string, unknown>;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Validates the text of an evals file.
 *
 * Accepted top-level shapes are a JSON array of entries, or an object whose
 * `evals` property is such an array. For the object form, truthy
 * `skill_name`, `name`, `version` and `description` values are copied into
 * `summary`.
 *
 * Each entry must be a plain object with a string `prompt` and an
 * `expectations` value that is a string or an array. One exception: when the
 * detected format is `anthropic/skillcreator/v2`, an entry that has
 * `expected_output` but no `expectations` yields a warning instead of an error.
 *
 * @param content - Raw file contents.
 * @returns The validation outcome. Problems are reported through `errors` /
 *   `warnings`; malformed JSON does not throw.
 */
export function validateEvalsJson(content: string): ValidationResult {
  const result: ValidationResult = {
    valid: true,
    format: "unknown",
    errors: [],
    warnings: [],
    evalCount: 0,
    summary: {},
  };

  let parsed: unknown;
  try {
    parsed = JSON.parse(content);
  } catch {
    result.valid = false;
    result.errors.push("Invalid JSON — could not parse file");
    return result;
  }

  // Accept both top-level array and { skill_name, evals: [...] } wrapper
  let evals: unknown[];

  if (Array.isArray(parsed)) {
    evals = parsed;
  } else if (
    typeof parsed === "object" &&
    parsed !== null &&
    "evals" in parsed &&
    Array.isArray((parsed as Record<string, unknown>).evals)
  ) {
    const meta = parsed as Record<string, unknown>;
    evals = meta.evals as unknown[];
    // Copy the optional metadata fields, keeping this key order.
    for (const key of ["skill_name", "name", "version", "description"]) {
      if (meta[key]) result.summary[key] = meta[key];
    }
  } else {
    result.valid = false;
    result.errors.push(
      "Expected a JSON array or an object with an \"evals\" array"
    );
    return result;
  }

  result.evalCount = evals.length;

  if (evals.length === 0) {
    // An empty list is reported as a warning, not as an error.
    result.warnings.push("File contains zero eval entries");
    return result;
  }

  // Detect format using framework registry
  const framework = detectFramework(parsed);
  if (framework) {
    result.format = framework.meta.id;
  } else {
    result.warnings.push(
      "Could not detect format — expected {prompt, expectations} or Anthropic Skill Creator format"
    );
  }

  // Validate each entry
  for (let i = 0; i < evals.length; i++) {
    const entry = evals[i];
    // Arrays are typeof "object" too; reject them here so the message names
    // the real problem instead of reporting missing fields.
    if (typeof entry !== "object" || entry === null || Array.isArray(entry)) {
      result.errors.push(`Entry ${i}: not an object`);
      result.valid = false;
      continue;
    }

    const e = entry as Record<string, unknown>;

    if (!("prompt" in e) || typeof e.prompt !== "string") {
      result.errors.push(`Entry ${i}: missing or invalid "prompt" (expected string)`);
      result.valid = false;
    }

    if (!("expectations" in e)) {
      if (result.format === "anthropic/skillcreator/v2" && "expected_output" in e) {
        // Skill Creator two-phase workflow: expected_output first, expectations added later
        result.warnings.push(`Entry ${i}: has "expected_output" but no "expectations" — assertions may not have been added yet`);
      } else {
        result.errors.push(`Entry ${i}: missing "expectations"`);
        result.valid = false;
      }
    } else if (
      !Array.isArray(e.expectations) &&
      typeof e.expectations !== "string"
    ) {
      result.errors.push(
        `Entry ${i}: "expectations" should be a string or array`
      );
      result.valid = false;
    }
  }

  return result;
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "Node16",
|
|
5
|
+
"moduleResolution": "Node16",
|
|
6
|
+
"outDir": "./dist",
|
|
7
|
+
"rootDir": "./src",
|
|
8
|
+
"strict": true,
|
|
9
|
+
"esModuleInterop": true,
|
|
10
|
+
"skipLibCheck": true,
|
|
11
|
+
"forceConsistentCasingInFileNames": true,
|
|
12
|
+
"declaration": true,
|
|
13
|
+
"sourceMap": true
|
|
14
|
+
},
|
|
15
|
+
"include": ["src/**/*"],
|
|
16
|
+
"exclude": ["node_modules", "dist"]
|
|
17
|
+
}
|