evalify-cli 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "evalify-cli",
3
- "version": "0.1.2",
3
+ "version": "0.1.4",
4
4
  "description": "CLI tool for the Evalify eval criteria registry",
5
5
  "homepage": "https://evalify.sh",
6
6
  "repository": "https://github.com/AppVerse-cc/evalify",
@@ -14,11 +14,13 @@
14
14
  },
15
15
  "dependencies": {
16
16
  "chalk": "^5.4.1",
17
- "commander": "^12.1.0"
17
+ "commander": "^12.1.0",
18
+ "prompts": "^2.4.2"
18
19
  },
19
20
  "devDependencies": {
20
21
  "@evalify/frameworks": "workspace:*",
21
22
  "@types/node": "^22.0.0",
23
+ "@types/prompts": "^2.4.9",
22
24
  "tsup": "^8.5.1",
23
25
  "typescript": "^5.7.0"
24
26
  }
@@ -0,0 +1,181 @@
1
+ import http from "node:http";
2
+ import crypto from "node:crypto";
3
+ import { exec } from "node:child_process";
4
+ import path from "node:path";
5
+ import os from "node:os";
6
+ import fs from "node:fs/promises";
7
+ import { header, success, info, dim, error } from "../format.js";
8
+
9
+ const AUTH_FILE = path.join(os.homedir(), ".evalify", "auth.json");
10
+ const REGISTRY_BASE = "https://www.evalify.sh";
11
+
/**
 * Credentials persisted to `AUTH_FILE` after a successful `login`.
 */
export interface AuthData {
  /** Bearer token sent in the `Authorization` header of registry requests. */
  access_token: string;
  /** Token posted to the refresh endpoint to obtain a new access token; `login` stores "" when none was returned. */
  refresh_token: string;
  /** Registry handle of the authenticated user; `login` stores "" when it could not be fetched. */
  handle: string;
}
17
+
/**
 * Loads the credentials stored in `AUTH_FILE`.
 *
 * The file is user-editable, so its content is validated instead of being
 * cast blindly: anything without a non-empty string `access_token` is treated
 * as "not logged in".
 *
 * @returns The stored credentials, or `null` when the file is missing,
 *   unreadable, not valid JSON, or does not hold a usable access token.
 */
export async function readAuth(): Promise<AuthData | null> {
  let raw: unknown;
  try {
    raw = JSON.parse(await fs.readFile(AUTH_FILE, "utf-8"));
  } catch {
    // Missing/unreadable file or malformed JSON.
    return null;
  }

  // JSON.parse may legitimately produce null, a number, a string, ...
  if (typeof raw !== "object" || raw === null) return null;

  const { access_token, refresh_token, handle } = raw as Record<string, unknown>;
  if (typeof access_token !== "string" || access_token === "") return null;

  return {
    access_token,
    // Both fields are optional in practice: login() stores "" when they are unknown.
    refresh_token: typeof refresh_token === "string" ? refresh_token : "",
    handle: typeof handle === "string" ? handle : "",
  };
}
26
+
/**
 * Deletes the saved credentials file and reports the outcome.
 *
 * Only a missing file is reported as "Not logged in". Any other failure
 * (for example a permission error) is surfaced, because in that case the
 * credentials are still on disk.
 */
export async function logout(): Promise<void> {
  header();
  try {
    await fs.rm(AUTH_FILE);
    success("Logged out");
  } catch (err) {
    const code = (err as { code?: string }).code;
    if (code === "ENOENT") {
      error("Not logged in");
    } else {
      error(`Could not remove ${AUTH_FILE}: ${(err as Error).message}`);
    }
  }
  console.log();
}
37
+
/**
 * Asks the operating system to open `url` in the default browser.
 *
 * Fire-and-forget: the outcome of the launcher command is not inspected.
 *
 * NOTE(security): `url` is interpolated into a shell command line. Only pass
 * URLs assembled from trusted parts; never forward arbitrary user or network
 * input here.
 *
 * @param url Absolute URL to open.
 */
function openBrowser(url: string): void {
  let launcher: string;
  switch (process.platform) {
    case "darwin":
      launcher = "open";
      break;
    case "win32":
      // `start` treats its first quoted argument as the window title, so an
      // explicit empty title is required for the quoted URL to be opened.
      launcher = 'start ""';
      break;
    default:
      launcher = "xdg-open";
  }
  exec(`${launcher} "${url}"`);
}
47
+
/**
 * Returns a usable access token, refreshing it through the registry when the
 * stored one is expired or about to expire.
 *
 * @returns The access token, or `null` when there are no stored credentials,
 *   no refresh token is available, or the refresh request fails or returns
 *   no access token.
 */
export async function getValidToken(): Promise<string | null> {
  const auth = await readAuth();
  if (!auth) return null;

  // Decode the JWT payload (second dot-separated segment) without a library.
  try {
    const segment = auth.access_token.split(".")[1] ?? "";
    const payload = JSON.parse(Buffer.from(segment, "base64url").toString());
    const expiresAt: number = payload.exp * 1000;
    // Keep a 60 s margin so the token does not expire mid-request.
    if (Date.now() < expiresAt - 60_000) return auth.access_token;
  } catch {
    return auth.access_token; // can't decode, try anyway
  }

  if (!auth.refresh_token) return null;

  let updated: AuthData;
  try {
    const res = await fetch(`${REGISTRY_BASE}/api/auth/refresh`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ refresh_token: auth.refresh_token }),
    });
    if (!res.ok) return null;

    const data: unknown = await res.json();
    const body = (typeof data === "object" && data !== null ? data : {}) as Record<string, unknown>;
    const accessToken = body.access_token;
    // Never persist or return a token the server did not actually issue.
    if (typeof accessToken !== "string" || accessToken === "") return null;

    updated = {
      access_token: accessToken,
      refresh_token: typeof body.refresh_token === "string" ? body.refresh_token : auth.refresh_token,
      handle: auth.handle,
    };
  } catch {
    return null;
  }

  try {
    // The mode only takes effect when the file is created by this call.
    await fs.writeFile(AUTH_FILE, JSON.stringify(updated, null, 2), { mode: 0o600 });
  } catch {
    // Persisting is best effort: the freshly issued token is still valid for
    // the current command even if it could not be saved.
  }
  return updated.access_token;
}
84
+
/**
 * Interactive login: obtains tokens through the browser flow, looks up the
 * account handle and stores the credentials in `AUTH_FILE`.
 *
 * Returns without saving anything when the browser flow yields no tokens.
 * A failure to write the credentials file is reported to the user instead of
 * being thrown.
 */
export async function login(): Promise<void> {
  header();
  info("Logging in to Evalify...");
  console.log();

  const tokens = await waitForToken();
  if (!tokens) return;

  // Verify token and fetch handle
  let handle = "";
  try {
    const res = await fetch(`${REGISTRY_BASE}/api/auth/me`, {
      headers: { Authorization: `Bearer ${tokens.access_token}` },
    });
    if (res.ok) {
      const data: unknown = await res.json();
      const candidate =
        typeof data === "object" && data !== null ? (data as Record<string, unknown>).handle : undefined;
      if (typeof candidate === "string") handle = candidate;
    }
  } catch {
    // non-critical — save token anyway
  }

  const credentials: AuthData = {
    access_token: tokens.access_token,
    refresh_token: tokens.refresh_token,
    handle,
  };

  try {
    // The file holds bearer tokens: keep directory and file private to the
    // current user. Modes only take effect for entries created by these calls.
    await fs.mkdir(path.dirname(AUTH_FILE), { recursive: true, mode: 0o700 });
    await fs.writeFile(AUTH_FILE, JSON.stringify(credentials, null, 2), { mode: 0o600 });
  } catch (err) {
    console.log();
    error(`Failed to save credentials: ${(err as Error).message}`);
    console.log();
    return;
  }

  console.log();
  success(`Logged in${handle ? ` as ${handle}` : ""}`);
  dim(`Token saved to ${AUTH_FILE}`);
  console.log();
}
120
+
/**
 * Runs the browser-based login flow.
 *
 * Starts a one-shot HTTP server on a random 127.0.0.1 port, opens the registry
 * auth page with that port and a random `state`, and waits for the browser to
 * hit `/callback?state=…&token=…[&refresh_token=…]`.
 *
 * @returns The received tokens, or `null` on state mismatch, missing token,
 *   local server error, or after a 5 minute timeout.
 */
function waitForToken(): Promise<{ access_token: string; refresh_token: string } | null> {
  return new Promise((resolve) => {
    const state = crypto.randomBytes(16).toString("hex");
    let settled = false;

    // Single exit point: guarantees the promise settles once, the timeout is
    // cancelled and the server stops listening, whichever path ends the flow.
    function finish(result: { access_token: string; refresh_token: string } | null): void {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      if (server.listening) server.close();
      resolve(result);
    }

    // Timeout after 5 minutes
    const timer = setTimeout(() => {
      error("Authentication timed out");
      finish(null);
    }, 5 * 60 * 1000);

    const server = http.createServer((req, res) => {
      const url = new URL(req.url ?? "/", "http://localhost");

      if (url.pathname !== "/callback") {
        res.writeHead(404);
        res.end();
        return;
      }

      const returnedState = url.searchParams.get("state");
      const token = url.searchParams.get("token");
      const refreshToken = url.searchParams.get("refresh_token");

      const ok = returnedState === state && !!token;

      const message = ok
        ? `<h2 style="color:#22c55e">&#10003; Authentication successful</h2><p style="color:#71717a">You can close this tab and return to your terminal.</p>`
        : `<h2 style="color:#ef4444">&#10007; Authentication failed</h2><p style="color:#71717a">Invalid state. Please try again.</p>`;

      res.writeHead(200, { "Content-Type": "text/html; charset=utf-8", Connection: "close" });
      // Drop the socket only after the page has been flushed, so the browser
      // always receives the result while server.close() is not held up by a
      // lingering keep-alive connection.
      res.end(
        `<!doctype html><html><body style="font-family:sans-serif;padding:2rem;background:#0a0a0a;color:#e4e4e7">
${message}
</body></html>`,
        () => req.socket.destroy()
      );

      if (!ok || !token) {
        error(returnedState !== state ? "State mismatch — possible CSRF" : "No token received");
        finish(null);
        return;
      }

      finish({ access_token: token, refresh_token: refreshToken ?? "" });
    });

    server.on("error", (err) => {
      error(`Local server error: ${err.message}`);
      finish(null);
    });

    // The server alone must not keep the process alive; the pending timeout does.
    server.unref();
    server.listen(0, "127.0.0.1", () => {
      const address = server.address();
      const port = typeof address === "object" && address !== null ? address.port : 0;
      const authUrl = `${REGISTRY_BASE}/auth/cli?port=${port}&state=${state}`;
      dim(`Opening ${authUrl}`);
      dim("Waiting for authentication...");
      openBrowser(authUrl);
    });
  });
}
@@ -1,9 +1,13 @@
1
1
  import path from "node:path";
2
2
  import fs from "node:fs/promises";
3
3
  import chalk from "chalk";
4
+ import prompts from "prompts";
4
5
  import { getFramework } from "@evalify/frameworks";
5
6
  import { header, success, info, dim, error, warn } from "../format.js";
6
7
  import { validateEvalsJson } from "../validator.js";
8
+ import { readAuth, getValidToken, login } from "./login.js";
9
+
10
+ const REGISTRY_URL = "https://www.evalify.sh/api/publish";
7
11
 
8
12
  async function findEvalsFile(targetPath: string): Promise<string | null> {
9
13
  const stat = await fs.stat(targetPath);
@@ -30,17 +34,47 @@ async function findEvalsFile(targetPath: string): Promise<string | null> {
30
34
  export async function publish(targetPath?: string): Promise<void> {
31
35
  header();
32
36
 
37
+ // Check auth
38
+ let auth = await readAuth();
39
+ if (!auth) {
40
+ error("Not logged in");
41
+ console.log();
42
+ const { action } = await prompts({
43
+ type: "select",
44
+ name: "action",
45
+ message: "What would you like to do?",
46
+ choices: [
47
+ { title: "Login now", value: "login" },
48
+ { title: "Later", value: "later" },
49
+ ],
50
+ initial: 0,
51
+ });
52
+ if (!action || action === "later") {
53
+ console.log();
54
+ dim("Run: evalify-cli login when ready.");
55
+ console.log();
56
+ return;
57
+ }
58
+ await login();
59
+ auth = await readAuth();
60
+ if (!auth) {
61
+ console.log();
62
+ return;
63
+ }
64
+ }
65
+
33
66
  const resolvedPath = path.resolve(process.cwd(), targetPath || ".");
34
67
 
35
68
  try {
36
69
  await fs.access(resolvedPath);
37
70
  } catch {
38
- error(`Path not found: ${targetPath || "."}`);
71
+ error(`Path not found: ${resolvedPath}`);
72
+ dim("Pass a path to a folder containing evals.json, or run from that folder.");
39
73
  console.log();
40
74
  return;
41
75
  }
42
76
 
43
- info("Publishing eval criteria to registry...");
77
+ info(`Publishing from ${path.relative(process.cwd(), resolvedPath) || "."}`);
44
78
  console.log();
45
79
 
46
80
  const filePath = await findEvalsFile(resolvedPath);
@@ -73,28 +107,84 @@ export async function publish(targetPath?: string): Promise<void> {
73
107
  warn(w);
74
108
  }
75
109
 
110
+ const parsed = JSON.parse(content);
111
+
112
+ const displayName = parsed.displayName ?? parsed.skill_name ?? parsed.name ?? "";
113
+ const slug =
114
+ parsed.slug ??
115
+ displayName
116
+ .toLowerCase()
117
+ .replace(/[^a-z0-9]+/g, "-")
118
+ .replace(/^-|-$/g, "");
119
+
76
120
  console.log();
77
121
  console.log(chalk.bold(" Publish summary:"));
78
122
  console.log();
79
-
80
- if (result.summary["skill_name"]) {
81
- dim(`Skill: ${result.summary["skill_name"]}`);
82
- }
83
- if (result.summary["name"]) {
84
- dim(`Name: ${result.summary["name"]}`);
85
- }
86
- if (result.summary["version"]) {
87
- dim(`Version: ${result.summary["version"]}`);
88
- }
89
- if (result.summary["description"]) {
90
- dim(`Description: ${result.summary["description"]}`);
91
- }
123
+ dim(`Name: ${displayName || "(unnamed)"}`);
124
+ dim(`Slug: ${slug || "(auto)"}`);
125
+ dim(`Version: ${parsed.version ?? "1.0.0"}`);
126
+ dim(`Domain: ${parsed.domain ?? "general"}`);
92
127
  dim(`Format: ${getFramework(result.format)?.meta.name ?? result.format}`);
93
128
  dim(`Eval count: ${result.evalCount}`);
94
- dim(`File: ${path.relative(process.cwd(), filePath)}`);
95
-
96
- console.log();
97
- warn("Dry run — publishing is not yet connected to the registry");
98
- success("File is valid and ready to publish");
129
+ dim(`Author: ${auth.handle}`);
99
130
  console.log();
100
- }
131
+
132
+ const { confirm } = await prompts({
133
+ type: "confirm",
134
+ name: "confirm",
135
+ message: "Publish to evalify.sh?",
136
+ initial: true,
137
+ });
138
+
139
+ if (!confirm) {
140
+ console.log();
141
+ dim("Cancelled.");
142
+ console.log();
143
+ return;
144
+ }
145
+
146
+ info("Uploading...");
147
+
148
+ try {
149
+ const token = await getValidToken();
150
+ if (!token) {
151
+ error("Session expired — run: evalify-cli login");
152
+ return;
153
+ }
154
+
155
+ const res = await fetch(REGISTRY_URL, {
156
+ method: "POST",
157
+ headers: {
158
+ "Content-Type": "application/json",
159
+ Authorization: `Bearer ${token}`,
160
+ },
161
+ body: JSON.stringify({
162
+ slug,
163
+ displayName,
164
+ domain: parsed.domain ?? "general",
165
+ version: parsed.version ?? "1.0.0",
166
+ tags: parsed.tags ?? [],
167
+ description: parsed.description ?? "",
168
+ evals: parsed.evals ?? [],
169
+ }),
170
+ });
171
+
172
+ const data = await res.json();
173
+
174
+ if (!res.ok) {
175
+ console.log();
176
+ error(data.error ?? `Server error ${res.status}`);
177
+ console.log();
178
+ return;
179
+ }
180
+
181
+ console.log();
182
+ success(`Published ${displayName || slug}`);
183
+ dim(`View at: https://evalify.sh/criteria/${data.slug}`);
184
+ console.log();
185
+ } catch (err) {
186
+ console.log();
187
+ error(`Failed to publish: ${(err as Error).message}`);
188
+ console.log();
189
+ }
190
+ }
@@ -1,8 +1,33 @@
1
1
  import path from "node:path";
2
+ import os from "node:os";
2
3
  import fs from "node:fs/promises";
4
+ import prompts from "prompts";
3
5
  import { header, success, info, dim, error } from "../format.js";
4
6
 
5
- const REGISTRY_URL = "https://evalify.sh/api/registry";
7
+ const REGISTRY_URL = "https://www.evalify.sh/api/registry";
8
+
/**
 * Lists the skill folders inside `skillsDir` that contain a parseable
 * `evals.json`, together with the number of evals each one declares.
 *
 * @param skillsDir Directory whose immediate sub-directories are inspected.
 * @returns One entry per sub-directory holding an `evals.json` that parses to
 *   a JSON object. `evalCount` is the length of its `evals` array, or 0 when
 *   `evals` is missing or not an array. Returns `[]` when `skillsDir` cannot
 *   be read.
 */
async function detectSkills(skillsDir: string): Promise<{ name: string; evalCount: number }[]> {
  let entries;
  try {
    entries = await fs.readdir(skillsDir, { withFileTypes: true });
  } catch {
    // Directory missing or unreadable: nothing installed there.
    return [];
  }

  const skills: { name: string; evalCount: number }[] = [];

  for (const entry of entries) {
    if (!entry.isDirectory()) continue;

    const evalsFile = path.join(skillsDir, entry.name, "evals.json");
    let parsed: unknown;
    try {
      parsed = JSON.parse(await fs.readFile(evalsFile, "utf-8"));
    } catch {
      // Folder exists but no valid evals.json — skip
      continue;
    }

    // A JSON scalar or null is not a valid evals file either.
    if (typeof parsed !== "object" || parsed === null) continue;

    const evals = (parsed as { evals?: unknown }).evals;
    // Only count real arrays; `.length` of a string or object would be bogus.
    skills.push({ name: entry.name, evalCount: Array.isArray(evals) ? evals.length : 0 });
  }

  return skills;
}
6
31
 
7
32
  export async function pull(slug: string): Promise<void> {
8
33
  header();
@@ -47,9 +72,142 @@ export async function pull(slug: string): Promise<void> {
47
72
  return;
48
73
  }
49
74
 
50
- const targetDir = path.resolve(process.cwd(), "evals", slug);
75
+ // Ask install location
76
+ const locationResponse = await prompts({
77
+ type: "select",
78
+ name: "location",
79
+ message: "Where do you want to install?",
80
+ choices: [
81
+ {
82
+ title: "Current directory",
83
+ description: `evals/${pack.slug}/evals.json`,
84
+ value: "current",
85
+ },
86
+ {
87
+ title: "Project",
88
+ description: ".claude/skills/<name>/evals.json",
89
+ value: "project",
90
+ },
91
+ {
92
+ title: "Global",
93
+ description: "~/.claude/skills/<name>/evals.json",
94
+ value: "global",
95
+ },
96
+ ],
97
+ initial: 0,
98
+ });
99
+
100
+ if (!locationResponse.location) {
101
+ console.log();
102
+ dim("Cancelled.");
103
+ console.log();
104
+ return;
105
+ }
106
+
107
+ const location: "current" | "project" | "global" = locationResponse.location;
108
+
109
+ let skillName = pack.slug;
110
+
111
+ if (location === "project" || location === "global") {
112
+ const skillsDir =
113
+ location === "project"
114
+ ? path.resolve(process.cwd(), ".claude", "skills")
115
+ : path.resolve(os.homedir(), ".claude", "skills");
116
+
117
+ const existing = await detectSkills(skillsDir);
118
+
119
+ const baseChoices = existing.map((s) => ({
120
+ title: s.name,
121
+ description: `${s.evalCount} eval${s.evalCount !== 1 ? "s" : ""}`,
122
+ value: s.name,
123
+ }));
124
+
125
+ const pickResponse = await prompts({
126
+ type: "autocomplete",
127
+ name: "skill",
128
+ message:
129
+ existing.length > 0
130
+ ? `Skill folder (${existing.length} found — type to filter or create new):`
131
+ : "Skill folder:",
132
+ choices: baseChoices,
133
+ initial: pack.slug,
134
+ suggest: async (input: string, choices: any[]) => {
135
+ const term = (input || "").toLowerCase();
136
+ const filtered = choices.filter((c) => c.title.toLowerCase().includes(term));
137
+ const exactMatch = choices.find((c) => c.title === (input || pack.slug));
138
+ if (!exactMatch) {
139
+ filtered.push({ title: input || pack.slug, value: input || pack.slug });
140
+ }
141
+ return filtered;
142
+ },
143
+ });
144
+
145
+ if (!pickResponse.skill) {
146
+ console.log();
147
+ dim("Cancelled.");
148
+ console.log();
149
+ return;
150
+ }
151
+
152
+ skillName = (pickResponse.skill as string).trim();
153
+ }
154
+
155
+ let targetDir: string;
156
+ if (location === "current") {
157
+ targetDir = path.resolve(process.cwd(), "evals", pack.slug);
158
+ } else if (location === "project") {
159
+ targetDir = path.resolve(process.cwd(), ".claude", "skills", skillName);
160
+ } else {
161
+ targetDir = path.resolve(os.homedir(), ".claude", "skills", skillName);
162
+ }
163
+
51
164
  const targetFile = path.join(targetDir, "evals.json");
52
165
 
166
+ // Check for existing evals.json and ask append vs override
167
+ let writeMode: "override" | "append" = "override";
168
+ let existingEvals: { prompt: string; expectations: string[] }[] = [];
169
+
170
+ try {
171
+ const existing = await fs.readFile(targetFile, "utf-8");
172
+ const parsed = JSON.parse(existing);
173
+ existingEvals = parsed.evals ?? [];
174
+
175
+ if (existingEvals.length > 0) {
176
+ const conflictResponse = await prompts({
177
+ type: "select",
178
+ name: "mode",
179
+ message: `Found ${existingEvals.length} existing eval${existingEvals.length !== 1 ? "s" : ""} in ${skillName}. What do you want to do?`,
180
+ choices: [
181
+ {
182
+ title: "Append",
183
+ description: `Add ${pack.evals.length} new eval${pack.evals.length !== 1 ? "s" : ""} to the existing ${existingEvals.length}`,
184
+ value: "append",
185
+ },
186
+ {
187
+ title: "Override",
188
+ description: "Replace all existing evals with the pulled set",
189
+ value: "override",
190
+ },
191
+ ],
192
+ initial: 0,
193
+ });
194
+
195
+ if (!conflictResponse.mode) {
196
+ console.log();
197
+ dim("Cancelled.");
198
+ console.log();
199
+ return;
200
+ }
201
+
202
+ writeMode = conflictResponse.mode;
203
+ }
204
+ } catch {
205
+ // No existing file — fresh write
206
+ }
207
+
208
+ const evalsToWrite =
209
+ writeMode === "append" ? [...existingEvals, ...pack.evals] : pack.evals;
210
+
53
211
  try {
54
212
  await fs.mkdir(targetDir, { recursive: true });
55
213
 
@@ -61,21 +219,26 @@ export async function pull(slug: string): Promise<void> {
61
219
  domain: pack.domain,
62
220
  author: pack.author,
63
221
  tags: pack.tags,
64
- evals: pack.evals,
222
+ evals: evalsToWrite,
65
223
  };
66
224
 
67
225
  await fs.writeFile(targetFile, JSON.stringify(output, null, 2) + "\n");
68
226
 
227
+ const action = writeMode === "append" ? "Appended" : "Wrote";
228
+ console.log();
69
229
  success(`Pulled ${pack.displayName} v${pack.version}`);
70
- success(`Wrote ${pack.evals.length} eval${pack.evals.length !== 1 ? "s" : ""} to evals/${slug}/evals.json`);
230
+ success(
231
+ `${action} ${pack.evals.length} eval${pack.evals.length !== 1 ? "s" : ""}` +
232
+ (writeMode === "append" ? ` (${evalsToWrite.length} total)` : "") +
233
+ ` to ${targetFile}`
234
+ );
71
235
  console.log();
72
236
  dim(`Author: ${pack.author}`);
73
237
  dim(`Domain: ${pack.domain}`);
74
- dim(`Location: ${targetFile}`);
75
- dim(`To validate: evalify validate evals/${slug}`);
238
+ dim(`To validate: evalify-cli validate ${targetDir}`);
76
239
  } catch (err) {
77
240
  error(`Failed to write file: ${(err as Error).message}`);
78
241
  }
79
242
 
80
243
  console.log();
81
- }
244
+ }
@@ -18,6 +18,6 @@ export async function search(query: string): Promise<void> {
18
18
  table(results);
19
19
  console.log();
20
20
  dim(`Showing placeholder results — registry search not yet connected`);
21
- dim(`Use: evalify pull <slug> to download criteria`);
21
+ dim(`Use: evalify-cli pull <slug> to download criteria`);
22
22
  console.log();
23
23
  }
package/src/format.ts CHANGED
@@ -1,9 +1,10 @@
1
1
  import chalk from "chalk";
2
2
 
3
- export const VERSION = "0.1.0";
3
+ declare const __PKG_VERSION__: string;
4
+ export const VERSION = __PKG_VERSION__;
4
5
 
5
6
  export function header(): void {
6
- console.log(chalk.bold.cyan(`\nevalify`) + chalk.dim(` v${VERSION}\n`));
7
+ console.log(chalk.bold.cyan(`\nevalify-cli`) + chalk.dim(` v${VERSION}\n`));
7
8
  }
8
9
 
9
10
  export function success(msg: string): void {
package/src/index.ts CHANGED
@@ -4,16 +4,31 @@ import { Command } from "commander";
4
4
  import { VERSION } from "./format.js";
5
5
  import { pull } from "./commands/pull.js";
6
6
  import { publish } from "./commands/publish.js";
7
+ import { login, logout } from "./commands/login.js";
7
8
  import { search } from "./commands/search.js";
8
9
  import { validate } from "./commands/validate.js";
9
10
 
10
11
  const program = new Command();
11
12
 
12
13
  program
13
- .name("evalify")
14
+ .name("evalify-cli")
14
15
  .description("CLI tool for the Evalify eval criteria registry")
15
16
  .version(VERSION);
16
17
 
18
+ program
19
+ .command("login")
20
+ .description("Authenticate with the Evalify registry")
21
+ .action(async () => {
22
+ await login();
23
+ });
24
+
25
+ program
26
+ .command("logout")
27
+ .description("Remove saved credentials")
28
+ .action(async () => {
29
+ await logout();
30
+ });
31
+
17
32
  program
18
33
  .command("pull <slug>")
19
34
  .description("Download eval criteria from the registry")
@@ -42,4 +57,4 @@ program
42
57
  await validate(targetPath);
43
58
  });
44
59
 
45
- program.parse();
60
+ program.parseAsync().then(() => process.exit(0));
package/tsup.config.ts CHANGED
@@ -1,6 +1,10 @@
1
1
  import { defineConfig } from "tsup";
2
+ import { readFileSync } from "node:fs";
3
+
4
+ const pkg = JSON.parse(readFileSync("./package.json", "utf-8")) as { version: string };
2
5
 
3
6
  export default defineConfig({
7
+ define: { __PKG_VERSION__: JSON.stringify(pkg.version) },
4
8
  entry: ["src/index.ts"],
5
9
  format: ["cjs"],
6
10
  platform: "node",