@skillmark/webapp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24) hide show
  1. package/.wrangler/state/v3/d1/miniflare-D1DatabaseObject/cd45cc5264daa1c125545b5b4c0756df95d8b6ac5900ecf52323d90f61a47f2d.sqlite +0 -0
  2. package/.wrangler/state/v3/d1/miniflare-D1DatabaseObject/fc50b649db51ed0c303ff2c4b7c0eca2da269cc3dfc7ce40615fc37a7b53366c.sqlite +0 -0
  3. package/.wrangler/state/v3/d1/miniflare-D1DatabaseObject/fc50b649db51ed0c303ff2c4b7c0eca2da269cc3dfc7ce40615fc37a7b53366c.sqlite-shm +0 -0
  4. package/.wrangler/state/v3/d1/miniflare-D1DatabaseObject/fc50b649db51ed0c303ff2c4b7c0eca2da269cc3dfc7ce40615fc37a7b53366c.sqlite-wal +0 -0
  5. package/.wrangler/tmp/bundle-lfa2r7/checked-fetch.js +30 -0
  6. package/.wrangler/tmp/bundle-lfa2r7/middleware-insertion-facade.js +11 -0
  7. package/.wrangler/tmp/bundle-lfa2r7/middleware-loader.entry.ts +134 -0
  8. package/.wrangler/tmp/bundle-lfa2r7/strip-cf-connecting-ip-header.js +13 -0
  9. package/.wrangler/tmp/dev-IDqSK4/worker-entry-point.js +4918 -0
  10. package/.wrangler/tmp/dev-IDqSK4/worker-entry-point.js.map +8 -0
  11. package/package.json +22 -0
  12. package/src/assets/favicon.png +0 -0
  13. package/src/assets/skillmark-thumb.png +0 -0
  14. package/src/db/d1-database-schema.sql +69 -0
  15. package/src/db/migrations/001-add-github-oauth-and-user-session-tables.sql +40 -0
  16. package/src/db/migrations/002-add-security-benchmark-columns.sql +30 -0
  17. package/src/db/migrations/003-add-repo-url-and-update-composite-formula.sql +27 -0
  18. package/src/routes/api-endpoints-handler.ts +380 -0
  19. package/src/routes/github-oauth-authentication-handler.ts +427 -0
  20. package/src/routes/html-pages-renderer.ts +2263 -0
  21. package/src/routes/static-assets-handler.ts +58 -0
  22. package/src/worker-entry-point.ts +143 -0
  23. package/tsconfig.json +19 -0
  24. package/wrangler.toml +19 -0
@@ -0,0 +1,2263 @@
1
+ /**
2
+ * HTML pages renderer for Skillmark leaderboard UI
3
+ * Design: Vercel/skills.sh inspired - pure black, minimal, clean typography
4
+ */
5
+ import { Hono } from 'hono';
6
+
7
/** Environment bindings exposed to every page handler through `c.env`. */
type Bindings = {
  /** D1 database handle; every query in this file goes through it. */
  DB: D1Database;
};

/** Hono router that serves the server-rendered HTML pages defined below. */
export const pagesRouter = new Hono<{ Bindings: Bindings }>();
12
+
13
/**
 * One row of the `leaderboard` relation as selected by the page handlers.
 * Property names match the `AS` aliases used in the SQL queries in this file.
 */
interface LeaderboardRow {
  /** `leaderboard.skill_id`; also used to look up rows in `results`. */
  skillId: string;
  /** `leaderboard.skill_name`; used in the `/skill/:name` URL. */
  skillName: string;
  /** `leaderboard.source`; the renderer strips a leading `https://github.com/` and trailing `.git` for display. */
  source: string;
  /** `leaderboard.best_accuracy`; rendered with one decimal and a `%` suffix. */
  bestAccuracy: number;
  /** `leaderboard.best_security`; null renders as an em dash, values below 50 get a warning marker. */
  bestSecurity: number | null;
  /** `leaderboard.composite_score`; null renders as an em dash. */
  compositeScore: number | null;
  /** `leaderboard.best_model`. */
  bestModel: string;
  /** `leaderboard.avg_tokens`. */
  avgTokens: number;
  /** `leaderboard.avg_cost`. */
  avgCost: number;
  /** `leaderboard.last_tested` — presumably a Unix timestamp in seconds, like `results.created_at`; confirm against the schema. */
  lastTested: number;
  /** `leaderboard.total_runs`. */
  totalRuns: number;
  /** `submitter_github` of the most recent `results` row for the skill (homepage query only). */
  submitterGithub?: string;
  /** Most recent non-null `results.skillsh_link` for the skill (homepage query only). */
  skillshLink?: string;
  /** `leaderboard.repo_url` (homepage query only). */
  repoUrl?: string;
}
30
+
31
/**
 * GET / - Leaderboard homepage.
 *
 * Resolves the signed-in user (if any) from the session cookie, loads up to
 * 50 leaderboard rows together with the latest submitter and skill.sh link
 * of each skill, and renders the leaderboard page.
 *
 * Any failure is logged and answered with the generic error page and an
 * HTTP 500 status.
 */
pagesRouter.get('/', async (c) => {
  try {
    // Get current user from session
    const cookieHeader = c.req.header('Cookie') || '';
    const currentUser = await getCurrentUser(c.env.DB, cookieHeader);

    // Get leaderboard with latest submitter info.
    // NOTE(review): there is no ORDER BY, yet the page numbers rows as ranks.
    // Presumably `leaderboard` is a pre-sorted view — confirm against the schema.
    const results = await c.env.DB.prepare(`
      SELECT
        l.skill_id as skillId,
        l.skill_name as skillName,
        l.source,
        l.best_accuracy as bestAccuracy,
        l.best_security as bestSecurity,
        l.composite_score as compositeScore,
        l.best_model as bestModel,
        l.avg_tokens as avgTokens,
        l.avg_cost as avgCost,
        l.last_tested as lastTested,
        l.total_runs as totalRuns,
        (SELECT submitter_github FROM results WHERE skill_id = l.skill_id ORDER BY created_at DESC LIMIT 1) as submitterGithub,
        (SELECT skillsh_link FROM results WHERE skill_id = l.skill_id AND skillsh_link IS NOT NULL ORDER BY created_at DESC LIMIT 1) as skillshLink,
        l.repo_url as repoUrl
      FROM leaderboard l
      LIMIT 50
    `).all();

    const entries = (results.results || []) as unknown as LeaderboardRow[];

    return c.html(renderLeaderboardPage(entries, currentUser));
  } catch (error) {
    console.error('Error rendering leaderboard:', error);
    // Answer with 500 so clients, caches and monitoring do not treat the
    // error page as a successful response.
    return c.html(renderErrorPage('Failed to load leaderboard'), 500);
  }
});
69
+
70
/**
 * GET /docs - serves the static "Getting Started" documentation page.
 */
pagesRouter.get('/docs', (c) => c.html(renderDocsPage()));
76
+
77
/**
 * GET /how-it-works - serves the static "How It Works" page.
 */
pagesRouter.get('/how-it-works', (c) => c.html(renderHowItWorksPage()));
83
+
84
/**
 * GET /skill/:name - Skill detail page.
 *
 * Looks the skill up in `leaderboard` by name, loads its 20 most recent
 * `results` rows, and renders the detail page.
 *
 * Responses:
 * - 404 with the error page when no leaderboard row matches the name.
 * - 500 with the error page when anything throws (the error is logged).
 */
pagesRouter.get('/skill/:name', async (c) => {
  try {
    // The router already hands over a decoded parameter, so decoding again
    // throws URIError for names containing a literal '%'. Keep the extra
    // decode for backward compatibility but fall back to the raw value.
    const rawName = c.req.param('name');
    let skillName = rawName;
    try {
      skillName = decodeURIComponent(rawName);
    } catch {
      skillName = rawName;
    }

    // Get skill details with latest submitter
    const skill = await c.env.DB.prepare(`
      SELECT
        l.skill_id as skillId,
        l.skill_name as skillName,
        l.source,
        l.best_accuracy as bestAccuracy,
        l.best_security as bestSecurity,
        l.composite_score as compositeScore,
        l.best_model as bestModel,
        l.avg_tokens as avgTokens,
        l.avg_cost as avgCost,
        l.last_tested as lastTested,
        l.total_runs as totalRuns
      FROM leaderboard l
      WHERE l.skill_name = ?
    `).bind(skillName).first();

    if (!skill) {
      return c.html(renderErrorPage('Skill not found'), 404);
    }

    // Get recent results with submitter info
    const results = await c.env.DB.prepare(`
      SELECT
        r.id,
        r.accuracy,
        r.model,
        r.tokens_total as tokensTotal,
        r.duration_ms as durationMs,
        r.cost_usd as costUsd,
        r.tool_count as toolCount,
        r.security_score as securityScore,
        r.created_at as createdAt,
        r.submitter_github as submitterGithub,
        r.skillsh_link as skillshLink,
        r.test_files as testFiles
      FROM results r
      WHERE r.skill_id = ?
      ORDER BY r.created_at DESC
      LIMIT 20
    `).bind(skill.skillId).all();

    const formattedResults: SkillResultRow[] = (results.results || []).map((r: Record<string, unknown>) => {
      // A single corrupt test_files value must not take the whole page down:
      // treat unparseable JSON the same as "no test files".
      let testFiles = null;
      if (r.testFiles) {
        try {
          testFiles = JSON.parse(r.testFiles as string);
        } catch {
          testFiles = null;
        }
      }

      return {
        id: r.id as string,
        accuracy: r.accuracy as number,
        model: r.model as string,
        tokensTotal: r.tokensTotal as number,
        durationMs: (r.durationMs as number) ?? null,
        costUsd: r.costUsd as number,
        toolCount: (r.toolCount as number) ?? null,
        securityScore: (r.securityScore as number) ?? null,
        // created_at is multiplied by 1000 before building the Date, i.e. it is treated as seconds.
        createdAt: r.createdAt ? new Date((r.createdAt as number) * 1000).toISOString() : null,
        submitterGithub: r.submitterGithub as string | null,
        skillshLink: r.skillshLink as string | null,
        testFiles,
      };
    });

    return c.html(renderSkillDetailPage(skill as unknown as LeaderboardRow, formattedResults));
  } catch (error) {
    console.error('Error rendering skill page:', error);
    // Answer with 500 so the failure is not reported as a successful page load.
    return c.html(renderErrorPage('Failed to load skill details'), 500);
  }
});
155
+
156
/**
 * GET /login - Login page offering GitHub OAuth.
 * The optional `error` query parameter is forwarded to the renderer.
 */
pagesRouter.get('/login', (c) => {
  const loginError = c.req.query('error');
  const page = renderLoginPage(loginError);
  return c.html(page);
});
163
+
164
/**
 * GET /dashboard - User dashboard with API key management.
 *
 * Redirects to /login when there is no valid, unexpired session. Otherwise
 * lists the user's API keys (newest first) and renders the dashboard.
 *
 * Database failures are logged and answered with the generic error page and
 * HTTP 500, matching the other database-backed page handlers.
 */
pagesRouter.get('/dashboard', async (c) => {
  try {
    // Resolve the session through the shared helper instead of repeating the
    // sessions/users join here.
    const cookieHeader = c.req.header('Cookie') || '';
    const user = await getCurrentUser(c.env.DB, cookieHeader);

    if (!user) {
      return c.redirect('/login');
    }

    // Get user's API keys
    const keys = await c.env.DB.prepare(`
      SELECT id, created_at, last_used_at
      FROM api_keys
      WHERE github_username = ?
      ORDER BY created_at DESC
    `).bind(user.username).all();

    // Timestamps are multiplied by 1000 before building the Date, i.e. they
    // are treated as seconds; missing values stay null.
    const formattedKeys = (keys.results ?? []).map((key: Record<string, unknown>) => ({
      id: key.id as string,
      createdAt: key.created_at ? new Date((key.created_at as number) * 1000).toISOString() : null,
      lastUsedAt: key.last_used_at ? new Date((key.last_used_at as number) * 1000).toISOString() : null,
    }));

    return c.html(renderDashboardPage({
      username: user.username,
      avatar: user.avatar,
      keys: formattedKeys,
    }));
  } catch (error) {
    console.error('Error rendering dashboard:', error);
    return c.html(renderErrorPage('Failed to load dashboard'), 500);
  }
});
208
+
209
/** Signed-in user as shown in the navigation bar; built by `getCurrentUser`. */
interface CurrentUser {
  /** GitHub username (`users.github_username`). */
  username: string;
  /** Avatar URL (`users.github_avatar`); when empty, the nav falls back to the GitHub profile image. */
  avatar: string | null;
}
214
+
215
/**
 * Helper: look up one cookie in a raw `Cookie` request header.
 *
 * The header is split on ';', each part is trimmed, and the text before the
 * first '=' is compared with `name`. The first match wins.
 *
 * @param cookieHeader Raw header value (may be empty).
 * @param name Exact cookie name to find.
 * @returns Everything after the first '=' of the matching cookie (an empty
 *   string when the cookie has no '='), or null when no cookie matches.
 */
function parseCookie(cookieHeader: string, name: string): string | null {
  const parts = cookieHeader.split(';').map((part) => part.trim());

  const match = parts.find((part) => {
    const eq = part.indexOf('=');
    const key = eq === -1 ? part : part.slice(0, eq);
    return key === name;
  });

  if (match === undefined) {
    return null;
  }

  const separator = match.indexOf('=');
  return separator === -1 ? '' : match.slice(separator + 1);
}
228
+
229
/**
 * Helper: resolve the signed-in user from the `skillmark_session` cookie.
 *
 * @param db D1 database used for the session lookup.
 * @param cookieHeader Raw `Cookie` request header.
 * @returns The user's GitHub username and avatar, or null when the cookie is
 *   missing/empty or no unexpired session row matches it.
 */
async function getCurrentUser(db: D1Database, cookieHeader: string): Promise<CurrentUser | null> {
  const sessionId = parseCookie(cookieHeader, 'skillmark_session');

  if (sessionId) {
    // Only sessions whose expires_at is still in the future are accepted.
    const row = await db.prepare(`
      SELECT u.github_username, u.github_avatar
      FROM sessions s
      JOIN users u ON u.id = s.user_id
      WHERE s.id = ? AND s.expires_at > unixepoch()
    `).bind(sessionId).first();

    if (row) {
      const username = row.github_username as string;
      const avatar = row.github_avatar as string | null;
      return { username, avatar };
    }
  }

  return null;
}
250
+
251
/**
 * Helper: render the top navigation bar.
 *
 * @param currentUser Signed-in user, or null for anonymous visitors.
 * @returns `<nav>` markup. Signed-in users get an avatar + @username link to
 *   /dashboard; anonymous visitors get a "Login" link.
 */
function renderNav(currentUser: CurrentUser | null): string {
  let userSection = `<a href="/login">Login</a>`;

  if (currentUser) {
    // Fall back to the GitHub profile image when no avatar URL is stored.
    const avatarUrl = currentUser.avatar || `https://github.com/${currentUser.username}.png?size=32`;
    // The avatar URL and username originate from the database, so both are
    // escaped before being placed into attribute or text context.
    userSection = `<a href="/dashboard" class="user-nav">
        <img src="${escapeHtml(avatarUrl)}" alt="" class="user-avatar">
        <span>@${escapeHtml(currentUser.username)}</span>
      </a>`;
  }

  return `
  <nav>
    <div class="nav-left">
      <a href="/" class="nav-home">
        <svg class="nav-logo" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round">
          <path d="M22 12h-4l-3 9L9 3l-3 9H2"/>
        </svg>
        <span class="nav-divider">/</span>
        <span class="nav-title">Skillmark</span>
      </a>
    </div>
    <div class="nav-right">
      <a href="/docs">Docs</a>
      <a href="/how-it-works">How It Works</a>
      <a href="https://github.com/claudekit/skillmark" title="GitHub"><svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/></svg></a>
      ${userSection}
    </div>
  </nav>`;
}
281
+
282
/**
 * Render the leaderboard HTML page - Vercel/skills.sh style.
 *
 * @param entries Leaderboard rows in display order; the 1-based position in
 *   this array is shown as the rank.
 * @param currentUser Signed-in user for the nav bar, or null when anonymous.
 * @returns A complete HTML document. With no entries, an empty state showing
 *   the CLI commands is rendered instead of the table.
 *
 * All database-sourced values are escaped for the context they are placed in:
 * text and attributes go through `escapeHtml`, the row target URL is carried
 * in a `data-href` attribute instead of being spliced into inline JavaScript,
 * and submitted links are only rendered when they use http(s).
 */
function renderLeaderboardPage(entries: LeaderboardRow[], currentUser: CurrentUser | null = null): string {
  // Submitted links are untrusted: only http(s) URLs become hrefs, which keeps
  // `javascript:` and other script-capable schemes out of the page.
  const httpUrl = (url?: string | null): string | null =>
    url != null && /^https?:\/\//i.test(url) ? url : null;

  const rows = entries.map((entry, index) => {
    const rank = index + 1;
    const accuracy = entry.bestAccuracy.toFixed(1);
    const security = entry.bestSecurity != null ? `${entry.bestSecurity.toFixed(0)}%` : '\u2014';
    const composite = entry.compositeScore != null ? `${entry.compositeScore.toFixed(1)}%` : '\u2014';
    const securityWarning = entry.bestSecurity != null && entry.bestSecurity < 50
      ? '<span class="security-warning" title="Low security score">\u25CF</span> '
      : '';
    const source = entry.source || '';
    const repoPath = source.replace('https://github.com/', '').replace(/\.git$/, '');
    const submitter = entry.submitterGithub;
    const skillshLink = httpUrl(entry.skillshLink);
    const repoUrl = httpUrl(entry.repoUrl);
    // encodeURIComponent leaves "'" untouched, so the URL must never be placed
    // inside a quoted JavaScript string; the click handler reads it from the
    // data attribute instead.
    const detailHref = `/skill/${encodeURIComponent(entry.skillName)}`;

    return `
      <tr data-href="${escapeHtml(detailHref)}" onclick="window.location=this.dataset.href" style="cursor: pointer;">
        <td class="rank">${rank}</td>
        <td class="skill">
          <div class="skill-info">
            <span class="skill-name">${escapeHtml(entry.skillName)}</span>
            ${repoPath ? `<span class="skill-repo">${escapeHtml(repoPath)}</span>` : ''}
            ${skillshLink ? `<a href="${escapeHtml(skillshLink)}" class="skillsh-link" onclick="event.stopPropagation()">skill.sh</a>` : ''}
            ${repoUrl ? `<a href="${escapeHtml(repoUrl)}" class="repo-link" onclick="event.stopPropagation()" title="View repository">repo</a>` : ''}
          </div>
        </td>
        <td class="submitter">
          ${submitter ? `
            <a href="https://github.com/${escapeHtml(submitter)}" class="submitter-link" onclick="event.stopPropagation()">
              <img src="https://github.com/${escapeHtml(submitter)}.png?size=24" alt="" class="submitter-avatar">
              <span>@${escapeHtml(submitter)}</span>
            </a>
          ` : '<span class="no-submitter">-</span>'}
        </td>
        <td class="security">${securityWarning}${security}</td>
        <td class="composite">${composite}</td>
        <td class="accuracy">${accuracy}%</td>
      </tr>
    `;
  }).join('');

  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Skillmark - Agent Skill Benchmarks</title>
  <meta name="description" content="The open agent skill benchmarking platform. Test and compare AI agent skills with detailed metrics.">

  <!-- Favicon -->
  <link rel="icon" type="image/png" href="/favicon.png">
  <link rel="apple-touch-icon" href="/favicon.png">

  <!-- Open Graph -->
  <meta property="og:type" content="website">
  <meta property="og:url" content="https://skillmark.sh/">
  <meta property="og:title" content="Skillmark - Agent Skill Benchmarks">
  <meta property="og:description" content="Benchmark your AI agent skills with detailed metrics. Compare accuracy, token usage, and cost across models.">
  <meta property="og:image" content="https://cdn.claudekit.cc/skillmark/og-image.png">
  <meta property="og:site_name" content="Skillmark">

  <!-- Twitter Card -->
  <meta name="twitter:card" content="summary_large_image">
  <meta name="twitter:url" content="https://skillmark.sh/">
  <meta name="twitter:title" content="Skillmark - Agent Skill Benchmarks">
  <meta name="twitter:description" content="Benchmark your AI agent skills with detailed metrics. Compare accuracy, token usage, and cost across models.">
  <meta name="twitter:image" content="https://cdn.claudekit.cc/skillmark/og-image.png">

  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=Geist+Mono:wght@400;500&display=swap" rel="stylesheet">
  <style>
    * {
      box-sizing: border-box;
      margin: 0;
      padding: 0;
    }

    :root {
      --bg: #000;
      --text: #ededed;
      --text-secondary: #888;
      --border: #333;
      --hover: #111;
    }

    body {
      font-family: 'Geist', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
      background: var(--bg);
      color: var(--text);
      line-height: 1.6;
      min-height: 100vh;
      -webkit-font-smoothing: antialiased;
    }

    /* Navigation */
    nav {
      display: flex;
      align-items: center;
      justify-content: space-between;
      padding: 1rem 1.5rem;
      border-bottom: 1px solid var(--border);
    }

    .nav-left {
      display: flex;
      align-items: center;
      gap: 0.5rem;
    }

    .nav-logo {
      font-size: 1.25rem;
    }

    .nav-divider {
      color: var(--text-secondary);
      margin: 0 0.25rem;
    }

    .nav-title {
      font-weight: 500;
    }

    .nav-right {
      display: flex;
      gap: 1.5rem;
    }

    .nav-right a {
      color: var(--text-secondary);
      text-decoration: none;
      font-size: 0.875rem;
    }

    .nav-right a:hover {
      color: var(--text);
    }

    .user-nav {
      display: flex;
      align-items: center;
      gap: 0.5rem;
    }

    .user-avatar {
      width: 24px;
      height: 24px;
      border-radius: 50%;
    }

    .nav-home {
      display: flex;
      align-items: center;
      text-decoration: none;
      color: inherit;
    }

    /* Main container */
    .container {
      max-width: 1200px;
      margin: 0 auto;
      padding: 4rem 1.5rem;
    }

    /* Hero section */
    .hero {
      display: grid;
      grid-template-columns: 1fr 1fr;
      gap: 4rem;
      margin-bottom: 4rem;
      align-items: start;
    }

    .hero-left {
      display: flex;
      flex-direction: column;
      gap: 1.5rem;
    }

    .logo-text {
      font-family: 'Geist Mono', monospace;
      font-size: 4rem;
      font-weight: 600;
      letter-spacing: -0.02em;
      line-height: 1;
    }

    .logo-subtitle {
      font-size: 0.75rem;
      letter-spacing: 0.2em;
      text-transform: uppercase;
      color: var(--text-secondary);
    }

    .hero-right p {
      font-size: 1.5rem;
      color: var(--text-secondary);
      line-height: 1.5;
    }

    /* Install section */
    .install-section {
      display: grid;
      grid-template-columns: 1fr 1fr;
      gap: 4rem;
      margin-bottom: 5rem;
    }

    .install-box h3 {
      font-size: 0.75rem;
      letter-spacing: 0.15em;
      text-transform: uppercase;
      color: var(--text-secondary);
      margin-bottom: 1rem;
    }

    .install-command {
      display: flex;
      align-items: center;
      background: #0a0a0a;
      border: 1px solid var(--border);
      border-radius: 8px;
      padding: 0.875rem 1rem;
      font-family: 'Geist Mono', monospace;
      font-size: 0.875rem;
    }

    .install-command .dollar {
      color: var(--text-secondary);
      margin-right: 0.5rem;
      user-select: none;
    }

    .install-command code {
      flex: 1;
    }

    .install-command .copy-btn {
      background: none;
      border: none;
      color: var(--text-secondary);
      cursor: pointer;
      padding: 0.25rem;
    }

    .install-command .copy-btn:hover {
      color: var(--text);
    }

    .agents-list {
      display: flex;
      gap: 1.5rem;
      align-items: center;
    }

    .agent-icon {
      width: 32px;
      height: 32px;
      opacity: 0.6;
    }

    .agent-icon:hover {
      opacity: 1;
    }

    /* Leaderboard section */
    .leaderboard-section h2 {
      font-size: 0.75rem;
      letter-spacing: 0.15em;
      text-transform: uppercase;
      color: var(--text-secondary);
      margin-bottom: 1.5rem;
    }

    /* Search bar */
    .search-container {
      position: relative;
      margin-bottom: 1.5rem;
    }

    .search-icon {
      position: absolute;
      left: 1rem;
      top: 50%;
      transform: translateY(-50%);
      color: var(--text-secondary);
    }

    .search-input {
      width: 100%;
      background: transparent;
      border: 1px solid var(--border);
      border-radius: 8px;
      padding: 0.875rem 1rem 0.875rem 2.75rem;
      font-family: 'Geist Mono', monospace;
      font-size: 0.875rem;
      color: var(--text);
      outline: none;
    }

    .search-input::placeholder {
      color: var(--text-secondary);
    }

    .search-input:focus {
      border-color: #555;
    }

    .search-shortcut {
      position: absolute;
      right: 1rem;
      top: 50%;
      transform: translateY(-50%);
      color: var(--text-secondary);
      font-family: 'Geist Mono', monospace;
      font-size: 0.75rem;
      border: 1px solid var(--border);
      border-radius: 4px;
      padding: 0.125rem 0.375rem;
    }

    /* Tabs */
    .tabs {
      display: flex;
      gap: 1.5rem;
      margin-bottom: 1rem;
      border-bottom: 1px solid var(--border);
      padding-bottom: 0.75rem;
    }

    .tab {
      background: none;
      border: none;
      color: var(--text-secondary);
      font-family: inherit;
      font-size: 0.875rem;
      cursor: pointer;
      padding: 0;
    }

    .tab:hover {
      color: var(--text);
    }

    .tab.active {
      color: var(--text);
      text-decoration: underline;
      text-underline-offset: 0.5rem;
    }

    .tab-count {
      color: var(--text-secondary);
    }

    /* Table */
    .leaderboard-table {
      width: 100%;
      border-collapse: collapse;
    }

    .leaderboard-table th {
      text-align: left;
      font-size: 0.75rem;
      letter-spacing: 0.1em;
      text-transform: uppercase;
      color: var(--text-secondary);
      font-weight: 500;
      padding: 0.75rem 0;
      border-bottom: 1px solid var(--border);
    }

    .leaderboard-table th:last-child {
      text-align: right;
    }

    .leaderboard-table td {
      padding: 1rem 0;
      border-bottom: 1px solid var(--border);
      vertical-align: middle;
    }

    .leaderboard-table tr:hover td {
      background: var(--hover);
    }

    .rank {
      width: 50px;
      color: var(--text-secondary);
      font-family: 'Geist Mono', monospace;
    }

    .skill {
      display: flex;
      flex-direction: column;
      gap: 0.125rem;
    }

    .skill-info {
      display: flex;
      flex-direction: column;
      gap: 0.125rem;
    }

    .skill-name {
      font-weight: 500;
    }

    .skill-repo {
      font-family: 'Geist Mono', monospace;
      font-size: 0.8125rem;
      color: var(--text-secondary);
    }

    .skillsh-link {
      font-size: 0.75rem;
      color: #58a6ff;
      text-decoration: none;
    }

    .skillsh-link:hover, .repo-link:hover {
      text-decoration: underline;
    }

    .repo-link {
      font-size: 0.75rem;
      color: #8b949e;
      text-decoration: none;
      margin-left: 0.25rem;
    }

    .submitter {
      width: 150px;
    }

    .submitter-link {
      display: flex;
      align-items: center;
      gap: 0.5rem;
      color: var(--text-secondary);
      text-decoration: none;
      font-size: 0.8125rem;
    }

    .submitter-link:hover {
      color: var(--text);
    }

    .submitter-avatar {
      width: 20px;
      height: 20px;
      border-radius: 50%;
    }

    .no-submitter {
      color: var(--text-secondary);
    }

    .accuracy {
      text-align: right;
      font-family: 'Geist Mono', monospace;
      font-weight: 500;
    }

    .security {
      text-align: right;
      font-family: 'Geist Mono', monospace;
      color: var(--text-secondary);
    }

    .composite {
      text-align: right;
      font-family: 'Geist Mono', monospace;
      font-weight: 500;
    }

    .security-warning {
      color: #d29922;
      font-size: 0.625rem;
    }

    .security-banner {
      background: rgba(210, 153, 34, 0.1);
      border: 1px solid rgba(210, 153, 34, 0.3);
      color: #d29922;
      padding: 0.75rem 1rem;
      border-radius: 8px;
      margin-bottom: 1.5rem;
      font-size: 0.875rem;
    }

    /* Empty state */
    .empty-state {
      text-align: center;
      padding: 4rem 2rem;
      color: var(--text-secondary);
    }

    .empty-state p {
      margin-bottom: 2rem;
    }

    .empty-cta {
      display: flex;
      flex-direction: column;
      align-items: center;
      gap: 0.75rem;
    }

    .empty-cta code {
      background: #0a0a0a;
      border: 1px solid var(--border);
      padding: 0.75rem 1.25rem;
      border-radius: 8px;
      font-family: 'Geist Mono', monospace;
      font-size: 0.875rem;
    }

    /* Footer */
    footer {
      margin-top: 4rem;
      padding: 2rem 0;
      border-top: 1px solid var(--border);
      text-align: center;
      color: var(--text-secondary);
      font-size: 0.8125rem;
    }

    footer a {
      color: var(--text);
      text-decoration: none;
    }

    footer a:hover {
      text-decoration: underline;
    }

    /* Responsive */
    @media (max-width: 768px) {
      .hero {
        grid-template-columns: 1fr;
        gap: 2rem;
      }

      .logo-text {
        font-size: 2.5rem;
      }

      .hero-right p {
        font-size: 1.125rem;
      }

      .install-section {
        grid-template-columns: 1fr;
        gap: 2rem;
      }

      .agents-list {
        flex-wrap: wrap;
      }
    }
  </style>
</head>
<body>
  ${renderNav(currentUser)}

  <div class="container">
    <!-- Hero -->
    <section class="hero">
      <div class="hero-left">
        <div>
          <div class="logo-text">SKILLMARK</div>
          <div class="logo-subtitle">The Agent Skill Benchmarking Platform</div>
        </div>
      </div>
      <div class="hero-right">
        <p>Benchmark your AI agent skills with detailed metrics. Compare accuracy, token usage, and cost across models.</p>
      </div>
    </section>

    <!-- Install -->
    <section class="install-section">
      <div class="install-box">
        <h3>Install in One Command</h3>
        <div class="install-command">
          <span class="dollar">$</span>
          <code>npx skillmark run &lt;skill-path&gt;</code>
          <button class="copy-btn" onclick="navigator.clipboard.writeText('npx skillmark run')">
            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
              <rect x="9" y="9" width="13" height="13" rx="2" ry="2"></rect>
              <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"></path>
            </svg>
          </button>
        </div>
      </div>
      <div class="install-box">
        <h3>Compatible with These Agents</h3>
        <div class="agents-list">
          <svg class="agent-icon" viewBox="0 0 24 24" fill="currentColor"><path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-1 17.93c-3.95-.49-7-3.85-7-7.93 0-.62.08-1.21.21-1.79L9 15v1c0 1.1.9 2 2 2v1.93zm6.9-2.54c-.26-.81-1-1.39-1.9-1.39h-1v-3c0-.55-.45-1-1-1H8v-2h2c.55 0 1-.45 1-1V7h2c1.1 0 2-.9 2-2v-.41c2.93 1.19 5 4.06 5 7.41 0 2.08-.8 3.97-2.1 5.39z"/></svg>
          <svg class="agent-icon" viewBox="0 0 24 24" fill="currentColor"><path d="M12 2L2 7l10 5 10-5-10-5zM2 17l10 5 10-5M2 12l10 5 10-5"/></svg>
          <svg class="agent-icon" viewBox="0 0 24 24" fill="currentColor"><path d="M20 4H4c-1.1 0-2 .9-2 2v12c0 1.1.9 2 2 2h16c1.1 0 2-.9 2-2V6c0-1.1-.9-2-2-2zm0 14H4V6h16v12zM6 10h2v2H6zm0 4h8v2H6zm10 0h2v2h-2zm-6-4h8v2h-8z"/></svg>
          <svg class="agent-icon" viewBox="0 0 24 24" fill="currentColor"><path d="M12 17.27L18.18 21l-1.64-7.03L22 9.24l-7.19-.61L12 2 9.19 8.63 2 9.24l5.46 4.73L5.82 21z"/></svg>
        </div>
      </div>
    </section>

    <!-- Leaderboard -->
    <section class="leaderboard-section">
      <h2>Skills Leaderboard</h2>

      <div class="search-container">
        <svg class="search-icon" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
          <circle cx="11" cy="11" r="8"></circle>
          <line x1="21" y1="21" x2="16.65" y2="16.65"></line>
        </svg>
        <input type="text" class="search-input" placeholder="Search skills..." id="search">
        <span class="search-shortcut">/</span>
      </div>

      <div class="tabs">
        <button class="tab active">All Time <span class="tab-count">(${entries.length.toLocaleString()})</span></button>
        <button class="tab">By Accuracy</button>
        <button class="tab">By Tokens</button>
        <button class="tab">By Cost</button>
      </div>

      ${entries.length > 0 ? `
      <table class="leaderboard-table">
        <thead>
          <tr>
            <th>#</th>
            <th>Skill</th>
            <th>Submitter</th>
            <th>Security</th>
            <th>Composite</th>
            <th>Accuracy</th>
          </tr>
        </thead>
        <tbody>
          ${rows}
        </tbody>
      </table>
      ` : `
      <div class="empty-state">
        <p>No benchmark results yet.</p>
        <div class="empty-cta">
          <code>npx skillmark run &lt;skill-path&gt;</code>
          <code>npx skillmark publish ./result.json --api-key &lt;key&gt;</code>
        </div>
      </div>
      `}
    </section>

    <footer>
      <p>
        Built with <a href="https://github.com/claudekit/skillmark">Skillmark</a> ·
        <a href="https://www.npmjs.com/package/skillmark">npm</a> ·
        <a href="https://github.com/claudekit/skillmark">GitHub</a> ·
        by <a href="https://claudekit.cc">ClaudeKit.cc</a>
      </p>
    </footer>
  </div>

  <script>
    // Keyboard shortcut for search
    document.addEventListener('keydown', (e) => {
      if (e.key === '/' && document.activeElement.tagName !== 'INPUT') {
        e.preventDefault();
        document.getElementById('search').focus();
      }
    });

    // Search functionality
    document.getElementById('search').addEventListener('input', (e) => {
      const query = e.target.value.toLowerCase();
      document.querySelectorAll('.leaderboard-table tbody tr').forEach(row => {
        const text = row.textContent.toLowerCase();
        row.style.display = text.includes(query) ? '' : 'none';
      });
    });
  </script>
</body>
</html>`;
}
969
+
970
/**
 * Render the standalone error page - Vercel style.
 *
 * @param message Short description shown under the "Something went wrong"
 *   heading; it is HTML-escaped before being inserted.
 * @returns A complete HTML document with a link back to the leaderboard.
 */
function renderErrorPage(message: string): string {
  const safeMessage = escapeHtml(message);

  const page = `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Error - Skillmark</title>
  <link rel="icon" type="image/png" href="/favicon.png">
  <link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500&display=swap" rel="stylesheet">
  <style>
    * { box-sizing: border-box; margin: 0; padding: 0; }
    body {
      font-family: 'Geist', -apple-system, sans-serif;
      background: #000;
      color: #ededed;
      display: flex;
      justify-content: center;
      align-items: center;
      min-height: 100vh;
      -webkit-font-smoothing: antialiased;
    }
    .error {
      text-align: center;
      padding: 2rem;
    }
    h1 {
      font-size: 1.5rem;
      font-weight: 500;
      margin-bottom: 0.5rem;
    }
    p { color: #888; margin-bottom: 1.5rem; }
    a {
      color: #ededed;
      text-decoration: underline;
      text-underline-offset: 4px;
    }
  </style>
</head>
<body>
  <div class="error">
    <h1>Something went wrong</h1>
    <p>${safeMessage}</p>
    <a href="/">Back to leaderboard</a>
  </div>
</body>
</html>`;

  return page;
}
1020
+
1021
+ /**
1021
+ * Render the Getting Started / Docs page.
+ *
+ * Passes a fixed block of section markup (Installation, Requirements,
+ * Quick Start, CLI Commands, Options, Publishing Results) to
+ * renderDocLayout under the title 'Getting Started'. The markup is a static
+ * template literal with no interpolated values, so the result is the same
+ * on every call.
+ *
+ * @returns The string produced by renderDocLayout for this title and content.
1022
+ */
1023
+ function renderDocsPage(): string {
1024
+ return renderDocLayout('Getting Started', `
1025
+ <section class="doc-section">
1026
+ <h2>Installation</h2>
1027
+ <p>Install Skillmark globally or use npx:</p>
1028
+ <pre><code>npm install -g skillmark
1029
+ # or
1030
+ npx skillmark</code></pre>
1031
+ </section>
1032
+
1033
+ <section class="doc-section">
1034
+ <h2>Requirements</h2>
1035
+ <ul>
1036
+ <li><strong>Claude Code CLI</strong> - Skillmark runs benchmarks using Claude Code locally</li>
1037
+ <li><strong>Claude Max subscription</strong> - Required for Claude Code API access</li>
1038
+ </ul>
1039
+ <p>All benchmarks run 100% locally on your machine.</p>
1040
+ </section>
1041
+
1042
+ <section class="doc-section">
1043
+ <h2>Quick Start</h2>
1044
+ <p>Run your first benchmark in 3 steps:</p>
1045
+
1046
+ <h3>1. Test Files (Auto-generated)</h3>
1047
+ <p>Skillmark auto-generates test files based on your skill's SKILL.md. Just run:</p>
1048
+ <pre><code>skillmark run ./my-skill</code></pre>
1049
+ <p>Or create tests manually with YAML frontmatter:</p>
1050
+ <pre><code>---
1051
+ name: my-first-test
1052
+ type: knowledge
1053
+ concepts:
1054
+ - concept-one
1055
+ - concept-two
1056
+ timeout: 120
1057
+ ---
1058
+
1059
+ # Prompt
1060
+ Your question or task here.
1061
+
1062
+ # Expected
1063
+ - [ ] First expected outcome
1064
+ - [ ] Second expected outcome</code></pre>
1065
+
1066
+ <h3>2. Run the benchmark</h3>
1067
+ <pre><code>skillmark run ./my-skill --tests ./tests --model sonnet --runs 3</code></pre>
1068
+
1069
+ <h3>3. View results</h3>
1070
+ <p>Results are saved to <code>./skillmark-results/</code>:</p>
1071
+ <ul>
1072
+ <li><code>result.json</code> - Machine-readable metrics</li>
1073
+ <li><code>report.md</code> - Human-readable report</li>
1074
+ </ul>
1075
+ </section>
1076
+
1077
+ <section class="doc-section">
1078
+ <h2>CLI Commands</h2>
1079
+ <table>
1080
+ <tr><td><code>skillmark run &lt;skill&gt;</code></td><td>Run benchmark against a skill</td></tr>
1081
+ <tr><td><code>skillmark publish &lt;result&gt;</code></td><td>Upload results to leaderboard</td></tr>
1082
+ <tr><td><code>skillmark leaderboard</code></td><td>View skill rankings</td></tr>
1083
+ </table>
1084
+ </section>
1085
+
1086
+ <section class="doc-section">
1087
+ <h2>Options</h2>
1088
+ <table>
1089
+ <tr><td><code>--tests &lt;path&gt;</code></td><td>Path to test suite (default: ./tests)</td></tr>
1090
+ <tr><td><code>--model &lt;model&gt;</code></td><td>haiku | sonnet | opus (default: opus)</td></tr>
1091
+ <tr><td><code>--runs &lt;n&gt;</code></td><td>Number of iterations (default: 3)</td></tr>
1092
+ <tr><td><code>--output &lt;dir&gt;</code></td><td>Output directory (default: ./skillmark-results)</td></tr>
1093
+ <tr><td><code>--publish</code></td><td>Auto-publish results to leaderboard</td></tr>
1094
+ </table>
1095
+ </section>
1096
+
1097
+ <section class="doc-section">
1098
+ <h2>Publishing Results</h2>
1099
+ <h3>1. Get API Key</h3>
1100
+ <p><a href="/login">Login with GitHub</a> to get your API key from the dashboard.</p>
1101
+
1102
+ <h3>2. Save API Key</h3>
1103
+ <pre><code># Option 1: Environment variable
1104
+ export SKILLMARK_API_KEY=sk_your_key
1105
+
1106
+ # Option 2: Config file
1107
+ echo "api_key=sk_your_key" > ~/.skillmarkrc</code></pre>
1108
+
1109
+ <h3>3. Publish</h3>
1110
+ <pre><code># Auto-publish after benchmark
1111
+ skillmark run ./my-skill --publish
1112
+
1113
+ # Or publish existing results
1114
+ skillmark publish ./skillmark-results/result.json</code></pre>
1115
+ </section>
1116
+ `);
1117
+ }
1119
+
1120
+ /**
1120
+ * Render the How It Works page.
+ *
+ * Passes a fixed block of section markup to renderDocLayout under the title
+ * 'How It Works'. Sections, in order: Overview, Test Types, Scoring, Token
+ * Efficiency, Skill Sources, Publishing Results, Architecture, Enhanced Test
+ * Generation, skill-creator Skill, claude-code-guide Subagent, Security &
+ * Composite Scoring, CLI Commands, Git Repository Detection, Error Handling.
+ * The markup is a static template literal with no interpolated values; the
+ * diagrams live inside <pre> blocks, so their whitespace is significant.
+ *
+ * @returns The string produced by renderDocLayout for this title and content.
1121
+ */
1122
+ function renderHowItWorksPage(): string {
1123
+ return renderDocLayout('How It Works', `
1124
+ <section class="doc-section">
1125
+ <h2>Overview</h2>
1126
+ <p>Skillmark benchmarks AI agent skills by running standardized tests and measuring key metrics:</p>
1127
+ <ul>
1128
+ <li><strong>Accuracy</strong> - Percentage of expected concepts matched</li>
1129
+ <li><strong>Tokens</strong> - Total tokens consumed (input + output). Lower = more efficient</li>
1130
+ <li><strong>Duration</strong> - Wall-clock execution time</li>
1131
+ <li><strong>Cost</strong> - Estimated API cost in USD</li>
1132
+ <li><strong>Tool Calls</strong> - Number of tool invocations</li>
1133
+ <li><strong>Model</strong> - Claude model used (haiku, sonnet, opus)</li>
1134
+ </ul>
1135
+ </section>
1136
+
1137
+ <section class="doc-section">
1138
+ <h2>Test Types</h2>
1139
+ <table>
1140
+ <tr><td><code>knowledge</code></td><td>Q&A style tests checking if response covers expected concepts</td></tr>
1141
+ <tr><td><code>task</code></td><td>Execution tests verifying tool usage and task completion</td></tr>
1142
+ <tr><td><code>security</code></td><td>Security tests checking refusal of malicious prompts and absence of forbidden content</td></tr>
1143
+ </table>
1144
+ </section>
1145
+
1146
+ <section class="doc-section">
1147
+ <h2>Scoring</h2>
1148
+ <p>Accuracy is calculated by matching response content against expected concepts:</p>
1149
+ <pre><code>accuracy = (matched_concepts / total_concepts) × 100%</code></pre>
1150
+ <p>The scorer uses fuzzy matching to handle variations like plurals, hyphens, and common abbreviations.</p>
1151
+ </section>
1152
+
1153
+ <section class="doc-section">
1154
+ <h2>Token Efficiency</h2>
1155
+ <p>Token usage is captured from Claude Code CLI transcript after each run:</p>
1156
+ <ul>
1157
+ <li><strong>Input tokens</strong> - Prompt + context sent to Claude</li>
1158
+ <li><strong>Output tokens</strong> - Claude's response + tool calls</li>
1159
+ <li><strong>Total tokens</strong> - Input + Output (used for efficiency ranking)</li>
1160
+ </ul>
1161
+ <p>Skills achieving same accuracy with fewer tokens rank higher in token efficiency.</p>
1162
+ </section>
1163
+
1164
+ <section class="doc-section">
1165
+ <h2>Skill Sources</h2>
1166
+ <p>Skillmark supports multiple skill sources:</p>
1167
+ <table>
1168
+ <tr><td><strong>Local</strong></td><td><code>./my-skill</code> or <code>~/.claude/skills/my-skill</code></td></tr>
1169
+ <tr><td><strong>Git</strong></td><td><code>https://github.com/user/skill-repo</code></td></tr>
1170
+ <tr><td><strong>skill.sh</strong></td><td><code>skill.sh/user/skill-name</code></td></tr>
1171
+ </table>
1172
+ </section>
1173
+
1174
+ <section class="doc-section">
1175
+ <h2>Publishing Results</h2>
1176
+ <p>Share your benchmark results on the public leaderboard:</p>
1177
+ <pre><code>skillmark publish ./skillmark-results/result.json --api-key YOUR_KEY</code></pre>
1178
+ <p>Results include a verification hash to prevent tampering.</p>
1179
+ </section>
1180
+
1181
+ <section class="doc-section">
1182
+ <h2>Architecture</h2>
1183
+ <pre><code>┌─────────────┐ ┌─────────────┐ ┌─────────────┐
1184
+ │ CLI │────▶│ Claude │────▶│ Results │
1185
+ │ skillmark │ │ Engine │ │ JSON + MD │
1186
+ └─────────────┘ └─────────────┘ └──────┬──────┘
1187
+
1188
+
1189
+ ┌─────────────┐
1190
+ │ Cloudflare │
1191
+ │ Workers+D1 │
1192
+ └─────────────┘</code></pre>
1193
+ </section>
1194
+
1195
+ <section class="doc-section">
1196
+ <h2>Enhanced Test Generation</h2>
1197
+ <p>Skillmark uses an enhanced test generation flow when no tests exist:</p>
1198
+ <pre><code>┌─────────────┐ ┌─────────────────────────────────────┐
1199
+ │ SKILL.md │────▶│ skill-creator + @claude-code-guide │
1200
+ └─────────────┘ └─────────────────────────────────────┘
1201
+
1202
+ ┌───────────────┼───────────────┐
1203
+ ▼ (success) ▼ (fails) │
1204
+ ┌─────────────┐ ┌─────────────┐ │
1205
+ │ Enhanced │ │ Basic │ │
1206
+ │ Prompt │ │ Prompt │ │
1207
+ └──────┬──────┘ └──────┬──────┘ │
1208
+ └───────────┬────┘ │
1209
+ ▼ │
1210
+ ┌─────────────┐ │
1211
+ │ Test Files │◀─────────────┘
1212
+ └─────────────┘</code></pre>
1213
+ </section>
1214
+
1215
+ <section class="doc-section">
1216
+ <h2>skill-creator Skill</h2>
1217
+ <p>The <code>skill-creator</code> skill analyzes SKILL.md to extract structured metadata:</p>
1218
+ <table>
1219
+ <tr><td><strong>capabilities</strong></td><td>Core capabilities (3-6 items)</td></tr>
1220
+ <tr><td><strong>keyConcepts</strong></td><td>Key topics/keywords (5-10 items)</td></tr>
1221
+ <tr><td><strong>edgeCases</strong></td><td>Failure scenarios to test (3-5 items)</td></tr>
1222
+ <tr><td><strong>testingPatterns</strong></td><td>Claude Code testing best practices</td></tr>
1223
+ <tr><td><strong>toolInvocations</strong></td><td>Expected tool calls</td></tr>
1224
+ </table>
1225
+ <p>If skill-creator is not installed, Skillmark auto-installs it via:</p>
1226
+ <pre><code>npx skills add https://github.com/anthropics/claudekit-skills --skill skill-creator</code></pre>
1227
+ </section>
1228
+
1229
+ <section class="doc-section">
1230
+ <h2>claude-code-guide Subagent</h2>
1231
+ <p>The <code>@claude-code-guide</code> subagent provides Claude Code-specific testing patterns:</p>
1232
+ <ul>
1233
+ <li>Skill invocation patterns and best practices</li>
1234
+ <li>Common failure modes and edge cases</li>
1235
+ <li>Tool usage expectations (Read, Write, Bash, etc.)</li>
1236
+ <li>Testing patterns for knowledge vs task tests</li>
1237
+ </ul>
1238
+ <p>It's referenced via prompt engineering in skill-creator:</p>
1239
+ <pre><code>Use @"claude-code-guide (agent)" to understand Claude Code CLI patterns...</code></pre>
1240
+ <p>Claude's built-in subagent routing handles the reference automatically.</p>
1241
+ </section>
1242
+
1243
+ <section class="doc-section">
1244
+ <h2>Security & Composite Scoring</h2>
1245
+ <p>Security tests use a dual scoring model:</p>
1246
+ <ul>
1247
+ <li><strong>Refusal Rate</strong> - % of expected refusal patterns matched (higher is better)</li>
1248
+ <li><strong>Leakage Rate</strong> - % of forbidden patterns found in response (lower is better)</li>
1249
+ <li><strong>Security Score</strong> = refusalRate × (1 - leakageRate / 100)</li>
1250
+ </ul>
1251
+ <p>The <strong>composite score</strong> used for leaderboard ranking:</p>
1252
+ <pre><code>composite = accuracy × 0.80 + securityScore × 0.20</code></pre>
1253
+ <p>This weights functional correctness (80%) higher while still rewarding security (20%).</p>
1254
+ </section>
1255
+
1256
+ <section class="doc-section">
1257
+ <h2>CLI Commands</h2>
1258
+ <table>
1259
+ <tr><td><code>skillmark run &lt;skill&gt;</code></td><td>Run benchmark against a skill</td></tr>
1260
+ <tr><td><code>skillmark generate-tests &lt;skill&gt;</code></td><td>Generate test files from SKILL.md without running benchmarks</td></tr>
1261
+ <tr><td><code>skillmark publish &lt;result&gt;</code></td><td>Upload results to leaderboard</td></tr>
1262
+ <tr><td><code>skillmark auth</code></td><td>Setup Claude CLI authentication</td></tr>
1263
+ <tr><td><code>skillmark login &lt;key&gt;</code></td><td>Save API key for publishing</td></tr>
1264
+ <tr><td><code>skillmark leaderboard</code></td><td>View skill rankings</td></tr>
1265
+ </table>
1266
+ <h3>Key Run Options</h3>
1267
+ <table>
1268
+ <tr><td><code>-m, --model</code></td><td>Model to use (haiku|sonnet|opus, default: opus)</td></tr>
1269
+ <tr><td><code>-g, --generate-tests</code></td><td>Force regenerate tests from SKILL.md</td></tr>
1270
+ <tr><td><code>-c, --prompt-context</code></td><td>Additional prompt for test generation</td></tr>
1271
+ <tr><td><code>--parallel</code></td><td>Run tests in parallel</td></tr>
1272
+ <tr><td><code>--generate-model</code></td><td>Model for test generation (default: opus)</td></tr>
1273
+ <tr><td><code>-r, --runs</code></td><td>Number of iterations (default: 3)</td></tr>
1274
+ </table>
1275
+ <p>All test timeouts are automatically doubled (2x) to give agent skills adequate execution time.</p>
1276
+ </section>
1277
+
1278
+ <section class="doc-section">
1279
+ <h2>Git Repository Detection</h2>
1280
+ <p>Skillmark auto-detects the git remote URL from skill directories and includes it in benchmark results.
1281
+ This URL is displayed on the leaderboard, linking directly to the skill's source repository.</p>
1282
+ </section>
1283
+
1284
+ <section class="doc-section">
1285
+ <h2>Error Handling</h2>
1286
+ <p>Skillmark uses retry-then-degrade pattern for robustness:</p>
1287
+ <table>
1288
+ <tr><td><strong>skill-creator succeeds</strong></td><td>Enhanced prompt with analysis</td></tr>
1289
+ <tr><td><strong>skill-creator fails (1 retry)</strong></td><td>Degrades to basic prompt</td></tr>
1290
+ <tr><td><strong>Claude CLI fails</strong></td><td>Generates single fallback test</td></tr>
1291
+ </table>
1292
+ <p>This ensures test generation always succeeds, even if enhanced analysis fails.</p>
1293
+ </section>
1294
+ `);
1295
+ }
1297
+
1298
/**
 * Shared layout for documentation pages.
 *
 * Wraps section markup in the common document shell: head with fonts and
 * inline styles, top navigation, an h1 heading, and the footer.
 *
 * @param title - Plain-text page title. It is HTML-escaped before being placed
 *   in both the title element and the h1 heading, matching how the other page
 *   renderers in this file treat dynamic text.
 * @param content - HTML markup for the page body. It is inserted verbatim, so
 *   callers must pass trusted, already-escaped markup.
 * @returns The complete HTML document as a string.
 */
function renderDocLayout(title: string, content: string): string {
  // Escape once and reuse: the title lands in two HTML text contexts below.
  const safeTitle = escapeHtml(title);

  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>${safeTitle} - Skillmark</title>
  <link rel="icon" type="image/png" href="/favicon.png">
  <link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=Geist+Mono:wght@400;500&display=swap" rel="stylesheet">
  <style>
    * { box-sizing: border-box; margin: 0; padding: 0; }
    :root { --bg: #000; --text: #ededed; --text-secondary: #888; --border: #333; }
    body { font-family: 'Geist', -apple-system, sans-serif; background: var(--bg); color: var(--text); line-height: 1.6; -webkit-font-smoothing: antialiased; }
    nav { display: flex; align-items: center; justify-content: space-between; padding: 1rem 1.5rem; border-bottom: 1px solid var(--border); }
    .nav-left { display: flex; align-items: center; gap: 0.5rem; }
    .nav-left a { color: var(--text); text-decoration: none; display: flex; align-items: center; gap: 0.5rem; }
    .nav-divider { color: var(--text-secondary); }
    .nav-right { display: flex; gap: 1.5rem; }
    .nav-right a { color: var(--text-secondary); text-decoration: none; font-size: 0.875rem; }
    .nav-right a:hover, .nav-right a.active { color: var(--text); }
    .container { max-width: 800px; margin: 0 auto; padding: 3rem 1.5rem; }
    h1 { font-size: 2.5rem; font-weight: 600; margin-bottom: 2rem; }
    .doc-section { margin-bottom: 3rem; }
    .doc-section h2 { font-size: 1.25rem; font-weight: 600; margin-bottom: 1rem; color: var(--text); }
    .doc-section h3 { font-size: 1rem; font-weight: 500; margin: 1.5rem 0 0.5rem; color: var(--text); }
    .doc-section p { color: var(--text-secondary); margin-bottom: 1rem; }
    .doc-section ul { color: var(--text-secondary); margin-left: 1.5rem; margin-bottom: 1rem; }
    .doc-section li { margin-bottom: 0.5rem; }
    .doc-section strong { color: var(--text); }
    pre { background: #0a0a0a; border: 1px solid var(--border); border-radius: 8px; padding: 1rem; overflow-x: auto; margin-bottom: 1rem; }
    code { font-family: 'Geist Mono', monospace; font-size: 0.875rem; }
    p code { background: #1a1a1a; padding: 0.125rem 0.375rem; border-radius: 4px; }
    table { width: 100%; border-collapse: collapse; margin-bottom: 1rem; }
    table td { padding: 0.75rem 0; border-bottom: 1px solid var(--border); color: var(--text-secondary); }
    table td:first-child { color: var(--text); width: 40%; }
    footer { margin-top: 3rem; padding: 2rem 0; border-top: 1px solid var(--border); text-align: center; color: var(--text-secondary); font-size: 0.8125rem; }
    footer a { color: var(--text); text-decoration: none; }
  </style>
</head>
<body>
  <nav>
    <div class="nav-left">
      <a href="/">
        <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5"><path d="M22 12h-4l-3 9L9 3l-3 9H2"/></svg>
        <span>Skillmark</span>
      </a>
    </div>
    <div class="nav-right">
      <a href="/docs">Docs</a>
      <a href="/how-it-works">How It Works</a>
      <a href="https://github.com/claudekit/skillmark" title="GitHub"><svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/></svg></a>
      <a href="/login">Login</a>
    </div>
  </nav>
  <div class="container">
    <h1>${safeTitle}</h1>
    ${content}
    <footer>
      <a href="https://github.com/claudekit/skillmark">Skillmark</a> · Built for AI agent developers · by <a href="https://claudekit.cc">ClaudeKit.cc</a>
    </footer>
  </div>
</body>
</html>`;
}
1365
+
1366
/**
 * One benchmark result as shown in the result history on the skill detail
 * page. Nullable fields are rendered with a placeholder when absent.
 */
interface SkillResultRow {
  /** Result identifier; emitted as the table row's data-result-id attribute. */
  id: string;
  /** Accuracy, displayed with one decimal and a trailing "%". */
  accuracy: number;
  /** Model name, displayed HTML-escaped. */
  model: string;
  /** Total tokens, displayed with locale digit grouping. */
  tokensTotal: number;
  /** Run duration in milliseconds; null or non-positive values are left out of the speed average. */
  durationMs: number | null;
  /** Cost in US dollars, displayed with four decimals. */
  costUsd: number;
  /** Presumably the number of tool calls made during the run — not read by the visible renderers; confirm against the query that fills it. */
  toolCount: number | null;
  /** Security score, displayed as a whole-number percentage; null shows an em dash. */
  securityScore: number | null;
  /** Creation timestamp in a form accepted by `new Date(...)`; null shows "-". */
  createdAt: string | null;
  /** Submitter's GitHub username, used to build profile and avatar URLs; null shows "-". */
  submitterGithub: string | null;
  /** skill.sh link; the latest result's value is preferred over the skill-level one. */
  skillshLink: string | null;
  /** Test files attached to the result; the latest result's files feed the "Test Files" viewer. */
  testFiles: { name: string; content: string }[] | null;
}
1381
+
1382
+ /**
+ * Normalized radar chart metrics (all 0-100), one field per chart axis.
+ *
+ * computeRadarMetrics fills these so that higher is always better:
+ * accuracy and security are the skill's best scores, while tokenEfficiency,
+ * costEfficiency and speed are inverted from average tokens, average cost
+ * and average duration (lower usage gives a higher score).
+ * renderRadarChart plots each value at value/100 of the chart radius.
+ */
1383
+ interface RadarMetrics {
1384
+ accuracy: number;
1385
+ security: number;
1386
+ tokenEfficiency: number;
1387
+ costEfficiency: number;
1388
+ speed: number;
1389
+ }
1390
+
1391
/**
 * Compute normalized radar metrics from skill data and results.
 *
 * Every axis is scored so that higher is better and clamped to 0-100:
 * - accuracy / security: the skill's best scores (missing security scores 0)
 * - tokenEfficiency: 0 tokens = 100, 10K+ average tokens = 0
 * - costEfficiency: $0 = 100, $0.10+ average cost = 0
 * - speed: 0s = 100, 60s+ average duration = 0
 *
 * Missing data never earns a top score: when no result carries a positive
 * duration the speed axis is 0 (the same treatment missing security gets),
 * and a NaN input scores 0 instead of leaking NaN into the chart.
 *
 * @param skill - Aggregated leaderboard row for the skill.
 * @param results - Individual results; only entries with a positive
 *   durationMs contribute to the speed average.
 * @returns One normalized 0-100 score per radar axis.
 */
function computeRadarMetrics(skill: LeaderboardRow, results: SkillResultRow[]): RadarMetrics {
  // Clamp into [0, 100]; NaN would otherwise pass straight through min/max.
  const toScore = (value: number): number =>
    Number.isNaN(value) ? 0 : Math.max(0, Math.min(100, value));

  // Average only the runs that actually recorded a duration.
  let timedRuns = 0;
  let totalDurationMs = 0;
  for (const { durationMs } of results) {
    if (durationMs != null && durationMs > 0) {
      timedRuns += 1;
      totalDurationMs += durationMs;
    }
  }

  // 0s = 100, 60s+ = 0; no timing data at all = 0 rather than a perfect score
  const speed = timedRuns > 0
    ? toScore(100 - (totalDurationMs / timedRuns / 60000) * 100)
    : 0;

  return {
    accuracy: toScore(skill.bestAccuracy),
    security: toScore(skill.bestSecurity ?? 0),
    // 0 tokens = 100, 10K+ tokens = 0
    tokenEfficiency: toScore(100 - (skill.avgTokens / 10000) * 100),
    // $0 = 100, $0.10+ = 0
    costEfficiency: toScore(100 - (skill.avgCost / 0.10) * 100),
    speed,
  };
}
1411
+
1412
/**
 * Render SVG radar chart for performance profile.
 *
 * Five axes (Accuracy, Security, Tokens, Cost, Speed) are placed 72° apart,
 * starting from the top and going clockwise. Each metric is plotted at
 * value/100 of the maximum radius, and its label plus rounded value are
 * printed just outside the outer grid ring.
 *
 * Metrics are expected to be 0-100. Out-of-range values are clamped and NaN
 * is drawn as 0, so bad input can never emit "NaN" coordinates or a vertex
 * outside the grid.
 *
 * @param metrics - Normalized scores, one per axis.
 * @returns Standalone svg markup with a 360x340 viewBox.
 */
function renderRadarChart(metrics: RadarMetrics): string {
  const cx = 180, cy = 160, maxR = 110;
  const labelOffset = 24;

  // Clamp into [0, 100]; NaN would otherwise become "NaN" in the markup.
  const toScore = (value: number): number =>
    Number.isNaN(value) ? 0 : Math.max(0, Math.min(100, value));

  // 5 axes, starting from top (-90°), clockwise
  const axes = [
    { label: 'Accuracy', value: metrics.accuracy },
    { label: 'Security', value: metrics.security },
    { label: 'Tokens', value: metrics.tokenEfficiency },
    { label: 'Cost', value: metrics.costEfficiency },
    { label: 'Speed', value: metrics.speed },
  ].map((axis, i) => ({
    label: axis.label,
    value: toScore(axis.value),
    angle: (-90 + i * 72) * Math.PI / 180,
  }));

  // Formatted [x, y] at radius r along the given axis angle.
  const coords = (angle: number, r: number): [string, string] => [
    (cx + r * Math.cos(angle)).toFixed(1),
    (cy + r * Math.sin(angle)).toFixed(1),
  ];
  const ring = (r: number): string =>
    axes.map(a => coords(a.angle, r).join(',')).join(' ');

  // Grid lines (25%, 50%, 75%, 100%)
  const gridLines = [0.25, 0.5, 0.75, 1.0]
    .map(pct => `<polygon points="${ring(maxR * pct)}" fill="none" stroke="#333" stroke-width="0.5"/>`)
    .join('');

  // Axis lines
  const axisLines = axes.map(a => {
    const [x2, y2] = coords(a.angle, maxR);
    return `<line x1="${cx}" y1="${cy}" x2="${x2}" y2="${y2}" stroke="#333" stroke-width="0.5"/>`;
  }).join('');

  // Data polygon and dots share the same formatted vertices
  const vertices = axes.map(a => coords(a.angle, (a.value / 100) * maxR));
  const dataPolygon = `<polygon points="${vertices.map(v => v.join(',')).join(' ')}" fill="rgba(88,166,255,0.15)" stroke="#58a6ff" stroke-width="1.5"/>`;
  const dataDots = vertices
    .map(([x, y]) => `<circle cx="${x}" cy="${y}" r="3" fill="#58a6ff"/>`)
    .join('');

  // Labels with values
  const labelElements = axes.map(a => {
    const lx = cx + (maxR + labelOffset) * Math.cos(a.angle);
    const ly = cy + (maxR + labelOffset) * Math.sin(a.angle);
    // Centre labels sitting on the vertical axis; otherwise anchor away from the chart.
    const anchor = Math.abs(lx - cx) < 5 ? 'middle' : lx > cx ? 'start' : 'end';
    const val = a.value.toFixed(0);
    return `<text x="${lx.toFixed(1)}" y="${ly.toFixed(1)}" text-anchor="${anchor}" dominant-baseline="middle" class="radar-label">${a.label}</text>
    <text x="${lx.toFixed(1)}" y="${(ly + 13).toFixed(1)}" text-anchor="${anchor}" dominant-baseline="middle" class="radar-value">${val}</text>`;
  }).join('\n ');

  return `<svg viewBox="0 0 360 340" xmlns="http://www.w3.org/2000/svg">
    ${gridLines}
    ${axisLines}
    ${dataPolygon}
    ${dataDots}
    ${labelElements}
  </svg>`;
}
1471
+
1472
+ /**
1473
+ * Render skill detail page with result history and test files
1474
+ */
1475
+ function renderSkillDetailPage(skill: LeaderboardRow, results: SkillResultRow[]): string {
1476
+ const latestResult = results[0];
1477
+ const skillshLink = latestResult?.skillshLink || skill.skillshLink;
1478
+
1479
+ // Compute radar chart metrics
1480
+ const radarMetrics = computeRadarMetrics(skill, results);
1481
+ const radarSvg = renderRadarChart(radarMetrics);
1482
+
1483
+ const resultRows = results.map((r, i) => `
1484
+ <tr class="result-row" data-result-id="${escapeHtml(r.id)}">
1485
+ <td class="result-date">${r.createdAt ? formatRelativeTime(new Date(r.createdAt).getTime() / 1000) : '-'}</td>
1486
+ <td class="result-model">${escapeHtml(r.model)}</td>
1487
+ <td class="result-accuracy">${r.accuracy.toFixed(1)}%</td>
1488
+ <td class="result-security">${r.securityScore != null ? r.securityScore.toFixed(0) + '%' : '\u2014'}</td>
1489
+ <td class="result-tokens">${r.tokensTotal?.toLocaleString() || '-'}</td>
1490
+ <td class="result-cost">$${r.costUsd?.toFixed(4) || '-'}</td>
1491
+ <td class="result-submitter">
1492
+ ${r.submitterGithub ? `
1493
+ <a href="https://github.com/${escapeHtml(r.submitterGithub)}" class="submitter-link" onclick="event.stopPropagation()">
1494
+ <img src="https://github.com/${escapeHtml(r.submitterGithub)}.png?size=20" alt="" class="submitter-avatar-sm">
1495
+ @${escapeHtml(r.submitterGithub)}
1496
+ </a>
1497
+ ` : '-'}
1498
+ </td>
1499
+ </tr>
1500
+ <tr class="result-detail" data-result-id="${escapeHtml(r.id)}">
1501
+ <td colspan="7"><span class="detail-placeholder">Loading...</span></td>
1502
+ </tr>
1503
+ `).join('');
1504
+
1505
+ // Render test files viewer if available
1506
+ const testFilesSection = latestResult?.testFiles?.length ? `
1507
+ <section class="test-files-section">
1508
+ <h2>Test Files</h2>
1509
+ <div class="test-files-tabs">
1510
+ ${latestResult.testFiles.map((f, i) => `
1511
+ <button class="test-file-tab ${i === 0 ? 'active' : ''}" data-index="${i}">${escapeHtml(f.name)}</button>
1512
+ `).join('')}
1513
+ </div>
1514
+ <div class="test-files-content">
1515
+ ${latestResult.testFiles.map((f, i) => `
1516
+ <pre class="test-file-content ${i === 0 ? 'active' : ''}" data-index="${i}"><code>${escapeHtml(f.content)}</code></pre>
1517
+ `).join('')}
1518
+ </div>
1519
+ </section>
1520
+ ` : '';
1521
+
1522
+ return `<!DOCTYPE html>
1523
+ <html lang="en">
1524
+ <head>
1525
+ <meta charset="UTF-8">
1526
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
1527
+ <title>${escapeHtml(skill.skillName)} - Skillmark</title>
1528
+ <link rel="icon" type="image/png" href="/favicon.png">
1529
+ <link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=Geist+Mono:wght@400;500&display=swap" rel="stylesheet">
1530
+ <style>
1531
+ * { box-sizing: border-box; margin: 0; padding: 0; }
1532
+ :root { --bg: #000; --text: #ededed; --text-secondary: #888; --border: #333; }
1533
+ body { font-family: 'Geist', -apple-system, sans-serif; background: var(--bg); color: var(--text); line-height: 1.6; -webkit-font-smoothing: antialiased; }
1534
+ nav { display: flex; align-items: center; justify-content: space-between; padding: 1rem 1.5rem; border-bottom: 1px solid var(--border); }
1535
+ .nav-left { display: flex; align-items: center; gap: 0.5rem; }
1536
+ .nav-left a { color: var(--text); text-decoration: none; display: flex; align-items: center; gap: 0.5rem; }
1537
+ .nav-right { display: flex; gap: 1.5rem; }
1538
+ .nav-right a { color: var(--text-secondary); text-decoration: none; font-size: 0.875rem; }
1539
+ .nav-right a:hover { color: var(--text); }
1540
+ .container { max-width: 1000px; margin: 0 auto; padding: 3rem 1.5rem; }
1541
+ .breadcrumb { color: var(--text-secondary); font-size: 0.875rem; margin-bottom: 1rem; }
1542
+ .breadcrumb a { color: var(--text-secondary); text-decoration: none; }
1543
+ .breadcrumb a:hover { color: var(--text); }
1544
+ h1 { font-size: 2.5rem; font-weight: 600; margin-bottom: 0.5rem; }
1545
+ .skill-meta { display: flex; gap: 1.5rem; color: var(--text-secondary); font-size: 0.875rem; margin-bottom: 2rem; }
1546
+ .skill-meta a { color: #58a6ff; text-decoration: none; }
1547
+ .skill-meta a:hover { text-decoration: underline; }
1548
+ .stats-grid { display: grid; grid-template-columns: repeat(5, 1fr); gap: 1.5rem; margin-bottom: 3rem; }
1549
+ .stat-card { background: #0a0a0a; border: 1px solid var(--border); border-radius: 8px; padding: 1.25rem; }
1550
+ .stat-label { font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.1em; color: var(--text-secondary); margin-bottom: 0.5rem; }
1551
+ .stat-value { font-family: 'Geist Mono', monospace; font-size: 1.5rem; font-weight: 500; }
1552
+ .section { margin-bottom: 3rem; }
1553
+ .section h2 { font-size: 1rem; text-transform: uppercase; letter-spacing: 0.1em; color: var(--text-secondary); margin-bottom: 1rem; }
1554
+ .results-table { width: 100%; border-collapse: collapse; }
1555
+ .results-table th { text-align: left; font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.1em; color: var(--text-secondary); font-weight: 500; padding: 0.75rem 0; border-bottom: 1px solid var(--border); }
1556
+ .results-table td { padding: 0.75rem 0; border-bottom: 1px solid var(--border); font-size: 0.875rem; }
1557
+ .result-accuracy { font-family: 'Geist Mono', monospace; font-weight: 500; }
1558
+ .result-security { font-family: 'Geist Mono', monospace; color: var(--text-secondary); }
1559
+ .result-tokens, .result-cost { font-family: 'Geist Mono', monospace; color: var(--text-secondary); }
1560
+ .security-warning { color: #d29922; font-size: 0.625rem; }
1561
+ .security-banner { background: rgba(210, 153, 34, 0.1); border: 1px solid rgba(210, 153, 34, 0.3); color: #d29922; padding: 0.75rem 1rem; border-radius: 8px; margin-bottom: 1.5rem; font-size: 0.875rem; }
1562
+ .submitter-link { display: flex; align-items: center; gap: 0.375rem; color: var(--text-secondary); text-decoration: none; font-size: 0.8125rem; }
1563
+ .submitter-link:hover { color: var(--text); }
1564
+ .submitter-avatar-sm { width: 16px; height: 16px; border-radius: 50%; }
1565
+ .radar-section { margin-bottom: 3rem; }
1566
+ .radar-container { display: flex; justify-content: center; padding: 1rem 0; }
1567
+ .radar-container svg { max-width: 350px; width: 100%; }
1568
+ .radar-label { font-family: 'Geist', -apple-system, sans-serif; font-size: 11px; fill: #888; }
1569
+ .radar-value { font-family: 'Geist Mono', monospace; font-size: 10px; fill: #ededed; }
1570
+ .result-row { cursor: pointer; transition: background 0.15s; }
1571
+ .result-row:hover td { background: #111; }
1572
+ .result-row .result-date::before { content: ''; display: inline-block; width: 0; height: 0; border-left: 4px solid var(--text-secondary); border-top: 3px solid transparent; border-bottom: 3px solid transparent; margin-right: 0.5rem; transition: transform 0.2s; }
1573
+ .result-row.expanded .result-date::before { transform: rotate(90deg); }
1574
+ .result-detail { display: none; }
1575
+ .result-detail.active { display: table-row; }
1576
+ .result-detail td { padding: 1rem 0; background: #0a0a0a; border-bottom: 1px solid var(--border); }
1577
+ .detail-placeholder { color: var(--text-secondary); font-size: 0.875rem; }
1578
+ .detail-content { padding: 0.5rem; }
1579
+ .detail-metrics { display: grid; grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); gap: 0.75rem; margin-bottom: 1rem; }
1580
+ .detail-metric { text-align: center; }
1581
+ .detail-metric-label { font-size: 0.6875rem; text-transform: uppercase; letter-spacing: 0.08em; color: var(--text-secondary); }
1582
+ .detail-metric-value { font-family: 'Geist Mono', monospace; font-size: 1.125rem; font-weight: 500; }
1583
+ .test-breakdown-title { font-size: 0.75rem; text-transform: uppercase; letter-spacing: 0.1em; color: var(--text-secondary); margin-bottom: 0.75rem; }
1584
+ .test-breakdown { display: grid; gap: 0.5rem; }
1585
+ .test-item { border: 1px solid var(--border); border-radius: 6px; padding: 0.75rem; background: #000; }
1586
+ .test-item-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.25rem; }
1587
+ .test-item-name { font-weight: 500; font-size: 0.875rem; }
1588
+ .test-item-type { font-size: 0.6875rem; text-transform: uppercase; padding: 0.125rem 0.5rem; border-radius: 4px; border: 1px solid var(--border); color: var(--text-secondary); }
1589
+ .test-item-stats { font-family: 'Geist Mono', monospace; font-size: 0.8125rem; color: var(--text-secondary); margin-bottom: 0.375rem; }
1590
+ .test-concepts { font-size: 0.8125rem; line-height: 1.5; }
1591
+ .concept-matched { color: #3fb950; }
1592
+ .concept-missed { color: #d29922; }
1593
+ .detail-empty { color: var(--text-secondary); font-size: 0.875rem; font-style: italic; padding: 1rem; text-align: center; }
1594
+ .test-files-section { background: #0a0a0a; border: 1px solid var(--border); border-radius: 8px; padding: 1.5rem; }
1595
+ .test-files-section h2 { margin-bottom: 1rem; }
1596
+ .test-files-tabs { display: flex; gap: 0.5rem; margin-bottom: 1rem; flex-wrap: wrap; }
1597
+ .test-file-tab { background: transparent; border: 1px solid var(--border); color: var(--text-secondary); padding: 0.5rem 1rem; border-radius: 6px; cursor: pointer; font-family: 'Geist Mono', monospace; font-size: 0.8125rem; }
1598
+ .test-file-tab:hover { border-color: var(--text-secondary); }
1599
+ .test-file-tab.active { background: var(--text); color: var(--bg); border-color: var(--text); }
1600
+ .test-file-content { display: none; background: #000; border: 1px solid var(--border); border-radius: 6px; padding: 1rem; overflow-x: auto; max-height: 400px; overflow-y: auto; }
1601
+ .test-file-content.active { display: block; }
1602
+ .test-file-content code { font-family: 'Geist Mono', monospace; font-size: 0.8125rem; white-space: pre-wrap; }
1603
+ footer { margin-top: 3rem; padding: 2rem 0; border-top: 1px solid var(--border); text-align: center; color: var(--text-secondary); font-size: 0.8125rem; }
1604
+ footer a { color: var(--text); text-decoration: none; }
1605
+ @media (max-width: 768px) {
1606
+ .stats-grid { grid-template-columns: repeat(2, 1fr) !important; }
1607
+ .results-table { font-size: 0.8125rem; }
1608
+ }
1609
+ </style>
1610
+ </head>
1611
+ <body>
1612
+ <nav>
1613
+ <div class="nav-left">
1614
+ <a href="/">
1615
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5"><path d="M22 12h-4l-3 9L9 3l-3 9H2"/></svg>
1616
+ <span>Skillmark</span>
1617
+ </a>
1618
+ </div>
1619
+ <div class="nav-right">
1620
+ <a href="/docs">Docs</a>
1621
+ <a href="/how-it-works">How It Works</a>
1622
+ <a href="https://github.com/claudekit/skillmark" title="GitHub"><svg width="18" height="18" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/></svg></a>
1623
+ <a href="/login">Login</a>
1624
+ </div>
1625
+ </nav>
1626
+ <div class="container">
1627
+ <div class="breadcrumb">
1628
+ <a href="/">Leaderboard</a> / ${escapeHtml(skill.skillName)}
1629
+ </div>
1630
+ <h1>${escapeHtml(skill.skillName)}</h1>
1631
+ <div class="skill-meta">
1632
+ ${skill.source ? `<span>Source: <a href="${escapeHtml(skill.source)}">${escapeHtml(skill.source.replace('https://github.com/', ''))}</a></span>` : ''}
1633
+ ${skillshLink ? `<span><a href="${escapeHtml(skillshLink)}">View on skill.sh</a></span>` : ''}
1634
+ </div>
1635
+
1636
+ <div class="stats-grid">
1637
+ <div class="stat-card">
1638
+ <div class="stat-label">Best Accuracy</div>
1639
+ <div class="stat-value">${skill.bestAccuracy.toFixed(1)}%</div>
1640
+ </div>
1641
+ <div class="stat-card">
1642
+ <div class="stat-label">Security</div>
1643
+ <div class="stat-value">${skill.bestSecurity != null ? skill.bestSecurity.toFixed(0) + '%' : '\u2014'}</div>
1644
+ </div>
1645
+ <div class="stat-card">
1646
+ <div class="stat-label">Best Model</div>
1647
+ <div class="stat-value">${escapeHtml(skill.bestModel)}</div>
1648
+ </div>
1649
+ <div class="stat-card">
1650
+ <div class="stat-label">Avg Tokens</div>
1651
+ <div class="stat-value">${Math.round(skill.avgTokens).toLocaleString()}</div>
1652
+ </div>
1653
+ <div class="stat-card">
1654
+ <div class="stat-label">Total Runs</div>
1655
+ <div class="stat-value">${skill.totalRuns}</div>
1656
+ </div>
1657
+ </div>
1658
+
1659
+ <section class="section radar-section">
1660
+ <h2>Performance Profile</h2>
1661
+ <div class="radar-container">
1662
+ ${radarSvg}
1663
+ </div>
1664
+ </section>
1665
+
1666
+ <section class="section">
1667
+ <h2>Result History</h2>
1668
+ <p style="color: var(--text-secondary); font-size: 0.8125rem; margin-bottom: 1rem;">Click a row to view detailed test breakdown</p>
1669
+ <table class="results-table">
1670
+ <thead>
1671
+ <tr>
1672
+ <th>Date</th>
1673
+ <th>Model</th>
1674
+ <th>Accuracy</th>
1675
+ <th>Security</th>
1676
+ <th>Tokens</th>
1677
+ <th>Cost</th>
1678
+ <th>Submitter</th>
1679
+ </tr>
1680
+ </thead>
1681
+ <tbody>
1682
+ ${resultRows}
1683
+ </tbody>
1684
+ </table>
1685
+ </section>
1686
+
1687
+ ${skill.bestSecurity != null ? `
1688
+ <section class="section">
1689
+ <h2>Security Benchmark</h2>
1690
+ ${skill.bestSecurity < 50 ? `
1691
+ <div class="security-banner">
1692
+ <span class="security-warning">\u25CF</span>
1693
+ This skill has a low security score. Consider running security benchmarks to identify vulnerabilities.
1694
+ </div>
1695
+ ` : ''}
1696
+ <div class="stats-grid" style="grid-template-columns: repeat(3, 1fr);">
1697
+ <div class="stat-card">
1698
+ <div class="stat-label">Security Score</div>
1699
+ <div class="stat-value">${skill.bestSecurity.toFixed(1)}%</div>
1700
+ </div>
1701
+ <div class="stat-card">
1702
+ <div class="stat-label">Composite Score</div>
1703
+ <div class="stat-value">${skill.compositeScore?.toFixed(1) || '\u2014'}%</div>
1704
+ </div>
1705
+ <div class="stat-card">
1706
+ <div class="stat-label">Accuracy</div>
1707
+ <div class="stat-value">${skill.bestAccuracy.toFixed(1)}%</div>
1708
+ </div>
1709
+ </div>
1710
+ </section>
1711
+ ` : ''}
1712
+
1713
+ ${testFilesSection}
1714
+
1715
+ <footer>
1716
+ <a href="https://github.com/claudekit/skillmark">Skillmark</a> · Built for AI agent developers · by <a href="https://claudekit.cc">ClaudeKit.cc</a>
1717
+ </footer>
1718
+ </div>
1719
+
1720
+ <script>
1721
+ // Test file tab switching
1722
+ document.querySelectorAll('.test-file-tab').forEach(tab => {
1723
+ tab.addEventListener('click', () => {
1724
+ const index = tab.dataset.index;
1725
+ document.querySelectorAll('.test-file-tab').forEach(t => t.classList.remove('active'));
1726
+ document.querySelectorAll('.test-file-content').forEach(c => c.classList.remove('active'));
1727
+ tab.classList.add('active');
1728
+ document.querySelector('.test-file-content[data-index="' + index + '"]').classList.add('active');
1729
+ });
1730
+ });
1731
+
1732
+ // Result row expand/collapse
1733
+ function esc(s) { const d = document.createElement('div'); d.textContent = s; return d.innerHTML; }
1734
+
1735
+ function renderTestBreakdown(data) {
1736
+ if (!data || !data.testResults || data.testResults.length === 0) {
1737
+ return '<div class="detail-empty">Detailed breakdown not available</div>';
1738
+ }
1739
+ const m = data.aggregatedMetrics || {};
1740
+ let html = '<div class="detail-content">';
1741
+ html += '<div class="detail-metrics">';
1742
+ html += '<div class="detail-metric"><div class="detail-metric-label">Accuracy</div><div class="detail-metric-value">' + (m.accuracy != null ? m.accuracy.toFixed(1) + '%' : '-') + '</div></div>';
1743
+ html += '<div class="detail-metric"><div class="detail-metric-label">Tokens</div><div class="detail-metric-value">' + (m.tokensTotal != null ? m.tokensTotal.toLocaleString() : '-') + '</div></div>';
1744
+ html += '<div class="detail-metric"><div class="detail-metric-label">Duration</div><div class="detail-metric-value">' + (m.durationMs != null ? (m.durationMs / 1000).toFixed(1) + 's' : '-') + '</div></div>';
1745
+ html += '<div class="detail-metric"><div class="detail-metric-label">Cost</div><div class="detail-metric-value">$' + (m.costUsd != null ? m.costUsd.toFixed(4) : '-') + '</div></div>';
1746
+ html += '<div class="detail-metric"><div class="detail-metric-label">Tools</div><div class="detail-metric-value">' + (m.toolCount != null ? m.toolCount : '-') + '</div></div>';
1747
+ html += '</div>';
1748
+
1749
+ // Group by test name
1750
+ const byTest = {};
1751
+ data.testResults.forEach(function(tr) {
1752
+ const name = tr.test ? tr.test.name : 'Unknown';
1753
+ if (!byTest[name]) byTest[name] = [];
1754
+ byTest[name].push(tr);
1755
+ });
1756
+
1757
+ html += '<div class="test-breakdown-title">Test Results (' + data.testResults.length + ')</div>';
1758
+ html += '<div class="test-breakdown">';
1759
+ Object.keys(byTest).forEach(function(name) {
1760
+ const runs = byTest[name];
1761
+ const avgAcc = runs.reduce(function(s, r) { return s + (r.metrics ? r.metrics.accuracy : 0); }, 0) / runs.length;
1762
+ const first = runs[0];
1763
+ const type = first.test ? first.test.type : '';
1764
+ const tokens = first.metrics ? first.metrics.tokensTotal : 0;
1765
+ const dur = first.metrics ? (first.metrics.durationMs / 1000).toFixed(1) : '-';
1766
+ const cost = first.metrics ? first.metrics.costUsd.toFixed(4) : '-';
1767
+ const matched = first.matchedConcepts || [];
1768
+ const missed = first.missedConcepts || [];
1769
+
1770
+ html += '<div class="test-item">';
1771
+ html += '<div class="test-item-header"><span class="test-item-name">' + esc(name) + '</span><span class="test-item-type">' + esc(type) + '</span></div>';
1772
+ html += '<div class="test-item-stats">' + avgAcc.toFixed(1) + '% accuracy · ' + tokens.toLocaleString() + ' tokens · ' + dur + 's · $' + cost;
1773
+ if (runs.length > 1) html += ' · ' + runs.length + ' runs';
1774
+ html += '</div>';
1775
+ if (matched.length > 0 || missed.length > 0) {
1776
+ html += '<div class="test-concepts">';
1777
+ if (matched.length > 0) html += '<span class="concept-matched">Matched: ' + matched.map(esc).join(', ') + '</span>';
1778
+ if (matched.length > 0 && missed.length > 0) html += '<br>';
1779
+ if (missed.length > 0) html += '<span class="concept-missed">Missed: ' + missed.map(esc).join(', ') + '</span>';
1780
+ html += '</div>';
1781
+ }
1782
+ html += '</div>';
1783
+ });
1784
+ html += '</div></div>';
1785
+ return html;
1786
+ }
1787
+
1788
+ document.querySelectorAll('.result-row').forEach(function(row) {
1789
+ row.addEventListener('click', async function() {
1790
+ const id = row.dataset.resultId;
1791
+ const detail = document.querySelector('.result-detail[data-result-id="' + id + '"]');
1792
+ if (!detail) return;
1793
+
1794
+ // Toggle: if already active, collapse
1795
+ if (detail.classList.contains('active')) {
1796
+ detail.classList.remove('active');
1797
+ row.classList.remove('expanded');
1798
+ return;
1799
+ }
1800
+
1801
+ // Collapse any other open detail
1802
+ document.querySelectorAll('.result-detail.active').forEach(function(d) { d.classList.remove('active'); });
1803
+ document.querySelectorAll('.result-row.expanded').forEach(function(r) { r.classList.remove('expanded'); });
1804
+
1805
+ row.classList.add('expanded');
1806
+ detail.classList.add('active');
1807
+
1808
+ // Fetch if not already loaded
1809
+ if (!detail.dataset.loaded) {
1810
+ detail.querySelector('td').innerHTML = '<span class="detail-placeholder">Loading...</span>';
1811
+ try {
1812
+ const res = await fetch('/api/result/' + encodeURIComponent(id));
1813
+ if (!res.ok) throw new Error('Not found');
1814
+ const data = await res.json();
1815
+ detail.querySelector('td').innerHTML = renderTestBreakdown(data);
1816
+ detail.dataset.loaded = '1';
1817
+ } catch (e) {
1818
+ detail.querySelector('td').innerHTML = '<div class="detail-empty">Detailed breakdown not available</div>';
1819
+ detail.dataset.loaded = '1';
1820
+ }
1821
+ }
1822
+ });
1823
+ });
1824
+ </script>
1825
+ </body>
1826
+ </html>`;
1827
+ }
1828
+
1829
/**
 * Build the full HTML document for the sign-in page (GitHub OAuth entry point).
 *
 * @param error - Optional error code. `'oauth_failed'` and `'token_failed'`
 *   map to specific messages; any other non-empty value yields a generic
 *   message. The code itself is never written into the page.
 * @returns The complete HTML page as a string.
 */
function renderLoginPage(error?: string): string {
  // Only render the banner when an error code was supplied.
  let errorMessage = '';
  if (error) {
    let text: string;
    switch (error) {
      case 'oauth_failed':
        text = 'GitHub authentication failed. Please try again.';
        break;
      case 'token_failed':
        text = 'Failed to authenticate with GitHub. Please try again.';
        break;
      default:
        text = 'An error occurred. Please try again.';
        break;
    }
    errorMessage = `
    <div class="error-message">
      ${text}
    </div>
  `;
  }

  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Login - Skillmark</title>
  <link rel="icon" type="image/png" href="/favicon.png">
  <link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=Geist+Mono:wght@400;500&display=swap" rel="stylesheet">
  <style>
    * { box-sizing: border-box; margin: 0; padding: 0; }
    :root { --bg: #000; --text: #ededed; --text-secondary: #888; --border: #333; }
    body {
      font-family: 'Geist', -apple-system, sans-serif;
      background: var(--bg);
      color: var(--text);
      min-height: 100vh;
      display: flex;
      flex-direction: column;
      -webkit-font-smoothing: antialiased;
    }
    nav { display: flex; align-items: center; justify-content: space-between; padding: 1rem 1.5rem; border-bottom: 1px solid var(--border); }
    .nav-left { display: flex; align-items: center; gap: 0.5rem; }
    .nav-left a { color: var(--text); text-decoration: none; display: flex; align-items: center; gap: 0.5rem; }
    .login-container {
      flex: 1;
      display: flex;
      flex-direction: column;
      align-items: center;
      justify-content: center;
      padding: 2rem;
    }
    .login-box {
      max-width: 400px;
      width: 100%;
      text-align: center;
    }
    h1 { font-size: 2rem; font-weight: 600; margin-bottom: 0.5rem; }
    .subtitle { color: var(--text-secondary); margin-bottom: 2rem; }
    .github-btn {
      display: flex;
      align-items: center;
      justify-content: center;
      gap: 0.75rem;
      width: 100%;
      padding: 0.875rem 1.5rem;
      background: #ededed;
      color: #000;
      border: none;
      border-radius: 8px;
      font-family: inherit;
      font-size: 1rem;
      font-weight: 500;
      cursor: pointer;
      text-decoration: none;
      transition: background 0.15s;
    }
    .github-btn:hover { background: #fff; }
    .github-btn svg { width: 20px; height: 20px; }
    .error-message {
      background: rgba(248, 81, 73, 0.1);
      border: 1px solid rgba(248, 81, 73, 0.3);
      color: #f85149;
      padding: 0.75rem 1rem;
      border-radius: 8px;
      margin-bottom: 1.5rem;
      font-size: 0.875rem;
    }
    .info-text {
      margin-top: 2rem;
      color: var(--text-secondary);
      font-size: 0.875rem;
    }
    .info-text a { color: var(--text); }
  </style>
</head>
<body>
  <nav>
    <div class="nav-left">
      <a href="/">
        <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5"><path d="M22 12h-4l-3 9L9 3l-3 9H2"/></svg>
        <span>Skillmark</span>
      </a>
    </div>
  </nav>
  <div class="login-container">
    <div class="login-box">
      <h1>Sign in</h1>
      <p class="subtitle">Get an API key to publish benchmark results</p>
      ${errorMessage}
      <a href="/auth/github" class="github-btn">
        <svg viewBox="0 0 24 24" fill="currentColor">
          <path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/>
        </svg>
        Continue with GitHub
      </a>
      <p class="info-text">
        By signing in, you agree to our <a href="/docs">Terms of Service</a>.
      </p>
    </div>
  </div>
</body>
</html>`;
}
1944
+
1945
/**
 * Data needed to render the dashboard for a signed-in user.
 */
interface DashboardUser {
  /** Display name shown in the navigation bar. */
  username: string;
  /** Avatar image URL, or `null` when no avatar should be shown. */
  avatar: string | null;
  /** API keys listed in the key-management table. */
  keys: {
    /** Key identifier; the dashboard displays only its first 8 characters. */
    id: string;
    /** Creation time as a string accepted by `new Date(...)`, or `null`. */
    createdAt: string | null;
    /** Last-use time as a string accepted by `new Date(...)`, or `null`. */
    lastUsedAt: string | null;
  }[];
}
1954
+
1955
/**
 * Render the dashboard page with API key management.
 *
 * The page lists the user's API keys (first 8 characters of each id plus
 * relative created / last-used times), offers a "Generate New Key" button
 * backed by `POST /auth/keys`, and a per-row "Revoke" button backed by
 * `DELETE /auth/keys/:id`.
 *
 * Key ids are only ever emitted into HTML attribute/text contexts, where
 * `escapeHtml` is the correct encoder. Inline handlers receive the clicked
 * element (`this`) and read the id from the row's `data-key-id` attribute,
 * so no server-provided value is interpolated into JavaScript source.
 *
 * @param user - The signed-in user and their API keys.
 * @returns The complete HTML page as a string.
 */
function renderDashboardPage(user: DashboardUser): string {
  const keyRows = user.keys.map(key => `
        <tr data-key-id="${escapeHtml(key.id)}">
          <td class="key-id">
            <code>${escapeHtml(key.id.slice(0, 8))}...</code>
          </td>
          <td class="key-created">${key.createdAt ? formatRelativeTime(new Date(key.createdAt).getTime() / 1000) : 'Unknown'}</td>
          <td class="key-used">${key.lastUsedAt ? formatRelativeTime(new Date(key.lastUsedAt).getTime() / 1000) : 'Never'}</td>
          <td class="key-actions">
            <button class="revoke-btn" onclick="revokeKey(this)">Revoke</button>
          </td>
        </tr>
      `).join('');

  return `<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Dashboard - Skillmark</title>
  <link rel="icon" type="image/png" href="/favicon.png">
  <link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600&family=Geist+Mono:wght@400;500&display=swap" rel="stylesheet">
  <style>
    * { box-sizing: border-box; margin: 0; padding: 0; }
    :root { --bg: #000; --text: #ededed; --text-secondary: #888; --border: #333; --success: #3fb950; }
    body {
      font-family: 'Geist', -apple-system, sans-serif;
      background: var(--bg);
      color: var(--text);
      min-height: 100vh;
      -webkit-font-smoothing: antialiased;
    }
    nav { display: flex; align-items: center; justify-content: space-between; padding: 1rem 1.5rem; border-bottom: 1px solid var(--border); }
    .nav-left { display: flex; align-items: center; gap: 0.5rem; }
    .nav-left a { color: var(--text); text-decoration: none; display: flex; align-items: center; gap: 0.5rem; }
    .nav-right { display: flex; align-items: center; gap: 1rem; }
    .nav-right a { color: var(--text-secondary); text-decoration: none; font-size: 0.875rem; }
    .nav-right a:hover { color: var(--text); }
    .user-info { display: flex; align-items: center; gap: 0.5rem; }
    .user-avatar { width: 28px; height: 28px; border-radius: 50%; }
    .user-name { font-size: 0.875rem; }
    .container { max-width: 800px; margin: 0 auto; padding: 3rem 1.5rem; }
    h1 { font-size: 2rem; font-weight: 600; margin-bottom: 0.5rem; }
    .subtitle { color: var(--text-secondary); margin-bottom: 2rem; }
    .section { margin-bottom: 3rem; }
    .section h2 { font-size: 1rem; font-weight: 500; margin-bottom: 1rem; text-transform: uppercase; letter-spacing: 0.1em; color: var(--text-secondary); }
    .generate-btn {
      display: inline-flex;
      align-items: center;
      gap: 0.5rem;
      padding: 0.75rem 1.25rem;
      background: var(--text);
      color: var(--bg);
      border: none;
      border-radius: 8px;
      font-family: inherit;
      font-size: 0.875rem;
      font-weight: 500;
      cursor: pointer;
      margin-bottom: 1.5rem;
    }
    .generate-btn:hover { background: #fff; }
    .generate-btn:disabled { opacity: 0.5; cursor: not-allowed; }
    .new-key-display {
      display: none;
      background: rgba(63, 185, 80, 0.1);
      border: 1px solid rgba(63, 185, 80, 0.3);
      border-radius: 8px;
      padding: 1rem;
      margin-bottom: 1.5rem;
    }
    .new-key-display.visible { display: block; }
    .new-key-display p { color: var(--text-secondary); font-size: 0.875rem; margin-bottom: 0.75rem; }
    .new-key-display .key-value {
      display: flex;
      align-items: center;
      gap: 0.5rem;
      background: #0a0a0a;
      padding: 0.75rem;
      border-radius: 6px;
      font-family: 'Geist Mono', monospace;
      font-size: 0.8125rem;
      word-break: break-all;
    }
    .copy-btn {
      flex-shrink: 0;
      background: none;
      border: 1px solid var(--border);
      color: var(--text-secondary);
      padding: 0.25rem 0.5rem;
      border-radius: 4px;
      cursor: pointer;
      font-size: 0.75rem;
    }
    .copy-btn:hover { color: var(--text); border-color: var(--text-secondary); }
    .done-btn {
      flex-shrink: 0;
      background: var(--success);
      border: none;
      color: #000;
      padding: 0.25rem 0.75rem;
      border-radius: 4px;
      cursor: pointer;
      font-size: 0.75rem;
      font-weight: 500;
    }
    .done-btn:hover { opacity: 0.9; }
    .keys-table { width: 100%; border-collapse: collapse; }
    .keys-table th {
      text-align: left;
      font-size: 0.75rem;
      text-transform: uppercase;
      letter-spacing: 0.1em;
      color: var(--text-secondary);
      font-weight: 500;
      padding: 0.75rem 0;
      border-bottom: 1px solid var(--border);
    }
    .keys-table td { padding: 1rem 0; border-bottom: 1px solid var(--border); }
    .keys-table code { font-family: 'Geist Mono', monospace; font-size: 0.8125rem; }
    .key-created, .key-used { color: var(--text-secondary); font-size: 0.875rem; }
    .revoke-btn {
      background: none;
      border: 1px solid #f85149;
      color: #f85149;
      padding: 0.375rem 0.75rem;
      border-radius: 6px;
      font-size: 0.8125rem;
      cursor: pointer;
    }
    .revoke-btn:hover { background: rgba(248, 81, 73, 0.1); }
    .empty-state { color: var(--text-secondary); padding: 2rem 0; }
    .usage-section { background: #0a0a0a; border: 1px solid var(--border); border-radius: 8px; padding: 1.5rem; }
    .usage-section h3 { font-size: 0.875rem; font-weight: 500; margin-bottom: 1rem; }
    .usage-section pre { background: #000; border: 1px solid var(--border); border-radius: 6px; padding: 1rem; overflow-x: auto; margin-bottom: 0.75rem; }
    .usage-section code { font-family: 'Geist Mono', monospace; font-size: 0.8125rem; }
    .usage-section p { color: var(--text-secondary); font-size: 0.8125rem; }
  </style>
</head>
<body>
  <nav>
    <div class="nav-left">
      <a href="/">
        <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5"><path d="M22 12h-4l-3 9L9 3l-3 9H2"/></svg>
        <span>Skillmark</span>
      </a>
    </div>
    <div class="nav-right">
      <div class="user-info">
        ${user.avatar ? `<img src="${escapeHtml(user.avatar)}" alt="" class="user-avatar">` : ''}
        <span class="user-name">${escapeHtml(user.username)}</span>
      </div>
      <a href="/auth/logout">Sign out</a>
    </div>
  </nav>
  <div class="container">
    <h1>Dashboard</h1>
    <p class="subtitle">Manage your API keys for publishing benchmark results</p>

    <div class="section">
      <h2>API Keys</h2>
      <button class="generate-btn" id="generateBtn" onclick="generateKey()">
        <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><line x1="12" y1="5" x2="12" y2="19"></line><line x1="5" y1="12" x2="19" y2="12"></line></svg>
        Generate New Key
      </button>

      <div class="new-key-display" id="newKeyDisplay">
        <p><strong>New API key created!</strong> Copy it now - you won't see it again.</p>
        <div class="key-value">
          <code id="newKeyValue"></code>
          <button class="copy-btn" onclick="copyKey(this)">Copy</button>
          <button class="done-btn" onclick="location.reload()">Done</button>
        </div>
      </div>

      ${user.keys.length > 0 ? `
      <table class="keys-table">
        <thead>
          <tr>
            <th>Key ID</th>
            <th>Created</th>
            <th>Last Used</th>
            <th></th>
          </tr>
        </thead>
        <tbody id="keysTableBody">
          ${keyRows}
        </tbody>
      </table>
      ` : `
      <p class="empty-state">No API keys yet. Generate one to start publishing benchmarks.</p>
      `}
    </div>

    <div class="section">
      <h2>Usage</h2>
      <div class="usage-section">
        <h3>Save your API key</h3>
        <pre><code># Option 1: Environment variable
export SKILLMARK_API_KEY=sk_your_key_here

# Option 2: Config file (~/.skillmarkrc)
echo "api_key=sk_your_key_here" > ~/.skillmarkrc</code></pre>
        <p>The CLI reads from env var first, then ~/.skillmarkrc.</p>
      </div>
    </div>

    <div class="section">
      <div class="usage-section">
        <h3>Publish with auto-publish flag</h3>
        <pre><code># Run benchmark and auto-publish results
skillmark run ./my-skill --publish

# Or publish existing results
skillmark publish ./skillmark-results/result.json</code></pre>
      </div>
    </div>
  </div>

  <script>
    async function generateKey() {
      const btn = document.getElementById('generateBtn');
      btn.disabled = true;
      btn.textContent = 'Generating...';

      try {
        const res = await fetch('/auth/keys', { method: 'POST' });
        const data = await res.json();

        if (data.apiKey) {
          document.getElementById('newKeyValue').textContent = data.apiKey;
          document.getElementById('newKeyDisplay').classList.add('visible');
          btn.style.display = 'none';
        } else {
          alert('Failed to generate key: ' + (data.error || 'Unknown error'));
        }
      } catch (err) {
        alert('Failed to generate key: ' + err.message);
      } finally {
        btn.disabled = false;
        btn.innerHTML = '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><line x1="12" y1="5" x2="12" y2="19"></line><line x1="5" y1="12" x2="19" y2="12"></line></svg> Generate New Key';
      }
    }

    // The button is passed in explicitly: the global "event" is not available
    // once the clipboard promise settles.
    function copyKey(btn) {
      const key = document.getElementById('newKeyValue').textContent;
      navigator.clipboard.writeText(key).then(() => {
        btn.textContent = 'Copied!';
        setTimeout(() => btn.textContent = 'Copy', 2000);
      });
    }

    // Receives the clicked button; the key id is read from the row's
    // data-key-id attribute rather than being embedded in script source.
    async function revokeKey(btn) {
      const row = btn.closest('tr');
      const keyId = row ? row.dataset.keyId : null;
      if (!keyId) {
        return;
      }

      if (!confirm('Are you sure you want to revoke this API key? This cannot be undone.')) {
        return;
      }

      try {
        const res = await fetch('/auth/keys/' + encodeURIComponent(keyId), { method: 'DELETE' });
        const data = await res.json();

        if (data.success) {
          row.remove();
        } else {
          alert('Failed to revoke key: ' + (data.error || 'Unknown error'));
        }
      } catch (err) {
        alert('Failed to revoke key: ' + err.message);
      }
    }
  </script>
</body>
</html>`;
}
2233
+
2234
/**
 * Turn a Unix timestamp (seconds) into a short "time ago" label.
 *
 * @param timestamp - Seconds since the Unix epoch. Falsy values (`0`, `NaN`)
 *   produce `'Never'`.
 * @returns `'just now'` for anything under a minute old (including future
 *   timestamps), otherwise a whole-number count suffixed with
 *   `m`, `h`, `d`, `w`, `mo` or `y` and `' ago'`.
 */
function formatRelativeTime(timestamp: number): string {
  if (!timestamp) return 'Never';

  const elapsed = Math.floor(Date.now() / 1000) - timestamp;
  if (elapsed < 60) return 'just now';

  // Each entry: [exclusive upper bound in seconds, unit length in seconds, suffix].
  const buckets: Array<[number, number, string]> = [
    [3600, 60, 'm'],
    [86400, 3600, 'h'],
    [604800, 86400, 'd'],
    [2592000, 604800, 'w'],
    [31536000, 2592000, 'mo'],
  ];

  for (const [limit, unit, suffix] of buckets) {
    if (elapsed < limit) {
      return `${Math.floor(elapsed / unit)}${suffix} ago`;
    }
  }

  // Anything at or beyond 365 days is reported in years.
  return `${Math.floor(elapsed / 31536000)}y ago`;
}
2251
+
2252
/**
 * Escape the five HTML-significant characters (`&`, `<`, `>`, `"`, `'`) so a
 * string can be placed in HTML text or a quoted attribute value.
 *
 * @param str - Text to escape. Falsy input yields an empty string.
 * @returns The escaped text.
 */
function escapeHtml(str: string): string {
  if (!str) return '';

  const entities: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#039;',
  };

  // A single pass over the input: each special character is swapped for its entity.
  return str.replace(/[&<>"']/g, (ch) => entities[ch] ?? ch);
}