@cleocode/skills 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dispatch-config.json +404 -0
- package/index.d.ts +178 -0
- package/index.js +405 -0
- package/package.json +14 -0
- package/profiles/core.json +7 -0
- package/profiles/full.json +10 -0
- package/profiles/minimal.json +7 -0
- package/profiles/recommended.json +7 -0
- package/provider-skills-map.json +97 -0
- package/skills/_shared/cleo-style-guide.md +84 -0
- package/skills/_shared/manifest-operations.md +810 -0
- package/skills/_shared/placeholders.json +433 -0
- package/skills/_shared/skill-chaining-patterns.md +237 -0
- package/skills/_shared/subagent-protocol-base.md +223 -0
- package/skills/_shared/task-system-integration.md +232 -0
- package/skills/_shared/testing-framework-config.md +110 -0
- package/skills/ct-cleo/SKILL.md +490 -0
- package/skills/ct-cleo/references/anti-patterns.md +19 -0
- package/skills/ct-cleo/references/loom-lifecycle.md +136 -0
- package/skills/ct-cleo/references/orchestrator-constraints.md +55 -0
- package/skills/ct-cleo/references/session-protocol.md +162 -0
- package/skills/ct-codebase-mapper/SKILL.md +82 -0
- package/skills/ct-contribution/SKILL.md +521 -0
- package/skills/ct-contribution/templates/contribution-init.json +21 -0
- package/skills/ct-dev-workflow/SKILL.md +423 -0
- package/skills/ct-docs-lookup/SKILL.md +66 -0
- package/skills/ct-docs-review/SKILL.md +175 -0
- package/skills/ct-docs-write/SKILL.md +108 -0
- package/skills/ct-documentor/SKILL.md +231 -0
- package/skills/ct-epic-architect/SKILL.md +305 -0
- package/skills/ct-epic-architect/references/bug-epic-example.md +172 -0
- package/skills/ct-epic-architect/references/commands.md +201 -0
- package/skills/ct-epic-architect/references/feature-epic-example.md +210 -0
- package/skills/ct-epic-architect/references/migration-epic-example.md +244 -0
- package/skills/ct-epic-architect/references/output-format.md +92 -0
- package/skills/ct-epic-architect/references/patterns.md +284 -0
- package/skills/ct-epic-architect/references/refactor-epic-example.md +412 -0
- package/skills/ct-epic-architect/references/research-epic-example.md +226 -0
- package/skills/ct-epic-architect/references/shell-escaping.md +86 -0
- package/skills/ct-epic-architect/references/skill-aware-execution.md +195 -0
- package/skills/ct-grade/SKILL.md +230 -0
- package/skills/ct-grade/agents/analysis-reporter.md +203 -0
- package/skills/ct-grade/agents/blind-comparator.md +157 -0
- package/skills/ct-grade/agents/scenario-runner.md +134 -0
- package/skills/ct-grade/eval-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
- package/skills/ct-grade/eval-viewer/generate_grade_review.py +1138 -0
- package/skills/ct-grade/eval-viewer/generate_grade_viewer.py +544 -0
- package/skills/ct-grade/eval-viewer/generate_review.py +283 -0
- package/skills/ct-grade/eval-viewer/grade-review.html +1574 -0
- package/skills/ct-grade/eval-viewer/viewer.html +219 -0
- package/skills/ct-grade/evals/evals.json +94 -0
- package/skills/ct-grade/references/ab-test-methodology.md +150 -0
- package/skills/ct-grade/references/domains.md +137 -0
- package/skills/ct-grade/references/grade-spec.md +236 -0
- package/skills/ct-grade/references/scenario-playbook.md +234 -0
- package/skills/ct-grade/references/token-tracking.md +120 -0
- package/skills/ct-grade/scripts/__pycache__/audit_analyzer.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/__pycache__/run_ab_test.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/__pycache__/run_all.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/__pycache__/token_tracker.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/audit_analyzer.py +279 -0
- package/skills/ct-grade/scripts/generate_report.py +283 -0
- package/skills/ct-grade/scripts/run_ab_test.py +504 -0
- package/skills/ct-grade/scripts/run_all.py +287 -0
- package/skills/ct-grade/scripts/setup_run.py +183 -0
- package/skills/ct-grade/scripts/token_tracker.py +630 -0
- package/skills/ct-grade-v2-1/SKILL.md +237 -0
- package/skills/ct-grade-v2-1/agents/analysis-reporter.md +203 -0
- package/skills/ct-grade-v2-1/agents/blind-comparator.md +157 -0
- package/skills/ct-grade-v2-1/agents/scenario-runner.md +179 -0
- package/skills/ct-grade-v2-1/evals/evals.json +74 -0
- package/skills/ct-grade-v2-1/grade-viewer/__pycache__/build_op_stats.cpython-314.pyc +0 -0
- package/skills/ct-grade-v2-1/grade-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
- package/skills/ct-grade-v2-1/grade-viewer/build_op_stats.py +174 -0
- package/skills/ct-grade-v2-1/grade-viewer/eval-analysis.json +41 -0
- package/skills/ct-grade-v2-1/grade-viewer/eval-report.md +34 -0
- package/skills/ct-grade-v2-1/grade-viewer/generate_grade_review.py +1023 -0
- package/skills/ct-grade-v2-1/grade-viewer/generate_grade_viewer.py +548 -0
- package/skills/ct-grade-v2-1/grade-viewer/grade-review-eval.html +613 -0
- package/skills/ct-grade-v2-1/grade-viewer/grade-review.html +1532 -0
- package/skills/ct-grade-v2-1/grade-viewer/viewer.html +620 -0
- package/skills/ct-grade-v2-1/manifest-entry.json +31 -0
- package/skills/ct-grade-v2-1/references/ab-testing.md +233 -0
- package/skills/ct-grade-v2-1/references/domains-ssot.md +156 -0
- package/skills/ct-grade-v2-1/references/grade-spec-v2.md +167 -0
- package/skills/ct-grade-v2-1/references/playbook-v2.md +393 -0
- package/skills/ct-grade-v2-1/references/token-tracking.md +202 -0
- package/skills/ct-grade-v2-1/scripts/generate_report.py +419 -0
- package/skills/ct-grade-v2-1/scripts/run_ab_test.py +493 -0
- package/skills/ct-grade-v2-1/scripts/run_scenario.py +396 -0
- package/skills/ct-grade-v2-1/scripts/setup_run.py +207 -0
- package/skills/ct-grade-v2-1/scripts/token_tracker.py +175 -0
- package/skills/ct-memory/SKILL.md +84 -0
- package/skills/ct-orchestrator/INSTALL.md +61 -0
- package/skills/ct-orchestrator/README.md +69 -0
- package/skills/ct-orchestrator/SKILL.md +380 -0
- package/skills/ct-orchestrator/manifest-entry.json +19 -0
- package/skills/ct-orchestrator/orchestrator-prompt.txt +17 -0
- package/skills/ct-orchestrator/references/SUBAGENT-PROTOCOL-BLOCK.md +66 -0
- package/skills/ct-orchestrator/references/autonomous-operation.md +167 -0
- package/skills/ct-orchestrator/references/lifecycle-gates.md +98 -0
- package/skills/ct-orchestrator/references/orchestrator-compliance.md +271 -0
- package/skills/ct-orchestrator/references/orchestrator-handoffs.md +85 -0
- package/skills/ct-orchestrator/references/orchestrator-patterns.md +164 -0
- package/skills/ct-orchestrator/references/orchestrator-recovery.md +113 -0
- package/skills/ct-orchestrator/references/orchestrator-spawning.md +271 -0
- package/skills/ct-orchestrator/references/orchestrator-tokens.md +180 -0
- package/skills/ct-research-agent/SKILL.md +226 -0
- package/skills/ct-skill-creator/.cleo/.context-state.json +13 -0
- package/skills/ct-skill-creator/.cleo/logs/cleo.2026-03-07.1.log +24 -0
- package/skills/ct-skill-creator/.cleo/tasks.db +0 -0
- package/skills/ct-skill-creator/SKILL.md +356 -0
- package/skills/ct-skill-creator/agents/analyzer.md +276 -0
- package/skills/ct-skill-creator/agents/comparator.md +204 -0
- package/skills/ct-skill-creator/agents/grader.md +225 -0
- package/skills/ct-skill-creator/assets/eval_review.html +146 -0
- package/skills/ct-skill-creator/eval-viewer/__pycache__/generate_review.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/eval-viewer/generate_review.py +471 -0
- package/skills/ct-skill-creator/eval-viewer/viewer.html +1325 -0
- package/skills/ct-skill-creator/manifest-entry.json +17 -0
- package/skills/ct-skill-creator/references/dynamic-context.md +228 -0
- package/skills/ct-skill-creator/references/frontmatter.md +83 -0
- package/skills/ct-skill-creator/references/invocation-control.md +165 -0
- package/skills/ct-skill-creator/references/output-patterns.md +86 -0
- package/skills/ct-skill-creator/references/provider-deployment.md +175 -0
- package/skills/ct-skill-creator/references/schemas.md +430 -0
- package/skills/ct-skill-creator/references/workflows.md +28 -0
- package/skills/ct-skill-creator/scripts/__init__.py +1 -0
- package/skills/ct-skill-creator/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/generate_report.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/improve_description.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/init_skill.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/run_eval.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/run_loop.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/utils.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/skills/ct-skill-creator/scripts/generate_report.py +326 -0
- package/skills/ct-skill-creator/scripts/improve_description.py +247 -0
- package/skills/ct-skill-creator/scripts/init_skill.py +306 -0
- package/skills/ct-skill-creator/scripts/package_skill.py +110 -0
- package/skills/ct-skill-creator/scripts/quick_validate.py +97 -0
- package/skills/ct-skill-creator/scripts/run_eval.py +310 -0
- package/skills/ct-skill-creator/scripts/run_loop.py +328 -0
- package/skills/ct-skill-creator/scripts/utils.py +47 -0
- package/skills/ct-skill-validator/SKILL.md +178 -0
- package/skills/ct-skill-validator/agents/ecosystem-checker.md +151 -0
- package/skills/ct-skill-validator/assets/valid-skill-example.md +13 -0
- package/skills/ct-skill-validator/evals/eval_set.json +14 -0
- package/skills/ct-skill-validator/evals/evals.json +52 -0
- package/skills/ct-skill-validator/manifest-entry.json +20 -0
- package/skills/ct-skill-validator/references/cleo-ecosystem-rules.md +163 -0
- package/skills/ct-skill-validator/references/validation-rules.md +168 -0
- package/skills/ct-skill-validator/scripts/__init__.py +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/audit_body.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/check_ecosystem.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/generate_validation_report.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/validate.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/audit_body.py +242 -0
- package/skills/ct-skill-validator/scripts/check_ecosystem.py +169 -0
- package/skills/ct-skill-validator/scripts/check_manifest.py +172 -0
- package/skills/ct-skill-validator/scripts/generate_validation_report.py +442 -0
- package/skills/ct-skill-validator/scripts/validate.py +422 -0
- package/skills/ct-spec-writer/SKILL.md +189 -0
- package/skills/ct-stickynote/README.md +14 -0
- package/skills/ct-stickynote/SKILL.md +46 -0
- package/skills/ct-task-executor/SKILL.md +296 -0
- package/skills/ct-validator/SKILL.md +216 -0
- package/skills/manifest.json +469 -0
- package/skills.json +281 -0
|
@@ -0,0 +1,620 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>CLEO Grade Review</title>
|
|
7
|
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
|
8
|
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
9
|
+
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@500;600;700&family=Lora:wght@400;500&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
|
|
10
|
+
<style>
|
|
11
|
+
:root {
|
|
12
|
+
--bg: #faf9f5;
|
|
13
|
+
--surface: #ffffff;
|
|
14
|
+
--surface2: #f4f2ec;
|
|
15
|
+
--border: #e8e6dc;
|
|
16
|
+
--text: #141413;
|
|
17
|
+
--text-muted: #8a8880;
|
|
18
|
+
--accent: #d97757;
|
|
19
|
+
--header-bg: #141413;
|
|
20
|
+
--header-text:#faf9f5;
|
|
21
|
+
--radius: 6px;
|
|
22
|
+
--mono: 'JetBrains Mono', 'Fira Code', monospace;
|
|
23
|
+
--grade-a: #4a8c5c; --grade-a-bg: #eaf4ee;
|
|
24
|
+
--grade-b: #3a7fa8; --grade-b-bg: #e8f2f9;
|
|
25
|
+
--grade-c: #b07d2a; --grade-c-bg: #fdf4e3;
|
|
26
|
+
--grade-d: #c45d30; --grade-d-bg: #fceee8;
|
|
27
|
+
--grade-f: #c03030; --grade-f-bg: #fce8e8;
|
|
28
|
+
--dim1: #7b68c8; --dim2: #4a8fa5; --dim3: #4a8c5c;
|
|
29
|
+
--dim4: #c4923a; --dim5: #d97757;
|
|
30
|
+
--mcp: #4a8fa5; --cli: #7b68c8; --tie: #8a8880;
|
|
31
|
+
--win: #4a8c5c; --loss: #c03030;
|
|
32
|
+
}
|
|
33
|
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
|
34
|
+
body { font-family: 'Lora', Georgia, serif; background: var(--bg); color: var(--text); height: 100vh; display: flex; flex-direction: column; font-size: 14px; }
|
|
35
|
+
.header { background: var(--header-bg); color: var(--header-text); padding: .75rem 1.5rem; display: flex; justify-content: space-between; align-items: center; flex-shrink: 0; gap: 1rem; }
|
|
36
|
+
.header-left { display: flex; align-items: center; gap: .75rem; }
|
|
37
|
+
.header-logo { font-family: 'Poppins', sans-serif; font-size: .65rem; font-weight: 700; letter-spacing: .12em; text-transform: uppercase; background: var(--accent); color: #fff; padding: .2em .5em; border-radius: 3px; }
|
|
38
|
+
.header-title { font-family: 'Poppins', sans-serif; font-size: 1rem; font-weight: 600; color: #fff; }
|
|
39
|
+
.header-subtitle { font-size: .75rem; color: #888; }
|
|
40
|
+
.header-meta { font-size: .7rem; color: #666; text-align: right; line-height: 1.6; }
|
|
41
|
+
.tabs-bar { background: var(--surface); border-bottom: 1px solid var(--border); display: flex; padding: 0 1.5rem; gap: .25rem; flex-shrink: 0; }
|
|
42
|
+
.tab-btn { font-family: 'Poppins', sans-serif; font-size: .75rem; font-weight: 500; padding: .6rem .9rem; border: none; background: transparent; color: var(--text-muted); cursor: pointer; border-bottom: 2px solid transparent; transition: color .15s, border-color .15s; letter-spacing: .02em; }
|
|
43
|
+
.tab-btn:hover { color: var(--text); }
|
|
44
|
+
.tab-btn.active { color: var(--accent); border-bottom-color: var(--accent); }
|
|
45
|
+
.tab-badge { display: inline-block; background: var(--surface2); border-radius: 9px; font-size: .65rem; padding: .1em .45em; margin-left: .3em; color: var(--text-muted); }
|
|
46
|
+
.tab-btn.active .tab-badge { background: #fde8df; color: var(--accent); }
|
|
47
|
+
.main { display: flex; flex: 1; overflow: hidden; }
|
|
48
|
+
.tab-panel { display: none; width: 100%; overflow: hidden; }
|
|
49
|
+
.tab-panel.active { display: flex; }
|
|
50
|
+
.sidebar { width: 240px; min-width: 180px; border-right: 1px solid var(--border); background: var(--surface); display: flex; flex-direction: column; flex-shrink: 0; overflow-y: auto; }
|
|
51
|
+
.sidebar-header { font-family: 'Poppins', sans-serif; font-size: .65rem; font-weight: 600; letter-spacing: .08em; text-transform: uppercase; color: var(--text-muted); padding: .75rem 1rem .4rem; }
|
|
52
|
+
.sidebar-item { padding: .55rem 1rem; cursor: pointer; display: flex; align-items: center; gap: .5rem; border-left: 3px solid transparent; transition: background .1s; font-size: .8rem; }
|
|
53
|
+
.sidebar-item:hover { background: var(--bg); }
|
|
54
|
+
.sidebar-item.active { background: var(--bg); border-left-color: var(--accent); }
|
|
55
|
+
.item-label { flex: 1; min-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
|
56
|
+
.item-score { font-family: var(--mono); font-size: .7rem; font-weight: 500; }
|
|
57
|
+
.content { flex: 1; overflow-y: auto; padding: 1.5rem; display: flex; flex-direction: column; gap: 1.25rem; }
|
|
58
|
+
.grade-card { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 1.25rem; }
|
|
59
|
+
.grade-card-header { display: flex; align-items: flex-start; justify-content: space-between; gap: 1rem; margin-bottom: 1rem; }
|
|
60
|
+
.grade-score-block { display: flex; align-items: center; gap: 1rem; }
|
|
61
|
+
.grade-letter-badge { font-family: 'Poppins', sans-serif; font-size: 2.5rem; font-weight: 700; width: 64px; height: 64px; display: flex; align-items: center; justify-content: center; border-radius: 8px; }
|
|
62
|
+
.grade-letter-badge.A { background: var(--grade-a-bg); color: var(--grade-a); }
|
|
63
|
+
.grade-letter-badge.B { background: var(--grade-b-bg); color: var(--grade-b); }
|
|
64
|
+
.grade-letter-badge.C { background: var(--grade-c-bg); color: var(--grade-c); }
|
|
65
|
+
.grade-letter-badge.D { background: var(--grade-d-bg); color: var(--grade-d); }
|
|
66
|
+
.grade-letter-badge.F { background: var(--grade-f-bg); color: var(--grade-f); }
|
|
67
|
+
.grade-score-details h2 { font-family: 'Poppins', sans-serif; font-size: 1.5rem; font-weight: 700; line-height: 1; }
|
|
68
|
+
.grade-score-details .pct { font-size: .85rem; color: var(--text-muted); }
|
|
69
|
+
.grade-session-id { font-family: var(--mono); font-size: .65rem; color: var(--text-muted); background: var(--surface2); padding: .2em .5em; border-radius: 3px; }
|
|
70
|
+
.dimensions { display: flex; flex-direction: column; gap: .65rem; }
|
|
71
|
+
.dimension-row { display: flex; align-items: center; gap: .75rem; }
|
|
72
|
+
.dimension-name { font-family: 'Poppins', sans-serif; font-size: .7rem; font-weight: 500; width: 145px; flex-shrink: 0; }
|
|
73
|
+
.dimension-bar-wrap { flex: 1; height: 8px; background: var(--surface2); border-radius: 4px; overflow: hidden; }
|
|
74
|
+
.dimension-bar { height: 100%; border-radius: 4px; transition: width .4s ease; }
|
|
75
|
+
.dimension-score-label { font-family: var(--mono); font-size: .7rem; width: 42px; text-align: right; flex-shrink: 0; }
|
|
76
|
+
.flags-section h3 { font-family: 'Poppins', sans-serif; font-size: .75rem; font-weight: 600; color: var(--text-muted); text-transform: uppercase; letter-spacing: .07em; margin-bottom: .6rem; }
|
|
77
|
+
.flags-list { display: flex; flex-direction: column; gap: .4rem; }
|
|
78
|
+
.flag-item { display: flex; align-items: flex-start; gap: .5rem; font-size: .78rem; padding: .4rem .6rem; background: var(--grade-f-bg); border-left: 3px solid var(--grade-f); border-radius: 0 var(--radius) var(--radius) 0; line-height: 1.4; }
|
|
79
|
+
.flag-icon { flex-shrink: 0; }
|
|
80
|
+
.no-flags { font-size: .78rem; color: var(--win); background: var(--grade-a-bg); padding: .4rem .6rem; border-left: 3px solid var(--win); border-radius: 0 var(--radius) var(--radius) 0; }
|
|
81
|
+
.token-meta-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(130px, 1fr)); gap: .6rem; }
|
|
82
|
+
.token-stat { background: var(--surface2); border-radius: var(--radius); padding: .6rem .75rem; }
|
|
83
|
+
.token-stat-label { font-family: 'Poppins', sans-serif; font-size: .6rem; font-weight: 600; text-transform: uppercase; letter-spacing: .07em; color: var(--text-muted); margin-bottom: .2rem; }
|
|
84
|
+
.token-stat-value { font-family: var(--mono); font-size: .95rem; font-weight: 500; }
|
|
85
|
+
.ab-panel, .token-panel, .history-panel { width: 100%; padding: 1.5rem; overflow-y: auto; display: flex; flex-direction: column; gap: 1.25rem; }
|
|
86
|
+
.section-title { font-family: 'Poppins', sans-serif; font-size: .75rem; font-weight: 600; text-transform: uppercase; letter-spacing: .08em; color: var(--text-muted); margin-bottom: .75rem; }
|
|
87
|
+
.ab-summary-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(150px, 1fr)); gap: .75rem; }
|
|
88
|
+
.ab-stat-card { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: .85rem 1rem; }
|
|
89
|
+
.ab-stat-label { font-family: 'Poppins', sans-serif; font-size: .65rem; font-weight: 600; text-transform: uppercase; letter-spacing: .07em; color: var(--text-muted); margin-bottom: .3rem; }
|
|
90
|
+
.ab-stat-value { font-family: var(--mono); font-size: 1.3rem; font-weight: 500; }
|
|
91
|
+
.ab-stat-value.win { color: var(--win); } .ab-stat-value.loss { color: var(--loss); } .ab-stat-value.neutral { color: var(--text-muted); }
|
|
92
|
+
.ab-winner-banner { display: flex; align-items: center; gap: .6rem; padding: .6rem 1rem; border-radius: var(--radius); font-family: 'Poppins', sans-serif; font-size: .8rem; font-weight: 600; }
|
|
93
|
+
.ab-winner-banner.mcp { background: #e8f2f9; color: var(--mcp); }
|
|
94
|
+
.ab-winner-banner.cli { background: #ede8f9; color: var(--cli); }
|
|
95
|
+
.ab-winner-banner.tie { background: var(--surface2); color: var(--text-muted); }
|
|
96
|
+
.ab-table-wrap { overflow-x: auto; }
|
|
97
|
+
table.ab-table { width: 100%; border-collapse: collapse; font-size: .76rem; }
|
|
98
|
+
.ab-table th { font-family: 'Poppins', sans-serif; font-size: .65rem; font-weight: 600; text-transform: uppercase; letter-spacing: .07em; color: var(--text-muted); padding: .4rem .75rem; text-align: left; background: var(--surface); border-bottom: 1px solid var(--border); white-space: nowrap; }
|
|
99
|
+
.ab-table td { padding: .45rem .75rem; border-bottom: 1px solid var(--border); vertical-align: middle; }
|
|
100
|
+
.ab-table tr:hover td { background: var(--bg); }
|
|
101
|
+
.op-name { font-family: var(--mono); font-size: .72rem; }
|
|
102
|
+
.win-pill { display: inline-block; padding: .15em .5em; border-radius: 9px; font-family: 'Poppins', sans-serif; font-size: .65rem; font-weight: 600; text-transform: uppercase; }
|
|
103
|
+
.win-pill.mcp { background: #d0e8f4; color: var(--mcp); } .win-pill.cli { background: #ddd8f4; color: var(--cli); } .win-pill.tie { background: var(--surface2); color: var(--text-muted); }
|
|
104
|
+
.delta-pos { color: var(--loss); font-family: var(--mono); font-size: .72rem; }
|
|
105
|
+
.delta-neg { color: var(--win); font-family: var(--mono); font-size: .72rem; }
|
|
106
|
+
.delta-zero { color: var(--text-muted); font-family: var(--mono); font-size: .72rem; }
|
|
107
|
+
.bar-chart { display: flex; flex-direction: column; gap: .5rem; }
|
|
108
|
+
.bar-row { display: flex; align-items: center; gap: .75rem; }
|
|
109
|
+
.bar-label { font-family: var(--mono); font-size: .72rem; width: 130px; flex-shrink: 0; text-align: right; }
|
|
110
|
+
.bar-track { flex: 1; height: 16px; background: var(--surface2); border-radius: 3px; overflow: hidden; }
|
|
111
|
+
.bar-fill { height: 100%; border-radius: 3px; transition: width .4s ease; }
|
|
112
|
+
.bar-val { font-family: var(--mono); font-size: .7rem; width: 70px; flex-shrink: 0; }
|
|
113
|
+
.chart-card { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 1rem 1.25rem; }
|
|
114
|
+
.chart-card-title { font-family: 'Poppins', sans-serif; font-size: .8rem; font-weight: 600; margin-bottom: 1rem; }
|
|
115
|
+
.history-list { display: flex; flex-direction: column; gap: .5rem; }
|
|
116
|
+
.history-item { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: .75rem 1rem; display: flex; align-items: center; gap: 1rem; cursor: pointer; transition: border-color .15s; }
|
|
117
|
+
.history-item:hover { border-color: var(--accent); }
|
|
118
|
+
.history-grade-badge { font-family: 'Poppins', sans-serif; font-size: 1.1rem; font-weight: 700; width: 36px; height: 36px; display: flex; align-items: center; justify-content: center; border-radius: 5px; flex-shrink: 0; }
|
|
119
|
+
.history-grade-badge.A { background: var(--grade-a-bg); color: var(--grade-a); } .history-grade-badge.B { background: var(--grade-b-bg); color: var(--grade-b); }
|
|
120
|
+
.history-grade-badge.C { background: var(--grade-c-bg); color: var(--grade-c); } .history-grade-badge.D { background: var(--grade-d-bg); color: var(--grade-d); }
|
|
121
|
+
.history-grade-badge.F { background: var(--grade-f-bg); color: var(--grade-f); }
|
|
122
|
+
.history-item-main { flex: 1; min-width: 0; }
|
|
123
|
+
.history-session-id { font-family: var(--mono); font-size: .7rem; color: var(--text-muted); overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
|
124
|
+
.history-item-meta { display: flex; gap: .6rem; font-size: .7rem; color: var(--text-muted); margin-top: .2rem; flex-wrap: wrap; }
|
|
125
|
+
.history-score-bar { width: 120px; height: 5px; background: var(--surface2); border-radius: 3px; overflow: hidden; flex-shrink: 0; }
|
|
126
|
+
.history-score-fill { height: 100%; border-radius: 3px; }
|
|
127
|
+
canvas.trend { width: 100%; height: 90px; display: block; }
|
|
128
|
+
.empty-state { display: flex; flex-direction: column; align-items: center; justify-content: center; gap: .75rem; padding: 3rem 2rem; color: var(--text-muted); text-align: center; }
|
|
129
|
+
.empty-state .empty-icon { font-size: 2.5rem; }
|
|
130
|
+
.empty-state h3 { font-family: 'Poppins', sans-serif; font-size: .9rem; color: var(--text); }
|
|
131
|
+
.empty-state p { font-size: .78rem; max-width: 380px; line-height: 1.6; }
|
|
132
|
+
.empty-cmd { font-family: var(--mono); background: var(--surface2); padding: .25em .5em; border-radius: 3px; font-size: .74rem; display: inline-block; margin-top: .25rem; }
|
|
133
|
+
.ev-title { font-family: 'Poppins', sans-serif; font-size: .75rem; font-weight: 600; text-transform: uppercase; letter-spacing: .07em; color: var(--text-muted); margin-bottom: .75rem; }
|
|
134
|
+
.ev-row { margin-bottom: .5rem; }
|
|
135
|
+
.ev-dim { font-family: 'Poppins', sans-serif; font-size: .68rem; font-weight: 600; margin-bottom: .2rem; }
|
|
136
|
+
.ev-bullet { font-size: .74rem; padding: .15rem 0 .15rem .75rem; color: var(--text-muted); }
|
|
137
|
+
.ab-note { font-size: .72rem; color: var(--text-muted); background: var(--grade-c-bg); border-left: 3px solid var(--grade-c); padding: .5rem .75rem; border-radius: 0 4px 4px 0; }
|
|
138
|
+
::-webkit-scrollbar { width: 6px; height: 6px; }
|
|
139
|
+
::-webkit-scrollbar-track { background: transparent; }
|
|
140
|
+
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
|
|
141
|
+
::-webkit-scrollbar-thumb:hover { background: var(--text-muted); }
|
|
142
|
+
</style>
|
|
143
|
+
</head>
|
|
144
|
+
<body>
|
|
145
|
+
|
|
146
|
+
<div class="header">
|
|
147
|
+
<div class="header-left">
|
|
148
|
+
<span class="header-logo">CLEO</span>
|
|
149
|
+
<div>
|
|
150
|
+
<div class="header-title" id="hdr-title">Grade Review</div>
|
|
151
|
+
<div class="header-subtitle" id="hdr-sub"></div>
|
|
152
|
+
</div>
|
|
153
|
+
</div>
|
|
154
|
+
<div class="header-meta" id="hdr-meta"></div>
|
|
155
|
+
</div>
|
|
156
|
+
|
|
157
|
+
<div class="tabs-bar">
|
|
158
|
+
<button class="tab-btn active" data-tab="grades">Grades <span class="tab-badge" id="tb-grades">0</span></button>
|
|
159
|
+
<button class="tab-btn" data-tab="ab">A/B Comparison <span class="tab-badge" id="tb-ab">0</span></button>
|
|
160
|
+
<button class="tab-btn" data-tab="tokens">Token Analysis</button>
|
|
161
|
+
<button class="tab-btn" data-tab="history">History <span class="tab-badge" id="tb-history">0</span></button>
|
|
162
|
+
</div>
|
|
163
|
+
|
|
164
|
+
<div class="main">
|
|
165
|
+
<div class="tab-panel active" id="panel-grades">
|
|
166
|
+
<div class="sidebar" id="grade-sidebar"></div>
|
|
167
|
+
<div class="content" id="grade-content"></div>
|
|
168
|
+
</div>
|
|
169
|
+
<div class="tab-panel" id="panel-ab">
|
|
170
|
+
<div class="ab-panel" id="ab-content"></div>
|
|
171
|
+
</div>
|
|
172
|
+
<div class="tab-panel" id="panel-tokens">
|
|
173
|
+
<div class="token-panel" id="token-content"></div>
|
|
174
|
+
</div>
|
|
175
|
+
<div class="tab-panel" id="panel-history">
|
|
176
|
+
<div class="history-panel" id="history-content"></div>
|
|
177
|
+
</div>
|
|
178
|
+
</div>
|
|
179
|
+
|
|
180
|
+
<script>
|
|
181
|
+
/*__EMBEDDED_GRADE_DATA__*/
|
|
182
|
+
|
|
183
|
+
// ── Helpers ────────────────────────────────────────────────────────────────
|
|
184
|
+
|
|
185
|
+
/**
 * Convert a numeric score into a letter grade.
 *
 * Thresholds: >= 90 'A', >= 75 'B', >= 60 'C', >= 45 'D', anything else 'F'.
 *
 * @param {?number} s - Score to classify; null/undefined means "no score".
 * @returns {string} One of 'A', 'B', 'C', 'D', 'F', or '?' when s is null/undefined.
 */
function scoreLetter(s) {
  if (s == null) {
    return '?';
  }
  // Ordered from the highest cutoff down; the first cutoff the score reaches wins.
  var cutoffs = [[90, 'A'], [75, 'B'], [60, 'C'], [45, 'D']];
  for (var idx = 0; idx < cutoffs.length; idx++) {
    if (s >= cutoffs[idx][0]) {
      return cutoffs[idx][1];
    }
  }
  return 'F';
}
|
|
190
|
+
/**
 * Pick the CSS colour (as a `var(--…)` reference) that matches a score's grade band.
 *
 * Bands mirror scoreLetter: >= 90 A, >= 75 B, >= 60 C, >= 45 D, otherwise F.
 *
 * @param {?number} s - Score to classify; null/undefined means "no score".
 * @returns {string} A CSS custom-property reference usable as a colour value.
 */
function scoreColor(s) {
  // A missing score is "unknown", not "failing": scoreLetter reports '?' for it,
  // so use the neutral muted colour instead of falling through to the F colour.
  if (s == null) return 'var(--text-muted)';
  if (s >= 90) return 'var(--grade-a)';
  if (s >= 75) return 'var(--grade-b)';
  if (s >= 60) return 'var(--grade-c)';
  if (s >= 45) return 'var(--grade-d)';
  return 'var(--grade-f)';
}
|
|
195
|
+
// Per-dimension lookup tables. The three arrays are parallel: index i in each
// one refers to the same grading dimension.

// CSS colour reference for each dimension.
const DIM_COLORS = [
  'var(--dim1)',
  'var(--dim2)',
  'var(--dim3)',
  'var(--dim4)',
  'var(--dim5)'
];

// Human-readable label for each dimension.
const DIM_NAMES = [
  'Session Discipline',
  'Discovery Efficiency',
  'Task Hygiene',
  'Error Protocol',
  'Progressive Disclosure'
];

// Property name for each dimension.
const DIM_KEYS = [
  'sessionDiscipline',
  'discoveryEfficiency',
  'taskHygiene',
  'errorProtocol',
  'disclosureUse'
];
|
|
198
|
+
|
|
199
|
+
/**
 * Format a value for display.
 *
 * @param {*} n - Value to format.
 * @param {*} [fallback] - Returned as-is when n is null/undefined; when omitted
 *   (undefined), an em dash is returned instead.
 * @returns {*} The fallback for missing values; otherwise a string — one decimal
 *   place for non-integer numbers, String(n) for everything else.
 */
function fmtNum(n, fallback) {
  if (n == null) {
    if (fallback !== undefined) {
      return fallback;
    }
    return '\u2014';
  }
  var isFractional = typeof n === 'number' && !Number.isInteger(n);
  if (isFractional) {
    return n.toFixed(1);
  }
  return String(n);
}
|
|
203
|
+
/**
 * Format a date-like value as a short locale date/time string.
 *
 * @param {*} s - Value accepted by the Date constructor (typically a timestamp string).
 * @returns {*} '' for falsy input; the locale-formatted string for parseable
 *   input; the original input unchanged when it cannot be parsed or formatted.
 */
function fmtDate(s) {
  if (!s) return '';
  var parsed = new Date(s);
  // toLocaleString does not throw for an invalid Date — it returns the literal
  // text "Invalid Date" — so the catch below never covers unparseable input.
  // Check validity explicitly and fall back to showing the raw value.
  if (Number.isNaN(parsed.getTime())) return s;
  try {
    return parsed.toLocaleString(undefined, { dateStyle: 'short', timeStyle: 'short' });
  } catch {
    // Best-effort: if formatting itself fails, show the raw value.
    return s;
  }
}
|
|
207
|
+
|
|
208
|
+
// Safe element builder — never uses innerHTML with data
|
|
209
|
+
/**
 * Create a DOM element with an optional class name and optional text content.
 * Text is assigned through textContent, never as markup.
 *
 * @param {string} tag - Tag name passed to document.createElement.
 * @param {?string} [cls] - Class name; skipped when falsy.
 * @param {*} [text] - Text content; stringified, skipped when null/undefined.
 * @returns {*} The element returned by document.createElement.
 */
function mk(tag, cls, text) {
  var node = document.createElement(tag);
  if (cls) {
    node.className = cls;
  }
  if (text != null) {
    node.textContent = String(text);
  }
  return node;
}
|
|
215
|
+
/**
 * Inline-style variant of mk: create a DOM element with optional inline CSS
 * and optional text content (assigned through textContent, never as markup).
 *
 * @param {string} tag - Tag name passed to document.createElement.
 * @param {?string} [css] - Inline style text assigned to style.cssText; skipped when falsy.
 * @param {*} [text] - Text content; stringified, skipped when null/undefined.
 * @returns {*} The element returned by document.createElement.
 */
function mks(tag, css, text) {
  var node = document.createElement(tag);
  if (css) {
    node.style.cssText = css;
  }
  if (text != null) {
    node.textContent = String(text);
  }
  return node;
}
|
|
221
|
+
/**
 * Append every non-null extra argument to `parent`, in call order.
 *
 * @param {*} parent - Node exposing appendChild.
 * @param {...*} children - Passed positionally after `parent`; null/undefined entries are skipped.
 * @returns {*} The same `parent`, to allow chaining.
 */
function app(parent) {
  // Everything after the first argument is a candidate child.
  var kids = Array.prototype.slice.call(arguments, 1);
  kids.forEach(function (kid) {
    if (kid == null) {
      return;
    }
    parent.appendChild(kid);
  });
  return parent;
}
|
|
227
|
+
|
|
228
|
+
// ── Tab switching ──────────────────────────────────────────────────────────
|
|
229
|
+
// Clicking a tab button clears the 'active' class from every tab button and
// every tab panel, then marks the clicked button and the panel whose id is
// 'panel-' + the button's data-tab value as active.
document.querySelectorAll('.tab-btn').forEach(function (btn) {
  btn.addEventListener('click', function () {
    var deactivate = function (el) { el.classList.remove('active'); };
    document.querySelectorAll('.tab-btn').forEach(deactivate);
    document.querySelectorAll('.tab-panel').forEach(deactivate);
    btn.classList.add('active');
    var panel = document.getElementById('panel-' + btn.dataset.tab);
    panel.classList.add('active');
  });
});
|
|
237
|
+
|
|
238
|
+
// ── Empty state ────────────────────────────────────────────────────────────
|
|
239
|
+
/**
 * Build the placeholder shown when a tab has nothing to display.
 *
 * @param {*} icon - Text for the large icon line (rendered at 2.5rem).
 * @param {*} title - Heading text.
 * @param {*} desc - Paragraph text.
 * @param {*} [cmd] - Optional command hint; appended in an 'empty-cmd' span when truthy.
 * @returns {*} The 'empty-state' container element.
 */
function emptyState(icon, title, desc, cmd) {
  var wrapper = mk('div', 'empty-state');
  var iconEl = mks('div', 'font-size:2.5rem;', icon);
  var heading = mk('h3', null, title);
  var body = mk('p', null, desc);
  app(wrapper, iconEl, heading, body);
  if (cmd) {
    app(wrapper, mk('span', 'empty-cmd', cmd));
  }
  return wrapper;
}
|
|
245
|
+
|
|
246
|
+
// ── Grades Tab ─────────────────────────────────────────────────────────────
|
|
247
|
+
/**
 * Render the Grades tab: a sidebar with one row per grade result and, beside
 * it, the detail view of the selected result (the first one initially).
 *
 * With no results, only an empty-state placeholder is shown in the content area.
 *
 * @param {?Array<Object>} grades - Grade results. Each entry is read for
 *   totalScore (falling back to score) and sessionId.
 * @returns {void}
 */
function renderGradesTab(grades) {
  var sidebar = document.getElementById('grade-sidebar');
  var content = document.getElementById('grade-content');
  sidebar.textContent = '';
  content.textContent = '';

  var hasGrades = Boolean(grades && grades.length);
  if (!hasGrades) {
    var placeholder = emptyState('📊', 'No grade results yet',
      'Run a scenario to generate grades:', 'python scripts/run_scenario.py --scenario S1 --cleo cleo-dev');
    content.appendChild(placeholder);
    return;
  }

  document.getElementById('tb-grades').textContent = grades.length;
  app(sidebar, mk('div', 'sidebar-header', 'Sessions'));

  grades.forEach(function (grade, index) {
    var score = grade.totalScore != null ? grade.totalScore : grade.score;
    var color = scoreColor(score);
    // Show at most 22 characters of the session id; fall back to a 1-based run number.
    var shortId = (grade.sessionId || '').slice(0, 22) || ('Run ' + (index + 1));

    var rowClass = index === 0 ? 'sidebar-item active' : 'sidebar-item';
    var row = mk('div', rowClass);
    var letterEl = mks('span', 'font-family:Poppins,sans-serif;font-weight:700;font-size:.85rem;color:' + color + ';flex-shrink:0;width:18px;', scoreLetter(score));
    var labelEl = mk('span', 'item-label', shortId);
    var scoreEl = mk('span', 'item-score', score != null ? (score + '/100') : '\u2014');
    scoreEl.style.color = color;
    app(row, letterEl, labelEl, scoreEl);

    // Selecting a row moves the 'active' marker to it and re-renders the detail pane.
    row.addEventListener('click', function () {
      sidebar.querySelectorAll('.sidebar-item').forEach(function (other) {
        other.classList.remove('active');
      });
      row.classList.add('active');
      renderGradeDetail(grade, content);
    });

    sidebar.appendChild(row);
  });

  renderGradeDetail(grades[0], content);
}
|
|
280
|
+
|
|
281
|
+
/**
 * Render the detail view for one grade record into `container`.
 *
 * The container is cleared, then filled in order with: the score card
 * (header plus one bar per dimension listed in DIM_KEYS), the flags card,
 * an evidence card (only when at least one dimension carries evidence) and
 * the token-metadata card.
 *
 * @param {Object} g - grade record. Fields read here: totalScore / score,
 *   maxScore, dimensions, flags, _tokenMeta, _scenarioId / scenario,
 *   timestamp, sessionId, entryCount.
 * @param {*} container - element to render into (needs textContent and
 *   appendChild).
 */
function renderGradeDetail(g, container) {
  container.textContent = '';
  var dims = g.dimensions || {};
  var flags = g.flags || [];

  container.appendChild(buildScoreCard());
  container.appendChild(buildFlagsCard());
  var evidenceCard = buildEvidenceCard();
  if (evidenceCard) container.appendChild(evidenceCard);
  container.appendChild(buildTokenCard());

  // Keep a percentage usable as a CSS width: a dimension whose score is
  // above its max (or negative) would otherwise overflow its track.
  function clampPct(p) {
    return p < 0 ? 0 : (p > 100 ? 100 : p);
  }

  // Score header (letter badge, score/max, percentage line, optional
  // timestamp and session id) followed by the per-dimension bars.
  function buildScoreCard() {
    var score = g.totalScore != null ? g.totalScore : (g.score || 0);
    var max = g.maxScore || 100;
    var letter = scoreLetter(score);
    var pct = Math.round((score / max) * 100);
    var scenario = g._scenarioId || g.scenario || '';

    var card = mk('div', 'grade-card');
    var hdr = mk('div', 'grade-card-header');
    var sb = mk('div', 'grade-score-block');
    var badge = mk('div', 'grade-letter-badge ' + letter, letter);
    var sd = mk('div', 'grade-score-details');
    var h2 = mk('h2'); h2.textContent = score + '/' + max; h2.style.color = scoreColor(score);
    var pctEl = mk('div', 'pct', pct + '% \u2014 Grade ' + letter + (scenario ? ' \u2014 ' + scenario : ''));
    app(sd, h2, pctEl);
    if (g.timestamp) app(sd, mks('div', 'font-size:.68rem;color:var(--text-muted);margin-top:.25rem;', fmtDate(g.timestamp)));
    app(sb, badge, sd);
    app(hdr, sb);
    if (g.sessionId) app(hdr, mk('code', 'grade-session-id', g.sessionId));
    card.appendChild(hdr);

    var dimsWrap = mk('div', 'dimensions');
    DIM_KEYS.forEach(function(key, i) {
      var d = dims[key]; if (!d) return;
      // A dimension without an explicit max is scored out of 20.
      var ds = d.score || 0, dm = d.max || 20, dp = Math.round((ds / dm) * 100);
      var row = mk('div', 'dimension-row');
      var bar = mk('div', 'dimension-bar');
      bar.style.cssText = 'width:' + clampPct(dp) + '%;background:' + DIM_COLORS[i] + ';';
      var bw = mk('div', 'dimension-bar-wrap'); bw.appendChild(bar);
      // The label keeps the raw numbers; only the bar width is clamped.
      var sl = mk('span', 'dimension-score-label', ds + '/' + dm);
      sl.style.color = DIM_COLORS[i];
      app(row, mk('span', 'dimension-name', DIM_NAMES[i]), bw, sl);
      if (d.evidence && d.evidence.length) row.title = d.evidence.join(' | ');
      dimsWrap.appendChild(row);
    });
    card.appendChild(dimsWrap);
    return card;
  }

  // Flags list, or a single "no flags" line when the record has none.
  function buildFlagsCard() {
    var fs = mk('div', 'grade-card flags-section');
    var ft = mk('h3', null, 'Flags' + (flags.length ? ' (' + flags.length + ')' : ''));
    var fl = mk('div', 'flags-list');
    if (!flags.length) {
      fl.appendChild(mk('div', 'no-flags', '\u2713 No flags \u2014 all protocol checks passed'));
    } else {
      flags.forEach(function(f) {
        var item = mk('div', 'flag-item');
        app(item, mks('span', 'flex-shrink:0;', '\u26A0'), mk('span', null, f));
        fl.appendChild(item);
      });
    }
    app(fs, ft, fl);
    return fs;
  }

  // Evidence bullets grouped by dimension; returns null when no dimension
  // has any evidence so the caller can omit the card entirely.
  function buildEvidenceCard() {
    var hasEvidence = DIM_KEYS.some(function(k) { return dims[k] && dims[k].evidence && dims[k].evidence.length; });
    if (!hasEvidence) return null;
    var ec = mk('div', 'grade-card');
    ec.appendChild(mk('div', 'ev-title', 'Evidence'));
    DIM_KEYS.forEach(function(key, i) {
      var d = dims[key]; if (!d || !d.evidence || !d.evidence.length) return;
      var row = mk('div', 'ev-row');
      var dimLabel = mk('div', 'ev-dim', DIM_NAMES[i]); dimLabel.style.color = DIM_COLORS[i];
      row.appendChild(dimLabel);
      d.evidence.forEach(function(ev) { row.appendChild(mk('div', 'ev-bullet', '\u2022 ' + ev)); });
      ec.appendChild(row);
    });
    return ec;
  }

  // Token statistics: from g._tokenMeta when present, otherwise an estimate
  // of 150 tokens per audit entry, otherwise a hint on how to get data.
  function buildTokenCard() {
    var tmeta = g._tokenMeta || null;
    var tokCard = mk('div', 'grade-card');
    tokCard.appendChild(mk('div', 'ev-title', 'Token Metadata'));
    var grid = mk('div', 'token-meta-grid');
    function addStat(label, value) {
      var s = mk('div', 'token-stat');
      app(s, mk('div', 'token-stat-label', label), mk('div', 'token-stat-value', fmtNum(value, '\u2014')));
      grid.appendChild(s);
    }
    if (tmeta) {
      addStat('Method', tmeta.estimationMethod || tmeta.estimation_method || '\u2014');
      // Counts that are missing or zero are left out of the grid.
      if (tmeta.totalEstimatedTokens) addStat('Est. Tokens', tmeta.totalEstimatedTokens);
      if (tmeta.inputTokens) addStat('Input Tokens', tmeta.inputTokens);
      if (tmeta.outputTokens) addStat('Output Tokens', tmeta.outputTokens);
      if (tmeta.cacheReadTokens) addStat('Cache Read', tmeta.cacheReadTokens);
    } else if (g.entryCount) {
      addStat('Audit Entries', g.entryCount);
      addStat('Est. Tokens', '~' + (g.entryCount * 150));
      addStat('Method', 'entry_count_proxy');
    } else {
      addStat('Note', 'Enable OTEL or use run_scenario.py for token data');
    }
    app(tokCard, grid);
    return tokCard;
  }
}
|
|
382
|
+
|
|
383
|
+
// ── A/B Tab ────────────────────────────────────────────────────────────────
|
|
384
|
+
/**
 * Render the A/B tab from an aggregated MCP-vs-CLI comparison result.
 *
 * '#ab-content' is cleared first. Without a result (or with zero
 * total_runs) an empty-state block is shown. Otherwise the '#tb-ab' badge is
 * set and the pane receives, in order: a winner banner, a summary grid, an
 * optional per-operation table and a fixed explanatory note.
 *
 * @param {Object} ab - comparison summary. Fields read here: total_runs,
 *   global_wins, global_win_rate, avg_token_delta_mcp_minus_cli,
 *   overall_winner and per_operation (either an array of records carrying an
 *   `operation` name, or an object keyed by operation name).
 */
function renderABTab(ab) {
  var container = document.getElementById('ab-content');
  container.textContent = '';
  if (!ab || !ab.total_runs) {
    container.appendChild(emptyState('⚖️', 'No A/B results yet',
      'Run a blind comparison:', 'python scripts/run_ab_test.py --domain tasks --runs 3 --cleo cleo-dev'));
    return;
  }
  document.getElementById('tb-ab').textContent = ab.total_runs || 0;
  var wins = ab.global_wins || {}, wr = ab.global_win_rate || {};
  var delta = ab.avg_token_delta_mcp_minus_cli, winner = ab.overall_winner || 'tie';

  // Highlight class for a side's cards: only the overall winner gets 'win'.
  function winClass(side) { return winner === side ? 'win' : ''; }

  var banner = mk('div', 'ab-winner-banner ' + winner);
  var wlabel = winner === 'mcp' ? '\u26A1 MCP wins overall' : winner === 'cli' ? '\u26A1 CLI wins overall' : '\u2194 Overall tie';
  banner.appendChild(mk('span', null, wlabel));
  if (delta != null) {
    var sign = delta > 0 ? '+' : '';
    banner.appendChild(mks('span', 'font-size:.7rem;font-weight:400;opacity:.8;',
      ' \u2014 MCP uses ' + sign + delta.toFixed(1) + ' tokens/op vs CLI'));
  }
  container.appendChild(banner);

  var sumTitle = mk('div', 'section-title', 'Summary'); container.appendChild(sumTitle);
  var grid = mk('div', 'ab-summary-grid');
  // Each row: [label, value, extra class for the value element].
  [
    ['Total Runs', ab.total_runs, ''],
    // Conditional like the other side-specific cards; this one used to be
    // marked 'win' unconditionally, even when CLI won or the result was a tie.
    ['MCP Wins', wins.mcp || 0, winClass('mcp')],
    ['CLI Wins', wins.cli || 0, winClass('cli')],
    ['Ties', wins.tie || 0, 'neutral'],
    ['MCP Win %', ((wr.mcp || 0) * 100).toFixed(1) + '%', winClass('mcp')],
    ['CLI Win %', ((wr.cli || 0) * 100).toFixed(1) + '%', winClass('cli')],
    // Delta is MCP minus CLI, so more than +10 counts against MCP.
    ['Token Delta', delta != null ? ((delta > 0 ? '+' : '') + delta.toFixed(1) + 't') : '\u2014',
      delta > 10 ? 'loss' : delta < -10 ? 'win' : 'neutral'],
  ].forEach(function(row) {
    var c = mk('div', 'ab-stat-card');
    app(c, mk('div', 'ab-stat-label', row[0]), mk('div', 'ab-stat-value ' + row[2], String(row[1])));
    grid.appendChild(c);
  });
  container.appendChild(grid);

  // Normalise per_operation to an object keyed by operation name.
  var rawPerOp = ab.per_operation;
  var perOp = null;
  if (Array.isArray(rawPerOp)) {
    perOp = {};
    rawPerOp.forEach(function(s) { if (s && s.operation) perOp[s.operation] = s; });
  } else if (rawPerOp && typeof rawPerOp === 'object') {
    perOp = rawPerOp;
  }
  if (perOp && Object.keys(perOp).length) {
    container.appendChild(mks('div', 'font-family:Poppins,sans-serif;font-size:.75rem;font-weight:600;text-transform:uppercase;letter-spacing:.08em;color:var(--text-muted);margin-top:.5rem;', 'Per-Operation Results'));
    var wrap = mk('div', 'ab-table-wrap');
    var tbl = document.createElement('table'); tbl.className = 'ab-table';
    var thead = document.createElement('thead'), hrow = document.createElement('tr');
    ['Operation','Winner','MCP Wins','CLI Wins','Ties','Token \u0394','MCP chars','CLI chars','MCP ms','CLI ms'].forEach(function(h) {
      hrow.appendChild(mk('th', null, h));
    });
    thead.appendChild(hrow); tbl.appendChild(thead);
    var tbody = document.createElement('tbody');
    Object.keys(perOp).sort().forEach(function(opKey) {
      var d = perOp[opKey], row = document.createElement('tr');
      var delta2 = d.avg_token_delta != null ? d.avg_token_delta : 0;
      var dcls = delta2 > 10 ? 'delta-pos' : delta2 < -10 ? 'delta-neg' : 'delta-zero';
      // Use the recorded winner, else derive it from the win counts.
      var opW = d.winner || (d.mcp_wins > d.cli_wins ? 'mcp' : d.cli_wins > d.mcp_wins ? 'cli' : 'tie');
      var cells = [
        mk('span', 'op-name', opKey),
        mk('span', 'win-pill ' + opW, opW.toUpperCase()),
        document.createTextNode(String(d.mcp_wins || 0)),
        document.createTextNode(String(d.cli_wins || 0)),
        document.createTextNode(String(d.ties || 0)),
        mk('span', dcls, (delta2 > 0 ? '+' : '') + delta2.toFixed(0) + 't'),
        document.createTextNode(fmtNum(d.avg_mcp_chars)),
        document.createTextNode(fmtNum(d.avg_cli_chars)),
        document.createTextNode(fmtNum(d.avg_mcp_ms) + 'ms'),
        document.createTextNode(fmtNum(d.avg_cli_ms) + 'ms'),
      ];
      cells.forEach(function(c) { var td = document.createElement('td'); td.appendChild(c); row.appendChild(td); });
      tbody.appendChild(row);
    });
    tbl.appendChild(tbody); wrap.appendChild(tbl); container.appendChild(wrap);
  }
  container.appendChild(mk('div', 'ab-note',
    '\u26A0 CLI-only sessions always score 0 on S5 (Progressive Disclosure) — metadata.gateway is not set by the CLI adapter. MCP earns +10 for query gateway usage automatically.'));
}
|
|
466
|
+
|
|
467
|
+
// ── Token Analysis Tab ─────────────────────────────────────────────────────
|
|
468
|
+
/**
 * Render the Token Analysis tab as up to three horizontal bar charts.
 *
 * '#token-content' is cleared first. With no data (missing or key-less
 * object) an empty-state block is shown. Otherwise one chart is appended for
 * each section that is present: breakdown_by_domain, breakdown_by_gateway
 * (plus its optional note) and ab_aggregation.per_operation.
 *
 * @param {Object} ta - token analysis. Fields read here:
 *   breakdown_by_domain[name].mean, breakdown_by_gateway.mcp_query.mean,
 *   breakdown_by_gateway.cli.mean, breakdown_by_gateway.note and
 *   ab_aggregation.per_operation[name].avg_delta.
 */
function renderTokenTab(ta) {
  var container = document.getElementById('token-content');
  container.textContent = '';
  if (!ta || !Object.keys(ta).length) {
    container.appendChild(emptyState('\uD83D\uDD22', 'No token data yet',
      'Generate token analysis:', 'python scripts/token_tracker.py --grades-file .cleo/metrics/GRADES.jsonl --breakdown-by domain'));
    return;
  }

  // Read a numeric field from a stats record, treating a missing record or
  // a missing/zero field as 0 so one null entry cannot abort the whole tab.
  function statOf(stat, field) {
    return (stat && stat[field]) || 0;
  }

  // Largest second element of the [label, value] pairs, never below 1.
  function largest(values) {
    return Math.max.apply(null, values.concat([1]));
  }

  /**
   * Build one chart card.
   * entries  - array of [label, value] pairs, drawn in order
   * maxVal   - value that maps to the longest bar
   * colorFn  - optional (row, index) -> CSS colour; defaults to the accent
   */
  function barChart(title, entries, maxVal, colorFn) {
    var card = mk('div', 'chart-card');
    card.appendChild(mk('div', 'chart-card-title', title));
    var chart = mk('div', 'bar-chart');
    entries.forEach(function(row, idx) {
      var label = row[0], val = row[1];
      var color = colorFn ? colorFn(row, idx) : 'var(--accent)';
      // Bars use the magnitude and top out at 90% of the track.
      var pct = maxVal > 0 ? Math.min((Math.abs(val) / maxVal) * 90, 90) : 0;
      var r = mk('div', 'bar-row');
      var track = mk('div', 'bar-track');
      var fill = mk('div', 'bar-fill'); fill.style.cssText = 'width:' + pct + '%;background:' + color + ';';
      track.appendChild(fill);
      app(r, mk('span', 'bar-label', label), track, mk('span', 'bar-val', fmtNum(val) + 't'));
      chart.appendChild(r);
    });
    card.appendChild(chart);
    return card;
  }

  if (ta.breakdown_by_domain) {
    var entries = Object.entries(ta.breakdown_by_domain).map(function(kv) { return [kv[0], statOf(kv[1], 'mean')]; });
    var maxD = largest(entries.map(function(e) { return e[1]; }));
    var colors2 = ['var(--dim1)','var(--dim2)','var(--dim3)','var(--dim4)','var(--dim5)','var(--accent)','var(--mcp)','var(--cli)','#9b7ec8','#5a9a6a'];
    // Colours cycle by row position; the index comes from barChart rather
    // than a per-row indexOf scan.
    container.appendChild(barChart('Estimated Tokens by Domain', entries, maxD, function(row, i) { return colors2[i % colors2.length]; }));
  }
  if (ta.breakdown_by_gateway) {
    var gw = ta.breakdown_by_gateway;
    var gwEntries = [['query (MCP)', statOf(gw.mcp_query, 'mean')], ['CLI', statOf(gw.cli, 'mean')]];
    var maxGw = largest(gwEntries.map(function(e) { return e[1]; }));
    var gwColors = ['var(--mcp)', 'var(--cli)'];
    var gwCard = barChart('MCP vs CLI Token Split', gwEntries, maxGw, function(row, i) { return gwColors[i]; });
    container.appendChild(gwCard);
    if (gw.note) {
      // The note belongs inside the gateway card, below its bars.
      var noteEl = mks('p', 'font-size:.7rem;color:var(--text-muted);margin-top:.5rem;line-height:1.5;', gw.note);
      gwCard.appendChild(noteEl);
    }
  }
  if (ta.ab_aggregation && ta.ab_aggregation.per_operation) {
    var opEntries = Object.entries(ta.ab_aggregation.per_operation).map(function(kv) { return [kv[0], statOf(kv[1], 'avg_delta')]; });
    // Largest delta first.
    opEntries.sort(function(a,b) { return b[1] - a[1]; });
    var maxOp = largest(opEntries.map(function(e) { return Math.abs(e[1]); }));
    container.appendChild(barChart('Token Delta per Op (MCP \u2212 CLI)', opEntries, maxOp, function(row) {
      return row[1] > 0 ? 'var(--grade-d)' : 'var(--win)';
    }));
  }
}
|
|
519
|
+
|
|
520
|
+
// ── History Tab ────────────────────────────────────────────────────────────
|
|
521
|
+
/**
 * Render the History tab: a score-trend sparkline (two or more records
 * only) followed by a newest-first list of all graded sessions.
 *
 * '#history-content' is cleared first. With no grades an empty-state block
 * is shown. Otherwise the '#tb-history' badge is set to the record count.
 * Clicking a list entry switches to the Grades tab and selects the same
 * record in its sidebar.
 *
 * @param {Array<Object>} grades - grade records. Fields read here:
 *   totalScore / score, maxScore, timestamp, flags, sessionId,
 *   _scenarioId / scenario.
 */
function renderHistoryTab(grades) {
  var container = document.getElementById('history-content');
  container.textContent = '';
  if (!grades || !grades.length) {
    container.appendChild(emptyState('\uD83D\uDCC8', 'No history yet', 'Run graded sessions to build history.', ''));
    return;
  }
  document.getElementById('tb-history').textContent = grades.length;

  function scoreOf(g) {
    return g.totalScore != null ? g.totalScore : (g.score || 0);
  }

  // Sort key for a record. A missing or unparseable timestamp becomes 0 so
  // the comparator never returns NaN, which would make the order
  // engine-dependent.
  function timeOf(g) {
    var t = new Date(g.timestamp).getTime();
    return isNaN(t) ? 0 : t;
  }

  // Switch to the Grades tab and select the sidebar entry for `g`.
  // Sidebar entries are assumed to be in the same order as the embedded
  // grades array (falling back to this function's own input when the global
  // is absent). The record is located by identity first, so records with a
  // missing or duplicated sessionId still resolve to the right entry;
  // sessionId matching is kept as a fallback.
  function openInGradesTab(g) {
    var tabBtn = document.querySelector('[data-tab="grades"]');
    if (tabBtn) tabBtn.click();
    var items = document.querySelectorAll('#grade-sidebar .sidebar-item');
    var allGrades = (typeof EMBEDDED_GRADE_DATA !== 'undefined' && EMBEDDED_GRADE_DATA.grades) || grades;
    var target = allGrades.indexOf(g);
    if (target === -1) {
      for (var j = 0; j < allGrades.length; j++) {
        if (allGrades[j].sessionId === g.sessionId) { target = j; break; }
      }
    }
    if (target !== -1 && items[target]) items[target].click();
  }

  if (grades.length > 1) {
    // Oldest first for the trend line.
    var sorted = grades.slice().sort(function(a,b) { return timeOf(a) - timeOf(b); });
    var scores = sorted.map(function(g) { return scoreOf(g); });
    var trendCard = mk('div', 'chart-card');
    trendCard.appendChild(mk('div', 'chart-card-title', 'Score Trend'));
    var canvas = document.createElement('canvas');
    canvas.className = 'trend'; canvas.width = 800; canvas.height = 90;
    trendCard.appendChild(canvas);
    container.appendChild(trendCard);
    // Drawing is deferred to the next frame, after the canvas is attached.
    requestAnimationFrame(function() { drawSparkline(canvas, scores); });
  }

  // Newest first for the list.
  var desc = grades.slice().sort(function(a,b) { return timeOf(b) - timeOf(a); });
  container.appendChild(mk('div', 'section-title', 'All Sessions'));
  var list = mk('div', 'history-list');
  desc.forEach(function(g) {
    var score = scoreOf(g);
    var max = g.maxScore || 100;
    var letter = scoreLetter(score);
    var pct = Math.round((score / max) * 100);
    var flags = (g.flags || []).length;
    var item = mk('div', 'history-item');
    var badge = mk('div', 'history-grade-badge ' + letter, letter);
    var main = mk('div', 'history-item-main');
    var sid = mk('div', 'history-session-id', g.sessionId || '(no session id)');
    var meta = mk('div', 'history-item-meta');
    // Same denominator as the bar below, instead of a fixed "/100".
    app(meta, mks('span', null, fmtDate(g.timestamp)), mks('span', null, score + '/' + max));
    if (flags > 0) app(meta, mks('span', 'color:var(--loss);', flags + ' flag' + (flags !== 1 ? 's' : '')));
    if (g._scenarioId || g.scenario) app(meta, mks('span', 'color:var(--text-muted);', g._scenarioId || g.scenario));
    app(main, sid, meta);
    var barWrap = mk('div', 'history-score-bar');
    var fill = mk('div', 'history-score-fill');
    // Clamp so an out-of-range score cannot overflow the bar.
    fill.style.cssText = 'width:' + Math.max(0, Math.min(100, pct)) + '%;background:' + scoreColor(score) + ';';
    barWrap.appendChild(fill);
    app(item, badge, main, barWrap);
    item.addEventListener('click', function() { openInGradesTab(g); });
    list.appendChild(item);
  });
  container.appendChild(list);
}
|
|
576
|
+
|
|
577
|
+
/**
 * Draw a filled line chart of `scores` on `canvas`, with dashed guide lines
 * at 90, 75, 60 and 45 and one dot per score coloured by scoreColor.
 *
 * The canvas backing size is reset to its client size (falling back to
 * 600x90 when that is zero). Scores are plotted on a fixed 0–100 vertical
 * scale with a 10px padding on every side.
 *
 * @param {*} canvas - canvas element (needs clientWidth, clientHeight,
 *   width, height and getContext).
 * @param {Array<number>} scores - values to plot, left to right.
 */
function drawSparkline(canvas, scores) {
  var W = canvas.clientWidth || 600, H = canvas.clientHeight || 90;
  canvas.width = W; canvas.height = H;
  var ctx = canvas.getContext('2d');
  // getContext returns null when a 2D context cannot be provided.
  if (!ctx) return;
  var pad = 10, uW = W - pad*2, uH = H - pad*2;
  var last = scores.length - 1;
  // With a single score there is no horizontal span to divide by (0/0 would
  // give NaN coordinates), so that lone point is centred instead.
  var toX = function(i) { return last > 0 ? pad + (i / last) * uW : pad + uW / 2; };
  // Values outside 0–100 are pinned to the chart edges rather than being
  // drawn outside the canvas.
  var toY = function(s) { return pad + (1 - Math.min(Math.max(s, 0), 100) / 100) * uH; };
  var trace = function() {
    scores.forEach(function(s,i) { i===0 ? ctx.moveTo(toX(i),toY(s)) : ctx.lineTo(toX(i),toY(s)); });
  };

  // Area under the line, fading towards the baseline.
  var grad = ctx.createLinearGradient(0, pad, 0, H - pad);
  grad.addColorStop(0, 'rgba(73,143,165,.3)'); grad.addColorStop(1, 'rgba(73,143,165,.02)');
  ctx.beginPath();
  trace();
  ctx.lineTo(toX(last), H-pad); ctx.lineTo(toX(0), H-pad); ctx.closePath();
  ctx.fillStyle = grad; ctx.fill();

  // The line itself.
  ctx.beginPath();
  trace();
  ctx.strokeStyle = '#4a8fa5'; ctx.lineWidth = 2; ctx.lineJoin = 'round'; ctx.stroke();

  // Dashed horizontal guide lines.
  [90,75,60,45].forEach(function(t) {
    ctx.beginPath(); ctx.moveTo(pad,toY(t)); ctx.lineTo(W-pad,toY(t));
    ctx.strokeStyle = 'rgba(0,0,0,.07)'; ctx.lineWidth = 1; ctx.setLineDash([3,4]); ctx.stroke(); ctx.setLineDash([]);
  });

  // One dot per score, drawn last so it sits on top.
  scores.forEach(function(s,i) {
    ctx.beginPath(); ctx.arc(toX(i),toY(s),3.5,0,Math.PI*2); ctx.fillStyle = scoreColor(s); ctx.fill();
  });
}
|
|
601
|
+
|
|
602
|
+
// ── Init ───────────────────────────────────────────────────────────────────
|
|
603
|
+
// Page bootstrap: fill the header from the embedded data object (when one
// is defined) and render all four tabs. Runs once, immediately.
(function() {
  var data = {};
  if (typeof EMBEDDED_GRADE_DATA !== 'undefined') {
    data = EMBEDDED_GRADE_DATA;
  }

  var titleEl = document.getElementById('hdr-title');
  titleEl.textContent = data.title || 'ct-grade \u2014 Review';

  if (data.subtitle) {
    document.getElementById('hdr-sub').textContent = data.subtitle;
  }

  // Missing sections are replaced by empty containers so every renderer
  // can show its own empty state.
  var gradeList = data.grades || [];

  if (data.generated_at) {
    // Two header lines separated by a <br>: generation time, then counts.
    var metaEl = document.getElementById('hdr-meta');
    var generatedLine = 'Generated ' + fmtDate(data.generated_at);
    metaEl.appendChild(document.createTextNode(generatedLine));
    metaEl.appendChild(document.createElement('br'));
    var abRuns = (data.ab_results && data.ab_results.total_runs) || 0;
    var countsLine = gradeList.length + ' grade(s) \u00b7 ' + abRuns + ' A/B run(s)';
    metaEl.appendChild(document.createTextNode(countsLine));
  }

  renderGradesTab(gradeList);
  renderABTab(data.ab_results || {});
  renderTokenTab(data.token_analysis || {});
  renderHistoryTab(gradeList);
})();
|
|
618
|
+
</script>
|
|
619
|
+
</body>
|
|
620
|
+
</html>
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_comment": "CLEO-only metadata — merge into packages/ct-skills/skills/manifest.json",
|
|
3
|
+
"name": "ct-grade",
|
|
4
|
+
"version": "2.1.0",
|
|
5
|
+
"tier": 2,
|
|
6
|
+
"token_budget": 16000,
|
|
7
|
+
"capabilities": {
|
|
8
|
+
"inputs": ["SESSION_ID", "SCENARIO_ID", "DOMAIN", "OPERATION_LIST"],
|
|
9
|
+
"outputs": ["grade-result", "ab-comparison", "token-report", "analysis-report"],
|
|
10
|
+
"dispatch_triggers": ["grade", "ab-test", "blind-test", "token-efficiency", "scenario"],
|
|
11
|
+
"compatible_subagent_types": ["general-purpose", "Explore"],
|
|
12
|
+
"agents": ["scenario-runner", "blind-comparator", "analysis-reporter"],
|
|
13
|
+
"chains_to": ["ct-cleo", "ct-research-agent"],
|
|
14
|
+
"dispatch_keywords": {
|
|
15
|
+
"primary": ["grade", "ab-test", "blind-test", "token-efficiency"],
|
|
16
|
+
"secondary": ["mcp-cli-comparison", "behavioral-compliance", "session-quality", "playbook"]
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"constraints": {
|
|
20
|
+
"max_context_tokens": 80000,
|
|
21
|
+
"requires_session": false,
|
|
22
|
+
"requires_epic": false
|
|
23
|
+
},
|
|
24
|
+
"references": [
|
|
25
|
+
"skills/ct-grade-v2-1/references/grade-spec-v2.md",
|
|
26
|
+
"skills/ct-grade-v2-1/references/playbook-v2.md",
|
|
27
|
+
"skills/ct-grade-v2-1/references/ab-testing.md",
|
|
28
|
+
"skills/ct-grade-v2-1/references/token-tracking.md",
|
|
29
|
+
"skills/ct-grade-v2-1/references/domains-ssot.md"
|
|
30
|
+
]
|
|
31
|
+
}
|