@cleocode/skills 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dispatch-config.json +404 -0
- package/index.d.ts +178 -0
- package/index.js +405 -0
- package/package.json +14 -0
- package/profiles/core.json +7 -0
- package/profiles/full.json +10 -0
- package/profiles/minimal.json +7 -0
- package/profiles/recommended.json +7 -0
- package/provider-skills-map.json +97 -0
- package/skills/_shared/cleo-style-guide.md +84 -0
- package/skills/_shared/manifest-operations.md +810 -0
- package/skills/_shared/placeholders.json +433 -0
- package/skills/_shared/skill-chaining-patterns.md +237 -0
- package/skills/_shared/subagent-protocol-base.md +223 -0
- package/skills/_shared/task-system-integration.md +232 -0
- package/skills/_shared/testing-framework-config.md +110 -0
- package/skills/ct-cleo/SKILL.md +490 -0
- package/skills/ct-cleo/references/anti-patterns.md +19 -0
- package/skills/ct-cleo/references/loom-lifecycle.md +136 -0
- package/skills/ct-cleo/references/orchestrator-constraints.md +55 -0
- package/skills/ct-cleo/references/session-protocol.md +162 -0
- package/skills/ct-codebase-mapper/SKILL.md +82 -0
- package/skills/ct-contribution/SKILL.md +521 -0
- package/skills/ct-contribution/templates/contribution-init.json +21 -0
- package/skills/ct-dev-workflow/SKILL.md +423 -0
- package/skills/ct-docs-lookup/SKILL.md +66 -0
- package/skills/ct-docs-review/SKILL.md +175 -0
- package/skills/ct-docs-write/SKILL.md +108 -0
- package/skills/ct-documentor/SKILL.md +231 -0
- package/skills/ct-epic-architect/SKILL.md +305 -0
- package/skills/ct-epic-architect/references/bug-epic-example.md +172 -0
- package/skills/ct-epic-architect/references/commands.md +201 -0
- package/skills/ct-epic-architect/references/feature-epic-example.md +210 -0
- package/skills/ct-epic-architect/references/migration-epic-example.md +244 -0
- package/skills/ct-epic-architect/references/output-format.md +92 -0
- package/skills/ct-epic-architect/references/patterns.md +284 -0
- package/skills/ct-epic-architect/references/refactor-epic-example.md +412 -0
- package/skills/ct-epic-architect/references/research-epic-example.md +226 -0
- package/skills/ct-epic-architect/references/shell-escaping.md +86 -0
- package/skills/ct-epic-architect/references/skill-aware-execution.md +195 -0
- package/skills/ct-grade/SKILL.md +230 -0
- package/skills/ct-grade/agents/analysis-reporter.md +203 -0
- package/skills/ct-grade/agents/blind-comparator.md +157 -0
- package/skills/ct-grade/agents/scenario-runner.md +134 -0
- package/skills/ct-grade/eval-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
- package/skills/ct-grade/eval-viewer/generate_grade_review.py +1138 -0
- package/skills/ct-grade/eval-viewer/generate_grade_viewer.py +544 -0
- package/skills/ct-grade/eval-viewer/generate_review.py +283 -0
- package/skills/ct-grade/eval-viewer/grade-review.html +1574 -0
- package/skills/ct-grade/eval-viewer/viewer.html +219 -0
- package/skills/ct-grade/evals/evals.json +94 -0
- package/skills/ct-grade/references/ab-test-methodology.md +150 -0
- package/skills/ct-grade/references/domains.md +137 -0
- package/skills/ct-grade/references/grade-spec.md +236 -0
- package/skills/ct-grade/references/scenario-playbook.md +234 -0
- package/skills/ct-grade/references/token-tracking.md +120 -0
- package/skills/ct-grade/scripts/__pycache__/audit_analyzer.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/__pycache__/run_ab_test.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/__pycache__/run_all.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/__pycache__/token_tracker.cpython-314.pyc +0 -0
- package/skills/ct-grade/scripts/audit_analyzer.py +279 -0
- package/skills/ct-grade/scripts/generate_report.py +283 -0
- package/skills/ct-grade/scripts/run_ab_test.py +504 -0
- package/skills/ct-grade/scripts/run_all.py +287 -0
- package/skills/ct-grade/scripts/setup_run.py +183 -0
- package/skills/ct-grade/scripts/token_tracker.py +630 -0
- package/skills/ct-grade-v2-1/SKILL.md +237 -0
- package/skills/ct-grade-v2-1/agents/analysis-reporter.md +203 -0
- package/skills/ct-grade-v2-1/agents/blind-comparator.md +157 -0
- package/skills/ct-grade-v2-1/agents/scenario-runner.md +179 -0
- package/skills/ct-grade-v2-1/evals/evals.json +74 -0
- package/skills/ct-grade-v2-1/grade-viewer/__pycache__/build_op_stats.cpython-314.pyc +0 -0
- package/skills/ct-grade-v2-1/grade-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
- package/skills/ct-grade-v2-1/grade-viewer/build_op_stats.py +174 -0
- package/skills/ct-grade-v2-1/grade-viewer/eval-analysis.json +41 -0
- package/skills/ct-grade-v2-1/grade-viewer/eval-report.md +34 -0
- package/skills/ct-grade-v2-1/grade-viewer/generate_grade_review.py +1023 -0
- package/skills/ct-grade-v2-1/grade-viewer/generate_grade_viewer.py +548 -0
- package/skills/ct-grade-v2-1/grade-viewer/grade-review-eval.html +613 -0
- package/skills/ct-grade-v2-1/grade-viewer/grade-review.html +1532 -0
- package/skills/ct-grade-v2-1/grade-viewer/viewer.html +620 -0
- package/skills/ct-grade-v2-1/manifest-entry.json +31 -0
- package/skills/ct-grade-v2-1/references/ab-testing.md +233 -0
- package/skills/ct-grade-v2-1/references/domains-ssot.md +156 -0
- package/skills/ct-grade-v2-1/references/grade-spec-v2.md +167 -0
- package/skills/ct-grade-v2-1/references/playbook-v2.md +393 -0
- package/skills/ct-grade-v2-1/references/token-tracking.md +202 -0
- package/skills/ct-grade-v2-1/scripts/generate_report.py +419 -0
- package/skills/ct-grade-v2-1/scripts/run_ab_test.py +493 -0
- package/skills/ct-grade-v2-1/scripts/run_scenario.py +396 -0
- package/skills/ct-grade-v2-1/scripts/setup_run.py +207 -0
- package/skills/ct-grade-v2-1/scripts/token_tracker.py +175 -0
- package/skills/ct-memory/SKILL.md +84 -0
- package/skills/ct-orchestrator/INSTALL.md +61 -0
- package/skills/ct-orchestrator/README.md +69 -0
- package/skills/ct-orchestrator/SKILL.md +380 -0
- package/skills/ct-orchestrator/manifest-entry.json +19 -0
- package/skills/ct-orchestrator/orchestrator-prompt.txt +17 -0
- package/skills/ct-orchestrator/references/SUBAGENT-PROTOCOL-BLOCK.md +66 -0
- package/skills/ct-orchestrator/references/autonomous-operation.md +167 -0
- package/skills/ct-orchestrator/references/lifecycle-gates.md +98 -0
- package/skills/ct-orchestrator/references/orchestrator-compliance.md +271 -0
- package/skills/ct-orchestrator/references/orchestrator-handoffs.md +85 -0
- package/skills/ct-orchestrator/references/orchestrator-patterns.md +164 -0
- package/skills/ct-orchestrator/references/orchestrator-recovery.md +113 -0
- package/skills/ct-orchestrator/references/orchestrator-spawning.md +271 -0
- package/skills/ct-orchestrator/references/orchestrator-tokens.md +180 -0
- package/skills/ct-research-agent/SKILL.md +226 -0
- package/skills/ct-skill-creator/.cleo/.context-state.json +13 -0
- package/skills/ct-skill-creator/.cleo/logs/cleo.2026-03-07.1.log +24 -0
- package/skills/ct-skill-creator/.cleo/tasks.db +0 -0
- package/skills/ct-skill-creator/SKILL.md +356 -0
- package/skills/ct-skill-creator/agents/analyzer.md +276 -0
- package/skills/ct-skill-creator/agents/comparator.md +204 -0
- package/skills/ct-skill-creator/agents/grader.md +225 -0
- package/skills/ct-skill-creator/assets/eval_review.html +146 -0
- package/skills/ct-skill-creator/eval-viewer/__pycache__/generate_review.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/eval-viewer/generate_review.py +471 -0
- package/skills/ct-skill-creator/eval-viewer/viewer.html +1325 -0
- package/skills/ct-skill-creator/manifest-entry.json +17 -0
- package/skills/ct-skill-creator/references/dynamic-context.md +228 -0
- package/skills/ct-skill-creator/references/frontmatter.md +83 -0
- package/skills/ct-skill-creator/references/invocation-control.md +165 -0
- package/skills/ct-skill-creator/references/output-patterns.md +86 -0
- package/skills/ct-skill-creator/references/provider-deployment.md +175 -0
- package/skills/ct-skill-creator/references/schemas.md +430 -0
- package/skills/ct-skill-creator/references/workflows.md +28 -0
- package/skills/ct-skill-creator/scripts/__init__.py +1 -0
- package/skills/ct-skill-creator/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/generate_report.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/improve_description.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/init_skill.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/run_eval.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/run_loop.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/__pycache__/utils.cpython-314.pyc +0 -0
- package/skills/ct-skill-creator/scripts/aggregate_benchmark.py +401 -0
- package/skills/ct-skill-creator/scripts/generate_report.py +326 -0
- package/skills/ct-skill-creator/scripts/improve_description.py +247 -0
- package/skills/ct-skill-creator/scripts/init_skill.py +306 -0
- package/skills/ct-skill-creator/scripts/package_skill.py +110 -0
- package/skills/ct-skill-creator/scripts/quick_validate.py +97 -0
- package/skills/ct-skill-creator/scripts/run_eval.py +310 -0
- package/skills/ct-skill-creator/scripts/run_loop.py +328 -0
- package/skills/ct-skill-creator/scripts/utils.py +47 -0
- package/skills/ct-skill-validator/SKILL.md +178 -0
- package/skills/ct-skill-validator/agents/ecosystem-checker.md +151 -0
- package/skills/ct-skill-validator/assets/valid-skill-example.md +13 -0
- package/skills/ct-skill-validator/evals/eval_set.json +14 -0
- package/skills/ct-skill-validator/evals/evals.json +52 -0
- package/skills/ct-skill-validator/manifest-entry.json +20 -0
- package/skills/ct-skill-validator/references/cleo-ecosystem-rules.md +163 -0
- package/skills/ct-skill-validator/references/validation-rules.md +168 -0
- package/skills/ct-skill-validator/scripts/__init__.py +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/audit_body.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/check_ecosystem.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/generate_validation_report.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/__pycache__/validate.cpython-314.pyc +0 -0
- package/skills/ct-skill-validator/scripts/audit_body.py +242 -0
- package/skills/ct-skill-validator/scripts/check_ecosystem.py +169 -0
- package/skills/ct-skill-validator/scripts/check_manifest.py +172 -0
- package/skills/ct-skill-validator/scripts/generate_validation_report.py +442 -0
- package/skills/ct-skill-validator/scripts/validate.py +422 -0
- package/skills/ct-spec-writer/SKILL.md +189 -0
- package/skills/ct-stickynote/README.md +14 -0
- package/skills/ct-stickynote/SKILL.md +46 -0
- package/skills/ct-task-executor/SKILL.md +296 -0
- package/skills/ct-validator/SKILL.md +216 -0
- package/skills/manifest.json +469 -0
- package/skills.json +281 -0
|
@@ -0,0 +1,613 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>CLEO Grade Review</title>
|
|
7
|
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
|
8
|
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
9
|
+
<link href="https://fonts.googleapis.com/css2?family=Poppins:wght@500;600;700&family=Lora:wght@400;500&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
|
|
10
|
+
<style>
|
|
11
|
+
:root {
|
|
12
|
+
--bg: #faf9f5;
|
|
13
|
+
--surface: #ffffff;
|
|
14
|
+
--surface2: #f4f2ec;
|
|
15
|
+
--border: #e8e6dc;
|
|
16
|
+
--text: #141413;
|
|
17
|
+
--text-muted: #8a8880;
|
|
18
|
+
--accent: #d97757;
|
|
19
|
+
--header-bg: #141413;
|
|
20
|
+
--header-text:#faf9f5;
|
|
21
|
+
--radius: 6px;
|
|
22
|
+
--mono: 'JetBrains Mono', 'Fira Code', monospace;
|
|
23
|
+
--grade-a: #4a8c5c; --grade-a-bg: #eaf4ee;
|
|
24
|
+
--grade-b: #3a7fa8; --grade-b-bg: #e8f2f9;
|
|
25
|
+
--grade-c: #b07d2a; --grade-c-bg: #fdf4e3;
|
|
26
|
+
--grade-d: #c45d30; --grade-d-bg: #fceee8;
|
|
27
|
+
--grade-f: #c03030; --grade-f-bg: #fce8e8;
|
|
28
|
+
--dim1: #7b68c8; --dim2: #4a8fa5; --dim3: #4a8c5c;
|
|
29
|
+
--dim4: #c4923a; --dim5: #d97757;
|
|
30
|
+
--mcp: #4a8fa5; --cli: #7b68c8; --tie: #8a8880;
|
|
31
|
+
--win: #4a8c5c; --loss: #c03030;
|
|
32
|
+
}
|
|
33
|
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
|
34
|
+
body { font-family: 'Lora', Georgia, serif; background: var(--bg); color: var(--text); height: 100vh; display: flex; flex-direction: column; font-size: 14px; }
|
|
35
|
+
.header { background: var(--header-bg); color: var(--header-text); padding: .75rem 1.5rem; display: flex; justify-content: space-between; align-items: center; flex-shrink: 0; gap: 1rem; }
|
|
36
|
+
.header-left { display: flex; align-items: center; gap: .75rem; }
|
|
37
|
+
.header-logo { font-family: 'Poppins', sans-serif; font-size: .65rem; font-weight: 700; letter-spacing: .12em; text-transform: uppercase; background: var(--accent); color: #fff; padding: .2em .5em; border-radius: 3px; }
|
|
38
|
+
.header-title { font-family: 'Poppins', sans-serif; font-size: 1rem; font-weight: 600; color: #fff; }
|
|
39
|
+
.header-subtitle { font-size: .75rem; color: #888; }
|
|
40
|
+
.header-meta { font-size: .7rem; color: #666; text-align: right; line-height: 1.6; }
|
|
41
|
+
.tabs-bar { background: var(--surface); border-bottom: 1px solid var(--border); display: flex; padding: 0 1.5rem; gap: .25rem; flex-shrink: 0; }
|
|
42
|
+
.tab-btn { font-family: 'Poppins', sans-serif; font-size: .75rem; font-weight: 500; padding: .6rem .9rem; border: none; background: transparent; color: var(--text-muted); cursor: pointer; border-bottom: 2px solid transparent; transition: color .15s, border-color .15s; letter-spacing: .02em; }
|
|
43
|
+
.tab-btn:hover { color: var(--text); }
|
|
44
|
+
.tab-btn.active { color: var(--accent); border-bottom-color: var(--accent); }
|
|
45
|
+
.tab-badge { display: inline-block; background: var(--surface2); border-radius: 9px; font-size: .65rem; padding: .1em .45em; margin-left: .3em; color: var(--text-muted); }
|
|
46
|
+
.tab-btn.active .tab-badge { background: #fde8df; color: var(--accent); }
|
|
47
|
+
.main { display: flex; flex: 1; overflow: hidden; }
|
|
48
|
+
.tab-panel { display: none; width: 100%; overflow: hidden; }
|
|
49
|
+
.tab-panel.active { display: flex; }
|
|
50
|
+
.sidebar { width: 240px; min-width: 180px; border-right: 1px solid var(--border); background: var(--surface); display: flex; flex-direction: column; flex-shrink: 0; overflow-y: auto; }
|
|
51
|
+
.sidebar-header { font-family: 'Poppins', sans-serif; font-size: .65rem; font-weight: 600; letter-spacing: .08em; text-transform: uppercase; color: var(--text-muted); padding: .75rem 1rem .4rem; }
|
|
52
|
+
.sidebar-item { padding: .55rem 1rem; cursor: pointer; display: flex; align-items: center; gap: .5rem; border-left: 3px solid transparent; transition: background .1s; font-size: .8rem; }
|
|
53
|
+
.sidebar-item:hover { background: var(--bg); }
|
|
54
|
+
.sidebar-item.active { background: var(--bg); border-left-color: var(--accent); }
|
|
55
|
+
.item-label { flex: 1; min-width: 0; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
|
56
|
+
.item-score { font-family: var(--mono); font-size: .7rem; font-weight: 500; }
|
|
57
|
+
.content { flex: 1; overflow-y: auto; padding: 1.5rem; display: flex; flex-direction: column; gap: 1.25rem; }
|
|
58
|
+
.grade-card { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 1.25rem; }
|
|
59
|
+
.grade-card-header { display: flex; align-items: flex-start; justify-content: space-between; gap: 1rem; margin-bottom: 1rem; }
|
|
60
|
+
.grade-score-block { display: flex; align-items: center; gap: 1rem; }
|
|
61
|
+
.grade-letter-badge { font-family: 'Poppins', sans-serif; font-size: 2.5rem; font-weight: 700; width: 64px; height: 64px; display: flex; align-items: center; justify-content: center; border-radius: 8px; }
|
|
62
|
+
.grade-letter-badge.A { background: var(--grade-a-bg); color: var(--grade-a); }
|
|
63
|
+
.grade-letter-badge.B { background: var(--grade-b-bg); color: var(--grade-b); }
|
|
64
|
+
.grade-letter-badge.C { background: var(--grade-c-bg); color: var(--grade-c); }
|
|
65
|
+
.grade-letter-badge.D { background: var(--grade-d-bg); color: var(--grade-d); }
|
|
66
|
+
.grade-letter-badge.F { background: var(--grade-f-bg); color: var(--grade-f); }
|
|
67
|
+
.grade-score-details h2 { font-family: 'Poppins', sans-serif; font-size: 1.5rem; font-weight: 700; line-height: 1; }
|
|
68
|
+
.grade-score-details .pct { font-size: .85rem; color: var(--text-muted); }
|
|
69
|
+
.grade-session-id { font-family: var(--mono); font-size: .65rem; color: var(--text-muted); background: var(--surface2); padding: .2em .5em; border-radius: 3px; }
|
|
70
|
+
.dimensions { display: flex; flex-direction: column; gap: .65rem; }
|
|
71
|
+
.dimension-row { display: flex; align-items: center; gap: .75rem; }
|
|
72
|
+
.dimension-name { font-family: 'Poppins', sans-serif; font-size: .7rem; font-weight: 500; width: 145px; flex-shrink: 0; }
|
|
73
|
+
.dimension-bar-wrap { flex: 1; height: 8px; background: var(--surface2); border-radius: 4px; overflow: hidden; }
|
|
74
|
+
.dimension-bar { height: 100%; border-radius: 4px; transition: width .4s ease; }
|
|
75
|
+
.dimension-score-label { font-family: var(--mono); font-size: .7rem; width: 42px; text-align: right; flex-shrink: 0; }
|
|
76
|
+
.flags-section h3 { font-family: 'Poppins', sans-serif; font-size: .75rem; font-weight: 600; color: var(--text-muted); text-transform: uppercase; letter-spacing: .07em; margin-bottom: .6rem; }
|
|
77
|
+
.flags-list { display: flex; flex-direction: column; gap: .4rem; }
|
|
78
|
+
.flag-item { display: flex; align-items: flex-start; gap: .5rem; font-size: .78rem; padding: .4rem .6rem; background: var(--grade-f-bg); border-left: 3px solid var(--grade-f); border-radius: 0 var(--radius) var(--radius) 0; line-height: 1.4; }
|
|
79
|
+
.flag-icon { flex-shrink: 0; }
|
|
80
|
+
.no-flags { font-size: .78rem; color: var(--win); background: var(--grade-a-bg); padding: .4rem .6rem; border-left: 3px solid var(--win); border-radius: 0 var(--radius) var(--radius) 0; }
|
|
81
|
+
.token-meta-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(130px, 1fr)); gap: .6rem; }
|
|
82
|
+
.token-stat { background: var(--surface2); border-radius: var(--radius); padding: .6rem .75rem; }
|
|
83
|
+
.token-stat-label { font-family: 'Poppins', sans-serif; font-size: .6rem; font-weight: 600; text-transform: uppercase; letter-spacing: .07em; color: var(--text-muted); margin-bottom: .2rem; }
|
|
84
|
+
.token-stat-value { font-family: var(--mono); font-size: .95rem; font-weight: 500; }
|
|
85
|
+
.ab-panel, .token-panel, .history-panel { width: 100%; padding: 1.5rem; overflow-y: auto; display: flex; flex-direction: column; gap: 1.25rem; }
|
|
86
|
+
.section-title { font-family: 'Poppins', sans-serif; font-size: .75rem; font-weight: 600; text-transform: uppercase; letter-spacing: .08em; color: var(--text-muted); margin-bottom: .75rem; }
|
|
87
|
+
.ab-summary-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(150px, 1fr)); gap: .75rem; }
|
|
88
|
+
.ab-stat-card { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: .85rem 1rem; }
|
|
89
|
+
.ab-stat-label { font-family: 'Poppins', sans-serif; font-size: .65rem; font-weight: 600; text-transform: uppercase; letter-spacing: .07em; color: var(--text-muted); margin-bottom: .3rem; }
|
|
90
|
+
.ab-stat-value { font-family: var(--mono); font-size: 1.3rem; font-weight: 500; }
|
|
91
|
+
.ab-stat-value.win { color: var(--win); } .ab-stat-value.loss { color: var(--loss); } .ab-stat-value.neutral { color: var(--text-muted); }
|
|
92
|
+
.ab-winner-banner { display: flex; align-items: center; gap: .6rem; padding: .6rem 1rem; border-radius: var(--radius); font-family: 'Poppins', sans-serif; font-size: .8rem; font-weight: 600; }
|
|
93
|
+
.ab-winner-banner.mcp { background: #e8f2f9; color: var(--mcp); }
|
|
94
|
+
.ab-winner-banner.cli { background: #ede8f9; color: var(--cli); }
|
|
95
|
+
.ab-winner-banner.tie { background: var(--surface2); color: var(--text-muted); }
|
|
96
|
+
.ab-table-wrap { overflow-x: auto; }
|
|
97
|
+
table.ab-table { width: 100%; border-collapse: collapse; font-size: .76rem; }
|
|
98
|
+
.ab-table th { font-family: 'Poppins', sans-serif; font-size: .65rem; font-weight: 600; text-transform: uppercase; letter-spacing: .07em; color: var(--text-muted); padding: .4rem .75rem; text-align: left; background: var(--surface); border-bottom: 1px solid var(--border); white-space: nowrap; }
|
|
99
|
+
.ab-table td { padding: .45rem .75rem; border-bottom: 1px solid var(--border); vertical-align: middle; }
|
|
100
|
+
.ab-table tr:hover td { background: var(--bg); }
|
|
101
|
+
.op-name { font-family: var(--mono); font-size: .72rem; }
|
|
102
|
+
.win-pill { display: inline-block; padding: .15em .5em; border-radius: 9px; font-family: 'Poppins', sans-serif; font-size: .65rem; font-weight: 600; text-transform: uppercase; }
|
|
103
|
+
.win-pill.mcp { background: #d0e8f4; color: var(--mcp); } .win-pill.cli { background: #ddd8f4; color: var(--cli); } .win-pill.tie { background: var(--surface2); color: var(--text-muted); }
|
|
104
|
+
.delta-pos { color: var(--loss); font-family: var(--mono); font-size: .72rem; }
|
|
105
|
+
.delta-neg { color: var(--win); font-family: var(--mono); font-size: .72rem; }
|
|
106
|
+
.delta-zero { color: var(--text-muted); font-family: var(--mono); font-size: .72rem; }
|
|
107
|
+
.bar-chart { display: flex; flex-direction: column; gap: .5rem; }
|
|
108
|
+
.bar-row { display: flex; align-items: center; gap: .75rem; }
|
|
109
|
+
.bar-label { font-family: var(--mono); font-size: .72rem; width: 130px; flex-shrink: 0; text-align: right; }
|
|
110
|
+
.bar-track { flex: 1; height: 16px; background: var(--surface2); border-radius: 3px; overflow: hidden; }
|
|
111
|
+
.bar-fill { height: 100%; border-radius: 3px; transition: width .4s ease; }
|
|
112
|
+
.bar-val { font-family: var(--mono); font-size: .7rem; width: 70px; flex-shrink: 0; }
|
|
113
|
+
.chart-card { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 1rem 1.25rem; }
|
|
114
|
+
.chart-card-title { font-family: 'Poppins', sans-serif; font-size: .8rem; font-weight: 600; margin-bottom: 1rem; }
|
|
115
|
+
.history-list { display: flex; flex-direction: column; gap: .5rem; }
|
|
116
|
+
.history-item { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: .75rem 1rem; display: flex; align-items: center; gap: 1rem; cursor: pointer; transition: border-color .15s; }
|
|
117
|
+
.history-item:hover { border-color: var(--accent); }
|
|
118
|
+
.history-grade-badge { font-family: 'Poppins', sans-serif; font-size: 1.1rem; font-weight: 700; width: 36px; height: 36px; display: flex; align-items: center; justify-content: center; border-radius: 5px; flex-shrink: 0; }
|
|
119
|
+
.history-grade-badge.A { background: var(--grade-a-bg); color: var(--grade-a); } .history-grade-badge.B { background: var(--grade-b-bg); color: var(--grade-b); }
|
|
120
|
+
.history-grade-badge.C { background: var(--grade-c-bg); color: var(--grade-c); } .history-grade-badge.D { background: var(--grade-d-bg); color: var(--grade-d); }
|
|
121
|
+
.history-grade-badge.F { background: var(--grade-f-bg); color: var(--grade-f); }
|
|
122
|
+
.history-item-main { flex: 1; min-width: 0; }
|
|
123
|
+
.history-session-id { font-family: var(--mono); font-size: .7rem; color: var(--text-muted); overflow: hidden; text-overflow: ellipsis; white-space: nowrap; }
|
|
124
|
+
.history-item-meta { display: flex; gap: .6rem; font-size: .7rem; color: var(--text-muted); margin-top: .2rem; flex-wrap: wrap; }
|
|
125
|
+
.history-score-bar { width: 120px; height: 5px; background: var(--surface2); border-radius: 3px; overflow: hidden; flex-shrink: 0; }
|
|
126
|
+
.history-score-fill { height: 100%; border-radius: 3px; }
|
|
127
|
+
canvas.trend { width: 100%; height: 90px; display: block; }
|
|
128
|
+
.empty-state { display: flex; flex-direction: column; align-items: center; justify-content: center; gap: .75rem; padding: 3rem 2rem; color: var(--text-muted); text-align: center; }
|
|
129
|
+
.empty-state .empty-icon { font-size: 2.5rem; }
|
|
130
|
+
.empty-state h3 { font-family: 'Poppins', sans-serif; font-size: .9rem; color: var(--text); }
|
|
131
|
+
.empty-state p { font-size: .78rem; max-width: 380px; line-height: 1.6; }
|
|
132
|
+
.empty-cmd { font-family: var(--mono); background: var(--surface2); padding: .25em .5em; border-radius: 3px; font-size: .74rem; display: inline-block; margin-top: .25rem; }
|
|
133
|
+
.ev-title { font-family: 'Poppins', sans-serif; font-size: .75rem; font-weight: 600; text-transform: uppercase; letter-spacing: .07em; color: var(--text-muted); margin-bottom: .75rem; }
|
|
134
|
+
.ev-row { margin-bottom: .5rem; }
|
|
135
|
+
.ev-dim { font-family: 'Poppins', sans-serif; font-size: .68rem; font-weight: 600; margin-bottom: .2rem; }
|
|
136
|
+
.ev-bullet { font-size: .74rem; padding: .15rem 0 .15rem .75rem; color: var(--text-muted); }
|
|
137
|
+
.ab-note { font-size: .72rem; color: var(--text-muted); background: var(--grade-c-bg); border-left: 3px solid var(--grade-c); padding: .5rem .75rem; border-radius: 0 4px 4px 0; }
|
|
138
|
+
::-webkit-scrollbar { width: 6px; height: 6px; }
|
|
139
|
+
::-webkit-scrollbar-track { background: transparent; }
|
|
140
|
+
::-webkit-scrollbar-thumb { background: var(--border); border-radius: 3px; }
|
|
141
|
+
::-webkit-scrollbar-thumb:hover { background: var(--text-muted); }
|
|
142
|
+
</style>
|
|
143
|
+
</head>
|
|
144
|
+
<body>
|
|
145
|
+
|
|
146
|
+
<div class="header">
|
|
147
|
+
<div class="header-left">
|
|
148
|
+
<span class="header-logo">CLEO</span>
|
|
149
|
+
<div>
|
|
150
|
+
<div class="header-title" id="hdr-title">Grade Review</div>
|
|
151
|
+
<div class="header-subtitle" id="hdr-sub"></div>
|
|
152
|
+
</div>
|
|
153
|
+
</div>
|
|
154
|
+
<div class="header-meta" id="hdr-meta"></div>
|
|
155
|
+
</div>
|
|
156
|
+
|
|
157
|
+
<div class="tabs-bar">
|
|
158
|
+
<button class="tab-btn active" data-tab="grades">Grades <span class="tab-badge" id="tb-grades">0</span></button>
|
|
159
|
+
<button class="tab-btn" data-tab="ab">A/B Comparison <span class="tab-badge" id="tb-ab">0</span></button>
|
|
160
|
+
<button class="tab-btn" data-tab="tokens">Token Analysis</button>
|
|
161
|
+
<button class="tab-btn" data-tab="history">History <span class="tab-badge" id="tb-history">0</span></button>
|
|
162
|
+
</div>
|
|
163
|
+
|
|
164
|
+
<div class="main">
|
|
165
|
+
<div class="tab-panel active" id="panel-grades">
|
|
166
|
+
<div class="sidebar" id="grade-sidebar"></div>
|
|
167
|
+
<div class="content" id="grade-content"></div>
|
|
168
|
+
</div>
|
|
169
|
+
<div class="tab-panel" id="panel-ab">
|
|
170
|
+
<div class="ab-panel" id="ab-content"></div>
|
|
171
|
+
</div>
|
|
172
|
+
<div class="tab-panel" id="panel-tokens">
|
|
173
|
+
<div class="token-panel" id="token-content"></div>
|
|
174
|
+
</div>
|
|
175
|
+
<div class="tab-panel" id="panel-history">
|
|
176
|
+
<div class="history-panel" id="history-content"></div>
|
|
177
|
+
</div>
|
|
178
|
+
</div>
|
|
179
|
+
|
|
180
|
+
<script>
|
|
181
|
+
const EMBEDDED_GRADE_DATA = {"title": "ct-grade v2.1 — Grade Review", "subtitle": "ct-grade-eval", "generated_at": "2026-03-07T23:47:37.401179+00:00", "grades": [{"sessionId": "ses_20260302070315_066148", "totalScore": 85, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 20, "max": 20, "evidence": ["session.list called before first task op", "session.end called"]}, "discoveryEfficiency": {"score": 15, "max": 20, "evidence": ["find:list ratio 100% >= 80%"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": ["All 2 tasks.add calls had descriptions"]}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 10, "max": 20, "evidence": ["Progressive disclosure used (1x)"]}}, "flags": ["No MCP query calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-03-02T07:03:35.064Z", "entryCount": 7, "evaluator": "auto"}, {"sessionId": "ses_20260302070023_733ab2", "totalScore": 60, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 10, "max": 20, "evidence": ["session.end called"]}, "discoveryEfficiency": {"score": 10, "max": 20, "evidence": ["No discovery calls needed"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": ["All 1 tasks.add calls had descriptions"]}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["session.list never called (check existing sessions before starting)", "No admin.help or skill lookup calls (load ct-cleo for guidance)", "No MCP query calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-03-02T07:01:49.853Z", "entryCount": 3, "evaluator": "auto"}, {"sessionId": "ses_20260302065929_32d2d1", "totalScore": 60, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 10, "max": 20, "evidence": ["session.end called"]}, "discoveryEfficiency": {"score": 10, "max": 20, "evidence": ["No discovery calls 
needed"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": ["All 1 tasks.add calls had descriptions"]}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["session.list never called (check existing sessions before starting)", "No admin.help or skill lookup calls (load ct-cleo for guidance)", "No MCP query calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-03-02T07:00:15.289Z", "entryCount": 3, "evaluator": "auto"}, {"sessionId": "session_20260227_070424_313233", "totalScore": 0, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 0, "max": 20, "evidence": []}, "taskHygiene": {"score": 0, "max": 20, "evidence": []}, "errorProtocol": {"score": 0, "max": 20, "evidence": []}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["No audit entries found for session (use --grade flag when starting session)"], "timestamp": "2026-02-27T07:10:11.702Z", "entryCount": 0, "evaluator": "auto"}, {"sessionId": "session_20260227_070213_009212", "totalScore": 81, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 20, "max": 20, "evidence": ["session.list called before first task op", "session.end called"]}, "discoveryEfficiency": {"score": 11, "max": 20, "evidence": ["tasks.show used 6x for detail"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 10, "max": 20, "evidence": ["cleo_query (MCP) used 12x"]}}, "flags": ["tasks.list used 3x (prefer tasks.find for discovery)", "No admin.help or skill lookup calls (load ct-cleo for guidance)"], "timestamp": "2026-02-27T07:04:15.903Z", "entryCount": 14, "evaluator": "auto"}, {"sessionId": "session_20260227_064741_85f12a", "totalScore": 0, "maxScore": 100, "dimensions": 
{"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 0, "max": 20, "evidence": []}, "taskHygiene": {"score": 0, "max": 20, "evidence": []}, "errorProtocol": {"score": 0, "max": 20, "evidence": []}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["No audit entries found for session (use --grade flag when starting session)"], "timestamp": "2026-02-27T06:51:00.362Z", "entryCount": 0, "evaluator": "auto"}, {"sessionId": "session_20260227_064452_90d28c", "totalScore": 75, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 10, "max": 20, "evidence": ["session.end called"]}, "discoveryEfficiency": {"score": 20, "max": 20, "evidence": ["find:list ratio 100% >= 80%", "tasks.show used 4x for detail"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": ["All 2 tasks.add calls had descriptions"]}, "errorProtocol": {"score": 15, "max": 20, "evidence": []}, "disclosureUse": {"score": 10, "max": 20, "evidence": ["cleo_query (MCP) used 8x"]}}, "flags": ["session.list never called (check existing sessions before starting)", "1 potentially duplicate task create(s) detected", "No admin.help or skill lookup calls (load ct-cleo for guidance)"], "timestamp": "2026-02-27T06:47:32.608Z", "entryCount": 14, "evaluator": "auto"}, {"sessionId": "session_20260227_064144_bacbf6", "totalScore": 75, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 10, "max": 20, "evidence": ["session.list called before first task op"]}, "discoveryEfficiency": {"score": 15, "max": 20, "evidence": ["No discovery calls needed", "tasks.show used 4x for detail"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": ["All 4 tasks.add calls had descriptions"]}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 10, "max": 20, "evidence": ["cleo_query (MCP) used 5x"]}}, "flags": ["session.end never called (always end sessions when done)", "No admin.help or skill 
lookup calls (load ct-cleo for guidance)"], "timestamp": "2026-02-27T06:44:42.448Z", "entryCount": 11, "evaluator": "auto"}, {"sessionId": "session_20260227_064020_d32071", "totalScore": 80, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 10, "max": 20, "evidence": ["session.end called"]}, "discoveryEfficiency": {"score": 20, "max": 20, "evidence": ["find:list ratio 100% >= 80%", "tasks.show used 4x for detail"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 10, "max": 20, "evidence": ["cleo_query (MCP) used 8x"]}}, "flags": ["session.list never called (check existing sessions before starting)", "No admin.help or skill lookup calls (load ct-cleo for guidance)"], "timestamp": "2026-02-27T06:41:38.882Z", "entryCount": 10, "evaluator": "auto"}, {"sessionId": "session_20260227_063354_9d5d82", "totalScore": 50, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 10, "max": 20, "evidence": ["No discovery calls needed"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["session.list never called (check existing sessions before starting)", "session.end never called (always end sessions when done)", "No admin.help or skill lookup calls (load ct-cleo for guidance)", "No MCP query calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-02-27T06:36:36.354Z", "entryCount": 1, "evaluator": "auto"}, {"sessionId": "session_20260227_063205_236fd0", "totalScore": 80, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 10, "max": 20, "evidence": ["session.end called"]}, "discoveryEfficiency": {"score": 20, "max": 20, "evidence": ["find:list ratio 100% >= 80%", 
"tasks.show used 4x for detail"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 10, "max": 20, "evidence": ["cleo_query (MCP) used 8x"]}}, "flags": ["session.list never called (check existing sessions before starting)", "No admin.help or skill lookup calls (load ct-cleo for guidance)"], "timestamp": "2026-02-27T06:33:47.336Z", "entryCount": 10, "evaluator": "auto"}, {"sessionId": "session_20260227_061640_bf42ad", "totalScore": 50, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 10, "max": 20, "evidence": ["No discovery calls needed"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["session.list never called (check existing sessions before starting)", "session.end never called (always end sessions when done)", "No admin.help or skill lookup calls (load ct-cleo for guidance)", "No MCP query calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-02-27T06:22:13.587Z", "entryCount": 15, "evaluator": "auto"}, {"sessionId": "session_20260227_061450_567f0c", "totalScore": 50, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 10, "max": 20, "evidence": ["No discovery calls needed"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["session.list never called (check existing sessions before starting)", "session.end never called (always end sessions when done)", "No admin.help or skill lookup calls (load ct-cleo for guidance)", "No MCP query 
calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-02-27T06:22:13.100Z", "entryCount": 13, "evaluator": "auto"}, {"sessionId": "session_20260227_061227_541ae0", "totalScore": 50, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 10, "max": 20, "evidence": ["No discovery calls needed"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["session.list never called (check existing sessions before starting)", "session.end never called (always end sessions when done)", "No admin.help or skill lookup calls (load ct-cleo for guidance)", "No MCP query calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-02-27T06:22:12.605Z", "entryCount": 10, "evaluator": "auto"}, {"sessionId": "session_20260227_061027_965619", "totalScore": 50, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 10, "max": 20, "evidence": ["No discovery calls needed"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["session.list never called (check existing sessions before starting)", "session.end never called (always end sessions when done)", "No admin.help or skill lookup calls (load ct-cleo for guidance)", "No MCP query calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-02-27T06:22:12.036Z", "entryCount": 8, "evaluator": "auto"}, {"sessionId": "session_20260227_060718_a4bd6b", "totalScore": 50, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 10, "max": 20, 
"evidence": ["No discovery calls needed"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["session.list never called (check existing sessions before starting)", "session.end never called (always end sessions when done)", "No admin.help or skill lookup calls (load ct-cleo for guidance)", "No MCP query calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-02-27T06:09:41.251Z", "entryCount": 11, "evaluator": "auto"}, {"sessionId": "session_20260227_060531_d09100", "totalScore": 50, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 10, "max": 20, "evidence": ["No discovery calls needed"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["session.list never called (check existing sessions before starting)", "session.end never called (always end sessions when done)", "No admin.help or skill lookup calls (load ct-cleo for guidance)", "No MCP query calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-02-27T06:07:08.625Z", "entryCount": 9, "evaluator": "auto"}, {"sessionId": "session_20260227_060300_f4a5f6", "totalScore": 50, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 10, "max": 20, "evidence": ["No discovery calls needed"]}, "taskHygiene": {"score": 20, "max": 20, "evidence": []}, "errorProtocol": {"score": 20, "max": 20, "evidence": ["No error protocol violations"]}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["session.list never called (check existing sessions before starting)", "session.end never called 
(always end sessions when done)", "No admin.help or skill lookup calls (load ct-cleo for guidance)", "No MCP query calls (prefer cleo_query over CLI for programmatic access)"], "timestamp": "2026-02-27T06:03:54.757Z", "entryCount": 3, "evaluator": "auto"}, {"sessionId": "session_20260227_043637_92a905", "totalScore": 0, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 0, "max": 20, "evidence": []}, "taskHygiene": {"score": 0, "max": 20, "evidence": []}, "errorProtocol": {"score": 0, "max": 20, "evidence": []}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["No audit entries found for session (use --grade flag when starting session)"], "timestamp": "2026-02-27T04:41:06.557Z", "entryCount": 0, "evaluator": "auto"}, {"sessionId": "session_20260227_043053_48fe9b", "totalScore": 0, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 0, "max": 20, "evidence": []}, "taskHygiene": {"score": 0, "max": 20, "evidence": []}, "errorProtocol": {"score": 0, "max": 20, "evidence": []}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["No audit entries found for session (use --grade flag when starting session)"], "timestamp": "2026-02-27T04:33:56.285Z", "entryCount": 0, "evaluator": "auto"}, {"sessionId": "session_20260227_041921_40ba61", "totalScore": 0, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 0, "max": 20, "evidence": []}, "taskHygiene": {"score": 0, "max": 20, "evidence": []}, "errorProtocol": {"score": 0, "max": 20, "evidence": []}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["No audit entries found for session (use --grade flag when starting session)"], "timestamp": "2026-02-27T04:29:58.995Z", "entryCount": 0, "evaluator": "auto"}, {"sessionId": 
"test-session-id-doesnt-exist", "totalScore": 0, "maxScore": 100, "dimensions": {"sessionDiscipline": {"score": 0, "max": 20, "evidence": []}, "discoveryEfficiency": {"score": 0, "max": 20, "evidence": []}, "taskHygiene": {"score": 0, "max": 20, "evidence": []}, "errorProtocol": {"score": 0, "max": 20, "evidence": []}, "disclosureUse": {"score": 0, "max": 20, "evidence": []}}, "flags": ["No audit entries found for session (use --grade flag when starting session)"], "timestamp": "2026-02-27T04:16:19.834Z", "entryCount": 0, "evaluator": "auto"}], "ab_results": {"timestamp": "2026-03-07T23:47:09.914560+00:00", "test_matrix": {"admin": ["version"]}, "total_runs": 3, "global_wins": {"mcp": 3, "cli": 0, "tie": 0}, "global_win_rate": {"mcp": 1.0, "cli": 0.0}, "avg_token_delta_mcp_minus_cli": 416.0, "per_operation": [{"operation": "admin.version", "runs": 3, "wins": {"mcp": 3, "cli": 0, "tie": 0}, "win_rate": {"mcp": 1.0, "cli": 0.0, "tie": 0.0}, "avg_token_delta_mcp_minus_cli": 416.0, "avg_mcp_chars": 1664.0, "avg_cli_chars": 0.0, "avg_mcp_ms": 930.0, "avg_cli_ms": 786.0}]}, "token_analysis": {"timestamp": "2026-03-07T23:46:42.279906+00:00", "otel_records_found": 0, "grades_found": 31, "breakdown_by_domain": {}}};
|
|
182
|
+
|
|
183
|
+
// ── Helpers ────────────────────────────────────────────────────────────────
|
|
184
|
+
|
|
185
|
+
/**
 * Map a numeric score to its letter grade.
 *
 * Thresholds: >= 90 'A', >= 75 'B', >= 60 'C', >= 45 'D', anything else 'F'.
 *
 * @param {?number} s - Score to grade.
 * @returns {string} The letter grade, or '?' when `s` is null or undefined.
 */
function scoreLetter(s) {
  if (s == null) return '?';
  if (s >= 90) return 'A';
  if (s >= 75) return 'B';
  if (s >= 60) return 'C';
  if (s >= 45) return 'D';
  return 'F';
}
|
|
190
|
+
/**
 * Map a numeric score to the CSS custom-property reference for its grade colour.
 *
 * Uses the same thresholds as the letter grades (90 / 75 / 60 / 45). A null or
 * undefined score fails every comparison and therefore gets the F colour.
 *
 * @param {?number} s - Score to colour.
 * @returns {string} A `var(--grade-x)` CSS value.
 */
function scoreColor(s) {
  if (s >= 90) return 'var(--grade-a)';
  if (s >= 75) return 'var(--grade-b)';
  if (s >= 60) return 'var(--grade-c)';
  if (s >= 45) return 'var(--grade-d)';
  return 'var(--grade-f)';
}
|
|
195
|
+
// Grading dimensions, described by three index-aligned arrays: entry i of each
// array refers to the same dimension (colour, display name, data key).
const DIM_COLORS = ['var(--dim1)', 'var(--dim2)', 'var(--dim3)', 'var(--dim4)', 'var(--dim5)'];
const DIM_NAMES = ['Session Discipline', 'Discovery Efficiency', 'Task Hygiene', 'Error Protocol', 'Progressive Disclosure'];
// Keys looked up in each grade's `dimensions` object.
const DIM_KEYS = ['sessionDiscipline', 'discoveryEfficiency', 'taskHygiene', 'errorProtocol', 'disclosureUse'];
|
|
198
|
+
|
|
199
|
+
/**
 * Format a value for display.
 *
 * Non-integer numbers are shown with one decimal place; every other non-null
 * value is converted with `String()`.
 *
 * @param {*} n - Value to format.
 * @param {*} [fallback] - Returned as-is when `n` is null/undefined; defaults
 *   to an em dash when omitted.
 * @returns {*} The formatted string, or the fallback.
 */
function fmtNum(n, fallback) {
  if (n == null) return fallback !== undefined ? fallback : '\u2014';
  if (typeof n === 'number' && !Number.isInteger(n)) return n.toFixed(1);
  return String(n);
}
|
|
203
|
+
/**
 * Format a timestamp as a short, locale-aware date and time.
 *
 * @param {*} s - Value accepted by the `Date` constructor (e.g. an ISO string).
 * @returns {*} '' for a falsy input; the localized string for a parseable
 *   input; the original value unchanged when it cannot be parsed or formatted.
 */
function fmtDate(s) {
  if (!s) return '';
  var d = new Date(s);
  // toLocaleString() does not throw for an invalid Date (it returns
  // "Invalid Date"), so unparseable input has to be detected explicitly.
  if (Number.isNaN(d.getTime())) return s;
  try {
    return d.toLocaleString(undefined, { dateStyle: 'short', timeStyle: 'short' });
  } catch {
    return s;
  }
}
|
|
207
|
+
|
|
208
|
+
// Safe element builder — never uses innerHTML with data
|
|
209
|
+
/**
 * Create an element with an optional class name and optional text content.
 * Text is assigned through `textContent`, never parsed as markup.
 *
 * @param {string} tag - Element tag name.
 * @param {?string} [cls] - Value for `className`; skipped when falsy.
 * @param {*} [text] - Text content; skipped when null/undefined, otherwise stringified.
 * @returns {HTMLElement} The new element.
 */
function mk(tag, cls, text) {
  var e = document.createElement(tag);
  if (cls) e.className = cls;
  if (text != null) e.textContent = String(text);
  return e;
}
|
|
215
|
+
/**
 * Inline-style variant of the element builder: create an element with an
 * optional inline style string and optional text content.
 *
 * @param {string} tag - Element tag name.
 * @param {?string} [css] - Value for `style.cssText`; skipped when falsy.
 * @param {*} [text] - Text content; skipped when null/undefined, otherwise stringified.
 * @returns {HTMLElement} The new element.
 */
function mks(tag, css, text) {
  var e = document.createElement(tag);
  if (css) e.style.cssText = css;
  if (text != null) e.textContent = String(text);
  return e;
}
|
|
221
|
+
/**
 * Append any number of child nodes to `parent`, in order, skipping
 * null/undefined entries.
 *
 * @param {Node} parent - Node that receives the children.
 * @param {...?Node} children - Nodes to append.
 * @returns {Node} `parent`, to allow chaining.
 */
function app(parent, ...children) {
  children.forEach(function(child) {
    if (child != null) parent.appendChild(child);
  });
  return parent;
}
|
|
227
|
+
|
|
228
|
+
// ── Tab switching ──────────────────────────────────────────────────────────
|
|
229
|
+
// Clicking a tab button deactivates every button and panel, then activates the
// clicked button and the panel whose id is "panel-" + the button's data-tab value.
document.querySelectorAll('.tab-btn').forEach(function(btn) {
  btn.addEventListener('click', function() {
    document.querySelectorAll('.tab-btn').forEach(function(b) { b.classList.remove('active'); });
    document.querySelectorAll('.tab-panel').forEach(function(p) { p.classList.remove('active'); });
    btn.classList.add('active');
    document.getElementById('panel-' + btn.dataset.tab).classList.add('active');
  });
});
|
|
237
|
+
|
|
238
|
+
// ── Empty state ────────────────────────────────────────────────────────────
|
|
239
|
+
/**
 * Build the placeholder shown when a tab has no data.
 *
 * @param {string} icon - Glyph rendered large at the top.
 * @param {string} title - Heading text.
 * @param {string} desc - Paragraph text under the heading.
 * @param {string} [cmd] - Optional command hint, appended only when truthy.
 * @returns {HTMLElement} A `div.empty-state` element.
 */
function emptyState(icon, title, desc, cmd) {
  var d = mk('div', 'empty-state');
  app(d, mks('div', 'font-size:2.5rem;', icon), mk('h3', null, title), mk('p', null, desc));
  if (cmd) app(d, mk('span', 'empty-cmd', cmd));
  return d;
}
|
|
245
|
+
|
|
246
|
+
// ── Grades Tab ─────────────────────────────────────────────────────────────
|
|
247
|
+
/**
 * Render the Grades tab: a sidebar with one entry per grade and, in the
 * content pane, the detail view of the selected grade (initially the first).
 *
 * Each sidebar entry shows the letter grade, the session id truncated to 22
 * characters (or "Run N" when there is none) and "score/max". Clicking an entry
 * marks it active and re-renders the detail pane for that grade.
 *
 * @param {?Array<Object>} grades - Grade records; an empty or missing list
 *   renders the empty-state placeholder instead.
 * @returns {void}
 */
function renderGradesTab(grades) {
  var sidebar = document.getElementById('grade-sidebar');
  var content = document.getElementById('grade-content');
  var badge = document.getElementById('tb-grades');
  sidebar.textContent = '';
  content.textContent = '';
  if (!grades || !grades.length) {
    // Reset the tab badge so a re-render with no data does not keep a stale count.
    if (badge) badge.textContent = 0;
    content.appendChild(emptyState('📊', 'No grade results yet',
      'Run a scenario to generate grades:', 'python scripts/run_scenario.py --scenario S1 --cleo cleo-dev'));
    return;
  }
  badge.textContent = grades.length;
  app(sidebar, mk('div', 'sidebar-header', 'Sessions'));
  grades.forEach(function(g, i) {
    var score = g.totalScore != null ? g.totalScore : g.score;
    // Same denominator as the detail view, so both always agree.
    var max = g.maxScore || 100;
    var color = scoreColor(score);
    var sid = (g.sessionId || '').slice(0, 22) || ('Run ' + (i + 1));
    var item = mk('div', 'sidebar-item' + (i === 0 ? ' active' : ''));
    var ls = mks('span', 'font-family:Poppins,sans-serif;font-weight:700;font-size:.85rem;color:' + color + ';flex-shrink:0;width:18px;', scoreLetter(score));
    var lab = mk('span', 'item-label', sid);
    var sc = mk('span', 'item-score', score != null ? (score + '/' + max) : '\u2014');
    sc.style.color = color;
    app(item, ls, lab, sc);
    item.addEventListener('click', function() {
      sidebar.querySelectorAll('.sidebar-item').forEach(function(x) { x.classList.remove('active'); });
      item.classList.add('active');
      renderGradeDetail(g, content);
    });
    sidebar.appendChild(item);
  });
  renderGradeDetail(grades[0], content);
}
|
|
280
|
+
|
|
281
|
+
/**
 * Render the detail view for one grade into `container`, replacing its content.
 *
 * Appends, in order: the score card (letter badge, score/max, percentage,
 * timestamp, session id and one bar per known dimension), the flags card, an
 * evidence card (only when some dimension has evidence) and a token-metadata card.
 *
 * @param {Object} g - Grade record. Reads `totalScore`/`score`, `maxScore`,
 *   `dimensions`, `flags`, `_tokenMeta`, `_scenarioId`/`scenario`, `timestamp`,
 *   `sessionId` and `entryCount`.
 * @param {HTMLElement} container - Element that receives the rendered cards.
 * @returns {void}
 */
function renderGradeDetail(g, container) {
  container.textContent = '';
  var score = g.totalScore != null ? g.totalScore : (g.score || 0);
  var max = g.maxScore || 100;
  var letter = scoreLetter(score);
  var pct = Math.round((score / max) * 100);
  var dims = g.dimensions || {};
  var flags = g.flags || [];
  var tmeta = g._tokenMeta || null;
  var scenario = g._scenarioId || g.scenario || '';

  // Score card
  var card = mk('div', 'grade-card');
  var hdr = mk('div', 'grade-card-header');
  var sb = mk('div', 'grade-score-block');
  var badge = mk('div', 'grade-letter-badge ' + letter, letter);
  var sd = mk('div', 'grade-score-details');
  var h2 = mk('h2', null, score + '/' + max);
  h2.style.color = scoreColor(score);
  var pctEl = mk('div', 'pct', pct + '% \u2014 Grade ' + letter + (scenario ? ' \u2014 ' + scenario : ''));
  app(sd, h2, pctEl);
  if (g.timestamp) app(sd, mks('div', 'font-size:.68rem;color:var(--text-muted);margin-top:.25rem;', fmtDate(g.timestamp)));
  app(sb, badge, sd);
  app(hdr, sb);
  if (g.sessionId) app(hdr, mk('code', 'grade-session-id', g.sessionId));
  card.appendChild(hdr);

  // Dimension bars
  var dimsWrap = mk('div', 'dimensions');
  DIM_KEYS.forEach(function(key, i) {
    var d = dims[key];
    if (!d) return;
    var ds = d.score || 0;
    var dm = d.max || 20;
    // Clamp so a score outside 0..max cannot overflow the bar track.
    var dp = Math.max(0, Math.min(100, Math.round((ds / dm) * 100)));
    var row = mk('div', 'dimension-row');
    var bar = mk('div', 'dimension-bar');
    bar.style.cssText = 'width:' + dp + '%;background:' + DIM_COLORS[i] + ';';
    var bw = mk('div', 'dimension-bar-wrap');
    bw.appendChild(bar);
    var sl = mk('span', 'dimension-score-label', ds + '/' + dm);
    sl.style.color = DIM_COLORS[i];
    app(row, mk('span', 'dimension-name', DIM_NAMES[i]), bw, sl);
    // Evidence doubles as the row's hover tooltip.
    if (d.evidence && d.evidence.length) row.title = d.evidence.join(' | ');
    dimsWrap.appendChild(row);
  });
  card.appendChild(dimsWrap);
  container.appendChild(card);

  // Flags
  var fs = mk('div', 'grade-card flags-section');
  var ft = mk('h3', null, 'Flags' + (flags.length ? ' (' + flags.length + ')' : ''));
  var fl = mk('div', 'flags-list');
  if (!flags.length) {
    fl.appendChild(mk('div', 'no-flags', '\u2713 No flags \u2014 all protocol checks passed'));
  } else {
    flags.forEach(function(f) {
      var item = mk('div', 'flag-item');
      app(item, mks('span', 'flex-shrink:0;', '\u26A0'), mk('span', null, f));
      fl.appendChild(item);
    });
  }
  app(fs, ft, fl);
  container.appendChild(fs);

  // Evidence
  var hasEvidence = DIM_KEYS.some(function(k) { return dims[k] && dims[k].evidence && dims[k].evidence.length; });
  if (hasEvidence) {
    var ec = mk('div', 'grade-card');
    ec.appendChild(mk('div', 'ev-title', 'Evidence'));
    DIM_KEYS.forEach(function(key, i) {
      var d = dims[key];
      if (!d || !d.evidence || !d.evidence.length) return;
      var row = mk('div', 'ev-row');
      var dimLabel = mk('div', 'ev-dim', DIM_NAMES[i]);
      dimLabel.style.color = DIM_COLORS[i];
      row.appendChild(dimLabel);
      d.evidence.forEach(function(ev) { row.appendChild(mk('div', 'ev-bullet', '\u2022 ' + ev)); });
      ec.appendChild(row);
    });
    container.appendChild(ec);
  }

  // Token metadata
  var tokCard = mk('div', 'grade-card');
  tokCard.appendChild(mk('div', 'ev-title', 'Token Metadata'));
  var grid = mk('div', 'token-meta-grid');
  function addStat(label, value) {
    var s = mk('div', 'token-stat');
    app(s, mk('div', 'token-stat-label', label), mk('div', 'token-stat-value', fmtNum(value, '\u2014')));
    grid.appendChild(s);
  }
  if (tmeta) {
    addStat('Method', tmeta.estimationMethod || tmeta.estimation_method || '\u2014');
    if (tmeta.totalEstimatedTokens) addStat('Est. Tokens', tmeta.totalEstimatedTokens);
    if (tmeta.inputTokens) addStat('Input Tokens', tmeta.inputTokens);
    if (tmeta.outputTokens) addStat('Output Tokens', tmeta.outputTokens);
    if (tmeta.cacheReadTokens) addStat('Cache Read', tmeta.cacheReadTokens);
  } else if (g.entryCount) {
    // No token metadata: estimate from the audit entry count (150 per entry).
    addStat('Audit Entries', g.entryCount);
    addStat('Est. Tokens', '~' + (g.entryCount * 150));
    addStat('Method', 'entry_count_proxy');
  } else {
    addStat('Note', 'Enable OTEL or use run_scenario.py for token data');
  }
  app(tokCard, grid);
  container.appendChild(tokCard);
}
|
|
382
|
+
|
|
383
|
+
// ── A/B Tab ────────────────────────────────────────────────────────────────
|
|
384
|
+
/**
 * Render the A/B Comparison tab from an MCP-vs-CLI results summary.
 *
 * Appends, in order: a winner banner, a "Summary" grid of stat cards, an
 * optional per-operation table and a closing note. A missing summary, or one
 * with no runs, renders the empty-state placeholder instead.
 *
 * The overall winner is `ab.overall_winner` when present; otherwise it is
 * derived from `ab.global_wins` (more wins wins, equal counts tie).
 *
 * `ab.per_operation` is accepted in two shapes:
 *   - an array of records carrying `operation`, `wins: {mcp, cli, tie}` and
 *     `avg_token_delta_mcp_minus_cli`;
 *   - an object keyed by operation name whose records carry `mcp_wins`,
 *     `cli_wins`, `ties` and `avg_token_delta`.
 *
 * @param {?Object} ab - A/B results summary.
 * @returns {void}
 */
function renderABTab(ab) {
  // First of two candidate fields that is not null/undefined.
  function firstSet(a, b) { return a != null ? a : b; }

  // An explicit winner takes precedence; otherwise compare win counts.
  function decideWinner(explicit, mcpCount, cliCount) {
    if (explicit) return explicit;
    if (mcpCount > cliCount) return 'mcp';
    if (cliCount > mcpCount) return 'cli';
    return 'tie';
  }

  // Normalize either per-operation shape to a name-sorted list of {name, data}.
  function listOperations(perOp) {
    if (!perOp || typeof perOp !== 'object') return [];
    var ops = Array.isArray(perOp)
      ? perOp.map(function(d, i) { return { name: String((d && d.operation) || i), data: d || {} }; })
      : Object.keys(perOp).map(function(k) { return { name: k, data: perOp[k] || {} }; });
    return ops.sort(function(a, b) { return a.name < b.name ? -1 : a.name > b.name ? 1 : 0; });
  }

  var container = document.getElementById('ab-content');
  container.textContent = '';
  if (!ab || !ab.total_runs) {
    container.appendChild(emptyState('⚖️', 'No A/B results yet',
      'Run a blind comparison:', 'python scripts/run_ab_test.py --domain tasks --runs 3 --cleo cleo-dev'));
    return;
  }
  document.getElementById('tb-ab').textContent = ab.total_runs || 0;
  var wins = ab.global_wins || {};
  var wr = ab.global_win_rate || {};
  var delta = ab.avg_token_delta_mcp_minus_cli;
  var hasDelta = typeof delta === 'number' && !Number.isNaN(delta);
  var winner = decideWinner(ab.overall_winner, wins.mcp || 0, wins.cli || 0);

  var banner = mk('div', 'ab-winner-banner ' + winner);
  var wlabel = winner === 'mcp' ? '\u26A1 MCP wins overall' : winner === 'cli' ? '\u26A1 CLI wins overall' : '\u2194 Overall tie';
  banner.appendChild(mk('span', null, wlabel));
  if (hasDelta) {
    var sign = delta > 0 ? '+' : '';
    banner.appendChild(mks('span', 'font-size:.7rem;font-weight:400;opacity:.8;',
      ' \u2014 MCP uses ' + sign + delta.toFixed(1) + ' tokens/op vs CLI'));
  }
  container.appendChild(banner);

  container.appendChild(mk('div', 'section-title', 'Summary'));
  var grid = mk('div', 'ab-summary-grid');
  [
    ['Total Runs', ab.total_runs, ''],
    ['MCP Wins', wins.mcp || 0, winner === 'mcp' ? 'win' : ''],
    ['CLI Wins', wins.cli || 0, winner === 'cli' ? 'win' : ''],
    ['Ties', wins.tie || 0, 'neutral'],
    ['MCP Win %', ((wr.mcp || 0) * 100).toFixed(1) + '%', winner === 'mcp' ? 'win' : ''],
    ['CLI Win %', ((wr.cli || 0) * 100).toFixed(1) + '%', winner === 'cli' ? 'win' : ''],
    // A positive delta means MCP used more tokens than CLI, hence 'loss'.
    ['Token Delta', hasDelta ? ((delta > 0 ? '+' : '') + delta.toFixed(1) + 't') : '\u2014',
      !hasDelta ? 'neutral' : delta > 10 ? 'loss' : delta < -10 ? 'win' : 'neutral'],
  ].forEach(function(row) {
    var c = mk('div', 'ab-stat-card');
    app(c, mk('div', 'ab-stat-label', row[0]), mk('div', 'ab-stat-value ' + row[2], String(row[1])));
    grid.appendChild(c);
  });
  container.appendChild(grid);

  var ops = listOperations(ab.per_operation);
  if (ops.length) {
    container.appendChild(mks('div', 'font-family:Poppins,sans-serif;font-size:.75rem;font-weight:600;text-transform:uppercase;letter-spacing:.08em;color:var(--text-muted);margin-top:.5rem;', 'Per-Operation Results'));
    var wrap = mk('div', 'ab-table-wrap');
    var tbl = document.createElement('table');
    tbl.className = 'ab-table';
    var thead = document.createElement('thead');
    var hrow = document.createElement('tr');
    ['Operation', 'Winner', 'MCP Wins', 'CLI Wins', 'Ties', 'Token \u0394', 'MCP chars', 'CLI chars', 'MCP ms', 'CLI ms'].forEach(function(h) {
      hrow.appendChild(mk('th', null, h));
    });
    thead.appendChild(hrow);
    tbl.appendChild(thead);
    var tbody = document.createElement('tbody');
    ops.forEach(function(op) {
      var d = op.data;
      var w = d.wins || {};
      var row = document.createElement('tr');
      var mcpWins = firstSet(w.mcp, d.mcp_wins) || 0;
      var cliWins = firstSet(w.cli, d.cli_wins) || 0;
      var ties = firstSet(w.tie, d.ties) || 0;
      var rawDelta = firstSet(d.avg_token_delta_mcp_minus_cli, d.avg_token_delta);
      var delta2 = typeof rawDelta === 'number' ? rawDelta : 0;
      var dcls = delta2 > 10 ? 'delta-pos' : delta2 < -10 ? 'delta-neg' : 'delta-zero';
      var opW = decideWinner(d.winner, mcpWins, cliWins);
      var cells = [
        mk('span', 'op-name', op.name),
        mk('span', 'win-pill ' + opW, opW.toUpperCase()),
        document.createTextNode(String(mcpWins)),
        document.createTextNode(String(cliWins)),
        document.createTextNode(String(ties)),
        mk('span', dcls, (delta2 > 0 ? '+' : '') + delta2.toFixed(0) + 't'),
        document.createTextNode(fmtNum(d.avg_mcp_chars)),
        document.createTextNode(fmtNum(d.avg_cli_chars)),
        document.createTextNode(fmtNum(d.avg_mcp_ms) + 'ms'),
        document.createTextNode(fmtNum(d.avg_cli_ms) + 'ms'),
      ];
      cells.forEach(function(c) {
        var td = document.createElement('td');
        td.appendChild(c);
        row.appendChild(td);
      });
      tbody.appendChild(row);
    });
    tbl.appendChild(tbody);
    wrap.appendChild(tbl);
    container.appendChild(wrap);
  }
  container.appendChild(mk('div', 'ab-note',
    '\u26A0 CLI-only sessions always score 0 on S5 (Progressive Disclosure) — metadata.gateway is not set by the CLI adapter. MCP earns +10 for query gateway usage automatically.'));
}
|
|
459
|
+
|
|
460
|
+
// ── Token Analysis Tab ─────────────────────────────────────────────────────
|
|
461
|
+
/**
 * Render the "Token Analysis" tab into the #token-content element.
 *
 * The container is cleared first. If `ta` is missing or has no keys an empty
 * state is shown; otherwise one bar-chart card is appended for each section
 * that is present:
 *   - ta.breakdown_by_domain            name -> { mean }
 *   - ta.breakdown_by_gateway           { mcp_query: { mean }, cli: { mean }, note }
 *   - ta.ab_aggregation.per_operation   op   -> { avg_delta }
 *
 * Relies on the helpers mk, mks, app, fmtNum and emptyState defined elsewhere
 * in this file.
 *
 * @param {Object} ta Token analysis payload (may be empty).
 */
function renderTokenTab(ta) {
  var container = document.getElementById('token-content');
  container.textContent = '';
  if (!ta || !Object.keys(ta).length) {
    container.appendChild(emptyState('\uD83D\uDD22', 'No token data yet',
      'Generate token analysis:', 'python scripts/token_tracker.py --grades-file .cleo/metrics/GRADES.jsonl --breakdown-by domain'));
    return;
  }

  // Read a numeric field from a stats record, treating a missing record or a
  // missing/zero field as 0 so one malformed entry cannot abort the whole tab.
  function statOf(record, key) {
    return (record && record[key]) || 0;
  }

  // Largest value in a list of [label, value] pairs, never below 1 so it is
  // always safe to divide by.
  function maxOf(pairs, valueFn) {
    return Math.max.apply(null, pairs.map(valueFn).concat([1]));
  }

  /**
   * Build one chart card.
   *
   * @param {string} title     Card heading.
   * @param {Array}  entries   List of [label, value] pairs, one bar each.
   * @param {number} maxVal    Value that maps to the widest bar.
   * @param {Function} [colorFn] Called as colorFn(row, index); returns the
   *                             CSS background for that bar.
   * @returns {*} The card element produced by mk().
   */
  function barChart(title, entries, maxVal, colorFn) {
    var card = mk('div', 'chart-card');
    card.appendChild(mk('div', 'chart-card-title', title));
    var chart = mk('div', 'bar-chart');
    entries.forEach(function(row, i) {
      var label = row[0], val = row[1];
      // The index is handed to the callback so callers do not need to search
      // the entries array to find the bar's position.
      var color = colorFn ? colorFn(row, i) : 'var(--accent)';
      // Bars are scaled by magnitude and capped at 90% of the track width.
      var pct = maxVal > 0 ? Math.min((Math.abs(val) / maxVal) * 90, 90) : 0;
      var r = mk('div', 'bar-row');
      var track = mk('div', 'bar-track');
      var fill = mk('div', 'bar-fill');
      fill.style.cssText = 'width:' + pct + '%;background:' + color + ';';
      track.appendChild(fill);
      app(r, mk('span', 'bar-label', label), track, mk('span', 'bar-val', fmtNum(val) + 't'));
      chart.appendChild(r);
    });
    card.appendChild(chart);
    return card;
  }

  if (ta.breakdown_by_domain) {
    var entries = Object.entries(ta.breakdown_by_domain).map(function(kv) {
      return [kv[0], statOf(kv[1], 'mean')];
    });
    var maxD = maxOf(entries, function(e) { return e[1]; });
    var colors2 = ['var(--dim1)','var(--dim2)','var(--dim3)','var(--dim4)','var(--dim5)','var(--accent)','var(--mcp)','var(--cli)','#9b7ec8','#5a9a6a'];
    // Cycle through the palette by bar position.
    container.appendChild(barChart('Estimated Tokens by Domain', entries, maxD, function(row, i) {
      return colors2[i % colors2.length];
    }));
  }

  if (ta.breakdown_by_gateway) {
    var gw = ta.breakdown_by_gateway;
    var gwEntries = [['query (MCP)', statOf(gw.mcp_query, 'mean')], ['CLI', statOf(gw.cli, 'mean')]];
    var maxGw = maxOf(gwEntries, function(e) { return e[1]; });
    var gwColors = ['var(--mcp)', 'var(--cli)'];
    container.appendChild(barChart('MCP vs CLI Token Split', gwEntries, maxGw, function(row, i) {
      return gwColors[i];
    }));
    if (gw.note) {
      var noteEl = mks('p', 'font-size:.7rem;color:var(--text-muted);margin-top:.5rem;line-height:1.5;', gw.note);
      // lastChild is the gateway card appended just above.
      container.lastChild.appendChild(noteEl);
    }
  }

  if (ta.ab_aggregation && ta.ab_aggregation.per_operation) {
    var opEntries = Object.entries(ta.ab_aggregation.per_operation).map(function(kv) {
      return [kv[0], statOf(kv[1], 'avg_delta')];
    });
    // Largest positive delta first.
    opEntries.sort(function(a, b) { return b[1] - a[1]; });
    var maxOp = maxOf(opEntries, function(e) { return Math.abs(e[1]); });
    container.appendChild(barChart('Token Delta per Op (MCP \u2212 CLI)', opEntries, maxOp, function(row) {
      return row[1] > 0 ? 'var(--grade-d)' : 'var(--win)';
    }));
  }
}
|
|
512
|
+
|
|
513
|
+
// ── History Tab ────────────────────────────────────────────────────────────
|
|
514
|
+
/**
 * Render the "History" tab into the #history-content element.
 *
 * Clears the container and shows an empty state when there are no grades.
 * Otherwise it writes the grade count into #tb-history, draws a score-trend
 * sparkline (only when there are at least two grades, oldest first) and lists
 * every session newest first. Clicking a list entry switches to the grades
 * tab and clicks the matching sidebar entry.
 *
 * Relies on mk, mks, app, emptyState, fmtDate, scoreLetter, scoreColor and
 * drawSparkline defined elsewhere in this file.
 *
 * @param {Array<Object>} grades Grade records. Fields read: timestamp,
 *   totalScore (falls back to score, then 0), maxScore (defaults to 100),
 *   flags, sessionId, _scenarioId, scenario.
 */
function renderHistoryTab(grades) {
  var container = document.getElementById('history-content');
  container.textContent = '';
  if (!grades || !grades.length) {
    container.appendChild(emptyState('\uD83D\uDCC8', 'No history yet', 'Run graded sessions to build history.', ''));
    return;
  }

  // Score of a grade record: totalScore when present, else score, else 0.
  function scoreOf(g) {
    return g.totalScore != null ? g.totalScore : (g.score || 0);
  }

  // Sortable timestamp. Missing or unparsable dates become 0 so the sort
  // comparator never returns NaN (which would make the order unspecified).
  function timeOf(g) {
    var t = new Date(g.timestamp).getTime();
    return Number.isNaN(t) ? 0 : t;
  }

  document.getElementById('tb-history').textContent = grades.length;

  if (grades.length > 1) {
    var sorted = grades.slice().sort(function(a, b) { return timeOf(a) - timeOf(b); });
    var scores = sorted.map(scoreOf);
    var trendCard = mk('div', 'chart-card');
    trendCard.appendChild(mk('div', 'chart-card-title', 'Score Trend'));
    var canvas = document.createElement('canvas');
    canvas.className = 'trend'; canvas.width = 800; canvas.height = 90;
    trendCard.appendChild(canvas);
    container.appendChild(trendCard);
    // Deferred to the next frame; drawSparkline reads the canvas client size.
    requestAnimationFrame(function() { drawSparkline(canvas, scores); });
  }

  var desc = grades.slice().sort(function(a, b) { return timeOf(b) - timeOf(a); });
  container.appendChild(mk('div', 'section-title', 'All Sessions'));
  var list = mk('div', 'history-list');

  desc.forEach(function(g) {
    var score = scoreOf(g);
    var maxScore = g.maxScore || 100;
    var letter = scoreLetter(score);
    // Clamp so an out-of-range score cannot yield a negative or >100% width.
    var pct = Math.max(0, Math.min(100, Math.round((score / maxScore) * 100)));
    var flags = (g.flags || []).length;

    var item = mk('div', 'history-item');
    var badge = mk('div', 'history-grade-badge ' + letter, letter);
    var main = mk('div', 'history-item-main');
    var sid = mk('div', 'history-session-id', g.sessionId || '(no session id)');
    var meta = mk('div', 'history-item-meta');
    // Denominator follows the same maxScore the bar width is computed from.
    app(meta, mks('span', null, fmtDate(g.timestamp)), mks('span', null, score + '/' + maxScore));
    if (flags > 0) app(meta, mks('span', 'color:var(--loss);', flags + ' flag' + (flags !== 1 ? 's' : '')));
    if (g._scenarioId || g.scenario) app(meta, mks('span', 'color:var(--text-muted);', g._scenarioId || g.scenario));
    app(main, sid, meta);

    var barWrap = mk('div', 'history-score-bar');
    var fill = mk('div', 'history-score-fill');
    fill.style.cssText = 'width:' + pct + '%;background:' + scoreColor(score) + ';';
    barWrap.appendChild(fill);
    app(item, badge, main, barWrap);

    item.addEventListener('click', function() {
      var tabBtn = document.querySelector('[data-tab="grades"]');
      if (tabBtn) tabBtn.click();
      // NOTE(review): assumes sidebar items are in the same order as
      // EMBEDDED_GRADE_DATA.grades - confirm against renderGradesTab.
      var items = document.querySelectorAll('#grade-sidebar .sidebar-item');
      var allGrades = EMBEDDED_GRADE_DATA.grades || [];
      // Prefer the exact record so duplicate or missing session ids still
      // open the entry that was clicked; fall back to the first id match.
      var target = allGrades.indexOf(g);
      if (target === -1 || !items[target]) {
        target = -1;
        for (var j = 0; j < allGrades.length; j++) {
          if (allGrades[j].sessionId === g.sessionId && items[j]) { target = j; break; }
        }
      }
      if (target !== -1) items[target].click();
    });

    list.appendChild(item);
  });

  container.appendChild(list);
}
|
|
569
|
+
|
|
570
|
+
/**
 * Draw a score trend line on a canvas.
 *
 * The canvas backing size is reset to its client size (falling back to
 * 600x90), which also clears it. Scores are spread evenly along the x axis
 * and plotted on a fixed 0-100 vertical scale, with a gradient fill under
 * the line, dashed guide lines at 90/75/60/45 and one dot per score coloured
 * via scoreColor() (defined elsewhere in this file).
 *
 * With no scores nothing is drawn; a single score is drawn as one dot in the
 * horizontal centre instead of producing NaN coordinates.
 *
 * @param {HTMLCanvasElement} canvas Target canvas.
 * @param {number[]} scores Scores in display order (left to right).
 */
function drawSparkline(canvas, scores) {
  var W = canvas.clientWidth || 600, H = canvas.clientHeight || 90;
  canvas.width = W; canvas.height = H;
  if (!scores || !scores.length) return;

  var ctx = canvas.getContext('2d');
  var pad = 10, uW = W - pad * 2, uH = H - pad * 2;
  var last = scores.length - 1;
  // last === 0 would divide by zero, so a lone point is centred instead.
  var toX = function(i) { return last > 0 ? pad + (i / last) * uW : pad + uW / 2; };
  var toY = function(s) { return pad + (1 - s / 100) * uH; };

  // Trace the polyline through every score on the current path.
  var tracePath = function() {
    ctx.beginPath();
    scores.forEach(function(s, i) {
      if (i === 0) ctx.moveTo(toX(i), toY(s));
      else ctx.lineTo(toX(i), toY(s));
    });
  };

  // Filled area between the line and the bottom padding edge.
  var grad = ctx.createLinearGradient(0, pad, 0, H - pad);
  grad.addColorStop(0, 'rgba(73,143,165,.3)'); grad.addColorStop(1, 'rgba(73,143,165,.02)');
  tracePath();
  ctx.lineTo(toX(last), H - pad); ctx.lineTo(toX(0), H - pad); ctx.closePath();
  ctx.fillStyle = grad; ctx.fill();

  // The trend line itself.
  tracePath();
  ctx.strokeStyle = '#4a8fa5'; ctx.lineWidth = 2; ctx.lineJoin = 'round'; ctx.stroke();

  // Dashed horizontal guide lines.
  [90, 75, 60, 45].forEach(function(t) {
    ctx.beginPath(); ctx.moveTo(pad, toY(t)); ctx.lineTo(W - pad, toY(t));
    ctx.strokeStyle = 'rgba(0,0,0,.07)'; ctx.lineWidth = 1; ctx.setLineDash([3, 4]); ctx.stroke(); ctx.setLineDash([]);
  });

  // One dot per score.
  scores.forEach(function(s, i) {
    ctx.beginPath(); ctx.arc(toX(i), toY(s), 3.5, 0, Math.PI * 2); ctx.fillStyle = scoreColor(s); ctx.fill();
  });
}
|
|
594
|
+
|
|
595
|
+
// ── Init ───────────────────────────────────────────────────────────────────
|
|
596
|
+
// Page bootstrap: fill in the header from the embedded payload (if any) and
// render all four tabs. Runs once, immediately, when the script is parsed.
(function init() {
  var payload = {};
  if (typeof EMBEDDED_GRADE_DATA !== 'undefined') {
    payload = EMBEDDED_GRADE_DATA;
  }

  // Grades list from the payload, or a fresh empty list when absent.
  function gradeList() {
    return payload.grades || [];
  }

  var titleEl = document.getElementById('hdr-title');
  titleEl.textContent = payload.title || 'ct-grade \u2014 Review';

  if (payload.subtitle) {
    var subEl = document.getElementById('hdr-sub');
    subEl.textContent = payload.subtitle;
  }

  if (payload.generated_at) {
    var metaEl = document.getElementById('hdr-meta');
    var generatedLine = 'Generated ' + fmtDate(payload.generated_at);
    metaEl.appendChild(document.createTextNode(generatedLine));
    metaEl.appendChild(document.createElement('br'));

    var gradeCount = gradeList().length;
    var abRuns = (payload.ab_results && payload.ab_results.total_runs) || 0;
    var countsLine = gradeCount + ' grade(s) \u00b7 ' + abRuns + ' A/B run(s)';
    metaEl.appendChild(document.createTextNode(countsLine));
  }

  // Each tab gets its slice of the payload, defaulting to an empty value.
  renderGradesTab(gradeList());
  renderABTab(payload.ab_results || {});
  renderTokenTab(payload.token_analysis || {});
  renderHistoryTab(gradeList());
})();
|
|
611
|
+
</script>
|
|
612
|
+
</body>
|
|
613
|
+
</html>
|