@machinespirits/eval 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/components/MobileEvalDashboard.tsx +267 -0
- package/components/comparison/DeltaAnalysisTable.tsx +137 -0
- package/components/comparison/ProfileComparisonCard.tsx +176 -0
- package/components/comparison/RecognitionABMode.tsx +385 -0
- package/components/comparison/RecognitionMetricsPanel.tsx +135 -0
- package/components/comparison/WinnerIndicator.tsx +64 -0
- package/components/comparison/index.ts +5 -0
- package/components/mobile/BottomSheet.tsx +233 -0
- package/components/mobile/DimensionBreakdown.tsx +210 -0
- package/components/mobile/DocsView.tsx +363 -0
- package/components/mobile/LogsView.tsx +481 -0
- package/components/mobile/PsychodynamicQuadrant.tsx +261 -0
- package/components/mobile/QuickTestView.tsx +1098 -0
- package/components/mobile/RecognitionTypeChart.tsx +124 -0
- package/components/mobile/RecognitionView.tsx +809 -0
- package/components/mobile/RunDetailView.tsx +261 -0
- package/components/mobile/RunHistoryView.tsx +367 -0
- package/components/mobile/ScoreRadial.tsx +211 -0
- package/components/mobile/StreamingLogPanel.tsx +230 -0
- package/components/mobile/SynthesisStrategyChart.tsx +140 -0
- package/config/interaction-eval-scenarios.yaml +832 -0
- package/config/learner-agents.yaml +248 -0
- package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +52 -0
- package/docs/research/ABLATION-MODEL-SELECTION.md +53 -0
- package/docs/research/ADVANCED-EVAL-ANALYSIS.md +60 -0
- package/docs/research/ANOVA-RESULTS-2026-01-14.md +257 -0
- package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +586 -0
- package/docs/research/COST-ANALYSIS.md +56 -0
- package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +340 -0
- package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +291 -0
- package/docs/research/EVAL-SYSTEM-ANALYSIS.md +306 -0
- package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +301 -0
- package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +1988 -0
- package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +282 -0
- package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +147 -0
- package/docs/research/PAPER-EXTENSION-DYADIC.md +204 -0
- package/docs/research/PAPER-UNIFIED.md +659 -0
- package/docs/research/PAPER-UNIFIED.pdf +0 -0
- package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +356 -0
- package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +419 -0
- package/docs/research/apa.csl +2133 -0
- package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +1637 -0
- package/docs/research/archive/paper-multiagent-tutor.tex +978 -0
- package/docs/research/paper-draft/full-paper.md +136 -0
- package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
- package/docs/research/paper-draft/references.bib +515 -0
- package/docs/research/transcript-baseline.md +139 -0
- package/docs/research/transcript-recognition-multiagent.md +187 -0
- package/hooks/useEvalData.ts +625 -0
- package/index.js +27 -0
- package/package.json +73 -0
- package/routes/evalRoutes.js +3002 -0
- package/scripts/advanced-eval-analysis.js +351 -0
- package/scripts/analyze-eval-costs.js +378 -0
- package/scripts/analyze-eval-results.js +513 -0
- package/scripts/analyze-interaction-evals.js +368 -0
- package/server-init.js +45 -0
- package/server.js +162 -0
- package/services/benchmarkService.js +1892 -0
- package/services/evaluationRunner.js +739 -0
- package/services/evaluationStore.js +1121 -0
- package/services/learnerConfigLoader.js +385 -0
- package/services/learnerTutorInteractionEngine.js +857 -0
- package/services/memory/learnerMemoryService.js +1227 -0
- package/services/memory/learnerWritingPad.js +577 -0
- package/services/memory/tutorWritingPad.js +674 -0
- package/services/promptRecommendationService.js +493 -0
- package/services/rubricEvaluator.js +826 -0
package/package.json
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@machinespirits/eval",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Evaluation system for Machine Spirits tutor - benchmarking, rubric evaluation, and analysis tools",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "index.js",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./index.js",
|
|
9
|
+
"./services/*": "./services/*.js",
|
|
10
|
+
"./routes/*": "./routes/*.js",
|
|
11
|
+
"./config/*": "./config/*",
|
|
12
|
+
"./components/*": "./components/*.tsx",
|
|
13
|
+
"./components/mobile/*": "./components/mobile/*.tsx",
|
|
14
|
+
"./components/comparison": "./components/comparison/index.ts",
|
|
15
|
+
"./components/comparison/*": "./components/comparison/*.tsx",
|
|
16
|
+
"./hooks/*": "./hooks/*.ts",
|
|
17
|
+
"./types": "./types.ts",
|
|
18
|
+
"./utils/*": "./utils/*.ts"
|
|
19
|
+
},
|
|
20
|
+
"files": [
|
|
21
|
+
"index.js",
|
|
22
|
+
"server.js",
|
|
23
|
+
"server-init.js",
|
|
24
|
+
"routes/",
|
|
25
|
+
"services/",
|
|
26
|
+
"components/",
|
|
27
|
+
"hooks/",
|
|
28
|
+
"config/",
|
|
29
|
+
"scripts/",
|
|
30
|
+
"docs/"
|
|
31
|
+
],
|
|
32
|
+
"scripts": {
|
|
33
|
+
"start": "STANDALONE=true node server.js",
|
|
34
|
+
"dev": "STANDALONE=true node server.js",
|
|
35
|
+
"eval": "node scripts/eval-cli.js",
|
|
36
|
+
"eval:quick": "node scripts/eval-cli.js quick",
|
|
37
|
+
"eval:test": "node scripts/eval-cli.js test"
|
|
38
|
+
},
|
|
39
|
+
"keywords": [
|
|
40
|
+
"evaluation",
|
|
41
|
+
"tutor",
|
|
42
|
+
"benchmark",
|
|
43
|
+
"rubric",
|
|
44
|
+
"machine-spirits"
|
|
45
|
+
],
|
|
46
|
+
"author": "L. Magee",
|
|
47
|
+
"license": "MIT",
|
|
48
|
+
"repository": {
|
|
49
|
+
"type": "git",
|
|
50
|
+
"url": "https://github.com/lmagee/machine-spirits",
|
|
51
|
+
"directory": "packages/eval"
|
|
52
|
+
},
|
|
53
|
+
"peerDependencies": {
|
|
54
|
+
"@machinespirits/tutor-core": ">=0.1.0",
|
|
55
|
+
"@anthropic-ai/sdk": ">=0.71.0"
|
|
56
|
+
},
|
|
57
|
+
"peerDependenciesMeta": {
|
|
58
|
+
"@anthropic-ai/sdk": {
|
|
59
|
+
"optional": true
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
"dependencies": {
|
|
63
|
+
"express": "^4.19.2",
|
|
64
|
+
"yaml": "^2.8.2",
|
|
65
|
+
"better-sqlite3": "^12.5.0"
|
|
66
|
+
},
|
|
67
|
+
"devDependencies": {
|
|
68
|
+
"@types/node": "^22.14.0"
|
|
69
|
+
},
|
|
70
|
+
"engines": {
|
|
71
|
+
"node": ">=18.0.0"
|
|
72
|
+
}
|
|
73
|
+
}
|