@machinespirits/eval 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/components/MobileEvalDashboard.tsx +267 -0
  2. package/components/comparison/DeltaAnalysisTable.tsx +137 -0
  3. package/components/comparison/ProfileComparisonCard.tsx +176 -0
  4. package/components/comparison/RecognitionABMode.tsx +385 -0
  5. package/components/comparison/RecognitionMetricsPanel.tsx +135 -0
  6. package/components/comparison/WinnerIndicator.tsx +64 -0
  7. package/components/comparison/index.ts +5 -0
  8. package/components/mobile/BottomSheet.tsx +233 -0
  9. package/components/mobile/DimensionBreakdown.tsx +210 -0
  10. package/components/mobile/DocsView.tsx +363 -0
  11. package/components/mobile/LogsView.tsx +481 -0
  12. package/components/mobile/PsychodynamicQuadrant.tsx +261 -0
  13. package/components/mobile/QuickTestView.tsx +1098 -0
  14. package/components/mobile/RecognitionTypeChart.tsx +124 -0
  15. package/components/mobile/RecognitionView.tsx +809 -0
  16. package/components/mobile/RunDetailView.tsx +261 -0
  17. package/components/mobile/RunHistoryView.tsx +367 -0
  18. package/components/mobile/ScoreRadial.tsx +211 -0
  19. package/components/mobile/StreamingLogPanel.tsx +230 -0
  20. package/components/mobile/SynthesisStrategyChart.tsx +140 -0
  21. package/config/interaction-eval-scenarios.yaml +832 -0
  22. package/config/learner-agents.yaml +248 -0
  23. package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +52 -0
  24. package/docs/research/ABLATION-MODEL-SELECTION.md +53 -0
  25. package/docs/research/ADVANCED-EVAL-ANALYSIS.md +60 -0
  26. package/docs/research/ANOVA-RESULTS-2026-01-14.md +257 -0
  27. package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +586 -0
  28. package/docs/research/COST-ANALYSIS.md +56 -0
  29. package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +340 -0
  30. package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +291 -0
  31. package/docs/research/EVAL-SYSTEM-ANALYSIS.md +306 -0
  32. package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +301 -0
  33. package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +1988 -0
  34. package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +282 -0
  35. package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +147 -0
  36. package/docs/research/PAPER-EXTENSION-DYADIC.md +204 -0
  37. package/docs/research/PAPER-UNIFIED.md +659 -0
  38. package/docs/research/PAPER-UNIFIED.pdf +0 -0
  39. package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +356 -0
  40. package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +419 -0
  41. package/docs/research/apa.csl +2133 -0
  42. package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +1637 -0
  43. package/docs/research/archive/paper-multiagent-tutor.tex +978 -0
  44. package/docs/research/paper-draft/full-paper.md +136 -0
  45. package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
  46. package/docs/research/paper-draft/references.bib +515 -0
  47. package/docs/research/transcript-baseline.md +139 -0
  48. package/docs/research/transcript-recognition-multiagent.md +187 -0
  49. package/hooks/useEvalData.ts +625 -0
  50. package/index.js +27 -0
  51. package/package.json +73 -0
  52. package/routes/evalRoutes.js +3002 -0
  53. package/scripts/advanced-eval-analysis.js +351 -0
  54. package/scripts/analyze-eval-costs.js +378 -0
  55. package/scripts/analyze-eval-results.js +513 -0
  56. package/scripts/analyze-interaction-evals.js +368 -0
  57. package/server-init.js +45 -0
  58. package/server.js +162 -0
  59. package/services/benchmarkService.js +1892 -0
  60. package/services/evaluationRunner.js +739 -0
  61. package/services/evaluationStore.js +1121 -0
  62. package/services/learnerConfigLoader.js +385 -0
  63. package/services/learnerTutorInteractionEngine.js +857 -0
  64. package/services/memory/learnerMemoryService.js +1227 -0
  65. package/services/memory/learnerWritingPad.js +577 -0
  66. package/services/memory/tutorWritingPad.js +674 -0
  67. package/services/promptRecommendationService.js +493 -0
  68. package/services/rubricEvaluator.js +826 -0
package/package.json ADDED
@@ -0,0 +1,73 @@
1
+ {
2
+ "name": "@machinespirits/eval",
3
+ "version": "0.1.0",
4
+ "description": "Evaluation system for Machine Spirits tutor - benchmarking, rubric evaluation, and analysis tools",
5
+ "type": "module",
6
+ "main": "index.js",
7
+ "exports": {
8
+ ".": "./index.js",
9
+ "./services/*": "./services/*.js",
10
+ "./routes/*": "./routes/*.js",
11
+ "./config/*": "./config/*",
12
+ "./components/*": "./components/*.tsx",
13
+ "./components/mobile/*": "./components/mobile/*.tsx",
14
+ "./components/comparison": "./components/comparison/index.ts",
15
+ "./components/comparison/*": "./components/comparison/*.tsx",
16
+ "./hooks/*": "./hooks/*.ts",
17
+ "./types": "./types.ts",
18
+ "./utils/*": "./utils/*.ts"
19
+ },
20
+ "files": [
21
+ "index.js",
22
+ "server.js",
23
+ "server-init.js",
24
+ "routes/",
25
+ "services/",
26
+ "components/",
27
+ "hooks/",
28
+ "config/",
29
+ "scripts/",
30
+ "docs/"
31
+ ],
32
+ "scripts": {
33
+ "start": "STANDALONE=true node server.js",
34
+ "dev": "STANDALONE=true node server.js",
35
+ "eval": "node scripts/eval-cli.js",
36
+ "eval:quick": "node scripts/eval-cli.js quick",
37
+ "eval:test": "node scripts/eval-cli.js test"
38
+ },
39
+ "keywords": [
40
+ "evaluation",
41
+ "tutor",
42
+ "benchmark",
43
+ "rubric",
44
+ "machine-spirits"
45
+ ],
46
+ "author": "L. Magee",
47
+ "license": "MIT",
48
+ "repository": {
49
+ "type": "git",
50
+ "url": "https://github.com/lmagee/machine-spirits",
51
+ "directory": "packages/eval"
52
+ },
53
+ "peerDependencies": {
54
+ "@machinespirits/tutor-core": ">=0.1.0",
55
+ "@anthropic-ai/sdk": ">=0.71.0"
56
+ },
57
+ "peerDependenciesMeta": {
58
+ "@anthropic-ai/sdk": {
59
+ "optional": true
60
+ }
61
+ },
62
+ "dependencies": {
63
+ "express": "^4.19.2",
64
+ "yaml": "^2.8.2",
65
+ "better-sqlite3": "^12.5.0"
66
+ },
67
+ "devDependencies": {
68
+ "@types/node": "^22.14.0"
69
+ },
70
+ "engines": {
71
+ "node": ">=18.0.0"
72
+ }
73
+ }