@machinespirits/eval 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/README.md +91 -9
  2. package/config/eval-settings.yaml +3 -3
  3. package/config/paper-manifest.json +486 -0
  4. package/config/providers.yaml +9 -6
  5. package/config/tutor-agents.yaml +2261 -0
  6. package/content/README.md +23 -0
  7. package/content/courses/479/course.md +53 -0
  8. package/content/courses/479/lecture-1.md +361 -0
  9. package/content/courses/479/lecture-2.md +360 -0
  10. package/content/courses/479/lecture-3.md +655 -0
  11. package/content/courses/479/lecture-4.md +530 -0
  12. package/content/courses/479/lecture-5.md +326 -0
  13. package/content/courses/479/lecture-6.md +346 -0
  14. package/content/courses/479/lecture-7.md +326 -0
  15. package/content/courses/479/lecture-8.md +273 -0
  16. package/content/courses/479/roadmap-slides.md +656 -0
  17. package/content/manifest.yaml +8 -0
  18. package/docs/research/apa.csl +2133 -0
  19. package/docs/research/build.sh +98 -0
  20. package/docs/research/figures/figure1.png +0 -0
  21. package/docs/research/figures/figure10.png +0 -0
  22. package/docs/research/figures/figure11.png +0 -0
  23. package/docs/research/figures/figure2.png +0 -0
  24. package/docs/research/figures/figure3.png +0 -0
  25. package/docs/research/figures/figure4.png +0 -0
  26. package/docs/research/figures/figure5.png +0 -0
  27. package/docs/research/figures/figure6.png +0 -0
  28. package/docs/research/figures/figure7.png +0 -0
  29. package/docs/research/figures/figure8.png +0 -0
  30. package/docs/research/figures/figure9.png +0 -0
  31. package/docs/research/header.tex +25 -0
  32. package/docs/research/paper-full.md +2565 -0
  33. package/docs/research/paper-short.md +436 -0
  34. package/docs/research/references.bib +1143 -0
  35. package/docs/research/slides-header.tex +188 -0
  36. package/docs/research/slides-pptx.md +363 -0
  37. package/docs/research/slides.md +531 -0
  38. package/docs/research/style-reference-pptx.py +199 -0
  39. package/package.json +5 -5
  40. package/scripts/analyze-eval-results.js +69 -17
  41. package/scripts/analyze-mechanism-traces.js +763 -0
  42. package/scripts/analyze-modulation-learning.js +498 -0
  43. package/scripts/analyze-prosthesis.js +144 -0
  44. package/scripts/analyze-run.js +264 -79
  45. package/scripts/assess-transcripts.js +853 -0
  46. package/scripts/browse-transcripts.js +854 -0
  47. package/scripts/check-parse-failures.js +73 -0
  48. package/scripts/code-dialectical-modulation.js +1320 -0
  49. package/scripts/download-data.sh +55 -0
  50. package/scripts/eval-cli.js +106 -18
  51. package/scripts/generate-paper-figures.js +663 -0
  52. package/scripts/generate-paper-figures.py +577 -76
  53. package/scripts/generate-paper-tables.js +299 -0
  54. package/scripts/qualitative-analysis-ai.js +3 -3
  55. package/scripts/render-sequence-diagram.js +694 -0
  56. package/scripts/test-latency.js +210 -0
  57. package/scripts/test-rate-limit.js +95 -0
  58. package/scripts/test-token-budget.js +332 -0
  59. package/scripts/validate-paper-manifest.js +670 -0
  60. package/services/__tests__/evalConfigLoader.test.js +2 -2
  61. package/services/__tests__/learnerRubricEvaluator.test.js +361 -0
  62. package/services/__tests__/learnerTutorInteractionEngine.test.js +326 -0
  63. package/services/evaluationRunner.js +975 -98
  64. package/services/evaluationStore.js +12 -4
  65. package/services/learnerTutorInteractionEngine.js +27 -2
  66. package/services/mockProvider.js +133 -0
  67. package/services/promptRewriter.js +1471 -5
  68. package/services/rubricEvaluator.js +55 -2
  69. package/services/transcriptFormatter.js +675 -0
  70. package/config/machinespirits-eval.code-workspace +0 -11
  71. package/docs/EVALUATION-VARIABLES.md +0 -589
  72. package/docs/REPLICATION-PLAN.md +0 -577
  73. package/scripts/analyze-run.mjs +0 -282
  74. package/scripts/compare-runs.js +0 -44
  75. package/scripts/compare-suggestions.js +0 -80
  76. package/scripts/dig-into-run.js +0 -158
  77. package/scripts/show-failed-suggestions.js +0 -64
  78. /package/scripts/{check-run.mjs → check-run.js} +0 -0
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env bash
2
+ # Build PDF, short paper, and slides from paper markdown source.
3
+ # Usage:
4
+ # ./build.sh # build full paper PDF
5
+ # ./build.sh full # build full paper PDF
6
+ # ./build.sh short # build short paper PDF
7
+ # ./build.sh beamer # build slides PDF (beamer)
8
+ # ./build.sh pptx # build slides PPTX
9
+ # ./build.sh slides # build both beamer PDF and PPTX slides
10
+ # ./build.sh all # build everything
11
+
12
set -euo pipefail
cd -- "$(dirname -- "$0")"

#######################################
# Print the value of the first line starting with `version:` in a file.
# Surrounding whitespace and one pair of matching single or double quotes
# are stripped. Prints nothing (and still succeeds) when the file is
# unreadable or has no such line, so callers running under `set -e` /
# `pipefail` can apply their own fallback instead of aborting.
# Arguments: $1 - path of the markdown file to inspect
# Outputs:   the version string on stdout, or nothing
#######################################
extract_version() {
  local src=$1
  [[ -r "$src" ]] || return 0
  # One sed pass: on the first matching line strip the key, trailing
  # whitespace and optional quotes, print it, then quit.
  sed -n \
    -e '/^version:/{' \
    -e 's/^version:[[:space:]]*//' \
    -e 's/[[:space:]]*$//' \
    -e 's/^"\(.*\)"$/\1/' \
    -e "s/^'\\(.*\\)'\$/\\1/" \
    -e 'p' \
    -e 'q' \
    -e '}' \
    "$src"
}

# Extract version from paper-full.md YAML frontmatter
VERSION=$(extract_version paper-full.md)
if [[ -z "$VERSION" ]]; then
  echo "Warning: no version found in paper-full.md frontmatter, using 'dev'" >&2
  VERSION="dev"
fi
readonly VERSION

# Versioned output file names, one per build target.
readonly FULL_PDF="paper-full-v${VERSION}.pdf"
readonly SHORT_PDF="paper-short-v${VERSION}.pdf"
readonly SLIDES_PDF="slides-v${VERSION}.pdf"
readonly SLIDES_PPTX="slides-v${VERSION}.pptx"

# pandoc options shared by the full and short paper builds.
readonly -a PANDOC_OPTS=(
  --citeproc
  --pdf-engine=xelatex
  -H header.tex
)
32
+
33
# Render paper-full.md into the versioned full-paper PDF.
# Globals: PANDOC_OPTS (read), FULL_PDF (read)
build_full() {
  local -r target="${FULL_PDF}"
  printf 'Building %s ...\n' "${target}"
  pandoc "${PANDOC_OPTS[@]}" paper-full.md -o "${target}"
  printf ' -> %s\n' "${target}"
}
38
+
39
# Render paper-short.md into the versioned short-paper PDF.
# Globals: PANDOC_OPTS (read), SHORT_PDF (read)
build_short() {
  local -r target="${SHORT_PDF}"
  printf 'Building %s ...\n' "${target}"
  pandoc "${PANDOC_OPTS[@]}" paper-short.md -o "${target}"
  printf ' -> %s\n' "${target}"
}
44
+
45
# Render slides.md into the versioned beamer slide PDF.
# Globals: SLIDES_PDF (read)
build_beamer() {
  # Same pandoc arguments as before, collected in an array for readability.
  local -a beamer_args=(
    --citeproc
    --pdf-engine=xelatex
    -t beamer
    --slide-level=2
  )
  printf 'Building %s (beamer) ...\n' "${SLIDES_PDF}"
  pandoc "${beamer_args[@]}" slides.md -o "${SLIDES_PDF}"
  printf ' -> %s\n' "${SLIDES_PDF}"
}
52
+
53
#######################################
# Build the PPTX slide deck with pandoc.
# Uses slides-pptx.md (stripped of LaTeX commands) when it exists and falls
# back to slides.md otherwise; reference.pptx is passed as the styled
# reference doc only when that file is present.
# Globals: SLIDES_PPTX (read)
#######################################
build_pptx() {
  # Function-local so the source choice and option list do not leak into
  # the script's global scope.
  local slides_src="slides.md"
  local -a pptx_opts=(--citeproc --slide-level=2)

  echo "Building ${SLIDES_PPTX} ..."
  if [[ -f slides-pptx.md ]]; then
    slides_src="slides-pptx.md"
  fi
  if [[ -f reference.pptx ]]; then
    pptx_opts+=(--reference-doc=reference.pptx)
  fi
  pandoc "${pptx_opts[@]}" "${slides_src}" -o "${SLIDES_PPTX}"
  echo " -> ${SLIDES_PPTX}"
}
68
+
69
# Dispatch on the requested build target. A missing or empty first argument
# is mapped to "full" by the ${1:-full} expansion, so no separate
# empty-string pattern is needed. "pdf" is accepted as an alias for "full".
case "${1:-full}" in
  full | pdf)
    build_full
    ;;
  short)
    build_short
    ;;
  beamer)
    build_beamer
    ;;
  pptx)
    build_pptx
    ;;
  slides)
    build_beamer
    build_pptx
    ;;
  all)
    build_full
    build_short
    build_beamer
    build_pptx
    ;;
  *)
    # Unknown target: report usage on stderr and fail.
    echo "Usage: $0 [full|short|beamer|pptx|slides|all]" >&2
    exit 1
    ;;
esac

echo "Done."
@@ -0,0 +1,25 @@
1
+ \usepackage{unicode-math}
2
+ \setmathfont{latinmodern-math.otf}
3
+ \usepackage{etoolbox}
4
+ \usepackage{newunicodechar}
5
+
6
+ % Map Unicode math symbols to LaTeX math mode so they render
7
+ % correctly even when xelatex probes the text font
8
+ \newunicodechar{≈}{$\approx$}
9
+ \newunicodechar{≥}{$\geq$}
10
+ \newunicodechar{𝜒}{$\chi$}
11
+
12
+ % Suppress missing-character warnings from xelatex font probing
13
+ % (unicode-math internally uses Unicode codepoints that trigger
14
+ % text-font lookups in table cells before math font resolves them)
15
+ \tracinglostchars=0
16
+
17
+ % Shrink monospace font so code spans fit in table columns; the original \texttt is saved as \oldtexttt
18
+ \let\oldtexttt\texttt
19
+ \DeclareRobustCommand{\texttt}[1]{{\ttfamily\small #1}}
20
+
21
+ % At document start: set the author block, then append \footnotesize to \Shaded to shrink code blocks and prevent right-margin overflow
22
+ \AtBeginDocument{%
23
+ \author{Liam Magee\footnote{This sentence is the only one actually authored by Liam Magee in this paper.}\\[0.3em]Education Policy, Organization and Leadership\\University of Illinois Urbana-Champaign}%
24
+ \apptocmd{\Shaded}{\footnotesize}{}{}%
25
+ }