@machinespirits/eval 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +91 -9
- package/config/eval-settings.yaml +3 -3
- package/config/paper-manifest.json +486 -0
- package/config/providers.yaml +9 -6
- package/config/tutor-agents.yaml +2261 -0
- package/content/README.md +23 -0
- package/content/courses/479/course.md +53 -0
- package/content/courses/479/lecture-1.md +361 -0
- package/content/courses/479/lecture-2.md +360 -0
- package/content/courses/479/lecture-3.md +655 -0
- package/content/courses/479/lecture-4.md +530 -0
- package/content/courses/479/lecture-5.md +326 -0
- package/content/courses/479/lecture-6.md +346 -0
- package/content/courses/479/lecture-7.md +326 -0
- package/content/courses/479/lecture-8.md +273 -0
- package/content/courses/479/roadmap-slides.md +656 -0
- package/content/manifest.yaml +8 -0
- package/docs/research/apa.csl +2133 -0
- package/docs/research/build.sh +98 -0
- package/docs/research/figures/figure1.png +0 -0
- package/docs/research/figures/figure10.png +0 -0
- package/docs/research/figures/figure11.png +0 -0
- package/docs/research/figures/figure2.png +0 -0
- package/docs/research/figures/figure3.png +0 -0
- package/docs/research/figures/figure4.png +0 -0
- package/docs/research/figures/figure5.png +0 -0
- package/docs/research/figures/figure6.png +0 -0
- package/docs/research/figures/figure7.png +0 -0
- package/docs/research/figures/figure8.png +0 -0
- package/docs/research/figures/figure9.png +0 -0
- package/docs/research/header.tex +25 -0
- package/docs/research/paper-full.md +2565 -0
- package/docs/research/paper-short.md +436 -0
- package/docs/research/references.bib +1143 -0
- package/docs/research/slides-header.tex +188 -0
- package/docs/research/slides-pptx.md +363 -0
- package/docs/research/slides.md +531 -0
- package/docs/research/style-reference-pptx.py +199 -0
- package/package.json +5 -5
- package/scripts/analyze-eval-results.js +69 -17
- package/scripts/analyze-mechanism-traces.js +763 -0
- package/scripts/analyze-modulation-learning.js +498 -0
- package/scripts/analyze-prosthesis.js +144 -0
- package/scripts/analyze-run.js +264 -79
- package/scripts/assess-transcripts.js +853 -0
- package/scripts/browse-transcripts.js +854 -0
- package/scripts/check-parse-failures.js +73 -0
- package/scripts/code-dialectical-modulation.js +1320 -0
- package/scripts/download-data.sh +55 -0
- package/scripts/eval-cli.js +106 -18
- package/scripts/generate-paper-figures.js +663 -0
- package/scripts/generate-paper-figures.py +577 -76
- package/scripts/generate-paper-tables.js +299 -0
- package/scripts/qualitative-analysis-ai.js +3 -3
- package/scripts/render-sequence-diagram.js +694 -0
- package/scripts/test-latency.js +210 -0
- package/scripts/test-rate-limit.js +95 -0
- package/scripts/test-token-budget.js +332 -0
- package/scripts/validate-paper-manifest.js +670 -0
- package/services/__tests__/evalConfigLoader.test.js +2 -2
- package/services/__tests__/learnerRubricEvaluator.test.js +361 -0
- package/services/__tests__/learnerTutorInteractionEngine.test.js +326 -0
- package/services/evaluationRunner.js +975 -98
- package/services/evaluationStore.js +12 -4
- package/services/learnerTutorInteractionEngine.js +27 -2
- package/services/mockProvider.js +133 -0
- package/services/promptRewriter.js +1471 -5
- package/services/rubricEvaluator.js +55 -2
- package/services/transcriptFormatter.js +675 -0
- package/config/machinespirits-eval.code-workspace +0 -11
- package/docs/EVALUATION-VARIABLES.md +0 -589
- package/docs/REPLICATION-PLAN.md +0 -577
- package/scripts/analyze-run.mjs +0 -282
- package/scripts/compare-runs.js +0 -44
- package/scripts/compare-suggestions.js +0 -80
- package/scripts/dig-into-run.js +0 -158
- package/scripts/show-failed-suggestions.js +0 -64
- /package/scripts/{check-run.mjs → check-run.js} +0 -0
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Build PDF, short paper, and slides from paper markdown source.
|
|
3
|
+
# Usage:
|
|
4
|
+
# ./build.sh # build full paper PDF
|
|
5
|
+
# ./build.sh full # build full paper PDF (`pdf` is accepted as an alias)
|
|
6
|
+
# ./build.sh short # build short paper PDF
|
|
7
|
+
# ./build.sh beamer # build slides PDF (beamer)
|
|
8
|
+
# ./build.sh pptx # build slides PPTX
|
|
9
|
+
# ./build.sh slides # build both beamer PDF and PPTX slides
|
|
10
|
+
# ./build.sh all # build everything
|
|
11
|
+
|
|
12
|
+
set -euo pipefail
|
|
13
|
+
cd "$(dirname "$0")"
|
|
14
|
+
|
|
15
|
+
# Print the version declared in a markdown file's metadata.
# Arguments: $1 - path to the markdown file
# Outputs:   value of the first line starting with `version:` on stdout, with
#            surrounding whitespace and double quotes removed; prints nothing
#            when the file has no such line
# Returns:   0 when the file was read, even if no version line was found
extract_version() {
  # One sed invocation instead of grep | head | sed: a missing version line
  # then yields empty output with status 0 rather than a failing pipeline,
  # and `q` stops after the first match so later `version:` lines are ignored.
  sed -n \
    -e '/^version:/{' \
    -e 's/^version:[[:space:]]*//' \
    -e 's/[[:space:]]*$//' \
    -e 's/^"\(.*\)"$/\1/' \
    -e 'p' \
    -e 'q' \
    -e '}' \
    "$1"
}

# The script runs under `set -euo pipefail`, so the extraction must succeed
# even when paper-full.md declares no version; otherwise the 'dev' fallback
# below could never be reached.
VERSION=$(extract_version paper-full.md)
if [ -z "$VERSION" ]; then
  echo "Warning: no version found in paper-full.md frontmatter, using 'dev'"
  VERSION="dev"
fi
|
|
21
|
+
|
|
22
|
+
FULL_PDF="paper-full-v${VERSION}.pdf"
|
|
23
|
+
SHORT_PDF="paper-short-v${VERSION}.pdf"
|
|
24
|
+
SLIDES_PDF="slides-v${VERSION}.pdf"
|
|
25
|
+
SLIDES_PPTX="slides-v${VERSION}.pptx"
|
|
26
|
+
|
|
27
|
+
PANDOC_OPTS=(
|
|
28
|
+
--citeproc
|
|
29
|
+
--pdf-engine=xelatex
|
|
30
|
+
-H header.tex
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# Render paper-full.md to ${FULL_PDF} with pandoc and the shared PANDOC_OPTS,
# announcing the target before the run and confirming it afterwards.
# Globals: FULL_PDF (read), PANDOC_OPTS (read)
build_full() {
  local -a full_cmd=(pandoc "${PANDOC_OPTS[@]}" paper-full.md -o "${FULL_PDF}")

  printf 'Building %s ...\n' "${FULL_PDF}"
  "${full_cmd[@]}"
  printf ' -> %s\n' "${FULL_PDF}"
}
|
|
38
|
+
|
|
39
|
+
# Render paper-short.md to ${SHORT_PDF} with pandoc and the shared PANDOC_OPTS,
# announcing the target before the run and confirming it afterwards.
# Globals: SHORT_PDF (read), PANDOC_OPTS (read)
build_short() {
  local -a short_cmd=(pandoc "${PANDOC_OPTS[@]}" paper-short.md -o "${SHORT_PDF}")

  printf 'Building %s ...\n' "${SHORT_PDF}"
  "${short_cmd[@]}"
  printf ' -> %s\n' "${SHORT_PDF}"
}
|
|
44
|
+
|
|
45
|
+
# Render slides.md to ${SLIDES_PDF} as a beamer deck via pandoc/xelatex.
# This target passes its own option list and does not use PANDOC_OPTS.
# Globals: SLIDES_PDF (read)
build_beamer() {
  local -a beamer_args=(
    --citeproc
    --pdf-engine=xelatex
    -t beamer
    --slide-level=2
  )

  printf 'Building %s (beamer) ...\n' "${SLIDES_PDF}"
  pandoc "${beamer_args[@]}" slides.md -o "${SLIDES_PDF}"
  printf ' -> %s\n' "${SLIDES_PDF}"
}
|
|
52
|
+
|
|
53
|
+
#######################################
# Build the PPTX slide deck with pandoc.
# Globals: SLIDES_PPTX (read) - name of the output file
# Inputs:  slides-pptx.md if it exists in the current directory, otherwise
#          slides.md; reference.pptx is passed as --reference-doc when present
# Outputs: progress messages on stdout; pandoc writes ${SLIDES_PPTX}
#######################################
build_pptx() {
  # Working variables are local so they do not leak into the script's globals.
  local slides_src
  local -a pptx_opts=(--citeproc --slide-level=2)

  echo "Building ${SLIDES_PPTX} ..."
  # Use slides-pptx.md (stripped of LaTeX commands) with styled reference doc
  if [ -f slides-pptx.md ]; then
    slides_src="slides-pptx.md"
  else
    slides_src="slides.md"
  fi
  # The styled reference document is optional; only pass it when it exists.
  if [ -f reference.pptx ]; then
    pptx_opts+=(--reference-doc=reference.pptx)
  fi
  pandoc "${pptx_opts[@]}" "${slides_src}" -o "${SLIDES_PPTX}"
  echo " -> ${SLIDES_PPTX}"
}
|
|
68
|
+
|
|
69
|
+
# Dispatch on the first argument. A missing or empty argument selects `full`
# (the `${1:-full}` default), so no separate empty-string pattern is needed.
# `pdf` is accepted as an alias for `full`.
case "${1:-full}" in
  full | pdf)
    build_full
    ;;
  short)
    build_short
    ;;
  beamer)
    build_beamer
    ;;
  pptx)
    build_pptx
    ;;
  slides)
    build_beamer
    build_pptx
    ;;
  all)
    build_full
    build_short
    build_beamer
    build_pptx
    ;;
  *)
    # Unknown target: report usage on stderr and exit non-zero.
    echo "Usage: $0 [full|short|beamer|pptx|slides|all]" >&2
    exit 1
    ;;
esac
|
|
97
|
+
|
|
98
|
+
echo "Done."
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
\usepackage{unicode-math}
|
|
2
|
+
\setmathfont{latinmodern-math.otf}
|
|
3
|
+
\usepackage{etoolbox}
|
|
4
|
+
\usepackage{newunicodechar}
|
|
5
|
+
|
|
6
|
+
% Map Unicode math symbols to LaTeX math mode so they render
|
|
7
|
+
% correctly even when xelatex probes the text font
|
|
8
|
+
\newunicodechar{≈}{$\approx$}
|
|
9
|
+
\newunicodechar{≥}{$\geq$}
|
|
10
|
+
\newunicodechar{𝜒}{$\chi$}
|
|
11
|
+
|
|
12
|
+
% Suppress missing-character warnings from xelatex font probing
|
|
13
|
+
% (unicode-math internally uses Unicode codepoints that trigger
|
|
14
|
+
% text-font lookups in table cells before math font resolves them)
|
|
15
|
+
\tracinglostchars=0
|
|
16
|
+
|
|
17
|
+
% Shrink monospace font so code spans fit in table columns
|
|
18
|
+
\let\oldtexttt\texttt
|
|
19
|
+
\DeclareRobustCommand{\texttt}[1]{{\ttfamily\small #1}}
|
|
20
|
+
|
|
21
|
+
% At \begin{document}: set the author block and shrink code blocks to prevent right-margin overflow
|
|
22
|
+
\AtBeginDocument{%
|
|
23
|
+
\author{Liam Magee\footnote{This sentence is the only one actually authored by Liam Magee in this paper.}\\[0.3em]Education Policy, Organization and Leadership\\University of Illinois Urbana-Champaign}%
|
|
24
|
+
\apptocmd{\Shaded}{\footnotesize}{}{}%
|
|
25
|
+
}
|