@machinespirits/eval 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@machinespirits/eval",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "Evaluation system for Machine Spirits tutor - benchmarking, rubric evaluation, and analysis tools",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -21,12 +21,14 @@
21
21
  "index.js",
22
22
  "server.js",
23
23
  "server-init.js",
24
+ "types.ts",
24
25
  "routes/",
25
26
  "services/",
26
27
  "components/",
27
28
  "hooks/",
28
29
  "config/",
29
30
  "scripts/",
31
+ "utils/",
30
32
  "docs/"
31
33
  ],
32
34
  "scripts": {
@@ -47,8 +49,7 @@
47
49
  "license": "MIT",
48
50
  "repository": {
49
51
  "type": "git",
50
- "url": "https://github.com/lmagee/machine-spirits",
51
- "directory": "packages/eval"
52
+ "url": "https://github.com/liammagee/machinespirits-eval"
52
53
  },
53
54
  "peerDependencies": {
54
55
  "@machinespirits/tutor-core": ">=0.1.0",
package/types.ts ADDED
@@ -0,0 +1,165 @@
1
+ /**
2
+ * Evaluation Types
3
+ *
4
+ * Types for the eval dashboard components.
5
+ */
6
+
7
+ export interface EvalProfile { // Evaluation profile descriptor: only `name` and `description` are required, every other field is optional.
8
+ name: string;
9
+ description: string;
10
+ egoProvider?: string; // provider/model strings for the ego agent (both optional)
11
+ egoModel?: string;
12
+ superegoProvider?: string; // provider/model strings for the superego agent (both optional)
13
+ superegoModel?: string;
14
+ dialogueEnabled?: boolean;
15
+ maxRounds?: number; // presumably the cap on dialogue rounds — TODO confirm against the runner
16
+ }
17
+
18
+ export interface EvalScenario { // Scenario descriptor: `id` and `name` are required, the remaining metadata is optional.
19
+ id: string;
20
+ name: string;
21
+ description?: string;
22
+ category?: string;
23
+ turnCount?: number;
24
+ isMultiTurn?: boolean; // NOTE(review): relationship to turnCount is not defined here — confirm which one consumers should trust
25
+ }
26
+
27
+ export interface EvalRun { // Evaluation run record: `id`, `status` and `createdAt` are required, all counters and metadata are optional.
28
+ id: string;
29
+ description?: string;
30
+ totalTests?: number;
31
+ totalScenarios?: number;
32
+ totalConfigurations?: number;
33
+ status: 'running' | 'completed' | 'failed'; // closed set of three states; any other string is a type error
34
+ createdAt: string; // timestamp kept as a string; format is not specified here — presumably ISO-8601, TODO confirm
35
+ completedAt?: string; // optional, same string representation as createdAt
36
+ runType?: 'quick' | 'batch' | 'matrix' | 'compare' | 'interaction'; // NOTE(review): EvalTrendPoint.runType uses 'eval' and 'auto' where this union has 'batch' and 'interaction' — confirm whether the two sets should match
37
+ profiles?: string[];
38
+ }
39
+
40
+ export type EvalDimensionScore = number | { score: number; reasoning?: string; quote?: string } | null; // one dimension's score: a bare number, an object wrapping the score with optional reasoning/quote, or null; narrow before reading `.score`
41
+
42
+ export interface EvalDimensionScores { // Fixed set of six dimensions; every key is required, but each value may still be null (see EvalDimensionScore).
43
+ relevance: EvalDimensionScore;
44
+ specificity: EvalDimensionScore;
45
+ pedagogical: EvalDimensionScore;
46
+ personalization: EvalDimensionScore;
47
+ actionability: EvalDimensionScore;
48
+ tone: EvalDimensionScore;
49
+ }
50
+
51
+ export interface EvalSuggestion { // Suggestion record: `type`, `title` and `message` are required; the rest is optional.
52
+ type: string;
53
+ title: string;
54
+ message: string;
55
+ actionTarget?: string;
56
+ headline?: string; // NOTE(review): how headline/body relate to title/message is not defined here — confirm with the producer
57
+ body?: string;
58
+ priority?: 'high' | 'medium' | 'low'; // closed three-level set when present
59
+ }
60
+
61
+ export interface EvalValidation { // Validation outcome: two pass/fail flags plus two string lists, all required.
62
+ passesRequired: boolean;
63
+ passesForbidden: boolean;
64
+ requiredMissing: string[]; // presumably the required items that were not found — TODO confirm against the validator
65
+ forbiddenFound: string[]; // presumably the forbidden items that were found — TODO confirm against the validator
66
+ }
67
+
68
+ export interface EvalQuickTestResult { // Result of one scenario/profile test: identity, pass flag, overall score and latency are required; details are optional.
69
+ scenarioId: string;
70
+ scenarioName: string;
71
+ profile: string;
72
+ provider?: string;
73
+ model?: string;
74
+ passed: boolean;
75
+ overallScore: number | null; // required key, but the value may be null — callers must handle the null case
76
+ latencyMs: number;
77
+ scores?: EvalDimensionScores; // per-dimension breakdown, when present
78
+ validation?: EvalValidation;
79
+ suggestions?: EvalSuggestion[];
80
+ inputTokens?: number;
81
+ outputTokens?: number;
82
+ totalTokens?: number;
83
+ apiCalls?: number;
84
+ dialogueRounds?: number;
85
+ evaluationReasoning?: string;
86
+ evaluatorModel?: string;
87
+ scenarioContext?: { // optional inline object; when present only `description` is required
88
+ description: string;
89
+ expectedBehavior?: string;
90
+ learnerContext?: Record<string, string | undefined>; // values may be undefined, so guard each lookup
91
+ };
92
+ }
93
+
94
+ // Agent role and message direction types for the dialogue system
95
+ export type AgentRole = 'user' | 'ego' | 'superego'; // used by EvalDialogueEntry.agent, .from and .to
96
+ export type DialogueDirection = 'input' | 'request' | 'response'; // used by EvalDialogueEntry.direction
97
+
98
+ export interface EvalDialogueEntry { // One dialogue log entry: only `timestamp` and `agent` are required.
99
+ timestamp: string; // kept as a string; format not specified here — presumably ISO-8601, TODO confirm
100
+ agent: AgentRole;
101
+ action?: string;
102
+ model?: string;
103
+ provider?: string;
104
+ latencyMs?: number;
105
+ inputTokens?: number;
106
+ outputTokens?: number;
107
+ suggestions?: Array<{ type: string; title: string; message: string; priority?: string }>; // NOTE(review): same required fields as EvalSuggestion, but `priority` is a free-form string here rather than 'high' | 'medium' | 'low'
108
+ verdict?: { approved: boolean; confidence?: number; feedback?: string }; // when present, only `approved` is required
109
+ preAnalysis?: {
110
+ isPreAnalysis: boolean;
111
+ reinterpretations?: unknown[]; // element shape is not typed; narrow each item before use
112
+ overallCaution?: string;
113
+ };
114
+ from?: AgentRole;
115
+ to?: AgentRole;
116
+ direction?: DialogueDirection;
117
+ rawContext?: string;
118
+ contextData?: { // every member of this object is optional
119
+ courseId?: string;
120
+ courseTitle?: string;
121
+ lectureId?: string;
122
+ lectureTitle?: string;
123
+ recentActivity?: string[];
124
+ };
125
+ output?: unknown; // untyped payload; must be narrowed before use
126
+ cost?: number; // unit/currency is not specified here — TODO confirm
127
+ }
128
+
129
+ export interface EvalDialogue { // Dialogue record: id, start/end times and entry count are required; entries and summary are optional.
130
+ dialogueId: string;
131
+ startTime: string;
132
+ endTime: string;
133
+ entryCount: number; // always present, even when `entries` itself is omitted
134
+ entries?: EvalDialogueEntry[];
135
+ summary?: { // when present, all counters and totalLatencyMs are required; only the token totals are optional
136
+ egoCount: number;
137
+ superegoCount: number;
138
+ totalSuggestions: number;
139
+ approvedCount: number;
140
+ revisedCount: number;
141
+ totalLatencyMs: number;
142
+ totalInputTokens?: number;
143
+ totalOutputTokens?: number;
144
+ };
145
+ }
146
+
147
+ export interface EvalTrendPoint { // One point in a score trend, keyed by run: runId, createdAt, testCount, overallScore and dimensions are required.
148
+ runId: string;
149
+ createdAt: string;
150
+ description?: string;
151
+ runType?: 'quick' | 'eval' | 'matrix' | 'compare' | 'auto'; // NOTE(review): EvalRun.runType uses 'batch' and 'interaction' where this union has 'eval' and 'auto' — confirm whether the two sets should match
152
+ profiles?: string[];
153
+ scenarioCount?: number;
154
+ testCount: number;
155
+ overallScore: number | null; // required key, but the value may be null
156
+ dimensions: EvalDimensionScores; // all six dimension keys must be supplied; individual values may be null
157
+ }
158
+
159
+ export interface EvalDoc { // Document listing entry; every field is required.
160
+ name: string;
161
+ filename: string;
162
+ title: string;
163
+ size: number; // unit is not stated here — presumably bytes, TODO confirm
164
+ modified: string; // kept as a string; presumably a last-modified timestamp — TODO confirm format
165
+ }
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Haptic Feedback Utilities
3
+ *
4
+ * Provides consistent vibration patterns for mobile interactions.
5
+ * Falls back gracefully when vibration API is not available.
6
+ */
7
+
8
+ type VibrationPattern = number | number[]; // module-private alias for the argument passed to navigator.vibrate: a single value or a list of values
9
+
10
+ const vibrate = (pattern: VibrationPattern): void => { // forwards `pattern` to navigator.vibrate and discards its return value
11
+ if (typeof navigator !== 'undefined' && navigator.vibrate) { // silent no-op when `navigator` is not defined or exposes no truthy `vibrate` member
12
+ navigator.vibrate(pattern);
13
+ }
14
+ };
15
+
16
+ export const haptics = { // named feedback presets; each entry is a zero-argument function that calls vibrate with a fixed pattern
17
+ /** Light tap (5) - tab changes, selections */
18
+ light: () => vibrate(5),
19
+
20
+ /** Medium tap (10) - pull-to-refresh trigger, confirmations */
21
+ medium: () => vibrate(10),
22
+
23
+ /** Heavy tap (20) - errors, warnings */
24
+ heavy: () => vibrate(20),
25
+
26
+ /** Success pattern [10, 50, 10] - test passed, action completed */
27
+ success: () => vibrate([10, 50, 10]),
28
+
29
+ /** Error pattern [20, 100, 20, 100, 20] - test failed, error occurred */
30
+ error: () => vibrate([20, 100, 20, 100, 20]),
31
+
32
+ /** Back online notification - pattern [100, 50, 100] */
33
+ online: () => vibrate([100, 50, 100]),
34
+
35
+ /** Went offline notification - single value 200 */
36
+ offline: () => vibrate(200),
37
+
38
+ /** Copy to clipboard - single value 30 */
39
+ copy: () => vibrate(30),
40
+
41
+ /** Button press feedback - single value 8 */
42
+ button: () => vibrate(8)
43
+ };
44
+
45
+ export default haptics; // default export is the same object as the named `haptics` export