@machinespirits/eval 0.1.2 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +161 -0
- package/config/eval-settings.yaml +18 -0
- package/config/evaluation-rubric-learner.yaml +277 -0
- package/config/evaluation-rubric.yaml +613 -0
- package/config/interaction-eval-scenarios.yaml +93 -50
- package/config/learner-agents.yaml +124 -193
- package/config/machinespirits-eval.code-workspace +11 -0
- package/config/providers.yaml +60 -0
- package/config/suggestion-scenarios.yaml +1399 -0
- package/config/tutor-agents.yaml +716 -0
- package/docs/EVALUATION-VARIABLES.md +589 -0
- package/docs/REPLICATION-PLAN.md +577 -0
- package/index.js +15 -6
- package/package.json +16 -22
- package/routes/evalRoutes.js +88 -36
- package/scripts/analyze-judge-reliability.js +401 -0
- package/scripts/analyze-run.js +97 -0
- package/scripts/analyze-run.mjs +282 -0
- package/scripts/analyze-validation-failures.js +141 -0
- package/scripts/check-run.mjs +17 -0
- package/scripts/code-impasse-strategies.js +1132 -0
- package/scripts/compare-runs.js +44 -0
- package/scripts/compare-suggestions.js +80 -0
- package/scripts/compare-transformation.js +116 -0
- package/scripts/dig-into-run.js +158 -0
- package/scripts/eval-cli.js +2626 -0
- package/scripts/generate-paper-figures.py +452 -0
- package/scripts/qualitative-analysis-ai.js +1313 -0
- package/scripts/qualitative-analysis.js +688 -0
- package/scripts/seed-db.js +87 -0
- package/scripts/show-failed-suggestions.js +64 -0
- package/scripts/validate-content.js +192 -0
- package/server.js +3 -2
- package/services/__tests__/evalConfigLoader.test.js +338 -0
- package/services/anovaStats.js +499 -0
- package/services/contentResolver.js +407 -0
- package/services/dialogueTraceAnalyzer.js +454 -0
- package/services/evalConfigLoader.js +625 -0
- package/services/evaluationRunner.js +2171 -270
- package/services/evaluationStore.js +564 -29
- package/services/learnerConfigLoader.js +75 -5
- package/services/learnerRubricEvaluator.js +284 -0
- package/services/learnerTutorInteractionEngine.js +375 -0
- package/services/processUtils.js +18 -0
- package/services/progressLogger.js +98 -0
- package/services/promptRecommendationService.js +31 -26
- package/services/promptRewriter.js +427 -0
- package/services/rubricEvaluator.js +543 -70
- package/services/streamingReporter.js +104 -0
- package/services/turnComparisonAnalyzer.js +494 -0
- package/components/MobileEvalDashboard.tsx +0 -267
- package/components/comparison/DeltaAnalysisTable.tsx +0 -137
- package/components/comparison/ProfileComparisonCard.tsx +0 -176
- package/components/comparison/RecognitionABMode.tsx +0 -385
- package/components/comparison/RecognitionMetricsPanel.tsx +0 -135
- package/components/comparison/WinnerIndicator.tsx +0 -64
- package/components/comparison/index.ts +0 -5
- package/components/mobile/BottomSheet.tsx +0 -233
- package/components/mobile/DimensionBreakdown.tsx +0 -210
- package/components/mobile/DocsView.tsx +0 -363
- package/components/mobile/LogsView.tsx +0 -481
- package/components/mobile/PsychodynamicQuadrant.tsx +0 -261
- package/components/mobile/QuickTestView.tsx +0 -1098
- package/components/mobile/RecognitionTypeChart.tsx +0 -124
- package/components/mobile/RecognitionView.tsx +0 -809
- package/components/mobile/RunDetailView.tsx +0 -261
- package/components/mobile/RunHistoryView.tsx +0 -367
- package/components/mobile/ScoreRadial.tsx +0 -211
- package/components/mobile/StreamingLogPanel.tsx +0 -230
- package/components/mobile/SynthesisStrategyChart.tsx +0 -140
- package/docs/research/ABLATION-DIALOGUE-ROUNDS.md +0 -52
- package/docs/research/ABLATION-MODEL-SELECTION.md +0 -53
- package/docs/research/ADVANCED-EVAL-ANALYSIS.md +0 -60
- package/docs/research/ANOVA-RESULTS-2026-01-14.md +0 -257
- package/docs/research/COMPREHENSIVE-EVALUATION-PLAN.md +0 -586
- package/docs/research/COST-ANALYSIS.md +0 -56
- package/docs/research/CRITICAL-REVIEW-RECOGNITION-TUTORING.md +0 -340
- package/docs/research/DYNAMIC-VS-SCRIPTED-ANALYSIS.md +0 -291
- package/docs/research/EVAL-SYSTEM-ANALYSIS.md +0 -306
- package/docs/research/FACTORIAL-RESULTS-2026-01-14.md +0 -301
- package/docs/research/IMPLEMENTATION-PLAN-CRITIQUE-RESPONSE.md +0 -1988
- package/docs/research/LONGITUDINAL-DYADIC-EVALUATION.md +0 -282
- package/docs/research/MULTI-JUDGE-VALIDATION-2026-01-14.md +0 -147
- package/docs/research/PAPER-EXTENSION-DYADIC.md +0 -204
- package/docs/research/PAPER-UNIFIED.md +0 -659
- package/docs/research/PAPER-UNIFIED.pdf +0 -0
- package/docs/research/PROMPT-IMPROVEMENTS-2026-01-14.md +0 -356
- package/docs/research/SESSION-NOTES-2026-01-11-RECOGNITION-EVAL.md +0 -419
- package/docs/research/apa.csl +0 -2133
- package/docs/research/archive/PAPER-DRAFT-RECOGNITION-TUTORING.md +0 -1637
- package/docs/research/archive/paper-multiagent-tutor.tex +0 -978
- package/docs/research/paper-draft/full-paper.md +0 -136
- package/docs/research/paper-draft/images/pasted-image-2026-01-24T03-47-47-846Z-d76a7ae2.png +0 -0
- package/docs/research/paper-draft/references.bib +0 -515
- package/docs/research/transcript-baseline.md +0 -139
- package/docs/research/transcript-recognition-multiagent.md +0 -187
- package/hooks/useEvalData.ts +0 -625
- package/server-init.js +0 -45
- package/services/benchmarkService.js +0 -1892
- package/types.ts +0 -165
- package/utils/haptics.ts +0 -45
|
@@ -2,212 +2,166 @@
|
|
|
2
2
|
# Defines how the simulated learner generates responses during evaluation
|
|
3
3
|
#
|
|
4
4
|
# ============================================================================
|
|
5
|
-
#
|
|
5
|
+
# MODEL OVERRIDES (optional)
|
|
6
6
|
# ============================================================================
|
|
7
|
+
# These override ALL profile models when uncommented. Useful for quick testing.
|
|
8
|
+
# CLI flags (--model, --ego-model, --superego-model) take precedence over these.
|
|
7
9
|
#
|
|
8
|
-
#
|
|
10
|
+
# model_override: openrouter.haiku # Override ALL models (ego + superego + synthesis)
|
|
11
|
+
# ego_model_override: openrouter.nemotron # Override only ego model
|
|
12
|
+
# superego_model_override: openrouter.kimi-k2.5 # Override only superego model
|
|
13
|
+
#
|
|
14
|
+
# ============================================================================
|
|
15
|
+
# LEARNER PROFILES
|
|
16
|
+
# ============================================================================
|
|
17
|
+
#
|
|
18
|
+
# The learner profile determines whether the simulated learner has
|
|
9
19
|
# internal deliberation before generating responses. This enables testing
|
|
10
20
|
# whether multi-agent learner simulation improves evaluation validity.
|
|
11
21
|
#
|
|
12
|
-
#
|
|
22
|
+
# Profiles:
|
|
13
23
|
# 1. unified: Single learner agent (no internal dialogue)
|
|
14
|
-
# 2.
|
|
15
|
-
# 3.
|
|
24
|
+
# 2. ego_superego: Two-agent ego/superego deliberation (mirrors tutor architecture)
|
|
25
|
+
# 3. psychodynamic: Legacy alias → resolves to ego_superego
|
|
16
26
|
#
|
|
17
27
|
# ============================================================================
|
|
18
28
|
|
|
19
|
-
# Active
|
|
29
|
+
# Active profile (can be overridden by tutor profile's learner_architecture)
|
|
20
30
|
active_architecture: unified
|
|
21
31
|
|
|
22
32
|
# ============================================================================
|
|
23
|
-
#
|
|
33
|
+
# PROFILES
|
|
24
34
|
# ============================================================================
|
|
25
35
|
|
|
26
|
-
|
|
27
|
-
#
|
|
28
|
-
# Simple, fast, good for baseline comparisons
|
|
36
|
+
profiles:
|
|
37
|
+
# Single-agent: no internal deliberation
|
|
29
38
|
unified:
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
39
|
+
description: "Single unified learner agent"
|
|
40
|
+
architecture: unified
|
|
41
|
+
unified_learner:
|
|
42
|
+
provider: openrouter
|
|
43
|
+
model: kimi-k2.5
|
|
44
|
+
prompt_file: learner-unified.md
|
|
45
|
+
hyperparameters:
|
|
46
|
+
temperature: 0.7
|
|
47
|
+
max_tokens: 500
|
|
48
|
+
dialogue:
|
|
33
49
|
enabled: false
|
|
34
50
|
max_rounds: 0
|
|
35
51
|
|
|
36
|
-
|
|
52
|
+
# Two-agent: mirrors tutor ego/superego pattern
|
|
53
|
+
ego_superego:
|
|
54
|
+
description: "Ego/superego learner — mirrors tutor architecture"
|
|
55
|
+
architecture: ego_superego
|
|
56
|
+
ego:
|
|
37
57
|
provider: openrouter
|
|
38
58
|
model: nemotron
|
|
39
|
-
prompt_file: learner-
|
|
59
|
+
prompt_file: learner-ego.md
|
|
40
60
|
hyperparameters:
|
|
41
61
|
temperature: 0.7
|
|
62
|
+
max_tokens: 400
|
|
63
|
+
superego:
|
|
64
|
+
provider: openrouter
|
|
65
|
+
model: kimi-k2.5
|
|
66
|
+
prompt_file: learner-superego.md
|
|
67
|
+
hyperparameters:
|
|
68
|
+
temperature: 0.5
|
|
69
|
+
max_tokens: 400
|
|
70
|
+
synthesis:
|
|
71
|
+
provider: openrouter
|
|
72
|
+
model: kimi-k2.5
|
|
73
|
+
prompt_file: learner-synthesis.md
|
|
74
|
+
hyperparameters:
|
|
75
|
+
temperature: 0.6
|
|
42
76
|
max_tokens: 500
|
|
43
|
-
|
|
44
|
-
# Psychodynamic: Freudian-inspired desire/intellect/aspiration
|
|
45
|
-
# Internal deliberation between:
|
|
46
|
-
# - Desire (Id-like): What the learner wants emotionally
|
|
47
|
-
# - Intellect (Ego-like): Rational analysis of the situation
|
|
48
|
-
# - Aspiration (Superego-like): Idealized learning goals
|
|
49
|
-
psychodynamic:
|
|
50
|
-
name: "Psychodynamic Learner"
|
|
51
|
-
description: "Freudian-inspired internal deliberation between desire, intellect, and aspiration"
|
|
52
|
-
deliberation:
|
|
77
|
+
dialogue:
|
|
53
78
|
enabled: true
|
|
54
79
|
max_rounds: 2
|
|
55
|
-
convergence_threshold: 0.7
|
|
56
|
-
|
|
57
|
-
agents:
|
|
58
|
-
desire:
|
|
59
|
-
role: "id"
|
|
60
|
-
description: "Emotional/affective responses - what the learner WANTS"
|
|
61
|
-
provider: openrouter
|
|
62
|
-
model: nemotron
|
|
63
|
-
prompt_file: learner-desire.md
|
|
64
|
-
hyperparameters:
|
|
65
|
-
temperature: 0.8 # Higher temp for more emotional/varied responses
|
|
66
|
-
max_tokens: 400
|
|
67
80
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
model: nemotron
|
|
83
|
-
prompt_file: learner-aspiration.md
|
|
84
|
-
hyperparameters:
|
|
85
|
-
temperature: 0.6
|
|
86
|
-
max_tokens: 400
|
|
87
|
-
|
|
88
|
-
synthesizer:
|
|
89
|
-
description: "Integrates the three voices into a coherent response"
|
|
90
|
-
provider: openrouter
|
|
91
|
-
model: nemotron
|
|
92
|
-
prompt_file: learner-synthesizer.md
|
|
93
|
-
hyperparameters:
|
|
94
|
-
temperature: 0.6
|
|
95
|
-
max_tokens: 500
|
|
96
|
-
|
|
97
|
-
deliberation_process: |
|
|
98
|
-
The psychodynamic learner simulates internal conflict:
|
|
99
|
-
|
|
100
|
-
1. DESIRE (Id): "I want to skip ahead / I'm bored / This is frustrating"
|
|
101
|
-
- Immediate emotional reactions
|
|
102
|
-
- Avoidance tendencies
|
|
103
|
-
- Curiosity and excitement
|
|
104
|
-
|
|
105
|
-
2. INTELLECT (Ego): "This doesn't make sense yet / I need more examples"
|
|
106
|
-
- Rational assessment of understanding
|
|
107
|
-
- Strategic thinking about learning path
|
|
108
|
-
- Reality testing
|
|
109
|
-
|
|
110
|
-
3. ASPIRATION (Superego): "I should master this / I want to be an expert"
|
|
111
|
-
- Long-term learning goals
|
|
112
|
-
- Internalized expectations
|
|
113
|
-
- Self-improvement drives
|
|
114
|
-
|
|
115
|
-
4. SYNTHESIS: Integration into coherent learner response
|
|
116
|
-
- Balances immediate desires with long-term goals
|
|
117
|
-
- Produces realistic learner behavior
|
|
81
|
+
# Recognition-enhanced: single unified learner with recognition prompts
|
|
82
|
+
unified_recognition:
|
|
83
|
+
description: "Single unified learner with recognition-aware prompt"
|
|
84
|
+
architecture: unified
|
|
85
|
+
unified_learner:
|
|
86
|
+
provider: openrouter
|
|
87
|
+
model: kimi-k2.5
|
|
88
|
+
prompt_file: learner-unified.md
|
|
89
|
+
hyperparameters:
|
|
90
|
+
temperature: 0.7
|
|
91
|
+
max_tokens: 600
|
|
92
|
+
dialogue:
|
|
93
|
+
enabled: false
|
|
94
|
+
max_rounds: 0
|
|
118
95
|
|
|
119
|
-
#
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
96
|
+
# Recognition-enhanced: ego/superego with recognition-specific prompts
|
|
97
|
+
ego_superego_recognition:
|
|
98
|
+
description: "Ego/superego learner with recognition-aware prompts and memory"
|
|
99
|
+
architecture: ego_superego
|
|
100
|
+
ego:
|
|
101
|
+
provider: openrouter
|
|
102
|
+
model: nemotron
|
|
103
|
+
prompt_file: learner-ego-recognition.md
|
|
104
|
+
hyperparameters:
|
|
105
|
+
temperature: 0.7
|
|
106
|
+
max_tokens: 600
|
|
107
|
+
superego:
|
|
108
|
+
provider: openrouter
|
|
109
|
+
model: kimi-k2.5
|
|
110
|
+
prompt_file: learner-superego-recognition.md
|
|
111
|
+
hyperparameters:
|
|
112
|
+
temperature: 0.5
|
|
113
|
+
max_tokens: 600
|
|
114
|
+
synthesis:
|
|
115
|
+
provider: openrouter
|
|
116
|
+
model: kimi-k2.5
|
|
117
|
+
prompt_file: learner-synthesis-recognition.md
|
|
118
|
+
hyperparameters:
|
|
119
|
+
temperature: 0.6
|
|
120
|
+
max_tokens: 700
|
|
121
|
+
dialogue:
|
|
128
122
|
enabled: true
|
|
129
123
|
max_rounds: 2
|
|
130
|
-
convergence_threshold: 0.7
|
|
131
|
-
|
|
132
|
-
agents:
|
|
133
|
-
thesis:
|
|
134
|
-
role: "thesis"
|
|
135
|
-
description: "Initial understanding or position"
|
|
136
|
-
provider: openrouter
|
|
137
|
-
model: nemotron
|
|
138
|
-
prompt_file: learner-thesis.md
|
|
139
|
-
hyperparameters:
|
|
140
|
-
temperature: 0.6
|
|
141
|
-
max_tokens: 400
|
|
142
|
-
|
|
143
|
-
antithesis:
|
|
144
|
-
role: "antithesis"
|
|
145
|
-
description: "Challenge or complication to the thesis"
|
|
146
|
-
provider: openrouter
|
|
147
|
-
model: nemotron
|
|
148
|
-
prompt_file: learner-antithesis.md
|
|
149
|
-
hyperparameters:
|
|
150
|
-
temperature: 0.7
|
|
151
|
-
max_tokens: 400
|
|
152
|
-
|
|
153
|
-
synthesis:
|
|
154
|
-
role: "synthesis"
|
|
155
|
-
description: "Integration that preserves and overcomes the tension"
|
|
156
|
-
provider: openrouter
|
|
157
|
-
model: nemotron
|
|
158
|
-
prompt_file: learner-synthesis.md
|
|
159
|
-
hyperparameters:
|
|
160
|
-
temperature: 0.6
|
|
161
|
-
max_tokens: 500
|
|
162
124
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
125
|
+
# Legacy alias so existing 'psychodynamic' references resolve to ego_superego
|
|
126
|
+
psychodynamic:
|
|
127
|
+
description: "Legacy alias — uses ego/superego architecture"
|
|
128
|
+
architecture: ego_superego
|
|
129
|
+
ego:
|
|
130
|
+
provider: openrouter
|
|
131
|
+
model: nemotron
|
|
132
|
+
prompt_file: learner-ego.md
|
|
133
|
+
hyperparameters:
|
|
134
|
+
temperature: 0.7
|
|
135
|
+
max_tokens: 400
|
|
136
|
+
superego:
|
|
137
|
+
provider: openrouter
|
|
138
|
+
model: kimi-k2.5
|
|
139
|
+
prompt_file: learner-superego.md
|
|
140
|
+
hyperparameters:
|
|
141
|
+
temperature: 0.5
|
|
142
|
+
max_tokens: 400
|
|
143
|
+
synthesis:
|
|
144
|
+
provider: openrouter
|
|
145
|
+
model: kimi-k2.5
|
|
146
|
+
prompt_file: learner-synthesis.md
|
|
147
|
+
hyperparameters:
|
|
148
|
+
temperature: 0.6
|
|
149
|
+
max_tokens: 500
|
|
150
|
+
dialogue:
|
|
151
|
+
enabled: true
|
|
152
|
+
max_rounds: 2
|
|
180
153
|
|
|
181
154
|
# ============================================================================
|
|
182
155
|
# PERSONA MODIFIERS
|
|
183
156
|
# ============================================================================
|
|
184
|
-
# These modifiers adjust
|
|
157
|
+
# These modifiers adjust learner behavior based on persona
|
|
185
158
|
|
|
186
159
|
persona_modifiers:
|
|
187
|
-
confused_novice:
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
eager_explorer:
|
|
193
|
-
desire_weight: 0.5 # Curiosity-driven
|
|
194
|
-
intellect_weight: 0.3 # Quick but sometimes shallow analysis
|
|
195
|
-
aspiration_weight: 0.2 # Less concerned with "should"
|
|
196
|
-
|
|
197
|
-
focused_achiever:
|
|
198
|
-
desire_weight: 0.2 # Controlled emotional reactions
|
|
199
|
-
intellect_weight: 0.4 # Strong analytical focus
|
|
200
|
-
aspiration_weight: 0.4 # Clear goals
|
|
201
|
-
|
|
202
|
-
struggling_anxious:
|
|
203
|
-
desire_weight: 0.5 # Strong anxiety-driven responses
|
|
204
|
-
intellect_weight: 0.2 # Anxiety impairs analysis
|
|
205
|
-
aspiration_weight: 0.3 # High expectations create pressure
|
|
206
|
-
|
|
207
|
-
adversarial_tester:
|
|
208
|
-
desire_weight: 0.3 # Enjoys challenging
|
|
209
|
-
intellect_weight: 0.4 # Analytical about finding weaknesses
|
|
210
|
-
aspiration_weight: 0.3 # Wants to be thorough
|
|
160
|
+
confused_novice: {}
|
|
161
|
+
eager_explorer: {}
|
|
162
|
+
focused_achiever: {}
|
|
163
|
+
struggling_anxious: {}
|
|
164
|
+
adversarial_tester: {}
|
|
211
165
|
|
|
212
166
|
# ============================================================================
|
|
213
167
|
# EVALUATION SETTINGS
|
|
@@ -222,27 +176,4 @@ evaluation:
|
|
|
222
176
|
metrics:
|
|
223
177
|
- deliberation_rounds
|
|
224
178
|
- internal_coherence
|
|
225
|
-
- desire_intellect_tension
|
|
226
|
-
- aspiration_alignment
|
|
227
179
|
- response_authenticity
|
|
228
|
-
|
|
229
|
-
# ============================================================================
|
|
230
|
-
# ABLATION STUDY SUPPORT
|
|
231
|
-
# ============================================================================
|
|
232
|
-
|
|
233
|
-
ablation:
|
|
234
|
-
# Mapping of ablation profiles to learner architectures
|
|
235
|
-
profile_architectures:
|
|
236
|
-
ablation_baseline_unified: unified
|
|
237
|
-
ablation_baseline_multilearner: psychodynamic
|
|
238
|
-
ablation_multiagent_unified: unified
|
|
239
|
-
ablation_multiagent_multilearner: psychodynamic
|
|
240
|
-
ablation_recognition_unified: unified
|
|
241
|
-
ablation_recognition_multilearner: psychodynamic
|
|
242
|
-
ablation_recognition_multiagent_unified: unified
|
|
243
|
-
ablation_recognition_multiagent_multilearner: psychodynamic
|
|
244
|
-
|
|
245
|
-
# Which architectures to compare in ablation studies
|
|
246
|
-
architectures_to_compare:
|
|
247
|
-
- unified
|
|
248
|
-
- psychodynamic
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
# Shared AI Provider Configuration
|
|
2
|
+
# Used by both tutor-agents.yaml and evaluation-rubric.yaml
|
|
3
|
+
#
|
|
4
|
+
# Model IDs are current as of January 2026. Update when new models release.
|
|
5
|
+
|
|
6
|
+
providers:
|
|
7
|
+
anthropic:
|
|
8
|
+
api_key_env: ANTHROPIC_API_KEY
|
|
9
|
+
base_url: https://api.anthropic.com/v1/messages
|
|
10
|
+
default_model: claude-sonnet-4-5
|
|
11
|
+
models:
|
|
12
|
+
haiku: claude-haiku-4-5
|
|
13
|
+
sonnet: claude-sonnet-4-5
|
|
14
|
+
opus: claude-opus-4-5
|
|
15
|
+
|
|
16
|
+
openai:
|
|
17
|
+
api_key_env: OPENAI_API_KEY
|
|
18
|
+
base_url: https://api.openai.com/v1/chat/completions
|
|
19
|
+
default_model: gpt-5-mini
|
|
20
|
+
models:
|
|
21
|
+
mini: gpt-5-mini
|
|
22
|
+
standard: gpt-5.2
|
|
23
|
+
|
|
24
|
+
openrouter:
|
|
25
|
+
api_key_env: OPENROUTER_API_KEY
|
|
26
|
+
base_url: https://openrouter.ai/api/v1/chat/completions
|
|
27
|
+
default_model: nvidia/nemotron-3-nano-30b-a3b:free
|
|
28
|
+
models:
|
|
29
|
+
# Budget-friendly options (only IDs with the ":free" suffix are free tier)
|
|
30
|
+
nemotron: nvidia/nemotron-3-nano-30b-a3b:free
|
|
31
|
+
glm47: z-ai/glm-4.7
|
|
32
|
+
kimi-k2: moonshotai/kimi-k2-thinking
|
|
33
|
+
"kimi-k2.5": moonshotai/kimi-k2.5
|
|
34
|
+
deepseek: deepseek/deepseek-v3.2
|
|
35
|
+
minimax: minimax/minimax-m2.1
|
|
36
|
+
haiku: anthropic/claude-haiku-4.5
|
|
37
|
+
gpt-oss: openai/gpt-oss-120b
|
|
38
|
+
# Mid-tier options
|
|
39
|
+
sonnet: anthropic/claude-sonnet-4.5
|
|
40
|
+
gpt-mini: openai/gpt-5-mini
|
|
41
|
+
gemini-flash: google/gemini-3-flash-preview
|
|
42
|
+
# Premium options
|
|
43
|
+
opus: anthropic/claude-opus-4.5
|
|
44
|
+
gpt: openai/gpt-5.2
|
|
45
|
+
gemini-pro: google/gemini-3-pro-preview
|
|
46
|
+
|
|
47
|
+
gemini:
|
|
48
|
+
api_key_env: GEMINI_API_KEY
|
|
49
|
+
base_url: https://generativelanguage.googleapis.com/v1beta/models
|
|
50
|
+
default_model: gemini-3-flash-preview
|
|
51
|
+
models:
|
|
52
|
+
flash: gemini-3-flash-preview
|
|
53
|
+
pro: gemini-3-pro-preview
|
|
54
|
+
|
|
55
|
+
local:
|
|
56
|
+
base_url: http://localhost:1234/v1/chat/completions
|
|
57
|
+
format: openai
|
|
58
|
+
default_model: local-model
|
|
59
|
+
models:
|
|
60
|
+
default: local-model
|