brownian-code 2026.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +97 -0
- package/bin/brownian +25 -0
- package/env.example +21 -0
- package/package.json +87 -0
- package/src/agent/agent.test.ts +414 -0
- package/src/agent/agent.ts +385 -0
- package/src/agent/index.ts +27 -0
- package/src/agent/prompts.ts +271 -0
- package/src/agent/scratchpad.test.ts +482 -0
- package/src/agent/scratchpad.ts +526 -0
- package/src/agent/token-counter.test.ts +59 -0
- package/src/agent/token-counter.ts +33 -0
- package/src/agent/types.ts +137 -0
- package/src/cli.tsx +385 -0
- package/src/commands/builtin.test.ts +271 -0
- package/src/commands/builtin.ts +200 -0
- package/src/commands/registry.test.ts +188 -0
- package/src/commands/registry.ts +111 -0
- package/src/commands/types.ts +64 -0
- package/src/components/AgentEventView.tsx +487 -0
- package/src/components/AnswerBox.tsx +81 -0
- package/src/components/ApiKeyPrompt.tsx +75 -0
- package/src/components/CommandMenu.test.tsx +64 -0
- package/src/components/CommandMenu.tsx +38 -0
- package/src/components/CursorText.tsx +43 -0
- package/src/components/DebugPanel.tsx +48 -0
- package/src/components/ErrorBox.test.tsx +58 -0
- package/src/components/ErrorBox.tsx +26 -0
- package/src/components/HelpView.test.tsx +70 -0
- package/src/components/HelpView.tsx +61 -0
- package/src/components/HistoryItemView.tsx +108 -0
- package/src/components/Input.tsx +193 -0
- package/src/components/Intro.test.tsx +59 -0
- package/src/components/Intro.tsx +35 -0
- package/src/components/ModelSelector.tsx +288 -0
- package/src/components/StatusBar.test.tsx +78 -0
- package/src/components/StatusBar.tsx +56 -0
- package/src/components/WorkingIndicator.tsx +133 -0
- package/src/components/index.ts +23 -0
- package/src/e2e/agent-flow.test.ts +378 -0
- package/src/evals/components/EvalApp.tsx +206 -0
- package/src/evals/components/EvalCurrentQuestion.tsx +42 -0
- package/src/evals/components/EvalProgress.tsx +33 -0
- package/src/evals/components/EvalRecentResults.tsx +63 -0
- package/src/evals/components/EvalStats.tsx +49 -0
- package/src/evals/components/index.ts +5 -0
- package/src/evals/dataset/crypto_agent.csv +16 -0
- package/src/evals/run.ts +355 -0
- package/src/gateway/channels/whatsapp/auth-store.ts +15 -0
- package/src/gateway/channels/whatsapp/inbound.ts +86 -0
- package/src/gateway/channels/whatsapp/login.ts +28 -0
- package/src/gateway/channels/whatsapp/outbound.ts +27 -0
- package/src/gateway/channels/whatsapp/session.ts +69 -0
- package/src/gateway/config.ts +81 -0
- package/src/gateway/index.ts +62 -0
- package/src/hooks/useAgentRunner.ts +317 -0
- package/src/hooks/useDebugLogs.ts +22 -0
- package/src/hooks/useInputHistory.ts +106 -0
- package/src/hooks/useModelSelection.ts +249 -0
- package/src/hooks/useTextBuffer.test.ts +121 -0
- package/src/hooks/useTextBuffer.ts +97 -0
- package/src/index.tsx +74 -0
- package/src/mcp/cache.ts +205 -0
- package/src/mcp/client.test.ts +126 -0
- package/src/mcp/client.ts +145 -0
- package/src/mcp/index.ts +2 -0
- package/src/model/llm.test.ts +158 -0
- package/src/model/llm.ts +233 -0
- package/src/providers.ts +94 -0
- package/src/skills/index.ts +17 -0
- package/src/skills/loader.ts +73 -0
- package/src/skills/registry.ts +125 -0
- package/src/skills/types.ts +31 -0
- package/src/test-utils/mocks.ts +110 -0
- package/src/theme.ts +21 -0
- package/src/tools/browser/browser.ts +357 -0
- package/src/tools/browser/index.ts +1 -0
- package/src/tools/crypto/hive-tools.ts +171 -0
- package/src/tools/crypto/index.ts +1 -0
- package/src/tools/descriptions/browser.ts +105 -0
- package/src/tools/descriptions/crypto-search.ts +58 -0
- package/src/tools/descriptions/index.ts +8 -0
- package/src/tools/descriptions/web-fetch.ts +44 -0
- package/src/tools/descriptions/web-search.ts +26 -0
- package/src/tools/fetch/cache.ts +95 -0
- package/src/tools/fetch/external-content.ts +200 -0
- package/src/tools/fetch/index.ts +1 -0
- package/src/tools/fetch/web-fetch-utils.ts +122 -0
- package/src/tools/fetch/web-fetch.ts +371 -0
- package/src/tools/index.ts +12 -0
- package/src/tools/registry.ts +130 -0
- package/src/tools/search/exa.ts +43 -0
- package/src/tools/search/index.ts +2 -0
- package/src/tools/search/tavily.ts +35 -0
- package/src/tools/skill.ts +62 -0
- package/src/tools/types.ts +53 -0
- package/src/utils/ai-message.ts +26 -0
- package/src/utils/config.ts +54 -0
- package/src/utils/cost-calculator.test.ts +101 -0
- package/src/utils/cost-calculator.ts +74 -0
- package/src/utils/env.ts +101 -0
- package/src/utils/error-classifier.test.ts +146 -0
- package/src/utils/error-classifier.ts +91 -0
- package/src/utils/in-memory-chat-history.test.ts +291 -0
- package/src/utils/in-memory-chat-history.ts +224 -0
- package/src/utils/index.ts +19 -0
- package/src/utils/input-key-handlers.test.ts +155 -0
- package/src/utils/input-key-handlers.ts +64 -0
- package/src/utils/logger.ts +67 -0
- package/src/utils/long-term-chat-history.ts +138 -0
- package/src/utils/markdown-table.ts +227 -0
- package/src/utils/ollama.ts +37 -0
- package/src/utils/progress-channel.ts +84 -0
- package/src/utils/text-navigation.test.ts +222 -0
- package/src/utils/text-navigation.ts +81 -0
- package/src/utils/thinking-verbs.ts +29 -0
- package/src/utils/tokens.test.ts +163 -0
- package/src/utils/tokens.ts +67 -0
- package/src/utils/tool-description.ts +88 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import React, { useState, useEffect } from 'react';
|
|
2
|
+
import { Box, Text, useApp } from 'ink';
|
|
3
|
+
import { colors } from '../../theme.js';
|
|
4
|
+
import { EvalProgress } from './EvalProgress.js';
|
|
5
|
+
import { EvalCurrentQuestion } from './EvalCurrentQuestion.js';
|
|
6
|
+
import { EvalStats } from './EvalStats.js';
|
|
7
|
+
import { EvalRecentResults, type EvalResult } from './EvalRecentResults.js';
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
// Feature flag: when true, the running view of EvalApp renders the live <EvalStats> row.
const SHOW_STATS = true;
|
|
11
|
+
|
|
12
|
+
/**
 * Internal UI state of the EvalApp component.
 */
interface EvalState {
  /** Which of the three views (loading / running / complete) is rendered. */
  status: 'loading' | 'running' | 'complete';
  /** Number of questions announced by the `init` event (0 until then). */
  total: number;
  /** Number of `question_end` events received so far. */
  completed: number;
  /** Number of completed questions whose score was exactly 1. */
  correct: number;
  /** Question currently being evaluated, or null when none is in flight. */
  currentQuestion: string | null;
  /** One entry per finished question, in arrival order. */
  results: EvalResult[];
  /** `Date.now()` timestamp; set at mount and reset when `init` arrives. */
  startTime: number;
  /** Experiment name reported by the `complete` event, if any. */
  experimentName: string | null;
  /** Dataset name reported by the `init` event, if any. */
  datasetName: string | null;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Progress event yielded by the evaluation generator and consumed by EvalApp.
 *
 * All payload fields are optional; EvalApp reads a different subset for each
 * event type and falls back to a default when a field is missing.
 */
export interface EvalProgressEvent {
  /** Discriminator selecting how EvalApp updates its state. */
  type: 'init' | 'question_start' | 'question_end' | 'complete';
  /** Read on `init`: number of questions in the run. */
  total?: number;
  /** Read on `init`: name of the dataset being evaluated. */
  datasetName?: string;
  /** Read on `question_start` and `question_end`: the question text. */
  question?: string;
  /** Read on `question_end`: score of the answer; exactly 1 counts as correct. */
  score?: number;
  /** Read on `question_end`: free-form comment attached to the score. */
  comment?: string;
  /** Read on `complete`: name of the finished experiment. */
  experimentName?: string;
  /** Not read by EvalApp; it recomputes the average from the collected results. */
  averageScore?: number;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Props accepted by EvalApp.
 */
interface EvalAppProps {
  /** Factory returning the async generator whose events drive the UI. */
  runEvaluation: () => AsyncGenerator<EvalProgressEvent, void, unknown>;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Pure state transition: returns the EvalState that results from applying one
 * progress event to the previous state. Unknown event types leave the state
 * untouched.
 */
function reduceEvalEvent(prev: EvalState, event: EvalProgressEvent): EvalState {
  switch (event.type) {
    case 'init':
      return {
        ...prev,
        status: 'running',
        total: event.total ?? 0,
        datasetName: event.datasetName ?? null,
        // Restart the clock so elapsed time excludes dataset loading.
        startTime: Date.now(),
      };

    case 'question_start':
      return {
        ...prev,
        currentQuestion: event.question ?? null,
      };

    case 'question_end':
      return {
        ...prev,
        completed: prev.completed + 1,
        // Only a score of exactly 1 counts as correct.
        correct: prev.correct + (event.score === 1 ? 1 : 0),
        currentQuestion: null,
        results: [
          ...prev.results,
          {
            question: event.question ?? '',
            score: event.score ?? 0,
            comment: event.comment ?? '',
          },
        ],
      };

    case 'complete':
      return {
        ...prev,
        status: 'complete',
        experimentName: event.experimentName ?? null,
        currentQuestion: null,
      };

    default:
      return prev;
  }
}

/**
 * Main Ink component that orchestrates the eval UI.
 *
 * Consumes the events produced by `runEvaluation()` and renders one of three
 * views: a loading notice, the live progress view, or the final summary.
 * When the generator finishes, the Ink app is exited shortly afterwards; if
 * the generator throws, the app is exited with that error.
 *
 * @param runEvaluation Factory returning the async generator of progress events.
 */
export function EvalApp({ runEvaluation }: EvalAppProps) {
  const { exit } = useApp();
  const [state, setState] = useState<EvalState>({
    status: 'loading',
    total: 0,
    completed: 0,
    correct: 0,
    currentQuestion: null,
    results: [],
    startTime: Date.now(),
    experimentName: null,
    datasetName: null,
  });

  useEffect(() => {
    let cancelled = false;
    let exitTimer: ReturnType<typeof setTimeout> | undefined;

    async function run() {
      for await (const event of runEvaluation()) {
        if (cancelled) break;
        setState(prev => reduceEvalEvent(prev, event));
      }

      // A cancelled run belongs to an effect that was cleaned up; it must not
      // tear down the app on behalf of whatever replaced it.
      if (cancelled) return;

      // Exit after a short delay so the final summary gets rendered first.
      exitTimer = setTimeout(() => {
        exit();
      }, 100);
    }

    // Do not leave the promise floating: a failing evaluation would otherwise
    // surface as an unhandled rejection and the app would never exit cleanly.
    run().catch((error: unknown) => {
      exit(error instanceof Error ? error : new Error(String(error)));
    });

    return () => {
      cancelled = true;
      if (exitTimer !== undefined) {
        clearTimeout(exitTimer);
      }
    };
  }, [runEvaluation, exit]);

  // Loading state
  if (state.status === 'loading') {
    return (
      <Box flexDirection="column" marginTop={1}>
        <Text color={colors.primary} bold>Brownian Eval</Text>
        <Text color={colors.muted}>Loading dataset...</Text>
      </Box>
    );
  }

  // Complete state - show final summary
  if (state.status === 'complete') {
    const avgScore = state.results.length > 0
      ? state.results.reduce((sum, r) => sum + r.score, 0) / state.results.length
      : 0;

    return (
      <Box flexDirection="column" marginTop={1}>
        <Text>{'═'.repeat(70)}</Text>
        <Text bold>EVALUATION COMPLETE</Text>
        <Text>{'═'.repeat(70)}</Text>
        <Text>Experiment: {state.experimentName ?? 'unknown'}</Text>
        <Text>Examples evaluated: {state.results.length}</Text>
        <Text>Average correctness score: <Text color={colors.primary} bold>{(avgScore * 100).toFixed(1)}%</Text></Text>
        <Text> </Text>
        <Text>Results by question:</Text>
        <Text>{'─'.repeat(70)}</Text>
        {state.results.map((r, i) => {
          const passed = r.score === 1;
          const icon = passed ? '✓' : '✗';
          const iconColor = passed ? colors.success : colors.error;
          return (
            <Box key={i} flexDirection="column">
              <Box>
                <Text color={iconColor}>{icon} </Text>
                <Text color={colors.muted}>[{r.score}] </Text>
                <Text>{r.question.slice(0, 65)}{r.question.length > 65 ? '...' : ''}</Text>
              </Box>
              {/* Comments are only shown for answers that did not pass. */}
              {r.comment && !passed ? (
                <Text color={colors.muted}> {r.comment.slice(0, 80)}{r.comment.length > 80 ? '...' : ''}</Text>
              ) : null}
            </Box>
          );
        })}
        <Text> </Text>
        <Text>{'─'.repeat(70)}</Text>
        <Text color={colors.muted}>View full results: https://smith.langchain.com</Text>
      </Box>
    );
  }

  // Running state - show progress UI
  return (
    <Box flexDirection="column" marginTop={1}>
      {/* Header */}
      <Box marginBottom={1}>
        <Text color={colors.primary} bold>Brownian Eval</Text>
        {state.datasetName && (
          <Text color={colors.muted}> • {state.datasetName}</Text>
        )}
      </Box>

      {/* Progress bar */}
      <EvalProgress completed={state.completed} total={state.total} />

      {/* Current question with spinner */}
      <Box marginTop={1}>
        <EvalCurrentQuestion question={state.currentQuestion} />
      </Box>

      {/* Live stats */}
      {SHOW_STATS && (
        <Box marginTop={1}>
          <EvalStats
            correct={state.correct}
            incorrect={state.completed - state.correct}
            startTime={state.startTime}
          />
        </Box>
      )}

      {/* Recent results */}
      <EvalRecentResults results={state.results} maxDisplay={5} />
    </Box>
  );
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import React from 'react';
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
import Spinner from 'ink-spinner';
|
|
4
|
+
import { colors } from '../../theme.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Props accepted by EvalCurrentQuestion.
 */
interface EvalCurrentQuestionProps {
  /** Question being evaluated; null (or empty) renders nothing. */
  question: string | null;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Shorten `str` to roughly `maxLength` characters and append "...".
 *
 * Strings that already fit are returned unchanged. Otherwise the cut is made
 * at the last space at or before `maxLength`, provided that space lies past
 * the halfway point; if not, the string is cut hard at `maxLength`.
 */
function truncateAtWord(str: string, maxLength: number): string {
  if (str.length > maxLength) {
    const breakAt = str.lastIndexOf(' ', maxLength);
    // Only honour the word boundary when it keeps more than half the budget.
    const cutIndex = breakAt > maxLength * 0.5 ? breakAt : maxLength;
    return str.slice(0, cutIndex) + '...';
  }

  return str;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Renders a spinner followed by the question that is currently being
 * evaluated, truncated to about 65 characters. Renders nothing when there is
 * no question.
 *
 * Example: ⠋ How has Netflix's Average Revenue Per Paying User Changed...
 */
export function EvalCurrentQuestion({ question }: EvalCurrentQuestionProps) {
  if (!question) {
    return null;
  }

  const label = truncateAtWord(question, 65);

  return (
    <Box>
      <Text color={colors.primary}>
        <Spinner type="dots" />
      </Text>
      <Text> {label}</Text>
    </Box>
  );
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import React from 'react';
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
import { colors } from '../../theme.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Props accepted by EvalProgress.
 */
interface EvalProgressProps {
  /** Number of questions finished so far. */
  completed: number;
  /** Total number of questions in the run. */
  total: number;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Visual progress bar showing evaluation completion percentage.
 *
 * The completed/total ratio is clamped to [0, 1] before it is turned into bar
 * segments, so inconsistent counts (a total of 0, or more completions than the
 * announced total) render a valid bar instead of crashing `String.repeat`.
 * The raw counts are still shown as-is in the "(completed/total)" suffix.
 *
 * Example: Evaluating ████████░░░░░░░░░░░░ 40% (6/15)
 *
 * @param completed Number of questions finished so far.
 * @param total Total number of questions in the run.
 */
export function EvalProgress({ completed, total }: EvalProgressProps) {
  const barWidth = 20;

  // total <= 0 means "unknown yet"; treat it as no progress rather than dividing by it.
  const rawRatio = total > 0 ? completed / total : 0;
  // Non-finite ratios (NaN input) fall back to 0; everything else is clamped.
  const ratio = Number.isFinite(rawRatio) ? Math.min(Math.max(rawRatio, 0), 1) : 0;

  const percentage = Math.round(ratio * 100);
  const filledWidth = Math.round(ratio * barWidth);
  const emptyWidth = barWidth - filledWidth;

  const filledBar = '█'.repeat(filledWidth);
  const emptyBar = '░'.repeat(emptyWidth);

  return (
    <Box>
      <Text color={colors.muted}>Evaluating </Text>
      <Text color={colors.primary}>{filledBar}</Text>
      <Text color={colors.mutedDark}>{emptyBar}</Text>
      <Text color={colors.muted}> {percentage}% </Text>
      <Text color={colors.muted}>({completed}/{total})</Text>
    </Box>
  );
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import React from 'react';
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
import { colors } from '../../theme.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Outcome of evaluating a single question.
 */
export interface EvalResult {
  /** The question text. */
  question: string;
  /** Score assigned to the answer; exactly 1 is displayed as a pass. */
  score: number;
  /** Free-form comment attached to the score. */
  comment: string;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Props accepted by EvalRecentResults.
 */
interface EvalRecentResultsProps {
  /** All results collected so far, oldest first. */
  results: EvalResult[];
  /** How many of the most recent results to show (defaults to 5). */
  maxDisplay?: number;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Shorten `str` to roughly `maxLength` characters and append "...".
 *
 * Strings that already fit are returned unchanged. Otherwise the cut is made
 * at the last space at or before `maxLength`, provided that space lies past
 * the halfway point; if not, the string is cut hard at `maxLength`.
 */
function truncateAtWord(str: string, maxLength: number): string {
  const fits = str.length <= maxLength;
  if (fits) {
    return str;
  }

  const wordBreak = str.lastIndexOf(' ', maxLength);
  // Ignore a word boundary that would discard half or more of the budget.
  const useWordBreak = wordBreak > maxLength * 0.5;
  const kept = useWordBreak ? str.slice(0, wordBreak) : str.slice(0, maxLength);
  return kept + '...';
}
|
|
29
|
+
|
|
30
|
+
/**
 * Shows the last N evaluation results with pass/fail indicators.
 *
 * Renders nothing when there are no results or when `maxDisplay` is below 1.
 *
 * Example:
 * Recent:
 * ✓ Who is the current CFO of Airbnb?
 * ✗ Calculate the 3 year revenue CAGR for Palantir...
 * ✓ What was FND same-store sales growth in Q4 2024?
 *
 * @param results All results collected so far, oldest first.
 * @param maxDisplay How many of the most recent results to show (default 5).
 */
export function EvalRecentResults({ results, maxDisplay = 5 }: EvalRecentResultsProps) {
  const limit = Math.floor(maxDisplay);

  // `slice(-0)` is `slice(0)` and would return every result, so a limit below
  // 1 (or NaN) has to be handled explicitly.
  if (results.length === 0 || !(limit >= 1)) {
    return null;
  }

  const recentResults = results.slice(-limit);
  // Position of the first displayed result in `results`; used to derive keys
  // that stay attached to the same result as the window slides forward.
  const firstIndex = results.length - recentResults.length;

  return (
    <Box flexDirection="column" marginTop={1}>
      <Text color={colors.muted}>Recent:</Text>
      {recentResults.map((result, index) => {
        const isCorrect = result.score === 1;
        const icon = isCorrect ? '✓' : '✗';
        const iconColor = isCorrect ? colors.success : colors.error;

        return (
          <Box key={firstIndex + index}>
            <Text color={iconColor}>{icon} </Text>
            <Text>{truncateAtWord(result.question, 60)}</Text>
          </Box>
        );
      })}
    </Box>
  );
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import React, { useState, useEffect } from 'react';
|
|
2
|
+
import { Box, Text } from 'ink';
|
|
3
|
+
import { colors } from '../../theme.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Props accepted by EvalStats.
 */
interface EvalStatsProps {
  /** Count shown next to the ✓ marker. */
  correct: number;
  /** Count shown next to the ✗ marker. */
  incorrect: number;
  /** Millisecond timestamp (as from `Date.now()`) the elapsed time is measured from. */
  startTime: number;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Format the time elapsed since `startTime` (a `Date.now()` timestamp) as
 * "Xm Ys", or just "Ys" while less than a minute has passed.
 */
function formatElapsed(startTime: number): string {
  const totalSeconds = Math.floor((Date.now() - startTime) / 1000);
  const wholeMinutes = Math.floor(totalSeconds / 60);
  const secondsLabel = `${totalSeconds % 60}s`;

  return wholeMinutes > 0 ? `${wholeMinutes}m ${secondsLabel}` : secondsLabel;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Live stats row: correct count, incorrect count and time elapsed since
 * `startTime`. The component re-renders itself once per second so the
 * elapsed time keeps advancing between prop changes.
 *
 * Example: ✓ 5 correct ✗ 1 incorrect ⏱ 2m 34s
 */
export function EvalStats({ correct, incorrect, startTime }: EvalStatsProps) {
  // The counter value is never read; bumping it just forces a re-render.
  const [, forceRender] = useState(0);

  useEffect(() => {
    const timerId = setInterval(() => {
      forceRender(n => n + 1);
    }, 1000);

    return () => {
      clearInterval(timerId);
    };
  }, []);

  const elapsedLabel = formatElapsed(startTime);

  return (
    <Box gap={2}>
      <Text color={colors.success}>✓ {correct} correct</Text>
      <Text color={colors.error}>✗ {incorrect} incorrect</Text>
      <Text color={colors.muted}>⏱ {elapsedLabel}</Text>
    </Box>
  );
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export { EvalApp, type EvalProgressEvent } from './EvalApp.js';
|
|
2
|
+
export { EvalProgress } from './EvalProgress.js';
|
|
3
|
+
export { EvalCurrentQuestion } from './EvalCurrentQuestion.js';
|
|
4
|
+
export { EvalStats } from './EvalStats.js';
|
|
5
|
+
export { EvalRecentResults, type EvalResult } from './EvalRecentResults.js';
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
question,answer
|
|
2
|
+
What is the current price of Bitcoin?,"Bitcoin's current price should be retrieved via the simple_price_browser endpoint with real-time data."
|
|
3
|
+
What is the price of Ethereum?,"Ethereum's current price should be retrieved from market data endpoints."
|
|
4
|
+
Compare BTC vs ETH vs SOL market performance,"A comparison table of BTC, ETH, and SOL including price, market cap, 24h change, and volume."
|
|
5
|
+
Analyze this wallet: 0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045,"Wallet analysis showing token holdings, total value, and recent activity for Vitalik's known wallet."
|
|
6
|
+
What are the top DeFi protocols by TVL?,"A list of top DeFi protocols ranked by Total Value Locked, sourced from DefiLlama data."
|
|
7
|
+
Show me trending tokens,"A list of currently trending tokens with price and volume data."
|
|
8
|
+
What is the TVL of Aave?,"Aave's current Total Value Locked across all chains."
|
|
9
|
+
What is the market cap of Solana?,"Solana's current market capitalization in USD."
|
|
10
|
+
What are the gas prices on Ethereum right now?,"Current Ethereum gas prices (slow, standard, fast) in gwei."
|
|
11
|
+
Show me the top NFT collections by volume,"Top NFT collections ranked by trading volume."
|
|
12
|
+
What is the 24h trading volume of Bitcoin?,"Bitcoin's 24-hour trading volume across exchanges."
|
|
13
|
+
Compare the top 5 cryptocurrencies by market cap,"A table comparing the top 5 cryptos by market cap including price and 24h change."
|
|
14
|
+
What is the circulating supply of Ethereum?,"Ethereum's current circulating supply."
|
|
15
|
+
Show me BNB Chain network stats,"BNB Chain network statistics including block time and transaction count."
|
|
16
|
+
What are the top lending protocols?,"Top lending/borrowing protocols by TVL including Aave, Compound, and others."
|