@tyvm/knowhow 0.0.90 → 0.0.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.depcheckrc +31 -0
- package/bin/knowhow.js +1 -1
- package/package.json +4 -32
- package/src/agents/tools/executeScript/index.ts +5 -0
- package/src/agents/tools/googleSearch.ts +2 -2
- package/src/agents/tools/index.ts +0 -3
- package/src/agents/tools/list.ts +0 -147
- package/src/agents/tools/loadWebpage.ts +3 -113
- package/src/auth/browserLogin.ts +10 -13
- package/src/cli.ts +63 -3
- package/src/clients/gemini.ts +96 -25
- package/src/clients/http.ts +7 -11
- package/src/clients/pricing/google.ts +122 -26
- package/src/conversion.ts +24 -54
- package/src/index.ts +8 -1
- package/src/login.ts +5 -6
- package/src/plugins/language.ts +0 -4
- package/src/plugins/plugins.ts +0 -14
- package/src/plugins/url.ts +31 -12
- package/src/services/GitHub.ts +2 -2
- package/src/services/KnowhowClient.ts +34 -34
- package/src/{plugins/downloader/downloader.ts → services/MediaProcessorService.ts} +109 -267
- package/src/services/S3.ts +16 -16
- package/src/services/index.ts +4 -4
- package/src/services/modules/index.ts +10 -2
- package/src/services/modules/types.ts +5 -2
- package/src/services/script-execution/ScriptExecutor.ts +29 -10
- package/src/services/script-execution/ScriptPolicy.ts +6 -2
- package/src/types.ts +1 -0
- package/src/utils/http.ts +127 -0
- package/src/workers/auth/PasskeySetup.ts +7 -11
- package/tests/clients/AIClient.test.ts +24 -21
- package/tests/manual/file-edits/figma.test.ts +3 -70
- package/tests/plugins/language/languagePlugin-content-triggers.test.ts +2 -0
- package/tests/plugins/language/languagePlugin.test.ts +2 -0
- package/tests/processors/ToolResponseCache.test.ts +2 -2
- package/tests/test.spec.ts +0 -14
- package/tests/unit/modules/moduleLoading.test.ts +7 -4
- package/tests/unit/plugins/pluginLoading.test.ts +6 -6
- package/ts_build/package.json +4 -32
- package/ts_build/src/agents/tools/ast/astAppendNode.d.ts +1 -1
- package/ts_build/src/agents/tools/ast/astAppendNode.js +2 -90
- package/ts_build/src/agents/tools/ast/astAppendNode.js.map +1 -1
- package/ts_build/src/agents/tools/ast/astDeleteNode.d.ts +1 -1
- package/ts_build/src/agents/tools/ast/astDeleteNode.js +2 -88
- package/ts_build/src/agents/tools/ast/astDeleteNode.js.map +1 -1
- package/ts_build/src/agents/tools/ast/astEditNode.d.ts +1 -1
- package/ts_build/src/agents/tools/ast/astEditNode.js +2 -90
- package/ts_build/src/agents/tools/ast/astEditNode.js.map +1 -1
- package/ts_build/src/agents/tools/ast/astGetPathForLine.d.ts +1 -1
- package/ts_build/src/agents/tools/ast/astGetPathForLine.js +2 -72
- package/ts_build/src/agents/tools/ast/astGetPathForLine.js.map +1 -1
- package/ts_build/src/agents/tools/ast/astListPaths.d.ts +1 -1
- package/ts_build/src/agents/tools/ast/astListPaths.js +2 -72
- package/ts_build/src/agents/tools/ast/astListPaths.js.map +1 -1
- package/ts_build/src/agents/tools/executeScript/index.d.ts +3 -2
- package/ts_build/src/agents/tools/executeScript/index.js +4 -1
- package/ts_build/src/agents/tools/executeScript/index.js.map +1 -1
- package/ts_build/src/agents/tools/googleSearch.js +2 -2
- package/ts_build/src/agents/tools/googleSearch.js.map +1 -1
- package/ts_build/src/agents/tools/index.d.ts +0 -3
- package/ts_build/src/agents/tools/index.js +0 -3
- package/ts_build/src/agents/tools/index.js.map +1 -1
- package/ts_build/src/agents/tools/list.js +0 -138
- package/ts_build/src/agents/tools/list.js.map +1 -1
- package/ts_build/src/agents/tools/loadWebpage.js +1 -89
- package/ts_build/src/agents/tools/loadWebpage.js.map +1 -1
- package/ts_build/src/agents/tools/textSearch.d.ts +1 -1
- package/ts_build/src/auth/browserLogin.js +7 -7
- package/ts_build/src/auth/browserLogin.js.map +1 -1
- package/ts_build/src/cli.d.ts +1 -1
- package/ts_build/src/cli.js +47 -1
- package/ts_build/src/cli.js.map +1 -1
- package/ts_build/src/clients/gemini.d.ts +1 -73
- package/ts_build/src/clients/gemini.js +57 -19
- package/ts_build/src/clients/gemini.js.map +1 -1
- package/ts_build/src/clients/http.js +5 -9
- package/ts_build/src/clients/http.js.map +1 -1
- package/ts_build/src/clients/pricing/google.d.ts +17 -73
- package/ts_build/src/clients/pricing/google.js +47 -10
- package/ts_build/src/clients/pricing/google.js.map +1 -1
- package/ts_build/src/conversion.d.ts +1 -4
- package/ts_build/src/conversion.js +12 -27
- package/ts_build/src/conversion.js.map +1 -1
- package/ts_build/src/index.d.ts +4 -0
- package/ts_build/src/index.js +7 -1
- package/ts_build/src/index.js.map +1 -1
- package/ts_build/src/login.js +5 -4
- package/ts_build/src/login.js.map +1 -1
- package/ts_build/src/plugins/downloader/downloader.js +3 -3
- package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
- package/ts_build/src/plugins/language.js.map +1 -1
- package/ts_build/src/plugins/plugins.js +0 -14
- package/ts_build/src/plugins/plugins.js.map +1 -1
- package/ts_build/src/plugins/tree-sitter/editor.d.ts +3 -32
- package/ts_build/src/plugins/tree-sitter/editor.js +6 -208
- package/ts_build/src/plugins/tree-sitter/editor.js.map +1 -1
- package/ts_build/src/plugins/tree-sitter/parser.d.ts +19 -54
- package/ts_build/src/plugins/tree-sitter/parser.js +19 -293
- package/ts_build/src/plugins/tree-sitter/parser.js.map +1 -1
- package/ts_build/src/plugins/tree-sitter/simple-paths.d.ts +2 -15
- package/ts_build/src/plugins/tree-sitter/simple-paths.js +2 -324
- package/ts_build/src/plugins/tree-sitter/simple-paths.js.map +1 -1
- package/ts_build/src/plugins/url.js +27 -8
- package/ts_build/src/plugins/url.js.map +1 -1
- package/ts_build/src/services/GitHub.js +2 -2
- package/ts_build/src/services/GitHub.js.map +1 -1
- package/ts_build/src/services/KnowhowClient.d.ts +29 -29
- package/ts_build/src/services/KnowhowClient.js +33 -33
- package/ts_build/src/services/KnowhowClient.js.map +1 -1
- package/ts_build/src/services/MediaProcessorService.d.ts +22 -0
- package/ts_build/src/services/MediaProcessorService.js +215 -0
- package/ts_build/src/services/MediaProcessorService.js.map +1 -0
- package/ts_build/src/services/S3.js +12 -18
- package/ts_build/src/services/S3.js.map +1 -1
- package/ts_build/src/services/index.d.ts +3 -2
- package/ts_build/src/services/index.js +3 -3
- package/ts_build/src/services/index.js.map +1 -1
- package/ts_build/src/services/modules/index.js +10 -2
- package/ts_build/src/services/modules/index.js.map +1 -1
- package/ts_build/src/services/modules/types.d.ts +5 -2
- package/ts_build/src/services/script-execution/ScriptExecutor.js +22 -7
- package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
- package/ts_build/src/services/script-execution/ScriptPolicy.d.ts +1 -1
- package/ts_build/src/services/script-execution/ScriptPolicy.js +4 -2
- package/ts_build/src/services/script-execution/ScriptPolicy.js.map +1 -1
- package/ts_build/src/types.d.ts +1 -0
- package/ts_build/src/types.js +1 -0
- package/ts_build/src/types.js.map +1 -1
- package/ts_build/src/utils/http.d.ts +27 -0
- package/ts_build/src/utils/http.js +98 -0
- package/ts_build/src/utils/http.js.map +1 -0
- package/ts_build/src/workers/auth/PasskeySetup.js +6 -7
- package/ts_build/src/workers/auth/PasskeySetup.js.map +1 -1
- package/ts_build/tests/clients/AIClient.test.js +11 -14
- package/ts_build/tests/clients/AIClient.test.js.map +1 -1
- package/ts_build/tests/manual/file-edits/figma.test.d.ts +0 -1
- package/ts_build/tests/manual/file-edits/figma.test.js +1 -46
- package/ts_build/tests/manual/file-edits/figma.test.js.map +1 -1
- package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +2 -0
- package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
- package/ts_build/tests/plugins/language/languagePlugin.test.js +2 -0
- package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
- package/ts_build/tests/processors/ToolResponseCache.test.js +2 -2
- package/ts_build/tests/processors/ToolResponseCache.test.js.map +1 -1
- package/ts_build/tests/test.spec.js +0 -14
- package/ts_build/tests/test.spec.js.map +1 -1
- package/ts_build/tests/tree-sitter/tree-sitter.test.d.ts +0 -1
- package/ts_build/tests/tree-sitter/tree-sitter.test.js +2 -183
- package/ts_build/tests/tree-sitter/tree-sitter.test.js.map +1 -1
- package/ts_build/tests/unit/modules/moduleLoading.test.js +6 -4
- package/ts_build/tests/unit/modules/moduleLoading.test.js.map +1 -1
- package/ts_build/tests/unit/plugins/pluginLoading.test.js +4 -4
- package/ts_build/tests/unit/plugins/pluginLoading.test.js.map +1 -1
- package/benchmarks/.dockerignore +0 -7
- package/benchmarks/README.md +0 -166
- package/benchmarks/docker/Dockerfile +0 -68
- package/benchmarks/example-config.yml +0 -27
- package/benchmarks/jest.config.js +0 -13
- package/benchmarks/package-lock.json +0 -4297
- package/benchmarks/package.json +0 -39
- package/benchmarks/results/27b0a06/2025-09-27/xai/xai-grok-code-fast-1.json +0 -2909
- package/benchmarks/results/4057aed/2025-08-14/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -1671
- package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +0 -2814
- package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -2014
- package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3121
- package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +0 -98
- package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +0 -3256
- package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +0 -4298
- package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3031
- package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -2990
- package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3256
- package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3007
- package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -3256
- package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +0 -3036
- package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +0 -3280
- package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -1920
- package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +0 -3281
- package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +0 -2896
- package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3011
- package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +0 -3003
- package/benchmarks/scripts/build-and-run.sh +0 -47
- package/benchmarks/scripts/clone-exercism.sh +0 -92
- package/benchmarks/scripts/validate.sh +0 -48
- package/benchmarks/src/__tests__/runner.test.ts +0 -27
- package/benchmarks/src/cli.ts +0 -90
- package/benchmarks/src/evaluators/EvaluatorRegistry.ts +0 -64
- package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +0 -183
- package/benchmarks/src/evaluators/index.ts +0 -3
- package/benchmarks/src/evaluators/types.ts +0 -22
- package/benchmarks/src/index.ts +0 -3
- package/benchmarks/src/providers.ts +0 -13
- package/benchmarks/src/runner.ts +0 -824
- package/benchmarks/src/types.ts +0 -63
- package/benchmarks/tsconfig.json +0 -19
- package/leaderboard/README.md +0 -148
- package/leaderboard/app/api/benchmark-data/route.ts +0 -131
- package/leaderboard/app/api/benchmark-detail/route.ts +0 -172
- package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +0 -501
- package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +0 -375
- package/leaderboard/app/globals.css +0 -27
- package/leaderboard/app/layout.tsx +0 -21
- package/leaderboard/app/page.tsx +0 -170
- package/leaderboard/components/LeaderboardTable.tsx +0 -168
- package/leaderboard/components/PerformanceChart.tsx +0 -109
- package/leaderboard/next-env.d.ts +0 -5
- package/leaderboard/next.config.js +0 -4
- package/leaderboard/package-lock.json +0 -6363
- package/leaderboard/package.json +0 -28
- package/leaderboard/postcss.config.js +0 -6
- package/leaderboard/tailwind.config.js +0 -17
- package/leaderboard/tsconfig.json +0 -28
- package/leaderboard/types/benchmark.ts +0 -67
- package/leaderboard/utils/dataProcessor.ts +0 -33
- package/src/agents/tools/asana/definitions.ts +0 -199
- package/src/agents/tools/asana/index.ts +0 -108
- package/src/agents/tools/ast/astAppendNode.ts +0 -90
- package/src/agents/tools/ast/astDeleteNode.ts +0 -88
- package/src/agents/tools/ast/astEditNode.ts +0 -95
- package/src/agents/tools/ast/astGetPathForLine.ts +0 -73
- package/src/agents/tools/ast/astListPaths.ts +0 -66
- package/src/agents/tools/ast/index.ts +0 -7
- package/src/agents/tools/github/definitions.ts +0 -89
- package/src/agents/tools/github/index.ts +0 -67
- package/src/chat-old.ts +0 -446
- package/src/plugins/asana.ts +0 -146
- package/src/plugins/downloader/plugin.ts +0 -103
- package/src/plugins/downloader/types.ts +0 -92
- package/src/plugins/figma.ts +0 -158
- package/src/plugins/github.ts +0 -219
- package/src/plugins/jira.ts +0 -115
- package/src/plugins/linear.ts +0 -230
- package/src/plugins/notion.ts +0 -179
- package/src/plugins/tree-sitter/editor.ts +0 -369
- package/src/plugins/tree-sitter/lang-packs/index.ts +0 -23
- package/src/plugins/tree-sitter/lang-packs/java.ts +0 -59
- package/src/plugins/tree-sitter/lang-packs/javascript.ts +0 -57
- package/src/plugins/tree-sitter/lang-packs/python.ts +0 -45
- package/src/plugins/tree-sitter/lang-packs/types.ts +0 -79
- package/src/plugins/tree-sitter/lang-packs/typescript.ts +0 -49
- package/src/plugins/tree-sitter/parser.ts +0 -470
- package/src/plugins/tree-sitter/simple-paths.ts +0 -467
- package/tests/tree-sitter/editor.test.ts +0 -113
- package/tests/tree-sitter/invalid.test.ts +0 -299
- package/tests/tree-sitter/paths/common-edits.test.ts +0 -564
- package/tests/tree-sitter/paths/debug-exact-position.test.ts +0 -44
- package/tests/tree-sitter/paths/debug-line-indexing.test.ts +0 -49
- package/tests/tree-sitter/paths/debug-paths.test.ts +0 -90
- package/tests/tree-sitter/paths/paths.test.ts +0 -170
- package/tests/tree-sitter/paths/simple-paths.test.ts +0 -367
- package/tests/tree-sitter/sample-after.ts +0 -48
- package/tests/tree-sitter/sample-before.ts +0 -25
- package/tests/tree-sitter/test-files/completely-broken.ts +0 -7
- package/tests/tree-sitter/test-files/duplicate-braces.ts +0 -39
- package/tests/tree-sitter/test-files/invalid-nesting.ts +0 -39
- package/tests/tree-sitter/test-files/malformed-signature.ts +0 -39
- package/tests/tree-sitter/test-files/mismatched-parens.ts +0 -39
- package/tests/tree-sitter/test-files/missing-semicolon.ts +0 -39
- package/tests/tree-sitter/test-files/partially-broken.ts +0 -20
- package/tests/tree-sitter/test-files/specific-errors.ts +0 -14
- package/tests/tree-sitter/test-files/unclosed-string.ts +0 -39
- package/tests/tree-sitter/tree-sitter.test.ts +0 -251
|
@@ -1,501 +0,0 @@
|
|
|
1
|
-
"use client";
|
|
2
|
-
|
|
3
|
-
import { useState, useEffect } from "react";
|
|
4
|
-
import { useParams, useRouter, useSearchParams } from "next/navigation";
|
|
5
|
-
import { BenchmarkResults, ExerciseResult } from "@/types/benchmark";
|
|
6
|
-
import {
|
|
7
|
-
formatCurrency,
|
|
8
|
-
formatTime,
|
|
9
|
-
formatPercentage,
|
|
10
|
-
} from "@/utils/dataProcessor";
|
|
11
|
-
|
|
12
|
-
interface HistoricalRun {
|
|
13
|
-
endTime: string;
|
|
14
|
-
successRate: number;
|
|
15
|
-
totalExercises: number;
|
|
16
|
-
totalCost: number;
|
|
17
|
-
averageTime: number;
|
|
18
|
-
averageTurns: number;
|
|
19
|
-
commitHash: string;
|
|
20
|
-
averageCost: number;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
interface DetailResponse {
|
|
24
|
-
latest: BenchmarkResults;
|
|
25
|
-
history: HistoricalRun[];
|
|
26
|
-
totalRuns: number;
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
export default function ModelDetailPage() {
|
|
30
|
-
const params = useParams();
|
|
31
|
-
const router = useRouter();
|
|
32
|
-
const searchParams = useSearchParams();
|
|
33
|
-
const [detailData, setDetailData] = useState<DetailResponse | null>(
|
|
34
|
-
null
|
|
35
|
-
);
|
|
36
|
-
const [loading, setLoading] = useState(true);
|
|
37
|
-
const [error, setError] = useState<string | null>(null);
|
|
38
|
-
|
|
39
|
-
const model = decodeURIComponent(params.model as string);
|
|
40
|
-
const provider = decodeURIComponent(params.provider as string);
|
|
41
|
-
const language = decodeURIComponent(params.language as string);
|
|
42
|
-
const timestamp = searchParams.get('timestamp');
|
|
43
|
-
|
|
44
|
-
useEffect(() => {
|
|
45
|
-
async function fetchDetailData() {
|
|
46
|
-
try {
|
|
47
|
-
const response = await fetch(
|
|
48
|
-
`/api/benchmark-detail?model=${encodeURIComponent(
|
|
49
|
-
model
|
|
50
|
-
)}&provider=${encodeURIComponent(
|
|
51
|
-
provider
|
|
52
|
-
)}&language=${encodeURIComponent(language)}${
|
|
53
|
-
timestamp ? `&timestamp=${timestamp}` : ''
|
|
54
|
-
}`
|
|
55
|
-
);
|
|
56
|
-
if (!response.ok) {
|
|
57
|
-
throw new Error("Failed to fetch benchmark details");
|
|
58
|
-
}
|
|
59
|
-
const data = await response.json();
|
|
60
|
-
setDetailData(data);
|
|
61
|
-
} catch (err) {
|
|
62
|
-
setError(err instanceof Error ? err.message : "An error occurred");
|
|
63
|
-
} finally {
|
|
64
|
-
setLoading(false);
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
fetchDetailData();
|
|
69
|
-
}, [model, provider, language, timestamp]);
|
|
70
|
-
|
|
71
|
-
const loadHistoricalRun = async (timestamp: string) => {
|
|
72
|
-
// Navigate to the same page but with timestamp parameter
|
|
73
|
-
router.push(`/details/${encodeURIComponent(model)}/${encodeURIComponent(provider)}/${encodeURIComponent(language)}?timestamp=${timestamp}`);
|
|
74
|
-
};
|
|
75
|
-
|
|
76
|
-
const backToLatestRun = () => {
|
|
77
|
-
// Navigate to the same page without timestamp parameter
|
|
78
|
-
router.push(`/details/${encodeURIComponent(model)}/${encodeURIComponent(provider)}/${encodeURIComponent(language)}`);
|
|
79
|
-
};
|
|
80
|
-
|
|
81
|
-
const getStatusBadge = (status: string) => {
|
|
82
|
-
const baseClasses = "px-2 py-1 text-xs font-medium rounded-full";
|
|
83
|
-
switch (status) {
|
|
84
|
-
case "success":
|
|
85
|
-
return `${baseClasses} bg-green-100 text-green-800`;
|
|
86
|
-
case "failure":
|
|
87
|
-
return `${baseClasses} bg-red-100 text-red-800`;
|
|
88
|
-
case "timeout":
|
|
89
|
-
return `${baseClasses} bg-yellow-100 text-yellow-800`;
|
|
90
|
-
case "cost_limit":
|
|
91
|
-
return `${baseClasses} bg-orange-100 text-orange-800`;
|
|
92
|
-
case "turn_limit":
|
|
93
|
-
return `${baseClasses} bg-purple-100 text-purple-800`;
|
|
94
|
-
default:
|
|
95
|
-
return `${baseClasses} bg-gray-100 text-gray-800`;
|
|
96
|
-
}
|
|
97
|
-
};
|
|
98
|
-
|
|
99
|
-
const getStatusIcon = (status: string) => {
|
|
100
|
-
switch (status) {
|
|
101
|
-
case "success":
|
|
102
|
-
return "✅";
|
|
103
|
-
case "failure":
|
|
104
|
-
return "❌";
|
|
105
|
-
case "timeout":
|
|
106
|
-
return "⏰";
|
|
107
|
-
case "cost_limit":
|
|
108
|
-
return "💰";
|
|
109
|
-
case "turn_limit":
|
|
110
|
-
return "🔄";
|
|
111
|
-
default:
|
|
112
|
-
return "❓";
|
|
113
|
-
}
|
|
114
|
-
};
|
|
115
|
-
|
|
116
|
-
if (loading) {
|
|
117
|
-
return (
|
|
118
|
-
<div className="min-h-screen bg-gray-50 flex items-center justify-center">
|
|
119
|
-
<div className="text-center">
|
|
120
|
-
<div className="animate-spin rounded-full h-32 w-32 border-b-2 border-blue-500 mx-auto"></div>
|
|
121
|
-
<p className="mt-4 text-gray-600">Loading benchmark details...</p>
|
|
122
|
-
</div>
|
|
123
|
-
</div>
|
|
124
|
-
);
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
if (error || !detailData) {
|
|
128
|
-
return (
|
|
129
|
-
<div className="min-h-screen bg-gray-50 flex items-center justify-center">
|
|
130
|
-
<div className="text-center">
|
|
131
|
-
<div className="text-red-500 text-6xl mb-4">⚠️</div>
|
|
132
|
-
<h3 className="text-lg font-medium text-gray-900 mb-2">
|
|
133
|
-
Error Loading Details
|
|
134
|
-
</h3>
|
|
135
|
-
<p className="text-gray-500 mb-4">
|
|
136
|
-
{error || "Benchmark data not found"}
|
|
137
|
-
</p>
|
|
138
|
-
<button
|
|
139
|
-
onClick={() => router.back()}
|
|
140
|
-
className="bg-blue-500 hover:bg-blue-600 text-white px-4 py-2 rounded-md"
|
|
141
|
-
>
|
|
142
|
-
Go Back
|
|
143
|
-
</button>
|
|
144
|
-
</div>
|
|
145
|
-
</div>
|
|
146
|
-
);
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
// Extract the latest benchmark data for display
|
|
150
|
-
const benchmarkData = detailData.latest;
|
|
151
|
-
const isHistoricalView = timestamp !== null;
|
|
152
|
-
|
|
153
|
-
return (
|
|
154
|
-
<div className="min-h-screen bg-gray-50">
|
|
155
|
-
<div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
|
|
156
|
-
{/* Header */}
|
|
157
|
-
<div className="mb-8">
|
|
158
|
-
<button
|
|
159
|
-
onClick={() => router.push("/")}
|
|
160
|
-
className="mb-4 text-blue-600 hover:text-blue-800 flex items-center"
|
|
161
|
-
>
|
|
162
|
-
← Back to Leaderboard
|
|
163
|
-
</button>
|
|
164
|
-
<h1 className="text-3xl font-bold text-gray-900">
|
|
165
|
-
Benchmark Details: {model}
|
|
166
|
-
</h1>
|
|
167
|
-
<p className="mt-2 text-gray-600">
|
|
168
|
-
Provider: {provider} • Language: {language}
|
|
169
|
-
</p>
|
|
170
|
-
{isHistoricalView && (
|
|
171
|
-
<div className="mt-4 p-3 bg-blue-50 border border-blue-200 rounded-md">
|
|
172
|
-
<p className="text-blue-800 text-sm">
|
|
173
|
-
Viewing historical run from {new Date(benchmarkData.endTime).toLocaleString()}
|
|
174
|
-
</p>
|
|
175
|
-
<button
|
|
176
|
-
onClick={backToLatestRun}
|
|
177
|
-
className="mt-2 text-blue-600 hover:text-blue-800 text-sm underline"
|
|
178
|
-
>
|
|
179
|
-
← Back to latest run
|
|
180
|
-
</button>
|
|
181
|
-
</div>
|
|
182
|
-
)}
|
|
183
|
-
</div>
|
|
184
|
-
|
|
185
|
-
{/* Summary Cards */}
|
|
186
|
-
<div className="grid grid-cols-1 md:grid-cols-4 gap-6 mb-8">
|
|
187
|
-
<div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
|
|
188
|
-
<div className="flex items-center">
|
|
189
|
-
<div className="flex-shrink-0">
|
|
190
|
-
<div className="w-8 h-8 bg-green-500 rounded-md flex items-center justify-center">
|
|
191
|
-
<span className="text-white font-bold">%</span>
|
|
192
|
-
</div>
|
|
193
|
-
</div>
|
|
194
|
-
<div className="ml-4">
|
|
195
|
-
<p className="text-sm font-medium text-gray-500">
|
|
196
|
-
Success Rate
|
|
197
|
-
</p>
|
|
198
|
-
<p className="text-2xl font-semibold text-gray-900">
|
|
199
|
-
{formatPercentage(benchmarkData.summary.successRate * 100)}
|
|
200
|
-
</p>
|
|
201
|
-
</div>
|
|
202
|
-
</div>
|
|
203
|
-
</div>
|
|
204
|
-
|
|
205
|
-
<div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
|
|
206
|
-
<div className="flex items-center">
|
|
207
|
-
<div className="flex-shrink-0">
|
|
208
|
-
<div className="w-8 h-8 bg-blue-500 rounded-md flex items-center justify-center">
|
|
209
|
-
<span className="text-white font-bold">E</span>
|
|
210
|
-
</div>
|
|
211
|
-
</div>
|
|
212
|
-
<div className="ml-4">
|
|
213
|
-
<p className="text-sm font-medium text-gray-500">
|
|
214
|
-
Total Exercises
|
|
215
|
-
</p>
|
|
216
|
-
<p className="text-2xl font-semibold text-gray-900">
|
|
217
|
-
{benchmarkData.summary.totalExercises}
|
|
218
|
-
</p>
|
|
219
|
-
</div>
|
|
220
|
-
</div>
|
|
221
|
-
</div>
|
|
222
|
-
|
|
223
|
-
<div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
|
|
224
|
-
<div className="flex items-center">
|
|
225
|
-
<div className="flex-shrink-0">
|
|
226
|
-
<div className="w-8 h-8 bg-yellow-500 rounded-md flex items-center justify-center">
|
|
227
|
-
<span className="text-white font-bold">$</span>
|
|
228
|
-
</div>
|
|
229
|
-
</div>
|
|
230
|
-
<div className="ml-4">
|
|
231
|
-
<p className="text-sm font-medium text-gray-500">Total Cost</p>
|
|
232
|
-
<p className="text-2xl font-semibold text-gray-900">
|
|
233
|
-
{formatCurrency(benchmarkData.summary.totalCost)}
|
|
234
|
-
</p>
|
|
235
|
-
</div>
|
|
236
|
-
</div>
|
|
237
|
-
</div>
|
|
238
|
-
|
|
239
|
-
<div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
|
|
240
|
-
<div className="flex items-center">
|
|
241
|
-
<div className="flex-shrink-0">
|
|
242
|
-
<div className="w-8 h-8 bg-purple-500 rounded-md flex items-center justify-center">
|
|
243
|
-
<span className="text-white font-bold">T</span>
|
|
244
|
-
</div>
|
|
245
|
-
</div>
|
|
246
|
-
<div className="ml-4">
|
|
247
|
-
<p className="text-sm font-medium text-gray-500">Total Time</p>
|
|
248
|
-
<p className="text-2xl font-semibold text-gray-900">
|
|
249
|
-
{formatTime(benchmarkData.summary.totalTime)}
|
|
250
|
-
</p>
|
|
251
|
-
</div>
|
|
252
|
-
</div>
|
|
253
|
-
</div>
|
|
254
|
-
</div>
|
|
255
|
-
|
|
256
|
-
{/* Exercise Results Table */}
|
|
257
|
-
<div className="bg-white rounded-lg shadow-sm border border-gray-200">
|
|
258
|
-
<div className="px-6 py-4 border-b border-gray-200">
|
|
259
|
-
<h2 className="text-xl font-semibold text-gray-900">
|
|
260
|
-
Exercise Results
|
|
261
|
-
</h2>
|
|
262
|
-
<p className="mt-1 text-sm text-gray-500">
|
|
263
|
-
Detailed breakdown of each exercise performance
|
|
264
|
-
</p>
|
|
265
|
-
</div>
|
|
266
|
-
<div className="overflow-x-auto">
|
|
267
|
-
<table className="min-w-full">
|
|
268
|
-
<thead className="bg-gray-50">
|
|
269
|
-
<tr>
|
|
270
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
271
|
-
Exercise
|
|
272
|
-
</th>
|
|
273
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
274
|
-
Status
|
|
275
|
-
</th>
|
|
276
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
277
|
-
Pass / Total
|
|
278
|
-
</th>
|
|
279
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
280
|
-
Time
|
|
281
|
-
</th>
|
|
282
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
283
|
-
Cost
|
|
284
|
-
</th>
|
|
285
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
286
|
-
Turns
|
|
287
|
-
</th>
|
|
288
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
289
|
-
Output
|
|
290
|
-
</th>
|
|
291
|
-
</tr>
|
|
292
|
-
</thead>
|
|
293
|
-
<tbody className="bg-white divide-y divide-gray-200">
|
|
294
|
-
{benchmarkData.exercises.map((exercise, index) => (
|
|
295
|
-
<tr key={exercise.exerciseName} className="hover:bg-gray-50">
|
|
296
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
|
|
297
|
-
<button
|
|
298
|
-
onClick={() => router.push(`/exercise/${encodeURIComponent(model)}/${encodeURIComponent(provider)}/${encodeURIComponent(language)}/${encodeURIComponent(exercise.exerciseName)}`)}
|
|
299
|
-
className="text-blue-600 hover:text-blue-800 hover:underline text-left"
|
|
300
|
-
title="Click to view detailed exercise results"
|
|
301
|
-
>
|
|
302
|
-
{exercise.exerciseName}
|
|
303
|
-
</button>
|
|
304
|
-
</td>
|
|
305
|
-
<td className="px-6 py-4 whitespace-nowrap">
|
|
306
|
-
<div className="flex items-center">
|
|
307
|
-
<span className="mr-2">
|
|
308
|
-
{getStatusIcon(
|
|
309
|
-
exercise.testResult?.success ? "success" : "failure"
|
|
310
|
-
)}
|
|
311
|
-
</span>
|
|
312
|
-
<span
|
|
313
|
-
className={getStatusBadge(
|
|
314
|
-
exercise.testResult?.success
|
|
315
|
-
)}
|
|
316
|
-
>
|
|
317
|
-
{exercise.testResult?.success ? "Pass" : "Fail"}
|
|
318
|
-
</span>
|
|
319
|
-
</div>
|
|
320
|
-
</td>
|
|
321
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
|
|
322
|
-
{exercise.testResult?.passed} /{" "}
|
|
323
|
-
{exercise.testResult?.total}{" "}
|
|
324
|
-
<div>
|
|
325
|
-
{exercise.testResult?.skipped
|
|
326
|
-
? `(${exercise.testResult?.skipped} skipped)`
|
|
327
|
-
: ""}
|
|
328
|
-
</div>
|
|
329
|
-
</td>
|
|
330
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
|
|
331
|
-
{formatTime(exercise.timeElapsed)}
|
|
332
|
-
</td>
|
|
333
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
|
|
334
|
-
{formatCurrency(exercise.cost)}
|
|
335
|
-
</td>
|
|
336
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
|
|
337
|
-
{exercise.turns}
|
|
338
|
-
</td>
|
|
339
|
-
<td className="px-6 py-4 text-sm text-gray-500 max-w-xs truncate">
|
|
340
|
-
{exercise.finalOutput?.slice(0, 100) || "-"}
|
|
341
|
-
</td>
|
|
342
|
-
</tr>
|
|
343
|
-
))}
|
|
344
|
-
</tbody>
|
|
345
|
-
</table>
|
|
346
|
-
</div>
|
|
347
|
-
</div>
|
|
348
|
-
|
|
349
|
-
{/* Historical Performance Section */}
|
|
350
|
-
{detailData.history.length > 0 && (
|
|
351
|
-
<div className="mt-8 bg-white rounded-lg shadow-sm border border-gray-200">
|
|
352
|
-
<div className="px-6 py-4 border-b border-gray-200">
|
|
353
|
-
<h2 className="text-xl font-semibold text-gray-900">
|
|
354
|
-
Historical Performance
|
|
355
|
-
</h2>
|
|
356
|
-
<p className="mt-1 text-sm text-gray-500">
|
|
357
|
-
Previous runs for this model/provider/language combination ({detailData.totalRuns} total runs)
|
|
358
|
-
</p>
|
|
359
|
-
</div>
|
|
360
|
-
<div className="overflow-x-auto">
|
|
361
|
-
<table className="min-w-full">
|
|
362
|
-
<thead className="bg-gray-50">
|
|
363
|
-
<tr>
|
|
364
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
365
|
-
Run Date
|
|
366
|
-
</th>
|
|
367
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
368
|
-
Success Rate
|
|
369
|
-
</th>
|
|
370
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
371
|
-
Exercises
|
|
372
|
-
</th>
|
|
373
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
374
|
-
Avg Cost
|
|
375
|
-
</th>
|
|
376
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
377
|
-
Avg Time
|
|
378
|
-
</th>
|
|
379
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
380
|
-
Avg Turns
|
|
381
|
-
</th>
|
|
382
|
-
<th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
|
|
383
|
-
Commit
|
|
384
|
-
</th>
|
|
385
|
-
</tr>
|
|
386
|
-
</thead>
|
|
387
|
-
<tbody className="bg-white divide-y divide-gray-200">
|
|
388
|
-
{detailData.history.map((run, index) => (
|
|
389
|
-
<tr
|
|
390
|
-
key={`${run.endTime}-${index}`}
|
|
391
|
-
className="hover:bg-gray-50 cursor-pointer"
|
|
392
|
-
onClick={() => loadHistoricalRun(run.endTime)}
|
|
393
|
-
title="Click to view detailed results for this run"
|
|
394
|
-
>
|
|
395
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
|
|
396
|
-
{new Date(run.endTime).toLocaleDateString()}
|
|
397
|
-
</td>
|
|
398
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
|
|
399
|
-
{formatPercentage(run.successRate)}
|
|
400
|
-
</td>
|
|
401
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
|
|
402
|
-
{run.totalExercises}
|
|
403
|
-
</td>
|
|
404
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
|
|
405
|
-
{formatCurrency(run.averageCost)}
|
|
406
|
-
</td>
|
|
407
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
|
|
408
|
-
{formatTime(run.averageTime)}
|
|
409
|
-
</td>
|
|
410
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
|
|
411
|
-
{run.averageTurns.toFixed(1)}
|
|
412
|
-
</td>
|
|
413
|
-
<td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500 font-mono">
|
|
414
|
-
{run.commitHash.slice(0, 8)}
|
|
415
|
-
</td>
|
|
416
|
-
</tr>
|
|
417
|
-
))}
|
|
418
|
-
</tbody>
|
|
419
|
-
</table>
|
|
420
|
-
</div>
|
|
421
|
-
</div>
|
|
422
|
-
)}
|
|
423
|
-
|
|
424
|
-
{/* Run Information */}
|
|
425
|
-
<div className="mt-8 bg-white rounded-lg shadow-sm border border-gray-200 p-6">
|
|
426
|
-
<h3 className="text-lg font-semibold text-gray-900 mb-4">
|
|
427
|
-
Run Information
|
|
428
|
-
</h3>
|
|
429
|
-
<div className="grid grid-cols-1 md:grid-cols-2 gap-6">
|
|
430
|
-
<div>
|
|
431
|
-
<h4 className="text-sm font-medium text-gray-500 mb-2">
|
|
432
|
-
Configuration
|
|
433
|
-
</h4>
|
|
434
|
-
<dl className="space-y-1">
|
|
435
|
-
<div className="flex">
|
|
436
|
-
<dt className="text-sm text-gray-500 w-24">Model:</dt>
|
|
437
|
-
<dd className="text-sm text-gray-900">
|
|
438
|
-
{benchmarkData.config.model}
|
|
439
|
-
</dd>
|
|
440
|
-
</div>
|
|
441
|
-
<div className="flex">
|
|
442
|
-
<dt className="text-sm text-gray-500 w-24">Provider:</dt>
|
|
443
|
-
<dd className="text-sm text-gray-900">
|
|
444
|
-
{benchmarkData.config.provider}
|
|
445
|
-
</dd>
|
|
446
|
-
</div>
|
|
447
|
-
<div className="flex">
|
|
448
|
-
<dt className="text-sm text-gray-500 w-24">Language:</dt>
|
|
449
|
-
<dd className="text-sm text-gray-900">
|
|
450
|
-
{benchmarkData.config.language}
|
|
451
|
-
</dd>
|
|
452
|
-
</div>
|
|
453
|
-
{benchmarkData.config.agent && (
|
|
454
|
-
<div className="flex">
|
|
455
|
-
<dt className="text-sm text-gray-500 w-24">Agent:</dt>
|
|
456
|
-
<dd className="text-sm text-gray-900">
|
|
457
|
-
{benchmarkData.config.agent}
|
|
458
|
-
</dd>
|
|
459
|
-
</div>
|
|
460
|
-
)}
|
|
461
|
-
</dl>
|
|
462
|
-
</div>
|
|
463
|
-
<div>
|
|
464
|
-
<h4 className="text-sm font-medium text-gray-500 mb-2">Limits</h4>
|
|
465
|
-
<dl className="space-y-1">
|
|
466
|
-
<div className="flex">
|
|
467
|
-
<dt className="text-sm text-gray-500 w-24">Max Turns:</dt>
|
|
468
|
-
<dd className="text-sm text-gray-900">
|
|
469
|
-
{benchmarkData.config.limits.maxTurns}
|
|
470
|
-
</dd>
|
|
471
|
-
</div>
|
|
472
|
-
<div className="flex">
|
|
473
|
-
<dt className="text-sm text-gray-500 w-24">Max Time:</dt>
|
|
474
|
-
<dd className="text-sm text-gray-900">
|
|
475
|
-
{formatTime(benchmarkData.config.limits.maxTime)}
|
|
476
|
-
</dd>
|
|
477
|
-
</div>
|
|
478
|
-
<div className="flex">
|
|
479
|
-
<dt className="text-sm text-gray-500 w-24">Max Cost:</dt>
|
|
480
|
-
<dd className="text-sm text-gray-900">
|
|
481
|
-
{formatCurrency(benchmarkData.config.limits.maxCost)}
|
|
482
|
-
</dd>
|
|
483
|
-
</div>
|
|
484
|
-
</dl>
|
|
485
|
-
</div>
|
|
486
|
-
</div>
|
|
487
|
-
<div className="mt-4 pt-4 border-t border-gray-200">
|
|
488
|
-
<div className="flex justify-between text-sm text-gray-500">
|
|
489
|
-
<span>
|
|
490
|
-
Started: {new Date(benchmarkData.startTime).toLocaleString()}
|
|
491
|
-
</span>
|
|
492
|
-
<span>
|
|
493
|
-
Completed: {new Date(benchmarkData.endTime).toLocaleString()}
|
|
494
|
-
</span>
|
|
495
|
-
</div>
|
|
496
|
-
</div>
|
|
497
|
-
</div>
|
|
498
|
-
</div>
|
|
499
|
-
</div>
|
|
500
|
-
);
|
|
501
|
-
}
|