@tyvm/knowhow 0.0.90 → 0.0.91

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (262) hide show
  1. package/.depcheckrc +31 -0
  2. package/bin/knowhow.js +1 -1
  3. package/package.json +4 -32
  4. package/src/agents/tools/executeScript/index.ts +5 -0
  5. package/src/agents/tools/googleSearch.ts +2 -2
  6. package/src/agents/tools/index.ts +0 -3
  7. package/src/agents/tools/list.ts +0 -147
  8. package/src/agents/tools/loadWebpage.ts +3 -113
  9. package/src/auth/browserLogin.ts +10 -13
  10. package/src/cli.ts +63 -3
  11. package/src/clients/gemini.ts +96 -25
  12. package/src/clients/http.ts +7 -11
  13. package/src/clients/pricing/google.ts +122 -26
  14. package/src/conversion.ts +24 -54
  15. package/src/index.ts +8 -1
  16. package/src/login.ts +5 -6
  17. package/src/plugins/language.ts +0 -4
  18. package/src/plugins/plugins.ts +0 -14
  19. package/src/plugins/url.ts +31 -12
  20. package/src/services/GitHub.ts +2 -2
  21. package/src/services/KnowhowClient.ts +34 -34
  22. package/src/{plugins/downloader/downloader.ts → services/MediaProcessorService.ts} +109 -267
  23. package/src/services/S3.ts +16 -16
  24. package/src/services/index.ts +4 -4
  25. package/src/services/modules/index.ts +10 -2
  26. package/src/services/modules/types.ts +5 -2
  27. package/src/services/script-execution/ScriptExecutor.ts +29 -10
  28. package/src/services/script-execution/ScriptPolicy.ts +6 -2
  29. package/src/types.ts +1 -0
  30. package/src/utils/http.ts +127 -0
  31. package/src/workers/auth/PasskeySetup.ts +7 -11
  32. package/tests/clients/AIClient.test.ts +24 -21
  33. package/tests/manual/file-edits/figma.test.ts +3 -70
  34. package/tests/plugins/language/languagePlugin-content-triggers.test.ts +2 -0
  35. package/tests/plugins/language/languagePlugin.test.ts +2 -0
  36. package/tests/processors/ToolResponseCache.test.ts +2 -2
  37. package/tests/test.spec.ts +0 -14
  38. package/tests/unit/modules/moduleLoading.test.ts +7 -4
  39. package/tests/unit/plugins/pluginLoading.test.ts +6 -6
  40. package/ts_build/package.json +4 -32
  41. package/ts_build/src/agents/tools/ast/astAppendNode.d.ts +1 -1
  42. package/ts_build/src/agents/tools/ast/astAppendNode.js +2 -90
  43. package/ts_build/src/agents/tools/ast/astAppendNode.js.map +1 -1
  44. package/ts_build/src/agents/tools/ast/astDeleteNode.d.ts +1 -1
  45. package/ts_build/src/agents/tools/ast/astDeleteNode.js +2 -88
  46. package/ts_build/src/agents/tools/ast/astDeleteNode.js.map +1 -1
  47. package/ts_build/src/agents/tools/ast/astEditNode.d.ts +1 -1
  48. package/ts_build/src/agents/tools/ast/astEditNode.js +2 -90
  49. package/ts_build/src/agents/tools/ast/astEditNode.js.map +1 -1
  50. package/ts_build/src/agents/tools/ast/astGetPathForLine.d.ts +1 -1
  51. package/ts_build/src/agents/tools/ast/astGetPathForLine.js +2 -72
  52. package/ts_build/src/agents/tools/ast/astGetPathForLine.js.map +1 -1
  53. package/ts_build/src/agents/tools/ast/astListPaths.d.ts +1 -1
  54. package/ts_build/src/agents/tools/ast/astListPaths.js +2 -72
  55. package/ts_build/src/agents/tools/ast/astListPaths.js.map +1 -1
  56. package/ts_build/src/agents/tools/executeScript/index.d.ts +3 -2
  57. package/ts_build/src/agents/tools/executeScript/index.js +4 -1
  58. package/ts_build/src/agents/tools/executeScript/index.js.map +1 -1
  59. package/ts_build/src/agents/tools/googleSearch.js +2 -2
  60. package/ts_build/src/agents/tools/googleSearch.js.map +1 -1
  61. package/ts_build/src/agents/tools/index.d.ts +0 -3
  62. package/ts_build/src/agents/tools/index.js +0 -3
  63. package/ts_build/src/agents/tools/index.js.map +1 -1
  64. package/ts_build/src/agents/tools/list.js +0 -138
  65. package/ts_build/src/agents/tools/list.js.map +1 -1
  66. package/ts_build/src/agents/tools/loadWebpage.js +1 -89
  67. package/ts_build/src/agents/tools/loadWebpage.js.map +1 -1
  68. package/ts_build/src/agents/tools/textSearch.d.ts +1 -1
  69. package/ts_build/src/auth/browserLogin.js +7 -7
  70. package/ts_build/src/auth/browserLogin.js.map +1 -1
  71. package/ts_build/src/cli.d.ts +1 -1
  72. package/ts_build/src/cli.js +47 -1
  73. package/ts_build/src/cli.js.map +1 -1
  74. package/ts_build/src/clients/gemini.d.ts +1 -73
  75. package/ts_build/src/clients/gemini.js +57 -19
  76. package/ts_build/src/clients/gemini.js.map +1 -1
  77. package/ts_build/src/clients/http.js +5 -9
  78. package/ts_build/src/clients/http.js.map +1 -1
  79. package/ts_build/src/clients/pricing/google.d.ts +17 -73
  80. package/ts_build/src/clients/pricing/google.js +47 -10
  81. package/ts_build/src/clients/pricing/google.js.map +1 -1
  82. package/ts_build/src/conversion.d.ts +1 -4
  83. package/ts_build/src/conversion.js +12 -27
  84. package/ts_build/src/conversion.js.map +1 -1
  85. package/ts_build/src/index.d.ts +4 -0
  86. package/ts_build/src/index.js +7 -1
  87. package/ts_build/src/index.js.map +1 -1
  88. package/ts_build/src/login.js +5 -4
  89. package/ts_build/src/login.js.map +1 -1
  90. package/ts_build/src/plugins/downloader/downloader.js +3 -3
  91. package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
  92. package/ts_build/src/plugins/language.js.map +1 -1
  93. package/ts_build/src/plugins/plugins.js +0 -14
  94. package/ts_build/src/plugins/plugins.js.map +1 -1
  95. package/ts_build/src/plugins/tree-sitter/editor.d.ts +3 -32
  96. package/ts_build/src/plugins/tree-sitter/editor.js +6 -208
  97. package/ts_build/src/plugins/tree-sitter/editor.js.map +1 -1
  98. package/ts_build/src/plugins/tree-sitter/parser.d.ts +19 -54
  99. package/ts_build/src/plugins/tree-sitter/parser.js +19 -293
  100. package/ts_build/src/plugins/tree-sitter/parser.js.map +1 -1
  101. package/ts_build/src/plugins/tree-sitter/simple-paths.d.ts +2 -15
  102. package/ts_build/src/plugins/tree-sitter/simple-paths.js +2 -324
  103. package/ts_build/src/plugins/tree-sitter/simple-paths.js.map +1 -1
  104. package/ts_build/src/plugins/url.js +27 -8
  105. package/ts_build/src/plugins/url.js.map +1 -1
  106. package/ts_build/src/services/GitHub.js +2 -2
  107. package/ts_build/src/services/GitHub.js.map +1 -1
  108. package/ts_build/src/services/KnowhowClient.d.ts +29 -29
  109. package/ts_build/src/services/KnowhowClient.js +33 -33
  110. package/ts_build/src/services/KnowhowClient.js.map +1 -1
  111. package/ts_build/src/services/MediaProcessorService.d.ts +22 -0
  112. package/ts_build/src/services/MediaProcessorService.js +215 -0
  113. package/ts_build/src/services/MediaProcessorService.js.map +1 -0
  114. package/ts_build/src/services/S3.js +12 -18
  115. package/ts_build/src/services/S3.js.map +1 -1
  116. package/ts_build/src/services/index.d.ts +3 -2
  117. package/ts_build/src/services/index.js +3 -3
  118. package/ts_build/src/services/index.js.map +1 -1
  119. package/ts_build/src/services/modules/index.js +10 -2
  120. package/ts_build/src/services/modules/index.js.map +1 -1
  121. package/ts_build/src/services/modules/types.d.ts +5 -2
  122. package/ts_build/src/services/script-execution/ScriptExecutor.js +22 -7
  123. package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
  124. package/ts_build/src/services/script-execution/ScriptPolicy.d.ts +1 -1
  125. package/ts_build/src/services/script-execution/ScriptPolicy.js +4 -2
  126. package/ts_build/src/services/script-execution/ScriptPolicy.js.map +1 -1
  127. package/ts_build/src/types.d.ts +1 -0
  128. package/ts_build/src/types.js +1 -0
  129. package/ts_build/src/types.js.map +1 -1
  130. package/ts_build/src/utils/http.d.ts +27 -0
  131. package/ts_build/src/utils/http.js +98 -0
  132. package/ts_build/src/utils/http.js.map +1 -0
  133. package/ts_build/src/workers/auth/PasskeySetup.js +6 -7
  134. package/ts_build/src/workers/auth/PasskeySetup.js.map +1 -1
  135. package/ts_build/tests/clients/AIClient.test.js +11 -14
  136. package/ts_build/tests/clients/AIClient.test.js.map +1 -1
  137. package/ts_build/tests/manual/file-edits/figma.test.d.ts +0 -1
  138. package/ts_build/tests/manual/file-edits/figma.test.js +1 -46
  139. package/ts_build/tests/manual/file-edits/figma.test.js.map +1 -1
  140. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +2 -0
  141. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
  142. package/ts_build/tests/plugins/language/languagePlugin.test.js +2 -0
  143. package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
  144. package/ts_build/tests/processors/ToolResponseCache.test.js +2 -2
  145. package/ts_build/tests/processors/ToolResponseCache.test.js.map +1 -1
  146. package/ts_build/tests/test.spec.js +0 -14
  147. package/ts_build/tests/test.spec.js.map +1 -1
  148. package/ts_build/tests/tree-sitter/tree-sitter.test.d.ts +0 -1
  149. package/ts_build/tests/tree-sitter/tree-sitter.test.js +2 -183
  150. package/ts_build/tests/tree-sitter/tree-sitter.test.js.map +1 -1
  151. package/ts_build/tests/unit/modules/moduleLoading.test.js +6 -4
  152. package/ts_build/tests/unit/modules/moduleLoading.test.js.map +1 -1
  153. package/ts_build/tests/unit/plugins/pluginLoading.test.js +4 -4
  154. package/ts_build/tests/unit/plugins/pluginLoading.test.js.map +1 -1
  155. package/benchmarks/.dockerignore +0 -7
  156. package/benchmarks/README.md +0 -166
  157. package/benchmarks/docker/Dockerfile +0 -68
  158. package/benchmarks/example-config.yml +0 -27
  159. package/benchmarks/jest.config.js +0 -13
  160. package/benchmarks/package-lock.json +0 -4297
  161. package/benchmarks/package.json +0 -39
  162. package/benchmarks/results/27b0a06/2025-09-27/xai/xai-grok-code-fast-1.json +0 -2909
  163. package/benchmarks/results/4057aed/2025-08-14/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -1671
  164. package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +0 -2814
  165. package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -2014
  166. package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3121
  167. package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +0 -98
  168. package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +0 -3256
  169. package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +0 -4298
  170. package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3031
  171. package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -2990
  172. package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3256
  173. package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3007
  174. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -3256
  175. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +0 -3036
  176. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +0 -3280
  177. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -1920
  178. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +0 -3281
  179. package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +0 -2896
  180. package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3011
  181. package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +0 -3003
  182. package/benchmarks/scripts/build-and-run.sh +0 -47
  183. package/benchmarks/scripts/clone-exercism.sh +0 -92
  184. package/benchmarks/scripts/validate.sh +0 -48
  185. package/benchmarks/src/__tests__/runner.test.ts +0 -27
  186. package/benchmarks/src/cli.ts +0 -90
  187. package/benchmarks/src/evaluators/EvaluatorRegistry.ts +0 -64
  188. package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +0 -183
  189. package/benchmarks/src/evaluators/index.ts +0 -3
  190. package/benchmarks/src/evaluators/types.ts +0 -22
  191. package/benchmarks/src/index.ts +0 -3
  192. package/benchmarks/src/providers.ts +0 -13
  193. package/benchmarks/src/runner.ts +0 -824
  194. package/benchmarks/src/types.ts +0 -63
  195. package/benchmarks/tsconfig.json +0 -19
  196. package/leaderboard/README.md +0 -148
  197. package/leaderboard/app/api/benchmark-data/route.ts +0 -131
  198. package/leaderboard/app/api/benchmark-detail/route.ts +0 -172
  199. package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +0 -501
  200. package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +0 -375
  201. package/leaderboard/app/globals.css +0 -27
  202. package/leaderboard/app/layout.tsx +0 -21
  203. package/leaderboard/app/page.tsx +0 -170
  204. package/leaderboard/components/LeaderboardTable.tsx +0 -168
  205. package/leaderboard/components/PerformanceChart.tsx +0 -109
  206. package/leaderboard/next-env.d.ts +0 -5
  207. package/leaderboard/next.config.js +0 -4
  208. package/leaderboard/package-lock.json +0 -6363
  209. package/leaderboard/package.json +0 -28
  210. package/leaderboard/postcss.config.js +0 -6
  211. package/leaderboard/tailwind.config.js +0 -17
  212. package/leaderboard/tsconfig.json +0 -28
  213. package/leaderboard/types/benchmark.ts +0 -67
  214. package/leaderboard/utils/dataProcessor.ts +0 -33
  215. package/src/agents/tools/asana/definitions.ts +0 -199
  216. package/src/agents/tools/asana/index.ts +0 -108
  217. package/src/agents/tools/ast/astAppendNode.ts +0 -90
  218. package/src/agents/tools/ast/astDeleteNode.ts +0 -88
  219. package/src/agents/tools/ast/astEditNode.ts +0 -95
  220. package/src/agents/tools/ast/astGetPathForLine.ts +0 -73
  221. package/src/agents/tools/ast/astListPaths.ts +0 -66
  222. package/src/agents/tools/ast/index.ts +0 -7
  223. package/src/agents/tools/github/definitions.ts +0 -89
  224. package/src/agents/tools/github/index.ts +0 -67
  225. package/src/chat-old.ts +0 -446
  226. package/src/plugins/asana.ts +0 -146
  227. package/src/plugins/downloader/plugin.ts +0 -103
  228. package/src/plugins/downloader/types.ts +0 -92
  229. package/src/plugins/figma.ts +0 -158
  230. package/src/plugins/github.ts +0 -219
  231. package/src/plugins/jira.ts +0 -115
  232. package/src/plugins/linear.ts +0 -230
  233. package/src/plugins/notion.ts +0 -179
  234. package/src/plugins/tree-sitter/editor.ts +0 -369
  235. package/src/plugins/tree-sitter/lang-packs/index.ts +0 -23
  236. package/src/plugins/tree-sitter/lang-packs/java.ts +0 -59
  237. package/src/plugins/tree-sitter/lang-packs/javascript.ts +0 -57
  238. package/src/plugins/tree-sitter/lang-packs/python.ts +0 -45
  239. package/src/plugins/tree-sitter/lang-packs/types.ts +0 -79
  240. package/src/plugins/tree-sitter/lang-packs/typescript.ts +0 -49
  241. package/src/plugins/tree-sitter/parser.ts +0 -470
  242. package/src/plugins/tree-sitter/simple-paths.ts +0 -467
  243. package/tests/tree-sitter/editor.test.ts +0 -113
  244. package/tests/tree-sitter/invalid.test.ts +0 -299
  245. package/tests/tree-sitter/paths/common-edits.test.ts +0 -564
  246. package/tests/tree-sitter/paths/debug-exact-position.test.ts +0 -44
  247. package/tests/tree-sitter/paths/debug-line-indexing.test.ts +0 -49
  248. package/tests/tree-sitter/paths/debug-paths.test.ts +0 -90
  249. package/tests/tree-sitter/paths/paths.test.ts +0 -170
  250. package/tests/tree-sitter/paths/simple-paths.test.ts +0 -367
  251. package/tests/tree-sitter/sample-after.ts +0 -48
  252. package/tests/tree-sitter/sample-before.ts +0 -25
  253. package/tests/tree-sitter/test-files/completely-broken.ts +0 -7
  254. package/tests/tree-sitter/test-files/duplicate-braces.ts +0 -39
  255. package/tests/tree-sitter/test-files/invalid-nesting.ts +0 -39
  256. package/tests/tree-sitter/test-files/malformed-signature.ts +0 -39
  257. package/tests/tree-sitter/test-files/mismatched-parens.ts +0 -39
  258. package/tests/tree-sitter/test-files/missing-semicolon.ts +0 -39
  259. package/tests/tree-sitter/test-files/partially-broken.ts +0 -20
  260. package/tests/tree-sitter/test-files/specific-errors.ts +0 -14
  261. package/tests/tree-sitter/test-files/unclosed-string.ts +0 -39
  262. package/tests/tree-sitter/tree-sitter.test.ts +0 -251
@@ -1,501 +0,0 @@
1
- "use client";
2
-
3
- import { useState, useEffect } from "react";
4
- import { useParams, useRouter, useSearchParams } from "next/navigation";
5
- import { BenchmarkResults, ExerciseResult } from "@/types/benchmark";
6
- import {
7
- formatCurrency,
8
- formatTime,
9
- formatPercentage,
10
- } from "@/utils/dataProcessor";
11
-
12
- interface HistoricalRun {
13
- endTime: string;
14
- successRate: number;
15
- totalExercises: number;
16
- totalCost: number;
17
- averageTime: number;
18
- averageTurns: number;
19
- commitHash: string;
20
- averageCost: number;
21
- }
22
-
23
- interface DetailResponse {
24
- latest: BenchmarkResults;
25
- history: HistoricalRun[];
26
- totalRuns: number;
27
- }
28
-
29
- export default function ModelDetailPage() {
30
- const params = useParams();
31
- const router = useRouter();
32
- const searchParams = useSearchParams();
33
- const [detailData, setDetailData] = useState<DetailResponse | null>(
34
- null
35
- );
36
- const [loading, setLoading] = useState(true);
37
- const [error, setError] = useState<string | null>(null);
38
-
39
- const model = decodeURIComponent(params.model as string);
40
- const provider = decodeURIComponent(params.provider as string);
41
- const language = decodeURIComponent(params.language as string);
42
- const timestamp = searchParams.get('timestamp');
43
-
44
- useEffect(() => {
45
- async function fetchDetailData() {
46
- try {
47
- const response = await fetch(
48
- `/api/benchmark-detail?model=${encodeURIComponent(
49
- model
50
- )}&provider=${encodeURIComponent(
51
- provider
52
- )}&language=${encodeURIComponent(language)}${
53
- timestamp ? `&timestamp=${timestamp}` : ''
54
- }`
55
- );
56
- if (!response.ok) {
57
- throw new Error("Failed to fetch benchmark details");
58
- }
59
- const data = await response.json();
60
- setDetailData(data);
61
- } catch (err) {
62
- setError(err instanceof Error ? err.message : "An error occurred");
63
- } finally {
64
- setLoading(false);
65
- }
66
- }
67
-
68
- fetchDetailData();
69
- }, [model, provider, language, timestamp]);
70
-
71
- const loadHistoricalRun = async (timestamp: string) => {
72
- // Navigate to the same page but with timestamp parameter
73
- router.push(`/details/${encodeURIComponent(model)}/${encodeURIComponent(provider)}/${encodeURIComponent(language)}?timestamp=${timestamp}`);
74
- };
75
-
76
- const backToLatestRun = () => {
77
- // Navigate to the same page without timestamp parameter
78
- router.push(`/details/${encodeURIComponent(model)}/${encodeURIComponent(provider)}/${encodeURIComponent(language)}`);
79
- };
80
-
81
- const getStatusBadge = (status: string) => {
82
- const baseClasses = "px-2 py-1 text-xs font-medium rounded-full";
83
- switch (status) {
84
- case "success":
85
- return `${baseClasses} bg-green-100 text-green-800`;
86
- case "failure":
87
- return `${baseClasses} bg-red-100 text-red-800`;
88
- case "timeout":
89
- return `${baseClasses} bg-yellow-100 text-yellow-800`;
90
- case "cost_limit":
91
- return `${baseClasses} bg-orange-100 text-orange-800`;
92
- case "turn_limit":
93
- return `${baseClasses} bg-purple-100 text-purple-800`;
94
- default:
95
- return `${baseClasses} bg-gray-100 text-gray-800`;
96
- }
97
- };
98
-
99
- const getStatusIcon = (status: string) => {
100
- switch (status) {
101
- case "success":
102
- return "✅";
103
- case "failure":
104
- return "❌";
105
- case "timeout":
106
- return "⏰";
107
- case "cost_limit":
108
- return "💰";
109
- case "turn_limit":
110
- return "🔄";
111
- default:
112
- return "❓";
113
- }
114
- };
115
-
116
- if (loading) {
117
- return (
118
- <div className="min-h-screen bg-gray-50 flex items-center justify-center">
119
- <div className="text-center">
120
- <div className="animate-spin rounded-full h-32 w-32 border-b-2 border-blue-500 mx-auto"></div>
121
- <p className="mt-4 text-gray-600">Loading benchmark details...</p>
122
- </div>
123
- </div>
124
- );
125
- }
126
-
127
- if (error || !detailData) {
128
- return (
129
- <div className="min-h-screen bg-gray-50 flex items-center justify-center">
130
- <div className="text-center">
131
- <div className="text-red-500 text-6xl mb-4">⚠️</div>
132
- <h3 className="text-lg font-medium text-gray-900 mb-2">
133
- Error Loading Details
134
- </h3>
135
- <p className="text-gray-500 mb-4">
136
- {error || "Benchmark data not found"}
137
- </p>
138
- <button
139
- onClick={() => router.back()}
140
- className="bg-blue-500 hover:bg-blue-600 text-white px-4 py-2 rounded-md"
141
- >
142
- Go Back
143
- </button>
144
- </div>
145
- </div>
146
- );
147
- }
148
-
149
- // Extract the latest benchmark data for display
150
- const benchmarkData = detailData.latest;
151
- const isHistoricalView = timestamp !== null;
152
-
153
- return (
154
- <div className="min-h-screen bg-gray-50">
155
- <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
156
- {/* Header */}
157
- <div className="mb-8">
158
- <button
159
- onClick={() => router.push("/")}
160
- className="mb-4 text-blue-600 hover:text-blue-800 flex items-center"
161
- >
162
- ← Back to Leaderboard
163
- </button>
164
- <h1 className="text-3xl font-bold text-gray-900">
165
- Benchmark Details: {model}
166
- </h1>
167
- <p className="mt-2 text-gray-600">
168
- Provider: {provider} • Language: {language}
169
- </p>
170
- {isHistoricalView && (
171
- <div className="mt-4 p-3 bg-blue-50 border border-blue-200 rounded-md">
172
- <p className="text-blue-800 text-sm">
173
- Viewing historical run from {new Date(benchmarkData.endTime).toLocaleString()}
174
- </p>
175
- <button
176
- onClick={backToLatestRun}
177
- className="mt-2 text-blue-600 hover:text-blue-800 text-sm underline"
178
- >
179
- ← Back to latest run
180
- </button>
181
- </div>
182
- )}
183
- </div>
184
-
185
- {/* Summary Cards */}
186
- <div className="grid grid-cols-1 md:grid-cols-4 gap-6 mb-8">
187
- <div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
188
- <div className="flex items-center">
189
- <div className="flex-shrink-0">
190
- <div className="w-8 h-8 bg-green-500 rounded-md flex items-center justify-center">
191
- <span className="text-white font-bold">%</span>
192
- </div>
193
- </div>
194
- <div className="ml-4">
195
- <p className="text-sm font-medium text-gray-500">
196
- Success Rate
197
- </p>
198
- <p className="text-2xl font-semibold text-gray-900">
199
- {formatPercentage(benchmarkData.summary.successRate * 100)}
200
- </p>
201
- </div>
202
- </div>
203
- </div>
204
-
205
- <div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
206
- <div className="flex items-center">
207
- <div className="flex-shrink-0">
208
- <div className="w-8 h-8 bg-blue-500 rounded-md flex items-center justify-center">
209
- <span className="text-white font-bold">E</span>
210
- </div>
211
- </div>
212
- <div className="ml-4">
213
- <p className="text-sm font-medium text-gray-500">
214
- Total Exercises
215
- </p>
216
- <p className="text-2xl font-semibold text-gray-900">
217
- {benchmarkData.summary.totalExercises}
218
- </p>
219
- </div>
220
- </div>
221
- </div>
222
-
223
- <div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
224
- <div className="flex items-center">
225
- <div className="flex-shrink-0">
226
- <div className="w-8 h-8 bg-yellow-500 rounded-md flex items-center justify-center">
227
- <span className="text-white font-bold">$</span>
228
- </div>
229
- </div>
230
- <div className="ml-4">
231
- <p className="text-sm font-medium text-gray-500">Total Cost</p>
232
- <p className="text-2xl font-semibold text-gray-900">
233
- {formatCurrency(benchmarkData.summary.totalCost)}
234
- </p>
235
- </div>
236
- </div>
237
- </div>
238
-
239
- <div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
240
- <div className="flex items-center">
241
- <div className="flex-shrink-0">
242
- <div className="w-8 h-8 bg-purple-500 rounded-md flex items-center justify-center">
243
- <span className="text-white font-bold">T</span>
244
- </div>
245
- </div>
246
- <div className="ml-4">
247
- <p className="text-sm font-medium text-gray-500">Total Time</p>
248
- <p className="text-2xl font-semibold text-gray-900">
249
- {formatTime(benchmarkData.summary.totalTime)}
250
- </p>
251
- </div>
252
- </div>
253
- </div>
254
- </div>
255
-
256
- {/* Exercise Results Table */}
257
- <div className="bg-white rounded-lg shadow-sm border border-gray-200">
258
- <div className="px-6 py-4 border-b border-gray-200">
259
- <h2 className="text-xl font-semibold text-gray-900">
260
- Exercise Results
261
- </h2>
262
- <p className="mt-1 text-sm text-gray-500">
263
- Detailed breakdown of each exercise performance
264
- </p>
265
- </div>
266
- <div className="overflow-x-auto">
267
- <table className="min-w-full">
268
- <thead className="bg-gray-50">
269
- <tr>
270
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
271
- Exercise
272
- </th>
273
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
274
- Status
275
- </th>
276
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
277
- Pass / Total
278
- </th>
279
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
280
- Time
281
- </th>
282
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
283
- Cost
284
- </th>
285
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
286
- Turns
287
- </th>
288
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
289
- Output
290
- </th>
291
- </tr>
292
- </thead>
293
- <tbody className="bg-white divide-y divide-gray-200">
294
- {benchmarkData.exercises.map((exercise, index) => (
295
- <tr key={exercise.exerciseName} className="hover:bg-gray-50">
296
- <td className="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
297
- <button
298
- onClick={() => router.push(`/exercise/${encodeURIComponent(model)}/${encodeURIComponent(provider)}/${encodeURIComponent(language)}/${encodeURIComponent(exercise.exerciseName)}`)}
299
- className="text-blue-600 hover:text-blue-800 hover:underline text-left"
300
- title="Click to view detailed exercise results"
301
- >
302
- {exercise.exerciseName}
303
- </button>
304
- </td>
305
- <td className="px-6 py-4 whitespace-nowrap">
306
- <div className="flex items-center">
307
- <span className="mr-2">
308
- {getStatusIcon(
309
- exercise.testResult?.success ? "success" : "failure"
310
- )}
311
- </span>
312
- <span
313
- className={getStatusBadge(
314
- exercise.testResult?.success
315
- )}
316
- >
317
- {exercise.testResult?.success ? "Pass" : "Fail"}
318
- </span>
319
- </div>
320
- </td>
321
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
322
- {exercise.testResult?.passed} /{" "}
323
- {exercise.testResult?.total}{" "}
324
- <div>
325
- {exercise.testResult?.skipped
326
- ? `(${exercise.testResult?.skipped} skipped)`
327
- : ""}
328
- </div>
329
- </td>
330
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
331
- {formatTime(exercise.timeElapsed)}
332
- </td>
333
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
334
- {formatCurrency(exercise.cost)}
335
- </td>
336
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
337
- {exercise.turns}
338
- </td>
339
- <td className="px-6 py-4 text-sm text-gray-500 max-w-xs truncate">
340
- {exercise.finalOutput?.slice(0, 100) || "-"}
341
- </td>
342
- </tr>
343
- ))}
344
- </tbody>
345
- </table>
346
- </div>
347
- </div>
348
-
349
- {/* Historical Performance Section */}
350
- {detailData.history.length > 0 && (
351
- <div className="mt-8 bg-white rounded-lg shadow-sm border border-gray-200">
352
- <div className="px-6 py-4 border-b border-gray-200">
353
- <h2 className="text-xl font-semibold text-gray-900">
354
- Historical Performance
355
- </h2>
356
- <p className="mt-1 text-sm text-gray-500">
357
- Previous runs for this model/provider/language combination ({detailData.totalRuns} total runs)
358
- </p>
359
- </div>
360
- <div className="overflow-x-auto">
361
- <table className="min-w-full">
362
- <thead className="bg-gray-50">
363
- <tr>
364
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
365
- Run Date
366
- </th>
367
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
368
- Success Rate
369
- </th>
370
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
371
- Exercises
372
- </th>
373
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
374
- Avg Cost
375
- </th>
376
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
377
- Avg Time
378
- </th>
379
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
380
- Avg Turns
381
- </th>
382
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
383
- Commit
384
- </th>
385
- </tr>
386
- </thead>
387
- <tbody className="bg-white divide-y divide-gray-200">
388
- {detailData.history.map((run, index) => (
389
- <tr
390
- key={`${run.endTime}-${index}`}
391
- className="hover:bg-gray-50 cursor-pointer"
392
- onClick={() => loadHistoricalRun(run.endTime)}
393
- title="Click to view detailed results for this run"
394
- >
395
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
396
- {new Date(run.endTime).toLocaleDateString()}
397
- </td>
398
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
399
- {formatPercentage(run.successRate)}
400
- </td>
401
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
402
- {run.totalExercises}
403
- </td>
404
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
405
- {formatCurrency(run.averageCost)}
406
- </td>
407
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
408
- {formatTime(run.averageTime)}
409
- </td>
410
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
411
- {run.averageTurns.toFixed(1)}
412
- </td>
413
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500 font-mono">
414
- {run.commitHash.slice(0, 8)}
415
- </td>
416
- </tr>
417
- ))}
418
- </tbody>
419
- </table>
420
- </div>
421
- </div>
422
- )}
423
-
424
- {/* Run Information */}
425
- <div className="mt-8 bg-white rounded-lg shadow-sm border border-gray-200 p-6">
426
- <h3 className="text-lg font-semibold text-gray-900 mb-4">
427
- Run Information
428
- </h3>
429
- <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
430
- <div>
431
- <h4 className="text-sm font-medium text-gray-500 mb-2">
432
- Configuration
433
- </h4>
434
- <dl className="space-y-1">
435
- <div className="flex">
436
- <dt className="text-sm text-gray-500 w-24">Model:</dt>
437
- <dd className="text-sm text-gray-900">
438
- {benchmarkData.config.model}
439
- </dd>
440
- </div>
441
- <div className="flex">
442
- <dt className="text-sm text-gray-500 w-24">Provider:</dt>
443
- <dd className="text-sm text-gray-900">
444
- {benchmarkData.config.provider}
445
- </dd>
446
- </div>
447
- <div className="flex">
448
- <dt className="text-sm text-gray-500 w-24">Language:</dt>
449
- <dd className="text-sm text-gray-900">
450
- {benchmarkData.config.language}
451
- </dd>
452
- </div>
453
- {benchmarkData.config.agent && (
454
- <div className="flex">
455
- <dt className="text-sm text-gray-500 w-24">Agent:</dt>
456
- <dd className="text-sm text-gray-900">
457
- {benchmarkData.config.agent}
458
- </dd>
459
- </div>
460
- )}
461
- </dl>
462
- </div>
463
- <div>
464
- <h4 className="text-sm font-medium text-gray-500 mb-2">Limits</h4>
465
- <dl className="space-y-1">
466
- <div className="flex">
467
- <dt className="text-sm text-gray-500 w-24">Max Turns:</dt>
468
- <dd className="text-sm text-gray-900">
469
- {benchmarkData.config.limits.maxTurns}
470
- </dd>
471
- </div>
472
- <div className="flex">
473
- <dt className="text-sm text-gray-500 w-24">Max Time:</dt>
474
- <dd className="text-sm text-gray-900">
475
- {formatTime(benchmarkData.config.limits.maxTime)}
476
- </dd>
477
- </div>
478
- <div className="flex">
479
- <dt className="text-sm text-gray-500 w-24">Max Cost:</dt>
480
- <dd className="text-sm text-gray-900">
481
- {formatCurrency(benchmarkData.config.limits.maxCost)}
482
- </dd>
483
- </div>
484
- </dl>
485
- </div>
486
- </div>
487
- <div className="mt-4 pt-4 border-t border-gray-200">
488
- <div className="flex justify-between text-sm text-gray-500">
489
- <span>
490
- Started: {new Date(benchmarkData.startTime).toLocaleString()}
491
- </span>
492
- <span>
493
- Completed: {new Date(benchmarkData.endTime).toLocaleString()}
494
- </span>
495
- </div>
496
- </div>
497
- </div>
498
- </div>
499
- </div>
500
- );
501
- }