@tyvm/knowhow 0.0.90 → 0.0.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (272)
  1. package/.depcheckrc +30 -0
  2. package/bin/knowhow.js +1 -1
  3. package/package.json +8 -34
  4. package/src/agents/configurable/ConfigAgent.ts +2 -2
  5. package/src/agents/tools/executeScript/index.ts +5 -0
  6. package/src/agents/tools/googleSearch.ts +2 -2
  7. package/src/agents/tools/index.ts +0 -3
  8. package/src/agents/tools/list.ts +0 -147
  9. package/src/agents/tools/loadWebpage.ts +3 -113
  10. package/src/auth/browserLogin.ts +10 -13
  11. package/src/chat/modules/AgentModule.ts +0 -1
  12. package/src/chat/types.ts +1 -1
  13. package/src/cli.ts +63 -3
  14. package/src/clients/gemini.ts +96 -25
  15. package/src/clients/http.ts +7 -11
  16. package/src/clients/pricing/google.ts +122 -26
  17. package/src/conversion.ts +24 -54
  18. package/src/index.ts +15 -20
  19. package/src/login.ts +5 -6
  20. package/src/plugins/language.ts +0 -4
  21. package/src/plugins/plugins.ts +0 -14
  22. package/src/plugins/url.ts +31 -12
  23. package/src/services/EmbeddingsService.ts +70 -0
  24. package/src/services/KnowhowClient.ts +34 -34
  25. package/src/{plugins/downloader/downloader.ts → services/MediaProcessorService.ts} +109 -267
  26. package/src/services/S3.ts +19 -87
  27. package/src/services/index.ts +8 -8
  28. package/src/services/modules/index.ts +12 -3
  29. package/src/services/modules/types.ts +8 -2
  30. package/src/services/script-execution/ScriptExecutor.ts +29 -10
  31. package/src/services/script-execution/ScriptPolicy.ts +6 -2
  32. package/src/types.ts +1 -0
  33. package/src/utils/http.ts +127 -0
  34. package/src/workers/auth/PasskeySetup.ts +7 -11
  35. package/tests/clients/AIClient.test.ts +24 -21
  36. package/tests/manual/file-edits/figma.test.ts +3 -70
  37. package/tests/plugins/language/languagePlugin-content-triggers.test.ts +2 -0
  38. package/tests/plugins/language/languagePlugin.test.ts +2 -0
  39. package/tests/processors/ToolResponseCache.test.ts +2 -2
  40. package/tests/test.spec.ts +0 -14
  41. package/tests/unit/modules/moduleLoading.test.ts +12 -4
  42. package/tests/unit/plugins/pluginLoading.test.ts +6 -6
  43. package/ts_build/package.json +8 -34
  44. package/ts_build/src/agents/tools/ast/astAppendNode.d.ts +1 -1
  45. package/ts_build/src/agents/tools/ast/astAppendNode.js +2 -90
  46. package/ts_build/src/agents/tools/ast/astAppendNode.js.map +1 -1
  47. package/ts_build/src/agents/tools/ast/astDeleteNode.d.ts +1 -1
  48. package/ts_build/src/agents/tools/ast/astDeleteNode.js +2 -88
  49. package/ts_build/src/agents/tools/ast/astDeleteNode.js.map +1 -1
  50. package/ts_build/src/agents/tools/ast/astEditNode.d.ts +1 -1
  51. package/ts_build/src/agents/tools/ast/astEditNode.js +2 -90
  52. package/ts_build/src/agents/tools/ast/astEditNode.js.map +1 -1
  53. package/ts_build/src/agents/tools/ast/astGetPathForLine.d.ts +1 -1
  54. package/ts_build/src/agents/tools/ast/astGetPathForLine.js +2 -72
  55. package/ts_build/src/agents/tools/ast/astGetPathForLine.js.map +1 -1
  56. package/ts_build/src/agents/tools/ast/astListPaths.d.ts +1 -1
  57. package/ts_build/src/agents/tools/ast/astListPaths.js +2 -72
  58. package/ts_build/src/agents/tools/ast/astListPaths.js.map +1 -1
  59. package/ts_build/src/agents/tools/executeScript/index.d.ts +3 -2
  60. package/ts_build/src/agents/tools/executeScript/index.js +4 -1
  61. package/ts_build/src/agents/tools/executeScript/index.js.map +1 -1
  62. package/ts_build/src/agents/tools/googleSearch.js +2 -2
  63. package/ts_build/src/agents/tools/googleSearch.js.map +1 -1
  64. package/ts_build/src/agents/tools/index.d.ts +0 -3
  65. package/ts_build/src/agents/tools/index.js +0 -3
  66. package/ts_build/src/agents/tools/index.js.map +1 -1
  67. package/ts_build/src/agents/tools/list.js +0 -138
  68. package/ts_build/src/agents/tools/list.js.map +1 -1
  69. package/ts_build/src/agents/tools/loadWebpage.js +1 -89
  70. package/ts_build/src/agents/tools/loadWebpage.js.map +1 -1
  71. package/ts_build/src/agents/tools/textSearch.d.ts +1 -1
  72. package/ts_build/src/auth/browserLogin.js +7 -7
  73. package/ts_build/src/auth/browserLogin.js.map +1 -1
  74. package/ts_build/src/chat/modules/AgentModule.js.map +1 -1
  75. package/ts_build/src/chat/types.d.ts +1 -1
  76. package/ts_build/src/cli.d.ts +1 -1
  77. package/ts_build/src/cli.js +47 -1
  78. package/ts_build/src/cli.js.map +1 -1
  79. package/ts_build/src/clients/gemini.d.ts +1 -73
  80. package/ts_build/src/clients/gemini.js +57 -19
  81. package/ts_build/src/clients/gemini.js.map +1 -1
  82. package/ts_build/src/clients/http.js +5 -9
  83. package/ts_build/src/clients/http.js.map +1 -1
  84. package/ts_build/src/clients/pricing/google.d.ts +17 -73
  85. package/ts_build/src/clients/pricing/google.js +47 -10
  86. package/ts_build/src/clients/pricing/google.js.map +1 -1
  87. package/ts_build/src/conversion.d.ts +1 -4
  88. package/ts_build/src/conversion.js +12 -27
  89. package/ts_build/src/conversion.js.map +1 -1
  90. package/ts_build/src/index.d.ts +4 -0
  91. package/ts_build/src/index.js +15 -14
  92. package/ts_build/src/index.js.map +1 -1
  93. package/ts_build/src/login.js +5 -4
  94. package/ts_build/src/login.js.map +1 -1
  95. package/ts_build/src/plugins/downloader/downloader.js +3 -3
  96. package/ts_build/src/plugins/downloader/downloader.js.map +1 -1
  97. package/ts_build/src/plugins/language.js.map +1 -1
  98. package/ts_build/src/plugins/plugins.js +0 -14
  99. package/ts_build/src/plugins/plugins.js.map +1 -1
  100. package/ts_build/src/plugins/tree-sitter/editor.d.ts +3 -32
  101. package/ts_build/src/plugins/tree-sitter/editor.js +6 -208
  102. package/ts_build/src/plugins/tree-sitter/editor.js.map +1 -1
  103. package/ts_build/src/plugins/tree-sitter/parser.d.ts +19 -54
  104. package/ts_build/src/plugins/tree-sitter/parser.js +19 -293
  105. package/ts_build/src/plugins/tree-sitter/parser.js.map +1 -1
  106. package/ts_build/src/plugins/tree-sitter/simple-paths.d.ts +2 -15
  107. package/ts_build/src/plugins/tree-sitter/simple-paths.js +2 -324
  108. package/ts_build/src/plugins/tree-sitter/simple-paths.js.map +1 -1
  109. package/ts_build/src/plugins/url.js +27 -8
  110. package/ts_build/src/plugins/url.js.map +1 -1
  111. package/ts_build/src/services/EmbeddingsService.d.ts +14 -0
  112. package/ts_build/src/services/EmbeddingsService.js +33 -0
  113. package/ts_build/src/services/EmbeddingsService.js.map +1 -0
  114. package/ts_build/src/services/GitHub.js +2 -2
  115. package/ts_build/src/services/GitHub.js.map +1 -1
  116. package/ts_build/src/services/KnowhowClient.d.ts +29 -29
  117. package/ts_build/src/services/KnowhowClient.js +33 -33
  118. package/ts_build/src/services/KnowhowClient.js.map +1 -1
  119. package/ts_build/src/services/MediaProcessorService.d.ts +22 -0
  120. package/ts_build/src/services/MediaProcessorService.js +215 -0
  121. package/ts_build/src/services/MediaProcessorService.js.map +1 -0
  122. package/ts_build/src/services/S3.d.ts +0 -4
  123. package/ts_build/src/services/S3.js +14 -60
  124. package/ts_build/src/services/S3.js.map +1 -1
  125. package/ts_build/src/services/index.d.ts +6 -5
  126. package/ts_build/src/services/index.js +6 -6
  127. package/ts_build/src/services/index.js.map +1 -1
  128. package/ts_build/src/services/modules/index.js +12 -3
  129. package/ts_build/src/services/modules/index.js.map +1 -1
  130. package/ts_build/src/services/modules/types.d.ts +8 -2
  131. package/ts_build/src/services/script-execution/ScriptExecutor.js +22 -7
  132. package/ts_build/src/services/script-execution/ScriptExecutor.js.map +1 -1
  133. package/ts_build/src/services/script-execution/ScriptPolicy.d.ts +1 -1
  134. package/ts_build/src/services/script-execution/ScriptPolicy.js +4 -2
  135. package/ts_build/src/services/script-execution/ScriptPolicy.js.map +1 -1
  136. package/ts_build/src/types.d.ts +1 -0
  137. package/ts_build/src/types.js +1 -0
  138. package/ts_build/src/types.js.map +1 -1
  139. package/ts_build/src/utils/http.d.ts +27 -0
  140. package/ts_build/src/utils/http.js +98 -0
  141. package/ts_build/src/utils/http.js.map +1 -0
  142. package/ts_build/src/workers/auth/PasskeySetup.js +6 -7
  143. package/ts_build/src/workers/auth/PasskeySetup.js.map +1 -1
  144. package/ts_build/tests/clients/AIClient.test.js +11 -14
  145. package/ts_build/tests/clients/AIClient.test.js.map +1 -1
  146. package/ts_build/tests/manual/file-edits/figma.test.d.ts +0 -1
  147. package/ts_build/tests/manual/file-edits/figma.test.js +1 -46
  148. package/ts_build/tests/manual/file-edits/figma.test.js.map +1 -1
  149. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js +2 -0
  150. package/ts_build/tests/plugins/language/languagePlugin-content-triggers.test.js.map +1 -1
  151. package/ts_build/tests/plugins/language/languagePlugin.test.js +2 -0
  152. package/ts_build/tests/plugins/language/languagePlugin.test.js.map +1 -1
  153. package/ts_build/tests/processors/ToolResponseCache.test.js +2 -2
  154. package/ts_build/tests/processors/ToolResponseCache.test.js.map +1 -1
  155. package/ts_build/tests/test.spec.js +0 -14
  156. package/ts_build/tests/test.spec.js.map +1 -1
  157. package/ts_build/tests/tree-sitter/tree-sitter.test.d.ts +0 -1
  158. package/ts_build/tests/tree-sitter/tree-sitter.test.js +2 -183
  159. package/ts_build/tests/tree-sitter/tree-sitter.test.js.map +1 -1
  160. package/ts_build/tests/unit/modules/moduleLoading.test.js +11 -4
  161. package/ts_build/tests/unit/modules/moduleLoading.test.js.map +1 -1
  162. package/ts_build/tests/unit/plugins/pluginLoading.test.js +4 -4
  163. package/ts_build/tests/unit/plugins/pluginLoading.test.js.map +1 -1
  164. package/benchmarks/.dockerignore +0 -7
  165. package/benchmarks/README.md +0 -166
  166. package/benchmarks/docker/Dockerfile +0 -68
  167. package/benchmarks/example-config.yml +0 -27
  168. package/benchmarks/jest.config.js +0 -13
  169. package/benchmarks/package-lock.json +0 -4297
  170. package/benchmarks/package.json +0 -39
  171. package/benchmarks/results/27b0a06/2025-09-27/xai/xai-grok-code-fast-1.json +0 -2909
  172. package/benchmarks/results/4057aed/2025-08-14/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -1671
  173. package/benchmarks/results/4542435/2025-08-05/lms/lms-openai-gpt-oss-20b.json +0 -2814
  174. package/benchmarks/results/4542435/2025-08-05/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -2014
  175. package/benchmarks/results/4fb9125/2025-08-07/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3121
  176. package/benchmarks/results/5766aee/2025-08-02/lms-qwen/qwen3-coder-30b.json +0 -98
  177. package/benchmarks/results/6d73808/2025-08-07/openai/openai-gpt-5.json +0 -3256
  178. package/benchmarks/results/77bf0a6/2025-08-02/lms-qwen/qwen3-30b-a3b-2507.json +0 -4298
  179. package/benchmarks/results/8c0d445/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3031
  180. package/benchmarks/results/8c0d445/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -2990
  181. package/benchmarks/results/ac6b2ab/2025-08-03/anthropic/anthropic-claude-sonnet-4-20250514.json +0 -3256
  182. package/benchmarks/results/ac6b2ab/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3007
  183. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-2025-04-14.json +0 -3256
  184. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-mini-2025-04-14.json +0 -3036
  185. package/benchmarks/results/ac6b2ab/2025-08-03/openai/openai-gpt-4.1-nano-2025-04-14.json +0 -3280
  186. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-30b-a3b-2507.json +0 -1920
  187. package/benchmarks/results/adff675/2025-08-04/lms/lms-qwen-qwen3-coder-30b.json +0 -3281
  188. package/benchmarks/results/b502ed9/2025-08-03/lms-qwen/qwen3-coder-30b.json +0 -2896
  189. package/benchmarks/results/d1a8129/2025-08-03/lms/lms-qwen-qwen3-coder-30b.json +0 -3011
  190. package/benchmarks/results/e60471c/2025-08-03/lms/qwen3-30b-a3b-2507.json +0 -3003
  191. package/benchmarks/scripts/build-and-run.sh +0 -47
  192. package/benchmarks/scripts/clone-exercism.sh +0 -92
  193. package/benchmarks/scripts/validate.sh +0 -48
  194. package/benchmarks/src/__tests__/runner.test.ts +0 -27
  195. package/benchmarks/src/cli.ts +0 -90
  196. package/benchmarks/src/evaluators/EvaluatorRegistry.ts +0 -64
  197. package/benchmarks/src/evaluators/JavaScriptEvaluator.ts +0 -183
  198. package/benchmarks/src/evaluators/index.ts +0 -3
  199. package/benchmarks/src/evaluators/types.ts +0 -22
  200. package/benchmarks/src/index.ts +0 -3
  201. package/benchmarks/src/providers.ts +0 -13
  202. package/benchmarks/src/runner.ts +0 -824
  203. package/benchmarks/src/types.ts +0 -63
  204. package/benchmarks/tsconfig.json +0 -19
  205. package/leaderboard/README.md +0 -148
  206. package/leaderboard/app/api/benchmark-data/route.ts +0 -131
  207. package/leaderboard/app/api/benchmark-detail/route.ts +0 -172
  208. package/leaderboard/app/details/[model]/[provider]/[language]/page.tsx +0 -501
  209. package/leaderboard/app/exercise/[model]/[provider]/[language]/[exercise]/page.tsx +0 -375
  210. package/leaderboard/app/globals.css +0 -27
  211. package/leaderboard/app/layout.tsx +0 -21
  212. package/leaderboard/app/page.tsx +0 -170
  213. package/leaderboard/components/LeaderboardTable.tsx +0 -168
  214. package/leaderboard/components/PerformanceChart.tsx +0 -109
  215. package/leaderboard/next-env.d.ts +0 -5
  216. package/leaderboard/next.config.js +0 -4
  217. package/leaderboard/package-lock.json +0 -6363
  218. package/leaderboard/package.json +0 -28
  219. package/leaderboard/postcss.config.js +0 -6
  220. package/leaderboard/tailwind.config.js +0 -17
  221. package/leaderboard/tsconfig.json +0 -28
  222. package/leaderboard/types/benchmark.ts +0 -67
  223. package/leaderboard/utils/dataProcessor.ts +0 -33
  224. package/src/agents/tools/asana/definitions.ts +0 -199
  225. package/src/agents/tools/asana/index.ts +0 -108
  226. package/src/agents/tools/ast/astAppendNode.ts +0 -90
  227. package/src/agents/tools/ast/astDeleteNode.ts +0 -88
  228. package/src/agents/tools/ast/astEditNode.ts +0 -95
  229. package/src/agents/tools/ast/astGetPathForLine.ts +0 -73
  230. package/src/agents/tools/ast/astListPaths.ts +0 -66
  231. package/src/agents/tools/ast/index.ts +0 -7
  232. package/src/agents/tools/github/definitions.ts +0 -89
  233. package/src/agents/tools/github/index.ts +0 -67
  234. package/src/chat-old.ts +0 -446
  235. package/src/plugins/asana.ts +0 -146
  236. package/src/plugins/downloader/plugin.ts +0 -103
  237. package/src/plugins/downloader/types.ts +0 -92
  238. package/src/plugins/figma.ts +0 -158
  239. package/src/plugins/github.ts +0 -219
  240. package/src/plugins/jira.ts +0 -115
  241. package/src/plugins/linear.ts +0 -230
  242. package/src/plugins/notion.ts +0 -179
  243. package/src/plugins/tree-sitter/editor.ts +0 -369
  244. package/src/plugins/tree-sitter/lang-packs/index.ts +0 -23
  245. package/src/plugins/tree-sitter/lang-packs/java.ts +0 -59
  246. package/src/plugins/tree-sitter/lang-packs/javascript.ts +0 -57
  247. package/src/plugins/tree-sitter/lang-packs/python.ts +0 -45
  248. package/src/plugins/tree-sitter/lang-packs/types.ts +0 -79
  249. package/src/plugins/tree-sitter/lang-packs/typescript.ts +0 -49
  250. package/src/plugins/tree-sitter/parser.ts +0 -470
  251. package/src/plugins/tree-sitter/simple-paths.ts +0 -467
  252. package/src/services/GitHub.ts +0 -59
  253. package/tests/tree-sitter/editor.test.ts +0 -113
  254. package/tests/tree-sitter/invalid.test.ts +0 -299
  255. package/tests/tree-sitter/paths/common-edits.test.ts +0 -564
  256. package/tests/tree-sitter/paths/debug-exact-position.test.ts +0 -44
  257. package/tests/tree-sitter/paths/debug-line-indexing.test.ts +0 -49
  258. package/tests/tree-sitter/paths/debug-paths.test.ts +0 -90
  259. package/tests/tree-sitter/paths/paths.test.ts +0 -170
  260. package/tests/tree-sitter/paths/simple-paths.test.ts +0 -367
  261. package/tests/tree-sitter/sample-after.ts +0 -48
  262. package/tests/tree-sitter/sample-before.ts +0 -25
  263. package/tests/tree-sitter/test-files/completely-broken.ts +0 -7
  264. package/tests/tree-sitter/test-files/duplicate-braces.ts +0 -39
  265. package/tests/tree-sitter/test-files/invalid-nesting.ts +0 -39
  266. package/tests/tree-sitter/test-files/malformed-signature.ts +0 -39
  267. package/tests/tree-sitter/test-files/mismatched-parens.ts +0 -39
  268. package/tests/tree-sitter/test-files/missing-semicolon.ts +0 -39
  269. package/tests/tree-sitter/test-files/partially-broken.ts +0 -20
  270. package/tests/tree-sitter/test-files/specific-errors.ts +0 -14
  271. package/tests/tree-sitter/test-files/unclosed-string.ts +0 -39
  272. package/tests/tree-sitter/tree-sitter.test.ts +0 -251
@@ -1,501 +0,0 @@
1
- "use client";
2
-
3
- import { useState, useEffect } from "react";
4
- import { useParams, useRouter, useSearchParams } from "next/navigation";
5
- import { BenchmarkResults, ExerciseResult } from "@/types/benchmark";
6
- import {
7
- formatCurrency,
8
- formatTime,
9
- formatPercentage,
10
- } from "@/utils/dataProcessor";
11
-
12
- interface HistoricalRun {
13
- endTime: string;
14
- successRate: number;
15
- totalExercises: number;
16
- totalCost: number;
17
- averageTime: number;
18
- averageTurns: number;
19
- commitHash: string;
20
- averageCost: number;
21
- }
22
-
23
- interface DetailResponse {
24
- latest: BenchmarkResults;
25
- history: HistoricalRun[];
26
- totalRuns: number;
27
- }
28
-
29
- export default function ModelDetailPage() {
30
- const params = useParams();
31
- const router = useRouter();
32
- const searchParams = useSearchParams();
33
- const [detailData, setDetailData] = useState<DetailResponse | null>(
34
- null
35
- );
36
- const [loading, setLoading] = useState(true);
37
- const [error, setError] = useState<string | null>(null);
38
-
39
- const model = decodeURIComponent(params.model as string);
40
- const provider = decodeURIComponent(params.provider as string);
41
- const language = decodeURIComponent(params.language as string);
42
- const timestamp = searchParams.get('timestamp');
43
-
44
- useEffect(() => {
45
- async function fetchDetailData() {
46
- try {
47
- const response = await fetch(
48
- `/api/benchmark-detail?model=${encodeURIComponent(
49
- model
50
- )}&provider=${encodeURIComponent(
51
- provider
52
- )}&language=${encodeURIComponent(language)}${
53
- timestamp ? `&timestamp=${timestamp}` : ''
54
- }`
55
- );
56
- if (!response.ok) {
57
- throw new Error("Failed to fetch benchmark details");
58
- }
59
- const data = await response.json();
60
- setDetailData(data);
61
- } catch (err) {
62
- setError(err instanceof Error ? err.message : "An error occurred");
63
- } finally {
64
- setLoading(false);
65
- }
66
- }
67
-
68
- fetchDetailData();
69
- }, [model, provider, language, timestamp]);
70
-
71
- const loadHistoricalRun = async (timestamp: string) => {
72
- // Navigate to the same page but with timestamp parameter
73
- router.push(`/details/${encodeURIComponent(model)}/${encodeURIComponent(provider)}/${encodeURIComponent(language)}?timestamp=${timestamp}`);
74
- };
75
-
76
- const backToLatestRun = () => {
77
- // Navigate to the same page without timestamp parameter
78
- router.push(`/details/${encodeURIComponent(model)}/${encodeURIComponent(provider)}/${encodeURIComponent(language)}`);
79
- };
80
-
81
- const getStatusBadge = (status: string) => {
82
- const baseClasses = "px-2 py-1 text-xs font-medium rounded-full";
83
- switch (status) {
84
- case "success":
85
- return `${baseClasses} bg-green-100 text-green-800`;
86
- case "failure":
87
- return `${baseClasses} bg-red-100 text-red-800`;
88
- case "timeout":
89
- return `${baseClasses} bg-yellow-100 text-yellow-800`;
90
- case "cost_limit":
91
- return `${baseClasses} bg-orange-100 text-orange-800`;
92
- case "turn_limit":
93
- return `${baseClasses} bg-purple-100 text-purple-800`;
94
- default:
95
- return `${baseClasses} bg-gray-100 text-gray-800`;
96
- }
97
- };
98
-
99
- const getStatusIcon = (status: string) => {
100
- switch (status) {
101
- case "success":
102
- return "✅";
103
- case "failure":
104
- return "❌";
105
- case "timeout":
106
- return "⏰";
107
- case "cost_limit":
108
- return "💰";
109
- case "turn_limit":
110
- return "🔄";
111
- default:
112
- return "❓";
113
- }
114
- };
115
-
116
- if (loading) {
117
- return (
118
- <div className="min-h-screen bg-gray-50 flex items-center justify-center">
119
- <div className="text-center">
120
- <div className="animate-spin rounded-full h-32 w-32 border-b-2 border-blue-500 mx-auto"></div>
121
- <p className="mt-4 text-gray-600">Loading benchmark details...</p>
122
- </div>
123
- </div>
124
- );
125
- }
126
-
127
- if (error || !detailData) {
128
- return (
129
- <div className="min-h-screen bg-gray-50 flex items-center justify-center">
130
- <div className="text-center">
131
- <div className="text-red-500 text-6xl mb-4">⚠️</div>
132
- <h3 className="text-lg font-medium text-gray-900 mb-2">
133
- Error Loading Details
134
- </h3>
135
- <p className="text-gray-500 mb-4">
136
- {error || "Benchmark data not found"}
137
- </p>
138
- <button
139
- onClick={() => router.back()}
140
- className="bg-blue-500 hover:bg-blue-600 text-white px-4 py-2 rounded-md"
141
- >
142
- Go Back
143
- </button>
144
- </div>
145
- </div>
146
- );
147
- }
148
-
149
- // Extract the latest benchmark data for display
150
- const benchmarkData = detailData.latest;
151
- const isHistoricalView = timestamp !== null;
152
-
153
- return (
154
- <div className="min-h-screen bg-gray-50">
155
- <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
156
- {/* Header */}
157
- <div className="mb-8">
158
- <button
159
- onClick={() => router.push("/")}
160
- className="mb-4 text-blue-600 hover:text-blue-800 flex items-center"
161
- >
162
- ← Back to Leaderboard
163
- </button>
164
- <h1 className="text-3xl font-bold text-gray-900">
165
- Benchmark Details: {model}
166
- </h1>
167
- <p className="mt-2 text-gray-600">
168
- Provider: {provider} • Language: {language}
169
- </p>
170
- {isHistoricalView && (
171
- <div className="mt-4 p-3 bg-blue-50 border border-blue-200 rounded-md">
172
- <p className="text-blue-800 text-sm">
173
- Viewing historical run from {new Date(benchmarkData.endTime).toLocaleString()}
174
- </p>
175
- <button
176
- onClick={backToLatestRun}
177
- className="mt-2 text-blue-600 hover:text-blue-800 text-sm underline"
178
- >
179
- ← Back to latest run
180
- </button>
181
- </div>
182
- )}
183
- </div>
184
-
185
- {/* Summary Cards */}
186
- <div className="grid grid-cols-1 md:grid-cols-4 gap-6 mb-8">
187
- <div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
188
- <div className="flex items-center">
189
- <div className="flex-shrink-0">
190
- <div className="w-8 h-8 bg-green-500 rounded-md flex items-center justify-center">
191
- <span className="text-white font-bold">%</span>
192
- </div>
193
- </div>
194
- <div className="ml-4">
195
- <p className="text-sm font-medium text-gray-500">
196
- Success Rate
197
- </p>
198
- <p className="text-2xl font-semibold text-gray-900">
199
- {formatPercentage(benchmarkData.summary.successRate * 100)}
200
- </p>
201
- </div>
202
- </div>
203
- </div>
204
-
205
- <div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
206
- <div className="flex items-center">
207
- <div className="flex-shrink-0">
208
- <div className="w-8 h-8 bg-blue-500 rounded-md flex items-center justify-center">
209
- <span className="text-white font-bold">E</span>
210
- </div>
211
- </div>
212
- <div className="ml-4">
213
- <p className="text-sm font-medium text-gray-500">
214
- Total Exercises
215
- </p>
216
- <p className="text-2xl font-semibold text-gray-900">
217
- {benchmarkData.summary.totalExercises}
218
- </p>
219
- </div>
220
- </div>
221
- </div>
222
-
223
- <div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
224
- <div className="flex items-center">
225
- <div className="flex-shrink-0">
226
- <div className="w-8 h-8 bg-yellow-500 rounded-md flex items-center justify-center">
227
- <span className="text-white font-bold">$</span>
228
- </div>
229
- </div>
230
- <div className="ml-4">
231
- <p className="text-sm font-medium text-gray-500">Total Cost</p>
232
- <p className="text-2xl font-semibold text-gray-900">
233
- {formatCurrency(benchmarkData.summary.totalCost)}
234
- </p>
235
- </div>
236
- </div>
237
- </div>
238
-
239
- <div className="bg-white p-6 rounded-lg shadow-sm border border-gray-200">
240
- <div className="flex items-center">
241
- <div className="flex-shrink-0">
242
- <div className="w-8 h-8 bg-purple-500 rounded-md flex items-center justify-center">
243
- <span className="text-white font-bold">T</span>
244
- </div>
245
- </div>
246
- <div className="ml-4">
247
- <p className="text-sm font-medium text-gray-500">Total Time</p>
248
- <p className="text-2xl font-semibold text-gray-900">
249
- {formatTime(benchmarkData.summary.totalTime)}
250
- </p>
251
- </div>
252
- </div>
253
- </div>
254
- </div>
255
-
256
- {/* Exercise Results Table */}
257
- <div className="bg-white rounded-lg shadow-sm border border-gray-200">
258
- <div className="px-6 py-4 border-b border-gray-200">
259
- <h2 className="text-xl font-semibold text-gray-900">
260
- Exercise Results
261
- </h2>
262
- <p className="mt-1 text-sm text-gray-500">
263
- Detailed breakdown of each exercise performance
264
- </p>
265
- </div>
266
- <div className="overflow-x-auto">
267
- <table className="min-w-full">
268
- <thead className="bg-gray-50">
269
- <tr>
270
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
271
- Exercise
272
- </th>
273
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
274
- Status
275
- </th>
276
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
277
- Pass / Total
278
- </th>
279
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
280
- Time
281
- </th>
282
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
283
- Cost
284
- </th>
285
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
286
- Turns
287
- </th>
288
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
289
- Output
290
- </th>
291
- </tr>
292
- </thead>
293
- <tbody className="bg-white divide-y divide-gray-200">
294
- {benchmarkData.exercises.map((exercise, index) => (
295
- <tr key={exercise.exerciseName} className="hover:bg-gray-50">
296
- <td className="px-6 py-4 whitespace-nowrap text-sm font-medium text-gray-900">
297
- <button
298
- onClick={() => router.push(`/exercise/${encodeURIComponent(model)}/${encodeURIComponent(provider)}/${encodeURIComponent(language)}/${encodeURIComponent(exercise.exerciseName)}`)}
299
- className="text-blue-600 hover:text-blue-800 hover:underline text-left"
300
- title="Click to view detailed exercise results"
301
- >
302
- {exercise.exerciseName}
303
- </button>
304
- </td>
305
- <td className="px-6 py-4 whitespace-nowrap">
306
- <div className="flex items-center">
307
- <span className="mr-2">
308
- {getStatusIcon(
309
- exercise.testResult?.success ? "success" : "failure"
310
- )}
311
- </span>
312
- <span
313
- className={getStatusBadge(
314
- exercise.testResult?.success
315
- )}
316
- >
317
- {exercise.testResult?.success ? "Pass" : "Fail"}
318
- </span>
319
- </div>
320
- </td>
321
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
322
- {exercise.testResult?.passed} /{" "}
323
- {exercise.testResult?.total}{" "}
324
- <div>
325
- {exercise.testResult?.skipped
326
- ? `(${exercise.testResult?.skipped} skipped)`
327
- : ""}
328
- </div>
329
- </td>
330
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
331
- {formatTime(exercise.timeElapsed)}
332
- </td>
333
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
334
- {formatCurrency(exercise.cost)}
335
- </td>
336
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500">
337
- {exercise.turns}
338
- </td>
339
- <td className="px-6 py-4 text-sm text-gray-500 max-w-xs truncate">
340
- {exercise.finalOutput?.slice(0, 100) || "-"}
341
- </td>
342
- </tr>
343
- ))}
344
- </tbody>
345
- </table>
346
- </div>
347
- </div>
348
-
349
- {/* Historical Performance Section */}
350
- {detailData.history.length > 0 && (
351
- <div className="mt-8 bg-white rounded-lg shadow-sm border border-gray-200">
352
- <div className="px-6 py-4 border-b border-gray-200">
353
- <h2 className="text-xl font-semibold text-gray-900">
354
- Historical Performance
355
- </h2>
356
- <p className="mt-1 text-sm text-gray-500">
357
- Previous runs for this model/provider/language combination ({detailData.totalRuns} total runs)
358
- </p>
359
- </div>
360
- <div className="overflow-x-auto">
361
- <table className="min-w-full">
362
- <thead className="bg-gray-50">
363
- <tr>
364
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
365
- Run Date
366
- </th>
367
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
368
- Success Rate
369
- </th>
370
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
371
- Exercises
372
- </th>
373
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
374
- Avg Cost
375
- </th>
376
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
377
- Avg Time
378
- </th>
379
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
380
- Avg Turns
381
- </th>
382
- <th className="px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">
383
- Commit
384
- </th>
385
- </tr>
386
- </thead>
387
- <tbody className="bg-white divide-y divide-gray-200">
388
- {detailData.history.map((run, index) => (
389
- <tr
390
- key={`${run.endTime}-${index}`}
391
- className="hover:bg-gray-50 cursor-pointer"
392
- onClick={() => loadHistoricalRun(run.endTime)}
393
- title="Click to view detailed results for this run"
394
- >
395
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
396
- {new Date(run.endTime).toLocaleDateString()}
397
- </td>
398
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
399
- {formatPercentage(run.successRate)}
400
- </td>
401
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
402
- {run.totalExercises}
403
- </td>
404
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
405
- {formatCurrency(run.averageCost)}
406
- </td>
407
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
408
- {formatTime(run.averageTime)}
409
- </td>
410
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-900">
411
- {run.averageTurns.toFixed(1)}
412
- </td>
413
- <td className="px-6 py-4 whitespace-nowrap text-sm text-gray-500 font-mono">
414
- {run.commitHash.slice(0, 8)}
415
- </td>
416
- </tr>
417
- ))}
418
- </tbody>
419
- </table>
420
- </div>
421
- </div>
422
- )}
423
-
424
- {/* Run Information */}
425
- <div className="mt-8 bg-white rounded-lg shadow-sm border border-gray-200 p-6">
426
- <h3 className="text-lg font-semibold text-gray-900 mb-4">
427
- Run Information
428
- </h3>
429
- <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
430
- <div>
431
- <h4 className="text-sm font-medium text-gray-500 mb-2">
432
- Configuration
433
- </h4>
434
- <dl className="space-y-1">
435
- <div className="flex">
436
- <dt className="text-sm text-gray-500 w-24">Model:</dt>
437
- <dd className="text-sm text-gray-900">
438
- {benchmarkData.config.model}
439
- </dd>
440
- </div>
441
- <div className="flex">
442
- <dt className="text-sm text-gray-500 w-24">Provider:</dt>
443
- <dd className="text-sm text-gray-900">
444
- {benchmarkData.config.provider}
445
- </dd>
446
- </div>
447
- <div className="flex">
448
- <dt className="text-sm text-gray-500 w-24">Language:</dt>
449
- <dd className="text-sm text-gray-900">
450
- {benchmarkData.config.language}
451
- </dd>
452
- </div>
453
- {benchmarkData.config.agent && (
454
- <div className="flex">
455
- <dt className="text-sm text-gray-500 w-24">Agent:</dt>
456
- <dd className="text-sm text-gray-900">
457
- {benchmarkData.config.agent}
458
- </dd>
459
- </div>
460
- )}
461
- </dl>
462
- </div>
463
- <div>
464
- <h4 className="text-sm font-medium text-gray-500 mb-2">Limits</h4>
465
- <dl className="space-y-1">
466
- <div className="flex">
467
- <dt className="text-sm text-gray-500 w-24">Max Turns:</dt>
468
- <dd className="text-sm text-gray-900">
469
- {benchmarkData.config.limits.maxTurns}
470
- </dd>
471
- </div>
472
- <div className="flex">
473
- <dt className="text-sm text-gray-500 w-24">Max Time:</dt>
474
- <dd className="text-sm text-gray-900">
475
- {formatTime(benchmarkData.config.limits.maxTime)}
476
- </dd>
477
- </div>
478
- <div className="flex">
479
- <dt className="text-sm text-gray-500 w-24">Max Cost:</dt>
480
- <dd className="text-sm text-gray-900">
481
- {formatCurrency(benchmarkData.config.limits.maxCost)}
482
- </dd>
483
- </div>
484
- </dl>
485
- </div>
486
- </div>
487
- <div className="mt-4 pt-4 border-t border-gray-200">
488
- <div className="flex justify-between text-sm text-gray-500">
489
- <span>
490
- Started: {new Date(benchmarkData.startTime).toLocaleString()}
491
- </span>
492
- <span>
493
- Completed: {new Date(benchmarkData.endTime).toLocaleString()}
494
- </span>
495
- </div>
496
- </div>
497
- </div>
498
- </div>
499
- </div>
500
- );
501
- }