@dailephd/my-dev-kit-lab 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/README.md +272 -0
  2. package/benchmarks/contracts/benchmark-project-profiles.json +1199 -0
  3. package/benchmarks/contracts/todo-behavior.md +70 -0
  4. package/benchmarks/contracts/todo-benchmark-case.json +227 -0
  5. package/benchmarks/projects/README.md +34 -0
  6. package/benchmarks/projects/task-analytics-large-mixed/README.md +1 -0
  7. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/__init__.py +3 -0
  8. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/fixtures.py +6 -0
  9. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/metrics.py +29 -0
  10. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/models.py +21 -0
  11. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/parser.py +16 -0
  12. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/pipeline.py +9 -0
  13. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/quality.py +8 -0
  14. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/reporting.py +11 -0
  15. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_metrics.py +19 -0
  16. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_parser.py +15 -0
  17. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_quality.py +19 -0
  18. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_reporting.py +15 -0
  19. package/benchmarks/projects/task-analytics-large-mixed/ts/package.json +12 -0
  20. package/benchmarks/projects/task-analytics-large-mixed/ts/src/index.ts +11 -0
  21. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/analyticsSnapshot.ts +20 -0
  22. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/project.ts +5 -0
  23. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/task.ts +10 -0
  24. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/buildProjectLeaderboard.ts +7 -0
  25. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/formatTaskHealthReport.ts +13 -0
  26. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/buildAnalyticsSnapshot.ts +39 -0
  27. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/completeTask.ts +10 -0
  28. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/createTask.ts +21 -0
  29. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/listTasksByProject.ts +6 -0
  30. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/projectStore.ts +20 -0
  31. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/taskStore.ts +44 -0
  32. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/projectValidation.ts +12 -0
  33. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/taskValidation.ts +18 -0
  34. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/buildAnalyticsSnapshot.test.ts +48 -0
  35. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/completeTask.test.ts +21 -0
  36. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/createTask.test.ts +31 -0
  37. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/listTasksByProject.test.ts +18 -0
  38. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/reporting.test.ts +19 -0
  39. package/benchmarks/projects/task-analytics-large-mixed/ts/tsconfig.json +12 -0
  40. package/benchmarks/projects/task-analytics-large-mixed/ts/vitest.config.ts +5 -0
  41. package/benchmarks/projects/task-workflow-medium-ts/README.md +1 -0
  42. package/benchmarks/projects/task-workflow-medium-ts/package.json +12 -0
  43. package/benchmarks/projects/task-workflow-medium-ts/src/index.ts +9 -0
  44. package/benchmarks/projects/task-workflow-medium-ts/src/models/project.ts +6 -0
  45. package/benchmarks/projects/task-workflow-medium-ts/src/models/task.ts +39 -0
  46. package/benchmarks/projects/task-workflow-medium-ts/src/services/completeTask.ts +15 -0
  47. package/benchmarks/projects/task-workflow-medium-ts/src/services/createTask.ts +26 -0
  48. package/benchmarks/projects/task-workflow-medium-ts/src/services/filterTasks.ts +17 -0
  49. package/benchmarks/projects/task-workflow-medium-ts/src/services/importTasks.ts +33 -0
  50. package/benchmarks/projects/task-workflow-medium-ts/src/services/summarizeTasks.ts +30 -0
  51. package/benchmarks/projects/task-workflow-medium-ts/src/store/taskStore.ts +76 -0
  52. package/benchmarks/projects/task-workflow-medium-ts/src/utils/deterministicId.ts +3 -0
  53. package/benchmarks/projects/task-workflow-medium-ts/src/validation/taskValidation.ts +45 -0
  54. package/benchmarks/projects/task-workflow-medium-ts/tests/completeTask.test.ts +16 -0
  55. package/benchmarks/projects/task-workflow-medium-ts/tests/createTask.test.ts +21 -0
  56. package/benchmarks/projects/task-workflow-medium-ts/tests/filterTasks.test.ts +18 -0
  57. package/benchmarks/projects/task-workflow-medium-ts/tests/importTasks.test.ts +22 -0
  58. package/benchmarks/projects/task-workflow-medium-ts/tests/summarizeTasks.test.ts +29 -0
  59. package/benchmarks/projects/task-workflow-medium-ts/tsconfig.json +12 -0
  60. package/benchmarks/projects/task-workflow-medium-ts/vitest.config.ts +5 -0
  61. package/benchmarks/projects/todo-js/README.md +3 -0
  62. package/benchmarks/projects/todo-js/package.json +11 -0
  63. package/benchmarks/projects/todo-js/src/index.js +2 -0
  64. package/benchmarks/projects/todo-js/src/taskService.js +37 -0
  65. package/benchmarks/projects/todo-js/src/taskStore.js +28 -0
  66. package/benchmarks/projects/todo-js/tests/taskService.test.js +45 -0
  67. package/benchmarks/projects/todo-js/vitest.config.js +5 -0
  68. package/benchmarks/projects/todo-mixed-ts-py/README.md +3 -0
  69. package/benchmarks/projects/todo-mixed-ts-py/package.json +13 -0
  70. package/benchmarks/projects/todo-mixed-ts-py/python/task_service.py +76 -0
  71. package/benchmarks/projects/todo-mixed-ts-py/src/taskCli.ts +38 -0
  72. package/benchmarks/projects/todo-mixed-ts-py/tests/mixedBoundary.test.ts +18 -0
  73. package/benchmarks/projects/todo-mixed-ts-py/tsconfig.json +12 -0
  74. package/benchmarks/projects/todo-mixed-ts-py/vitest.config.ts +5 -0
  75. package/benchmarks/projects/todo-python/README.md +3 -0
  76. package/benchmarks/projects/todo-python/src/__init__.py +4 -0
  77. package/benchmarks/projects/todo-python/src/task_service.py +32 -0
  78. package/benchmarks/projects/todo-python/src/task_store.py +28 -0
  79. package/benchmarks/projects/todo-python/tests/test_task_service.py +52 -0
  80. package/benchmarks/projects/todo-ts/README.md +3 -0
  81. package/benchmarks/projects/todo-ts/package.json +12 -0
  82. package/benchmarks/projects/todo-ts/src/index.ts +2 -0
  83. package/benchmarks/projects/todo-ts/src/taskService.ts +41 -0
  84. package/benchmarks/projects/todo-ts/src/taskStore.ts +34 -0
  85. package/benchmarks/projects/todo-ts/tests/taskService.test.ts +45 -0
  86. package/benchmarks/projects/todo-ts/tsconfig.json +12 -0
  87. package/benchmarks/projects/todo-ts/vitest.config.ts +5 -0
  88. package/dist/scripts/build-gallery.js +3 -0
  89. package/dist/scripts/capture-demo-report.js +3 -0
  90. package/dist/scripts/evaluate-token-savings.js +2 -0
  91. package/dist/scripts/experiments/describeExperiment.js +143 -0
  92. package/dist/scripts/experiments/listExperiments.js +44 -0
  93. package/dist/scripts/experiments/runExperiment.js +199 -0
  94. package/dist/scripts/generate-experiment-plots.js +3 -0
  95. package/dist/scripts/generate-prompt-variants.js +2 -0
  96. package/dist/scripts/render-experiment-report.js +2 -0
  97. package/dist/scripts/run-agent-prompt.js +2 -0
  98. package/dist/scripts/run-controlled-experiment.js +2 -0
  99. package/dist/scripts/run-final-demo.js +3 -0
  100. package/dist/scripts/run-lab-demo.js +5 -0
  101. package/dist/scripts/run-visualization-demos.js +3 -0
  102. package/dist/scripts/security/runCodeql.js +57 -0
  103. package/dist/scripts/security/runDependencyChecks.js +57 -0
  104. package/dist/scripts/security/runFuzzSmoke.js +29 -0
  105. package/dist/scripts/security/runPackageChecks.js +56 -0
  106. package/dist/scripts/security/runSemgrep.js +63 -0
  107. package/dist/scripts/security/validate.js +117 -0
  108. package/dist/scripts/verify-benchmarks.js +202 -0
  109. package/dist/src/agents/adapters/claudeAdapter.js +37 -0
  110. package/dist/src/agents/adapters/codexAdapter.js +110 -0
  111. package/dist/src/agents/adapters/fakeAgentAdapter.js +101 -0
  112. package/dist/src/agents/agentRegistry.js +21 -0
  113. package/dist/src/agents/index.js +7 -0
  114. package/dist/src/agents/parseAgentTokenUsage.js +137 -0
  115. package/dist/src/agents/runAgentPrompt.js +38 -0
  116. package/dist/src/agents/types.js +1 -0
  117. package/dist/src/commands/buildGalleryCommand.js +56 -0
  118. package/dist/src/commands/captureDemoReport.js +116 -0
  119. package/dist/src/commands/evaluateTokenSavings.js +175 -0
  120. package/dist/src/commands/generateExperimentPlotsCommand.js +38 -0
  121. package/dist/src/commands/generatePromptVariants.js +67 -0
  122. package/dist/src/commands/renderExperimentReportCommand.js +131 -0
  123. package/dist/src/commands/runAgentPromptCommand.js +132 -0
  124. package/dist/src/commands/runControlledExperimentCommand.js +174 -0
  125. package/dist/src/commands/runFinalDemoCommand.js +123 -0
  126. package/dist/src/commands/runLabDemo.js +62 -0
  127. package/dist/src/commands/runVisualizationDemosCommand.js +67 -0
  128. package/dist/src/core/commandLine.js +59 -0
  129. package/dist/src/core/countTokens.js +8 -0
  130. package/dist/src/core/fileGlobs.js +100 -0
  131. package/dist/src/core/localProjectTarget.js +75 -0
  132. package/dist/src/core/pathSafety.js +19 -0
  133. package/dist/src/core/pythonCommand.js +30 -0
  134. package/dist/src/core/resolveCommand.js +110 -0
  135. package/dist/src/core/runMeasuredCommand.js +143 -0
  136. package/dist/src/evaluation/benchmarkMetadata.js +207 -0
  137. package/dist/src/evaluation/buildExperimentMatrix.js +75 -0
  138. package/dist/src/evaluation/classifyAgentRunOutcome.js +40 -0
  139. package/dist/src/evaluation/compareExperimentRuns.js +79 -0
  140. package/dist/src/evaluation/compareTokenSavings.js +47 -0
  141. package/dist/src/evaluation/controlledExperimentTypes.js +1 -0
  142. package/dist/src/evaluation/index.js +18 -0
  143. package/dist/src/evaluation/parseAgentAnswer.js +230 -0
  144. package/dist/src/evaluation/projectComplexity.js +126 -0
  145. package/dist/src/evaluation/projectFileTree.js +83 -0
  146. package/dist/src/evaluation/readEvaluationCases.js +59 -0
  147. package/dist/src/evaluation/renderTokenSavingsReportInput.js +55 -0
  148. package/dist/src/evaluation/runControlledExperiment.js +158 -0
  149. package/dist/src/evaluation/runMyDevKitRetrieval.js +197 -0
  150. package/dist/src/evaluation/runRawFullFileBaseline.js +31 -0
  151. package/dist/src/evaluation/scoreCorrectness.js +127 -0
  152. package/dist/src/evaluation/types.js +1 -0
  153. package/dist/src/evaluation/writeExperimentArtifacts.js +104 -0
  154. package/dist/src/evaluation/writeTokenSavingsArtifacts.js +57 -0
  155. package/dist/src/experiments/config.js +24 -0
  156. package/dist/src/experiments/defaultRegistry.js +7 -0
  157. package/dist/src/experiments/errors.js +18 -0
  158. package/dist/src/experiments/index.js +9 -0
  159. package/dist/src/experiments/outputPaths.js +25 -0
  160. package/dist/src/experiments/plugins/contextStrategyComparison/config.js +37 -0
  161. package/dist/src/experiments/plugins/contextStrategyComparison/index.js +3 -0
  162. package/dist/src/experiments/plugins/contextStrategyComparison/plugin.js +83 -0
  163. package/dist/src/experiments/plugins/contextStrategyComparison/resultMapping.js +260 -0
  164. package/dist/src/experiments/plugins/index.js +1 -0
  165. package/dist/src/experiments/registry.js +43 -0
  166. package/dist/src/experiments/results.js +48 -0
  167. package/dist/src/experiments/runner.js +181 -0
  168. package/dist/src/experiments/target.js +8 -0
  169. package/dist/src/experiments/types.js +1 -0
  170. package/dist/src/gallery/index.js +2 -0
  171. package/dist/src/gallery/types.js +1 -0
  172. package/dist/src/gallery/writeGalleryManifest.js +214 -0
  173. package/dist/src/index.js +12 -0
  174. package/dist/src/plots/buildExperimentPlotData.js +137 -0
  175. package/dist/src/plots/index.js +4 -0
  176. package/dist/src/plots/renderSvgChart.js +82 -0
  177. package/dist/src/plots/types.js +1 -0
  178. package/dist/src/plots/writePlotArtifacts.js +46 -0
  179. package/dist/src/prompts/buildPromptContext.js +68 -0
  180. package/dist/src/prompts/generateMyDevKitPrompt.js +106 -0
  181. package/dist/src/prompts/generatePromptVariants.js +36 -0
  182. package/dist/src/prompts/generateRawFullFilePrompt.js +97 -0
  183. package/dist/src/prompts/index.js +7 -0
  184. package/dist/src/prompts/measurePromptComplexity.js +41 -0
  185. package/dist/src/prompts/types.js +1 -0
  186. package/dist/src/prompts/writePromptArtifacts.js +43 -0
  187. package/dist/src/report/buildExperimentReportInput.js +339 -0
  188. package/dist/src/report/experimentReportTypes.js +1 -0
  189. package/dist/src/report/experiments/buildPluginExperimentReport.js +153 -0
  190. package/dist/src/report/experiments/experimentReportModel.js +1 -0
  191. package/dist/src/report/experiments/index.js +4 -0
  192. package/dist/src/report/experiments/renderPluginExperimentReportHtml.js +133 -0
  193. package/dist/src/report/experiments/writePluginExperimentReports.js +30 -0
  194. package/dist/src/report/index.js +8 -0
  195. package/dist/src/report/renderExperimentHtmlReport.js +354 -0
  196. package/dist/src/report/renderHtmlReport.js +103 -0
  197. package/dist/src/report/types.js +10 -0
  198. package/dist/src/report/writeExperimentReportArtifacts.js +38 -0
  199. package/dist/src/report/writeReportArtifacts.js +39 -0
  200. package/dist/src/screenshot/captureReportScreenshot.js +75 -0
  201. package/dist/src/screenshot/index.js +2 -0
  202. package/dist/src/screenshot/types.js +1 -0
  203. package/dist/src/securityValidation/artifacts.js +15 -0
  204. package/dist/src/securityValidation/cliAdversarial/adversarialCliConfig.js +38 -0
  205. package/dist/src/securityValidation/cliAdversarial/dataVolumeChecks.js +194 -0
  206. package/dist/src/securityValidation/cliAdversarial/jsonStdoutChecks.js +359 -0
  207. package/dist/src/securityValidation/cliAdversarial/malformedArtifactChecks.js +284 -0
  208. package/dist/src/securityValidation/cliAdversarial/malformedArtifactFixtures.js +79 -0
  209. package/dist/src/securityValidation/cliAdversarial/pathBoundaryChecks.js +431 -0
  210. package/dist/src/securityValidation/cliAdversarial/pathCases.js +144 -0
  211. package/dist/src/securityValidation/cliAdversarial/readOnlyBoundaryChecks.js +294 -0
  212. package/dist/src/securityValidation/cliAdversarial/runAdversarialCheck.js +149 -0
  213. package/dist/src/securityValidation/cliAdversarial/subprocessSafetyChecks.js +214 -0
  214. package/dist/src/securityValidation/cliAdversarial/tempWorkspace.js +160 -0
  215. package/dist/src/securityValidation/commandRunner.js +136 -0
  216. package/dist/src/securityValidation/config.js +39 -0
  217. package/dist/src/securityValidation/dependencies/parseNpmAudit.js +115 -0
  218. package/dist/src/securityValidation/dependencies/parseNpmLs.js +71 -0
  219. package/dist/src/securityValidation/dependencies/parseNpmOutdated.js +41 -0
  220. package/dist/src/securityValidation/dependencies/runDependencyChecks.js +239 -0
  221. package/dist/src/securityValidation/dependencies/runOsvScanner.js +43 -0
  222. package/dist/src/securityValidation/fuzz/fuzzHarness.js +61 -0
  223. package/dist/src/securityValidation/fuzz/fuzzTargets.js +204 -0
  224. package/dist/src/securityValidation/fuzz/randomInput.js +0 -0
  225. package/dist/src/securityValidation/index.js +34 -0
  226. package/dist/src/securityValidation/packageChecks/forbiddenPackageContents.js +67 -0
  227. package/dist/src/securityValidation/packageChecks/parseNpmPackDryRun.js +56 -0
  228. package/dist/src/securityValidation/packageChecks/runPackageChecks.js +88 -0
  229. package/dist/src/securityValidation/report/renderSecurityReport.js +248 -0
  230. package/dist/src/securityValidation/report/securityReportTypes.js +1 -0
  231. package/dist/src/securityValidation/staticScans/codeql.js +66 -0
  232. package/dist/src/securityValidation/staticScans/semgrep.js +180 -0
  233. package/dist/src/securityValidation/testMatrix.js +535 -0
  234. package/dist/src/securityValidation/types.js +34 -0
  235. package/dist/src/securityValidation/validate/resolveTarget.js +32 -0
  236. package/dist/src/securityValidation/validate/runSecurityValidation.js +169 -0
  237. package/dist/src/securityValidation/validate/verdict.js +73 -0
  238. package/dist/src/visualizationDemos/buildMyDevKitVisualizationCommands.js +59 -0
  239. package/dist/src/visualizationDemos/index.js +4 -0
  240. package/dist/src/visualizationDemos/runVisualizationDemos.js +82 -0
  241. package/dist/src/visualizationDemos/types.js +1 -0
  242. package/dist/src/visualizationDemos/writeVisualizationDemoArtifacts.js +25 -0
  243. package/docs/METRICS.md +286 -0
  244. package/examples/demo-report-input.json +78 -0
  245. package/examples/lab-demo-cases.json +35 -0
  246. package/examples/real-agent-campaign-cases.json +118 -0
  247. package/examples/token-savings-cases.json +122 -0
  248. package/package.json +91 -0
  249. package/tests/fixtures/fake-adversarial-cli.js +152 -0
  250. package/tests/fixtures/fake-my-dev-kit-cli.js +83 -0
@@ -0,0 +1,354 @@
1
/**
 * Builds the complete standalone HTML document for an experiment report.
 *
 * The page is a single template literal: an inline stylesheet, a hero header,
 * and one <section> per report area. Free-text fields are passed through
 * escapeHtml; tabular and list data is delegated to the shared helpers
 * (table, list, note, keyValueTable, answerCard, metricCard) and to the
 * section renderers defined in this module.
 *
 * @param {object} report - Report model; this function reads title, subtitle,
 *   generatedAt, sourceExperimentDir, executiveSummary, methodology,
 *   projectProfiles, fileTreeSections, benchmarkCases,
 *   promptComparisonSections, agentRunSections, correctnessSections,
 *   tokenSections, timingSections, comparisonSections, plotSections,
 *   visualizationSections, formulaSections, warnings, limitations,
 *   artifactLinks and nextSteps.
 * @returns {string} The rendered HTML document.
 */
export function renderExperimentHtmlReport(report) {
  const summary = report.executiveSummary;

  // Optional sections render to an empty string when there is nothing to show.
  const plotsSection = report.plotSections.length
    ? `<section id="plots"><h2>Plots</h2>${renderPlots(report.plotSections)}</section>`
    : "";
  const visualizationSection = report.visualizationSections.length
    ? `<section id="visualization-demos"><h2>my-dev-kit Visualization Demos</h2>${renderVisualizations(report.visualizationSections)}</section>`
    : "";

  // One card per formula: title, preformatted formula text, then its notes.
  const renderFormulaCard = (section) => {
    const heading = `<h3>${escapeHtml(section.title)}</h3>`;
    const formula = `<pre>${escapeHtml(section.formula)}</pre>`;
    return `<div class="card">${heading}${formula}${list(section.notes)}</div>`;
  };
  const formulaCards = report.formulaSections.map((section) => renderFormulaCard(section)).join("");

  const artifactRows = report.artifactLinks.map(({ label, kind, path }) => [label, kind, path]);

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>${escapeHtml(report.title)}</title>
<style>
:root { color-scheme: light; font-family: Arial, Helvetica, sans-serif; }
body { margin: 0; color: #17212b; background: #f5f7fa; }
main { max-width: 1180px; margin: 0 auto; padding: 28px; }
h1 { font-size: 34px; margin: 0 0 8px; }
h2 { font-size: 24px; margin: 0 0 14px; }
h3 { font-size: 18px; margin: 18px 0 8px; }
p, li { line-height: 1.45; }
code, pre { font-family: Consolas, "Courier New", monospace; }
pre { white-space: pre-wrap; word-break: break-word; background: #101820; color: #f4f7fb; padding: 14px; border-radius: 6px; overflow: auto; }
table { width: 100%; border-collapse: collapse; margin: 10px 0 18px; background: #fff; }
th, td { border: 1px solid #d9e1ea; padding: 8px 10px; text-align: left; vertical-align: top; }
th { background: #edf2f7; }
.hero, section { background: #fff; border: 1px solid #d7e0ea; border-radius: 8px; padding: 20px; margin-bottom: 18px; }
.muted { color: #5b6b7b; }
.grid { display: grid; grid-template-columns: repeat(3, minmax(0, 1fr)); gap: 12px; }
.card { border: 1px solid #d9e1ea; border-radius: 6px; padding: 12px; background: #fbfcfe; }
.metric { font-size: 26px; font-weight: 700; margin-top: 6px; }
.badge { display: inline-block; border-radius: 999px; padding: 3px 9px; font-size: 12px; font-weight: 700; text-transform: uppercase; background: #e8eef7; }
.status-completed, .answer-yes, .status-pass { background: #dcf4e4; color: #0c6b30; }
.status-failed, .status-invalid-output, .answer-no, .status-fail { background: #fde2e2; color: #8a1f1f; }
.status-timeout, .status-agent-limit-reached, .answer-mixed { background: #fff0ce; color: #785400; }
.status-agent-unavailable, .status-skipped, .answer-unavailable, .answer-inconclusive { background: #ece8ff; color: #5639a8; }
.two { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 14px; }
.small { font-size: 13px; }
.note { margin: 8px 0 14px; padding: 10px 12px; border-left: 4px solid #537895; background: #f4f8fc; color: #324255; }
@media (max-width: 900px) { main { padding: 16px; } .grid, .two { grid-template-columns: 1fr; } table { font-size: 13px; } }
</style>
</head>
<body>
<main>
<header class="hero">
<h1>${escapeHtml(report.title)}</h1>
<p>${escapeHtml(report.subtitle)}</p>
<p class="muted">Generated ${escapeHtml(report.generatedAt)} from <code>${escapeHtml(report.sourceExperimentDir)}</code></p>
</header>

<section id="executive-summary">
<h2>Executive Summary</h2>
<p>${escapeHtml(summary.summaryText)}</p>
<div class="grid">
${answerCard("Does my-dev-kit save tokens?", summary.doesMyDevKitSaveTokens)}
${answerCard("Does my-dev-kit preserve correctness?", summary.doesMyDevKitPreserveCorrectness)}
${answerCard("Does my-dev-kit reduce execution time?", summary.doesMyDevKitReduceExecutionTime)}
</div>
<div class="grid">
${metricCard("Completed runs", summary.completedRuns)}
${metricCard("Failed runs", summary.failedRuns)}
${metricCard("Unavailable runs", summary.unavailableRuns)}
${metricCard("External limits", summary.limitReachedRuns)}
${metricCard("Timeouts", summary.timeoutRuns)}
${metricCard("Invalid output", summary.invalidOutputRuns)}
</div>
<h3>Comparison Reliability</h3>
${note("Reliability label describes whether a comparison had completed runs, available token counts, and valid scores. Strong means both runs completed and token comparison was available.")}
${keyValueTable(summary.comparisonReliabilityCounts)}
</section>

<section id="methodology">
<h2>Methodology</h2>
${list(report.methodology)}
</section>

<section id="projects">
<h2>Benchmark Projects</h2>
${note("Complexity score is a 0-100 weighted score based on source file count, source lines, language count, internal imports, max file size, and expected relevant context size. Higher means the project should be harder for raw full-file reading.")}
${report.projectProfiles.map(renderProject).join("")}
</section>

<section id="file-trees">
<h2>File Trees</h2>
${note("File tree entries show the compact benchmark layout used by prompts and reports: each row lists the relative path, its role, detected language, and line count.")}
${report.fileTreeSections.map(renderFileTree).join("")}
</section>

<section id="benchmark-tasks">
<h2>Benchmark Tasks</h2>
${note("Expected files, symbols, and facts come from deterministic answer keys. They drive correctness scoring and help show how much context a task should need.")}
${report.benchmarkCases.map(renderBenchmarkCase).join("")}
</section>

<section id="prompts">
<h2>Prompt Strategies</h2>
${note("Prompt metrics summarize prompt size and structure. Higher prompt chars or estimated tokens mean more instruction overhead before the agent starts its own work.")}
${report.promptComparisonSections.map(renderPrompt).join("")}
</section>

<section id="agent-runs">
<h2>Agent Runs</h2>
${note("Run status records whether the agent completed, failed, timed out, was unavailable, hit a usage/session limit, or returned invalid output. Token source and reliability describe how trustworthy the token counts are for that run.")}
${renderRuns(report.agentRunSections)}
</section>

<section id="correctness">
<h2>Correctness Results</h2>
${note("Correctness score is deterministic and answer-key based: 25% expected files, 25% expected symbols, 50% expected facts. It is not an LLM judge.")}
${renderCorrectness(report.correctnessSections)}
</section>

<section id="tokens">
<h2>Token Results</h2>
${note("Token savings percent is computed as (rawTotalTokens - myDevKitTotalTokens) / rawTotalTokens * 100. Positive means my-dev-kit used fewer tokens. Negative means my-dev-kit used more tokens.")}
${renderTokenComparisons(report.tokenSections)}
</section>

<section id="timing">
<h2>Timing Results</h2>
${note("Duration reduction percent is computed as (rawDurationMs - myDevKitDurationMs) / rawDurationMs * 100. Positive means my-dev-kit was faster. Negative means it was slower.")}
${renderTimingComparisons(report.timingSections)}
</section>

<section id="comparisons">
<h2>Raw vs my-dev-kit Comparisons</h2>
${note("This table joins the paired raw-full-file and my-dev-kit-guided runs for the same agent, case, and prompt complexity so token, timing, and correctness differences are directly comparable.")}
${renderComparisons(report.comparisonSections)}
</section>

${plotsSection}

${visualizationSection}

<section id="formulas">
<h2>Formulas</h2>
${note("Full metric definitions live in docs/METRICS.md. The artifact index below links to that glossary alongside the experiment JSON artifacts.")}
${formulaCards}
</section>

<section id="warnings">
<h2>Warnings and Limitations</h2>
<h3>Warnings</h3>
${report.warnings.length ? list(report.warnings) : "<p>No warnings.</p>"}
<h3>Limitations</h3>
${list(report.limitations)}
</section>

<section id="artifacts">
<h2>Artifact Index</h2>
${table(["Label", "Kind", "Path"], artifactRows)}
</section>

<section id="next-steps">
<h2>Next Steps</h2>
${list(report.nextSteps)}
</section>
</main>
</body>
</html>`;
}
160
/**
 * Renders one benchmark project as a card: heading, description, a
 * field/value overview table, and a table of its complexity metrics.
 *
 * @param {object} section - Project section; reads `profile` and
 *   `complexityMetrics`.
 * @returns {string} HTML fragment for the project card.
 */
function renderProject(section) {
  const { profile, complexityMetrics } = section;

  const overviewRows = [
    ["Primary language", profile.primaryLanguage],
    ["Languages", profile.languages.join(", ")],
    ["Language mix", profile.languageMix],
    ["Benchmark purpose", profile.benchmarkPurpose],
    ["Expected use cases", profile.expectedUseCases.join(", ")],
    ["Complexity level", profile.complexityLevel],
    ["Complexity score", String(profile.complexityScore)],
    ["Complexity formula", profile.complexityFormula.id],
  ];
  const overviewTable = table(["Field", "Value"], overviewRows);

  return `<div class="card">
<h3>${escapeHtml(profile.displayName)} <span class="muted">(${escapeHtml(profile.projectId)})</span></h3>
<p>${escapeHtml(profile.description)}</p>
${overviewTable}
<h3>Complexity Metrics</h3>
${keyValueTable(complexityMetrics)}
</div>`;
}
179
/**
 * Renders the file tree of one benchmark project as a card containing a
 * path/role/language/lines table, plus a "Showing X of Y" notice when the
 * section is flagged as truncated.
 *
 * @param {object} section - File tree section; reads `projectId`, `entries`,
 *   `truncated` and `totalEntries`.
 * @returns {string} HTML fragment for the file tree card.
 */
function renderFileTree(section) {
  const rows = section.entries.map((entry) => {
    // A missing language or line count is shown as an empty cell.
    const language = entry.language ?? "";
    const lineCount = entry.lines === undefined ? "" : String(entry.lines);
    return [entry.path, entry.role, language, lineCount];
  });

  let truncationNotice = "";
  if (section.truncated) {
    truncationNotice = `<p class="muted">Showing ${section.entries.length} of ${section.totalEntries} entries.</p>`;
  }

  return `<div class="card">
<h3>${escapeHtml(section.projectId)}</h3>
${table(["Path", "Role", "Language", "Lines"], rows)}
${truncationNotice}
</div>`;
}
186
/**
 * Renders one benchmark case as a card: heading, a field/value table
 * describing the task and its expected answer, and a table of expected facts.
 *
 * @param {object} section - Benchmark case section; reads `title`, `caseId`,
 *   `benchmarkProject`, `query`, `expectedOperation`, `expectedFiles`,
 *   `expectedSymbols`, `minimumCorrectFacts`, `notes` and `expectedFacts`.
 * @returns {string} HTML fragment for the benchmark case card.
 */
function renderBenchmarkCase(section) {
  const detailRows = [
    ["Benchmark project", section.benchmarkProject],
    ["Task/query", section.query],
    // Optional fields fall back to an empty cell.
    ["Expected operation", section.expectedOperation ?? ""],
    ["Expected files", section.expectedFiles.join(", ")],
    ["Expected symbols", section.expectedSymbols.join(", ")],
    ["Minimum correct facts", String(section.minimumCorrectFacts)],
    ["Notes", section.notes ?? ""],
  ];
  const detailTable = table(["Field", "Value"], detailRows);

  const factRows = section.expectedFacts.map(({ id, required, weight, text }) => [
    id,
    String(required),
    String(weight),
    text,
  ]);
  const factTable = table(["ID", "Required", "Weight", "Text"], factRows);

  return `<div class="card">
<h3>${escapeHtml(section.title)} <span class="muted">(${escapeHtml(section.caseId)})</span></h3>
${detailTable}
<h3>Expected Facts Summary</h3>
${factTable}
</div>`;
}
202
/**
 * Renders one prompt strategy as an initially open <details> card: a summary
 * line, a metric/value table, the escaped prompt excerpt, and a truncation
 * notice when the excerpt was cut.
 *
 * @param {object} section - Prompt comparison section; reads `strategy`,
 *   `complexityLevel`, `agentId`, `runId`, `promptPath`, `metrics`,
 *   `promptExcerpt` and `promptWasTruncated`.
 * @returns {string} HTML fragment for the prompt card.
 */
function renderPrompt(section) {
  const { metrics } = section;

  // Every metric is stringified the same way; keep label/value pairs together.
  const metricPairs = [
    ["Prompt estimated tokens", metrics.promptEstimatedTokens],
    ["Prompt chars", metrics.promptChars],
    ["Instruction count", metrics.instructionCount],
    ["Constraint count", metrics.constraintCount],
    ["Requested output fields", metrics.requestedOutputFieldCount],
    ["Task step count", metrics.taskStepCount],
    ["Expected fact count", metrics.expectedFactCount],
    ["Expected file count", metrics.expectedFileCount],
    ["Expected symbol count", metrics.expectedSymbolCount],
    ["Graph-guided retrieval required", metrics.requiresGraphGuidedRetrieval],
    ["Command execution required", metrics.requiresCommandExecution],
  ];
  const rows = [
    ["Run ID", section.runId],
    ["Prompt path", section.promptPath ?? ""],
    ...metricPairs.map(([label, value]) => [label, String(value)]),
  ];
  const metricsTable = table(["Metric", "Value"], rows);

  const truncationNotice = section.promptWasTruncated
    ? '<p class="muted">Prompt excerpt truncated; see artifact path for full prompt.</p>'
    : "";

  return `<details class="card" open>
<summary><strong>${escapeHtml(section.strategy)}</strong> ${escapeHtml(section.complexityLevel)} for ${escapeHtml(section.agentId)}</summary>
${metricsTable}
<pre>${escapeHtml(section.promptExcerpt)}</pre>
${truncationNotice}
</details>`;
}
224
/**
 * Renders every agent run as a row of a single table covering identity,
 * status, duration, exit code, token usage, correctness and artifact paths.
 *
 * @param {Array<object>} runs - Agent run sections; each row reads the run's
 *   identifiers, status fields, `durationMs`, `agentRunResult.exitCode`,
 *   `tokenUsage`, token source/reliability, `correctness` and `artifactPaths`.
 * @returns {string} Whatever the shared `table` helper returns for these rows.
 */
function renderRuns(runs) {
  const headers = [
    "Run ID",
    "Agent",
    "Strategy",
    "Complexity",
    "Status",
    "Reason",
    "Duration",
    "Exit",
    "Input",
    "Output",
    "Total",
    "Token Source",
    "Correctness",
    "Artifacts",
  ];

  // Token counts that were never reported are labelled rather than left blank.
  const tokenCell = (count) => (count === undefined ? "unavailable" : String(count));

  const toRow = (run) => {
    const { tokenUsage, correctness, artifactPaths } = run;
    const exitCode = run.agentRunResult.exitCode;
    const verdict = correctness.passed ? "pass" : "fail";
    // Only artifact paths that are actually set are listed, one per line.
    const artifacts = [
      artifactPaths.promptPath,
      artifactPaths.agentRunResultPath,
      artifactPaths.parsedAnswerPath,
      artifactPaths.correctnessScorePath,
    ].filter(Boolean);

    return [
      run.runId,
      run.agentId,
      run.promptStrategy,
      run.promptComplexityLevel,
      run.status,
      run.statusReason,
      `${run.durationMs} ms`,
      exitCode === null ? "" : String(exitCode),
      tokenCell(tokenUsage.inputTokens),
      tokenCell(tokenUsage.outputTokens),
      tokenCell(tokenUsage.totalTokens),
      `${run.tokenUsageSource} / ${run.tokenUsageReliability}`,
      `${correctness.correctnessScore} (${verdict})`,
      artifacts.join("\n"),
    ];
  };

  return table(headers, runs.map((run) => toRow(run)));
}
244
/**
 * Renders the generated plots as a three-column table (title, kind, path).
 *
 * @param {Array<object>} plots - Plot sections; reads `title`, `kind`, `path`.
 * @returns {string} Whatever the shared `table` helper returns for these rows.
 */
function renderPlots(plots) {
  const rows = plots.map(({ title, kind, path }) => [title, kind, path]);
  return table(["Plot", "Kind", "Path"], rows);
}
247
/**
 * Renders the visualization demo commands as a table of command name,
 * status, duration, produced artifacts and warnings.
 *
 * @param {Array<object>} demos - Demo sections; reads `name`, `status`,
 *   `durationMs`, `producedArtifactPaths` and `warnings`.
 * @returns {string} Whatever the shared `table` helper returns for these rows.
 */
function renderVisualizations(demos) {
  const headers = ["Command", "Status", "Duration", "Artifacts", "Warnings"];
  const rows = demos.map((demo) => {
    const duration = `${demo.durationMs} ms`;
    const artifacts = demo.producedArtifactPaths.join("\n");
    const warnings = demo.warnings.join("; ");
    return [demo.name, demo.status, duration, artifacts, warnings];
  });
  return table(headers, rows);
}
256
/**
 * Render the correctness breakdown of each run as an HTML table.
 *
 * @param {Array<object>} runs - Run records; only `runId` and the nested
 *   `correctness` object are read.
 * @returns {string} Table markup produced by `table`.
 */
function renderCorrectness(runs) {
  const headers = [
    "Run ID",
    "File",
    "Symbol",
    "Fact",
    "Score",
    "Required facts",
    "Files",
    "Symbols",
    "Failure reasons"
  ];
  // "found/total" presentation shared by the three coverage columns.
  const ratio = (found, total) => `${found}/${total}`;
  const rows = runs.map((run) => {
    const score = run.correctness;
    return [
      run.runId,
      String(score.fileMatchScore),
      String(score.symbolMatchScore),
      String(score.factMatchScore),
      String(score.correctnessScore),
      ratio(score.requiredFactsFound, score.requiredFactsTotal),
      ratio(score.expectedFilesFound, score.expectedFilesTotal),
      ratio(score.expectedSymbolsFound, score.expectedSymbolsTotal),
      score.failureReasons.join("; ")
    ];
  });
  return table(headers, rows);
}
269
/**
 * Render token usage comparisons as an HTML table.
 *
 * Undefined token totals are shown as "missing"; an undefined delta or
 * savings percentage is shown as "unavailable".
 *
 * @param {Array<object>} comparisons - Comparison records.
 * @returns {string} Table markup produced by `table`.
 */
function renderTokenComparisons(comparisons) {
  const headers = [
    "Comparison",
    "Raw tokens",
    "my-dev-kit tokens",
    "Token delta",
    "Savings percent",
    "Available",
    "Warnings"
  ];
  const totalCell = (total) => (total === undefined ? "missing" : String(total));
  const rows = comparisons.map((comparison) => {
    const delta = comparison.tokenDelta;
    const savings = comparison.tokenSavingsPercent;
    return [
      comparison.comparisonId,
      totalCell(comparison.rawTotalTokens),
      totalCell(comparison.myDevKitTotalTokens),
      delta === undefined ? "unavailable" : String(delta),
      savings === undefined ? "unavailable" : `${savings}%`,
      String(comparison.tokenComparisonAvailable),
      comparison.warnings.join("; ")
    ];
  });
  return table(headers, rows);
}
280
/**
 * Render duration comparisons as an HTML table.
 *
 * Undefined raw/my-dev-kit durations are shown as "missing"; an undefined
 * delta or reduction percentage is shown as "unavailable".
 *
 * @param {Array<object>} comparisons - Comparison records.
 * @returns {string} Table markup produced by `table`.
 */
function renderTimingComparisons(comparisons) {
  const headers = [
    "Comparison",
    "Raw duration",
    "my-dev-kit duration",
    "Delta",
    "Reduction percent",
    "Reliability"
  ];
  // Formats a millisecond value, substituting the given placeholder when absent.
  const millisCell = (value, placeholder) => (value === undefined ? placeholder : `${value} ms`);
  const rows = comparisons.map((comparison) => {
    const reduction = comparison.durationReductionPercent;
    return [
      comparison.comparisonId,
      millisCell(comparison.rawDurationMs, "missing"),
      millisCell(comparison.myDevKitDurationMs, "missing"),
      millisCell(comparison.durationDeltaMs, "unavailable"),
      reduction === undefined ? "unavailable" : `${reduction}%`,
      comparison.reliabilityLabel
    ];
  });
  return table(headers, rows);
}
290
/**
 * Render the full raw vs. my-dev-kit comparison overview as an HTML table.
 *
 * Nullish run ids and statuses become empty cells, undefined correctness
 * scores become empty cells, and undefined percentages become "unavailable".
 *
 * @param {Array<object>} comparisons - Comparison records.
 * @returns {string} Table markup produced by `table`.
 */
function renderComparisons(comparisons) {
  const headers = [
    "Comparison ID",
    "Case",
    "Project",
    "Agent",
    "Complexity",
    "Raw run",
    "my-dev-kit run",
    "Raw status",
    "my-dev-kit status",
    "Raw correctness",
    "my-dev-kit correctness",
    "Same pass",
    "Token savings",
    "Duration reduction",
    "Reliability",
    "Warnings"
  ];
  const scoreCell = (score) => (score === undefined ? "" : String(score));
  const percentCell = (percent) => (percent === undefined ? "unavailable" : `${percent}%`);
  const toRow = (comparison) => [
    comparison.comparisonId,
    comparison.caseId,
    comparison.benchmarkProject,
    comparison.agentId,
    comparison.complexityLevel,
    comparison.rawRunId ?? "",
    comparison.myDevKitRunId ?? "",
    comparison.rawStatus ?? "",
    comparison.myDevKitStatus ?? "",
    scoreCell(comparison.rawCorrectnessScore),
    scoreCell(comparison.myDevKitCorrectnessScore),
    String(comparison.sameCorrectnessPass),
    percentCell(comparison.tokenSavingsPercent),
    percentCell(comparison.durationReductionPercent),
    comparison.reliabilityLabel,
    comparison.warnings.join("; ")
  ];
  return table(headers, comparisons.map((comparison) => toRow(comparison)));
}
327
/**
 * Build a card showing a label and an answer badge.
 *
 * The escaped answer is used both as the badge text and as the suffix of the
 * `answer-…` CSS class.
 *
 * @param {*} label - Card caption.
 * @param {*} answer - Answer value shown in the badge.
 * @returns {string} Card markup.
 */
function answerCard(label, answer) {
  const safeLabel = escapeHtml(label);
  const safeAnswer = escapeHtml(answer);
  const badge = `<span class="badge answer-${safeAnswer}">${safeAnswer}</span>`;
  return `<div class="card"><div>${safeLabel}</div><div class="metric">${badge}</div></div>`;
}
330
/**
 * Build a card showing a label above a single metric value.
 *
 * @param {*} label - Card caption.
 * @param {*} value - Metric value; escaped before insertion.
 * @returns {string} Card markup.
 */
function metricCard(label, value) {
  const caption = `<div>${escapeHtml(label)}</div>`;
  const metric = `<div class="metric">${escapeHtml(value)}</div>`;
  return `<div class="card">${caption}${metric}</div>`;
}
333
/**
 * Render an object's own enumerable entries as a two-column table.
 *
 * @param {object} values - Mapping of metric name to value; `undefined`
 *   values become empty cells, everything else is stringified.
 * @returns {string} Table markup produced by `table`.
 */
function keyValueTable(values) {
  const rows = [];
  for (const [metricName, metricValue] of Object.entries(values)) {
    rows.push([metricName, metricValue === undefined ? "" : String(metricValue)]);
  }
  return table(["Metric", "Value"], rows);
}
336
/**
 * Wrap text in a small note paragraph.
 *
 * @param {*} text - Note content; escaped before insertion.
 * @returns {string} Paragraph markup.
 */
function note(text) {
  const safeText = escapeHtml(text);
  return `<p class="note small">${safeText}</p>`;
}
339
/**
 * Build an HTML table from header labels and row cell values.
 *
 * @param {Array<*>} headers - Column captions; always escaped.
 * @param {Array<Array<*>>} rows - Row cell values.
 * @param {boolean} [allowHtml=false] - When true, cells are inserted verbatim
 *   instead of being escaped.
 * @returns {string} Table markup.
 */
function table(headers, rows, allowHtml = false) {
  const renderCell = (cell) => `<td>${allowHtml ? cell : escapeHtml(cell)}</td>`;
  const headerCells = headers.map((header) => `<th>${escapeHtml(header)}</th>`).join("");
  const bodyRows = rows
    .map((row) => `<tr>${row.map((cell) => renderCell(cell)).join("")}</tr>`)
    .join("");
  return `<table><thead><tr>${headerCells}</tr></thead><tbody>${bodyRows}</tbody></table>`;
}
344
/**
 * Render items as an unordered HTML list.
 *
 * @param {Array<*>} items - List entries; each is escaped.
 * @returns {string} `<ul>` markup.
 */
function list(items) {
  const entries = items.map((item) => `<li>${escapeHtml(item)}</li>`);
  const body = entries.join("");
  return `<ul>${body}</ul>`;
}
347
/**
 * Escape the five HTML-significant characters in a value.
 *
 * @param {*} value - Any value; `null`/`undefined` become the empty string,
 *   everything else is stringified first.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeHtml(value) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&#39;"
  };
  // A single pass is enough: each source character maps to exactly one entity.
  return String(value ?? "").replace(/[&<>"']/g, (character) => entities[character]);
}
@@ -0,0 +1,103 @@
1
/**
 * Escape the five HTML-significant characters in a value.
 *
 * @param {*} value - Any value; `null`/`undefined` become the empty string,
 *   everything else is stringified first.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeHtml(value) {
  // The ampersand must be handled first so later entities are not re-escaped.
  const replacements = [
    ["&", "&amp;"],
    ["<", "&lt;"],
    [">", "&gt;"],
    ['"', "&quot;"],
    ["'", "&#39;"]
  ];
  let text = String(value ?? "");
  for (const [raw, entity] of replacements) {
    text = text.split(raw).join(entity);
  }
  return text;
}
9
/**
 * Render the warnings section body.
 *
 * @param {Array<*>} warnings - Warning messages.
 * @returns {string} A placeholder paragraph when there are no warnings,
 *   otherwise an unordered list with one escaped item per warning.
 */
function renderWarnings(warnings) {
  const isEmpty = warnings.length === 0;
  return isEmpty
    ? "<p data-testid=\"warnings-empty\">No warnings.</p>"
    : `<ul>${warnings.map((warning) => `<li>${escapeHtml(warning)}</li>`).join("")}</ul>`;
}
15
/**
 * Render a lab report as a standalone HTML document.
 *
 * All report values are passed through `escapeHtml` before insertion. The
 * page contains a header, workflow steps, metrics, artifacts and warnings
 * sections.
 *
 * @param {object} report - Report with `title`, `summary`, `projectName`,
 *   `benchmarkProject`, `workflowName`, `generatedAt`, and the `steps`,
 *   `metrics`, `artifacts` and `warnings` arrays.
 * @returns {string} Complete HTML document.
 */
export function renderHtmlReport(report) {
  // Optional step fields (command, numeric duration, notes) are emitted only when present.
  const renderStep = (step) => `
<li class="card step" data-testid="step-${escapeHtml(step.id)}">
<div class="row"><strong>${escapeHtml(step.label)}</strong><span class="badge status-${escapeHtml(step.status)}">${escapeHtml(step.status)}</span></div>
${step.command ? `<div class="muted"><code>${escapeHtml(step.command)}</code></div>` : ""}
${typeof step.durationMs === "number" ? `<div class="muted">Duration: ${escapeHtml(step.durationMs)} ms</div>` : ""}
${step.notes ? `<div>${escapeHtml(step.notes)}</div>` : ""}
</li>`;
  // The unit and interpretation are appended only when provided.
  const renderMetric = (metric) => `
<li class="card metric" data-testid="metric-${escapeHtml(metric.id)}">
<div><strong>${escapeHtml(metric.label)}</strong></div>
<div class="metric-value">${escapeHtml(metric.value)}${metric.unit ? ` ${escapeHtml(metric.unit)}` : ""}</div>
${metric.interpretation ? `<div class="muted">${escapeHtml(metric.interpretation)}</div>` : ""}
</li>`;
  const renderArtifact = (artifact) => `
<li class="card artifact" data-testid="artifact-${escapeHtml(artifact.id)}">
<div class="row"><strong>${escapeHtml(artifact.label)}</strong><span class="badge">${escapeHtml(artifact.kind)}</span></div>
<div><code>${escapeHtml(artifact.path)}</code></div>
</li>`;
  const stepsHtml = report.steps.map((step) => renderStep(step)).join("");
  const metricsHtml = report.metrics.map((metric) => renderMetric(metric)).join("");
  const artifactsHtml = report.artifacts.map((artifact) => renderArtifact(artifact)).join("");
  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>${escapeHtml(report.title)}</title>
<style>
:root { color-scheme: light; font-family: Arial, Helvetica, sans-serif; }
body { margin: 0; background: #f4f6fb; color: #16202a; }
main { max-width: 1120px; margin: 0 auto; padding: 32px; }
h1, h2 { margin: 0 0 12px; }
h1 { font-size: 32px; }
h2 { font-size: 22px; margin-top: 28px; }
p { line-height: 1.5; }
.hero, .section, .card { background: #ffffff; border: 1px solid #d8e0ea; border-radius: 8px; }
.hero { padding: 24px; }
.section { padding: 20px; margin-top: 20px; }
.meta { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 12px; margin-top: 18px; }
.row { display: flex; align-items: center; justify-content: space-between; gap: 12px; }
.grid { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 16px; padding: 0; list-style: none; }
.stack { padding: 0; list-style: none; display: grid; gap: 12px; }
.card { padding: 16px; }
.badge { display: inline-block; padding: 4px 10px; background: #e8eef8; border-radius: 999px; font-size: 12px; text-transform: uppercase; }
.status-pass { background: #dff5e3; color: #0e6b2f; }
.status-fail { background: #fde2e2; color: #8a1f1f; }
.status-skipped { background: #ece8ff; color: #5e41b7; }
.muted { color: #546474; margin-top: 6px; }
.metric-value { font-size: 28px; margin-top: 8px; }
code { font-family: Consolas, monospace; font-size: 14px; word-break: break-word; }
@media (max-width: 800px) { .meta, .grid { grid-template-columns: 1fr; } main { padding: 20px; } }
</style>
</head>
<body>
<main>
<section class="hero" data-testid="report-header">
<h1>${escapeHtml(report.title)}</h1>
<p>${escapeHtml(report.summary)}</p>
<div class="meta">
<div><strong>Project name</strong><div>${escapeHtml(report.projectName)}</div></div>
<div><strong>Benchmark project</strong><div>${escapeHtml(report.benchmarkProject)}</div></div>
<div><strong>Workflow name</strong><div>${escapeHtml(report.workflowName)}</div></div>
<div><strong>Generated timestamp</strong><div>${escapeHtml(report.generatedAt)}</div></div>
</div>
</section>
<section class="section" data-testid="workflow-steps">
<h2>Workflow steps</h2>
<ul class="stack">${stepsHtml}</ul>
</section>
<section class="section" data-testid="metrics">
<h2>Metrics</h2>
<ul class="grid">${metricsHtml}</ul>
</section>
<section class="section" data-testid="artifacts">
<h2>Artifacts</h2>
<ul class="stack">${artifactsHtml}</ul>
</section>
<section class="section" data-testid="warnings">
<h2>Warnings</h2>
${renderWarnings(report.warnings)}
</section>
</main>
</body>
</html>`;
}
@@ -0,0 +1,10 @@
1
/**
 * Produce a copy of a lab report with a resolved `generatedAt` timestamp.
 *
 * The `steps`, `metrics` and `artifacts` entries are shallow-copied and the
 * `warnings` array is copied, so the returned collections are new objects.
 *
 * @param {object} input - Source report; must provide `steps`, `metrics`,
 *   `artifacts` and `warnings` arrays.
 * @param {string} [generatedAt] - Timestamp override. When nullish,
 *   `input.generatedAt` is used, falling back to the current time in ISO form.
 * @returns {object} Normalized report.
 */
export function normalizeLabReport(input, generatedAt) {
  const cloneEntry = (entry) => ({ ...entry });
  const timestamp = generatedAt ?? input.generatedAt ?? new Date().toISOString();
  return {
    ...input,
    generatedAt: timestamp,
    steps: input.steps.map((step) => cloneEntry(step)),
    metrics: input.metrics.map((metric) => cloneEntry(metric)),
    artifacts: input.artifacts.map((artifact) => cloneEntry(artifact)),
    warnings: Array.from(input.warnings)
  };
}
@@ -0,0 +1,38 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { resolveWithinRoot } from "../core/pathSafety.js";
4
+ import { renderExperimentHtmlReport } from "./renderExperimentHtmlReport.js";
5
/**
 * Compute the output locations of the experiment report artifacts.
 *
 * @param {string} outDir - Output directory; resolved to an absolute path.
 * @returns {{outDir: string, jsonPath: *, htmlPath: *, pngPath: *, artifactIndexPath: *}}
 *   The resolved directory plus one path per artifact, each obtained from
 *   `resolveWithinRoot` with the resolved directory as root.
 */
export function getExperimentReportArtifactPaths(outDir) {
  const root = path.resolve(outDir);
  const inRoot = (fileName) => resolveWithinRoot(root, fileName);
  const jsonPath = inRoot("experiment-report.json");
  const htmlPath = inRoot("experiment-report.html");
  const pngPath = inRoot("experiment-report.png");
  const artifactIndexPath = inRoot("experiment-report-artifacts.json");
  return { outDir: root, jsonPath, htmlPath, pngPath, artifactIndexPath };
}
15
/**
 * Write the experiment report as HTML, JSON and an artifact index file.
 *
 * Screenshot warnings and failures are appended to a copy of the report's
 * warnings before anything is written. Files are written sequentially:
 * HTML first, then the JSON payload, then the artifact index.
 *
 * @param {object} args - `outDir`, `report` (with a `warnings` array) and
 *   `screenshot` (read for `warning`, `status` and `error`).
 * @returns {Promise<{report: object, outputPaths: object, screenshot: object, warnings: Array}>}
 *   The report with merged warnings, the output paths, the screenshot result
 *   and the merged warnings.
 */
export async function writeExperimentReportArtifacts(args) {
  const outputPaths = getExperimentReportArtifactPaths(args.outDir);
  await mkdir(outputPaths.outDir, { recursive: true });

  const screenshot = args.screenshot;
  const screenshotWarnings = [];
  if (screenshot.warning) {
    screenshotWarnings.push(screenshot.warning);
  }
  if (screenshot.status === "failed" && screenshot.error) {
    screenshotWarnings.push(`PNG screenshot capture failed: ${screenshot.error}`);
  }
  const warnings = [...args.report.warnings, ...screenshotWarnings];
  const report = { ...args.report, warnings };

  // Both JSON documents are pretty-printed and end with a newline.
  const toJsonDocument = (value) => `${JSON.stringify(value, null, 2)}\n`;
  const reportDocument = {
    report,
    generatedAt: report.generatedAt,
    outputPaths,
    screenshot,
    warnings
  };
  const indexDocument = {
    generatedAt: report.generatedAt,
    outputPaths,
    sourceExperimentDir: report.sourceExperimentDir,
    artifactLinks: report.artifactLinks
  };

  await writeFile(outputPaths.htmlPath, renderExperimentHtmlReport(report), "utf8");
  await writeFile(outputPaths.jsonPath, toJsonDocument(reportDocument), "utf8");
  await writeFile(outputPaths.artifactIndexPath, toJsonDocument(indexDocument), "utf8");

  return { report, outputPaths, screenshot, warnings };
}
@@ -0,0 +1,39 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { renderHtmlReport } from "./renderHtmlReport.js";
4
+ import { normalizeLabReport } from "./types.js";
5
/**
 * Compute the JSON, HTML and PNG artifact paths for a report.
 *
 * @param {string} outDir - Directory the artifacts are written to; returned
 *   unchanged as `outDir`.
 * @param {string} reportId - Report identifier used as the file base name.
 * @returns {{outDir: string, jsonPath: string, htmlPath: string, pngPath: string}}
 *   Paths built with `path.join(outDir, "<reportId>.<ext>")`.
 * @throws {Error} When `reportId` would place an artifact outside `outDir`
 *   (for example an id containing `..` segments or an absolute path).
 */
export function getReportArtifactPaths(outDir, reportId) {
  const resolvedRoot = path.resolve(outDir);
  const artifactPath = (extension) => {
    const candidate = path.join(outDir, `${reportId}.${extension}`);
    // The report id is data, not a trusted path: refuse ids that climb out of
    // the output directory instead of silently writing somewhere else.
    const relative = path.relative(resolvedRoot, path.resolve(candidate));
    const escapesRoot = relative === ".." ||
      relative.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relative);
    if (escapesRoot) {
      throw new Error(`Report id "${reportId}" resolves outside the output directory.`);
    }
    return candidate;
  };
  return {
    outDir,
    jsonPath: artifactPath("json"),
    htmlPath: artifactPath("html"),
    pngPath: artifactPath("png")
  };
}
13
/**
 * Normalize a lab report and write it as HTML and JSON files.
 *
 * Screenshot warnings and failures are appended to a copy of the normalized
 * report's warnings. The HTML file is written before the JSON payload.
 *
 * @param {object} options - `report`, optional `generatedAt`, `outDir` and
 *   `screenshot` (read for `warning`, `status` and `error`).
 * @returns {Promise<{report: object, outputPaths: object, screenshot: object, warnings: Array}>}
 *   The normalized report, the output paths, the screenshot result and the
 *   merged warnings.
 */
export async function writeReportArtifacts(options) {
  const report = normalizeLabReport(options.report, options.generatedAt);
  const outputPaths = getReportArtifactPaths(options.outDir, report.reportId);

  const screenshot = options.screenshot;
  const screenshotWarnings = [];
  if (screenshot.warning) {
    screenshotWarnings.push(screenshot.warning);
  }
  if (screenshot.status === "failed" && screenshot.error) {
    screenshotWarnings.push(`PNG screenshot capture failed: ${screenshot.error}`);
  }
  const warnings = [...report.warnings, ...screenshotWarnings];

  await mkdir(outputPaths.outDir, { recursive: true });
  const html = renderHtmlReport(report);
  await writeFile(outputPaths.htmlPath, html, "utf8");

  const payload = {
    report,
    generatedAt: report.generatedAt,
    outputPaths,
    screenshot,
    warnings
  };
  const serialized = JSON.stringify(payload, null, 2);
  await writeFile(outputPaths.jsonPath, serialized, "utf8");

  return { report, outputPaths, screenshot, warnings };
}
@@ -0,0 +1,75 @@
1
+ import { access } from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { pathToFileURL } from "node:url";
4
// Warning text attached to "skipped" screenshot results, returned when Playwright
// cannot be loaded or the launch error looks like a missing browser runtime.
const SKIP_WARNING = "PNG screenshot skipped because Playwright or browser runtime is unavailable.";
5
/**
 * Decide whether an error message indicates an unavailable browser runtime.
 *
 * @param {*} error - Thrown value; `Error` instances contribute their
 *   `message`, anything else is stringified.
 * @returns {boolean} True when the message contains any known marker.
 */
function isMissingBrowserRuntime(error) {
  const markers = [
    "Executable doesn't exist",
    "browserType.launch",
    "playwright install",
    "Failed to launch",
    "Could not find Chrome",
    "ENOENT"
  ];
  const message = error instanceof Error ? error.message : String(error);
  return markers.some((marker) => message.includes(marker));
}
14
/**
 * Load the `playwright` package with a dynamic import.
 *
 * @returns {Promise<object>} The imported module namespace; the promise
 *   rejects when the import fails.
 */
async function defaultLoadPlaywright() {
  const playwrightModule = await import("playwright");
  return playwrightModule;
}
17
/**
 * Capture a full-page PNG screenshot of an HTML report using Playwright.
 *
 * The function never throws for capture problems; it reports them through the
 * returned status instead.
 *
 * @param {string} htmlPath - Path of the HTML report to open.
 * @param {string} pngPath - Destination path for the screenshot.
 * @param {{loadPlaywright?: () => Promise<object>}} [options] - Optional
 *   loader override; defaults to `defaultLoadPlaywright`.
 * @returns {Promise<object>} Always contains `status`, `htmlPath`, `pngPath`:
 *   - `"captured"` when the screenshot was written;
 *   - `"skipped"` (with `warning`) when Playwright cannot be loaded or the
 *     error matches `isMissingBrowserRuntime`;
 *   - `"failed"` (with `error`) when the HTML file is not accessible or any
 *     other error occurs.
 */
export async function captureReportScreenshot(htmlPath, pngPath, options) {
  const loadPlaywright = options?.loadPlaywright ?? defaultLoadPlaywright;
  try {
    await access(htmlPath);
  }
  catch {
    return {
      status: "failed",
      htmlPath,
      pngPath,
      error: `HTML report not found: ${htmlPath}`
    };
  }
  let playwright;
  try {
    playwright = await loadPlaywright();
  }
  catch {
    return {
      status: "skipped",
      htmlPath,
      pngPath,
      warning: SKIP_WARNING
    };
  }
  let browser;
  try {
    browser = await playwright.chromium.launch({ headless: true });
    const page = await browser.newPage({ viewport: { width: 1440, height: 1080 } });
    await page.goto(pathToFileURL(path.resolve(htmlPath)).href, { waitUntil: "load" });
    await page.screenshot({ path: pngPath, fullPage: true });
    return {
      status: "captured",
      htmlPath,
      pngPath
    };
  }
  catch (error) {
    if (isMissingBrowserRuntime(error)) {
      return {
        status: "skipped",
        htmlPath,
        pngPath,
        warning: SKIP_WARNING
      };
    }
    return {
      status: "failed",
      htmlPath,
      pngPath,
      error: error instanceof Error ? error.message : String(error)
    };
  }
  finally {
    // Cleanup is best-effort on every path: once the screenshot has been
    // written, a failure while closing the browser must not turn a successful
    // capture into a "failed" or "skipped" result.
    if (browser) {
      try {
        await browser.close();
      }
      catch {
        // Intentionally ignored; the outcome was already decided above.
      }
    }
  }
}
75
+ export { SKIP_WARNING as SCREENSHOT_SKIP_WARNING };
@@ -0,0 +1,2 @@
1
+ export * from "./types.js";
2
+ export * from "./captureReportScreenshot.js";
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,15 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import path from "node:path";
3
/**
 * Write a structured security check result as pretty-printed JSON.
 *
 * Raw stdout/stderr, when provided, are written first as separate text files
 * named `<result.id>.stdout.txt` and `<result.id>.stderr.txt` inside `rawDir`.
 * Both the parent directory of `outputPath` and `rawDir` are created as needed.
 * NOTE(review): the original comment places these under reports/security and
 * reports/security/raw; the actual locations are whatever the caller passes.
 *
 * @param {object} options - `outputPath`, `rawDir`, `result` (with an `id`),
 *   and optional `rawStdout` / `rawStderr` text.
 * @returns {Promise<void>}
 */
export async function writeCheckResult(options) {
  const { outputPath, rawDir, result } = options;
  await mkdir(path.dirname(outputPath), { recursive: true });
  await mkdir(rawDir, { recursive: true });
  const rawStreams = [
    ["stdout", options.rawStdout],
    ["stderr", options.rawStderr]
  ];
  for (const [streamName, text] of rawStreams) {
    if (text === undefined) {
      continue;
    }
    await writeFile(path.join(rawDir, `${result.id}.${streamName}.txt`), text, "utf8");
  }
  await writeFile(outputPath, JSON.stringify(result, null, 2), "utf8");
}