@dailephd/my-dev-kit-lab 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. package/README.md +272 -0
  2. package/benchmarks/contracts/benchmark-project-profiles.json +1199 -0
  3. package/benchmarks/contracts/todo-behavior.md +70 -0
  4. package/benchmarks/contracts/todo-benchmark-case.json +227 -0
  5. package/benchmarks/projects/README.md +34 -0
  6. package/benchmarks/projects/task-analytics-large-mixed/README.md +1 -0
  7. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/__init__.py +3 -0
  8. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/fixtures.py +6 -0
  9. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/metrics.py +29 -0
  10. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/models.py +21 -0
  11. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/parser.py +16 -0
  12. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/pipeline.py +9 -0
  13. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/quality.py +8 -0
  14. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/reporting.py +11 -0
  15. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_metrics.py +19 -0
  16. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_parser.py +15 -0
  17. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_quality.py +19 -0
  18. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_reporting.py +15 -0
  19. package/benchmarks/projects/task-analytics-large-mixed/ts/package.json +12 -0
  20. package/benchmarks/projects/task-analytics-large-mixed/ts/src/index.ts +11 -0
  21. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/analyticsSnapshot.ts +20 -0
  22. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/project.ts +5 -0
  23. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/task.ts +10 -0
  24. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/buildProjectLeaderboard.ts +7 -0
  25. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/formatTaskHealthReport.ts +13 -0
  26. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/buildAnalyticsSnapshot.ts +39 -0
  27. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/completeTask.ts +10 -0
  28. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/createTask.ts +21 -0
  29. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/listTasksByProject.ts +6 -0
  30. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/projectStore.ts +20 -0
  31. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/taskStore.ts +44 -0
  32. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/projectValidation.ts +12 -0
  33. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/taskValidation.ts +18 -0
  34. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/buildAnalyticsSnapshot.test.ts +48 -0
  35. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/completeTask.test.ts +21 -0
  36. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/createTask.test.ts +31 -0
  37. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/listTasksByProject.test.ts +18 -0
  38. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/reporting.test.ts +19 -0
  39. package/benchmarks/projects/task-analytics-large-mixed/ts/tsconfig.json +12 -0
  40. package/benchmarks/projects/task-analytics-large-mixed/ts/vitest.config.ts +5 -0
  41. package/benchmarks/projects/task-workflow-medium-ts/README.md +1 -0
  42. package/benchmarks/projects/task-workflow-medium-ts/package.json +12 -0
  43. package/benchmarks/projects/task-workflow-medium-ts/src/index.ts +9 -0
  44. package/benchmarks/projects/task-workflow-medium-ts/src/models/project.ts +6 -0
  45. package/benchmarks/projects/task-workflow-medium-ts/src/models/task.ts +39 -0
  46. package/benchmarks/projects/task-workflow-medium-ts/src/services/completeTask.ts +15 -0
  47. package/benchmarks/projects/task-workflow-medium-ts/src/services/createTask.ts +26 -0
  48. package/benchmarks/projects/task-workflow-medium-ts/src/services/filterTasks.ts +17 -0
  49. package/benchmarks/projects/task-workflow-medium-ts/src/services/importTasks.ts +33 -0
  50. package/benchmarks/projects/task-workflow-medium-ts/src/services/summarizeTasks.ts +30 -0
  51. package/benchmarks/projects/task-workflow-medium-ts/src/store/taskStore.ts +76 -0
  52. package/benchmarks/projects/task-workflow-medium-ts/src/utils/deterministicId.ts +3 -0
  53. package/benchmarks/projects/task-workflow-medium-ts/src/validation/taskValidation.ts +45 -0
  54. package/benchmarks/projects/task-workflow-medium-ts/tests/completeTask.test.ts +16 -0
  55. package/benchmarks/projects/task-workflow-medium-ts/tests/createTask.test.ts +21 -0
  56. package/benchmarks/projects/task-workflow-medium-ts/tests/filterTasks.test.ts +18 -0
  57. package/benchmarks/projects/task-workflow-medium-ts/tests/importTasks.test.ts +22 -0
  58. package/benchmarks/projects/task-workflow-medium-ts/tests/summarizeTasks.test.ts +29 -0
  59. package/benchmarks/projects/task-workflow-medium-ts/tsconfig.json +12 -0
  60. package/benchmarks/projects/task-workflow-medium-ts/vitest.config.ts +5 -0
  61. package/benchmarks/projects/todo-js/README.md +3 -0
  62. package/benchmarks/projects/todo-js/package.json +11 -0
  63. package/benchmarks/projects/todo-js/src/index.js +2 -0
  64. package/benchmarks/projects/todo-js/src/taskService.js +37 -0
  65. package/benchmarks/projects/todo-js/src/taskStore.js +28 -0
  66. package/benchmarks/projects/todo-js/tests/taskService.test.js +45 -0
  67. package/benchmarks/projects/todo-js/vitest.config.js +5 -0
  68. package/benchmarks/projects/todo-mixed-ts-py/README.md +3 -0
  69. package/benchmarks/projects/todo-mixed-ts-py/package.json +13 -0
  70. package/benchmarks/projects/todo-mixed-ts-py/python/task_service.py +76 -0
  71. package/benchmarks/projects/todo-mixed-ts-py/src/taskCli.ts +38 -0
  72. package/benchmarks/projects/todo-mixed-ts-py/tests/mixedBoundary.test.ts +18 -0
  73. package/benchmarks/projects/todo-mixed-ts-py/tsconfig.json +12 -0
  74. package/benchmarks/projects/todo-mixed-ts-py/vitest.config.ts +5 -0
  75. package/benchmarks/projects/todo-python/README.md +3 -0
  76. package/benchmarks/projects/todo-python/src/__init__.py +4 -0
  77. package/benchmarks/projects/todo-python/src/task_service.py +32 -0
  78. package/benchmarks/projects/todo-python/src/task_store.py +28 -0
  79. package/benchmarks/projects/todo-python/tests/test_task_service.py +52 -0
  80. package/benchmarks/projects/todo-ts/README.md +3 -0
  81. package/benchmarks/projects/todo-ts/package.json +12 -0
  82. package/benchmarks/projects/todo-ts/src/index.ts +2 -0
  83. package/benchmarks/projects/todo-ts/src/taskService.ts +41 -0
  84. package/benchmarks/projects/todo-ts/src/taskStore.ts +34 -0
  85. package/benchmarks/projects/todo-ts/tests/taskService.test.ts +45 -0
  86. package/benchmarks/projects/todo-ts/tsconfig.json +12 -0
  87. package/benchmarks/projects/todo-ts/vitest.config.ts +5 -0
  88. package/dist/scripts/build-gallery.js +3 -0
  89. package/dist/scripts/capture-demo-report.js +3 -0
  90. package/dist/scripts/evaluate-token-savings.js +2 -0
  91. package/dist/scripts/experiments/describeExperiment.js +143 -0
  92. package/dist/scripts/experiments/listExperiments.js +44 -0
  93. package/dist/scripts/experiments/runExperiment.js +199 -0
  94. package/dist/scripts/generate-experiment-plots.js +3 -0
  95. package/dist/scripts/generate-prompt-variants.js +2 -0
  96. package/dist/scripts/render-experiment-report.js +2 -0
  97. package/dist/scripts/run-agent-prompt.js +2 -0
  98. package/dist/scripts/run-controlled-experiment.js +2 -0
  99. package/dist/scripts/run-final-demo.js +3 -0
  100. package/dist/scripts/run-lab-demo.js +5 -0
  101. package/dist/scripts/run-visualization-demos.js +3 -0
  102. package/dist/scripts/security/runCodeql.js +57 -0
  103. package/dist/scripts/security/runDependencyChecks.js +57 -0
  104. package/dist/scripts/security/runFuzzSmoke.js +29 -0
  105. package/dist/scripts/security/runPackageChecks.js +56 -0
  106. package/dist/scripts/security/runSemgrep.js +63 -0
  107. package/dist/scripts/security/validate.js +117 -0
  108. package/dist/scripts/verify-benchmarks.js +202 -0
  109. package/dist/src/agents/adapters/claudeAdapter.js +37 -0
  110. package/dist/src/agents/adapters/codexAdapter.js +110 -0
  111. package/dist/src/agents/adapters/fakeAgentAdapter.js +101 -0
  112. package/dist/src/agents/agentRegistry.js +21 -0
  113. package/dist/src/agents/index.js +7 -0
  114. package/dist/src/agents/parseAgentTokenUsage.js +137 -0
  115. package/dist/src/agents/runAgentPrompt.js +38 -0
  116. package/dist/src/agents/types.js +1 -0
  117. package/dist/src/commands/buildGalleryCommand.js +56 -0
  118. package/dist/src/commands/captureDemoReport.js +116 -0
  119. package/dist/src/commands/evaluateTokenSavings.js +175 -0
  120. package/dist/src/commands/generateExperimentPlotsCommand.js +38 -0
  121. package/dist/src/commands/generatePromptVariants.js +67 -0
  122. package/dist/src/commands/renderExperimentReportCommand.js +131 -0
  123. package/dist/src/commands/runAgentPromptCommand.js +132 -0
  124. package/dist/src/commands/runControlledExperimentCommand.js +174 -0
  125. package/dist/src/commands/runFinalDemoCommand.js +123 -0
  126. package/dist/src/commands/runLabDemo.js +62 -0
  127. package/dist/src/commands/runVisualizationDemosCommand.js +67 -0
  128. package/dist/src/core/commandLine.js +59 -0
  129. package/dist/src/core/countTokens.js +8 -0
  130. package/dist/src/core/fileGlobs.js +100 -0
  131. package/dist/src/core/localProjectTarget.js +75 -0
  132. package/dist/src/core/pathSafety.js +19 -0
  133. package/dist/src/core/pythonCommand.js +30 -0
  134. package/dist/src/core/resolveCommand.js +110 -0
  135. package/dist/src/core/runMeasuredCommand.js +143 -0
  136. package/dist/src/evaluation/benchmarkMetadata.js +207 -0
  137. package/dist/src/evaluation/buildExperimentMatrix.js +75 -0
  138. package/dist/src/evaluation/classifyAgentRunOutcome.js +40 -0
  139. package/dist/src/evaluation/compareExperimentRuns.js +79 -0
  140. package/dist/src/evaluation/compareTokenSavings.js +47 -0
  141. package/dist/src/evaluation/controlledExperimentTypes.js +1 -0
  142. package/dist/src/evaluation/index.js +18 -0
  143. package/dist/src/evaluation/parseAgentAnswer.js +230 -0
  144. package/dist/src/evaluation/projectComplexity.js +126 -0
  145. package/dist/src/evaluation/projectFileTree.js +83 -0
  146. package/dist/src/evaluation/readEvaluationCases.js +59 -0
  147. package/dist/src/evaluation/renderTokenSavingsReportInput.js +55 -0
  148. package/dist/src/evaluation/runControlledExperiment.js +158 -0
  149. package/dist/src/evaluation/runMyDevKitRetrieval.js +197 -0
  150. package/dist/src/evaluation/runRawFullFileBaseline.js +31 -0
  151. package/dist/src/evaluation/scoreCorrectness.js +127 -0
  152. package/dist/src/evaluation/types.js +1 -0
  153. package/dist/src/evaluation/writeExperimentArtifacts.js +104 -0
  154. package/dist/src/evaluation/writeTokenSavingsArtifacts.js +57 -0
  155. package/dist/src/experiments/config.js +24 -0
  156. package/dist/src/experiments/defaultRegistry.js +7 -0
  157. package/dist/src/experiments/errors.js +18 -0
  158. package/dist/src/experiments/index.js +9 -0
  159. package/dist/src/experiments/outputPaths.js +25 -0
  160. package/dist/src/experiments/plugins/contextStrategyComparison/config.js +37 -0
  161. package/dist/src/experiments/plugins/contextStrategyComparison/index.js +3 -0
  162. package/dist/src/experiments/plugins/contextStrategyComparison/plugin.js +83 -0
  163. package/dist/src/experiments/plugins/contextStrategyComparison/resultMapping.js +260 -0
  164. package/dist/src/experiments/plugins/index.js +1 -0
  165. package/dist/src/experiments/registry.js +43 -0
  166. package/dist/src/experiments/results.js +48 -0
  167. package/dist/src/experiments/runner.js +181 -0
  168. package/dist/src/experiments/target.js +8 -0
  169. package/dist/src/experiments/types.js +1 -0
  170. package/dist/src/gallery/index.js +2 -0
  171. package/dist/src/gallery/types.js +1 -0
  172. package/dist/src/gallery/writeGalleryManifest.js +214 -0
  173. package/dist/src/index.js +12 -0
  174. package/dist/src/plots/buildExperimentPlotData.js +137 -0
  175. package/dist/src/plots/index.js +4 -0
  176. package/dist/src/plots/renderSvgChart.js +82 -0
  177. package/dist/src/plots/types.js +1 -0
  178. package/dist/src/plots/writePlotArtifacts.js +46 -0
  179. package/dist/src/prompts/buildPromptContext.js +68 -0
  180. package/dist/src/prompts/generateMyDevKitPrompt.js +106 -0
  181. package/dist/src/prompts/generatePromptVariants.js +36 -0
  182. package/dist/src/prompts/generateRawFullFilePrompt.js +97 -0
  183. package/dist/src/prompts/index.js +7 -0
  184. package/dist/src/prompts/measurePromptComplexity.js +41 -0
  185. package/dist/src/prompts/types.js +1 -0
  186. package/dist/src/prompts/writePromptArtifacts.js +43 -0
  187. package/dist/src/report/buildExperimentReportInput.js +339 -0
  188. package/dist/src/report/experimentReportTypes.js +1 -0
  189. package/dist/src/report/experiments/buildPluginExperimentReport.js +153 -0
  190. package/dist/src/report/experiments/experimentReportModel.js +1 -0
  191. package/dist/src/report/experiments/index.js +4 -0
  192. package/dist/src/report/experiments/renderPluginExperimentReportHtml.js +133 -0
  193. package/dist/src/report/experiments/writePluginExperimentReports.js +30 -0
  194. package/dist/src/report/index.js +8 -0
  195. package/dist/src/report/renderExperimentHtmlReport.js +354 -0
  196. package/dist/src/report/renderHtmlReport.js +103 -0
  197. package/dist/src/report/types.js +10 -0
  198. package/dist/src/report/writeExperimentReportArtifacts.js +38 -0
  199. package/dist/src/report/writeReportArtifacts.js +39 -0
  200. package/dist/src/screenshot/captureReportScreenshot.js +75 -0
  201. package/dist/src/screenshot/index.js +2 -0
  202. package/dist/src/screenshot/types.js +1 -0
  203. package/dist/src/securityValidation/artifacts.js +15 -0
  204. package/dist/src/securityValidation/cliAdversarial/adversarialCliConfig.js +38 -0
  205. package/dist/src/securityValidation/cliAdversarial/dataVolumeChecks.js +194 -0
  206. package/dist/src/securityValidation/cliAdversarial/jsonStdoutChecks.js +359 -0
  207. package/dist/src/securityValidation/cliAdversarial/malformedArtifactChecks.js +284 -0
  208. package/dist/src/securityValidation/cliAdversarial/malformedArtifactFixtures.js +79 -0
  209. package/dist/src/securityValidation/cliAdversarial/pathBoundaryChecks.js +431 -0
  210. package/dist/src/securityValidation/cliAdversarial/pathCases.js +144 -0
  211. package/dist/src/securityValidation/cliAdversarial/readOnlyBoundaryChecks.js +294 -0
  212. package/dist/src/securityValidation/cliAdversarial/runAdversarialCheck.js +149 -0
  213. package/dist/src/securityValidation/cliAdversarial/subprocessSafetyChecks.js +214 -0
  214. package/dist/src/securityValidation/cliAdversarial/tempWorkspace.js +160 -0
  215. package/dist/src/securityValidation/commandRunner.js +136 -0
  216. package/dist/src/securityValidation/config.js +39 -0
  217. package/dist/src/securityValidation/dependencies/parseNpmAudit.js +115 -0
  218. package/dist/src/securityValidation/dependencies/parseNpmLs.js +71 -0
  219. package/dist/src/securityValidation/dependencies/parseNpmOutdated.js +41 -0
  220. package/dist/src/securityValidation/dependencies/runDependencyChecks.js +239 -0
  221. package/dist/src/securityValidation/dependencies/runOsvScanner.js +43 -0
  222. package/dist/src/securityValidation/fuzz/fuzzHarness.js +61 -0
  223. package/dist/src/securityValidation/fuzz/fuzzTargets.js +204 -0
  224. package/dist/src/securityValidation/fuzz/randomInput.js +0 -0
  225. package/dist/src/securityValidation/index.js +34 -0
  226. package/dist/src/securityValidation/packageChecks/forbiddenPackageContents.js +67 -0
  227. package/dist/src/securityValidation/packageChecks/parseNpmPackDryRun.js +56 -0
  228. package/dist/src/securityValidation/packageChecks/runPackageChecks.js +88 -0
  229. package/dist/src/securityValidation/report/renderSecurityReport.js +248 -0
  230. package/dist/src/securityValidation/report/securityReportTypes.js +1 -0
  231. package/dist/src/securityValidation/staticScans/codeql.js +66 -0
  232. package/dist/src/securityValidation/staticScans/semgrep.js +180 -0
  233. package/dist/src/securityValidation/testMatrix.js +535 -0
  234. package/dist/src/securityValidation/types.js +34 -0
  235. package/dist/src/securityValidation/validate/resolveTarget.js +32 -0
  236. package/dist/src/securityValidation/validate/runSecurityValidation.js +169 -0
  237. package/dist/src/securityValidation/validate/verdict.js +73 -0
  238. package/dist/src/visualizationDemos/buildMyDevKitVisualizationCommands.js +59 -0
  239. package/dist/src/visualizationDemos/index.js +4 -0
  240. package/dist/src/visualizationDemos/runVisualizationDemos.js +82 -0
  241. package/dist/src/visualizationDemos/types.js +1 -0
  242. package/dist/src/visualizationDemos/writeVisualizationDemoArtifacts.js +25 -0
  243. package/docs/METRICS.md +286 -0
  244. package/examples/demo-report-input.json +78 -0
  245. package/examples/lab-demo-cases.json +35 -0
  246. package/examples/real-agent-campaign-cases.json +118 -0
  247. package/examples/token-savings-cases.json +122 -0
  248. package/package.json +91 -0
  249. package/tests/fixtures/fake-adversarial-cli.js +152 -0
  250. package/tests/fixtures/fake-my-dev-kit-cli.js +83 -0
@@ -0,0 +1,56 @@
1
+ #!/usr/bin/env node
2
+ import path from "node:path";
3
+ import { runPackageChecks } from "../../src/securityValidation/index.js";
4
+ import { DEFAULT_SECURITY_CONFIG } from "../../src/securityValidation/index.js";
5
+ import { resolveValidationTarget } from "../../src/securityValidation/validate/resolveTarget.js";
// ---------------------------------------------------------------------------
// Entry point: run the package content checks and summarise the outcome.
// ---------------------------------------------------------------------------
const cliOptions = parseArgs(process.argv.slice(2));
// Reports are written relative to the directory the script was started from.
const toolRoot = process.cwd();

// Resolve the target first so an invalid --target produces a one-line error.
let targetRoot;
try {
  const resolved = resolveValidationTarget(cliOptions.target, toolRoot);
  ({ targetRoot } = resolved);
  if (!resolved.isSelf) {
    console.log(`Target: ${targetRoot}`);
  }
} catch (err) {
  const reason = err instanceof Error ? err.message : String(err);
  console.error(`ERROR: ${reason}`);
  process.exitCode = 1;
  process.exit(1);
}

// Default configuration with both output directories anchored at the tool root.
const { reportDir, rawOutputDir } = DEFAULT_SECURITY_CONFIG;
const config = {
  ...DEFAULT_SECURITY_CONFIG,
  reportDir: path.join(toolRoot, reportDir),
  rawOutputDir: path.join(toolRoot, rawOutputDir),
};

console.log("Running package content checks...");
console.log(`Report directory: ${config.reportDir}`);
const output = await runPackageChecks({ cwd: targetRoot, config });

/** Number of checks in the run that ended with the given status. */
const countChecks = (status) =>
  output.checks.filter((check) => check.status === status).length;

const summaryLines = [
  `\nPackage checks complete:`,
  ` Passed: ${countChecks("passed")}`,
  ` Warned: ${countChecks("warning")}`,
  ` Failed: ${countChecks("failed")}`,
  ` Findings: ${output.findings.length}`,
];
for (const line of summaryLines) {
  console.log(line);
}

if (output.findings.length > 0) {
  console.log("\nFindings:");
  for (const finding of output.findings) {
    console.log(` [${finding.severity.toUpperCase()}] ${finding.title}`);
  }
}
console.log(`\nResults written to ${config.reportDir}`);

// Only blocker and major findings turn the run into a failing exit code.
const isReleaseBlocking = output.findings.some(
  (finding) => finding.severity === "blocker" || finding.severity === "major",
);
process.exitCode = isReleaseBlocking ? 1 : 0;
/**
 * Extracts the options this script understands from a raw argument list.
 *
 * Accepted spellings: `--target <path>`, `-t <path>` and `--target=<path>`.
 * Unrecognised arguments are skipped, a trailing flag with no value is
 * ignored, and when the option is repeated the last occurrence wins.
 *
 * @param {string[]} argv - Command-line arguments after the script path.
 * @returns {{ target?: string }} Parsed options; `target` is absent when it
 *   was not supplied.
 */
function parseArgs(argv) {
  const inlinePrefix = "--target=";
  const result = {};
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg.startsWith(inlinePrefix)) {
      // Inline form: previously dropped silently, which made the script fall
      // back to validating itself instead of the requested target.
      result.target = arg.slice(inlinePrefix.length);
    } else if ((arg === "--target" || arg === "-t") && i + 1 < argv.length) {
      // Separate-value form: consume the next token as the value.
      result.target = argv[++i];
    }
  }
  return result;
}
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env node
2
+ import path from "node:path";
3
+ import { runSemgrepCheck } from "../../src/securityValidation/staticScans/semgrep.js";
4
+ import { resolveValidationTarget } from "../../src/securityValidation/validate/resolveTarget.js";
// ---------------------------------------------------------------------------
// Entry point: run the Semgrep static-analysis check and report its outcome.
// ---------------------------------------------------------------------------
const cliOptions = parseArgs(process.argv.slice(2));
const toolRoot = process.cwd();

// Resolve the target first so an invalid --target produces a one-line error.
let targetRoot;
try {
  const resolved = resolveValidationTarget(cliOptions.target, toolRoot);
  ({ targetRoot } = resolved);
  if (!resolved.isSelf) {
    console.log(`Target: ${targetRoot}`);
  }
} catch (err) {
  const reason = err instanceof Error ? err.message : String(err);
  console.error(`ERROR: ${reason}`);
  process.exitCode = 1;
  process.exit(1);
}

// The rule file is always taken from the tool root, not from the target.
const semgrepConfigPath = path.join(toolRoot, ".semgrep.yml");

console.log("Running Semgrep static analysis check...");
console.log(`Config: ${semgrepConfigPath}`);
const result = await runSemgrepCheck({
  targetRoot,
  toolRoot,
  configPath: semgrepConfigPath,
  timeoutMs: 120_000,
});

const wasSkipped = result.status === "skipped";

let label;
if (wasSkipped) {
  label = `SKIPPED — ${result.skippedReason ?? "tool unavailable"}`;
} else {
  label = result.status.toUpperCase();
}
console.log(`\nStatus: ${label}`);

if (result.findings.length > 0) {
  console.log("\nFindings:");
  for (const finding of result.findings) {
    console.log(` [${finding.severity.toUpperCase()}] ${finding.title}`);
    // Only the first affected file is shown.
    if (finding.affectedFiles && finding.affectedFiles.length > 0) {
      console.log(` Location: ${finding.affectedFiles[0]}`);
    }
    // Descriptions are cut to 120 characters.
    if (finding.description) {
      console.log(` ${finding.description.slice(0, 120)}`);
    }
  }
}
console.log(`\nDuration: ${result.durationMs}ms`);

if (wasSkipped) {
  console.log("\nSemgrep is optional. Absence does not block release.");
}
// Only an explicit "failed" status fails the process; skipped and every other
// status exit with 0.
process.exitCode = result.status === "failed" ? 1 : 0;
/**
 * Extracts the options this script understands from a raw argument list.
 *
 * Accepted spellings: `--target <path>`, `-t <path>` and `--target=<path>`.
 * Unrecognised arguments are skipped, a trailing flag with no value is
 * ignored, and when the option is repeated the last occurrence wins.
 *
 * @param {string[]} argv - Command-line arguments after the script path.
 * @returns {{ target?: string }} Parsed options; `target` is absent when it
 *   was not supplied.
 */
function parseArgs(argv) {
  const inlinePrefix = "--target=";
  const result = {};
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg.startsWith(inlinePrefix)) {
      // Inline form: previously dropped silently, which made the scan run
      // against the tool itself instead of the requested target.
      result.target = arg.slice(inlinePrefix.length);
    } else if ((arg === "--target" || arg === "-t") && i + 1 < argv.length) {
      // Separate-value form: consume the next token as the value.
      result.target = argv[++i];
    }
  }
  return result;
}
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env node
2
+ import path from "node:path";
3
+ import fs from "node:fs";
4
+ import { runSecurityValidation } from "../../src/securityValidation/validate/runSecurityValidation.js";
5
+ import { resolveValidationTarget, reportFilenamePrefix } from "../../src/securityValidation/validate/resolveTarget.js";
6
+ import { renderTextReport, renderJsonReport } from "../../src/securityValidation/report/renderSecurityReport.js";
// Parse CLI arguments from process.argv (after the node/tsx and script path).
const rawArgs = process.argv.slice(2);
const args = parseArgs(rawArgs);
const toolRoot = process.cwd();

/**
 * Reads an integer setting from an environment variable.
 *
 * An unset variable yields the fallback silently. A variable that is set but
 * cannot be parsed (including the empty string) also yields the fallback, with
 * a warning, so that `NaN` is never handed to the fuzz run.
 *
 * @param {string} name - Environment variable name.
 * @param {number} radix - Radix passed to `Number.parseInt`.
 * @param {number} fallback - Value used when the variable is unset or invalid.
 * @returns {number} The parsed integer, or `fallback`.
 */
function readIntegerEnv(name, radix, fallback) {
  const raw = process.env[name];
  if (raw === undefined) {
    return fallback;
  }
  const parsed = Number.parseInt(raw, radix);
  if (Number.isNaN(parsed)) {
    console.warn(`WARNING: ignoring invalid ${name}=${JSON.stringify(raw)}; using ${fallback}.`);
    return fallback;
  }
  return parsed;
}

// Resolve and validate target early so we can fail fast with a clean error.
let target;
try {
  target = resolveValidationTarget(args.target, toolRoot);
} catch (err) {
  const msg = err instanceof Error ? err.message : String(err);
  console.error(`\nERROR: ${msg}`);
  console.error("Usage: npm run security:validate -- [--target <path>] [--out <dir>] [--report-prefix <name>]");
  process.exit(1);
}

console.log("=".repeat(60));
console.log("my-dev-kit-lab security:validate");
console.log("=".repeat(60));
console.log(`Tool root : ${toolRoot}`);
if (!target.isSelf) {
  console.log(`Target : ${target.targetRoot}`);
  if (target.packageName) {
    console.log(`Package : ${target.packageName}${target.packageVersion ? `@${target.packageVersion}` : ""}`);
  }
} else {
  console.log(`Mode : self-validation`);
}
console.log("");

const summary = await runSecurityValidation({
  cwd: toolRoot,
  targetPath: args.target,
  fuzzIterations: readIntegerEnv("FUZZ_ITERATIONS", 10, 50),
  // FUZZ_SEED is read as hexadecimal (an optional 0x prefix is accepted).
  fuzzSeed: readIntegerEnv("FUZZ_SEED", 16, 0xdeadbeef),
});

// Build report object
const report = {
  metadata: {
    toolRoot: summary.toolRoot,
    toolPackageName: summary.toolPackageName,
    toolPackageVersion: summary.toolPackageVersion,
    targetRoot: summary.targetRoot,
    targetDescription: summary.targetDescription,
    packageName: summary.packageName,
    packageVersion: summary.packageVersion,
    branch: summary.auditedBranch,
    commit: summary.auditedCommit,
    isSelf: summary.isSelf,
    generatedAt: summary.finishedAt,
    totalDurationMs: new Date(summary.finishedAt).getTime() - new Date(summary.startedAt).getTime(),
  },
  // No sections are populated by this script.
  sections: [],
  allChecks: summary.checks,
  allFindings: summary.findings,
  verdict: summary.verdict,
  recommendedNextStep: summary.recommendedNextStep,
};
const textReport = renderTextReport(report);
const jsonReport = renderJsonReport(report);

// Determine output directory
const reportsDir = args.out
  ? path.resolve(args.out)
  : path.join(toolRoot, "reports", "security");
// A recursive mkdir succeeds when the directory already exists, so no
// separate existence check is needed.
fs.mkdirSync(reportsDir, { recursive: true });

// Determine report filename prefix
const prefix = args.reportPrefix ?? reportFilenamePrefix(target);
const txtPath = path.join(reportsDir, `${prefix}-security-validation.txt`);
const jsonPath = path.join(reportsDir, `${prefix}-security-validation.json`);
fs.writeFileSync(txtPath, textReport, "utf8");
fs.writeFileSync(jsonPath, jsonReport, "utf8");

// Print report to stdout
console.log(textReport);
console.log(`\nReports written:`);
console.log(` ${txtPath}`);
console.log(` ${jsonPath}`);

// Exit code based on verdict: 1 = blocker remains, 2 = audit environment
// incomplete, 0 = anything else.
const blockerExists = summary.verdict === "not-ready-security-blocker-remains";
const inconclusive = summary.verdict === "inconclusive-audit-environment-incomplete";
if (blockerExists) {
  console.error("\nExit 1 — security blocker remains.");
  process.exitCode = 1;
} else if (inconclusive) {
  console.warn("\nExit 2 — audit environment incomplete.");
  process.exitCode = 2;
} else {
  console.log("\nExit 0 — validation completed.");
  process.exitCode = 0;
}
// ---------------------------------------------------------------------------
// Argument parser
// ---------------------------------------------------------------------------
/**
 * Extracts the options this script understands from a raw argument list.
 *
 * Each option may be given as `--flag <value>` or `--flag=<value>`; `-t` is a
 * short alias for `--target` and only takes the separate-value form.
 * Unrecognised arguments are skipped, a trailing flag with no value is
 * ignored, and when an option is repeated the last occurrence wins.
 *
 * @param {string[]} argv - Command-line arguments after the script path.
 * @returns {{ target?: string, out?: string, reportPrefix?: string }} Parsed
 *   options; a property is absent when its flag was not supplied.
 */
function parseArgs(argv) {
  // Every accepted flag spelling and the result property it fills in.
  const flagToKey = new Map([
    ["--target", "target"],
    ["-t", "target"],
    ["--out", "out"],
    ["--report-prefix", "reportPrefix"],
  ]);
  const result = {};
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    const equalsAt = arg.startsWith("--") ? arg.indexOf("=") : -1;
    if (equalsAt !== -1) {
      // Inline form: previously dropped silently, so the option never took
      // effect and the defaults were used without notice.
      const inlineKey = flagToKey.get(arg.slice(0, equalsAt));
      if (inlineKey !== undefined) {
        result[inlineKey] = arg.slice(equalsAt + 1);
      }
      continue;
    }
    const key = flagToKey.get(arg);
    if (key !== undefined && i + 1 < argv.length) {
      // Separate-value form: consume the next token as the value.
      result[key] = argv[++i];
    }
  }
  return result;
}
@@ -0,0 +1,202 @@
1
+ import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
2
+ import path from "node:path";
3
+ import { fileURLToPath } from "node:url";
4
+ import { REQUIRED_BENCHMARK_PROJECT_IDS, parseBenchmarkProjectProfiles, validateAnswerKey, validateBenchmarkProjectProfiles } from "../src/evaluation/benchmarkMetadata.js";
// Ids of the benchmark projects that must be present, as exported by the
// benchmark metadata module.
const requiredProjects = REQUIRED_BENCHMARK_PROJECT_IDS;

// Files that must exist inside each benchmark project. Paths are relative to
// the project's own directory under benchmarks/projects/.
const projectRequiredPaths = {
  "todo-ts": [
    "README.md", "package.json", "tsconfig.json",
    "src/taskStore.ts", "src/taskService.ts", "src/index.ts",
    "tests/taskService.test.ts",
  ],
  "todo-python": [
    "README.md",
    "src/task_store.py", "src/task_service.py", "src/__init__.py",
    "tests/test_task_service.py",
  ],
  "todo-js": [
    "README.md", "package.json",
    "src/taskStore.js", "src/taskService.js", "src/index.js",
    "tests/taskService.test.js",
  ],
  "todo-mixed-ts-py": [
    "README.md", "package.json", "tsconfig.json",
    "src/taskCli.ts",
    "python/task_service.py",
    "tests/mixedBoundary.test.ts",
  ],
  "task-workflow-medium-ts": [
    "README.md", "package.json", "tsconfig.json",
    "src/store/taskStore.ts",
    "src/services/createTask.ts", "src/services/importTasks.ts", "src/services/summarizeTasks.ts",
    "tests/importTasks.test.ts",
  ],
  "task-analytics-large-mixed": [
    "README.md",
    "ts/package.json", "ts/tsconfig.json",
    "ts/src/services/buildAnalyticsSnapshot.ts",
    "ts/src/reporting/formatTaskHealthReport.ts",
    "ts/tests/buildAnalyticsSnapshot.test.ts",
    "py/task_analytics/metrics.py", "py/task_analytics/quality.py",
    "py/tests/test_reporting.py",
  ],
};
/**
 * Lists every non-directory entry below `dir`, depth first, in the order
 * `readdirSync` returns the entries.
 *
 * Results are appended to a single array one path at a time. Spreading a whole
 * subtree into `push(...)` passes every path as a call argument and throws a
 * RangeError on very large trees such as an installed `node_modules`.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths of all files found, each prefixed with `dir`.
 * @throws Propagates any error raised by `readdirSync` (missing directory,
 *   not a directory, permission denied, ...).
 */
function walk(dir) {
  const files = [];
  const visit = (current) => {
    for (const entry of readdirSync(current, { withFileTypes: true })) {
      const fullPath = path.join(current, entry.name);
      if (entry.isDirectory()) {
        visit(fullPath);
      } else {
        files.push(fullPath);
      }
    }
  };
  visit(dir);
  return files;
}
/**
 * Verifies the benchmark contracts and benchmark projects under `rootDir`.
 *
 * The following are checked, and every problem is collected rather than
 * thrown:
 *  - the three contract files in benchmarks/contracts exist and parse;
 *  - benchmark case ids are unique;
 *  - every required project directory exists and contains its required files;
 *  - every case defines an answer key, expected symbols, and expected files
 *    that exist in a known project;
 *  - no required project contains generated output (node_modules, dist,
 *    build, coverage, lab-output).
 *
 * Malformed contract data (a cases file that is not an array, entries that are
 * not objects, non-string expected file names) is reported as an error instead
 * of crashing the run.
 *
 * @param {string} [rootDir=process.cwd()] - Directory that contains `benchmarks/`.
 * @returns {{ ok: boolean, errors: string[], checks: string[] }} `ok` is true
 *   only when `errors` is empty; `checks` lists the verifications that passed.
 */
export function validateBenchmarks(rootDir = process.cwd()) {
  const checks = [];
  const errors = [];
  const contractsDir = path.join(rootDir, "benchmarks", "contracts");
  const projectsDir = path.join(rootDir, "benchmarks", "projects");
  const behaviorPath = path.join(contractsDir, "todo-behavior.md");
  const casesPath = path.join(contractsDir, "todo-benchmark-case.json");
  const profilesPath = path.join(contractsDir, "benchmark-project-profiles.json");

  // Thrown values are not guaranteed to be Error instances.
  const describeError = (error) => (error instanceof Error ? error.message : String(error));
  const isDirectory = (candidate) => existsSync(candidate) && statSync(candidate).isDirectory();

  // --- Contract: behavior description -------------------------------------
  if (!existsSync(behaviorPath)) {
    errors.push("Missing contract file: benchmarks/contracts/todo-behavior.md");
  } else {
    checks.push("found todo-behavior.md");
  }

  // --- Contract: benchmark cases ------------------------------------------
  let cases = [];
  if (!existsSync(casesPath)) {
    errors.push("Missing contract file: benchmarks/contracts/todo-benchmark-case.json");
  } else {
    try {
      const parsed = JSON.parse(readFileSync(casesPath, "utf8"));
      if (!Array.isArray(parsed)) {
        // Anything but an array would make the loops below throw.
        errors.push("Invalid todo-benchmark-case.json: expected a JSON array of benchmark cases");
      } else {
        for (const [index, entry] of parsed.entries()) {
          if (entry !== null && typeof entry === "object") {
            cases.push(entry);
          } else {
            errors.push(`Invalid benchmark case at index ${index}: expected an object`);
          }
        }
        checks.push("parsed todo-benchmark-case.json");
      }
    } catch (error) {
      errors.push(`Invalid JSON in todo-benchmark-case.json: ${describeError(error)}`);
    }
  }

  // --- Contract: project profiles -----------------------------------------
  if (!existsSync(profilesPath)) {
    errors.push("Missing contract file: benchmarks/contracts/benchmark-project-profiles.json");
  } else {
    try {
      const profiles = parseBenchmarkProjectProfiles(JSON.parse(readFileSync(profilesPath, "utf8")));
      const profileErrors = validateBenchmarkProjectProfiles(profiles, rootDir);
      errors.push(...profileErrors);
      if (profileErrors.length === 0) {
        checks.push("validated benchmark-project-profiles.json");
      }
    } catch (error) {
      errors.push(`Invalid benchmark-project-profiles.json: ${describeError(error)}`);
    }
  }

  // --- Case ids must be unique --------------------------------------------
  const seenIds = new Set();
  let hasDuplicateIds = false;
  for (const benchmarkCase of cases) {
    if (seenIds.has(benchmarkCase.id)) {
      errors.push(`Duplicate benchmark case id: ${benchmarkCase.id}`);
      hasDuplicateIds = true;
    }
    seenIds.add(benchmarkCase.id);
  }
  if (cases.length > 0 && !hasDuplicateIds) {
    checks.push("benchmark case ids are unique");
  }

  // --- Required projects and their required files -------------------------
  for (const project of requiredProjects) {
    const projectDir = path.join(projectsDir, project);
    if (!isDirectory(projectDir)) {
      errors.push(`Missing benchmark project: benchmarks/projects/${project}`);
      continue;
    }
    checks.push(`found benchmark project ${project}`);
    // A required id without an entry in the table has no per-file requirements.
    for (const relPath of projectRequiredPaths[project] ?? []) {
      if (!existsSync(path.join(projectDir, relPath))) {
        errors.push(`Missing required file for ${project}: benchmarks/projects/${project}/${relPath}`);
      }
    }
  }

  // --- Per-case content ---------------------------------------------------
  for (const benchmarkCase of cases) {
    if (!benchmarkCase.answerKey) {
      errors.push(`Case ${benchmarkCase.id} does not define answerKey`);
    } else {
      errors.push(...validateAnswerKey(benchmarkCase.answerKey, `Case ${benchmarkCase.id}`));
    }
    if (!Array.isArray(benchmarkCase.expectedSymbols) || benchmarkCase.expectedSymbols.length === 0) {
      errors.push(`Case ${benchmarkCase.id} does not define expectedSymbols`);
    }
    for (const [project, expectedFiles] of Object.entries(benchmarkCase.expectedFilesByProject ?? {})) {
      if (!requiredProjects.includes(project)) {
        errors.push(`Case ${benchmarkCase.id} references unknown project id: ${project}`);
        continue;
      }
      if (!Array.isArray(expectedFiles) || expectedFiles.length === 0) {
        errors.push(`Case ${benchmarkCase.id} does not define expected files for ${project}`);
        continue;
      }
      for (const expectedFile of expectedFiles) {
        if (typeof expectedFile !== "string") {
          // path.join throws on non-string segments.
          errors.push(`Case ${benchmarkCase.id} lists a non-string expected file for ${project}`);
          continue;
        }
        const fullPath = path.join(rootDir, "benchmarks", "projects", project, expectedFile);
        if (!existsSync(fullPath)) {
          errors.push(`Case ${benchmarkCase.id} references missing file: benchmarks/projects/${project}/${expectedFile}`);
        }
      }
    }
  }

  // --- No generated output inside benchmark projects ----------------------
  for (const project of requiredProjects) {
    const projectDir = path.join(projectsDir, project);
    // Already reported above when missing; a non-directory cannot be walked.
    if (!isDirectory(projectDir)) {
      continue;
    }
    const forbidden = walk(projectDir).filter((fullPath) => {
      // Normalise Windows separators so the pattern only has to handle "/".
      const rel = path.relative(projectDir, fullPath).replace(/\\/g, "/");
      return /(^|\/)(node_modules|dist|build|coverage|lab-output)(\/|$)/.test(rel);
    });
    if (forbidden.length > 0) {
      errors.push(`Forbidden generated output found in ${project}: ${forbidden[0]}`);
    }
  }

  return { ok: errors.length === 0, errors, checks };
}
/**
 * Writes a human-readable verification summary to stdout: the overall
 * outcome, the number of passed checks and, when there are any, the error
 * count followed by one bullet line per error.
 *
 * @param {{ ok: boolean, errors: string[], checks: string[] }} result -
 *   Result object as returned by `validateBenchmarks`.
 * @returns {void}
 */
function printSummary(result) {
  const outcome = result.ok ? "passed" : "failed";
  console.log(`Benchmark verification ${outcome}.`);
  console.log(`Checks: ${result.checks.length}`);

  const problems = result.errors;
  if (problems.length === 0) {
    return;
  }
  console.log(`Errors: ${problems.length}`);
  for (const problem of problems) {
    console.log(`- ${problem}`);
  }
}
// Run the verification only when this module is the script Node was started
// with; importing it merely exposes validateBenchmarks.
const currentFile = fileURLToPath(import.meta.url);
const [, entryScript] = process.argv;
const invokedPath = entryScript ? path.resolve(entryScript) : "";

if (currentFile === invokedPath) {
  const outcome = validateBenchmarks();
  printSummary(outcome);
  // Leave the exit code untouched on success.
  if (!outcome.ok) {
    process.exitCode = 1;
  }
}
@@ -0,0 +1,37 @@
1
+ import { runCliAgent } from "./codexAdapter.js";
2
+ import { applyPromptToCommandTemplate } from "../runAgentPrompt.js";
3
+ import { parseAgentTokenUsage } from "../parseAgentTokenUsage.js";
4
+ import { runMeasuredCommand } from "../../core/runMeasuredCommand.js";
5
/**
 * Agent adapter that drives the `claude` command-line tool.
 *
 * Members:
 * - `isAvailable(request)`: resolves `true` immediately when the request
 *   carries a `commandTemplate`; otherwise runs `claude --version` through
 *   `runMeasuredCommand` and resolves with that probe's `ok` flag.
 * - `buildCommand(request)`: returns the command to execute, either derived
 *   from the request's `commandTemplate` or the default `claude -p <prompt>`.
 * - `runPrompt(request)`: delegates to `runCliAgent` with this adapter.
 * - `parseTokenUsage`: the shared `parseAgentTokenUsage` function.
 * - `parseFinalAnswer(text)`: trims the text and labels it "parsed" or "empty".
 */
export const claudeAdapter = {
  id: "claude",
  displayName: "Claude",
  surface: "cli",

  async isAvailable(request) {
    // Only probe the default binary when no template overrides the command.
    if (!request.commandTemplate) {
      const { ok } = await runMeasuredCommand({
        commandId: "claude-availability",
        commandString: "claude",
        extraArgs: ["--version"],
        cwd: request.cwd,
        outDir: request.outDir,
        env: request.env
      });
      return ok;
    }
    return true;
  },

  buildCommand(request) {
    const { commandTemplate, promptText } = request;
    return commandTemplate
      ? applyPromptToCommandTemplate(commandTemplate, promptText)
      : { command: "claude", args: ["-p", promptText] };
  },

  async runPrompt(request) {
    return runCliAgent(request, this);
  },

  parseTokenUsage: parseAgentTokenUsage,

  parseFinalAnswer(text) {
    const finalAnswerText = text.trim();
    const finalAnswerParseStatus = finalAnswerText ? "parsed" : "empty";
    return { finalAnswerText, finalAnswerParseStatus };
  }
};
@@ -0,0 +1,110 @@
1
+ import { runMeasuredCommand } from "../../core/runMeasuredCommand.js";
2
+ import { applyPromptToCommandTemplate } from "../runAgentPrompt.js";
3
+ import { parseAgentTokenUsage } from "../parseAgentTokenUsage.js";
4
/**
 * Agent adapter that drives the `codex` command-line tool.
 *
 * Members:
 * - `isAvailable(request)`: resolves `true` immediately when the request
 *   carries a `commandTemplate`; otherwise runs `codex --version` through
 *   `runMeasuredCommand` and resolves with that probe's `ok` flag.
 * - `buildCommand(request)`: returns the command to execute, either derived
 *   from the request's `commandTemplate` or the default
 *   `codex exec --json <prompt>`.
 * - `runPrompt(request)`: delegates to `runCliAgent` with this adapter.
 * - `parseTokenUsage`: the shared `parseAgentTokenUsage` function.
 * - `parseFinalAnswer(text)`: trims the text and labels it "parsed" or "empty".
 */
export const codexAdapter = {
  id: "codex",
  displayName: "Codex",
  surface: "cli",

  async isAvailable(request) {
    // Only probe the default binary when no template overrides the command.
    if (!request.commandTemplate) {
      const { ok } = await runMeasuredCommand({
        commandId: "codex-availability",
        commandString: "codex",
        extraArgs: ["--version"],
        cwd: request.cwd,
        outDir: request.outDir,
        env: request.env
      });
      return ok;
    }
    return true;
  },

  buildCommand(request) {
    const { commandTemplate, promptText } = request;
    return commandTemplate
      ? applyPromptToCommandTemplate(commandTemplate, promptText)
      : { command: "codex", args: ["exec", "--json", promptText] };
  },

  async runPrompt(request) {
    return runCliAgent(request, this);
  },

  parseTokenUsage: parseAgentTokenUsage,

  parseFinalAnswer(text) {
    const finalAnswerText = text.trim();
    const finalAnswerParseStatus = finalAnswerText ? "parsed" : "empty";
    return { finalAnswerText, finalAnswerParseStatus };
  }
};
37
/**
 * Run one prompt through a CLI-backed agent adapter and describe the outcome.
 *
 * Steps, in order:
 * 1. Ask the adapter to build the command for this request.
 * 2. Ask the adapter whether it is available. If not, return a result whose
 *    status is "failed" when `request.requireAvailable` is truthy and
 *    "skipped" otherwise; the unavailability message is reported under
 *    `errors` or `warnings` respectively.
 * 3. Otherwise execute the command through `runMeasuredCommand`, then parse
 *    the final answer (stdout, falling back to stderr when stdout is falsy)
 *    and the token usage (stdout and stderr joined by a newline).
 *
 * @param {object} request Run request; this function reads `runId`, `cwd`,
 *   `outDir`, `env`, `timeoutMs`, `requireAvailable`, `commandTemplate` and
 *   `promptVariant` (`id`, `strategy`, `complexityLevel`).
 * @param {object} adapter Agent adapter providing `id`, `displayName`,
 *   `surface`, `buildCommand`, `isAvailable`, `parseFinalAnswer` and
 *   `parseTokenUsage`.
 * @returns {Promise<object>} Result record for the run.
 */
export async function runCliAgent(request, adapter) {
  const startedMs = Date.now();
  const invocation = adapter.buildCommand(request);

  // Fields common to both result shapes. Evaluated lazily, at the moment the
  // result object is assembled, so nothing is read from the request earlier
  // than necessary.
  const commonFields = (endedMs) => ({
    runId: request.runId,
    agentId: adapter.id,
    displayName: adapter.displayName,
    surface: adapter.surface,
    promptVariantId: request.promptVariant.id,
    promptStrategy: request.promptVariant.strategy,
    promptComplexityLevel: request.promptVariant.complexityLevel,
    startedAt: new Date(startedMs).toISOString(),
    endedAt: new Date(endedMs).toISOString(),
    durationMs: endedMs - startedMs
  });

  if (!(await adapter.isAvailable(request))) {
    const endedMs = Date.now();
    const outcome = request.requireAvailable ? "failed" : "skipped";
    const notice = `${adapter.displayName} CLI was not available.`;
    return {
      ...commonFields(endedMs),
      status: outcome,
      exitCode: null,
      command: invocation.command,
      args: invocation.args,
      cwd: request.cwd,
      finalAnswerText: "",
      finalAnswerParseStatus: "empty",
      tokenUsage: { source: "unavailable" },
      tokenUsageSource: "unavailable",
      tokenUsageReliability: "unavailable",
      warnings: outcome === "skipped" ? [notice] : [],
      errors: outcome === "failed" ? [notice] : []
    };
  }

  // A command template may pin its own working directory.
  const workingDir = request.commandTemplate?.cwd ?? request.cwd;
  const run = await runMeasuredCommand({
    commandId: `${adapter.id}-agent-run`,
    commandString: invocation.command,
    extraArgs: invocation.args,
    cwd: workingDir,
    outDir: request.outDir,
    env: request.env,
    timeoutMs: request.timeoutMs
  });
  const endedMs = Date.now();

  const bothStreams = `${run.stdout}\n${run.stderr}`;
  const answer = adapter.parseFinalAnswer(run.stdout || run.stderr);
  const usage = adapter.parseTokenUsage(bothStreams);

  let failures = [];
  if (!run.ok) {
    failures = [run.error ?? "Agent command failed."];
  }

  return {
    ...commonFields(endedMs),
    status: run.ok ? "completed" : "failed",
    exitCode: run.exitCode,
    command: run.executable,
    args: run.args,
    cwd: workingDir,
    stdoutPath: run.stdoutPath,
    stderrPath: run.stderrPath,
    telemetryPath: run.telemetryPath,
    finalAnswerText: answer.finalAnswerText,
    finalAnswerParseStatus: answer.finalAnswerParseStatus,
    tokenUsage: usage.tokenUsage,
    tokenUsageSource: usage.tokenUsageSource,
    tokenUsageReliability: usage.tokenUsageReliability,
    warnings: usage.warnings,
    errors: failures
  };
}
@@ -0,0 +1,101 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import path from "node:path";
3
/**
 * Simulated agent adapter that runs no external process.
 *
 * `runPrompt` picks its behavior from `request.env.FAKE_AGENT_MODE`
 * (default "success"):
 * - "failure": status "failed", exit code 1, an error entry, and a message
 *   written to the stderr file.
 * - "missing-token-usage": token usage is reported as unavailable and a
 *   warning is attached.
 * - "invalid-output": the answer is a fixed unstructured sentence and the
 *   parse status is reported as "empty".
 * Any other value behaves like "success".
 *
 * It writes three files into `request.outDir` (created if needed):
 * `fake-agent.stdout.txt`, `fake-agent.stderr.txt` and
 * `fake-agent.telemetry.json`.
 */
export const fakeAgentAdapter = {
  id: "fake-agent",
  displayName: "Fake Agent",
  surface: "simulated",

  async isAvailable() {
    return true;
  },

  buildCommand() {
    return { command: "fake-agent", args: [] };
  },

  async runPrompt(request) {
    const startedMs = Date.now();
    const mode = request.env?.FAKE_AGENT_MODE ?? "success";
    await mkdir(request.outDir, { recursive: true });

    const stdoutPath = path.join(request.outDir, "fake-agent.stdout.txt");
    const stderrPath = path.join(request.outDir, "fake-agent.stderr.txt");
    const telemetryPath = path.join(request.outDir, "fake-agent.telemetry.json");

    const isFailure = mode === "failure";
    const omitUsage = mode === "missing-token-usage";
    const isInvalid = mode === "invalid-output";

    let finalAnswerText;
    if (isInvalid) {
      finalAnswerText = "Simulated unstructured output without scoreable fields.";
    } else {
      finalAnswerText = buildFakeAnswer(request, omitUsage);
    }

    await writeFile(stdoutPath, `${finalAnswerText}\n`, "utf8");
    await writeFile(stderrPath, isFailure ? "Simulated fake-agent failure.\n" : "", "utf8");
    const endedMs = Date.now();

    // Token usage derived from the prompt's estimated size plus a fixed
    // 128-token output; only evaluated when usage is not being omitted.
    const reportedUsage = () => {
      const promptTokens = request.promptVariant.promptMetrics.promptEstimatedTokens;
      return {
        inputTokens: promptTokens,
        outputTokens: 128,
        totalTokens: promptTokens + 128,
        source: "agent-reported",
        rawText: finalAnswerText
      };
    };

    const result = {
      runId: request.runId,
      agentId: "fake-agent",
      displayName: "Fake Agent",
      surface: "simulated",
      promptVariantId: request.promptVariant.id,
      promptStrategy: request.promptVariant.strategy,
      promptComplexityLevel: request.promptVariant.complexityLevel,
      startedAt: new Date(startedMs).toISOString(),
      endedAt: new Date(endedMs).toISOString(),
      durationMs: endedMs - startedMs,
      status: isFailure ? "failed" : "completed",
      exitCode: isFailure ? 1 : 0,
      command: "fake-agent",
      args: [],
      cwd: request.cwd,
      stdoutPath,
      stderrPath,
      telemetryPath,
      finalAnswerText,
      finalAnswerParseStatus: isInvalid ? "empty" : "parsed",
      tokenUsage: omitUsage ? { source: "unavailable", rawText: finalAnswerText } : reportedUsage(),
      tokenUsageSource: omitUsage ? "unavailable" : "agent-reported",
      tokenUsageReliability: omitUsage ? "unavailable" : "high",
      warnings: omitUsage ? ["Token usage was intentionally omitted by fake-agent mode."] : [],
      errors: isFailure ? ["Simulated fake-agent failure."] : []
    };

    const telemetry = {
      commandId: "fake-agent",
      exitCode: result.exitCode,
      durationMs: result.durationMs
    };
    await writeFile(telemetryPath, `${JSON.stringify(telemetry, null, 2)}\n`, "utf8");
    return result;
  },

  parseTokenUsage() {
    return {
      tokenUsage: { source: "agent-reported" },
      tokenUsageSource: "agent-reported",
      tokenUsageReliability: "high",
      warnings: []
    };
  },

  parseFinalAnswer(text) {
    const finalAnswerText = text.trim();
    const finalAnswerParseStatus = finalAnswerText ? "parsed" : "empty";
    return { finalAnswerText, finalAnswerParseStatus };
  }
};
81
/**
 * Compose the deterministic, line-oriented answer text emitted by the fake
 * agent.
 *
 * The answer lists the expected files and symbols from the prompt variant's
 * answer key, the ids of at most the first two expected facts, and (unless
 * `missingUsage` is truthy) two token-usage lines derived from the prompt's
 * estimated token count plus a fixed 128 output tokens.
 *
 * @param {object} request Run request; reads `promptVariant.expectedAnswerKey`
 *   (`expectedFacts`, `expectedFiles`, `expectedSymbols`),
 *   `promptVariant.promptMetrics.promptEstimatedTokens` and
 *   `promptVariant.complexityLevel`.
 * @param {boolean} missingUsage When truthy, the token-usage lines are left out.
 * @returns {string} Newline-joined answer text.
 */
function buildFakeAnswer(request, missingUsage) {
  const variant = request.promptVariant;
  const answerKey = variant.expectedAnswerKey;
  const factIds = Array.from(answerKey.expectedFacts.slice(0, 2), (fact) => fact.id);

  const usageLines = [];
  if (!missingUsage) {
    usageLines.push(
      `tokenUsage: inputTokens=${variant.promptMetrics.promptEstimatedTokens}, outputTokens=128, totalTokens=${variant.promptMetrics.promptEstimatedTokens + 128}`,
      "tokenUsageSource: agent-reported"
    );
  }

  const lines = [
    "answer: Simulated benchmark answer from fake-agent.",
    `relevantFiles: ${answerKey.expectedFiles.join(", ")}`,
    `relevantSymbols: ${answerKey.expectedSymbols.join(", ")}`,
    `expectedFactsFound: ${factIds.join(", ")}`,
    "confidence: high",
    ...usageLines,
    `executionTime: simulated-${variant.complexityLevel}`,
    "notes: Deterministic fake-agent output for tests."
  ];
  return lines.join("\n");
}