@dailephd/my-dev-kit-lab 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250):
  1. package/README.md +272 -0
  2. package/benchmarks/contracts/benchmark-project-profiles.json +1199 -0
  3. package/benchmarks/contracts/todo-behavior.md +70 -0
  4. package/benchmarks/contracts/todo-benchmark-case.json +227 -0
  5. package/benchmarks/projects/README.md +34 -0
  6. package/benchmarks/projects/task-analytics-large-mixed/README.md +1 -0
  7. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/__init__.py +3 -0
  8. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/fixtures.py +6 -0
  9. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/metrics.py +29 -0
  10. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/models.py +21 -0
  11. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/parser.py +16 -0
  12. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/pipeline.py +9 -0
  13. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/quality.py +8 -0
  14. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/reporting.py +11 -0
  15. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_metrics.py +19 -0
  16. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_parser.py +15 -0
  17. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_quality.py +19 -0
  18. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_reporting.py +15 -0
  19. package/benchmarks/projects/task-analytics-large-mixed/ts/package.json +12 -0
  20. package/benchmarks/projects/task-analytics-large-mixed/ts/src/index.ts +11 -0
  21. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/analyticsSnapshot.ts +20 -0
  22. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/project.ts +5 -0
  23. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/task.ts +10 -0
  24. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/buildProjectLeaderboard.ts +7 -0
  25. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/formatTaskHealthReport.ts +13 -0
  26. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/buildAnalyticsSnapshot.ts +39 -0
  27. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/completeTask.ts +10 -0
  28. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/createTask.ts +21 -0
  29. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/listTasksByProject.ts +6 -0
  30. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/projectStore.ts +20 -0
  31. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/taskStore.ts +44 -0
  32. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/projectValidation.ts +12 -0
  33. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/taskValidation.ts +18 -0
  34. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/buildAnalyticsSnapshot.test.ts +48 -0
  35. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/completeTask.test.ts +21 -0
  36. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/createTask.test.ts +31 -0
  37. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/listTasksByProject.test.ts +18 -0
  38. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/reporting.test.ts +19 -0
  39. package/benchmarks/projects/task-analytics-large-mixed/ts/tsconfig.json +12 -0
  40. package/benchmarks/projects/task-analytics-large-mixed/ts/vitest.config.ts +5 -0
  41. package/benchmarks/projects/task-workflow-medium-ts/README.md +1 -0
  42. package/benchmarks/projects/task-workflow-medium-ts/package.json +12 -0
  43. package/benchmarks/projects/task-workflow-medium-ts/src/index.ts +9 -0
  44. package/benchmarks/projects/task-workflow-medium-ts/src/models/project.ts +6 -0
  45. package/benchmarks/projects/task-workflow-medium-ts/src/models/task.ts +39 -0
  46. package/benchmarks/projects/task-workflow-medium-ts/src/services/completeTask.ts +15 -0
  47. package/benchmarks/projects/task-workflow-medium-ts/src/services/createTask.ts +26 -0
  48. package/benchmarks/projects/task-workflow-medium-ts/src/services/filterTasks.ts +17 -0
  49. package/benchmarks/projects/task-workflow-medium-ts/src/services/importTasks.ts +33 -0
  50. package/benchmarks/projects/task-workflow-medium-ts/src/services/summarizeTasks.ts +30 -0
  51. package/benchmarks/projects/task-workflow-medium-ts/src/store/taskStore.ts +76 -0
  52. package/benchmarks/projects/task-workflow-medium-ts/src/utils/deterministicId.ts +3 -0
  53. package/benchmarks/projects/task-workflow-medium-ts/src/validation/taskValidation.ts +45 -0
  54. package/benchmarks/projects/task-workflow-medium-ts/tests/completeTask.test.ts +16 -0
  55. package/benchmarks/projects/task-workflow-medium-ts/tests/createTask.test.ts +21 -0
  56. package/benchmarks/projects/task-workflow-medium-ts/tests/filterTasks.test.ts +18 -0
  57. package/benchmarks/projects/task-workflow-medium-ts/tests/importTasks.test.ts +22 -0
  58. package/benchmarks/projects/task-workflow-medium-ts/tests/summarizeTasks.test.ts +29 -0
  59. package/benchmarks/projects/task-workflow-medium-ts/tsconfig.json +12 -0
  60. package/benchmarks/projects/task-workflow-medium-ts/vitest.config.ts +5 -0
  61. package/benchmarks/projects/todo-js/README.md +3 -0
  62. package/benchmarks/projects/todo-js/package.json +11 -0
  63. package/benchmarks/projects/todo-js/src/index.js +2 -0
  64. package/benchmarks/projects/todo-js/src/taskService.js +37 -0
  65. package/benchmarks/projects/todo-js/src/taskStore.js +28 -0
  66. package/benchmarks/projects/todo-js/tests/taskService.test.js +45 -0
  67. package/benchmarks/projects/todo-js/vitest.config.js +5 -0
  68. package/benchmarks/projects/todo-mixed-ts-py/README.md +3 -0
  69. package/benchmarks/projects/todo-mixed-ts-py/package.json +13 -0
  70. package/benchmarks/projects/todo-mixed-ts-py/python/task_service.py +76 -0
  71. package/benchmarks/projects/todo-mixed-ts-py/src/taskCli.ts +38 -0
  72. package/benchmarks/projects/todo-mixed-ts-py/tests/mixedBoundary.test.ts +18 -0
  73. package/benchmarks/projects/todo-mixed-ts-py/tsconfig.json +12 -0
  74. package/benchmarks/projects/todo-mixed-ts-py/vitest.config.ts +5 -0
  75. package/benchmarks/projects/todo-python/README.md +3 -0
  76. package/benchmarks/projects/todo-python/src/__init__.py +4 -0
  77. package/benchmarks/projects/todo-python/src/task_service.py +32 -0
  78. package/benchmarks/projects/todo-python/src/task_store.py +28 -0
  79. package/benchmarks/projects/todo-python/tests/test_task_service.py +52 -0
  80. package/benchmarks/projects/todo-ts/README.md +3 -0
  81. package/benchmarks/projects/todo-ts/package.json +12 -0
  82. package/benchmarks/projects/todo-ts/src/index.ts +2 -0
  83. package/benchmarks/projects/todo-ts/src/taskService.ts +41 -0
  84. package/benchmarks/projects/todo-ts/src/taskStore.ts +34 -0
  85. package/benchmarks/projects/todo-ts/tests/taskService.test.ts +45 -0
  86. package/benchmarks/projects/todo-ts/tsconfig.json +12 -0
  87. package/benchmarks/projects/todo-ts/vitest.config.ts +5 -0
  88. package/dist/scripts/build-gallery.js +3 -0
  89. package/dist/scripts/capture-demo-report.js +3 -0
  90. package/dist/scripts/evaluate-token-savings.js +2 -0
  91. package/dist/scripts/experiments/describeExperiment.js +143 -0
  92. package/dist/scripts/experiments/listExperiments.js +44 -0
  93. package/dist/scripts/experiments/runExperiment.js +199 -0
  94. package/dist/scripts/generate-experiment-plots.js +3 -0
  95. package/dist/scripts/generate-prompt-variants.js +2 -0
  96. package/dist/scripts/render-experiment-report.js +2 -0
  97. package/dist/scripts/run-agent-prompt.js +2 -0
  98. package/dist/scripts/run-controlled-experiment.js +2 -0
  99. package/dist/scripts/run-final-demo.js +3 -0
  100. package/dist/scripts/run-lab-demo.js +5 -0
  101. package/dist/scripts/run-visualization-demos.js +3 -0
  102. package/dist/scripts/security/runCodeql.js +57 -0
  103. package/dist/scripts/security/runDependencyChecks.js +57 -0
  104. package/dist/scripts/security/runFuzzSmoke.js +29 -0
  105. package/dist/scripts/security/runPackageChecks.js +56 -0
  106. package/dist/scripts/security/runSemgrep.js +63 -0
  107. package/dist/scripts/security/validate.js +117 -0
  108. package/dist/scripts/verify-benchmarks.js +202 -0
  109. package/dist/src/agents/adapters/claudeAdapter.js +37 -0
  110. package/dist/src/agents/adapters/codexAdapter.js +110 -0
  111. package/dist/src/agents/adapters/fakeAgentAdapter.js +101 -0
  112. package/dist/src/agents/agentRegistry.js +21 -0
  113. package/dist/src/agents/index.js +7 -0
  114. package/dist/src/agents/parseAgentTokenUsage.js +137 -0
  115. package/dist/src/agents/runAgentPrompt.js +38 -0
  116. package/dist/src/agents/types.js +1 -0
  117. package/dist/src/commands/buildGalleryCommand.js +56 -0
  118. package/dist/src/commands/captureDemoReport.js +116 -0
  119. package/dist/src/commands/evaluateTokenSavings.js +175 -0
  120. package/dist/src/commands/generateExperimentPlotsCommand.js +38 -0
  121. package/dist/src/commands/generatePromptVariants.js +67 -0
  122. package/dist/src/commands/renderExperimentReportCommand.js +131 -0
  123. package/dist/src/commands/runAgentPromptCommand.js +132 -0
  124. package/dist/src/commands/runControlledExperimentCommand.js +174 -0
  125. package/dist/src/commands/runFinalDemoCommand.js +123 -0
  126. package/dist/src/commands/runLabDemo.js +62 -0
  127. package/dist/src/commands/runVisualizationDemosCommand.js +67 -0
  128. package/dist/src/core/commandLine.js +59 -0
  129. package/dist/src/core/countTokens.js +8 -0
  130. package/dist/src/core/fileGlobs.js +100 -0
  131. package/dist/src/core/localProjectTarget.js +75 -0
  132. package/dist/src/core/pathSafety.js +19 -0
  133. package/dist/src/core/pythonCommand.js +30 -0
  134. package/dist/src/core/resolveCommand.js +110 -0
  135. package/dist/src/core/runMeasuredCommand.js +143 -0
  136. package/dist/src/evaluation/benchmarkMetadata.js +207 -0
  137. package/dist/src/evaluation/buildExperimentMatrix.js +75 -0
  138. package/dist/src/evaluation/classifyAgentRunOutcome.js +40 -0
  139. package/dist/src/evaluation/compareExperimentRuns.js +79 -0
  140. package/dist/src/evaluation/compareTokenSavings.js +47 -0
  141. package/dist/src/evaluation/controlledExperimentTypes.js +1 -0
  142. package/dist/src/evaluation/index.js +18 -0
  143. package/dist/src/evaluation/parseAgentAnswer.js +230 -0
  144. package/dist/src/evaluation/projectComplexity.js +126 -0
  145. package/dist/src/evaluation/projectFileTree.js +83 -0
  146. package/dist/src/evaluation/readEvaluationCases.js +59 -0
  147. package/dist/src/evaluation/renderTokenSavingsReportInput.js +55 -0
  148. package/dist/src/evaluation/runControlledExperiment.js +158 -0
  149. package/dist/src/evaluation/runMyDevKitRetrieval.js +197 -0
  150. package/dist/src/evaluation/runRawFullFileBaseline.js +31 -0
  151. package/dist/src/evaluation/scoreCorrectness.js +127 -0
  152. package/dist/src/evaluation/types.js +1 -0
  153. package/dist/src/evaluation/writeExperimentArtifacts.js +104 -0
  154. package/dist/src/evaluation/writeTokenSavingsArtifacts.js +57 -0
  155. package/dist/src/experiments/config.js +24 -0
  156. package/dist/src/experiments/defaultRegistry.js +7 -0
  157. package/dist/src/experiments/errors.js +18 -0
  158. package/dist/src/experiments/index.js +9 -0
  159. package/dist/src/experiments/outputPaths.js +25 -0
  160. package/dist/src/experiments/plugins/contextStrategyComparison/config.js +37 -0
  161. package/dist/src/experiments/plugins/contextStrategyComparison/index.js +3 -0
  162. package/dist/src/experiments/plugins/contextStrategyComparison/plugin.js +83 -0
  163. package/dist/src/experiments/plugins/contextStrategyComparison/resultMapping.js +260 -0
  164. package/dist/src/experiments/plugins/index.js +1 -0
  165. package/dist/src/experiments/registry.js +43 -0
  166. package/dist/src/experiments/results.js +48 -0
  167. package/dist/src/experiments/runner.js +181 -0
  168. package/dist/src/experiments/target.js +8 -0
  169. package/dist/src/experiments/types.js +1 -0
  170. package/dist/src/gallery/index.js +2 -0
  171. package/dist/src/gallery/types.js +1 -0
  172. package/dist/src/gallery/writeGalleryManifest.js +214 -0
  173. package/dist/src/index.js +12 -0
  174. package/dist/src/plots/buildExperimentPlotData.js +137 -0
  175. package/dist/src/plots/index.js +4 -0
  176. package/dist/src/plots/renderSvgChart.js +82 -0
  177. package/dist/src/plots/types.js +1 -0
  178. package/dist/src/plots/writePlotArtifacts.js +46 -0
  179. package/dist/src/prompts/buildPromptContext.js +68 -0
  180. package/dist/src/prompts/generateMyDevKitPrompt.js +106 -0
  181. package/dist/src/prompts/generatePromptVariants.js +36 -0
  182. package/dist/src/prompts/generateRawFullFilePrompt.js +97 -0
  183. package/dist/src/prompts/index.js +7 -0
  184. package/dist/src/prompts/measurePromptComplexity.js +41 -0
  185. package/dist/src/prompts/types.js +1 -0
  186. package/dist/src/prompts/writePromptArtifacts.js +43 -0
  187. package/dist/src/report/buildExperimentReportInput.js +339 -0
  188. package/dist/src/report/experimentReportTypes.js +1 -0
  189. package/dist/src/report/experiments/buildPluginExperimentReport.js +153 -0
  190. package/dist/src/report/experiments/experimentReportModel.js +1 -0
  191. package/dist/src/report/experiments/index.js +4 -0
  192. package/dist/src/report/experiments/renderPluginExperimentReportHtml.js +133 -0
  193. package/dist/src/report/experiments/writePluginExperimentReports.js +30 -0
  194. package/dist/src/report/index.js +8 -0
  195. package/dist/src/report/renderExperimentHtmlReport.js +354 -0
  196. package/dist/src/report/renderHtmlReport.js +103 -0
  197. package/dist/src/report/types.js +10 -0
  198. package/dist/src/report/writeExperimentReportArtifacts.js +38 -0
  199. package/dist/src/report/writeReportArtifacts.js +39 -0
  200. package/dist/src/screenshot/captureReportScreenshot.js +75 -0
  201. package/dist/src/screenshot/index.js +2 -0
  202. package/dist/src/screenshot/types.js +1 -0
  203. package/dist/src/securityValidation/artifacts.js +15 -0
  204. package/dist/src/securityValidation/cliAdversarial/adversarialCliConfig.js +38 -0
  205. package/dist/src/securityValidation/cliAdversarial/dataVolumeChecks.js +194 -0
  206. package/dist/src/securityValidation/cliAdversarial/jsonStdoutChecks.js +359 -0
  207. package/dist/src/securityValidation/cliAdversarial/malformedArtifactChecks.js +284 -0
  208. package/dist/src/securityValidation/cliAdversarial/malformedArtifactFixtures.js +79 -0
  209. package/dist/src/securityValidation/cliAdversarial/pathBoundaryChecks.js +431 -0
  210. package/dist/src/securityValidation/cliAdversarial/pathCases.js +144 -0
  211. package/dist/src/securityValidation/cliAdversarial/readOnlyBoundaryChecks.js +294 -0
  212. package/dist/src/securityValidation/cliAdversarial/runAdversarialCheck.js +149 -0
  213. package/dist/src/securityValidation/cliAdversarial/subprocessSafetyChecks.js +214 -0
  214. package/dist/src/securityValidation/cliAdversarial/tempWorkspace.js +160 -0
  215. package/dist/src/securityValidation/commandRunner.js +136 -0
  216. package/dist/src/securityValidation/config.js +39 -0
  217. package/dist/src/securityValidation/dependencies/parseNpmAudit.js +115 -0
  218. package/dist/src/securityValidation/dependencies/parseNpmLs.js +71 -0
  219. package/dist/src/securityValidation/dependencies/parseNpmOutdated.js +41 -0
  220. package/dist/src/securityValidation/dependencies/runDependencyChecks.js +239 -0
  221. package/dist/src/securityValidation/dependencies/runOsvScanner.js +43 -0
  222. package/dist/src/securityValidation/fuzz/fuzzHarness.js +61 -0
  223. package/dist/src/securityValidation/fuzz/fuzzTargets.js +204 -0
  224. package/dist/src/securityValidation/fuzz/randomInput.js +0 -0
  225. package/dist/src/securityValidation/index.js +34 -0
  226. package/dist/src/securityValidation/packageChecks/forbiddenPackageContents.js +67 -0
  227. package/dist/src/securityValidation/packageChecks/parseNpmPackDryRun.js +56 -0
  228. package/dist/src/securityValidation/packageChecks/runPackageChecks.js +88 -0
  229. package/dist/src/securityValidation/report/renderSecurityReport.js +248 -0
  230. package/dist/src/securityValidation/report/securityReportTypes.js +1 -0
  231. package/dist/src/securityValidation/staticScans/codeql.js +66 -0
  232. package/dist/src/securityValidation/staticScans/semgrep.js +180 -0
  233. package/dist/src/securityValidation/testMatrix.js +535 -0
  234. package/dist/src/securityValidation/types.js +34 -0
  235. package/dist/src/securityValidation/validate/resolveTarget.js +32 -0
  236. package/dist/src/securityValidation/validate/runSecurityValidation.js +169 -0
  237. package/dist/src/securityValidation/validate/verdict.js +73 -0
  238. package/dist/src/visualizationDemos/buildMyDevKitVisualizationCommands.js +59 -0
  239. package/dist/src/visualizationDemos/index.js +4 -0
  240. package/dist/src/visualizationDemos/runVisualizationDemos.js +82 -0
  241. package/dist/src/visualizationDemos/types.js +1 -0
  242. package/dist/src/visualizationDemos/writeVisualizationDemoArtifacts.js +25 -0
  243. package/docs/METRICS.md +286 -0
  244. package/examples/demo-report-input.json +78 -0
  245. package/examples/lab-demo-cases.json +35 -0
  246. package/examples/real-agent-campaign-cases.json +118 -0
  247. package/examples/token-savings-cases.json +122 -0
  248. package/package.json +91 -0
  249. package/tests/fixtures/fake-adversarial-cli.js +152 -0
  250. package/tests/fixtures/fake-my-dev-kit-cli.js +83 -0
@@ -0,0 +1,46 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { resolveWithinRoot } from "../core/pathSafety.js";
4
+ import { buildExperimentPlotData } from "./buildExperimentPlotData.js";
5
+ import { renderSvgChart } from "./renderSvgChart.js";
6
/**
 * Output file names for the known plot ids, keyed by plot id.
 * Each known id maps to "<id>.svg".
 */
const chartFiles = Object.fromEntries(
  [
    "token-savings-vs-prompt-length",
    "time-reduction-vs-prompt-length",
    "token-savings-vs-project-complexity",
    "time-reduction-vs-project-complexity",
    "correctness-by-strategy",
    "run-outcomes-by-agent"
  ].map((plotId) => [plotId, `${plotId}.svg`])
);
14
/**
 * Builds plot data for an experiment and writes the plot artifacts for it.
 *
 * @param {object} options
 * @param {string} options.experimentDir - Forwarded to buildExperimentPlotData.
 * @param {string} options.repoRoot - Forwarded to buildExperimentPlotData.
 * @param {string} options.outDir - Forwarded to writePlotArtifactsFromData.
 * @returns {Promise<object>} Whatever writePlotArtifactsFromData resolves to.
 */
export async function writePlotArtifacts(options) {
  const plotDataRequest = {
    experimentDir: options.experimentDir,
    repoRoot: options.repoRoot
  };
  const data = await buildExperimentPlotData(plotDataRequest);
  const { outDir } = options;
  return writePlotArtifactsFromData({ data, outDir });
}
18
/**
 * Writes one SVG per plot plus `plots-summary.json` and `plot-data.json`
 * for plot data that has already been built.
 *
 * @param {object} options
 * @param {object} options.data - Plot data; `plots`, `generatedAt`,
 *   `sourceExperimentDir`, `skippedPoints`, and `warnings` are read.
 * @param {string} options.outDir - Output directory; resolved to an absolute path.
 * @returns {Promise<{summary: object, data: object, artifactPaths: object}>}
 *   The written summary, the input data, and the paths of every artifact.
 */
export async function writePlotArtifactsFromData(options) {
  const { data } = options;
  const outDir = path.resolve(options.outDir);
  const chartsDir = resolveWithinRoot(outDir, "charts");
  await mkdir(chartsDir, { recursive: true });
  const summaryPath = resolveWithinRoot(outDir, "plots-summary.json");
  const dataPath = resolveWithinRoot(outDir, "plot-data.json");
  const charts = {};
  for (const plot of data.plots) {
    // Own-property check: a plot id such as "constructor" or "toString" must
    // fall back to "<id>.svg" instead of picking up an inherited
    // Object.prototype member as its file name.
    const isKnownPlot = Object.prototype.hasOwnProperty.call(chartFiles, plot.id);
    const fileName = isKnownPlot ? chartFiles[plot.id] : `${plot.id}.svg`;
    const chartPath = resolveWithinRoot(chartsDir, fileName);
    await writeFile(chartPath, renderSvgChart(plot), "utf8");
    charts[plot.id] = chartPath;
  }
  const summary = {
    generatedAt: data.generatedAt,
    sourceExperimentDir: data.sourceExperimentDir,
    chartCount: Object.keys(charts).length,
    skippedPointCount: data.skippedPoints.length,
    warnings: data.warnings
  };
  await mkdir(outDir, { recursive: true });
  await writeFile(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8");
  await writeFile(dataPath, `${JSON.stringify(data, null, 2)}\n`, "utf8");
  return {
    summary,
    data,
    artifactPaths: { summaryPath, dataPath, chartsDir, charts }
  };
}
@@ -0,0 +1,68 @@
1
/**
 * Assembles the context object consumed by the prompt generators.
 *
 * The project profile is looked up by the case's `projectProfileRef`,
 * falling back to its `benchmarkProject` when the ref is null/undefined.
 *
 * @param {object} args
 * @param {object} args.evaluationCase
 * @param {object[]} args.projectProfiles
 * @param {string} args.complexityLevel
 * @param {string} args.strategy
 * @returns {object} Context with the case, its profile, answer key, file tree,
 *   complexity level, and strategy.
 * @throws {Error} When no profile has a matching `projectId`.
 */
export function buildPromptGenerationContext(args) {
  const { evaluationCase } = args;
  const profileId = evaluationCase.projectProfileRef ?? evaluationCase.benchmarkProject;
  const projectProfile = args.projectProfiles.find((candidate) => candidate.projectId === profileId);
  if (!projectProfile) {
    throw new Error(`No benchmark project profile found for ${profileId}.`);
  }
  return {
    evaluationCase,
    projectProfile,
    answerKey: resolveAnswerKey(evaluationCase),
    fileTree: projectProfile.fileTree,
    complexityLevel: args.complexityLevel,
    strategy: args.strategy
  };
}
17
/**
 * Returns the evaluation case's answer key, or builds a fallback one from the
 * legacy `expectedFiles` / `expectedSymbols` / `expectedFacts` fields.
 *
 * All three list fields of the fallback default to an empty array, so code
 * that reads `.length` on them does not throw for a case that omits one.
 *
 * @param {object} evaluationCase
 * @returns {object} The case's own `answerKey` when truthy, otherwise the fallback key.
 */
function resolveAnswerKey(evaluationCase) {
  if (evaluationCase.answerKey) {
    return evaluationCase.answerKey;
  }
  return {
    expectedFiles: evaluationCase.expectedFiles ?? [],
    expectedSymbols: evaluationCase.expectedSymbols ?? [],
    expectedFacts: evaluationCase.expectedFacts ?? [],
    minimumCorrectFacts: 0,
    notes: "Fallback answer key built from legacy evaluation case fields."
  };
}
29
/**
 * Renders a bulleted preview of the project's files, capped by complexity level.
 *
 * The cap is 6 entries for "short", 12 for "medium", and 24 for any other
 * level. Only entries whose `kind` is "file" are listed. When files are left
 * out, a final bullet states how many were omitted.
 *
 * @param {object} context
 * @param {string} context.complexityLevel
 * @param {{entries: object[]}} context.fileTree
 * @returns {string} Newline-joined bullet list (empty string when there are no files).
 */
export function formatCompactFileTree(context) {
  let maxEntries = 24;
  if (context.complexityLevel === "short") {
    maxEntries = 6;
  } else if (context.complexityLevel === "medium") {
    maxEntries = 12;
  }
  // Filter once; the result feeds both the preview and the omitted-file count.
  const files = context.fileTree.entries.filter((entry) => entry.kind === "file");
  const lines = files.slice(0, maxEntries).map((entry) => {
    const lineCount = typeof entry.lines === "number" ? `${entry.lines} lines` : undefined;
    const details = [entry.role, entry.language, lineCount].filter(Boolean).join(", ");
    return `- ${entry.path}${details ? ` (${details})` : ""}`;
  });
  const omittedCount = files.length - maxEntries;
  if (omittedCount > 0) {
    lines.push(`- ... ${omittedCount} more files omitted from preview`);
  }
  return lines.join("\n");
}
43
/**
 * Summarizes the answer key as newline-joined text.
 *
 * For "long" and "multi-step" levels every expected fact is listed with its
 * required/optional status and weight; for any other level only the fact
 * count is reported.
 *
 * @param {object} context
 * @param {string} context.complexityLevel
 * @param {object} context.answerKey
 * @returns {string}
 */
export function formatAnswerKeySummary(context) {
  const { answerKey, complexityLevel } = context;
  const listEachFact = complexityLevel === "long" || complexityLevel === "multi-step";
  let factsSection;
  if (listEachFact) {
    const factLines = answerKey.expectedFacts.map((fact) => {
      const requirement = fact.required ? "required" : "optional";
      return `- ${fact.id}: ${requirement}, weight ${fact.weight}`;
    });
    factsSection = factLines.join("\n");
  } else {
    factsSection = `Expected fact count: ${answerKey.expectedFacts.length}`;
  }
  const summaryLines = [];
  summaryLines.push(`Expected file count: ${answerKey.expectedFiles.length}`);
  summaryLines.push(`Expected symbol count: ${answerKey.expectedSymbols.length}`);
  summaryLines.push(`Minimum correct facts later required: ${answerKey.minimumCorrectFacts}`);
  summaryLines.push(factsSection);
  return summaryLines.join("\n");
}
54
/**
 * Lists the output fields an agent is asked to return, one "- field" bullet per line.
 *
 * The "my-dev-kit-guided" strategy gets four extra fields after the common ones.
 *
 * @param {string} strategy
 * @returns {string} Newline-joined bullet list.
 */
export function formatExpectedOutputFields(strategy) {
  const fields = [
    "answer",
    "relevantFiles",
    "relevantSymbols",
    "expectedFactsFound",
    "confidence",
    "tokenUsage, if available",
    "tokenUsageSource: provider-reported, agent-reported, estimated, or unavailable",
    "executionTime, if available",
    "notes"
  ];
  if (strategy === "my-dev-kit-guided") {
    fields.push(
      "commandsRun",
      "selectedContext",
      "fullFileReads, if any",
      "fullFileReadJustifications, if any"
    );
  }
  const bullets = [];
  for (const field of fields) {
    bullets.push(`- ${field}`);
  }
  return bullets.join("\n");
}
@@ -0,0 +1,106 @@
1
+ import { tokenCountMethod } from "../core/countTokens.js";
2
+ import { formatAnswerKeySummary, formatCompactFileTree, formatExpectedOutputFields } from "./buildPromptContext.js";
3
+ import { measurePromptComplexity } from "./measurePromptComplexity.js";
4
/**
 * Builds the "my-dev-kit-guided" prompt variant for a prompt-generation context.
 *
 * @param {object} context - Reads `evaluationCase`, `projectProfile`,
 *   `answerKey`, and `complexityLevel`.
 * @returns {object} Variant record holding the prompt text, its measured
 *   metrics, the answer key, the project profile, and provenance fields.
 */
export function generateMyDevKitPrompt(context) {
  const { evaluationCase, projectProfile, complexityLevel } = context;
  const strategy = "my-dev-kit-guided";
  const promptText = buildMyDevKitPromptText(context);
  const promptMetrics = measurePromptComplexity(promptText, context);
  const createdFrom = {
    evaluationCaseId: evaluationCase.id,
    projectProfileId: projectProfile.projectId,
    tokenCountMethod
  };
  return {
    id: `${evaluationCase.id}.my-dev-kit-guided.${complexityLevel}`,
    caseId: evaluationCase.id,
    benchmarkProject: evaluationCase.benchmarkProject,
    strategy,
    complexityLevel,
    title: `${evaluationCase.title} - my-dev-kit guided - ${complexityLevel}`,
    promptText,
    promptMetrics,
    expectedAnswerKey: context.answerKey,
    projectProfile,
    createdFrom,
    warnings: []
  };
}
25
/**
 * Composes the guided prompt text. Each complexity level extends the previous one:
 *   - "short": header and instructions plus expected file/symbol counts;
 *   - "medium": header and instructions plus description and file tree;
 *   - "long": medium plus answer-key summary and constraints;
 *   - any other level: long plus workflow steps and a validation checklist.
 *
 * @param {object} context - Prompt-generation context.
 * @returns {string} Newline-joined prompt text.
 */
function buildMyDevKitPromptText(context) {
  const { projectProfile, evaluationCase, complexityLevel } = context;
  const lines = [
    "# my-dev-kit-Guided Benchmark Prompt",
    "",
    `Project: ${projectProfile.displayName}`,
    `Project ID: ${projectProfile.projectId}`,
    `Complexity: ${projectProfile.complexityLevel}, score ${projectProfile.complexityScore}`,
    `Task: ${evaluationCase.title}`,
    `Query: ${evaluationCase.query}`,
    "",
    "Instruction: gather targeted context using my-dev-kit.",
    "Do not read full files by default. Read whole files only if targeted retrieval is insufficient, and explain why.",
    "Use my-dev-kit index before retrieval.",
    "Use my-dev-kit search to find candidate files or symbols.",
    "Use my-dev-kit lookup to inspect selected nodes.",
    "Use my-dev-kit slice where useful for nearby context.",
    "Use my-dev-kit source for selected symbols or line ranges.",
    "Report commandsRun, selected files, selected symbols, and selectedContext.",
    "Return tokenUsage if the platform exposes it, and state tokenUsageSource as provider-reported, agent-reported, estimated, or unavailable.",
    "Return executionTime if available.",
    "",
    "Expected Output Fields:",
    formatExpectedOutputFields("my-dev-kit-guided")
  ];

  // "short" is the only level that does not build on the medium section.
  if (complexityLevel === "short") {
    lines.push(
      "",
      `Expected file count: ${context.answerKey.expectedFiles.length}`,
      `Expected symbol count: ${context.answerKey.expectedSymbols.length}`,
      "Answer with concise reasoning."
    );
    return lines.join("\n");
  }

  lines.push(
    "",
    `Description: ${projectProfile.description}`,
    "",
    "File Tree:",
    formatCompactFileTree(context),
    "",
    "Answer with concise reasoning and cite the my-dev-kit commands that supplied context."
  );
  if (complexityLevel === "medium") {
    return lines.join("\n");
  }

  lines.push(
    "",
    "Answer Key Summary:",
    formatAnswerKeySummary(context),
    "",
    "Constraints:",
    "- Do not create a full-file reading workflow by default.",
    "- Do not invent files, symbols, commands, or behavior.",
    "- Prefer search, lookup, slice, and source over broad reads.",
    "- Include confidence as high, medium, or low.",
    "- State whether token usage is provider-reported, agent-reported, estimated, or unavailable."
  );
  if (complexityLevel === "long") {
    return lines.join("\n");
  }

  lines.push(
    "",
    "Workflow Steps:",
    "1. Run my-dev-kit index for the target project.",
    "2. Run my-dev-kit search with the benchmark query.",
    "3. Run my-dev-kit lookup for promising files or symbols.",
    "4. Run my-dev-kit slice where local context is needed.",
    "5. Run my-dev-kit source for selected symbols or line ranges.",
    "6. Avoid full-file reads unless targeted context is insufficient.",
    "7. Answer the task and report selected files, selected symbols, commandsRun, and any fullFileReads.",
    "",
    "Validation Checklist:",
    "- commandsRun includes index, search, lookup, and any slice/source commands used",
    "- selectedContext explains why each file or symbol was chosen",
    "- fullFileReads is empty unless justified",
    "- expectedFactsFound uses fact IDs when known",
    "- notes include uncertainty or missing telemetry"
  );
  return lines.join("\n");
}
@@ -0,0 +1,36 @@
1
+ import { buildPromptGenerationContext } from "./buildPromptContext.js";
2
+ import { generateMyDevKitPrompt } from "./generateMyDevKitPrompt.js";
3
+ import { generateRawFullFilePrompt } from "./generateRawFullFilePrompt.js";
4
/** Every prompt strategy, in the order variants are generated by default. */
export const ALL_PROMPT_STRATEGIES = [
  "raw-full-file",
  "my-dev-kit-guided"
];

/** Every prompt complexity level, in the order variants are generated by default. */
export const ALL_PROMPT_COMPLEXITY_LEVELS = [
  "short",
  "medium",
  "long",
  "multi-step"
];
6
/**
 * Generates one prompt variant per (case, strategy, complexity level) combination.
 *
 * Variants are emitted case by case, then strategy by strategy, then level by
 * level. Strategies and levels default to the full lists when not supplied.
 *
 * @param {object} args
 * @param {Iterable<object>} args.cases
 * @param {object[]} args.projectProfiles
 * @param {Iterable<string>} [args.strategies]
 * @param {Iterable<string>} [args.complexityLevels]
 * @returns {object[]} The generated variants.
 */
export function generatePromptVariants(args) {
  const strategies = args.strategies ?? ALL_PROMPT_STRATEGIES;
  const complexityLevels = args.complexityLevels ?? ALL_PROMPT_COMPLEXITY_LEVELS;
  const variants = [];
  for (const evaluationCase of args.cases) {
    for (const strategy of strategies) {
      // Any strategy other than "raw-full-file" uses the guided generator.
      const generatePrompt = strategy === "raw-full-file" ? generateRawFullFilePrompt : generateMyDevKitPrompt;
      for (const complexityLevel of complexityLevels) {
        const context = buildPromptGenerationContext({
          evaluationCase,
          projectProfiles: args.projectProfiles,
          strategy,
          complexityLevel
        });
        variants.push(generatePrompt(context));
      }
    }
  }
  return variants;
}
25
/**
 * Validates a prompt strategy name.
 *
 * @param {string} value
 * @returns {string} The same value when it is a known strategy.
 * @throws {Error} When the value is not "raw-full-file" or "my-dev-kit-guided".
 */
export function parsePromptStrategy(value) {
  switch (value) {
    case "raw-full-file":
    case "my-dev-kit-guided":
      return value;
    default:
      throw new Error(`Invalid prompt strategy: ${value}`);
  }
}
31
/**
 * Validates a prompt complexity level name.
 *
 * @param {string} value
 * @returns {string} The same value when it is a known level.
 * @throws {Error} When the value is not "short", "medium", "long", or "multi-step".
 */
export function parsePromptComplexityLevel(value) {
  switch (value) {
    case "short":
    case "medium":
    case "long":
    case "multi-step":
      return value;
    default:
      throw new Error(`Invalid prompt complexity level: ${value}`);
  }
}
@@ -0,0 +1,97 @@
1
+ import { tokenCountMethod } from "../core/countTokens.js";
2
+ import { formatAnswerKeySummary, formatCompactFileTree, formatExpectedOutputFields } from "./buildPromptContext.js";
3
+ import { measurePromptComplexity } from "./measurePromptComplexity.js";
4
/**
 * Builds the "raw-full-file" prompt variant record for a generation context.
 *
 * The prompt text comes from buildRawPromptText and its metrics from
 * measurePromptComplexity; the remaining fields are copied from `context`.
 *
 * @param {object} context - Prompt generation context (evaluationCase,
 *   complexityLevel, answerKey and projectProfile are read here).
 * @returns {object} The variant record, with an empty `warnings` list.
 */
export function generateRawFullFilePrompt(context) {
  const promptText = buildRawPromptText(context);
  const { evaluationCase, complexityLevel, projectProfile } = context;
  const caseId = evaluationCase.id;

  return {
    id: `${caseId}.raw-full-file.${complexityLevel}`,
    caseId,
    benchmarkProject: evaluationCase.benchmarkProject,
    strategy: "raw-full-file",
    complexityLevel,
    title: `${evaluationCase.title} - raw full-file - ${complexityLevel}`,
    promptText,
    promptMetrics: measurePromptComplexity(promptText, context),
    expectedAnswerKey: context.answerKey,
    projectProfile,
    createdFrom: {
      evaluationCaseId: caseId,
      projectProfileId: projectProfile.projectId,
      tokenCountMethod,
    },
    warnings: [],
  };
}
25
/**
 * Renders the raw full-file prompt text for the context's complexity level.
 *
 * Every level starts with the same header. "short" appends the expected
 * file/symbol counts and stops. Every other level appends the project
 * description and file tree; "medium" stops there. "long" adds the answer key
 * summary and constraints. Any other level (the "multi-step" case) also adds
 * workflow steps and a validation checklist.
 *
 * @param {object} context - Prompt generation context.
 * @returns {string} The prompt lines joined with "\n".
 */
function buildRawPromptText(context) {
  const { projectProfile, evaluationCase, complexityLevel } = context;
  const lines = [
    "# Raw Full-File Benchmark Prompt",
    "",
    `Project: ${projectProfile.displayName}`,
    `Project ID: ${projectProfile.projectId}`,
    `Complexity: ${projectProfile.complexityLevel}, score ${projectProfile.complexityScore}`,
    `Task: ${evaluationCase.title}`,
    `Query: ${evaluationCase.query}`,
    "",
    "Instruction: use the full source files supplied separately by the runner as the primary context.",
    "Do not use my-dev-kit as the required retrieval method for this strategy.",
    "Identify the relevant files and relevant symbols before giving the answer.",
    "Return tokenUsage if the platform exposes it, and state tokenUsageSource as provider-reported, agent-reported, estimated, or unavailable.",
    "Return executionTime if available.",
    "",
    "Expected Output Fields:",
    formatExpectedOutputFields("raw-full-file"),
  ];
  const render = () => lines.join("\n");

  // The short variant only discloses how many files and symbols are expected.
  if (complexityLevel === "short") {
    lines.push(
      "",
      `Expected file count: ${context.answerKey.expectedFiles.length}`,
      `Expected symbol count: ${context.answerKey.expectedSymbols.length}`,
      "Answer with concise reasoning.",
    );
    return render();
  }

  lines.push(
    "",
    `Description: ${projectProfile.description}`,
    "",
    "File Tree:",
    formatCompactFileTree(context),
    "",
    "Answer with concise reasoning and cite the files/symbols used.",
  );
  if (complexityLevel === "medium") {
    return render();
  }

  lines.push(
    "",
    "Answer Key Summary:",
    formatAnswerKeySummary(context),
    "",
    "Constraints:",
    "- Do not invent files, symbols, or behavior.",
    "- Keep the answer tied to supplied source context.",
    "- Include confidence as high, medium, or low.",
    "- State whether token usage is provider-reported, agent-reported, estimated, or unavailable.",
  );
  if (complexityLevel === "long") {
    return render();
  }

  // Every remaining level gets the full multi-step layout.
  lines.push(
    "",
    "Workflow Steps:",
    "1. Review the provided full-file context.",
    "2. Identify candidate files and symbols.",
    "3. Match the behavior against the benchmark task.",
    "4. List expected facts found and any missing uncertainty.",
    "5. Return the structured response fields.",
    "",
    "Validation Checklist:",
    "- relevantFiles are project-relative paths",
    "- relevantSymbols are concrete functions, classes, exports, or methods",
    "- expectedFactsFound uses fact IDs when known",
    "- notes include any uncertainty or missing telemetry",
  );
  return render();
}
@@ -0,0 +1,7 @@
1
+ export * from "./types.js";
2
+ export * from "./buildPromptContext.js";
3
+ export * from "./generateRawFullFilePrompt.js";
4
+ export * from "./generateMyDevKitPrompt.js";
5
+ export * from "./measurePromptComplexity.js";
6
+ export * from "./generatePromptVariants.js";
7
+ export * from "./writePromptArtifacts.js";
@@ -0,0 +1,41 @@
1
+ import { countEstimatedTokens, tokenCountMethod } from "../core/countTokens.js";
2
/** Response field names whose presence in a prompt is counted as a requested output. */
const OUTPUT_FIELD_NAMES = [
  "answer",
  "relevantFiles",
  "relevantSymbols",
  "expectedFactsFound",
  "confidence",
  "tokenUsage",
  "tokenUsageSource",
  "executionTime",
  "notes",
  "commandsRun",
  "selectedContext",
  "fullFileReads",
  "fullFileReadJustifications",
];

/**
 * Derives heuristic complexity metrics for a generated prompt.
 *
 * Text-based metrics are plain substring or regex counts over `promptText`;
 * the expected-* counts and strategy flags are read from `context`.
 *
 * @param {string} promptText - The rendered prompt.
 * @param {object} context - Prompt generation context; `answerKey`,
 *   `projectProfile.description` and `strategy` are read.
 * @returns {object} The metrics record.
 */
export function measurePromptComplexity(promptText, context) {
  const { answerKey, projectProfile, strategy } = context;
  const description = projectProfile.description;
  const isGuided = strategy === "my-dev-kit-guided";

  // An empty search string is found in every string, and a non-string value
  // would be coerced (undefined -> "undefined"), so only a non-empty string
  // description can count as "included".
  const includesProjectDescription =
    typeof description === "string" && description.length > 0 && promptText.includes(description);

  return {
    promptChars: promptText.length,
    promptEstimatedTokens: countEstimatedTokens(promptText),
    tokenCountMethod,
    instructionCount: countMatches(promptText, /\b(instruction|use|return|identify|report|answer|state|include|do not)\b/gi),
    constraintCount: countMatches(promptText, /\b(must|must not|do not|only|unless|avoid|required|default)\b/gi),
    // Case-sensitive substring test per field name.
    requestedOutputFieldCount: OUTPUT_FIELD_NAMES.filter((field) => promptText.includes(field)).length,
    // Counts lines that begin with "<digits>.".
    taskStepCount: countMatches(promptText, /^\d+\./gm),
    expectedFactCount: answerKey.expectedFacts.length,
    expectedFileCount: answerKey.expectedFiles.length,
    expectedSymbolCount: answerKey.expectedSymbols.length,
    includesFileTree: promptText.includes("File Tree"),
    includesProjectDescription,
    includesAnswerKeySummary: promptText.includes("Answer Key Summary"),
    requiresMultipleFiles: answerKey.expectedFiles.length > 1,
    requiresGraphGuidedRetrieval: isGuided,
    requiresCommandExecution: isGuided,
    requiresTokenReport: promptText.includes("tokenUsage"),
    requiresTimingReport: promptText.includes("executionTime"),
  };
}
39
/**
 * Counts the entries returned by `text.match(pattern)`.
 *
 * @param {string} text - Text to search.
 * @param {RegExp} pattern - Pattern handed to String.prototype.match.
 * @returns {number} Length of the match result, or 0 when nothing matched.
 */
function countMatches(text, pattern) {
  const hits = text.match(pattern);
  // match() yields null when the pattern is not found at all.
  return hits == null ? 0 : hits.length;
}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,43 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import path from "node:path";
3
+ import { tokenCountMethod } from "../core/countTokens.js";
4
+ import { resolveWithinRoot } from "../core/pathSafety.js";
5
/**
 * Writes every prompt variant to disk and returns a summary of what was written.
 *
 * Under the resolved `args.outDir` this creates `prompts/<file>.txt` per variant
 * (in input order), then `prompt-variants.json`, then
 * `prompt-variants-summary.json`.
 *
 * @param {object} args
 * @param {string} args.outDir - Output directory, resolved with path.resolve.
 * @param {Array<object>} args.variants - Variants to write.
 * @param {string} [args.generatedAt] - Timestamp for the summary; defaults to
 *   the ISO string of `new Date(0)`.
 * @returns {Promise<object>} The summary object that was also written to disk.
 */
export async function writePromptArtifacts(args) {
  const outputRoot = path.resolve(args.outDir);
  const promptsRoot = resolveWithinRoot(outputRoot, "prompts");
  await mkdir(promptsRoot, { recursive: true });

  // Written one at a time so promptFiles keeps the input order.
  const promptFiles = [];
  for (const variant of args.variants) {
    const target = resolveWithinRoot(promptsRoot, promptVariantFileName(variant));
    await writeFile(target, variant.promptText, "utf8");
    const relativeTarget = path.relative(outputRoot, target);
    // Replace backslashes so recorded paths use "/" separators.
    promptFiles.push(relativeTarget.replace(/\\/g, "/"));
  }

  const generatedAt = args.generatedAt ?? new Date(0).toISOString();
  const distinctCaseIds = new Set(args.variants.map((variant) => variant.caseId));
  const summary = {
    generatedAt,
    caseCount: distinctCaseIds.size,
    promptCount: args.variants.length,
    strategies: unique(args.variants.map((variant) => variant.strategy)),
    complexityLevels: unique(args.variants.map((variant) => variant.complexityLevel)),
    tokenCountMethod,
    outputPaths: {
      summaryPath: "prompt-variants-summary.json",
      variantsPath: "prompt-variants.json",
      promptDirectory: "prompts",
      promptFiles,
    },
    warnings: args.variants.flatMap((variant) => variant.warnings),
  };

  const variantsJson = JSON.stringify(args.variants, null, 2);
  await writeFile(resolveWithinRoot(outputRoot, "prompt-variants.json"), variantsJson, "utf8");
  const summaryJson = JSON.stringify(summary, null, 2);
  await writeFile(resolveWithinRoot(outputRoot, "prompt-variants-summary.json"), summaryJson, "utf8");

  return summary;
}
35
/**
 * Builds the prompt file name `<caseId>.<strategy>.<complexityLevel>.txt`.
 *
 * All three segments go through safeSegment, so a strategy or complexity level
 * containing a path separator or other unsafe character cannot introduce an
 * extra directory level. Values made only of letters, digits, ".", "_" and "-"
 * are unchanged.
 *
 * @param {object} variant - Variant with `caseId`, `strategy` and `complexityLevel`.
 * @returns {string} The file name, ending in ".txt".
 */
export function promptVariantFileName(variant) {
  const caseSegment = safeSegment(variant.caseId);
  // String() mirrors the coercion template interpolation applied before.
  const strategySegment = safeSegment(String(variant.strategy));
  const levelSegment = safeSegment(String(variant.complexityLevel));
  return `${caseSegment}.${strategySegment}.${levelSegment}.txt`;
}
38
/**
 * Collapses each run of characters outside `a-z A-Z 0-9 . _ -` into one "-".
 *
 * @param {string} value - Raw segment text.
 * @returns {string} The segment with every unsafe run replaced by "-".
 */
function safeSegment(value) {
  const unsafeRun = /[^a-zA-Z0-9._-]+/g;
  // Splitting on the unsafe runs and re-joining with "-" replaces each run once.
  return value.split(unsafeRun).join("-");
}
41
/**
 * Returns the distinct values of `values`, sorted with the default comparator.
 *
 * @param {Iterable<*>} values - Values to de-duplicate.
 * @returns {Array<*>} A new sorted array holding each distinct value once.
 */
function unique(values) {
  const distinct = Array.from(new Set(values));
  distinct.sort();
  return distinct;
}