@dailephd/my-dev-kit-lab 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/README.md +272 -0
  2. package/benchmarks/contracts/benchmark-project-profiles.json +1199 -0
  3. package/benchmarks/contracts/todo-behavior.md +70 -0
  4. package/benchmarks/contracts/todo-benchmark-case.json +227 -0
  5. package/benchmarks/projects/README.md +34 -0
  6. package/benchmarks/projects/task-analytics-large-mixed/README.md +1 -0
  7. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/__init__.py +3 -0
  8. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/fixtures.py +6 -0
  9. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/metrics.py +29 -0
  10. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/models.py +21 -0
  11. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/parser.py +16 -0
  12. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/pipeline.py +9 -0
  13. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/quality.py +8 -0
  14. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/reporting.py +11 -0
  15. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_metrics.py +19 -0
  16. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_parser.py +15 -0
  17. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_quality.py +19 -0
  18. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_reporting.py +15 -0
  19. package/benchmarks/projects/task-analytics-large-mixed/ts/package.json +12 -0
  20. package/benchmarks/projects/task-analytics-large-mixed/ts/src/index.ts +11 -0
  21. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/analyticsSnapshot.ts +20 -0
  22. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/project.ts +5 -0
  23. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/task.ts +10 -0
  24. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/buildProjectLeaderboard.ts +7 -0
  25. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/formatTaskHealthReport.ts +13 -0
  26. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/buildAnalyticsSnapshot.ts +39 -0
  27. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/completeTask.ts +10 -0
  28. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/createTask.ts +21 -0
  29. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/listTasksByProject.ts +6 -0
  30. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/projectStore.ts +20 -0
  31. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/taskStore.ts +44 -0
  32. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/projectValidation.ts +12 -0
  33. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/taskValidation.ts +18 -0
  34. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/buildAnalyticsSnapshot.test.ts +48 -0
  35. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/completeTask.test.ts +21 -0
  36. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/createTask.test.ts +31 -0
  37. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/listTasksByProject.test.ts +18 -0
  38. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/reporting.test.ts +19 -0
  39. package/benchmarks/projects/task-analytics-large-mixed/ts/tsconfig.json +12 -0
  40. package/benchmarks/projects/task-analytics-large-mixed/ts/vitest.config.ts +5 -0
  41. package/benchmarks/projects/task-workflow-medium-ts/README.md +1 -0
  42. package/benchmarks/projects/task-workflow-medium-ts/package.json +12 -0
  43. package/benchmarks/projects/task-workflow-medium-ts/src/index.ts +9 -0
  44. package/benchmarks/projects/task-workflow-medium-ts/src/models/project.ts +6 -0
  45. package/benchmarks/projects/task-workflow-medium-ts/src/models/task.ts +39 -0
  46. package/benchmarks/projects/task-workflow-medium-ts/src/services/completeTask.ts +15 -0
  47. package/benchmarks/projects/task-workflow-medium-ts/src/services/createTask.ts +26 -0
  48. package/benchmarks/projects/task-workflow-medium-ts/src/services/filterTasks.ts +17 -0
  49. package/benchmarks/projects/task-workflow-medium-ts/src/services/importTasks.ts +33 -0
  50. package/benchmarks/projects/task-workflow-medium-ts/src/services/summarizeTasks.ts +30 -0
  51. package/benchmarks/projects/task-workflow-medium-ts/src/store/taskStore.ts +76 -0
  52. package/benchmarks/projects/task-workflow-medium-ts/src/utils/deterministicId.ts +3 -0
  53. package/benchmarks/projects/task-workflow-medium-ts/src/validation/taskValidation.ts +45 -0
  54. package/benchmarks/projects/task-workflow-medium-ts/tests/completeTask.test.ts +16 -0
  55. package/benchmarks/projects/task-workflow-medium-ts/tests/createTask.test.ts +21 -0
  56. package/benchmarks/projects/task-workflow-medium-ts/tests/filterTasks.test.ts +18 -0
  57. package/benchmarks/projects/task-workflow-medium-ts/tests/importTasks.test.ts +22 -0
  58. package/benchmarks/projects/task-workflow-medium-ts/tests/summarizeTasks.test.ts +29 -0
  59. package/benchmarks/projects/task-workflow-medium-ts/tsconfig.json +12 -0
  60. package/benchmarks/projects/task-workflow-medium-ts/vitest.config.ts +5 -0
  61. package/benchmarks/projects/todo-js/README.md +3 -0
  62. package/benchmarks/projects/todo-js/package.json +11 -0
  63. package/benchmarks/projects/todo-js/src/index.js +2 -0
  64. package/benchmarks/projects/todo-js/src/taskService.js +37 -0
  65. package/benchmarks/projects/todo-js/src/taskStore.js +28 -0
  66. package/benchmarks/projects/todo-js/tests/taskService.test.js +45 -0
  67. package/benchmarks/projects/todo-js/vitest.config.js +5 -0
  68. package/benchmarks/projects/todo-mixed-ts-py/README.md +3 -0
  69. package/benchmarks/projects/todo-mixed-ts-py/package.json +13 -0
  70. package/benchmarks/projects/todo-mixed-ts-py/python/task_service.py +76 -0
  71. package/benchmarks/projects/todo-mixed-ts-py/src/taskCli.ts +38 -0
  72. package/benchmarks/projects/todo-mixed-ts-py/tests/mixedBoundary.test.ts +18 -0
  73. package/benchmarks/projects/todo-mixed-ts-py/tsconfig.json +12 -0
  74. package/benchmarks/projects/todo-mixed-ts-py/vitest.config.ts +5 -0
  75. package/benchmarks/projects/todo-python/README.md +3 -0
  76. package/benchmarks/projects/todo-python/src/__init__.py +4 -0
  77. package/benchmarks/projects/todo-python/src/task_service.py +32 -0
  78. package/benchmarks/projects/todo-python/src/task_store.py +28 -0
  79. package/benchmarks/projects/todo-python/tests/test_task_service.py +52 -0
  80. package/benchmarks/projects/todo-ts/README.md +3 -0
  81. package/benchmarks/projects/todo-ts/package.json +12 -0
  82. package/benchmarks/projects/todo-ts/src/index.ts +2 -0
  83. package/benchmarks/projects/todo-ts/src/taskService.ts +41 -0
  84. package/benchmarks/projects/todo-ts/src/taskStore.ts +34 -0
  85. package/benchmarks/projects/todo-ts/tests/taskService.test.ts +45 -0
  86. package/benchmarks/projects/todo-ts/tsconfig.json +12 -0
  87. package/benchmarks/projects/todo-ts/vitest.config.ts +5 -0
  88. package/dist/scripts/build-gallery.js +3 -0
  89. package/dist/scripts/capture-demo-report.js +3 -0
  90. package/dist/scripts/evaluate-token-savings.js +2 -0
  91. package/dist/scripts/experiments/describeExperiment.js +143 -0
  92. package/dist/scripts/experiments/listExperiments.js +44 -0
  93. package/dist/scripts/experiments/runExperiment.js +199 -0
  94. package/dist/scripts/generate-experiment-plots.js +3 -0
  95. package/dist/scripts/generate-prompt-variants.js +2 -0
  96. package/dist/scripts/render-experiment-report.js +2 -0
  97. package/dist/scripts/run-agent-prompt.js +2 -0
  98. package/dist/scripts/run-controlled-experiment.js +2 -0
  99. package/dist/scripts/run-final-demo.js +3 -0
  100. package/dist/scripts/run-lab-demo.js +5 -0
  101. package/dist/scripts/run-visualization-demos.js +3 -0
  102. package/dist/scripts/security/runCodeql.js +57 -0
  103. package/dist/scripts/security/runDependencyChecks.js +57 -0
  104. package/dist/scripts/security/runFuzzSmoke.js +29 -0
  105. package/dist/scripts/security/runPackageChecks.js +56 -0
  106. package/dist/scripts/security/runSemgrep.js +63 -0
  107. package/dist/scripts/security/validate.js +117 -0
  108. package/dist/scripts/verify-benchmarks.js +202 -0
  109. package/dist/src/agents/adapters/claudeAdapter.js +37 -0
  110. package/dist/src/agents/adapters/codexAdapter.js +110 -0
  111. package/dist/src/agents/adapters/fakeAgentAdapter.js +101 -0
  112. package/dist/src/agents/agentRegistry.js +21 -0
  113. package/dist/src/agents/index.js +7 -0
  114. package/dist/src/agents/parseAgentTokenUsage.js +137 -0
  115. package/dist/src/agents/runAgentPrompt.js +38 -0
  116. package/dist/src/agents/types.js +1 -0
  117. package/dist/src/commands/buildGalleryCommand.js +56 -0
  118. package/dist/src/commands/captureDemoReport.js +116 -0
  119. package/dist/src/commands/evaluateTokenSavings.js +175 -0
  120. package/dist/src/commands/generateExperimentPlotsCommand.js +38 -0
  121. package/dist/src/commands/generatePromptVariants.js +67 -0
  122. package/dist/src/commands/renderExperimentReportCommand.js +131 -0
  123. package/dist/src/commands/runAgentPromptCommand.js +132 -0
  124. package/dist/src/commands/runControlledExperimentCommand.js +174 -0
  125. package/dist/src/commands/runFinalDemoCommand.js +123 -0
  126. package/dist/src/commands/runLabDemo.js +62 -0
  127. package/dist/src/commands/runVisualizationDemosCommand.js +67 -0
  128. package/dist/src/core/commandLine.js +59 -0
  129. package/dist/src/core/countTokens.js +8 -0
  130. package/dist/src/core/fileGlobs.js +100 -0
  131. package/dist/src/core/localProjectTarget.js +75 -0
  132. package/dist/src/core/pathSafety.js +19 -0
  133. package/dist/src/core/pythonCommand.js +30 -0
  134. package/dist/src/core/resolveCommand.js +110 -0
  135. package/dist/src/core/runMeasuredCommand.js +143 -0
  136. package/dist/src/evaluation/benchmarkMetadata.js +207 -0
  137. package/dist/src/evaluation/buildExperimentMatrix.js +75 -0
  138. package/dist/src/evaluation/classifyAgentRunOutcome.js +40 -0
  139. package/dist/src/evaluation/compareExperimentRuns.js +79 -0
  140. package/dist/src/evaluation/compareTokenSavings.js +47 -0
  141. package/dist/src/evaluation/controlledExperimentTypes.js +1 -0
  142. package/dist/src/evaluation/index.js +18 -0
  143. package/dist/src/evaluation/parseAgentAnswer.js +230 -0
  144. package/dist/src/evaluation/projectComplexity.js +126 -0
  145. package/dist/src/evaluation/projectFileTree.js +83 -0
  146. package/dist/src/evaluation/readEvaluationCases.js +59 -0
  147. package/dist/src/evaluation/renderTokenSavingsReportInput.js +55 -0
  148. package/dist/src/evaluation/runControlledExperiment.js +158 -0
  149. package/dist/src/evaluation/runMyDevKitRetrieval.js +197 -0
  150. package/dist/src/evaluation/runRawFullFileBaseline.js +31 -0
  151. package/dist/src/evaluation/scoreCorrectness.js +127 -0
  152. package/dist/src/evaluation/types.js +1 -0
  153. package/dist/src/evaluation/writeExperimentArtifacts.js +104 -0
  154. package/dist/src/evaluation/writeTokenSavingsArtifacts.js +57 -0
  155. package/dist/src/experiments/config.js +24 -0
  156. package/dist/src/experiments/defaultRegistry.js +7 -0
  157. package/dist/src/experiments/errors.js +18 -0
  158. package/dist/src/experiments/index.js +9 -0
  159. package/dist/src/experiments/outputPaths.js +25 -0
  160. package/dist/src/experiments/plugins/contextStrategyComparison/config.js +37 -0
  161. package/dist/src/experiments/plugins/contextStrategyComparison/index.js +3 -0
  162. package/dist/src/experiments/plugins/contextStrategyComparison/plugin.js +83 -0
  163. package/dist/src/experiments/plugins/contextStrategyComparison/resultMapping.js +260 -0
  164. package/dist/src/experiments/plugins/index.js +1 -0
  165. package/dist/src/experiments/registry.js +43 -0
  166. package/dist/src/experiments/results.js +48 -0
  167. package/dist/src/experiments/runner.js +181 -0
  168. package/dist/src/experiments/target.js +8 -0
  169. package/dist/src/experiments/types.js +1 -0
  170. package/dist/src/gallery/index.js +2 -0
  171. package/dist/src/gallery/types.js +1 -0
  172. package/dist/src/gallery/writeGalleryManifest.js +214 -0
  173. package/dist/src/index.js +12 -0
  174. package/dist/src/plots/buildExperimentPlotData.js +137 -0
  175. package/dist/src/plots/index.js +4 -0
  176. package/dist/src/plots/renderSvgChart.js +82 -0
  177. package/dist/src/plots/types.js +1 -0
  178. package/dist/src/plots/writePlotArtifacts.js +46 -0
  179. package/dist/src/prompts/buildPromptContext.js +68 -0
  180. package/dist/src/prompts/generateMyDevKitPrompt.js +106 -0
  181. package/dist/src/prompts/generatePromptVariants.js +36 -0
  182. package/dist/src/prompts/generateRawFullFilePrompt.js +97 -0
  183. package/dist/src/prompts/index.js +7 -0
  184. package/dist/src/prompts/measurePromptComplexity.js +41 -0
  185. package/dist/src/prompts/types.js +1 -0
  186. package/dist/src/prompts/writePromptArtifacts.js +43 -0
  187. package/dist/src/report/buildExperimentReportInput.js +339 -0
  188. package/dist/src/report/experimentReportTypes.js +1 -0
  189. package/dist/src/report/experiments/buildPluginExperimentReport.js +153 -0
  190. package/dist/src/report/experiments/experimentReportModel.js +1 -0
  191. package/dist/src/report/experiments/index.js +4 -0
  192. package/dist/src/report/experiments/renderPluginExperimentReportHtml.js +133 -0
  193. package/dist/src/report/experiments/writePluginExperimentReports.js +30 -0
  194. package/dist/src/report/index.js +8 -0
  195. package/dist/src/report/renderExperimentHtmlReport.js +354 -0
  196. package/dist/src/report/renderHtmlReport.js +103 -0
  197. package/dist/src/report/types.js +10 -0
  198. package/dist/src/report/writeExperimentReportArtifacts.js +38 -0
  199. package/dist/src/report/writeReportArtifacts.js +39 -0
  200. package/dist/src/screenshot/captureReportScreenshot.js +75 -0
  201. package/dist/src/screenshot/index.js +2 -0
  202. package/dist/src/screenshot/types.js +1 -0
  203. package/dist/src/securityValidation/artifacts.js +15 -0
  204. package/dist/src/securityValidation/cliAdversarial/adversarialCliConfig.js +38 -0
  205. package/dist/src/securityValidation/cliAdversarial/dataVolumeChecks.js +194 -0
  206. package/dist/src/securityValidation/cliAdversarial/jsonStdoutChecks.js +359 -0
  207. package/dist/src/securityValidation/cliAdversarial/malformedArtifactChecks.js +284 -0
  208. package/dist/src/securityValidation/cliAdversarial/malformedArtifactFixtures.js +79 -0
  209. package/dist/src/securityValidation/cliAdversarial/pathBoundaryChecks.js +431 -0
  210. package/dist/src/securityValidation/cliAdversarial/pathCases.js +144 -0
  211. package/dist/src/securityValidation/cliAdversarial/readOnlyBoundaryChecks.js +294 -0
  212. package/dist/src/securityValidation/cliAdversarial/runAdversarialCheck.js +149 -0
  213. package/dist/src/securityValidation/cliAdversarial/subprocessSafetyChecks.js +214 -0
  214. package/dist/src/securityValidation/cliAdversarial/tempWorkspace.js +160 -0
  215. package/dist/src/securityValidation/commandRunner.js +136 -0
  216. package/dist/src/securityValidation/config.js +39 -0
  217. package/dist/src/securityValidation/dependencies/parseNpmAudit.js +115 -0
  218. package/dist/src/securityValidation/dependencies/parseNpmLs.js +71 -0
  219. package/dist/src/securityValidation/dependencies/parseNpmOutdated.js +41 -0
  220. package/dist/src/securityValidation/dependencies/runDependencyChecks.js +239 -0
  221. package/dist/src/securityValidation/dependencies/runOsvScanner.js +43 -0
  222. package/dist/src/securityValidation/fuzz/fuzzHarness.js +61 -0
  223. package/dist/src/securityValidation/fuzz/fuzzTargets.js +204 -0
  224. package/dist/src/securityValidation/fuzz/randomInput.js +0 -0
  225. package/dist/src/securityValidation/index.js +34 -0
  226. package/dist/src/securityValidation/packageChecks/forbiddenPackageContents.js +67 -0
  227. package/dist/src/securityValidation/packageChecks/parseNpmPackDryRun.js +56 -0
  228. package/dist/src/securityValidation/packageChecks/runPackageChecks.js +88 -0
  229. package/dist/src/securityValidation/report/renderSecurityReport.js +248 -0
  230. package/dist/src/securityValidation/report/securityReportTypes.js +1 -0
  231. package/dist/src/securityValidation/staticScans/codeql.js +66 -0
  232. package/dist/src/securityValidation/staticScans/semgrep.js +180 -0
  233. package/dist/src/securityValidation/testMatrix.js +535 -0
  234. package/dist/src/securityValidation/types.js +34 -0
  235. package/dist/src/securityValidation/validate/resolveTarget.js +32 -0
  236. package/dist/src/securityValidation/validate/runSecurityValidation.js +169 -0
  237. package/dist/src/securityValidation/validate/verdict.js +73 -0
  238. package/dist/src/visualizationDemos/buildMyDevKitVisualizationCommands.js +59 -0
  239. package/dist/src/visualizationDemos/index.js +4 -0
  240. package/dist/src/visualizationDemos/runVisualizationDemos.js +82 -0
  241. package/dist/src/visualizationDemos/types.js +1 -0
  242. package/dist/src/visualizationDemos/writeVisualizationDemoArtifacts.js +25 -0
  243. package/docs/METRICS.md +286 -0
  244. package/examples/demo-report-input.json +78 -0
  245. package/examples/lab-demo-cases.json +35 -0
  246. package/examples/real-agent-campaign-cases.json +118 -0
  247. package/examples/token-savings-cases.json +122 -0
  248. package/package.json +91 -0
  249. package/tests/fixtures/fake-adversarial-cli.js +152 -0
  250. package/tests/fixtures/fake-my-dev-kit-cli.js +83 -0
@@ -0,0 +1,294 @@
1
+ import { spawn } from "node:child_process";
2
+ import { buildCliCommand } from "./adversarialCliConfig.js";
3
+ import { makeFinding, runAdversarialCheck } from "./runAdversarialCheck.js";
4
+ import { createTempWorkspace, diffSnapshots, snapshotDir } from "./tempWorkspace.js";
5
+ // ---------------------------------------------------------------------------
6
+ // Source read-only boundary checks
7
+ // ---------------------------------------------------------------------------
8
/**
 * Verifies that the CLI leaves existing files under `--root` untouched.
 *
 * The source directory of a fresh temp workspace is snapshotted, the CLI is
 * run with `--root <sourceDir> --out <outputDir>`, and the directory is
 * snapshotted again. Files that the snapshot diff reports as modified or
 * removed are collected into a single blocker finding. The workspace is
 * cleaned up whether or not the check completes.
 *
 * @param {object} target - CLI target handed to `buildCliCommand`; its `timeoutMs` bounds the run.
 * @returns {Promise<object>} Whatever `runAdversarialCheck` resolves with.
 */
export async function checkSourceFilesNotModified(target) {
  const workspace = createTempWorkspace("p4-ro-");
  try {
    // Baseline must be captured before the CLI gets a chance to run.
    const sourceBefore = snapshotDir(workspace.sourceDir);
    const cli = buildCliCommand(target, [
      "--root",
      workspace.sourceDir,
      "--out",
      workspace.outputDir,
    ]);

    const evaluate = () => {
      const sourceAfter = snapshotDir(workspace.sourceDir);
      const { modified, removed } = diffSnapshots(sourceBefore, sourceAfter);
      const problems = modified
        .map((file) => `modified: ${file}`)
        .concat(removed.map((file) => `removed: ${file}`));
      if (problems.length === 0) {
        return [];
      }
      const finding = makeFinding({
        id: "source-modified",
        title: "Source files were modified by CLI",
        severity: "blocker",
        category: "cli-adversarial",
        description: `CLI modified or deleted source files: ${problems.join(", ")}`,
        affectedFiles: [...modified, ...removed],
        recommendation: "CLI must treat --root as read-only. No writes should go to the source directory.",
      });
      return [finding];
    };

    return await runAdversarialCheck({
      id: "source-files-not-modified",
      name: "Source files are not modified during CLI run",
      category: "cli-adversarial",
      severity: "blocker",
      command: cli.command,
      args: cli.args,
      cwd: workspace.root,
      timeoutMs: target.timeoutMs,
      evaluate,
    });
  } finally {
    await workspace.cleanup();
  }
}
59
/**
 * Verifies that the CLI writes nowhere except the declared `--out` directory.
 *
 * Both the workspace source directory and the workspace "outside" directory
 * are snapshotted before the CLI runs with `--root` and `--out`. After the
 * run, files reported as added or modified in either location produce a
 * blocker finding (one per location). The workspace is always cleaned up.
 *
 * @param {object} target - CLI target handed to `buildCliCommand`; its `timeoutMs` bounds the run.
 * @returns {Promise<object>} Whatever `runAdversarialCheck` resolves with.
 */
export async function checkWritesLimitedToOutput(target) {
  const workspace = createTempWorkspace("p4-wlt-");
  try {
    const sourceBefore = snapshotDir(workspace.sourceDir);
    const outsideBefore = snapshotDir(workspace.outsideDir);
    const cli = buildCliCommand(target, [
      "--root",
      workspace.sourceDir,
      "--out",
      workspace.outputDir,
    ]);

    // Prefixes each path with the kind of change, e.g. "added: foo.txt".
    const labelled = (label, files) => files.map((file) => `${label}: ${file}`);

    const evaluate = () => {
      const findings = [];

      // Source directory: nothing may be created or changed.
      const sourceAfter = snapshotDir(workspace.sourceDir);
      const sourceDiff = diffSnapshots(sourceBefore, sourceAfter);
      const sourceProblems = [
        ...labelled("added", sourceDiff.added),
        ...labelled("modified", sourceDiff.modified),
      ];
      if (sourceProblems.length > 0) {
        findings.push(
          makeFinding({
            id: "write-in-source-dir",
            title: "CLI wrote files into the source directory",
            severity: "blocker",
            category: "cli-adversarial",
            description: `Unexpected writes in source dir: ${sourceProblems.join(", ")}`,
            affectedFiles: [...sourceDiff.added, ...sourceDiff.modified],
            recommendation: "CLI writes must never go to --root source directory.",
          }),
        );
      }

      // Outside directory: nothing may be created or changed either.
      const outsideAfter = snapshotDir(workspace.outsideDir);
      const outsideDiff = diffSnapshots(outsideBefore, outsideAfter);
      const outsideProblems = [
        ...labelled("added", outsideDiff.added),
        ...labelled("modified", outsideDiff.modified),
      ];
      if (outsideProblems.length > 0) {
        findings.push(
          makeFinding({
            id: "write-outside-workspace",
            title: "CLI wrote files outside the workspace",
            severity: "blocker",
            category: "cli-adversarial",
            description: `Unexpected writes outside workspace: ${outsideProblems.join(", ")}`,
            affectedFiles: [...outsideDiff.added, ...outsideDiff.modified],
            recommendation: "CLI must confine all writes to explicitly declared artifact directories.",
          }),
        );
      }

      return findings;
    };

    return await runAdversarialCheck({
      id: "writes-limited-to-output",
      name: "All CLI writes are confined to the declared output directory",
      category: "cli-adversarial",
      severity: "blocker",
      command: cli.command,
      args: cli.args,
      cwd: workspace.root,
      timeoutMs: target.timeoutMs,
      evaluate,
    });
  } finally {
    await workspace.cleanup();
  }
}
129
/**
 * Verifies that a run with `--index` neither touches the source directory nor
 * writes into the workspace "outside" directory.
 *
 * Source and outside directories are snapshotted, the CLI is run with
 * `--root <sourceDir> --index <indexDir>`, and both are snapshotted again.
 * Added or modified files in either location produce a blocker finding
 * (one per location). The workspace is always cleaned up.
 *
 * @param {object} target - CLI target handed to `buildCliCommand`; its `timeoutMs` bounds the run.
 * @returns {Promise<object>} Whatever `runAdversarialCheck` resolves with.
 */
export async function checkIndexWriteContainment(target) {
  const workspace = createTempWorkspace("p4-idx-ro-");
  try {
    const sourceBefore = snapshotDir(workspace.sourceDir);
    const outsideBefore = snapshotDir(workspace.outsideDir);
    const cli = buildCliCommand(target, [
      "--root",
      workspace.sourceDir,
      "--index",
      workspace.indexDir,
    ]);

    // Files a diff reports as newly created or changed.
    const writtenFiles = (diff) => [...diff.added, ...diff.modified];

    const evaluate = () => {
      const findings = [];

      const sourceAfter = snapshotDir(workspace.sourceDir);
      const sourceWrites = writtenFiles(diffSnapshots(sourceBefore, sourceAfter));
      if (sourceWrites.length > 0) {
        findings.push(
          makeFinding({
            id: "index-write-in-source",
            title: "Index operation modified source files",
            severity: "blocker",
            category: "cli-adversarial",
            description: `Source files modified: ${sourceWrites.join(", ")}`,
            affectedFiles: sourceWrites,
            recommendation: "Index operation must never write to the source root directory.",
          }),
        );
      }

      const outsideAfter = snapshotDir(workspace.outsideDir);
      const outsideWrites = writtenFiles(diffSnapshots(outsideBefore, outsideAfter));
      if (outsideWrites.length > 0) {
        findings.push(
          makeFinding({
            id: "index-write-outside",
            title: "Index operation wrote outside the workspace",
            severity: "blocker",
            category: "cli-adversarial",
            description: `Writes outside workspace: ${outsideWrites.join(", ")}`,
            affectedFiles: outsideWrites,
            recommendation: "Index writes must be confined to the declared --index path.",
          }),
        );
      }

      return findings;
    };

    return await runAdversarialCheck({
      id: "index-write-containment",
      name: "CLI index writes are confined to the declared index directory",
      category: "cli-adversarial",
      severity: "blocker",
      command: cli.command,
      args: cli.args,
      cwd: workspace.root,
      timeoutMs: target.timeoutMs,
      evaluate,
    });
  } finally {
    await workspace.cleanup();
  }
}
191
/**
 * Checks that a simulated artifact refresh / re-index does not damage user files.
 *
 * The CLI is run once to populate the output directory. The source and
 * "outside" directories are then snapshotted and the CLI is run a second time
 * under `runAdversarialCheck`. Blocker findings are raised when the second run
 * removes source files, modifies source files, or modifies/removes files in
 * the outside directory. The workspace is always cleaned up.
 *
 * @param {object} target - CLI target handed to `buildCliCommand`; its `timeoutMs` bounds the runs.
 * @returns {Promise<object>} Whatever `runAdversarialCheck` resolves with.
 */
export async function checkArtifactCleanupSafe(target) {
  const workspace = createTempWorkspace("p4-cleanup-");
  try {
    // First run — populate output directory. Its outcome is not inspected.
    const { command: cmd1, args: args1 } = buildCliCommand(target, [
      "--root",
      workspace.sourceDir,
      "--out",
      workspace.outputDir,
    ]);
    // The configured timeout is passed along as a fourth argument.
    await spawnAndWait(cmd1, args1, workspace.root, target.timeoutMs);

    // Snapshot AFTER the first run: this is the baseline for the refresh.
    const beforeSource = snapshotDir(workspace.sourceDir);
    const beforeOutside = snapshotDir(workspace.outsideDir);

    // Second run — simulates artifact refresh.
    const { command, args } = buildCliCommand(target, [
      "--root",
      workspace.sourceDir,
      "--out",
      workspace.outputDir,
    ]);

    return await runAdversarialCheck({
      id: "generated-cleanup-user-files",
      name: "Generated artifact refresh does not delete user source files",
      category: "artifact-safety",
      severity: "blocker",
      command,
      args,
      cwd: workspace.root,
      timeoutMs: target.timeoutMs,
      evaluate: () => {
        const findings = [];

        const afterSource = snapshotDir(workspace.sourceDir);
        const sourceDiff = diffSnapshots(beforeSource, afterSource);

        const deletedSource = sourceDiff.removed;
        if (deletedSource.length > 0) {
          findings.push(
            makeFinding({
              id: "cleanup-deleted-source",
              title: "Artifact refresh deleted user source files",
              severity: "blocker",
              category: "artifact-safety",
              description: `Source files deleted during refresh: ${deletedSource.join(", ")}`,
              affectedFiles: deletedSource,
              recommendation: "Generated artifact cleanup must be scoped to the declared output/index path only.",
            }),
          );
        }

        const modifiedSource = sourceDiff.modified;
        if (modifiedSource.length > 0) {
          findings.push(
            makeFinding({
              id: "cleanup-modified-source",
              title: "Artifact refresh modified user source files",
              severity: "blocker",
              category: "artifact-safety",
              description: `Source files modified during refresh: ${modifiedSource.join(", ")}`,
              affectedFiles: modifiedSource,
              recommendation: "Artifact refresh must never write to the source directory.",
            }),
          );
        }

        const afterOutside = snapshotDir(workspace.outsideDir);
        const outsideDiff = diffSnapshots(beforeOutside, afterOutside);
        const outsideAffected = [...outsideDiff.modified, ...outsideDiff.removed];
        if (outsideAffected.length > 0) {
          findings.push(
            makeFinding({
              id: "cleanup-outside-impact",
              title: "Artifact refresh affected files outside workspace",
              severity: "blocker",
              category: "artifact-safety",
              description: `Files outside workspace were affected during refresh: ${outsideAffected.join(", ")}`,
              // Report the paths in structured form too, like every other finding here.
              affectedFiles: outsideAffected,
              recommendation: "Refresh cleanup must be strictly scoped to output directories.",
            }),
          );
        }

        return findings;
      },
    });
  } finally {
    await workspace.cleanup();
  }
}
273
+ // ---------------------------------------------------------------------------
274
+ // Internal helpers
275
+ // ---------------------------------------------------------------------------
276
/**
 * Runs a command without a shell and waits for it to finish, discarding its
 * output and exit status.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed verbatim (no shell interpolation).
 * @param {string} cwd - Working directory for the child process.
 * @param {number} [timeoutMs=0] - When greater than zero, the child is killed
 *   after this many milliseconds so a hanging command cannot stall the caller.
 *   Zero or omitted means no limit.
 * @returns {Promise<void>} Resolves (never rejects) once the child has closed
 *   or failed to start.
 */
function spawnAndWait(command, args, cwd, timeoutMs = 0) {
  return new Promise((resolve) => {
    let child;
    try {
      child = spawn(command, args, {
        cwd,
        shell: false,
        stdio: ["ignore", "ignore", "ignore"],
        env: { ...process.env },
      });
    } catch {
      // Best-effort helper: a synchronous spawn failure is treated as "done".
      resolve();
      return;
    }

    let timer;
    const finish = () => {
      if (timer !== undefined) {
        clearTimeout(timer);
      }
      resolve();
    };

    if (timeoutMs > 0) {
      timer = setTimeout(() => {
        try {
          // SIGKILL cannot be trapped, so the "close" event is guaranteed to follow.
          child.kill("SIGKILL");
        } catch {
          // Could not signal the child; stop waiting rather than hang.
          finish();
        }
      }, timeoutMs);
    }

    child.on("close", finish);
    child.on("error", finish);
  });
}
@@ -0,0 +1,149 @@
1
+ import { spawn } from "node:child_process";
2
/**
 * Runs one adversarial CLI invocation and turns its outcome into a check result.
 *
 * The command is executed via `spawnAdversarialCommand`, the command result is
 * handed to `input.evaluate`, and the returned findings decide the status:
 * no findings -> "passed"; at least one "blocker" or "major" finding ->
 * "failed"; otherwise "warning".
 *
 * @param {object} input - Check description: `id`, `name`, `category`,
 *   `severity`, `command`, `args`, `cwd`, `timeoutMs`, and an `evaluate`
 *   callback that receives the command result and returns an array of findings.
 * @returns {Promise<object>} Check result with timing, status, findings and
 *   the space-joined command line.
 */
export async function runAdversarialCheck(input) {
  const startedAt = new Date().toISOString();
  const startMs = Date.now();

  const cmdResult = await spawnAdversarialCommand({
    command: input.command,
    args: input.args,
    cwd: input.cwd,
    timeoutMs: input.timeoutMs,
  });
  const findings = input.evaluate(cmdResult);

  const finishedAt = new Date().toISOString();
  const durationMs = Date.now() - startMs;

  let status = "passed";
  if (findings.length > 0) {
    const hasReleaseBlocker = findings.some(
      (finding) => finding.severity === "blocker" || finding.severity === "major",
    );
    status = hasReleaseBlocker ? "failed" : "warning";
  }

  const commandLine = [input.command, ...input.args].join(" ");
  return {
    id: input.id,
    name: input.name,
    category: input.category,
    status,
    severity: input.severity,
    startedAt,
    finishedAt,
    durationMs,
    findings,
    command: commandLine,
  };
}
30
/**
 * Builds the result record for a check that was not executed.
 *
 * @param {object} options - `id`, `name`, `category` of the check and the
 *   `reason` it was skipped.
 * @returns {object} A zero-duration result whose status and severity are both
 *   "skipped", with identical start and finish timestamps and no findings.
 */
export function skippedCheck(options) {
  const timestamp = new Date().toISOString();
  const { id, name, category, reason } = options;
  return {
    id,
    name,
    category,
    status: "skipped",
    severity: "skipped",
    startedAt: timestamp,
    finishedAt: timestamp,
    durationMs: 0,
    findings: [],
    skippedReason: reason,
  };
}
45
+ // ---------------------------------------------------------------------------
46
+ // Finding helpers
47
+ // ---------------------------------------------------------------------------
48
/**
 * Builds a finding record from the given options and derives its release impact.
 *
 * @param {object} options - `id`, `title`, `severity`, `category`,
 *   `description`, `recommendation`, and optionally `evidence` and
 *   `affectedFiles`; all are copied through unchanged.
 * @returns {object} The finding, with `releaseImpact` set to
 *   "Must be resolved before release" for "blocker"/"major" severity and
 *   "Review before release" otherwise.
 */
export function makeFinding(options) {
  const {
    id,
    title,
    severity,
    category,
    description,
    evidence,
    affectedFiles,
    recommendation,
  } = options;

  let releaseImpact = "Review before release";
  if (severity === "blocker" || severity === "major") {
    releaseImpact = "Must be resolved before release";
  }

  return {
    id,
    title,
    severity,
    category,
    description,
    evidence,
    affectedFiles,
    recommendation,
    releaseImpact,
  };
}
63
+ // ---------------------------------------------------------------------------
64
+ // Internal command runner (no shell interpolation)
65
+ // ---------------------------------------------------------------------------
66
/**
 * Spawns a command without a shell and captures its outcome.
 *
 * Output is collected as raw buffers and decoded once at the end, so a
 * multi-byte UTF-8 character split across two chunks is not corrupted.
 *
 * @param {object} options
 * @param {string} options.command - Executable to run.
 * @param {string[]} options.args - Arguments passed verbatim.
 * @param {string} options.cwd - Working directory for the child.
 * @param {number} [options.timeoutMs] - When greater than zero, the child is
 *   sent SIGTERM after this many milliseconds and `timedOut` is set.
 * @returns {Promise<object>} Resolves (never rejects) with `command`, `args`,
 *   `cwd`, `exitCode` (null when the process could not be started), `stdout`,
 *   `stderr`, `durationMs`, `timedOut`, and — only when spawning failed,
 *   synchronously or via the "error" event — `spawnError`.
 */
async function spawnAdversarialCommand(options) {
  const started = Date.now();
  return new Promise((resolve) => {
    const stdoutChunks = [];
    const stderrChunks = [];
    let timedOut = false;
    let settled = false;
    let timeout;

    const decode = (chunks) => Buffer.concat(chunks).toString("utf8");

    // Single exit point: the first of "error"/"close"/sync failure wins.
    const settle = (exitCode, stdout, stderr, spawnError) => {
      if (settled) {
        return;
      }
      settled = true;
      if (timeout) {
        clearTimeout(timeout);
      }
      const result = {
        command: options.command,
        args: options.args,
        cwd: options.cwd,
        exitCode,
        stdout,
        stderr,
        durationMs: Date.now() - started,
        timedOut,
      };
      if (spawnError !== undefined) {
        result.spawnError = spawnError;
      }
      resolve(result);
    };

    let child;
    try {
      child = spawn(options.command, options.args, {
        cwd: options.cwd,
        shell: false,
        stdio: ["ignore", "pipe", "pipe"],
        env: { ...process.env },
      });
    } catch (err) {
      settle(null, "", "", err instanceof Error ? err.message : String(err));
      return;
    }

    child.stdout.on("data", (chunk) => {
      stdoutChunks.push(chunk);
    });
    child.stderr.on("data", (chunk) => {
      stderrChunks.push(chunk);
    });

    if (options.timeoutMs > 0) {
      timeout = setTimeout(() => {
        timedOut = true;
        try {
          child.kill("SIGTERM");
        } catch {
          // Already exited.
        }
      }, options.timeoutMs);
    }

    child.on("error", (err) => {
      // Most spawn failures (e.g. ENOENT) arrive here rather than as a
      // synchronous throw, so report them through spawnError as well.
      settle(null, decode(stdoutChunks), err.message, err.message);
    });
    child.on("close", (exitCode) => {
      settle(exitCode, decode(stdoutChunks), decode(stderrChunks));
    });
  });
}
@@ -0,0 +1,214 @@
1
+ import { spawn } from "node:child_process";
2
+ import path from "node:path";
3
+ import { buildCliCommand } from "./adversarialCliConfig.js";
4
+ import { makeFinding } from "./runAdversarialCheck.js";
5
+ import { createTempWorkspace, diffSnapshots, snapshotDir } from "./tempWorkspace.js";
6
/**
 * Runs a command without a shell and collects its stdout and stderr.
 *
 * The returned promise never rejects. Any failure (spawn error, timeout,
 * termination by signal) is reported as `exitCode: 1`.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Argument array passed to the executable as-is.
 * @param {string} cwd - Working directory for the child process.
 * @param {number} [timeoutMs] - Kill the child after this many milliseconds.
 *   A missing, non-finite or non-positive value means "no timeout".
 * @returns {Promise<{exitCode: number, stdout: string, stderr: string}>}
 */
function runAndCapture(command, args, cwd, timeoutMs) {
    return new Promise((resolve) => {
        let child;
        try {
            child = spawn(command, args, {
                cwd,
                shell: false,
                stdio: ["ignore", "pipe", "pipe"],
                env: { ...process.env },
            });
        }
        catch (err) {
            // Surface the reason instead of returning an empty stderr.
            resolve({
                exitCode: 1,
                stdout: "",
                stderr: err instanceof Error ? err.message : String(err),
            });
            return;
        }
        const stdoutChunks = [];
        const stderrChunks = [];
        child.stdout.on("data", (chunk) => stdoutChunks.push(chunk));
        child.stderr.on("data", (chunk) => stderrChunks.push(chunk));
        let timer;
        // Resolving more than once is harmless; only the first call counts.
        const finish = (exitCode, fallbackStderr = "") => {
            clearTimeout(timer);
            const capturedStderr = Buffer.concat(stderrChunks).toString("utf8");
            resolve({
                exitCode,
                stdout: Buffer.concat(stdoutChunks).toString("utf8"),
                stderr: capturedStderr.length > 0 ? capturedStderr : fallbackStderr,
            });
        };
        // setTimeout treats undefined/0/Infinity as ~1 ms, which would kill the
        // child almost immediately, so only arm the timer for a real timeout.
        if (Number.isFinite(timeoutMs) && timeoutMs > 0) {
            timer = setTimeout(() => {
                try {
                    child.kill();
                }
                catch {
                    // ignore: the process has already exited
                }
                finish(1);
            }, timeoutMs);
        }
        child.on("close", (code) => {
            finish(code ?? 1);
        });
        child.on("error", (err) => {
            finish(1, err.message);
        });
    });
}
56
// Path fragments containing metacharacters that a shell would interpret if the
// CLI passed its arguments through one (e.g. shell:true or a command string).
// Each payload redirects into a file named "injected": the check detects
// injection by looking for that file, so a payload that only echoes to stdout
// would never be noticed.
const METACHAR_PATHS = [
    { value: "path;echo injected>injected", label: "semicolon" },
    { value: "path&echo injected>injected", label: "ampersand" },
    { value: "path$(echo injected>injected)", label: "command substitution" },
    { value: "path`echo injected>injected`", label: "backtick substitution" },
];
63
/**
 * Checks that paths with shell metacharacters are passed as literal strings
 * and do not trigger command injection via the CLI invocation.
 *
 * The harness invokes the CLI with shell:false, so metacharacters in the
 * path argument array are always literal. This check verifies that the CLI
 * itself does not re-interpret path arguments through a shell.
 *
 * For every entry in METACHAR_PATHS the CLI is run with that entry as the
 * `--root` value, then two things are inspected:
 *   1. whether a file named `injected` appeared in the workspace root, and
 *   2. whether anything was added or modified in the workspace's outside dir.
 *
 * NOTE(review): detection (1) can only fire if the payload actually writes a
 * file named `injected` into the CLI's working directory — confirm the
 * METACHAR_PATHS payloads do so.
 *
 * @param {object} target - CLI target description; passed to buildCliCommand
 *   and read for `timeoutMs`.
 * @returns {Promise<object>} Check result with `status` of "failed" when any
 *   blocker/major finding exists, "warning" for other findings, else "passed".
 */
export async function checkSubprocessNoShellInterpolation(target) {
    const started = new Date();
    const workspace = createTempWorkspace("p5-shell-");
    try {
        // Loop-invariant: load fs once and compute the marker path once.
        const { existsSync, rmSync } = await import("node:fs");
        const injectedFile = path.join(workspace.root, "injected");
        const findings = [];
        for (const { value: metacharPath, label } of METACHAR_PATHS) {
            const slug = label.replace(/\s+/g, "-");
            // Remove any marker left by an earlier payload so a finding is
            // always attributed to the payload that actually produced it.
            rmSync(injectedFile, { force: true, recursive: true });
            const beforeOutside = snapshotDir(workspace.outsideDir);
            // Use the metachar path as the --root argument.
            // Since the path likely doesn't exist, the CLI may exit non-zero.
            // What matters is: no shell side effects (no "injected" written anywhere).
            const { command, args } = buildCliCommand(target, [
                "--root",
                path.join(workspace.sourceDir, metacharPath),
                "--out",
                workspace.outputDir,
            ]);
            await runAndCapture(command, args, workspace.root, target.timeoutMs);
            // If shell injection worked, "injected" might be created.
            if (existsSync(injectedFile)) {
                findings.push(makeFinding({
                    id: `shell-injection-${slug}`,
                    title: `Shell injection via ${label} in --root`,
                    severity: "blocker",
                    category: "cli-adversarial",
                    description: `Shell injection via ${label}: file 'injected' was created, suggesting shell interpolation occurred.`,
                    affectedFiles: [injectedFile],
                    recommendation: "CLI must never pass path arguments through a shell. Use spawn argument arrays only.",
                }));
            }
            // Also check that outsideDir was not affected.
            const afterOutside = snapshotDir(workspace.outsideDir);
            const outsideDiff = diffSnapshots(beforeOutside, afterOutside);
            if (outsideDiff.added.length > 0 || outsideDiff.modified.length > 0) {
                findings.push(makeFinding({
                    id: `shell-injection-outside-${slug}`,
                    title: `Unexpected writes outside workspace for ${label} path`,
                    severity: "major",
                    category: "cli-adversarial",
                    description: `${label} in --root caused writes outside the workspace.`,
                    recommendation: "Path arguments with metacharacters must be treated as literal strings.",
                }));
            }
        }
        const finished = new Date();
        return {
            id: "subprocess-no-shell-interpolation",
            name: "Subprocess calls avoid shell-string interpolation for metachar paths",
            category: "cli-adversarial",
            severity: "blocker",
            status: findings.some((f) => f.severity === "blocker" || f.severity === "major")
                ? "failed"
                : findings.length > 0
                    ? "warning"
                    : "passed",
            findings,
            startedAt: started.toISOString(),
            finishedAt: finished.toISOString(),
            durationMs: finished.getTime() - started.getTime(),
        };
    }
    finally {
        // Always remove the temporary workspace, even if a step above threw.
        await workspace.cleanup();
    }
}
137
/**
 * DOT label escaping test cases.
 * Each case is a node name that would produce broken DOT syntax if unescaped.
 * Every `value` contains at least one character that needs escaping, so a
 * working escaper must return something different from the input.
 */
export const DOT_LABEL_TEST_CASES = [
    { id: "double-quote", value: 'foo"bar', description: "Double quote in label" },
    { id: "backslash", value: "foo\\bar", description: "Backslash in label" },
    { id: "angle-brackets", value: "foo<bar>", description: "Angle brackets in label" },
    { id: "curly-braces", value: "foo{bar}", description: "Curly braces in label" },
    { id: "pipe", value: "foo|bar", description: "Pipe character in label" },
    { id: "newline", value: "foo\nbar", description: "Newline in label" },
    // The escaper in this file handles "\r" as well; cover it like the others.
    { id: "carriage-return", value: "foo\rbar", description: "Carriage return in label" },
];
149
/**
 * Produces a DOT-safe label from a raw string.
 *
 * Backslashes, double quotes and the characters `<`, `>`, `{`, `}`, `|` are
 * prefixed with a backslash; line feed and carriage return become the
 * two-character sequences `\n` and `\r`.
 *
 * @param {string} raw - Unescaped label text.
 * @returns {string} The escaped label.
 */
export function escapeDotLabel(raw) {
    // Control characters map to a letter; everything else keeps its own
    // character behind a backslash.
    const controlEscapes = { "\n": "\\n", "\r": "\\r" };
    // One pass over the input: each special character is rewritten exactly
    // once, so backslashes added here are never escaped a second time.
    return raw.replace(/[\\"\n\r<>{}|]/g, (ch) => controlEscapes[ch] ?? `\\${ch}`);
}
161
/**
 * Reports whether `text` contains a double quote that is not escaped.
 * A quote is escaped only when preceded by an odd number of backslashes,
 * so `\"` is escaped while `\\"` (escaped backslash, then quote) is not.
 */
function hasUnescapedDoubleQuote(text) {
    let backslashRun = 0;
    for (const ch of text) {
        if (ch === "\\") {
            backslashRun += 1;
            continue;
        }
        if (ch === '"' && backslashRun % 2 === 0) {
            return true;
        }
        backslashRun = 0;
    }
    return false;
}
/**
 * Checks that DOT label escaping handles all special characters correctly.
 * This is a pure logic check — does not invoke the CLI.
 *
 * For every entry in DOT_LABEL_TEST_CASES it runs escapeDotLabel and records
 * a "major" finding when the value came back unchanged, and another when the
 * escaped value still contains an unescaped double quote.
 *
 * @returns {Promise<object>} Check result with `status` of "failed" when any
 *   major/blocker finding exists, "warning" for other findings, else "passed".
 */
export async function checkDotLabelEscaping() {
    const started = new Date();
    const findings = [];
    for (const { id, value, description } of DOT_LABEL_TEST_CASES) {
        const escaped = escapeDotLabel(value);
        // Every test value contains a character that needs escaping, so an
        // unchanged result means the escaper ignored it. This applies to all
        // cases, not only the ones containing a double quote.
        if (escaped === value) {
            findings.push(makeFinding({
                id: `dot-label-${id}`,
                title: `DOT label escaper did not escape: ${description}`,
                severity: "major",
                category: "cli-adversarial",
                description: `Label '${value}' was not escaped. This would produce broken DOT syntax.`,
                recommendation: "All special characters in DOT labels must be properly escaped.",
            }));
        }
        // The escaped label is embedded as "<escaped>" in a DOT node
        // declaration, so any unescaped quote inside would end the string early.
        if (hasUnescapedDoubleQuote(escaped)) {
            findings.push(makeFinding({
                id: `dot-label-unescaped-${id}`,
                title: `DOT label contains unescaped double quote after escaping: ${description}`,
                severity: "major",
                category: "cli-adversarial",
                description: `Escaped value '${escaped}' still contains an unescaped double-quote.`,
                recommendation: "Double-quotes inside DOT labels must be escaped as \\\".",
            }));
        }
    }
    const finished = new Date();
    return {
        id: "graphviz-label-escaping",
        name: "Graph labels escape special characters correctly",
        category: "cli-adversarial",
        severity: "major",
        status: findings.some((f) => f.severity === "major" || f.severity === "blocker")
            ? "failed"
            : findings.length > 0
                ? "warning"
                : "passed",
        findings,
        startedAt: started.toISOString(),
        finishedAt: finished.toISOString(),
        durationMs: finished.getTime() - started.getTime(),
    };
}