@dailephd/my-dev-kit-lab 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/README.md +272 -0
  2. package/benchmarks/contracts/benchmark-project-profiles.json +1199 -0
  3. package/benchmarks/contracts/todo-behavior.md +70 -0
  4. package/benchmarks/contracts/todo-benchmark-case.json +227 -0
  5. package/benchmarks/projects/README.md +34 -0
  6. package/benchmarks/projects/task-analytics-large-mixed/README.md +1 -0
  7. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/__init__.py +3 -0
  8. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/fixtures.py +6 -0
  9. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/metrics.py +29 -0
  10. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/models.py +21 -0
  11. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/parser.py +16 -0
  12. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/pipeline.py +9 -0
  13. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/quality.py +8 -0
  14. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/reporting.py +11 -0
  15. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_metrics.py +19 -0
  16. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_parser.py +15 -0
  17. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_quality.py +19 -0
  18. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_reporting.py +15 -0
  19. package/benchmarks/projects/task-analytics-large-mixed/ts/package.json +12 -0
  20. package/benchmarks/projects/task-analytics-large-mixed/ts/src/index.ts +11 -0
  21. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/analyticsSnapshot.ts +20 -0
  22. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/project.ts +5 -0
  23. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/task.ts +10 -0
  24. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/buildProjectLeaderboard.ts +7 -0
  25. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/formatTaskHealthReport.ts +13 -0
  26. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/buildAnalyticsSnapshot.ts +39 -0
  27. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/completeTask.ts +10 -0
  28. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/createTask.ts +21 -0
  29. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/listTasksByProject.ts +6 -0
  30. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/projectStore.ts +20 -0
  31. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/taskStore.ts +44 -0
  32. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/projectValidation.ts +12 -0
  33. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/taskValidation.ts +18 -0
  34. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/buildAnalyticsSnapshot.test.ts +48 -0
  35. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/completeTask.test.ts +21 -0
  36. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/createTask.test.ts +31 -0
  37. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/listTasksByProject.test.ts +18 -0
  38. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/reporting.test.ts +19 -0
  39. package/benchmarks/projects/task-analytics-large-mixed/ts/tsconfig.json +12 -0
  40. package/benchmarks/projects/task-analytics-large-mixed/ts/vitest.config.ts +5 -0
  41. package/benchmarks/projects/task-workflow-medium-ts/README.md +1 -0
  42. package/benchmarks/projects/task-workflow-medium-ts/package.json +12 -0
  43. package/benchmarks/projects/task-workflow-medium-ts/src/index.ts +9 -0
  44. package/benchmarks/projects/task-workflow-medium-ts/src/models/project.ts +6 -0
  45. package/benchmarks/projects/task-workflow-medium-ts/src/models/task.ts +39 -0
  46. package/benchmarks/projects/task-workflow-medium-ts/src/services/completeTask.ts +15 -0
  47. package/benchmarks/projects/task-workflow-medium-ts/src/services/createTask.ts +26 -0
  48. package/benchmarks/projects/task-workflow-medium-ts/src/services/filterTasks.ts +17 -0
  49. package/benchmarks/projects/task-workflow-medium-ts/src/services/importTasks.ts +33 -0
  50. package/benchmarks/projects/task-workflow-medium-ts/src/services/summarizeTasks.ts +30 -0
  51. package/benchmarks/projects/task-workflow-medium-ts/src/store/taskStore.ts +76 -0
  52. package/benchmarks/projects/task-workflow-medium-ts/src/utils/deterministicId.ts +3 -0
  53. package/benchmarks/projects/task-workflow-medium-ts/src/validation/taskValidation.ts +45 -0
  54. package/benchmarks/projects/task-workflow-medium-ts/tests/completeTask.test.ts +16 -0
  55. package/benchmarks/projects/task-workflow-medium-ts/tests/createTask.test.ts +21 -0
  56. package/benchmarks/projects/task-workflow-medium-ts/tests/filterTasks.test.ts +18 -0
  57. package/benchmarks/projects/task-workflow-medium-ts/tests/importTasks.test.ts +22 -0
  58. package/benchmarks/projects/task-workflow-medium-ts/tests/summarizeTasks.test.ts +29 -0
  59. package/benchmarks/projects/task-workflow-medium-ts/tsconfig.json +12 -0
  60. package/benchmarks/projects/task-workflow-medium-ts/vitest.config.ts +5 -0
  61. package/benchmarks/projects/todo-js/README.md +3 -0
  62. package/benchmarks/projects/todo-js/package.json +11 -0
  63. package/benchmarks/projects/todo-js/src/index.js +2 -0
  64. package/benchmarks/projects/todo-js/src/taskService.js +37 -0
  65. package/benchmarks/projects/todo-js/src/taskStore.js +28 -0
  66. package/benchmarks/projects/todo-js/tests/taskService.test.js +45 -0
  67. package/benchmarks/projects/todo-js/vitest.config.js +5 -0
  68. package/benchmarks/projects/todo-mixed-ts-py/README.md +3 -0
  69. package/benchmarks/projects/todo-mixed-ts-py/package.json +13 -0
  70. package/benchmarks/projects/todo-mixed-ts-py/python/task_service.py +76 -0
  71. package/benchmarks/projects/todo-mixed-ts-py/src/taskCli.ts +38 -0
  72. package/benchmarks/projects/todo-mixed-ts-py/tests/mixedBoundary.test.ts +18 -0
  73. package/benchmarks/projects/todo-mixed-ts-py/tsconfig.json +12 -0
  74. package/benchmarks/projects/todo-mixed-ts-py/vitest.config.ts +5 -0
  75. package/benchmarks/projects/todo-python/README.md +3 -0
  76. package/benchmarks/projects/todo-python/src/__init__.py +4 -0
  77. package/benchmarks/projects/todo-python/src/task_service.py +32 -0
  78. package/benchmarks/projects/todo-python/src/task_store.py +28 -0
  79. package/benchmarks/projects/todo-python/tests/test_task_service.py +52 -0
  80. package/benchmarks/projects/todo-ts/README.md +3 -0
  81. package/benchmarks/projects/todo-ts/package.json +12 -0
  82. package/benchmarks/projects/todo-ts/src/index.ts +2 -0
  83. package/benchmarks/projects/todo-ts/src/taskService.ts +41 -0
  84. package/benchmarks/projects/todo-ts/src/taskStore.ts +34 -0
  85. package/benchmarks/projects/todo-ts/tests/taskService.test.ts +45 -0
  86. package/benchmarks/projects/todo-ts/tsconfig.json +12 -0
  87. package/benchmarks/projects/todo-ts/vitest.config.ts +5 -0
  88. package/dist/scripts/build-gallery.js +3 -0
  89. package/dist/scripts/capture-demo-report.js +3 -0
  90. package/dist/scripts/evaluate-token-savings.js +2 -0
  91. package/dist/scripts/experiments/describeExperiment.js +143 -0
  92. package/dist/scripts/experiments/listExperiments.js +44 -0
  93. package/dist/scripts/experiments/runExperiment.js +199 -0
  94. package/dist/scripts/generate-experiment-plots.js +3 -0
  95. package/dist/scripts/generate-prompt-variants.js +2 -0
  96. package/dist/scripts/render-experiment-report.js +2 -0
  97. package/dist/scripts/run-agent-prompt.js +2 -0
  98. package/dist/scripts/run-controlled-experiment.js +2 -0
  99. package/dist/scripts/run-final-demo.js +3 -0
  100. package/dist/scripts/run-lab-demo.js +5 -0
  101. package/dist/scripts/run-visualization-demos.js +3 -0
  102. package/dist/scripts/security/runCodeql.js +57 -0
  103. package/dist/scripts/security/runDependencyChecks.js +57 -0
  104. package/dist/scripts/security/runFuzzSmoke.js +29 -0
  105. package/dist/scripts/security/runPackageChecks.js +56 -0
  106. package/dist/scripts/security/runSemgrep.js +63 -0
  107. package/dist/scripts/security/validate.js +117 -0
  108. package/dist/scripts/verify-benchmarks.js +202 -0
  109. package/dist/src/agents/adapters/claudeAdapter.js +37 -0
  110. package/dist/src/agents/adapters/codexAdapter.js +110 -0
  111. package/dist/src/agents/adapters/fakeAgentAdapter.js +101 -0
  112. package/dist/src/agents/agentRegistry.js +21 -0
  113. package/dist/src/agents/index.js +7 -0
  114. package/dist/src/agents/parseAgentTokenUsage.js +137 -0
  115. package/dist/src/agents/runAgentPrompt.js +38 -0
  116. package/dist/src/agents/types.js +1 -0
  117. package/dist/src/commands/buildGalleryCommand.js +56 -0
  118. package/dist/src/commands/captureDemoReport.js +116 -0
  119. package/dist/src/commands/evaluateTokenSavings.js +175 -0
  120. package/dist/src/commands/generateExperimentPlotsCommand.js +38 -0
  121. package/dist/src/commands/generatePromptVariants.js +67 -0
  122. package/dist/src/commands/renderExperimentReportCommand.js +131 -0
  123. package/dist/src/commands/runAgentPromptCommand.js +132 -0
  124. package/dist/src/commands/runControlledExperimentCommand.js +174 -0
  125. package/dist/src/commands/runFinalDemoCommand.js +123 -0
  126. package/dist/src/commands/runLabDemo.js +62 -0
  127. package/dist/src/commands/runVisualizationDemosCommand.js +67 -0
  128. package/dist/src/core/commandLine.js +59 -0
  129. package/dist/src/core/countTokens.js +8 -0
  130. package/dist/src/core/fileGlobs.js +100 -0
  131. package/dist/src/core/localProjectTarget.js +75 -0
  132. package/dist/src/core/pathSafety.js +19 -0
  133. package/dist/src/core/pythonCommand.js +30 -0
  134. package/dist/src/core/resolveCommand.js +110 -0
  135. package/dist/src/core/runMeasuredCommand.js +143 -0
  136. package/dist/src/evaluation/benchmarkMetadata.js +207 -0
  137. package/dist/src/evaluation/buildExperimentMatrix.js +75 -0
  138. package/dist/src/evaluation/classifyAgentRunOutcome.js +40 -0
  139. package/dist/src/evaluation/compareExperimentRuns.js +79 -0
  140. package/dist/src/evaluation/compareTokenSavings.js +47 -0
  141. package/dist/src/evaluation/controlledExperimentTypes.js +1 -0
  142. package/dist/src/evaluation/index.js +18 -0
  143. package/dist/src/evaluation/parseAgentAnswer.js +230 -0
  144. package/dist/src/evaluation/projectComplexity.js +126 -0
  145. package/dist/src/evaluation/projectFileTree.js +83 -0
  146. package/dist/src/evaluation/readEvaluationCases.js +59 -0
  147. package/dist/src/evaluation/renderTokenSavingsReportInput.js +55 -0
  148. package/dist/src/evaluation/runControlledExperiment.js +158 -0
  149. package/dist/src/evaluation/runMyDevKitRetrieval.js +197 -0
  150. package/dist/src/evaluation/runRawFullFileBaseline.js +31 -0
  151. package/dist/src/evaluation/scoreCorrectness.js +127 -0
  152. package/dist/src/evaluation/types.js +1 -0
  153. package/dist/src/evaluation/writeExperimentArtifacts.js +104 -0
  154. package/dist/src/evaluation/writeTokenSavingsArtifacts.js +57 -0
  155. package/dist/src/experiments/config.js +24 -0
  156. package/dist/src/experiments/defaultRegistry.js +7 -0
  157. package/dist/src/experiments/errors.js +18 -0
  158. package/dist/src/experiments/index.js +9 -0
  159. package/dist/src/experiments/outputPaths.js +25 -0
  160. package/dist/src/experiments/plugins/contextStrategyComparison/config.js +37 -0
  161. package/dist/src/experiments/plugins/contextStrategyComparison/index.js +3 -0
  162. package/dist/src/experiments/plugins/contextStrategyComparison/plugin.js +83 -0
  163. package/dist/src/experiments/plugins/contextStrategyComparison/resultMapping.js +260 -0
  164. package/dist/src/experiments/plugins/index.js +1 -0
  165. package/dist/src/experiments/registry.js +43 -0
  166. package/dist/src/experiments/results.js +48 -0
  167. package/dist/src/experiments/runner.js +181 -0
  168. package/dist/src/experiments/target.js +8 -0
  169. package/dist/src/experiments/types.js +1 -0
  170. package/dist/src/gallery/index.js +2 -0
  171. package/dist/src/gallery/types.js +1 -0
  172. package/dist/src/gallery/writeGalleryManifest.js +214 -0
  173. package/dist/src/index.js +12 -0
  174. package/dist/src/plots/buildExperimentPlotData.js +137 -0
  175. package/dist/src/plots/index.js +4 -0
  176. package/dist/src/plots/renderSvgChart.js +82 -0
  177. package/dist/src/plots/types.js +1 -0
  178. package/dist/src/plots/writePlotArtifacts.js +46 -0
  179. package/dist/src/prompts/buildPromptContext.js +68 -0
  180. package/dist/src/prompts/generateMyDevKitPrompt.js +106 -0
  181. package/dist/src/prompts/generatePromptVariants.js +36 -0
  182. package/dist/src/prompts/generateRawFullFilePrompt.js +97 -0
  183. package/dist/src/prompts/index.js +7 -0
  184. package/dist/src/prompts/measurePromptComplexity.js +41 -0
  185. package/dist/src/prompts/types.js +1 -0
  186. package/dist/src/prompts/writePromptArtifacts.js +43 -0
  187. package/dist/src/report/buildExperimentReportInput.js +339 -0
  188. package/dist/src/report/experimentReportTypes.js +1 -0
  189. package/dist/src/report/experiments/buildPluginExperimentReport.js +153 -0
  190. package/dist/src/report/experiments/experimentReportModel.js +1 -0
  191. package/dist/src/report/experiments/index.js +4 -0
  192. package/dist/src/report/experiments/renderPluginExperimentReportHtml.js +133 -0
  193. package/dist/src/report/experiments/writePluginExperimentReports.js +30 -0
  194. package/dist/src/report/index.js +8 -0
  195. package/dist/src/report/renderExperimentHtmlReport.js +354 -0
  196. package/dist/src/report/renderHtmlReport.js +103 -0
  197. package/dist/src/report/types.js +10 -0
  198. package/dist/src/report/writeExperimentReportArtifacts.js +38 -0
  199. package/dist/src/report/writeReportArtifacts.js +39 -0
  200. package/dist/src/screenshot/captureReportScreenshot.js +75 -0
  201. package/dist/src/screenshot/index.js +2 -0
  202. package/dist/src/screenshot/types.js +1 -0
  203. package/dist/src/securityValidation/artifacts.js +15 -0
  204. package/dist/src/securityValidation/cliAdversarial/adversarialCliConfig.js +38 -0
  205. package/dist/src/securityValidation/cliAdversarial/dataVolumeChecks.js +194 -0
  206. package/dist/src/securityValidation/cliAdversarial/jsonStdoutChecks.js +359 -0
  207. package/dist/src/securityValidation/cliAdversarial/malformedArtifactChecks.js +284 -0
  208. package/dist/src/securityValidation/cliAdversarial/malformedArtifactFixtures.js +79 -0
  209. package/dist/src/securityValidation/cliAdversarial/pathBoundaryChecks.js +431 -0
  210. package/dist/src/securityValidation/cliAdversarial/pathCases.js +144 -0
  211. package/dist/src/securityValidation/cliAdversarial/readOnlyBoundaryChecks.js +294 -0
  212. package/dist/src/securityValidation/cliAdversarial/runAdversarialCheck.js +149 -0
  213. package/dist/src/securityValidation/cliAdversarial/subprocessSafetyChecks.js +214 -0
  214. package/dist/src/securityValidation/cliAdversarial/tempWorkspace.js +160 -0
  215. package/dist/src/securityValidation/commandRunner.js +136 -0
  216. package/dist/src/securityValidation/config.js +39 -0
  217. package/dist/src/securityValidation/dependencies/parseNpmAudit.js +115 -0
  218. package/dist/src/securityValidation/dependencies/parseNpmLs.js +71 -0
  219. package/dist/src/securityValidation/dependencies/parseNpmOutdated.js +41 -0
  220. package/dist/src/securityValidation/dependencies/runDependencyChecks.js +239 -0
  221. package/dist/src/securityValidation/dependencies/runOsvScanner.js +43 -0
  222. package/dist/src/securityValidation/fuzz/fuzzHarness.js +61 -0
  223. package/dist/src/securityValidation/fuzz/fuzzTargets.js +204 -0
  224. package/dist/src/securityValidation/fuzz/randomInput.js +0 -0
  225. package/dist/src/securityValidation/index.js +34 -0
  226. package/dist/src/securityValidation/packageChecks/forbiddenPackageContents.js +67 -0
  227. package/dist/src/securityValidation/packageChecks/parseNpmPackDryRun.js +56 -0
  228. package/dist/src/securityValidation/packageChecks/runPackageChecks.js +88 -0
  229. package/dist/src/securityValidation/report/renderSecurityReport.js +248 -0
  230. package/dist/src/securityValidation/report/securityReportTypes.js +1 -0
  231. package/dist/src/securityValidation/staticScans/codeql.js +66 -0
  232. package/dist/src/securityValidation/staticScans/semgrep.js +180 -0
  233. package/dist/src/securityValidation/testMatrix.js +535 -0
  234. package/dist/src/securityValidation/types.js +34 -0
  235. package/dist/src/securityValidation/validate/resolveTarget.js +32 -0
  236. package/dist/src/securityValidation/validate/runSecurityValidation.js +169 -0
  237. package/dist/src/securityValidation/validate/verdict.js +73 -0
  238. package/dist/src/visualizationDemos/buildMyDevKitVisualizationCommands.js +59 -0
  239. package/dist/src/visualizationDemos/index.js +4 -0
  240. package/dist/src/visualizationDemos/runVisualizationDemos.js +82 -0
  241. package/dist/src/visualizationDemos/types.js +1 -0
  242. package/dist/src/visualizationDemos/writeVisualizationDemoArtifacts.js +25 -0
  243. package/docs/METRICS.md +286 -0
  244. package/examples/demo-report-input.json +78 -0
  245. package/examples/lab-demo-cases.json +35 -0
  246. package/examples/real-agent-campaign-cases.json +118 -0
  247. package/examples/token-savings-cases.json +122 -0
  248. package/package.json +91 -0
  249. package/tests/fixtures/fake-adversarial-cli.js +152 -0
  250. package/tests/fixtures/fake-my-dev-kit-cli.js +83 -0
@@ -0,0 +1,431 @@
1
+ import { spawn } from "node:child_process";
2
+ import path from "node:path";
3
+ import { buildCliCommand } from "./adversarialCliConfig.js";
4
+ import { makeFinding, runAdversarialCheck, skippedCheck } from "./runAdversarialCheck.js";
5
+ import { createTempWorkspace, diffSnapshots, snapshotDir } from "./tempWorkspace.js";
6
+ // ---------------------------------------------------------------------------
7
+ // Path traversal checks
8
+ // ---------------------------------------------------------------------------
/**
 * Runs the CLI with an adversarial value for --root and reports a blocker
 * finding if anything was written into the workspace's "outside" sentinel
 * directory while the command ran.
 *
 * The sentinel directory is snapshotted before the command is started and again
 * inside the evaluate callback. A spawn error is deliberately NOT a finding:
 * pointing --root at a non-existent location is an expected outcome.
 *
 * NOTE(review): only changes under workspace.outsideDir are observed. Reads, and
 * writes that land anywhere else on disk, are not detected by this check.
 *
 * @param {object} target - CLI target; handed to buildCliCommand, and
 *   target.timeoutMs is forwarded as the command timeout.
 * @param {object} testInput - Adversarial case; its id, description and value
 *   fields are read.
 * @returns {Promise<object>} The check result produced by runAdversarialCheck.
 */
export async function checkRootPathTraversal(target, testInput) {
    const workspace = createTempWorkspace("p4-root-");
    try {
        const beforeOutside = snapshotDir(workspace.outsideDir);
        const { command, args } = buildCliCommand(target, [
            "--root",
            testInput.value,
            "--out",
            workspace.outputDir,
        ]);
        return await runAdversarialCheck({
            id: `path-traversal-root-${testInput.id}`,
            name: `Path traversal via --root: ${testInput.description}`,
            category: "cli-adversarial",
            severity: "blocker",
            command,
            args,
            cwd: workspace.root,
            timeoutMs: target.timeoutMs,
            evaluate: (cmdResult) => {
                const findings = [];
                const afterOutside = snapshotDir(workspace.outsideDir);
                const outsideDiff = diffSnapshots(beforeOutside, afterOutside);
                if (outsideDiff.added.length > 0 || outsideDiff.modified.length > 0) {
                    // List modified entries as well as added ones: the finding fires
                    // for either, so a modification-only escape must not produce an
                    // empty file list.
                    // NOTE(review): assumes `modified` holds path strings like `added`.
                    const touched = [...outsideDiff.added, ...outsideDiff.modified];
                    findings.push(makeFinding({
                        id: `path-traversal-root-${testInput.id}-escape`,
                        title: "Write escaped workspace via --root traversal",
                        severity: "blocker",
                        category: "cli-adversarial",
                        description: `CLI wrote files outside workspace: ${touched.join(", ")}`,
                        evidence: `command: ${[cmdResult.command, ...cmdResult.args].join(" ")}`,
                        recommendation: "Validate --root argument against an allowed prefix before use.",
                    }));
                }
                // Spawn error is informational only — traversal to a non-existent path is expected.
                return findings;
            },
        });
    }
    finally {
        // Always remove the temp workspace, even when the check throws.
        await workspace.cleanup();
    }
}
/**
 * Runs the CLI with an adversarial value for --out and reports a blocker
 * finding if anything was written into the workspace's "outside" sentinel
 * directory while the command ran.
 *
 * @param {object} target - CLI target; handed to buildCliCommand, and
 *   target.timeoutMs is forwarded as the command timeout.
 * @param {object} testInput - Adversarial case; its id, description and value
 *   fields are read.
 * @returns {Promise<object>} The check result produced by runAdversarialCheck.
 */
export async function checkOutPathTraversal(target, testInput) {
    const workspace = createTempWorkspace("p4-out-");
    try {
        const beforeOutside = snapshotDir(workspace.outsideDir);
        // Values that look absolute (leading "/" or a Windows drive prefix such as
        // "C:\") are passed through verbatim; anything else is resolved relative to
        // the workspace output directory so "../" segments climb out of it.
        // NOTE(review): the resulting path is not guaranteed to fall inside
        // workspace.outsideDir, which is the only location inspected below —
        // confirm the workspace layout makes the relative cases land there.
        const looksAbsolute = testInput.value.startsWith("/") || /^[A-Za-z]:\\/.test(testInput.value);
        const escapedOut = looksAbsolute
            ? testInput.value
            : path.resolve(workspace.outputDir, testInput.value);
        const { command, args } = buildCliCommand(target, [
            "--root",
            workspace.sourceDir,
            "--out",
            escapedOut,
        ]);
        return await runAdversarialCheck({
            id: `path-traversal-out-${testInput.id}`,
            name: `Path traversal via --out: ${testInput.description}`,
            category: "cli-adversarial",
            severity: "blocker",
            command,
            args,
            cwd: workspace.root,
            timeoutMs: target.timeoutMs,
            evaluate: (cmdResult) => {
                const findings = [];
                const afterOutside = snapshotDir(workspace.outsideDir);
                const outsideDiff = diffSnapshots(beforeOutside, afterOutside);
                if (outsideDiff.added.length > 0 || outsideDiff.modified.length > 0) {
                    // List modified entries as well as added ones: the finding fires
                    // for either, so a modification-only escape must not produce an
                    // empty file list.
                    // NOTE(review): assumes `modified` holds path strings like `added`.
                    const touched = [...outsideDiff.added, ...outsideDiff.modified];
                    findings.push(makeFinding({
                        id: `path-traversal-out-${testInput.id}-escape`,
                        title: "Write escaped workspace via --out traversal",
                        severity: "blocker",
                        category: "cli-adversarial",
                        description: `CLI wrote outside workspace: ${touched.join(", ")}`,
                        evidence: `--out value: ${escapedOut}, stdout: ${cmdResult.stdout.slice(0, 200)}`,
                        recommendation: "Validate --out argument is within an allowed prefix before writing.",
                    }));
                }
                return findings;
            },
        });
    }
    finally {
        // Always remove the temp workspace, even when the check throws.
        await workspace.cleanup();
    }
}
/**
 * Runs the CLI with an adversarial value for --index and reports a blocker
 * finding if anything was written into the workspace's "outside" sentinel
 * directory while the command ran.
 *
 * @param {object} target - CLI target; handed to buildCliCommand, and
 *   target.timeoutMs is forwarded as the command timeout.
 * @param {object} testInput - Adversarial case; its id, description and value
 *   fields are read.
 * @returns {Promise<object>} The check result produced by runAdversarialCheck.
 */
export async function checkIndexPathTraversal(target, testInput) {
    const workspace = createTempWorkspace("p4-idx-");
    try {
        const beforeOutside = snapshotDir(workspace.outsideDir);
        // Values that look absolute (leading "/" or a Windows drive prefix such as
        // "C:\") are passed through verbatim; anything else is resolved relative to
        // the workspace index directory so "../" segments climb out of it.
        const looksAbsolute = testInput.value.startsWith("/") || /^[A-Za-z]:\\/.test(testInput.value);
        const escapedIndex = looksAbsolute
            ? testInput.value
            : path.resolve(workspace.indexDir, testInput.value);
        const { command, args } = buildCliCommand(target, [
            "--root",
            workspace.sourceDir,
            "--index",
            escapedIndex,
        ]);
        return await runAdversarialCheck({
            id: `path-traversal-index-${testInput.id}`,
            name: `Path traversal via --index: ${testInput.description}`,
            category: "cli-adversarial",
            severity: "blocker",
            command,
            args,
            cwd: workspace.root,
            timeoutMs: target.timeoutMs,
            evaluate: (cmdResult) => {
                const findings = [];
                const afterOutside = snapshotDir(workspace.outsideDir);
                const outsideDiff = diffSnapshots(beforeOutside, afterOutside);
                if (outsideDiff.added.length > 0 || outsideDiff.modified.length > 0) {
                    // List modified entries as well as added ones: the finding fires
                    // for either, so a modification-only escape must not produce an
                    // empty file list.
                    // NOTE(review): assumes `modified` holds path strings like `added`.
                    const touched = [...outsideDiff.added, ...outsideDiff.modified];
                    findings.push(makeFinding({
                        id: `path-traversal-index-${testInput.id}-escape`,
                        title: "Write escaped workspace via --index traversal",
                        severity: "blocker",
                        category: "cli-adversarial",
                        description: `CLI wrote outside workspace: ${touched.join(", ")}`,
                        evidence: `--index value: ${escapedIndex}, stdout: ${cmdResult.stdout.slice(0, 200)}`,
                        recommendation: "Validate --index argument is within an allowed prefix.",
                    }));
                }
                return findings;
            },
        });
    }
    finally {
        // Always remove the temp workspace, even when the check throws.
        await workspace.cleanup();
    }
}
161
+ // ---------------------------------------------------------------------------
162
+ // Safe path handling checks
163
+ // ---------------------------------------------------------------------------
/**
 * Invokes the CLI with an --out path whose last segment contains spaces and
 * turns a spawn failure or a non-zero exit into a "major" finding.
 *
 * The spaced path is only computed here (joined onto the workspace output
 * directory); this function does not create it on disk.
 *
 * @param {object} target - CLI target; handed to buildCliCommand, and
 *   target.timeoutMs is forwarded as the command timeout.
 * @returns {Promise<object>} The check result produced by runAdversarialCheck.
 */
export async function checkPathWithSpaces(target) {
    const workspace = createTempWorkspace("p4-spaces-");

    // Maps the raw command outcome to zero or one finding.
    const evaluate = (cmdResult) => {
        if (cmdResult.spawnError) {
            const spawnFinding = makeFinding({
                id: "path-spaces-spawn-error",
                title: "CLI spawn failed for path with spaces",
                severity: "major",
                category: "cli-adversarial",
                description: `Spawn error: ${cmdResult.spawnError}`,
                recommendation: "Ensure CLI invocation uses argument arrays, not shell strings.",
            });
            return [spawnFinding];
        }
        // A timed-out run is not reported by this branch; only a real non-zero exit is.
        const exitedCleanly = cmdResult.exitCode === 0 || cmdResult.timedOut;
        if (exitedCleanly) {
            return [];
        }
        const exitFinding = makeFinding({
            id: "path-spaces-exit-nonzero",
            title: "CLI exited non-zero for path with spaces",
            severity: "major",
            category: "cli-adversarial",
            description: `Exit code: ${cmdResult.exitCode}, stderr: ${cmdResult.stderr.slice(0, 300)}`,
            recommendation: "Spaces in paths must be handled as literal characters.",
        });
        return [exitFinding];
    };

    try {
        const spacedOut = path.join(workspace.outputDir, "output with spaces");
        const cliArgs = ["--root", workspace.sourceDir, "--out", spacedOut];
        const { command, args } = buildCliCommand(target, cliArgs);
        const checkResult = await runAdversarialCheck({
            id: "path-with-spaces",
            name: "Paths with spaces are handled as literal strings",
            category: "cli-adversarial",
            severity: "major",
            command,
            args,
            cwd: workspace.root,
            timeoutMs: target.timeoutMs,
            evaluate,
        });
        return checkResult;
    }
    finally {
        await workspace.cleanup();
    }
}
/**
 * Invokes the CLI with an --out path whose last segment is the given Unicode
 * directory name and reports a "minor" finding if the process could not be
 * spawned. The exit code is not inspected.
 *
 * NOTE(review): the check id replaces every character outside [a-z0-9] with
 * "-", whereas the finding id embeds the raw subdirectory name; confirm that
 * difference is intentional.
 *
 * @param {object} target - CLI target; handed to buildCliCommand, and
 *   target.timeoutMs is forwarded as the command timeout.
 * @param {string} unicodeSubdir - Directory name appended to the workspace
 *   output directory.
 * @returns {Promise<object>} The check result produced by runAdversarialCheck.
 */
export async function checkUnicodePath(target, unicodeSubdir) {
    const workspace = createTempWorkspace("p4-unicode-");
    try {
        const unicodeOut = path.join(workspace.outputDir, unicodeSubdir);
        const cliArgs = ["--root", workspace.sourceDir, "--out", unicodeOut];
        const { command, args } = buildCliCommand(target, cliArgs);
        const idSuffix = unicodeSubdir.replace(/[^a-z0-9]/gi, "-");
        const checkResult = await runAdversarialCheck({
            id: `unicode-path-${idSuffix}`,
            name: `Unicode path handling: "${unicodeSubdir}"`,
            category: "cli-adversarial",
            severity: "minor",
            command,
            args,
            cwd: workspace.root,
            timeoutMs: target.timeoutMs,
            evaluate: (cmdResult) => (cmdResult.spawnError
                ? [
                    makeFinding({
                        id: `unicode-spawn-error-${unicodeSubdir}`,
                        title: "CLI spawn failed for Unicode path",
                        severity: "minor",
                        category: "cli-adversarial",
                        description: `Spawn error with Unicode path "${unicodeSubdir}": ${cmdResult.spawnError}`,
                        recommendation: "Ensure CLI handles Unicode paths without platform-specific crashes.",
                    }),
                ]
                : []),
        });
        return checkResult;
    }
    finally {
        await workspace.cleanup();
    }
}
/**
 * Invokes the CLI with absolute --root and --out paths that point at the temp
 * workspace's own source and output directories, and reports a "minor" finding
 * if the process could not be spawned. The exit code is not inspected.
 *
 * @param {object} target - CLI target; handed to buildCliCommand, and
 *   target.timeoutMs is forwarded as the command timeout.
 * @returns {Promise<object>} The check result produced by runAdversarialCheck.
 */
export async function checkSafeAbsolutePath(target) {
    const workspace = createTempWorkspace("p4-abs-");
    try {
        const absoluteRoot = path.resolve(workspace.sourceDir);
        const absoluteOut = path.resolve(workspace.outputDir);
        const { command, args } = buildCliCommand(target, [
            "--root",
            absoluteRoot,
            "--out",
            absoluteOut,
        ]);
        const checkResult = await runAdversarialCheck({
            id: "safe-absolute-path",
            name: "Safe absolute path within workspace is accepted",
            category: "cli-adversarial",
            severity: "informational",
            command,
            args,
            cwd: workspace.root,
            timeoutMs: target.timeoutMs,
            evaluate: (cmdResult) => {
                const findings = [];
                if (cmdResult.spawnError) {
                    findings.push(makeFinding({
                        id: "safe-abs-spawn-error",
                        title: "CLI spawn failed for safe absolute path",
                        severity: "minor",
                        category: "cli-adversarial",
                        description: `Spawn error: ${cmdResult.spawnError}`,
                        recommendation: "Investigate CLI invocation for absolute path handling.",
                    }));
                }
                return findings;
            },
        });
        return checkResult;
    }
    finally {
        await workspace.cleanup();
    }
}
309
+ // ---------------------------------------------------------------------------
310
+ // Harness escape detection (infrastructure test)
311
+ // ---------------------------------------------------------------------------
/**
 * Harness self-test: runs the CLI with --escape-to pointing at the workspace's
 * "outside" sentinel directory and verifies that the snapshot/diff machinery
 * notices the resulting change.
 *
 * The outcome is inverted relative to the other checks in this file: a detected
 * write outside the output directory means PASS (detection works), and no
 * detected write means FAIL (detection is broken).
 *
 * Skipped when target.isRealTarget is truthy, since the self-test relies on the
 * --escape-to flag of the fake CLI fixture.
 *
 * @param {object} target - CLI target; isRealTarget and timeoutMs are read, and
 *   the object is handed to buildCliCommand.
 * @returns {Promise<object>} A skipped-check record, or a check result with
 *   status "passed" or "failed".
 */
export async function checkHarnessEscapeDetection(target) {
    const identity = {
        id: "harness-escape-detection",
        name: "Harness escape detection self-test",
        category: "cli-adversarial",
    };
    if (target.isRealTarget) {
        return skippedCheck({
            ...identity,
            reason: "Escape detection self-test only runs against the fake CLI fixture.",
        });
    }
    const workspace = createTempWorkspace("p4-escape-");
    try {
        const beforeOutside = snapshotDir(workspace.outsideDir);
        // Run fake CLI with --escape-to pointing to outsideDir.
        const { command, args } = buildCliCommand(target, [
            "--root",
            workspace.sourceDir,
            "--out",
            workspace.outputDir,
            "--escape-to",
            workspace.outsideDir,
        ]);
        // Take the start timestamp BEFORE the command runs so that startedAt,
        // finishedAt and durationMs describe the same interval.
        const startedAt = new Date().toISOString();
        const cmdResult = await runAndCaptureRaw(command, args, workspace.root, target.timeoutMs);
        const afterOutside = snapshotDir(workspace.outsideDir);
        const outsideDiff = diffSnapshots(beforeOutside, afterOutside);
        const finishedAt = new Date().toISOString();
        // We EXPECT the escape to be detected (fake CLI wrote there intentionally).
        const escapeDetected = outsideDiff.added.length > 0 || outsideDiff.modified.length > 0;
        if (escapeDetected) {
            // Detection works correctly: the harness saw the escape.
            return {
                ...identity,
                status: "passed",
                severity: "informational",
                startedAt,
                finishedAt,
                durationMs: cmdResult.durationMs,
                findings: [],
            };
        }
        // Escape not detected — harness detection infrastructure is broken.
        return {
            ...identity,
            status: "failed",
            severity: "major",
            startedAt,
            finishedAt,
            durationMs: cmdResult.durationMs,
            findings: [
                makeFinding({
                    id: "harness-detection-broken",
                    title: "Harness escape detection did not detect expected write",
                    severity: "major",
                    category: "cli-adversarial",
                    description: "The fake CLI wrote to outsideDir but the harness did not detect it. " +
                        "The file boundary detection mechanism may be broken.",
                    recommendation: "Inspect snapshotDir and diffSnapshots logic.",
                }),
            ],
        };
    }
    finally {
        // Always remove the temp workspace, even when the self-test throws.
        await workspace.cleanup();
    }
}
/**
 * Spawns a command without a shell and with all stdio ignored, then resolves
 * once the child has closed or has failed to start. The returned promise never
 * rejects.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Argument vector, passed through as-is.
 * @param {string} cwd - Working directory for the child.
 * @param {number} timeoutMs - When greater than 0, child.kill() is called after
 *   this many milliseconds; otherwise no timeout is armed.
 * @returns {Promise<{durationMs: number, timedOut: boolean, spawnError?: string}>}
 *   durationMs is wall-clock time from the call until settlement. timedOut is
 *   true when the timeout fired before settlement. spawnError is present only
 *   when spawn() threw or the child emitted "error".
 */
async function runAndCaptureRaw(command, args, cwd, timeoutMs) {
    const started = Date.now();
    return new Promise((resolve) => {
        let settled = false;
        let timedOut = false;
        let timeout;
        // Single settlement path shared by every outcome; later calls are no-ops
        // because a failed spawn can emit both "error" and "close".
        const settle = (failure) => {
            if (settled) {
                return;
            }
            settled = true;
            if (timeout) {
                clearTimeout(timeout);
            }
            const outcome = { durationMs: Date.now() - started, timedOut };
            if (failure !== undefined) {
                outcome.spawnError = failure instanceof Error ? failure.message : String(failure);
            }
            resolve(outcome);
        };
        let child;
        try {
            child = spawn(command, args, {
                cwd,
                shell: false,
                stdio: ["ignore", "ignore", "ignore"],
                env: { ...process.env },
            });
        }
        catch (err) {
            // spawn() throws synchronously for invalid arguments.
            settle(err);
            return;
        }
        if (timeoutMs > 0) {
            timeout = setTimeout(() => {
                timedOut = true;
                try {
                    child.kill();
                }
                catch { /* best effort: the child may already have exited */ }
            }, timeoutMs);
        }
        child.on("close", () => settle());
        child.on("error", (err) => settle(err));
    });
}
@@ -0,0 +1,144 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Adversarial path input definitions for CLI harness tests.
3
+ // These represent the attack surface entries from the security test matrix.
4
+ // ---------------------------------------------------------------------------
5
+ // ---------------------------------------------------------------------------
6
+ // Path traversal cases
7
+ // ---------------------------------------------------------------------------
8
/**
 * Relative path-traversal inputs for the CLI harness.
 *
 * Every entry has category "traversal-relative" and is flagged
 * `expectedRejection: true`.
 */
export const PATH_TRAVERSAL_CASES = [
    {
        id: "traversal-dotdot-unix",
        description: "Unix-style ../.. traversal",
        value: "../../package.json",
        category: "traversal-relative",
        expectedRejection: true,
    },
    {
        id: "traversal-dotdot-windows",
        // Escaped so the text reads `..\..\`, matching the separators in `value`.
        description: "Windows-style ..\\..\\ traversal",
        value: "..\\..\\package.json",
        category: "traversal-relative",
        expectedRejection: true,
    },
    {
        id: "traversal-many-levels",
        description: "Many levels of traversal",
        value: "../../../../../../../etc/passwd",
        category: "traversal-relative",
        expectedRejection: true,
    },
];
31
+ // ---------------------------------------------------------------------------
32
+ // Absolute path cases
33
+ // ---------------------------------------------------------------------------
34
/**
 * Absolute-path inputs (Unix and Windows forms), all in category
 * "traversal-absolute".
 *
 * Rows are [id, description, value, expectedRejection]. The /tmp row is the
 * only one not flagged for rejection: it may or may not be rejected
 * depending on the CLI.
 */
export const ABSOLUTE_PATH_CASES = [
    ["absolute-unix-etc", "Absolute Unix system path", "/etc/hosts", true],
    ["absolute-unix-tmp", "Absolute Unix tmp path that may be different from workspace", "/tmp/escape-test-absolute", false],
    ["absolute-windows-system", "Absolute Windows system path", "C:\\Windows\\System32", true],
].map(([id, description, value, expectedRejection]) => ({
    id,
    description,
    value,
    category: "traversal-absolute",
    expectedRejection,
}));
57
+ // ---------------------------------------------------------------------------
58
+ // Safe path cases (should be handled correctly, not rejected)
59
+ // ---------------------------------------------------------------------------
60
/**
 * Path inputs containing spaces. Rows are [id, description, value]; each
 * becomes a case in category "spaces" with `expectedRejection: false`.
 */
export const SPACES_PATH_CASES = [
    ["spaces-simple", "Path with a space", "my source dir"],
    ["spaces-multiple", "Path with multiple spaces", "my dev kit lab output"],
].map(([id, description, value]) => ({
    id,
    description,
    value,
    category: "spaces",
    expectedRejection: false,
}));
76
/**
 * Path inputs containing shell metacharacters (semicolon, double quote,
 * ampersand). Rows are [id, description, value]; each becomes a case in
 * category "metacharacters" with `expectedRejection: false` — the
 * characters should be treated as literal.
 */
export const METACHAR_PATH_CASES = [
    ["metachar-semicolon", "Path with semicolon", "output;rm-rf"],
    ["metachar-quote", "Path with double quote", 'out"put'],
    ["metachar-ampersand", "Path with ampersand", "out&put"],
].map(([id, description, value]) => ({
    id,
    description,
    value,
    category: "metacharacters",
    expectedRejection: false,
}));
99
/**
 * Path inputs containing non-ASCII characters (accented Latin and CJK).
 * Rows are [id, description, value]; each becomes a case in category
 * "unicode" with `expectedRejection: false`.
 */
export const UNICODE_PATH_CASES = [
    ["unicode-latin", "Path with accented Latin characters", "résultats"],
    ["unicode-cjk", "Path with CJK characters", "出力"],
].map(([id, description, value]) => ({
    id,
    description,
    value,
    category: "unicode",
    expectedRejection: false,
}));
115
/**
 * Long directory-name inputs: a single case whose value is the letter "a"
 * repeated 200 times, in category "long-name" with
 * `expectedRejection: false`.
 */
export const LONG_NAME_CASES = Array.of({
    id: "long-name-200",
    description: "200-character directory name (within OS limits on most systems)",
    // Pad the empty string out to exactly 200 "a" characters.
    value: "".padEnd(200, "a"),
    category: "long-name",
    expectedRejection: false,
});
124
/**
 * Inputs naming a directory that does not exist: a single case in category
 * "missing". It is not flagged for rejection because the CLI may create the
 * directory or reject it; the harness checks the results.
 */
export const MISSING_PATH_CASES = Array.of({
    id: "missing-dir",
    description: "Non-existent directory",
    value: "definitely-not-a-real-directory-xyz-123",
    category: "missing",
    expectedRejection: false,
});
133
+ // ---------------------------------------------------------------------------
134
+ // All cases combined into one flat list, in group declaration order
135
+ // ---------------------------------------------------------------------------
136
/**
 * Every path test case from the groups above as one flat array, in group
 * declaration order: traversal, absolute, spaces, metacharacters, unicode,
 * long-name, missing.
 */
export const ALL_PATH_TEST_INPUTS = [
    PATH_TRAVERSAL_CASES,
    ABSOLUTE_PATH_CASES,
    SPACES_PATH_CASES,
    METACHAR_PATH_CASES,
    UNICODE_PATH_CASES,
    LONG_NAME_CASES,
    MISSING_PATH_CASES,
].flat(); // one level only: each group is an array of plain case objects