@dailephd/my-dev-kit-lab 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/README.md +272 -0
  2. package/benchmarks/contracts/benchmark-project-profiles.json +1199 -0
  3. package/benchmarks/contracts/todo-behavior.md +70 -0
  4. package/benchmarks/contracts/todo-benchmark-case.json +227 -0
  5. package/benchmarks/projects/README.md +34 -0
  6. package/benchmarks/projects/task-analytics-large-mixed/README.md +1 -0
  7. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/__init__.py +3 -0
  8. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/fixtures.py +6 -0
  9. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/metrics.py +29 -0
  10. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/models.py +21 -0
  11. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/parser.py +16 -0
  12. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/pipeline.py +9 -0
  13. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/quality.py +8 -0
  14. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/reporting.py +11 -0
  15. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_metrics.py +19 -0
  16. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_parser.py +15 -0
  17. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_quality.py +19 -0
  18. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_reporting.py +15 -0
  19. package/benchmarks/projects/task-analytics-large-mixed/ts/package.json +12 -0
  20. package/benchmarks/projects/task-analytics-large-mixed/ts/src/index.ts +11 -0
  21. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/analyticsSnapshot.ts +20 -0
  22. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/project.ts +5 -0
  23. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/task.ts +10 -0
  24. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/buildProjectLeaderboard.ts +7 -0
  25. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/formatTaskHealthReport.ts +13 -0
  26. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/buildAnalyticsSnapshot.ts +39 -0
  27. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/completeTask.ts +10 -0
  28. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/createTask.ts +21 -0
  29. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/listTasksByProject.ts +6 -0
  30. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/projectStore.ts +20 -0
  31. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/taskStore.ts +44 -0
  32. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/projectValidation.ts +12 -0
  33. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/taskValidation.ts +18 -0
  34. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/buildAnalyticsSnapshot.test.ts +48 -0
  35. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/completeTask.test.ts +21 -0
  36. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/createTask.test.ts +31 -0
  37. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/listTasksByProject.test.ts +18 -0
  38. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/reporting.test.ts +19 -0
  39. package/benchmarks/projects/task-analytics-large-mixed/ts/tsconfig.json +12 -0
  40. package/benchmarks/projects/task-analytics-large-mixed/ts/vitest.config.ts +5 -0
  41. package/benchmarks/projects/task-workflow-medium-ts/README.md +1 -0
  42. package/benchmarks/projects/task-workflow-medium-ts/package.json +12 -0
  43. package/benchmarks/projects/task-workflow-medium-ts/src/index.ts +9 -0
  44. package/benchmarks/projects/task-workflow-medium-ts/src/models/project.ts +6 -0
  45. package/benchmarks/projects/task-workflow-medium-ts/src/models/task.ts +39 -0
  46. package/benchmarks/projects/task-workflow-medium-ts/src/services/completeTask.ts +15 -0
  47. package/benchmarks/projects/task-workflow-medium-ts/src/services/createTask.ts +26 -0
  48. package/benchmarks/projects/task-workflow-medium-ts/src/services/filterTasks.ts +17 -0
  49. package/benchmarks/projects/task-workflow-medium-ts/src/services/importTasks.ts +33 -0
  50. package/benchmarks/projects/task-workflow-medium-ts/src/services/summarizeTasks.ts +30 -0
  51. package/benchmarks/projects/task-workflow-medium-ts/src/store/taskStore.ts +76 -0
  52. package/benchmarks/projects/task-workflow-medium-ts/src/utils/deterministicId.ts +3 -0
  53. package/benchmarks/projects/task-workflow-medium-ts/src/validation/taskValidation.ts +45 -0
  54. package/benchmarks/projects/task-workflow-medium-ts/tests/completeTask.test.ts +16 -0
  55. package/benchmarks/projects/task-workflow-medium-ts/tests/createTask.test.ts +21 -0
  56. package/benchmarks/projects/task-workflow-medium-ts/tests/filterTasks.test.ts +18 -0
  57. package/benchmarks/projects/task-workflow-medium-ts/tests/importTasks.test.ts +22 -0
  58. package/benchmarks/projects/task-workflow-medium-ts/tests/summarizeTasks.test.ts +29 -0
  59. package/benchmarks/projects/task-workflow-medium-ts/tsconfig.json +12 -0
  60. package/benchmarks/projects/task-workflow-medium-ts/vitest.config.ts +5 -0
  61. package/benchmarks/projects/todo-js/README.md +3 -0
  62. package/benchmarks/projects/todo-js/package.json +11 -0
  63. package/benchmarks/projects/todo-js/src/index.js +2 -0
  64. package/benchmarks/projects/todo-js/src/taskService.js +37 -0
  65. package/benchmarks/projects/todo-js/src/taskStore.js +28 -0
  66. package/benchmarks/projects/todo-js/tests/taskService.test.js +45 -0
  67. package/benchmarks/projects/todo-js/vitest.config.js +5 -0
  68. package/benchmarks/projects/todo-mixed-ts-py/README.md +3 -0
  69. package/benchmarks/projects/todo-mixed-ts-py/package.json +13 -0
  70. package/benchmarks/projects/todo-mixed-ts-py/python/task_service.py +76 -0
  71. package/benchmarks/projects/todo-mixed-ts-py/src/taskCli.ts +38 -0
  72. package/benchmarks/projects/todo-mixed-ts-py/tests/mixedBoundary.test.ts +18 -0
  73. package/benchmarks/projects/todo-mixed-ts-py/tsconfig.json +12 -0
  74. package/benchmarks/projects/todo-mixed-ts-py/vitest.config.ts +5 -0
  75. package/benchmarks/projects/todo-python/README.md +3 -0
  76. package/benchmarks/projects/todo-python/src/__init__.py +4 -0
  77. package/benchmarks/projects/todo-python/src/task_service.py +32 -0
  78. package/benchmarks/projects/todo-python/src/task_store.py +28 -0
  79. package/benchmarks/projects/todo-python/tests/test_task_service.py +52 -0
  80. package/benchmarks/projects/todo-ts/README.md +3 -0
  81. package/benchmarks/projects/todo-ts/package.json +12 -0
  82. package/benchmarks/projects/todo-ts/src/index.ts +2 -0
  83. package/benchmarks/projects/todo-ts/src/taskService.ts +41 -0
  84. package/benchmarks/projects/todo-ts/src/taskStore.ts +34 -0
  85. package/benchmarks/projects/todo-ts/tests/taskService.test.ts +45 -0
  86. package/benchmarks/projects/todo-ts/tsconfig.json +12 -0
  87. package/benchmarks/projects/todo-ts/vitest.config.ts +5 -0
  88. package/dist/scripts/build-gallery.js +3 -0
  89. package/dist/scripts/capture-demo-report.js +3 -0
  90. package/dist/scripts/evaluate-token-savings.js +2 -0
  91. package/dist/scripts/experiments/describeExperiment.js +143 -0
  92. package/dist/scripts/experiments/listExperiments.js +44 -0
  93. package/dist/scripts/experiments/runExperiment.js +199 -0
  94. package/dist/scripts/generate-experiment-plots.js +3 -0
  95. package/dist/scripts/generate-prompt-variants.js +2 -0
  96. package/dist/scripts/render-experiment-report.js +2 -0
  97. package/dist/scripts/run-agent-prompt.js +2 -0
  98. package/dist/scripts/run-controlled-experiment.js +2 -0
  99. package/dist/scripts/run-final-demo.js +3 -0
  100. package/dist/scripts/run-lab-demo.js +5 -0
  101. package/dist/scripts/run-visualization-demos.js +3 -0
  102. package/dist/scripts/security/runCodeql.js +57 -0
  103. package/dist/scripts/security/runDependencyChecks.js +57 -0
  104. package/dist/scripts/security/runFuzzSmoke.js +29 -0
  105. package/dist/scripts/security/runPackageChecks.js +56 -0
  106. package/dist/scripts/security/runSemgrep.js +63 -0
  107. package/dist/scripts/security/validate.js +117 -0
  108. package/dist/scripts/verify-benchmarks.js +202 -0
  109. package/dist/src/agents/adapters/claudeAdapter.js +37 -0
  110. package/dist/src/agents/adapters/codexAdapter.js +110 -0
  111. package/dist/src/agents/adapters/fakeAgentAdapter.js +101 -0
  112. package/dist/src/agents/agentRegistry.js +21 -0
  113. package/dist/src/agents/index.js +7 -0
  114. package/dist/src/agents/parseAgentTokenUsage.js +137 -0
  115. package/dist/src/agents/runAgentPrompt.js +38 -0
  116. package/dist/src/agents/types.js +1 -0
  117. package/dist/src/commands/buildGalleryCommand.js +56 -0
  118. package/dist/src/commands/captureDemoReport.js +116 -0
  119. package/dist/src/commands/evaluateTokenSavings.js +175 -0
  120. package/dist/src/commands/generateExperimentPlotsCommand.js +38 -0
  121. package/dist/src/commands/generatePromptVariants.js +67 -0
  122. package/dist/src/commands/renderExperimentReportCommand.js +131 -0
  123. package/dist/src/commands/runAgentPromptCommand.js +132 -0
  124. package/dist/src/commands/runControlledExperimentCommand.js +174 -0
  125. package/dist/src/commands/runFinalDemoCommand.js +123 -0
  126. package/dist/src/commands/runLabDemo.js +62 -0
  127. package/dist/src/commands/runVisualizationDemosCommand.js +67 -0
  128. package/dist/src/core/commandLine.js +59 -0
  129. package/dist/src/core/countTokens.js +8 -0
  130. package/dist/src/core/fileGlobs.js +100 -0
  131. package/dist/src/core/localProjectTarget.js +75 -0
  132. package/dist/src/core/pathSafety.js +19 -0
  133. package/dist/src/core/pythonCommand.js +30 -0
  134. package/dist/src/core/resolveCommand.js +110 -0
  135. package/dist/src/core/runMeasuredCommand.js +143 -0
  136. package/dist/src/evaluation/benchmarkMetadata.js +207 -0
  137. package/dist/src/evaluation/buildExperimentMatrix.js +75 -0
  138. package/dist/src/evaluation/classifyAgentRunOutcome.js +40 -0
  139. package/dist/src/evaluation/compareExperimentRuns.js +79 -0
  140. package/dist/src/evaluation/compareTokenSavings.js +47 -0
  141. package/dist/src/evaluation/controlledExperimentTypes.js +1 -0
  142. package/dist/src/evaluation/index.js +18 -0
  143. package/dist/src/evaluation/parseAgentAnswer.js +230 -0
  144. package/dist/src/evaluation/projectComplexity.js +126 -0
  145. package/dist/src/evaluation/projectFileTree.js +83 -0
  146. package/dist/src/evaluation/readEvaluationCases.js +59 -0
  147. package/dist/src/evaluation/renderTokenSavingsReportInput.js +55 -0
  148. package/dist/src/evaluation/runControlledExperiment.js +158 -0
  149. package/dist/src/evaluation/runMyDevKitRetrieval.js +197 -0
  150. package/dist/src/evaluation/runRawFullFileBaseline.js +31 -0
  151. package/dist/src/evaluation/scoreCorrectness.js +127 -0
  152. package/dist/src/evaluation/types.js +1 -0
  153. package/dist/src/evaluation/writeExperimentArtifacts.js +104 -0
  154. package/dist/src/evaluation/writeTokenSavingsArtifacts.js +57 -0
  155. package/dist/src/experiments/config.js +24 -0
  156. package/dist/src/experiments/defaultRegistry.js +7 -0
  157. package/dist/src/experiments/errors.js +18 -0
  158. package/dist/src/experiments/index.js +9 -0
  159. package/dist/src/experiments/outputPaths.js +25 -0
  160. package/dist/src/experiments/plugins/contextStrategyComparison/config.js +37 -0
  161. package/dist/src/experiments/plugins/contextStrategyComparison/index.js +3 -0
  162. package/dist/src/experiments/plugins/contextStrategyComparison/plugin.js +83 -0
  163. package/dist/src/experiments/plugins/contextStrategyComparison/resultMapping.js +260 -0
  164. package/dist/src/experiments/plugins/index.js +1 -0
  165. package/dist/src/experiments/registry.js +43 -0
  166. package/dist/src/experiments/results.js +48 -0
  167. package/dist/src/experiments/runner.js +181 -0
  168. package/dist/src/experiments/target.js +8 -0
  169. package/dist/src/experiments/types.js +1 -0
  170. package/dist/src/gallery/index.js +2 -0
  171. package/dist/src/gallery/types.js +1 -0
  172. package/dist/src/gallery/writeGalleryManifest.js +214 -0
  173. package/dist/src/index.js +12 -0
  174. package/dist/src/plots/buildExperimentPlotData.js +137 -0
  175. package/dist/src/plots/index.js +4 -0
  176. package/dist/src/plots/renderSvgChart.js +82 -0
  177. package/dist/src/plots/types.js +1 -0
  178. package/dist/src/plots/writePlotArtifacts.js +46 -0
  179. package/dist/src/prompts/buildPromptContext.js +68 -0
  180. package/dist/src/prompts/generateMyDevKitPrompt.js +106 -0
  181. package/dist/src/prompts/generatePromptVariants.js +36 -0
  182. package/dist/src/prompts/generateRawFullFilePrompt.js +97 -0
  183. package/dist/src/prompts/index.js +7 -0
  184. package/dist/src/prompts/measurePromptComplexity.js +41 -0
  185. package/dist/src/prompts/types.js +1 -0
  186. package/dist/src/prompts/writePromptArtifacts.js +43 -0
  187. package/dist/src/report/buildExperimentReportInput.js +339 -0
  188. package/dist/src/report/experimentReportTypes.js +1 -0
  189. package/dist/src/report/experiments/buildPluginExperimentReport.js +153 -0
  190. package/dist/src/report/experiments/experimentReportModel.js +1 -0
  191. package/dist/src/report/experiments/index.js +4 -0
  192. package/dist/src/report/experiments/renderPluginExperimentReportHtml.js +133 -0
  193. package/dist/src/report/experiments/writePluginExperimentReports.js +30 -0
  194. package/dist/src/report/index.js +8 -0
  195. package/dist/src/report/renderExperimentHtmlReport.js +354 -0
  196. package/dist/src/report/renderHtmlReport.js +103 -0
  197. package/dist/src/report/types.js +10 -0
  198. package/dist/src/report/writeExperimentReportArtifacts.js +38 -0
  199. package/dist/src/report/writeReportArtifacts.js +39 -0
  200. package/dist/src/screenshot/captureReportScreenshot.js +75 -0
  201. package/dist/src/screenshot/index.js +2 -0
  202. package/dist/src/screenshot/types.js +1 -0
  203. package/dist/src/securityValidation/artifacts.js +15 -0
  204. package/dist/src/securityValidation/cliAdversarial/adversarialCliConfig.js +38 -0
  205. package/dist/src/securityValidation/cliAdversarial/dataVolumeChecks.js +194 -0
  206. package/dist/src/securityValidation/cliAdversarial/jsonStdoutChecks.js +359 -0
  207. package/dist/src/securityValidation/cliAdversarial/malformedArtifactChecks.js +284 -0
  208. package/dist/src/securityValidation/cliAdversarial/malformedArtifactFixtures.js +79 -0
  209. package/dist/src/securityValidation/cliAdversarial/pathBoundaryChecks.js +431 -0
  210. package/dist/src/securityValidation/cliAdversarial/pathCases.js +144 -0
  211. package/dist/src/securityValidation/cliAdversarial/readOnlyBoundaryChecks.js +294 -0
  212. package/dist/src/securityValidation/cliAdversarial/runAdversarialCheck.js +149 -0
  213. package/dist/src/securityValidation/cliAdversarial/subprocessSafetyChecks.js +214 -0
  214. package/dist/src/securityValidation/cliAdversarial/tempWorkspace.js +160 -0
  215. package/dist/src/securityValidation/commandRunner.js +136 -0
  216. package/dist/src/securityValidation/config.js +39 -0
  217. package/dist/src/securityValidation/dependencies/parseNpmAudit.js +115 -0
  218. package/dist/src/securityValidation/dependencies/parseNpmLs.js +71 -0
  219. package/dist/src/securityValidation/dependencies/parseNpmOutdated.js +41 -0
  220. package/dist/src/securityValidation/dependencies/runDependencyChecks.js +239 -0
  221. package/dist/src/securityValidation/dependencies/runOsvScanner.js +43 -0
  222. package/dist/src/securityValidation/fuzz/fuzzHarness.js +61 -0
  223. package/dist/src/securityValidation/fuzz/fuzzTargets.js +204 -0
  224. package/dist/src/securityValidation/fuzz/randomInput.js +0 -0
  225. package/dist/src/securityValidation/index.js +34 -0
  226. package/dist/src/securityValidation/packageChecks/forbiddenPackageContents.js +67 -0
  227. package/dist/src/securityValidation/packageChecks/parseNpmPackDryRun.js +56 -0
  228. package/dist/src/securityValidation/packageChecks/runPackageChecks.js +88 -0
  229. package/dist/src/securityValidation/report/renderSecurityReport.js +248 -0
  230. package/dist/src/securityValidation/report/securityReportTypes.js +1 -0
  231. package/dist/src/securityValidation/staticScans/codeql.js +66 -0
  232. package/dist/src/securityValidation/staticScans/semgrep.js +180 -0
  233. package/dist/src/securityValidation/testMatrix.js +535 -0
  234. package/dist/src/securityValidation/types.js +34 -0
  235. package/dist/src/securityValidation/validate/resolveTarget.js +32 -0
  236. package/dist/src/securityValidation/validate/runSecurityValidation.js +169 -0
  237. package/dist/src/securityValidation/validate/verdict.js +73 -0
  238. package/dist/src/visualizationDemos/buildMyDevKitVisualizationCommands.js +59 -0
  239. package/dist/src/visualizationDemos/index.js +4 -0
  240. package/dist/src/visualizationDemos/runVisualizationDemos.js +82 -0
  241. package/dist/src/visualizationDemos/types.js +1 -0
  242. package/dist/src/visualizationDemos/writeVisualizationDemoArtifacts.js +25 -0
  243. package/docs/METRICS.md +286 -0
  244. package/examples/demo-report-input.json +78 -0
  245. package/examples/lab-demo-cases.json +35 -0
  246. package/examples/real-agent-campaign-cases.json +118 -0
  247. package/examples/token-savings-cases.json +122 -0
  248. package/package.json +91 -0
  249. package/tests/fixtures/fake-adversarial-cli.js +152 -0
  250. package/tests/fixtures/fake-my-dev-kit-cli.js +83 -0
@@ -0,0 +1,38 @@
1
+ import path from "node:path";
2
// Absolute path of the deterministic fake CLI fixture. It is resolved against
// the current working directory (not this module's location), so the result
// depends on where the process was started.
const FAKE_CLI_PATH = path.resolve("tests", "fixtures", "fake-adversarial-cli.js");
3
/**
 * Selects the CLI that the adversarial checks run against.
 *
 * When MY_DEV_KIT_SECURITY_TARGET_COMMAND is set to a non-blank value, that
 * value (with surrounding whitespace trimmed) becomes the opt-in "real" target:
 *   MY_DEV_KIT_SECURITY_TARGET_COMMAND=<target> npm run test:security
 * Otherwise the deterministic fake fixture at FAKE_CLI_PATH is used.
 *
 * NOTE(review): the value is passed as ONE argument to the current Node
 * executable (process.execPath); it is not split into words or looked up on
 * PATH here. It therefore has to be something `node <value>` can load, e.g.
 * the path of a CLI entry script - TODO confirm this matches the intent.
 *
 * @returns {{ nodeExec: string, cliArgs: string[], isRealTarget: boolean, timeoutMs: number }}
 *   Target descriptor. Real targets get a 30s timeout, the fake fixture 10s.
 */
export function getAdversarialCliTarget() {
    // Trim so that a blank or whitespace-padded variable (easy to produce in CI
    // configuration) does not yield a "real" target with an empty/padded argument.
    const realTarget = process.env.MY_DEV_KIT_SECURITY_TARGET_COMMAND?.trim();
    if (realTarget) {
        return {
            nodeExec: process.execPath,
            cliArgs: [realTarget],
            isRealTarget: true,
            timeoutMs: 30_000,
        };
    }
    return {
        nodeExec: process.execPath,
        cliArgs: [FAKE_CLI_PATH],
        isRealTarget: false,
        timeoutMs: 10_000,
    };
}
30
/**
 * Builds a spawn-ready command from a CLI target descriptor.
 *
 * @param {{ nodeExec: string, cliArgs: string[] }} target - Descriptor whose
 *   `nodeExec` becomes the command and whose `cliArgs` lead the argument list.
 * @param {string[]} [extraArgs=[]] - Arguments appended after `target.cliArgs`.
 *   Optional: omitting it yields just the target's own arguments.
 * @returns {{ command: string, args: string[] }} A fresh args array; neither
 *   input array is mutated.
 */
export function buildCliCommand(target, extraArgs = []) {
    const { nodeExec, cliArgs } = target;
    return {
        command: nodeExec,
        args: [...cliArgs, ...extraArgs],
    };
}
@@ -0,0 +1,194 @@
1
+ import { mkdirSync, writeFileSync } from "node:fs";
2
+ import path from "node:path";
3
+ import { buildCliCommand } from "./adversarialCliConfig.js";
4
+ import { makeFinding, runAdversarialCheck } from "./runAdversarialCheck.js";
5
+ import { createTempWorkspace, diffSnapshots, snapshotDir } from "./tempWorkspace.js";
6
+ // ---------------------------------------------------------------------------
7
+ // Data-volume smoke checks
8
+ //
9
+ // Verifies that the CLI handles unusually large inputs without crashing
10
+ // or corrupting source files. Tests are bounded to stay CI-fast.
11
+ //
12
+ // Smoke-level targets:
13
+ // - Large source file: 5,000 lines (≈ 290KB)
14
+ // - Many files: 100 source files in the workspace
15
+ // - Deeply nested directory: 10 levels of subdirectories
16
+ // ---------------------------------------------------------------------------
17
// Smoke-level sizes used by the data-volume checks in this module.
const LARGE_FILE_LINES = 5000; // lines written into the single large source file
const MANY_FILES_COUNT = 100; // number of generated source files
const DEEP_NESTING_LEVELS = 10; // depth of the generated directory chain
20
/**
 * Produces the text of a synthetic TypeScript module with one exported
 * constant per line.
 *
 * @param {number} lineCount - Number of `export const value_<i>` lines to emit.
 * @returns {string} The lines joined with "\n" plus a trailing newline (so a
 *   count of zero yields just "\n").
 */
function generateLargeFileContent(lineCount) {
    const rows = [];
    let index = 0;
    while (index < lineCount) {
        rows.push(`export const value_${index} = ${index}; // line ${index} of large file`);
        index += 1;
    }
    return `${rows.join("\n")}\n`;
}
27
/**
 * Creates `dir` (including missing parents) and fills it with `count` small
 * TypeScript modules named module_0000.ts, module_0001.ts, ...
 *
 * @param {string} dir - Directory to create and populate.
 * @param {number} count - Number of module files to write.
 */
function populateManyFiles(dir, count) {
    mkdirSync(dir, { recursive: true });
    let index = 0;
    while (index < count) {
        // Zero-pad to four digits so the generated names sort in numeric order.
        const suffix = String(index).padStart(4, "0");
        const filePath = path.join(dir, `module_${suffix}.ts`);
        const contents = `export const id = ${index};\nexport function fn_${index}() { return id; }\n`;
        writeFileSync(filePath, contents, "utf8");
        index += 1;
    }
}
34
/**
 * Builds a chain of nested directories level_0/level_1/... under `baseDir`,
 * writing one small deep_<i>.ts module into each level.
 *
 * @param {string} baseDir - Directory under which the chain is created.
 * @param {number} levels - Number of nested levels to create.
 */
function populateDeepNesting(baseDir, levels) {
    let dirPath = baseDir;
    let depth = 0;
    while (depth < levels) {
        // Each iteration descends one directory further than the previous one.
        dirPath = path.join(dirPath, `level_${depth}`);
        mkdirSync(dirPath, { recursive: true });
        const modulePath = path.join(dirPath, `deep_${depth}.ts`);
        writeFileSync(modulePath, `export const depth = ${depth};\n`, "utf8");
        depth += 1;
    }
}
42
+ // ---------------------------------------------------------------------------
43
+ // Exported checks
44
+ // ---------------------------------------------------------------------------
45
/**
 * Runs the CLI against a temp workspace whose source directory holds one large
 * generated file (LARGE_FILE_LINES lines) and produces a blocker finding when
 * the source snapshot shows modified or removed files afterwards.
 *
 * @param target - CLI target descriptor; handed to buildCliCommand and read
 *   for `timeoutMs`.
 * @returns Whatever runAdversarialCheck resolves to for check id
 *   "huge-source-file". The temp workspace is cleaned up in all cases.
 */
export async function checkHugeSourceFile(target) {
    const workspace = createTempWorkspace("p5-large-file-");
    try {
        // Seed the source directory with the large file before snapshotting it.
        writeFileSync(
            path.join(workspace.sourceDir, "large-module.ts"),
            generateLargeFileContent(LARGE_FILE_LINES),
            "utf8",
        );
        const sourceBefore = snapshotDir(workspace.sourceDir);
        const cli = buildCliCommand(target, ["--root", workspace.sourceDir, "--out", workspace.outputDir]);
        // Compares the source directory against the pre-run snapshot; only
        // modified and removed entries count as problems here.
        const evaluate = () => {
            const sourceAfter = snapshotDir(workspace.sourceDir);
            const changes = diffSnapshots(sourceBefore, sourceAfter);
            const touched = [...changes.modified, ...changes.removed];
            if (touched.length === 0) {
                return [];
            }
            const finding = makeFinding({
                id: "large-file-source-modified",
                title: "Source files were modified while processing a large file",
                severity: "blocker",
                category: "cli-adversarial",
                description: `Source files modified: ${touched.join(", ")}`,
                affectedFiles: touched,
                recommendation: "CLI must treat large source files as read-only, same as any other source file.",
            });
            return [finding];
        };
        return await runAdversarialCheck({
            id: "huge-source-file",
            name: `Large source file (${LARGE_FILE_LINES} lines) is handled safely`,
            category: "cli-adversarial",
            severity: "major",
            command: cli.command,
            args: cli.args,
            cwd: workspace.root,
            timeoutMs: target.timeoutMs,
            evaluate,
        });
    }
    finally {
        await workspace.cleanup();
    }
}
96
/**
 * Runs the CLI against a temp workspace whose source directory was populated
 * with MANY_FILES_COUNT generated files and produces a blocker finding when
 * the source snapshot shows modified or removed files afterwards.
 *
 * @param target - CLI target descriptor; handed to buildCliCommand and read
 *   for `timeoutMs`.
 * @returns Whatever runAdversarialCheck resolves to for check id
 *   "many-graph-nodes-edges". The temp workspace is cleaned up in all cases.
 */
export async function checkManyFiles(target) {
    const workspace = createTempWorkspace("p5-many-files-");
    try {
        // Populate first so the "before" snapshot already contains every file.
        populateManyFiles(workspace.sourceDir, MANY_FILES_COUNT);
        const sourceBefore = snapshotDir(workspace.sourceDir);
        const cli = buildCliCommand(target, ["--root", workspace.sourceDir, "--out", workspace.outputDir]);
        const evaluate = () => {
            const sourceAfter = snapshotDir(workspace.sourceDir);
            const changes = diffSnapshots(sourceBefore, sourceAfter);
            const touched = [...changes.modified, ...changes.removed];
            if (touched.length === 0) {
                return [];
            }
            // Keep the description short: list at most five paths, then an ellipsis.
            const preview = touched.slice(0, 5).join(", ");
            const ellipsis = touched.length > 5 ? " ..." : "";
            const finding = makeFinding({
                id: "many-files-source-modified",
                title: "Source files were modified in a many-file workspace",
                severity: "blocker",
                category: "cli-adversarial",
                description: `${touched.length} source files were modified or removed: ${preview}${ellipsis}`,
                affectedFiles: touched,
                recommendation: "CLI must treat all source files as read-only regardless of workspace size.",
            });
            return [finding];
        };
        return await runAdversarialCheck({
            id: "many-graph-nodes-edges",
            name: `Many files (${MANY_FILES_COUNT}) in workspace are handled safely`,
            category: "cli-adversarial",
            severity: "major",
            command: cli.command,
            args: cli.args,
            cwd: workspace.root,
            timeoutMs: target.timeoutMs,
            evaluate,
        });
    }
    finally {
        await workspace.cleanup();
    }
}
146
/**
 * Runs the CLI against a temp workspace whose source directory contains a
 * DEEP_NESTING_LEVELS-deep directory chain and produces a blocker finding when
 * the source snapshot shows modified or removed files afterwards.
 *
 * @param target - CLI target descriptor; handed to buildCliCommand and read
 *   for `timeoutMs`.
 * @returns Whatever runAdversarialCheck resolves to for check id
 *   "deeply-nested-tsx". The temp workspace is cleaned up in all cases.
 */
export async function checkDeeplyNestedSource(target) {
    const workspace = createTempWorkspace("p5-deep-nest-");
    try {
        // Build the nested tree first so the "before" snapshot includes it.
        populateDeepNesting(workspace.sourceDir, DEEP_NESTING_LEVELS);
        const sourceBefore = snapshotDir(workspace.sourceDir);
        const cli = buildCliCommand(target, ["--root", workspace.sourceDir, "--out", workspace.outputDir]);
        const evaluate = () => {
            const sourceAfter = snapshotDir(workspace.sourceDir);
            const changes = diffSnapshots(sourceBefore, sourceAfter);
            const touched = [...changes.modified, ...changes.removed];
            if (touched.length === 0) {
                return [];
            }
            const finding = makeFinding({
                id: "deep-nest-source-modified",
                title: "Source files were modified in a deeply nested workspace",
                severity: "blocker",
                category: "cli-adversarial",
                description: `Source files modified: ${touched.join(", ")}`,
                affectedFiles: touched,
                recommendation: "CLI must treat deeply nested source directories as read-only.",
            });
            return [finding];
        };
        return await runAdversarialCheck({
            id: "deeply-nested-tsx",
            name: `Deeply nested source (${DEEP_NESTING_LEVELS} levels) is handled safely`,
            category: "cli-adversarial",
            severity: "minor",
            command: cli.command,
            args: cli.args,
            cwd: workspace.root,
            timeoutMs: target.timeoutMs,
            evaluate,
        });
    }
    finally {
        await workspace.cleanup();
    }
}
@@ -0,0 +1,359 @@
1
+ import { spawn } from "node:child_process";
2
+ import { buildCliCommand } from "./adversarialCliConfig.js";
3
+ import { makeFinding, skippedCheck } from "./runAdversarialCheck.js";
4
+ import { createTempWorkspace } from "./tempWorkspace.js";
5
/**
 * Spawns a command without a shell and collects its stdout and stderr.
 *
 * The returned promise is only ever resolved, never rejected. Failure modes
 * are folded into the result: a synchronous spawn failure, an "error" event,
 * a timeout, or a missing exit code all report exitCode 1.
 *
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {string} cwd - Working directory for the child process.
 * @param {number} timeoutMs - Milliseconds to wait before killing the child
 *   and resolving with whatever output was captured so far.
 * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
 */
function runAndCapture(command, args, cwd, timeoutMs) {
    return new Promise((resolve) => {
        let child;
        try {
            child = spawn(command, args, {
                cwd,
                shell: false,
                stdio: ["ignore", "pipe", "pipe"],
                env: { ...process.env },
            });
        }
        catch {
            resolve({ exitCode: 1, stdout: "", stderr: "" });
            return;
        }
        const outChunks = [];
        const errChunks = [];
        child.stdout.on("data", (chunk) => outChunks.push(chunk));
        child.stderr.on("data", (chunk) => errChunks.push(chunk));
        // Resolves with the output buffered so far. Only the first call has an
        // effect because a promise settles once.
        const settle = (exitCode) => {
            resolve({
                exitCode,
                stdout: Buffer.concat(outChunks).toString("utf8"),
                stderr: Buffer.concat(errChunks).toString("utf8"),
            });
        };
        const timer = setTimeout(() => {
            try {
                child.kill();
            }
            catch {
                // ignore
            }
            settle(1);
        }, timeoutMs);
        child.on("close", (code) => {
            clearTimeout(timer);
            settle(code ?? 1);
        });
        child.on("error", () => {
            clearTimeout(timer);
            settle(1);
        });
    });
}
55
+ // ---------------------------------------------------------------------------
56
+ // Exported checks
57
+ // ---------------------------------------------------------------------------
58
/**
 * Runs the CLI with `--format json` and verifies that stdout parses as a JSON
 * object.
 *
 * Findings:
 *  - empty stdout: major finding, but only for the fake target (real targets
 *    are given the benefit of the doubt);
 *  - stdout that parses to a non-object (array, primitive, null): minor;
 *  - stdout that does not parse: major.
 * Status is "failed" when any finding is major/blocker, "warning" when only
 * lesser findings exist, otherwise "passed".
 *
 * @param target - CLI target descriptor; read for `isRealTarget` and `timeoutMs`.
 * @returns A check result object for id "json-mode-parseable-output".
 */
export async function checkJsonOutputIsParseable(target) {
    const started = new Date();
    const workspace = createTempWorkspace("p5-json-parse-");
    try {
        const cli = buildCliCommand(target, [
            "--root",
            workspace.sourceDir,
            "--out",
            workspace.outputDir,
            "--format",
            "json",
        ]);
        const result = await runAndCapture(cli.command, cli.args, workspace.root, target.timeoutMs);
        const finished = new Date();
        const findings = [];
        const stdout = result.stdout.trim();
        if (stdout.length > 0) {
            try {
                const parsed = JSON.parse(stdout);
                const isArray = Array.isArray(parsed);
                const isPlainObject = typeof parsed === "object" && parsed !== null && !isArray;
                if (!isPlainObject) {
                    findings.push(makeFinding({
                        id: "json-stdout-not-object",
                        title: "JSON output is not an object",
                        severity: "minor",
                        category: "cli-adversarial",
                        description: `Parsed JSON is ${isArray ? "array" : typeof parsed}, not an object.`,
                        recommendation: "JSON output should be a top-level object.",
                    }));
                }
            }
            catch (e) {
                findings.push(makeFinding({
                    id: "json-stdout-not-parseable",
                    title: "stdout is not valid JSON in JSON mode",
                    severity: "major",
                    category: "cli-adversarial",
                    description: `JSON.parse failed: ${e.message}. stdout was: ${stdout.slice(0, 200)}`,
                    recommendation: "CLI must emit only valid JSON to stdout when --format json is requested.",
                }));
            }
        }
        else if (!target.isRealTarget) {
            // For real targets an empty stdout may be intentional, so only the
            // fake fixture is held to "must print something".
            findings.push(makeFinding({
                id: "json-stdout-empty",
                title: "JSON mode produced no stdout output",
                severity: "major",
                category: "cli-adversarial",
                description: "Expected JSON on stdout but got empty output.",
                recommendation: "CLI should emit a JSON object to stdout when --format json is requested.",
            }));
        }
        let status = "passed";
        if (findings.some((f) => f.severity === "major" || f.severity === "blocker")) {
            status = "failed";
        }
        else if (findings.length > 0) {
            status = "warning";
        }
        return {
            id: "json-mode-parseable-output",
            name: "JSON mode produces parseable JSON on stdout",
            category: "cli-adversarial",
            severity: "major",
            status,
            findings,
            startedAt: started.toISOString(),
            finishedAt: finished.toISOString(),
            durationMs: finished.getTime() - started.getTime(),
        };
    }
    finally {
        await workspace.cleanup();
    }
}
135
/**
 * Runs the CLI in JSON mode with `--emit-stderr <marker>` and verifies that
 * the marker text does not show up on stdout and that any non-empty stdout is
 * still valid JSON.
 *
 * Both problems are reported as major findings, so the status is "failed"
 * when either is present and "passed" otherwise.
 *
 * @param target - CLI target descriptor; read for `timeoutMs`.
 * @returns A check result object for id "warnings-go-to-stderr".
 */
export async function checkStderrNotInStdout(target) {
    const started = new Date();
    const workspace = createTempWorkspace("p5-stderr-sep-");
    try {
        // Marker string the CLI is asked to emit as a warning.
        const warningText = "test-warning-signal-abc123";
        const cli = buildCliCommand(target, [
            "--root",
            workspace.sourceDir,
            "--out",
            workspace.outputDir,
            "--format",
            "json",
            "--emit-stderr",
            warningText,
        ]);
        const result = await runAndCapture(cli.command, cli.args, workspace.root, target.timeoutMs);
        const finished = new Date();
        const stdout = result.stdout.trim();
        const findings = [];
        const markerLeaked = stdout.includes(warningText);
        if (markerLeaked) {
            findings.push(makeFinding({
                id: "warning-in-stdout",
                title: "Warning message appeared in stdout",
                severity: "major",
                category: "cli-adversarial",
                description: `Warning text '${warningText}' was found in stdout. Warnings must go to stderr only.`,
                recommendation: "All warning and progress messages must be written to stderr, not stdout.",
            }));
        }
        // Empty stdout is not judged here; only non-empty output must parse.
        if (stdout.length > 0) {
            try {
                JSON.parse(stdout);
            }
            catch (e) {
                findings.push(makeFinding({
                    id: "stdout-not-json-with-stderr",
                    title: "stdout is not valid JSON when stderr warning is emitted",
                    severity: "major",
                    category: "cli-adversarial",
                    description: `JSON.parse failed: ${e.message}. This may indicate stderr contaminated stdout.`,
                    recommendation: "stderr output must not be mixed into stdout in JSON mode.",
                }));
            }
        }
        let status = "passed";
        if (findings.some((f) => f.severity === "major" || f.severity === "blocker")) {
            status = "failed";
        }
        else if (findings.length > 0) {
            status = "warning";
        }
        return {
            id: "warnings-go-to-stderr",
            name: "Warnings go to stderr and do not corrupt JSON stdout",
            category: "cli-adversarial",
            severity: "major",
            status,
            findings,
            startedAt: started.toISOString(),
            finishedAt: finished.toISOString(),
            durationMs: finished.getTime() - started.getTime(),
        };
    }
    finally {
        await workspace.cleanup();
    }
}
203
/**
 * Checks that a CLI failure in JSON mode produces a valid JSON error object
 * on stdout: not a crash, not a raw stack trace, and not empty output.
 *
 * The target is invoked with `--format json --fail`. Findings are recorded when:
 *  - the process exits with code 0 although a failure was requested (minor);
 *  - stdout is empty after trimming (minor);
 *  - stdout does not parse as JSON (major);
 *  - stdout parses to something other than a non-null, non-array object (minor).
 *
 * @param {object} target - CLI target descriptor handed to `buildCliCommand`;
 *   its `timeoutMs` is forwarded to `runAndCapture`.
 * @returns {Promise<object>} Check result carrying `id`, `name`, `category`,
 *   `severity`, `status` ("failed" | "warning" | "passed"), `findings`,
 *   `startedAt`, `finishedAt` and `durationMs`.
 */
export async function checkFailureProducesJsonError(target) {
    const started = new Date();
    const workspace = createTempWorkspace("p5-json-err-");
    try {
        const { command, args } = buildCliCommand(target, [
            "--root",
            workspace.sourceDir,
            "--format",
            "json",
            "--fail",
        ]);
        const result = await runAndCapture(command, args, workspace.root, target.timeoutMs);
        const finished = new Date();
        const findings = [];
        if (result.exitCode === 0) {
            findings.push(makeFinding({
                id: "failure-exit-code-zero",
                title: "CLI exited 0 even though failure was requested",
                severity: "minor",
                category: "cli-adversarial",
                description: "--fail was passed but the CLI exited with code 0.",
                recommendation: "CLI must exit non-zero on errors.",
            }));
        }
        const stdout = result.stdout.trim();
        if (stdout.length === 0) {
            // An empty stdout used to pass silently even though this check's
            // contract requires a JSON error object; report it explicitly.
            findings.push(makeFinding({
                id: "json-error-empty-stdout",
                title: "stdout is empty on failure in JSON mode",
                severity: "minor",
                category: "cli-adversarial",
                description: "--fail was passed with --format json but the CLI wrote nothing to stdout.",
                recommendation: "CLI should emit a JSON object with an 'error' field on failure.",
            }));
        }
        else {
            try {
                const parsed = JSON.parse(stdout);
                // typeof null is "object" and arrays are objects too, so both
                // are excluded explicitly.
                if (typeof parsed !== "object" ||
                    parsed === null ||
                    Array.isArray(parsed)) {
                    findings.push(makeFinding({
                        id: "json-error-not-object",
                        title: "JSON error output is not an object",
                        severity: "minor",
                        category: "cli-adversarial",
                        description: "Expected a JSON object with an error field; got a non-object.",
                        recommendation: "CLI should emit a JSON object with an 'error' field on failure.",
                    }));
                }
            }
            catch (e) {
                findings.push(makeFinding({
                    id: "json-error-not-parseable",
                    title: "stdout is not valid JSON on failure in JSON mode",
                    severity: "major",
                    category: "cli-adversarial",
                    // Only the first 200 characters of stdout are quoted to keep the report short.
                    description: `JSON.parse failed: ${e.message}. stdout: ${stdout.slice(0, 200)}`,
                    recommendation: "CLI must emit valid JSON to stdout even when reporting an error. Stack traces must not appear on stdout.",
                }));
            }
        }
        // Any major/blocker finding fails the check; other findings only warn.
        const hasSevereFinding = findings.some((f) => f.severity === "major" || f.severity === "blocker");
        let status = "passed";
        if (hasSevereFinding) {
            status = "failed";
        }
        else if (findings.length > 0) {
            status = "warning";
        }
        return {
            id: "json-error-object-on-failure",
            name: "CLI failure in JSON mode produces a valid JSON error object",
            category: "cli-adversarial",
            severity: "minor",
            status,
            findings,
            startedAt: started.toISOString(),
            finishedAt: finished.toISOString(),
            durationMs: finished.getTime() - started.getTime(),
        };
    }
    finally {
        // Runs on both the normal return and on any thrown error.
        await workspace.cleanup();
    }
}
279
/**
 * Verifies that progress/status output does not corrupt JSON stdout.
 *
 * For real targets the check is skipped (see the comment in the body). For
 * other targets the CLI is run with `--format json` and
 * `--emit-stderr "indexing 3 files..."`; the trimmed stdout must not contain
 * that progress message and, when non-empty, must parse as JSON. Each
 * violation is recorded as a "major" finding.
 *
 * @param {object} target - CLI target descriptor handed to `buildCliCommand`.
 *   `isRealTarget` selects the skip path; `timeoutMs` is forwarded to
 *   `runAndCapture`.
 * @returns {Promise<object>} The value of `skippedCheck(...)` for real
 *   targets; otherwise a check result carrying `id`, `name`, `category`,
 *   `severity`, `status` ("failed" | "warning" | "passed"), `findings`,
 *   `startedAt`, `finishedAt` and `durationMs`.
 */
export async function checkProgressNotInJsonStdout(target) {
    if (target.isRealTarget) {
        // Real CLI may emit progress on stdout when no --format json is specified.
        // The check depends on whether the real CLI supports --format json.
        // Return a skipped result to avoid false positives for unknown real CLIs.
        return skippedCheck({
            id: "progress-not-in-json-stdout",
            name: "Progress output does not corrupt JSON stdout",
            category: "cli-adversarial",
            reason: "Real CLI target: --format json behavior is target-specific. Run manually against the real CLI.",
        });
    }
    const started = new Date();
    const workspace = createTempWorkspace("p5-progress-json-");
    try {
        // Single source of truth for the simulated progress line: the same
        // text is passed to the CLI, searched for in stdout, and quoted in
        // the finding. Searching for only a fragment such as "indexing" would
        // flag valid JSON payloads that merely contain that word.
        const progressText = "indexing 3 files...";
        // Emit stderr (simulates progress), request JSON output.
        const { command, args } = buildCliCommand(target, [
            "--root",
            workspace.sourceDir,
            "--out",
            workspace.outputDir,
            "--format",
            "json",
            "--emit-stderr",
            progressText,
        ]);
        const result = await runAndCapture(command, args, workspace.root, target.timeoutMs);
        const finished = new Date();
        const findings = [];
        const stdout = result.stdout.trim();
        if (stdout.includes(progressText)) {
            findings.push(makeFinding({
                id: "progress-in-json-stdout",
                title: "Progress message appeared in JSON stdout",
                severity: "major",
                category: "cli-adversarial",
                description: `Progress message '${progressText}' was found in stdout. Progress must go to stderr.`,
                recommendation: "All progress, status, and informational messages must be written to stderr.",
            }));
        }
        // Empty stdout is not parsed; only non-empty output has to be valid JSON.
        if (stdout.length > 0) {
            try {
                JSON.parse(stdout);
            }
            catch (e) {
                findings.push(makeFinding({
                    id: "progress-corrupted-json",
                    title: "stdout is not valid JSON when progress is emitted",
                    severity: "major",
                    category: "cli-adversarial",
                    description: `JSON.parse failed: ${e.message}`,
                    recommendation: "Progress output must not appear on stdout when JSON mode is active.",
                }));
            }
        }
        // Any major/blocker finding fails the check; other findings only warn.
        const hasSevereFinding = findings.some((f) => f.severity === "major" || f.severity === "blocker");
        let status = "passed";
        if (hasSevereFinding) {
            status = "failed";
        }
        else if (findings.length > 0) {
            status = "warning";
        }
        return {
            id: "progress-not-in-json-stdout",
            name: "Progress output does not corrupt JSON stdout",
            category: "cli-adversarial",
            severity: "major",
            status,
            findings,
            startedAt: started.toISOString(),
            finishedAt: finished.toISOString(),
            durationMs: finished.getTime() - started.getTime(),
        };
    }
    finally {
        // Runs on both the normal return and on any thrown error.
        await workspace.cleanup();
    }
}