@dailephd/my-dev-kit-lab 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. package/README.md +272 -0
  2. package/benchmarks/contracts/benchmark-project-profiles.json +1199 -0
  3. package/benchmarks/contracts/todo-behavior.md +70 -0
  4. package/benchmarks/contracts/todo-benchmark-case.json +227 -0
  5. package/benchmarks/projects/README.md +34 -0
  6. package/benchmarks/projects/task-analytics-large-mixed/README.md +1 -0
  7. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/__init__.py +3 -0
  8. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/fixtures.py +6 -0
  9. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/metrics.py +29 -0
  10. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/models.py +21 -0
  11. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/parser.py +16 -0
  12. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/pipeline.py +9 -0
  13. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/quality.py +8 -0
  14. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/reporting.py +11 -0
  15. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_metrics.py +19 -0
  16. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_parser.py +15 -0
  17. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_quality.py +19 -0
  18. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_reporting.py +15 -0
  19. package/benchmarks/projects/task-analytics-large-mixed/ts/package.json +12 -0
  20. package/benchmarks/projects/task-analytics-large-mixed/ts/src/index.ts +11 -0
  21. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/analyticsSnapshot.ts +20 -0
  22. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/project.ts +5 -0
  23. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/task.ts +10 -0
  24. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/buildProjectLeaderboard.ts +7 -0
  25. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/formatTaskHealthReport.ts +13 -0
  26. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/buildAnalyticsSnapshot.ts +39 -0
  27. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/completeTask.ts +10 -0
  28. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/createTask.ts +21 -0
  29. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/listTasksByProject.ts +6 -0
  30. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/projectStore.ts +20 -0
  31. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/taskStore.ts +44 -0
  32. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/projectValidation.ts +12 -0
  33. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/taskValidation.ts +18 -0
  34. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/buildAnalyticsSnapshot.test.ts +48 -0
  35. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/completeTask.test.ts +21 -0
  36. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/createTask.test.ts +31 -0
  37. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/listTasksByProject.test.ts +18 -0
  38. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/reporting.test.ts +19 -0
  39. package/benchmarks/projects/task-analytics-large-mixed/ts/tsconfig.json +12 -0
  40. package/benchmarks/projects/task-analytics-large-mixed/ts/vitest.config.ts +5 -0
  41. package/benchmarks/projects/task-workflow-medium-ts/README.md +1 -0
  42. package/benchmarks/projects/task-workflow-medium-ts/package.json +12 -0
  43. package/benchmarks/projects/task-workflow-medium-ts/src/index.ts +9 -0
  44. package/benchmarks/projects/task-workflow-medium-ts/src/models/project.ts +6 -0
  45. package/benchmarks/projects/task-workflow-medium-ts/src/models/task.ts +39 -0
  46. package/benchmarks/projects/task-workflow-medium-ts/src/services/completeTask.ts +15 -0
  47. package/benchmarks/projects/task-workflow-medium-ts/src/services/createTask.ts +26 -0
  48. package/benchmarks/projects/task-workflow-medium-ts/src/services/filterTasks.ts +17 -0
  49. package/benchmarks/projects/task-workflow-medium-ts/src/services/importTasks.ts +33 -0
  50. package/benchmarks/projects/task-workflow-medium-ts/src/services/summarizeTasks.ts +30 -0
  51. package/benchmarks/projects/task-workflow-medium-ts/src/store/taskStore.ts +76 -0
  52. package/benchmarks/projects/task-workflow-medium-ts/src/utils/deterministicId.ts +3 -0
  53. package/benchmarks/projects/task-workflow-medium-ts/src/validation/taskValidation.ts +45 -0
  54. package/benchmarks/projects/task-workflow-medium-ts/tests/completeTask.test.ts +16 -0
  55. package/benchmarks/projects/task-workflow-medium-ts/tests/createTask.test.ts +21 -0
  56. package/benchmarks/projects/task-workflow-medium-ts/tests/filterTasks.test.ts +18 -0
  57. package/benchmarks/projects/task-workflow-medium-ts/tests/importTasks.test.ts +22 -0
  58. package/benchmarks/projects/task-workflow-medium-ts/tests/summarizeTasks.test.ts +29 -0
  59. package/benchmarks/projects/task-workflow-medium-ts/tsconfig.json +12 -0
  60. package/benchmarks/projects/task-workflow-medium-ts/vitest.config.ts +5 -0
  61. package/benchmarks/projects/todo-js/README.md +3 -0
  62. package/benchmarks/projects/todo-js/package.json +11 -0
  63. package/benchmarks/projects/todo-js/src/index.js +2 -0
  64. package/benchmarks/projects/todo-js/src/taskService.js +37 -0
  65. package/benchmarks/projects/todo-js/src/taskStore.js +28 -0
  66. package/benchmarks/projects/todo-js/tests/taskService.test.js +45 -0
  67. package/benchmarks/projects/todo-js/vitest.config.js +5 -0
  68. package/benchmarks/projects/todo-mixed-ts-py/README.md +3 -0
  69. package/benchmarks/projects/todo-mixed-ts-py/package.json +13 -0
  70. package/benchmarks/projects/todo-mixed-ts-py/python/task_service.py +76 -0
  71. package/benchmarks/projects/todo-mixed-ts-py/src/taskCli.ts +38 -0
  72. package/benchmarks/projects/todo-mixed-ts-py/tests/mixedBoundary.test.ts +18 -0
  73. package/benchmarks/projects/todo-mixed-ts-py/tsconfig.json +12 -0
  74. package/benchmarks/projects/todo-mixed-ts-py/vitest.config.ts +5 -0
  75. package/benchmarks/projects/todo-python/README.md +3 -0
  76. package/benchmarks/projects/todo-python/src/__init__.py +4 -0
  77. package/benchmarks/projects/todo-python/src/task_service.py +32 -0
  78. package/benchmarks/projects/todo-python/src/task_store.py +28 -0
  79. package/benchmarks/projects/todo-python/tests/test_task_service.py +52 -0
  80. package/benchmarks/projects/todo-ts/README.md +3 -0
  81. package/benchmarks/projects/todo-ts/package.json +12 -0
  82. package/benchmarks/projects/todo-ts/src/index.ts +2 -0
  83. package/benchmarks/projects/todo-ts/src/taskService.ts +41 -0
  84. package/benchmarks/projects/todo-ts/src/taskStore.ts +34 -0
  85. package/benchmarks/projects/todo-ts/tests/taskService.test.ts +45 -0
  86. package/benchmarks/projects/todo-ts/tsconfig.json +12 -0
  87. package/benchmarks/projects/todo-ts/vitest.config.ts +5 -0
  88. package/dist/scripts/build-gallery.js +3 -0
  89. package/dist/scripts/capture-demo-report.js +3 -0
  90. package/dist/scripts/evaluate-token-savings.js +2 -0
  91. package/dist/scripts/experiments/describeExperiment.js +143 -0
  92. package/dist/scripts/experiments/listExperiments.js +44 -0
  93. package/dist/scripts/experiments/runExperiment.js +199 -0
  94. package/dist/scripts/generate-experiment-plots.js +3 -0
  95. package/dist/scripts/generate-prompt-variants.js +2 -0
  96. package/dist/scripts/render-experiment-report.js +2 -0
  97. package/dist/scripts/run-agent-prompt.js +2 -0
  98. package/dist/scripts/run-controlled-experiment.js +2 -0
  99. package/dist/scripts/run-final-demo.js +3 -0
  100. package/dist/scripts/run-lab-demo.js +5 -0
  101. package/dist/scripts/run-visualization-demos.js +3 -0
  102. package/dist/scripts/security/runCodeql.js +57 -0
  103. package/dist/scripts/security/runDependencyChecks.js +57 -0
  104. package/dist/scripts/security/runFuzzSmoke.js +29 -0
  105. package/dist/scripts/security/runPackageChecks.js +56 -0
  106. package/dist/scripts/security/runSemgrep.js +63 -0
  107. package/dist/scripts/security/validate.js +117 -0
  108. package/dist/scripts/verify-benchmarks.js +202 -0
  109. package/dist/src/agents/adapters/claudeAdapter.js +37 -0
  110. package/dist/src/agents/adapters/codexAdapter.js +110 -0
  111. package/dist/src/agents/adapters/fakeAgentAdapter.js +101 -0
  112. package/dist/src/agents/agentRegistry.js +21 -0
  113. package/dist/src/agents/index.js +7 -0
  114. package/dist/src/agents/parseAgentTokenUsage.js +137 -0
  115. package/dist/src/agents/runAgentPrompt.js +38 -0
  116. package/dist/src/agents/types.js +1 -0
  117. package/dist/src/commands/buildGalleryCommand.js +56 -0
  118. package/dist/src/commands/captureDemoReport.js +116 -0
  119. package/dist/src/commands/evaluateTokenSavings.js +175 -0
  120. package/dist/src/commands/generateExperimentPlotsCommand.js +38 -0
  121. package/dist/src/commands/generatePromptVariants.js +67 -0
  122. package/dist/src/commands/renderExperimentReportCommand.js +131 -0
  123. package/dist/src/commands/runAgentPromptCommand.js +132 -0
  124. package/dist/src/commands/runControlledExperimentCommand.js +174 -0
  125. package/dist/src/commands/runFinalDemoCommand.js +123 -0
  126. package/dist/src/commands/runLabDemo.js +62 -0
  127. package/dist/src/commands/runVisualizationDemosCommand.js +67 -0
  128. package/dist/src/core/commandLine.js +59 -0
  129. package/dist/src/core/countTokens.js +8 -0
  130. package/dist/src/core/fileGlobs.js +100 -0
  131. package/dist/src/core/localProjectTarget.js +75 -0
  132. package/dist/src/core/pathSafety.js +19 -0
  133. package/dist/src/core/pythonCommand.js +30 -0
  134. package/dist/src/core/resolveCommand.js +110 -0
  135. package/dist/src/core/runMeasuredCommand.js +143 -0
  136. package/dist/src/evaluation/benchmarkMetadata.js +207 -0
  137. package/dist/src/evaluation/buildExperimentMatrix.js +75 -0
  138. package/dist/src/evaluation/classifyAgentRunOutcome.js +40 -0
  139. package/dist/src/evaluation/compareExperimentRuns.js +79 -0
  140. package/dist/src/evaluation/compareTokenSavings.js +47 -0
  141. package/dist/src/evaluation/controlledExperimentTypes.js +1 -0
  142. package/dist/src/evaluation/index.js +18 -0
  143. package/dist/src/evaluation/parseAgentAnswer.js +230 -0
  144. package/dist/src/evaluation/projectComplexity.js +126 -0
  145. package/dist/src/evaluation/projectFileTree.js +83 -0
  146. package/dist/src/evaluation/readEvaluationCases.js +59 -0
  147. package/dist/src/evaluation/renderTokenSavingsReportInput.js +55 -0
  148. package/dist/src/evaluation/runControlledExperiment.js +158 -0
  149. package/dist/src/evaluation/runMyDevKitRetrieval.js +197 -0
  150. package/dist/src/evaluation/runRawFullFileBaseline.js +31 -0
  151. package/dist/src/evaluation/scoreCorrectness.js +127 -0
  152. package/dist/src/evaluation/types.js +1 -0
  153. package/dist/src/evaluation/writeExperimentArtifacts.js +104 -0
  154. package/dist/src/evaluation/writeTokenSavingsArtifacts.js +57 -0
  155. package/dist/src/experiments/config.js +24 -0
  156. package/dist/src/experiments/defaultRegistry.js +7 -0
  157. package/dist/src/experiments/errors.js +18 -0
  158. package/dist/src/experiments/index.js +9 -0
  159. package/dist/src/experiments/outputPaths.js +25 -0
  160. package/dist/src/experiments/plugins/contextStrategyComparison/config.js +37 -0
  161. package/dist/src/experiments/plugins/contextStrategyComparison/index.js +3 -0
  162. package/dist/src/experiments/plugins/contextStrategyComparison/plugin.js +83 -0
  163. package/dist/src/experiments/plugins/contextStrategyComparison/resultMapping.js +260 -0
  164. package/dist/src/experiments/plugins/index.js +1 -0
  165. package/dist/src/experiments/registry.js +43 -0
  166. package/dist/src/experiments/results.js +48 -0
  167. package/dist/src/experiments/runner.js +181 -0
  168. package/dist/src/experiments/target.js +8 -0
  169. package/dist/src/experiments/types.js +1 -0
  170. package/dist/src/gallery/index.js +2 -0
  171. package/dist/src/gallery/types.js +1 -0
  172. package/dist/src/gallery/writeGalleryManifest.js +214 -0
  173. package/dist/src/index.js +12 -0
  174. package/dist/src/plots/buildExperimentPlotData.js +137 -0
  175. package/dist/src/plots/index.js +4 -0
  176. package/dist/src/plots/renderSvgChart.js +82 -0
  177. package/dist/src/plots/types.js +1 -0
  178. package/dist/src/plots/writePlotArtifacts.js +46 -0
  179. package/dist/src/prompts/buildPromptContext.js +68 -0
  180. package/dist/src/prompts/generateMyDevKitPrompt.js +106 -0
  181. package/dist/src/prompts/generatePromptVariants.js +36 -0
  182. package/dist/src/prompts/generateRawFullFilePrompt.js +97 -0
  183. package/dist/src/prompts/index.js +7 -0
  184. package/dist/src/prompts/measurePromptComplexity.js +41 -0
  185. package/dist/src/prompts/types.js +1 -0
  186. package/dist/src/prompts/writePromptArtifacts.js +43 -0
  187. package/dist/src/report/buildExperimentReportInput.js +339 -0
  188. package/dist/src/report/experimentReportTypes.js +1 -0
  189. package/dist/src/report/experiments/buildPluginExperimentReport.js +153 -0
  190. package/dist/src/report/experiments/experimentReportModel.js +1 -0
  191. package/dist/src/report/experiments/index.js +4 -0
  192. package/dist/src/report/experiments/renderPluginExperimentReportHtml.js +133 -0
  193. package/dist/src/report/experiments/writePluginExperimentReports.js +30 -0
  194. package/dist/src/report/index.js +8 -0
  195. package/dist/src/report/renderExperimentHtmlReport.js +354 -0
  196. package/dist/src/report/renderHtmlReport.js +103 -0
  197. package/dist/src/report/types.js +10 -0
  198. package/dist/src/report/writeExperimentReportArtifacts.js +38 -0
  199. package/dist/src/report/writeReportArtifacts.js +39 -0
  200. package/dist/src/screenshot/captureReportScreenshot.js +75 -0
  201. package/dist/src/screenshot/index.js +2 -0
  202. package/dist/src/screenshot/types.js +1 -0
  203. package/dist/src/securityValidation/artifacts.js +15 -0
  204. package/dist/src/securityValidation/cliAdversarial/adversarialCliConfig.js +38 -0
  205. package/dist/src/securityValidation/cliAdversarial/dataVolumeChecks.js +194 -0
  206. package/dist/src/securityValidation/cliAdversarial/jsonStdoutChecks.js +359 -0
  207. package/dist/src/securityValidation/cliAdversarial/malformedArtifactChecks.js +284 -0
  208. package/dist/src/securityValidation/cliAdversarial/malformedArtifactFixtures.js +79 -0
  209. package/dist/src/securityValidation/cliAdversarial/pathBoundaryChecks.js +431 -0
  210. package/dist/src/securityValidation/cliAdversarial/pathCases.js +144 -0
  211. package/dist/src/securityValidation/cliAdversarial/readOnlyBoundaryChecks.js +294 -0
  212. package/dist/src/securityValidation/cliAdversarial/runAdversarialCheck.js +149 -0
  213. package/dist/src/securityValidation/cliAdversarial/subprocessSafetyChecks.js +214 -0
  214. package/dist/src/securityValidation/cliAdversarial/tempWorkspace.js +160 -0
  215. package/dist/src/securityValidation/commandRunner.js +136 -0
  216. package/dist/src/securityValidation/config.js +39 -0
  217. package/dist/src/securityValidation/dependencies/parseNpmAudit.js +115 -0
  218. package/dist/src/securityValidation/dependencies/parseNpmLs.js +71 -0
  219. package/dist/src/securityValidation/dependencies/parseNpmOutdated.js +41 -0
  220. package/dist/src/securityValidation/dependencies/runDependencyChecks.js +239 -0
  221. package/dist/src/securityValidation/dependencies/runOsvScanner.js +43 -0
  222. package/dist/src/securityValidation/fuzz/fuzzHarness.js +61 -0
  223. package/dist/src/securityValidation/fuzz/fuzzTargets.js +204 -0
  224. package/dist/src/securityValidation/fuzz/randomInput.js +0 -0
  225. package/dist/src/securityValidation/index.js +34 -0
  226. package/dist/src/securityValidation/packageChecks/forbiddenPackageContents.js +67 -0
  227. package/dist/src/securityValidation/packageChecks/parseNpmPackDryRun.js +56 -0
  228. package/dist/src/securityValidation/packageChecks/runPackageChecks.js +88 -0
  229. package/dist/src/securityValidation/report/renderSecurityReport.js +248 -0
  230. package/dist/src/securityValidation/report/securityReportTypes.js +1 -0
  231. package/dist/src/securityValidation/staticScans/codeql.js +66 -0
  232. package/dist/src/securityValidation/staticScans/semgrep.js +180 -0
  233. package/dist/src/securityValidation/testMatrix.js +535 -0
  234. package/dist/src/securityValidation/types.js +34 -0
  235. package/dist/src/securityValidation/validate/resolveTarget.js +32 -0
  236. package/dist/src/securityValidation/validate/runSecurityValidation.js +169 -0
  237. package/dist/src/securityValidation/validate/verdict.js +73 -0
  238. package/dist/src/visualizationDemos/buildMyDevKitVisualizationCommands.js +59 -0
  239. package/dist/src/visualizationDemos/index.js +4 -0
  240. package/dist/src/visualizationDemos/runVisualizationDemos.js +82 -0
  241. package/dist/src/visualizationDemos/types.js +1 -0
  242. package/dist/src/visualizationDemos/writeVisualizationDemoArtifacts.js +25 -0
  243. package/docs/METRICS.md +286 -0
  244. package/examples/demo-report-input.json +78 -0
  245. package/examples/lab-demo-cases.json +35 -0
  246. package/examples/real-agent-campaign-cases.json +118 -0
  247. package/examples/token-savings-cases.json +122 -0
  248. package/package.json +91 -0
  249. package/tests/fixtures/fake-adversarial-cli.js +152 -0
  250. package/tests/fixtures/fake-my-dev-kit-cli.js +83 -0
@@ -0,0 +1,284 @@
1
+ import path from "node:path";
2
+ import { buildCliCommand } from "./adversarialCliConfig.js";
3
+ import { MALFORMED_CODE_GRAPH_CASES, MALFORMED_MANIFEST_CASES, UNSUPPORTED_SCHEMA_VERSION_CASES, placeMalformedCodeGraph, placeMalformedManifest, placeUnsupportedSchemaManifest, } from "./malformedArtifactFixtures.js";
4
+ import { makeFinding, runAdversarialCheck } from "./runAdversarialCheck.js";
5
+ import { createTempWorkspace, diffSnapshots, snapshotDir } from "./tempWorkspace.js";
6
+ // ---------------------------------------------------------------------------
7
+ // Malformed artifact checks
8
+ //
9
+ // Each check pre-places a malformed artifact in the index directory, then
10
+ // runs the CLI against it. Expected behavior:
11
+ // - Fake CLI: always writes a valid manifest.json on top → passes (baseline)
12
+ // - Real CLI: either handles the malformed artifact safely or fails clearly
13
+ //
14
+ // In both cases, source files must remain unmodified.
15
+ // ---------------------------------------------------------------------------
16
/**
 * Runs the CLI against an index directory that already holds a broken
 * manifest.json and reports any source file that was changed or removed.
 *
 * Only the "truncated-json" entry of MALFORMED_MANIFEST_CASES is exercised.
 *
 * @param {object} target - CLI target; forwarded to buildCliCommand, and its
 *   `timeoutMs` is passed through as the check's `timeoutMs`.
 * @returns {Promise<object>} Whatever runAdversarialCheck resolves to.
 */
export async function checkMalformedManifest(target) {
    const ws = createTempWorkspace("p5-mf-manifest-");
    try {
        // Seed the index directory with the truncated-JSON manifest fixture.
        const fixture = MALFORMED_MANIFEST_CASES.find((entry) => entry.id === "truncated-json");
        placeMalformedManifest(ws.indexDir, fixture.content);
        const sourceBefore = snapshotDir(ws.sourceDir);
        const cliArgs = ["--root", ws.sourceDir, "--index", ws.indexDir];
        const { command, args } = buildCliCommand(target, cliArgs);
        // Handed to runAdversarialCheck as its `evaluate` callback: compares the
        // source tree with the snapshot taken before the CLI was built.
        const evaluate = () => {
            const sourceAfter = snapshotDir(ws.sourceDir);
            const delta = diffSnapshots(sourceBefore, sourceAfter);
            const touched = [...delta.modified, ...delta.removed];
            if (touched.length <= 0) {
                return [];
            }
            const listing = touched.join(", ");
            const finding = makeFinding({
                id: "malformed-manifest-source-modified",
                title: "Source files were modified while handling malformed manifest",
                severity: "blocker",
                category: "artifact-safety",
                description: `CLI modified source files while handling a malformed manifest: ${listing}`,
                affectedFiles: touched,
                recommendation: "CLI must not write to the source directory even when encountering malformed artifacts.",
            });
            return [finding];
        };
        const outcome = await runAdversarialCheck({
            id: "malformed-manifest-json",
            name: "Malformed manifest.json is handled safely",
            category: "artifact-safety",
            severity: "major",
            command,
            args,
            cwd: ws.root,
            timeoutMs: target.timeoutMs,
            evaluate,
        });
        return outcome;
    }
    finally {
        // The temp workspace is removed whether the check resolved or threw.
        await ws.cleanup();
    }
}
67
/**
 * Runs the CLI once for every entry in MALFORMED_MANIFEST_CASES, writing that
 * entry's content as manifest.json before each run, and merges the findings
 * of all runs into a single summary check result.
 *
 * The summary is "failed" when any finding is a blocker or major, "warning"
 * when there are only lesser findings, and "passed" when there are none.
 * `startedAt`, `finishedAt` and `durationMs` describe the whole sweep.
 *
 * @param {object} target - CLI target; forwarded to buildCliCommand, and its
 *   `timeoutMs` is passed through as each run's `timeoutMs`.
 * @returns {Promise<object>} Summary check result with the aggregated findings.
 */
export async function checkAllMalformedManifestCases(target) {
    // Record the real start so the summary reports the elapsed time of the
    // sweep rather than a zero-length interval stamped after the fact.
    const startedAtMs = Date.now();
    const workspace = createTempWorkspace("p5-mf-all-");
    try {
        const findings = [];
        for (const testCase of MALFORMED_MANIFEST_CASES) {
            // Each case overwrites manifest.json in the shared index directory.
            placeMalformedManifest(workspace.indexDir, testCase.content);
            const beforeSource = snapshotDir(workspace.sourceDir);
            const { command, args } = buildCliCommand(target, [
                "--root",
                workspace.sourceDir,
                "--index",
                workspace.indexDir,
            ]);
            const result = await runAdversarialCheck({
                id: `malformed-manifest-${testCase.id}`,
                name: `Malformed manifest case: ${testCase.description}`,
                category: "artifact-safety",
                severity: "major",
                command,
                args,
                cwd: workspace.root,
                timeoutMs: target.timeoutMs,
                evaluate: () => {
                    const afterSource = snapshotDir(workspace.sourceDir);
                    const sourceDiff = diffSnapshots(beforeSource, afterSource);
                    const sourceProblems = [...sourceDiff.modified, ...sourceDiff.removed];
                    if (sourceProblems.length > 0) {
                        return [
                            makeFinding({
                                id: `malformed-manifest-${testCase.id}-source-modified`,
                                title: `Source modified for malformed manifest case: ${testCase.id}`,
                                severity: "blocker",
                                category: "artifact-safety",
                                description: `Case '${testCase.id}': source files modified: ${sourceProblems.join(", ")}`,
                                affectedFiles: sourceProblems,
                                recommendation: "Source directory must remain read-only even with malformed artifacts.",
                            }),
                        ];
                    }
                    return [];
                },
            });
            // NOTE(review): only the findings are kept; the per-case status is
            // not consulted. Confirm that runAdversarialCheck surfaces timeouts
            // and crashes as findings, otherwise they are invisible here.
            findings.push(...result.findings);
        }
        const hasSevereFinding = findings.some((f) => f.severity === "blocker" || f.severity === "major");
        let status = "passed";
        if (hasSevereFinding) {
            status = "failed";
        }
        else if (findings.length > 0) {
            status = "warning";
        }
        const finishedAtMs = Date.now();
        // Return a summary check result.
        return {
            id: "malformed-manifest-all-cases",
            name: "All malformed manifest cases handled safely",
            category: "artifact-safety",
            severity: "major",
            status,
            findings,
            startedAt: new Date(startedAtMs).toISOString(),
            finishedAt: new Date(finishedAtMs).toISOString(),
            durationMs: finishedAtMs - startedAtMs,
        };
    }
    finally {
        await workspace.cleanup();
    }
}
136
/**
 * Runs the CLI against an index directory that already holds a broken
 * code-graph.json and reports any source file that was changed or removed.
 *
 * The first entry of MALFORMED_CODE_GRAPH_CASES is used as the fixture.
 *
 * @param {object} target - CLI target; forwarded to buildCliCommand, and its
 *   `timeoutMs` is passed through as the check's `timeoutMs`.
 * @returns {Promise<object>} Whatever runAdversarialCheck resolves to.
 */
export async function checkMalformedCodeGraph(target) {
    const ws = createTempWorkspace("p5-mf-graph-");
    try {
        const [fixture] = [MALFORMED_CODE_GRAPH_CASES[0]];
        placeMalformedCodeGraph(ws.indexDir, fixture.content);
        const sourceBefore = snapshotDir(ws.sourceDir);
        const cliArgs = ["--root", ws.sourceDir, "--index", ws.indexDir];
        const { command, args } = buildCliCommand(target, cliArgs);
        // Handed to runAdversarialCheck as its `evaluate` callback.
        const evaluate = () => {
            const sourceAfter = snapshotDir(ws.sourceDir);
            const delta = diffSnapshots(sourceBefore, sourceAfter);
            const touched = [...delta.modified, ...delta.removed];
            if (touched.length <= 0) {
                return [];
            }
            const listing = touched.join(", ");
            const finding = makeFinding({
                id: "malformed-graph-source-modified",
                title: "Source files modified while handling malformed code graph",
                severity: "blocker",
                category: "artifact-safety",
                description: `Source files affected: ${listing}`,
                affectedFiles: touched,
                recommendation: "Source directory must remain read-only even with malformed graph artifacts.",
            });
            return [finding];
        };
        const outcome = await runAdversarialCheck({
            id: "malformed-code-graph-json",
            name: "Malformed code-graph.json is handled safely",
            category: "artifact-safety",
            severity: "major",
            command,
            args,
            cwd: ws.root,
            timeoutMs: target.timeoutMs,
            evaluate,
        });
        return outcome;
    }
    finally {
        // The temp workspace is removed whether the check resolved or threw.
        await ws.cleanup();
    }
}
185
/**
 * Runs the CLI against an index directory whose manifest.json declares an
 * unsupported schema version and reports any source file that was changed
 * or removed.
 *
 * Only the "future-version" entry of UNSUPPORTED_SCHEMA_VERSION_CASES is
 * exercised. The evaluation here inspects the source tree only; it does not
 * look at the CLI's error output.
 *
 * @param {object} target - CLI target; forwarded to buildCliCommand, and its
 *   `timeoutMs` is passed through as the check's `timeoutMs`.
 * @returns {Promise<object>} Whatever runAdversarialCheck resolves to.
 */
export async function checkUnsupportedSchemaVersion(target) {
    const ws = createTempWorkspace("p5-schema-ver-");
    try {
        const fixture = UNSUPPORTED_SCHEMA_VERSION_CASES.find((entry) => entry.id === "future-version");
        placeUnsupportedSchemaManifest(ws.indexDir, fixture.content);
        const sourceBefore = snapshotDir(ws.sourceDir);
        const cliArgs = ["--root", ws.sourceDir, "--index", ws.indexDir];
        const { command, args } = buildCliCommand(target, cliArgs);
        // Handed to runAdversarialCheck as its `evaluate` callback.
        const evaluate = () => {
            const sourceAfter = snapshotDir(ws.sourceDir);
            const delta = diffSnapshots(sourceBefore, sourceAfter);
            const touched = [...delta.modified, ...delta.removed];
            if (touched.length <= 0) {
                return [];
            }
            const listing = touched.join(", ");
            const finding = makeFinding({
                id: "schema-version-source-modified",
                title: "Source files modified while handling unsupported schema version",
                severity: "blocker",
                category: "artifact-safety",
                description: `Source files affected: ${listing}`,
                affectedFiles: touched,
                recommendation: "Source directory must remain read-only even when the schema version is unsupported.",
            });
            return [finding];
        };
        const outcome = await runAdversarialCheck({
            id: "unsupported-schema-version",
            name: "Unsupported schema version produces a clear error",
            category: "artifact-safety",
            severity: "major",
            command,
            args,
            cwd: ws.root,
            timeoutMs: target.timeoutMs,
            evaluate,
        });
        return outcome;
    }
    finally {
        // The temp workspace is removed whether the check resolved or threw.
        await ws.cleanup();
    }
}
234
/**
 * Runs the CLI with `--index` pointing at a directory that does not exist
 * and reports any source file that was changed or removed.
 *
 * Whether the CLI creates the directory or exits with an error is not judged
 * by the evaluation here; only the source tree is compared.
 *
 * @param {object} target - CLI target; forwarded to buildCliCommand, and its
 *   `timeoutMs` is passed through as the check's `timeoutMs`.
 * @returns {Promise<object>} Whatever runAdversarialCheck resolves to.
 */
export async function checkMissingIndexDirectory(target) {
    const ws = createTempWorkspace("p5-missing-idx-");
    try {
        // A path under the workspace root that this function never creates.
        const absentIndexDir = path.join(ws.root, "nonexistent-index-dir");
        const sourceBefore = snapshotDir(ws.sourceDir);
        const cliArgs = ["--root", ws.sourceDir, "--index", absentIndexDir];
        const { command, args } = buildCliCommand(target, cliArgs);
        // Handed to runAdversarialCheck as its `evaluate` callback. The source
        // comparison does not depend on whether the CLI succeeded.
        const evaluate = () => {
            const sourceAfter = snapshotDir(ws.sourceDir);
            const delta = diffSnapshots(sourceBefore, sourceAfter);
            const touched = [...delta.modified, ...delta.removed];
            if (touched.length <= 0) {
                return [];
            }
            const listing = touched.join(", ");
            const finding = makeFinding({
                id: "missing-index-source-modified",
                title: "Source files modified when index directory was missing",
                severity: "blocker",
                category: "artifact-safety",
                description: `Source files affected: ${listing}`,
                affectedFiles: touched,
                recommendation: "Source directory must remain read-only regardless of index directory state.",
            });
            return [finding];
        };
        const outcome = await runAdversarialCheck({
            id: "missing-index-directory",
            name: "Missing index directory is handled gracefully",
            category: "artifact-safety",
            severity: "minor",
            command,
            args,
            cwd: ws.root,
            timeoutMs: target.timeoutMs,
            evaluate,
        });
        return outcome;
    }
    finally {
        // The temp workspace is removed whether the check resolved or threw.
        await ws.cleanup();
    }
}
@@ -0,0 +1,79 @@
1
+ import { mkdirSync, writeFileSync } from "node:fs";
2
+ import path from "node:path";
3
/**
 * Malformed manifest.json payloads. Each entry carries a stable `id`, a
 * human-readable `description`, and the raw text `content` to write to disk.
 */
export const MALFORMED_MANIFEST_CASES = [
    // Syntactically broken: an object that is opened and never closed.
    { id: "truncated-json", description: "Truncated JSON (no closing brace)", content: "{" },
    // Valid JSON, but the root is not an object.
    { id: "null-value", description: "JSON null at root", content: "null" },
    { id: "wrong-type-array", description: "Array instead of object", content: "[]" },
    // Valid object that omits the schemaVersion field.
    {
        id: "missing-version",
        description: "Object missing schemaVersion field",
        content: JSON.stringify({ root: ".", files: [] }),
    },
    // Plain text that is not JSON in any form.
    { id: "not-json", description: "Not valid JSON", content: "this is not json at all" },
];
30
/**
 * Malformed code-graph.json payloads. Each entry carries a stable `id`, a
 * human-readable `description`, and the raw text `content` to write to disk.
 */
export const MALFORMED_CODE_GRAPH_CASES = [
    // Well-formed JSON whose `nodes` member is null rather than an array.
    {
        id: "null-nodes",
        description: "nodes array is null",
        content: JSON.stringify({ schemaVersion: 1, nodes: null, edges: [] }),
    },
    { id: "empty-object", description: "Empty object", content: "{}" },
    // Cut off in the middle of the first node.
    { id: "truncated-json", description: "Truncated JSON array", content: '{"nodes": [{"id"' },
];
47
/**
 * Manifest payloads that are valid JSON but declare a schemaVersion of
 * 9999, the string "future", or -1. Each entry carries a stable `id`, a
 * human-readable `description`, and the serialized `content`.
 */
export const UNSUPPORTED_SCHEMA_VERSION_CASES = [
    {
        id: "future-version",
        description: "Schema version far in the future",
        content: JSON.stringify({ schemaVersion: 9999, root: ".", files: [] }),
    },
    {
        id: "string-version",
        description: "Schema version as a string instead of number",
        content: JSON.stringify({ schemaVersion: "future", root: ".", files: [] }),
    },
    {
        id: "negative-version",
        description: "Negative schema version",
        content: JSON.stringify({ schemaVersion: -1, root: ".", files: [] }),
    },
];
64
+ // ---------------------------------------------------------------------------
65
+ // Fixture placement helpers
66
+ // ---------------------------------------------------------------------------
67
/**
 * Writes `content` to `<dir>/<filename>` as UTF-8, creating `dir` (and any
 * missing parents) first. An existing file at that path is overwritten.
 *
 * @param {string} dir - Directory that should contain the artifact.
 * @param {string} filename - File name to create inside `dir`.
 * @param {string} content - Raw text to write.
 * @returns {void}
 */
export function placeMalformedArtifact(dir, filename, content) {
    mkdirSync(dir, { recursive: true });
    const artifactPath = path.join(dir, filename);
    writeFileSync(artifactPath, content, { encoding: "utf8" });
}
71
/**
 * Writes `content` as `manifest.json` inside `dir` via placeMalformedArtifact.
 *
 * @param {string} dir - Index directory to receive the file.
 * @param {string} content - Raw manifest text to write.
 * @returns {void}
 */
export function placeMalformedManifest(dir, content) {
    const manifestName = "manifest.json";
    placeMalformedArtifact(dir, manifestName, content);
}
74
/**
 * Writes `content` as `code-graph.json` inside `dir` via placeMalformedArtifact.
 *
 * @param {string} dir - Index directory to receive the file.
 * @param {string} content - Raw code-graph text to write.
 * @returns {void}
 */
export function placeMalformedCodeGraph(dir, content) {
    const codeGraphName = "code-graph.json";
    placeMalformedArtifact(dir, codeGraphName, content);
}
77
/**
 * Writes `content` as `manifest.json` inside `dir` via placeMalformedArtifact.
 * Used for manifests that carry an unsupported schema version.
 *
 * @param {string} dir - Index directory to receive the file.
 * @param {string} content - Raw manifest text to write.
 * @returns {void}
 */
export function placeUnsupportedSchemaManifest(dir, content) {
    const manifestName = "manifest.json";
    placeMalformedArtifact(dir, manifestName, content);
}