@dailephd/my-dev-kit-lab 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. package/README.md +272 -0
  2. package/benchmarks/contracts/benchmark-project-profiles.json +1199 -0
  3. package/benchmarks/contracts/todo-behavior.md +70 -0
  4. package/benchmarks/contracts/todo-benchmark-case.json +227 -0
  5. package/benchmarks/projects/README.md +34 -0
  6. package/benchmarks/projects/task-analytics-large-mixed/README.md +1 -0
  7. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/__init__.py +3 -0
  8. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/fixtures.py +6 -0
  9. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/metrics.py +29 -0
  10. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/models.py +21 -0
  11. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/parser.py +16 -0
  12. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/pipeline.py +9 -0
  13. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/quality.py +8 -0
  14. package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/reporting.py +11 -0
  15. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_metrics.py +19 -0
  16. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_parser.py +15 -0
  17. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_quality.py +19 -0
  18. package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_reporting.py +15 -0
  19. package/benchmarks/projects/task-analytics-large-mixed/ts/package.json +12 -0
  20. package/benchmarks/projects/task-analytics-large-mixed/ts/src/index.ts +11 -0
  21. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/analyticsSnapshot.ts +20 -0
  22. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/project.ts +5 -0
  23. package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/task.ts +10 -0
  24. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/buildProjectLeaderboard.ts +7 -0
  25. package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/formatTaskHealthReport.ts +13 -0
  26. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/buildAnalyticsSnapshot.ts +39 -0
  27. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/completeTask.ts +10 -0
  28. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/createTask.ts +21 -0
  29. package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/listTasksByProject.ts +6 -0
  30. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/projectStore.ts +20 -0
  31. package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/taskStore.ts +44 -0
  32. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/projectValidation.ts +12 -0
  33. package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/taskValidation.ts +18 -0
  34. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/buildAnalyticsSnapshot.test.ts +48 -0
  35. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/completeTask.test.ts +21 -0
  36. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/createTask.test.ts +31 -0
  37. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/listTasksByProject.test.ts +18 -0
  38. package/benchmarks/projects/task-analytics-large-mixed/ts/tests/reporting.test.ts +19 -0
  39. package/benchmarks/projects/task-analytics-large-mixed/ts/tsconfig.json +12 -0
  40. package/benchmarks/projects/task-analytics-large-mixed/ts/vitest.config.ts +5 -0
  41. package/benchmarks/projects/task-workflow-medium-ts/README.md +1 -0
  42. package/benchmarks/projects/task-workflow-medium-ts/package.json +12 -0
  43. package/benchmarks/projects/task-workflow-medium-ts/src/index.ts +9 -0
  44. package/benchmarks/projects/task-workflow-medium-ts/src/models/project.ts +6 -0
  45. package/benchmarks/projects/task-workflow-medium-ts/src/models/task.ts +39 -0
  46. package/benchmarks/projects/task-workflow-medium-ts/src/services/completeTask.ts +15 -0
  47. package/benchmarks/projects/task-workflow-medium-ts/src/services/createTask.ts +26 -0
  48. package/benchmarks/projects/task-workflow-medium-ts/src/services/filterTasks.ts +17 -0
  49. package/benchmarks/projects/task-workflow-medium-ts/src/services/importTasks.ts +33 -0
  50. package/benchmarks/projects/task-workflow-medium-ts/src/services/summarizeTasks.ts +30 -0
  51. package/benchmarks/projects/task-workflow-medium-ts/src/store/taskStore.ts +76 -0
  52. package/benchmarks/projects/task-workflow-medium-ts/src/utils/deterministicId.ts +3 -0
  53. package/benchmarks/projects/task-workflow-medium-ts/src/validation/taskValidation.ts +45 -0
  54. package/benchmarks/projects/task-workflow-medium-ts/tests/completeTask.test.ts +16 -0
  55. package/benchmarks/projects/task-workflow-medium-ts/tests/createTask.test.ts +21 -0
  56. package/benchmarks/projects/task-workflow-medium-ts/tests/filterTasks.test.ts +18 -0
  57. package/benchmarks/projects/task-workflow-medium-ts/tests/importTasks.test.ts +22 -0
  58. package/benchmarks/projects/task-workflow-medium-ts/tests/summarizeTasks.test.ts +29 -0
  59. package/benchmarks/projects/task-workflow-medium-ts/tsconfig.json +12 -0
  60. package/benchmarks/projects/task-workflow-medium-ts/vitest.config.ts +5 -0
  61. package/benchmarks/projects/todo-js/README.md +3 -0
  62. package/benchmarks/projects/todo-js/package.json +11 -0
  63. package/benchmarks/projects/todo-js/src/index.js +2 -0
  64. package/benchmarks/projects/todo-js/src/taskService.js +37 -0
  65. package/benchmarks/projects/todo-js/src/taskStore.js +28 -0
  66. package/benchmarks/projects/todo-js/tests/taskService.test.js +45 -0
  67. package/benchmarks/projects/todo-js/vitest.config.js +5 -0
  68. package/benchmarks/projects/todo-mixed-ts-py/README.md +3 -0
  69. package/benchmarks/projects/todo-mixed-ts-py/package.json +13 -0
  70. package/benchmarks/projects/todo-mixed-ts-py/python/task_service.py +76 -0
  71. package/benchmarks/projects/todo-mixed-ts-py/src/taskCli.ts +38 -0
  72. package/benchmarks/projects/todo-mixed-ts-py/tests/mixedBoundary.test.ts +18 -0
  73. package/benchmarks/projects/todo-mixed-ts-py/tsconfig.json +12 -0
  74. package/benchmarks/projects/todo-mixed-ts-py/vitest.config.ts +5 -0
  75. package/benchmarks/projects/todo-python/README.md +3 -0
  76. package/benchmarks/projects/todo-python/src/__init__.py +4 -0
  77. package/benchmarks/projects/todo-python/src/task_service.py +32 -0
  78. package/benchmarks/projects/todo-python/src/task_store.py +28 -0
  79. package/benchmarks/projects/todo-python/tests/test_task_service.py +52 -0
  80. package/benchmarks/projects/todo-ts/README.md +3 -0
  81. package/benchmarks/projects/todo-ts/package.json +12 -0
  82. package/benchmarks/projects/todo-ts/src/index.ts +2 -0
  83. package/benchmarks/projects/todo-ts/src/taskService.ts +41 -0
  84. package/benchmarks/projects/todo-ts/src/taskStore.ts +34 -0
  85. package/benchmarks/projects/todo-ts/tests/taskService.test.ts +45 -0
  86. package/benchmarks/projects/todo-ts/tsconfig.json +12 -0
  87. package/benchmarks/projects/todo-ts/vitest.config.ts +5 -0
  88. package/dist/scripts/build-gallery.js +3 -0
  89. package/dist/scripts/capture-demo-report.js +3 -0
  90. package/dist/scripts/evaluate-token-savings.js +2 -0
  91. package/dist/scripts/experiments/describeExperiment.js +143 -0
  92. package/dist/scripts/experiments/listExperiments.js +44 -0
  93. package/dist/scripts/experiments/runExperiment.js +199 -0
  94. package/dist/scripts/generate-experiment-plots.js +3 -0
  95. package/dist/scripts/generate-prompt-variants.js +2 -0
  96. package/dist/scripts/render-experiment-report.js +2 -0
  97. package/dist/scripts/run-agent-prompt.js +2 -0
  98. package/dist/scripts/run-controlled-experiment.js +2 -0
  99. package/dist/scripts/run-final-demo.js +3 -0
  100. package/dist/scripts/run-lab-demo.js +5 -0
  101. package/dist/scripts/run-visualization-demos.js +3 -0
  102. package/dist/scripts/security/runCodeql.js +57 -0
  103. package/dist/scripts/security/runDependencyChecks.js +57 -0
  104. package/dist/scripts/security/runFuzzSmoke.js +29 -0
  105. package/dist/scripts/security/runPackageChecks.js +56 -0
  106. package/dist/scripts/security/runSemgrep.js +63 -0
  107. package/dist/scripts/security/validate.js +117 -0
  108. package/dist/scripts/verify-benchmarks.js +202 -0
  109. package/dist/src/agents/adapters/claudeAdapter.js +37 -0
  110. package/dist/src/agents/adapters/codexAdapter.js +110 -0
  111. package/dist/src/agents/adapters/fakeAgentAdapter.js +101 -0
  112. package/dist/src/agents/agentRegistry.js +21 -0
  113. package/dist/src/agents/index.js +7 -0
  114. package/dist/src/agents/parseAgentTokenUsage.js +137 -0
  115. package/dist/src/agents/runAgentPrompt.js +38 -0
  116. package/dist/src/agents/types.js +1 -0
  117. package/dist/src/commands/buildGalleryCommand.js +56 -0
  118. package/dist/src/commands/captureDemoReport.js +116 -0
  119. package/dist/src/commands/evaluateTokenSavings.js +175 -0
  120. package/dist/src/commands/generateExperimentPlotsCommand.js +38 -0
  121. package/dist/src/commands/generatePromptVariants.js +67 -0
  122. package/dist/src/commands/renderExperimentReportCommand.js +131 -0
  123. package/dist/src/commands/runAgentPromptCommand.js +132 -0
  124. package/dist/src/commands/runControlledExperimentCommand.js +174 -0
  125. package/dist/src/commands/runFinalDemoCommand.js +123 -0
  126. package/dist/src/commands/runLabDemo.js +62 -0
  127. package/dist/src/commands/runVisualizationDemosCommand.js +67 -0
  128. package/dist/src/core/commandLine.js +59 -0
  129. package/dist/src/core/countTokens.js +8 -0
  130. package/dist/src/core/fileGlobs.js +100 -0
  131. package/dist/src/core/localProjectTarget.js +75 -0
  132. package/dist/src/core/pathSafety.js +19 -0
  133. package/dist/src/core/pythonCommand.js +30 -0
  134. package/dist/src/core/resolveCommand.js +110 -0
  135. package/dist/src/core/runMeasuredCommand.js +143 -0
  136. package/dist/src/evaluation/benchmarkMetadata.js +207 -0
  137. package/dist/src/evaluation/buildExperimentMatrix.js +75 -0
  138. package/dist/src/evaluation/classifyAgentRunOutcome.js +40 -0
  139. package/dist/src/evaluation/compareExperimentRuns.js +79 -0
  140. package/dist/src/evaluation/compareTokenSavings.js +47 -0
  141. package/dist/src/evaluation/controlledExperimentTypes.js +1 -0
  142. package/dist/src/evaluation/index.js +18 -0
  143. package/dist/src/evaluation/parseAgentAnswer.js +230 -0
  144. package/dist/src/evaluation/projectComplexity.js +126 -0
  145. package/dist/src/evaluation/projectFileTree.js +83 -0
  146. package/dist/src/evaluation/readEvaluationCases.js +59 -0
  147. package/dist/src/evaluation/renderTokenSavingsReportInput.js +55 -0
  148. package/dist/src/evaluation/runControlledExperiment.js +158 -0
  149. package/dist/src/evaluation/runMyDevKitRetrieval.js +197 -0
  150. package/dist/src/evaluation/runRawFullFileBaseline.js +31 -0
  151. package/dist/src/evaluation/scoreCorrectness.js +127 -0
  152. package/dist/src/evaluation/types.js +1 -0
  153. package/dist/src/evaluation/writeExperimentArtifacts.js +104 -0
  154. package/dist/src/evaluation/writeTokenSavingsArtifacts.js +57 -0
  155. package/dist/src/experiments/config.js +24 -0
  156. package/dist/src/experiments/defaultRegistry.js +7 -0
  157. package/dist/src/experiments/errors.js +18 -0
  158. package/dist/src/experiments/index.js +9 -0
  159. package/dist/src/experiments/outputPaths.js +25 -0
  160. package/dist/src/experiments/plugins/contextStrategyComparison/config.js +37 -0
  161. package/dist/src/experiments/plugins/contextStrategyComparison/index.js +3 -0
  162. package/dist/src/experiments/plugins/contextStrategyComparison/plugin.js +83 -0
  163. package/dist/src/experiments/plugins/contextStrategyComparison/resultMapping.js +260 -0
  164. package/dist/src/experiments/plugins/index.js +1 -0
  165. package/dist/src/experiments/registry.js +43 -0
  166. package/dist/src/experiments/results.js +48 -0
  167. package/dist/src/experiments/runner.js +181 -0
  168. package/dist/src/experiments/target.js +8 -0
  169. package/dist/src/experiments/types.js +1 -0
  170. package/dist/src/gallery/index.js +2 -0
  171. package/dist/src/gallery/types.js +1 -0
  172. package/dist/src/gallery/writeGalleryManifest.js +214 -0
  173. package/dist/src/index.js +12 -0
  174. package/dist/src/plots/buildExperimentPlotData.js +137 -0
  175. package/dist/src/plots/index.js +4 -0
  176. package/dist/src/plots/renderSvgChart.js +82 -0
  177. package/dist/src/plots/types.js +1 -0
  178. package/dist/src/plots/writePlotArtifacts.js +46 -0
  179. package/dist/src/prompts/buildPromptContext.js +68 -0
  180. package/dist/src/prompts/generateMyDevKitPrompt.js +106 -0
  181. package/dist/src/prompts/generatePromptVariants.js +36 -0
  182. package/dist/src/prompts/generateRawFullFilePrompt.js +97 -0
  183. package/dist/src/prompts/index.js +7 -0
  184. package/dist/src/prompts/measurePromptComplexity.js +41 -0
  185. package/dist/src/prompts/types.js +1 -0
  186. package/dist/src/prompts/writePromptArtifacts.js +43 -0
  187. package/dist/src/report/buildExperimentReportInput.js +339 -0
  188. package/dist/src/report/experimentReportTypes.js +1 -0
  189. package/dist/src/report/experiments/buildPluginExperimentReport.js +153 -0
  190. package/dist/src/report/experiments/experimentReportModel.js +1 -0
  191. package/dist/src/report/experiments/index.js +4 -0
  192. package/dist/src/report/experiments/renderPluginExperimentReportHtml.js +133 -0
  193. package/dist/src/report/experiments/writePluginExperimentReports.js +30 -0
  194. package/dist/src/report/index.js +8 -0
  195. package/dist/src/report/renderExperimentHtmlReport.js +354 -0
  196. package/dist/src/report/renderHtmlReport.js +103 -0
  197. package/dist/src/report/types.js +10 -0
  198. package/dist/src/report/writeExperimentReportArtifacts.js +38 -0
  199. package/dist/src/report/writeReportArtifacts.js +39 -0
  200. package/dist/src/screenshot/captureReportScreenshot.js +75 -0
  201. package/dist/src/screenshot/index.js +2 -0
  202. package/dist/src/screenshot/types.js +1 -0
  203. package/dist/src/securityValidation/artifacts.js +15 -0
  204. package/dist/src/securityValidation/cliAdversarial/adversarialCliConfig.js +38 -0
  205. package/dist/src/securityValidation/cliAdversarial/dataVolumeChecks.js +194 -0
  206. package/dist/src/securityValidation/cliAdversarial/jsonStdoutChecks.js +359 -0
  207. package/dist/src/securityValidation/cliAdversarial/malformedArtifactChecks.js +284 -0
  208. package/dist/src/securityValidation/cliAdversarial/malformedArtifactFixtures.js +79 -0
  209. package/dist/src/securityValidation/cliAdversarial/pathBoundaryChecks.js +431 -0
  210. package/dist/src/securityValidation/cliAdversarial/pathCases.js +144 -0
  211. package/dist/src/securityValidation/cliAdversarial/readOnlyBoundaryChecks.js +294 -0
  212. package/dist/src/securityValidation/cliAdversarial/runAdversarialCheck.js +149 -0
  213. package/dist/src/securityValidation/cliAdversarial/subprocessSafetyChecks.js +214 -0
  214. package/dist/src/securityValidation/cliAdversarial/tempWorkspace.js +160 -0
  215. package/dist/src/securityValidation/commandRunner.js +136 -0
  216. package/dist/src/securityValidation/config.js +39 -0
  217. package/dist/src/securityValidation/dependencies/parseNpmAudit.js +115 -0
  218. package/dist/src/securityValidation/dependencies/parseNpmLs.js +71 -0
  219. package/dist/src/securityValidation/dependencies/parseNpmOutdated.js +41 -0
  220. package/dist/src/securityValidation/dependencies/runDependencyChecks.js +239 -0
  221. package/dist/src/securityValidation/dependencies/runOsvScanner.js +43 -0
  222. package/dist/src/securityValidation/fuzz/fuzzHarness.js +61 -0
  223. package/dist/src/securityValidation/fuzz/fuzzTargets.js +204 -0
  224. package/dist/src/securityValidation/fuzz/randomInput.js +0 -0
  225. package/dist/src/securityValidation/index.js +34 -0
  226. package/dist/src/securityValidation/packageChecks/forbiddenPackageContents.js +67 -0
  227. package/dist/src/securityValidation/packageChecks/parseNpmPackDryRun.js +56 -0
  228. package/dist/src/securityValidation/packageChecks/runPackageChecks.js +88 -0
  229. package/dist/src/securityValidation/report/renderSecurityReport.js +248 -0
  230. package/dist/src/securityValidation/report/securityReportTypes.js +1 -0
  231. package/dist/src/securityValidation/staticScans/codeql.js +66 -0
  232. package/dist/src/securityValidation/staticScans/semgrep.js +180 -0
  233. package/dist/src/securityValidation/testMatrix.js +535 -0
  234. package/dist/src/securityValidation/types.js +34 -0
  235. package/dist/src/securityValidation/validate/resolveTarget.js +32 -0
  236. package/dist/src/securityValidation/validate/runSecurityValidation.js +169 -0
  237. package/dist/src/securityValidation/validate/verdict.js +73 -0
  238. package/dist/src/visualizationDemos/buildMyDevKitVisualizationCommands.js +59 -0
  239. package/dist/src/visualizationDemos/index.js +4 -0
  240. package/dist/src/visualizationDemos/runVisualizationDemos.js +82 -0
  241. package/dist/src/visualizationDemos/types.js +1 -0
  242. package/dist/src/visualizationDemos/writeVisualizationDemoArtifacts.js +25 -0
  243. package/docs/METRICS.md +286 -0
  244. package/examples/demo-report-input.json +78 -0
  245. package/examples/lab-demo-cases.json +35 -0
  246. package/examples/real-agent-campaign-cases.json +118 -0
  247. package/examples/token-savings-cases.json +122 -0
  248. package/package.json +91 -0
  249. package/tests/fixtures/fake-adversarial-cli.js +152 -0
  250. package/tests/fixtures/fake-my-dev-kit-cli.js +83 -0
package/package.json ADDED
@@ -0,0 +1,91 @@
1
+ {
2
+ "name": "@dailephd/my-dev-kit-lab",
3
+ "version": "0.2.0",
4
+ "type": "module",
5
+ "description": "Evidence, benchmark, and evaluation companion for my-dev-kit.",
6
+ "bin": {
7
+ "my-dev-kit-lab": "dist/scripts/run-final-demo.js"
8
+ },
9
+ "files": [
10
+ "dist/scripts/",
11
+ "dist/src/",
12
+ "benchmarks/",
13
+ "docs/METRICS.md",
14
+ "examples/",
15
+ "tests/fixtures/"
16
+ ],
17
+ "repository": {
18
+ "type": "git",
19
+ "url": "git+https://github.com/dailephd/my-dev-kit-lab.git"
20
+ },
21
+ "homepage": "https://github.com/dailephd/my-dev-kit-lab#readme",
22
+ "bugs": {
23
+ "url": "https://github.com/dailephd/my-dev-kit-lab/issues"
24
+ },
25
+ "publishConfig": {
26
+ "access": "public"
27
+ },
28
+ "funding": [
29
+ "https://github.com/sponsors/dailephd",
30
+ "https://paypal.me/daile88"
31
+ ],
32
+ "keywords": [
33
+ "benchmark",
34
+ "evaluation",
35
+ "reporting",
36
+ "graph-retrieval",
37
+ "codex",
38
+ "claude"
39
+ ],
40
+ "author": "Dai Le",
41
+ "license": "UNLICENSED",
42
+ "scripts": {
43
+ "build": "node scripts/clean-dist.mjs && tsc -p tsconfig.json",
44
+ "test": "vitest run",
45
+ "test:benchmarks": "vitest run tests/benchmarks tests/scripts",
46
+ "test:report": "vitest run tests/report tests/commands",
47
+ "test:screenshot": "vitest run tests/screenshot",
48
+ "test:evaluation": "vitest run tests/core tests/evaluation tests/commands",
49
+ "test:gallery": "vitest run tests/gallery",
50
+ "test:demo": "vitest run tests/commands/runLabDemo.spec.ts tests/integration/runLabDemoCommand.spec.ts",
51
+ "test:integration": "vitest run tests/integration",
52
+ "test:e2e": "vitest run tests/e2e",
53
+ "test:agents": "vitest run tests/agents tests/commands/runAgentPromptCommand.spec.ts tests/integration/runAgentPromptFake.spec.ts",
54
+ "test:experiments": "vitest run tests/evaluation/buildExperimentMatrix.spec.ts tests/evaluation/classifyAgentRunOutcome.spec.ts tests/evaluation/parseAgentAnswer.spec.ts tests/evaluation/scoreCorrectness.spec.ts tests/evaluation/compareExperimentRuns.spec.ts tests/evaluation/runControlledExperiment.spec.ts tests/commands/runControlledExperimentCommand.spec.ts tests/integration/runControlledExperimentFake.spec.ts",
55
+ "capture-demo-report": "tsx scripts/capture-demo-report.ts",
56
+ "evaluate-token-savings": "tsx scripts/evaluate-token-savings.ts",
57
+ "generate-experiment-plots": "tsx scripts/generate-experiment-plots.ts",
58
+ "generate-prompt-variants": "tsx scripts/generate-prompt-variants.ts",
59
+ "build-gallery": "tsx scripts/build-gallery.ts",
60
+ "lab-demo": "tsx scripts/run-lab-demo.ts",
61
+ "render-experiment-report": "tsx scripts/render-experiment-report.ts",
62
+ "experiment:list": "tsx scripts/experiments/listExperiments.ts",
63
+ "experiment:describe": "tsx scripts/experiments/describeExperiment.ts",
64
+ "experiment:run": "tsx scripts/experiments/runExperiment.ts",
65
+ "run-final-demo": "tsx scripts/run-final-demo.ts",
66
+ "run-agent-prompt": "tsx scripts/run-agent-prompt.ts",
67
+ "run-controlled-experiment": "tsx scripts/run-controlled-experiment.ts",
68
+ "run-visualization-demos": "tsx scripts/run-visualization-demos.ts",
69
+ "test:plots": "vitest run tests/plots tests/commands/generateExperimentPlotsCommand.spec.ts",
70
+ "test:visualization-demos": "vitest run tests/visualizationDemos tests/commands/runVisualizationDemosCommand.spec.ts",
71
+ "verify:benchmarks": "tsx scripts/verify-benchmarks.ts",
72
+ "verify": "npm run build && npm run test && npm run test:benchmarks && npm run test:report && npm run test:screenshot && npm run test:evaluation && npm run test:agents && npm run test:experiments && npm run test:plots && npm run test:visualization-demos && npm run test:gallery && npm run test:demo && npm run test:integration && npm run test:e2e && npm run verify:benchmarks",
73
+ "security:deps": "tsx scripts/security/runDependencyChecks.ts",
74
+ "security:package": "tsx scripts/security/runPackageChecks.ts",
75
+ "security:codeql": "tsx scripts/security/runCodeql.ts",
76
+ "security:semgrep": "tsx scripts/security/runSemgrep.ts",
77
+ "test:security": "vitest run tests/security/",
78
+ "test:fuzz:smoke": "tsx scripts/security/runFuzzSmoke.ts",
79
+ "security:validate": "tsx scripts/security/validate.ts"
80
+ },
81
+ "devDependencies": {
82
+ "@types/node": "^24.0.0",
83
+ "playwright": "^1.54.1",
84
+ "tsx": "^4.20.3",
85
+ "typescript": "^5.8.3",
86
+ "vitest": "^3.2.4"
87
+ },
88
+ "engines": {
89
+ "node": ">=20"
90
+ }
91
+ }
package/tests/fixtures/fake-adversarial-cli.js ADDED
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Deterministic fake CLI for the security-validation adversarial harness.
4
+ *
5
+ * Simulates a "well-behaved" my-dev-kit-style CLI for CI tests that cannot
6
+ * depend on a globally installed package.
7
+ *
8
+ * Supported flags:
9
+ * --root <dir> Source root (treated as read-only; not validated for traversal)
10
+ * --out <dir> Output directory for generated artifacts
11
+ * --index <dir> Index artifact directory (also written with fake manifest)
12
+ * --file <path> Path argument (parsed and accepted, but never read by this fake)
13
+ * --path <path> Graph path argument (parsed and ignored)
14
+ * --node <id> Graph node argument (parsed and ignored)
15
+ * --query <q> Search query (parsed and ignored)
16
+ * --format <f> Output format: "json" | "text" (default: text)
17
+ * --emit-stderr <msg> Write msg to stderr (simulates a warning)
18
+ * --escape-to <dir> [HARNESS TESTING ONLY] Write an escape sentinel file here
19
+ * Used to verify the harness can detect writes outside workspace.
20
+ * --fail Exit with code 1 (simulates a CLI error)
21
+ *
22
+ * On success: writes a fake manifest.json to --out and/or --index, exits 0.
23
+ * On --fail: emits an error message and exits 1.
24
+ * Does NOT modify any files in --root.
25
+ * Does NOT write anywhere other than --out and --index (unless --escape-to is set).
26
+ */
27
+
28
+ import { mkdirSync, writeFileSync } from "node:fs";
29
+ import path from "node:path";
30
+
31
/**
 * Parses the fake CLI's command line into an options object.
 *
 * The first two entries of `argv` (node binary and script path) are skipped.
 * Every value-taking flag consumes the token that follows it verbatim, even
 * if that token looks like another flag; a value flag in last position
 * therefore yields `undefined`. Unrecognised tokens are ignored.
 *
 * @param argv Full argument vector, typically `process.argv`.
 * @returns Options with `null` defaults (`format` defaults to "text",
 *          `fail` to `false`).
 */
function parseArgs(argv) {
  const tokens = argv.slice(2);
  const opts = {
    root: null,
    out: null,
    index: null,
    file: null,
    path: null,
    node: null,
    query: null,
    format: "text",
    emitStderr: null,
    escapeTo: null,
    fail: false,
  };

  let pos = 0;
  while (pos < tokens.length) {
    switch (tokens[pos]) {
      case "--root":
        opts.root = tokens[++pos];
        break;
      case "--out":
        opts.out = tokens[++pos];
        break;
      case "--index":
        opts.index = tokens[++pos];
        break;
      case "--file":
        opts.file = tokens[++pos];
        break;
      case "--path":
        opts.path = tokens[++pos];
        break;
      case "--node":
        opts.node = tokens[++pos];
        break;
      case "--query":
        opts.query = tokens[++pos];
        break;
      case "--format":
        opts.format = tokens[++pos];
        break;
      case "--emit-stderr":
        opts.emitStderr = tokens[++pos];
        break;
      case "--escape-to":
        opts.escapeTo = tokens[++pos];
        break;
      case "--fail":
        // Boolean switch: takes no value.
        opts.fail = true;
        break;
      default:
        // Unknown tokens are silently skipped.
        break;
    }
    pos += 1;
  }

  return opts;
}
62
+
63
/**
 * Builds the JSON text of the fake manifest written to --out / --index.
 *
 * @param root Value of --root; `null`/`undefined` is recorded as "".
 * @returns Pretty-printed (2-space) JSON with `schemaVersion`, `version`,
 *          `generatedAt` (current time, ISO 8601), `root` and an empty
 *          `files` list.
 */
function fakeManifest(root) {
  const timestamp = new Date().toISOString();
  const recordedRoot = root ?? "";
  const manifest = {
    schemaVersion: 1,
    version: "0.0.0-fake",
    generatedAt: timestamp,
    root: recordedRoot,
    files: [],
  };
  return JSON.stringify(manifest, null, 2);
}
76
+
77
/**
 * Executes the fake CLI for the current `process.argv`.
 *
 * Steps, in order:
 *  1. Optionally echo the --emit-stderr message as a warning on stderr.
 *  2. With --fail: report the failure (stdout JSON when --format is "json",
 *     stderr text otherwise) and finish with exit code 1.
 *  3. Write `manifest.json` into --out and then --index when given; a write
 *     error is reported on stderr and finishes with exit code 1.
 *  4. With --escape-to: write `escape-sentinel.txt` there (best effort; a
 *     failure is reported on stderr but does not change the exit code).
 *  5. Print the success summary on stdout and finish with exit code 0.
 *
 * The parsed --file, --path, --node and --query values are not consulted.
 *
 * The exit status is set through `process.exitCode` and the function simply
 * returns, rather than calling `process.exit()`: a forced exit can discard
 * stdout/stderr data that has been queued but not yet flushed when the
 * stream is asynchronous (for example pipes on Windows).
 */
function run() {
  const opts = parseArgs(process.argv);
  // Non-Error throwables have no `.message`; fall back to their string form.
  const describe = (err) => (err instanceof Error ? err.message : String(err));

  if (opts.emitStderr) {
    process.stderr.write(`[fake-cli] warning: ${opts.emitStderr}\n`);
  }

  if (opts.fail) {
    if (opts.format === "json") {
      process.stdout.write(
        JSON.stringify({ error: "fake-cli: --fail was requested" }) + "\n"
      );
    } else {
      process.stderr.write("fake-cli: --fail was requested\n");
    }
    process.exitCode = 1;
    return;
  }

  const manifest = fakeManifest(opts.root);

  // Declared artifact directories, written in the same order as before:
  // --out first, then --index. The first failure aborts the run.
  const targets = [
    ["--out", opts.out],
    ["--index", opts.index],
  ];
  for (const [flag, dir] of targets) {
    if (!dir) {
      continue;
    }
    try {
      mkdirSync(dir, { recursive: true });
      writeFileSync(path.join(dir, "manifest.json"), manifest, "utf8");
    } catch (err) {
      process.stderr.write(`fake-cli: failed to write ${flag}: ${describe(err)}\n`);
      process.exitCode = 1;
      return;
    }
  }

  // FOR HARNESS TESTING ONLY: deliberately write outside declared paths.
  // This flag exists solely so the harness can verify its own detection logic.
  if (opts.escapeTo) {
    try {
      mkdirSync(opts.escapeTo, { recursive: true });
      writeFileSync(
        path.join(opts.escapeTo, "escape-sentinel.txt"),
        "harness-escape-detection-test\n",
        "utf8"
      );
    } catch (err) {
      // Best effort: report, but still finish successfully.
      process.stderr.write(`fake-cli: --escape-to failed: ${describe(err)}\n`);
    }
  }

  if (opts.format === "json") {
    process.stdout.write(
      JSON.stringify({
        status: "ok",
        root: opts.root,
        out: opts.out,
        index: opts.index,
      }) + "\n"
    );
  } else if (opts.out || opts.index) {
    process.stdout.write("fake-cli: artifacts written\n");
  } else {
    process.stdout.write("fake-cli: ok (no output requested)\n");
  }

  process.exitCode = 0;
}
151
+
152
+ run();
package/tests/fixtures/fake-my-dev-kit-cli.js ADDED
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env node
2
+ import fs from "node:fs";
3
+ import path from "node:path";
4
+
5
/**
 * Returns the command-line token that directly follows `flag`.
 *
 * Only the first occurrence of `flag` in `process.argv` is considered.
 *
 * @param flag Flag to look for, e.g. "--out".
 * @returns The following token, or `undefined` when the flag is absent or
 *          is the last token.
 */
function argValue(flag) {
  const position = process.argv.indexOf(flag);
  if (position < 0) {
    return undefined;
  }
  return process.argv[position + 1];
}
9
+
10
/**
 * Guesses which benchmark project an index path belongs to.
 *
 * The path is stringified (falsy values become ""), backslashes are turned
 * into forward slashes, and the first known project name found as a
 * substring wins. Names are tried in a fixed order.
 *
 * @param indexPath Value of --index; may be missing.
 * @returns One of "todo-ts", "todo-python", "todo-js", "todo-mixed-ts-py",
 *          or "unknown" when none of them occurs in the path.
 */
function benchmarkProjectFromIndexPath(indexPath) {
  const forwardSlashed = String(indexPath || "").split("\\").join("/");
  // Order matters: it is the match priority when several names occur.
  const knownProjects = ["todo-ts", "todo-python", "todo-js", "todo-mixed-ts-py"];
  for (const project of knownProjects) {
    if (forwardSlashed.includes(project)) {
      return project;
    }
  }
  return "unknown";
}
18
+
19
// Sub-command selected by the first CLI argument.
const command = process.argv[2];

/**
 * Runs the requested fake my-dev-kit sub-command and returns its exit code.
 *
 * Supported commands:
 *  - index:  writes fake-index.json and manifest.json into --out (if given)
 *            and prints a JSON acknowledgement.
 *  - search: prints one canned result chosen from the --index path.
 *  - lookup / slice: print a JSON stub echoing --node.
 *  - source: prints a canned numbered snippet for --node (raw text, no
 *            trailing newline), or a generic one-line placeholder.
 *  - view:   writes a small fixed SVG to --out (if given) and prints a JSON
 *            acknowledgement.
 * Anything else is reported on stderr with exit code 1.
 *
 * Canned data is held in Maps rather than object literals so that a --node
 * value such as "constructor" or "toString" cannot resolve to a member
 * inherited from Object.prototype (which previously made stdout.write throw).
 *
 * @returns 0 on success, 1 for an unsupported command.
 */
function main() {
  switch (command) {
    case "index": {
      const outDir = argValue("--out");
      if (outDir) {
        fs.mkdirSync(outDir, { recursive: true });
        fs.writeFileSync(path.join(outDir, "fake-index.json"), JSON.stringify({ ok: true }));
        fs.writeFileSync(path.join(outDir, "manifest.json"), JSON.stringify({ ok: true, fake: true }));
      }
      console.log(JSON.stringify({ ok: true, command: "index", outDir }));
      return 0;
    }

    case "search": {
      const indexPath = argValue("--index");
      const project = benchmarkProjectFromIndexPath(indexPath);
      const hits = new Map([
        ["todo-ts", { nodeId: "todo-ts:createTask", file: "src/taskService.ts", symbol: "createTask" }],
        ["todo-python", { nodeId: "todo-python:complete_task", file: "src/task_service.py", symbol: "complete_task" }],
        ["todo-js", { nodeId: "todo-js:listOpenTasks", file: "src/taskService.js", symbol: "listOpenTasks" }],
        ["todo-mixed-ts-py", { nodeId: "todo-mixed:summarize_tasks", file: "python/task_service.py", symbol: "summarize_tasks" }]
      ]);
      const hit = hits.get(project) || { nodeId: "unknown", file: "unknown", symbol: "unknown" };
      console.log(JSON.stringify({ results: [hit] }));
      return 0;
    }

    case "lookup": {
      const node = argValue("--node");
      console.log(JSON.stringify({ nodeId: node, summary: `lookup for ${node}` }));
      return 0;
    }

    case "slice": {
      const node = argValue("--node");
      console.log(JSON.stringify({ nodeId: node, slice: `slice for ${node}` }));
      return 0;
    }

    case "source": {
      const node = argValue("--node");
      const snippets = new Map([
        ["todo-ts:createTask", "1 export class TaskService {\n2 createTask(title: string) {\n3 return this.store.create(title.trim());\n4 }\n5 }"],
        ["todo-python:complete_task", "1 class TaskService:\n2 def complete_task(self, task_id: str) -> dict:\n3 return self._store.update(task_id, lambda task: {**task, 'completed': True})"],
        ["todo-js:listOpenTasks", "1 export class TaskService {\n2 listOpenTasks() {\n3 return this.store.list().filter((task) => !task.completed);\n4 }\n5 }"],
        ["todo-mixed:summarize_tasks", "1 def summarize_tasks(self) -> dict:\n2 completed = len([task for task in self._tasks if task['completed']])\n3 return {'total': len(self._tasks), 'open': len(self._tasks) - completed, 'completed': completed}"]
      ]);
      process.stdout.write(snippets.get(node) || `1 source for ${node}`);
      return 0;
    }

    case "view": {
      const graph = argValue("--graph") || "unknown";
      const outPath = argValue("--out");
      if (outPath) {
        // --out is a file path here, so create its parent directory.
        fs.mkdirSync(path.dirname(outPath), { recursive: true });
        fs.writeFileSync(
          outPath,
          `<svg xmlns="http://www.w3.org/2000/svg" width="320" height="160"><text x="20" y="40">fake ${graph}</text><circle cx="80" cy="90" r="18" fill="#2563eb" /><circle cx="200" cy="90" r="18" fill="#16a34a" /><line x1="98" y1="90" x2="182" y2="90" stroke="#344054" /></svg>\n`
        );
      }
      console.log(JSON.stringify({ ok: true, command: "view", graph, outPath }));
      return 0;
    }

    default:
      process.stderr.write(`Unsupported fake my-dev-kit command: ${command}`);
      return 1;
  }
}

// Set the exit status and let the process end on its own: process.exit()
// right after a write can drop output that has not been flushed yet when
// stdout/stderr is asynchronous (for example pipes on Windows).
process.exitCode = main();