@dailephd/my-dev-kit-lab 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +272 -0
- package/benchmarks/contracts/benchmark-project-profiles.json +1199 -0
- package/benchmarks/contracts/todo-behavior.md +70 -0
- package/benchmarks/contracts/todo-benchmark-case.json +227 -0
- package/benchmarks/projects/README.md +34 -0
- package/benchmarks/projects/task-analytics-large-mixed/README.md +1 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/__init__.py +3 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/fixtures.py +6 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/metrics.py +29 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/models.py +21 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/parser.py +16 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/pipeline.py +9 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/quality.py +8 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/task_analytics/reporting.py +11 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_metrics.py +19 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_parser.py +15 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_quality.py +19 -0
- package/benchmarks/projects/task-analytics-large-mixed/py/tests/test_reporting.py +15 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/package.json +12 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/index.ts +11 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/analyticsSnapshot.ts +20 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/project.ts +5 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/models/task.ts +10 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/buildProjectLeaderboard.ts +7 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/reporting/formatTaskHealthReport.ts +13 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/buildAnalyticsSnapshot.ts +39 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/completeTask.ts +10 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/createTask.ts +21 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/services/listTasksByProject.ts +6 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/projectStore.ts +20 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/store/taskStore.ts +44 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/projectValidation.ts +12 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/src/validation/taskValidation.ts +18 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tests/buildAnalyticsSnapshot.test.ts +48 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tests/completeTask.test.ts +21 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tests/createTask.test.ts +31 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tests/listTasksByProject.test.ts +18 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tests/reporting.test.ts +19 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/tsconfig.json +12 -0
- package/benchmarks/projects/task-analytics-large-mixed/ts/vitest.config.ts +5 -0
- package/benchmarks/projects/task-workflow-medium-ts/README.md +1 -0
- package/benchmarks/projects/task-workflow-medium-ts/package.json +12 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/index.ts +9 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/models/project.ts +6 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/models/task.ts +39 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/services/completeTask.ts +15 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/services/createTask.ts +26 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/services/filterTasks.ts +17 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/services/importTasks.ts +33 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/services/summarizeTasks.ts +30 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/store/taskStore.ts +76 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/utils/deterministicId.ts +3 -0
- package/benchmarks/projects/task-workflow-medium-ts/src/validation/taskValidation.ts +45 -0
- package/benchmarks/projects/task-workflow-medium-ts/tests/completeTask.test.ts +16 -0
- package/benchmarks/projects/task-workflow-medium-ts/tests/createTask.test.ts +21 -0
- package/benchmarks/projects/task-workflow-medium-ts/tests/filterTasks.test.ts +18 -0
- package/benchmarks/projects/task-workflow-medium-ts/tests/importTasks.test.ts +22 -0
- package/benchmarks/projects/task-workflow-medium-ts/tests/summarizeTasks.test.ts +29 -0
- package/benchmarks/projects/task-workflow-medium-ts/tsconfig.json +12 -0
- package/benchmarks/projects/task-workflow-medium-ts/vitest.config.ts +5 -0
- package/benchmarks/projects/todo-js/README.md +3 -0
- package/benchmarks/projects/todo-js/package.json +11 -0
- package/benchmarks/projects/todo-js/src/index.js +2 -0
- package/benchmarks/projects/todo-js/src/taskService.js +37 -0
- package/benchmarks/projects/todo-js/src/taskStore.js +28 -0
- package/benchmarks/projects/todo-js/tests/taskService.test.js +45 -0
- package/benchmarks/projects/todo-js/vitest.config.js +5 -0
- package/benchmarks/projects/todo-mixed-ts-py/README.md +3 -0
- package/benchmarks/projects/todo-mixed-ts-py/package.json +13 -0
- package/benchmarks/projects/todo-mixed-ts-py/python/task_service.py +76 -0
- package/benchmarks/projects/todo-mixed-ts-py/src/taskCli.ts +38 -0
- package/benchmarks/projects/todo-mixed-ts-py/tests/mixedBoundary.test.ts +18 -0
- package/benchmarks/projects/todo-mixed-ts-py/tsconfig.json +12 -0
- package/benchmarks/projects/todo-mixed-ts-py/vitest.config.ts +5 -0
- package/benchmarks/projects/todo-python/README.md +3 -0
- package/benchmarks/projects/todo-python/src/__init__.py +4 -0
- package/benchmarks/projects/todo-python/src/task_service.py +32 -0
- package/benchmarks/projects/todo-python/src/task_store.py +28 -0
- package/benchmarks/projects/todo-python/tests/test_task_service.py +52 -0
- package/benchmarks/projects/todo-ts/README.md +3 -0
- package/benchmarks/projects/todo-ts/package.json +12 -0
- package/benchmarks/projects/todo-ts/src/index.ts +2 -0
- package/benchmarks/projects/todo-ts/src/taskService.ts +41 -0
- package/benchmarks/projects/todo-ts/src/taskStore.ts +34 -0
- package/benchmarks/projects/todo-ts/tests/taskService.test.ts +45 -0
- package/benchmarks/projects/todo-ts/tsconfig.json +12 -0
- package/benchmarks/projects/todo-ts/vitest.config.ts +5 -0
- package/dist/scripts/build-gallery.js +3 -0
- package/dist/scripts/capture-demo-report.js +3 -0
- package/dist/scripts/evaluate-token-savings.js +2 -0
- package/dist/scripts/experiments/describeExperiment.js +143 -0
- package/dist/scripts/experiments/listExperiments.js +44 -0
- package/dist/scripts/experiments/runExperiment.js +199 -0
- package/dist/scripts/generate-experiment-plots.js +3 -0
- package/dist/scripts/generate-prompt-variants.js +2 -0
- package/dist/scripts/render-experiment-report.js +2 -0
- package/dist/scripts/run-agent-prompt.js +2 -0
- package/dist/scripts/run-controlled-experiment.js +2 -0
- package/dist/scripts/run-final-demo.js +3 -0
- package/dist/scripts/run-lab-demo.js +5 -0
- package/dist/scripts/run-visualization-demos.js +3 -0
- package/dist/scripts/security/runCodeql.js +57 -0
- package/dist/scripts/security/runDependencyChecks.js +57 -0
- package/dist/scripts/security/runFuzzSmoke.js +29 -0
- package/dist/scripts/security/runPackageChecks.js +56 -0
- package/dist/scripts/security/runSemgrep.js +63 -0
- package/dist/scripts/security/validate.js +117 -0
- package/dist/scripts/verify-benchmarks.js +202 -0
- package/dist/src/agents/adapters/claudeAdapter.js +37 -0
- package/dist/src/agents/adapters/codexAdapter.js +110 -0
- package/dist/src/agents/adapters/fakeAgentAdapter.js +101 -0
- package/dist/src/agents/agentRegistry.js +21 -0
- package/dist/src/agents/index.js +7 -0
- package/dist/src/agents/parseAgentTokenUsage.js +137 -0
- package/dist/src/agents/runAgentPrompt.js +38 -0
- package/dist/src/agents/types.js +1 -0
- package/dist/src/commands/buildGalleryCommand.js +56 -0
- package/dist/src/commands/captureDemoReport.js +116 -0
- package/dist/src/commands/evaluateTokenSavings.js +175 -0
- package/dist/src/commands/generateExperimentPlotsCommand.js +38 -0
- package/dist/src/commands/generatePromptVariants.js +67 -0
- package/dist/src/commands/renderExperimentReportCommand.js +131 -0
- package/dist/src/commands/runAgentPromptCommand.js +132 -0
- package/dist/src/commands/runControlledExperimentCommand.js +174 -0
- package/dist/src/commands/runFinalDemoCommand.js +123 -0
- package/dist/src/commands/runLabDemo.js +62 -0
- package/dist/src/commands/runVisualizationDemosCommand.js +67 -0
- package/dist/src/core/commandLine.js +59 -0
- package/dist/src/core/countTokens.js +8 -0
- package/dist/src/core/fileGlobs.js +100 -0
- package/dist/src/core/localProjectTarget.js +75 -0
- package/dist/src/core/pathSafety.js +19 -0
- package/dist/src/core/pythonCommand.js +30 -0
- package/dist/src/core/resolveCommand.js +110 -0
- package/dist/src/core/runMeasuredCommand.js +143 -0
- package/dist/src/evaluation/benchmarkMetadata.js +207 -0
- package/dist/src/evaluation/buildExperimentMatrix.js +75 -0
- package/dist/src/evaluation/classifyAgentRunOutcome.js +40 -0
- package/dist/src/evaluation/compareExperimentRuns.js +79 -0
- package/dist/src/evaluation/compareTokenSavings.js +47 -0
- package/dist/src/evaluation/controlledExperimentTypes.js +1 -0
- package/dist/src/evaluation/index.js +18 -0
- package/dist/src/evaluation/parseAgentAnswer.js +230 -0
- package/dist/src/evaluation/projectComplexity.js +126 -0
- package/dist/src/evaluation/projectFileTree.js +83 -0
- package/dist/src/evaluation/readEvaluationCases.js +59 -0
- package/dist/src/evaluation/renderTokenSavingsReportInput.js +55 -0
- package/dist/src/evaluation/runControlledExperiment.js +158 -0
- package/dist/src/evaluation/runMyDevKitRetrieval.js +197 -0
- package/dist/src/evaluation/runRawFullFileBaseline.js +31 -0
- package/dist/src/evaluation/scoreCorrectness.js +127 -0
- package/dist/src/evaluation/types.js +1 -0
- package/dist/src/evaluation/writeExperimentArtifacts.js +104 -0
- package/dist/src/evaluation/writeTokenSavingsArtifacts.js +57 -0
- package/dist/src/experiments/config.js +24 -0
- package/dist/src/experiments/defaultRegistry.js +7 -0
- package/dist/src/experiments/errors.js +18 -0
- package/dist/src/experiments/index.js +9 -0
- package/dist/src/experiments/outputPaths.js +25 -0
- package/dist/src/experiments/plugins/contextStrategyComparison/config.js +37 -0
- package/dist/src/experiments/plugins/contextStrategyComparison/index.js +3 -0
- package/dist/src/experiments/plugins/contextStrategyComparison/plugin.js +83 -0
- package/dist/src/experiments/plugins/contextStrategyComparison/resultMapping.js +260 -0
- package/dist/src/experiments/plugins/index.js +1 -0
- package/dist/src/experiments/registry.js +43 -0
- package/dist/src/experiments/results.js +48 -0
- package/dist/src/experiments/runner.js +181 -0
- package/dist/src/experiments/target.js +8 -0
- package/dist/src/experiments/types.js +1 -0
- package/dist/src/gallery/index.js +2 -0
- package/dist/src/gallery/types.js +1 -0
- package/dist/src/gallery/writeGalleryManifest.js +214 -0
- package/dist/src/index.js +12 -0
- package/dist/src/plots/buildExperimentPlotData.js +137 -0
- package/dist/src/plots/index.js +4 -0
- package/dist/src/plots/renderSvgChart.js +82 -0
- package/dist/src/plots/types.js +1 -0
- package/dist/src/plots/writePlotArtifacts.js +46 -0
- package/dist/src/prompts/buildPromptContext.js +68 -0
- package/dist/src/prompts/generateMyDevKitPrompt.js +106 -0
- package/dist/src/prompts/generatePromptVariants.js +36 -0
- package/dist/src/prompts/generateRawFullFilePrompt.js +97 -0
- package/dist/src/prompts/index.js +7 -0
- package/dist/src/prompts/measurePromptComplexity.js +41 -0
- package/dist/src/prompts/types.js +1 -0
- package/dist/src/prompts/writePromptArtifacts.js +43 -0
- package/dist/src/report/buildExperimentReportInput.js +339 -0
- package/dist/src/report/experimentReportTypes.js +1 -0
- package/dist/src/report/experiments/buildPluginExperimentReport.js +153 -0
- package/dist/src/report/experiments/experimentReportModel.js +1 -0
- package/dist/src/report/experiments/index.js +4 -0
- package/dist/src/report/experiments/renderPluginExperimentReportHtml.js +133 -0
- package/dist/src/report/experiments/writePluginExperimentReports.js +30 -0
- package/dist/src/report/index.js +8 -0
- package/dist/src/report/renderExperimentHtmlReport.js +354 -0
- package/dist/src/report/renderHtmlReport.js +103 -0
- package/dist/src/report/types.js +10 -0
- package/dist/src/report/writeExperimentReportArtifacts.js +38 -0
- package/dist/src/report/writeReportArtifacts.js +39 -0
- package/dist/src/screenshot/captureReportScreenshot.js +75 -0
- package/dist/src/screenshot/index.js +2 -0
- package/dist/src/screenshot/types.js +1 -0
- package/dist/src/securityValidation/artifacts.js +15 -0
- package/dist/src/securityValidation/cliAdversarial/adversarialCliConfig.js +38 -0
- package/dist/src/securityValidation/cliAdversarial/dataVolumeChecks.js +194 -0
- package/dist/src/securityValidation/cliAdversarial/jsonStdoutChecks.js +359 -0
- package/dist/src/securityValidation/cliAdversarial/malformedArtifactChecks.js +284 -0
- package/dist/src/securityValidation/cliAdversarial/malformedArtifactFixtures.js +79 -0
- package/dist/src/securityValidation/cliAdversarial/pathBoundaryChecks.js +431 -0
- package/dist/src/securityValidation/cliAdversarial/pathCases.js +144 -0
- package/dist/src/securityValidation/cliAdversarial/readOnlyBoundaryChecks.js +294 -0
- package/dist/src/securityValidation/cliAdversarial/runAdversarialCheck.js +149 -0
- package/dist/src/securityValidation/cliAdversarial/subprocessSafetyChecks.js +214 -0
- package/dist/src/securityValidation/cliAdversarial/tempWorkspace.js +160 -0
- package/dist/src/securityValidation/commandRunner.js +136 -0
- package/dist/src/securityValidation/config.js +39 -0
- package/dist/src/securityValidation/dependencies/parseNpmAudit.js +115 -0
- package/dist/src/securityValidation/dependencies/parseNpmLs.js +71 -0
- package/dist/src/securityValidation/dependencies/parseNpmOutdated.js +41 -0
- package/dist/src/securityValidation/dependencies/runDependencyChecks.js +239 -0
- package/dist/src/securityValidation/dependencies/runOsvScanner.js +43 -0
- package/dist/src/securityValidation/fuzz/fuzzHarness.js +61 -0
- package/dist/src/securityValidation/fuzz/fuzzTargets.js +204 -0
- package/dist/src/securityValidation/fuzz/randomInput.js +0 -0
- package/dist/src/securityValidation/index.js +34 -0
- package/dist/src/securityValidation/packageChecks/forbiddenPackageContents.js +67 -0
- package/dist/src/securityValidation/packageChecks/parseNpmPackDryRun.js +56 -0
- package/dist/src/securityValidation/packageChecks/runPackageChecks.js +88 -0
- package/dist/src/securityValidation/report/renderSecurityReport.js +248 -0
- package/dist/src/securityValidation/report/securityReportTypes.js +1 -0
- package/dist/src/securityValidation/staticScans/codeql.js +66 -0
- package/dist/src/securityValidation/staticScans/semgrep.js +180 -0
- package/dist/src/securityValidation/testMatrix.js +535 -0
- package/dist/src/securityValidation/types.js +34 -0
- package/dist/src/securityValidation/validate/resolveTarget.js +32 -0
- package/dist/src/securityValidation/validate/runSecurityValidation.js +169 -0
- package/dist/src/securityValidation/validate/verdict.js +73 -0
- package/dist/src/visualizationDemos/buildMyDevKitVisualizationCommands.js +59 -0
- package/dist/src/visualizationDemos/index.js +4 -0
- package/dist/src/visualizationDemos/runVisualizationDemos.js +82 -0
- package/dist/src/visualizationDemos/types.js +1 -0
- package/dist/src/visualizationDemos/writeVisualizationDemoArtifacts.js +25 -0
- package/docs/METRICS.md +286 -0
- package/examples/demo-report-input.json +78 -0
- package/examples/lab-demo-cases.json +35 -0
- package/examples/real-agent-campaign-cases.json +118 -0
- package/examples/token-savings-cases.json +122 -0
- package/package.json +91 -0
- package/tests/fixtures/fake-adversarial-cli.js +152 -0
- package/tests/fixtures/fake-my-dev-kit-cli.js +83 -0
package/package.json
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@dailephd/my-dev-kit-lab",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "Evidence, benchmark, and evaluation companion for my-dev-kit.",
|
|
6
|
+
"bin": {
|
|
7
|
+
"my-dev-kit-lab": "dist/scripts/run-final-demo.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"dist/scripts/",
|
|
11
|
+
"dist/src/",
|
|
12
|
+
"benchmarks/",
|
|
13
|
+
"docs/METRICS.md",
|
|
14
|
+
"examples/",
|
|
15
|
+
"tests/fixtures/"
|
|
16
|
+
],
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "git+https://github.com/dailephd/my-dev-kit-lab.git"
|
|
20
|
+
},
|
|
21
|
+
"homepage": "https://github.com/dailephd/my-dev-kit-lab#readme",
|
|
22
|
+
"bugs": {
|
|
23
|
+
"url": "https://github.com/dailephd/my-dev-kit-lab/issues"
|
|
24
|
+
},
|
|
25
|
+
"publishConfig": {
|
|
26
|
+
"access": "public"
|
|
27
|
+
},
|
|
28
|
+
"funding": [
|
|
29
|
+
"https://github.com/sponsors/dailephd",
|
|
30
|
+
"https://paypal.me/daile88"
|
|
31
|
+
],
|
|
32
|
+
"keywords": [
|
|
33
|
+
"benchmark",
|
|
34
|
+
"evaluation",
|
|
35
|
+
"reporting",
|
|
36
|
+
"graph-retrieval",
|
|
37
|
+
"codex",
|
|
38
|
+
"claude"
|
|
39
|
+
],
|
|
40
|
+
"author": "Dai Le",
|
|
41
|
+
"license": "UNLICENSED",
|
|
42
|
+
"scripts": {
|
|
43
|
+
"build": "node scripts/clean-dist.mjs && tsc -p tsconfig.json",
|
|
44
|
+
"test": "vitest run",
|
|
45
|
+
"test:benchmarks": "vitest run tests/benchmarks tests/scripts",
|
|
46
|
+
"test:report": "vitest run tests/report tests/commands",
|
|
47
|
+
"test:screenshot": "vitest run tests/screenshot",
|
|
48
|
+
"test:evaluation": "vitest run tests/core tests/evaluation tests/commands",
|
|
49
|
+
"test:gallery": "vitest run tests/gallery",
|
|
50
|
+
"test:demo": "vitest run tests/commands/runLabDemo.spec.ts tests/integration/runLabDemoCommand.spec.ts",
|
|
51
|
+
"test:integration": "vitest run tests/integration",
|
|
52
|
+
"test:e2e": "vitest run tests/e2e",
|
|
53
|
+
"test:agents": "vitest run tests/agents tests/commands/runAgentPromptCommand.spec.ts tests/integration/runAgentPromptFake.spec.ts",
|
|
54
|
+
"test:experiments": "vitest run tests/evaluation/buildExperimentMatrix.spec.ts tests/evaluation/classifyAgentRunOutcome.spec.ts tests/evaluation/parseAgentAnswer.spec.ts tests/evaluation/scoreCorrectness.spec.ts tests/evaluation/compareExperimentRuns.spec.ts tests/evaluation/runControlledExperiment.spec.ts tests/commands/runControlledExperimentCommand.spec.ts tests/integration/runControlledExperimentFake.spec.ts",
|
|
55
|
+
"capture-demo-report": "tsx scripts/capture-demo-report.ts",
|
|
56
|
+
"evaluate-token-savings": "tsx scripts/evaluate-token-savings.ts",
|
|
57
|
+
"generate-experiment-plots": "tsx scripts/generate-experiment-plots.ts",
|
|
58
|
+
"generate-prompt-variants": "tsx scripts/generate-prompt-variants.ts",
|
|
59
|
+
"build-gallery": "tsx scripts/build-gallery.ts",
|
|
60
|
+
"lab-demo": "tsx scripts/run-lab-demo.ts",
|
|
61
|
+
"render-experiment-report": "tsx scripts/render-experiment-report.ts",
|
|
62
|
+
"experiment:list": "tsx scripts/experiments/listExperiments.ts",
|
|
63
|
+
"experiment:describe": "tsx scripts/experiments/describeExperiment.ts",
|
|
64
|
+
"experiment:run": "tsx scripts/experiments/runExperiment.ts",
|
|
65
|
+
"run-final-demo": "tsx scripts/run-final-demo.ts",
|
|
66
|
+
"run-agent-prompt": "tsx scripts/run-agent-prompt.ts",
|
|
67
|
+
"run-controlled-experiment": "tsx scripts/run-controlled-experiment.ts",
|
|
68
|
+
"run-visualization-demos": "tsx scripts/run-visualization-demos.ts",
|
|
69
|
+
"test:plots": "vitest run tests/plots tests/commands/generateExperimentPlotsCommand.spec.ts",
|
|
70
|
+
"test:visualization-demos": "vitest run tests/visualizationDemos tests/commands/runVisualizationDemosCommand.spec.ts",
|
|
71
|
+
"verify:benchmarks": "tsx scripts/verify-benchmarks.ts",
|
|
72
|
+
"verify": "npm run build && npm run test && npm run test:benchmarks && npm run test:report && npm run test:screenshot && npm run test:evaluation && npm run test:agents && npm run test:experiments && npm run test:plots && npm run test:visualization-demos && npm run test:gallery && npm run test:demo && npm run test:integration && npm run test:e2e && npm run verify:benchmarks",
|
|
73
|
+
"security:deps": "tsx scripts/security/runDependencyChecks.ts",
|
|
74
|
+
"security:package": "tsx scripts/security/runPackageChecks.ts",
|
|
75
|
+
"security:codeql": "tsx scripts/security/runCodeql.ts",
|
|
76
|
+
"security:semgrep": "tsx scripts/security/runSemgrep.ts",
|
|
77
|
+
"test:security": "vitest run tests/security/",
|
|
78
|
+
"test:fuzz:smoke": "tsx scripts/security/runFuzzSmoke.ts",
|
|
79
|
+
"security:validate": "tsx scripts/security/validate.ts"
|
|
80
|
+
},
|
|
81
|
+
"devDependencies": {
|
|
82
|
+
"@types/node": "^24.0.0",
|
|
83
|
+
"playwright": "^1.54.1",
|
|
84
|
+
"tsx": "^4.20.3",
|
|
85
|
+
"typescript": "^5.8.3",
|
|
86
|
+
"vitest": "^3.2.4"
|
|
87
|
+
},
|
|
88
|
+
"engines": {
|
|
89
|
+
"node": ">=20"
|
|
90
|
+
}
|
|
91
|
+
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Deterministic fake CLI for the security-validation adversarial harness.
|
|
4
|
+
*
|
|
5
|
+
* Simulates a "well-behaved" my-dev-kit-style CLI for CI tests that cannot
|
|
6
|
+
* depend on a globally installed package.
|
|
7
|
+
*
|
|
8
|
+
* Supported flags:
|
|
9
|
+
* --root <dir> Source root (treated as read-only; not validated for traversal)
|
|
10
|
+
* --out <dir> Output directory for generated artifacts
|
|
11
|
+
* --index <dir> Index artifact directory (also written with fake manifest)
|
|
12
|
+
* --file <path> File path argument (parsed and stored only; the file is never read)
|
|
13
|
+
* --path <path> Graph path argument (parsed only; otherwise ignored)
|
|
14
|
+
* --node <id> Graph node argument (parsed only; otherwise ignored)
|
|
15
|
+
* --query <q> Search query (parsed only; otherwise ignored)
|
|
16
|
+
* --format <f> Output format: "json" | "text" (default: text)
|
|
17
|
+
* --emit-stderr <msg> Write msg to stderr (simulates a warning)
|
|
18
|
+
* --escape-to <dir> [HARNESS TESTING ONLY] Write an escape sentinel file here
|
|
19
|
+
* Used to verify the harness can detect writes outside workspace.
|
|
20
|
+
* --fail Exit with code 1 (simulates a CLI error)
|
|
21
|
+
*
|
|
22
|
+
* On success: writes a fake manifest.json to --out and/or --index, exits 0.
|
|
23
|
+
* On --fail: emits an error message and exits 1.
|
|
24
|
+
* Does NOT modify any files in --root.
|
|
25
|
+
* Does NOT write anywhere other than --out and --index (unless --escape-to is set).
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
29
|
+
import path from "node:path";
|
|
30
|
+
|
|
31
|
+
function parseArgs(argv) {
|
|
32
|
+
const args = argv.slice(2);
|
|
33
|
+
const result = {
|
|
34
|
+
root: null,
|
|
35
|
+
out: null,
|
|
36
|
+
index: null,
|
|
37
|
+
file: null,
|
|
38
|
+
path: null,
|
|
39
|
+
node: null,
|
|
40
|
+
query: null,
|
|
41
|
+
format: "text",
|
|
42
|
+
emitStderr: null,
|
|
43
|
+
escapeTo: null,
|
|
44
|
+
fail: false,
|
|
45
|
+
};
|
|
46
|
+
for (let i = 0; i < args.length; i++) {
|
|
47
|
+
const a = args[i];
|
|
48
|
+
if (a === "--root") result.root = args[++i];
|
|
49
|
+
else if (a === "--out") result.out = args[++i];
|
|
50
|
+
else if (a === "--index") result.index = args[++i];
|
|
51
|
+
else if (a === "--file") result.file = args[++i];
|
|
52
|
+
else if (a === "--path") result.path = args[++i];
|
|
53
|
+
else if (a === "--node") result.node = args[++i];
|
|
54
|
+
else if (a === "--query") result.query = args[++i];
|
|
55
|
+
else if (a === "--format") result.format = args[++i];
|
|
56
|
+
else if (a === "--emit-stderr") result.emitStderr = args[++i];
|
|
57
|
+
else if (a === "--escape-to") result.escapeTo = args[++i];
|
|
58
|
+
else if (a === "--fail") result.fail = true;
|
|
59
|
+
}
|
|
60
|
+
return result;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
function fakeManifest(root) {
|
|
64
|
+
return JSON.stringify(
|
|
65
|
+
{
|
|
66
|
+
schemaVersion: 1,
|
|
67
|
+
version: "0.0.0-fake",
|
|
68
|
+
generatedAt: new Date().toISOString(),
|
|
69
|
+
root: root ?? "",
|
|
70
|
+
files: [],
|
|
71
|
+
},
|
|
72
|
+
null,
|
|
73
|
+
2
|
|
74
|
+
);
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
/**
 * Creates `dir` (recursively) and writes `manifest` into `dir/manifest.json`.
 *
 * @param dir Target directory.
 * @param manifest Manifest text to write.
 * @param flag Flag name used in the error message ("--out" or "--index").
 * @returns `true` on success; `false` after reporting the failure on stderr.
 */
function writeManifestInto(dir, manifest, flag) {
  try {
    mkdirSync(dir, { recursive: true });
    writeFileSync(path.join(dir, "manifest.json"), manifest, "utf8");
    return true;
  } catch (err) {
    process.stderr.write(`fake-cli: failed to write ${flag}: ${err.message}\n`);
    return false;
  }
}

/**
 * Entry point of the fake CLI.
 *
 * Order of operations: optional stderr warning, `--fail` short-circuit,
 * manifest written to `--out` and then `--index`, optional `--escape-to`
 * sentinel, and finally a status line on stdout (JSON or text).
 *
 * The exit status is reported through `process.exitCode` rather than
 * `process.exit()`, so stdout/stderr writes still in flight are flushed
 * before the process ends instead of being cut off.
 */
function run() {
  const opts = parseArgs(process.argv);

  if (opts.emitStderr) {
    process.stderr.write(`[fake-cli] warning: ${opts.emitStderr}\n`);
  }

  if (opts.fail) {
    // In JSON mode the error goes to stdout as a JSON document; otherwise it
    // goes to stderr as plain text.
    if (opts.format === "json") {
      process.stdout.write(
        JSON.stringify({ error: "fake-cli: --fail was requested" }) + "\n"
      );
    } else {
      process.stderr.write("fake-cli: --fail was requested\n");
    }
    process.exitCode = 1;
    return;
  }

  const manifest = fakeManifest(opts.root);

  // A failed write aborts immediately: nothing further is written and no
  // status line is printed.
  if (opts.out && !writeManifestInto(opts.out, manifest, "--out")) {
    process.exitCode = 1;
    return;
  }

  if (opts.index && !writeManifestInto(opts.index, manifest, "--index")) {
    process.exitCode = 1;
    return;
  }

  // FOR HARNESS TESTING ONLY: deliberately write outside declared paths.
  // This flag exists solely so the harness can verify its own detection logic.
  if (opts.escapeTo) {
    try {
      mkdirSync(opts.escapeTo, { recursive: true });
      writeFileSync(
        path.join(opts.escapeTo, "escape-sentinel.txt"),
        "harness-escape-detection-test\n",
        "utf8"
      );
    } catch (err) {
      // Best effort: a failed escape write is reported but does not fail the run.
      process.stderr.write(`fake-cli: --escape-to failed: ${err.message}\n`);
    }
  }

  if (opts.format === "json") {
    process.stdout.write(
      JSON.stringify({
        status: "ok",
        root: opts.root,
        out: opts.out,
        index: opts.index,
      }) + "\n"
    );
  } else if (opts.out || opts.index) {
    process.stdout.write("fake-cli: artifacts written\n");
  } else {
    process.stdout.write("fake-cli: ok (no output requested)\n");
  }

  process.exitCode = 0;
}

run();
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import fs from "node:fs";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
|
|
5
|
+
/**
 * Looks up the argument that follows the first occurrence of `flag` in
 * `process.argv`.
 *
 * @param flag Flag to search for, e.g. "--out".
 * @returns The next argument, or `undefined` when the flag is absent or is
 *   the last argument.
 */
function argValue(flag) {
  const position = process.argv.indexOf(flag);
  if (position < 0) {
    return undefined;
  }
  return process.argv[position + 1];
}
|
|
9
|
+
|
|
10
|
+
/**
 * Infers the benchmark project name from an index path by substring match.
 *
 * Backslashes are converted to forward slashes first. Candidates are tried
 * in a fixed order and the first one contained in the path wins.
 *
 * @param indexPath Index path; falsy values are treated as an empty string.
 * @returns One of "todo-ts", "todo-python", "todo-js", "todo-mixed-ts-py",
 *   or "unknown" when none of them occurs in the path.
 */
function benchmarkProjectFromIndexPath(indexPath) {
  const forwardSlashed = String(indexPath || "").split("\\").join("/");
  const knownProjects = ["todo-ts", "todo-python", "todo-js", "todo-mixed-ts-py"];
  const matched = knownProjects.find((name) => forwardSlashed.includes(name));
  return matched === undefined ? "unknown" : matched;
}
|
|
18
|
+
|
|
19
|
+
const command = process.argv[2];

/**
 * Runs one fake my-dev-kit subcommand and returns the exit code to report.
 *
 * Supported subcommands: "index", "search", "lookup", "slice", "source" and
 * "view". Each one prints a canned response; "index" and "view" also write
 * files when `--out` is given. Any other name is reported on stderr.
 *
 * @param name Subcommand name (the first CLI argument).
 * @returns 0 for a supported subcommand, 1 otherwise.
 */
function runFakeCommand(name) {
  if (name === "index") {
    const outDir = argValue("--out");
    if (outDir) {
      fs.mkdirSync(outDir, { recursive: true });
      fs.writeFileSync(path.join(outDir, "fake-index.json"), JSON.stringify({ ok: true }));
      fs.writeFileSync(path.join(outDir, "manifest.json"), JSON.stringify({ ok: true, fake: true }));
    }
    console.log(JSON.stringify({ ok: true, command: "index", outDir }));
    return 0;
  }

  if (name === "search") {
    const indexPath = argValue("--index");
    // `project` is always one of the four keys below or "unknown".
    const project = benchmarkProjectFromIndexPath(indexPath);
    const mapping = {
      "todo-ts": { nodeId: "todo-ts:createTask", file: "src/taskService.ts", symbol: "createTask" },
      "todo-python": { nodeId: "todo-python:complete_task", file: "src/task_service.py", symbol: "complete_task" },
      "todo-js": { nodeId: "todo-js:listOpenTasks", file: "src/taskService.js", symbol: "listOpenTasks" },
      "todo-mixed-ts-py": { nodeId: "todo-mixed:summarize_tasks", file: "python/task_service.py", symbol: "summarize_tasks" }
    };
    console.log(JSON.stringify({ results: [mapping[project] || { nodeId: "unknown", file: "unknown", symbol: "unknown" }] }));
    return 0;
  }

  if (name === "lookup") {
    const node = argValue("--node");
    console.log(JSON.stringify({ nodeId: node, summary: `lookup for ${node}` }));
    return 0;
  }

  if (name === "slice") {
    const node = argValue("--node");
    console.log(JSON.stringify({ nodeId: node, slice: `slice for ${node}` }));
    return 0;
  }

  if (name === "source") {
    const node = argValue("--node");
    const sourceMap = {
      "todo-ts:createTask": "1 export class TaskService {\n2 createTask(title: string) {\n3 return this.store.create(title.trim());\n4 }\n5 }",
      "todo-python:complete_task": "1 class TaskService:\n2 def complete_task(self, task_id: str) -> dict:\n3 return self._store.update(task_id, lambda task: {**task, 'completed': True})",
      "todo-js:listOpenTasks": "1 export class TaskService {\n2 listOpenTasks() {\n3 return this.store.list().filter((task) => !task.completed);\n4 }\n5 }",
      "todo-mixed:summarize_tasks": "1 def summarize_tasks(self) -> dict:\n2 completed = len([task for task in self._tasks if task['completed']])\n3 return {'total': len(self._tasks), 'open': len(self._tasks) - completed, 'completed': completed}"
    };
    // `node` comes straight from the command line. Only own keys count, so a
    // value such as "constructor" or "__proto__" falls through to the generic
    // text instead of resolving to an inherited non-string.
    const known = Object.prototype.hasOwnProperty.call(sourceMap, node);
    process.stdout.write((known && sourceMap[node]) || `1 source for ${node}`);
    return 0;
  }

  if (name === "view") {
    const graph = argValue("--graph") || "unknown";
    const outPath = argValue("--out");
    if (outPath) {
      fs.mkdirSync(path.dirname(outPath), { recursive: true });
      fs.writeFileSync(
        outPath,
        `<svg xmlns="http://www.w3.org/2000/svg" width="320" height="160"><text x="20" y="40">fake ${graph}</text><circle cx="80" cy="90" r="18" fill="#2563eb" /><circle cx="200" cy="90" r="18" fill="#16a34a" /><line x1="98" y1="90" x2="182" y2="90" stroke="#344054" /></svg>\n`
      );
    }
    console.log(JSON.stringify({ ok: true, command: "view", graph, outPath }));
    return 0;
  }

  process.stderr.write(`Unsupported fake my-dev-kit command: ${name}`);
  return 1;
}

// Report the status through process.exitCode instead of process.exit() so
// output still queued on stdout/stderr is flushed before the process ends.
process.exitCode = runFakeCommand(command);
|