npm - @dailephd/my-dev-kit-lab - Versions diffs - 0.2.0 - Mend

@dailephd/my-dev-kit-lab 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (250) hide show

package/package.json ADDED Viewed

@@ -0,0 +1,91 @@
+{
+  "name": "@dailephd/my-dev-kit-lab",
+  "version": "0.2.0",
+  "type": "module",
+  "description": "Evidence, benchmark, and evaluation companion for my-dev-kit.",
+  "bin": {
+    "my-dev-kit-lab": "dist/scripts/run-final-demo.js"
+  },
+  "files": [
+    "dist/scripts/",
+    "dist/src/",
+    "benchmarks/",
+    "docs/METRICS.md",
+    "examples/",
+    "tests/fixtures/"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/dailephd/my-dev-kit-lab.git"
+  },
+  "homepage": "https://github.com/dailephd/my-dev-kit-lab#readme",
+  "bugs": {
+    "url": "https://github.com/dailephd/my-dev-kit-lab/issues"
+  },
+  "publishConfig": {
+    "access": "public"
+  },
+  "funding": [
+    "https://github.com/sponsors/dailephd",
+    "https://paypal.me/daile88"
+  ],
+  "keywords": [
+    "benchmark",
+    "evaluation",
+    "reporting",
+    "graph-retrieval",
+    "codex",
+    "claude"
+  ],
+  "author": "Dai Le",
+  "license": "UNLICENSED",
+  "scripts": {
+    "build": "node scripts/clean-dist.mjs && tsc -p tsconfig.json",
+    "test": "vitest run",
+    "test:benchmarks": "vitest run tests/benchmarks tests/scripts",
+    "test:report": "vitest run tests/report tests/commands",
+    "test:screenshot": "vitest run tests/screenshot",
+    "test:evaluation": "vitest run tests/core tests/evaluation tests/commands",
+    "test:gallery": "vitest run tests/gallery",
+    "test:demo": "vitest run tests/commands/runLabDemo.spec.ts tests/integration/runLabDemoCommand.spec.ts",
+    "test:integration": "vitest run tests/integration",
+    "test:e2e": "vitest run tests/e2e",
+    "test:agents": "vitest run tests/agents tests/commands/runAgentPromptCommand.spec.ts tests/integration/runAgentPromptFake.spec.ts",
+    "test:experiments": "vitest run tests/evaluation/buildExperimentMatrix.spec.ts tests/evaluation/classifyAgentRunOutcome.spec.ts tests/evaluation/parseAgentAnswer.spec.ts tests/evaluation/scoreCorrectness.spec.ts tests/evaluation/compareExperimentRuns.spec.ts tests/evaluation/runControlledExperiment.spec.ts tests/commands/runControlledExperimentCommand.spec.ts tests/integration/runControlledExperimentFake.spec.ts",
+    "capture-demo-report": "tsx scripts/capture-demo-report.ts",
+    "evaluate-token-savings": "tsx scripts/evaluate-token-savings.ts",
+    "generate-experiment-plots": "tsx scripts/generate-experiment-plots.ts",
+    "generate-prompt-variants": "tsx scripts/generate-prompt-variants.ts",
+    "build-gallery": "tsx scripts/build-gallery.ts",
+    "lab-demo": "tsx scripts/run-lab-demo.ts",
+    "render-experiment-report": "tsx scripts/render-experiment-report.ts",
+    "experiment:list": "tsx scripts/experiments/listExperiments.ts",
+    "experiment:describe": "tsx scripts/experiments/describeExperiment.ts",
+    "experiment:run": "tsx scripts/experiments/runExperiment.ts",
+    "run-final-demo": "tsx scripts/run-final-demo.ts",
+    "run-agent-prompt": "tsx scripts/run-agent-prompt.ts",
+    "run-controlled-experiment": "tsx scripts/run-controlled-experiment.ts",
+    "run-visualization-demos": "tsx scripts/run-visualization-demos.ts",
+    "test:plots": "vitest run tests/plots tests/commands/generateExperimentPlotsCommand.spec.ts",
+    "test:visualization-demos": "vitest run tests/visualizationDemos tests/commands/runVisualizationDemosCommand.spec.ts",
+    "verify:benchmarks": "tsx scripts/verify-benchmarks.ts",
+    "verify": "npm run build && npm run test && npm run test:benchmarks && npm run test:report && npm run test:screenshot && npm run test:evaluation && npm run test:agents && npm run test:experiments && npm run test:plots && npm run test:visualization-demos && npm run test:gallery && npm run test:demo && npm run test:integration && npm run test:e2e && npm run verify:benchmarks",
+    "security:deps": "tsx scripts/security/runDependencyChecks.ts",
+    "security:package": "tsx scripts/security/runPackageChecks.ts",
+    "security:codeql": "tsx scripts/security/runCodeql.ts",
+    "security:semgrep": "tsx scripts/security/runSemgrep.ts",
+    "test:security": "vitest run tests/security/",
+    "test:fuzz:smoke": "tsx scripts/security/runFuzzSmoke.ts",
+    "security:validate": "tsx scripts/security/validate.ts"
+  },
+  "devDependencies": {
+    "@types/node": "^24.0.0",
+    "playwright": "^1.54.1",
+    "tsx": "^4.20.3",
+    "typescript": "^5.8.3",
+    "vitest": "^3.2.4"
+  },
+  "engines": {
+    "node": ">=20"
+  }
+}

package/tests/fixtures/fake-adversarial-cli.js ADDED Viewed

@@ -0,0 +1,152 @@
+#!/usr/bin/env node
+/**
+ * Deterministic fake CLI for the security-validation adversarial harness.
+ *
+ * Simulates a "well-behaved" my-dev-kit-style CLI for CI tests that cannot
+ * depend on a globally installed package.
+ *
+ * Supported flags:
+ *   --root <dir>          Source root (treated as read-only; not validated for traversal)
+ *   --out <dir>           Output directory for generated artifacts
+ *   --index <dir>         Index artifact directory (also written with fake manifest)
+ *   --file <path>         File path argument (parsed but otherwise unused; nothing is read)
+ *   --path <path>         Graph path argument (parsed but otherwise unused)
+ *   --node <id>           Graph node argument (parsed but otherwise unused)
+ *   --query <q>           Search query (parsed but otherwise unused)
+ *   --format <f>          Output format: "json" | "text" (default: text)
+ *   --emit-stderr <msg>   Write msg to stderr (simulates a warning)
+ *   --escape-to <dir>     [HARNESS TESTING ONLY] Write an escape sentinel file here
+ *                         Used to verify the harness can detect writes outside workspace.
+ *   --fail                Exit with code 1 (simulates a CLI error)
+ *
+ * On success: writes a fake manifest.json to --out and/or --index, exits 0.
+ * On --fail: emits an error message and exits 1.
+ * Does NOT modify any files in --root.
+ * Does NOT write anywhere other than --out and --index (unless --escape-to is set).
+ */
+import { mkdirSync, writeFileSync } from "node:fs";
+import path from "node:path";
+function parseArgs(argv) {
+  const args = argv.slice(2);
+  const result = {
+    root: null,
+    out: null,
+    index: null,
+    file: null,
+    path: null,
+    node: null,
+    query: null,
+    format: "text",
+    emitStderr: null,
+    escapeTo: null,
+    fail: false,
+  };
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i];
+    if (a === "--root") result.root = args[++i];
+    else if (a === "--out") result.out = args[++i];
+    else if (a === "--index") result.index = args[++i];
+    else if (a === "--file") result.file = args[++i];
+    else if (a === "--path") result.path = args[++i];
+    else if (a === "--node") result.node = args[++i];
+    else if (a === "--query") result.query = args[++i];
+    else if (a === "--format") result.format = args[++i];
+    else if (a === "--emit-stderr") result.emitStderr = args[++i];
+    else if (a === "--escape-to") result.escapeTo = args[++i];
+    else if (a === "--fail") result.fail = true;
+  }
+  return result;
+}
+function fakeManifest(root) {
+  return JSON.stringify(
+    {
+      schemaVersion: 1,
+      version: "0.0.0-fake",
+      generatedAt: new Date().toISOString(),
+      root: root ?? "",
+      files: [],
+    },
+    null,
+    2
+  );
+}
+function run() {
+  const opts = parseArgs(process.argv);
+  if (opts.emitStderr) {
+    process.stderr.write(`[fake-cli] warning: ${opts.emitStderr}\n`);
+  }
+  if (opts.fail) {
+    if (opts.format === "json") {
+      process.stdout.write(
+        JSON.stringify({ error: "fake-cli: --fail was requested" }) + "\n"
+      );
+    } else {
+      process.stderr.write("fake-cli: --fail was requested\n");
+    }
+    process.exit(1);
+  }
+  const manifest = fakeManifest(opts.root);
+  if (opts.out) {
+    try {
+      mkdirSync(opts.out, { recursive: true });
+      writeFileSync(path.join(opts.out, "manifest.json"), manifest, "utf8");
+    } catch (err) {
+      process.stderr.write(`fake-cli: failed to write --out: ${err.message}\n`);
+      process.exit(1);
+    }
+  }
+  if (opts.index) {
+    try {
+      mkdirSync(opts.index, { recursive: true });
+      writeFileSync(path.join(opts.index, "manifest.json"), manifest, "utf8");
+    } catch (err) {
+      process.stderr.write(`fake-cli: failed to write --index: ${err.message}\n`);
+      process.exit(1);
+    }
+  }
+  // FOR HARNESS TESTING ONLY: deliberately write outside declared paths.
+  // This flag exists solely so the harness can verify its own detection logic.
+  if (opts.escapeTo) {
+    try {
+      mkdirSync(opts.escapeTo, { recursive: true });
+      writeFileSync(
+        path.join(opts.escapeTo, "escape-sentinel.txt"),
+        "harness-escape-detection-test\n",
+        "utf8"
+      );
+    } catch (err) {
+      process.stderr.write(`fake-cli: --escape-to failed: ${err.message}\n`);
+    }
+  }
+  if (opts.format === "json") {
+    process.stdout.write(
+      JSON.stringify({
+        status: "ok",
+        root: opts.root,
+        out: opts.out,
+        index: opts.index,
+      }) + "\n"
+    );
+  } else {
+    if (opts.out || opts.index) {
+      process.stdout.write("fake-cli: artifacts written\n");
+    } else {
+      process.stdout.write("fake-cli: ok (no output requested)\n");
+    }
+  }
+  process.exit(0);
+}
+run();

package/tests/fixtures/fake-my-dev-kit-cli.js ADDED Viewed

@@ -0,0 +1,83 @@
+#!/usr/bin/env node
+import fs from "node:fs";
+import path from "node:path";
+function argValue(flag) {
+  const index = process.argv.indexOf(flag);
+  return index >= 0 ? process.argv[index + 1] : undefined;
+}
+function benchmarkProjectFromIndexPath(indexPath) {
+  const normalized = String(indexPath || "").replace(/\\/g, "/");
+  if (normalized.includes("todo-ts")) return "todo-ts";
+  if (normalized.includes("todo-python")) return "todo-python";
+  if (normalized.includes("todo-js")) return "todo-js";
+  if (normalized.includes("todo-mixed-ts-py")) return "todo-mixed-ts-py";
+  return "unknown";
+}
+const command = process.argv[2];
+if (command === "index") {
+  const outDir = argValue("--out");
+  if (outDir) {
+    fs.mkdirSync(outDir, { recursive: true });
+    fs.writeFileSync(path.join(outDir, "fake-index.json"), JSON.stringify({ ok: true }));
+    fs.writeFileSync(path.join(outDir, "manifest.json"), JSON.stringify({ ok: true, fake: true }));
+  }
+  console.log(JSON.stringify({ ok: true, command: "index", outDir }));
+  process.exit(0);
+}
+if (command === "search") {
+  const indexPath = argValue("--index");
+  const project = benchmarkProjectFromIndexPath(indexPath);
+  const mapping = {
+    "todo-ts": { nodeId: "todo-ts:createTask", file: "src/taskService.ts", symbol: "createTask" },
+    "todo-python": { nodeId: "todo-python:complete_task", file: "src/task_service.py", symbol: "complete_task" },
+    "todo-js": { nodeId: "todo-js:listOpenTasks", file: "src/taskService.js", symbol: "listOpenTasks" },
+    "todo-mixed-ts-py": { nodeId: "todo-mixed:summarize_tasks", file: "python/task_service.py", symbol: "summarize_tasks" }
+  };
+  console.log(JSON.stringify({ results: [mapping[project] || { nodeId: "unknown", file: "unknown", symbol: "unknown" }] }));
+  process.exit(0);
+}
+if (command === "lookup") {
+  const node = argValue("--node");
+  console.log(JSON.stringify({ nodeId: node, summary: `lookup for ${node}` }));
+  process.exit(0);
+}
+if (command === "slice") {
+  const node = argValue("--node");
+  console.log(JSON.stringify({ nodeId: node, slice: `slice for ${node}` }));
+  process.exit(0);
+}
+if (command === "source") {
+  const node = argValue("--node");
+  const sourceMap = {
+    "todo-ts:createTask": "1 export class TaskService {\n2   createTask(title: string) {\n3     return this.store.create(title.trim());\n4   }\n5 }",
+    "todo-python:complete_task": "1 class TaskService:\n2     def complete_task(self, task_id: str) -> dict:\n3         return self._store.update(task_id, lambda task: {**task, 'completed': True})",
+    "todo-js:listOpenTasks": "1 export class TaskService {\n2   listOpenTasks() {\n3     return this.store.list().filter((task) => !task.completed);\n4   }\n5 }",
+    "todo-mixed:summarize_tasks": "1 def summarize_tasks(self) -> dict:\n2     completed = len([task for task in self._tasks if task['completed']])\n3     return {'total': len(self._tasks), 'open': len(self._tasks) - completed, 'completed': completed}"
+  };
+  process.stdout.write(sourceMap[node] || `1 source for ${node}`);
+  process.exit(0);
+}
+if (command === "view") {
+  const graph = argValue("--graph") || "unknown";
+  const outPath = argValue("--out");
+  if (outPath) {
+    fs.mkdirSync(path.dirname(outPath), { recursive: true });
+    fs.writeFileSync(
+      outPath,
+      `<svg xmlns="http://www.w3.org/2000/svg" width="320" height="160"><text x="20" y="40">fake ${graph}</text><circle cx="80" cy="90" r="18" fill="#2563eb" /><circle cx="200" cy="90" r="18" fill="#16a34a" /><line x1="98" y1="90" x2="182" y2="90" stroke="#344054" /></svg>\n`
+    );
+  }
+  console.log(JSON.stringify({ ok: true, command: "view", graph, outPath }));
+  process.exit(0);
+}
+process.stderr.write(`Unsupported fake my-dev-kit command: ${command}`);
+process.exit(1);