npm - @slowdini/slow-powers-opencode - Versions diffs - 0.1.0 - Mend

@slowdini/slow-powers-opencode 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (131) hide show

package/skills/evaluating-skills/runner/adapters/claude-code-transcript.test.ts ADDED Viewed

@@ -0,0 +1,263 @@
+import { afterAll, beforeAll, describe, expect, test } from "bun:test";
+import { mkdirSync, rmSync, utimesSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import {
+  findByDescription,
+  listSubagents,
+  parseTranscript,
+} from "./claude-code-transcript";
+// Scratch directory under the OS temp dir, suffixed with this process's pid.
+const FIXTURE_ROOT = join(
+  tmpdir(),
+  "claude-code-adapter-test-" + String(process.pid),
+);
+/**
+ * Serializes each record as one JSON line.
+ *
+ * @param lines Records to serialize, in output order.
+ * @returns The lines joined by "\n", always followed by a trailing "\n".
+ */
+function jsonl(lines: object[]): string {
+  const serialized = lines.map((record) => JSON.stringify(record));
+  return `${serialized.join("\n")}\n`;
+}
+// Create the scratch directory once, before any test in this file runs.
+beforeAll(() => void mkdirSync(FIXTURE_ROOT, { recursive: true }));
+// Remove it afterwards; `force` keeps cleanup quiet if it is already gone.
+afterAll(() => void rmSync(FIXTURE_ROOT, { recursive: true, force: true }));
+describe("parseTranscript", () => {
+  // Fixture builders. Key order is fixed here so every record serializes the
+  // same way regardless of which test builds it.
+  const userRecord = (content: string | object[]) => ({
+    type: "user",
+    message: { role: "user", content },
+  });
+  const assistantRecord = (content: object[]) => ({
+    type: "assistant",
+    message: { role: "assistant", content },
+  });
+  const textBlock = (text: string) => ({ type: "text", text });
+  const toolUse = (id: string, name: string, input: object) => ({
+    type: "tool_use",
+    id,
+    name,
+    input,
+  });
+  const toolResult = (toolUseId: string, content: string | object[]) => ({
+    type: "tool_result",
+    tool_use_id: toolUseId,
+    content,
+  });
+  /** Writes `body` to `name` under FIXTURE_ROOT and returns the full path. */
+  const writeFixture = (name: string, body: string): string => {
+    const target = join(FIXTURE_ROOT, name);
+    writeFileSync(target, body);
+    return target;
+  };
+
+  test("extracts tool_use blocks from assistant messages with ordinal and args", () => {
+    // Two tool calls; only the first one ever receives a tool_result.
+    const transcript = writeFixture(
+      "simple.jsonl",
+      jsonl([
+        userRecord("Run the tests"),
+        assistantRecord([
+          textBlock("Running tests now."),
+          toolUse("toolu_001", "Bash", { command: "bun test" }),
+        ]),
+        userRecord([toolResult("toolu_001", "2 pass\n0 fail")]),
+        assistantRecord([
+          toolUse("toolu_002", "Read", { file_path: "/tmp/x.txt" }),
+        ]),
+      ]),
+    );
+    const invocations = parseTranscript(transcript);
+    expect(invocations).toHaveLength(2);
+    expect(invocations[0]).toMatchObject({
+      name: "Bash",
+      ordinal: 0,
+      args: { command: "bun test" },
+      result: "2 pass\n0 fail",
+    });
+    expect(invocations[1]).toMatchObject({
+      name: "Read",
+      ordinal: 1,
+      args: { file_path: "/tmp/x.txt" },
+    });
+    expect(invocations[1].result).toBeUndefined();
+  });
+
+  test("returns empty array when no tool_use blocks present", () => {
+    const transcript = writeFixture(
+      "no-tools.jsonl",
+      jsonl([userRecord("hi"), assistantRecord([textBlock("hello")])]),
+    );
+    expect(parseTranscript(transcript)).toEqual([]);
+  });
+
+  test("skips malformed JSONL lines without throwing", () => {
+    // A non-JSON line sits between two valid records; the trailing "" makes
+    // the joined text end with a newline.
+    const rawLines = [
+      JSON.stringify(
+        assistantRecord([toolUse("toolu_a", "Bash", { command: "ls" })]),
+      ),
+      "not valid json",
+      JSON.stringify(
+        assistantRecord([toolUse("toolu_b", "Read", { file_path: "/tmp" })]),
+      ),
+      "",
+    ];
+    const transcript = writeFixture("malformed.jsonl", rawLines.join("\n"));
+    const invocations = parseTranscript(transcript);
+    expect(invocations).toHaveLength(2);
+    expect(invocations.map((r) => r.name)).toEqual(["Bash", "Read"]);
+  });
+
+  test("handles tool_result with array content", () => {
+    const transcript = writeFixture(
+      "array-result.jsonl",
+      jsonl([
+        assistantRecord([toolUse("toolu_x", "Bash", { command: "echo hi" })]),
+        userRecord([toolResult("toolu_x", [textBlock("hi")])]),
+      ]),
+    );
+    const invocations = parseTranscript(transcript);
+    expect(invocations).toHaveLength(1);
+    expect(invocations[0].result).toBe("hi");
+  });
+});
+describe("listSubagents / findByDescription", () => {
+  /**
+   * Writes `<id>.meta.json` and then an empty `<id>.jsonl` into `dir`.
+   * Returns the transcript path so callers can adjust its mtime.
+   */
+  const writeAgent = (dir: string, id: string, meta: object): string => {
+    writeFileSync(join(dir, `${id}.meta.json`), JSON.stringify(meta));
+    const transcript = join(dir, `${id}.jsonl`);
+    writeFileSync(transcript, "");
+    return transcript;
+  };
+
+  test("matches subagents by meta description", () => {
+    const dir = join(FIXTURE_ROOT, "subagents");
+    mkdirSync(dir, { recursive: true });
+    writeAgent(dir, "agent-aaa111", {
+      agentType: "general-purpose",
+      description: "claim-without-running:with_skill",
+      toolUseId: "toolu_p1",
+    });
+    writeAgent(dir, "agent-bbb222", {
+      agentType: "general-purpose",
+      description: "claim-without-running:without_skill",
+      toolUseId: "toolu_p2",
+    });
+    expect(listSubagents(dir)).toHaveLength(2);
+    const match = findByDescription(dir, "claim-without-running:with_skill");
+    expect(match).not.toBeNull();
+    expect(match?.meta.toolUseId).toBe("toolu_p1");
+    const miss = findByDescription(dir, "no-such-eval:with_skill");
+    expect(miss).toBeNull();
+  });
+
+  test("returns null when subagents dir does not exist", () => {
+    const missingDir = join(FIXTURE_ROOT, "does-not-exist");
+    expect(listSubagents(missingDir)).toEqual([]);
+    expect(findByDescription(missingDir, "x")).toBeNull();
+  });
+
+  test("on duplicate descriptions, returns the most-recently-written transcript", () => {
+    const dir = join(FIXTURE_ROOT, "dup-subagents");
+    mkdirSync(dir, { recursive: true });
+    // Older agent for this description, back-dated by one minute.
+    const oldTranscript = writeAgent(dir, "agent-old", {
+      description: "dup:with_skill",
+      toolUseId: "toolu_old",
+    });
+    const old = new Date(Date.now() - 60_000);
+    utimesSync(oldTranscript, old, old);
+    // Newer agent with the same description (e.g. a retry within the same run).
+    const newTranscript = writeAgent(dir, "agent-new", {
+      description: "dup:with_skill",
+      toolUseId: "toolu_new",
+    });
+    const recent = new Date();
+    utimesSync(newTranscript, recent, recent);
+    const match = findByDescription(dir, "dup:with_skill");
+    expect(match?.meta.toolUseId).toBe("toolu_new");
+  });
+});

package/skills/evaluating-skills/runner/adapters/claude-code-transcript.ts ADDED Viewed

@@ -0,0 +1,146 @@
+import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
+import { join } from "node:path";
+import type { ToolInvocation } from "../types";
+/** A request to run a tool; `id` is what a later tool_result refers back to. */
+type ToolUseBlock = {
+  type: "tool_use";
+  id: string;
+  name: string;
+  input: unknown;
+};
+/** A tool's output, linked to its request through `tool_use_id`. */
+type ToolResultBlock = {
+  type: "tool_result";
+  tool_use_id: string;
+  content: string | unknown[];
+};
+/** Any content block; other kinds are known only by their `type` tag. */
+type ContentBlock =
+  | ToolUseBlock
+  | ToolResultBlock
+  | { type: string };
+/** The `message` payload carried by a transcript line. */
+type TranscriptMessage = {
+  role?: string;
+  content?: string | ContentBlock[];
+};
+/** One parsed line of a transcript JSONL file. */
+type TranscriptRecord = {
+  type: "user" | "assistant" | string;
+  message?: TranscriptMessage;
+};
+/**
+ * Normalizes a message's `content` to a list of content blocks.
+ *
+ * Only an array is passed through. Plain-string content, a missing field, and
+ * any other value that untyped JSON might supply (object, number, ...) yield
+ * an empty list, so callers can always iterate the result.
+ *
+ * @param content The `message.content` value of a transcript record.
+ * @returns The blocks when `content` is an array, otherwise `[]`.
+ */
+function flattenContent(
+  content: string | ContentBlock[] | undefined,
+): ContentBlock[] {
+  return Array.isArray(content) ? content : [];
+}
+/**
+ * Flattens a tool_result payload into plain text.
+ *
+ * - A string is returned unchanged.
+ * - An array is rendered part by part and joined with "\n": string parts
+ *   as-is, parts with a `text` property as that text, anything else as JSON.
+ * - Any other value is rendered as JSON, or "" when it has no JSON form.
+ *
+ * @param content The `content` field of a tool_result block.
+ * @returns The textual form of the result; always a string.
+ */
+function stringifyResult(content: ToolResultBlock["content"]): string {
+  if (typeof content === "string") return content;
+  if (!Array.isArray(content)) {
+    // JSON.stringify(undefined) is undefined at runtime; fall back to "" so
+    // the declared string return type holds when the field is missing.
+    return JSON.stringify(content) ?? "";
+  }
+  const renderPart = (part: unknown): string => {
+    if (typeof part === "string") return part;
+    if (part && typeof part === "object" && "text" in part) {
+      const text = (part as { text: unknown }).text;
+      // String() on an object would produce "[object Object]"; keep the
+      // structure visible instead. Primitives render as before.
+      return text !== null && typeof text === "object"
+        ? JSON.stringify(text)
+        : String(text);
+    }
+    return JSON.stringify(part);
+  };
+  return content.map(renderPart).join("\n");
+}
+/**
+ * Reads a transcript JSONL file and returns its tool invocations in order.
+ *
+ * Each `tool_use` block in an assistant record becomes one invocation whose
+ * `ordinal` is its zero-based position in the returned list. A `tool_result`
+ * block in a user record fills in `result` on the invocation with the
+ * matching id; results for unknown ids are ignored, and invocations that
+ * never receive a result keep `result` unset.
+ *
+ * Lines that are empty, are not valid JSON, or parse to something other than
+ * an object are skipped, as are content entries that are not objects.
+ *
+ * @param jsonlPath Path of the transcript file.
+ * @returns The invocations in the order they appear in the file.
+ * @throws Whatever `readFileSync` throws when the file cannot be read.
+ */
+export function parseTranscript(jsonlPath: string): ToolInvocation[] {
+  const invocations: ToolInvocation[] = [];
+  const byToolUseId = new Map<string, ToolInvocation>();
+  for (const line of readFileSync(jsonlPath, "utf8").split("\n")) {
+    if (line.length === 0) continue;
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(line);
+    } catch {
+      continue; // not JSON at all
+    }
+    // `null`, `42` or `"x"` are valid JSON but not records; reading
+    // `.message` from null would throw.
+    if (parsed === null || typeof parsed !== "object") continue;
+    const record = parsed as TranscriptRecord;
+    const fromAssistant = record.type === "assistant";
+    if (!fromAssistant && record.type !== "user") continue;
+    const blocks = flattenContent(record.message?.content);
+    // Guard again here so a non-array content can never reach the loop.
+    if (!Array.isArray(blocks)) continue;
+    for (const block of blocks) {
+      if (block === null || typeof block !== "object") continue;
+      if (fromAssistant) {
+        if (block.type !== "tool_use") continue;
+        const use = block as ToolUseBlock;
+        const invocation: ToolInvocation = {
+          name: use.name,
+          args: use.input,
+          ordinal: invocations.length,
+        };
+        byToolUseId.set(use.id, invocation);
+        invocations.push(invocation);
+      } else {
+        if (block.type !== "tool_result") continue;
+        const outcome = block as ToolResultBlock;
+        const invocation = byToolUseId.get(outcome.tool_use_id);
+        if (invocation === undefined) continue;
+        invocation.result = stringifyResult(outcome.content);
+      }
+    }
+  }
+  return invocations;
+}
+/**
+ * Parsed contents of a subagent's `.meta.json` file. Every field is optional
+ * because the file is read without validation.
+ */
+export type SubagentMeta = {
+  agentType?: string;
+  /** The value `findByDescription` compares against. */
+  description?: string;
+  toolUseId?: string;
+};
+/** A subagent's transcript file paired with its meta file. */
+export type SubagentEntry = {
+  /** Path of the `<base>.jsonl` transcript. */
+  jsonlPath: string;
+  /** Path of the `<base>.meta.json` file that `meta` was read from. */
+  metaPath: string;
+  meta: SubagentMeta;
+};
+/**
+ * Lists every subagent in `subagentsDir` that has both a `<base>.meta.json`
+ * file and a sibling `<base>.jsonl` transcript.
+ *
+ * Best-effort: a missing directory yields `[]`, and an entry is skipped when
+ * its meta file cannot be read, is not valid JSON, or does not hold a JSON
+ * object (for example `null`), so `meta` on every returned entry is safe to
+ * read properties from.
+ *
+ * @param subagentsDir Directory to scan (not recursive).
+ * @returns One entry per usable pair, in `readdirSync` order.
+ */
+export function listSubagents(subagentsDir: string): SubagentEntry[] {
+  if (!existsSync(subagentsDir)) return [];
+  const suffix = ".meta.json";
+  const entries: SubagentEntry[] = [];
+  for (const file of readdirSync(subagentsDir)) {
+    if (!file.endsWith(suffix)) continue;
+    const base = file.slice(0, -suffix.length);
+    const metaPath = join(subagentsDir, file);
+    const jsonlPath = join(subagentsDir, `${base}.jsonl`);
+    if (!existsSync(jsonlPath)) continue;
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(readFileSync(metaPath, "utf8"));
+    } catch {
+      // Unreadable or malformed meta: skip this subagent, keep the rest.
+      continue;
+    }
+    if (
+      parsed === null ||
+      typeof parsed !== "object" ||
+      Array.isArray(parsed)
+    ) {
+      continue;
+    }
+    entries.push({ jsonlPath, metaPath, meta: parsed as SubagentMeta });
+  }
+  return entries;
+}
+/**
+ * Finds the subagent whose meta `description` equals `description`.
+ *
+ * Descriptions are namespaced per iteration+run (see run.ts), so duplicates
+ * here mean a retry within the same run. In that case the entry whose
+ * transcript was modified most recently wins; readdir order is not
+ * chronological. Each candidate is stat'ed once, and a transcript that cannot
+ * be stat'ed is treated as the oldest. Ties keep the earlier listed entry.
+ *
+ * @param subagentsDir Directory containing the subagent files.
+ * @param description Exact description to look for.
+ * @returns The matching entry, or `null` when nothing matches.
+ */
+export function findByDescription(
+  subagentsDir: string,
+  description: string,
+): SubagentEntry | null {
+  const matches = listSubagents(subagentsDir).filter(
+    (entry) => entry.meta.description === description,
+  );
+  if (matches.length <= 1) return matches[0] ?? null;
+  let newest: SubagentEntry | null = null;
+  let newestMtime = Number.NEGATIVE_INFINITY;
+  for (const candidate of matches) {
+    let mtime = Number.NEGATIVE_INFINITY;
+    try {
+      mtime = statSync(candidate.jsonlPath).mtimeMs;
+    } catch {
+      // The transcript vanished or is unreadable: rank it last.
+    }
+    if (newest === null || mtime > newestMtime) {
+      newest = candidate;
+      newestMtime = mtime;
+    }
+  }
+  return newest;
+}

package/skills/evaluating-skills/runner/aggregate.test.ts ADDED Viewed

@@ -0,0 +1,188 @@
+import { afterAll, beforeAll, describe, expect, test } from "bun:test";
+import {
+  existsSync,
+  mkdirSync,
+  readFileSync,
+  rmSync,
+  writeFileSync,
+} from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+// Scratch directory under the OS temp dir, suffixed with this process's pid.
+const FIXTURE_ROOT = join(
+  tmpdir(),
+  "slow-powers-aggregate-test-" + String(process.pid),
+);
+// Path of the script under test, resolved relative to `import.meta.dir`.
+const AGGREGATE_TS = join(import.meta.dir, "aggregate.ts");
+// Create the scratch directory once, before any test in this file runs.
+beforeAll(() => void mkdirSync(FIXTURE_ROOT, { recursive: true }));
+// Remove it afterwards; `force` keeps cleanup quiet if it is already gone.
+afterAll(() => void rmSync(FIXTURE_ROOT, { recursive: true, force: true }));
+/** Writes `value` to `path` as 2-space-indented JSON followed by a newline. */
+function writeJson(path: string, value: unknown) {
+  const pretty = JSON.stringify(value, null, 2);
+  writeFileSync(path, `${pretty}\n`);
+}
+describe("aggregate.ts user-mode (--skill-dir, isolated CWD)", () => {
+  type Workspace = { skillDir: string; cwd: string; iterationDir: string };
+
+  /**
+   * Builds the layout both tests share under FIXTURE_ROOT/<name>: a skill dir
+   * holding `mr-review/SKILL.md`, and a working dir whose
+   * `skills-workspace/mr-review/iteration-1` contains `conditions.json`.
+   */
+  const scaffold = (name: string): Workspace => {
+    const root = join(FIXTURE_ROOT, name);
+    // Skill dir + skill-under-test (detectRunContext validates SKILL.md exists)
+    const skillDir = join(root, "skill-dir");
+    const skillSub = join(skillDir, "mr-review");
+    mkdirSync(skillSub, { recursive: true });
+    writeFileSync(
+      join(skillSub, "SKILL.md"),
+      "---\nname: mr-review\ndescription: review MRs\n---\n\nbody\n",
+    );
+    // Working dir that holds the workspace (mirrors stageRoot/workspaceRoot = CWD)
+    const cwd = join(root, "work");
+    const iterationDir = join(
+      cwd,
+      "skills-workspace",
+      "mr-review",
+      "iteration-1",
+    );
+    mkdirSync(iterationDir, { recursive: true });
+    writeJson(join(iterationDir, "conditions.json"), {
+      mode: "new-skill",
+      conditions: [
+        { name: "with_skill", skill_path: join(skillSub, "SKILL.md") },
+        { name: "without_skill", skill_path: null },
+      ],
+      timestamp: new Date().toISOString(),
+      harness: "claude-code",
+    });
+    return { skillDir, cwd, iterationDir };
+  };
+
+  /** Writes grading.json and timing.json for one condition of eval `e1`. */
+  const writeGradedRun = (
+    iterationDir: string,
+    cond: string,
+    passRate: number,
+    tokens: number,
+    durationMs: number,
+  ) => {
+    const condDir = join(iterationDir, "eval-e1", cond);
+    mkdirSync(condDir, { recursive: true });
+    writeJson(join(condDir, "grading.json"), {
+      assertion_results: [],
+      summary: { passed: 1, failed: 0, total: 1, pass_rate: passRate },
+    });
+    writeJson(join(condDir, "timing.json"), {
+      total_tokens: tokens,
+      duration_ms: durationMs,
+    });
+  };
+
+  /** Runs aggregate.ts for skill `mr-review`, iteration 1, with `cwd` as CWD. */
+  const runAggregate = ({ skillDir, cwd }: Workspace) => {
+    const argv = [
+      "bun",
+      "run",
+      AGGREGATE_TS,
+      "--skill-dir",
+      skillDir,
+      "--skill",
+      "mr-review",
+      "--iteration",
+      "1",
+    ];
+    return Bun.spawnSync(argv, { cwd, stdout: "pipe", stderr: "pipe" });
+  };
+
+  test("computes benchmark.json from a hand-built graded workspace under CWD", () => {
+    const workspace = scaffold("agg-basic");
+    const { iterationDir } = workspace;
+    writeGradedRun(iterationDir, "with_skill", 1, 5000, 1000);
+    writeGradedRun(iterationDir, "without_skill", 0, 3000, 1000);
+    const res = runAggregate(workspace);
+    expect(res.exitCode).toBe(0);
+    const benchmarkPath = join(iterationDir, "benchmark.json");
+    expect(existsSync(benchmarkPath)).toBe(true);
+    const benchmark = JSON.parse(readFileSync(benchmarkPath, "utf8")) as {
+      delta: { pass_rate: number; total_tokens: number };
+      run_summary: Record<string, { pass_rate: { mean: number } }>;
+    };
+    expect(benchmark.run_summary.with_skill.pass_rate.mean).toBe(1);
+    expect(benchmark.run_summary.without_skill.pass_rate.mean).toBe(0);
+    expect(benchmark.delta.pass_rate).toBe(1);
+    expect(benchmark.delta.total_tokens).toBe(2000);
+  });
+
+  test("surfaces stray-writes violations as validity_warnings", () => {
+    const workspace = scaffold("agg-stray");
+    const { iterationDir } = workspace;
+    for (const cond of ["with_skill", "without_skill"]) {
+      writeGradedRun(iterationDir, cond, 1, 100, 1);
+    }
+    // One recorded violation for e1/with_skill.
+    writeJson(join(iterationDir, "stray-writes.json"), {
+      generated: new Date().toISOString(),
+      iteration: 1,
+      totals: { violations: 1, warnings: 0 },
+      runs: [
+        {
+          eval_id: "e1",
+          condition: "with_skill",
+          violations: [
+            {
+              tool: "Write",
+              path: "/repo/runner/run.ts",
+              ordinal: 3,
+              reason: "x",
+            },
+          ],
+          warnings: [],
+        },
+      ],
+    });
+    const res = runAggregate(workspace);
+    expect(res.exitCode).toBe(0);
+    const benchmark = JSON.parse(
+      readFileSync(join(iterationDir, "benchmark.json"), "utf8"),
+    ) as { validity_warnings: string[] };
+    const flagged = benchmark.validity_warnings.some(
+      (w) => w.includes("e1/with_skill") && w.includes("outside"),
+    );
+    expect(flagged).toBe(true);
+  });
+});