npm - @slowdini/slow-powers-opencode - Versions diffs - 0.3.0 → 0.4.0 - Mend

@slowdini/slow-powers-opencode 0.3.0 → 0.4.0

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (64)

package/skills/evaluating-skills/runner/sandbox-policy.ts DELETED

@@ -1,94 +0,0 @@
-import { isAbsolute, resolve, sep } from "node:path";
-/** Tools that mutate the filesystem and carry a target path argument. */
-export const WRITE_TOOLS = new Set([
-  "Write",
-  "Edit",
-  "MultiEdit",
-  "NotebookEdit",
-]);
-/**
- * Bash command patterns that mutate state outside an eval's sandbox. Heuristics
- * — Bash is too flexible to parse exactly. `detect-stray-writes` surfaces these
- * as warnings; the opt-in guard denies them. Each is meaningful only when the
- * command does not reference an allowed root (see `classifyBash`).
- */
-export const BASH_MUTATION_PATTERNS: Array<{ re: RegExp; reason: string }> = [
-  {
-    re: /\b(npm|pnpm|yarn|bun)\s+(install|add|ci|i)\b/,
-    reason: "package install/add",
-  },
-  { re: /\bpip3?\s+install\b/, reason: "pip install" },
-  { re: /\bsed\s+-i\b/, reason: "in-place file edit (sed -i)" },
-  {
-    re: /\bgit\s+(commit|add|push|checkout|reset|restore|merge|rebase)\b/,
-    reason: "git mutation",
-  },
-  {
-    re: /\bgit\s+worktree\s+add\b/,
-    reason: "git worktree add (working tree outside the sandbox)",
-  },
-  // A create/copy/move/link verb whose operand is a path under `.claude` —
-  // catches stray writes to the harness config dir that aren't a `>` redirect
-  // (those are caught below). Read-only verbs (`cat`, `ls`) aren't listed, so
-  // inspecting `.claude` stays allowed.
-  {
-    re: /\b(cp|mv|mkdir|touch|ln|rsync|install)\b[^|;&\n]*\.claude(\/|\b)/,
-    reason: "path under .claude",
-  },
-  // The same create verbs whose operand is a top-level `skills/` directory —
-  // catches a bare `skills/` left in the cwd. `skills-workspace` and other
-  // `skills`-prefixed names are excluded by the trailing `/`, whitespace, or
-  // end-of-string boundary.
-  {
-    re: /\b(cp|mv|mkdir|touch|ln|rsync)\b[^|;&\n]*[\s'"=/]\.{0,2}\/?skills(\/|\s|$)/,
-    reason: "creates a bare skills/ dir",
-  },
-  { re: /(^|\s)(>>?|tee)\s/, reason: "output redirection to a file" },
-];
-/** Pull the target path from a write tool's arguments. */
-export function pathArg(args: unknown): string | undefined {
-  if (!args || typeof args !== "object") return undefined;
-  const a = args as Record<string, unknown>;
-  const p = a.file_path ?? a.notebook_path ?? a.path;
-  return typeof p === "string" ? p : undefined;
-}
-/** True when `target` resolves to `dir` or a descendant of it. */
-export function isUnder(
-  target: string,
-  dir: string,
-  repoRoot: string,
-): boolean {
-  const base = resolve(dir);
-  const abs = isAbsolute(target) ? resolve(target) : resolve(repoRoot, target);
-  return abs === base || abs.startsWith(base + sep);
-}
-/** True when `target` is under any of `dirs`. */
-export function isUnderAny(
-  target: string,
-  dirs: string[],
-  repoRoot: string,
-): boolean {
-  return dirs.some((d) => isUnder(target, d, repoRoot));
-}
-/**
- * If a Bash command matches a mutation pattern and is not scoped to one of
- * `allowedRoots`, return the human reason; otherwise null. A command is treated
- * as scoped when it textually references an allowed root.
- */
-export function classifyBash(
-  command: string,
-  allowedRoots: string[],
-): string | null {
-  if (!command) return null;
-  if (allowedRoots.some((r) => command.includes(r))) return null;
-  for (const { re, reason } of BASH_MUTATION_PATTERNS) {
-    if (re.test(command)) return reason;
-  }
-  return null;
-}

package/skills/evaluating-skills/runner/types.ts DELETED

@@ -1,121 +0,0 @@
-export type AssertionTranscriptCheck = {
-  id: string;
-  type: "transcript_check";
-  check: string;
-  pattern?: string;
-  must_precede?: "completion_claim" | "any";
-};
-export type AssertionLLMJudge = {
-  id: string;
-  type: "llm_judge";
-  rubric: string;
-  model?: string;
-};
-export type Assertion = AssertionTranscriptCheck | AssertionLLMJudge;
-export type Eval = {
-  id: string;
-  prompt: string;
-  expected_output: string;
-  files?: string[];
-  assertions?: Assertion[];
-  /**
-   * Whether the skill-under-test is expected to fire on this eval. Defaults to
-   * true. Set to false for negative evals where correct behavior is NOT
-   * invoking the skill (e.g. an over-trigger guard). Negative evals are
-   * excluded from the skill-invocation rate and its validity warning.
-   */
-  skill_should_trigger?: boolean;
-};
-export type EvalsConfig = {
-  skill_name: string;
-  evals: Eval[];
-};
-/** A skill staged and discoverable for an eval — its natural name, on-disk
- * SKILL.md path, and frontmatter description. */
-export type AvailableSkill = {
-  name: string;
-  path: string;
-  description: string;
-};
-export type ConditionEntry = {
-  name: string;
-  skill_path: string | null;
-  staged_skill_slug?: string | null;
-};
-export type ConditionsRecord = {
-  mode: "new-skill" | "revision";
-  baseline?: string;
-  conditions: ConditionEntry[];
-  timestamp: string;
-  harness?: string;
-  /** Per-run nonce; namespaces dispatch descriptions so transcripts can't
-   * collide across iterations sharing one parent session's subagents dir. */
-  run_nonce?: string;
-};
-export type ToolInvocation = {
-  name: string;
-  args?: unknown;
-  result?: unknown;
-  ordinal: number;
-};
-export type RunRecord = {
-  eval_id: string;
-  condition: string;
-  skill_path: string | null;
-  prompt: string;
-  files: string[];
-  final_message: string;
-  tool_invocations: ToolInvocation[];
-  total_tokens: number | null;
-  duration_ms: number | null;
-};
-export type AssertionResult = {
-  id: string;
-  passed: boolean;
-  evidence: string;
-  confidence?: number;
-  grader?: "transcript_check" | "llm_judge";
-};
-export type GradingResult = {
-  assertion_results: AssertionResult[];
-  meta_results?: AssertionResult[];
-  summary: {
-    passed: number;
-    failed: number;
-    total: number;
-    pass_rate: number;
-  };
-  meta_summary?: {
-    passed: number;
-    failed: number;
-    total: number;
-    skill_invoked: boolean | null;
-  };
-};
-export const SKILL_INVOKED_META_ID = "__skill_invoked";
-export type TimingRecord = {
-  total_tokens?: number | null;
-  duration_ms?: number | null;
-  /**
-   * Where the numbers came from. "completion-event" = captured by the
-   * dispatching agent from the harness's task completion event;
-   * "transcript" = derived by record-runs from the persisted transcript
-   * (includes cache accounting — a different metric, not comparable 1:1).
-   * Absent on records written before provenance was tracked
-   * (completion-event in practice).
-   */
-  source?: "completion-event" | "transcript";
-};

package/skills/evaluating-skills/runner/validate-all.ts DELETED

@@ -1,54 +0,0 @@
-#!/usr/bin/env bun
-import { existsSync, readdirSync, readFileSync, statSync } from "node:fs";
-import { join, resolve } from "node:path";
-import { validateEvalsConfig } from "./validate";
-function flag(argv: string[], name: string): string | undefined {
-  const i = argv.indexOf(`--${name}`);
-  if (i === -1) return undefined;
-  return argv[i + 1];
-}
-const skillDirRaw = flag(Bun.argv.slice(2), "skill-dir");
-if (!skillDirRaw) {
-  console.error("missing required flag --skill-dir <path>");
-  process.exit(1);
-}
-const SKILLS_DIR = resolve(skillDirRaw);
-if (!existsSync(SKILLS_DIR)) {
-  console.error(`skills dir not found: ${SKILLS_DIR}`);
-  process.exit(1);
-}
-const skills = readdirSync(SKILLS_DIR).filter((d) => {
-  const path = join(SKILLS_DIR, d);
-  return statSync(path).isDirectory();
-});
-let validated = 0;
-let failed = 0;
-const errors: string[] = [];
-for (const skill of skills) {
-  const evalsPath = join(SKILLS_DIR, skill, "evals", "evals.json");
-  if (!existsSync(evalsPath)) continue;
-  try {
-    const raw = JSON.parse(readFileSync(evalsPath, "utf8"));
-    validateEvalsConfig(raw, evalsPath);
-    console.log(`✓ ${skill}/evals/evals.json`);
-    validated++;
-  } catch (err) {
-    console.error(`✗ ${skill}/evals/evals.json: ${(err as Error).message}`);
-    errors.push(`${skill}: ${(err as Error).message}`);
-    failed++;
-  }
-}
-console.log(`\nValidated ${validated} evals.json file(s); ${failed} failed.`);
-if (failed > 0) {
-  console.error("\nFailures:");
-  for (const e of errors) console.error(`  - ${e}`);
-  process.exit(1);
-}

package/skills/evaluating-skills/runner/validate-schema.test.ts DELETED

@@ -1,99 +0,0 @@
-import { describe, expect, test } from "bun:test";
-import { validateAgainstSchema } from "./validate-schema";
-const validRunRecord = {
-  eval_id: "e1",
-  condition: "with_skill",
-  skill_path: null,
-  prompt: "do the thing",
-  files: [],
-  final_message: "done",
-  tool_invocations: [],
-  total_tokens: 100,
-  duration_ms: 1000,
-};
-describe("validateAgainstSchema", () => {
-  test("returns the data when it matches the run-record schema", () => {
-    const result = validateAgainstSchema(
-      "run-record",
-      validRunRecord,
-      "run.json",
-    );
-    expect(result).toEqual(validRunRecord);
-  });
-  test("accepts an empty tool_invocations array (written pre-fill)", () => {
-    expect(() =>
-      validateAgainstSchema(
-        "run-record",
-        { ...validRunRecord, tool_invocations: [] },
-        "run.json",
-      ),
-    ).not.toThrow();
-  });
-  test("accepts skill_path: null on the without_skill arm", () => {
-    expect(() =>
-      validateAgainstSchema(
-        "run-record",
-        { ...validRunRecord, skill_path: null },
-        "run.json",
-      ),
-    ).not.toThrow();
-  });
-  test("throws a source-prefixed error when a required field is missing", () => {
-    const { eval_id, ...missing } = validRunRecord;
-    expect(() =>
-      validateAgainstSchema("run-record", missing, "/tmp/run.json"),
-    ).toThrow(/\/tmp\/run\.json/);
-  });
-  test("requires skill_path and files (type is the contract)", () => {
-    const { skill_path, ...noSkillPath } = validRunRecord;
-    expect(() =>
-      validateAgainstSchema("run-record", noSkillPath, "run.json"),
-    ).toThrow(/skill_path/);
-    const { files, ...noFiles } = validRunRecord;
-    expect(() =>
-      validateAgainstSchema("run-record", noFiles, "run.json"),
-    ).toThrow(/files/);
-  });
-  test("rejects a run record with an unknown extra property", () => {
-    expect(() =>
-      validateAgainstSchema(
-        "run-record",
-        { ...validRunRecord, surprise: true },
-        "run.json",
-      ),
-    ).toThrow();
-  });
-  test("validates a tool_invocation's ordinal must be an integer", () => {
-    expect(() =>
-      validateAgainstSchema(
-        "run-record",
-        {
-          ...validRunRecord,
-          tool_invocations: [{ name: "Bash", ordinal: "zero" }],
-        },
-        "run.json",
-      ),
-    ).toThrow();
-  });
-  test("compiles and validates the grading schema too", () => {
-    const validGrading = {
-      assertion_results: [
-        { id: "a1", passed: true, evidence: "quote", grader: "llm_judge" },
-      ],
-      summary: { passed: 1, failed: 0, total: 1, pass_rate: 1 },
-    };
-    expect(() =>
-      validateAgainstSchema("grading", validGrading, "grading.json"),
-    ).not.toThrow();
-  });
-});

package/skills/evaluating-skills/runner/validate-schema.ts DELETED

@@ -1,51 +0,0 @@
-import { readFileSync } from "node:fs";
-import { join } from "node:path";
-import { Ajv, type ValidateFunction } from "ajv";
-/**
- * The four portable artifact schemas live in `../schema/<name>.schema.json` and
- * are the single source of truth for each artifact's shape. This helper compiles
- * them with ajv and enforces them at runtime, so the schema files are an enforced
- * contract rather than documentation a hand-rolled validator can drift from.
- */
-export type SchemaName = "run-record" | "evals" | "grading" | "stray-writes";
-const SCHEMA_DIR = join(import.meta.dir, "..", "schema");
-// strict: false — the schemas are plain draft-07; we don't want ajv's strict
-// metaschema checks to reject otherwise-valid schemas over stylistic keywords.
-const ajv = new Ajv({ allErrors: true, strict: false });
-const validators = new Map<SchemaName, ValidateFunction>();
-function getValidator(name: SchemaName): ValidateFunction {
-  let validate = validators.get(name);
-  if (!validate) {
-    const schema = JSON.parse(
-      readFileSync(join(SCHEMA_DIR, `${name}.schema.json`), "utf8"),
-    );
-    validate = ajv.compile(schema);
-    validators.set(name, validate);
-  }
-  return validate;
-}
-/**
- * Validate `data` against the named schema. Returns the data typed as `T` on
- * success; throws a `source`-prefixed Error listing every failure on mismatch.
- */
-export function validateAgainstSchema<T>(
-  name: SchemaName,
-  data: unknown,
-  source: string,
-): T {
-  const validate = getValidator(name);
-  if (!validate(data)) {
-    const details = (validate.errors ?? [])
-      .map((e) => `  ${e.instancePath || "/"} ${e.message}`)
-      .join("\n");
-    throw new Error(
-      `${source}: does not match the ${name} schema:\n${details}`,
-    );
-  }
-  return data as T;
-}

package/skills/evaluating-skills/runner/validate.test.ts DELETED

@@ -1,56 +0,0 @@
-import { describe, expect, test } from "bun:test";
-import { validateEvalsConfig } from "./validate";
-const base = {
-  skill_name: "demo",
-  evals: [
-    {
-      id: "e1",
-      prompt: "do the thing",
-      expected_output: "the thing is done",
-    },
-  ],
-};
-describe("validateEvalsConfig skill_should_trigger", () => {
-  test("accepts a boolean skill_should_trigger", () => {
-    const cfg = {
-      ...base,
-      evals: [{ ...base.evals[0], skill_should_trigger: false }],
-    };
-    expect(() => validateEvalsConfig(cfg, "test")).not.toThrow();
-  });
-  test("accepts evals with no skill_should_trigger (defaults to true)", () => {
-    expect(() => validateEvalsConfig(base, "test")).not.toThrow();
-  });
-  test("rejects a non-boolean skill_should_trigger", () => {
-    const cfg = {
-      ...base,
-      evals: [{ ...base.evals[0], skill_should_trigger: "false" }],
-    };
-    expect(() => validateEvalsConfig(cfg, "test")).toThrow(
-      /skill_should_trigger/,
-    );
-  });
-});
-describe("validateEvalsConfig structural + duplicate-id", () => {
-  test("rejects a non-kebab-case id", () => {
-    const cfg = { ...base, evals: [{ ...base.evals[0], id: "Not Kebab" }] };
-    expect(() => validateEvalsConfig(cfg, "test")).toThrow();
-  });
-  test("rejects duplicate eval ids (not expressible in JSON Schema)", () => {
-    const cfg = {
-      ...base,
-      evals: [base.evals[0], { ...base.evals[0] }],
-    };
-    expect(() => validateEvalsConfig(cfg, "test")).toThrow(/duplicate/);
-  });
-  test("rejects an empty evals array", () => {
-    expect(() => validateEvalsConfig({ ...base, evals: [] }, "test")).toThrow();
-  });
-});

package/skills/evaluating-skills/runner/validate.ts DELETED

@@ -1,21 +0,0 @@
-import type { EvalsConfig } from "./types";
-import { validateAgainstSchema } from "./validate-schema";
-export function validateEvalsConfig(
-  config: unknown,
-  source: string,
-): EvalsConfig {
-  // Structural validation against the single source of truth.
-  const validated = validateAgainstSchema<EvalsConfig>("evals", config, source);
-  // Supplemental check: JSON Schema (draft-07) can't enforce uniqueness by a
-  // sub-field, so the duplicate-id guard stays hand-rolled.
-  const seenIds = new Set<string>();
-  for (const [i, ev] of validated.evals.entries()) {
-    if (seenIds.has(ev.id))
-      throw new Error(`${source}: evals[${i}].id duplicate: ${ev.id}`);
-    seenIds.add(ev.id);
-  }
-  return validated;
-}