libretto 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122):
  1. package/README.md +109 -35
  2. package/dist/cli/cli.js +22 -97
  3. package/dist/cli/commands/browser.js +86 -59
  4. package/dist/cli/commands/execution.js +199 -86
  5. package/dist/cli/commands/init.js +34 -29
  6. package/dist/cli/commands/logs.js +4 -5
  7. package/dist/cli/commands/shared.js +30 -29
  8. package/dist/cli/commands/snapshot.js +26 -39
  9. package/dist/cli/core/ai-config.js +21 -4
  10. package/dist/cli/core/api-snapshot-analyzer.js +15 -5
  11. package/dist/cli/core/browser.js +207 -37
  12. package/dist/cli/core/context.js +4 -1
  13. package/dist/cli/core/session-telemetry.js +434 -174
  14. package/dist/cli/core/session.js +21 -8
  15. package/dist/cli/core/snapshot-analyzer.js +14 -31
  16. package/dist/cli/core/snapshot-api-config.js +2 -6
  17. package/dist/cli/core/telemetry.js +20 -4
  18. package/dist/cli/framework/simple-cli.js +45 -25
  19. package/dist/cli/router.js +14 -21
  20. package/dist/cli/workers/run-integration-runtime.js +24 -5
  21. package/dist/cli/workers/run-integration-worker-protocol.js +3 -1
  22. package/dist/cli/workers/run-integration-worker.js +1 -4
  23. package/dist/index.d.ts +1 -2
  24. package/dist/index.js +7 -10
  25. package/dist/runtime/download/download.js +5 -1
  26. package/dist/runtime/extract/extract.js +11 -2
  27. package/dist/runtime/network/network.js +8 -1
  28. package/dist/runtime/recovery/agent.js +6 -2
  29. package/dist/runtime/recovery/errors.js +3 -1
  30. package/dist/runtime/recovery/recovery.js +3 -1
  31. package/dist/shared/condense-dom/condense-dom.js +17 -69
  32. package/dist/shared/config/config.d.ts +1 -9
  33. package/dist/shared/config/config.js +0 -18
  34. package/dist/shared/config/index.d.ts +2 -1
  35. package/dist/shared/config/index.js +0 -10
  36. package/dist/shared/debug/pause.js +9 -3
  37. package/dist/shared/dom-semantics.d.ts +8 -0
  38. package/dist/shared/dom-semantics.js +69 -0
  39. package/dist/shared/instrumentation/instrument.js +101 -5
  40. package/dist/shared/llm/ai-sdk-adapter.js +3 -1
  41. package/dist/shared/llm/client.js +3 -1
  42. package/dist/shared/logger/index.js +4 -1
  43. package/dist/shared/run/api.js +3 -1
  44. package/dist/shared/run/browser.js +47 -3
  45. package/dist/shared/state/session-state.d.ts +2 -1
  46. package/dist/shared/state/session-state.js +5 -2
  47. package/dist/shared/visualization/ghost-cursor.js +36 -14
  48. package/dist/shared/visualization/highlight.js +9 -6
  49. package/dist/shared/workflow/workflow.d.ts +4 -5
  50. package/dist/shared/workflow/workflow.js +3 -5
  51. package/package.json +6 -2
  52. package/scripts/check-skills-sync.mjs +25 -0
  53. package/scripts/compare-eval-summary.mjs +47 -0
  54. package/scripts/postinstall.mjs +15 -15
  55. package/scripts/prepare-release.sh +97 -0
  56. package/scripts/skills-libretto.mjs +103 -0
  57. package/scripts/summarize-evals.mjs +135 -0
  58. package/scripts/sync-skills.mjs +12 -0
  59. package/skills/libretto/SKILL.md +132 -54
  60. package/skills/libretto/references/action-logs.md +101 -0
  61. package/skills/libretto/references/auth-profiles.md +1 -2
  62. package/skills/libretto/references/code-generation-rules.md +210 -0
  63. package/skills/libretto/references/configuration-file-reference.md +53 -0
  64. package/skills/libretto/references/pages-and-page-targeting.md +1 -1
  65. package/skills/libretto/references/site-security-review.md +143 -0
  66. package/src/cli/cli.ts +23 -110
  67. package/src/cli/commands/browser.ts +94 -70
  68. package/src/cli/commands/execution.ts +233 -102
  69. package/src/cli/commands/init.ts +37 -33
  70. package/src/cli/commands/logs.ts +7 -7
  71. package/src/cli/commands/shared.ts +36 -37
  72. package/src/cli/commands/snapshot.ts +44 -59
  73. package/src/cli/core/ai-config.ts +24 -4
  74. package/src/cli/core/api-snapshot-analyzer.ts +17 -6
  75. package/src/cli/core/browser.ts +260 -49
  76. package/src/cli/core/context.ts +7 -2
  77. package/src/cli/core/session-telemetry.ts +449 -197
  78. package/src/cli/core/session.ts +21 -7
  79. package/src/cli/core/snapshot-analyzer.ts +26 -46
  80. package/src/cli/core/snapshot-api-config.ts +170 -175
  81. package/src/cli/core/telemetry.ts +39 -4
  82. package/src/cli/framework/simple-cli.ts +144 -77
  83. package/src/cli/router.ts +13 -21
  84. package/src/cli/workers/run-integration-runtime.ts +36 -9
  85. package/src/cli/workers/run-integration-worker-protocol.ts +2 -0
  86. package/src/cli/workers/run-integration-worker.ts +1 -4
  87. package/src/index.ts +73 -66
  88. package/src/runtime/download/download.ts +62 -58
  89. package/src/runtime/download/index.ts +5 -5
  90. package/src/runtime/extract/extract.ts +71 -61
  91. package/src/runtime/network/index.ts +3 -3
  92. package/src/runtime/network/network.ts +99 -93
  93. package/src/runtime/recovery/agent.ts +217 -212
  94. package/src/runtime/recovery/errors.ts +107 -104
  95. package/src/runtime/recovery/index.ts +3 -3
  96. package/src/runtime/recovery/recovery.ts +38 -35
  97. package/src/shared/condense-dom/condense-dom.ts +27 -82
  98. package/src/shared/config/config.ts +0 -19
  99. package/src/shared/config/index.ts +0 -5
  100. package/src/shared/debug/pause.ts +57 -51
  101. package/src/shared/dom-semantics.ts +68 -0
  102. package/src/shared/instrumentation/errors.ts +64 -62
  103. package/src/shared/instrumentation/index.ts +5 -5
  104. package/src/shared/instrumentation/instrument.ts +339 -209
  105. package/src/shared/llm/ai-sdk-adapter.ts +58 -55
  106. package/src/shared/llm/client.ts +181 -174
  107. package/src/shared/llm/types.ts +39 -39
  108. package/src/shared/logger/index.ts +11 -4
  109. package/src/shared/logger/logger.ts +312 -306
  110. package/src/shared/logger/sinks.ts +118 -114
  111. package/src/shared/paths/paths.ts +50 -49
  112. package/src/shared/paths/repo-root.ts +17 -17
  113. package/src/shared/run/api.ts +5 -1
  114. package/src/shared/run/browser.ts +65 -3
  115. package/src/shared/state/index.ts +9 -9
  116. package/src/shared/state/session-state.ts +46 -43
  117. package/src/shared/visualization/ghost-cursor.ts +180 -149
  118. package/src/shared/visualization/highlight.ts +89 -86
  119. package/src/shared/visualization/index.ts +13 -13
  120. package/src/shared/workflow/workflow.ts +19 -25
  121. package/skills/libretto/references/reverse-engineering-network-requests.md +0 -39
  122. package/skills/libretto/references/user-action-log.md +0 -31
@@ -92,12 +92,15 @@ async function showHighlight(page, params) {
92
92
  }
93
93
  async function clearHighlights(page) {
94
94
  try {
95
- await page.evaluate(({ layerId }) => {
96
- const layer = document.getElementById(layerId);
97
- if (!layer) return;
98
- const rects = layer.querySelectorAll(".__libretto_highlight_rect__");
99
- rects.forEach((r) => r.remove());
100
- }, { layerId: LAYER_ID });
95
+ await page.evaluate(
96
+ ({ layerId }) => {
97
+ const layer = document.getElementById(layerId);
98
+ if (!layer) return;
99
+ const rects = layer.querySelectorAll(".__libretto_highlight_rect__");
100
+ rects.forEach((r) => r.remove());
101
+ },
102
+ { layerId: LAYER_ID }
103
+ );
101
104
  } catch {
102
105
  }
103
106
  }
@@ -2,8 +2,8 @@ import { Page } from 'playwright';
2
2
  import { MinimalLogger } from '../logger/logger.js';
3
3
 
4
4
  declare const LIBRETTO_WORKFLOW_BRAND: unique symbol;
5
- type LibrettoWorkflowMetadata = {};
6
5
  type LibrettoWorkflowContext<S = {}> = {
6
+ session: string;
7
7
  page: Page;
8
8
  logger: MinimalLogger;
9
9
  services: S;
@@ -11,11 +11,10 @@ type LibrettoWorkflowContext<S = {}> = {
11
11
  type LibrettoWorkflowHandler<Input = unknown, Output = unknown, S = {}> = (ctx: LibrettoWorkflowContext<S>, input: Input) => Promise<Output>;
12
12
  declare class LibrettoWorkflow<Input = unknown, Output = unknown, S = {}> {
13
13
  readonly [LIBRETTO_WORKFLOW_BRAND] = true;
14
- readonly metadata: LibrettoWorkflowMetadata;
15
14
  private readonly handler;
16
- constructor(metadata: LibrettoWorkflowMetadata, handler: LibrettoWorkflowHandler<Input, Output, S>);
15
+ constructor(handler: LibrettoWorkflowHandler<Input, Output, S>);
17
16
  run(ctx: LibrettoWorkflowContext<S>, input: Input): Promise<Output>;
18
17
  }
19
- declare function workflow<Input = unknown, Output = unknown, S = {}>(metadata: LibrettoWorkflowMetadata, handler: LibrettoWorkflowHandler<Input, Output, S>): LibrettoWorkflow<Input, Output, S>;
18
+ declare function workflow<Input = unknown, Output = unknown, S = {}>(handler: LibrettoWorkflowHandler<Input, Output, S>): LibrettoWorkflow<Input, Output, S>;
20
19
 
21
- export { LIBRETTO_WORKFLOW_BRAND, LibrettoWorkflow, type LibrettoWorkflowContext, type LibrettoWorkflowHandler, type LibrettoWorkflowMetadata, workflow };
20
+ export { LIBRETTO_WORKFLOW_BRAND, LibrettoWorkflow, type LibrettoWorkflowContext, type LibrettoWorkflowHandler, workflow };
@@ -1,18 +1,16 @@
1
1
  const LIBRETTO_WORKFLOW_BRAND = /* @__PURE__ */ Symbol.for("libretto.workflow");
2
2
  class LibrettoWorkflow {
3
3
  [LIBRETTO_WORKFLOW_BRAND] = true;
4
- metadata;
5
4
  handler;
6
- constructor(metadata, handler) {
7
- this.metadata = metadata;
5
+ constructor(handler) {
8
6
  this.handler = handler;
9
7
  }
10
8
  async run(ctx, input) {
11
9
  return this.handler(ctx, input);
12
10
  }
13
11
  }
14
- function workflow(metadata, handler) {
15
- return new LibrettoWorkflow(metadata, handler);
12
+ function workflow(handler) {
13
+ return new LibrettoWorkflow(handler);
16
14
  }
17
15
  export {
18
16
  LIBRETTO_WORKFLOW_BRAND,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "libretto",
3
- "version": "0.5.0",
3
+ "version": "0.5.2",
4
4
  "description": "AI-powered browser automation library and CLI built on Playwright",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -31,6 +31,8 @@
31
31
  },
32
32
  "scripts": {
33
33
  "postinstall": "node scripts/postinstall.mjs",
34
+ "sync-skills": "node scripts/sync-skills.mjs",
35
+ "check:skills": "node scripts/check-skills-sync.mjs",
34
36
  "build": "tsup --config tsup.config.ts",
35
37
  "type-check": "tsc --noEmit",
36
38
  "test": "pnpm run build && vitest run",
@@ -38,6 +40,7 @@
38
40
  "benchmark": "pnpm run build && tsx benchmarks/run.ts",
39
41
  "test:watch": "vitest",
40
42
  "cli": "node dist/index.js",
43
+ "prepare-release": "bash ./scripts/prepare-release.sh",
41
44
  "prepack": "pnpm run build"
42
45
  },
43
46
  "peerDependencies": {
@@ -61,12 +64,13 @@
61
64
  }
62
65
  },
63
66
  "devDependencies": {
64
- "@anthropic-ai/claude-agent-sdk": "^0.2.75",
65
67
  "@ai-sdk/anthropic": "^3.0.58",
66
68
  "@ai-sdk/google": "^3.0.51",
67
69
  "@ai-sdk/google-vertex": "^4.0.80",
68
70
  "@ai-sdk/openai": "^3.0.41",
71
+ "@anthropic-ai/claude-agent-sdk": "^0.2.75",
69
72
  "@types/node": "^25.5.0",
73
+ "glimpseui": "^0.5.1",
70
74
  "openai": "^6.29.0",
71
75
  "tsup": "^8.5.1",
72
76
  "typescript": "^5.9.3",
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { dirname, join } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+
6
+ import { compareSkillDirs, SKILL_DIRS } from "./skills-libretto.mjs";
7
+
8
+ const __dirname = dirname(fileURLToPath(import.meta.url));
9
+ const repoRoot = join(__dirname, "..");
10
+ const result = compareSkillDirs(repoRoot);
11
+
12
+ if (result.ok) {
13
+ console.log(
14
+ `libretto: verified identical skill mirrors across ${SKILL_DIRS.join(", ")}`,
15
+ );
16
+ process.exit(0);
17
+ }
18
+
19
+ console.error("libretto: skill directories must be identical:");
20
+ for (const issue of result.issues) {
21
+ console.error(`- ${issue}`);
22
+ }
23
+ console.error("");
24
+ console.error("Run `pnpm i` to resync the mirrors in this repository.");
25
+ process.exit(1);
@@ -0,0 +1,47 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync } from "node:fs";
4
+ import { resolve } from "node:path";
5
+
6
+ function usage() {
7
+ console.error(
8
+ "Usage: node scripts/compare-eval-summary.mjs <baseline-summary.json> <current-summary.json> [threshold-percent]",
9
+ );
10
+ }
11
+
12
+ const [, , baselineArg, currentArg, thresholdArg] = process.argv;
13
+
14
+ if (!baselineArg || !currentArg) {
15
+ usage();
16
+ process.exit(1);
17
+ }
18
+
19
+ const baseline = JSON.parse(readFileSync(resolve(baselineArg), "utf8"));
20
+ const current = JSON.parse(readFileSync(resolve(currentArg), "utf8"));
21
+ const threshold = thresholdArg ? Number(thresholdArg) : 5;
22
+
23
+ if (!Number.isFinite(threshold) || threshold < 0) {
24
+ console.error(`Invalid threshold percent: ${thresholdArg}`);
25
+ process.exit(1);
26
+ }
27
+
28
+ const delta = Number((current.percent - baseline.percent).toFixed(2));
29
+ const withinThreshold = Math.abs(delta) <= threshold;
30
+
31
+ const lines = [
32
+ "# Eval Baseline Comparison",
33
+ "",
34
+ `- Baseline score: \`${baseline.percent}%\``,
35
+ `- Current score: \`${current.percent}%\``,
36
+ `- Delta: \`${delta > 0 ? "+" : ""}${delta}%\``,
37
+ `- Allowed range: \`+/-${threshold}%\``,
38
+ ];
39
+
40
+ process.stdout.write(`${lines.join("\n")}\n`);
41
+
42
+ if (!withinThreshold) {
43
+ console.error(
44
+ `Eval score delta ${delta > 0 ? "+" : ""}${delta}% is outside the allowed +/-${threshold}% range.`,
45
+ );
46
+ process.exit(1);
47
+ }
@@ -1,10 +1,12 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- import { cpSync, existsSync, mkdirSync, readdirSync, rmSync } from "node:fs";
3
+ import { existsSync } from "node:fs";
4
4
  import { dirname, join } from "node:path";
5
5
  import { spawnSync } from "node:child_process";
6
6
  import { fileURLToPath } from "node:url";
7
7
 
8
+ import { SKILL_DIRS, syncSkillDir } from "./skills-libretto.mjs";
9
+
8
10
  const __dirname = dirname(fileURLToPath(import.meta.url));
9
11
  const packageRoot = join(__dirname, "..");
10
12
 
@@ -29,22 +31,20 @@ const gitResult = spawnSync("git", ["rev-parse", "--show-toplevel"], {
29
31
  encoding: "utf-8",
30
32
  stdio: ["pipe", "pipe", "pipe"],
31
33
  });
32
- const repoRoot = gitResult.status === 0 && gitResult.stdout
33
- ? gitResult.stdout.trim()
34
- : installCwd;
34
+ const repoRoot =
35
+ gitResult.status === 0 && gitResult.stdout
36
+ ? gitResult.stdout.trim()
37
+ : installCwd;
35
38
 
36
- // Sync skills to any agent dirs at repo root
37
39
  const sourceDir = join(packageRoot, "skills", "libretto");
38
40
  if (!existsSync(sourceDir)) process.exit(0);
39
41
 
40
- const agentDirNames = [".agents", ".claude"];
41
- for (const name of agentDirNames) {
42
- const agentDir = join(repoRoot, name);
43
- if (!existsSync(agentDir)) continue;
44
- const dest = join(agentDir, "skills", "libretto");
45
- if (existsSync(dest)) rmSync(dest, { recursive: true });
46
- mkdirSync(dirname(dest), { recursive: true });
47
- cpSync(sourceDir, dest, { recursive: true });
48
- const count = readdirSync(dest).length;
49
- console.log(`libretto: synced ${count} skill files to ${dest}`);
42
+ const syncMissingDirs = repoRoot === packageRoot;
43
+ for (const dir of SKILL_DIRS.slice(1)) {
44
+ const rootName = dir.split("/")[0];
45
+ const rootDir = join(repoRoot, rootName);
46
+ if (!syncMissingDirs && !existsSync(rootDir)) continue;
47
+ const dest = join(repoRoot, dir);
48
+ syncSkillDir(sourceDir, dest);
49
+ console.log(`libretto: synced skills/libretto -> ${dest}`);
50
50
  }
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ usage() {
5
+ cat <<'EOF'
6
+ Usage: scripts/prepare-release.sh [patch|minor|major]
7
+
8
+ Creates a release PR branch from main, bumps package.json, pushes the branch,
9
+ and opens a pull request targeting main.
10
+ EOF
11
+ }
12
+
13
+ bump="${1:-patch}"
14
+
15
+ case "$bump" in
16
+ patch|minor|major)
17
+ ;;
18
+ -h|--help|help)
19
+ usage
20
+ exit 0
21
+ ;;
22
+ *)
23
+ echo "Invalid bump type: $bump" >&2
24
+ usage >&2
25
+ exit 1
26
+ ;;
27
+ esac
28
+
29
+ if ! command -v gh >/dev/null 2>&1; then
30
+ echo "gh CLI is required." >&2
31
+ exit 1
32
+ fi
33
+
34
+ if [ -n "$(git status --porcelain)" ]; then
35
+ echo "Working tree must be clean before preparing a release." >&2
36
+ exit 1
37
+ fi
38
+
39
+ current_branch="$(git branch --show-current)"
40
+ if [ "$current_branch" != "main" ]; then
41
+ echo "Switching from $current_branch to main."
42
+ fi
43
+
44
+ git fetch origin
45
+ git checkout main
46
+ git pull --ff-only origin main
47
+
48
+ pnpm install --frozen-lockfile
49
+ pnpm type-check
50
+ pnpm test
51
+
52
+ current_version="$(node -p "require('./package.json').version")"
53
+ next_version="$(node -e '
54
+ const [major, minor, patch] = process.argv[1].split(".").map(Number)
55
+ const bump = process.argv[2]
56
+
57
+ let next
58
+ if (bump === "major") next = [major + 1, 0, 0]
59
+ else if (bump === "minor") next = [major, minor + 1, 0]
60
+ else next = [major, minor, patch + 1]
61
+
62
+ process.stdout.write(next.join("."))
63
+ ' "$current_version" "$bump")"
64
+ branch_name="tk-release-v${next_version}"
65
+
66
+ if git show-ref --verify --quiet "refs/heads/${branch_name}"; then
67
+ echo "Local branch ${branch_name} already exists." >&2
68
+ exit 1
69
+ fi
70
+
71
+ if git ls-remote --exit-code --heads origin "${branch_name}" >/dev/null 2>&1; then
72
+ echo "Remote branch ${branch_name} already exists." >&2
73
+ exit 1
74
+ fi
75
+
76
+ npm version "$next_version" --no-git-tag-version >/dev/null
77
+
78
+ git checkout -b "$branch_name"
79
+ git add package.json
80
+ git commit -m "release: v${next_version}"
81
+ git push -u origin "$branch_name"
82
+
83
+ gh pr create \
84
+ --base main \
85
+ --head "$branch_name" \
86
+ --title "release: v${next_version}" \
87
+ --body "$(cat <<EOF
88
+ ## Summary
89
+
90
+ - release libretto v${next_version}
91
+
92
+ ## Verification
93
+
94
+ - pnpm type-check
95
+ - pnpm test
96
+ EOF
97
+ )"
@@ -0,0 +1,103 @@
1
+ #!/usr/bin/env node
2
+
3
+ import {
4
+ cpSync,
5
+ existsSync,
6
+ mkdirSync,
7
+ readFileSync,
8
+ readdirSync,
9
+ rmSync,
10
+ } from "node:fs";
11
+ import { relative, resolve, join } from "node:path";
12
+
13
+ export const SKILL_DIRS = [
14
+ "skills/libretto",
15
+ ".agents/skills/libretto",
16
+ ".claude/skills/libretto",
17
+ ];
18
+
19
+ function walkFiles(dir, baseDir = dir) {
20
+ const entries = readdirSync(dir, { withFileTypes: true }).sort((a, b) =>
21
+ a.name.localeCompare(b.name),
22
+ );
23
+ const files = [];
24
+
25
+ for (const entry of entries) {
26
+ const fullPath = join(dir, entry.name);
27
+ if (entry.isDirectory()) {
28
+ files.push(...walkFiles(fullPath, baseDir));
29
+ continue;
30
+ }
31
+ if (entry.isFile()) files.push(relative(baseDir, fullPath));
32
+ }
33
+
34
+ return files;
35
+ }
36
+
37
+ export function syncSkillDir(sourceDir, destDir) {
38
+ rmSync(destDir, { recursive: true, force: true });
39
+ mkdirSync(destDir, { recursive: true });
40
+ cpSync(sourceDir, destDir, { recursive: true });
41
+ }
42
+
43
+ export function syncRepoSkills(repoRoot) {
44
+ const sourceDir = resolve(repoRoot, "skills/libretto");
45
+ for (const dir of SKILL_DIRS.slice(1)) {
46
+ syncSkillDir(sourceDir, resolve(repoRoot, dir));
47
+ }
48
+ }
49
+
50
+ export function compareSkillDirs(repoRoot) {
51
+ const roots = SKILL_DIRS.map((dir) => ({
52
+ label: dir,
53
+ absPath: resolve(repoRoot, dir),
54
+ }));
55
+ const missing = roots.filter(({ absPath }) => !existsSync(absPath));
56
+ const mismatches = [];
57
+
58
+ if (missing.length > 0) {
59
+ return {
60
+ ok: false,
61
+ issues: missing.map(({ label }) => `missing directory: ${label}`),
62
+ };
63
+ }
64
+
65
+ const expectedFiles = walkFiles(roots[0].absPath);
66
+ const expectedFileSet = new Set(expectedFiles);
67
+
68
+ for (const root of roots.slice(1)) {
69
+ const actualFiles = walkFiles(root.absPath);
70
+ const actualFileSet = new Set(actualFiles);
71
+
72
+ for (const file of expectedFiles) {
73
+ if (!actualFileSet.has(file)) {
74
+ mismatches.push(`${root.label} is missing file: ${file}`);
75
+ }
76
+ }
77
+
78
+ for (const file of actualFiles) {
79
+ if (!expectedFileSet.has(file)) {
80
+ mismatches.push(`${root.label} has unexpected file: ${file}`);
81
+ }
82
+ }
83
+ }
84
+
85
+ for (const file of expectedFiles) {
86
+ const expectedContent = readFileSync(join(roots[0].absPath, file));
87
+ for (const root of roots.slice(1)) {
88
+ const targetPath = join(root.absPath, file);
89
+ if (!existsSync(targetPath)) continue;
90
+ const actualContent = readFileSync(targetPath);
91
+ if (!expectedContent.equals(actualContent)) {
92
+ mismatches.push(
93
+ `${root.label} differs from ${roots[0].label}: ${file}`,
94
+ );
95
+ }
96
+ }
97
+ }
98
+
99
+ return {
100
+ ok: mismatches.length === 0,
101
+ issues: mismatches,
102
+ };
103
+ }
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { readdirSync, readFileSync, writeFileSync } from "node:fs";
4
+ import { basename, join, resolve } from "node:path";
5
+ import { fileURLToPath } from "node:url";
6
+
7
+ function usage() {
8
+ console.error(
9
+ "Usage: node scripts/summarize-evals.mjs <score-dir> <summary-json-path>",
10
+ );
11
+ }
12
+
13
+ function normalizeFailureRecord(failure) {
14
+ return {
15
+ criterion: String(failure?.criterion ?? "").trim(),
16
+ reason: String(failure?.reason ?? "").trim(),
17
+ };
18
+ }
19
+
20
+ function normalizeRecord(record) {
21
+ const failures = Array.isArray(record?.failures)
22
+ ? record.failures
23
+ .map(normalizeFailureRecord)
24
+ .filter(
25
+ (failure) =>
26
+ failure.criterion.length > 0 && failure.reason.length > 0,
27
+ )
28
+ : [];
29
+
30
+ return {
31
+ name: String(record?.name ?? "").trim(),
32
+ passed: Number(record?.passed ?? 0),
33
+ total: Number(record?.total ?? 0),
34
+ percent: Number(record?.percent ?? 0),
35
+ failures,
36
+ };
37
+ }
38
+
39
+ export function loadScoreRecords(scoreDirArg) {
40
+ const scoreDir = resolve(scoreDirArg);
41
+ return readdirSync(scoreDir, { withFileTypes: true })
42
+ .filter((entry) => entry.isFile() && entry.name.endsWith(".json"))
43
+ .map((entry) =>
44
+ JSON.parse(readFileSync(join(scoreDir, entry.name), "utf8")),
45
+ )
46
+ .map(normalizeRecord)
47
+ .sort((a, b) => String(a.name).localeCompare(String(b.name)));
48
+ }
49
+
50
+ export function buildSummary(records) {
51
+ const passed = records.reduce(
52
+ (sum, record) => sum + Number(record.passed || 0),
53
+ 0,
54
+ );
55
+ const total = records.reduce(
56
+ (sum, record) => sum + Number(record.total || 0),
57
+ 0,
58
+ );
59
+ const percent = total > 0 ? Number(((passed / total) * 100).toFixed(2)) : 0;
60
+ const failingRecords = records.filter((record) => record.failures.length > 0);
61
+
62
+ return {
63
+ generatedAt: new Date().toISOString(),
64
+ recordCount: records.length,
65
+ passed,
66
+ total,
67
+ percent,
68
+ failingRecordCount: failingRecords.length,
69
+ records,
70
+ };
71
+ }
72
+
73
+ export function buildMarkdown(summary, summaryPathArg) {
74
+ const lines = [
75
+ "# Eval Summary",
76
+ "",
77
+ `- Overall score: \`${summary.percent}%\``,
78
+ `- Passed criteria: \`${summary.passed}/${summary.total}\``,
79
+ `- Recorded score entries: \`${summary.recordCount}\``,
80
+ `- Failed evals: \`${summary.failingRecordCount}\``,
81
+ `- Summary file: \`${basename(summaryPathArg)}\``,
82
+ ];
83
+
84
+ if (summary.records.length > 0) {
85
+ lines.push("", "## Breakdown", "");
86
+ for (const record of summary.records) {
87
+ const status = record.failures.length > 0 ? "fail" : "pass";
88
+ lines.push(
89
+ `- ${status} \`${record.name}\`: \`${record.percent}%\` (${record.passed}/${record.total})`,
90
+ );
91
+ }
92
+ }
93
+
94
+ if (summary.failingRecordCount > 0) {
95
+ lines.push("", "## Failed Evals", "");
96
+ for (const record of summary.records.filter(
97
+ (candidate) => candidate.failures.length > 0,
98
+ )) {
99
+ lines.push(`### \`${record.name}\``);
100
+ lines.push("");
101
+ lines.push(
102
+ `- Score: \`${record.percent}%\` (${record.passed}/${record.total})`,
103
+ );
104
+ for (const failure of record.failures) {
105
+ lines.push(`- ${failure.criterion}: ${failure.reason}`);
106
+ }
107
+ lines.push("");
108
+ }
109
+ }
110
+
111
+ return `${lines.join("\n").trimEnd()}\n`;
112
+ }
113
+
114
+ function main(argv) {
115
+ const [, , scoreDirArg, summaryPathArg] = argv;
116
+
117
+ if (!scoreDirArg || !summaryPathArg) {
118
+ usage();
119
+ process.exit(1);
120
+ }
121
+
122
+ const summaryPath = resolve(summaryPathArg);
123
+ const records = loadScoreRecords(scoreDirArg);
124
+ const summary = buildSummary(records);
125
+
126
+ writeFileSync(summaryPath, `${JSON.stringify(summary, null, 2)}\n`, "utf8");
127
+ process.stdout.write(buildMarkdown(summary, summaryPath));
128
+ }
129
+
130
+ if (
131
+ process.argv[1] &&
132
+ resolve(process.argv[1]) === fileURLToPath(import.meta.url)
133
+ ) {
134
+ main(process.argv);
135
+ }
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { dirname, join } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+
6
+ import { SKILL_DIRS, syncRepoSkills } from "./skills-libretto.mjs";
7
+
8
+ const __dirname = dirname(fileURLToPath(import.meta.url));
9
+ const repoRoot = join(__dirname, "..");
10
+
11
+ syncRepoSkills(repoRoot);
12
+ console.log(`libretto: synced skill mirrors across ${SKILL_DIRS.join(", ")}`);