npm - @clipboard-health/ai-rules - Versions diffs - 2.23.0 → 2.24.0 - Mend

@clipboard-health/ai-rules 2.23.0 → 2.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md +6 -6
package/package.json +1 -1
package/scripts/sync.js +41 -24
package/skills/babysit-pr/SKILL.md +4 -4
package/skills/babysit-pr/scripts/_sentinel.sh +1 -1
package/skills/commit-push-pr/SKILL.md +2 -2
package/skills/flaky-test-bulk-debugger/SKILL.md +89 -0

package/README.md CHANGED

@@ -1,6 +1,6 @@
 # @clipboard-health/ai-rules
-Pre-built AI agent rules for consistent coding standards. Uses a retrieval-based approach: generates a compressed index in `AGENTS.md` pointing to individual rule files that agents read on demand.
+Pre-built AI agent rules for consistent coding standards. Uses a retrieval-based approach: generates a compressed index in `AGENTS.md` pointing to copied `.rules/` files that agents read on demand, with `.agents/` linked to package-provided agent assets.
 ## Table of contents
@@ -55,7 +55,7 @@ npm install --save-dev @clipboard-health/ai-rules
 5. Commit the generated files:
    ```bash
-   git add .rules/ AGENTS.md CLAUDE.md
+   git add .rules/ .agents/ AGENTS.md CLAUDE.md
    git commit -m "feat: add AI coding rules"
    ```
@@ -96,10 +96,10 @@ npm update @clipboard-health/ai-rules
 npm install
 # Review the changes
-git diff .rules/ AGENTS.md
+git diff .rules/ .agents/ AGENTS.md
 # Commit the updates
-git add .rules/ AGENTS.md CLAUDE.md
+git add .rules/ .agents/ AGENTS.md CLAUDE.md
 git commit -m "chore: update AI coding rules"
 ```
@@ -170,10 +170,10 @@ v2 replaces the monolithic `AGENTS.md` with a retrieval-based approach. Rule fil
    npm install
    ```
-3. Add `.rules/` to git and commit:
+3. Add `.rules/` and `.agents/` to git and commit:
    ```bash
-   git add .rules/ AGENTS.md CLAUDE.md
+   git add .rules/ .agents/ AGENTS.md CLAUDE.md
    git commit -m "feat!: update ai-rules to v2 retrieval-based approach"
    ```

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@clipboard-health/ai-rules",
-  "version": "2.23.0",
+  "version": "2.24.0",
   "description": "Pre-built AI agent rules for consistent coding standards.",
   "keywords": [
     "ai",

package/scripts/sync.js CHANGED

@@ -30,10 +30,10 @@ async function sync() {
             (0, promises_1.rm)(skillsOutput, { recursive: true, force: true }),
             (0, promises_1.rm)(libraryOutput, { recursive: true, force: true }),
         ]);
-        const [, skillsCopied, libraryCopied] = await Promise.all([
+        const [, skillsSyncResult, librarySyncResult] = await Promise.all([
             copyRuleFiles(ruleIds, rulesOutput),
-            copySkillFiles(skillsOutput),
-            copyLibraryFiles(libraryOutput),
+            syncAgentDirectory("skills", skillsOutput),
+            syncAgentDirectory("lib", libraryOutput),
             copySetupScript(),
             mergeSessionStartHook(),
         ]);
@@ -42,7 +42,10 @@ async function sync() {
         await (0, promises_1.writeFile)(node_path_1.default.join(PATHS.projectRoot, constants_1.FILES.claude), "@AGENTS.md\n", "utf8");
         console.log(`✅ @clipboard-health/ai-rules synced ${parsedArguments.profile} (${ruleIds.length} rules)`);
         await appendOverlay(PATHS.projectRoot);
-        await formatOutputFiles(PATHS.projectRoot, { skillsCopied, libCopied: libraryCopied });
+        await formatOutputFiles(PATHS.projectRoot, {
+            skillsCopied: skillsSyncResult === "copied",
+            libCopied: librarySyncResult === "copied",
+        });
     }
     catch (error) {
         // Log error but exit gracefully to avoid breaking installs
@@ -120,33 +123,42 @@ async function copyRuleFiles(ruleIds, rulesOutput) {
         await (0, promises_1.cp)(node_path_1.default.join(PATHS.packageRoot, "rules", rulePath), destination);
     }));
 }
-async function copySkillFiles(skillsOutput) {
-    const skillsSource = node_path_1.default.join(PATHS.packageRoot, "skills");
+async function syncAgentDirectory(directoryName, destination) {
+    const source = await resolveAgentDirectorySource(directoryName);
+    if (!source) {
+        return "missing";
+    }
+    await (0, promises_1.mkdir)(node_path_1.default.dirname(destination), { recursive: true });
+    const relativeSource = node_path_1.default.relative(node_path_1.default.dirname(destination), source);
     try {
-        await (0, promises_1.cp)(skillsSource, skillsOutput, { recursive: true });
-        console.log(`📋 Synced skills to .agents/skills/`);
-        return true;
+        await (0, promises_1.symlink)(relativeSource, destination, "dir");
+        console.log(`📋 Linked ${directoryName} to .agents/${directoryName}/`);
+        return "linked";
     }
     catch (error) {
-        if (error.code === "ENOENT") {
-            return false;
-        }
-        throw error;
+        console.warn(`⚠️ Could not symlink ${directoryName}; copying instead: ${(0, toErrorMessage_1.toErrorMessage)(error)}`);
+        await (0, promises_1.cp)(source, destination, { recursive: true });
+        console.log(`📋 Synced ${directoryName} to .agents/${directoryName}/`);
+        return "copied";
     }
 }
-async function copyLibraryFiles(libraryOutput) {
-    const librarySource = node_path_1.default.join(PATHS.packageRoot, "lib");
-    try {
-        await (0, promises_1.cp)(librarySource, libraryOutput, { recursive: true });
-        console.log(`📋 Synced lib to .agents/lib/`);
-        return true;
+async function resolveAgentDirectorySource(directoryName) {
+    const packageSource = node_path_1.default.join(PATHS.packageRoot, directoryName);
+    const sourceTreeSource = node_path_1.default.join(PATHS.projectRoot, "plugins", "core", directoryName);
+    // This repo runs the built sync script from dist/, but checked-in links should
+    // target source assets rather than ignored build output.
+    if (isSourceBuildPackage() && (await fileExists(sourceTreeSource))) {
+        return sourceTreeSource;
     }
-    catch (error) {
-        if (error.code === "ENOENT") {
-            return false;
-        }
-        throw error;
+    if (await fileExists(packageSource)) {
+        return packageSource;
     }
+    return undefined;
+}
+function isSourceBuildPackage() {
+    return node_path_1.default
+        .normalize(PATHS.packageRoot)
+        .endsWith(node_path_1.default.normalize(node_path_1.default.join("dist", "packages", "ai-rules")));
 }
 async function copySetupScript() {
     const source = node_path_1.default.join(PATHS.packageRoot, "scripts", "setup.sh");
@@ -245,6 +257,11 @@ async function generateAgentsIndex(ruleIds) {
         "|------|------|-------------|",
         ...rows,
         "",
+        "## Agent Skills",
+        "",
+        "Agent skills are linked from `node_modules/@clipboard-health/ai-rules` into `.agents/`.",
+        "If a referenced skill is missing or unreadable, run `npm ci` from the repository root and retry.",
+        "",
     ].join("\n");
 }
 async function appendOverlay(projectRoot) {

package/skills/babysit-pr/SKILL.md CHANGED

@@ -27,9 +27,9 @@ This skill always runs exactly one pass. It never waits or repeats internally. F
 The skill uses two sentinels. Each is a visible footer line wrapped in `<sub>` (a 🤖 mark plus the token in `<code>`).
-**Addressed sentinel**: `<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.0</code></sub>`. Appended on its own line at the end of every reply the skill posts (both thread replies and the review-body summary); this is how re-runs know which threads and review-body comments are already handled. Dedupe matches the version-agnostic substring `babysit-pr:addressed v1` followed by a space (also matches legacy `<!-- babysit-pr:addressed v1 ... -->` sentinels). Grep `babysit-pr:addressed v1` for any version; add `core@3.7.0` for a specific one.
+**Addressed sentinel**: `<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.1</code></sub>`. Appended on its own line at the end of every reply the skill posts (both thread replies and the review-body summary); this is how re-runs know which threads and review-body comments are already handled. Dedupe matches the version-agnostic substring `babysit-pr:addressed v1` followed by a space (also matches legacy `<!-- babysit-pr:addressed v1 ... -->` sentinels). Grep `babysit-pr:addressed v1` for any version; add `core@3.7.1` for a specific one.
-**Follow-up sentinel**: `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.0</code></sub>`. Attached to replies that defer an out-of-scope comment as a tracked follow-up (see the Scope subsection and the Defer verdict in step 6). Grep `babysit-pr:followup` across PR conversation JSON to enumerate deferred items. This sentinel is additive — the post-reply scripts still append the `addressed` sentinel at the end, so a deferred thread is correctly machine-classified as addressed (the skill _has_ handled it — by deferring). Human reviewers and future sweeps distinguish deferred from resolved by looking for the follow-up sentinel.
+**Follow-up sentinel**: `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.1</code></sub>`. Attached to replies that defer an out-of-scope comment as a tracked follow-up (see the Scope subsection and the Defer verdict in step 6). Grep `babysit-pr:followup` across PR conversation JSON to enumerate deferred items. This sentinel is additive — the post-reply scripts still append the `addressed` sentinel at the end, so a deferred thread is correctly machine-classified as addressed (the skill _has_ handled it — by deferring). Human reviewers and future sweeps distinguish deferred from resolved by looking for the follow-up sentinel.
 **Sentinel recency rules.** The script emits a per-thread `activityState` with three values:
@@ -280,7 +280,7 @@ Body templates (the script appends the `addressed` sentinel if missing):
 - **Agree**: `Addressed in <commit-url>. <one-line what-changed>.`
 - **Disagree**: `Leaving current behavior. <reasoning>.`
 - **Already fixed**: `Already handled by <commit-url-or-file:line>. <brief pointer>.`
-- **Defer**: `Out of scope for this PR; this looks like follow-up work rather than something introduced or required by this change. <one-line rationale or pointer if useful>.\n\n<sub>🤖 <code>babysit-pr:followup v1 core@3.7.0</code></sub>`
+- **Defer**: `Out of scope for this PR; this looks like follow-up work rather than something introduced or required by this change. <one-line rationale or pointer if useful>.\n\n<sub>🤖 <code>babysit-pr:followup v1 core@3.7.1</code></sub>`
 For Defer replies, include the follow-up sentinel on its own line as shown. The script will append the `addressed` sentinel after it on its own line, so the final body ends with the follow-up sentinel followed by a blank line followed by the `addressed` sentinel — `grep babysit-pr:followup` finds the deferral and `grep babysit-pr:addressed` still marks the thread handled for dedupe.
@@ -296,7 +296,7 @@ The PR-level summary should:
 - Group by source. Use `## Review-body findings` for step-7 work and `## Conversation-tab comments` for step-6b work. Omit a section if its list is empty.
 - Inside each section, group verdicts under **Agree / Disagree / Already fixed / Deferred (out of scope)** subheadings. Omit a subheading if its list is empty.
-- Under **Deferred (out of scope)**, list each deferred item as a bullet, followed on its own line by `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.0</code></sub>` so grep catches them individually.
+- Under **Deferred (out of scope)**, list each deferred item as a bullet, followed on its own line by `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.1</code></sub>` so grep catches them individually.
 - Include the commit URL for fixes.
 - End with a fenced fingerprint block listing every current fingerprint — addressed and deferred — one per line. Include both `reviewBodyComments[].fingerprint` (whole-body, one per automated review) and `activeIssueComments[].fingerprint` (per Conversation-tab comment). Future runs dedupe by matching these against `priorBabysitSentinels`.

package/skills/babysit-pr/scripts/_sentinel.sh CHANGED

@@ -9,7 +9,7 @@
 # substituted at build time by embedPluginVersion.mts.
 SENTINEL_PREFIX='babysit-pr:addressed v1 '
-SENTINEL='<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.0</code></sub>'
+SENTINEL='<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.1</code></sub>'
 # Bot author allowlist (JSON array literal). Used by unresolvedPrComments.sh
 # as a fallback when GraphQL's `author.__typename == "Bot"` misses a GitHub

package/skills/commit-push-pr/SKILL.md CHANGED

@@ -47,7 +47,7 @@ Script paths in this procedure are written as `scripts/...`, relative to this SK
 6. Check for an existing PR with `gh pr view`.
    PR title format: conventional-commit type + description, with no scope, plus the Linear ticket in parentheses at the end when one applies (e.g., `feat: add resume command (STAFF-123)`). This differs from the commit subject, which keeps its scope. Derive the ticket from the branch name, commit body, or session context; omit the parenthetical when no ticket applies.
-   - No PR: create with `gh pr create` using the PR title format above. Description = the PR body shape above, followed by the session footer line if known and the agent footer `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.0</code></sub>` on its own line.
-   - PR exists: if the title doesn't match the format above, correct it with `gh pr edit --title`. Refresh the body via `gh pr edit --body` so (a) the new commit's changes are reflected in the prose while existing `## Summary`, `## Validation`, and `## Notes` sections are preserved unless clearly stale, (b) any known session footer line is appended if missing, never removing or rewriting existing `Agent session: ...` or `Agent session ID: ...` lines, and (c) any existing footer carrying the substring `commit-push-pr:created v1` is preserved verbatim, appending `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.0</code></sub>` only if absent. Then report the URL.
+   - No PR: create with `gh pr create` using the PR title format above. Description = the PR body shape above, followed by the session footer line if known and the agent footer `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.1</code></sub>` on its own line.
+   - PR exists: if the title doesn't match the format above, correct it with `gh pr edit --title`. Refresh the body via `gh pr edit --body` so (a) the new commit's changes are reflected in the prose while existing `## Summary`, `## Validation`, and `## Notes` sections are preserved unless clearly stale, (b) any known session footer line is appended if missing, never removing or rewriting existing `Agent session: ...` or `Agent session ID: ...` lines, and (c) any existing footer carrying the substring `commit-push-pr:created v1` is preserved verbatim, appending `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.1</code></sub>` only if absent. Then report the URL.
 7. End with one short text response: branch name and the full PR URL (e.g., `https://github.com/clipboardhealth/core-utils/pull/123`). Never use shorthand like `repo#123` — always output the complete URL.

package/skills/flaky-test-bulk-debugger/SKILL.md ADDED

@@ -0,0 +1,89 @@
+---
+name: flaky-test-bulk-debugger
+description: Bulk-triage flaky test investigation tickets by clustering sightings, sharing artifacts, and delegating per-cluster diagnosis to flaky-test-debugger. Use when investigating many flaky test tickets, Linear issues tagged flaky-investigation, CI flake bursts, or repeated failures that may share a root cause.
+---
+# Flaky Test Bulk Debugger
+Use this skill to investigate a queue of flaky test sightings efficiently. Its purpose is orchestration: collect tickets, build a compact manifest, cluster related failures, fetch shared artifacts once, then run `flaky-test-debugger` per cluster.
+Do not duplicate the detailed diagnosis workflow from `flaky-test-debugger`. When a cluster is ready for root-cause analysis, use `flaky-test-debugger` in plan mode unless the user explicitly asks to implement fixes.
+## Rules
+- Treat bulky data as external artifacts. Save full issue descriptions, CI logs, LLM reports, Playwright traces, screenshots, and telemetry extracts to files; keep only manifests and summaries in conversation context.
+- Cluster before per-test debugging. Do not read each test file independently until shared setup, CI, auth, static asset, backend, or infrastructure failures have been ruled out.
+- Prefer one implementation ticket per root cause, not one per sighting.
+- Preserve the source ticket instructions for labels, status, linked issues, PR body requirements, and close-out comments.
+- If parallel workers are available, use them per cluster with minimal context. If not, process clusters serially and keep a running manifest file.
+- Keep the coordinator responsible for queue state, deduplication, and Linear/bookkeeping; keep cluster workers responsible for evidence and diagnosis.
+## Phase 1: Build Queue
+Fetch or receive all candidate tickets. For Linear, filter by the user-provided project/status/label, commonly `Todo` plus `flaky-investigation`.
+For each ticket, extract one manifest row: `issueId`, `repo`, `framework`, `testFile`, `testName`, `runUrl`, `commit`, `branch`, `shard`, `timestamp`, `firstError`, `firstStackFrame`, `priorTickets`, and `sourceInstructions`. Write full raw ticket data to a local artifact file if it is large.
+## Phase 2: Cluster
+Normalize errors before grouping:
+- Replace random IDs, emails, names, phone numbers, shift/facility IDs, UUIDs, ObjectIds, hashes, ports, and timestamps with placeholders.
+- Collapse generated asset filenames to their logical shape, for example `main-<hash>.<hash>.js`.
+- Keep HTTP status codes, helper names, route names, endpoint paths, and lifecycle stages intact.
+Cluster by strongest shared evidence first:
+1. Same CI run, commit, timestamp window, and setup/helper stack.
+2. Same failure surface from `flaky-test-debugger`: CI/job setup, test setup/auth/data, app bootstrap/navigation, user action, backend request, post-success render, assertion/locator.
+3. Same first project stack frame or setup helper.
+4. Same endpoint/static asset/status-code pattern.
+5. Same test file or prior related tickets.
+Do not merge clusters only because they are in the same run. Same-run failures can still have different root causes.
+## Phase 3: Fetch Artifacts
+For each unique CI run, fetch the available reports once and store them under a predictable temporary path. For Playwright LLM reports in Clipboard repos, use the repository helper when available:
+```bash
+bash scripts/fetch-llm-report.sh "<github-actions-url>"
+```
+Record artifact paths in the manifest. Workers should receive paths and the relevant manifest rows, not the full artifact contents.
+## Phase 4: Diagnose Clusters
+For each cluster, run `flaky-test-debugger` in plan mode. If the agent supports skills, explicitly load or invoke `flaky-test-debugger`; otherwise follow its workflow manually.
+Use this worker prompt shape:
+```text
+Use flaky-test-debugger in plan mode.
+Investigate this cluster as one possible shared root cause, not as isolated tickets.
+Inputs: cluster summary, manifest rows, artifact/report paths, prior related tickets, and source ticket close-out instructions.
+Return: final failure surface, evidence artifacts, root cause diagnosis, confidence score, whether one implementation ticket covers all issues, implementation plan or no-code disposition, and exact ticket action recommendation.
+```
+If confidence is below 5/5, the worker must include the observability or artifact changes needed to make the next occurrence diagnosable.
+## Phase 5: Act
+Merge worker outputs into a coordinator summary:
+- Cluster name and issue IDs
+- Shared vs independent root cause
+- Evidence level and confidence
+- Recommended implementation ticket count
+- Tickets to mark duplicate/no-code/human-needed
+- Remaining unknowns
+Create or recommend implementation tickets only after clustering. Link all investigation tickets covered by the same root cause, plus prior related tickets from the source descriptions.
+When updating investigation tickets, comment with the implementation ticket ID or no-code disposition, link related issues as requested, and move the ticket only after the comment/link exists.
+## Output Format
+End with a compact bulk triage report containing: queue size, cluster count, unique CI runs, each cluster's issue IDs, surface, recommendation, confidence, next ticket action, artifact index, and risks such as missing evidence or likely false merges.