@clipboard-health/ai-rules 2.23.0 → 2.24.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # @clipboard-health/ai-rules
2
2
 
3
- Pre-built AI agent rules for consistent coding standards. Uses a retrieval-based approach: generates a compressed index in `AGENTS.md` pointing to individual rule files that agents read on demand.
3
+ Pre-built AI agent rules for consistent coding standards. Uses a retrieval-based approach: generates a compressed index in `AGENTS.md` pointing to copied `.rules/` files that agents read on demand, with `.agents/` linked to package-provided agent assets.
4
4
 
5
5
  ## Table of contents
6
6
 
@@ -55,7 +55,7 @@ npm install --save-dev @clipboard-health/ai-rules
55
55
  5. Commit the generated files:
56
56
 
57
57
  ```bash
58
- git add .rules/ AGENTS.md CLAUDE.md
58
+ git add .rules/ .agents/ AGENTS.md CLAUDE.md
59
59
  git commit -m "feat: add AI coding rules"
60
60
  ```
61
61
 
@@ -96,10 +96,10 @@ npm update @clipboard-health/ai-rules
96
96
  npm install
97
97
 
98
98
  # Review the changes
99
- git diff .rules/ AGENTS.md
99
+ git diff .rules/ .agents/ AGENTS.md
100
100
 
101
101
  # Commit the updates
102
- git add .rules/ AGENTS.md CLAUDE.md
102
+ git add .rules/ .agents/ AGENTS.md CLAUDE.md
103
103
  git commit -m "chore: update AI coding rules"
104
104
  ```
105
105
 
@@ -170,10 +170,10 @@ v2 replaces the monolithic `AGENTS.md` with a retrieval-based approach. Rule fil
170
170
  npm install
171
171
  ```
172
172
 
173
- 3. Add `.rules/` to git and commit:
173
+ 3. Add `.rules/` and `.agents/` to git and commit:
174
174
 
175
175
  ```bash
176
- git add .rules/ AGENTS.md CLAUDE.md
176
+ git add .rules/ .agents/ AGENTS.md CLAUDE.md
177
177
  git commit -m "feat!: update ai-rules to v2 retrieval-based approach"
178
178
  ```
179
179
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@clipboard-health/ai-rules",
3
- "version": "2.23.0",
3
+ "version": "2.24.0",
4
4
  "description": "Pre-built AI agent rules for consistent coding standards.",
5
5
  "keywords": [
6
6
  "ai",
package/scripts/sync.js CHANGED
@@ -30,10 +30,10 @@ async function sync() {
30
30
  (0, promises_1.rm)(skillsOutput, { recursive: true, force: true }),
31
31
  (0, promises_1.rm)(libraryOutput, { recursive: true, force: true }),
32
32
  ]);
33
- const [, skillsCopied, libraryCopied] = await Promise.all([
33
+ const [, skillsSyncResult, librarySyncResult] = await Promise.all([
34
34
  copyRuleFiles(ruleIds, rulesOutput),
35
- copySkillFiles(skillsOutput),
36
- copyLibraryFiles(libraryOutput),
35
+ syncAgentDirectory("skills", skillsOutput),
36
+ syncAgentDirectory("lib", libraryOutput),
37
37
  copySetupScript(),
38
38
  mergeSessionStartHook(),
39
39
  ]);
@@ -42,7 +42,10 @@ async function sync() {
42
42
  await (0, promises_1.writeFile)(node_path_1.default.join(PATHS.projectRoot, constants_1.FILES.claude), "@AGENTS.md\n", "utf8");
43
43
  console.log(`✅ @clipboard-health/ai-rules synced ${parsedArguments.profile} (${ruleIds.length} rules)`);
44
44
  await appendOverlay(PATHS.projectRoot);
45
- await formatOutputFiles(PATHS.projectRoot, { skillsCopied, libCopied: libraryCopied });
45
+ await formatOutputFiles(PATHS.projectRoot, {
46
+ skillsCopied: skillsSyncResult === "copied",
47
+ libCopied: librarySyncResult === "copied",
48
+ });
46
49
  }
47
50
  catch (error) {
48
51
  // Log error but exit gracefully to avoid breaking installs
@@ -120,33 +123,42 @@ async function copyRuleFiles(ruleIds, rulesOutput) {
120
123
  await (0, promises_1.cp)(node_path_1.default.join(PATHS.packageRoot, "rules", rulePath), destination);
121
124
  }));
122
125
  }
123
- async function copySkillFiles(skillsOutput) {
124
- const skillsSource = node_path_1.default.join(PATHS.packageRoot, "skills");
126
+ async function syncAgentDirectory(directoryName, destination) {
127
+ const source = await resolveAgentDirectorySource(directoryName);
128
+ if (!source) {
129
+ return "missing";
130
+ }
131
+ await (0, promises_1.mkdir)(node_path_1.default.dirname(destination), { recursive: true });
132
+ const relativeSource = node_path_1.default.relative(node_path_1.default.dirname(destination), source);
125
133
  try {
126
- await (0, promises_1.cp)(skillsSource, skillsOutput, { recursive: true });
127
- console.log(`📋 Synced skills to .agents/skills/`);
128
- return true;
134
+ await (0, promises_1.symlink)(relativeSource, destination, "dir");
135
+ console.log(`📋 Linked ${directoryName} to .agents/${directoryName}/`);
136
+ return "linked";
129
137
  }
130
138
  catch (error) {
131
- if (error.code === "ENOENT") {
132
- return false;
133
- }
134
- throw error;
139
+ console.warn(`⚠️ Could not symlink ${directoryName}; copying instead: ${(0, toErrorMessage_1.toErrorMessage)(error)}`);
140
+ await (0, promises_1.cp)(source, destination, { recursive: true });
141
+ console.log(`📋 Synced ${directoryName} to .agents/${directoryName}/`);
142
+ return "copied";
135
143
  }
136
144
  }
137
- async function copyLibraryFiles(libraryOutput) {
138
- const librarySource = node_path_1.default.join(PATHS.packageRoot, "lib");
139
- try {
140
- await (0, promises_1.cp)(librarySource, libraryOutput, { recursive: true });
141
- console.log(`📋 Synced lib to .agents/lib/`);
142
- return true;
145
+ async function resolveAgentDirectorySource(directoryName) {
146
+ const packageSource = node_path_1.default.join(PATHS.packageRoot, directoryName);
147
+ const sourceTreeSource = node_path_1.default.join(PATHS.projectRoot, "plugins", "core", directoryName);
148
+ // This repo runs the built sync script from dist/, but checked-in links should
149
+ // target source assets rather than ignored build output.
150
+ if (isSourceBuildPackage() && (await fileExists(sourceTreeSource))) {
151
+ return sourceTreeSource;
143
152
  }
144
- catch (error) {
145
- if (error.code === "ENOENT") {
146
- return false;
147
- }
148
- throw error;
153
+ if (await fileExists(packageSource)) {
154
+ return packageSource;
149
155
  }
156
+ return undefined;
157
+ }
158
+ function isSourceBuildPackage() {
159
+ return node_path_1.default
160
+ .normalize(PATHS.packageRoot)
161
+ .endsWith(node_path_1.default.normalize(node_path_1.default.join("dist", "packages", "ai-rules")));
150
162
  }
151
163
  async function copySetupScript() {
152
164
  const source = node_path_1.default.join(PATHS.packageRoot, "scripts", "setup.sh");
@@ -245,6 +257,11 @@ async function generateAgentsIndex(ruleIds) {
245
257
  "|------|------|-------------|",
246
258
  ...rows,
247
259
  "",
260
+ "## Agent Skills",
261
+ "",
262
+ "Agent skills are linked from `node_modules/@clipboard-health/ai-rules` into `.agents/`.",
263
+ "If a referenced skill is missing or unreadable, run `npm ci` from the repository root and retry.",
264
+ "",
248
265
  ].join("\n");
249
266
  }
250
267
  async function appendOverlay(projectRoot) {
@@ -27,9 +27,9 @@ This skill always runs exactly one pass. It never waits or repeats internally. F
27
27
 
28
28
  The skill uses two sentinels. Each is a visible footer line wrapped in `<sub>` (a 🤖 mark plus the token in `<code>`).
29
29
 
30
- **Addressed sentinel**: `<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.0</code></sub>`. Appended on its own line at the end of every reply the skill posts (both thread replies and the review-body summary); this is how re-runs know which threads and review-body comments are already handled. Dedupe matches the version-agnostic substring `babysit-pr:addressed v1` followed by a space (also matches legacy `<!-- babysit-pr:addressed v1 ... -->` sentinels). Grep `babysit-pr:addressed v1` for any version; add `core@3.7.0` for a specific one.
30
+ **Addressed sentinel**: `<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.1</code></sub>`. Appended on its own line at the end of every reply the skill posts (both thread replies and the review-body summary); this is how re-runs know which threads and review-body comments are already handled. Dedupe matches the version-agnostic substring `babysit-pr:addressed v1` followed by a space (also matches legacy `<!-- babysit-pr:addressed v1 ... -->` sentinels). Grep `babysit-pr:addressed v1` for any version; add `core@3.7.1` for a specific one.
31
31
 
32
- **Follow-up sentinel**: `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.0</code></sub>`. Attached to replies that defer an out-of-scope comment as a tracked follow-up (see the Scope subsection and the Defer verdict in step 6). Grep `babysit-pr:followup` across PR conversation JSON to enumerate deferred items. This sentinel is additive — the post-reply scripts still append the `addressed` sentinel at the end, so a deferred thread is correctly machine-classified as addressed (the skill _has_ handled it — by deferring). Human reviewers and future sweeps distinguish deferred from resolved by looking for the follow-up sentinel.
32
+ **Follow-up sentinel**: `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.1</code></sub>`. Attached to replies that defer an out-of-scope comment as a tracked follow-up (see the Scope subsection and the Defer verdict in step 6). Grep `babysit-pr:followup` across PR conversation JSON to enumerate deferred items. This sentinel is additive — the post-reply scripts still append the `addressed` sentinel at the end, so a deferred thread is correctly machine-classified as addressed (the skill _has_ handled it — by deferring). Human reviewers and future sweeps distinguish deferred from resolved by looking for the follow-up sentinel.
33
33
 
34
34
  **Sentinel recency rules.** The script emits a per-thread `activityState` with three values:
35
35
 
@@ -280,7 +280,7 @@ Body templates (the script appends the `addressed` sentinel if missing):
280
280
  - **Agree**: `Addressed in <commit-url>. <one-line what-changed>.`
281
281
  - **Disagree**: `Leaving current behavior. <reasoning>.`
282
282
  - **Already fixed**: `Already handled by <commit-url-or-file:line>. <brief pointer>.`
283
- - **Defer**: `Out of scope for this PR; this looks like follow-up work rather than something introduced or required by this change. <one-line rationale or pointer if useful>.\n\n<sub>🤖 <code>babysit-pr:followup v1 core@3.7.0</code></sub>`
283
+ - **Defer**: `Out of scope for this PR; this looks like follow-up work rather than something introduced or required by this change. <one-line rationale or pointer if useful>.\n\n<sub>🤖 <code>babysit-pr:followup v1 core@3.7.1</code></sub>`
284
284
 
285
285
  For Defer replies, include the follow-up sentinel on its own line as shown. The script will append the `addressed` sentinel after it on its own line, so the final body ends with the follow-up sentinel followed by a blank line followed by the `addressed` sentinel — `grep babysit-pr:followup` finds the deferral and `grep babysit-pr:addressed` still marks the thread handled for dedupe.
286
286
 
@@ -296,7 +296,7 @@ The PR-level summary should:
296
296
 
297
297
  - Group by source. Use `## Review-body findings` for step-7 work and `## Conversation-tab comments` for step-6b work. Omit a section if its list is empty.
298
298
  - Inside each section, group verdicts under **Agree / Disagree / Already fixed / Deferred (out of scope)** subheadings. Omit a subheading if its list is empty.
299
- - Under **Deferred (out of scope)**, list each deferred item as a bullet, followed on its own line by `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.0</code></sub>` so grep catches them individually.
299
+ - Under **Deferred (out of scope)**, list each deferred item as a bullet, followed on its own line by `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.1</code></sub>` so grep catches them individually.
300
300
  - Include the commit URL for fixes.
301
301
  - End with a fenced fingerprint block listing every current fingerprint — addressed and deferred — one per line. Include both `reviewBodyComments[].fingerprint` (whole-body, one per automated review) and `activeIssueComments[].fingerprint` (per Conversation-tab comment). Future runs dedupe by matching these against `priorBabysitSentinels`.
302
302
 
@@ -9,7 +9,7 @@
9
9
  # substituted at build time by embedPluginVersion.mts.
10
10
 
11
11
  SENTINEL_PREFIX='babysit-pr:addressed v1 '
12
- SENTINEL='<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.0</code></sub>'
12
+ SENTINEL='<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.1</code></sub>'
13
13
 
14
14
  # Bot author allowlist (JSON array literal). Used by unresolvedPrComments.sh
15
15
  # as a fallback when GraphQL's `author.__typename == "Bot"` misses a GitHub
@@ -47,7 +47,7 @@ Script paths in this procedure are written as `scripts/...`, relative to this SK
47
47
  6. Check for an existing PR with `gh pr view`.
48
48
 
49
49
  PR title format: conventional-commit type + description, with no scope, plus the Linear ticket in parentheses at the end when one applies (e.g., `feat: add resume command (STAFF-123)`). This differs from the commit subject, which keeps its scope. Derive the ticket from the branch name, commit body, or session context; omit the parenthetical when no ticket applies.
50
- - No PR: create with `gh pr create` using the PR title format above. Description = the PR body shape above, followed by the session footer line if known and the agent footer `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.0</code></sub>` on its own line.
51
- - PR exists: if the title doesn't match the format above, correct it with `gh pr edit --title`. Refresh the body via `gh pr edit --body` so (a) the new commit's changes are reflected in the prose while existing `## Summary`, `## Validation`, and `## Notes` sections are preserved unless clearly stale, (b) any known session footer line is appended if missing, never removing or rewriting existing `Agent session: ...` or `Agent session ID: ...` lines, and (c) any existing footer carrying the substring `commit-push-pr:created v1` is preserved verbatim, appending `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.0</code></sub>` only if absent. Then report the URL.
50
+ - No PR: create with `gh pr create` using the PR title format above. Description = the PR body shape above, followed by the session footer line if known and the agent footer `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.1</code></sub>` on its own line.
51
+ - PR exists: if the title doesn't match the format above, correct it with `gh pr edit --title`. Refresh the body via `gh pr edit --body` so (a) the new commit's changes are reflected in the prose while existing `## Summary`, `## Validation`, and `## Notes` sections are preserved unless clearly stale, (b) any known session footer line is appended if missing, never removing or rewriting existing `Agent session: ...` or `Agent session ID: ...` lines, and (c) any existing footer carrying the substring `commit-push-pr:created v1` is preserved verbatim, appending `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.1</code></sub>` only if absent. Then report the URL.
52
52
 
53
53
  7. End with one short text response: branch name and the full PR URL (e.g., `https://github.com/clipboardhealth/core-utils/pull/123`). Never use shorthand like `repo#123` — always output the complete URL.
@@ -0,0 +1,89 @@
1
+ ---
2
+ name: flaky-test-bulk-debugger
3
+ description: Bulk-triage flaky test investigation tickets by clustering sightings, sharing artifacts, and delegating per-cluster diagnosis to flaky-test-debugger. Use when investigating many flaky test tickets, Linear issues tagged flaky-investigation, CI flake bursts, or repeated failures that may share a root cause.
4
+ ---
5
+
6
+ # Flaky Test Bulk Debugger
7
+
8
+ Use this skill to investigate a queue of flaky test sightings efficiently. Its purpose is orchestration: collect tickets, build a compact manifest, cluster related failures, fetch shared artifacts once, then run `flaky-test-debugger` per cluster.
9
+
10
+ Do not duplicate the detailed diagnosis workflow from `flaky-test-debugger`. When a cluster is ready for root-cause analysis, use `flaky-test-debugger` in plan mode unless the user explicitly asks to implement fixes.
11
+
12
+ ## Rules
13
+
14
+ - Treat bulky data as external artifacts. Save full issue descriptions, CI logs, LLM reports, Playwright traces, screenshots, and telemetry extracts to files; keep only manifests and summaries in conversation context.
15
+ - Cluster before per-test debugging. Do not read each test file independently until shared setup, CI, auth, static asset, backend, or infrastructure failures have been ruled out.
16
+ - Prefer one implementation ticket per root cause, not one per sighting.
17
+ - Preserve the source ticket instructions for labels, status, linked issues, PR body requirements, and close-out comments.
18
+ - If parallel workers are available, use them per cluster with minimal context. If not, process clusters serially and keep a running manifest file.
19
+ - Keep the coordinator responsible for queue state, deduplication, and Linear/bookkeeping; keep cluster workers responsible for evidence and diagnosis.
20
+
21
+ ## Phase 1: Build Queue
22
+
23
+ Fetch or receive all candidate tickets. For Linear, filter by the user-provided project/status/label, commonly `Todo` plus `flaky-investigation`.
24
+
25
+ For each ticket, extract one manifest row: `issueId`, `repo`, `framework`, `testFile`, `testName`, `runUrl`, `commit`, `branch`, `shard`, `timestamp`, `firstError`, `firstStackFrame`, `priorTickets`, and `sourceInstructions`. Write full raw ticket data to a local artifact file if it is large.
26
+
27
+ ## Phase 2: Cluster
28
+
29
+ Normalize errors before grouping:
30
+
31
+ - Replace random IDs, emails, names, phone numbers, shift/facility IDs, UUIDs, ObjectIds, hashes, ports, and timestamps with placeholders.
32
+ - Collapse generated asset filenames to their logical shape, for example `main-<hash>.<hash>.js`.
33
+ - Keep HTTP status codes, helper names, route names, endpoint paths, and lifecycle stages intact.
34
+
35
+ Cluster by strongest shared evidence first:
36
+
37
+ 1. Same CI run, commit, timestamp window, and setup/helper stack.
38
+ 2. Same failure surface from `flaky-test-debugger`: CI/job setup, test setup/auth/data, app bootstrap/navigation, user action, backend request, post-success render, assertion/locator.
39
+ 3. Same first project stack frame or setup helper.
40
+ 4. Same endpoint/static asset/status-code pattern.
41
+ 5. Same test file or prior related tickets.
42
+
43
+ Do not merge clusters only because they are in the same run. Same-run failures can still have different root causes.
44
+
45
+ ## Phase 3: Fetch Artifacts
46
+
47
+ For each unique CI run, fetch the available reports once and store them under a predictable temporary path. For Playwright LLM reports in Clipboard repos, use the repository helper when available:
48
+
49
+ ```bash
50
+ bash scripts/fetch-llm-report.sh "<github-actions-url>"
51
+ ```
52
+
53
+ Record artifact paths in the manifest. Workers should receive paths and the relevant manifest rows, not the full artifact contents.
54
+
55
+ ## Phase 4: Diagnose Clusters
56
+
57
+ For each cluster, run `flaky-test-debugger` in plan mode. If the agent supports skills, explicitly load or invoke `flaky-test-debugger`; otherwise follow its workflow manually.
58
+
59
+ Use this worker prompt shape:
60
+
61
+ ```text
62
+ Use flaky-test-debugger in plan mode.
63
+ Investigate this cluster as one possible shared root cause, not as isolated tickets.
64
+
65
+ Inputs: cluster summary, manifest rows, artifact/report paths, prior related tickets, and source ticket close-out instructions.
66
+
67
+ Return: final failure surface, evidence artifacts, root cause diagnosis, confidence score, whether one implementation ticket covers all issues, implementation plan or no-code disposition, and exact ticket action recommendation.
68
+ ```
69
+
70
+ If confidence is below 5/5, the worker must include the observability or artifact changes needed to make the next occurrence diagnosable.
71
+
72
+ ## Phase 5: Act
73
+
74
+ Merge worker outputs into a coordinator summary:
75
+
76
+ - Cluster name and issue IDs
77
+ - Shared vs independent root cause
78
+ - Evidence level and confidence
79
+ - Recommended implementation ticket count
80
+ - Tickets to mark duplicate/no-code/human-needed
81
+ - Remaining unknowns
82
+
83
+ Create or recommend implementation tickets only after clustering. Link all investigation tickets covered by the same root cause, plus prior related tickets from the source descriptions.
84
+
85
+ When updating investigation tickets, comment with the implementation ticket ID or no-code disposition, link related issues as requested, and move the ticket only after the comment/link exists.
86
+
87
+ ## Output Format
88
+
89
+ End with a compact bulk triage report containing: queue size, cluster count, unique CI runs, each cluster's issue IDs, surface, recommendation, confidence, next ticket action, artifact index, and risks such as missing evidence or likely false merges.