@clipboard-health/ai-rules 2.23.0 → 2.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# @clipboard-health/ai-rules
|
|
2
2
|
|
|
3
|
-
Pre-built AI agent rules for consistent coding standards. Uses a retrieval-based approach: generates a compressed index in `AGENTS.md` pointing to
|
|
3
|
+
Pre-built AI agent rules for consistent coding standards. Uses a retrieval-based approach: generates a compressed index in `AGENTS.md` pointing to copied `.rules/` files that agents read on demand, with `.agents/` linked to package-provided agent assets.
|
|
4
4
|
|
|
5
5
|
## Table of contents
|
|
6
6
|
|
|
@@ -55,7 +55,7 @@ npm install --save-dev @clipboard-health/ai-rules
|
|
|
55
55
|
5. Commit the generated files:
|
|
56
56
|
|
|
57
57
|
```bash
|
|
58
|
-
git add .rules/ AGENTS.md CLAUDE.md
|
|
58
|
+
git add .rules/ .agents/ AGENTS.md CLAUDE.md
|
|
59
59
|
git commit -m "feat: add AI coding rules"
|
|
60
60
|
```
|
|
61
61
|
|
|
@@ -96,10 +96,10 @@ npm update @clipboard-health/ai-rules
|
|
|
96
96
|
npm install
|
|
97
97
|
|
|
98
98
|
# Review the changes
|
|
99
|
-
git diff .rules/ AGENTS.md
|
|
99
|
+
git diff .rules/ .agents/ AGENTS.md
|
|
100
100
|
|
|
101
101
|
# Commit the updates
|
|
102
|
-
git add .rules/ AGENTS.md CLAUDE.md
|
|
102
|
+
git add .rules/ .agents/ AGENTS.md CLAUDE.md
|
|
103
103
|
git commit -m "chore: update AI coding rules"
|
|
104
104
|
```
|
|
105
105
|
|
|
@@ -170,10 +170,10 @@ v2 replaces the monolithic `AGENTS.md` with a retrieval-based approach. Rule fil
|
|
|
170
170
|
npm install
|
|
171
171
|
```
|
|
172
172
|
|
|
173
|
-
3. Add `.rules/` to git and commit:
|
|
173
|
+
3. Add `.rules/` and `.agents/` to git and commit:
|
|
174
174
|
|
|
175
175
|
```bash
|
|
176
|
-
git add .rules/ AGENTS.md CLAUDE.md
|
|
176
|
+
git add .rules/ .agents/ AGENTS.md CLAUDE.md
|
|
177
177
|
git commit -m "feat!: update ai-rules to v2 retrieval-based approach"
|
|
178
178
|
```
|
|
179
179
|
|
package/package.json
CHANGED
package/scripts/sync.js
CHANGED
|
@@ -30,10 +30,10 @@ async function sync() {
|
|
|
30
30
|
(0, promises_1.rm)(skillsOutput, { recursive: true, force: true }),
|
|
31
31
|
(0, promises_1.rm)(libraryOutput, { recursive: true, force: true }),
|
|
32
32
|
]);
|
|
33
|
-
const [,
|
|
33
|
+
const [, skillsSyncResult, librarySyncResult] = await Promise.all([
|
|
34
34
|
copyRuleFiles(ruleIds, rulesOutput),
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
syncAgentDirectory("skills", skillsOutput),
|
|
36
|
+
syncAgentDirectory("lib", libraryOutput),
|
|
37
37
|
copySetupScript(),
|
|
38
38
|
mergeSessionStartHook(),
|
|
39
39
|
]);
|
|
@@ -42,7 +42,10 @@ async function sync() {
|
|
|
42
42
|
await (0, promises_1.writeFile)(node_path_1.default.join(PATHS.projectRoot, constants_1.FILES.claude), "@AGENTS.md\n", "utf8");
|
|
43
43
|
console.log(`✅ @clipboard-health/ai-rules synced ${parsedArguments.profile} (${ruleIds.length} rules)`);
|
|
44
44
|
await appendOverlay(PATHS.projectRoot);
|
|
45
|
-
await formatOutputFiles(PATHS.projectRoot, {
|
|
45
|
+
await formatOutputFiles(PATHS.projectRoot, {
|
|
46
|
+
skillsCopied: skillsSyncResult === "copied",
|
|
47
|
+
libCopied: librarySyncResult === "copied",
|
|
48
|
+
});
|
|
46
49
|
}
|
|
47
50
|
catch (error) {
|
|
48
51
|
// Log error but exit gracefully to avoid breaking installs
|
|
@@ -120,33 +123,42 @@ async function copyRuleFiles(ruleIds, rulesOutput) {
|
|
|
120
123
|
await (0, promises_1.cp)(node_path_1.default.join(PATHS.packageRoot, "rules", rulePath), destination);
|
|
121
124
|
}));
|
|
122
125
|
}
|
|
123
|
-
async function
|
|
124
|
-
const
|
|
126
|
+
async function syncAgentDirectory(directoryName, destination) {
|
|
127
|
+
const source = await resolveAgentDirectorySource(directoryName);
|
|
128
|
+
if (!source) {
|
|
129
|
+
return "missing";
|
|
130
|
+
}
|
|
131
|
+
await (0, promises_1.mkdir)(node_path_1.default.dirname(destination), { recursive: true });
|
|
132
|
+
const relativeSource = node_path_1.default.relative(node_path_1.default.dirname(destination), source);
|
|
125
133
|
try {
|
|
126
|
-
await (0, promises_1.
|
|
127
|
-
console.log(`📋
|
|
128
|
-
return
|
|
134
|
+
await (0, promises_1.symlink)(relativeSource, destination, "dir");
|
|
135
|
+
console.log(`📋 Linked ${directoryName} to .agents/${directoryName}/`);
|
|
136
|
+
return "linked";
|
|
129
137
|
}
|
|
130
138
|
catch (error) {
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
}
|
|
134
|
-
|
|
139
|
+
console.warn(`⚠️ Could not symlink ${directoryName}; copying instead: ${(0, toErrorMessage_1.toErrorMessage)(error)}`);
|
|
140
|
+
await (0, promises_1.cp)(source, destination, { recursive: true });
|
|
141
|
+
console.log(`📋 Synced ${directoryName} to .agents/${directoryName}/`);
|
|
142
|
+
return "copied";
|
|
135
143
|
}
|
|
136
144
|
}
|
|
137
|
-
async function
|
|
138
|
-
const
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
145
|
+
async function resolveAgentDirectorySource(directoryName) {
|
|
146
|
+
const packageSource = node_path_1.default.join(PATHS.packageRoot, directoryName);
|
|
147
|
+
const sourceTreeSource = node_path_1.default.join(PATHS.projectRoot, "plugins", "core", directoryName);
|
|
148
|
+
// This repo runs the built sync script from dist/, but checked-in links should
|
|
149
|
+
// target source assets rather than ignored build output.
|
|
150
|
+
if (isSourceBuildPackage() && (await fileExists(sourceTreeSource))) {
|
|
151
|
+
return sourceTreeSource;
|
|
143
152
|
}
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
return false;
|
|
147
|
-
}
|
|
148
|
-
throw error;
|
|
153
|
+
if (await fileExists(packageSource)) {
|
|
154
|
+
return packageSource;
|
|
149
155
|
}
|
|
156
|
+
return undefined;
|
|
157
|
+
}
|
|
158
|
+
function isSourceBuildPackage() {
|
|
159
|
+
return node_path_1.default
|
|
160
|
+
.normalize(PATHS.packageRoot)
|
|
161
|
+
.endsWith(node_path_1.default.normalize(node_path_1.default.join("dist", "packages", "ai-rules")));
|
|
150
162
|
}
|
|
151
163
|
async function copySetupScript() {
|
|
152
164
|
const source = node_path_1.default.join(PATHS.packageRoot, "scripts", "setup.sh");
|
|
@@ -245,6 +257,11 @@ async function generateAgentsIndex(ruleIds) {
|
|
|
245
257
|
"|------|------|-------------|",
|
|
246
258
|
...rows,
|
|
247
259
|
"",
|
|
260
|
+
"## Agent Skills",
|
|
261
|
+
"",
|
|
262
|
+
"Agent skills are linked from `node_modules/@clipboard-health/ai-rules` into `.agents/`.",
|
|
263
|
+
"If a referenced skill is missing or unreadable, run `npm ci` from the repository root and retry.",
|
|
264
|
+
"",
|
|
248
265
|
].join("\n");
|
|
249
266
|
}
|
|
250
267
|
async function appendOverlay(projectRoot) {
|
|
@@ -27,9 +27,9 @@ This skill always runs exactly one pass. It never waits or repeats internally. F
|
|
|
27
27
|
|
|
28
28
|
The skill uses two sentinels. Each is a visible footer line wrapped in `<sub>` (a 🤖 mark plus the token in `<code>`).
|
|
29
29
|
|
|
30
|
-
**Addressed sentinel**: `<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.
|
|
30
|
+
**Addressed sentinel**: `<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.1</code></sub>`. Appended on its own line at the end of every reply the skill posts (both thread replies and the review-body summary); this is how re-runs know which threads and review-body comments are already handled. Dedupe matches the version-agnostic substring `babysit-pr:addressed v1` followed by a space (also matches legacy `<!-- babysit-pr:addressed v1 ... -->` sentinels). Grep `babysit-pr:addressed v1` for any version; add `core@3.7.1` for a specific one.
|
|
31
31
|
|
|
32
|
-
**Follow-up sentinel**: `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.
|
|
32
|
+
**Follow-up sentinel**: `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.1</code></sub>`. Attached to replies that defer an out-of-scope comment as a tracked follow-up (see the Scope subsection and the Defer verdict in step 6). Grep `babysit-pr:followup` across PR conversation JSON to enumerate deferred items. This sentinel is additive — the post-reply scripts still append the `addressed` sentinel at the end, so a deferred thread is correctly machine-classified as addressed (the skill _has_ handled it — by deferring). Human reviewers and future sweeps distinguish deferred from resolved by looking for the follow-up sentinel.
|
|
33
33
|
|
|
34
34
|
**Sentinel recency rules.** The script emits a per-thread `activityState` with three values:
|
|
35
35
|
|
|
@@ -280,7 +280,7 @@ Body templates (the script appends the `addressed` sentinel if missing):
|
|
|
280
280
|
- **Agree**: `Addressed in <commit-url>. <one-line what-changed>.`
|
|
281
281
|
- **Disagree**: `Leaving current behavior. <reasoning>.`
|
|
282
282
|
- **Already fixed**: `Already handled by <commit-url-or-file:line>. <brief pointer>.`
|
|
283
|
-
- **Defer**: `Out of scope for this PR; this looks like follow-up work rather than something introduced or required by this change. <one-line rationale or pointer if useful>.\n\n<sub>🤖 <code>babysit-pr:followup v1 core@3.7.
|
|
283
|
+
- **Defer**: `Out of scope for this PR; this looks like follow-up work rather than something introduced or required by this change. <one-line rationale or pointer if useful>.\n\n<sub>🤖 <code>babysit-pr:followup v1 core@3.7.1</code></sub>`
|
|
284
284
|
|
|
285
285
|
For Defer replies, include the follow-up sentinel on its own line as shown. The script will append the `addressed` sentinel after it on its own line, so the final body ends with the follow-up sentinel followed by a blank line followed by the `addressed` sentinel — `grep babysit-pr:followup` finds the deferral and `grep babysit-pr:addressed` still marks the thread handled for dedupe.
|
|
286
286
|
|
|
@@ -296,7 +296,7 @@ The PR-level summary should:
|
|
|
296
296
|
|
|
297
297
|
- Group by source. Use `## Review-body findings` for step-7 work and `## Conversation-tab comments` for step-6b work. Omit a section if its list is empty.
|
|
298
298
|
- Inside each section, group verdicts under **Agree / Disagree / Already fixed / Deferred (out of scope)** subheadings. Omit a subheading if its list is empty.
|
|
299
|
-
- Under **Deferred (out of scope)**, list each deferred item as a bullet, followed on its own line by `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.
|
|
299
|
+
- Under **Deferred (out of scope)**, list each deferred item as a bullet, followed on its own line by `<sub>🤖 <code>babysit-pr:followup v1 core@3.7.1</code></sub>` so grep catches them individually.
|
|
300
300
|
- Include the commit URL for fixes.
|
|
301
301
|
- End with a fenced fingerprint block listing every current fingerprint — addressed and deferred — one per line. Include both `reviewBodyComments[].fingerprint` (whole-body, one per automated review) and `activeIssueComments[].fingerprint` (per Conversation-tab comment). Future runs dedupe by matching these against `priorBabysitSentinels`.
|
|
302
302
|
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
# substituted at build time by embedPluginVersion.mts.
|
|
10
10
|
|
|
11
11
|
SENTINEL_PREFIX='babysit-pr:addressed v1 '
|
|
12
|
-
SENTINEL='<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.
|
|
12
|
+
SENTINEL='<sub>🤖 <code>babysit-pr:addressed v1 core@3.7.1</code></sub>'
|
|
13
13
|
|
|
14
14
|
# Bot author allowlist (JSON array literal). Used by unresolvedPrComments.sh
|
|
15
15
|
# as a fallback when GraphQL's `author.__typename == "Bot"` misses a GitHub
|
|
@@ -47,7 +47,7 @@ Script paths in this procedure are written as `scripts/...`, relative to this SK
|
|
|
47
47
|
6. Check for an existing PR with `gh pr view`.
|
|
48
48
|
|
|
49
49
|
PR title format: conventional-commit type + description, with no scope, plus the Linear ticket in parentheses at the end when one applies (e.g., `feat: add resume command (STAFF-123)`). This differs from the commit subject, which keeps its scope. Derive the ticket from the branch name, commit body, or session context; omit the parenthetical when no ticket applies.
|
|
50
|
-
- No PR: create with `gh pr create` using the PR title format above. Description = the PR body shape above, followed by the session footer line if known and the agent footer `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.
|
|
51
|
-
- PR exists: if the title doesn't match the format above, correct it with `gh pr edit --title`. Refresh the body via `gh pr edit --body` so (a) the new commit's changes are reflected in the prose while existing `## Summary`, `## Validation`, and `## Notes` sections are preserved unless clearly stale, (b) any known session footer line is appended if missing, never removing or rewriting existing `Agent session: ...` or `Agent session ID: ...` lines, and (c) any existing footer carrying the substring `commit-push-pr:created v1` is preserved verbatim, appending `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.
|
|
50
|
+
- No PR: create with `gh pr create` using the PR title format above. Description = the PR body shape above, followed by the session footer line if known and the agent footer `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.1</code></sub>` on its own line.
|
|
51
|
+
- PR exists: if the title doesn't match the format above, correct it with `gh pr edit --title`. Refresh the body via `gh pr edit --body` so (a) the new commit's changes are reflected in the prose while existing `## Summary`, `## Validation`, and `## Notes` sections are preserved unless clearly stale, (b) any known session footer line is appended if missing, never removing or rewriting existing `Agent session: ...` or `Agent session ID: ...` lines, and (c) any existing footer carrying the substring `commit-push-pr:created v1` is preserved verbatim, appending `<sub>🤖 <code>commit-push-pr:created v1 core@3.7.1</code></sub>` only if absent. Then report the URL.
|
|
52
52
|
|
|
53
53
|
7. End with one short text response: branch name and the full PR URL (e.g., `https://github.com/clipboardhealth/core-utils/pull/123`). Never use shorthand like `repo#123` — always output the complete URL.
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: flaky-test-bulk-debugger
|
|
3
|
+
description: Bulk-triage flaky test investigation tickets by clustering sightings, sharing artifacts, and delegating per-cluster diagnosis to flaky-test-debugger. Use when investigating many flaky test tickets, Linear issues tagged flaky-investigation, CI flake bursts, or repeated failures that may share a root cause.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Flaky Test Bulk Debugger
|
|
7
|
+
|
|
8
|
+
Use this skill to investigate a queue of flaky test sightings efficiently. Its purpose is orchestration: collect tickets, build a compact manifest, cluster related failures, fetch shared artifacts once, then run `flaky-test-debugger` per cluster.
|
|
9
|
+
|
|
10
|
+
Do not duplicate the detailed diagnosis workflow from `flaky-test-debugger`. When a cluster is ready for root-cause analysis, use `flaky-test-debugger` in plan mode unless the user explicitly asks to implement fixes.
|
|
11
|
+
|
|
12
|
+
## Rules
|
|
13
|
+
|
|
14
|
+
- Treat bulky data as external artifacts. Save full issue descriptions, CI logs, LLM reports, Playwright traces, screenshots, and telemetry extracts to files; keep only manifests and summaries in conversation context.
|
|
15
|
+
- Cluster before per-test debugging. Do not read each test file independently until shared setup, CI, auth, static asset, backend, or infrastructure failures have been ruled out.
|
|
16
|
+
- Prefer one implementation ticket per root cause, not one per sighting.
|
|
17
|
+
- Preserve the source ticket instructions for labels, status, linked issues, PR body requirements, and close-out comments.
|
|
18
|
+
- If parallel workers are available, use them per cluster with minimal context. If not, process clusters serially and keep a running manifest file.
|
|
19
|
+
- Keep the coordinator responsible for queue state, deduplication, and Linear/bookkeeping; keep cluster workers responsible for evidence and diagnosis.
|
|
20
|
+
|
|
21
|
+
## Phase 1: Build Queue
|
|
22
|
+
|
|
23
|
+
Fetch or receive all candidate tickets. For Linear, filter by the user-provided project/status/label, commonly `Todo` plus `flaky-investigation`.
|
|
24
|
+
|
|
25
|
+
For each ticket, extract one manifest row: `issueId`, `repo`, `framework`, `testFile`, `testName`, `runUrl`, `commit`, `branch`, `shard`, `timestamp`, `firstError`, `firstStackFrame`, `priorTickets`, and `sourceInstructions`. Write full raw ticket data to a local artifact file if it is large.
|
|
26
|
+
|
|
27
|
+
## Phase 2: Cluster
|
|
28
|
+
|
|
29
|
+
Normalize errors before grouping:
|
|
30
|
+
|
|
31
|
+
- Replace random IDs, emails, names, phone numbers, shift/facility IDs, UUIDs, ObjectIds, hashes, ports, and timestamps with placeholders.
|
|
32
|
+
- Collapse generated asset filenames to their logical shape, for example `main-<hash>.<hash>.js`.
|
|
33
|
+
- Keep HTTP status codes, helper names, route names, endpoint paths, and lifecycle stages intact.
|
|
34
|
+
|
|
35
|
+
Cluster by strongest shared evidence first:
|
|
36
|
+
|
|
37
|
+
1. Same CI run, commit, timestamp window, and setup/helper stack.
|
|
38
|
+
2. Same failure surface from `flaky-test-debugger`: CI/job setup, test setup/auth/data, app bootstrap/navigation, user action, backend request, post-success render, assertion/locator.
|
|
39
|
+
3. Same first project stack frame or setup helper.
|
|
40
|
+
4. Same endpoint/static asset/status-code pattern.
|
|
41
|
+
5. Same test file or prior related tickets.
|
|
42
|
+
|
|
43
|
+
Do not merge clusters only because they are in the same run. Same-run failures can still have different root causes.
|
|
44
|
+
|
|
45
|
+
## Phase 3: Fetch Artifacts
|
|
46
|
+
|
|
47
|
+
For each unique CI run, fetch the available reports once and store them under a predictable temporary path. For Playwright LLM reports in Clipboard repos, use the repository helper when available:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
bash scripts/fetch-llm-report.sh "<github-actions-url>"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Record artifact paths in the manifest. Workers should receive paths and the relevant manifest rows, not the full artifact contents.
|
|
54
|
+
|
|
55
|
+
## Phase 4: Diagnose Clusters
|
|
56
|
+
|
|
57
|
+
For each cluster, run `flaky-test-debugger` in plan mode. If the agent supports skills, explicitly load or invoke `flaky-test-debugger`; otherwise follow its workflow manually.
|
|
58
|
+
|
|
59
|
+
Use this worker prompt shape:
|
|
60
|
+
|
|
61
|
+
```text
|
|
62
|
+
Use flaky-test-debugger in plan mode.
|
|
63
|
+
Investigate this cluster as one possible shared root cause, not as isolated tickets.
|
|
64
|
+
|
|
65
|
+
Inputs: cluster summary, manifest rows, artifact/report paths, prior related tickets, and source ticket close-out instructions.
|
|
66
|
+
|
|
67
|
+
Return: final failure surface, evidence artifacts, root cause diagnosis, confidence score, whether one implementation ticket covers all issues, implementation plan or no-code disposition, and exact ticket action recommendation.
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
If confidence is below 5/5, the worker must include the observability or artifact changes needed to make the next occurrence diagnosable.
|
|
71
|
+
|
|
72
|
+
## Phase 5: Act
|
|
73
|
+
|
|
74
|
+
Merge worker outputs into a coordinator summary:
|
|
75
|
+
|
|
76
|
+
- Cluster name and issue IDs
|
|
77
|
+
- Shared vs independent root cause
|
|
78
|
+
- Evidence level and confidence
|
|
79
|
+
- Recommended implementation ticket count
|
|
80
|
+
- Tickets to mark duplicate/no-code/human-needed
|
|
81
|
+
- Remaining unknowns
|
|
82
|
+
|
|
83
|
+
Create or recommend implementation tickets only after clustering. Link all investigation tickets covered by the same root cause, plus prior related tickets from the source descriptions.
|
|
84
|
+
|
|
85
|
+
When updating investigation tickets, comment with the implementation ticket ID or no-code disposition, link related issues as requested, and move the ticket only after the comment/link exists.
|
|
86
|
+
|
|
87
|
+
## Output Format
|
|
88
|
+
|
|
89
|
+
End with a compact bulk triage report containing: queue size, cluster count, unique CI runs, each cluster's issue IDs, surface, recommendation, confidence, next ticket action, artifact index, and risks such as missing evidence or likely false merges.
|