agentic-qe 3.6.9 → 3.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/.validation/schemas/skill-eval.schema.json +11 -1
- package/.claude/skills/pr-review/SKILL.md +2 -2
- package/.claude/skills/qcsd-production-swarm/SKILL.md +2781 -0
- package/.claude/skills/qcsd-production-swarm/evals/qcsd-production-swarm.yaml +246 -0
- package/.claude/skills/qcsd-production-swarm/schemas/output.json +505 -0
- package/.claude/skills/qcsd-production-swarm/scripts/validate-config.json +25 -0
- package/.claude/skills/skills-manifest.json +5 -5
- package/package.json +1 -1
- package/scripts/benchmark-hnsw-loading.ts +480 -0
- package/scripts/benchmark-kg-assisted.ts +725 -0
- package/scripts/collect-production-telemetry.sh +291 -0
- package/scripts/detect-skill-conflicts.ts +347 -0
- package/scripts/eval-driven-workflow.ts +704 -0
- package/scripts/run-skill-eval.ts +210 -10
- package/scripts/score-skill-quality.ts +511 -0
- package/v3/CHANGELOG.md +19 -0
- package/v3/assets/skills/pr-review/SKILL.md +2 -2
- package/v3/dist/cli/bundle.js +1064 -363
- package/v3/dist/cli/commands/hooks.d.ts.map +1 -1
- package/v3/dist/cli/commands/hooks.js +143 -2
- package/v3/dist/cli/commands/hooks.js.map +1 -1
- package/v3/dist/cli/commands/test.d.ts.map +1 -1
- package/v3/dist/cli/commands/test.js +6 -0
- package/v3/dist/cli/commands/test.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.js +58 -6
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/mocha-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/mocha-generator.js +79 -7
- package/v3/dist/domains/test-generation/generators/mocha-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/pytest-generator.d.ts +4 -0
- package/v3/dist/domains/test-generation/generators/pytest-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/pytest-generator.js +77 -10
- package/v3/dist/domains/test-generation/generators/pytest-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/interfaces/test-generator.interface.d.ts +21 -0
- package/v3/dist/domains/test-generation/interfaces/test-generator.interface.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/interfaces.d.ts +21 -0
- package/v3/dist/domains/test-generation/interfaces.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/services/test-generator.d.ts +22 -0
- package/v3/dist/domains/test-generation/services/test-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/services/test-generator.js +163 -3
- package/v3/dist/domains/test-generation/services/test-generator.js.map +1 -1
- package/v3/dist/kernel/unified-memory-hnsw.d.ts +29 -0
- package/v3/dist/kernel/unified-memory-hnsw.d.ts.map +1 -1
- package/v3/dist/kernel/unified-memory-hnsw.js +136 -0
- package/v3/dist/kernel/unified-memory-hnsw.js.map +1 -1
- package/v3/dist/kernel/unified-memory.d.ts +2 -2
- package/v3/dist/kernel/unified-memory.d.ts.map +1 -1
- package/v3/dist/kernel/unified-memory.js +7 -9
- package/v3/dist/kernel/unified-memory.js.map +1 -1
- package/v3/dist/learning/qe-hooks.d.ts.map +1 -1
- package/v3/dist/learning/qe-hooks.js +34 -3
- package/v3/dist/learning/qe-hooks.js.map +1 -1
- package/v3/dist/mcp/bundle.js +857 -329
- package/v3/package.json +1 -1
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
#
|
|
3
|
+
# QCSD Production Telemetry Collection Script
|
|
4
|
+
# Collects DORA-approximation metrics from GitHub API for npm package releases.
|
|
5
|
+
#
|
|
6
|
+
# Usage: ./scripts/collect-production-telemetry.sh [OPTIONS]
|
|
7
|
+
#
|
|
8
|
+
# Options:
|
|
9
|
+
# --release-id <version> Release version (default: latest tag)
|
|
10
|
+
# --lookback <days> Days of history to analyze (default: 30)
|
|
11
|
+
# --output <path> Output directory (default: docs/telemetry/production)
|
|
12
|
+
# --trigger-type <type> Trigger context: post-deploy|scheduled|manual (default: manual)
|
|
13
|
+
#
|
|
14
|
+
# Environment:
|
|
15
|
+
# GITHUB_REPOSITORY Owner/repo (default: auto-detect from git remote)
|
|
16
|
+
# GH_TOKEN / GITHUB_TOKEN GitHub authentication (gh CLI must be authenticated)
|
|
17
|
+
#
|
|
18
|
+
# Designed to run in GitHub Actions or locally with gh CLI.
|
|
19
|
+
# Individual API failures produce null fields, not script crash.
|
|
20
|
+
|
|
21
|
+
# Note: no set -e — individual commands use || fallbacks for error tolerance
|
|
22
|
+
|
|
23
|
+
# ─── Colors ───────────────────────────────────────────────────────────────────
# ANSI escape sequences for status output; they are expanded by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# ─── Defaults ─────────────────────────────────────────────────────────────────
# "auto" is replaced by the latest release tag further down.
RELEASE_ID="auto"
LOOKBACK_DAYS=30
OUTPUT_DIR="docs/telemetry/production"
TRIGGER_TYPE="manual"
REPO="${GITHUB_REPOSITORY:-""}"

# ─── Parse Arguments ──────────────────────────────────────────────────────────
# Every option takes exactly one value. Its presence is checked before use:
# `shift 2` with only one argument left fails WITHOUT shifting, which would
# otherwise keep this loop spinning forever on e.g. a trailing `--lookback`.
while [[ $# -gt 0 ]]; do
  case $1 in
    --release-id|--lookback|--output|--trigger-type)
      if [[ $# -lt 2 ]]; then
        echo -e "${RED}Missing value for option: $1${NC}"
        exit 1
      fi
      case $1 in
        --release-id)   RELEASE_ID="$2" ;;
        --lookback)     LOOKBACK_DAYS="$2" ;;
        --output)       OUTPUT_DIR="$2" ;;
        --trigger-type) TRIGGER_TYPE="$2" ;;
      esac
      shift 2
      ;;
    *) echo -e "${RED}Unknown option: $1${NC}"; exit 1 ;;
  esac
done

# LOOKBACK_DAYS is later used as a divisor and handed to jq as a JSON number,
# so anything other than a positive integer would break the collection.
if ! [[ "$LOOKBACK_DAYS" =~ ^[1-9][0-9]*$ ]]; then
  echo -e "${RED}ERROR: --lookback must be a positive integer (got: ${LOOKBACK_DAYS})${NC}"
  exit 1
fi
|
|
47
|
+
|
|
48
|
+
# ─── Auto-detect repo ────────────────────────────────────────────────────────
# Ask gh for the current repository only when GITHUB_REPOSITORY did not supply
# one; a failed lookup leaves REPO empty and is fatal.
[ -n "$REPO" ] || REPO=$(gh repo view --json nameWithOwner -q '.nameWithOwner' 2>/dev/null || echo "")
if [ -z "$REPO" ]; then
  echo -e "${RED}ERROR: Cannot detect repository. Set GITHUB_REPOSITORY or run from a git repo with gh auth.${NC}"
  exit 1
fi

# ─── Auto-detect release ID ──────────────────────────────────────────────────
# `gh release view` without a tag argument reports the latest release; if the
# call fails the literal "unknown" is used instead.
case "$RELEASE_ID" in
  auto)
    RELEASE_ID=$(gh release view --repo "$REPO" --json tagName -q '.tagName' 2>/dev/null || echo "unknown")
    ;;
esac
|
|
61
|
+
|
|
62
|
+
# ─── Calculate date boundaries ────────────────────────────────────────────────
# BSD/macOS `date` takes relative offsets via -v, GNU `date` via -d. Probe for
# -v support and use whichever flavour is installed.
if ! date -v-1d > /dev/null 2>&1; then
  SINCE_DATE=$(date -u -d "${LOOKBACK_DAYS} days ago" +%Y-%m-%dT%H:%M:%SZ)
else
  SINCE_DATE=$(date -u -v-${LOOKBACK_DAYS}d +%Y-%m-%dT%H:%M:%SZ)
fi
COLLECTION_TS=$(date -u +%Y-%m-%dT%H:%M:%SZ)
DATE_STAMP=$(date -u +%Y%m%d)

# Run banner: echoes the effective settings before any API call is made.
printf '%s\n' "========================================"
printf '%b\n' "${BLUE}QCSD Production Telemetry Collection${NC}"
printf '%s\n' "========================================"
printf '%s\n' "Repository: $REPO"
printf '%s\n' "Release: $RELEASE_ID"
printf '%s\n' "Lookback: ${LOOKBACK_DAYS} days (since $SINCE_DATE)"
printf '%s\n' "Output: $OUTPUT_DIR"
printf '%s\n' "Trigger: $TRIGGER_TYPE"
printf '%s\n' "========================================"
printf '\n'
|
|
82
|
+
|
|
83
|
+
# ─── Helper: safe API call ────────────────────────────────────────────────────
# Runs the command given as arguments with its stderr discarded.
# Prints the command's stdout when it exits 0, otherwise prints the literal
# string "null". Callers compare the result against "null" to detect failure.
safe_api() {
  local output
  output=$("$@" 2>/dev/null) || { echo "null"; return; }
  echo "$output"
}
|
|
92
|
+
|
|
93
|
+
# ─── 1. Deployment Frequency ─────────────────────────────────────────────────
echo -e "${BLUE}[1/5]${NC} Collecting deployment frequency..."

# Releases published inside the lookback window. With --paginate, gh applies
# the --jq filter to every page separately, so the raw result is one JSON
# array per page rather than a single array.
RELEASE_DATA=$(safe_api gh api "repos/${REPO}/releases?per_page=100" --paginate --jq "[.[] | select(.published_at >= \"${SINCE_DATE}\") | {tag: .tag_name, published: .published_at, created: .created_at}]")

if [ "$RELEASE_DATA" != "null" ] && [ -n "$RELEASE_DATA" ]; then
  # Merge the per-page arrays so `length` (and the later --argjson) see exactly
  # one JSON value.
  RELEASE_DATA=$(echo "$RELEASE_DATA" | jq -c -s 'add // []' 2>/dev/null || echo "[]")
  RELEASE_COUNT=$(echo "$RELEASE_DATA" | jq 'length')
  # Releases per week, truncated to two decimals. Computed with jq instead of
  # bc because bc prints values below 1 as ".23", which is not valid JSON and
  # makes the final `jq --argjson` assembly fail.
  FREQ_PER_WEEK=$(jq -n --argjson count "$RELEASE_COUNT" --argjson days "$LOOKBACK_DAYS" '($count * 700 / $days | floor) / 100' 2>/dev/null || echo "null")
  echo -e " ${GREEN}Found $RELEASE_COUNT releases in ${LOOKBACK_DAYS}d ($FREQ_PER_WEEK/week)${NC}"
else
  RELEASE_COUNT=0
  FREQ_PER_WEEK="null"
  RELEASE_DATA="[]"
  echo -e " ${YELLOW}No release data available${NC}"
fi
|
|
108
|
+
|
|
109
|
+
# ─── 2. Lead Time for Changes ────────────────────────────────────────────────
echo -e "${BLUE}[2/5]${NC} Computing lead time..."

# Lead time per release = hours between the release's created and published
# timestamps, rounded. The reported median is the element at index
# floor(length / 2) of the sorted hours.
if [ "$RELEASE_DATA" = "[]" ] || [ "$RELEASE_DATA" = "null" ]; then
  LEAD_TIMES="[]"
  MEDIAN_LEAD="null"
  echo -e " ${YELLOW}No lead time data (no releases)${NC}"
else
  LEAD_TIMES=$(jq 'map({
    tag: .tag,
    created: .created,
    published: .published,
    lead_hours: ((((.published | fromdateiso8601) - (.created | fromdateiso8601)) / 3600) | round)
  })' <<< "$RELEASE_DATA")
  MEDIAN_LEAD=$(jq 'map(.lead_hours) | sort | if length > 0 then .[length/2 | floor] else null end' <<< "$LEAD_TIMES")
  echo -e " ${GREEN}Median lead time: ${MEDIAN_LEAD}h across $RELEASE_COUNT releases${NC}"
fi
|
|
126
|
+
|
|
127
|
+
# ─── 3. Change Failure Rate ──────────────────────────────────────────────────
echo -e "${BLUE}[3/5]${NC} Computing change failure rate..."

# Counts runs of the npm-publish.yml workflow created inside the lookback
# window, and how many of them concluded with "failure". Only a single page
# (at most 100 runs) is requested.
CFR_DATA=$(safe_api gh api "repos/${REPO}/actions/workflows/npm-publish.yml/runs?per_page=100" --jq "{
  total: [.workflow_runs[] | select(.created_at >= \"${SINCE_DATE}\")] | length,
  failed: [.workflow_runs[] | select(.created_at >= \"${SINCE_DATE}\" and .conclusion == \"failure\")] | length
}")

if [ "$CFR_DATA" != "null" ] && [ -n "$CFR_DATA" ]; then
  CFR_TOTAL=$(echo "$CFR_DATA" | jq '.total')
  CFR_FAILED=$(echo "$CFR_DATA" | jq '.failed')
  # stderr is silenced so a non-numeric CFR_TOTAL simply takes the else branch.
  if [ "$CFR_TOTAL" -gt 0 ] 2>/dev/null; then
    # Percentage truncated to one decimal. jq is used instead of bc because bc
    # prints rates below 1 as ".5", which `jq --argjson` later rejects.
    CFR_RATE=$(jq -n --argjson failed "$CFR_FAILED" --argjson total "$CFR_TOTAL" '($failed * 1000 / $total | floor) / 10' 2>/dev/null || echo "null")
  else
    CFR_RATE=0
  fi
  echo -e " ${GREEN}${CFR_FAILED}/${CFR_TOTAL} failed (${CFR_RATE}%)${NC}"
else
  CFR_TOTAL=0
  CFR_FAILED=0
  CFR_RATE="null"
  echo -e " ${YELLOW}No workflow run data available${NC}"
fi
|
|
150
|
+
|
|
151
|
+
# ─── 4. MTTR Approximation ───────────────────────────────────────────────────
echo -e "${BLUE}[4/5]${NC} Computing MTTR approximation (bug issue lifecycle)..."

# Closed issues labelled "bug" (pull requests excluded) with their
# open-to-close duration in rounded hours.
# The API's `since` parameter filters on last-update time, so the jq filter
# additionally requires closed_at to fall inside the lookback window.
# With --paginate the filter runs once per page, yielding one array per page.
MTTR_DATA=$(safe_api gh api "repos/${REPO}/issues?labels=bug&state=closed&since=${SINCE_DATE}&per_page=100" --paginate --jq "[.[] | select(.pull_request == null and .closed_at >= \"${SINCE_DATE}\") | {
  number: .number,
  created: .created_at,
  closed: .closed_at,
  hours: ((((.closed_at | fromdateiso8601) - (.created_at | fromdateiso8601)) / 3600) | round)
}]")

if [ "$MTTR_DATA" != "null" ] && [ -n "$MTTR_DATA" ]; then
  # Merge the per-page arrays into a single JSON array.
  MTTR_DATA=$(echo "$MTTR_DATA" | jq -c -s 'add // []' 2>/dev/null || echo "[]")
  BUGS_CLOSED=$(echo "$MTTR_DATA" | jq 'length')
  # Median = element at index floor(length / 2) of the sorted durations.
  MTTR_MEDIAN=$(echo "$MTTR_DATA" | jq '[.[].hours] | sort | if length > 0 then .[length/2 | floor] else null end')
  echo -e " ${GREEN}${BUGS_CLOSED} bugs closed, median ${MTTR_MEDIAN}h${NC}"
else
  BUGS_CLOSED=0
  MTTR_MEDIAN="null"
  MTTR_DATA="[]"
  echo -e " ${YELLOW}No closed bug data available${NC}"
fi
|
|
171
|
+
|
|
172
|
+
# ─── 5. Open Issues Snapshot ─────────────────────────────────────────────────
echo -e "${BLUE}[5/5]${NC} Collecting open issues snapshot..."

# `gh issue list` fetches at most 30 issues unless --limit is given, which
# would silently cap the reported count at 30.
OPEN_BUGS=$(safe_api gh issue list --repo "${REPO}" --label bug --state open --limit 1000 --json number --jq 'length')
OPEN_BUGS=${OPEN_BUGS:-0}
# "null" means the call failed; it is kept as-is so the JSON field becomes null.
if [ "$OPEN_BUGS" = "null" ]; then
  echo -e " ${YELLOW}Open bug count unavailable${NC}"
else
  echo -e " ${GREEN}${OPEN_BUGS} open bugs${NC}"
fi
|
|
178
|
+
|
|
179
|
+
# ─── Build JSON Output ───────────────────────────────────────────────────────
echo ""
echo -e "${BLUE}Building telemetry JSON...${NC}"

# Get current release context
CURRENT_RELEASE=$(safe_api gh release view "${RELEASE_ID}" --repo "${REPO}" --json tagName,publishedAt,createdAt --jq '{tag: .tagName, publishedAt: .publishedAt, createdAt: .createdAt}')
if [ "$CURRENT_RELEASE" = "null" ] || [ -z "$CURRENT_RELEASE" ]; then
  # Build the placeholder with jq so a release id containing quotes or
  # backslashes is escaped instead of producing invalid JSON.
  CURRENT_RELEASE=$(jq -n --arg tag "$RELEASE_ID" '{tag: $tag, publishedAt: null, createdAt: null}')
fi

# Recent releases list (last 5)
RECENT_RELEASES=$(safe_api gh api "repos/${REPO}/releases?per_page=5" --jq '[.[] | {tag: .tag_name, publishedAt: .published_at}]')
if [ "$RECENT_RELEASES" = "null" ] || [ -z "$RECENT_RELEASES" ]; then
  RECENT_RELEASES="[]"
fi

# Assemble final JSON
# --arg passes a value as a JSON string; --argjson requires the value to be
# valid JSON already (number, null, array or object). If any --argjson value
# is malformed, jq prints nothing and OUTPUT_JSON ends up empty.
OUTPUT_JSON=$(jq -n \
  --arg ts "$COLLECTION_TS" \
  --arg rid "$RELEASE_ID" \
  --argjson lookback "$LOOKBACK_DAYS" \
  --arg trigger "$TRIGGER_TYPE" \
  --arg repo "$REPO" \
  --argjson relCount "$RELEASE_COUNT" \
  --argjson freqWeek "${FREQ_PER_WEEK:-null}" \
  --argjson leadTimes "$LEAD_TIMES" \
  --argjson medianLead "${MEDIAN_LEAD:-null}" \
  --argjson cfrTotal "$CFR_TOTAL" \
  --argjson cfrFailed "$CFR_FAILED" \
  --argjson cfrRate "${CFR_RATE:-null}" \
  --argjson bugsClosed "$BUGS_CLOSED" \
  --argjson mttrMedian "${MTTR_MEDIAN:-null}" \
  --argjson mttrData "$MTTR_DATA" \
  --argjson openBugs "${OPEN_BUGS:-0}" \
  --argjson currentRelease "$CURRENT_RELEASE" \
  --argjson recentReleases "$RECENT_RELEASES" \
  '{
    collectionTimestamp: $ts,
    releaseId: $rid,
    lookbackDays: $lookback,
    source: "github-api",
    triggerType: $trigger,
    repository: $repo,
    dora: {
      deploymentFrequency: {
        value: $freqWeek,
        unit: "per_week",
        rawCount: $relCount,
        period: "\($lookback)d"
      },
      leadTime: {
        value: $medianLead,
        unit: "hours",
        measurements: $leadTimes
      },
      changeFailureRate: {
        value: $cfrRate,
        totalRuns: $cfrTotal,
        failedRuns: $cfrFailed,
        period: "\($lookback)d"
      },
      mttr: {
        value: $mttrMedian,
        unit: "hours",
        bugsClosed: $bugsClosed,
        medianHours: $mttrMedian,
        details: $mttrData,
        period: "\($lookback)d"
      }
    },
    releaseContext: {
      currentRelease: $currentRelease,
      recentReleases: $recentReleases
    },
    issues: {
      openBugs: $openBugs,
      closedBugsInPeriod: $bugsClosed
    },
    limitations: [
      "DORA metrics are approximated from GitHub API, not from APM/observability tooling",
      "Lead time measures tag-to-publish, not commit-to-production",
      "Change failure rate uses workflow failures as proxy for production incidents",
      "MTTR uses bug issue lifecycle as proxy for incident recovery time"
    ]
  }')
|
|
264
|
+
|
|
265
|
+
# ─── Write Output ─────────────────────────────────────────────────────────────
# An empty OUTPUT_JSON means the jq assembly step failed. Stop here rather
# than writing an empty file and reporting success.
if [ -z "$OUTPUT_JSON" ]; then
  echo -e "${RED}ERROR: Telemetry JSON could not be assembled; no files written.${NC}"
  exit 1
fi

mkdir -p "$OUTPUT_DIR"

# A release tag may contain "/", which would otherwise be read as a directory
# separator in the output path; replace it with "-" for the file name only.
OUTFILE="${OUTPUT_DIR}/telemetry-${RELEASE_ID//\//-}-${DATE_STAMP}.json"
LATEST="${OUTPUT_DIR}/latest.json"

echo "$OUTPUT_JSON" > "$OUTFILE"
# latest.json always mirrors the most recently written telemetry file.
cp "$OUTFILE" "$LATEST"

echo ""
echo "========================================"
echo -e "${GREEN}Telemetry collection complete${NC}"
echo "========================================"
echo "Output: $OUTFILE"
echo "Latest: $LATEST"
echo ""

# ─── Summary ──────────────────────────────────────────────────────────────────
echo -e "${BLUE}DORA Summary:${NC}"
echo " Deployment Frequency: ${FREQ_PER_WEEK:-?}/week ($RELEASE_COUNT releases in ${LOOKBACK_DAYS}d)"
echo " Lead Time: ${MEDIAN_LEAD:-?}h median"
echo " Change Failure Rate: ${CFR_RATE:-?}% ($CFR_FAILED/$CFR_TOTAL)"
echo " MTTR: ${MTTR_MEDIAN:-?}h median ($BUGS_CLOSED bugs)"
echo " Open Bugs: $OPEN_BUGS"
echo ""
echo -e "${BLUE}Invoke production swarm:${NC}"
echo " /qcsd-production-swarm TELEMETRY_DATA=$LATEST RELEASE_ID=$RELEASE_ID"
|
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
#!/usr/bin/env npx tsx
|
|
2
|
+
/**
|
|
3
|
+
* Skill Activation Conflict Detector
|
|
4
|
+
* Detects overlapping skill descriptions that may cause mis-routing.
|
|
5
|
+
*
|
|
6
|
+
* Uses semantic similarity (all-MiniLM-L6-v2 via @xenova/transformers)
|
|
7
|
+
* with a token-overlap (Jaccard) fallback when the ML model is unavailable.
|
|
8
|
+
*
|
|
9
|
+
* Usage:
|
|
10
|
+
* npx tsx scripts/detect-skill-conflicts.ts
|
|
11
|
+
* npx tsx scripts/detect-skill-conflicts.ts --threshold 0.6
|
|
12
|
+
* npx tsx scripts/detect-skill-conflicts.ts --json
|
|
13
|
+
* npx tsx scripts/detect-skill-conflicts.ts --top 20
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { readFileSync, writeFileSync, existsSync, readdirSync, statSync } from 'fs';
|
|
17
|
+
import { join, dirname } from 'path';
|
|
18
|
+
|
|
19
|
+
// ============================================================================
|
|
20
|
+
// TYPES
|
|
21
|
+
// ============================================================================
|
|
22
|
+
|
|
23
|
+
/** A discovered skill: its directory name plus the text used for comparison. */
interface SkillInfo {
  /** Name of the skill's directory under the skills folder. */
  name: string;
  /** Frontmatter `description`, or the first body paragraph when that is absent. */
  description: string;
}
|
|
27
|
+
|
|
28
|
+
/** Similarity score for one unordered pair of skills. */
interface ConflictPair {
  /** Name of the first skill of the pair. */
  skillA: string;
  /** Name of the second skill of the pair. */
  skillB: string;
  /** Similarity between the two descriptions; higher means more alike. */
  similarity: number;
  /** Description text of `skillA`. */
  descA: string;
  /** Description text of `skillB`. */
  descB: string;
}
|
|
35
|
+
|
|
36
|
+
// ============================================================================
|
|
37
|
+
// CONSTANTS
|
|
38
|
+
// ============================================================================
|
|
39
|
+
|
|
40
|
+
/** Skills folder, relative to the project root. */
const SKILLS_DIR = '.claude/skills';

/** Directory-name prefixes of platform skills, which are excluded from analysis. */
const PLATFORM_PREFIXES = ['v3-', 'flow-nexus-', 'agentdb-', 'reasoningbank-', 'swarm-'];

// Default similarity cut-offs for the three reported severity bands.
const CRITICAL_THRESHOLD = 0.85;
const WARNING_THRESHOLD = 0.70;
const INFO_THRESHOLD = 0.55;
|
|
45
|
+
|
|
46
|
+
// ============================================================================
|
|
47
|
+
// FRONTMATTER PARSER (from update-skill-manifest.ts)
|
|
48
|
+
// ============================================================================
|
|
49
|
+
|
|
50
|
+
/**
 * Extracts top-level `key: value` pairs from a leading `---` frontmatter block.
 *
 * This is a minimal line-based reader, not a YAML parser: blank lines,
 * `#` comment lines and indented (nested) lines are skipped, keys must match
 * `\w+`, and one pair of matching surrounding quotes is stripped from values.
 *
 * @param content Full text of the markdown file.
 * @returns Map of key to string value; empty when no frontmatter is found.
 */
function parseYamlFrontmatter(content: string): Record<string, unknown> {
  // `\r?\n` so files saved with CRLF line endings are recognised as well.
  const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  if (!match) return {};

  const result: Record<string, unknown> = {};
  for (const line of match[1].split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;
    if (line.search(/\S/) > 0) continue; // indented => nested value, skip

    const kv = trimmed.match(/^(\w+):\s*(.+)$/);
    if (!kv) continue;

    let val = kv[2].trim();
    const quote = val[0];
    if ((quote === '"' || quote === "'") && val.endsWith(quote)) {
      val = val.slice(1, -1);
    }
    result[kv[1]] = val;
  }
  return result;
}
|
|
72
|
+
|
|
73
|
+
// ============================================================================
|
|
74
|
+
// SKILL DISCOVERY
|
|
75
|
+
// ============================================================================
|
|
76
|
+
|
|
77
|
+
/**
 * Finds the nearest ancestor of the current working directory (the directory
 * itself included) that contains a `package.json`.
 *
 * @returns That directory, or the current working directory when none is found.
 */
function getProjectRoot(): string {
  let dir = process.cwd();
  while (true) {
    if (existsSync(join(dir, 'package.json'))) return dir;
    const parent = dirname(dir);
    // dirname() returns its input unchanged at the filesystem root ('/' or a
    // drive root such as 'C:\\'); comparing against '/' alone would never
    // terminate on Windows.
    if (parent === dir) break;
    dir = parent;
  }
  return process.cwd();
}
|
|
85
|
+
|
|
86
|
+
/**
 * Collects every non-platform skill directory that has a usable description.
 *
 * Hidden entries, non-directories and directories whose name starts with one
 * of PLATFORM_PREFIXES are ignored. The description comes from the
 * frontmatter `description` key, falling back to the first body paragraph
 * (at most 300 characters). Exits the process with code 1 when the skills
 * directory does not exist.
 *
 * @param projectRoot Directory that contains SKILLS_DIR.
 * @returns Skills sorted by name.
 */
function discoverSkills(projectRoot: string): SkillInfo[] {
  const skillsPath = join(projectRoot, SKILLS_DIR);
  if (!existsSync(skillsPath)) {
    console.error(`Skills directory not found: ${skillsPath}`);
    process.exit(1);
  }

  const found: SkillInfo[] = [];

  for (const dirName of readdirSync(skillsPath)) {
    const skillDir = join(skillsPath, dirName);
    if (dirName.startsWith('.')) continue;
    if (!statSync(skillDir).isDirectory()) continue;
    if (PLATFORM_PREFIXES.some(prefix => dirName.startsWith(prefix))) continue;

    // Prefer SKILL.md, then skill.md.
    const skillMd = ['SKILL.md', 'skill.md']
      .map(fileName => join(skillDir, fileName))
      .find(candidate => existsSync(candidate));
    if (!skillMd) continue;

    const content = readFileSync(skillMd, 'utf-8');
    let description = (parseYamlFrontmatter(content).description as string) || '';

    if (!description) {
      // First paragraph after the frontmatter, skipping one optional heading line.
      const bodyMatch = content.match(/^---[\s\S]*?---\s*\n+(?:#[^\n]*\n+)?([\s\S]*?)(?:\n\n|\n#)/);
      if (bodyMatch) {
        description = bodyMatch[1].trim().slice(0, 300);
      }
    }

    if (description) {
      found.push({ name: dirName, description });
    }
  }

  return found.sort((a, b) => a.name.localeCompare(b.name));
}
|
|
130
|
+
|
|
131
|
+
// ============================================================================
|
|
132
|
+
// TOKEN-OVERLAP (JACCARD) FALLBACK SIMILARITY
|
|
133
|
+
// ============================================================================
|
|
134
|
+
|
|
135
|
+
/**
 * Splits text into lowercase alphanumeric tokens, keeping only tokens of
 * three or more characters. Any character other than a-z and 0-9 acts as a
 * separator.
 */
function tokenize(text: string): string[] {
  const words = text.toLowerCase().match(/[a-z0-9]+/g) ?? [];
  return words.filter(word => word.length >= 3);
}
|
|
138
|
+
|
|
139
|
+
/**
 * Jaccard similarity of the two texts' token sets:
 * |A ∩ B| / |A ∪ B|, using the tokens produced by `tokenize`.
 *
 * @returns A value in [0, 1]; 0 when either text yields no tokens.
 */
function ngramSimilarity(a: string, b: string): number {
  const left = new Set(tokenize(a));
  const right = new Set(tokenize(b));
  if (left.size === 0 || right.size === 0) return 0;

  const shared = [...left].filter(token => right.has(token)).length;
  const unionSize = left.size + right.size - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
}
|
|
153
|
+
|
|
154
|
+
// ============================================================================
|
|
155
|
+
// EMBEDDING-BASED SIMILARITY
|
|
156
|
+
// ============================================================================
|
|
157
|
+
|
|
158
|
+
/**
 * Scores every unordered pair of skills by cosine similarity of their
 * description embeddings.
 *
 * The embedding and vector-math helpers are loaded with dynamic imports so
 * that a load or runtime failure can be caught here.
 *
 * @param skills Skills to compare.
 * @returns One entry per pair (i < j), or `null` when the helpers cannot be
 *          loaded, throw, or return a different number of embeddings than
 *          there are skills — the caller then falls back to token overlap.
 */
async function computeEmbeddingSimilarities(
  skills: SkillInfo[]
): Promise<ConflictPair[] | null> {
  try {
    const { computeBatchEmbeddings } = await import('../v3/src/learning/real-embeddings.js');
    const { cosineSimilarity } = await import('../v3/src/shared/utils/vector-math.js');

    console.log(`Computing embeddings for ${skills.length} skill descriptions...`);
    const vectors = await computeBatchEmbeddings(skills.map(skill => skill.description));

    if (vectors.length !== skills.length) {
      console.warn(`Embedding count mismatch: got ${vectors.length}, expected ${skills.length}`);
      return null;
    }

    // For each skill, pair it with every skill that follows it in the list.
    return skills.flatMap((first, i) =>
      skills.slice(i + 1).map((second, offset) => ({
        skillA: first.name,
        skillB: second.name,
        similarity: cosineSimilarity(vectors[i], vectors[i + 1 + offset]),
        descA: first.description,
        descB: second.description,
      })),
    );
  } catch (err) {
    console.warn(`[Fallback] Transformer embeddings unavailable: ${(err as Error).message}`);
    console.warn('[Fallback] Using token-overlap similarity instead.\n');
    return null;
  }
}
|
|
195
|
+
|
|
196
|
+
/**
 * Fallback scorer: rates every unordered pair of skills (i < j) with
 * `ngramSimilarity` on their descriptions.
 *
 * @param skills Skills to compare.
 * @returns One entry per pair, in (i, j) order.
 */
function computeTokenSimilarities(skills: SkillInfo[]): ConflictPair[] {
  return skills.flatMap((first, i) =>
    skills.slice(i + 1).map(second => ({
      skillA: first.name,
      skillB: second.name,
      similarity: ngramSimilarity(first.description, second.description),
      descA: first.description,
      descB: second.description,
    })),
  );
}
|
|
212
|
+
|
|
213
|
+
// ============================================================================
|
|
214
|
+
// OUTPUT
|
|
215
|
+
// ============================================================================
|
|
216
|
+
|
|
217
|
+
/**
 * Shortens `desc` to at most `maxLen` characters, ending in "..." when text
 * had to be cut. Strings that already fit are returned unchanged.
 */
function truncateDesc(desc: string, maxLen = 60): string {
  return desc.length > maxLen ? `${desc.slice(0, maxLen - 3)}...` : desc;
}
|
|
221
|
+
|
|
222
|
+
/**
 * Buckets scored pairs into critical / warning / info bands and renders a
 * plain-text report.
 *
 * Bands are half-open: critical is `>= critical`, warning is
 * `[warning, critical)`, info is `[info, warning)`. Entries are numbered
 * consecutively across all three sections.
 *
 * @param pairs      Scored pairs; the array is not modified.
 * @param skillCount Number of skills analysed (used for the header line).
 * @param thresholds Lower bounds of the three bands.
 * @param topN       Optional cap on the entries printed per section; the
 *                   returned arrays are never truncated.
 * @returns The report text and the three buckets, each sorted by descending similarity.
 */
function formatResults(
  pairs: ConflictPair[],
  skillCount: number,
  thresholds: { critical: number; warning: number; info: number },
  topN?: number,
): { output: string; critical: ConflictPair[]; warnings: ConflictPair[]; info: ConflictPair[] } {
  // Sort a copy: Array.prototype.sort works in place and would otherwise
  // reorder the caller's array as a side effect.
  const sorted = [...pairs].sort((a, b) => b.similarity - a.similarity);

  const critical = sorted.filter(p => p.similarity >= thresholds.critical);
  const warnings = sorted.filter(p => p.similarity >= thresholds.warning && p.similarity < thresholds.critical);
  const info = sorted.filter(p => p.similarity >= thresholds.info && p.similarity < thresholds.warning);

  const totalPairs = (skillCount * (skillCount - 1)) / 2;
  const sep = '='.repeat(64);

  let out = `\n${sep}\nSKILL ACTIVATION CONFLICT DETECTOR\n${skillCount} AQE skills analyzed | ${totalPairs} pairs compared\n${sep}\n`;

  // Running entry number shared by all sections.
  let idx = 1;

  const formatSection = (label: string, items: ConflictPair[], limit?: number): string => {
    if (items.length === 0) return `\n${label}:\n (none)\n`;
    const shown = limit ? items.slice(0, limit) : items;
    let s = `\n${label}:\n`;
    for (const p of shown) {
      // Pad so the score lines up in a column for typical name lengths.
      s += ` ${String(idx++).padStart(3)}. ${p.skillA} <-> ${p.skillB}${' '.repeat(Math.max(1, 50 - p.skillA.length - p.skillB.length))}${p.similarity.toFixed(3)}\n`;
      s += ` a: "${truncateDesc(p.descA)}"\n`;
      s += ` b: "${truncateDesc(p.descB)}"\n`;
    }
    if (limit && items.length > limit) {
      s += ` ... and ${items.length - limit} more\n`;
    }
    return s;
  };

  const limit = topN;
  out += formatSection(`CRITICAL CONFLICTS (similarity >= ${thresholds.critical})`, critical, limit);
  out += formatSection(`WARNING CONFLICTS (similarity >= ${thresholds.warning})`, warnings, limit);
  out += formatSection(`INFO (similarity >= ${thresholds.info})`, info, limit);

  out += `\n${sep}\nSUMMARY: ${critical.length} critical | ${warnings.length} warnings | ${info.length} info\n${sep}\n`;

  return { output: out, critical, warnings, info };
}
|
|
266
|
+
|
|
267
|
+
// ============================================================================
|
|
268
|
+
// MAIN
|
|
269
|
+
// ============================================================================
|
|
270
|
+
|
|
271
|
+
/**
 * CLI entry point: parses options, discovers skills, scores every pair
 * (embeddings first, token overlap as fallback), prints the report and, with
 * `--json`, writes a summary to `scripts/skill-conflicts.json`.
 *
 * Exits with code 1 on an invalid `--threshold` or `--top` value.
 */
async function main() {
  const args = process.argv.slice(2);

  let customThreshold: number | undefined;
  let jsonOutput = false;
  let topN: number | undefined;

  for (let i = 0; i < args.length; i++) {
    switch (args[i]) {
      case '--threshold': {
        const raw = args[++i];
        customThreshold = Number.parseFloat(raw);
        // A missing or non-numeric value parses to NaN, which `??` would not
        // replace with the default and which empties the warning/info bands.
        if (!Number.isFinite(customThreshold) || customThreshold < 0 || customThreshold > 1) {
          console.error(`Invalid --threshold value: ${raw} (expected a number between 0 and 1)`);
          process.exit(1);
        }
        break;
      }
      case '--json':
        jsonOutput = true;
        break;
      case '--top': {
        const raw = args[++i];
        topN = Number.parseInt(raw, 10);
        if (!Number.isInteger(topN) || topN < 1) {
          console.error(`Invalid --top value: ${raw} (expected a positive integer)`);
          process.exit(1);
        }
        break;
      }
      case '--help':
      case '-h':
        console.log(`
Skill Activation Conflict Detector

Usage:
npx tsx scripts/detect-skill-conflicts.ts [options]

Options:
--threshold <n> Custom warning threshold (default: 0.70)
--json Write results to scripts/skill-conflicts.json
--top <n> Show top N pairs per severity level
--help Show this help
`);
        process.exit(0);
    }
  }

  // Keep the bands ordered (info <= warning <= critical) even when the custom
  // warning threshold lies outside the default range; otherwise a pair could
  // be reported in two bands or in none.
  const warning = customThreshold ?? WARNING_THRESHOLD;
  const thresholds = {
    critical: Math.max(CRITICAL_THRESHOLD, warning),
    warning,
    info: Math.min(INFO_THRESHOLD, warning),
  };

  const projectRoot = getProjectRoot();
  const skills = discoverSkills(projectRoot);
  console.log(`Discovered ${skills.length} AQE skills (platform skills excluded)`);

  // Try ML embeddings first, fall back to token overlap
  let pairs = await computeEmbeddingSimilarities(skills);
  const method = pairs ? 'transformer-embeddings' : 'token-overlap';
  if (!pairs) {
    pairs = computeTokenSimilarities(skills);
  }

  const { output, critical, warnings, info } = formatResults(pairs, skills.length, thresholds, topN);
  console.log(output);

  if (jsonOutput) {
    const jsonPath = join(projectRoot, 'scripts', 'skill-conflicts.json');
    // The JSON report carries names and scores only; descriptions are omitted.
    const summarize = (p: ConflictPair) => ({ skillA: p.skillA, skillB: p.skillB, similarity: p.similarity });
    const data = {
      generatedAt: new Date().toISOString(),
      method,
      skillCount: skills.length,
      pairCount: pairs.length,
      thresholds,
      critical: critical.map(summarize),
      warnings: warnings.map(summarize),
      info: info.map(summarize),
    };
    writeFileSync(jsonPath, JSON.stringify(data, null, 2));
    console.log(`JSON results written to ${jsonPath}`);
  }
}

// Any unhandled failure ends the process with a non-zero exit code.
main().catch(err => {
  console.error('Fatal error:', err);
  process.exit(1);
});
|