agentic-qe 3.6.9 → 3.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/.validation/schemas/skill-eval.schema.json +11 -1
- package/.claude/skills/pr-review/SKILL.md +2 -2
- package/.claude/skills/qcsd-production-swarm/SKILL.md +2781 -0
- package/.claude/skills/qcsd-production-swarm/evals/qcsd-production-swarm.yaml +246 -0
- package/.claude/skills/qcsd-production-swarm/schemas/output.json +505 -0
- package/.claude/skills/qcsd-production-swarm/scripts/validate-config.json +25 -0
- package/.claude/skills/skills-manifest.json +5 -5
- package/package.json +1 -1
- package/scripts/benchmark-hnsw-loading.ts +480 -0
- package/scripts/benchmark-kg-assisted.ts +725 -0
- package/scripts/collect-production-telemetry.sh +291 -0
- package/scripts/detect-skill-conflicts.ts +347 -0
- package/scripts/eval-driven-workflow.ts +704 -0
- package/scripts/run-skill-eval.ts +210 -10
- package/scripts/score-skill-quality.ts +511 -0
- package/v3/CHANGELOG.md +19 -0
- package/v3/assets/skills/pr-review/SKILL.md +2 -2
- package/v3/dist/cli/bundle.js +1064 -363
- package/v3/dist/cli/commands/hooks.d.ts.map +1 -1
- package/v3/dist/cli/commands/hooks.js +143 -2
- package/v3/dist/cli/commands/hooks.js.map +1 -1
- package/v3/dist/cli/commands/test.d.ts.map +1 -1
- package/v3/dist/cli/commands/test.js +6 -0
- package/v3/dist/cli/commands/test.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.js +58 -6
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/mocha-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/mocha-generator.js +79 -7
- package/v3/dist/domains/test-generation/generators/mocha-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/pytest-generator.d.ts +4 -0
- package/v3/dist/domains/test-generation/generators/pytest-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/pytest-generator.js +77 -10
- package/v3/dist/domains/test-generation/generators/pytest-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/interfaces/test-generator.interface.d.ts +21 -0
- package/v3/dist/domains/test-generation/interfaces/test-generator.interface.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/interfaces.d.ts +21 -0
- package/v3/dist/domains/test-generation/interfaces.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/services/test-generator.d.ts +22 -0
- package/v3/dist/domains/test-generation/services/test-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/services/test-generator.js +163 -3
- package/v3/dist/domains/test-generation/services/test-generator.js.map +1 -1
- package/v3/dist/kernel/unified-memory-hnsw.d.ts +29 -0
- package/v3/dist/kernel/unified-memory-hnsw.d.ts.map +1 -1
- package/v3/dist/kernel/unified-memory-hnsw.js +136 -0
- package/v3/dist/kernel/unified-memory-hnsw.js.map +1 -1
- package/v3/dist/kernel/unified-memory.d.ts +2 -2
- package/v3/dist/kernel/unified-memory.d.ts.map +1 -1
- package/v3/dist/kernel/unified-memory.js +7 -9
- package/v3/dist/kernel/unified-memory.js.map +1 -1
- package/v3/dist/learning/qe-hooks.d.ts.map +1 -1
- package/v3/dist/learning/qe-hooks.js +34 -3
- package/v3/dist/learning/qe-hooks.js.map +1 -1
- package/v3/dist/mcp/bundle.js +857 -329
- package/v3/package.json +1 -1
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
skill: qcsd-production-swarm
|
|
2
|
+
version: 1.0.0
|
|
3
|
+
description: >
|
|
4
|
+
Evaluation suite for QCSD production telemetry swarm orchestration.
|
|
5
|
+
Tests DORA metrics analysis, root cause analysis, domain flag detection, agent coordination, feedback loop synthesis, and decision logic determinism (CRITICAL, HEALTHY, DEGRADED).
|
|
6
|
+
|
|
7
|
+
models_to_test:
|
|
8
|
+
- claude-3.5-sonnet
|
|
9
|
+
- claude-3-haiku
|
|
10
|
+
|
|
11
|
+
mcp_integration:
|
|
12
|
+
enabled: true
|
|
13
|
+
namespace: skill-validation
|
|
14
|
+
query_patterns: true
|
|
15
|
+
track_outcomes: true
|
|
16
|
+
store_patterns: true
|
|
17
|
+
target_agents:
|
|
18
|
+
- qe-learning-coordinator
|
|
19
|
+
- qe-queen-coordinator
|
|
20
|
+
|
|
21
|
+
learning:
|
|
22
|
+
store_success_patterns: true
|
|
23
|
+
pattern_ttl_days: 90
|
|
24
|
+
|
|
25
|
+
result_format:
|
|
26
|
+
json_output: true
|
|
27
|
+
include_timing: true
|
|
28
|
+
include_token_usage: true
|
|
29
|
+
|
|
30
|
+
setup:
|
|
31
|
+
required_tools:
|
|
32
|
+
- jq
|
|
33
|
+
|
|
34
|
+
test_cases:
|
|
35
|
+
|
|
36
|
+
- id: tc001_dora_metrics_analysis
|
|
37
|
+
description: "Perform DORA metrics analysis from production telemetry"
|
|
38
|
+
category: dora
|
|
39
|
+
priority: critical
|
|
40
|
+
|
|
41
|
+
input:
|
|
42
|
+
telemetry_data: "Production metrics for v3.6.9 release"
|
|
43
|
+
project_type: "web_application"
|
|
44
|
+
|
|
45
|
+
expected_output:
|
|
46
|
+
must_contain:
|
|
47
|
+
- "deployment frequency"
|
|
48
|
+
- "lead time"
|
|
49
|
+
- "MTTR"
|
|
50
|
+
- "change failure"
|
|
51
|
+
|
|
52
|
+
validation:
|
|
53
|
+
schema_check: true
|
|
54
|
+
keyword_match_threshold: 0.8
|
|
55
|
+
|
|
56
|
+
- id: tc002_root_cause_analysis
|
|
57
|
+
description: "Systematic root cause analysis of production incidents"
|
|
58
|
+
category: rca
|
|
59
|
+
priority: critical
|
|
60
|
+
|
|
61
|
+
input:
|
|
62
|
+
incidents: "P1 incident: API timeout spike after v3.6.9 deploy"
|
|
63
|
+
has_infrastructure_change: true
|
|
64
|
+
has_performance_sla: true
|
|
65
|
+
|
|
66
|
+
expected_output:
|
|
67
|
+
must_contain:
|
|
68
|
+
- "root cause"
|
|
69
|
+
- "severity"
|
|
70
|
+
- "resolution"
|
|
71
|
+
|
|
72
|
+
validation:
|
|
73
|
+
schema_check: true
|
|
74
|
+
|
|
75
|
+
- id: tc003_domain_flag_detection
|
|
76
|
+
description: "Detect HAS_INFRASTRUCTURE_CHANGE, HAS_PERFORMANCE_SLA, HAS_RECURRING_INCIDENTS flags"
|
|
77
|
+
category: flags
|
|
78
|
+
priority: high
|
|
79
|
+
|
|
80
|
+
input:
|
|
81
|
+
content: "Recent Kubernetes scaling policy change causing SLA breaches with recurring pod eviction incidents"
|
|
82
|
+
analyze_flags: true
|
|
83
|
+
|
|
84
|
+
expected_output:
|
|
85
|
+
must_contain:
|
|
86
|
+
- "HAS_INFRASTRUCTURE_CHANGE"
|
|
87
|
+
- "HAS_PERFORMANCE_SLA"
|
|
88
|
+
- "HAS_RECURRING_INCIDENTS"
|
|
89
|
+
|
|
90
|
+
validation:
|
|
91
|
+
schema_check: true
|
|
92
|
+
|
|
93
|
+
- id: tc004_agent_coordination
|
|
94
|
+
description: "Coordinate multi-agent production telemetry swarm execution"
|
|
95
|
+
category: orchestration
|
|
96
|
+
priority: high
|
|
97
|
+
|
|
98
|
+
input:
|
|
99
|
+
domain_flags:
|
|
100
|
+
HAS_INFRASTRUCTURE_CHANGE: true
|
|
101
|
+
HAS_PERFORMANCE_SLA: true
|
|
102
|
+
HAS_RECURRING_INCIDENTS: false
|
|
103
|
+
|
|
104
|
+
expected_output:
|
|
105
|
+
must_contain:
|
|
106
|
+
- "agent"
|
|
107
|
+
- "coordinate"
|
|
108
|
+
|
|
109
|
+
validation:
|
|
110
|
+
schema_check: true
|
|
111
|
+
|
|
112
|
+
- id: tc005_feedback_loop_synthesis
|
|
113
|
+
description: "Execute feedback loop synthesis to Ideation and Refinement phases"
|
|
114
|
+
category: feedback
|
|
115
|
+
priority: medium
|
|
116
|
+
|
|
117
|
+
input:
|
|
118
|
+
production_data: "Post-release monitoring for authentication service upgrade"
|
|
119
|
+
include_feedback_loops: true
|
|
120
|
+
|
|
121
|
+
expected_output:
|
|
122
|
+
must_contain:
|
|
123
|
+
- "feedback"
|
|
124
|
+
- "ideation"
|
|
125
|
+
- "refinement"
|
|
126
|
+
|
|
127
|
+
validation:
|
|
128
|
+
schema_check: true
|
|
129
|
+
|
|
130
|
+
- id: tc006_decision_logic_critical
|
|
131
|
+
description: "Decision logic must produce CRITICAL when any core metric breaches critical threshold"
|
|
132
|
+
category: decision_logic
|
|
133
|
+
priority: critical
|
|
134
|
+
|
|
135
|
+
input:
|
|
136
|
+
metrics:
|
|
137
|
+
doraScore: 0.35
|
|
138
|
+
slaCompliance: 93.0
|
|
139
|
+
incidentSeverity: "P1"
|
|
140
|
+
rcaCompleteness: 40
|
|
141
|
+
defectTrend: "increasing"
|
|
142
|
+
defectDensity: 6.2
|
|
143
|
+
resilienceScore: 45
|
|
144
|
+
middlewareHealth: 12
|
|
145
|
+
sapHealth: null
|
|
146
|
+
sodCompliance: null
|
|
147
|
+
domain_flags:
|
|
148
|
+
HAS_INFRASTRUCTURE_CHANGE: true
|
|
149
|
+
HAS_PERFORMANCE_SLA: true
|
|
150
|
+
HAS_RECURRING_INCIDENTS: true
|
|
151
|
+
HAS_MIDDLEWARE: true
|
|
152
|
+
|
|
153
|
+
expected_output:
|
|
154
|
+
must_contain:
|
|
155
|
+
- "CRITICAL"
|
|
156
|
+
must_not_contain:
|
|
157
|
+
- "HEALTHY"
|
|
158
|
+
|
|
159
|
+
validation:
|
|
160
|
+
schema_check: true
|
|
161
|
+
decision_deterministic: true
|
|
162
|
+
|
|
163
|
+
- id: tc007_decision_logic_healthy
|
|
164
|
+
description: "Decision logic must produce HEALTHY when all metrics exceed healthy thresholds"
|
|
165
|
+
category: decision_logic
|
|
166
|
+
priority: critical
|
|
167
|
+
|
|
168
|
+
input:
|
|
169
|
+
metrics:
|
|
170
|
+
doraScore: 0.85
|
|
171
|
+
slaCompliance: 99.5
|
|
172
|
+
incidentSeverity: "NONE"
|
|
173
|
+
rcaCompleteness: 95
|
|
174
|
+
defectTrend: "declining"
|
|
175
|
+
defectDensity: 0.8
|
|
176
|
+
resilienceScore: 92
|
|
177
|
+
middlewareHealth: 88
|
|
178
|
+
sapHealth: null
|
|
179
|
+
sodCompliance: 95
|
|
180
|
+
domain_flags:
|
|
181
|
+
HAS_INFRASTRUCTURE_CHANGE: true
|
|
182
|
+
HAS_PERFORMANCE_SLA: true
|
|
183
|
+
HAS_RECURRING_INCIDENTS: false
|
|
184
|
+
HAS_MIDDLEWARE: true
|
|
185
|
+
HAS_AUTHORIZATION: true
|
|
186
|
+
|
|
187
|
+
expected_output:
|
|
188
|
+
must_contain:
|
|
189
|
+
- "HEALTHY"
|
|
190
|
+
must_not_contain:
|
|
191
|
+
- "CRITICAL"
|
|
192
|
+
|
|
193
|
+
validation:
|
|
194
|
+
schema_check: true
|
|
195
|
+
decision_deterministic: true
|
|
196
|
+
|
|
197
|
+
- id: tc008_decision_logic_degraded
|
|
198
|
+
description: "Decision logic must produce DEGRADED when metrics fall between CRITICAL and HEALTHY thresholds"
|
|
199
|
+
category: decision_logic
|
|
200
|
+
priority: critical
|
|
201
|
+
|
|
202
|
+
input:
|
|
203
|
+
metrics:
|
|
204
|
+
doraScore: 0.55
|
|
205
|
+
slaCompliance: 97.0
|
|
206
|
+
incidentSeverity: "P3"
|
|
207
|
+
rcaCompleteness: 60
|
|
208
|
+
defectTrend: "stable"
|
|
209
|
+
defectDensity: 3.1
|
|
210
|
+
resilienceScore: 65
|
|
211
|
+
middlewareHealth: 55
|
|
212
|
+
sapHealth: 40
|
|
213
|
+
sodCompliance: null
|
|
214
|
+
domain_flags:
|
|
215
|
+
HAS_INFRASTRUCTURE_CHANGE: true
|
|
216
|
+
HAS_PERFORMANCE_SLA: true
|
|
217
|
+
HAS_RECURRING_INCIDENTS: false
|
|
218
|
+
HAS_MIDDLEWARE: true
|
|
219
|
+
HAS_SAP_INTEGRATION: true
|
|
220
|
+
|
|
221
|
+
expected_output:
|
|
222
|
+
must_contain:
|
|
223
|
+
- "DEGRADED"
|
|
224
|
+
must_not_contain:
|
|
225
|
+
- "CRITICAL"
|
|
226
|
+
- "HEALTHY"
|
|
227
|
+
|
|
228
|
+
validation:
|
|
229
|
+
schema_check: true
|
|
230
|
+
decision_deterministic: true
|
|
231
|
+
|
|
232
|
+
success_criteria:
|
|
233
|
+
pass_rate: 0.875
|
|
234
|
+
critical_pass_rate: 1.0
|
|
235
|
+
avg_reasoning_quality: 0.75
|
|
236
|
+
max_execution_time_ms: 300000
|
|
237
|
+
decision_logic_pass_rate: 1.0
|
|
238
|
+
|
|
239
|
+
metadata:
|
|
240
|
+
author: "qe-qcsd-orchestrator"
|
|
241
|
+
created: "2026-02-17"
|
|
242
|
+
updated: "2026-02-17"
|
|
243
|
+
coverage_target: >
|
|
244
|
+
QCSD production swarm with 8 test cases covering DORA metrics analysis,
|
|
245
|
+
root cause analysis, domain flag detection, agent coordination, feedback loop synthesis,
|
|
246
|
+
and 3 decision logic determinism tests (CRITICAL, HEALTHY, DEGRADED paths).
|