agentic-qe 3.6.9 → 3.6.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/.validation/schemas/skill-eval.schema.json +11 -1
- package/.claude/skills/pr-review/SKILL.md +2 -2
- package/.claude/skills/qcsd-production-swarm/SKILL.md +2781 -0
- package/.claude/skills/qcsd-production-swarm/evals/qcsd-production-swarm.yaml +246 -0
- package/.claude/skills/qcsd-production-swarm/schemas/output.json +505 -0
- package/.claude/skills/qcsd-production-swarm/scripts/validate-config.json +25 -0
- package/.claude/skills/skills-manifest.json +5 -5
- package/package.json +1 -1
- package/scripts/benchmark-hnsw-loading.ts +480 -0
- package/scripts/benchmark-kg-assisted.ts +725 -0
- package/scripts/collect-production-telemetry.sh +291 -0
- package/scripts/detect-skill-conflicts.ts +347 -0
- package/scripts/eval-driven-workflow.ts +704 -0
- package/scripts/run-skill-eval.ts +210 -10
- package/scripts/score-skill-quality.ts +511 -0
- package/v3/CHANGELOG.md +44 -0
- package/v3/assets/skills/pr-review/SKILL.md +2 -2
- package/v3/dist/cli/bundle.js +1526 -700
- package/v3/dist/cli/commands/code.d.ts.map +1 -1
- package/v3/dist/cli/commands/code.js +9 -85
- package/v3/dist/cli/commands/code.js.map +1 -1
- package/v3/dist/cli/commands/coverage.d.ts.map +1 -1
- package/v3/dist/cli/commands/coverage.js +3 -28
- package/v3/dist/cli/commands/coverage.js.map +1 -1
- package/v3/dist/cli/commands/hooks.d.ts.map +1 -1
- package/v3/dist/cli/commands/hooks.js +143 -2
- package/v3/dist/cli/commands/hooks.js.map +1 -1
- package/v3/dist/cli/commands/security.d.ts.map +1 -1
- package/v3/dist/cli/commands/security.js +3 -29
- package/v3/dist/cli/commands/security.js.map +1 -1
- package/v3/dist/cli/commands/test.d.ts.map +1 -1
- package/v3/dist/cli/commands/test.js +11 -58
- package/v3/dist/cli/commands/test.js.map +1 -1
- package/v3/dist/cli/utils/file-discovery.d.ts +27 -0
- package/v3/dist/cli/utils/file-discovery.d.ts.map +1 -0
- package/v3/dist/cli/utils/file-discovery.js +105 -0
- package/v3/dist/cli/utils/file-discovery.js.map +1 -0
- package/v3/dist/coordination/task-executor.d.ts.map +1 -1
- package/v3/dist/coordination/task-executor.js +304 -44
- package/v3/dist/coordination/task-executor.js.map +1 -1
- package/v3/dist/domains/code-intelligence/coordinator.d.ts.map +1 -1
- package/v3/dist/domains/code-intelligence/coordinator.js +8 -1
- package/v3/dist/domains/code-intelligence/coordinator.js.map +1 -1
- package/v3/dist/domains/code-intelligence/services/metric-collector/index.d.ts.map +1 -1
- package/v3/dist/domains/code-intelligence/services/metric-collector/index.js +10 -0
- package/v3/dist/domains/code-intelligence/services/metric-collector/index.js.map +1 -1
- package/v3/dist/domains/code-intelligence/services/metric-collector/interfaces.d.ts +7 -1
- package/v3/dist/domains/code-intelligence/services/metric-collector/interfaces.d.ts.map +1 -1
- package/v3/dist/domains/code-intelligence/services/metric-collector/interfaces.js +10 -1
- package/v3/dist/domains/code-intelligence/services/metric-collector/interfaces.js.map +1 -1
- package/v3/dist/domains/code-intelligence/services/metric-collector/loc-counter.js +34 -10
- package/v3/dist/domains/code-intelligence/services/metric-collector/loc-counter.js.map +1 -1
- package/v3/dist/domains/coverage-analysis/services/hnsw-index.d.ts +9 -0
- package/v3/dist/domains/coverage-analysis/services/hnsw-index.d.ts.map +1 -1
- package/v3/dist/domains/coverage-analysis/services/hnsw-index.js +38 -3
- package/v3/dist/domains/coverage-analysis/services/hnsw-index.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.js +58 -6
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/mocha-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/mocha-generator.js +79 -7
- package/v3/dist/domains/test-generation/generators/mocha-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/pytest-generator.d.ts +4 -0
- package/v3/dist/domains/test-generation/generators/pytest-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/pytest-generator.js +77 -10
- package/v3/dist/domains/test-generation/generators/pytest-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/interfaces/test-generator.interface.d.ts +21 -0
- package/v3/dist/domains/test-generation/interfaces/test-generator.interface.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/interfaces.d.ts +21 -0
- package/v3/dist/domains/test-generation/interfaces.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/services/test-generator.d.ts +22 -0
- package/v3/dist/domains/test-generation/services/test-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/services/test-generator.js +163 -3
- package/v3/dist/domains/test-generation/services/test-generator.js.map +1 -1
- package/v3/dist/init/init-wizard-hooks.d.ts +8 -1
- package/v3/dist/init/init-wizard-hooks.d.ts.map +1 -1
- package/v3/dist/init/init-wizard-hooks.js +47 -39
- package/v3/dist/init/init-wizard-hooks.js.map +1 -1
- package/v3/dist/init/phases/07-hooks.d.ts +11 -1
- package/v3/dist/init/phases/07-hooks.d.ts.map +1 -1
- package/v3/dist/init/phases/07-hooks.js +46 -50
- package/v3/dist/init/phases/07-hooks.js.map +1 -1
- package/v3/dist/init/settings-merge.d.ts +35 -0
- package/v3/dist/init/settings-merge.d.ts.map +1 -0
- package/v3/dist/init/settings-merge.js +140 -0
- package/v3/dist/init/settings-merge.js.map +1 -0
- package/v3/dist/integrations/agentic-flow/model-router/router.js +1 -1
- package/v3/dist/integrations/agentic-flow/model-router/router.js.map +1 -1
- package/v3/dist/integrations/agentic-flow/model-router/score-calculator.d.ts.map +1 -1
- package/v3/dist/integrations/agentic-flow/model-router/score-calculator.js +18 -3
- package/v3/dist/integrations/agentic-flow/model-router/score-calculator.js.map +1 -1
- package/v3/dist/integrations/agentic-flow/model-router/signal-collector.d.ts +3 -3
- package/v3/dist/integrations/agentic-flow/model-router/signal-collector.d.ts.map +1 -1
- package/v3/dist/integrations/agentic-flow/model-router/signal-collector.js +18 -0
- package/v3/dist/integrations/agentic-flow/model-router/signal-collector.js.map +1 -1
- package/v3/dist/kernel/unified-memory-hnsw.d.ts +29 -0
- package/v3/dist/kernel/unified-memory-hnsw.d.ts.map +1 -1
- package/v3/dist/kernel/unified-memory-hnsw.js +136 -0
- package/v3/dist/kernel/unified-memory-hnsw.js.map +1 -1
- package/v3/dist/kernel/unified-memory.d.ts +2 -2
- package/v3/dist/kernel/unified-memory.d.ts.map +1 -1
- package/v3/dist/kernel/unified-memory.js +7 -9
- package/v3/dist/kernel/unified-memory.js.map +1 -1
- package/v3/dist/learning/qe-hooks.d.ts.map +1 -1
- package/v3/dist/learning/qe-hooks.js +34 -3
- package/v3/dist/learning/qe-hooks.js.map +1 -1
- package/v3/dist/mcp/bundle.js +1403 -425
- package/v3/dist/mcp/handlers/domain-handler-configs.d.ts.map +1 -1
- package/v3/dist/mcp/handlers/domain-handler-configs.js +40 -31
- package/v3/dist/mcp/handlers/domain-handler-configs.js.map +1 -1
- package/v3/dist/mcp/handlers/task-handlers.d.ts.map +1 -1
- package/v3/dist/mcp/handlers/task-handlers.js +68 -5
- package/v3/dist/mcp/handlers/task-handlers.js.map +1 -1
- package/v3/dist/mcp/protocol-server.d.ts.map +1 -1
- package/v3/dist/mcp/protocol-server.js +16 -2
- package/v3/dist/mcp/protocol-server.js.map +1 -1
- package/v3/package.json +1 -1
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
skill: qcsd-production-swarm
|
|
2
|
+
version: 1.0.0
|
|
3
|
+
description: >
|
|
4
|
+
Evaluation suite for QCSD production telemetry swarm orchestration.
|
|
5
|
+
Tests DORA metrics analysis, root cause analysis, domain flag detection, agent coordination, feedback loop synthesis, and decision logic determinism.
|
|
6
|
+
|
|
7
|
+
models_to_test:
|
|
8
|
+
- claude-3.5-sonnet
|
|
9
|
+
- claude-3-haiku
|
|
10
|
+
|
|
11
|
+
mcp_integration:
|
|
12
|
+
enabled: true
|
|
13
|
+
namespace: skill-validation
|
|
14
|
+
query_patterns: true
|
|
15
|
+
track_outcomes: true
|
|
16
|
+
store_patterns: true
|
|
17
|
+
target_agents:
|
|
18
|
+
- qe-learning-coordinator
|
|
19
|
+
- qe-queen-coordinator
|
|
20
|
+
|
|
21
|
+
learning:
|
|
22
|
+
store_success_patterns: true
|
|
23
|
+
pattern_ttl_days: 90
|
|
24
|
+
|
|
25
|
+
result_format:
|
|
26
|
+
json_output: true
|
|
27
|
+
include_timing: true
|
|
28
|
+
include_token_usage: true
|
|
29
|
+
|
|
30
|
+
setup:
|
|
31
|
+
required_tools:
|
|
32
|
+
- jq
|
|
33
|
+
|
|
34
|
+
test_cases:
|
|
35
|
+
|
|
36
|
+
- id: tc001_dora_metrics_analysis
|
|
37
|
+
description: "Perform DORA metrics analysis from production telemetry"
|
|
38
|
+
category: dora
|
|
39
|
+
priority: critical
|
|
40
|
+
|
|
41
|
+
input:
|
|
42
|
+
telemetry_data: "Production metrics for v3.6.9 release"
|
|
43
|
+
project_type: "web_application"
|
|
44
|
+
|
|
45
|
+
expected_output:
|
|
46
|
+
must_contain:
|
|
47
|
+
- "deployment frequency"
|
|
48
|
+
- "lead time"
|
|
49
|
+
- "MTTR"
|
|
50
|
+
- "change failure"
|
|
51
|
+
|
|
52
|
+
validation:
|
|
53
|
+
schema_check: true
|
|
54
|
+
keyword_match_threshold: 0.8
|
|
55
|
+
|
|
56
|
+
- id: tc002_root_cause_analysis
|
|
57
|
+
description: "Systematic root cause analysis of production incidents"
|
|
58
|
+
category: rca
|
|
59
|
+
priority: critical
|
|
60
|
+
|
|
61
|
+
input:
|
|
62
|
+
incidents: "P1 incident: API timeout spike after v3.6.9 deploy"
|
|
63
|
+
has_infrastructure_change: true
|
|
64
|
+
has_performance_sla: true
|
|
65
|
+
|
|
66
|
+
expected_output:
|
|
67
|
+
must_contain:
|
|
68
|
+
- "root cause"
|
|
69
|
+
- "severity"
|
|
70
|
+
- "resolution"
|
|
71
|
+
|
|
72
|
+
validation:
|
|
73
|
+
schema_check: true
|
|
74
|
+
|
|
75
|
+
- id: tc003_domain_flag_detection
|
|
76
|
+
description: "Detect HAS_INFRASTRUCTURE_CHANGE, HAS_PERFORMANCE_SLA, HAS_RECURRING_INCIDENTS flags"
|
|
77
|
+
category: flags
|
|
78
|
+
priority: high
|
|
79
|
+
|
|
80
|
+
input:
|
|
81
|
+
content: "Recent Kubernetes scaling policy change causing SLA breaches with recurring pod eviction incidents"
|
|
82
|
+
analyze_flags: true
|
|
83
|
+
|
|
84
|
+
expected_output:
|
|
85
|
+
must_contain:
|
|
86
|
+
- "HAS_INFRASTRUCTURE_CHANGE"
|
|
87
|
+
- "HAS_PERFORMANCE_SLA"
|
|
88
|
+
- "HAS_RECURRING_INCIDENTS"
|
|
89
|
+
|
|
90
|
+
validation:
|
|
91
|
+
schema_check: true
|
|
92
|
+
|
|
93
|
+
- id: tc004_agent_coordination
|
|
94
|
+
description: "Coordinate multi-agent production telemetry swarm execution"
|
|
95
|
+
category: orchestration
|
|
96
|
+
priority: high
|
|
97
|
+
|
|
98
|
+
input:
|
|
99
|
+
domain_flags:
|
|
100
|
+
HAS_INFRASTRUCTURE_CHANGE: true
|
|
101
|
+
HAS_PERFORMANCE_SLA: true
|
|
102
|
+
HAS_RECURRING_INCIDENTS: false
|
|
103
|
+
|
|
104
|
+
expected_output:
|
|
105
|
+
must_contain:
|
|
106
|
+
- "agent"
|
|
107
|
+
- "coordinate"
|
|
108
|
+
|
|
109
|
+
validation:
|
|
110
|
+
schema_check: true
|
|
111
|
+
|
|
112
|
+
- id: tc005_feedback_loop_synthesis
|
|
113
|
+
description: "Execute feedback loop synthesis to Ideation and Refinement phases"
|
|
114
|
+
category: feedback
|
|
115
|
+
priority: medium
|
|
116
|
+
|
|
117
|
+
input:
|
|
118
|
+
production_data: "Post-release monitoring for authentication service upgrade"
|
|
119
|
+
include_feedback_loops: true
|
|
120
|
+
|
|
121
|
+
expected_output:
|
|
122
|
+
must_contain:
|
|
123
|
+
- "feedback"
|
|
124
|
+
- "ideation"
|
|
125
|
+
- "refinement"
|
|
126
|
+
|
|
127
|
+
validation:
|
|
128
|
+
schema_check: true
|
|
129
|
+
|
|
130
|
+
- id: tc006_decision_logic_critical
|
|
131
|
+
description: "Decision logic must produce CRITICAL when any core metric breaches its critical threshold"
|
|
132
|
+
category: decision_logic
|
|
133
|
+
priority: critical
|
|
134
|
+
|
|
135
|
+
input:
|
|
136
|
+
metrics:
|
|
137
|
+
doraScore: 0.35
|
|
138
|
+
slaCompliance: 93.0
|
|
139
|
+
incidentSeverity: "P1"
|
|
140
|
+
rcaCompleteness: 40
|
|
141
|
+
defectTrend: "increasing"
|
|
142
|
+
defectDensity: 6.2
|
|
143
|
+
resilienceScore: 45
|
|
144
|
+
middlewareHealth: 12
|
|
145
|
+
sapHealth: null
|
|
146
|
+
sodCompliance: null
|
|
147
|
+
domain_flags:
|
|
148
|
+
HAS_INFRASTRUCTURE_CHANGE: true
|
|
149
|
+
HAS_PERFORMANCE_SLA: true
|
|
150
|
+
HAS_RECURRING_INCIDENTS: true
|
|
151
|
+
HAS_MIDDLEWARE: true
|
|
152
|
+
|
|
153
|
+
expected_output:
|
|
154
|
+
must_contain:
|
|
155
|
+
- "CRITICAL"
|
|
156
|
+
must_not_contain:
|
|
157
|
+
- "HEALTHY"
|
|
158
|
+
|
|
159
|
+
validation:
|
|
160
|
+
schema_check: true
|
|
161
|
+
decision_deterministic: true
|
|
162
|
+
|
|
163
|
+
- id: tc007_decision_logic_healthy
|
|
164
|
+
description: "Decision logic must produce HEALTHY when all applicable metrics meet healthy thresholds"
|
|
165
|
+
category: decision_logic
|
|
166
|
+
priority: critical
|
|
167
|
+
|
|
168
|
+
input:
|
|
169
|
+
metrics:
|
|
170
|
+
doraScore: 0.85
|
|
171
|
+
slaCompliance: 99.5
|
|
172
|
+
incidentSeverity: "NONE"
|
|
173
|
+
rcaCompleteness: 95
|
|
174
|
+
defectTrend: "declining"
|
|
175
|
+
defectDensity: 0.8
|
|
176
|
+
resilienceScore: 92
|
|
177
|
+
middlewareHealth: 88
|
|
178
|
+
sapHealth: null
|
|
179
|
+
sodCompliance: 95
|
|
180
|
+
domain_flags:
|
|
181
|
+
HAS_INFRASTRUCTURE_CHANGE: true
|
|
182
|
+
HAS_PERFORMANCE_SLA: true
|
|
183
|
+
HAS_RECURRING_INCIDENTS: false
|
|
184
|
+
HAS_MIDDLEWARE: true
|
|
185
|
+
HAS_AUTHORIZATION: true
|
|
186
|
+
|
|
187
|
+
expected_output:
|
|
188
|
+
must_contain:
|
|
189
|
+
- "HEALTHY"
|
|
190
|
+
must_not_contain:
|
|
191
|
+
- "CRITICAL"
|
|
192
|
+
|
|
193
|
+
validation:
|
|
194
|
+
schema_check: true
|
|
195
|
+
decision_deterministic: true
|
|
196
|
+
|
|
197
|
+
- id: tc008_decision_logic_degraded
|
|
198
|
+
description: "Decision logic must produce DEGRADED when metrics fall between CRITICAL and HEALTHY thresholds"
|
|
199
|
+
category: decision_logic
|
|
200
|
+
priority: critical
|
|
201
|
+
|
|
202
|
+
input:
|
|
203
|
+
metrics:
|
|
204
|
+
doraScore: 0.55
|
|
205
|
+
slaCompliance: 97.0
|
|
206
|
+
incidentSeverity: "P3"
|
|
207
|
+
rcaCompleteness: 60
|
|
208
|
+
defectTrend: "stable"
|
|
209
|
+
defectDensity: 3.1
|
|
210
|
+
resilienceScore: 65
|
|
211
|
+
middlewareHealth: 55
|
|
212
|
+
sapHealth: 40
|
|
213
|
+
sodCompliance: null
|
|
214
|
+
domain_flags:
|
|
215
|
+
HAS_INFRASTRUCTURE_CHANGE: true
|
|
216
|
+
HAS_PERFORMANCE_SLA: true
|
|
217
|
+
HAS_RECURRING_INCIDENTS: false
|
|
218
|
+
HAS_MIDDLEWARE: true
|
|
219
|
+
HAS_SAP_INTEGRATION: true
|
|
220
|
+
|
|
221
|
+
expected_output:
|
|
222
|
+
must_contain:
|
|
223
|
+
- "DEGRADED"
|
|
224
|
+
must_not_contain:
|
|
225
|
+
- "CRITICAL"
|
|
226
|
+
- "HEALTHY"
|
|
227
|
+
|
|
228
|
+
validation:
|
|
229
|
+
schema_check: true
|
|
230
|
+
decision_deterministic: true
|
|
231
|
+
|
|
232
|
+
success_criteria:
|
|
233
|
+
pass_rate: 0.875
|
|
234
|
+
critical_pass_rate: 1.0
|
|
235
|
+
avg_reasoning_quality: 0.75
|
|
236
|
+
max_execution_time_ms: 300000
|
|
237
|
+
decision_logic_pass_rate: 1.0
|
|
238
|
+
|
|
239
|
+
metadata:
|
|
240
|
+
author: "qe-qcsd-orchestrator"
|
|
241
|
+
created: "2026-02-17"
|
|
242
|
+
updated: "2026-02-17"
|
|
243
|
+
coverage_target: >
|
|
244
|
+
QCSD production swarm with 8 test cases covering DORA metrics analysis,
|
|
245
|
+
root cause analysis, domain flag detection, agent coordination, feedback loop synthesis,
|
|
246
|
+
and 3 decision logic determinism tests (CRITICAL, HEALTHY, DEGRADED paths).
|