agentic-qe 3.6.9 → 3.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/.validation/schemas/skill-eval.schema.json +11 -1
- package/.claude/skills/pr-review/SKILL.md +2 -2
- package/.claude/skills/qcsd-production-swarm/SKILL.md +2781 -0
- package/.claude/skills/qcsd-production-swarm/evals/qcsd-production-swarm.yaml +246 -0
- package/.claude/skills/qcsd-production-swarm/schemas/output.json +505 -0
- package/.claude/skills/qcsd-production-swarm/scripts/validate-config.json +25 -0
- package/.claude/skills/skills-manifest.json +5 -5
- package/package.json +1 -1
- package/scripts/benchmark-hnsw-loading.ts +480 -0
- package/scripts/benchmark-kg-assisted.ts +725 -0
- package/scripts/collect-production-telemetry.sh +291 -0
- package/scripts/detect-skill-conflicts.ts +347 -0
- package/scripts/eval-driven-workflow.ts +704 -0
- package/scripts/run-skill-eval.ts +210 -10
- package/scripts/score-skill-quality.ts +511 -0
- package/v3/CHANGELOG.md +19 -0
- package/v3/assets/skills/pr-review/SKILL.md +2 -2
- package/v3/dist/cli/bundle.js +1064 -363
- package/v3/dist/cli/commands/hooks.d.ts.map +1 -1
- package/v3/dist/cli/commands/hooks.js +143 -2
- package/v3/dist/cli/commands/hooks.js.map +1 -1
- package/v3/dist/cli/commands/test.d.ts.map +1 -1
- package/v3/dist/cli/commands/test.js +6 -0
- package/v3/dist/cli/commands/test.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.js +58 -6
- package/v3/dist/domains/test-generation/generators/jest-vitest-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/mocha-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/mocha-generator.js +79 -7
- package/v3/dist/domains/test-generation/generators/mocha-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/generators/pytest-generator.d.ts +4 -0
- package/v3/dist/domains/test-generation/generators/pytest-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/generators/pytest-generator.js +77 -10
- package/v3/dist/domains/test-generation/generators/pytest-generator.js.map +1 -1
- package/v3/dist/domains/test-generation/interfaces/test-generator.interface.d.ts +21 -0
- package/v3/dist/domains/test-generation/interfaces/test-generator.interface.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/interfaces.d.ts +21 -0
- package/v3/dist/domains/test-generation/interfaces.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/services/test-generator.d.ts +22 -0
- package/v3/dist/domains/test-generation/services/test-generator.d.ts.map +1 -1
- package/v3/dist/domains/test-generation/services/test-generator.js +163 -3
- package/v3/dist/domains/test-generation/services/test-generator.js.map +1 -1
- package/v3/dist/kernel/unified-memory-hnsw.d.ts +29 -0
- package/v3/dist/kernel/unified-memory-hnsw.d.ts.map +1 -1
- package/v3/dist/kernel/unified-memory-hnsw.js +136 -0
- package/v3/dist/kernel/unified-memory-hnsw.js.map +1 -1
- package/v3/dist/kernel/unified-memory.d.ts +2 -2
- package/v3/dist/kernel/unified-memory.d.ts.map +1 -1
- package/v3/dist/kernel/unified-memory.js +7 -9
- package/v3/dist/kernel/unified-memory.js.map +1 -1
- package/v3/dist/learning/qe-hooks.d.ts.map +1 -1
- package/v3/dist/learning/qe-hooks.js +34 -3
- package/v3/dist/learning/qe-hooks.js.map +1 -1
- package/v3/dist/mcp/bundle.js +857 -329
- package/v3/package.json +1 -1
|
@@ -167,6 +167,11 @@
|
|
|
167
167
|
"type": "string",
|
|
168
168
|
"description": "Reason for skipping"
|
|
169
169
|
},
|
|
170
|
+
"negative_control": {
|
|
171
|
+
"type": "boolean",
|
|
172
|
+
"default": false,
|
|
173
|
+
"description": "When true, grading logic inverts: test passes when must_contain items are ABSENT (skill correctly declines irrelevant prompts)"
|
|
174
|
+
},
|
|
170
175
|
"input": {
|
|
171
176
|
"$ref": "#/$defs/test_input"
|
|
172
177
|
},
|
|
@@ -324,6 +329,11 @@
|
|
|
324
329
|
"default": false,
|
|
325
330
|
"description": "Allow partial matches"
|
|
326
331
|
},
|
|
332
|
+
"adaptive_rubric": {
|
|
333
|
+
"type": "boolean",
|
|
334
|
+
"default": false,
|
|
335
|
+
"description": "When true, dynamically extracts keywords from test prompt (quoted strings, format words, standards) and adds them to must_contain checks"
|
|
336
|
+
},
|
|
327
337
|
"grading_rubric": {
|
|
328
338
|
"type": "object",
|
|
329
339
|
"properties": {
|
|
@@ -331,7 +341,7 @@
|
|
|
331
341
|
"accuracy": { "type": "number", "minimum": 0, "maximum": 1 },
|
|
332
342
|
"actionability": { "type": "number", "minimum": 0, "maximum": 1 }
|
|
333
343
|
},
|
|
334
|
-
"description": "Weighted grading rubric (weights should sum to 1.0)"
|
|
344
|
+
"description": "Weighted grading rubric (weights should sum to 1.0). Computes sub-scores: completeness (must_contain match ratio), accuracy (1 - violation ratio), actionability (code blocks, steps, recommendations)"
|
|
335
345
|
}
|
|
336
346
|
}
|
|
337
347
|
},
|
|
@@ -24,8 +24,8 @@ Read the complete diff and PR description. Do not skim — read every changed fi
|
|
|
24
24
|
|
|
25
25
|
### 2. Scope Check
|
|
26
26
|
- Only analyze AQE/QE skills (NOT Claude Flow platform skills)
|
|
27
|
-
- Platform skills to EXCLUDE: v3-*, flow-nexus-*, agentdb-*, reasoningbank-*, swarm-advanced,
|
|
28
|
-
- If the PR touches skills, verify the count/scope matches expectations (~
|
|
27
|
+
- Platform skills to EXCLUDE: v3-*, flow-nexus-*, agentdb-*, reasoningbank-*, swarm-*, github-*, hive-mind-advanced, hooks-automation, iterative-loop, stream-chain, skill-builder, sparc-methodology, pair-programming, release, debug-loop, aqe-v2-v3-migration
|
|
28
|
+
- If the PR touches skills, verify the count/scope matches expectations (~75 AQE skills)
|
|
29
29
|
- Flag any platform skill changes that may have leaked into an AQE-focused PR
|
|
30
30
|
|
|
31
31
|
### 3. Summarize Changes
|