agentic-qe 3.8.11 → 3.8.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/qe-code-intelligence/SKILL.md +29 -20
- package/.claude/skills/qe-code-intelligence/evals/qe-code-intelligence.yaml +3 -3
- package/.claude/skills/qe-quality-assessment/SKILL.md +1 -1
- package/.claude/skills/qe-test-generation/SKILL.md +1 -1
- package/.claude/skills/skills-manifest.json +1 -1
- package/CHANGELOG.md +45 -0
- package/README.md +9 -0
- package/assets/skills/qe-code-intelligence/SKILL.md +29 -20
- package/assets/skills/qe-code-intelligence/evals/qe-code-intelligence.yaml +3 -3
- package/assets/skills/qe-quality-assessment/SKILL.md +1 -1
- package/assets/skills/qe-test-generation/SKILL.md +1 -1
- package/dist/cli/bundle.js +1162 -1046
- package/dist/cli/commands/code.js +149 -11
- package/dist/cli/commands/init.js +3 -2
- package/dist/cli/commands/ruvector-commands.js +17 -0
- package/dist/cli/handlers/init-handler.d.ts +1 -0
- package/dist/cli/handlers/init-handler.js +15 -10
- package/dist/cli/utils/file-discovery.d.ts +1 -0
- package/dist/cli/utils/file-discovery.js +1 -1
- package/dist/domains/code-intelligence/coordinator-gnn.d.ts +21 -0
- package/dist/domains/code-intelligence/coordinator-gnn.js +102 -0
- package/dist/domains/contract-testing/coordinator.js +13 -0
- package/dist/domains/coverage-analysis/coordinator.js +5 -0
- package/dist/domains/defect-intelligence/coordinator.d.ts +1 -0
- package/dist/domains/defect-intelligence/coordinator.js +43 -0
- package/dist/domains/quality-assessment/coordinator.js +26 -0
- package/dist/domains/test-generation/coordinator.js +14 -0
- package/dist/init/orchestrator.js +1 -0
- package/dist/init/phases/08-mcp.js +4 -4
- package/dist/init/phases/phase-interface.d.ts +3 -1
- package/dist/integrations/agentic-flow/reasoning-bank/experience-replay.d.ts +11 -0
- package/dist/integrations/agentic-flow/reasoning-bank/experience-replay.js +44 -1
- package/dist/integrations/rl-suite/algorithms/eprop.d.ts +79 -0
- package/dist/integrations/rl-suite/algorithms/eprop.js +284 -0
- package/dist/integrations/rl-suite/algorithms/index.d.ts +2 -1
- package/dist/integrations/rl-suite/algorithms/index.js +2 -1
- package/dist/integrations/rl-suite/index.d.ts +2 -2
- package/dist/integrations/rl-suite/index.js +2 -2
- package/dist/integrations/rl-suite/interfaces.d.ts +3 -3
- package/dist/integrations/rl-suite/interfaces.js +1 -1
- package/dist/integrations/rl-suite/orchestrator.d.ts +2 -2
- package/dist/integrations/rl-suite/orchestrator.js +3 -2
- package/dist/integrations/rl-suite/reward-signals.d.ts +1 -1
- package/dist/integrations/rl-suite/reward-signals.js +1 -1
- package/dist/integrations/ruvector/coherence-gate-cohomology.d.ts +41 -0
- package/dist/integrations/ruvector/coherence-gate-cohomology.js +47 -0
- package/dist/integrations/ruvector/coherence-gate-core.d.ts +200 -0
- package/dist/integrations/ruvector/coherence-gate-core.js +294 -0
- package/dist/integrations/ruvector/coherence-gate-energy.d.ts +136 -0
- package/dist/integrations/ruvector/coherence-gate-energy.js +373 -0
- package/dist/integrations/ruvector/coherence-gate-vector.d.ts +38 -0
- package/dist/integrations/ruvector/coherence-gate-vector.js +76 -0
- package/dist/integrations/ruvector/coherence-gate.d.ts +10 -311
- package/dist/integrations/ruvector/coherence-gate.js +10 -652
- package/dist/integrations/ruvector/cold-tier-trainer.d.ts +103 -0
- package/dist/integrations/ruvector/cold-tier-trainer.js +377 -0
- package/dist/integrations/ruvector/cusum-detector.d.ts +70 -0
- package/dist/integrations/ruvector/cusum-detector.js +142 -0
- package/dist/integrations/ruvector/delta-tracker.d.ts +122 -0
- package/dist/integrations/ruvector/delta-tracker.js +311 -0
- package/dist/integrations/ruvector/domain-transfer.d.ts +79 -1
- package/dist/integrations/ruvector/domain-transfer.js +158 -2
- package/dist/integrations/ruvector/eprop-learner.d.ts +135 -0
- package/dist/integrations/ruvector/eprop-learner.js +351 -0
- package/dist/integrations/ruvector/feature-flags.d.ts +177 -0
- package/dist/integrations/ruvector/feature-flags.js +145 -0
- package/dist/integrations/ruvector/graphmae-encoder.d.ts +88 -0
- package/dist/integrations/ruvector/graphmae-encoder.js +360 -0
- package/dist/integrations/ruvector/hdc-fingerprint.d.ts +127 -0
- package/dist/integrations/ruvector/hdc-fingerprint.js +222 -0
- package/dist/integrations/ruvector/hopfield-memory.d.ts +97 -0
- package/dist/integrations/ruvector/hopfield-memory.js +238 -0
- package/dist/integrations/ruvector/index.d.ts +13 -2
- package/dist/integrations/ruvector/index.js +46 -2
- package/dist/integrations/ruvector/mincut-wrapper.d.ts +7 -0
- package/dist/integrations/ruvector/mincut-wrapper.js +54 -2
- package/dist/integrations/ruvector/reservoir-replay.d.ts +172 -0
- package/dist/integrations/ruvector/reservoir-replay.js +335 -0
- package/dist/integrations/ruvector/solver-adapter.d.ts +93 -0
- package/dist/integrations/ruvector/solver-adapter.js +299 -0
- package/dist/integrations/ruvector/sona-persistence.d.ts +33 -0
- package/dist/integrations/ruvector/sona-persistence.js +47 -0
- package/dist/integrations/ruvector/spectral-sparsifier.d.ts +154 -0
- package/dist/integrations/ruvector/spectral-sparsifier.js +389 -0
- package/dist/integrations/ruvector/temporal-causality.d.ts +63 -0
- package/dist/integrations/ruvector/temporal-causality.js +317 -0
- package/dist/learning/pattern-promotion.d.ts +63 -0
- package/dist/learning/pattern-promotion.js +235 -1
- package/dist/learning/pattern-store.d.ts +2 -0
- package/dist/learning/pattern-store.js +187 -1
- package/dist/learning/sqlite-persistence.d.ts +2 -0
- package/dist/learning/sqlite-persistence.js +4 -0
- package/dist/mcp/bundle.js +506 -427
- package/dist/shared/utils/index.d.ts +1 -0
- package/dist/shared/utils/index.js +1 -0
- package/dist/shared/utils/xorshift128.d.ts +24 -0
- package/dist/shared/utils/xorshift128.js +50 -0
- package/package.json +1 -1
|
@@ -28,6 +28,8 @@ import * as ClaimVerifierHelpers from './coordinator-claim-verifier.js';
|
|
|
28
28
|
import * as GateEvalHelpers from './coordinator-gate-evaluation.js';
|
|
29
29
|
// ADR-070: Witness Chain audit trail
|
|
30
30
|
import { getWitnessChain } from '../../audit/witness-chain.js';
|
|
31
|
+
// Three-loop feature flag for instantAdapt protocol
|
|
32
|
+
import { isSONAThreeLoopEnabled } from '../../integrations/ruvector/feature-flags.js';
|
|
31
33
|
// CQ-002: Base domain coordinator
|
|
32
34
|
import { BaseDomainCoordinator, } from '../base-domain-coordinator.js';
|
|
33
35
|
const DEFAULT_CONFIG = {
|
|
@@ -227,6 +229,20 @@ export class QualityAssessmentCoordinator extends BaseDomainCoordinator {
|
|
|
227
229
|
}
|
|
228
230
|
// Success path
|
|
229
231
|
this.completeWorkflow(workflowId);
|
|
232
|
+
// Three-loop protocol: instantAdapt must precede recordOutcome
|
|
233
|
+
if (isSONAThreeLoopEnabled() && this.qesona?.isThreeLoopEnabled()) {
|
|
234
|
+
const m = effectiveRequest.metrics;
|
|
235
|
+
this.qesona.instantAdapt([
|
|
236
|
+
m.coverage / 100,
|
|
237
|
+
m.testsPassing / 100,
|
|
238
|
+
m.criticalBugs / 10,
|
|
239
|
+
m.codeSmells / 100,
|
|
240
|
+
m.securityVulnerabilities / 10,
|
|
241
|
+
m.technicalDebt / 100,
|
|
242
|
+
m.duplications / 100,
|
|
243
|
+
finalResult.overallScore / 100,
|
|
244
|
+
]);
|
|
245
|
+
}
|
|
230
246
|
// Store quality pattern in SONA if enabled
|
|
231
247
|
if (this.config.enableSONAPatternLearning && this.qesona) {
|
|
232
248
|
await this.storeQualityPattern(effectiveRequest, finalResult);
|
|
@@ -322,6 +338,16 @@ export class QualityAssessmentCoordinator extends BaseDomainCoordinator {
|
|
|
322
338
|
result.value = enhanced;
|
|
323
339
|
}
|
|
324
340
|
}
|
|
341
|
+
// Three-loop protocol: instantAdapt must precede recordOutcome
|
|
342
|
+
if (isSONAThreeLoopEnabled() && this.qesona?.isThreeLoopEnabled()) {
|
|
343
|
+
const score = result.value.score;
|
|
344
|
+
this.qesona.instantAdapt([
|
|
345
|
+
score.overall / 100,
|
|
346
|
+
result.value.metrics.length / 20,
|
|
347
|
+
result.value.trends.length / 10,
|
|
348
|
+
result.value.recommendations.length / 10,
|
|
349
|
+
]);
|
|
350
|
+
}
|
|
325
351
|
// Store quality pattern in SONA
|
|
326
352
|
if (this.config.enableSONAPatternLearning && this.qesona) {
|
|
327
353
|
await this.storeQualityAnalysisPattern(request, result.value);
|
|
@@ -24,6 +24,8 @@ import { createDomainFinding, } from '../../coordination/consensus/domain-findin
|
|
|
24
24
|
import { createPersistentSONAEngine, } from '../../integrations/ruvector/sona-persistence.js';
|
|
25
25
|
import { createQEFlashAttention, } from '../../integrations/ruvector/wrappers.js';
|
|
26
26
|
import { DecisionTransformerAlgorithm, } from '../../integrations/rl-suite/algorithms/decision-transformer.js';
|
|
27
|
+
// Three-loop feature flag for instantAdapt protocol
|
|
28
|
+
import { isSONAThreeLoopEnabled } from '../../integrations/ruvector/feature-flags.js';
|
|
27
29
|
// Coherence Gate Integration (ADR-052)
|
|
28
30
|
import { createTestGenerationCoherenceGate, } from './services/coherence-gate-service.js';
|
|
29
31
|
const DEFAULT_CONFIG = {
|
|
@@ -307,6 +309,18 @@ export class TestGenerationCoordinator extends BaseDomainCoordinator {
|
|
|
307
309
|
await this.publishTestGenerated(test, request.framework ?? 'vitest');
|
|
308
310
|
}
|
|
309
311
|
}
|
|
312
|
+
// Three-loop protocol: instantAdapt must precede recordOutcome
|
|
313
|
+
if (isSONAThreeLoopEnabled() && this.qesona?.isThreeLoopEnabled()) {
|
|
314
|
+
const tests = result.value;
|
|
315
|
+
this.qesona.instantAdapt([
|
|
316
|
+
tests.tests.length / 20,
|
|
317
|
+
tests.coverageEstimate / 100,
|
|
318
|
+
tests.patternsUsed.length / 10,
|
|
319
|
+
request.sourceFiles.length / 20,
|
|
320
|
+
(request.coverageTarget ?? 80) / 100,
|
|
321
|
+
tests.tests.filter(t => t.type === 'unit').length / 20,
|
|
322
|
+
]);
|
|
323
|
+
}
|
|
310
324
|
// Learn from successful generation using QESONA
|
|
311
325
|
if (this.config.enableQESONA && this.qesona) {
|
|
312
326
|
await this.storeTestGenerationPattern(result.value, request);
|
|
@@ -26,10 +26,10 @@ export class MCPPhase extends BasePhase {
|
|
|
26
26
|
requiresPhases = ['configuration', 'database'];
|
|
27
27
|
async run(context) {
|
|
28
28
|
const { projectRoot } = context;
|
|
29
|
-
// MCP is
|
|
30
|
-
if (
|
|
31
|
-
context.services.log(' MCP: skipped (
|
|
32
|
-
context.services.log('
|
|
29
|
+
// MCP is enabled by default — skip only with --no-mcp
|
|
30
|
+
if (context.options.noMcp) {
|
|
31
|
+
context.services.log(' MCP: skipped (--no-mcp)');
|
|
32
|
+
context.services.log(' CLI commands available: aqe memory, aqe test, aqe coverage, etc.');
|
|
33
33
|
return {
|
|
34
34
|
configured: false,
|
|
35
35
|
mcpPath: '',
|
|
@@ -106,7 +106,9 @@ export interface InitOptions {
|
|
|
106
106
|
withContinueDev?: boolean;
|
|
107
107
|
/** Install all coding agent platform configurations */
|
|
108
108
|
withAllPlatforms?: boolean;
|
|
109
|
-
/**
|
|
109
|
+
/** Skip MCP server config (MCP is enabled by default) */
|
|
110
|
+
noMcp?: boolean;
|
|
111
|
+
/** @deprecated Use default behavior instead — MCP is now enabled by default */
|
|
110
112
|
withMcp?: boolean;
|
|
111
113
|
}
|
|
112
114
|
/**
|
|
@@ -117,6 +117,7 @@ export declare class ExperienceReplay {
|
|
|
117
117
|
private experienceIdToHnswId;
|
|
118
118
|
private nextHnswId;
|
|
119
119
|
private recentExperiences;
|
|
120
|
+
private reservoirBuffer;
|
|
120
121
|
private stats;
|
|
121
122
|
constructor(config?: Partial<ExperienceReplayConfig>);
|
|
122
123
|
/**
|
|
@@ -197,6 +198,16 @@ export declare class ExperienceReplay {
|
|
|
197
198
|
hnswIndexSize: number;
|
|
198
199
|
recentBufferSize: number;
|
|
199
200
|
};
|
|
201
|
+
/**
|
|
202
|
+
* Get reservoir buffer stats (R10, ADR-087).
|
|
203
|
+
* Returns null if the reservoir is not enabled.
|
|
204
|
+
*/
|
|
205
|
+
getReservoirStats(): {
|
|
206
|
+
size: number;
|
|
207
|
+
totalAdmitted: number;
|
|
208
|
+
totalRejected: number;
|
|
209
|
+
tierCounts: Record<string, number>;
|
|
210
|
+
} | null;
|
|
200
211
|
/**
|
|
201
212
|
* Dispose and cleanup
|
|
202
213
|
*/
|
|
@@ -19,6 +19,8 @@ import { CircularBuffer } from '../../../shared/utils/circular-buffer.js';
|
|
|
19
19
|
import { HNSWEmbeddingIndex } from '../../embeddings/index/HNSWIndex.js';
|
|
20
20
|
import { safeJsonParse } from '../../../shared/safe-json.js';
|
|
21
21
|
import { ExperienceConsolidator } from '../../../learning/experience-consolidation.js';
|
|
22
|
+
import { getRuVectorFeatureFlags } from '../../ruvector/feature-flags.js';
|
|
23
|
+
import { ReservoirReplayBuffer } from '../../ruvector/reservoir-replay.js';
|
|
22
24
|
const DEFAULT_CONFIG = {
|
|
23
25
|
minQualityThreshold: 0.6,
|
|
24
26
|
maxExperiencesPerDomain: 500,
|
|
@@ -64,6 +66,8 @@ export class ExperienceReplay {
|
|
|
64
66
|
nextHnswId = 0;
|
|
65
67
|
// Recent experiences buffer
|
|
66
68
|
recentExperiences;
|
|
69
|
+
// Reservoir replay buffer (R10, ADR-087) — coherence-gated admission
|
|
70
|
+
reservoirBuffer = null;
|
|
67
71
|
// Statistics
|
|
68
72
|
stats = {
|
|
69
73
|
experiencesStored: 0,
|
|
@@ -99,6 +103,11 @@ export class ExperienceReplay {
|
|
|
99
103
|
this.prepareStatements();
|
|
100
104
|
// Load embeddings into memory index
|
|
101
105
|
await this.loadEmbeddingIndex();
|
|
106
|
+
// Initialize reservoir buffer if feature flag is enabled (R10, ADR-087)
|
|
107
|
+
if (getRuVectorFeatureFlags().useReservoirReplay) {
|
|
108
|
+
this.reservoirBuffer = new ReservoirReplayBuffer({ capacity: 10_000 });
|
|
109
|
+
console.log('[ExperienceReplay] Reservoir replay buffer enabled');
|
|
110
|
+
}
|
|
102
111
|
this.initialized = true;
|
|
103
112
|
console.log('[ExperienceReplay] Initialized');
|
|
104
113
|
}
|
|
@@ -333,6 +342,10 @@ export class ExperienceReplay {
|
|
|
333
342
|
}
|
|
334
343
|
// Add to recent buffer
|
|
335
344
|
this.recentExperiences.push(experience);
|
|
345
|
+
// Admit to reservoir buffer with coherence gating (R10, ADR-087)
|
|
346
|
+
if (this.reservoirBuffer) {
|
|
347
|
+
this.reservoirBuffer.admit(experience.id, experience, experience.qualityScore);
|
|
348
|
+
}
|
|
336
349
|
this.stats.experiencesStored++;
|
|
337
350
|
// Auto-consolidate if enabled (replaces destructive auto-prune)
|
|
338
351
|
if (this.config.autoPrune) {
|
|
@@ -349,8 +362,23 @@ export class ExperienceReplay {
|
|
|
349
362
|
*/
|
|
350
363
|
async getGuidance(task, domain) {
|
|
351
364
|
this.ensureInitialized();
|
|
352
|
-
// Find similar experiences
|
|
365
|
+
// Find similar experiences via HNSW
|
|
353
366
|
const similar = await this.findSimilarExperiences(task, domain);
|
|
367
|
+
// Blend in high-coherence experiences from reservoir buffer (R10, ADR-087)
|
|
368
|
+
if (this.reservoirBuffer && this.reservoirBuffer.size() > 0) {
|
|
369
|
+
const reservoirSamples = this.reservoirBuffer.sample(Math.max(2, Math.floor(this.config.topK / 2)), 0.6);
|
|
370
|
+
for (const entry of reservoirSamples) {
|
|
371
|
+
const exp = entry.data;
|
|
372
|
+
// Skip if already in HNSW results
|
|
373
|
+
if (similar.some(s => s.experience.id === exp.id))
|
|
374
|
+
continue;
|
|
375
|
+
// Skip if domain filter doesn't match
|
|
376
|
+
if (domain && exp.domain !== domain)
|
|
377
|
+
continue;
|
|
378
|
+
// Add with a coherence-based similarity score
|
|
379
|
+
similar.push({ experience: exp, similarity: entry.coherenceScore * 0.8 });
|
|
380
|
+
}
|
|
381
|
+
}
|
|
354
382
|
if (similar.length === 0) {
|
|
355
383
|
return null;
|
|
356
384
|
}
|
|
@@ -564,6 +592,21 @@ export class ExperienceReplay {
|
|
|
564
592
|
recentBufferSize: this.recentExperiences.length,
|
|
565
593
|
};
|
|
566
594
|
}
|
|
595
|
+
/**
|
|
596
|
+
* Get reservoir buffer stats (R10, ADR-087).
|
|
597
|
+
* Returns null if the reservoir is not enabled.
|
|
598
|
+
*/
|
|
599
|
+
getReservoirStats() {
|
|
600
|
+
if (!this.reservoirBuffer)
|
|
601
|
+
return null;
|
|
602
|
+
const stats = this.reservoirBuffer.getStats();
|
|
603
|
+
return {
|
|
604
|
+
size: stats.size,
|
|
605
|
+
totalAdmitted: stats.totalAdmitted,
|
|
606
|
+
totalRejected: stats.totalRejected,
|
|
607
|
+
tierCounts: stats.tierCounts,
|
|
608
|
+
};
|
|
609
|
+
}
|
|
567
610
|
/**
|
|
568
611
|
* Dispose and cleanup
|
|
569
612
|
*/
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agentic QE v3 - E-prop Online Learning Algorithm (ADR-087 Milestone 4)
|
|
3
|
+
*
|
|
4
|
+
* RL algorithm #10: Eligibility propagation for online learning.
|
|
5
|
+
* Uses 12 bytes/synapse with no backprop required.
|
|
6
|
+
*
|
|
7
|
+
* Application: Online adaptive test strategies — learns in real time
|
|
8
|
+
* from test execution feedback without storing replay buffers.
|
|
9
|
+
*/
|
|
10
|
+
import { BaseRLAlgorithm } from '../base-algorithm';
|
|
11
|
+
import type { RLState, RLPrediction, RLTrainingStats, RLExperience, RLAlgorithmInfo, RewardSignal } from '../interfaces';
|
|
12
|
+
interface EpropAlgorithmConfig {
|
|
13
|
+
/** Number of state features */
|
|
14
|
+
stateSize: number;
|
|
15
|
+
/** Hidden layer size for the E-prop network */
|
|
16
|
+
hiddenSize: number;
|
|
17
|
+
/** Number of discrete actions */
|
|
18
|
+
actionSize: number;
|
|
19
|
+
/** E-prop learning rate */
|
|
20
|
+
epropLearningRate: number;
|
|
21
|
+
/** Eligibility trace decay */
|
|
22
|
+
eligibilityDecay: number;
|
|
23
|
+
/** Use feedback alignment */
|
|
24
|
+
feedbackAlignment: boolean;
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* E-prop online learning algorithm for adaptive test strategies.
|
|
28
|
+
*
|
|
29
|
+
* Unlike batch RL algorithms, E-prop learns from each experience
|
|
30
|
+
* immediately using eligibility traces — no replay buffer needed.
|
|
31
|
+
*
|
|
32
|
+
* Key advantages:
|
|
33
|
+
* - Online: updates weights after every step
|
|
34
|
+
* - Memory-efficient: 12 bytes/synapse (vs kilobytes for replay-based)
|
|
35
|
+
* - Biologically plausible: no weight transport (feedback alignment)
|
|
36
|
+
* - Fast: no backward pass through the full network
|
|
37
|
+
*/
|
|
38
|
+
export declare class EpropAlgorithm extends BaseRLAlgorithm {
|
|
39
|
+
private network;
|
|
40
|
+
private epropConfig;
|
|
41
|
+
private actions;
|
|
42
|
+
constructor(config?: Partial<EpropAlgorithmConfig>, rewardSignals?: RewardSignal[]);
|
|
43
|
+
/**
|
|
44
|
+
* Predict best action for a given state.
|
|
45
|
+
* Runs the E-prop network forward pass and selects the action
|
|
46
|
+
* with highest output activation.
|
|
47
|
+
*/
|
|
48
|
+
predict(state: RLState): Promise<RLPrediction>;
|
|
49
|
+
/**
|
|
50
|
+
* Train with a single experience — the core online learning step.
|
|
51
|
+
*
|
|
52
|
+
* Unlike batch algorithms, E-prop processes each experience immediately:
|
|
53
|
+
* 1. Forward pass on experience.state to refresh the eligibility traces
|
|
54
|
+
* 2. Online update: dw = eta * eligibility * reward
|
|
55
|
+
*/
|
|
56
|
+
train(experience: RLExperience): Promise<RLTrainingStats>;
|
|
57
|
+
/**
|
|
58
|
+
* Core training logic for batch experiences.
|
|
59
|
+
* E-prop processes each experience online (sequentially).
|
|
60
|
+
*/
|
|
61
|
+
protected trainCore(experiences: RLExperience[]): Promise<RLTrainingStats>;
|
|
62
|
+
/**
|
|
63
|
+
* Get algorithm-specific info.
|
|
64
|
+
*/
|
|
65
|
+
protected getAlgorithmInfo(): RLAlgorithmInfo;
|
|
66
|
+
protected exportCustomData(): Promise<Record<string, unknown>>;
|
|
67
|
+
protected importCustomData(data: Record<string, unknown>): Promise<void>;
|
|
68
|
+
protected resetAlgorithm(): Promise<void>;
|
|
69
|
+
private prepareState;
|
|
70
|
+
private argmax;
|
|
71
|
+
private calculateConfidence;
|
|
72
|
+
private generateReasoning;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Create a new E-prop RL algorithm instance.
|
|
76
|
+
*/
|
|
77
|
+
export declare function createEpropAlgorithm(config?: Partial<EpropAlgorithmConfig>, rewardSignals?: RewardSignal[]): EpropAlgorithm;
|
|
78
|
+
export {};
|
|
79
|
+
//# sourceMappingURL=eprop.d.ts.map
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agentic QE v3 - E-prop Online Learning Algorithm (ADR-087 Milestone 4)
|
|
3
|
+
*
|
|
4
|
+
* RL algorithm #10: Eligibility propagation for online learning.
|
|
5
|
+
* Uses 12 bytes/synapse with no backprop required.
|
|
6
|
+
*
|
|
7
|
+
* Application: Online adaptive test strategies — learns in real time
|
|
8
|
+
* from test execution feedback without storing replay buffers.
|
|
9
|
+
*/
|
|
10
|
+
import { BaseRLAlgorithm } from '../base-algorithm';
|
|
11
|
+
import { TEST_EXECUTION_REWARDS } from '../interfaces';
|
|
12
|
+
import { createEpropNetwork, } from '../../ruvector/eprop-learner.js';
|
|
13
|
+
import { getRuVectorFeatureFlags } from '../../ruvector/feature-flags.js';
|
|
14
|
+
/**
 * Defaults for every EpropAlgorithm instance. Fields supplied by the caller
 * override these one by one in the constructor.
 */
const DEFAULT_EPROP_ALGORITHM_CONFIG = {
    stateSize: 10,
    hiddenSize: 50,
    actionSize: 4,
    epropLearningRate: 0.01,
    eligibilityDecay: 0.95,
    feedbackAlignment: true,
};
/** Config fields that fix the dimensions of the network and the action table. */
const NETWORK_SHAPE_KEYS = ['stateSize', 'hiddenSize', 'actionSize'];
/**
 * Build an E-prop network from an algorithm config.
 * Single place that maps algorithm field names onto the network's option names.
 */
function buildEpropNetwork(epropConfig) {
    return createEpropNetwork({
        inputSize: epropConfig.stateSize,
        hiddenSize: epropConfig.hiddenSize,
        outputSize: epropConfig.actionSize,
        learningRate: epropConfig.epropLearningRate,
        eligibilityDecay: epropConfig.eligibilityDecay,
        feedbackAlignment: epropConfig.feedbackAlignment,
    });
}
/**
 * Build the action table for `actionSize` network outputs: the four default
 * test-execution actions, padded with numbered 'explore' actions or trimmed.
 */
function buildActionSpace(actionSize) {
    const actions = [
        { type: 'execute', value: 'standard' },
        { type: 'prioritize', value: 'high' },
        { type: 'retry', value: 'adaptive' },
        { type: 'skip', value: 0 },
    ];
    while (actions.length < actionSize) {
        actions.push({ type: 'explore', value: actions.length });
    }
    return actions.slice(0, actionSize);
}
/**
 * Run one online learning step: forward pass, reward-driven update, and a
 * trace reset when the experience ends its episode.
 */
function applyOnlineStep(network, stateFeatures, experience) {
    // The forward pass must come first: it is what refreshes the traces the update consumes.
    network.forward(stateFeatures);
    network.updateOnline(experience.reward);
    if (experience.done) {
        network.resetTraces();
    }
}
// ============================================================================
// E-prop RL Algorithm
// ============================================================================
/**
 * E-prop online learning algorithm for adaptive test strategies.
 *
 * Unlike batch RL algorithms, E-prop learns from each experience
 * immediately using eligibility traces — no replay buffer needed.
 *
 * Key advantages:
 * - Online: updates weights after every step
 * - Memory-efficient: 12 bytes/synapse (vs kilobytes for replay-based)
 * - Biologically plausible: no weight transport (feedback alignment)
 * - Fast: no backward pass through the full network
 */
export class EpropAlgorithm extends BaseRLAlgorithm {
    /** Underlying network returned by createEpropNetwork. */
    network;
    /** Effective configuration: defaults overlaid with caller overrides. */
    epropConfig;
    /** Action table; index i corresponds to network output i. */
    actions;
    /**
     * @param config - Partial overrides for DEFAULT_EPROP_ALGORITHM_CONFIG.
     * @param rewardSignals - Reward signals handed to the base algorithm.
     */
    constructor(config = {}, rewardSignals = TEST_EXECUTION_REWARDS) {
        super('eprop', 'online-learning', {}, rewardSignals);
        this.epropConfig = { ...DEFAULT_EPROP_ALGORITHM_CONFIG, ...config };
        this.network = buildEpropNetwork(this.epropConfig);
        this.actions = buildActionSpace(this.epropConfig.actionSize);
    }
    // ==========================================================================
    // RLAlgorithm Interface
    // ==========================================================================
    /**
     * Predict best action for a given state.
     * Runs the E-prop network forward pass and selects the action
     * with highest output activation.
     *
     * When the useEpropOnlineLearning feature flag is off, the first action is
     * returned with confidence 0 and the network is not touched.
     */
    async predict(state) {
        if (!getRuVectorFeatureFlags().useEpropOnlineLearning) {
            // Feature flag disabled — return default action with zero confidence
            return {
                action: { type: this.actions[0]?.type ?? 'test-action', value: this.actions[0]?.value ?? 'default' },
                confidence: 0,
            };
        }
        if (!this.initialized) {
            await this.initialize();
        }
        const stateFeatures = this.prepareState(state);
        const output = this.network.forward(stateFeatures);
        // Select action with highest activation
        const actionIndex = this.argmax(output);
        const action = this.actions[actionIndex];
        const confidence = this.calculateConfidence(output);
        return {
            action: { type: action.type, value: action.value },
            confidence,
            value: output[actionIndex],
            reasoning: this.generateReasoning(action, output[actionIndex], confidence),
        };
    }
    /**
     * Train with a single experience — the core online learning step.
     *
     * Unlike batch algorithms, E-prop processes each experience immediately:
     * 1. Forward pass on experience.state to refresh the eligibility traces
     * 2. Online update: dw = eta * eligibility * reward
     *
     * Also updates the running reward bookkeeping (last 1000 rewards) and
     * stores the resulting stats on `this.stats`.
     */
    async train(experience) {
        if (!this.initialized) {
            await this.initialize();
        }
        const startTime = Date.now();
        applyOnlineStep(this.network, this.prepareState(experience.state), experience);
        // Track stats
        this.episodeCount++;
        this.totalReward += experience.reward;
        this.rewardHistory.push(experience.reward);
        if (this.rewardHistory.length > 1000) {
            this.rewardHistory.shift();
        }
        const avgReward = this.rewardHistory.reduce((a, b) => a + b, 0) / this.rewardHistory.length;
        this.stats = {
            episode: this.episodeCount,
            totalReward: this.totalReward,
            averageReward: avgReward,
            trainingTimeMs: Date.now() - startTime,
            timestamp: new Date(),
            explorationRate: this.config.explorationRate,
        };
        return this.stats;
    }
    /**
     * Core training logic for batch experiences.
     * E-prop processes each experience online (sequentially).
     *
     * NOTE(review): reward bookkeeping (episodeCount, totalReward,
     * rewardHistory) is only read here, never updated — presumably the base
     * class maintains it for batch training; confirm against BaseRLAlgorithm.
     */
    async trainCore(experiences) {
        const startTime = Date.now();
        for (const exp of experiences) {
            applyOnlineStep(this.network, this.prepareState(exp.state), exp);
        }
        const avgReward = this.rewardHistory.length > 0
            ? this.rewardHistory.reduce((a, b) => a + b, 0) / this.rewardHistory.length
            : 0;
        return {
            episode: this.episodeCount,
            totalReward: this.totalReward,
            averageReward: avgReward,
            // Report the real elapsed time, consistent with train().
            trainingTimeMs: Date.now() - startTime,
            timestamp: new Date(),
        };
    }
    /**
     * Get algorithm-specific info.
     */
    getAlgorithmInfo() {
        const networkStats = this.network.getStats();
        return {
            type: 'eprop',
            category: 'online-learning',
            version: '1.0.0',
            description: 'E-prop Online Learning for Adaptive Test Strategies',
            capabilities: [
                'Online learning (no replay buffer)',
                'Eligibility trace propagation',
                'Feedback alignment (no weight transport)',
                '12 bytes/synapse memory budget',
                'Real-time adaptation to test results',
            ],
            hyperparameters: {
                stateSize: this.epropConfig.stateSize,
                hiddenSize: this.epropConfig.hiddenSize,
                actionSize: this.epropConfig.actionSize,
                learningRate: this.epropConfig.epropLearningRate,
                eligibilityDecay: this.epropConfig.eligibilityDecay,
                feedbackAlignment: String(this.epropConfig.feedbackAlignment),
                synapsCount: networkStats.synapsCount,
                memoryBytes: networkStats.memoryBytes,
            },
            stats: this.stats,
        };
    }
    // ==========================================================================
    // Export / Import
    // ==========================================================================
    /**
     * Serialize the network weights (as plain arrays), the effective config and
     * the current network stats.
     */
    async exportCustomData() {
        const weights = this.network.exportWeights();
        return {
            inputHidden: Array.from(weights.inputHidden),
            hiddenOutput: Array.from(weights.hiddenOutput),
            epropConfig: this.epropConfig,
            networkStats: this.network.getStats(),
        };
    }
    /**
     * Restore state produced by exportCustomData.
     *
     * If the imported config changes stateSize, hiddenSize or actionSize, the
     * network and action table are rebuilt first so that they match the
     * dimensions of the weights about to be loaded. Other hyperparameter
     * changes only update epropConfig and keep the current network.
     */
    async importCustomData(data) {
        if (data.epropConfig) {
            const previous = this.epropConfig;
            this.epropConfig = { ...previous, ...data.epropConfig };
            const shapeChanged = NETWORK_SHAPE_KEYS.some((key) => this.epropConfig[key] !== previous[key]);
            if (shapeChanged) {
                // Old weights cannot fit the new dimensions, so start from a fresh network.
                this.network = buildEpropNetwork(this.epropConfig);
                this.actions = buildActionSpace(this.epropConfig.actionSize);
            }
        }
        if (data.inputHidden && data.hiddenOutput) {
            this.network.importWeights({
                inputHidden: new Float32Array(data.inputHidden),
                hiddenOutput: new Float32Array(data.hiddenOutput),
            });
        }
        this.initialized = true;
    }
    /** Discard the current network and build a fresh one from epropConfig. */
    async resetAlgorithm() {
        this.network = buildEpropNetwork(this.epropConfig);
    }
    // ==========================================================================
    // Private Helpers
    // ==========================================================================
    /**
     * Convert state.features into a Float32Array of exactly stateSize entries:
     * truncated or zero-padded, then scaled by the largest absolute value so
     * every entry lies in [-1, 1]. An all-zero vector is returned unscaled.
     */
    prepareState(state) {
        const { stateSize } = this.epropConfig;
        // Array.from accepts plain arrays and typed arrays alike; a typed array
        // could not be padded with push().
        const features = Array.from(state.features).slice(0, stateSize);
        // Pad with zeros if needed
        while (features.length < stateSize) {
            features.push(0);
        }
        // Normalize to [-1, 1]
        const peak = Math.max(...features.map((value) => Math.abs(value)));
        if (peak > 0) {
            return Float32Array.from(features, (value) => value / peak);
        }
        return new Float32Array(features);
    }
    /** Index of the largest entry; the first one wins ties, 0 for an empty array. */
    argmax(array) {
        let maxIndex = 0;
        let maxValue = array[0];
        for (let i = 1; i < array.length; i++) {
            if (array[i] > maxValue) {
                maxValue = array[i];
                maxIndex = i;
            }
        }
        return maxIndex;
    }
    /**
     * Map the spread of the output activations to a confidence value:
     * 0.5 when all outputs are equal, otherwise 0.3 + 2 * (max - min) capped
     * at 1. Returns 0 when there are no outputs or the spread is NaN, since no
     * meaningful ranking exists in either case.
     */
    calculateConfidence(output) {
        const arr = Array.from(output);
        if (arr.length === 0) {
            return 0;
        }
        const max = Math.max(...arr);
        const min = Math.min(...arr);
        if (max === min) {
            return 0.5;
        }
        const spread = max - min;
        if (Number.isNaN(spread)) {
            return 0;
        }
        return Math.min(1, 0.3 + spread * 2);
    }
    /** Build the human-readable explanation attached to a prediction. */
    generateReasoning(action, value, confidence) {
        const stats = this.network.getStats();
        if (stats.totalSteps < 10) {
            return `E-prop exploration phase (step ${stats.totalSteps}): ${action.type} action`;
        }
        if (confidence > 0.8) {
            return (`High-confidence E-prop decision (${confidence.toFixed(2)}): ${action.type} ` +
                `with value ${value.toFixed(3)} after ${stats.totalSteps} online updates`);
        }
        return `E-prop online learning: ${action.type} with confidence ${confidence.toFixed(2)}`;
    }
}
|
|
275
|
+
// ============================================================================
|
|
276
|
+
// Factory
|
|
277
|
+
// ============================================================================
|
|
278
|
+
/**
 * Factory for the E-prop RL algorithm.
 *
 * Both arguments are passed unchanged to the `EpropAlgorithm` constructor.
 *
 * @param config - First constructor argument.
 * @param rewardSignals - Second constructor argument.
 * @returns {EpropAlgorithm} The newly constructed instance.
 */
export function createEpropAlgorithm(config, rewardSignals) {
    const algorithm = new EpropAlgorithm(config, rewardSignals);
    return algorithm;
}
|
|
284
|
+
//# sourceMappingURL=eprop.js.map
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Agentic QE v3 - RL Algorithms Index
|
|
3
3
|
*
|
|
4
|
-
* Exports all
|
|
4
|
+
* Exports all 10 RL algorithms for QE applications.
|
|
5
5
|
*/
|
|
6
6
|
export { QLearningAlgorithm } from './q-learning';
|
|
7
7
|
export { DecisionTransformerAlgorithm } from './decision-transformer';
|
|
@@ -12,4 +12,5 @@ export { DQNAlgorithm } from './dqn';
|
|
|
12
12
|
export { PPOAlgorithm } from './ppo';
|
|
13
13
|
export { A2CAlgorithm } from './a2c';
|
|
14
14
|
export { DDPGAlgorithm } from './ddpg';
|
|
15
|
+
export { EpropAlgorithm, createEpropAlgorithm } from './eprop';
|
|
15
16
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Agentic QE v3 - RL Algorithms Index
|
|
3
3
|
*
|
|
4
|
-
* Exports all
|
|
4
|
+
* Exports all 10 RL algorithms for QE applications.
|
|
5
5
|
*/
|
|
6
6
|
export { QLearningAlgorithm } from './q-learning';
|
|
7
7
|
export { DecisionTransformerAlgorithm } from './decision-transformer';
|
|
@@ -12,4 +12,5 @@ export { DQNAlgorithm } from './dqn';
|
|
|
12
12
|
export { PPOAlgorithm } from './ppo';
|
|
13
13
|
export { A2CAlgorithm } from './a2c';
|
|
14
14
|
export { DDPGAlgorithm } from './ddpg';
|
|
15
|
+
export { EpropAlgorithm, createEpropAlgorithm } from './eprop';
|
|
15
16
|
//# sourceMappingURL=index.js.map
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Agentic QE v3 - RL Suite Main Export
|
|
3
3
|
*
|
|
4
|
-
* Complete RL Suite for Quality Engineering with
|
|
4
|
+
* Complete RL Suite for Quality Engineering with 10 algorithms.
|
|
5
5
|
* Per ADR-040, provides reinforcement learning for QE decision-making.
|
|
6
6
|
*/
|
|
7
7
|
import type { DomainName } from '../../shared/types';
|
|
@@ -10,7 +10,7 @@ import type { QERLSuite } from './orchestrator';
|
|
|
10
10
|
export { QERLSuite, createQERLSuite } from './orchestrator';
|
|
11
11
|
export type { RLSuiteStats } from './orchestrator';
|
|
12
12
|
export { BaseRLAlgorithm } from './base-algorithm';
|
|
13
|
-
export { QLearningAlgorithm, DecisionTransformerAlgorithm, SARSAAlgorithm, ActorCriticAlgorithm, PolicyGradientAlgorithm, DQNAlgorithm, PPOAlgorithm, A2CAlgorithm, DDPGAlgorithm, } from './algorithms';
|
|
13
|
+
export { QLearningAlgorithm, DecisionTransformerAlgorithm, SARSAAlgorithm, ActorCriticAlgorithm, PolicyGradientAlgorithm, DQNAlgorithm, PPOAlgorithm, A2CAlgorithm, DDPGAlgorithm, EpropAlgorithm, createEpropAlgorithm, } from './algorithms';
|
|
14
14
|
export type { RLAlgorithmType, RLAlgorithmCategory, QEDomainApplication, RLState, RLAction, RLExperience, RLPrediction, RLTrainingStats, RLTrainingConfig, RLAlgorithmInfo, TestExecutionState, TestExecutionAction, CoverageAnalysisState, CoverageOptimizationAction, QualityGateState, QualityGateAction, ResourceAllocationState, ResourceAllocationAction, RewardSignal, RewardContext, RewardCalculation, AlgorithmDomainMapping, RLSuiteConfig, ALGORITHM_DOMAIN_MAPPINGS, RLAlgorithmError, RLTrainingError, RLPredictionError, RLConfigError, } from './interfaces';
|
|
15
15
|
export { SONA, SONAIndex, SONAOptimizer, SONAPatternCache, createSONA, createDomainSONA, } from './sona';
|
|
16
16
|
export type { SONAPattern, SONAPatternType, SONAAdaptationResult, SONAStats, SONAConfig, } from './sona';
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Agentic QE v3 - RL Suite Main Export
|
|
3
3
|
*
|
|
4
|
-
* Complete RL Suite for Quality Engineering with
|
|
4
|
+
* Complete RL Suite for Quality Engineering with 10 algorithms.
|
|
5
5
|
* Per ADR-040, provides reinforcement learning for QE decision-making.
|
|
6
6
|
*/
|
|
7
7
|
// ============================================================================
|
|
@@ -15,7 +15,7 @@ export { BaseRLAlgorithm } from './base-algorithm';
|
|
|
15
15
|
// ============================================================================
|
|
16
16
|
// Algorithms
|
|
17
17
|
// ============================================================================
|
|
18
|
-
export { QLearningAlgorithm, DecisionTransformerAlgorithm, SARSAAlgorithm, ActorCriticAlgorithm, PolicyGradientAlgorithm, DQNAlgorithm, PPOAlgorithm, A2CAlgorithm, DDPGAlgorithm, } from './algorithms';
|
|
18
|
+
export { QLearningAlgorithm, DecisionTransformerAlgorithm, SARSAAlgorithm, ActorCriticAlgorithm, PolicyGradientAlgorithm, DQNAlgorithm, PPOAlgorithm, A2CAlgorithm, DDPGAlgorithm, EpropAlgorithm, createEpropAlgorithm, } from './algorithms';
|
|
19
19
|
// ============================================================================
|
|
20
20
|
// SONA (Self-Optimizing Neural Architecture)
|
|
21
21
|
// ============================================================================
|