agentic-qe 3.8.11 → 3.8.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/.claude/skills/qe-code-intelligence/SKILL.md +29 -20
  2. package/.claude/skills/qe-code-intelligence/evals/qe-code-intelligence.yaml +3 -3
  3. package/.claude/skills/qe-quality-assessment/SKILL.md +1 -1
  4. package/.claude/skills/qe-test-generation/SKILL.md +1 -1
  5. package/.claude/skills/skills-manifest.json +1 -1
  6. package/CHANGELOG.md +45 -0
  7. package/README.md +9 -0
  8. package/assets/skills/qe-code-intelligence/SKILL.md +29 -20
  9. package/assets/skills/qe-code-intelligence/evals/qe-code-intelligence.yaml +3 -3
  10. package/assets/skills/qe-quality-assessment/SKILL.md +1 -1
  11. package/assets/skills/qe-test-generation/SKILL.md +1 -1
  12. package/dist/cli/bundle.js +1162 -1046
  13. package/dist/cli/commands/code.js +149 -11
  14. package/dist/cli/commands/init.js +3 -2
  15. package/dist/cli/commands/ruvector-commands.js +17 -0
  16. package/dist/cli/handlers/init-handler.d.ts +1 -0
  17. package/dist/cli/handlers/init-handler.js +15 -10
  18. package/dist/cli/utils/file-discovery.d.ts +1 -0
  19. package/dist/cli/utils/file-discovery.js +1 -1
  20. package/dist/domains/code-intelligence/coordinator-gnn.d.ts +21 -0
  21. package/dist/domains/code-intelligence/coordinator-gnn.js +102 -0
  22. package/dist/domains/contract-testing/coordinator.js +13 -0
  23. package/dist/domains/coverage-analysis/coordinator.js +5 -0
  24. package/dist/domains/defect-intelligence/coordinator.d.ts +1 -0
  25. package/dist/domains/defect-intelligence/coordinator.js +43 -0
  26. package/dist/domains/quality-assessment/coordinator.js +26 -0
  27. package/dist/domains/test-generation/coordinator.js +14 -0
  28. package/dist/init/orchestrator.js +1 -0
  29. package/dist/init/phases/08-mcp.js +4 -4
  30. package/dist/init/phases/phase-interface.d.ts +3 -1
  31. package/dist/integrations/agentic-flow/reasoning-bank/experience-replay.d.ts +11 -0
  32. package/dist/integrations/agentic-flow/reasoning-bank/experience-replay.js +44 -1
  33. package/dist/integrations/rl-suite/algorithms/eprop.d.ts +79 -0
  34. package/dist/integrations/rl-suite/algorithms/eprop.js +284 -0
  35. package/dist/integrations/rl-suite/algorithms/index.d.ts +2 -1
  36. package/dist/integrations/rl-suite/algorithms/index.js +2 -1
  37. package/dist/integrations/rl-suite/index.d.ts +2 -2
  38. package/dist/integrations/rl-suite/index.js +2 -2
  39. package/dist/integrations/rl-suite/interfaces.d.ts +3 -3
  40. package/dist/integrations/rl-suite/interfaces.js +1 -1
  41. package/dist/integrations/rl-suite/orchestrator.d.ts +2 -2
  42. package/dist/integrations/rl-suite/orchestrator.js +3 -2
  43. package/dist/integrations/rl-suite/reward-signals.d.ts +1 -1
  44. package/dist/integrations/rl-suite/reward-signals.js +1 -1
  45. package/dist/integrations/ruvector/coherence-gate-cohomology.d.ts +41 -0
  46. package/dist/integrations/ruvector/coherence-gate-cohomology.js +47 -0
  47. package/dist/integrations/ruvector/coherence-gate-core.d.ts +200 -0
  48. package/dist/integrations/ruvector/coherence-gate-core.js +294 -0
  49. package/dist/integrations/ruvector/coherence-gate-energy.d.ts +136 -0
  50. package/dist/integrations/ruvector/coherence-gate-energy.js +373 -0
  51. package/dist/integrations/ruvector/coherence-gate-vector.d.ts +38 -0
  52. package/dist/integrations/ruvector/coherence-gate-vector.js +76 -0
  53. package/dist/integrations/ruvector/coherence-gate.d.ts +10 -311
  54. package/dist/integrations/ruvector/coherence-gate.js +10 -652
  55. package/dist/integrations/ruvector/cold-tier-trainer.d.ts +103 -0
  56. package/dist/integrations/ruvector/cold-tier-trainer.js +377 -0
  57. package/dist/integrations/ruvector/cusum-detector.d.ts +70 -0
  58. package/dist/integrations/ruvector/cusum-detector.js +142 -0
  59. package/dist/integrations/ruvector/delta-tracker.d.ts +122 -0
  60. package/dist/integrations/ruvector/delta-tracker.js +311 -0
  61. package/dist/integrations/ruvector/domain-transfer.d.ts +79 -1
  62. package/dist/integrations/ruvector/domain-transfer.js +158 -2
  63. package/dist/integrations/ruvector/eprop-learner.d.ts +135 -0
  64. package/dist/integrations/ruvector/eprop-learner.js +351 -0
  65. package/dist/integrations/ruvector/feature-flags.d.ts +177 -0
  66. package/dist/integrations/ruvector/feature-flags.js +145 -0
  67. package/dist/integrations/ruvector/graphmae-encoder.d.ts +88 -0
  68. package/dist/integrations/ruvector/graphmae-encoder.js +360 -0
  69. package/dist/integrations/ruvector/hdc-fingerprint.d.ts +127 -0
  70. package/dist/integrations/ruvector/hdc-fingerprint.js +222 -0
  71. package/dist/integrations/ruvector/hopfield-memory.d.ts +97 -0
  72. package/dist/integrations/ruvector/hopfield-memory.js +238 -0
  73. package/dist/integrations/ruvector/index.d.ts +13 -2
  74. package/dist/integrations/ruvector/index.js +46 -2
  75. package/dist/integrations/ruvector/mincut-wrapper.d.ts +7 -0
  76. package/dist/integrations/ruvector/mincut-wrapper.js +54 -2
  77. package/dist/integrations/ruvector/reservoir-replay.d.ts +172 -0
  78. package/dist/integrations/ruvector/reservoir-replay.js +335 -0
  79. package/dist/integrations/ruvector/solver-adapter.d.ts +93 -0
  80. package/dist/integrations/ruvector/solver-adapter.js +299 -0
  81. package/dist/integrations/ruvector/sona-persistence.d.ts +33 -0
  82. package/dist/integrations/ruvector/sona-persistence.js +47 -0
  83. package/dist/integrations/ruvector/spectral-sparsifier.d.ts +154 -0
  84. package/dist/integrations/ruvector/spectral-sparsifier.js +389 -0
  85. package/dist/integrations/ruvector/temporal-causality.d.ts +63 -0
  86. package/dist/integrations/ruvector/temporal-causality.js +317 -0
  87. package/dist/learning/pattern-promotion.d.ts +63 -0
  88. package/dist/learning/pattern-promotion.js +235 -1
  89. package/dist/learning/pattern-store.d.ts +2 -0
  90. package/dist/learning/pattern-store.js +187 -1
  91. package/dist/learning/sqlite-persistence.d.ts +2 -0
  92. package/dist/learning/sqlite-persistence.js +4 -0
  93. package/dist/mcp/bundle.js +506 -427
  94. package/dist/shared/utils/index.d.ts +1 -0
  95. package/dist/shared/utils/index.js +1 -0
  96. package/dist/shared/utils/xorshift128.d.ts +24 -0
  97. package/dist/shared/utils/xorshift128.js +50 -0
  98. package/package.json +1 -1
@@ -28,6 +28,8 @@ import * as ClaimVerifierHelpers from './coordinator-claim-verifier.js';
28
28
  import * as GateEvalHelpers from './coordinator-gate-evaluation.js';
29
29
  // ADR-070: Witness Chain audit trail
30
30
  import { getWitnessChain } from '../../audit/witness-chain.js';
31
+ // Three-loop feature flag for instantAdapt protocol
32
+ import { isSONAThreeLoopEnabled } from '../../integrations/ruvector/feature-flags.js';
31
33
  // CQ-002: Base domain coordinator
32
34
  import { BaseDomainCoordinator, } from '../base-domain-coordinator.js';
33
35
  const DEFAULT_CONFIG = {
@@ -227,6 +229,20 @@ export class QualityAssessmentCoordinator extends BaseDomainCoordinator {
227
229
  }
228
230
  // Success path
229
231
  this.completeWorkflow(workflowId);
232
+ // Three-loop protocol: instantAdapt must precede recordOutcome
233
+ if (isSONAThreeLoopEnabled() && this.qesona?.isThreeLoopEnabled()) {
234
+ const m = effectiveRequest.metrics;
235
+ this.qesona.instantAdapt([
236
+ m.coverage / 100,
237
+ m.testsPassing / 100,
238
+ m.criticalBugs / 10,
239
+ m.codeSmells / 100,
240
+ m.securityVulnerabilities / 10,
241
+ m.technicalDebt / 100,
242
+ m.duplications / 100,
243
+ finalResult.overallScore / 100,
244
+ ]);
245
+ }
230
246
  // Store quality pattern in SONA if enabled
231
247
  if (this.config.enableSONAPatternLearning && this.qesona) {
232
248
  await this.storeQualityPattern(effectiveRequest, finalResult);
@@ -322,6 +338,16 @@ export class QualityAssessmentCoordinator extends BaseDomainCoordinator {
322
338
  result.value = enhanced;
323
339
  }
324
340
  }
341
+ // Three-loop protocol: instantAdapt must precede recordOutcome
342
+ if (isSONAThreeLoopEnabled() && this.qesona?.isThreeLoopEnabled()) {
343
+ const score = result.value.score;
344
+ this.qesona.instantAdapt([
345
+ score.overall / 100,
346
+ result.value.metrics.length / 20,
347
+ result.value.trends.length / 10,
348
+ result.value.recommendations.length / 10,
349
+ ]);
350
+ }
325
351
  // Store quality pattern in SONA
326
352
  if (this.config.enableSONAPatternLearning && this.qesona) {
327
353
  await this.storeQualityAnalysisPattern(request, result.value);
@@ -24,6 +24,8 @@ import { createDomainFinding, } from '../../coordination/consensus/domain-findin
24
24
  import { createPersistentSONAEngine, } from '../../integrations/ruvector/sona-persistence.js';
25
25
  import { createQEFlashAttention, } from '../../integrations/ruvector/wrappers.js';
26
26
  import { DecisionTransformerAlgorithm, } from '../../integrations/rl-suite/algorithms/decision-transformer.js';
27
+ // Three-loop feature flag for instantAdapt protocol
28
+ import { isSONAThreeLoopEnabled } from '../../integrations/ruvector/feature-flags.js';
27
29
  // Coherence Gate Integration (ADR-052)
28
30
  import { createTestGenerationCoherenceGate, } from './services/coherence-gate-service.js';
29
31
  const DEFAULT_CONFIG = {
@@ -307,6 +309,18 @@ export class TestGenerationCoordinator extends BaseDomainCoordinator {
307
309
  await this.publishTestGenerated(test, request.framework ?? 'vitest');
308
310
  }
309
311
  }
312
+ // Three-loop protocol: instantAdapt must precede recordOutcome
313
+ if (isSONAThreeLoopEnabled() && this.qesona?.isThreeLoopEnabled()) {
314
+ const tests = result.value;
315
+ this.qesona.instantAdapt([
316
+ tests.tests.length / 20,
317
+ tests.coverageEstimate / 100,
318
+ tests.patternsUsed.length / 10,
319
+ request.sourceFiles.length / 20,
320
+ (request.coverageTarget ?? 80) / 100,
321
+ tests.tests.filter(t => t.type === 'unit').length / 20,
322
+ ]);
323
+ }
310
324
  // Learn from successful generation using QESONA
311
325
  if (this.config.enableQESONA && this.qesona) {
312
326
  await this.storeTestGenerationPattern(result.value, request);
@@ -47,6 +47,7 @@ export class ModularInitOrchestrator {
47
47
  n8nApiConfig: options.n8nApiConfig,
48
48
  wizardAnswers: options.wizardAnswers,
49
49
  noGovernance: options.noGovernance,
50
+ noMcp: options.noMcp,
50
51
  },
51
52
  config: {},
52
53
  enhancements: {
@@ -26,10 +26,10 @@ export class MCPPhase extends BasePhase {
26
26
  requiresPhases = ['configuration', 'database'];
27
27
  async run(context) {
28
28
  const { projectRoot } = context;
29
- // MCP is opt-in: skip unless --with-mcp is passed
30
- if (!context.options.withMcp) {
31
- context.services.log(' MCP: skipped (opt-in — use --with-mcp to enable)');
32
- context.services.log(' All QE commands available via CLI: aqe memory, aqe test, aqe coverage, etc.');
29
+ // MCP is enabled by default — skip only with --no-mcp
30
+ if (context.options.noMcp) {
31
+ context.services.log(' MCP: skipped (--no-mcp)');
32
+ context.services.log(' CLI commands available: aqe memory, aqe test, aqe coverage, etc.');
33
33
  return {
34
34
  configured: false,
35
35
  mcpPath: '',
@@ -106,7 +106,9 @@ export interface InitOptions {
106
106
  withContinueDev?: boolean;
107
107
  /** Install all coding agent platform configurations */
108
108
  withAllPlatforms?: boolean;
109
- /** Install MCP server config (opt-in CLI commands work without MCP) */
109
+ /** Skip MCP server config (MCP is enabled by default) */
110
+ noMcp?: boolean;
111
+ /** @deprecated Use default behavior instead — MCP is now enabled by default */
110
112
  withMcp?: boolean;
111
113
  }
112
114
  /**
@@ -117,6 +117,7 @@ export declare class ExperienceReplay {
117
117
  private experienceIdToHnswId;
118
118
  private nextHnswId;
119
119
  private recentExperiences;
120
+ private reservoirBuffer;
120
121
  private stats;
121
122
  constructor(config?: Partial<ExperienceReplayConfig>);
122
123
  /**
@@ -197,6 +198,16 @@ export declare class ExperienceReplay {
197
198
  hnswIndexSize: number;
198
199
  recentBufferSize: number;
199
200
  };
201
+ /**
202
+ * Get reservoir buffer stats (R10, ADR-087).
203
+ * Returns null if the reservoir is not enabled.
204
+ */
205
+ getReservoirStats(): {
206
+ size: number;
207
+ totalAdmitted: number;
208
+ totalRejected: number;
209
+ tierCounts: Record<string, number>;
210
+ } | null;
200
211
  /**
201
212
  * Dispose and cleanup
202
213
  */
@@ -19,6 +19,8 @@ import { CircularBuffer } from '../../../shared/utils/circular-buffer.js';
19
19
  import { HNSWEmbeddingIndex } from '../../embeddings/index/HNSWIndex.js';
20
20
  import { safeJsonParse } from '../../../shared/safe-json.js';
21
21
  import { ExperienceConsolidator } from '../../../learning/experience-consolidation.js';
22
+ import { getRuVectorFeatureFlags } from '../../ruvector/feature-flags.js';
23
+ import { ReservoirReplayBuffer } from '../../ruvector/reservoir-replay.js';
22
24
  const DEFAULT_CONFIG = {
23
25
  minQualityThreshold: 0.6,
24
26
  maxExperiencesPerDomain: 500,
@@ -64,6 +66,8 @@ export class ExperienceReplay {
64
66
  nextHnswId = 0;
65
67
  // Recent experiences buffer
66
68
  recentExperiences;
69
+ // Reservoir replay buffer (R10, ADR-087) — coherence-gated admission
70
+ reservoirBuffer = null;
67
71
  // Statistics
68
72
  stats = {
69
73
  experiencesStored: 0,
@@ -99,6 +103,11 @@ export class ExperienceReplay {
99
103
  this.prepareStatements();
100
104
  // Load embeddings into memory index
101
105
  await this.loadEmbeddingIndex();
106
+ // Initialize reservoir buffer if feature flag is enabled (R10, ADR-087)
107
+ if (getRuVectorFeatureFlags().useReservoirReplay) {
108
+ this.reservoirBuffer = new ReservoirReplayBuffer({ capacity: 10_000 });
109
+ console.log('[ExperienceReplay] Reservoir replay buffer enabled');
110
+ }
102
111
  this.initialized = true;
103
112
  console.log('[ExperienceReplay] Initialized');
104
113
  }
@@ -333,6 +342,10 @@ export class ExperienceReplay {
333
342
  }
334
343
  // Add to recent buffer
335
344
  this.recentExperiences.push(experience);
345
+ // Admit to reservoir buffer with coherence gating (R10, ADR-087)
346
+ if (this.reservoirBuffer) {
347
+ this.reservoirBuffer.admit(experience.id, experience, experience.qualityScore);
348
+ }
336
349
  this.stats.experiencesStored++;
337
350
  // Auto-consolidate if enabled (replaces destructive auto-prune)
338
351
  if (this.config.autoPrune) {
@@ -349,8 +362,23 @@ export class ExperienceReplay {
349
362
  */
350
363
  async getGuidance(task, domain) {
351
364
  this.ensureInitialized();
352
- // Find similar experiences
365
+ // Find similar experiences via HNSW
353
366
  const similar = await this.findSimilarExperiences(task, domain);
367
+ // Blend in high-coherence experiences from reservoir buffer (R10, ADR-087)
368
+ if (this.reservoirBuffer && this.reservoirBuffer.size() > 0) {
369
+ const reservoirSamples = this.reservoirBuffer.sample(Math.max(2, Math.floor(this.config.topK / 2)), 0.6);
370
+ for (const entry of reservoirSamples) {
371
+ const exp = entry.data;
372
+ // Skip if already in HNSW results
373
+ if (similar.some(s => s.experience.id === exp.id))
374
+ continue;
375
+ // Skip if domain filter doesn't match
376
+ if (domain && exp.domain !== domain)
377
+ continue;
378
+ // Add with a coherence-based similarity score
379
+ similar.push({ experience: exp, similarity: entry.coherenceScore * 0.8 });
380
+ }
381
+ }
354
382
  if (similar.length === 0) {
355
383
  return null;
356
384
  }
@@ -564,6 +592,21 @@ export class ExperienceReplay {
564
592
  recentBufferSize: this.recentExperiences.length,
565
593
  };
566
594
  }
595
+ /**
596
+ * Get reservoir buffer stats (R10, ADR-087).
597
+ * Returns null if the reservoir is not enabled.
598
+ */
599
+ getReservoirStats() {
600
+ if (!this.reservoirBuffer)
601
+ return null;
602
+ const stats = this.reservoirBuffer.getStats();
603
+ return {
604
+ size: stats.size,
605
+ totalAdmitted: stats.totalAdmitted,
606
+ totalRejected: stats.totalRejected,
607
+ tierCounts: stats.tierCounts,
608
+ };
609
+ }
567
610
  /**
568
611
  * Dispose and cleanup
569
612
  */
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Agentic QE v3 - E-prop Online Learning Algorithm (ADR-087 Milestone 4)
3
+ *
4
+ * RL algorithm #10: Eligibility propagation for online learning.
5
+ * Uses 12 bytes/synapse with no backprop required.
6
+ *
7
+ * Application: Online adaptive test strategies — learns in real time
8
+ * from test execution feedback without storing replay buffers.
9
+ */
10
+ import { BaseRLAlgorithm } from '../base-algorithm';
11
+ import type { RLState, RLPrediction, RLTrainingStats, RLExperience, RLAlgorithmInfo, RewardSignal } from '../interfaces';
12
+ interface EpropAlgorithmConfig {
13
+ /** Number of state features */
14
+ stateSize: number;
15
+ /** Hidden layer size for the E-prop network */
16
+ hiddenSize: number;
17
+ /** Number of discrete actions */
18
+ actionSize: number;
19
+ /** E-prop learning rate */
20
+ epropLearningRate: number;
21
+ /** Eligibility trace decay */
22
+ eligibilityDecay: number;
23
+ /** Use feedback alignment */
24
+ feedbackAlignment: boolean;
25
+ }
26
+ /**
27
+ * E-prop online learning algorithm for adaptive test strategies.
28
+ *
29
+ * Unlike batch RL algorithms, E-prop learns from each experience
30
+ * immediately using eligibility traces — no replay buffer needed.
31
+ *
32
+ * Key advantages:
33
+ * - Online: updates weights after every step
34
+ * - Memory-efficient: 12 bytes/synapse (vs kilobytes for replay-based)
35
+ * - Biologically plausible: no weight transport (feedback alignment)
36
+ * - Fast: no backward pass through the full network
37
+ */
38
+ export declare class EpropAlgorithm extends BaseRLAlgorithm {
39
+ private network;
40
+ private epropConfig;
41
+ private actions;
42
+ constructor(config?: Partial<EpropAlgorithmConfig>, rewardSignals?: RewardSignal[]);
43
+ /**
44
+ * Predict best action for a given state.
45
+ * Runs the E-prop network forward pass and selects the action
46
+ * with highest output activation.
47
+ */
48
+ predict(state: RLState): Promise<RLPrediction>;
49
+ /**
50
+ * Train with a single experience — the core online learning step.
51
+ *
52
+ * Unlike batch algorithms, E-prop processes each experience immediately:
53
+ * 1. Forward pass (already done during predict)
54
+ * 2. Online update: dw = eta * eligibility * reward
55
+ */
56
+ train(experience: RLExperience): Promise<RLTrainingStats>;
57
+ /**
58
+ * Core training logic for batch experiences.
59
+ * E-prop processes each experience online (sequentially).
60
+ */
61
+ protected trainCore(experiences: RLExperience[]): Promise<RLTrainingStats>;
62
+ /**
63
+ * Get algorithm-specific info.
64
+ */
65
+ protected getAlgorithmInfo(): RLAlgorithmInfo;
66
+ protected exportCustomData(): Promise<Record<string, unknown>>;
67
+ protected importCustomData(data: Record<string, unknown>): Promise<void>;
68
+ protected resetAlgorithm(): Promise<void>;
69
+ private prepareState;
70
+ private argmax;
71
+ private calculateConfidence;
72
+ private generateReasoning;
73
+ }
74
+ /**
75
+ * Create a new E-prop RL algorithm instance.
76
+ */
77
+ export declare function createEpropAlgorithm(config?: Partial<EpropAlgorithmConfig>, rewardSignals?: RewardSignal[]): EpropAlgorithm;
78
+ export {};
79
+ //# sourceMappingURL=eprop.d.ts.map
@@ -0,0 +1,284 @@
1
+ /**
2
+ * Agentic QE v3 - E-prop Online Learning Algorithm (ADR-087 Milestone 4)
3
+ *
4
+ * RL algorithm #10: Eligibility propagation for online learning.
5
+ * Uses 12 bytes/synapse with no backprop required.
6
+ *
7
+ * Application: Online adaptive test strategies — learns in real time
8
+ * from test execution feedback without storing replay buffers.
9
+ */
10
+ import { BaseRLAlgorithm } from '../base-algorithm';
11
+ import { TEST_EXECUTION_REWARDS } from '../interfaces';
12
+ import { createEpropNetwork, } from '../../ruvector/eprop-learner.js';
13
+ import { getRuVectorFeatureFlags } from '../../ruvector/feature-flags.js';
14
+ const DEFAULT_EPROP_ALGORITHM_CONFIG = {
15
+ stateSize: 10,
16
+ hiddenSize: 50,
17
+ actionSize: 4,
18
+ epropLearningRate: 0.01,
19
+ eligibilityDecay: 0.95,
20
+ feedbackAlignment: true,
21
+ };
22
+ // ============================================================================
23
+ // E-prop RL Algorithm
24
+ // ============================================================================
25
+ /**
26
+ * E-prop online learning algorithm for adaptive test strategies.
27
+ *
28
+ * Unlike batch RL algorithms, E-prop learns from each experience
29
+ * immediately using eligibility traces — no replay buffer needed.
30
+ *
31
+ * Key advantages:
32
+ * - Online: updates weights after every step
33
+ * - Memory-efficient: 12 bytes/synapse (vs kilobytes for replay-based)
34
+ * - Biologically plausible: no weight transport (feedback alignment)
35
+ * - Fast: no backward pass through the full network
36
+ */
37
+ export class EpropAlgorithm extends BaseRLAlgorithm {
38
+ network;
39
+ epropConfig;
40
+ actions;
41
+ constructor(config = {}, rewardSignals = TEST_EXECUTION_REWARDS) {
42
+ super('eprop', 'online-learning', {}, rewardSignals);
43
+ this.epropConfig = { ...DEFAULT_EPROP_ALGORITHM_CONFIG, ...config };
44
+ // Create the underlying E-prop network
45
+ this.network = createEpropNetwork({
46
+ inputSize: this.epropConfig.stateSize,
47
+ hiddenSize: this.epropConfig.hiddenSize,
48
+ outputSize: this.epropConfig.actionSize,
49
+ learningRate: this.epropConfig.epropLearningRate,
50
+ eligibilityDecay: this.epropConfig.eligibilityDecay,
51
+ feedbackAlignment: this.epropConfig.feedbackAlignment,
52
+ });
53
+ // Default action space for test execution
54
+ this.actions = [
55
+ { type: 'execute', value: 'standard' },
56
+ { type: 'prioritize', value: 'high' },
57
+ { type: 'retry', value: 'adaptive' },
58
+ { type: 'skip', value: 0 },
59
+ ];
60
+ // Trim or pad action space to match config
61
+ while (this.actions.length < this.epropConfig.actionSize) {
62
+ this.actions.push({ type: 'explore', value: this.actions.length });
63
+ }
64
+ this.actions = this.actions.slice(0, this.epropConfig.actionSize);
65
+ }
66
+ // ==========================================================================
67
+ // RLAlgorithm Interface
68
+ // ==========================================================================
69
+ /**
70
+ * Predict best action for a given state.
71
+ * Runs the E-prop network forward pass and selects the action
72
+ * with highest output activation.
73
+ */
74
+ async predict(state) {
75
+ if (!getRuVectorFeatureFlags().useEpropOnlineLearning) {
76
+ // Feature flag disabled — return default action with zero confidence
77
+ return {
78
+ action: { type: this.actions[0]?.type ?? 'test-action', value: this.actions[0]?.value ?? 'default' },
79
+ confidence: 0,
80
+ };
81
+ }
82
+ if (!this.initialized) {
83
+ await this.initialize();
84
+ }
85
+ const stateFeatures = this.prepareState(state);
86
+ const output = this.network.forward(stateFeatures);
87
+ // Select action with highest activation
88
+ const actionIndex = this.argmax(output);
89
+ const action = this.actions[actionIndex];
90
+ const confidence = this.calculateConfidence(output);
91
+ return {
92
+ action: { type: action.type, value: action.value },
93
+ confidence,
94
+ value: output[actionIndex],
95
+ reasoning: this.generateReasoning(action, output[actionIndex], confidence),
96
+ };
97
+ }
98
+ /**
99
+ * Train with a single experience — the core online learning step.
100
+ *
101
+ * Unlike batch algorithms, E-prop processes each experience immediately:
102
+ * 1. Forward pass (already done during predict)
103
+ * 2. Online update: dw = eta * eligibility * reward
104
+ */
105
+ async train(experience) {
106
+ if (!this.initialized) {
107
+ await this.initialize();
108
+ }
109
+ const startTime = Date.now();
110
+ // Run forward pass on the state to update eligibility traces
111
+ const stateFeatures = this.prepareState(experience.state);
112
+ this.network.forward(stateFeatures);
113
+ // Apply online update with reward signal
114
+ this.network.updateOnline(experience.reward);
115
+ // Reset traces if episode ended
116
+ if (experience.done) {
117
+ this.network.resetTraces();
118
+ }
119
+ // Track stats
120
+ this.episodeCount++;
121
+ this.totalReward += experience.reward;
122
+ this.rewardHistory.push(experience.reward);
123
+ if (this.rewardHistory.length > 1000) {
124
+ this.rewardHistory.shift();
125
+ }
126
+ const avgReward = this.rewardHistory.reduce((a, b) => a + b, 0) / this.rewardHistory.length;
127
+ this.stats = {
128
+ episode: this.episodeCount,
129
+ totalReward: this.totalReward,
130
+ averageReward: avgReward,
131
+ trainingTimeMs: Date.now() - startTime,
132
+ timestamp: new Date(),
133
+ explorationRate: this.config.explorationRate,
134
+ };
135
+ return this.stats;
136
+ }
137
+ /**
138
+ * Core training logic for batch experiences.
139
+ * E-prop processes each experience online (sequentially).
140
+ */
141
+ async trainCore(experiences) {
142
+ for (const exp of experiences) {
143
+ const stateFeatures = this.prepareState(exp.state);
144
+ this.network.forward(stateFeatures);
145
+ this.network.updateOnline(exp.reward);
146
+ if (exp.done) {
147
+ this.network.resetTraces();
148
+ }
149
+ }
150
+ const avgReward = this.rewardHistory.length > 0
151
+ ? this.rewardHistory.reduce((a, b) => a + b, 0) / this.rewardHistory.length
152
+ : 0;
153
+ return {
154
+ episode: this.episodeCount,
155
+ totalReward: this.totalReward,
156
+ averageReward: avgReward,
157
+ trainingTimeMs: 0,
158
+ timestamp: new Date(),
159
+ };
160
+ }
161
+ /**
162
+ * Get algorithm-specific info.
163
+ */
164
+ getAlgorithmInfo() {
165
+ const networkStats = this.network.getStats();
166
+ return {
167
+ type: 'eprop',
168
+ category: 'online-learning',
169
+ version: '1.0.0',
170
+ description: 'E-prop Online Learning for Adaptive Test Strategies',
171
+ capabilities: [
172
+ 'Online learning (no replay buffer)',
173
+ 'Eligibility trace propagation',
174
+ 'Feedback alignment (no weight transport)',
175
+ '12 bytes/synapse memory budget',
176
+ 'Real-time adaptation to test results',
177
+ ],
178
+ hyperparameters: {
179
+ stateSize: this.epropConfig.stateSize,
180
+ hiddenSize: this.epropConfig.hiddenSize,
181
+ actionSize: this.epropConfig.actionSize,
182
+ learningRate: this.epropConfig.epropLearningRate,
183
+ eligibilityDecay: this.epropConfig.eligibilityDecay,
184
+ feedbackAlignment: String(this.epropConfig.feedbackAlignment),
185
+ synapsCount: networkStats.synapsCount,
186
+ memoryBytes: networkStats.memoryBytes,
187
+ },
188
+ stats: this.stats,
189
+ };
190
+ }
191
+ // ==========================================================================
192
+ // Export / Import
193
+ // ==========================================================================
194
+ async exportCustomData() {
195
+ const weights = this.network.exportWeights();
196
+ return {
197
+ inputHidden: Array.from(weights.inputHidden),
198
+ hiddenOutput: Array.from(weights.hiddenOutput),
199
+ epropConfig: this.epropConfig,
200
+ networkStats: this.network.getStats(),
201
+ };
202
+ }
203
+ async importCustomData(data) {
204
+ if (data.epropConfig) {
205
+ this.epropConfig = { ...this.epropConfig, ...data.epropConfig };
206
+ }
207
+ if (data.inputHidden && data.hiddenOutput) {
208
+ this.network.importWeights({
209
+ inputHidden: new Float32Array(data.inputHidden),
210
+ hiddenOutput: new Float32Array(data.hiddenOutput),
211
+ });
212
+ }
213
+ this.initialized = true;
214
+ }
215
+ async resetAlgorithm() {
216
+ this.network = createEpropNetwork({
217
+ inputSize: this.epropConfig.stateSize,
218
+ hiddenSize: this.epropConfig.hiddenSize,
219
+ outputSize: this.epropConfig.actionSize,
220
+ learningRate: this.epropConfig.epropLearningRate,
221
+ eligibilityDecay: this.epropConfig.eligibilityDecay,
222
+ feedbackAlignment: this.epropConfig.feedbackAlignment,
223
+ });
224
+ }
225
+ // ==========================================================================
226
+ // Private Helpers
227
+ // ==========================================================================
228
+ prepareState(state) {
229
+ const features = state.features.slice(0, this.epropConfig.stateSize);
230
+ // Pad with zeros if needed
231
+ while (features.length < this.epropConfig.stateSize) {
232
+ features.push(0);
233
+ }
234
+ // Normalize to [-1, 1]
235
+ const max = Math.max(...features.map(Math.abs));
236
+ if (max > 0) {
237
+ for (let i = 0; i < features.length; i++) {
238
+ features[i] = features[i] / max;
239
+ }
240
+ }
241
+ return new Float32Array(features);
242
+ }
243
+ argmax(array) {
244
+ let maxIndex = 0;
245
+ let maxValue = array[0];
246
+ for (let i = 1; i < array.length; i++) {
247
+ if (array[i] > maxValue) {
248
+ maxValue = array[i];
249
+ maxIndex = i;
250
+ }
251
+ }
252
+ return maxIndex;
253
+ }
254
+ calculateConfidence(output) {
255
+ const arr = Array.from(output);
256
+ const max = Math.max(...arr);
257
+ const min = Math.min(...arr);
258
+ if (max === min)
259
+ return 0.5;
260
+ const spread = max - min;
261
+ return Math.min(1, 0.3 + spread * 2);
262
+ }
263
+ generateReasoning(action, value, confidence) {
264
+ const stats = this.network.getStats();
265
+ if (stats.totalSteps < 10) {
266
+ return `E-prop exploration phase (step ${stats.totalSteps}): ${action.type} action`;
267
+ }
268
+ if (confidence > 0.8) {
269
+ return (`High-confidence E-prop decision (${confidence.toFixed(2)}): ${action.type} ` +
270
+ `with value ${value.toFixed(3)} after ${stats.totalSteps} online updates`);
271
+ }
272
+ return `E-prop online learning: ${action.type} with confidence ${confidence.toFixed(2)}`;
273
+ }
274
+ }
275
+ // ============================================================================
276
+ // Factory
277
+ // ============================================================================
278
+ /**
279
+ * Create a new E-prop RL algorithm instance.
280
+ */
281
+ export function createEpropAlgorithm(config, rewardSignals) {
282
+ return new EpropAlgorithm(config, rewardSignals);
283
+ }
284
+ //# sourceMappingURL=eprop.js.map
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Agentic QE v3 - RL Algorithms Index
3
3
  *
4
- * Exports all 9 RL algorithms for QE applications.
4
+ * Exports all 10 RL algorithms for QE applications.
5
5
  */
6
6
  export { QLearningAlgorithm } from './q-learning';
7
7
  export { DecisionTransformerAlgorithm } from './decision-transformer';
@@ -12,4 +12,5 @@ export { DQNAlgorithm } from './dqn';
12
12
  export { PPOAlgorithm } from './ppo';
13
13
  export { A2CAlgorithm } from './a2c';
14
14
  export { DDPGAlgorithm } from './ddpg';
15
+ export { EpropAlgorithm, createEpropAlgorithm } from './eprop';
15
16
  //# sourceMappingURL=index.d.ts.map
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Agentic QE v3 - RL Algorithms Index
3
3
  *
4
- * Exports all 9 RL algorithms for QE applications.
4
+ * Exports all 10 RL algorithms for QE applications.
5
5
  */
6
6
  export { QLearningAlgorithm } from './q-learning';
7
7
  export { DecisionTransformerAlgorithm } from './decision-transformer';
@@ -12,4 +12,5 @@ export { DQNAlgorithm } from './dqn';
12
12
  export { PPOAlgorithm } from './ppo';
13
13
  export { A2CAlgorithm } from './a2c';
14
14
  export { DDPGAlgorithm } from './ddpg';
15
+ export { EpropAlgorithm, createEpropAlgorithm } from './eprop';
15
16
  //# sourceMappingURL=index.js.map
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Agentic QE v3 - RL Suite Main Export
3
3
  *
4
- * Complete RL Suite for Quality Engineering with 9 algorithms.
4
+ * Complete RL Suite for Quality Engineering with 10 algorithms.
5
5
  * Per ADR-040, provides reinforcement learning for QE decision-making.
6
6
  */
7
7
  import type { DomainName } from '../../shared/types';
@@ -10,7 +10,7 @@ import type { QERLSuite } from './orchestrator';
10
10
  export { QERLSuite, createQERLSuite } from './orchestrator';
11
11
  export type { RLSuiteStats } from './orchestrator';
12
12
  export { BaseRLAlgorithm } from './base-algorithm';
13
- export { QLearningAlgorithm, DecisionTransformerAlgorithm, SARSAAlgorithm, ActorCriticAlgorithm, PolicyGradientAlgorithm, DQNAlgorithm, PPOAlgorithm, A2CAlgorithm, DDPGAlgorithm, } from './algorithms';
13
+ export { QLearningAlgorithm, DecisionTransformerAlgorithm, SARSAAlgorithm, ActorCriticAlgorithm, PolicyGradientAlgorithm, DQNAlgorithm, PPOAlgorithm, A2CAlgorithm, DDPGAlgorithm, EpropAlgorithm, createEpropAlgorithm, } from './algorithms';
14
14
  export type { RLAlgorithmType, RLAlgorithmCategory, QEDomainApplication, RLState, RLAction, RLExperience, RLPrediction, RLTrainingStats, RLTrainingConfig, RLAlgorithmInfo, TestExecutionState, TestExecutionAction, CoverageAnalysisState, CoverageOptimizationAction, QualityGateState, QualityGateAction, ResourceAllocationState, ResourceAllocationAction, RewardSignal, RewardContext, RewardCalculation, AlgorithmDomainMapping, RLSuiteConfig, ALGORITHM_DOMAIN_MAPPINGS, RLAlgorithmError, RLTrainingError, RLPredictionError, RLConfigError, } from './interfaces';
15
15
  export { SONA, SONAIndex, SONAOptimizer, SONAPatternCache, createSONA, createDomainSONA, } from './sona';
16
16
  export type { SONAPattern, SONAPatternType, SONAAdaptationResult, SONAStats, SONAConfig, } from './sona';
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Agentic QE v3 - RL Suite Main Export
3
3
  *
4
- * Complete RL Suite for Quality Engineering with 9 algorithms.
4
+ * Complete RL Suite for Quality Engineering with 10 algorithms.
5
5
  * Per ADR-040, provides reinforcement learning for QE decision-making.
6
6
  */
7
7
  // ============================================================================
@@ -15,7 +15,7 @@ export { BaseRLAlgorithm } from './base-algorithm';
15
15
  // ============================================================================
16
16
  // Algorithms
17
17
  // ============================================================================
18
- export { QLearningAlgorithm, DecisionTransformerAlgorithm, SARSAAlgorithm, ActorCriticAlgorithm, PolicyGradientAlgorithm, DQNAlgorithm, PPOAlgorithm, A2CAlgorithm, DDPGAlgorithm, } from './algorithms';
18
+ export { QLearningAlgorithm, DecisionTransformerAlgorithm, SARSAAlgorithm, ActorCriticAlgorithm, PolicyGradientAlgorithm, DQNAlgorithm, PPOAlgorithm, A2CAlgorithm, DDPGAlgorithm, EpropAlgorithm, createEpropAlgorithm, } from './algorithms';
19
19
  // ============================================================================
20
20
  // SONA (Self-Optimizing Neural Architecture)
21
21
  // ============================================================================