agentic-qe 3.8.9 → 3.8.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/.claude/skills/skills-manifest.json +1 -1
  2. package/CHANGELOG.md +27 -0
  3. package/dist/cli/bundle.js +1049 -803
  4. package/dist/cli/command-registry.js +5 -1
  5. package/dist/cli/commands/pipeline.d.ts +16 -0
  6. package/dist/cli/commands/pipeline.js +314 -0
  7. package/dist/cli/commands/token-usage.js +24 -1
  8. package/dist/cli/handlers/heartbeat-handler.d.ts +26 -0
  9. package/dist/cli/handlers/heartbeat-handler.js +382 -0
  10. package/dist/cli/handlers/index.d.ts +2 -0
  11. package/dist/cli/handlers/index.js +2 -0
  12. package/dist/cli/handlers/routing-handler.d.ts +22 -0
  13. package/dist/cli/handlers/routing-handler.js +227 -0
  14. package/dist/cli/index.js +2 -0
  15. package/dist/coordination/deterministic-actions.d.ts +36 -0
  16. package/dist/coordination/deterministic-actions.js +257 -0
  17. package/dist/coordination/protocols/defect-investigation.js +3 -3
  18. package/dist/coordination/workflow-orchestrator.d.ts +18 -1
  19. package/dist/coordination/workflow-orchestrator.js +113 -3
  20. package/dist/coordination/workflow-types.d.ts +19 -1
  21. package/dist/coordination/workflow-types.js +3 -0
  22. package/dist/coordination/yaml-pipeline-loader.d.ts +1 -0
  23. package/dist/coordination/yaml-pipeline-loader.js +34 -0
  24. package/dist/domains/coverage-analysis/services/coverage-analyzer.d.ts +6 -0
  25. package/dist/domains/coverage-analysis/services/coverage-analyzer.js +35 -1
  26. package/dist/domains/defect-intelligence/services/defect-predictor.js +16 -6
  27. package/dist/domains/quality-assessment/coordinator.js +8 -1
  28. package/dist/domains/quality-assessment/plugin.js +8 -5
  29. package/dist/domains/quality-assessment/services/quality-analyzer.d.ts +0 -1
  30. package/dist/domains/quality-assessment/services/quality-analyzer.js +30 -17
  31. package/dist/domains/test-execution/interfaces.d.ts +11 -0
  32. package/dist/domains/test-execution/services/test-executor.d.ts +25 -0
  33. package/dist/domains/test-execution/services/test-executor.js +236 -13
  34. package/dist/mcp/bundle.js +399 -381
  35. package/dist/mcp/handlers/heartbeat-handlers.d.ts +67 -0
  36. package/dist/mcp/handlers/heartbeat-handlers.js +180 -0
  37. package/dist/mcp/handlers/index.d.ts +2 -1
  38. package/dist/mcp/handlers/index.js +5 -1
  39. package/dist/mcp/handlers/task-handlers.d.ts +28 -0
  40. package/dist/mcp/handlers/task-handlers.js +39 -0
  41. package/dist/mcp/protocol-server.js +45 -1
  42. package/dist/mcp/server.js +41 -1
  43. package/dist/optimization/index.d.ts +2 -0
  44. package/dist/optimization/index.js +1 -0
  45. package/dist/optimization/session-cache.d.ts +80 -0
  46. package/dist/optimization/session-cache.js +227 -0
  47. package/dist/optimization/token-optimizer-service.d.ts +10 -0
  48. package/dist/optimization/token-optimizer-service.js +51 -0
  49. package/dist/routing/economic-routing.d.ts +126 -0
  50. package/dist/routing/economic-routing.js +290 -0
  51. package/dist/routing/index.d.ts +2 -0
  52. package/dist/routing/index.js +2 -0
  53. package/dist/routing/routing-feedback.d.ts +29 -0
  54. package/dist/routing/routing-feedback.js +75 -0
  55. package/dist/workers/workers/coverage-tracker.js +25 -30
  56. package/package.json +1 -1
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Agentic QE v3 - Session Operation Cache
3
+ * Imp-15: Session Reuse for Repeated Operations
4
+ *
5
+ * Lightweight fingerprint-based cache for operation results that provides
6
+ * O(1) exact-match lookups before falling back to HNSW similarity search.
7
+ * Supplements (does not replace) the EarlyExitTokenOptimizer.
8
+ *
9
+ * Architecture:
10
+ * - SHA-256 fingerprint from canonicalized (domain + action + input)
11
+ * - In-memory Map for O(1) lookups
12
+ * - Optional SQLite persistence via kv_store (namespace: 'session_cache')
13
+ * - TTL-based expiry, LRU-ish eviction at capacity
14
+ */
15
+ import { createHash } from 'crypto';
/**
 * Baseline settings for SessionOperationCache. The constructor spreads these
 * first and then the caller's overrides, so any field can be replaced.
 */
export const DEFAULT_SESSION_CACHE_CONFIG = {
    // Master switch: when false, get() returns null and set() does nothing.
    enabled: true,
    // Number of in-memory entries at which set() starts evicting.
    maxEntries: 500,
    // Entry lifetime in milliseconds: 1000 ms x 60 s x 60 min = 1 hour.
    ttlMs: 1000 * 60 * 60,
    // Mirror each stored entry into kv_store (best effort).
    persistToDb: true,
};
22
+ // ============================================================================
23
+ // Canonical JSON (deterministic, recursively sorted keys)
24
+ // ============================================================================
/**
 * Produce a deterministic JSON string with recursively sorted object keys,
 * so that logically identical values always yield the same fingerprint.
 *
 * The output follows JSON.stringify semantics for values JSON cannot represent:
 * - a value's own `toJSON()` is honoured (so two different Dates no longer
 *   both collapse to `{}`),
 * - `undefined`, functions and symbols become `null` inside arrays
 *   (so `[undefined]` and `[]` no longer collide) and are omitted from objects,
 * - sparse array holes become `null`.
 * For plain JSON-safe input the result is unchanged from the previous
 * implementation.
 *
 * Circular structures are not detected; they overflow the stack, which
 * callers in this file treat as a recoverable failure.
 *
 * @param value Any value to canonicalize.
 * @returns The canonical JSON text, or `undefined` when the top-level value
 *   itself has no JSON representation (e.g. `undefined`).
 */
function canonicalStringify(value) {
    if (value === null || typeof value !== 'object')
        return JSON.stringify(value);
    // Like JSON.stringify, apply toJSON once and serialize whatever it returns.
    const resolved = typeof value.toJSON === 'function' ? value.toJSON() : value;
    if (resolved === null || typeof resolved !== 'object')
        return JSON.stringify(resolved);
    if (Array.isArray(resolved)) {
        // Array.from visits holes too (Array.prototype.map would skip them).
        const items = Array.from(resolved, item => canonicalStringify(item) ?? 'null');
        return '[' + items.join(',') + ']';
    }
    const pairs = [];
    for (const key of Object.keys(resolved).sort()) {
        const encoded = canonicalStringify(resolved[key]);
        // Properties without a JSON representation are dropped, as JSON.stringify does.
        if (encoded !== undefined)
            pairs.push(JSON.stringify(key) + ':' + encoded);
    }
    return '{' + pairs.join(',') + '}';
}
42
+ // ============================================================================
43
+ // Implementation
44
+ // ============================================================================
45
+ export class SessionOperationCache {
46
+ cache = new Map();
47
+ config;
48
+ hits = 0;
49
+ misses = 0;
50
+ constructor(config) {
51
+ this.config = { ...DEFAULT_SESSION_CACHE_CONFIG, ...config };
52
+ }
53
+ /**
54
+ * Compute a deterministic fingerprint from domain + action + input.
55
+ * Uses SHA-256 of the canonicalized JSON (recursively sorted keys), truncated to 16 hex chars.
56
+ */
57
+ computeFingerprint(domain, action, input) {
58
+ const canonical = canonicalStringify({ action, domain, input });
59
+ return createHash('sha256').update(canonical).digest('hex').slice(0, 16);
60
+ }
61
+ /**
62
+ * Look up a cached result by fingerprint.
63
+ * Returns null on miss or expired entry.
64
+ */
65
+ get(fingerprint) {
66
+ if (!this.config.enabled)
67
+ return null;
68
+ const entry = this.cache.get(fingerprint);
69
+ if (!entry) {
70
+ this.misses++;
71
+ return null;
72
+ }
73
+ // Check TTL
74
+ if (Date.now() - entry.cachedAt > this.config.ttlMs) {
75
+ this.cache.delete(fingerprint);
76
+ this.misses++;
77
+ return null;
78
+ }
79
+ entry.hitCount++;
80
+ entry.lastHitAt = Date.now();
81
+ this.hits++;
82
+ return entry;
83
+ }
84
+ /**
85
+ * Store an operation result in the cache.
86
+ */
87
+ set(fingerprint, domain, action, result, estimatedTokens) {
88
+ if (!this.config.enabled)
89
+ return;
90
+ // Evict oldest if at capacity
91
+ if (this.cache.size >= this.config.maxEntries) {
92
+ this.evictOldest();
93
+ }
94
+ const entry = {
95
+ fingerprint,
96
+ domain,
97
+ action,
98
+ result,
99
+ tokensSaved: estimatedTokens,
100
+ cachedAt: Date.now(),
101
+ hitCount: 0,
102
+ lastHitAt: 0,
103
+ };
104
+ this.cache.set(fingerprint, entry);
105
+ // Persist to DB (fire-and-forget, non-blocking)
106
+ if (this.config.persistToDb) {
107
+ this.persistEntry(entry);
108
+ }
109
+ }
110
+ /**
111
+ * Load persisted cache entries from SQLite kv_store.
112
+ * Called on service initialization. Gracefully degrades if DB unavailable.
113
+ */
114
+ loadFromDb() {
115
+ try {
116
+ const db = tryGetDb();
117
+ if (!db)
118
+ return;
119
+ const cutoffMs = Date.now() - this.config.ttlMs;
120
+ const rows = db.prepare(`SELECT key, value FROM kv_store
121
+ WHERE namespace = 'session_cache'
122
+ AND created_at > ?
123
+ ORDER BY created_at DESC LIMIT ?`).all(cutoffMs, this.config.maxEntries);
124
+ for (const row of rows) {
125
+ try {
126
+ const entry = JSON.parse(row.value);
127
+ if (Date.now() - entry.cachedAt <= this.config.ttlMs) {
128
+ this.cache.set(entry.fingerprint, entry);
129
+ }
130
+ }
131
+ catch {
132
+ /* skip corrupt entries */
133
+ }
134
+ }
135
+ }
136
+ catch {
137
+ /* graceful degradation - cache works without persistence */
138
+ }
139
+ }
140
+ /** Get cache statistics */
141
+ getStats() {
142
+ const total = this.hits + this.misses;
143
+ let totalSaved = 0;
144
+ for (const entry of this.cache.values()) {
145
+ totalSaved += entry.tokensSaved * entry.hitCount;
146
+ }
147
+ return {
148
+ size: this.cache.size,
149
+ hits: this.hits,
150
+ misses: this.misses,
151
+ hitRate: total > 0 ? this.hits / total : 0,
152
+ estimatedTokensSaved: totalSaved,
153
+ };
154
+ }
155
+ /** Clear all cache entries and reset counters */
156
+ clear() {
157
+ this.cache.clear();
158
+ this.hits = 0;
159
+ this.misses = 0;
160
+ }
161
+ /** Evict the oldest entry by cachedAt */
162
+ evictOldest() {
163
+ let oldestKey = null;
164
+ let oldestTime = Infinity;
165
+ for (const [key, entry] of this.cache) {
166
+ if (entry.cachedAt < oldestTime) {
167
+ oldestTime = entry.cachedAt;
168
+ oldestKey = key;
169
+ }
170
+ }
171
+ if (oldestKey)
172
+ this.cache.delete(oldestKey);
173
+ }
174
+ /** Persist a single entry to kv_store */
175
+ persistEntry(entry) {
176
+ try {
177
+ const db = tryGetDb();
178
+ if (!db)
179
+ return;
180
+ db.prepare(`INSERT OR REPLACE INTO kv_store (key, namespace, value, created_at)
181
+ VALUES (?, 'session_cache', ?, ?)`).run(`session_cache:${entry.fingerprint}`, JSON.stringify(entry), Date.now());
182
+ }
183
+ catch {
184
+ /* non-critical - cache works without persistence */
185
+ }
186
+ }
187
+ }
188
+ // ============================================================================
189
+ // DB Helper
190
+ // ============================================================================
/**
 * Best-effort accessor for the unified memory database handle.
 *
 * @returns Whatever `getUnifiedMemory().getDatabase()` yields when the unified
 *   memory reports itself initialized; null when it is not initialized or
 *   when anything along the way throws.
 *
 * NOTE(review): this file uses ES `import`/`export`. If it is ever loaded as a
 * native ES module, `require` would be undefined, the ReferenceError would be
 * swallowed below, and persistence would be silently disabled - confirm how
 * the build provides `require`.
 */
function tryGetDb() {
    let database = null;
    try {
        // Resolved lazily, at call time, to avoid circular dependencies at import time.
        // eslint-disable-next-line @typescript-eslint/no-require-imports
        const unifiedMemoryModule = require('../kernel/unified-memory.js');
        const { getUnifiedMemory } = unifiedMemoryModule;
        const memory = getUnifiedMemory();
        if (memory.isInitialized()) {
            database = memory.getDatabase();
        }
    }
    catch {
        // Any failure means "no database available".
        database = null;
    }
    return database;
}
209
+ // ============================================================================
210
+ // Singleton
211
+ // ============================================================================
212
+ let instance = null;
213
+ export function getSessionCache(config) {
214
+ if (!instance) {
215
+ instance = new SessionOperationCache(config);
216
+ instance.loadFromDb();
217
+ }
218
+ return instance;
219
+ }
220
+ /** Reset the singleton (for testing) */
221
+ export function resetSessionCache() {
222
+ if (instance) {
223
+ instance.clear();
224
+ }
225
+ instance = null;
226
+ }
227
+ //# sourceMappingURL=session-cache.js.map
@@ -6,6 +6,7 @@
6
6
  * integrating with PatternStore and TokenMetricsCollector.
7
7
  */
8
8
  import { EarlyExitConfig, EarlyExitResult, EarlyExitTask, ReuseStats } from './early-exit-token-optimizer.js';
9
+ import { type SessionCacheStats } from './session-cache.js';
9
10
  import type { MemoryBackend } from '../kernel/interfaces.js';
10
11
  import type { QEPattern, QEDomain } from '../learning/qe-patterns.js';
11
12
  /**
@@ -82,6 +83,15 @@ declare class TokenOptimizerServiceImpl {
82
83
  * @returns Multi-line dashboard string
83
84
  */
84
85
  getDashboardSummary(): string;
86
+ /**
87
+ * Imp-15: Store a result in the session cache for future O(1) reuse.
88
+ * Call this after a successful LLM execution to enable exact-match caching.
+ *
+ * The entry is keyed by a fingerprint computed from (domain, action, input).
+ * The early-exit lookup in this service fingerprints
+ * (task.domain ?? 'unknown', task.description, task.context ?? {}), so pass
+ * the same three values here for a later lookup to hit.
+ * Best effort: the implementation wraps the cache calls in try/catch, so a
+ * cache failure is swallowed rather than thrown to the caller.
+ *
+ * @param estimatedTokens Recorded on the entry as the tokens saved per hit.
89
+ */
90
+ cacheOperationResult(domain: string, action: string, input: Record<string, unknown>, result: Record<string, unknown>, estimatedTokens: number): void;
91
+ /**
92
+ * Imp-15: Get session cache statistics (hit rate, tokens saved, cache size).
+ *
+ * @returns The session cache's current stats. If the cache cannot be
+ *   accessed, the implementation returns an all-zero stats object instead
+ *   of throwing.
93
+ */
94
+ getSessionCacheStats(): SessionCacheStats;
85
95
  /**
86
96
  * Reset the service (useful for testing)
87
97
  */
@@ -9,6 +9,7 @@ import { randomUUID } from 'crypto';
9
9
  import { EarlyExitTokenOptimizer, DEFAULT_EARLY_EXIT_CONFIG } from './early-exit-token-optimizer.js';
10
10
  import { createPatternStore } from '../learning/pattern-store.js';
11
11
  import { TokenMetricsCollector, formatDashboardSummary } from '../learning/token-tracker.js';
12
+ import { getSessionCache } from './session-cache.js';
12
13
  const DEFAULT_SERVICE_CONFIG = {
13
14
  enabled: true,
14
15
  earlyExit: DEFAULT_EARLY_EXIT_CONFIG,
@@ -72,6 +73,31 @@ class TokenOptimizerServiceImpl {
72
73
  searchLatencyMs: 0,
73
74
  };
74
75
  }
76
+ // Imp-15: O(1) exact-match check via fingerprint cache BEFORE HNSW search
77
+ try {
78
+ const cache = getSessionCache();
79
+ const fingerprint = cache.computeFingerprint(task.domain ?? 'unknown', task.description, task.context ?? {});
80
+ const cached = cache.get(fingerprint);
81
+ if (cached) {
82
+ TokenMetricsCollector.recordEarlyExit(cached.tokensSaved);
83
+ if (this.config.verbose) {
84
+ console.log(`[TokenOptimizerService] Session cache hit: ${fingerprint.slice(0, 8)}... ` +
85
+ `(saved ${cached.tokensSaved} tokens)`);
86
+ }
87
+ return {
88
+ canExit: true,
89
+ estimatedTokensSaved: cached.tokensSaved,
90
+ confidence: 1.0,
91
+ similarityScore: 1.0,
92
+ reason: 'pattern_reused',
93
+ explanation: `Session cache exact match (fingerprint: ${fingerprint.slice(0, 8)}...)`,
94
+ searchLatencyMs: 0,
95
+ };
96
+ }
97
+ }
98
+ catch {
99
+ // Graceful degradation: if session cache fails, fall through to HNSW
100
+ }
75
101
  const result = await this.optimizer.checkEarlyExit(task);
76
102
  // Record early exit in TokenMetricsCollector
77
103
  if (result.canExit && result.estimatedTokensSaved) {
@@ -166,6 +192,31 @@ class TokenOptimizerServiceImpl {
166
192
  getDashboardSummary() {
167
193
  return formatDashboardSummary();
168
194
  }
195
+ /**
196
+ * Imp-15: Store a result in the session cache for future O(1) reuse.
197
+ * Call this after a successful LLM execution to enable exact-match caching.
198
+ */
199
+ cacheOperationResult(domain, action, input, result, estimatedTokens) {
200
+ try {
201
+ const cache = getSessionCache();
202
+ const fingerprint = cache.computeFingerprint(domain, action, input);
203
+ cache.set(fingerprint, domain, action, result, estimatedTokens);
204
+ }
205
+ catch {
206
+ // Graceful degradation
207
+ }
208
+ }
209
+ /**
210
+ * Imp-15: Get session cache statistics (hit rate, tokens saved, cache size).
211
+ */
212
+ getSessionCacheStats() {
213
+ try {
214
+ return getSessionCache().getStats();
215
+ }
216
+ catch {
217
+ return { size: 0, hits: 0, misses: 0, hitRate: 0, estimatedTokensSaved: 0 };
218
+ }
219
+ }
169
220
  /**
170
221
  * Reset the service (useful for testing)
171
222
  */
@@ -0,0 +1,126 @@
1
+ /**
2
+ * Economic Routing Model — Imp-18 (Issue #334)
3
+ *
4
+ * Quality-weighted cost optimization for the routing system.
5
+ * Scores tiers by quality-per-dollar efficiency, respects budget limits,
6
+ * and produces cost-adjusted rewards so the neural router learns to
7
+ * prefer cost-efficient tiers.
8
+ *
9
+ * @module routing/economic-routing
10
+ */
11
+ import { CostTracker } from '../shared/llm/cost-tracker.js';
12
+ import type { AgentTier } from './routing-config.js';
13
+ import type { RoutingOutcome } from './types.js';
14
+ /**
15
+ * Tier cost estimates (per typical QE task, in USD).
16
+ * Based on average token usage per task type.
17
+ */
18
+ export declare const TIER_COST_ESTIMATES: Record<AgentTier, {
19
+ avgInputTokens: number;
20
+ avgOutputTokens: number;
21
+ costPerTask: number;
22
+ }>;
23
+ export interface EconomicScore {
24
+ tier: AgentTier;
25
+ /** Expected quality (0-1) */
26
+ qualityScore: number;
27
+ /** Estimated cost per task in USD */
28
+ estimatedCostUsd: number;
29
+ /** quality / cost (higher = more efficient). Infinity for zero-cost tiers. */
30
+ qualityPerDollar: number;
31
+ /** Combined score factoring quality + cost (higher = better) */
32
+ economicScore: number;
33
+ }
34
+ export interface EconomicRoutingConfig {
35
+ /** Weight for quality in combined score (0-1, default 0.6) */
36
+ qualityWeight: number;
37
+ /** Weight for cost efficiency in combined score (0-1, default 0.4) */
38
+ costWeight: number;
39
+ /** Budget limit per hour in USD (0 = unlimited) */
40
+ budgetPerHourUsd: number;
41
+ /** Budget limit per day in USD (0 = unlimited) */
42
+ budgetPerDayUsd: number;
43
+ /** Minimum quality threshold -- never route to cheaper tier below this (0-1) */
44
+ minQualityThreshold: number;
45
+ /** Enable economic routing (default: true) */
46
+ enabled: boolean;
47
+ }
48
+ export declare const DEFAULT_ECONOMIC_CONFIG: EconomicRoutingConfig;
49
+ export interface EconomicReport { // Shape returned by EconomicRoutingModel.getEconomicReport()
50
+ tierEfficiency: EconomicScore[]; // presumably one EconomicScore per tier; ordering not visible here - confirm
51
+ currentHourlyCostUsd: number; // presumably spend in the current hour, in USD - confirm against CostTracker
52
+ currentDailyCostUsd: number; // presumably spend in the current day, in USD - confirm against CostTracker
53
+ budgetRemaining: {
54
+ hourly: number | null; // NOTE(review): null presumably means no hourly limit (budgetPerHourUsd = 0) - confirm
55
+ daily: number | null; // NOTE(review): null presumably means no daily limit (budgetPerDayUsd = 0) - confirm
56
+ };
57
+ recommendation: string; // presumably human-readable advice text - confirm
58
+ savingsOpportunity: { // nullable; the condition that makes it null is not visible in this declaration
59
+ usd: number;
60
+ description: string;
61
+ } | null;
62
+ }
63
+ export declare class EconomicRoutingModel { // Economic (quality vs. cost) tier scoring and selection; see module header
64
+ private config; // presumably the constructor's partial config merged over DEFAULT_ECONOMIC_CONFIG - confirm
65
+ private costTracker; // presumably the CostTracker passed to the constructor - confirm
66
+ private tierQualityEstimates; // presumably the per-tier `quality` exposed by serializeEstimates() - confirm
67
+ private tierOutcomeCounts; // presumably the per-tier `count` exposed by serializeEstimates() - confirm
68
+ constructor(costTracker: CostTracker, config?: Partial<EconomicRoutingConfig>); // config is optional and partial
69
+ /**
70
+ * Score each tier by quality-per-dollar efficiency.
71
+ * Returns all tiers sorted by economicScore descending.
+ *
+ * NOTE(review): the expected range of taskComplexity is not visible in
+ * this declaration - confirm against the implementation.
72
+ */
73
+ scoreTiers(taskComplexity: number): EconomicScore[];
74
+ /**
75
+ * Select the best tier considering quality AND cost.
76
+ * Respects budget limits and minimum quality thresholds.
+ *
+ * @returns The chosen tier, a textual reason, and the EconomicScore list
+ *   (presumably the scoreTiers() result for the same complexity - confirm).
77
+ */
78
+ selectTier(taskComplexity: number): {
79
+ tier: AgentTier;
80
+ reason: string;
81
+ scores: EconomicScore[];
82
+ };
83
+ /**
84
+ * Check if a tier would exceed the budget.
+ *
+ * NOTE(review): presumably checked against budgetPerHourUsd / budgetPerDayUsd
+ * from the config - confirm against the implementation.
85
+ */
86
+ wouldExceedBudget(tier: AgentTier): boolean;
87
+ /**
88
+ * Update quality estimates from observed outcomes.
89
+ * Uses EMA to smooth estimates.
90
+ */
91
+ updateFromOutcome(outcome: RoutingOutcome, tier: AgentTier): void;
92
+ /**
93
+ * Get economic efficiency report.
94
+ */
95
+ getEconomicReport(): EconomicReport;
96
+ /**
97
+ * Compute cost-adjusted reward for the neural router.
98
+ * Penalizes expensive tiers that don't deliver proportionally higher quality.
+ *
+ * NOTE(review): the value ranges of baseReward, qualityScore and the
+ * returned reward are not visible in this declaration - confirm.
99
+ */
100
+ computeCostAdjustedReward(baseReward: number, tier: AgentTier, qualityScore: number): number;
101
+ /**
102
+ * Serialize quality estimates for persistence.
+ *
+ * @returns A record of `{ quality, count }` objects; keys are presumably
+ *   tier names - confirm.
103
+ */
104
+ serializeEstimates(): Record<string, {
105
+ quality: number;
106
+ count: number;
107
+ }>;
108
+ /**
109
+ * Deserialize quality estimates from persistence.
+ *
+ * @param data Same shape that serializeEstimates() returns.
110
+ */
111
+ deserializeEstimates(data: Record<string, {
112
+ quality: number;
113
+ count: number;
114
+ }>): void;
115
+ /**
116
+ * Get the current config (read-only copy).
117
+ */
118
+ getConfig(): Readonly<EconomicRoutingConfig>;
119
+ /**
120
+ * Get quality estimate for a tier, adjusted by task complexity.
121
+ * Higher complexity tasks benefit more from higher-tier models.
122
+ */
123
+ private getQualityEstimate;
124
+ private generateRecommendation; // presumably builds EconomicReport.recommendation - confirm
125
+ }
126
+ //# sourceMappingURL=economic-routing.d.ts.map