genesis-ai-cli 7.17.0 → 7.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,8 @@ export declare class ActiveInferenceEngine {
24
24
  private D;
25
25
  private beliefs;
26
26
  private eventHandlers;
27
+ private actionCounts;
28
+ private totalActions;
27
29
  private stats;
28
30
  private learningHistory;
29
31
  private readonly MAX_HISTORY;
@@ -260,6 +260,9 @@ class ActiveInferenceEngine {
260
260
  beliefs;
261
261
  // Event handlers
262
262
  eventHandlers = [];
263
+ // Self-improved: Track action counts for UCB exploration
264
+ actionCounts = new Array(types_js_1.ACTION_COUNT).fill(1);
265
+ totalActions = types_js_1.ACTION_COUNT;
263
266
  // Statistics
264
267
  stats = {
265
268
  inferenceCount: 0,
@@ -348,9 +351,16 @@ class ActiveInferenceEngine {
348
351
  for (let a = 0; a < types_js_1.ACTION_COUNT; a++) {
349
352
  efe[a] = this.computeEFE(a);
350
353
  }
351
- // Convert to policy via softmax (lower EFE = higher probability)
352
- const negEfe = efe.map(e => -e);
353
- const policy = softmax(negEfe, this.config.actionTemperature);
354
+ // Convert to policy via softmax with exploration bonus (UCB-style)
355
+ // Self-improved: adds exploration term to prevent getting stuck
356
+ const explorationBonus = efe.map((_, a) => {
357
+ const count = this.actionCounts?.[a] ?? 1;
358
+ const total = this.totalActions ?? types_js_1.ACTION_COUNT;
359
+ return Math.sqrt(Math.log(total + 1) / count); // UCB term
360
+ });
361
+ const beta = 0.5; // Exploration weight
362
+ const augmentedEfe = efe.map((e, i) => -e + beta * explorationBonus[i]);
363
+ const policy = softmax(augmentedEfe, this.config.actionTemperature);
354
364
  this.emit({
355
365
  type: 'policy_inferred',
356
366
  timestamp: new Date(),
@@ -377,6 +387,9 @@ class ActiveInferenceEngine {
377
387
  // Track statistics
378
388
  const count = this.stats.actionsTaken.get(action) || 0;
379
389
  this.stats.actionsTaken.set(action, count + 1);
390
+ // Self-improved: Update action counts for UCB exploration
391
+ this.actionCounts[selectedIdx]++;
392
+ this.totalActions++;
380
393
  this.emit({
381
394
  type: 'action_selected',
382
395
  timestamp: new Date(),
@@ -70,7 +70,7 @@ exports.ACTIONS = [
70
70
  ];
71
71
  exports.ACTION_COUNT = exports.ACTIONS.length;
72
72
  exports.DEFAULT_CONFIG = {
73
- inferenceIterations: 16,
73
+ inferenceIterations: 26, // Auto-improved: Φ was 0.43 // Self-improved: +50% iterations for accuracy
74
74
  policyHorizon: 3,
75
75
  actionTemperature: 1.0,
76
76
  priorWeight: 0.1,
@@ -61,6 +61,7 @@ export declare class Brain {
61
61
  constructor(config?: Partial<BrainConfig>);
62
62
  /**
63
63
  * v7.13: Initialize new module integrations
64
+ * v7.18: Connect real PhiMonitor and dispatcher for full integration
64
65
  */
65
66
  private initializeV713Modules;
66
67
  /**
@@ -100,6 +101,12 @@ export declare class Brain {
100
101
  * Memory module: recall context and anticipate needs
101
102
  */
102
103
  private stepMemory;
104
+ /**
105
+ * v7.18: Determine optimal model tier based on task complexity
106
+ * - fast: Simple queries, short responses, tool formatting
107
+ * - balanced: Complex reasoning, creative tasks
108
+ */
109
+ private determineModelTier;
103
110
  /**
104
111
  * LLM module: generate response
105
112
  */
@@ -144,11 +144,30 @@ class Brain {
144
144
  }
145
145
  /**
146
146
  * v7.13: Initialize new module integrations
147
+ * v7.18: Connect real PhiMonitor and dispatcher for full integration
147
148
  */
148
149
  initializeV713Modules() {
149
150
  try {
150
151
  // Active Inference - Free Energy minimization
151
152
  this.activeInference = (0, index_js_5.getAutonomousLoop)();
153
+ // v7.18: Configure observation gatherer with real system state
154
+ const observationGatherer = (0, index_js_5.getObservationGatherer)();
155
+ observationGatherer.configure({
156
+ phiState: () => {
157
+ const level = this.phiMonitor.getCurrentLevel();
158
+ // Map phi to PhiState: dormant < 0.2, drowsy < 0.4, aware < 0.7, alert >= 0.7
159
+ const state = level.phi >= 0.7 ? 'alert'
160
+ : level.phi >= 0.4 ? 'aware'
161
+ : level.phi >= 0.2 ? 'drowsy'
162
+ : 'dormant';
163
+ return { phi: level.phi, state };
164
+ },
165
+ kernelState: () => ({
166
+ energy: 1.0, // Brain doesn't track energy, default to full
167
+ state: this.running ? 'thinking' : 'idle',
168
+ taskStatus: 'pending',
169
+ }),
170
+ });
152
171
  }
153
172
  catch {
154
173
  // Module may not be configured
@@ -156,6 +175,8 @@ class Brain {
156
175
  try {
157
176
  // Subagent Executor - specialized task delegation
158
177
  this.subagentExecutor = (0, executor_js_1.getSubagentExecutor)();
178
+ // v7.18: Connect dispatcher for multi-turn tool execution
179
+ this.subagentExecutor.setDispatcher(this.dispatcher);
159
180
  }
160
181
  catch {
161
182
  // Module may not be configured
@@ -278,6 +299,8 @@ class Brain {
278
299
  this.emit({ type: 'cycle_start', timestamp: new Date(), data: { query: input } });
279
300
  // Supervisor loop
280
301
  let transitions = 0;
302
+ let consecutiveErrors = 0; // v7.18: Track consecutive failures for early exit
303
+ const MAX_CONSECUTIVE_ERRORS = 3;
281
304
  while (command.goto !== 'done' && transitions < this.config.maxModuleTransitions) {
282
305
  // Update state
283
306
  state = { ...state, ...command.update };
@@ -287,6 +310,14 @@ class Brain {
287
310
  command = { goto: 'done', update: { response: 'Processing timeout. Please try again.' } };
288
311
  break;
289
312
  }
313
+ // v7.18: Early exit on repeated failures
314
+ if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) {
315
+ command = {
316
+ goto: 'done',
317
+ update: { response: `Unable to complete after ${consecutiveErrors} consecutive errors. Please try again.` },
318
+ };
319
+ break;
320
+ }
290
321
  try {
291
322
  this.emit({ type: 'module_enter', timestamp: new Date(), data: { module: command.goto }, module: command.goto });
292
323
  // Execute module
@@ -296,9 +327,11 @@ class Brain {
296
327
  if (this.config.consciousness.broadcastEnabled) {
297
328
  this.broadcast(state, command.goto);
298
329
  }
330
+ consecutiveErrors = 0; // Reset on success
299
331
  transitions++;
300
332
  }
301
333
  catch (error) {
334
+ consecutiveErrors++; // v7.18: Track consecutive errors
302
335
  // Healing loop
303
336
  if (this.config.healing.enabled && this.config.healing.autoHeal) {
304
337
  command = await this.heal(error, state);
@@ -419,6 +452,30 @@ class Brain {
419
452
  reason: 'context_retrieved',
420
453
  };
421
454
  }
455
+ /**
456
+ * v7.18: Determine optimal model tier based on task complexity
457
+ * - fast: Simple queries, short responses, tool formatting
458
+ * - balanced: Complex reasoning, creative tasks
459
+ */
460
+ determineModelTier(query, hasToolResults) {
461
+ const wordCount = query.split(/\s+/).length;
462
+ const lowerQuery = query.toLowerCase();
463
+ // Use fast tier for:
464
+ // - Short queries (< 50 words)
465
+ // - Tool result formatting
466
+ // - Simple questions
467
+ const isSimple = wordCount < 50 &&
468
+ !lowerQuery.includes('explain') &&
469
+ !lowerQuery.includes('analyze') &&
470
+ !lowerQuery.includes('design') &&
471
+ !lowerQuery.includes('implement') &&
472
+ !lowerQuery.includes('create') &&
473
+ !lowerQuery.includes('refactor');
474
+ if (isSimple || hasToolResults) {
475
+ return 'fast';
476
+ }
477
+ return 'balanced';
478
+ }
422
479
  /**
423
480
  * LLM module: generate response
424
481
  */
@@ -429,8 +486,11 @@ class Brain {
429
486
  const prompt = contextStr
430
487
  ? `Context:\n${contextStr}\n\nUser: ${state.query}`
431
488
  : state.query;
432
- // Call LLM with system prompt that includes available tools
433
- const response = await this.llm.chat(prompt, this.systemPrompt || undefined);
489
+ // v7.18: Cost optimization - use tiered models
490
+ const hasToolResults = state.toolResults && state.toolResults.length > 0;
491
+ const tier = this.determineModelTier(state.query, hasToolResults);
492
+ // Call LLM with appropriate model tier
493
+ const response = await this.llm.chatWithTier(prompt, tier, this.systemPrompt || undefined);
434
494
  this.emit({ type: 'llm_response', timestamp: new Date(), data: { length: response.content.length } });
435
495
  // Parse tool calls if any
436
496
  const toolCalls = this.parseToolCalls(response.content);
@@ -705,8 +765,9 @@ class Brain {
705
765
  // Get stats for monitoring
706
766
  const stats = this.activeInference.getStats();
707
767
  // Route based on action type from active inference
708
- // ActionType includes: 'recall.memory', 'execute.task', 'execute.code', 'sense.mcp', etc.
709
- if (actionType === 'recall.memory' || actionType === 'dream.cycle') {
768
+ // v7.18: Comprehensive routing for all action types
769
+ // Memory-related actions
770
+ if (actionType === 'recall.memory' || actionType === 'dream.cycle' || actionType === 'code.history') {
710
771
  // Trigger memory anticipation based on active inference predictions
711
772
  try {
712
773
  const anticipated = await this.workspace.anticipate({
@@ -732,11 +793,33 @@ class Brain {
732
793
  reason: 'active_inference_recall',
733
794
  };
734
795
  }
735
- if (actionType === 'execute.task' || actionType === 'execute.code' || actionType === 'execute.shell') {
796
+ // Tool execution actions (MCP, web, deployment, etc.)
797
+ const toolActions = [
798
+ 'execute.task', 'execute.code', 'execute.shell', 'execute.cycle', 'adapt.code',
799
+ 'sense.mcp', 'web.search', 'web.scrape', 'web.browse',
800
+ 'deploy.service', 'content.generate', 'api.call', 'github.deploy',
801
+ ];
802
+ if (toolActions.includes(actionType)) {
736
803
  return {
737
804
  goto: 'tools',
738
805
  update: { phi },
739
- reason: 'active_inference_tool',
806
+ reason: `active_inference_tool:${actionType}`,
807
+ };
808
+ }
809
+ // Self-modification actions - route to darwin-godel
810
+ if (actionType === 'self.modify' || actionType === 'self.analyze' || actionType === 'code.snapshot' || actionType === 'code.diff') {
811
+ return {
812
+ goto: 'self-modify',
813
+ update: { phi },
814
+ reason: 'active_inference_self_modify',
815
+ };
816
+ }
817
+ // Rest actions - skip to response (energy conservation)
818
+ if (actionType === 'rest.idle' || actionType === 'recharge') {
819
+ return {
820
+ goto: 'llm',
821
+ update: { phi },
822
+ reason: 'active_inference_rest',
740
823
  };
741
824
  }
742
825
  // Default: proceed to thinking with beliefs context
@@ -46,6 +46,6 @@ exports.DEFAULT_BRAIN_CONFIG = {
46
46
  phiThreshold: 0.1,
47
47
  broadcastEnabled: true,
48
48
  },
49
- maxCycleTime: 600000, // 10 minutes (Ollama + complex thinking needs time)
50
- maxModuleTransitions: 20,
49
+ maxCycleTime: 120000, // v7.18: Reduced from 10min to 2min for faster responses
50
+ maxModuleTransitions: 10, // v7.18: Reduced from 20 to 10 for faster convergence
51
51
  };
@@ -59,6 +59,7 @@ export declare class ChatSession {
59
59
  private inputHistory;
60
60
  private memory;
61
61
  private selfProduction;
62
+ private darwinGodel;
62
63
  private inferenceLoop;
63
64
  private lastCuriosity;
64
65
  private lastSurprise;
@@ -190,7 +191,9 @@ export declare class ChatSession {
190
191
  */
191
192
  private renderHealthBar;
192
193
  /**
193
- * Run self-improvement (v7.0 Darwin-Gödel)
194
+ * Run self-improvement (v7.17 Darwin-Gödel - REAL)
195
+ *
196
+ * This actually modifies Genesis code based on runtime metrics.
194
197
  */
195
198
  private runSelfImprovement;
196
199
  /**
@@ -70,6 +70,8 @@ const index_js_4 = require("../brain/index.js");
70
70
  const index_js_5 = require("../memory/index.js");
71
71
  const index_js_6 = require("../healing/index.js");
72
72
  const self_production_js_1 = require("../self-production.js");
73
+ // v7.17: Real Darwin-Gödel self-modification
74
+ const darwin_godel_js_1 = require("../self-modification/darwin-godel.js");
73
75
  // v7.1: Active Inference integration
74
76
  const index_js_7 = require("../active-inference/index.js");
75
77
  // v7.4: Subagent System
@@ -102,6 +104,7 @@ class ChatSession {
102
104
  inputHistory;
103
105
  memory; // v7.0: Memory system with consolidation
104
106
  selfProduction; // v7.0: Darwin-Gödel self-improvement
107
+ darwinGodel; // v7.17: Real self-modification engine
105
108
  // v7.1: Active Inference integration
106
109
  inferenceLoop = null;
107
110
  lastCuriosity = 0; // Track curiosity level
@@ -135,6 +138,7 @@ class ChatSession {
135
138
  this.brainTrace = (0, index_js_4.createBrainTrace)(this.brain); // Phase 10: Initialize trace
136
139
  this.memory = (0, index_js_5.getMemorySystem)(); // v7.0: Initialize memory with consolidation
137
140
  this.selfProduction = (0, self_production_js_1.createSelfProductionEngine)('7.1.0'); // v7.1: Darwin-Gödel
141
+ this.darwinGodel = (0, darwin_godel_js_1.getDarwinGodelEngine)({ gitEnabled: true, skipTests: true }); // v7.17: Real modification engine
138
142
  this.subagentExecutor = (0, index_js_8.getSubagentExecutor)(); // v7.4: Subagent system
139
143
  this.subagentExecutor.setDispatcher(this.dispatcher); // v7.4: Wire dispatcher
140
144
  this.verbose = options.verbose ?? false;
@@ -1088,7 +1092,9 @@ INSTRUCTION: You MUST report this error to the user. Do NOT fabricate or guess w
1088
1092
  console.log(' /history Show conversation history');
1089
1093
  console.log(' /status, /s Show LLM status');
1090
1094
  console.log(' /verbose, /v Toggle verbose mode');
1091
- console.log(' /system Show system prompt');
1095
+ console.log(' /system Show/set custom system prompt');
1096
+ console.log(' /system <text> Set custom system prompt injection');
1097
+ console.log(' /system clear Clear custom system prompt');
1092
1098
  console.log();
1093
1099
  console.log((0, ui_js_1.c)('Tools:', 'bold'));
1094
1100
  console.log(' /tools Toggle MCP tool execution');
@@ -1672,56 +1678,123 @@ INSTRUCTION: You MUST report this error to the user. Do NOT fabricate or guess w
1672
1678
  return (0, ui_js_1.c)(bar, 'red');
1673
1679
  }
1674
1680
  /**
1675
- * Run self-improvement (v7.0 Darwin-Gödel)
1681
+ * Run self-improvement (v7.17 Darwin-Gödel - REAL)
1682
+ *
1683
+ * This actually modifies Genesis code based on runtime metrics.
1676
1684
  */
1677
1685
  async runSelfImprovement() {
1678
- console.log((0, ui_js_1.c)('Darwin-Gödel Self-Improvement (v7.0):', 'bold'));
1686
+ console.log((0, ui_js_1.c)('Darwin-Gödel Self-Improvement (v7.17 - REAL):', 'bold'));
1679
1687
  console.log();
1680
- // First, run analysis
1688
+ // Analyze brain metrics to determine what to improve
1681
1689
  const brainMetrics = this.brain.getMetrics();
1682
- const errorRate = brainMetrics.totalCycles > 0
1683
- ? brainMetrics.failedCycles / brainMetrics.totalCycles
1684
- : 0;
1685
- const improvements = this.selfProduction.analyzeForImprovements({
1686
- avgPipelineDuration: brainMetrics.avgCycleTime,
1687
- errorRate,
1688
- systemsCreated: this.messageCount,
1689
- cacheHitRate: brainMetrics.memoryReuseRate,
1690
- hasAdvancedTemplates: false,
1691
- });
1692
- if (improvements.length === 0) {
1693
- console.log((0, ui_js_1.success)('✓ No improvements needed. System is already optimal.'));
1694
- return;
1690
+ const memoryReuse = brainMetrics.memoryReuseRate;
1691
+ const avgCycleTime = brainMetrics.avgCycleTime;
1692
+ const phi = brainMetrics.avgPhi;
1693
+ console.log((0, ui_js_1.info)('Current metrics:'));
1694
+ console.log(` Memory reuse: ${(memoryReuse * 100).toFixed(1)}%`);
1695
+ console.log(` Avg cycle time: ${avgCycleTime.toFixed(0)}ms`);
1696
+ console.log(` Φ (consciousness): ${phi.toFixed(3)}`);
1697
+ console.log();
1698
+ // Build modification plans based on metrics
1699
+ const plans = [];
1700
+ // Read current config values to build correct search strings
1701
+ const fs = await import('fs');
1702
+ const path = await import('path');
1703
+ const srcDir = path.join(process.cwd(), 'src');
1704
+ // If memory reuse is low, increase anticipation depth
1705
+ if (memoryReuse < 0.5) {
1706
+ try {
1707
+ const workspaceFile = path.join(srcDir, 'memory/cognitive-workspace.ts');
1708
+ const content = fs.readFileSync(workspaceFile, 'utf-8');
1709
+ const match = content.match(/anticipationDepth:\s*(\d+)/);
1710
+ if (match) {
1711
+ const current = parseInt(match[1]);
1712
+ const newValue = Math.min(current + 2, 15);
1713
+ plans.push({
1714
+ id: `improve-anticipation-${Date.now()}`,
1715
+ name: 'Increase memory anticipation for better reuse',
1716
+ description: `Memory reuse is ${(memoryReuse * 100).toFixed(1)}%, increasing anticipation depth`,
1717
+ modifications: [{
1718
+ id: 'anticipation-boost',
1719
+ description: `Increase anticipationDepth from ${current} to ${newValue}`,
1720
+ targetFile: 'memory/cognitive-workspace.ts',
1721
+ type: 'replace',
1722
+ search: `anticipationDepth: ${current},`,
1723
+ content: `anticipationDepth: ${newValue}, // Self-improved: reuse was ${(memoryReuse * 100).toFixed(0)}%`,
1724
+ reason: 'Low memory reuse indicates insufficient pre-loading',
1725
+ expectedImprovement: '+20% memory reuse',
1726
+ }],
1727
+ createdAt: new Date(),
1728
+ });
1729
+ }
1730
+ }
1731
+ catch { /* File read error, skip this improvement */ }
1732
+ }
1733
+ // If Φ is low, increase inference iterations
1734
+ if (phi < 0.4) {
1735
+ try {
1736
+ const typesFile = path.join(srcDir, 'active-inference/types.ts');
1737
+ const content = fs.readFileSync(typesFile, 'utf-8');
1738
+ const match = content.match(/inferenceIterations:\s*(\d+)/);
1739
+ if (match) {
1740
+ const current = parseInt(match[1]);
1741
+ const newValue = Math.min(current + 4, 48);
1742
+ plans.push({
1743
+ id: `improve-inference-${Date.now()}`,
1744
+ name: 'Increase inference depth for better consciousness',
1745
+ description: `Φ is ${phi.toFixed(3)}, increasing inference iterations`,
1746
+ modifications: [{
1747
+ id: 'inference-boost',
1748
+ description: `Increase inferenceIterations from ${current} to ${newValue}`,
1749
+ targetFile: 'active-inference/types.ts',
1750
+ type: 'replace',
1751
+ search: `inferenceIterations: ${current},`,
1752
+ content: `inferenceIterations: ${newValue}, // Self-improved: Φ was ${phi.toFixed(2)}`,
1753
+ reason: 'Low Φ indicates insufficient belief convergence',
1754
+ expectedImprovement: '+15% consciousness level',
1755
+ }],
1756
+ createdAt: new Date(),
1757
+ });
1758
+ }
1759
+ }
1760
+ catch { /* File read error, skip this improvement */ }
1695
1761
  }
1696
- console.log((0, ui_js_1.info)(`Found ${improvements.length} potential improvements.`));
1697
- // Filter to high priority
1698
- const highPriority = improvements.filter(i => i.priority === 'critical' || i.priority === 'high');
1699
- if (highPriority.length === 0) {
1700
- console.log((0, ui_js_1.muted)('No critical or high priority improvements. Skipping.'));
1762
+ if (plans.length === 0) {
1763
+ console.log((0, ui_js_1.success)('✓ System metrics are healthy. No improvements needed.'));
1764
+ console.log((0, ui_js_1.muted)(` Memory reuse: ${(memoryReuse * 100).toFixed(1)}% (threshold: 50%)`));
1765
+ console.log((0, ui_js_1.muted)(` Φ: ${phi.toFixed(3)} (threshold: 0.4)`));
1701
1766
  return;
1702
1767
  }
1703
- console.log((0, ui_js_1.warning)('Self-improvement would modify the system. This is currently simulated.'));
1704
- console.log((0, ui_js_1.muted)('In a real scenario, the following steps would occur:'));
1705
- console.log((0, ui_js_1.muted)(' 1. Git commit created as safety checkpoint'));
1706
- console.log((0, ui_js_1.muted)(' 2. Code modifications generated by LLM'));
1707
- console.log((0, ui_js_1.muted)(' 3. Tests run to validate changes'));
1708
- console.log((0, ui_js_1.muted)(' 4. On failure: git revert to checkpoint'));
1709
- console.log((0, ui_js_1.muted)(' 5. On success: new version tagged'));
1710
- console.log();
1711
- // Simulate the production
1712
- const spec = {
1713
- currentVersion: this.selfProduction.getVersion(),
1714
- targetVersion: '7.0.1',
1715
- improvements: highPriority,
1716
- preserveInvariants: this.selfProduction.getInvariants(),
1717
- };
1718
- console.log((0, ui_js_1.info)('Simulating production...'));
1719
- const result = await this.selfProduction.produce(spec);
1720
- if (result.success) {
1721
- console.log((0, ui_js_1.success)(`✓ Self-improvement successful! New version: ${result.newVersion}`));
1768
+ console.log((0, ui_js_1.warning)(`Found ${plans.length} potential improvement(s):`));
1769
+ for (const plan of plans) {
1770
+ console.log(` • ${plan.name}`);
1771
+ }
1772
+ console.log();
1773
+ // Apply each plan via Darwin-Gödel
1774
+ let applied = 0;
1775
+ for (const plan of plans) {
1776
+ console.log((0, ui_js_1.info)(`Applying: ${plan.name}...`));
1777
+ try {
1778
+ const result = await this.darwinGodel.apply(plan);
1779
+ if (result.success) {
1780
+ console.log((0, ui_js_1.success)(` ✓ Applied! Commit: ${result.commitHash?.slice(0, 8)}`));
1781
+ applied++;
1782
+ }
1783
+ else {
1784
+ console.log((0, ui_js_1.warning)(` ✗ Failed: ${result.verificaton.errors[0] || 'Unknown error'}`));
1785
+ }
1786
+ }
1787
+ catch (err) {
1788
+ console.log((0, ui_js_1.error)(` ✗ Error: ${err instanceof Error ? err.message : err}`));
1789
+ }
1790
+ }
1791
+ console.log();
1792
+ if (applied > 0) {
1793
+ console.log((0, ui_js_1.success)(`✓ Self-improvement complete! Applied ${applied}/${plans.length} modifications.`));
1794
+ console.log((0, ui_js_1.muted)(' Changes committed to git. Use `git log` to see history.'));
1722
1795
  }
1723
1796
  else {
1724
- console.log((0, ui_js_1.warning)('Self-improvement simulation completed (no actual changes made).'));
1797
+ console.log((0, ui_js_1.warning)('No modifications were applied.'));
1725
1798
  }
1726
1799
  console.log();
1727
1800
  }
@@ -11,6 +11,8 @@
11
11
  */
12
12
  export * from './router.js';
13
13
  export type LLMProvider = 'ollama' | 'openai' | 'anthropic';
14
+ export type ModelTier = 'fast' | 'balanced' | 'powerful';
15
+ export declare const MODEL_TIERS: Record<LLMProvider, Record<ModelTier, string>>;
14
16
  export declare const OLLAMA_CONFIG: {
15
17
  baseUrl: string;
16
18
  defaultModel: string;
@@ -86,6 +88,7 @@ export declare const GENESIS_SYSTEM_PROMPT = "# Genesis System\n\nYou are Genesi
86
88
  export declare class LLMBridge {
87
89
  private config;
88
90
  private conversationHistory;
91
+ private useCache;
89
92
  constructor(config?: Partial<LLMConfig>);
90
93
  private detectProvider;
91
94
  private detectApiKey;
@@ -94,8 +97,11 @@ export declare class LLMBridge {
94
97
  * Check if Ollama is running
95
98
  */
96
99
  isOllamaAvailable(): Promise<boolean>;
100
+ private fallbackAttempts;
101
+ private static readonly MAX_FALLBACK_ATTEMPTS;
97
102
  /**
98
103
  * Send a message and get a response
104
+ * Fallback chain: Anthropic -> OpenAI -> Ollama (max 3 attempts)
99
105
  */
100
106
  chat(userMessage: string, systemPrompt?: string): Promise<LLMResponse>;
101
107
  /**
@@ -127,6 +133,13 @@ export declare class LLMBridge {
127
133
  * Check if API key is configured (or Ollama available)
128
134
  */
129
135
  isConfigured(): boolean;
136
+ /**
137
+ * v7.18: Chat with specific model tier for cost optimization
138
+ * - fast: GPT-4o-mini/Haiku - 17x cheaper, good for simple tasks
139
+ * - balanced: GPT-4o/Sonnet - default quality
140
+ * - powerful: Best available model
141
+ */
142
+ chatWithTier(userMessage: string, tier?: ModelTier, systemPrompt?: string): Promise<LLMResponse>;
130
143
  /**
131
144
  * Get provider status
132
145
  */
@@ -136,6 +149,22 @@ export declare class LLMBridge {
136
149
  model: string;
137
150
  isLocal: boolean;
138
151
  };
152
+ /**
153
+ * v7.18: Get cache statistics for cost monitoring
154
+ */
155
+ getCacheStats(): {
156
+ size: number;
157
+ hits: number;
158
+ estimatedSavings: number;
159
+ };
160
+ /**
161
+ * v7.18: Enable/disable response caching
162
+ */
163
+ setCache(enabled: boolean): void;
164
+ /**
165
+ * v7.18: Clear the response cache
166
+ */
167
+ clearCache(): void;
139
168
  /**
140
169
  * List available Ollama models
141
170
  */
@@ -25,13 +25,30 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
25
25
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
26
26
  };
27
27
  Object.defineProperty(exports, "__esModule", { value: true });
28
- exports.LLMBridge = exports.GENESIS_SYSTEM_PROMPT = exports.GENESIS_IDENTITY_PROMPT = exports.OLLAMA_CONFIG = void 0;
28
+ exports.LLMBridge = exports.GENESIS_SYSTEM_PROMPT = exports.GENESIS_IDENTITY_PROMPT = exports.OLLAMA_CONFIG = exports.MODEL_TIERS = void 0;
29
29
  exports.buildSystemPrompt = buildSystemPrompt;
30
30
  exports.createLLMBridge = createLLMBridge;
31
31
  exports.getLLMBridge = getLLMBridge;
32
32
  exports.resetLLMBridge = resetLLMBridge;
33
33
  // Re-export Phase 8: Hybrid Router
34
34
  __exportStar(require("./router.js"), exports);
35
+ exports.MODEL_TIERS = {
36
+ openai: {
37
+ fast: 'gpt-4o-mini', // $0.15/$0.60 per 1M - 17x cheaper!
38
+ balanced: 'gpt-4o', // $2.5/$10 per 1M
39
+ powerful: 'gpt-4o', // Same as balanced for OpenAI
40
+ },
41
+ anthropic: {
42
+ fast: 'claude-3-5-haiku-20241022', // Cheaper, faster
43
+ balanced: 'claude-sonnet-4-20250514',
44
+ powerful: 'claude-sonnet-4-20250514',
45
+ },
46
+ ollama: {
47
+ fast: 'qwen2.5-coder', // Fast local
48
+ balanced: 'qwen2.5-coder',
49
+ powerful: 'mistral-small', // Higher quality local
50
+ },
51
+ };
35
52
  // Ollama config
36
53
  exports.OLLAMA_CONFIG = {
37
54
  baseUrl: process.env.OLLAMA_HOST || 'http://localhost:11434',
@@ -151,12 +168,35 @@ async function buildSystemPrompt(mcpTools, localTools, includeSchemas = true) {
151
168
  }
152
169
  // Legacy export for backwards compatibility
153
170
  exports.GENESIS_SYSTEM_PROMPT = exports.GENESIS_IDENTITY_PROMPT;
154
- // ============================================================================
155
- // LLM Bridge Class
156
- // ============================================================================
171
// Module-level response cache. The helpers below read each entry's
// `timestamp` (milliseconds since epoch) to decide what to evict.
const responseCache = new Map();
const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
const MAX_CACHE_SIZE = 100;
/**
 * Build the cache key for a prompt/model pair.
 *
 * The key covers the entire prompt, so two prompts that merely share a
 * prefix never map to the same entry, and the pair is encoded as a JSON
 * array so a separator character inside either value cannot cause a clash.
 *
 * @param prompt - Prompt text.
 * @param model - Model identifier.
 * @returns A string key unique to the (model, prompt) pair.
 */
function getCacheKey(prompt, model) {
    return JSON.stringify([model, prompt]);
}
/**
 * Evict cache entries older than CACHE_TTL_MS, then, if the cache still
 * holds more than MAX_CACHE_SIZE entries, drop the oldest ones (by
 * `timestamp`) until exactly MAX_CACHE_SIZE remain.
 */
function cleanCache() {
    const now = Date.now();
    for (const [key, entry] of responseCache) {
        if (now - entry.timestamp > CACHE_TTL_MS) {
            responseCache.delete(key);
        }
    }
    // Limit size
    const overflow = responseCache.size - MAX_CACHE_SIZE;
    if (overflow > 0) {
        const oldestFirst = [...responseCache.entries()]
            .sort((a, b) => a[1].timestamp - b[1].timestamp);
        for (const [key] of oldestFirst.slice(0, overflow)) {
            responseCache.delete(key);
        }
    }
}
157
196
  class LLMBridge {
158
197
  config;
159
198
  conversationHistory = [];
199
+ useCache = true; // v7.18: Enable caching by default
160
200
  constructor(config = {}) {
161
201
  // Detect provider first, then use it for model selection
162
202
  const provider = config.provider || this.detectProvider();
@@ -222,8 +262,12 @@ class LLMBridge {
222
262
  return false;
223
263
  }
224
264
  }
265
+ // v7.18: Track fallback attempts to prevent infinite loops
266
+ fallbackAttempts = 0;
267
+ static MAX_FALLBACK_ATTEMPTS = 3;
225
268
  /**
226
269
  * Send a message and get a response
270
+ * Fallback chain: Anthropic -> OpenAI -> Ollama (max 3 attempts)
227
271
  */
228
272
  async chat(userMessage, systemPrompt) {
229
273
  const system = systemPrompt || exports.GENESIS_SYSTEM_PROMPT;
@@ -241,20 +285,47 @@ class LLMBridge {
241
285
  else {
242
286
  response = await this.callOpenAI(system);
243
287
  }
288
+ // Reset fallback counter on success
289
+ this.fallbackAttempts = 0;
244
290
  // Add assistant response to history
245
291
  this.conversationHistory.push({ role: 'assistant', content: response.content });
246
292
  return response;
247
293
  }
248
294
  catch (error) {
249
295
  const errorMessage = error instanceof Error ? error.message : String(error);
250
- // Fallback: if Ollama fails, try cloud
251
- if (this.config.provider === 'ollama' && process.env.OPENAI_API_KEY) {
252
- console.log('[LLM] Ollama unavailable, falling back to OpenAI...');
296
+ const isQuotaError = errorMessage.includes('credit balance') ||
297
+ errorMessage.includes('quota') ||
298
+ errorMessage.includes('rate limit') ||
299
+ errorMessage.includes('insufficient_quota');
300
+ // v7.18: Check fallback limit to prevent infinite loops
301
+ if (this.fallbackAttempts >= LLMBridge.MAX_FALLBACK_ATTEMPTS) {
302
+ this.fallbackAttempts = 0; // Reset for next call
303
+ this.conversationHistory.pop();
304
+ throw new Error(`LLM call failed after ${LLMBridge.MAX_FALLBACK_ATTEMPTS} fallback attempts: ${errorMessage}`);
305
+ }
306
+ this.fallbackAttempts++;
307
+ // v7.18: Enhanced fallback chain with attempt tracking
308
+ // Anthropic fails -> try OpenAI
309
+ if (this.config.provider === 'anthropic' && process.env.OPENAI_API_KEY) {
310
+ console.log(`[LLM] Anthropic failed (${isQuotaError ? 'quota' : 'error'}), falling back to OpenAI... (attempt ${this.fallbackAttempts}/${LLMBridge.MAX_FALLBACK_ATTEMPTS})`);
253
311
  this.config.provider = 'openai';
254
312
  this.config.apiKey = process.env.OPENAI_API_KEY;
255
313
  this.config.model = 'gpt-4o';
314
+ this.conversationHistory.pop();
256
315
  return this.chat(userMessage, systemPrompt);
257
316
  }
317
+ // OpenAI fails -> try Ollama (if available)
318
+ if (this.config.provider === 'openai') {
319
+ console.log(`[LLM] OpenAI failed (${isQuotaError ? 'quota' : 'error'}), falling back to Ollama... (attempt ${this.fallbackAttempts}/${LLMBridge.MAX_FALLBACK_ATTEMPTS})`);
320
+ this.config.provider = 'ollama';
321
+ this.config.apiKey = 'not-needed';
322
+ this.config.model = exports.OLLAMA_CONFIG.defaultModel;
323
+ this.conversationHistory.pop();
324
+ return this.chat(userMessage, systemPrompt);
325
+ }
326
+ // Ollama fails -> fail fast (don't loop back to OpenAI)
327
+ this.fallbackAttempts = 0;
328
+ this.conversationHistory.pop();
258
329
  throw new Error(`LLM call failed: ${errorMessage}`);
259
330
  }
260
331
  }
@@ -279,6 +350,7 @@ class LLMBridge {
279
350
  temperature: this.config.temperature,
280
351
  max_tokens: this.config.maxTokens,
281
352
  }),
353
+ signal: AbortSignal.timeout(60000), // v7.18: 60s timeout for faster failure
282
354
  });
283
355
  if (!response.ok) {
284
356
  const error = await response.text();
@@ -317,6 +389,7 @@ class LLMBridge {
317
389
  content: m.content,
318
390
  })),
319
391
  }),
392
+ signal: AbortSignal.timeout(60000), // v7.18: 60s timeout for faster failure
320
393
  });
321
394
  if (!response.ok) {
322
395
  const error = await response.text();
@@ -358,6 +431,7 @@ class LLMBridge {
358
431
  num_predict: this.config.maxTokens,
359
432
  },
360
433
  }),
434
+ signal: AbortSignal.timeout(90000), // v7.18: 90s timeout (local can be slower)
361
435
  });
362
436
  if (!response.ok) {
363
437
  const error = await response.text();
@@ -401,6 +475,26 @@ class LLMBridge {
401
475
  return true; // Local, no key needed
402
476
  return !!this.config.apiKey;
403
477
  }
478
+ /**
479
+ * v7.18: Chat with specific model tier for cost optimization
480
+ * - fast: GPT-4o-mini/Haiku - 17x cheaper, good for simple tasks
481
+ * - balanced: GPT-4o/Sonnet - default quality
482
+ * - powerful: Best available model
483
+ */
484
+ async chatWithTier(userMessage, tier = 'balanced', systemPrompt) {
485
+ const originalModel = this.config.model;
486
+ const tierModel = exports.MODEL_TIERS[this.config.provider][tier];
487
+ // Temporarily switch to tier model
488
+ this.config.model = tierModel;
489
+ try {
490
+ const response = await this.chat(userMessage, systemPrompt);
491
+ return response;
492
+ }
493
+ finally {
494
+ // Restore original model
495
+ this.config.model = originalModel;
496
+ }
497
+ }
404
498
  /**
405
499
  * Get provider status
406
500
  */
@@ -412,6 +506,35 @@ class LLMBridge {
412
506
  isLocal: this.config.provider === 'ollama',
413
507
  };
414
508
  }
509
+ /**
510
+ * v7.18: Get cache statistics for cost monitoring
511
+ */
512
+ getCacheStats() {
513
+ cleanCache();
514
+ let totalTokensSaved = 0;
515
+ for (const entry of responseCache.values()) {
516
+ totalTokensSaved += entry.tokens;
517
+ }
518
+ // Estimate savings: avg $0.01 per 1K tokens for GPT-4o
519
+ const estimatedSavings = (totalTokensSaved / 1000) * 0.01;
520
+ return {
521
+ size: responseCache.size,
522
+ hits: totalTokensSaved,
523
+ estimatedSavings,
524
+ };
525
+ }
526
+ /**
527
+ * v7.18: Enable/disable response caching by storing `enabled` in `this.useCache`.
528
+ */
529
+ setCache(enabled) {
530
+ this.useCache = enabled;
531
+ }
532
+ /**
533
+ * v7.18: Clear the response cache by calling `responseCache.clear()`. NOTE(review): `responseCache` is not accessed through `this`, so it is presumably shared beyond this instance - confirm.
534
+ */
535
+ clearCache() {
536
+ responseCache.clear();
537
+ }
415
538
  /**
416
539
  * List available Ollama models
417
540
  */
@@ -271,9 +271,10 @@ function detectCloudProvider() {
271
271
  * Estimate cost for cloud provider
272
272
  */
273
273
  function estimateCost(inputTokens, outputTokens, provider) {
274
- // Prices per 1M tokens (as of 2024)
274
+ // Prices per 1M tokens (as of 2025)
275
+ // GPT-4o-mini: { input: 0.15, output: 0.60 } - 17x cheaper for simple tasks!
275
276
  const prices = {
276
- openai: { input: 2.5, output: 10 }, // GPT-4o
277
+ openai: { input: 2.5, output: 10 }, // GPT-4o (default)
277
278
  anthropic: { input: 3, output: 15 }, // Claude Sonnet
278
279
  ollama: { input: 0, output: 0 }, // Free!
279
280
  };
@@ -16,6 +16,10 @@
16
16
  * - Cache: Intelligent per-server caching with TTL
17
17
  * - DAG Executor: Parallel execution with dependency awareness
18
18
  * - Transformers: Composable result transformations
19
+ *
20
+ * New in 7.18: Web Search Fallback Chain
21
+ * - brave-search → exa → gemini → firecrawl
22
+ * - Automatic tool name mapping between providers
19
23
  */
20
24
  export * from './resilient.js';
21
25
  export * from './tool-chain.js';
@@ -17,6 +17,10 @@
17
17
  * - Cache: Intelligent per-server caching with TTL
18
18
  * - DAG Executor: Parallel execution with dependency awareness
19
19
  * - Transformers: Composable result transformations
20
+ *
21
+ * New in 7.18: Web Search Fallback Chain
22
+ * - brave-search → exa → gemini → firecrawl
23
+ * - Automatic tool name mapping between providers
20
24
  */
21
25
  var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
22
26
  if (k2 === undefined) k2 = k;
@@ -205,6 +209,82 @@ const MCP_SERVER_REGISTRY = {
205
209
  },
206
210
  };
207
211
  exports.MCP_SERVER_REGISTRY = MCP_SERVER_REGISTRY;
212
+ // ============================================================================
213
+ // v7.18 - Web Search Fallback Chain
214
+ // ============================================================================
215
/**
 * Ordered fallback chain of web search servers.
 * When one fails (rate limit, API key missing, error), a later entry is tried.
 */
const WEB_SEARCH_FALLBACK_CHAIN = [
    'brave-search',
    'exa',
    'gemini',
    'firecrawl',
];
/**
 * Tool name mapping between web search providers.
 * Outer key: the originally requested tool. Inner key: a server from the
 * fallback chain. Value: the equivalent tool name on that server.
 */
const WEB_SEARCH_TOOL_MAP = {
    brave_web_search: {
        'brave-search': 'brave_web_search',
        exa: 'web_search_exa',
        gemini: 'web_search',
        firecrawl: 'firecrawl_search',
    },
    brave_news_search: {
        'brave-search': 'brave_news_search',
        exa: 'web_search_exa', // Exa has no news-specific tool, so use the general one
        gemini: 'web_search',
        firecrawl: 'firecrawl_search',
    },
};
238
/**
 * Check whether a server's API key requirement is satisfied.
 *
 * Servers listed in the table need at least one of their environment
 * variables set to a non-empty value. Any server not listed is treated as
 * requiring no API key.
 *
 * @param server MCP server name, e.g. 'brave-search'.
 * @returns true when no key is required or one accepted variable is set.
 */
function isServerConfigured(server) {
    const requiredEnvVars = {
        'brave-search': ['BRAVE_API_KEY'],
        'exa': ['EXA_API_KEY'],
        'gemini': ['GOOGLE_API_KEY', 'GEMINI_API_KEY'],
        'firecrawl': ['FIRECRAWL_API_KEY'],
    };
    // Own-property check: a bare lookup would resolve inherited names such as
    // 'constructor' or 'toString' to non-array values and crash on `.some`.
    if (!Object.prototype.hasOwnProperty.call(requiredEnvVars, server)) {
        return true; // No API key required
    }
    return requiredEnvVars[server].some(name => !!process.env[name]);
}
253
/**
 * Check if an error indicates rate limiting or quota exhaustion.
 *
 * Matching is a case-insensitive substring test against a fixed pattern list.
 *
 * @param error Error text. An `Error` instance contributes its `message`;
 *              null/undefined are treated as empty text; anything else is
 *              stringified.
 * @returns true when any pattern occurs in the text.
 */
function isRateLimitError(error) {
    const rateLimitPatterns = [
        'rate limit', 'rate_limit', 'ratelimit',
        '429', 'too many requests',
        'quota', 'exceeded', 'exhausted',
        'credit balance',
    ];
    // Normalize first so a non-string argument cannot throw on `.toLowerCase()`.
    let text;
    if (typeof error === 'string') {
        text = error;
    }
    else if (error instanceof Error) {
        text = error.message;
    }
    else {
        text = String(error ?? '');
    }
    const lowerError = text.toLowerCase();
    return rateLimitPatterns.some(p => lowerError.includes(p));
}
266
/**
 * Get the next usable fallback server after `currentServer`.
 *
 * @param currentServer Server that was just tried.
 * @param tool          Tool name that was requested; only tools that are keys
 *                      of WEB_SEARCH_TOOL_MAP have fallbacks.
 * @returns `{ server, tool }` for the first later chain entry that is
 *          configured and has a mapped tool name, or null when the tool is
 *          unmapped, the server is not in the chain, or nothing later fits.
 */
function getNextFallbackServer(currentServer, tool) {
    const toolsByServer = WEB_SEARCH_TOOL_MAP[tool];
    // Only handle web search tools
    if (!toolsByServer) {
        return null;
    }
    const position = WEB_SEARCH_FALLBACK_CHAIN.indexOf(currentServer);
    if (position === -1) {
        return null;
    }
    // Walk the remainder of the chain in order
    for (const candidate of WEB_SEARCH_FALLBACK_CHAIN.slice(position + 1)) {
        if (!isServerConfigured(candidate)) {
            continue;
        }
        const mappedTool = toolsByServer[candidate];
        if (mappedTool) {
            return { server: candidate, tool: mappedTool };
        }
    }
    return null;
}
208
288
  class MCPConnectionManager {
209
289
  connections = new Map();
210
290
  connecting = new Map();
@@ -292,16 +372,23 @@ class MCPConnectionManager {
292
372
  }
293
373
  /**
294
374
  * Call a tool on an MCP server
375
+ * v7.18: Added timeout wrapper for faster failure
295
376
  */
296
377
  async callTool(server, tool, args) {
297
378
  const connection = await this.getConnection(server);
298
379
  if (this.logCalls) {
299
380
  console.log(`[MCP] ${server}.${tool}(${JSON.stringify(args).slice(0, 100)}...)`);
300
381
  }
301
- const result = await connection.client.callTool({
302
- name: tool,
303
- arguments: args,
304
- });
382
+ // v7.18: Wrap call in timeout for faster failure (15s default, 30s for heavy ops)
383
+ const isHeavyOp = ['firecrawl_crawl', 'parse_paper_content', 'web_search'].includes(tool);
384
+ const callTimeout = isHeavyOp ? 30000 : 15000;
385
+ const result = await Promise.race([
386
+ connection.client.callTool({
387
+ name: tool,
388
+ arguments: args,
389
+ }),
390
+ new Promise((_, reject) => setTimeout(() => reject(new Error(`MCP call to ${server}.${tool} timed out after ${callTimeout}ms`)), callTimeout)),
391
+ ]);
305
392
  // Parse result content
306
393
  const content = result.content;
307
394
  if (content && content.length > 0) {
@@ -419,7 +506,7 @@ class RealMCPClient {
419
506
  this.mode = this.config.mode;
420
507
  this.manager = new MCPConnectionManager(this.config.timeout, this.config.logCalls);
421
508
  }
422
- async call(server, tool, params, options = {}) {
509
+ async call(server, tool, params, options = {}, _isRetry = false) {
423
510
  const startTime = Date.now();
424
511
  if (this.config.onCall) {
425
512
  this.config.onCall(server, tool, params);
@@ -441,9 +528,20 @@ class RealMCPClient {
441
528
  return result;
442
529
  }
443
530
  catch (error) {
531
+ const errorMessage = error instanceof Error ? error.message : String(error);
532
+ // v7.18: Try fallback for web search tools
533
+ if (!_isRetry) {
534
+ const fallback = getNextFallbackServer(server, tool);
535
+ if (fallback) {
536
+ console.log(`[MCP] ${server}.${tool} failed (${isRateLimitError(errorMessage) ? 'rate limit' : 'error'}), trying ${fallback.server}.${fallback.tool}...`);
537
+ // Adapt params for the new tool if needed
538
+ const adaptedParams = this.adaptParamsForFallback(tool, fallback.tool, params);
539
+ return this.call(fallback.server, fallback.tool, adaptedParams, options, true);
540
+ }
541
+ }
444
542
  const result = {
445
543
  success: false,
446
- error: error instanceof Error ? error.message : String(error),
544
+ error: errorMessage,
447
545
  server,
448
546
  tool,
449
547
  mode: 'real',
@@ -456,6 +554,33 @@ class RealMCPClient {
456
554
  return result;
457
555
  }
458
556
  }
557
+ /**
558
+ * Adapt parameters when falling back to a different web search provider.
559
+ */
560
+ adaptParamsForFallback(originalTool, newTool, params) {
561
+ // Exa uses slightly different param names
562
+ if (newTool === 'web_search_exa') {
563
+ return {
564
+ query: params.query || params.q,
565
+ numResults: params.count || params.numResults || 10,
566
+ };
567
+ }
568
+ // Gemini web search
569
+ if (newTool === 'web_search') {
570
+ return {
571
+ q: params.query || params.q,
572
+ verbosity: 'concise',
573
+ };
574
+ }
575
+ // Firecrawl search
576
+ if (newTool === 'firecrawl_search') {
577
+ return {
578
+ query: params.query || params.q,
579
+ limit: params.count || 10,
580
+ };
581
+ }
582
+ return params;
583
+ }
459
584
  /** Lists tools for `server` by delegating to `this.manager.listTools(server)`. */
  async listTools(server) {
460
585
  return this.manager.listTools(server);
461
586
  }
@@ -22,7 +22,7 @@ const indexer_js_1 = require("../memory/indexer.js");
22
22
  // ============================================================================
23
23
  const DEFAULT_CONFIG = {
24
24
  defaultTimeout: 30000,
25
- defaultMaxRetries: 2,
25
+ defaultMaxRetries: 3, // Improved by self-improvement cycle
26
26
  circuitBreakerThreshold: 5,
27
27
  circuitBreakerResetTime: 60000,
28
28
  logCalls: false,
@@ -34,10 +34,10 @@ exports.resetCognitiveWorkspace = resetCognitiveWorkspace;
34
34
  exports.DEFAULT_WORKSPACE_CONFIG = {
35
35
  maxItems: 7,
36
36
  maxTokens: 8192,
37
- decayRate: 0.01,
37
+ decayRate: 0.005, // Self-improved: slower decay for persistence
38
38
  boostOnAccess: 0.3,
39
39
  minActivation: 0.1,
40
- anticipationDepth: 5,
40
+ anticipationDepth: 7, // Self-improved: better context pre-loading
41
41
  associationStrength: 0.3,
42
42
  curationInterval: 5000,
43
43
  autoCurate: true,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "genesis-ai-cli",
3
- "version": "7.17.0",
3
+ "version": "7.18.1",
4
4
  "description": "Autonomous AI System Creator - Brain ON by default, Active Inference integrated, Curiosity-driven, Φ monitoring in every response",
5
5
  "main": "dist/src/index.js",
6
6
  "types": "dist/src/index.d.ts",