npm - omnius - Versions diffs - 1.0.154 → 1.0.155 - Mend

omnius 1.0.154 → 1.0.155

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -12170,6 +12170,298 @@ function _readJson(path, fallback) {
 function _writeJson(path, value) { // Serialize value as 2-space-indented JSON and write it to path.
   try { writeFileSync(path, JSON.stringify(value, null, 2)); } catch {} // write/serialize errors are deliberately ignored (best-effort)
 }
+const cohereEndpointCatalogFile = join(nexusDir, 'cohere-endpoint-catalog.json'); // path of the persisted endpoint catalog inside nexusDir
+var _cohereEndpointCatalog = { // active in-memory catalog; starts as an empty local-Ollama catalog
+  source: 'ollama',
+  passthrough: false,
+  endpointUrl: process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434',
+  endpointAuth: '', // bearer token sent to passthrough endpoints; none by default
+  models: [],
+  pricingMenu: [],
+  updatedAt: 0, // Date.now() timestamp of the last update; 0 means never
+};
+function _cohereRedactUrl(url) { // Return a display-safe form of url with credentials, query string and fragment removed.
+  var raw = String(url || '').trim();
+  if (!raw) return ''; // empty or blank input yields an empty string
+  try {
+    var u = new URL(raw); // throws on unparseable input; handled by the catch below
+    u.username = '';
+    u.password = '';
+    u.search = ''; // the whole query string is dropped, not just secret-looking parameters
+    u.hash = '';
+    return u.toString().replace(/\\/+$/, '');
+  } catch { // not a parseable URL: mask api-key/key/token/auth query values in the raw text instead
+    return raw.replace(/([?&](?:api[_-]?key|key|token|auth)=)[^&]+/ig, '$1***');
+  }
+}
+function _cohereNormalizeBaseUrl(rawUrl, passthrough) { // Normalize an endpoint base URL; blank input falls back to the local Ollama default.
+  var raw = String(rawUrl || '').trim() || 'http://localhost:11434';
+  if (!passthrough) return raw.replace(/\\/+$/, '');
+  return raw // passthrough only: presumably strips trailing API path suffixes (chat/completions, completions, models, v1) — escapes appear doubled in this view, TODO confirm against the shipped file
+    .replace(/\\/+$/, '')
+    .replace(/\\/chat\\/completions$/, '')
+    .replace(/\\/completions$/, '')
+    .replace(/\\/models(\\/.*)?$/, '')
+    .replace(/\\/v1$/, '')
+    .replace(/\\/+$/, '');
+}
+function _cohereNormalizeModelRecord(model, source, passthrough) { // Coerce a raw model entry into the catalog record shape; returns null when no name can be derived.
+  if (!model) return null;
+  var name = String(model.name || model.model || model.id || '').trim(); // first non-empty of name, model, id
+  if (!name) return null;
+  var details = model.details || {}; // nested metadata object (presumably the Ollama-style "details" field — TODO confirm)
+  return {
+    name: name,
+    size: Number(model.size || 0) || 0, // missing or non-numeric size becomes 0
+    family: String(model.family || details.family || model.owned_by || ''),
+    parameterSize: String(model.parameterSize || details.parameter_size || ''),
+    quantization: String(model.quantization || details.quantization_level || ''),
+    source: source || (passthrough ? 'openai-compatible' : 'ollama'), // explicit source wins; otherwise derived from the passthrough flag
+    passthrough: !!passthrough,
+  };
+}
+function _cohereModelLooksChatCapable(model) { // Name-based heuristic: false when name/family text matches embedding, rerank, audio or image-model patterns.
+  var name = String(model && model.name || '').toLowerCase();
+  var family = String(model && model.family || '').toLowerCase();
+  var text = name + ' ' + family; // both exclusion patterns are tested against name and family together
+  if (!name) return false; // unnamed records are never treated as chat-capable
+  if (/embed|embedding|rerank|nomic-bert|bge-|e5-|clip|whisper|tts|audio/i.test(text)) return false;
+  if (/image|flux|stable.?diffusion|sdxl|dall|midjourney|vision-encoder/i.test(text)) return false;
+  return true; // nothing matched the exclusion patterns
+}
+function _cohereChatScore(model) { // Rank a model for chat use from name/family pattern matches; a higher score is preferred by the selector.
+  var name = String(model && model.name || '').toLowerCase();
+  var family = String(model && model.family || '').toLowerCase();
+  var score = 1; // baseline for models matching no known pattern
+  if (/qwen3\\.5|qwen35|qwen3/i.test(name + ' ' + family)) score = 10;
+  else if (/gpt-|claude|gemini|deepseek|llama|mistral|mixtral|command-r|nemotron|gemma/i.test(name + ' ' + family)) score = 8;
+  else if (/chat|instruct|turbo|sonnet|opus|haiku/i.test(name + ' ' + family)) score = 6;
+  if (/^omnius-/i.test(name)) score += 3;
+  if (model && model.passthrough) score += 1;
+  return score; // base score of 1, 6, 8 or 10, plus 3 for an "omnius-" name prefix and 1 for passthrough records
+}
+function _cohereAnnotateModels(models, source, passthrough) { // Normalize a raw model list, drop non-chat entries, and attach a _chatScore to each kept record.
+  var out = [];
+  var input = Array.isArray(models) ? models : []; // non-array input is treated as an empty list
+  for (var i = 0; i < input.length; i++) {
+    var rec = _cohereNormalizeModelRecord(input[i], source, passthrough);
+    if (!rec || !_cohereModelLooksChatCapable(rec)) continue; // skip entries without a usable name or that look non-chat
+    rec._chatScore = _cohereChatScore(rec);
+    out.push(rec);
+  }
+  return out; // new array of new record objects, in input order
+}
+function _cohereApplyAllowlist(models) { // Restrict models to those whose name is in _cohereAllowedModels; a falsy allowlist means no filtering.
+  if (!_cohereAllowedModels) return Array.isArray(models) ? models : []; // NOTE: returns the caller's array itself (not a copy) when no allowlist is set
+  return (Array.isArray(models) ? models : []).filter(function(m) { return _cohereAllowedModels.has(m.name); });
+}
+function _coherePersistEndpointCatalog() { // Write a redacted snapshot of the active catalog, plus its model list and pricing menu, to cohereEndpointCatalogFile.
+  var pub = _cohereEndpointSnapshot(_cohereEndpointCatalog); // snapshot holds a redacted URL and no endpointAuth
+  pub.models = (_cohereEndpointCatalog.models || []).map(function(m) { // copy only the listed fields of each model; _chatScore is not written
+    return {
+      name: m.name,
+      size: m.size || 0,
+      family: m.family || '',
+      parameterSize: m.parameterSize || '',
+      quantization: m.quantization || '',
+      source: m.source || _cohereEndpointCatalog.source,
+      passthrough: !!m.passthrough,
+    };
+  });
+  pub.pricingMenu = Array.isArray(_cohereEndpointCatalog.pricingMenu) ? _cohereEndpointCatalog.pricingMenu : [];
+  _writeJson(cohereEndpointCatalogFile, pub); // _writeJson swallows write errors, so persistence is best-effort
+}
+function _cohereRememberEndpointCatalog(opts) { // Replace the active catalog from caller-supplied options, persist it, log a summary, and return the new catalog.
+  opts = opts || {};
+  var passthrough = opts.passthrough === true; // only a strict boolean true enables passthrough
+  var source = String(opts.source || (passthrough ? 'openai-compatible' : 'ollama'));
+  var endpointUrl = _cohereNormalizeBaseUrl(opts.endpointUrl || opts.ollamaUrl || process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434', passthrough);
+  var models = _cohereAnnotateModels(opts.models || [], source, passthrough); // normalized, filtered to chat-capable models, and scored
+  _cohereEndpointCatalog = {
+    source: source,
+    passthrough: passthrough,
+    endpointUrl: endpointUrl,
+    endpointAuth: String(opts.endpointAuth || ''), // kept in memory only; the persisted snapshot does not include it
+    models: models,
+    pricingMenu: Array.isArray(opts.pricingMenu) ? opts.pricingMenu : [],
+    updatedAt: Date.now(),
+  };
+  _coherePersistEndpointCatalog();
+  dlog('COHERE endpoint catalog updated: source=' + source + ' passthrough=' + passthrough + ' models=' + models.length);
+  return _cohereEndpointCatalog;
+}
+function _cohereLoadPersistedEndpointCatalog() { // Rebuild a catalog object from the persisted JSON file; does not modify the active catalog.
+  var stored = _readJson(cohereEndpointCatalogFile, null);
+  if (!stored || !Array.isArray(stored.models)) return null; // null when _readJson yields nothing usable or there is no models array
+  return {
+    source: String(stored.source || 'cached'),
+    passthrough: stored.passthrough === true,
+    endpointUrl: String(stored.endpointUrl || ''), // this is the redacted URL that was written at persist time
+    endpointAuth: '', // auth is never restored from disk
+    models: _cohereAnnotateModels(stored.models, String(stored.source || 'cached'), stored.passthrough === true), // re-filtered and re-scored on load
+    pricingMenu: Array.isArray(stored.pricingMenu) ? stored.pricingMenu : [],
+    updatedAt: Number(stored.updatedAt || 0) || 0,
+    cachedOnly: true, // marks the catalog as loaded from the cache file rather than fetched live
+  };
+}
+async function _cohereFetchEndpointCatalog(baseUrl, endpointAuth, passthrough) { // Fetch the live model list: GET /v1/models for passthrough endpoints, GET /api/tags otherwise; throws on a non-OK status.
+  var endpointUrl = _cohereNormalizeBaseUrl(baseUrl, passthrough);
+  if (passthrough) {
+    var headers = { 'Content-Type': 'application/json' };
+    if (endpointAuth) headers['Authorization'] = 'Bearer ' + endpointAuth; // bearer auth is sent only when a token is provided
+    var resp = await fetch(endpointUrl + '/v1/models', { headers: headers, signal: AbortSignal.timeout(10000) }); // 10s timeout
+    if (!resp.ok) throw new Error('/v1/models HTTP ' + resp.status);
+    var data = await resp.json();
+    var list = Array.isArray(data.data) ? data.data : (Array.isArray(data.models) ? data.models : []); // accept a "data" array or a "models" array
+    return {
+      source: 'openai-compatible',
+      passthrough: true,
+      endpointUrl: endpointUrl,
+      endpointAuth: String(endpointAuth || ''),
+      models: _cohereAnnotateModels(list.map(function(m) { // reduce each entry to name/family/size before normalizing
+        return {
+          name: m.id || m.name || m.model,
+          family: m.owned_by || m.family || '',
+          size: m.size || 0,
+        };
+      }), 'openai-compatible', true),
+      pricingMenu: [],
+      updatedAt: Date.now(),
+    };
+  }
+  var tagsResp = await fetch(endpointUrl + '/api/tags', { signal: AbortSignal.timeout(10000) }); // non-passthrough path: no auth header, 10s timeout
+  if (!tagsResp.ok) throw new Error('/api/tags HTTP ' + tagsResp.status);
+  var tags = await tagsResp.json();
+  return {
+    source: 'ollama',
+    passthrough: false,
+    endpointUrl: endpointUrl,
+    endpointAuth: '',
+    models: _cohereAnnotateModels(tags.models || [], 'ollama', false),
+    pricingMenu: [], // callers that want to keep a pricing menu must copy it onto the result
+    updatedAt: Date.now(),
+  };
+}
+async function _cohereGetModelCatalog(opts) { // Resolve the catalog to use: recent in-memory copy, else live refresh, else persisted file, else local Ollama, else an empty model list.
+  opts = opts || {};
+  var now = Date.now();
+  var active = _cohereEndpointCatalog;
+  if (!opts.forceRefresh && active && active.models && active.models.length > 0 && now - (active.updatedAt || 0) < 60000) { // reuse when it has models and is under 60s old
+    return active;
+  }
+  if (active && active.endpointUrl && (!active.cachedOnly || active.endpointAuth || !active.passthrough)) { // skipped only for a cached-only passthrough catalog that has no auth token
+    try {
+      var fresh = await _cohereFetchEndpointCatalog(active.endpointUrl, active.endpointAuth || '', !!active.passthrough);
+      fresh.pricingMenu = Array.isArray(active.pricingMenu) ? active.pricingMenu : []; // pricing menu is carried over from the previous catalog
+      _cohereEndpointCatalog = fresh;
+      _coherePersistEndpointCatalog();
+      return _cohereEndpointCatalog;
+    } catch (err) {
+      dlog('COHERE endpoint catalog refresh failed: ' + (err.message || err));
+      if (active.models && active.models.length > 0) return active; // refresh failed: keep serving the stale in-memory models
+    }
+  }
+  var persisted = _cohereLoadPersistedEndpointCatalog();
+  if (persisted && persisted.models && persisted.models.length > 0) return persisted; // returned as-is; not assigned to _cohereEndpointCatalog here
+  try { // last resort: probe the env-configured or default local Ollama endpoint
+    var fallback = await _cohereFetchEndpointCatalog(process.env.OLLAMA_HOST || process.env.OLLAMA_URL || 'http://localhost:11434', '', false);
+    _cohereEndpointCatalog = fallback;
+    _coherePersistEndpointCatalog();
+    return _cohereEndpointCatalog;
+  } catch (err2) {
+    dlog('COHERE local Ollama catalog unavailable: ' + (err2.message || err2));
+  }
+  return Object.assign({}, active || {}, { models: [] }); // nothing available: shallow copy of the active catalog with an empty models list
+}
+function _cohereEndpointSnapshot(catalog) { // Build a public summary of a catalog: source, flags, redacted URL, model count and timestamp; endpointAuth is never included.
+  var c = catalog || _cohereEndpointCatalog || {}; // defaults to the active catalog when none is passed
+  return {
+    source: String(c.source || 'unknown'),
+    passthrough: c.passthrough === true,
+    endpointUrl: _cohereRedactUrl(c.endpointUrl || ''),
+    modelCount: Array.isArray(c.models) ? c.models.length : 0, // counts all catalog models; the allowlist is not applied here
+    updatedAt: Number(c.updatedAt || 0) || 0,
+    cachedOnly: c.cachedOnly === true,
+  };
+}
+function _cohereSelectModel(catalog, tier) { // Choose a model name for a query tier from the allowlisted catalog models; returns { model, models }.
+  var models = _cohereApplyAllowlist(catalog && catalog.models || []);
+  if (models.length === 0) return { model: '', models: [] }; // no allowlisted models: empty selection
+  var passthrough = catalog && catalog.passthrough === true;
+  var gb = 1024 * 1024 * 1024;
+  var maxThresh = tier === 0 ? 8 * gb : tier === 1 ? 50 * gb : tier === 2 ? 100 * gb : Infinity; // upper size bound in bytes for tiers 0-2; unbounded for any other tier
+  var minSize = tier === 0 ? 0 : tier === 1 ? 4 * gb : tier === 2 ? 12 * gb : 25 * gb; // lower size bound in bytes per tier
+  models.sort(function(a, b) { // higher _chatScore first, then smaller size. NOTE(review): sorts in place; without an allowlist this is catalog.models itself — confirm the mutation is intended
+    var sa = a._chatScore || 1, sb = b._chatScore || 1;
+    if (sa !== sb) return sb - sa;
+    return (a.size || 0) - (b.size || 0);
+  });
+  var selected = '';
+  if (_cLastModel) { // prefer the last-used model when it is still in the allowed list
+    var warm = models.find(function(m) { return m.name === _cLastModel; });
+    if (warm && (passthrough || ((warm.size || 0) >= minSize && (warm.size || 0) <= maxThresh))) selected = warm.name; // size bounds are ignored for passthrough catalogs
+  }
+  if (!selected) {
+    var fit = passthrough // passthrough catalogs skip the size filter entirely
+      ? models
+      : models.filter(function(m) { return (m.size || 0) >= minSize && (m.size || 0) <= maxThresh; });
+    selected = fit.length > 0 ? fit[0].name : '';
+  }
+  if (!selected && models.length > 0) selected = models[0].name; // nothing fit the tier range: take the top-ranked model
+  return { model: selected, models: models };
+}
+async function _cohereDirectEndpointFallback(catalog, model, requestData) { // POST a non-streaming chat completion directly to the passthrough endpoint; returns { content, usage }.
+  if (!catalog || catalog.passthrough !== true || !catalog.endpointUrl) { // only passthrough catalogs with a URL may use this path
+    throw new Error('No passthrough endpoint is active for direct COHERE fallback');
+  }
+  var headers = { 'Content-Type': 'application/json' };
+  if (catalog.endpointAuth) headers['Authorization'] = 'Bearer ' + catalog.endpointAuth;
+  var messages = [];
+  if (requestData && Array.isArray(requestData.messages) && requestData.messages.length > 0) { // use the caller's messages when present
+    messages = requestData.messages;
+  } else { // otherwise wrap requestData.query as a single user message
+    messages = [{ role: 'user', content: String(requestData && requestData.query || '') }];
+  }
+  var maxTokens = Number(requestData && (requestData.maxTokens || requestData.max_tokens)) || 1024; // accepts maxTokens or max_tokens; defaults to 1024
+  var temperature = Number(requestData && requestData.temperature);
+  var body = {
+    model: model,
+    messages: messages,
+    stream: false,
+    max_tokens: maxTokens > 0 ? maxTokens : 1024, // non-positive values also fall back to 1024
+    temperature: Number.isFinite(temperature) ? temperature : 0.2, // 0.2 when the supplied value is missing or not a finite number
+    think: false,
+  };
+  var resp = await fetch(_cohereNormalizeBaseUrl(catalog.endpointUrl, true) + '/v1/chat/completions', {
+    method: 'POST',
+    headers: headers,
+    body: JSON.stringify(body),
+    signal: AbortSignal.timeout(120000), // 120s request timeout
+  });
+  if (!resp.ok) {
+    var errText = '';
+    try { errText = await resp.text(); } catch {} // best-effort read of the error body
+    throw new Error('passthrough /v1/chat/completions HTTP ' + resp.status + ': ' + errText.slice(0, 200));
+  }
+  var data = await resp.json();
+  var choices = data.choices || [];
+  var first = choices[0] && choices[0].message ? choices[0].message : {}; // only the first choice's message is used
+  var content = String(first.content || first.reasoning || '').replace(/<think>[\\s\\S]*?<\\/think>/g, '').trim();
+  var usage = data.usage || {};
+  return {
+    content: content,
+    usage: {
+      inputTokens: usage.prompt_tokens || usage.input_tokens || 0, // accept either usage field naming
+      outputTokens: usage.completion_tokens || usage.output_tokens || 0,
+    },
+  };
+}
+try { // At load time, adopt the persisted catalog as the active one if it contains any models.
+  var _cohereStartupCatalog = _cohereLoadPersistedEndpointCatalog();
+  if (_cohereStartupCatalog && _cohereStartupCatalog.models && _cohereStartupCatalog.models.length > 0) {
+    _cohereEndpointCatalog = _cohereStartupCatalog; // restored catalog has cachedOnly set and an empty endpointAuth
+  }
+} catch {} // startup restore is best-effort; on failure the default catalog stays in place
 async function _dhtPutBounded(dht, key, value, label) {
   var op = (async function() {
     for await (var _ of dht.put(key, value)) {}
@@ -13556,7 +13848,20 @@ async function handleCmd(cmd) {
         // WO-1.5: Publish capacity announcement on enable
         if (typeof _publishCapacityAnnouncement === 'function') {
-          try { _publishCapacityAnnouncement(); } catch {}
+          try { await _publishCapacityAnnouncement(); } catch {}
+        }
+        try {
+          var _ceCatalog = await _cohereGetModelCatalog({});
+          await _publishCapabilityRecord('cohere_inference', {
+            description: 'COHERE distributed inference provider',
+            pricing: 'free',
+            rateLimit: _sponsorLimits ? String(_sponsorLimits.maxRequestsPerMinute) + '/min' : 'provider-policy',
+            endpoint: _cohereEndpointSnapshot(_ceCatalog),
+            models: (_ceCatalog.models || []).map(function(m) { return m.name; }),
+            passthrough: _ceCatalog.passthrough === true,
+          });
+        } catch (_cePubErr) {
+          dlog('COHERE capability publish failed: ' + (_cePubErr.message || _cePubErr));
         }
         writeResp(id, { ok: true, output: 'COHERE inference handler enabled' });
         break;
@@ -13589,7 +13894,8 @@ async function handleCmd(cmd) {
           bytesOut: _cohereStats.bytesOut,
           modelsUsed: _cohereStats.modelsUsed,
           peersServed: _cohereStats.peersServed,
-          allowedModels: _cohereAllowedModels ? [..._cohereAllowedModels] : null
+          allowedModels: _cohereAllowedModels ? [..._cohereAllowedModels] : null,
+          endpoint: _cohereEndpointSnapshot(),
         };
         if (args.format === 'json' || args.json === true || args.json === 'true' || args.json === '1') {
           writeResp(id, { ok: true, output: JSON.stringify(_csSnapshot) });
@@ -13627,9 +13933,15 @@ async function handleCmd(cmd) {
           _csLines.push('  ' + _csPeers[_cpi][0].slice(0, 20) + '...: ' + _csPeers[_cpi][1] + ' queries');
         }
         _csLines.push('');
+        _csLines.push('── Endpoint ──');
+        var _csEndpoint = _cohereEndpointSnapshot();
+        _csLines.push('  Source: ' + _csEndpoint.source + (_csEndpoint.passthrough ? ' (passthrough)' : ''));
+        _csLines.push('  URL: ' + (_csEndpoint.endpointUrl || '(not set)'));
+        _csLines.push('  Models: ' + _csEndpoint.modelCount + (_csEndpoint.cachedOnly ? ' (cached)' : ''));
+        _csLines.push('');
         _csLines.push('── Model Allowlist ──');
         if (!_cohereAllowedModels) {
-          _csLines.push('  All downloaded models exposed (no filter)');
+          _csLines.push('  All endpoint models exposed (no filter)');
         } else {
           _csLines.push('  ' + [..._cohereAllowedModels].join(', '));
         }
@@ -13659,21 +13971,36 @@ async function handleCmd(cmd) {
         break;
       }
       case 'cohere_list_models': {
-        var _clmOllamaUrl = process.env.OLLAMA_HOST || 'http://localhost:11434';
-        var _clmModels = [];
-        try {
-          var _clmResp = await fetch(_clmOllamaUrl + '/api/tags');
-          var _clmData = await _clmResp.json();
-          _clmModels = (_clmData.models || []).map(function(m) { return { name: m.name, size: m.size || 0, family: m.details?.family || '' }; });
-        } catch {}
-        var _clmLines = ['── Downloaded Models ──'];
+        var _clmCatalog = await _cohereGetModelCatalog({ forceRefresh: args.refresh === 'true' || args.refresh === true || args.refresh === '1' });
+        var _clmModels = Array.isArray(_clmCatalog.models) ? _clmCatalog.models : [];
+        var _clmPayload = {
+          models: _clmModels.map(function(m) { return m.name; }),
+          exposedModels: _cohereApplyAllowlist(_clmModels).map(function(m) { return m.name; }),
+          modelDetails: _clmModels.map(function(m) {
+            return Object.assign({}, m, { exposed: !_cohereAllowedModels || _cohereAllowedModels.has(m.name) });
+          }),
+          source: _clmCatalog.source || 'unknown',
+          passthrough: _clmCatalog.passthrough === true,
+          endpoint: _cohereEndpointSnapshot(_clmCatalog),
+          allowedModels: _cohereAllowedModels ? [..._cohereAllowedModels] : null,
+        };
+        if (args.format === 'json' || args.json === true || args.json === 'true' || args.json === '1') {
+          writeResp(id, { ok: true, output: JSON.stringify(_clmPayload) });
+          break;
+        }
+        var _clmLines = ['── Endpoint Models ──'];
+        var _clmEndpoint = _cohereEndpointSnapshot(_clmCatalog);
+        _clmLines.push('  Source: ' + _clmEndpoint.source + (_clmEndpoint.passthrough ? ' (passthrough)' : ''));
+        _clmLines.push('  URL: ' + (_clmEndpoint.endpointUrl || '(not set)'));
+        _clmLines.push('');
         for (var _clmi = 0; _clmi < _clmModels.length; _clmi++) {
           var _clmM = _clmModels[_clmi];
           var _clmAllowed = !_cohereAllowedModels || _cohereAllowedModels.has(_clmM.name);
           var _clmSizeGB = (_clmM.size / (1024*1024*1024)).toFixed(1);
-          _clmLines.push('  ' + (_clmAllowed ? '[EXPOSED]' : '[HIDDEN] ') + ' ' + _clmM.name + ' (' + _clmSizeGB + 'GB' + (_clmM.family ? ', ' + _clmM.family : '') + ')');
+          var _clmMeta = _clmM.size > 0 ? _clmSizeGB + 'GB' : (_clmM.passthrough ? 'external' : 'size unknown');
+          _clmLines.push('  ' + (_clmAllowed ? '[EXPOSED]' : '[HIDDEN] ') + ' ' + _clmM.name + ' (' + _clmMeta + (_clmM.family ? ', ' + _clmM.family : '') + ')');
         }
-        if (_clmModels.length === 0) _clmLines.push('  (no models found — is Ollama running?)');
+        if (_clmModels.length === 0) _clmLines.push('  (no endpoint models found — check /endpoint and /expose passthrough)');
         _clmLines.push('');
         _clmLines.push(_cohereAllowedModels ? 'Allowlist: ' + [..._cohereAllowedModels].join(', ') : 'Allowlist: ALL (no filter active)');
         writeResp(id, { ok: true, output: _clmLines.join('\\n') });
@@ -14392,6 +14719,32 @@ async function handleCmd(cmd) {
           }
         }
+        var _exCohereCatalog = _cohereRememberEndpointCatalog({
+          source: isPassthrough ? 'openai-compatible' : 'ollama',
+          passthrough: isPassthrough,
+          endpointUrl: ollamaUrl,
+          endpointAuth: endpointAuth,
+          models: models,
+          pricingMenu: pricingMenu,
+        });
+        if (cohereActive) {
+          try {
+            await _publishCapabilityRecord('cohere_inference', {
+              description: 'COHERE distributed inference provider',
+              pricing: 'free',
+              rateLimit: _sponsorLimits ? String(_sponsorLimits.maxRequestsPerMinute) + '/min' : 'provider-policy',
+              endpoint: _cohereEndpointSnapshot(_exCohereCatalog),
+              models: (_exCohereCatalog.models || []).map(function(m) { return m.name; }),
+              passthrough: isPassthrough,
+            });
+          } catch (_exCoherePubErr) {
+            dlog('COHERE expose capability publish failed: ' + (_exCoherePubErr.message || _exCoherePubErr));
+          }
+          if (typeof _publishCapacityAnnouncement === 'function') {
+            try { await _publishCapacityAnnouncement(); } catch {}
+          }
+        }
         for (var pci = 0; pci < pricingMenu.length; pci++) {
           var capPricing = pricingMenu[pci];
           await _publishCapabilityRecord('inference:' + capPricing.model.replace(/[^a-zA-Z0-9._-]/g, '_'), {
@@ -15340,13 +15693,12 @@ process.on('unhandledRejection', (reason) => {
     }
     // ── COHERE distributed inference handler ─────────────────────────
-    // ── COHERE distributed inference handler ─────────────────────────
-    // Subscribe to nexus.cohere.query, process through local Ollama,
-    // publish response to nexus.cohere.response.
+    // Subscribe to nexus.cohere.query, process through the active Omnius
+    // endpoint catalog, publish response to nexus.cohere.response.
     // SECURITY INVARIANTS:
     //   1. Handler constructs ISOLATED messages — no history, no system prompt
-    //   2. Only /api/tags (read model list) and /api/chat (inference) are called
-    //   3. NEVER calls /api/pull, /api/delete, /api/push, /api/create, /api/copy
+    //   2. Model discovery is read-only: /api/tags or /v1/models
+    //   3. Inference uses /v1/run first; direct fallback is passthrough-only
     //   4. Model allowlist filters which models are served to remote queries
     //   5. Inbound queries scanned for leaked secrets
     if (_natsConn && _natsCodec) {
@@ -15534,89 +15886,34 @@ process.on('unhandledRejection', (reason) => {
             dlog('COHERE query: ' + _cData.queryId + ' — ' + (_cData.query || '').slice(0, 80) + ' [elected, score=' + _cBidScore.toFixed(2) + ', bids=' + _cBids.size + ']');
             const _cStart = Date.now();
-            // OLLAMA SAFETY: Only two endpoints are ever called:
-            //   GET  /api/tags  — read available models (READ-ONLY)
-            //   POST /api/chat  — run inference on existing model (READ-ONLY)
+            // Endpoint safety: COHERE model discovery is read-only.
+            //   Ollama providers: GET /api/tags only
+            //   Passthrough providers: GET /v1/models only
+            // Remote COHERE queries never call Ollama model-management endpoints.
             // The following are NEVER called from remote requests:
             //   POST /api/pull    — download model (BLOCKED)
             //   DELETE /api/delete — remove model (BLOCKED)
             //   POST /api/push    — upload model (BLOCKED)
             //   POST /api/create  — create model (BLOCKED)
             //   POST /api/copy    — copy model (BLOCKED)
-            const _cOllamaUrl = process.env.OLLAMA_HOST || 'http://localhost:11434';
+            var _cCatalog = null;
             let _cModel = '';
             try {
-              const _cTags = await fetch(_cOllamaUrl + '/api/tags').then(function(r) { return r.json(); });
-              // Filter out non-chat models: embeddings, image generators, vision-only
-              // Use family/families from Ollama model cards for capability detection
-              var _cAllModels = (_cTags.models || []).filter(function(m) {
-                var name = (m.name || '').toLowerCase();
-                var family = ((m.details && m.details.family) || '').toLowerCase();
-                var families = (m.details && m.details.families) || [];
-                var famStr = families.join(' ').toLowerCase();
-                // Exclude embeddings
-                if (/embed|nomic-bert/i.test(name) || /embed|nomic-bert/i.test(family)) return false;
-                // Exclude image generation models (flux, sd, sdxl, image-turbo, etc.)
-                if (/image|flux|stable.?diffusion|sdxl|sd[0-9]|dall|turbo.*image|image.*turbo/i.test(name)) return false;
-                if (/flux|diffusion/i.test(family)) return false;
-                // Exclude pure vision models (keep multimodal like qwen3-vl that also do chat)
-                if (/^clip$|^vit$/i.test(family)) return false;
-                return true;
-              });
-              // Score models for chat/tool capability: prefer qwen, then tool-capable families
-              var _cChatFamilies = { qwen35: 10, qwen35moe: 10, qwen3: 9, qwen3moe: 9, qwen3next: 9, nemotron_h_moe: 8, mistral3: 7, llama: 6, gemma3: 6, seed_oss: 5, phi2: 3 };
-              for (var _cmi = 0; _cmi < _cAllModels.length; _cmi++) {
-                var _cmFam = ((_cAllModels[_cmi].details && _cAllModels[_cmi].details.family) || '').toLowerCase();
-                _cAllModels[_cmi]._chatScore = _cChatFamilies[_cmFam] || 1;
-                // Boost omnius- prefixed models (known good configs)
-                if (/^omnius-/i.test(_cAllModels[_cmi].name)) _cAllModels[_cmi]._chatScore += 3;
-              }
-              // Apply model allowlist — only serve allowed models to remote queries
-              var _cModels = _cohereAllowedModels
-                ? _cAllModels.filter(function(m) { return _cohereAllowedModels.has(m.name); })
-                : _cAllModels;
+              _cCatalog = await _cohereGetModelCatalog({});
+              var _cSelected = _cohereSelectModel(_cCatalog, _cTier);
+              var _cModels = _cSelected.models || [];
               if (_cModels.length === 0 && _cohereAllowedModels) {
-                dlog('COHERE: no allowed models match downloaded models. Allowlist: ' + [..._cohereAllowedModels].join(', '));
+                dlog('COHERE: no allowed models match endpoint catalog. Allowlist: ' + [..._cohereAllowedModels].join(', '));
                 _cohereStats.queriesErrors++;
                 _saveStats();
                 continue;
               }
-              // CO-04/05: Model selection — tier determines minimum size, warm gets priority within tier
-              var _cGB = 1024 * 1024 * 1024;
-              // Tier thresholds: [max for trivial, max for moderate, max for complex, unlimited]
-              var _cMaxThresh = _cTier === 0 ? 8 * _cGB : _cTier === 1 ? 50 * _cGB : _cTier === 2 ? 100 * _cGB : Infinity;
-              // Minimum size: higher tiers REQUIRE bigger models
-              var _cMinSize = _cTier === 0 ? 0 : _cTier === 1 ? 4 * _cGB : _cTier === 2 ? 12 * _cGB : 25 * _cGB;
-              // Sort by chat capability score (desc), then size within same score (asc)
-              _cModels.sort(function(a, b) {
-                var sa = a._chatScore || 1, sb = b._chatScore || 1;
-                if (sa !== sb) return sb - sa; // higher chat score first
-                return (a.size || 0) - (b.size || 0); // then smaller first within same score
-              });
-              // Prefer warm model ONLY if it meets the minimum size for this tier
-              if (_cLastModel) {
-                var _cWarm = _cModels.find(function(m) { return m.name === _cLastModel; });
-                if (_cWarm && (_cWarm.size || 0) >= _cMinSize && (_cWarm.size || 0) <= _cMaxThresh) {
-                  _cModel = _cWarm.name;
-                }
-              }
-              // Otherwise pick best-scored model that fits the tier range
-              // Models already sorted by chatScore desc, so first match is best
-              if (!_cModel) {
-                var _cFit = _cModels.filter(function(m) { return (m.size || 0) >= _cMinSize && (m.size || 0) <= _cMaxThresh; });
-                // Pick the one with highest chat score (already sorted)
-                _cModel = _cFit.length > 0 ? _cFit[0].name : '';
-              }
-              // Fallback: if no model fits the range, take the best-scored available
-              if (!_cModel && _cModels.length > 0) {
-                _cModel = _cModels[0].name; // first = highest chatScore
-                dlog('COHERE: no model fits tier ' + _cTier + ' range [' + (_cMinSize / _cGB).toFixed(0) + 'GB-' + (_cMaxThresh / _cGB).toFixed(0) + 'GB], using best-scored: ' + _cModel);
-              }
-              dlog('COHERE routing: tier=' + ['trivial','moderate','complex','expert'][_cTier] + ' model=' + _cModel + ' (chatScore=' + ((_cModels.find(function(m){return m.name===_cModel})||{})._chatScore||'?') + ')');
-            } catch {}
-            if (!_cModel) { dlog('COHERE: no Ollama models available'); _cohereStats.queriesErrors++; _saveStats(); continue; }
+              _cModel = _cSelected.model || '';
+              dlog('COHERE routing: source=' + ((_cCatalog && _cCatalog.source) || 'unknown') + ' passthrough=' + (!!(_cCatalog && _cCatalog.passthrough)) + ' tier=' + ['trivial','moderate','complex','expert'][_cTier] + ' model=' + _cModel + ' (chatScore=' + ((_cModels.find(function(m){return m.name===_cModel})||{})._chatScore||'?') + ')');
+            } catch (_cCatalogErr) {
+              dlog('COHERE catalog error: ' + (_cCatalogErr.message || _cCatalogErr));
+            }
+            if (!_cModel) { dlog('COHERE: no endpoint models available'); _cohereStats.queriesErrors++; _saveStats(); continue; }
             try {
               // Scan inbound query for leaked secrets (defense-in-depth)
               const _cSecretPatterns = [/sk-[a-zA-Z0-9]{20,}/g, /ghp_[a-zA-Z0-9]{36,}/g, /AKIA[0-9A-Z]{16}/g];
@@ -15652,6 +15949,10 @@ process.on('unhandledRejection', (reason) => {
               if (_cApiAvailable) {
                 dlog('COHERE: routing through full AgenticRunner at ' + _cApiUrl + '/v1/run');
                 try {
+                  var _cRunEnv = {};
+                  if (_cCatalog && _cCatalog.endpointUrl) _cRunEnv.OMNIUS_BACKEND_URL = _cCatalog.endpointUrl;
+                  if (_cCatalog && _cCatalog.endpointAuth) _cRunEnv.OMNIUS_API_KEY = _cCatalog.endpointAuth;
+                  if (_cModel) _cRunEnv.OMNIUS_MODEL = _cModel;
                   var _cRunResp = await fetch(_cApiUrl + '/v1/run', {
                     method: 'POST',
                     headers: { 'Content-Type': 'application/json' },
@@ -15662,6 +15963,7 @@ process.on('unhandledRejection', (reason) => {
                       timeout_s: 90,
                       sandbox: 'none',
                       profile: 'cohere-mesh',
+                      env: _cRunEnv,
                     }),
                     signal: AbortSignal.timeout(120000),
                   });
@@ -15701,22 +16003,34 @@ process.on('unhandledRejection', (reason) => {
                   if (_cContent) {
                     dlog('COHERE: AgenticRunner responded (' + _cContent.length + ' chars)');
                   } else {
-                    dlog('COHERE: AgenticRunner returned empty — falling back to raw Ollama');
+                    dlog('COHERE: AgenticRunner returned empty');
                   }
                 } catch (_cRunErr) {
-                  dlog('COHERE: AgenticRunner error: ' + (_cRunErr.message || _cRunErr) + ' — falling back');
+                  dlog('COHERE: AgenticRunner error: ' + (_cRunErr.message || _cRunErr));
                 }
               } else {
-                dlog('COHERE: Omnius API not available at ' + _cApiUrl + ' — CANNOT process query (no raw Ollama fallback)');
-                _cContent = '[COHERE error] Omnius API server not running on this node. Start it with: omnius serve';
+                dlog('COHERE: Omnius API not available at ' + _cApiUrl);
+              }
+              // No raw local-Ollama fallback. If the user explicitly exposed an
+              // OpenAI-compatible passthrough endpoint, use that endpoint directly
+              // as a degraded fallback so COHERE still serves external providers.
+              if (!_cContent && _cCatalog && _cCatalog.passthrough === true) {
+                try {
+                  var _cDirect = await _cohereDirectEndpointFallback(_cCatalog, _cModel, _cData);
+                  _cContent = _cDirect.content || '';
+                  _cUsage = _cDirect.usage;
+                  if (_cContent) dlog('COHERE: direct passthrough fallback responded (' + _cContent.length + ' chars)');
+                } catch (_cDirectErr) {
+                  dlog('COHERE: direct passthrough fallback failed: ' + (_cDirectErr.message || _cDirectErr));
+                }
               }
-              // NO raw Ollama fallback — all queries MUST go through AgenticRunner
-              // If /v1/run failed or API unavailable, report the error instead of
-              // sending garbage responses without tools/context/system prompt.
               if (!_cContent) {
-                _cContent = '[COHERE error] AgenticRunner returned empty response. Check Omnius API server logs.';
-                dlog('COHERE: no content from AgenticRunner — reporting error (no raw Ollama fallback)');
+                _cContent = _cApiAvailable
+                  ? '[COHERE error] AgenticRunner returned empty response. Check Omnius API server logs.'
+                  : '[COHERE error] Omnius API server not running on this node. Start it with: omnius serve, or expose an OpenAI-compatible endpoint with /expose passthrough.';
+                dlog('COHERE: no content from AgenticRunner or passthrough endpoint');
               }
               const _cLatency = Date.now() - _cStart;
@@ -16072,24 +16386,8 @@ process.on('unhandledRejection', (reason) => {
       async function _publishCapacityAnnouncement() {
         if (!cohereActive || !_natsConn || !_natsCodec) return;
         try {
-          var _capOllamaUrl = process.env.OLLAMA_HOST || 'http://localhost:11434';
-          var _capModels = [];
-          try {
-            var _capTags = await fetch(_capOllamaUrl + '/api/tags').then(function(r) { return r.json(); });
-            _capModels = (_capTags.models || []).map(function(m) {
-              return {
-                name: m.name,
-                size: m.size || 0,
-                family: m.details ? m.details.family || '' : '',
-                parameterSize: m.details ? m.details.parameter_size || '' : '',
-                quantization: m.details ? m.details.quantization_level || '' : '',
-              };
-            });
-          } catch {}
-          // Filter by allowlist
-          if (_cohereAllowedModels) {
-            _capModels = _capModels.filter(function(m) { return _cohereAllowedModels.has(m.name); });
-          }
+          var _capCatalog = await _cohereGetModelCatalog({});
+          var _capModels = _cohereApplyAllowlist(_capCatalog.models || []);
           var _capMetrics = await _collectSysMetrics();
           // CO-02: Enriched per-model capacity — warm/cold, specialty, estimated latency
           var _capEnriched = _capModels.map(function(m) {
@@ -16122,6 +16420,7 @@ process.on('unhandledRejection', (reason) => {
             agentName: agentName,
             agentType: agentType,
             cohereActive: cohereActive,
+            endpoint: _cohereEndpointSnapshot(_capCatalog),
             models: _capEnriched,
             warmModel: _cLastModel || null,
             modelCount: _capEnriched.length,
@@ -16129,7 +16428,7 @@ process.on('unhandledRejection', (reason) => {
             totalVram: _capTotalVram,
             availableVram: _capAvailVram,
             specialties: _capSpecialties,
-            capabilities: ['inference'],  // future: web_search, code_execution
+            capabilities: ['inference', 'cohere_inference'],
             allowedModels: _cohereAllowedModels ? [..._cohereAllowedModels] : null,
             stats: {
               queriesAnswered: _cohereStats.queriesAnswered,
@@ -16198,6 +16497,7 @@ process.on('unhandledRejection', (reason) => {
               multiaddrs: [],
               timestamp: Date.now(),
               capabilities: _capModels.map(function(m) { return m.name; }),
+              endpoint: _cohereEndpointSnapshot(_capCatalog),
               identityCid: _idCid || undefined,
               identityHash: _idHash || undefined,
               identityVersion: _idVersion || undefined,
@@ -17131,11 +17431,15 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
           },
           format: {
             type: "string",
-            description: "For cohere_stats: set to 'json' for structured stats"
+            description: "For cohere_stats/cohere_list_models: set to 'json' for structured output"
           },
           json: {
             type: "string",
-            description: "For cohere_stats: set to '1' for structured stats"
+            description: "For cohere_stats/cohere_list_models: set to '1' for structured output"
+          },
+          refresh: {
+            type: "string",
+            description: "For cohere_list_models: set to '1' to refresh the active endpoint catalog"
           }
         },
         required: ["action"],
@@ -17282,7 +17586,7 @@ process.on('SIGINT', () => process.emit('SIGTERM'));
               result = await this.sendDaemonCmd("cohere_deny_model", { model: String(args.model ?? "") });
               break;
             case "cohere_list_models":
-              result = await this.sendDaemonCmd("cohere_list_models", {});
+              result = await this.sendDaemonCmd("cohere_list_models", { format: String(args.format ?? ""), json: String(args.json ?? ""), refresh: String(args.refresh ?? "") });
               break;
             case "ipfs_add":
               result = await this.sendDaemonCmd("ipfs_add", { content: String(args.content ?? args.message ?? "") });
@@ -604995,7 +605299,14 @@ function emptyCohereStats(isActive = false) {
     bytesOut: 0,
     modelsUsed: {},
     peersServed: {},
-    allowedModels: null
+    allowedModels: null,
+    endpoint: {
+      source: "unknown",
+      passthrough: false,
+      endpointUrl: "",
+      modelCount: 0,
+      updatedAt: 0
+    }
   };
 }
 function numberField(value2) {
@@ -605014,6 +605325,7 @@ function parseCohereStatsOutput(output, isActive = false) {
   try {
     const parsed = JSON.parse(output);
     const active = typeof parsed.active === "boolean" ? parsed.active : String(parsed.status ?? "").toLowerCase() === "active";
+    const endpointRaw = parsed.endpoint && typeof parsed.endpoint === "object" ? parsed.endpoint : {};
     return {
       status: active ? "active" : "inactive",
       active,
@@ -605029,7 +605341,15 @@ function parseCohereStatsOutput(output, isActive = false) {
       bytesOut: numberField(parsed.bytesOut),
       modelsUsed: mapNumberRecord(parsed.modelsUsed),
       peersServed: mapNumberRecord(parsed.peersServed),
-      allowedModels: Array.isArray(parsed.allowedModels) ? parsed.allowedModels.map(String) : null
+      allowedModels: Array.isArray(parsed.allowedModels) ? parsed.allowedModels.map(String) : null,
+      endpoint: {
+        source: String(endpointRaw.source ?? "unknown"),
+        passthrough: endpointRaw.passthrough === true,
+        endpointUrl: String(endpointRaw.endpointUrl ?? ""),
+        modelCount: numberField(endpointRaw.modelCount),
+        updatedAt: numberField(endpointRaw.updatedAt),
+        cachedOnly: endpointRaw.cachedOnly === true
+      }
     };
   } catch {
     return emptyCohereStats(isActive);
@@ -605054,10 +605374,11 @@ async function fetchCohereDashboardState(ctx3) {
     } catch {
     }
     try {
-      const r2 = await nexus.execute({ action: "cohere_list_models" });
+      const r2 = await nexus.execute({ action: "cohere_list_models", format: "json" });
       if (r2.success) {
         try {
-          state.modelList = JSON.parse(r2.output).models || [];
+          const parsed = JSON.parse(r2.output);
+          state.modelList = Array.isArray(parsed.models) ? parsed.models.map(String) : [];
         } catch {
           state.modelList = r2.output.split("\n").map((l2) => l2.trim()).filter(Boolean);
         }
@@ -605083,8 +605404,9 @@ function cohereStatusLines(stats, modelList) {
     `Sent out: ${stats.queriesSent} · avg latency ${stats.avgLatencyMs}ms`,
     `Data: in ${formatFileSize(stats.bytesIn)} · out ${formatFileSize(stats.bytesOut)}`,
     "",
-    `Models exposed: ${modelList.length}`,
-    `Allowlist: ${stats.allowedModels ? stats.allowedModels.join(", ") || "(empty)" : "all downloaded models"}`,
+    `Endpoint: ${stats.endpoint.source}${stats.endpoint.passthrough ? " passthrough" : ""}${stats.endpoint.endpointUrl ? ` · ${stats.endpoint.endpointUrl}` : ""}`,
+    `Models available: ${modelList.length || stats.endpoint.modelCount}${stats.endpoint.cachedOnly ? " (cached)" : ""}`,
+    `Allowlist: ${stats.allowedModels ? stats.allowedModels.join(", ") || "(empty)" : "all endpoint models"}`,
     `Top models: ${modelEntries.length ? modelEntries.slice(0, 5).map(([m2, n2]) => `${m2} (${n2})`).join(", ") : "none yet"}`,
     `Peers served: ${peerEntries.length ? peerEntries.slice(0, 5).map(([p2, n2]) => `${p2.slice(0, 20)} (${n2})`).join(", ") : "none yet"}`
   ];
@@ -605098,7 +605420,7 @@ async function showCohereDashboard(ctx3) {
   while (true) {
     const currentActive = ctx3.isCohere?.() ?? false;
     const toggleLabel = currentActive ? "Disable COHERE" : "Enable COHERE";
-    const toggleDetail = currentActive ? `Active — forwarding ${ctx3.config.model}` : "Join the distributed cognitive commons";
+    const toggleDetail = currentActive ? `Active — forwarding ${ctx3.config.model} via current endpoint` : "Join the distributed cognitive commons";
     const items = [
       {
         key: "hdr-status",
@@ -663757,7 +664079,7 @@ The user pasted a clipboard image saved at ${relPath}. Use the OCR, vision analy
           try {
             if (!commandCtx.isExposeActive?.()) {
               writeContent(
-                () => renderInfo("COHERE: exposing local inference to mesh...")
+                () => renderInfo("COHERE: exposing current endpoint to mesh...")
               );
               await commandCtx.exposeStart?.("passthrough");
             }

package/npm-shrinkwrap.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "name": "omnius",
-  "version": "1.0.154",
+  "version": "1.0.155",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
       "name": "omnius",
-      "version": "1.0.154",
+      "version": "1.0.155",
       "bundleDependencies": [
         "image-to-ascii"
       ],

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "omnius",
-  "version": "1.0.154",
+  "version": "1.0.155",
   "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
   "type": "module",
   "main": "./dist/index.js",