@antseed/cli 0.1.21 → 0.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,31 @@ function log(...args) {
10
10
  if (DEBUG())
11
11
  console.log('[proxy]', ...args);
12
12
  }
13
// Provider names tried, in this order, for Claude-family requests.
const CLAUDE_PROVIDER_PREFERENCE = ['claude-oauth', 'anthropic', 'claude-code'];
/**
 * Infer an ordered, de-duplicated list of provider names that should be
 * preferred for a request, based on the requested model name and protocol.
 *
 * - A `vendor/model` style name contributes `vendor` first.
 * - A request using the `anthropic-messages` protocol, or whose model name
 *   contains `claude`, additionally contributes CLAUDE_PROVIDER_PREFERENCE.
 *
 * @param {string|null|undefined} requestProtocol Detected request protocol.
 * @param {string|null|undefined} requestedModel Model name from the request.
 * @returns {string[]} Lower-cased provider names; empty when no model is given.
 */
function inferPreferredProvidersForRequest(requestProtocol, requestedModel) {
    // The model originates from a client-supplied body; anything that is not a
    // string (number, object, ...) is treated like a missing model instead of
    // throwing on `.trim()`.
    const model = typeof requestedModel === 'string' ? requestedModel.trim().toLowerCase() : '';
    if (model.length === 0) {
        return [];
    }
    const providers = [];
    const pushProvider = (value) => {
        const provider = typeof value === 'string' ? value.trim().toLowerCase() : '';
        if (provider.length > 0 && !providers.includes(provider)) {
            providers.push(provider);
        }
    };
    // "vendor/model" -> prefer "vendor". A leading slash yields no vendor.
    const slashIndex = model.indexOf('/');
    if (slashIndex > 0) {
        pushProvider(model.slice(0, slashIndex));
    }
    // `includes('claude')` already covers the `claude-` prefix case.
    if (requestProtocol === 'anthropic-messages' || model.includes('claude')) {
        for (const provider of CLAUDE_PROVIDER_PREFERENCE) {
            pushProvider(provider);
        }
    }
    return providers;
}
13
38
  function getExplicitProviderOverride(request) {
14
39
  const provider = request.headers['x-antseed-provider']?.trim().toLowerCase();
15
40
  return provider && provider.length > 0 ? provider : null;
@@ -21,23 +46,49 @@ function getExplicitPeerIdOverride(request, sessionPinnedPeerId) {
21
46
  return header;
22
47
  return sessionPinnedPeerId?.toLowerCase() ?? null;
23
48
  }
49
/**
 * Read the soft peer-affinity hint from the `x-antseed-prefer-peer` header.
 *
 * @param {{headers: Object}} request Request whose headers are inspected.
 * @returns {string|null} Trimmed, lower-cased header value, or null when the
 *   header is absent or blank.
 */
function getPreferredPeerIdHint(request) {
    const hint = request.headers['x-antseed-prefer-peer']?.trim().toLowerCase();
    // Both `undefined` (header missing) and '' (blank header) are falsy.
    return hint ? hint : null;
}
24
56
/**
 * Resolve the set of model-API protocols a peer's provider can speak.
 *
 * Resolution order:
 *  1. Protocols the peer advertises for exactly `requestedModel`.
 *  2. The union of protocols advertised for any model of that provider.
 *  3. `inferProviderDefaultModelApiProtocols(provider)` when the peer has no
 *     usable metadata for the provider.
 *
 * @param {Object} peer Peer record; reads `peerId` and `providerModelApiProtocols`.
 * @param {string} provider Provider name to look up on the peer.
 * @param {string|null|undefined} requestedModel Model name from the request.
 * @returns {string[]} De-duplicated protocol identifiers.
 */
function getPeerProviderProtocols(peer, provider, requestedModel) {
    const fromMetadata = peer.providerModelApiProtocols?.[provider]?.models;
    if (fromMetadata) {
        // Own-property lookup only: the model name comes from the client, and a
        // name such as "constructor" would otherwise resolve to an inherited
        // Function whose truthy `.length` passes the check and then breaks `.join`.
        const advertised = requestedModel && Object.prototype.hasOwnProperty.call(fromMetadata, requestedModel)
            ? fromMetadata[requestedModel]
            : undefined;
        if (Array.isArray(advertised) && advertised.length > 0) {
            log(`Model match: peer ${peer.peerId.slice(0, 8)} provider=${provider} model="${requestedModel}" → [${advertised.join(',')}]`);
            return Array.from(new Set(advertised));
        }
        const merged = Object.values(fromMetadata).flat();
        if (merged.length > 0) {
            const uniqueMerged = Array.from(new Set(merged));
            if (requestedModel) {
                log(`Model hint miss: peer ${peer.peerId.slice(0, 8)} provider=${provider} model="${requestedModel}" not in metadata; falling back to provider protocol set [${uniqueMerged.join(',')}]`);
            }
            return uniqueMerged;
        }
    }
    const inferred = inferProviderDefaultModelApiProtocols(provider);
    log(`No metadata: peer ${peer.peerId.slice(0, 8)} provider=${provider} → inferred [${inferred.join(',')}]`);
    return inferred;
}
75
/**
 * Tell whether a peer's provider advertises a non-empty model list that does
 * NOT contain `requestedModel`.
 *
 * Returns false whenever a verdict cannot be reached: no model requested, no
 * model metadata for the provider, or an empty advertised list.
 *
 * @param {Object} peer Peer record; reads `peerId` and `providerModelApiProtocols`.
 * @param {string} provider Provider name to look up on the peer.
 * @param {string|null|undefined} requestedModel Model name from the request.
 * @returns {boolean} True only for an explicit miss against advertised models.
 */
function isProviderModelExplicitlyUnsupported(peer, provider, requestedModel) {
    // The peer is not consulted at all when no model was requested.
    const modelMatrix = requestedModel
        ? peer.providerModelApiProtocols?.[provider]?.models
        : undefined;
    const advertisedModels = modelMatrix ? Object.keys(modelMatrix) : [];
    const isStrictMiss = advertisedModels.length > 0
        && !Object.prototype.hasOwnProperty.call(modelMatrix, requestedModel);
    if (!isStrictMiss) {
        return false;
    }
    log(`Model strict-miss: peer ${peer.peerId.slice(0, 8)} provider=${provider} does not advertise model="${requestedModel}"`);
    return true;
}
42
93
  function resolvePeerRoutePlan(peer, requestProtocol, requestedModel, explicitProvider) {
43
94
  const providers = peer.providers
@@ -56,6 +107,9 @@ function resolvePeerRoutePlan(peer, requestProtocol, requestedModel, explicitPro
56
107
  }
57
108
  let transformedFallback = null;
58
109
  for (const provider of candidates) {
110
+ if (!explicitProvider && isProviderModelExplicitlyUnsupported(peer, provider, requestedModel)) {
111
+ continue;
112
+ }
59
113
  const supportedProtocols = getPeerProviderProtocols(peer, provider, requestedModel);
60
114
  const selection = selectTargetProtocolForRequest(requestProtocol, supportedProtocols);
61
115
  if (!selection) {
@@ -241,6 +295,162 @@ function extractRequestedModel(request) {
241
295
  return null;
242
296
  }
243
297
  }
298
/**
 * Decode a byte body as UTF-8 JSON and return it only when the top-level
 * value is a plain (non-array) object.
 *
 * @param {Uint8Array|null|undefined} body Raw request/response bytes.
 * @returns {Object|null} The parsed object, or null for an empty body,
 *   malformed JSON, or a top-level value that is not an object.
 */
function decodeJsonBody(body) {
    if (!body || body.length === 0) {
        return null;
    }
    let decoded;
    try {
        decoded = JSON.parse(new TextDecoder().decode(body));
    }
    catch {
        // Undecodable or malformed payloads are reported as "no JSON body".
        return null;
    }
    const isRecord = Boolean(decoded) && typeof decoded === 'object' && !Array.isArray(decoded);
    return isRecord ? decoded : null;
}
313
/**
 * Build a compact, content-free description of a `messages` array for logs.
 *
 * Counts messages per role, per content kind (string / array / object /
 * other) and, for array content, per block `type`. Message text itself is
 * never included.
 *
 * @param {*} messagesRaw The `messages` value from a decoded request body.
 * @returns {string} `'msgShape=none'` when the input is not an array;
 *   otherwise a space-separated summary
 *   `msgShape=roles{..} content{..} blocks{..} firstRole=.. lastRole=.. invalidMsgs=N`.
 */
function summarizeMessageShape(messagesRaw) {
    if (!Array.isArray(messagesRaw)) {
        return 'msgShape=none';
    }
    const roleCounts = new Map();
    const contentKindCounts = new Map();
    const blockTypeCounts = new Map();
    let invalidMessages = 0;
    // `null` (not the string 'none') marks "no role seen yet", so a message
    // whose role is literally "none" is still recorded as the first role.
    let firstRole = null;
    let lastRole = null;
    const bump = (map, key) => {
        map.set(key, (map.get(key) ?? 0) + 1);
    };
    for (const entry of messagesRaw) {
        if (!entry || typeof entry !== 'object' || Array.isArray(entry)) {
            invalidMessages += 1;
            continue;
        }
        const role = typeof entry.role === 'string' && entry.role.trim().length > 0
            ? entry.role.trim().toLowerCase()
            : 'invalid-role';
        bump(roleCounts, role);
        if (firstRole === null) {
            firstRole = role;
        }
        lastRole = role;
        const content = entry.content;
        if (typeof content === 'string') {
            bump(contentKindCounts, 'string');
            continue;
        }
        if (Array.isArray(content)) {
            bump(contentKindCounts, 'array');
            for (const block of content) {
                if (!block || typeof block !== 'object' || Array.isArray(block)) {
                    bump(blockTypeCounts, 'invalid');
                    continue;
                }
                const blockType = typeof block.type === 'string'
                    ? block.type.trim().toLowerCase()
                    : 'missing-type';
                // A blank `type` string is reported the same as a missing one.
                bump(blockTypeCounts, blockType || 'missing-type');
            }
            continue;
        }
        if (content && typeof content === 'object') {
            bump(contentKindCounts, 'object');
            continue;
        }
        bump(contentKindCounts, 'other');
    }
    // Keys are sorted so the same shape always renders the same string.
    const joinMap = (map) => ([...map.entries()]
        .sort((left, right) => left[0].localeCompare(right[0]))
        .map(([key, value]) => `${key}:${String(value)}`)
        .join(','));
    const roleSummary = joinMap(roleCounts) || 'none';
    const contentSummary = joinMap(contentKindCounts) || 'none';
    const blockSummary = joinMap(blockTypeCounts) || 'none';
    return [
        `msgShape=roles{${roleSummary}}`,
        `content{${contentSummary}}`,
        `blocks{${blockSummary}}`,
        `firstRole=${firstRole ?? 'none'}`,
        `lastRole=${lastRole ?? 'none'}`,
        `invalidMsgs=${String(invalidMessages)}`,
    ].join(' ');
}
381
/**
 * Render a one-line, space-separated description of a request for logs.
 *
 * Always includes method, path, routing headers, content negotiation
 * headers, streaming flag, model and body size. When the body decodes as a
 * JSON object it also includes message/tool counts, a positive `max_tokens`
 * (or `maxTokens`), the sorted top-level keys and the message-shape summary.
 *
 * @param {Object} request Reads `method`, `path`, `headers` and `body`.
 * @returns {string} The summary line.
 */
function summarizeRequestShape(request) {
    // Headers are looked up under the lower-case name first, then the
    // canonical-case name, and compared case-insensitively.
    const readHeader = (lowerName, canonicalName) => (request.headers[lowerName] ?? request.headers[canonicalName] ?? '').toLowerCase();
    const contentType = readHeader('content-type', 'Content-Type');
    const accept = readHeader('accept', 'Accept');
    const providerHeader = request.headers['x-antseed-provider'] ?? 'none';
    const preferPeerHeader = request.headers['x-antseed-prefer-peer'] ?? 'none';
    const model = extractRequestedModel(request) ?? 'none';
    const wantsStreaming = requestWantsStreaming(request.headers, request.body);
    const parts = [
        `method=${request.method}`,
        `path=${request.path}`,
        `provider=${providerHeader}`,
        `preferPeer=${preferPeerHeader}`,
        `contentType=${contentType || 'none'}`,
        `accept=${accept || 'none'}`,
        `stream=${wantsStreaming}`,
        `model=${model}`,
        `bodyBytes=${request.body.length}`,
    ];
    const jsonBody = decodeJsonBody(request.body);
    if (jsonBody) {
        const { messages: messagesRaw, tools: toolsRaw } = jsonBody;
        const countOf = (value) => (Array.isArray(value) ? value.length : 0);
        const maxTokens = Number(jsonBody.max_tokens ?? jsonBody.maxTokens);
        const keys = Object.keys(jsonBody).sort().join(',');
        parts.push(`messages=${countOf(messagesRaw)}`, `tools=${countOf(toolsRaw)}`);
        if (Number.isFinite(maxTokens) && maxTokens > 0) {
            parts.push(`maxTokens=${Math.floor(maxTokens)}`);
        }
        if (keys.length > 0) {
            parts.push(`keys=[${keys}]`);
        }
        parts.push(summarizeMessageShape(messagesRaw));
    }
    return parts.join(' ');
}
420
/**
 * Produce a short, single-line description of an error response for logs.
 *
 * For JSON responses (per the `content-type` header) the first non-blank
 * string among `error.message`, `message` and `detail` is reported as
 * `message="…"`. Otherwise a snippet of the raw body is reported as
 * `body="…"`. Either text has whitespace runs collapsed to one space and is
 * capped at 280 characters (followed by `...`), so upstream-controlled text
 * can neither span several log lines nor grow without bound.
 *
 * @param {{headers: Object, body: Uint8Array}} response Upstream response.
 * @returns {string} `'empty response body'`, `message="…"` or `body="…"`.
 */
function summarizeErrorResponse(response) {
    const MAX_CHARS = 280;
    const clip = (text) => {
        const compact = text.replace(/\s+/g, ' ').trim();
        return compact.length > MAX_CHARS ? `${compact.slice(0, MAX_CHARS)}...` : compact;
    };
    const contentType = String(response.headers['content-type'] ?? '').toLowerCase();
    if (!response.body || response.body.length === 0) {
        return 'empty response body';
    }
    const raw = new TextDecoder().decode(response.body).trim();
    if (raw.length === 0) {
        return 'empty response body';
    }
    if (contentType.includes('application/json')) {
        try {
            const parsed = JSON.parse(raw);
            if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
                const nestedError = parsed.error && typeof parsed.error === 'object' && !Array.isArray(parsed.error)
                    ? parsed.error
                    : null;
                const message = [nestedError?.message, parsed.message, parsed.detail]
                    .find((candidate) => typeof candidate === 'string' && candidate.trim().length > 0);
                if (message !== undefined) {
                    return `message="${clip(message)}"`;
                }
            }
        }
        catch {
            // Malformed JSON: fall through to the raw snippet.
        }
    }
    return `body="${clip(raw)}"`;
}
244
454
/**
 * Pass through finite numbers; map everything else to null.
 *
 * @param {*} value Candidate value.
 * @returns {number|null} `value` when it is a finite number, otherwise null.
 */
function toFiniteNumberOrNull(value) {
    // Number.isFinite never coerces, so it already rejects non-number inputs.
    if (!Number.isFinite(value)) {
        return null;
    }
    return value;
}
@@ -401,13 +611,20 @@ export class BuyerProxy {
401
611
  _node;
402
612
  _port;
403
613
  _bgRefreshIntervalMs;
614
+ _peerCacheTtlMs;
404
615
  _pinnedPeerId;
405
616
  _cachedPeers = [];
617
+ _cacheLastUpdatedAtMs = 0;
618
+ _peerRefreshPromise = null;
619
+ _lastStaleCacheLogAtMs = 0;
406
620
  _bgRefreshHandle = null;
621
+ _lastSuccessfulPeerId = null;
622
+ _lastSuccessfulPeerByRouteKey = new Map();
407
623
  constructor(config) {
408
624
  this._node = config.node;
409
625
  this._port = config.port;
410
626
  this._bgRefreshIntervalMs = config.backgroundRefreshIntervalMs ?? 5 * 60_000;
627
+ this._peerCacheTtlMs = Math.max(0, config.peerCacheTtlMs ?? 30_000);
411
628
  this._pinnedPeerId = config.pinnedPeerId?.toLowerCase();
412
629
  this._server = createServer((req, res) => {
413
630
  this._handleRequest(req, res).catch((err) => {
@@ -440,34 +657,60 @@ export class BuyerProxy {
440
657
  }
441
658
  _startBackgroundRefresh() {
442
659
  this._bgRefreshHandle = setInterval(() => {
443
- this._node.discoverPeers().then((peers) => {
444
- if (peers.length > 0) {
445
- this._mergePeers(peers);
446
- }
447
- }).catch(() => { });
660
+ void this._refreshPeersNow().catch(() => {
661
+ // background refresh failure is non-fatal
662
+ });
448
663
  }, this._bgRefreshIntervalMs);
449
664
  }
450
- _mergePeers(incoming) {
451
- const existing = new Map(this._cachedPeers.map((p) => [p.peerId, p]));
452
- let added = 0;
453
- for (const p of incoming) {
454
- if (!existing.has(p.peerId)) {
455
- existing.set(p.peerId, p);
456
- added++;
457
- }
458
- }
459
- if (added > 0) {
460
- this._cachedPeers = Array.from(existing.values());
461
- log(`[background] Merged ${added} new peer(s) into cache (total: ${this._cachedPeers.length})`);
462
- }
665
+ _replacePeers(incoming) {
666
+ this._cachedPeers = incoming;
667
+ this._cacheLastUpdatedAtMs = Date.now();
463
668
  }
464
669
  _evictPeer(peerId) {
465
670
  const before = this._cachedPeers.length;
466
671
  this._cachedPeers = this._cachedPeers.filter((p) => p.peerId !== peerId);
467
672
  if (this._cachedPeers.length < before) {
673
+ this._cacheLastUpdatedAtMs = Date.now();
468
674
  log(`Evicted failing peer ${peerId.slice(0, 12)}... from cache (${this._cachedPeers.length} remaining)`);
469
675
  }
470
676
  }
677
+ _rememberSuccessfulPeer(routeKey, peerId) {
678
+ this._lastSuccessfulPeerId = peerId;
679
+ this._lastSuccessfulPeerByRouteKey.set(routeKey, peerId);
680
+ // Keep map bounded to prevent unbounded growth from long-running sessions.
681
+ const MAX_ROUTE_HISTORY = 200;
682
+ if (this._lastSuccessfulPeerByRouteKey.size > MAX_ROUTE_HISTORY) {
683
+ const oldestKey = this._lastSuccessfulPeerByRouteKey.keys().next().value;
684
+ if (typeof oldestKey === 'string') {
685
+ this._lastSuccessfulPeerByRouteKey.delete(oldestKey);
686
+ }
687
+ }
688
+ }
689
+ _forgetSuccessfulPeer(routeKey, peerId) {
690
+ const rememberedForRoute = this._lastSuccessfulPeerByRouteKey.get(routeKey);
691
+ if (rememberedForRoute === peerId) {
692
+ this._lastSuccessfulPeerByRouteKey.delete(routeKey);
693
+ }
694
+ if (this._lastSuccessfulPeerId === peerId) {
695
+ this._lastSuccessfulPeerId = null;
696
+ }
697
+ }
698
+ _buildRouteKey(path, requestProtocol, requestedModel, explicitProvider) {
699
+ const normalizedPath = path.split('?')[0]?.trim().toLowerCase() ?? '/';
700
+ const pathGroup = (normalizedPath.startsWith('/v1/messages')
701
+ ? '/v1/messages'
702
+ : normalizedPath.startsWith('/v1/chat/completions')
703
+ ? '/v1/chat/completions'
704
+ : normalizedPath.startsWith('/v1/models')
705
+ ? '/v1/models'
706
+ : normalizedPath);
707
+ return [
708
+ pathGroup,
709
+ requestProtocol ?? 'unknown-protocol',
710
+ requestedModel ?? 'unknown-model',
711
+ explicitProvider ?? 'auto-provider',
712
+ ].join('|');
713
+ }
471
714
  async _readLocalSeederFallback() {
472
715
  try {
473
716
  const raw = await readFile(DAEMON_STATE_FILE, 'utf-8');
@@ -511,27 +754,62 @@ export class BuyerProxy {
511
754
  return null;
512
755
  }
513
756
  }
514
- async _getPeers() {
515
- // Return cache immediately — no TTL expiry; peers are evicted on failure
516
- // and refreshed in the background on a fixed interval.
517
- if (this._cachedPeers.length > 0) {
518
- return this._cachedPeers;
519
- }
520
- // Cache is empty — must block on discovery (first request or all peers evicted)
757
+ async _discoverAndCachePeers() {
521
758
  const localSeeder = await this._readLocalSeederFallback();
522
759
  if (localSeeder) {
523
- this._cachedPeers = [localSeeder];
760
+ this._replacePeers([localSeeder]);
524
761
  log(`Using local seeder ${localSeeder.peerId.slice(0, 12)}... @ ${localSeeder.publicAddress} (skipping DHT lookup)`);
525
762
  return this._cachedPeers;
526
763
  }
527
764
  log('Discovering peers via DHT...');
528
765
  const peers = await this._node.discoverPeers();
766
+ this._replacePeers(peers);
529
767
  if (peers.length > 0) {
530
- this._cachedPeers = peers;
531
768
  log(`Found ${peers.length} peer(s)`);
532
769
  }
533
770
  return peers;
534
771
  }
772
+ async _refreshPeersNow() {
773
+ if (this._peerRefreshPromise) {
774
+ return this._peerRefreshPromise;
775
+ }
776
+ const previousCachedPeers = this._cachedPeers;
777
+ this._peerRefreshPromise = (async () => {
778
+ const peers = await this._discoverAndCachePeers();
779
+ if (peers.length === 0 && previousCachedPeers.length > 0) {
780
+ // Preserve stale cache as fallback when discovery transiently fails.
781
+ log('Discovery returned 0 peers; keeping previous cached peers as fallback.');
782
+ this._replacePeers(previousCachedPeers);
783
+ return previousCachedPeers;
784
+ }
785
+ return peers;
786
+ })().finally(() => {
787
+ this._peerRefreshPromise = null;
788
+ });
789
+ return this._peerRefreshPromise;
790
+ }
791
+ async _getPeers(options) {
792
+ const forceRefresh = options?.forceRefresh === true;
793
+ const cacheAgeMs = Date.now() - this._cacheLastUpdatedAtMs;
794
+ const cacheFresh = this._cacheLastUpdatedAtMs > 0 && cacheAgeMs <= this._peerCacheTtlMs;
795
+ if (forceRefresh) {
796
+ log('Forcing peer refresh before routing.');
797
+ return this._refreshPeersNow();
798
+ }
799
+ if (this._cachedPeers.length > 0) {
800
+ if (cacheFresh) {
801
+ return this._cachedPeers;
802
+ }
803
+ const now = Date.now();
804
+ if (now - this._lastStaleCacheLogAtMs >= 10_000) {
805
+ this._lastStaleCacheLogAtMs = now;
806
+ log(`Peer cache stale (${cacheAgeMs}ms old); routing from cached peers.`);
807
+ }
808
+ return this._cachedPeers;
809
+ }
810
+ // No cached peers yet — block on initial discovery.
811
+ return this._refreshPeersNow();
812
+ }
535
813
  _formatPeerSelectionDiagnostics(peers) {
536
814
  if (peers.length === 0) {
537
815
  return 'No peers discovered.';
@@ -580,6 +858,20 @@ export class BuyerProxy {
580
858
  headers,
581
859
  body: new Uint8Array(body),
582
860
  };
861
+ const clientAbortController = new AbortController();
862
+ const onClientAbort = () => {
863
+ if (clientAbortController.signal.aborted) {
864
+ return;
865
+ }
866
+ clientAbortController.abort();
867
+ log(`Client disconnected; aborting upstream request reqId=${serializedReq.requestId.slice(0, 8)}`);
868
+ };
869
+ req.once('aborted', onClientAbort);
870
+ res.once('close', () => {
871
+ if (!res.writableEnded) {
872
+ onClientAbort();
873
+ }
874
+ });
583
875
  // Discover peers
584
876
  const peers = await this._getPeers();
585
877
  if (peers.length === 0) {
@@ -590,11 +882,25 @@ export class BuyerProxy {
590
882
  }
591
883
  const requestProtocol = detectRequestModelApiProtocol(serializedReq);
592
884
  const requestedModel = extractRequestedModel(serializedReq);
885
+ log(`Routing: protocol=${requestProtocol ?? 'null'} model=${requestedModel ?? 'null'}`);
593
886
  const explicitProvider = getExplicitProviderOverride(serializedReq);
594
887
  const explicitPeerId = getExplicitPeerIdOverride(serializedReq, this._pinnedPeerId);
888
+ const preferredPeerId = getPreferredPeerIdHint(serializedReq);
889
+ log(`Routing hints: provider=${explicitProvider ?? 'auto'} pin-peer=${explicitPeerId ?? 'none'} prefer-peer=${preferredPeerId ?? 'none'}`);
890
+ const routeKey = this._buildRouteKey(serializedReq.path, requestProtocol, requestedModel, explicitProvider);
595
891
  const { candidatePeers, routePlanByPeerId, } = selectCandidatePeersForRouting(peers, requestProtocol, requestedModel, explicitProvider);
596
- if (candidatePeers.length === 0) {
597
- const diagnostics = this._formatPeerSelectionDiagnostics(peers);
892
+ let routingPeers = candidatePeers;
893
+ let routingPlans = routePlanByPeerId;
894
+ let discoveredPeers = peers;
895
+ if (routingPeers.length === 0) {
896
+ // One forced refresh handles stale-cache routing mismatches (e.g. missing provider/model updates).
897
+ discoveredPeers = await this._getPeers({ forceRefresh: true });
898
+ const refreshedSelection = selectCandidatePeersForRouting(discoveredPeers, requestProtocol, requestedModel, explicitProvider);
899
+ routingPeers = refreshedSelection.candidatePeers;
900
+ routingPlans = refreshedSelection.routePlanByPeerId;
901
+ }
902
+ if (routingPeers.length === 0) {
903
+ const diagnostics = this._formatPeerSelectionDiagnostics(discoveredPeers);
598
904
  res.writeHead(502, { 'content-type': 'text/plain' });
599
905
  if (requestProtocol) {
600
906
  const protocolLabel = requestProtocol;
@@ -606,21 +912,40 @@ export class BuyerProxy {
606
912
  }
607
913
  return;
608
914
  }
915
+ log(`Routing candidates: ${routingPeers.length} peer(s)`);
609
916
  // Select peer: explicit pin bypasses the router (and retry)
610
917
  const router = this._node.router;
611
- const RETRYABLE_STATUS_CODES = new Set([400, 404, 408, 429, 500, 502, 503, 504]);
918
+ const RETRYABLE_STATUS_CODES = new Set([408, 429, 500, 502, 503, 504]);
612
919
  if (explicitPeerId) {
613
- const selectedPeer = candidatePeers.find((p) => p.peerId.toLowerCase() === explicitPeerId) ?? null;
920
+ let pinnedRoutingPeers = routingPeers;
921
+ let pinnedRoutePlans = routingPlans;
922
+ let selectedPeer = pinnedRoutingPeers.find((p) => p.peerId.toLowerCase() === explicitPeerId) ?? null;
923
+ if (!selectedPeer) {
924
+ log(`Pinned peer ${explicitPeerId.slice(0, 12)}... not in current candidate set; forcing refresh.`);
925
+ discoveredPeers = await this._getPeers({ forceRefresh: true });
926
+ const refreshedSelection = selectCandidatePeersForRouting(discoveredPeers, requestProtocol, requestedModel, explicitProvider);
927
+ pinnedRoutingPeers = refreshedSelection.candidatePeers;
928
+ pinnedRoutePlans = refreshedSelection.routePlanByPeerId;
929
+ selectedPeer = pinnedRoutingPeers.find((p) => p.peerId.toLowerCase() === explicitPeerId) ?? null;
930
+ }
614
931
  if (!selectedPeer) {
615
932
  const source = serializedReq.headers['x-antseed-pin-peer'] ? 'x-antseed-pin-peer header' : '--peer flag';
933
+ const peerDiscovered = discoveredPeers.some((peer) => peer.peerId.toLowerCase() === explicitPeerId);
934
+ const protocolLabel = requestProtocol ? `protocol=${requestProtocol}` : 'protocol=unknown';
935
+ const providerLabel = explicitProvider ? `provider=${explicitProvider}` : 'provider=auto';
936
+ const modelLabel = requestedModel ? `model=${requestedModel}` : 'model=none';
937
+ const mismatchHint = peerDiscovered
938
+ ? `Peer is discoverable but filtered as incompatible (${protocolLabel}, ${providerLabel}, ${modelLabel}).`
939
+ : 'Peer is not discoverable right now.';
616
940
  log(`Pinned peer ${explicitPeerId.slice(0, 12)}... not found in candidate list (${source})`);
617
941
  res.writeHead(502, { 'content-type': 'text/plain' });
618
- res.end(`Pinned peer ${explicitPeerId.slice(0, 12)}... is not available or does not support this request.`);
942
+ res.end(`Pinned peer ${explicitPeerId.slice(0, 12)}... is not available or does not support this request. ${mismatchHint}`);
619
943
  return;
620
944
  }
621
945
  log(`Using pinned peer ${selectedPeer.peerId.slice(0, 12)}...`);
622
- const result = await this._dispatchToPeer(res, serializedReq, selectedPeer, routePlanByPeerId, requestProtocol, requestedModel, explicitProvider, router, RETRYABLE_STATUS_CODES);
946
+ const result = await this._dispatchToPeer(res, serializedReq, selectedPeer, routeKey, pinnedRoutePlans, requestProtocol, requestedModel, explicitProvider, router, RETRYABLE_STATUS_CODES, clientAbortController.signal);
623
947
  if (!result.done) {
948
+ this._forgetSuccessfulPeer(routeKey, selectedPeer.peerId);
624
949
  // Pinned peer returned a retryable error, but we don't retry — send error to client
625
950
  res.writeHead(result.statusCode, result.responseHeaders);
626
951
  res.end(result.responseBody);
@@ -630,16 +955,80 @@ export class BuyerProxy {
630
955
  // Non-pinned: retry with failover on provider errors
631
956
  const MAX_ATTEMPTS = 3;
632
957
  const triedPeerIds = new Set();
958
+ const preferredProviders = explicitProvider
959
+ ? []
960
+ : inferPreferredProvidersForRequest(requestProtocol, requestedModel);
961
+ const hasPreferredProviderCandidate = preferredProviders.length > 0
962
+ && routingPeers.some((peer) => {
963
+ const provider = routingPlans.get(peer.peerId)?.provider?.trim().toLowerCase();
964
+ return Boolean(provider && preferredProviders.includes(provider));
965
+ });
966
+ const restrictFailoverToPreferredProviders = preferredProviders.length > 0 && hasPreferredProviderCandidate;
967
+ if (restrictFailoverToPreferredProviders) {
968
+ log(`Provider-family failover lock active: [${preferredProviders.join(',')}]`);
969
+ }
633
970
  let lastStatusCode = 502;
634
971
  let lastResponseBody = null;
635
972
  let lastResponseHeaders = { 'content-type': 'text/plain' };
636
973
  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
637
- const availableCandidates = candidatePeers.filter((p) => !triedPeerIds.has(p.peerId));
974
+ const availableCandidates = routingPeers.filter((peer) => {
975
+ if (triedPeerIds.has(peer.peerId)) {
976
+ return false;
977
+ }
978
+ if (!restrictFailoverToPreferredProviders) {
979
+ return true;
980
+ }
981
+ const provider = routingPlans.get(peer.peerId)?.provider?.trim().toLowerCase();
982
+ return Boolean(provider && preferredProviders.includes(provider));
983
+ });
638
984
  if (availableCandidates.length === 0)
639
985
  break;
640
986
  let selectedPeer = null;
641
- // Prefer local peers on first attempt
987
+ // Prefer a recently successful peer for the same request route.
642
988
  if (attempt === 0) {
989
+ const routePreferredPeerId = this._lastSuccessfulPeerByRouteKey.get(routeKey);
990
+ if (routePreferredPeerId) {
991
+ const remembered = availableCandidates.find((peer) => peer.peerId === routePreferredPeerId) ?? null;
992
+ if (remembered) {
993
+ selectedPeer = remembered;
994
+ log(`Reusing last successful route peer ${selectedPeer.peerId.slice(0, 12)}...`);
995
+ }
996
+ }
997
+ }
998
+ // Fallback to the latest globally successful peer.
999
+ if (!selectedPeer && attempt === 0 && this._lastSuccessfulPeerId && !requestedModel) {
1000
+ const remembered = availableCandidates.find((peer) => peer.peerId === this._lastSuccessfulPeerId) ?? null;
1001
+ if (remembered) {
1002
+ selectedPeer = remembered;
1003
+ log(`Reusing last successful peer ${selectedPeer.peerId.slice(0, 12)}...`);
1004
+ }
1005
+ }
1006
+ // Soft peer affinity: try caller-preferred peer first, but allow normal fallback.
1007
+ if (!selectedPeer && attempt === 0 && preferredPeerId) {
1008
+ const preferred = availableCandidates.find((peer) => peer.peerId.toLowerCase() === preferredPeerId) ?? null;
1009
+ if (preferred) {
1010
+ selectedPeer = preferred;
1011
+ log(`Preferring requested peer ${selectedPeer.peerId.slice(0, 12)}...`);
1012
+ }
1013
+ }
1014
+ // Strongly prefer providers that match the requested model family (e.g. claude-* -> claude/anthropic providers).
1015
+ if (!selectedPeer && attempt === 0 && preferredProviders.length > 0) {
1016
+ const providerMatchedPeers = availableCandidates.filter((peer) => {
1017
+ const plannedProvider = routingPlans.get(peer.peerId)?.provider?.trim().toLowerCase();
1018
+ return plannedProvider ? preferredProviders.includes(plannedProvider) : false;
1019
+ });
1020
+ if (providerMatchedPeers.length > 0) {
1021
+ selectedPeer = router
1022
+ ? router.selectPeer(serializedReq, providerMatchedPeers)
1023
+ : providerMatchedPeers[0] ?? null;
1024
+ if (selectedPeer) {
1025
+ const plannedProvider = routingPlans.get(selectedPeer.peerId)?.provider ?? 'unknown';
1026
+ log(`Preferring model-matched provider "${plannedProvider}" for model "${requestedModel ?? 'unknown'}"`);
1027
+ }
1028
+ }
1029
+ }
1030
+ // Prefer local peers on first attempt
1031
+ if (!selectedPeer && attempt === 0) {
643
1032
  const localPeers = availableCandidates.filter((peer) => isLoopbackPeer(peer));
644
1033
  if (localPeers.length > 0) {
645
1034
  selectedPeer = router
@@ -650,6 +1039,26 @@ export class BuyerProxy {
650
1039
  }
651
1040
  }
652
1041
  }
1042
+ // Prefer peers that can serve the request protocol directly without adapter transform.
1043
+ if (!selectedPeer && requestProtocol === 'anthropic-messages') {
1044
+ const shouldPreferDirect = !requestedModel || /claude|anthropic/i.test(requestedModel);
1045
+ if (shouldPreferDirect) {
1046
+ const directPeers = availableCandidates.filter((peer) => {
1047
+ const plan = routingPlans.get(peer.peerId);
1048
+ if (!plan)
1049
+ return false;
1050
+ return !plan.selection || !plan.selection.requiresTransform;
1051
+ });
1052
+ if (directPeers.length > 0) {
1053
+ selectedPeer = router
1054
+ ? router.selectPeer(serializedReq, directPeers)
1055
+ : directPeers[0] ?? null;
1056
+ if (selectedPeer) {
1057
+ log(`Preferring direct protocol peer ${selectedPeer.peerId.slice(0, 12)}...`);
1058
+ }
1059
+ }
1060
+ }
1061
+ }
653
1062
  if (!selectedPeer) {
654
1063
  selectedPeer = router
655
1064
  ? router.selectPeer(serializedReq, availableCandidates)
@@ -658,9 +1067,10 @@ export class BuyerProxy {
658
1067
  if (!selectedPeer)
659
1068
  break;
660
1069
  triedPeerIds.add(selectedPeer.peerId);
661
- const result = await this._dispatchToPeer(res, serializedReq, selectedPeer, routePlanByPeerId, requestProtocol, requestedModel, explicitProvider, router, RETRYABLE_STATUS_CODES);
1070
+ const result = await this._dispatchToPeer(res, serializedReq, selectedPeer, routeKey, routingPlans, requestProtocol, requestedModel, explicitProvider, router, RETRYABLE_STATUS_CODES, clientAbortController.signal);
662
1071
  if (result.done)
663
1072
  return;
1073
+ this._forgetSuccessfulPeer(routeKey, selectedPeer.peerId);
664
1074
  // Request failed with a retryable error — try another peer
665
1075
  lastStatusCode = result.statusCode;
666
1076
  lastResponseBody = result.responseBody;
@@ -677,7 +1087,7 @@ export class BuyerProxy {
677
1087
  res.end(lastResponseBody);
678
1088
  }
679
1089
  else {
680
- const diagnostics = this._formatPeerSelectionDiagnostics(candidatePeers);
1090
+ const diagnostics = this._formatPeerSelectionDiagnostics(routingPeers);
681
1091
  log('No peers available for request');
682
1092
  res.writeHead(502, { 'content-type': 'text/plain' });
683
1093
  res.end(`Router could not select a suitable peer. ${diagnostics}`);
@@ -689,13 +1099,13 @@ export class BuyerProxy {
689
1099
  * was sent to the client (success or non-retryable error), or retry info if the
690
1100
  * caller should try another peer.
691
1101
  */
692
- async _dispatchToPeer(res, serializedReq, selectedPeer, routePlanByPeerId, requestProtocol, requestedModel, explicitProvider, router, retryableStatusCodes) {
1102
+ async _dispatchToPeer(res, serializedReq, selectedPeer, routeKey, routePlanByPeerId, requestProtocol, requestedModel, explicitProvider, router, retryableStatusCodes, requestSignal) {
693
1103
  const selectedRoutePlan = routePlanByPeerId.get(selectedPeer.peerId)
694
1104
  ?? resolvePeerRoutePlan(selectedPeer, requestProtocol, requestedModel, explicitProvider);
695
1105
  if (!selectedRoutePlan) {
696
1106
  return { done: false, statusCode: 502, responseBody: Buffer.from('No compatible provider route'), responseHeaders: { 'content-type': 'text/plain' }, errorMessage: null };
697
1107
  }
698
- const { 'x-antseed-pin-peer': _pinPeer, ...headersForPeer } = serializedReq.headers;
1108
+ const { 'x-antseed-pin-peer': _pinPeer, 'x-antseed-prefer-peer': _preferPeer, ...headersForPeer } = serializedReq.headers;
699
1109
  let requestForPeer = {
700
1110
  ...serializedReq,
701
1111
  headers: {
@@ -734,6 +1144,7 @@ export class BuyerProxy {
734
1144
  return { done: true };
735
1145
  }
736
1146
  }
1147
+ log(`Outbound request shape: ${summarizeRequestShape(requestForPeer)}`);
737
1148
  log(`Routing to peer ${selectedPeer.peerId.slice(0, 12)}...`);
738
1149
  // Forward through P2P
739
1150
  const wantsStreaming = !forceDisableUpstreamStreaming
@@ -748,7 +1159,12 @@ export class BuyerProxy {
748
1159
  return;
749
1160
  streamed = true;
750
1161
  const streamingHeaders = attachStreamingAntseedHeaders(startResponse.headers, selectedPeer, requestForPeer.requestId);
1162
+ // Ensure SSE-friendly headers so intermediaries don't buffer
1163
+ /* streamingHeaders['cache-control'] = 'no-cache, no-transform'
1164
+ streamingHeaders['x-accel-buffering'] = 'no' */
751
1165
  res.writeHead(startResponse.statusCode, streamingHeaders);
1166
+ // Disable Nagle's algorithm on the underlying socket for low-latency streaming
1167
+ // res.socket?.setNoDelay(true)
752
1168
  if (startResponse.body.length > 0) {
753
1169
  res.write(Buffer.from(startResponse.body));
754
1170
  }
@@ -760,9 +1176,12 @@ export class BuyerProxy {
760
1176
  res.write(Buffer.from(chunk.data));
761
1177
  }
762
1178
  },
763
- });
1179
+ }, { signal: requestSignal });
764
1180
  const latencyMs = Date.now() - startTime;
765
1181
  log(`Response: ${response.statusCode} (${latencyMs}ms, ${response.body.length} bytes)`);
1182
+ if (response.statusCode >= 400) {
1183
+ log(`Upstream error detail: ${summarizeErrorResponse(response)}`);
1184
+ }
766
1185
  const telemetry = computeResponseTelemetry(requestForPeer, response.headers, response.body, selectedPeer);
767
1186
  if (router) {
768
1187
  router.onResult(selectedPeer, {
@@ -773,6 +1192,9 @@ export class BuyerProxy {
773
1192
  }
774
1193
  if (streamed) {
775
1194
  // Headers already sent to client, can't retry
1195
+ if (response.statusCode >= 200 && response.statusCode < 400) {
1196
+ this._rememberSuccessfulPeer(routeKey, selectedPeer.peerId);
1197
+ }
776
1198
  if (!res.writableEnded) {
777
1199
  res.end();
778
1200
  }
@@ -783,17 +1205,27 @@ export class BuyerProxy {
783
1205
  if (retryableStatusCodes.has(response.statusCode)) {
784
1206
  return { done: false, statusCode: response.statusCode, responseBody: Buffer.from(response.body), responseHeaders, errorMessage: null };
785
1207
  }
1208
+ if (response.statusCode >= 200 && response.statusCode < 400) {
1209
+ this._rememberSuccessfulPeer(routeKey, selectedPeer.peerId);
1210
+ }
786
1211
  res.writeHead(response.statusCode, responseHeaders);
787
1212
  res.end(Buffer.from(response.body));
788
1213
  return { done: true };
789
1214
  }
790
1215
  else {
791
- let response = await this._node.sendRequest(selectedPeer, requestForPeer);
1216
+ const upstreamResponse = await this._node.sendRequest(selectedPeer, requestForPeer, { signal: requestSignal });
1217
+ if (upstreamResponse.statusCode >= 400) {
1218
+ log(`Upstream raw error detail: ${summarizeErrorResponse(upstreamResponse)}`);
1219
+ }
1220
+ let response = upstreamResponse;
792
1221
  if (adaptResponse) {
793
1222
  response = adaptResponse(response);
794
1223
  }
795
1224
  const latencyMs = Date.now() - startTime;
796
1225
  log(`Response: ${response.statusCode} (${latencyMs}ms, ${response.body.length} bytes)`);
1226
+ if (response.statusCode >= 400) {
1227
+ log(`Upstream error detail: ${summarizeErrorResponse(response)}`);
1228
+ }
797
1229
  const telemetry = computeResponseTelemetry(requestForPeer, response.headers, response.body, selectedPeer);
798
1230
  const responseHeaders = attachAntseedTelemetryHeaders(response.headers, selectedPeer, telemetry, requestForPeer.requestId, latencyMs);
799
1231
  // Report result to router for learning
@@ -808,6 +1240,9 @@ export class BuyerProxy {
808
1240
  if (retryableStatusCodes.has(response.statusCode)) {
809
1241
  return { done: false, statusCode: response.statusCode, responseBody: Buffer.from(response.body), responseHeaders, errorMessage: null };
810
1242
  }
1243
+ if (response.statusCode >= 200 && response.statusCode < 400) {
1244
+ this._rememberSuccessfulPeer(routeKey, selectedPeer.peerId);
1245
+ }
811
1246
  // Forward response headers and body to the HTTP client
812
1247
  res.writeHead(response.statusCode, responseHeaders);
813
1248
  res.end(Buffer.from(response.body));
@@ -817,7 +1252,18 @@ export class BuyerProxy {
817
1252
  catch (err) {
818
1253
  const latencyMs = Date.now() - startTime;
819
1254
  const message = err instanceof Error ? err.message : String(err);
1255
+ const abortedLocally = requestSignal.aborted || /\baborted\b/i.test(message);
820
1256
  log(`Request failed after ${latencyMs}ms: ${message}`);
1257
+ if (abortedLocally) {
1258
+ log(`Request ${requestForPeer.requestId.slice(0, 8)} aborted locally; skipping retry, router penalty, and peer eviction.`);
1259
+ if (res.headersSent) {
1260
+ if (!res.writableEnded) {
1261
+ res.end();
1262
+ }
1263
+ return { done: true };
1264
+ }
1265
+ return { done: true };
1266
+ }
821
1267
  if (router) {
822
1268
  router.onResult(selectedPeer, {
823
1269
  success: false,
@@ -825,8 +1271,18 @@ export class BuyerProxy {
825
1271
  tokens: 0,
826
1272
  });
827
1273
  }
828
- // Evict only the failing peer others remain usable
829
- this._evictPeer(selectedPeer.peerId);
1274
+ // Avoid poisoning routing cache from control-plane model enumeration failures.
1275
+ // Some peers can time out on /v1/models while still serving inference paths.
1276
+ const normalizedPath = requestForPeer.path.toLowerCase();
1277
+ const isControlPlaneModelsRequest = normalizedPath.startsWith('/v1/models');
1278
+ if (isControlPlaneModelsRequest) {
1279
+ log(`Skipping peer eviction for control-plane failure on ${requestForPeer.path}`);
1280
+ }
1281
+ else {
1282
+ // Evict only the failing peer — others remain usable.
1283
+ this._evictPeer(selectedPeer.peerId);
1284
+ }
1285
+ this._forgetSuccessfulPeer(routeKey, selectedPeer.peerId);
830
1286
  if (res.headersSent) {
831
1287
  // Headers already sent (streaming), can't retry
832
1288
  if (!res.writableEnded) {