@antseed/cli 0.1.22 → 0.1.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,42 @@
1
1
  import { createServer } from 'node:http';
2
2
  import { randomUUID } from 'node:crypto';
3
- import { readFile } from 'node:fs/promises';
3
+ import { watch } from 'node:fs';
4
+ import { readFile, writeFile, rename, mkdir } from 'node:fs/promises';
4
5
  import { join } from 'node:path';
5
6
  import { homedir } from 'node:os';
6
7
  import { detectRequestModelApiProtocol, inferProviderDefaultModelApiProtocols, selectTargetProtocolForRequest, transformAnthropicMessagesRequestToOpenAIChat, transformOpenAIChatResponseToAnthropicMessage, } from './model-api-adapter.js';
7
8
  const DAEMON_STATE_FILE = join(homedir(), '.antseed', 'daemon.state.json');
9
+ const BUYER_STATE_FILE = join(homedir(), '.antseed', 'buyer.state.json');
8
10
  const DEBUG = () => ['1', 'true', 'yes', 'on'].includes((process.env['ANTSEED_DEBUG'] ?? '').trim().toLowerCase());
9
11
  function log(...args) {
10
12
  if (DEBUG())
11
13
  console.log('[proxy]', ...args);
12
14
  }
/** Provider names pushed, in this order, for requests that look Claude-related. */
const CLAUDE_PROVIDER_PREFERENCE = ['claude-oauth', 'anthropic', 'claude-code'];

/**
 * Build an ordered, de-duplicated list of provider names to prefer for a request.
 *
 * Order of the result:
 *   1. the `provider/` prefix of the model name, when the model contains a slash
 *      after at least one character (e.g. `anthropic/claude-3` -> `anthropic`);
 *   2. every entry of CLAUDE_PROVIDER_PREFERENCE, when the request protocol is
 *      `anthropic-messages` or the model name contains `claude`.
 *
 * @param {string|null|undefined} requestProtocol - detected request protocol name.
 * @param {string|null|undefined} requestedModel - model name taken from the request.
 * @returns {string[]} lower-cased provider names; empty when no model was requested.
 */
function inferPreferredProvidersForRequest(requestProtocol, requestedModel) {
    // Anything that is not a string is treated like a missing model rather than throwing.
    const model = typeof requestedModel === 'string' ? requestedModel.trim().toLowerCase() : '';
    if (model.length === 0) {
        return [];
    }
    const providers = [];
    const pushProvider = (value) => {
        const provider = typeof value === 'string' ? value.trim().toLowerCase() : '';
        if (provider.length === 0 || providers.includes(provider)) {
            return;
        }
        providers.push(provider);
    };
    const slashIndex = model.indexOf('/');
    if (slashIndex > 0) {
        pushProvider(model.slice(0, slashIndex));
    }
    // `includes('claude')` already covers every name that starts with `claude-`.
    if (requestProtocol === 'anthropic-messages' || model.includes('claude')) {
        for (const provider of CLAUDE_PROVIDER_PREFERENCE) {
            pushProvider(provider);
        }
    }
    return providers;
}
13
40
  function getExplicitProviderOverride(request) {
14
41
  const provider = request.headers['x-antseed-provider']?.trim().toLowerCase();
15
42
  return provider && provider.length > 0 ? provider : null;
@@ -21,21 +48,35 @@ function getExplicitPeerIdOverride(request, sessionPinnedPeerId) {
21
48
  return header;
22
49
  return sessionPinnedPeerId?.toLowerCase() ?? null;
23
50
  }
/**
 * Read the optional `x-antseed-prefer-peer` request header.
 *
 * @param {{ headers: Record<string, unknown> }} request - request whose headers are inspected.
 * @returns {string|null} the trimmed, lower-cased header value, or `null` when the
 *   header is missing, blank, or not a string.
 */
function getPreferredPeerIdHint(request) {
    const raw = request.headers['x-antseed-prefer-peer'];
    // A non-string value (e.g. an array) cannot be trimmed; treat it as "no hint".
    if (typeof raw !== 'string') {
        return null;
    }
    const hint = raw.trim().toLowerCase();
    return hint.length > 0 ? hint : null;
}
24
58
  function getPeerProviderProtocols(peer, provider, requestedModel) {
59
+ const normalizedRequestedModel = requestedModel?.trim();
25
60
  const fromMetadata = peer.providerModelApiProtocols?.[provider]?.models;
26
61
  if (fromMetadata) {
27
- if (requestedModel && fromMetadata[requestedModel]?.length) {
28
- log(`Model match: peer ${peer.peerId.slice(0, 8)} provider=${provider} model="${requestedModel}" [${fromMetadata[requestedModel].join(',')}]`);
29
- return Array.from(new Set(fromMetadata[requestedModel]));
30
- }
31
- // If the peer advertises specific models and the requested model is not among them, return empty
32
- // so this peer is filtered out in favour of peers that actually serve the model.
33
- if (requestedModel && Object.keys(fromMetadata).length > 0) {
34
- log(`Model filter: peer ${peer.peerId.slice(0, 8)} provider=${provider} model="${requestedModel}" not in [${Object.keys(fromMetadata).join(',')}] filtered`);
35
- return [];
62
+ if (normalizedRequestedModel) {
63
+ const directMatchKey = Object.keys(fromMetadata).find((model) => model.toLowerCase() === normalizedRequestedModel.toLowerCase());
64
+ if (directMatchKey && fromMetadata[directMatchKey]?.length) {
65
+ log(`Model match: peer ${peer.peerId.slice(0, 8)} provider=${provider} model="${normalizedRequestedModel}" `
66
+ + `→ [${fromMetadata[directMatchKey].join(',')}]`);
67
+ return Array.from(new Set(fromMetadata[directMatchKey]));
68
+ }
69
+ if (Object.keys(fromMetadata).length > 0) {
70
+ log(`Model strict-miss: peer ${peer.peerId.slice(0, 8)} provider=${provider} model="${normalizedRequestedModel}" `
71
+ + 'not in metadata; excluding from route candidates.');
72
+ return [];
73
+ }
36
74
  }
37
75
  const merged = Object.values(fromMetadata).flat();
38
76
  if (merged.length > 0) {
77
+ if (requestedModel) {
78
+ log(`Model hint miss: peer ${peer.peerId.slice(0, 8)} provider=${provider} model="${requestedModel}" not in metadata; falling back to provider protocol set [${Array.from(new Set(merged)).join(',')}]`);
79
+ }
39
80
  return Array.from(new Set(merged));
40
81
  }
41
82
  }
@@ -245,6 +286,162 @@ function extractRequestedModel(request) {
245
286
  return null;
246
287
  }
247
288
  }
/**
 * Decode a byte body as text and parse it as JSON.
 *
 * @param {Uint8Array|null|undefined} body - raw request/response bytes.
 * @returns {object|null} the parsed value when it is a non-null, non-array object;
 *   `null` for a missing/empty body, unparsable text, or any other JSON value.
 */
function decodeJsonBody(body) {
    const hasContent = Boolean(body) && body.length !== 0;
    if (!hasContent) {
        return null;
    }
    let value;
    try {
        value = JSON.parse(new TextDecoder().decode(body));
    }
    catch {
        // Undecodable or malformed input is reported the same way as "no JSON object".
        return null;
    }
    const isPlainObject = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    return isPlainObject ? value : null;
}
/**
 * Produce a one-line diagnostic summary of a `messages` array without including
 * any message text: per-role counts, per-content-kind counts, per-block-type
 * counts, the first and last role seen, and the number of invalid entries.
 *
 * @param {unknown} messagesRaw - the `messages` value from a parsed request body.
 * @returns {string} `'msgShape=none'` when the input is not an array, otherwise a
 *   space-separated summary; map sections are sorted by key and read `none` when empty.
 */
function summarizeMessageShape(messagesRaw) {
    if (!Array.isArray(messagesRaw)) {
        return 'msgShape=none';
    }
    const roleCounts = new Map();
    const contentKindCounts = new Map();
    const blockTypeCounts = new Map();
    let invalidMessages = 0;
    // `null` means "no valid message seen yet". A string sentinel would collide
    // with a message whose role is literally "none".
    let firstRole = null;
    let lastRole = null;
    const bump = (map, key) => {
        map.set(key, (map.get(key) ?? 0) + 1);
    };
    const isRecord = (value) => Boolean(value) && typeof value === 'object' && !Array.isArray(value);
    for (const entry of messagesRaw) {
        if (!isRecord(entry)) {
            invalidMessages += 1;
            continue;
        }
        const role = typeof entry.role === 'string' && entry.role.trim().length > 0
            ? entry.role.trim().toLowerCase()
            : 'invalid-role';
        bump(roleCounts, role);
        if (firstRole === null) {
            firstRole = role;
        }
        lastRole = role;
        const { content } = entry;
        if (typeof content === 'string') {
            bump(contentKindCounts, 'string');
        }
        else if (Array.isArray(content)) {
            bump(contentKindCounts, 'array');
            for (const block of content) {
                if (!isRecord(block)) {
                    bump(blockTypeCounts, 'invalid');
                    continue;
                }
                const blockType = typeof block.type === 'string'
                    ? block.type.trim().toLowerCase()
                    : 'missing-type';
                // A blank `type` string is counted as missing.
                bump(blockTypeCounts, blockType || 'missing-type');
            }
        }
        else if (content && typeof content === 'object') {
            bump(contentKindCounts, 'object');
        }
        else {
            bump(contentKindCounts, 'other');
        }
    }
    const joinMap = (map) => ([...map.entries()]
        .sort((left, right) => left[0].localeCompare(right[0]))
        .map(([key, value]) => `${key}:${String(value)}`)
        .join(','));
    const roleSummary = joinMap(roleCounts) || 'none';
    const contentSummary = joinMap(contentKindCounts) || 'none';
    const blockSummary = joinMap(blockTypeCounts) || 'none';
    return [
        `msgShape=roles{${roleSummary}}`,
        `content{${contentSummary}}`,
        `blocks{${blockSummary}}`,
        `firstRole=${firstRole ?? 'none'}`,
        `lastRole=${lastRole ?? 'none'}`,
        `invalidMsgs=${String(invalidMessages)}`,
    ].join(' ');
}
/**
 * Build a one-line diagnostic description of a serialized request: method, path,
 * routing headers, content negotiation, streaming flag, requested model and body
 * size, followed by body-derived details when the body parses as a JSON object
 * (message/tool counts, max tokens, top-level keys, message shape).
 *
 * @param {{ method: string, path: string, headers: Record<string, string>, body: Uint8Array }} request
 * @returns {string} space-separated `key=value` parts.
 */
function summarizeRequestShape(request) {
    const { headers, body } = request;
    const lowerCasedHeader = (lowerName, titleName) => (headers[lowerName] ?? headers[titleName] ?? '').toLowerCase();
    const contentType = lowerCasedHeader('content-type', 'Content-Type');
    const accept = lowerCasedHeader('accept', 'Accept');
    const providerHeader = headers['x-antseed-provider'] ?? 'none';
    const preferPeerHeader = headers['x-antseed-prefer-peer'] ?? 'none';
    const model = extractRequestedModel(request) ?? 'none';
    const wantsStreaming = requestWantsStreaming(headers, body);
    const parts = [];
    parts.push(`method=${request.method}`);
    parts.push(`path=${request.path}`);
    parts.push(`provider=${providerHeader}`);
    parts.push(`preferPeer=${preferPeerHeader}`);
    parts.push(`contentType=${contentType || 'none'}`);
    parts.push(`accept=${accept || 'none'}`);
    parts.push(`stream=${String(wantsStreaming)}`);
    parts.push(`model=${model}`);
    parts.push(`bodyBytes=${String(body.length)}`);
    const jsonBody = decodeJsonBody(body);
    if (jsonBody) {
        const { messages, tools } = jsonBody;
        const lengthIfArray = (value) => (Array.isArray(value) ? value.length : 0);
        // Both spellings of the token limit are accepted; snake_case wins when present.
        const maxTokens = Number(jsonBody.max_tokens ?? jsonBody.maxTokens);
        const keys = Object.keys(jsonBody).sort().join(',');
        parts.push(`messages=${String(lengthIfArray(messages))}`);
        parts.push(`tools=${String(lengthIfArray(tools))}`);
        if (Number.isFinite(maxTokens) && maxTokens > 0) {
            parts.push(`maxTokens=${String(Math.floor(maxTokens))}`);
        }
        if (keys.length > 0) {
            parts.push(`keys=[${keys}]`);
        }
        parts.push(summarizeMessageShape(messages));
    }
    return parts.join(' ');
}
/**
 * Reduce an error response to a short, single-line description for logs.
 *
 * For a JSON body the first non-empty string among `error.message`, `message`
 * and `detail` is reported as `message="..."`. Otherwise the body text is
 * reported as `body="..."`. In both cases whitespace runs are collapsed to a
 * single space and the text is capped at 280 characters (then suffixed `...`),
 * so one upstream error cannot flood the log or span several lines.
 *
 * @param {{ headers?: Record<string, string>, body?: Uint8Array|null }} response
 * @returns {string} `'empty response body'`, `message="..."`, or `body="..."`.
 */
function summarizeErrorResponse(response) {
    const MAX_CHARS = 280;
    const compactAndClip = (text) => {
        const compact = text.replace(/\s+/g, ' ').trim();
        return compact.length > MAX_CHARS ? `${compact.slice(0, MAX_CHARS)}...` : compact;
    };
    const headers = response.headers ?? {};
    const contentType = String(headers['content-type'] ?? headers['Content-Type'] ?? '').toLowerCase();
    if (!response.body || response.body.length === 0) {
        return 'empty response body';
    }
    const raw = new TextDecoder().decode(response.body).trim();
    if (raw.length === 0) {
        return 'empty response body';
    }
    if (contentType.includes('application/json')) {
        try {
            const parsed = JSON.parse(raw);
            if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
                const nestedError = parsed.error && typeof parsed.error === 'object' && !Array.isArray(parsed.error)
                    ? parsed.error
                    : null;
                const message = ((typeof nestedError?.message === 'string' && nestedError.message)
                    || (typeof parsed.message === 'string' && parsed.message)
                    || (typeof parsed.detail === 'string' && parsed.detail));
                const summary = message ? compactAndClip(message) : '';
                if (summary.length > 0) {
                    return `message="${summary}"`;
                }
            }
        }
        catch {
            // Not valid JSON despite the content type; fall through to the raw snippet.
        }
    }
    return `body="${compactAndClip(raw)}"`;
}
248
445
  function toFiniteNumberOrNull(value) {
249
446
  return typeof value === 'number' && Number.isFinite(value) ? value : null;
250
447
  }
@@ -369,6 +566,16 @@ function requestWantsStreaming(headers, body) {
369
566
  return false;
370
567
  }
371
568
  }
/**
 * Tell whether an error message matches the "connection ... closed|failed during
 * request" wording (case-insensitive).
 *
 * @param {string} message - error message text to inspect.
 * @returns {boolean} `true` when the message matches the pattern.
 */
function isConnectionChurnError(message) {
    const churnPattern = /connection .*?\b(closed|failed)\s+during request\b/i;
    return churnPattern.exec(message) !== null;
}
/**
 * Tell whether a connection state counts as usable.
 *
 * @param {unknown} state - connection state value; compared case-insensitively
 *   after string conversion.
 * @returns {boolean} `true` for `open`, `authenticated` or `connecting`;
 *   `false` for any other value, including falsy ones.
 */
function isConnectionHealthy(state) {
    const healthyStates = ['open', 'authenticated', 'connecting'];
    return Boolean(state) && healthyStates.includes(String(state).toLowerCase());
}
372
579
  function extractHostFromAddress(address) {
373
580
  const trimmed = address.trim();
374
581
  if (trimmed.length === 0)
@@ -393,6 +600,38 @@ function isLoopbackPeer(peer) {
393
600
  const host = extractHostFromAddress(peer.publicAddress);
394
601
  return isLoopbackHost(host);
395
602
  }
/**
 * Rewrite the `model` field in a JSON request body.
 *
 * Header names are matched case-insensitively. When the body is rewritten,
 * every `content-length` header present (in any casing) is set to the new byte
 * length; the caller's `headers` object is never mutated.
 *
 * The original `body` and `headers` references are returned unchanged when the
 * content type is not JSON, the body is empty, the body cannot be parsed, or
 * the parsed value is not a plain object. Callers can therefore detect a
 * rewrite with an identity check on `body`.
 *
 * @param {Uint8Array} body - raw request body bytes.
 * @param {Record<string, string>} headers - request headers.
 * @param {string} model - model name to write into the body.
 * @returns {{ body: Uint8Array, headers: Record<string, string> }}
 */
export function rewriteModelInBody(body, headers, model) {
    const keysNamed = (lowerName) => Object.keys(headers).filter((key) => key.toLowerCase() === lowerName);
    // Prefer the exact lower-case key, then any other casing of the same name.
    const contentTypeKey = keysNamed('content-type').find((key) => headers[key] != null);
    const contentType = String(headers['content-type'] ?? (contentTypeKey === undefined ? '' : headers[contentTypeKey])).toLowerCase();
    if (!contentType.includes('application/json') || body.length === 0) {
        return { body, headers };
    }
    try {
        const parsed = JSON.parse(new TextDecoder().decode(body));
        if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
            return { body, headers };
        }
        parsed['model'] = model;
        const rewritten = new TextEncoder().encode(JSON.stringify(parsed));
        const updatedHeaders = { ...headers };
        // Update every casing so no stale length survives next to the corrected one.
        for (const key of keysNamed('content-length')) {
            updatedHeaders[key] = String(rewritten.length);
        }
        return { body: rewritten, headers: updatedHeaders };
    }
    catch {
        return { body, headers };
    }
}
396
635
  /**
397
636
  * Local HTTP proxy that forwards requests to P2P sellers.
398
637
  *
@@ -406,16 +645,25 @@ export class BuyerProxy {
406
645
  _port;
407
646
  _bgRefreshIntervalMs;
408
647
  _peerCacheTtlMs;
409
- _pinnedPeerId;
648
+ _pinnedPeer;
649
+ _pinnedModel;
650
+ _stateFileWatcher = null;
651
+ _stateWatchDebounce = null;
410
652
  _cachedPeers = [];
411
653
  _cacheLastUpdatedAtMs = 0;
654
+ _cacheMutationEpoch = 0;
655
+ _peerRefreshPromise = null;
656
+ _lastStaleCacheLogAtMs = 0;
412
657
  _bgRefreshHandle = null;
658
+ _lastSuccessfulPeerId = null;
659
+ _lastSuccessfulPeerByRouteKey = new Map();
413
660
  constructor(config) {
414
661
  this._node = config.node;
415
662
  this._port = config.port;
416
663
  this._bgRefreshIntervalMs = config.backgroundRefreshIntervalMs ?? 5 * 60_000;
417
664
  this._peerCacheTtlMs = Math.max(0, config.peerCacheTtlMs ?? 30_000);
418
- this._pinnedPeerId = config.pinnedPeerId?.toLowerCase();
665
+ this._pinnedPeer = config.pinnedPeerId?.toLowerCase() ?? null;
666
+ this._pinnedModel = config.pinnedModel?.trim() ?? null;
419
667
  this._server = createServer((req, res) => {
420
668
  this._handleRequest(req, res).catch((err) => {
421
669
  log('Unhandled error:', err);
@@ -435,52 +683,158 @@ export class BuyerProxy {
435
683
  });
436
684
  });
437
685
  this._startBackgroundRefresh();
686
+ await this._writeStateFile('connected');
687
+ this._watchStateFile();
438
688
  }
439
689
  async stop() {
690
+ if (this._stateWatchDebounce) {
691
+ clearTimeout(this._stateWatchDebounce);
692
+ this._stateWatchDebounce = null;
693
+ }
694
+ if (this._stateFileWatcher) {
695
+ this._stateFileWatcher.close();
696
+ this._stateFileWatcher = null;
697
+ }
440
698
  if (this._bgRefreshHandle) {
441
699
  clearInterval(this._bgRefreshHandle);
442
700
  this._bgRefreshHandle = null;
443
701
  }
702
+ await this._writeStateFile('stopped');
444
703
  return new Promise((resolve) => {
445
704
  this._server.close(() => resolve());
446
705
  });
447
706
  }
448
- _startBackgroundRefresh() {
449
- this._bgRefreshHandle = setInterval(() => {
450
- this._node.discoverPeers().then((peers) => {
451
- if (peers.length > 0) {
452
- this._mergePeers(peers);
453
- }
454
- }).catch(() => { });
455
- }, this._bgRefreshIntervalMs);
707
+ _watchStateFile() {
708
+ try {
709
+ this._stateFileWatcher = watch(BUYER_STATE_FILE, { persistent: false }, () => {
710
+ if (this._stateWatchDebounce)
711
+ clearTimeout(this._stateWatchDebounce);
712
+ this._stateWatchDebounce = setTimeout(() => {
713
+ this._stateWatchDebounce = null;
714
+ void this._reloadSessionOverrides().catch(() => { });
715
+ }, 50);
716
+ });
717
+ this._stateFileWatcher.on('error', () => {
718
+ // watcher error is non-fatal
719
+ });
720
+ }
721
+ catch {
722
+ // watcher setup failed; non-fatal
723
+ }
456
724
  }
457
- _mergePeers(incoming) {
458
- const existing = new Map(this._cachedPeers.map((p) => [p.peerId, p]));
459
- let added = 0;
460
- for (const p of incoming) {
461
- if (!existing.has(p.peerId)) {
462
- existing.set(p.peerId, p);
463
- added++;
725
+ async _reloadSessionOverrides() {
726
+ try {
727
+ const raw = await readFile(BUYER_STATE_FILE, 'utf-8');
728
+ const parsed = JSON.parse(raw);
729
+ const pinnedModel = typeof parsed.pinnedModel === 'string' && parsed.pinnedModel.trim().length > 0
730
+ ? parsed.pinnedModel.trim()
731
+ : null;
732
+ const pinnedPeer = typeof parsed.pinnedPeerId === 'string' && parsed.pinnedPeerId.trim().length > 0
733
+ ? parsed.pinnedPeerId.trim().toLowerCase()
734
+ : null;
735
+ this._pinnedModel = pinnedModel;
736
+ this._pinnedPeer = pinnedPeer;
737
+ log(`Session overrides reloaded: model=${pinnedModel ?? 'none'} peer=${pinnedPeer ?? 'none'}`);
738
+ }
739
+ catch {
740
+ // state file unreadable; keep current values
741
+ }
742
+ }
743
+ async _writeStateFile(state) {
744
+ try {
745
+ const dir = join(homedir(), '.antseed');
746
+ await mkdir(dir, { recursive: true });
747
+ let existing = {};
748
+ try {
749
+ const raw = await readFile(BUYER_STATE_FILE, 'utf-8');
750
+ existing = JSON.parse(raw);
751
+ }
752
+ catch {
753
+ // file doesn't exist yet
464
754
  }
755
+ // When stopping, preserve whatever pinnedModel/pinnedPeerId is already
756
+ // in the file — the debounce may have been cancelled before
757
+ // _reloadSessionOverrides could commit the latest CLI-written values.
758
+ const sessionOverrides = state === 'connected'
759
+ ? { pinnedModel: this._pinnedModel, pinnedPeerId: this._pinnedPeer }
760
+ : {};
761
+ const data = {
762
+ ...existing,
763
+ state,
764
+ pid: process.pid,
765
+ port: this._port,
766
+ ...sessionOverrides,
767
+ };
768
+ const tmp = join(homedir(), '.antseed', `.buyer.state.${randomUUID()}.json.tmp`);
769
+ await writeFile(tmp, JSON.stringify(data, null, 2));
770
+ await rename(tmp, BUYER_STATE_FILE);
465
771
  }
466
- if (added > 0) {
467
- this._cachedPeers = Array.from(existing.values());
468
- this._cacheLastUpdatedAtMs = Date.now();
469
- log(`[background] Merged ${added} new peer(s) into cache (total: ${this._cachedPeers.length})`);
772
+ catch {
773
+ // non-fatal
470
774
  }
471
775
  }
776
+ _startBackgroundRefresh() {
777
+ this._bgRefreshHandle = setInterval(() => {
778
+ void this._refreshPeersNow().catch(() => {
779
+ // background refresh failure is non-fatal
780
+ });
781
+ }, this._bgRefreshIntervalMs);
782
+ }
472
783
  _replacePeers(incoming) {
473
784
  this._cachedPeers = incoming;
474
785
  this._cacheLastUpdatedAtMs = Date.now();
786
+ this._cacheMutationEpoch += 1;
475
787
  }
476
788
  _evictPeer(peerId) {
477
789
  const before = this._cachedPeers.length;
478
790
  this._cachedPeers = this._cachedPeers.filter((p) => p.peerId !== peerId);
479
791
  if (this._cachedPeers.length < before) {
480
792
  this._cacheLastUpdatedAtMs = Date.now();
793
+ this._cacheMutationEpoch += 1;
481
794
  log(`Evicted failing peer ${peerId.slice(0, 12)}... from cache (${this._cachedPeers.length} remaining)`);
482
795
  }
483
796
  }
797
+ _rememberSuccessfulPeer(routeKey, peerId) {
798
+ this._lastSuccessfulPeerId = peerId;
799
+ this._lastSuccessfulPeerByRouteKey.set(routeKey, peerId);
800
+ // Keep map bounded to prevent unbounded growth from long-running sessions.
801
+ const MAX_ROUTE_HISTORY = 200;
802
+ if (this._lastSuccessfulPeerByRouteKey.size > MAX_ROUTE_HISTORY) {
803
+ const oldestKey = this._lastSuccessfulPeerByRouteKey.keys().next().value;
804
+ if (typeof oldestKey === 'string') {
805
+ this._lastSuccessfulPeerByRouteKey.delete(oldestKey);
806
+ }
807
+ }
808
+ }
809
+ _forgetSuccessfulPeer(routeKey, peerId) {
810
+ const rememberedForRoute = this._lastSuccessfulPeerByRouteKey.get(routeKey);
811
+ if (rememberedForRoute === peerId) {
812
+ this._lastSuccessfulPeerByRouteKey.delete(routeKey);
813
+ }
814
+ if (this._lastSuccessfulPeerId === peerId) {
815
+ const stillUsedByOtherRoute = Array.from(this._lastSuccessfulPeerByRouteKey.values())
816
+ .some((rememberedPeerId) => rememberedPeerId === peerId);
817
+ if (!stillUsedByOtherRoute) {
818
+ this._lastSuccessfulPeerId = null;
819
+ }
820
+ }
821
+ }
822
+ _buildRouteKey(path, requestProtocol, requestedModel, explicitProvider) {
823
+ const normalizedPath = path.split('?')[0]?.trim().toLowerCase() ?? '/';
824
+ const pathGroup = (normalizedPath.startsWith('/v1/messages')
825
+ ? '/v1/messages'
826
+ : normalizedPath.startsWith('/v1/chat/completions')
827
+ ? '/v1/chat/completions'
828
+ : normalizedPath.startsWith('/v1/models')
829
+ ? '/v1/models'
830
+ : normalizedPath);
831
+ return [
832
+ pathGroup,
833
+ requestProtocol ?? 'unknown-protocol',
834
+ requestedModel ?? 'unknown-model',
835
+ explicitProvider ?? 'auto-provider',
836
+ ].join('|');
837
+ }
484
838
  async _readLocalSeederFallback() {
485
839
  try {
486
840
  const raw = await readFile(DAEMON_STATE_FILE, 'utf-8');
@@ -524,44 +878,67 @@ export class BuyerProxy {
524
878
  return null;
525
879
  }
526
880
  }
527
- async _discoverAndCachePeers() {
881
+ async _discoverPeersFromNetwork() {
528
882
  const localSeeder = await this._readLocalSeederFallback();
529
883
  if (localSeeder) {
530
- this._replacePeers([localSeeder]);
531
884
  log(`Using local seeder ${localSeeder.peerId.slice(0, 12)}... @ ${localSeeder.publicAddress} (skipping DHT lookup)`);
532
- return this._cachedPeers;
885
+ return [localSeeder];
533
886
  }
534
887
  log('Discovering peers via DHT...');
535
888
  const peers = await this._node.discoverPeers();
536
- this._replacePeers(peers);
537
889
  if (peers.length > 0) {
538
890
  log(`Found ${peers.length} peer(s)`);
539
891
  }
540
892
  return peers;
541
893
  }
894
+ async _refreshPeersNow() {
895
+ if (this._peerRefreshPromise) {
896
+ return this._peerRefreshPromise;
897
+ }
898
+ const previousCachedPeers = [...this._cachedPeers];
899
+ const mutationEpochAtStart = this._cacheMutationEpoch;
900
+ this._peerRefreshPromise = (async () => {
901
+ const peers = await this._discoverPeersFromNetwork();
902
+ if (peers.length > 0) {
903
+ this._replacePeers(peers);
904
+ return peers;
905
+ }
906
+ const fallbackPeers = previousCachedPeers.length > 0 && this._cacheMutationEpoch === mutationEpochAtStart
907
+ ? [...previousCachedPeers]
908
+ : [];
909
+ if (fallbackPeers.length > 0) {
910
+ // Preserve stale cache as fallback when discovery transiently fails.
911
+ log('Discovery returned 0 peers; preserving most-recent cached peers as fallback.');
912
+ this._replacePeers(fallbackPeers);
913
+ return fallbackPeers;
914
+ }
915
+ return peers;
916
+ })().finally(() => {
917
+ this._peerRefreshPromise = null;
918
+ });
919
+ return this._peerRefreshPromise;
920
+ }
542
921
  async _getPeers(options) {
543
922
  const forceRefresh = options?.forceRefresh === true;
544
923
  const cacheAgeMs = Date.now() - this._cacheLastUpdatedAtMs;
545
924
  const cacheFresh = this._cacheLastUpdatedAtMs > 0 && cacheAgeMs <= this._peerCacheTtlMs;
546
- const previousCachedPeers = this._cachedPeers;
547
- if (!forceRefresh && this._cachedPeers.length > 0 && cacheFresh) {
548
- return this._cachedPeers;
549
- }
550
- // Cache is empty, stale, or a forced refresh was requested.
551
- if (!forceRefresh && this._cachedPeers.length > 0) {
552
- log(`Peer cache stale (${cacheAgeMs}ms old); refreshing before routing.`);
553
- }
554
- else if (forceRefresh) {
925
+ if (forceRefresh) {
555
926
  log('Forcing peer refresh before routing.');
927
+ return this._refreshPeersNow();
556
928
  }
557
- const peers = await this._discoverAndCachePeers();
558
- if (peers.length === 0 && previousCachedPeers.length > 0) {
559
- // Preserve stale cache as fallback when discovery transiently fails.
560
- log('Discovery returned 0 peers; keeping previous cached peers as fallback.');
561
- this._replacePeers(previousCachedPeers);
562
- return previousCachedPeers;
929
+ if (this._cachedPeers.length > 0) {
930
+ if (cacheFresh) {
931
+ return this._cachedPeers;
932
+ }
933
+ const now = Date.now();
934
+ if (now - this._lastStaleCacheLogAtMs >= 10_000) {
935
+ this._lastStaleCacheLogAtMs = now;
936
+ log(`Peer cache stale (${cacheAgeMs}ms old); routing from cached peers.`);
937
+ }
938
+ return this._cachedPeers;
563
939
  }
564
- return peers;
940
+ // No cached peers yet — block on initial discovery.
941
+ return this._refreshPeersNow();
565
942
  }
566
943
  _formatPeerSelectionDiagnostics(peers) {
567
944
  if (peers.length === 0) {
@@ -604,13 +981,42 @@ export class BuyerProxy {
604
981
  }
605
982
  // Remove host header (points to localhost, not the seller)
606
983
  delete headers['host'];
607
- const serializedReq = {
984
+ let serializedReq = {
608
985
  requestId: randomUUID(),
609
986
  method,
610
987
  path,
611
988
  headers,
612
989
  body: new Uint8Array(body),
613
990
  };
991
+ // Snapshot both session overrides together before any await so a concurrent
992
+ // _reloadSessionOverrides() cannot produce a model/peer mismatch mid-request.
993
+ const effectivePinnedModel = this._pinnedModel;
994
+ const effectivePinnedPeer = this._pinnedPeer;
995
+ if (effectivePinnedModel) {
996
+ const { body: rewrittenBody, headers: rewrittenHeaders } = rewriteModelInBody(serializedReq.body, serializedReq.headers, effectivePinnedModel);
997
+ if (rewrittenBody !== serializedReq.body) {
998
+ serializedReq = { ...serializedReq, body: rewrittenBody, headers: rewrittenHeaders };
999
+ log(`Model override applied: ${effectivePinnedModel}`);
1000
+ }
1001
+ }
1002
+ const clientAbortController = new AbortController();
1003
+ const onClientAbort = () => {
1004
+ if (clientAbortController.signal.aborted) {
1005
+ return;
1006
+ }
1007
+ clientAbortController.abort();
1008
+ log(`Client disconnected; aborting upstream request reqId=${serializedReq.requestId.slice(0, 8)}`);
1009
+ };
1010
+ req.once('close', () => {
1011
+ if (!req.complete && !res.writableEnded) {
1012
+ onClientAbort();
1013
+ }
1014
+ });
1015
+ res.once('close', () => {
1016
+ if (!res.writableEnded) {
1017
+ onClientAbort();
1018
+ }
1019
+ });
614
1020
  // Discover peers
615
1021
  const peers = await this._getPeers();
616
1022
  if (peers.length === 0) {
@@ -623,17 +1029,30 @@ export class BuyerProxy {
623
1029
  const requestedModel = extractRequestedModel(serializedReq);
624
1030
  log(`Routing: protocol=${requestProtocol ?? 'null'} model=${requestedModel ?? 'null'}`);
625
1031
  const explicitProvider = getExplicitProviderOverride(serializedReq);
626
- const explicitPeerId = getExplicitPeerIdOverride(serializedReq, this._pinnedPeerId);
627
- const { candidatePeers, routePlanByPeerId, } = selectCandidatePeersForRouting(peers, requestProtocol, requestedModel, explicitProvider);
1032
+ const explicitPeerId = getExplicitPeerIdOverride(serializedReq, effectivePinnedPeer ?? undefined);
1033
+ const preferredPeerId = getPreferredPeerIdHint(serializedReq);
1034
+ log(`Routing hints: provider=${explicitProvider ?? 'auto'} pin-peer=${explicitPeerId ?? 'none'} prefer-peer=${preferredPeerId ?? 'none'}`);
1035
+ const routeKey = this._buildRouteKey(serializedReq.path, requestProtocol, requestedModel, explicitProvider);
1036
+ const selectPeers = (candidateSources) => selectCandidatePeersForRouting(candidateSources, requestProtocol, requestedModel, explicitProvider);
1037
+ let hasForcedRefresh = false;
1038
+ const refreshPeerSelection = async (reason) => {
1039
+ if (hasForcedRefresh) {
1040
+ return;
1041
+ }
1042
+ hasForcedRefresh = true;
1043
+ log(`Forcing peer refresh before routing after ${reason}.`);
1044
+ discoveredPeers = await this._getPeers({ forceRefresh: true });
1045
+ ({
1046
+ candidatePeers: routingPeers,
1047
+ routePlanByPeerId: routingPlans,
1048
+ } = selectPeers(discoveredPeers));
1049
+ };
1050
+ let { candidatePeers, routePlanByPeerId, } = selectPeers(peers);
628
1051
  let routingPeers = candidatePeers;
629
1052
  let routingPlans = routePlanByPeerId;
630
1053
  let discoveredPeers = peers;
631
1054
  if (routingPeers.length === 0) {
632
- // One forced refresh handles stale-cache routing mismatches (e.g. missing provider/model updates).
633
- discoveredPeers = await this._getPeers({ forceRefresh: true });
634
- const refreshedSelection = selectCandidatePeersForRouting(discoveredPeers, requestProtocol, requestedModel, explicitProvider);
635
- routingPeers = refreshedSelection.candidatePeers;
636
- routingPlans = refreshedSelection.routePlanByPeerId;
1055
+ await refreshPeerSelection('empty initial routing candidate set');
637
1056
  }
638
1057
  if (routingPeers.length === 0) {
639
1058
  const diagnostics = this._formatPeerSelectionDiagnostics(discoveredPeers);
@@ -648,19 +1067,40 @@ export class BuyerProxy {
648
1067
  }
649
1068
  return;
650
1069
  }
1070
+ const preferredProviders = explicitProvider
1071
+ ? []
1072
+ : inferPreferredProvidersForRequest(requestProtocol, requestedModel);
1073
+ let hasPreferredProviderCandidate = preferredProviders.length > 0
1074
+ && routingPeers.some((peer) => {
1075
+ const provider = routingPlans.get(peer.peerId)?.provider?.trim().toLowerCase();
1076
+ return Boolean(provider && preferredProviders.includes(provider));
1077
+ });
1078
+ if (preferredProviders.length > 0 && !hasPreferredProviderCandidate) {
1079
+ await refreshPeerSelection(`missing preferred providers [${preferredProviders.join(',')}]`);
1080
+ hasPreferredProviderCandidate = routingPeers.some((peer) => {
1081
+ const provider = routingPlans.get(peer.peerId)?.provider?.trim().toLowerCase();
1082
+ return Boolean(provider && preferredProviders.includes(provider));
1083
+ });
1084
+ }
1085
+ if (routingPeers.length === 0) {
1086
+ const diagnostics = this._formatPeerSelectionDiagnostics(discoveredPeers);
1087
+ res.writeHead(502, { 'content-type': 'text/plain' });
1088
+ const providerLabel = explicitProvider ? ` for provider "${explicitProvider}"` : '';
1089
+ res.end(`No peers support ${requestProtocol ?? 'this request'}${providerLabel}. ${diagnostics}`);
1090
+ return;
1091
+ }
1092
+ log(`Routing candidates: ${routingPeers.length} peer(s)`);
651
1093
  // Select peer: explicit pin bypasses the router (and retry)
652
1094
  const router = this._node.router;
653
- const RETRYABLE_STATUS_CODES = new Set([400, 404, 408, 429, 500, 502, 503, 504]);
1095
+ const RETRYABLE_STATUS_CODES = new Set([408, 429, 500, 502, 503, 504]);
654
1096
  if (explicitPeerId) {
655
1097
  let pinnedRoutingPeers = routingPeers;
656
1098
  let pinnedRoutePlans = routingPlans;
657
1099
  let selectedPeer = pinnedRoutingPeers.find((p) => p.peerId.toLowerCase() === explicitPeerId) ?? null;
658
1100
  if (!selectedPeer) {
659
- log(`Pinned peer ${explicitPeerId.slice(0, 12)}... not in current candidate set; forcing refresh.`);
660
- discoveredPeers = await this._getPeers({ forceRefresh: true });
661
- const refreshedSelection = selectCandidatePeersForRouting(discoveredPeers, requestProtocol, requestedModel, explicitProvider);
662
- pinnedRoutingPeers = refreshedSelection.candidatePeers;
663
- pinnedRoutePlans = refreshedSelection.routePlanByPeerId;
1101
+ await refreshPeerSelection(`pinned peer ${explicitPeerId.slice(0, 12)}... not in candidate set`);
1102
+ pinnedRoutingPeers = routingPeers;
1103
+ pinnedRoutePlans = routingPlans;
664
1104
  selectedPeer = pinnedRoutingPeers.find((p) => p.peerId.toLowerCase() === explicitPeerId) ?? null;
665
1105
  }
666
1106
  if (!selectedPeer) {
@@ -678,8 +1118,9 @@ export class BuyerProxy {
678
1118
  return;
679
1119
  }
680
1120
  log(`Using pinned peer ${selectedPeer.peerId.slice(0, 12)}...`);
681
- const result = await this._dispatchToPeer(res, serializedReq, selectedPeer, pinnedRoutePlans, requestProtocol, requestedModel, explicitProvider, router, RETRYABLE_STATUS_CODES);
1121
+ const result = await this._dispatchToPeer(res, serializedReq, selectedPeer, routeKey, pinnedRoutePlans, requestProtocol, requestedModel, explicitProvider, router, RETRYABLE_STATUS_CODES, clientAbortController.signal);
682
1122
  if (!result.done) {
1123
+ this._forgetSuccessfulPeer(routeKey, selectedPeer.peerId);
683
1124
  // Pinned peer returned a retryable error, but we don't retry — send error to client
684
1125
  res.writeHead(result.statusCode, result.responseHeaders);
685
1126
  res.end(result.responseBody);
@@ -689,16 +1130,76 @@ export class BuyerProxy {
689
1130
  // Non-pinned: retry with failover on provider errors
690
1131
  const MAX_ATTEMPTS = 3;
691
1132
  const triedPeerIds = new Set();
1133
+ const restrictFailoverToPreferredProviders = preferredProviders.length > 0 && hasPreferredProviderCandidate;
1134
+ if (restrictFailoverToPreferredProviders) {
1135
+ log(`Provider-family preference active (attempt 1): [${preferredProviders.join(',')}]`);
1136
+ }
692
1137
  let lastStatusCode = 502;
693
1138
  let lastResponseBody = null;
694
1139
  let lastResponseHeaders = { 'content-type': 'text/plain' };
695
1140
  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
696
- const availableCandidates = routingPeers.filter((p) => !triedPeerIds.has(p.peerId));
1141
+ const limitToPreferredProviders = restrictFailoverToPreferredProviders && attempt === 0;
1142
+ if (restrictFailoverToPreferredProviders && attempt === 1) {
1143
+ log('Preferred provider attempt failed; expanding failover to all compatible providers.');
1144
+ }
1145
+ const availableCandidates = routingPeers.filter((peer) => {
1146
+ if (triedPeerIds.has(peer.peerId)) {
1147
+ return false;
1148
+ }
1149
+ if (!limitToPreferredProviders) {
1150
+ return true;
1151
+ }
1152
+ const provider = routingPlans.get(peer.peerId)?.provider?.trim().toLowerCase();
1153
+ return Boolean(provider && preferredProviders.includes(provider));
1154
+ });
697
1155
  if (availableCandidates.length === 0)
698
1156
  break;
699
1157
  let selectedPeer = null;
700
- // Prefer local peers on first attempt
1158
+ // Prefer a recently successful peer for the same request route.
701
1159
  if (attempt === 0) {
1160
+ const routePreferredPeerId = this._lastSuccessfulPeerByRouteKey.get(routeKey);
1161
+ if (routePreferredPeerId) {
1162
+ const remembered = availableCandidates.find((peer) => peer.peerId === routePreferredPeerId) ?? null;
1163
+ if (remembered) {
1164
+ selectedPeer = remembered;
1165
+ log(`Reusing last successful route peer ${selectedPeer.peerId.slice(0, 12)}...`);
1166
+ }
1167
+ }
1168
+ }
1169
+ // Fallback to the latest globally successful peer.
1170
+ if (!selectedPeer && attempt === 0 && this._lastSuccessfulPeerId && !requestedModel) {
1171
+ const remembered = availableCandidates.find((peer) => peer.peerId === this._lastSuccessfulPeerId) ?? null;
1172
+ if (remembered) {
1173
+ selectedPeer = remembered;
1174
+ log(`Reusing last successful peer ${selectedPeer.peerId.slice(0, 12)}...`);
1175
+ }
1176
+ }
1177
+ // Soft peer affinity: try caller-preferred peer first, but allow normal fallback.
1178
+ if (!selectedPeer && attempt === 0 && preferredPeerId) {
1179
+ const preferred = availableCandidates.find((peer) => peer.peerId.toLowerCase() === preferredPeerId) ?? null;
1180
+ if (preferred) {
1181
+ selectedPeer = preferred;
1182
+ log(`Preferring requested peer ${selectedPeer.peerId.slice(0, 12)}...`);
1183
+ }
1184
+ }
1185
+ // Strongly prefer providers that match the requested model family (e.g. claude-* -> claude/anthropic providers).
1186
+ if (!selectedPeer && attempt === 0 && preferredProviders.length > 0) {
1187
+ const providerMatchedPeers = availableCandidates.filter((peer) => {
1188
+ const plannedProvider = routingPlans.get(peer.peerId)?.provider?.trim().toLowerCase();
1189
+ return plannedProvider ? preferredProviders.includes(plannedProvider) : false;
1190
+ });
1191
+ if (providerMatchedPeers.length > 0) {
1192
+ selectedPeer = router
1193
+ ? router.selectPeer(serializedReq, providerMatchedPeers)
1194
+ : providerMatchedPeers[0] ?? null;
1195
+ if (selectedPeer) {
1196
+ const plannedProvider = routingPlans.get(selectedPeer.peerId)?.provider ?? 'unknown';
1197
+ log(`Preferring model-matched provider "${plannedProvider}" for model "${requestedModel ?? 'unknown'}"`);
1198
+ }
1199
+ }
1200
+ }
1201
+ // Prefer local peers on first attempt
1202
+ if (!selectedPeer && attempt === 0) {
702
1203
  const localPeers = availableCandidates.filter((peer) => isLoopbackPeer(peer));
703
1204
  if (localPeers.length > 0) {
704
1205
  selectedPeer = router
@@ -709,6 +1210,26 @@ export class BuyerProxy {
709
1210
  }
710
1211
  }
711
1212
  }
1213
+ // Prefer peers that can serve the request protocol directly without adapter transform.
1214
+ if (!selectedPeer && requestProtocol === 'anthropic-messages') {
1215
+ const shouldPreferDirect = !requestedModel || /claude|anthropic/i.test(requestedModel);
1216
+ if (shouldPreferDirect) {
1217
+ const directPeers = availableCandidates.filter((peer) => {
1218
+ const plan = routingPlans.get(peer.peerId);
1219
+ if (!plan)
1220
+ return false;
1221
+ return !plan.selection || !plan.selection.requiresTransform;
1222
+ });
1223
+ if (directPeers.length > 0) {
1224
+ selectedPeer = router
1225
+ ? router.selectPeer(serializedReq, directPeers)
1226
+ : directPeers[0] ?? null;
1227
+ if (selectedPeer) {
1228
+ log(`Preferring direct protocol peer ${selectedPeer.peerId.slice(0, 12)}...`);
1229
+ }
1230
+ }
1231
+ }
1232
+ }
712
1233
  if (!selectedPeer) {
713
1234
  selectedPeer = router
714
1235
  ? router.selectPeer(serializedReq, availableCandidates)
@@ -717,9 +1238,10 @@ export class BuyerProxy {
717
1238
  if (!selectedPeer)
718
1239
  break;
719
1240
  triedPeerIds.add(selectedPeer.peerId);
720
- const result = await this._dispatchToPeer(res, serializedReq, selectedPeer, routePlanByPeerId, requestProtocol, requestedModel, explicitProvider, router, RETRYABLE_STATUS_CODES);
1241
+ const result = await this._dispatchToPeer(res, serializedReq, selectedPeer, routeKey, routingPlans, requestProtocol, requestedModel, explicitProvider, router, RETRYABLE_STATUS_CODES, clientAbortController.signal);
721
1242
  if (result.done)
722
1243
  return;
1244
+ this._forgetSuccessfulPeer(routeKey, selectedPeer.peerId);
723
1245
  // Request failed with a retryable error — try another peer
724
1246
  lastStatusCode = result.statusCode;
725
1247
  lastResponseBody = result.responseBody;
@@ -748,13 +1270,13 @@ export class BuyerProxy {
748
1270
  * was sent to the client (success or non-retryable error), or retry info if the
749
1271
  * caller should try another peer.
750
1272
  */
751
- async _dispatchToPeer(res, serializedReq, selectedPeer, routePlanByPeerId, requestProtocol, requestedModel, explicitProvider, router, retryableStatusCodes) {
1273
+ async _dispatchToPeer(res, serializedReq, selectedPeer, routeKey, routePlanByPeerId, requestProtocol, requestedModel, explicitProvider, router, retryableStatusCodes, requestSignal) {
752
1274
  const selectedRoutePlan = routePlanByPeerId.get(selectedPeer.peerId)
753
1275
  ?? resolvePeerRoutePlan(selectedPeer, requestProtocol, requestedModel, explicitProvider);
754
1276
  if (!selectedRoutePlan) {
755
1277
  return { done: false, statusCode: 502, responseBody: Buffer.from('No compatible provider route'), responseHeaders: { 'content-type': 'text/plain' }, errorMessage: null };
756
1278
  }
757
- const { 'x-antseed-pin-peer': _pinPeer, ...headersForPeer } = serializedReq.headers;
1279
+ const { 'x-antseed-pin-peer': _pinPeer, 'x-antseed-prefer-peer': _preferPeer, ...headersForPeer } = serializedReq.headers;
758
1280
  let requestForPeer = {
759
1281
  ...serializedReq,
760
1282
  headers: {
@@ -793,6 +1315,9 @@ export class BuyerProxy {
793
1315
  return { done: true };
794
1316
  }
795
1317
  }
1318
+ if (DEBUG()) {
1319
+ log(`Outbound request shape: ${summarizeRequestShape(requestForPeer)}`);
1320
+ }
796
1321
  log(`Routing to peer ${selectedPeer.peerId.slice(0, 12)}...`);
797
1322
  // Forward through P2P
798
1323
  const wantsStreaming = !forceDisableUpstreamStreaming
@@ -807,12 +1332,7 @@ export class BuyerProxy {
807
1332
  return;
808
1333
  streamed = true;
809
1334
  const streamingHeaders = attachStreamingAntseedHeaders(startResponse.headers, selectedPeer, requestForPeer.requestId);
810
- // Ensure SSE-friendly headers so intermediaries don't buffer
811
- /* streamingHeaders['cache-control'] = 'no-cache, no-transform'
812
- streamingHeaders['x-accel-buffering'] = 'no' */
813
1335
  res.writeHead(startResponse.statusCode, streamingHeaders);
814
- // Disable Nagle's algorithm on the underlying socket for low-latency streaming
815
- // res.socket?.setNoDelay(true)
816
1336
  if (startResponse.body.length > 0) {
817
1337
  res.write(Buffer.from(startResponse.body));
818
1338
  }
@@ -824,9 +1344,12 @@ export class BuyerProxy {
824
1344
  res.write(Buffer.from(chunk.data));
825
1345
  }
826
1346
  },
827
- });
1347
+ }, { signal: requestSignal });
828
1348
  const latencyMs = Date.now() - startTime;
829
1349
  log(`Response: ${response.statusCode} (${latencyMs}ms, ${response.body.length} bytes)`);
1350
+ if (response.statusCode >= 400) {
1351
+ log(`Upstream error detail: ${summarizeErrorResponse(response)}`);
1352
+ }
830
1353
  const telemetry = computeResponseTelemetry(requestForPeer, response.headers, response.body, selectedPeer);
831
1354
  if (router) {
832
1355
  router.onResult(selectedPeer, {
@@ -837,6 +1360,9 @@ export class BuyerProxy {
837
1360
  }
838
1361
  if (streamed) {
839
1362
  // Headers already sent to client, can't retry
1363
+ if (response.statusCode >= 200 && response.statusCode < 400) {
1364
+ this._rememberSuccessfulPeer(routeKey, selectedPeer.peerId);
1365
+ }
840
1366
  if (!res.writableEnded) {
841
1367
  res.end();
842
1368
  }
@@ -847,17 +1373,28 @@ export class BuyerProxy {
847
1373
  if (retryableStatusCodes.has(response.statusCode)) {
848
1374
  return { done: false, statusCode: response.statusCode, responseBody: Buffer.from(response.body), responseHeaders, errorMessage: null };
849
1375
  }
1376
+ if (response.statusCode >= 200 && response.statusCode < 400) {
1377
+ this._rememberSuccessfulPeer(routeKey, selectedPeer.peerId);
1378
+ }
850
1379
  res.writeHead(response.statusCode, responseHeaders);
851
1380
  res.end(Buffer.from(response.body));
852
1381
  return { done: true };
853
1382
  }
854
1383
  else {
855
- let response = await this._node.sendRequest(selectedPeer, requestForPeer);
1384
+ const upstreamResponse = await this._node.sendRequest(selectedPeer, requestForPeer, { signal: requestSignal });
1385
+ if (upstreamResponse.statusCode >= 400 && !adaptResponse) {
1386
+ log(`Upstream raw error detail: ${summarizeErrorResponse(upstreamResponse)}`);
1387
+ }
1388
+ let response = upstreamResponse;
856
1389
  if (adaptResponse) {
857
1390
  response = adaptResponse(response);
858
1391
  }
859
1392
  const latencyMs = Date.now() - startTime;
860
1393
  log(`Response: ${response.statusCode} (${latencyMs}ms, ${response.body.length} bytes)`);
1394
+ if (response.statusCode >= 400) {
1395
+ const prefix = adaptResponse ? 'Upstream adapted error detail' : 'Upstream error detail';
1396
+ log(`${prefix}: ${summarizeErrorResponse(response)}`);
1397
+ }
861
1398
  const telemetry = computeResponseTelemetry(requestForPeer, response.headers, response.body, selectedPeer);
862
1399
  const responseHeaders = attachAntseedTelemetryHeaders(response.headers, selectedPeer, telemetry, requestForPeer.requestId, latencyMs);
863
1400
  // Report result to router for learning
@@ -872,6 +1409,9 @@ export class BuyerProxy {
872
1409
  if (retryableStatusCodes.has(response.statusCode)) {
873
1410
  return { done: false, statusCode: response.statusCode, responseBody: Buffer.from(response.body), responseHeaders, errorMessage: null };
874
1411
  }
1412
+ if (response.statusCode >= 200 && response.statusCode < 400) {
1413
+ this._rememberSuccessfulPeer(routeKey, selectedPeer.peerId);
1414
+ }
875
1415
  // Forward response headers and body to the HTTP client
876
1416
  res.writeHead(response.statusCode, responseHeaders);
877
1417
  res.end(Buffer.from(response.body));
@@ -881,7 +1421,42 @@ export class BuyerProxy {
881
1421
  catch (err) {
882
1422
  const latencyMs = Date.now() - startTime;
883
1423
  const message = err instanceof Error ? err.message : String(err);
1424
+ const abortedLocally = requestSignal.aborted;
1425
+ const connectionChurnError = isConnectionChurnError(message);
884
1426
  log(`Request failed after ${latencyMs}ms: ${message}`);
1427
+ if (abortedLocally) {
1428
+ log(`Request ${requestForPeer.requestId.slice(0, 8)} aborted locally; skipping retry, router penalty, and peer eviction.`);
1429
+ if (!res.writableEnded) {
1430
+ let responded = false;
1431
+ if (!res.headersSent) {
1432
+ try {
1433
+ res.writeHead(499, { 'content-type': 'text/plain' });
1434
+ responded = true;
1435
+ }
1436
+ catch {
1437
+ // ignore
1438
+ }
1439
+ }
1440
+ try {
1441
+ if (res.writableEnded) {
1442
+ // no-op
1443
+ }
1444
+ else {
1445
+ if (responded) {
1446
+ res.end('Request cancelled');
1447
+ }
1448
+ else {
1449
+ res.end();
1450
+ }
1451
+ responded = true;
1452
+ }
1453
+ }
1454
+ catch {
1455
+ // ignore
1456
+ }
1457
+ }
1458
+ return { done: true };
1459
+ }
885
1460
  if (router) {
886
1461
  router.onResult(selectedPeer, {
887
1462
  success: false,
@@ -889,8 +1464,28 @@ export class BuyerProxy {
889
1464
  tokens: 0,
890
1465
  });
891
1466
  }
892
- // Evict only the failing peer others remain usable
893
- this._evictPeer(selectedPeer.peerId);
1467
+ // Avoid poisoning routing cache from control-plane model enumeration failures.
1468
+ // Some peers can time out on /v1/models while still serving inference paths.
1469
+ const normalizedPath = requestForPeer.path.toLowerCase();
1470
+ const isControlPlaneModelsRequest = normalizedPath.startsWith('/v1/models');
1471
+ if (isControlPlaneModelsRequest) {
1472
+ log(`Skipping peer eviction for control-plane failure on ${requestForPeer.path}`);
1473
+ }
1474
+ else if (connectionChurnError) {
1475
+ const currentState = this._node.getPeerConnectionState(selectedPeer.peerId);
1476
+ if (isConnectionHealthy(currentState)) {
1477
+ log(`Skipping peer eviction after connection churn: peer ${selectedPeer.peerId.slice(0, 12)}... `
1478
+ + `has replacement connection state=${currentState}`);
1479
+ }
1480
+ else {
1481
+ this._evictPeer(selectedPeer.peerId);
1482
+ }
1483
+ }
1484
+ else {
1485
+ // Evict only the failing peer — others remain usable.
1486
+ this._evictPeer(selectedPeer.peerId);
1487
+ }
1488
+ this._forgetSuccessfulPeer(routeKey, selectedPeer.peerId);
894
1489
  if (res.headersSent) {
895
1490
  // Headers already sent (streaming), can't retry
896
1491
  if (!res.writableEnded) {