mixdog 0.7.7 → 0.7.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,15 @@
1
1
  import Anthropic from '@anthropic-ai/sdk';
2
2
  import { loadConfig } from '../config.mjs';
3
3
  import { sanitizeToolPairs, sanitizeAnthropicContentPairs } from '../session/trim.mjs';
4
- import { withRetry } from './retry-classifier.mjs';
4
+ import { classifyError, withRetry } from './retry-classifier.mjs';
5
5
  import { traceBridgeUsage } from '../bridge-trace.mjs';
6
6
  import {
7
7
  PROVIDER_FIRST_BYTE_TIMEOUT_MS,
8
- PROVIDER_GENERATE_TOTAL_TIMEOUT_MS,
8
+ PROVIDER_NONSTREAM_TOTAL_TIMEOUT_MS,
9
9
  createTimeoutSignal,
10
10
  } from '../stall-policy.mjs';
11
+ import { createAbortController } from '../../../shared/abort-controller.mjs';
12
+ import { parseSSEStream, _classifyMidstreamError } from './anthropic-oauth.mjs';
11
13
  import { buildAnthropicBetaHeaders, supportsAnthropicFastMode } from './anthropic-betas.mjs';
12
14
 
13
15
  // 4-BP cache policy aligned with anthropic-oauth — tools + system + tier3
@@ -327,96 +329,251 @@ export class AnthropicProvider {
327
329
  params.speed = 'fast';
328
330
  this.fastModeBetaHeaderLatched = true;
329
331
  }
330
- const totalSignal = createTimeoutSignal(opts.signal || null, PROVIDER_GENERATE_TOTAL_TIMEOUT_MS, 'Anthropic total');
331
332
  params.messages = sanitizeAnthropicContentPairs(params.messages);
332
- // Wrap the SDK call in shared retry — surfaces "overloaded" / 5xx
333
- // through populateHttpStatusFromMessage and retries up to 5 times
334
- // with exponential backoff. The Anthropic SDK throws errors with
335
- // .status set on HTTP responses; the classifier picks that up too.
336
- let response;
337
- try {
338
- response = await withRetry(
339
- ({ signal: attemptSignal }) => this.client.messages.create(params, {
340
- signal: attemptSignal,
341
- headers: {
342
- 'anthropic-beta': buildAnthropicBetaHeaders({
343
- fastMode: this.fastModeBetaHeaderLatched,
344
- }),
345
- },
346
- }),
347
- {
348
- signal: totalSignal.signal,
349
- perAttemptTimeoutMs: PROVIDER_FIRST_BYTE_TIMEOUT_MS,
350
- perAttemptLabel: 'Anthropic first byte',
351
- onRetry: ({ attempt, lastErr, delayMs, delayReason }) => {
352
- const delayLabel = Number.isFinite(Number(delayMs)) ? `, delay ${delayMs}ms${delayReason ? ` (${delayReason})` : ''}` : '';
353
- process.stderr.write(`[anthropic] retry attempt ${attempt + 1} after ${lastErr?.message || lastErr?.code || 'transient error'}${delayLabel}\n`);
354
- },
355
- },
356
- );
357
- } finally {
358
- totalSignal.cleanup();
359
- }
360
- const textBlock = response.content.find(b => b.type === 'text');
361
- const toolCalls = parseToolCalls(response);
362
- // Mirror anthropic-oauth SSE parser: classify the assistant turn by
363
- // content-block presence so the orchestrator can distinguish a
364
- // synthesis-stalled "thinking-only" turn (no text, no tool_use)
365
- // from a silent empty response. Track block types and a quick
366
- // hasThinkingContent flag from response.content directly since the
367
- // non-streaming SDK call exposes the final block list.
368
- let hasThinkingContent = false;
369
- const contentBlockTypes = new Set();
370
- for (const block of Array.isArray(response.content) ? response.content : []) {
371
- if (block?.type) contentBlockTypes.add(block.type);
372
- if (block?.type === 'thinking' || block?.type === 'redacted_thinking') {
373
- hasThinkingContent = true;
333
+ params.stream = true;
334
+
335
+ const onStageChange = typeof opts.onStageChange === 'function' ? opts.onStageChange : null;
336
+ const onStreamDelta = typeof opts.onStreamDelta === 'function' ? opts.onStreamDelta : null;
337
+ const onToolCall = typeof opts.onToolCall === 'function' ? opts.onToolCall : null;
338
+
339
+ const totalTimeout = createTimeoutSignal(
340
+ opts.signal || null,
341
+ PROVIDER_NONSTREAM_TOTAL_TIMEOUT_MS,
342
+ 'Anthropic total',
343
+ );
344
+ const totalSignal = totalTimeout.signal;
345
+
346
+ const cleanupCancelHandler = (handler) => {
347
+ if (!handler) return;
348
+ try { totalSignal.removeEventListener('abort', handler); } catch {}
349
+ };
350
+
351
+ const betaHeaders = {
352
+ 'anthropic-beta': buildAnthropicBetaHeaders({
353
+ fastMode: this.fastModeBetaHeaderLatched,
354
+ }),
355
+ };
356
+
357
+ const MAX_MIDSTREAM_RETRIES = 1;
358
+ let firstAttemptError = null;
359
+ let firstAttemptClassifier = null;
360
+
361
+ const buildReturnFromParse = (parseResult) => {
362
+ const usageRaw = parseResult.usage?.raw || null;
363
+ const input = parseResult.usage?.inputTokens || 0;
364
+ const cacheRead = parseResult.usage?.cachedTokens || 0;
365
+ const cacheWrite = parseResult.usage?.cacheWriteTokens || 0;
366
+ const output = parseResult.usage?.outputTokens || 0;
367
+ const promptTokens = parseResult.usage?.promptTokens ?? (input + cacheRead + cacheWrite);
368
+ const liveModel = parseResult.model || useModel;
369
+
370
+ if (usageRaw || input || output || cacheRead || cacheWrite) {
371
+ traceBridgeUsage({
372
+ sessionId: opts.sessionId || opts.session?.id || null,
373
+ iteration: Number.isFinite(Number(opts.iteration)) ? Number(opts.iteration) : null,
374
+ inputTokens: input,
375
+ outputTokens: output,
376
+ cachedTokens: cacheRead,
377
+ cacheWriteTokens: cacheWrite,
378
+ promptTokens,
379
+ model: liveModel,
380
+ modelDisplay: liveModel,
381
+ responseId: null,
382
+ rawUsage: usageRaw,
383
+ provider: 'anthropic',
384
+ });
374
385
  }
375
- }
376
- const usageRaw = response.usage || null;
377
- if (usageRaw) {
378
- const input = usageRaw.input_tokens || 0;
379
- const cacheRead = usageRaw.cache_read_input_tokens || 0;
380
- const cacheWrite = usageRaw.cache_creation_input_tokens || 0;
381
- traceBridgeUsage({
382
- sessionId: opts.sessionId || opts.session?.id || null,
383
- iteration: Number.isFinite(Number(opts.iteration)) ? Number(opts.iteration) : null,
384
- inputTokens: input,
385
- outputTokens: usageRaw.output_tokens || 0,
386
- cachedTokens: cacheRead,
387
- cacheWriteTokens: cacheWrite,
388
- promptTokens: input + cacheRead + cacheWrite,
389
- model: response.model,
390
- modelDisplay: response.model,
391
- responseId: response.id || null,
392
- rawUsage: usageRaw,
393
- provider: 'anthropic',
394
- });
395
- }
396
- return {
397
- content: textBlock?.type === 'text' ? textBlock.text : '',
398
- model: response.model,
399
- toolCalls,
400
- stopReason: response.stop_reason || null,
401
- hasThinkingContent,
402
- contentBlockTypes: Array.from(contentBlockTypes),
403
- usage: (() => {
404
- const input = response.usage.input_tokens || 0;
405
- const cacheRead = response.usage.cache_read_input_tokens || 0;
406
- const cacheWrite = response.usage.cache_creation_input_tokens || 0;
407
- return {
386
+
387
+ return {
388
+ content: parseResult.content || '',
389
+ model: liveModel,
390
+ toolCalls: parseResult.toolCalls,
391
+ stopReason: parseResult.stopReason || null,
392
+ hasThinkingContent: !!parseResult.hasThinkingContent,
393
+ contentBlockTypes: Array.isArray(parseResult.contentBlockTypes)
394
+ ? parseResult.contentBlockTypes
395
+ : [],
396
+ usage: {
408
397
  inputTokens: input,
409
- outputTokens: response.usage.output_tokens || 0,
398
+ outputTokens: output,
410
399
  cachedTokens: cacheRead,
411
400
  cacheWriteTokens: cacheWrite,
412
- // Unified prompt volume — what the model actually ingested,
413
- // regardless of cache splitting. Anthropic reports input
414
- // uncached-only; sum the three billable slots so the
415
- // cross-provider `promptTokens` field has consistent meaning.
416
- promptTokens: input + cacheRead + cacheWrite,
417
- };
418
- })(),
401
+ promptTokens,
402
+ },
403
+ };
419
404
  };
405
+
406
+ try {
407
+ for (let attemptIndex = 0; attemptIndex <= MAX_MIDSTREAM_RETRIES; attemptIndex++) {
408
+ const streamController = createAbortController();
409
+ let cancelHandler = null;
410
+
411
+ if (totalSignal) {
412
+ if (totalSignal.aborted) {
413
+ const reason = totalSignal.reason;
414
+ throw reason instanceof Error
415
+ ? reason
416
+ : new Error('Anthropic request aborted');
417
+ }
418
+ cancelHandler = () => {
419
+ try { streamController.abort(totalSignal.reason); } catch {}
420
+ };
421
+ totalSignal.addEventListener('abort', cancelHandler, { once: true });
422
+ }
423
+
424
+ const midState = {
425
+ attemptIndex,
426
+ sawMessageStart: false,
427
+ sawCompleted: false,
428
+ emittedToolCall: false,
429
+ userAbort: false,
430
+ watchdogAbort: null,
431
+ };
432
+
433
+ let firstBytePoll = null;
434
+ let firstByteTimeout = null;
435
+
436
+ try {
437
+ try { onStageChange?.('requesting'); } catch {}
438
+
439
+ const response = await withRetry(
440
+ async ({ signal: attemptSignal }) => {
441
+ const res = await this.client.messages.create(params, {
442
+ signal: attemptSignal,
443
+ headers: betaHeaders,
444
+ }).asResponse();
445
+ if (!res.ok) {
446
+ const text = await res.text().catch(() => '');
447
+ const err = new Error(`Anthropic API ${res.status}: ${text.slice(0, 200)}`);
448
+ err.status = res.status;
449
+ err.httpStatus = res.status;
450
+ throw err;
451
+ }
452
+ if (!res.body) {
453
+ throw new Error('Anthropic streaming response has no body');
454
+ }
455
+ return res;
456
+ },
457
+ {
458
+ signal: totalSignal,
459
+ perAttemptTimeoutMs: PROVIDER_FIRST_BYTE_TIMEOUT_MS,
460
+ perAttemptLabel: 'Anthropic streaming response',
461
+ onRetry: ({ attempt, lastErr, delayMs, delayReason }) => {
462
+ const delayLabel = Number.isFinite(Number(delayMs))
463
+ ? `, delay ${delayMs}ms${delayReason ? ` (${delayReason})` : ''}`
464
+ : '';
465
+ process.stderr.write(
466
+ `[anthropic] retry attempt ${attempt + 1} after ${lastErr?.message || lastErr?.code || 'transient error'}${delayLabel}\n`,
467
+ );
468
+ },
469
+ },
470
+ );
471
+
472
+ try { onStageChange?.('streaming'); } catch {}
473
+
474
+ firstByteTimeout = createTimeoutSignal(
475
+ streamController.signal,
476
+ PROVIDER_FIRST_BYTE_TIMEOUT_MS,
477
+ 'Anthropic SSE first byte',
478
+ );
479
+ firstByteTimeout.signal.addEventListener('abort', () => {
480
+ if (!midState.sawMessageStart) {
481
+ try { streamController.abort(firstByteTimeout.signal.reason); } catch {}
482
+ }
483
+ }, { once: true });
484
+
485
+ firstBytePoll = setInterval(() => {
486
+ if (midState.sawMessageStart) {
487
+ firstByteTimeout?.cleanup();
488
+ clearInterval(firstBytePoll);
489
+ firstBytePoll = null;
490
+ }
491
+ }, 25);
492
+
493
+ const parseResult = await parseSSEStream(
494
+ response,
495
+ streamController.signal,
496
+ () => streamController.abort(),
497
+ onStreamDelta,
498
+ onToolCall,
499
+ midState,
500
+ );
501
+
502
+ if (firstBytePoll) {
503
+ clearInterval(firstBytePoll);
504
+ firstBytePoll = null;
505
+ }
506
+ firstByteTimeout?.cleanup();
507
+
508
+ if (!midState.sawMessageStart
509
+ && !midState.userAbort
510
+ && !midState.watchdogAbort
511
+ && !parseResult.content
512
+ && !(parseResult.toolCalls && parseResult.toolCalls.length)
513
+ && !(parseResult.usage && parseResult.usage.inputTokens > 0)) {
514
+ const emptyErr = new Error(
515
+ 'Anthropic SSE stream produced no message_start (empty/dropped stream — likely transient or rate-limited)',
516
+ );
517
+ emptyErr.code = 'EEMPTYSTREAM';
518
+ emptyErr.isEmptyStream = true;
519
+ throw emptyErr;
520
+ }
521
+
522
+ return buildReturnFromParse(parseResult);
523
+ } catch (err) {
524
+ if (err?.isEmptyStream && attemptIndex < MAX_MIDSTREAM_RETRIES) {
525
+ firstAttemptError = err;
526
+ firstAttemptClassifier = 'empty_stream';
527
+ try { streamController.abort?.(err); } catch {}
528
+ try {
529
+ process.stderr.write(
530
+ `[anthropic] empty stream (no message_start) — retry ${attemptIndex + 1}/${MAX_MIDSTREAM_RETRIES}\n`,
531
+ );
532
+ } catch {}
533
+ continue;
534
+ }
535
+ if ((err?.truncatedStream === true || err?.code === 'TRUNCATED_STREAM')
536
+ && classifyError(err) === 'transient'
537
+ && !midState.emittedToolCall
538
+ && attemptIndex < MAX_MIDSTREAM_RETRIES) {
539
+ firstAttemptError = err;
540
+ firstAttemptClassifier = 'truncated_stream';
541
+ try { streamController.abort?.(err); } catch {}
542
+ try {
543
+ process.stderr.write(
544
+ `[anthropic] truncated stream — retry ${attemptIndex + 1}/${MAX_MIDSTREAM_RETRIES}\n`,
545
+ );
546
+ } catch {}
547
+ continue;
548
+ }
549
+ const classifier = _classifyMidstreamError(err, midState);
550
+ if (classifier && attemptIndex < MAX_MIDSTREAM_RETRIES) {
551
+ firstAttemptError = err;
552
+ firstAttemptClassifier = classifier;
553
+ try { streamController.abort?.(err); } catch {}
554
+ try {
555
+ process.stderr.write(
556
+ `[anthropic] mid-stream recovered: retry ${attemptIndex + 1}/${MAX_MIDSTREAM_RETRIES} (cause: ${classifier})\n`,
557
+ );
558
+ } catch {}
559
+ continue;
560
+ }
561
+ if (attemptIndex > 0 && firstAttemptError) {
562
+ try { firstAttemptError.midstreamRetries = attemptIndex; } catch {}
563
+ try { firstAttemptError.midstreamClassifier = firstAttemptClassifier; } catch {}
564
+ throw firstAttemptError;
565
+ }
566
+ throw err;
567
+ } finally {
568
+ if (firstBytePoll) clearInterval(firstBytePoll);
569
+ firstByteTimeout?.cleanup();
570
+ cleanupCancelHandler(cancelHandler);
571
+ }
572
+ }
573
+ throw firstAttemptError || new Error('Anthropic mid-stream retry: unreachable');
574
+ } finally {
575
+ totalTimeout.cleanup();
576
+ }
420
577
  }
421
578
  async listModels() {
422
579
  return MODELS;