@auxiora/providers 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/LICENSE +191 -0
  2. package/dist/anthropic.d.ts +82 -0
  3. package/dist/anthropic.d.ts.map +1 -0
  4. package/dist/anthropic.js +618 -0
  5. package/dist/anthropic.js.map +1 -0
  6. package/dist/claude-code-tools.d.ts +29 -0
  7. package/dist/claude-code-tools.d.ts.map +1 -0
  8. package/dist/claude-code-tools.js +221 -0
  9. package/dist/claude-code-tools.js.map +1 -0
  10. package/dist/claude-oauth.d.ts +86 -0
  11. package/dist/claude-oauth.d.ts.map +1 -0
  12. package/dist/claude-oauth.js +318 -0
  13. package/dist/claude-oauth.js.map +1 -0
  14. package/dist/cohere.d.ts +18 -0
  15. package/dist/cohere.d.ts.map +1 -0
  16. package/dist/cohere.js +163 -0
  17. package/dist/cohere.js.map +1 -0
  18. package/dist/deepseek.d.ts +18 -0
  19. package/dist/deepseek.d.ts.map +1 -0
  20. package/dist/deepseek.js +164 -0
  21. package/dist/deepseek.js.map +1 -0
  22. package/dist/factory.d.ts +19 -0
  23. package/dist/factory.d.ts.map +1 -0
  24. package/dist/factory.js +108 -0
  25. package/dist/factory.js.map +1 -0
  26. package/dist/google.d.ts +18 -0
  27. package/dist/google.d.ts.map +1 -0
  28. package/dist/google.js +141 -0
  29. package/dist/google.js.map +1 -0
  30. package/dist/groq.d.ts +18 -0
  31. package/dist/groq.d.ts.map +1 -0
  32. package/dist/groq.js +186 -0
  33. package/dist/groq.js.map +1 -0
  34. package/dist/index.d.ts +15 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +14 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/ollama.d.ts +18 -0
  39. package/dist/ollama.d.ts.map +1 -0
  40. package/dist/ollama.js +141 -0
  41. package/dist/ollama.js.map +1 -0
  42. package/dist/openai-compatible.d.ts +20 -0
  43. package/dist/openai-compatible.d.ts.map +1 -0
  44. package/dist/openai-compatible.js +112 -0
  45. package/dist/openai-compatible.js.map +1 -0
  46. package/dist/openai.d.ts +20 -0
  47. package/dist/openai.d.ts.map +1 -0
  48. package/dist/openai.js +259 -0
  49. package/dist/openai.js.map +1 -0
  50. package/dist/replicate.d.ts +20 -0
  51. package/dist/replicate.d.ts.map +1 -0
  52. package/dist/replicate.js +186 -0
  53. package/dist/replicate.js.map +1 -0
  54. package/dist/thinking-levels.d.ts +16 -0
  55. package/dist/thinking-levels.d.ts.map +1 -0
  56. package/dist/thinking-levels.js +34 -0
  57. package/dist/thinking-levels.js.map +1 -0
  58. package/dist/types.d.ts +157 -0
  59. package/dist/types.d.ts.map +1 -0
  60. package/dist/types.js +2 -0
  61. package/dist/types.js.map +1 -0
  62. package/dist/xai.d.ts +18 -0
  63. package/dist/xai.d.ts.map +1 -0
  64. package/dist/xai.js +164 -0
  65. package/dist/xai.js.map +1 -0
  66. package/package.json +30 -0
  67. package/src/anthropic.ts +691 -0
  68. package/src/claude-code-tools.ts +233 -0
  69. package/src/claude-oauth.ts +410 -0
  70. package/src/cohere.ts +242 -0
  71. package/src/deepseek.ts +241 -0
  72. package/src/factory.ts +142 -0
  73. package/src/google.ts +176 -0
  74. package/src/groq.ts +263 -0
  75. package/src/index.ts +44 -0
  76. package/src/ollama.ts +194 -0
  77. package/src/openai-compatible.ts +154 -0
  78. package/src/openai.ts +307 -0
  79. package/src/replicate.ts +247 -0
  80. package/src/thinking-levels.ts +37 -0
  81. package/src/types.ts +171 -0
  82. package/src/xai.ts +241 -0
  83. package/tests/adapters.test.ts +185 -0
  84. package/tests/claude-oauth.test.ts +45 -0
  85. package/tests/new-providers.test.ts +732 -0
  86. package/tests/thinking-levels.test.ts +82 -0
  87. package/tsconfig.json +8 -0
  88. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,691 @@
1
+ import Anthropic from '@anthropic-ai/sdk';
2
+ import { createHash } from 'node:crypto';
3
+ import { execFileSync } from 'node:child_process';
4
+ import { getLogger } from '@auxiora/logger';
5
+
6
+ const logger = getLogger('providers:anthropic');
7
+ import type {
8
+ Provider,
9
+ ProviderMetadata,
10
+ ChatMessage,
11
+ CompletionOptions,
12
+ CompletionResult,
13
+ StreamChunk,
14
+ } from './types.js';
15
+ import {
16
+ resolveAnthropicApiKey,
17
+ isSetupToken,
18
+ readClaudeCliCredentials,
19
+ getValidAccessToken,
20
+ } from './claude-oauth.js';
21
+ import { CLAUDE_CODE_TOOLS } from './claude-code-tools.js';
22
+ import { getAnthropicThinkingBudget } from './thinking-levels.js';
23
+
24
/** Model used when neither the provider options nor the request name one. */
const DEFAULT_MODEL = 'claude-sonnet-4-20250514';

/** Output token cap used when neither the provider options nor the request set one. */
const DEFAULT_MAX_TOKENS = 4096;

/** Version reported when the `claude` CLI cannot be queried; update alongside real releases. */
const CLAUDE_CODE_VERSION_FALLBACK = '2.1.41';

/** Salt mixed into the attribution hash (see `computeAttributionSha`). */
const ATTRIBUTION_SALT = '59cf53e54c78';

/** Identity line placed first in the system prompt for OAuth-authenticated requests. */
const CLAUDE_CODE_SYSTEM_PROMPT = "You are Claude Code, Anthropic's official CLI for Claude.";
35
+
36
/**
 * Ask the `claude` executable on PATH for its version string.
 *
 * Runs `claude --version` synchronously (3 second timeout) and extracts the
 * leading `major.minor.patch` triple from output such as
 * "2.1.41 (Claude Code)".
 *
 * @returns The parsed version, or `CLAUDE_CODE_VERSION_FALLBACK` when the
 *          command fails, times out, or prints something unparseable.
 */
function detectClaudeCodeVersion(): string {
  let rawOutput: string;
  try {
    rawOutput = execFileSync('claude', ['--version'], {
      encoding: 'utf-8',
      timeout: 3000,
      stdio: ['pipe', 'pipe', 'pipe'],
    }).trim();
  } catch {
    // CLI missing, not executable, or timed out: use the hardcoded version.
    return CLAUDE_CODE_VERSION_FALLBACK;
  }

  const parsed = /^(\d+\.\d+\.\d+)/.exec(rawOutput);
  return parsed ? parsed[1] : CLAUDE_CODE_VERSION_FALLBACK;
}
57
+
58
/**
 * Derive the three-character attribution tag for a request.
 *
 * Samples the characters at indices 4, 7 and 20 of the first user message
 * (substituting '0' for any index that is out of range or yields an empty
 * value), concatenates salt + sampled characters + version, hashes that with
 * SHA-256 and keeps the first three hex digits.
 *
 * @param firstUserMessage Text of the first user message (may be empty).
 * @param version Version string folded into the hash.
 * @returns Three lowercase hex characters.
 */
function computeAttributionSha(firstUserMessage: string, version: string): string {
  let sampled = '';
  for (const position of [4, 7, 20]) {
    sampled += firstUserMessage[position] || '0';
  }

  const hasher = createHash('sha256');
  hasher.update(ATTRIBUTION_SALT + sampled + version);
  return hasher.digest('hex').substring(0, 3);
}
69
+
70
/** Memoised Claude Code version; stays `undefined` until something assigns it. */
let cachedVersion: string | undefined = undefined;
72
+
73
/**
 * Construction options for `AnthropicProvider`.
 *
 * Every field is optional.
 */
export interface AnthropicProviderOptions {
  /** Standard Anthropic API key. */
  apiKey?: string;
  /** OAuth-style token (setup token or access token). */
  oauthToken?: string;
  /** Model identifier used when a request does not specify one. */
  model?: string;
  /** Output token limit used when a request does not specify one. */
  maxTokens?: number;
  /** Whether to read credentials from Claude CLI (~/.claude/.credentials.json) */
  useCliCredentials?: boolean;
  /** Callback to refresh the OAuth token when expired. Returns new access token. */
  onTokenRefresh?: () => Promise<string | null>;
  /** When the current OAuth token expires (epoch ms). Used for proactive refresh. */
  tokenExpiresAt?: number;
}
85
+
86
/**
 * Provider backed by the Anthropic Messages API.
 *
 * Supports three auth modes:
 *  - `api-key`: a regular API key passed to the SDK as `apiKey`.
 *  - `setup-token` / `oauth`: bearer tokens passed as `authToken`. In these
 *    modes every request additionally carries the Claude Code tool list,
 *    identity system prompt and billing header.
 *
 * Token-based modes refresh their credentials before a request when the known
 * expiry is within five minutes (or unknown), and once more after a 401.
 */
export class AnthropicProvider implements Provider {
  name = 'anthropic';

  metadata: ProviderMetadata = {
    name: 'anthropic',
    displayName: 'Anthropic Claude',
    models: {
      'claude-opus-4-6': {
        maxContextTokens: 200000,
        supportsVision: true,
        supportsTools: true,
        supportsStreaming: true,
        supportsImageGen: false,
        costPer1kInput: 0.015,
        costPer1kOutput: 0.075,
        strengths: ['reasoning', 'code', 'long-context', 'creative'],
        isLocal: false,
      },
      'claude-sonnet-4-5-20250929': {
        maxContextTokens: 200000,
        supportsVision: true,
        supportsTools: true,
        supportsStreaming: true,
        supportsImageGen: false,
        costPer1kInput: 0.003,
        costPer1kOutput: 0.015,
        strengths: ['reasoning', 'code', 'long-context', 'creative'],
        isLocal: false,
      },
      'claude-opus-4-20250514': {
        maxContextTokens: 200000,
        supportsVision: true,
        supportsTools: true,
        supportsStreaming: true,
        supportsImageGen: false,
        costPer1kInput: 0.015,
        costPer1kOutput: 0.075,
        strengths: ['reasoning', 'code', 'long-context', 'creative'],
        isLocal: false,
      },
      'claude-sonnet-4-20250514': {
        maxContextTokens: 200000,
        supportsVision: true,
        supportsTools: true,
        supportsStreaming: true,
        supportsImageGen: false,
        costPer1kInput: 0.003,
        costPer1kOutput: 0.015,
        strengths: ['reasoning', 'code', 'long-context', 'creative'],
        isLocal: false,
      },
      'claude-haiku-4-5-20251001': {
        maxContextTokens: 200000,
        supportsVision: true,
        supportsTools: true,
        supportsStreaming: true,
        supportsImageGen: false,
        costPer1kInput: 0.0008,
        costPer1kOutput: 0.004,
        strengths: ['fast', 'code', 'vision'],
        isLocal: false,
      },
      'claude-3-opus-20240229': {
        maxContextTokens: 200000,
        supportsVision: true,
        supportsTools: true,
        supportsStreaming: true,
        supportsImageGen: false,
        costPer1kInput: 0.015,
        costPer1kOutput: 0.075,
        strengths: ['reasoning', 'creative'],
        isLocal: false,
      },
      'claude-3-5-haiku-20241022': {
        maxContextTokens: 200000,
        supportsVision: true,
        supportsTools: true,
        supportsStreaming: true,
        supportsImageGen: false,
        costPer1kInput: 0.0008,
        costPer1kOutput: 0.004,
        strengths: ['fast', 'code'],
        isLocal: false,
      },
    },
    // Only reports whether a client object exists; it does not contact the API.
    isAvailable: async () => this.client !== undefined,
  };

  private client: Anthropic;
  private defaultModel: string;
  private defaultMaxTokens: number;
  private authMode: 'api-key' | 'setup-token' | 'oauth';
  private oauthToken?: string;
  private useCliCredentials: boolean;
  private onTokenRefresh?: () => Promise<string | null>;
  private tokenExpiresAt?: number;

  /**
   * Create an Anthropic provider.
   *
   * Credential precedence:
   * 1. `oauthToken` - setup token (sk-ant-oat01-*) or OAuth access token, sent as `authToken`
   * 2. `apiKey` - regular API key (sk-ant-api03-*)
   * 3. Claude CLI credentials (~/.claude/.credentials.json), unless
   *    `useCliCredentials` is explicitly `false`
   *
   * @throws Error when no usable credential is found.
   */
  constructor(options: AnthropicProviderOptions) {
    this.defaultModel = options.model || DEFAULT_MODEL;
    this.defaultMaxTokens = options.maxTokens || DEFAULT_MAX_TOKENS;
    this.oauthToken = options.oauthToken;
    this.useCliCredentials = options.useCliCredentials ?? true;
    this.onTokenRefresh = options.onTokenRefresh;
    this.tokenExpiresAt = options.tokenExpiresAt;

    // Determine auth mode and initialize client
    if (options.oauthToken) {
      if (isSetupToken(options.oauthToken)) {
        // Setup tokens (sk-ant-oat01-*) use authToken, not apiKey
        this.authMode = 'setup-token';
        this.client = this.createOAuthClient(options.oauthToken);
        logger.info('Using setup-token auth mode (Claude Code emulation enabled)');
      } else {
        // Other OAuth tokens (access tokens)
        this.authMode = 'oauth';
        this.client = this.createOAuthClient(options.oauthToken);
        logger.info('Using oauth auth mode (Claude Code emulation enabled)');
      }
    } else if (options.apiKey) {
      this.authMode = 'api-key';
      this.client = new Anthropic({ apiKey: options.apiKey });
    } else if (options.useCliCredentials !== false) {
      // Try Claude CLI credentials
      const cliCreds = readClaudeCliCredentials();
      if (cliCreds) {
        this.authMode = cliCreds.type === 'oauth' ? 'oauth' : 'setup-token';
        this.client = this.createOAuthClient(cliCreds.accessToken);
        logger.info(`Using CLI credentials, auth mode: ${this.authMode} (Claude Code emulation enabled)`);
      } else {
        throw new Error(
          'No credentials found. Provide apiKey, oauthToken, or authenticate with `claude setup-token`.'
        );
      }
    } else {
      throw new Error('Either apiKey or oauthToken must be provided');
    }
  }

  /**
   * Get the Claude Code version, detecting it on first use and caching the
   * result in the module-level `cachedVersion`.
   */
  private getVersion(): string {
    if (!cachedVersion) {
      cachedVersion = detectClaudeCodeVersion();
      logger.info(`Detected Claude Code version: ${cachedVersion}`);
    }
    return cachedVersion;
  }

  /**
   * Create an Anthropic client configured for OAuth tokens.
   * The token is passed as `authToken` (with `apiKey` explicitly nulled) and
   * the client is given Claude Code beta and user-agent headers.
   */
  private createOAuthClient(token: string): Anthropic {
    const version = this.getVersion();
    return new Anthropic({
      apiKey: null as unknown as string,
      authToken: token,
      baseURL: 'https://api.anthropic.com',
      defaultHeaders: {
        'anthropic-beta': 'claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,prompt-caching-scope-2026-01-05',
        'user-agent': `claude-code/${version}`,
      },
    });
  }

  /**
   * Build the attribution billing header for a specific request.
   * The SHA is computed per-request from the text of the first user message
   * (string content, or the first text block of array content; empty string
   * when neither is available).
   */
  private buildBillingHeader(messages: Anthropic.MessageParam[]): string {
    const version = this.getVersion();

    let firstUserText = '';
    const firstUser = messages.find((msg) => msg.role === 'user');
    if (firstUser) {
      if (typeof firstUser.content === 'string') {
        firstUserText = firstUser.content;
      } else if (Array.isArray(firstUser.content)) {
        const textBlock = firstUser.content.find((b) => b.type === 'text');
        if (textBlock && 'text' in textBlock) {
          firstUserText = textBlock.text;
        }
      }
    }

    const sha = computeAttributionSha(firstUserText, version);
    const entrypoint = process.env.CLAUDE_CODE_ENTRYPOINT ?? 'cli';
    return `cc_version=${version}.${sha}; cc_entrypoint=${entrypoint}; cch=00000;`;
  }

  /**
   * Check if the current auth mode requires Claude Code tool emulation
   * (true for both token-based modes).
   */
  private requiresClaudeCodeEmulation(): boolean {
    return this.authMode === 'setup-token' || this.authMode === 'oauth';
  }

  /**
   * Create a provider asynchronously, resolving the credential through
   * `resolveAnthropicApiKey` first.
   *
   * NOTE(review): the resolved credential is always handed to the constructor
   * as `apiKey`, which selects `api-key` mode. If `resolveAnthropicApiKey` can
   * return an OAuth-style token, it would be sent as an API key here; confirm
   * against claude-oauth.ts.
   */
  static async create(options: AnthropicProviderOptions): Promise<AnthropicProvider> {
    const resolved = await resolveAnthropicApiKey({
      apiKey: options.apiKey,
      oauthToken: options.oauthToken,
      useCliCredentials: options.useCliCredentials,
    });

    return new AnthropicProvider({
      ...options,
      apiKey: resolved.apiKey,
      oauthToken: undefined, // Already resolved
      useCliCredentials: false, // Already resolved
    });
  }

  /**
   * Check if the current OAuth token is near expiry (within 5 minutes).
   * Returns false when the expiry is unknown (unset or 0).
   */
  private isTokenExpiringSoon(): boolean {
    if (!this.tokenExpiresAt) return false;
    return Date.now() >= this.tokenExpiresAt - 5 * 60 * 1000;
  }

  /**
   * Refresh credentials before a request when using a token-based mode and
   * the token is expiring soon or its expiry is unknown.
   */
  private async ensureValidCredentials(): Promise<void> {
    // API keys don't expire
    if (this.authMode === 'api-key') return;

    // If we know the expiry time and it's not close, skip refresh
    if (this.tokenExpiresAt && !this.isTokenExpiringSoon()) return;

    await this.refreshCredentials();
  }

  /**
   * Try each credential source in turn and swap in a new client on success.
   *
   * @returns `true` when a new token was obtained and the client replaced,
   *          `false` when no source produced a token.
   */
  private async refreshCredentials(): Promise<boolean> {
    // Claude CLI credentials (host environment). Skipped when the caller
    // explicitly opted out with `useCliCredentials: false`, so an explicitly
    // supplied token is never replaced by whatever the CLI has on disk.
    if (this.useCliCredentials) {
      const cliCreds = readClaudeCliCredentials();
      if (cliCreds && cliCreds.type === 'oauth') {
        try {
          const token = await getValidAccessToken(cliCreds);
          this.client = this.createOAuthClient(token);
          this.tokenExpiresAt = cliCreds.expiresAt;
          return true;
        } catch (err) {
          logger.warn('CLI credential refresh failed, trying vault callback', { error: err instanceof Error ? err : new Error(String(err)) });
        }
      }
    }

    // Fallback: use vault-based refresh callback (e.g. in Docker)
    if (this.onTokenRefresh) {
      const newToken = await this.onTokenRefresh();
      if (newToken) {
        this.client = this.createOAuthClient(newToken);
        // Token was just refreshed; assume ~1 hour validity
        this.tokenExpiresAt = Date.now() + 3600 * 1000;
        logger.info('OAuth token refreshed via callback');
        return true;
      }
    }

    return false;
  }

  /**
   * Refresh the token after a 401 error.
   *
   * @returns `true` only when a new token was actually obtained, so callers
   *          do not retry with the credential that was just rejected.
   */
  private async handleAuthError(): Promise<boolean> {
    if (this.authMode === 'api-key') return false;

    logger.warn('Got 401 from API, attempting token refresh');

    // The current token was rejected; forget its expiry so later requests
    // keep attempting a refresh even if this one fails.
    this.tokenExpiresAt = 0;

    try {
      return await this.refreshCredentials();
    } catch (err) {
      logger.error('Token refresh after 401 failed', { error: err instanceof Error ? err : new Error(String(err)) });
      return false;
    }
  }

  /**
   * Check if an error is a 401 authentication error.
   *
   * Prefers the SDK error class and a numeric `status` field; only falls back
   * to looking for a standalone "401" in the message when no status exists.
   */
  private isAuthError(error: unknown): boolean {
    if (error instanceof Anthropic.AuthenticationError) return true;

    if (typeof error === 'object' && error !== null) {
      const status = (error as { status?: unknown }).status;
      if (typeof status === 'number') return status === 401;
    }

    return error instanceof Error && /\b401\b/.test(error.message);
  }

  /**
   * Run a non-streaming completion.
   *
   * Refreshes credentials first if needed. On a 401, refreshes the token and
   * retries exactly once; any other error (or a failed refresh) is rethrown.
   */
  async complete(
    messages: ChatMessage[],
    options?: CompletionOptions
  ): Promise<CompletionResult> {
    await this.ensureValidCredentials();

    try {
      return await this.doComplete(messages, options);
    } catch (error) {
      // On 401, refresh token and retry once
      if (this.isAuthError(error) && await this.handleAuthError()) {
        return await this.doComplete(messages, options);
      }
      throw error;
    }
  }

  /**
   * Apply thinking, tools and system prompt to a request, mutating `params`.
   * Shared by the streaming and non-streaming paths so they stay in sync.
   */
  private applyRequestOptions(
    params: Anthropic.MessageStreamParams,
    systemPrompt: string | undefined,
    options?: CompletionOptions
  ): void {
    // Add thinking budget if requested
    const thinkingBudget = options?.thinkingLevel
      ? getAnthropicThinkingBudget(options.thinkingLevel)
      : undefined;
    if (thinkingBudget) {
      // The API requires budget_tokens < max_tokens. Grow the limit so the
      // caller's requested output room is kept on top of the thinking budget.
      if (params.max_tokens <= thinkingBudget) {
        params.max_tokens += thinkingBudget;
      }
      // `thinking` is set through a cast, as in the original code path.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      (params as any).thinking = { type: 'enabled', budget_tokens: thinkingBudget };
    }

    const callerTools = (options?.tools ?? []) as Anthropic.Tool[];

    if (this.requiresClaudeCodeEmulation()) {
      // Emulation tools go first. A caller tool with the same name replaces
      // the emulation entry, because the API rejects duplicate tool names.
      const callerToolNames = new Set<string>(callerTools.map((t) => t.name));
      const emulationTools = (CLAUDE_CODE_TOOLS as unknown as Anthropic.Tool[]).filter(
        (t) => !callerToolNames.has(t.name)
      );
      params.tools = [...emulationTools, ...callerTools];

      // Claude Code identity is first in the system prompt (array format with cache_control)
      const systemBlocks: Array<{ type: 'text'; text: string; cache_control?: { type: 'ephemeral' } }> = [
        {
          type: 'text',
          text: CLAUDE_CODE_SYSTEM_PROMPT,
          cache_control: { type: 'ephemeral' },
        },
      ];
      if (systemPrompt) {
        systemBlocks.push({
          type: 'text',
          text: systemPrompt,
          cache_control: { type: 'ephemeral' },
        });
      }
      params.system = systemBlocks as Anthropic.TextBlockParam[];
    } else {
      if (callerTools.length > 0) {
        params.tools = callerTools;
      }
      params.system = systemPrompt;
    }
  }

  /**
   * Per-request options: the billing header in token-based modes,
   * `undefined` in api-key mode.
   */
  private buildRequestOptions(anthropicMessages: Anthropic.MessageParam[]) {
    return this.requiresClaudeCodeEmulation()
      ? { headers: { 'x-anthropic-billing-header': this.buildBillingHeader(anthropicMessages) } }
      : undefined;
  }

  /**
   * Names of emulation tools whose tool calls must be hidden from the caller.
   *
   * Empty when emulation is off or `passThroughAllTools` is set. Tools the
   * caller supplied under the same name are never hidden.
   */
  private getHiddenToolNames(options?: CompletionOptions): Set<string> {
    if (!this.requiresClaudeCodeEmulation() || options?.passThroughAllTools) {
      return new Set<string>();
    }
    const callerTools = (options?.tools ?? []) as Anthropic.Tool[];
    const callerToolNames = new Set<string>(callerTools.map((t) => t.name));
    const hidden = new Set<string>();
    for (const tool of CLAUDE_CODE_TOOLS) {
      if (!callerToolNames.has(tool.name)) hidden.add(tool.name);
    }
    return hidden;
  }

  /**
   * Perform one non-streaming request and map the response.
   *
   * Only text blocks are returned in `content`; tool-use blocks in the
   * response are not surfaced by this path.
   */
  private async doComplete(
    messages: ChatMessage[],
    options?: CompletionOptions
  ): Promise<CompletionResult> {
    const { systemPrompt, anthropicMessages } = this.prepareMessages(messages, options);

    const params: Anthropic.MessageCreateParams = {
      model: options?.model || this.defaultModel,
      max_tokens: options?.maxTokens || this.defaultMaxTokens,
      messages: anthropicMessages,
    };
    this.applyRequestOptions(params, systemPrompt, options);

    const response = await this.client.messages.create(
      params,
      this.buildRequestOptions(anthropicMessages)
    );

    // Extract text content, filtering out tool calls
    const content = response.content
      .filter((block): block is Anthropic.TextBlock => block.type === 'text')
      .map((block) => block.text)
      .join('');

    return {
      content,
      usage: {
        inputTokens: response.usage.input_tokens,
        outputTokens: response.usage.output_tokens,
      },
      model: response.model,
      finishReason: response.stop_reason || 'unknown',
    };
  }

  /**
   * Stream a completion as chunks.
   *
   * Refreshes credentials first if needed. If the first attempt fails with a
   * 401 before any chunk was emitted, the token is refreshed and the request
   * is retried once. All other failures, including a failed retry, are
   * reported as a single `error` chunk rather than thrown.
   */
  async *stream(
    messages: ChatMessage[],
    options?: CompletionOptions
  ): AsyncGenerator<StreamChunk, void, unknown> {
    await this.ensureValidCredentials();

    const toErrorChunk = (error: unknown): StreamChunk => ({
      type: 'error',
      error: error instanceof Error ? error.message : 'Unknown error',
    });

    let emittedAny = false;
    try {
      for await (const chunk of this.doStream(messages, options)) {
        emittedAny = true;
        yield chunk;
      }
      return;
    } catch (error) {
      // Retrying after output was delivered would duplicate it for the consumer.
      const shouldRetry =
        !emittedAny && this.isAuthError(error) && (await this.handleAuthError());
      if (!shouldRetry) {
        yield toErrorChunk(error);
        return;
      }
    }

    try {
      yield* this.doStream(messages, options);
    } catch (retryError) {
      yield toErrorChunk(retryError);
    }
  }

  /**
   * Perform one streaming request and translate SDK events into chunks:
   * text deltas, thinking deltas, completed tool calls, and a final `done`
   * chunk with token usage.
   */
  private async *doStream(
    messages: ChatMessage[],
    options?: CompletionOptions
  ): AsyncGenerator<StreamChunk, void, unknown> {
    const { systemPrompt, anthropicMessages } = this.prepareMessages(messages, options);

    const params: Anthropic.MessageStreamParams = {
      model: options?.model || this.defaultModel,
      max_tokens: options?.maxTokens || this.defaultMaxTokens,
      messages: anthropicMessages,
    };
    this.applyRequestOptions(params, systemPrompt, options);

    // Emulation tool calls are dropped from output unless passthrough is enabled
    const hiddenToolNames = this.getHiddenToolNames(options);

    const stream = this.client.messages.stream(
      params,
      this.buildRequestOptions(anthropicMessages)
    );
    let currentToolUse: { id: string; name: string; input: string } | null = null;
    let inThinkingBlock = false;

    for await (const event of stream) {
      if (event.type === 'content_block_start') {
        const block = event.content_block;
        if (block.type === 'tool_use') {
          // Start collecting tool use
          currentToolUse = {
            id: block.id,
            name: block.name,
            input: '',
          };
          inThinkingBlock = false;
        } else if ((block as any).type === 'thinking') {
          inThinkingBlock = true;
        } else {
          inThinkingBlock = false;
        }
      } else if (event.type === 'content_block_delta') {
        const delta = event.delta;
        if ('text' in delta) {
          yield { type: 'text', content: delta.text };
        } else if ((delta as any).thinking && inThinkingBlock) {
          yield { type: 'thinking', content: (delta as any).thinking };
        } else if ('partial_json' in delta && currentToolUse) {
          // Accumulate tool input
          currentToolUse.input += delta.partial_json;
        }
      } else if (event.type === 'content_block_stop' && currentToolUse) {
        const finished = currentToolUse;
        currentToolUse = null;

        // Skip hidden emulation tools
        if (hiddenToolNames.has(finished.name)) continue;

        let input = {};
        try {
          input = finished.input ? JSON.parse(finished.input) : {};
        } catch (err) {
          // Malformed tool JSON is not a stream error: the tool call is still
          // emitted, with empty input, so the consumer sees the call happened.
          logger.warn('Failed to parse streamed tool input; emitting empty input', { error: err instanceof Error ? err : new Error(String(err)) });
        }
        yield {
          type: 'tool_use',
          toolUse: {
            id: finished.id,
            name: finished.name,
            input,
          },
        };
      } else if (event.type === 'message_stop') {
        const finalMessage = await stream.finalMessage();
        yield {
          type: 'done',
          usage: {
            inputTokens: finalMessage.usage.input_tokens,
            outputTokens: finalMessage.usage.output_tokens,
          },
        };
      }
    }
  }

  /**
   * Convert chat messages into Anthropic request parts.
   *
   * System messages are pulled out and appended (blank-line separated) to
   * `options.systemPrompt`; messages with empty content are skipped; the
   * remaining messages are normalised by `fixMessageAlternation`.
   */
  private prepareMessages(
    messages: ChatMessage[],
    options?: CompletionOptions
  ): {
    systemPrompt: string | undefined;
    anthropicMessages: Anthropic.MessageParam[];
  } {
    let systemPrompt = options?.systemPrompt;
    const anthropicMessages: Anthropic.MessageParam[] = [];

    for (const msg of messages) {
      if (msg.role === 'system') {
        // Anthropic uses system as a separate parameter
        systemPrompt = systemPrompt ? `${systemPrompt}\n\n${msg.content}` : msg.content;
      } else if (msg.content) {
        // Skip messages with empty content — Anthropic API rejects them
        anthropicMessages.push({
          role: msg.role,
          content: msg.content,
        });
      }
    }

    // Ensure message alternation (Anthropic requirement)
    const fixed = this.fixMessageAlternation(anthropicMessages);

    return { systemPrompt, anthropicMessages: fixed };
  }

  /**
   * Make a message list alternate roles and start with a user turn.
   *
   * Consecutive messages of the same role are merged into one: two strings
   * are joined with a blank line; if either side is block content, both are
   * converted to blocks and concatenated so nothing is dropped. If the first
   * message is not from the user, a placeholder user message is prepended.
   * The input array and its message objects are not mutated.
   */
  private fixMessageAlternation(
    messages: Anthropic.MessageParam[]
  ): Anthropic.MessageParam[] {
    if (messages.length === 0) return messages;

    type BlockContent = Exclude<Anthropic.MessageParam['content'], string>;
    const toBlocks = (content: Anthropic.MessageParam['content']): BlockContent =>
      typeof content === 'string' ? [{ type: 'text', text: content }] : content;

    const fixed: Anthropic.MessageParam[] = [];

    for (const msg of messages) {
      const last = fixed.length > 0 ? fixed[fixed.length - 1] : undefined;

      if (!last || last.role !== msg.role) {
        fixed.push({ ...msg });
        continue;
      }

      // Merge consecutive messages of same role
      if (typeof last.content === 'string' && typeof msg.content === 'string') {
        last.content = `${last.content}\n\n${msg.content}`;
      } else {
        last.content = [...toBlocks(last.content), ...toBlocks(msg.content)];
      }
    }

    // Ensure first message is from user
    if (fixed.length > 0 && fixed[0].role !== 'user') {
      fixed.unshift({ role: 'user', content: '(Starting conversation)' });
    }

    return fixed;
  }
}