@auxiora/providers 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/LICENSE +191 -0
  2. package/dist/anthropic.d.ts +82 -0
  3. package/dist/anthropic.d.ts.map +1 -0
  4. package/dist/anthropic.js +618 -0
  5. package/dist/anthropic.js.map +1 -0
  6. package/dist/claude-code-tools.d.ts +29 -0
  7. package/dist/claude-code-tools.d.ts.map +1 -0
  8. package/dist/claude-code-tools.js +221 -0
  9. package/dist/claude-code-tools.js.map +1 -0
  10. package/dist/claude-oauth.d.ts +86 -0
  11. package/dist/claude-oauth.d.ts.map +1 -0
  12. package/dist/claude-oauth.js +318 -0
  13. package/dist/claude-oauth.js.map +1 -0
  14. package/dist/cohere.d.ts +18 -0
  15. package/dist/cohere.d.ts.map +1 -0
  16. package/dist/cohere.js +163 -0
  17. package/dist/cohere.js.map +1 -0
  18. package/dist/deepseek.d.ts +18 -0
  19. package/dist/deepseek.d.ts.map +1 -0
  20. package/dist/deepseek.js +164 -0
  21. package/dist/deepseek.js.map +1 -0
  22. package/dist/factory.d.ts +19 -0
  23. package/dist/factory.d.ts.map +1 -0
  24. package/dist/factory.js +108 -0
  25. package/dist/factory.js.map +1 -0
  26. package/dist/google.d.ts +18 -0
  27. package/dist/google.d.ts.map +1 -0
  28. package/dist/google.js +141 -0
  29. package/dist/google.js.map +1 -0
  30. package/dist/groq.d.ts +18 -0
  31. package/dist/groq.d.ts.map +1 -0
  32. package/dist/groq.js +186 -0
  33. package/dist/groq.js.map +1 -0
  34. package/dist/index.d.ts +15 -0
  35. package/dist/index.d.ts.map +1 -0
  36. package/dist/index.js +14 -0
  37. package/dist/index.js.map +1 -0
  38. package/dist/ollama.d.ts +18 -0
  39. package/dist/ollama.d.ts.map +1 -0
  40. package/dist/ollama.js +141 -0
  41. package/dist/ollama.js.map +1 -0
  42. package/dist/openai-compatible.d.ts +20 -0
  43. package/dist/openai-compatible.d.ts.map +1 -0
  44. package/dist/openai-compatible.js +112 -0
  45. package/dist/openai-compatible.js.map +1 -0
  46. package/dist/openai.d.ts +20 -0
  47. package/dist/openai.d.ts.map +1 -0
  48. package/dist/openai.js +259 -0
  49. package/dist/openai.js.map +1 -0
  50. package/dist/replicate.d.ts +20 -0
  51. package/dist/replicate.d.ts.map +1 -0
  52. package/dist/replicate.js +186 -0
  53. package/dist/replicate.js.map +1 -0
  54. package/dist/thinking-levels.d.ts +16 -0
  55. package/dist/thinking-levels.d.ts.map +1 -0
  56. package/dist/thinking-levels.js +34 -0
  57. package/dist/thinking-levels.js.map +1 -0
  58. package/dist/types.d.ts +157 -0
  59. package/dist/types.d.ts.map +1 -0
  60. package/dist/types.js +2 -0
  61. package/dist/types.js.map +1 -0
  62. package/dist/xai.d.ts +18 -0
  63. package/dist/xai.d.ts.map +1 -0
  64. package/dist/xai.js +164 -0
  65. package/dist/xai.js.map +1 -0
  66. package/package.json +30 -0
  67. package/src/anthropic.ts +691 -0
  68. package/src/claude-code-tools.ts +233 -0
  69. package/src/claude-oauth.ts +410 -0
  70. package/src/cohere.ts +242 -0
  71. package/src/deepseek.ts +241 -0
  72. package/src/factory.ts +142 -0
  73. package/src/google.ts +176 -0
  74. package/src/groq.ts +263 -0
  75. package/src/index.ts +44 -0
  76. package/src/ollama.ts +194 -0
  77. package/src/openai-compatible.ts +154 -0
  78. package/src/openai.ts +307 -0
  79. package/src/replicate.ts +247 -0
  80. package/src/thinking-levels.ts +37 -0
  81. package/src/types.ts +171 -0
  82. package/src/xai.ts +241 -0
  83. package/tests/adapters.test.ts +185 -0
  84. package/tests/claude-oauth.test.ts +45 -0
  85. package/tests/new-providers.test.ts +732 -0
  86. package/tests/thinking-levels.test.ts +82 -0
  87. package/tsconfig.json +8 -0
  88. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,154 @@
1
+ import OpenAI from 'openai';
2
+ import type {
3
+ Provider,
4
+ ProviderMetadata,
5
+ ChatMessage,
6
+ CompletionOptions,
7
+ CompletionResult,
8
+ StreamChunk,
9
+ } from './types.js';
10
+
11
// Fallback completion budget (tokens) used when the caller sets no maxTokens.
const DEFAULT_MAX_TOKENS = 4096;
12
+
13
/**
 * Construction options for {@link OpenAICompatibleProvider}.
 */
export interface OpenAICompatibleProviderOptions {
  /** Base URL of the OpenAI-compatible endpoint (e.g. a local vLLM/Ollama gateway). */
  baseUrl: string;
  /** API key; optional because many local servers do not check auth. */
  apiKey?: string;
  /** Model identifier to request from the endpoint. */
  model: string;
  /** Default completion budget in tokens (falls back to DEFAULT_MAX_TOKENS). */
  maxTokens?: number;
  /** Display/registry name for this provider instance. */
  name?: string;
}
20
+
21
+ export class OpenAICompatibleProvider implements Provider {
22
+ name: string;
23
+ metadata: ProviderMetadata;
24
+ private client: OpenAI;
25
+ private defaultModel: string;
26
+ private defaultMaxTokens: number;
27
+
28
+ constructor(options: OpenAICompatibleProviderOptions) {
29
+ this.name = options.name || 'openai-compatible';
30
+ this.defaultModel = options.model;
31
+ this.defaultMaxTokens = options.maxTokens || DEFAULT_MAX_TOKENS;
32
+
33
+ this.client = new OpenAI({
34
+ apiKey: options.apiKey || 'not-needed',
35
+ baseURL: options.baseUrl,
36
+ });
37
+
38
+ this.metadata = {
39
+ name: this.name,
40
+ displayName: options.name ? `${options.name} (OpenAI-compatible)` : 'OpenAI-compatible',
41
+ models: {
42
+ [this.defaultModel]: {
43
+ maxContextTokens: 128000,
44
+ supportsVision: false,
45
+ supportsTools: true,
46
+ supportsStreaming: true,
47
+ supportsImageGen: false,
48
+ costPer1kInput: 0,
49
+ costPer1kOutput: 0,
50
+ strengths: ['code', 'reasoning'],
51
+ isLocal: true,
52
+ },
53
+ },
54
+ isAvailable: async () => {
55
+ try {
56
+ await this.client.models.list();
57
+ return true;
58
+ } catch {
59
+ return false;
60
+ }
61
+ },
62
+ };
63
+ }
64
+
65
+ async complete(
66
+ messages: ChatMessage[],
67
+ options?: CompletionOptions,
68
+ ): Promise<CompletionResult> {
69
+ const openaiMessages = this.prepareMessages(messages, options);
70
+
71
+ const response = await this.client.chat.completions.create({
72
+ model: options?.model || this.defaultModel,
73
+ max_tokens: options?.maxTokens || this.defaultMaxTokens,
74
+ messages: openaiMessages,
75
+ temperature: options?.temperature,
76
+ });
77
+
78
+ const choice = response.choices[0];
79
+ const content = choice?.message?.content || '';
80
+
81
+ return {
82
+ content,
83
+ usage: {
84
+ inputTokens: response.usage?.prompt_tokens || 0,
85
+ outputTokens: response.usage?.completion_tokens || 0,
86
+ },
87
+ model: response.model,
88
+ finishReason: choice?.finish_reason || 'unknown',
89
+ };
90
+ }
91
+
92
+ async *stream(
93
+ messages: ChatMessage[],
94
+ options?: CompletionOptions,
95
+ ): AsyncGenerator<StreamChunk, void, unknown> {
96
+ const openaiMessages = this.prepareMessages(messages, options);
97
+
98
+ try {
99
+ const stream = await this.client.chat.completions.create({
100
+ model: options?.model || this.defaultModel,
101
+ max_tokens: options?.maxTokens || this.defaultMaxTokens,
102
+ messages: openaiMessages,
103
+ temperature: options?.temperature,
104
+ stream: true,
105
+ stream_options: { include_usage: true },
106
+ });
107
+
108
+ let inputTokens = 0;
109
+ let outputTokens = 0;
110
+
111
+ for await (const chunk of stream) {
112
+ const delta = chunk.choices[0]?.delta;
113
+
114
+ if (delta?.content) {
115
+ yield { type: 'text', content: delta.content };
116
+ }
117
+
118
+ if (chunk.usage) {
119
+ inputTokens = chunk.usage.prompt_tokens || 0;
120
+ outputTokens = chunk.usage.completion_tokens || 0;
121
+ }
122
+
123
+ if (chunk.choices[0]?.finish_reason) {
124
+ yield {
125
+ type: 'done',
126
+ usage: { inputTokens, outputTokens },
127
+ };
128
+ }
129
+ }
130
+ } catch (error) {
131
+ yield {
132
+ type: 'error',
133
+ error: error instanceof Error ? error.message : 'Unknown error',
134
+ };
135
+ }
136
+ }
137
+
138
+ private prepareMessages(
139
+ messages: ChatMessage[],
140
+ options?: CompletionOptions,
141
+ ): OpenAI.ChatCompletionMessageParam[] {
142
+ const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [];
143
+
144
+ if (options?.systemPrompt) {
145
+ openaiMessages.push({ role: 'system', content: options.systemPrompt });
146
+ }
147
+
148
+ for (const msg of messages) {
149
+ openaiMessages.push({ role: msg.role, content: msg.content });
150
+ }
151
+
152
+ return openaiMessages;
153
+ }
154
+ }
package/src/openai.ts ADDED
@@ -0,0 +1,307 @@
1
+ import OpenAI from 'openai';
2
+ import type {
3
+ Provider,
4
+ ProviderMetadata,
5
+ ChatMessage,
6
+ CompletionOptions,
7
+ CompletionResult,
8
+ StreamChunk,
9
+ ToolDefinition,
10
+ } from './types.js';
11
+ import { getOpenAIReasoningEffort, isOpenAIReasoningModel } from './thinking-levels.js';
12
+
13
// Model used when the constructor is given none.
const DEFAULT_MODEL = 'gpt-5.2';
// Fallback completion budget (tokens) when neither constructor nor call options set one.
const DEFAULT_MAX_TOKENS = 4096;
15
+
16
/**
 * Construction options for {@link OpenAIProvider}.
 */
export interface OpenAIProviderOptions {
  /** OpenAI API key (required). */
  apiKey: string;
  /** Default model; falls back to DEFAULT_MODEL. */
  model?: string;
  /** Default completion budget in tokens; falls back to DEFAULT_MAX_TOKENS. */
  maxTokens?: number;
  /** Override the API base URL (e.g. for proxies). */
  baseURL?: string;
}
22
+
23
+ export class OpenAIProvider implements Provider {
24
+ name = 'openai';
25
+ metadata: ProviderMetadata = {
26
+ name: 'openai',
27
+ displayName: 'OpenAI GPT',
28
+ models: {
29
+ 'gpt-5.2': {
30
+ maxContextTokens: 1048576,
31
+ supportsVision: true,
32
+ supportsTools: true,
33
+ supportsStreaming: true,
34
+ supportsImageGen: false,
35
+ costPer1kInput: 0.003,
36
+ costPer1kOutput: 0.012,
37
+ strengths: ['reasoning', 'code', 'vision', 'creative', 'agentic'],
38
+ isLocal: false,
39
+ },
40
+ 'gpt-5.2-pro': {
41
+ maxContextTokens: 1048576,
42
+ supportsVision: true,
43
+ supportsTools: true,
44
+ supportsStreaming: true,
45
+ supportsImageGen: false,
46
+ costPer1kInput: 0.01,
47
+ costPer1kOutput: 0.04,
48
+ strengths: ['reasoning', 'code', 'precision', 'agentic'],
49
+ isLocal: false,
50
+ },
51
+ 'gpt-5': {
52
+ maxContextTokens: 1048576,
53
+ supportsVision: true,
54
+ supportsTools: true,
55
+ supportsStreaming: true,
56
+ supportsImageGen: false,
57
+ costPer1kInput: 0.0025,
58
+ costPer1kOutput: 0.01,
59
+ strengths: ['reasoning', 'code', 'agentic'],
60
+ isLocal: false,
61
+ },
62
+ 'gpt-5-mini': {
63
+ maxContextTokens: 1048576,
64
+ supportsVision: true,
65
+ supportsTools: true,
66
+ supportsStreaming: true,
67
+ supportsImageGen: false,
68
+ costPer1kInput: 0.0004,
69
+ costPer1kOutput: 0.0016,
70
+ strengths: ['fast', 'code', 'cost-efficient'],
71
+ isLocal: false,
72
+ },
73
+ 'gpt-5-nano': {
74
+ maxContextTokens: 1048576,
75
+ supportsVision: true,
76
+ supportsTools: true,
77
+ supportsStreaming: true,
78
+ supportsImageGen: false,
79
+ costPer1kInput: 0.0001,
80
+ costPer1kOutput: 0.0004,
81
+ strengths: ['fast', 'cost-efficient'],
82
+ isLocal: false,
83
+ },
84
+ 'gpt-4.1': {
85
+ maxContextTokens: 1048576,
86
+ supportsVision: true,
87
+ supportsTools: true,
88
+ supportsStreaming: true,
89
+ supportsImageGen: false,
90
+ costPer1kInput: 0.002,
91
+ costPer1kOutput: 0.008,
92
+ strengths: ['code', 'instruction-following', 'long-context'],
93
+ isLocal: false,
94
+ },
95
+ },
96
+ isAvailable: async () => {
97
+ try {
98
+ return this.client !== undefined;
99
+ } catch {
100
+ return false;
101
+ }
102
+ },
103
+ };
104
+ private client: OpenAI;
105
+ private defaultModel: string;
106
+ private defaultMaxTokens: number;
107
+
108
+ constructor(options: OpenAIProviderOptions) {
109
+ this.client = new OpenAI({
110
+ apiKey: options.apiKey,
111
+ baseURL: options.baseURL,
112
+ });
113
+ this.defaultModel = options.model || DEFAULT_MODEL;
114
+ this.defaultMaxTokens = options.maxTokens || DEFAULT_MAX_TOKENS;
115
+ }
116
+
117
+ async complete(
118
+ messages: ChatMessage[],
119
+ options?: CompletionOptions
120
+ ): Promise<CompletionResult> {
121
+ const openaiMessages = this.prepareMessages(messages, options);
122
+
123
+ const model = options?.model || this.defaultModel;
124
+ const createParams: OpenAI.ChatCompletionCreateParams = {
125
+ model,
126
+ max_tokens: options?.maxTokens || this.defaultMaxTokens,
127
+ messages: openaiMessages,
128
+ };
129
+
130
+ // Add tools if provided
131
+ if (options?.tools && options.tools.length > 0) {
132
+ createParams.tools = this.transformTools(options.tools);
133
+ }
134
+
135
+ // Add reasoning_effort for o-series models
136
+ if (options?.thinkingLevel && isOpenAIReasoningModel(model)) {
137
+ const effort = getOpenAIReasoningEffort(options.thinkingLevel);
138
+ if (effort) {
139
+ (createParams as any).reasoning_effort = effort;
140
+ }
141
+ }
142
+
143
+ const response = await this.client.chat.completions.create(createParams);
144
+
145
+ const choice = response.choices[0];
146
+ const message = choice?.message;
147
+ const content = message?.content || '';
148
+
149
+ // Handle tool calls in the response
150
+ const toolCalls = message?.tool_calls;
151
+ if (toolCalls && toolCalls.length > 0) {
152
+ return {
153
+ content,
154
+ toolUse: toolCalls.map((tc) => ({
155
+ id: tc.id,
156
+ name: tc.function.name,
157
+ input: JSON.parse(tc.function.arguments || '{}'),
158
+ })),
159
+ usage: {
160
+ inputTokens: response.usage?.prompt_tokens || 0,
161
+ outputTokens: response.usage?.completion_tokens || 0,
162
+ },
163
+ model: response.model,
164
+ finishReason: 'tool_use',
165
+ };
166
+ }
167
+
168
+ return {
169
+ content,
170
+ usage: {
171
+ inputTokens: response.usage?.prompt_tokens || 0,
172
+ outputTokens: response.usage?.completion_tokens || 0,
173
+ },
174
+ model: response.model,
175
+ finishReason: choice?.finish_reason || 'unknown',
176
+ };
177
+ }
178
+
179
+ async *stream(
180
+ messages: ChatMessage[],
181
+ options?: CompletionOptions
182
+ ): AsyncGenerator<StreamChunk, void, unknown> {
183
+ const openaiMessages = this.prepareMessages(messages, options);
184
+
185
+ try {
186
+ const model = options?.model || this.defaultModel;
187
+ const createParams: OpenAI.ChatCompletionCreateParams = {
188
+ model,
189
+ max_tokens: options?.maxTokens || this.defaultMaxTokens,
190
+ messages: openaiMessages,
191
+ stream: true,
192
+ stream_options: { include_usage: true },
193
+ };
194
+
195
+ // Add tools if provided
196
+ if (options?.tools && options.tools.length > 0) {
197
+ createParams.tools = this.transformTools(options.tools);
198
+ }
199
+
200
+ // Add reasoning_effort for o-series models
201
+ if (options?.thinkingLevel && isOpenAIReasoningModel(model)) {
202
+ const effort = getOpenAIReasoningEffort(options.thinkingLevel);
203
+ if (effort) {
204
+ (createParams as any).reasoning_effort = effort;
205
+ }
206
+ }
207
+
208
+ const stream = await this.client.chat.completions.create(createParams);
209
+
210
+ let inputTokens = 0;
211
+ let outputTokens = 0;
212
+
213
+ // Track streaming tool calls: index -> { id, name, arguments }
214
+ const toolCallAccumulators = new Map<number, { id: string; name: string; arguments: string }>();
215
+
216
+ for await (const chunk of stream) {
217
+ const choice = chunk.choices[0];
218
+ const delta = choice?.delta;
219
+
220
+ if (delta?.content) {
221
+ yield { type: 'text', content: delta.content };
222
+ }
223
+
224
+ // Handle streaming tool_calls deltas
225
+ if (delta?.tool_calls) {
226
+ for (const tc of delta.tool_calls) {
227
+ const idx = tc.index;
228
+ if (!toolCallAccumulators.has(idx)) {
229
+ toolCallAccumulators.set(idx, { id: '', name: '', arguments: '' });
230
+ }
231
+ const acc = toolCallAccumulators.get(idx)!;
232
+ if (tc.id) acc.id = tc.id;
233
+ if (tc.function?.name) acc.name = tc.function.name;
234
+ if (tc.function?.arguments) acc.arguments += tc.function.arguments;
235
+ }
236
+ }
237
+
238
+ if (chunk.usage) {
239
+ inputTokens = chunk.usage.prompt_tokens || 0;
240
+ outputTokens = chunk.usage.completion_tokens || 0;
241
+ }
242
+
243
+ if (choice?.finish_reason) {
244
+ // Yield accumulated tool calls before done
245
+ if (choice.finish_reason === 'tool_calls' && toolCallAccumulators.size > 0) {
246
+ for (const [, acc] of toolCallAccumulators) {
247
+ yield {
248
+ type: 'tool_use',
249
+ toolUse: {
250
+ id: acc.id,
251
+ name: acc.name,
252
+ input: JSON.parse(acc.arguments || '{}'),
253
+ },
254
+ };
255
+ }
256
+ }
257
+
258
+ yield {
259
+ type: 'done',
260
+ usage: { inputTokens, outputTokens },
261
+ };
262
+ }
263
+ }
264
+ } catch (error) {
265
+ yield {
266
+ type: 'error',
267
+ error: error instanceof Error ? error.message : 'Unknown error',
268
+ };
269
+ }
270
+ }
271
+
272
+ private transformTools(tools: ToolDefinition[]): OpenAI.ChatCompletionTool[] {
273
+ return tools.map((tool) => ({
274
+ type: 'function' as const,
275
+ function: {
276
+ name: tool.name,
277
+ description: tool.description,
278
+ parameters: tool.input_schema as OpenAI.FunctionParameters,
279
+ },
280
+ }));
281
+ }
282
+
283
+ private prepareMessages(
284
+ messages: ChatMessage[],
285
+ options?: CompletionOptions
286
+ ): OpenAI.ChatCompletionMessageParam[] {
287
+ const openaiMessages: OpenAI.ChatCompletionMessageParam[] = [];
288
+
289
+ // Add system prompt if provided
290
+ if (options?.systemPrompt) {
291
+ openaiMessages.push({
292
+ role: 'system',
293
+ content: options.systemPrompt,
294
+ });
295
+ }
296
+
297
+ // Convert messages
298
+ for (const msg of messages) {
299
+ openaiMessages.push({
300
+ role: msg.role,
301
+ content: msg.content,
302
+ });
303
+ }
304
+
305
+ return openaiMessages;
306
+ }
307
+ }
@@ -0,0 +1,247 @@
1
+ import type {
2
+ Provider,
3
+ ProviderMetadata,
4
+ ChatMessage,
5
+ CompletionOptions,
6
+ CompletionResult,
7
+ StreamChunk,
8
+ } from './types.js';
9
+
10
// Default chat model when none is configured.
const DEFAULT_MODEL = 'meta/meta-llama-3-70b-instruct';
// Delay (ms) between prediction-status polls.
const DEFAULT_POLL_INTERVAL = 1000;
// Replicate REST API root.
const BASE_URL = 'https://api.replicate.com/v1';
13
+
14
/**
 * Construction options for {@link ReplicateProvider}.
 */
export interface ReplicateProviderOptions {
  /** Replicate API token (sent as a Bearer credential). */
  apiToken: string;
  /** Default model slug; falls back to DEFAULT_MODEL. */
  model?: string;
  /** Delay (ms) between prediction-status polls; falls back to DEFAULT_POLL_INTERVAL. */
  pollInterval?: number;
}
+ }
19
+
20
/**
 * Minimal shape of a Replicate prediction resource as returned by the REST API.
 */
interface ReplicatePrediction {
  id: string;
  // Terminal states are 'succeeded' | 'failed' | 'canceled'.
  status: 'starting' | 'processing' | 'succeeded' | 'failed' | 'canceled';
  // Text models typically return an array of string chunks; other models may
  // return a single string.
  output?: string[] | string;
  error?: string;
  metrics?: {
    predict_time?: number;
  };
}
+ }
29
+
30
+ export class ReplicateProvider implements Provider {
31
+ name = 'replicate';
32
+ metadata: ProviderMetadata = {
33
+ name: 'replicate',
34
+ displayName: 'Replicate',
35
+ models: {
36
+ 'meta/meta-llama-3-70b-instruct': {
37
+ maxContextTokens: 8192,
38
+ supportsVision: false,
39
+ supportsTools: false,
40
+ supportsStreaming: false,
41
+ supportsImageGen: false,
42
+ costPer1kInput: 0.00065,
43
+ costPer1kOutput: 0.00275,
44
+ strengths: ['reasoning', 'code'],
45
+ isLocal: false,
46
+ },
47
+ 'stability-ai/sdxl': {
48
+ maxContextTokens: 0,
49
+ supportsVision: false,
50
+ supportsTools: false,
51
+ supportsStreaming: false,
52
+ supportsImageGen: true,
53
+ costPer1kInput: 0,
54
+ costPer1kOutput: 0,
55
+ strengths: ['image-generation'],
56
+ isLocal: false,
57
+ },
58
+ },
59
+ isAvailable: async () => {
60
+ try {
61
+ const response = await fetch(`${BASE_URL}/models`, {
62
+ headers: { Authorization: `Bearer ${this.apiToken}` },
63
+ });
64
+ return response.ok;
65
+ } catch {
66
+ return false;
67
+ }
68
+ },
69
+ };
70
+
71
+ private apiToken: string;
72
+ private defaultModel: string;
73
+ private pollInterval: number;
74
+
75
+ constructor(options: ReplicateProviderOptions) {
76
+ this.apiToken = options.apiToken;
77
+ this.defaultModel = options.model || DEFAULT_MODEL;
78
+ this.pollInterval = options.pollInterval || DEFAULT_POLL_INTERVAL;
79
+ }
80
+
81
+ async complete(
82
+ messages: ChatMessage[],
83
+ options?: CompletionOptions,
84
+ ): Promise<CompletionResult> {
85
+ const model = options?.model || this.defaultModel;
86
+ const prompt = this.formatPrompt(messages, options);
87
+
88
+ const prediction = await this.createPrediction(model, {
89
+ prompt,
90
+ max_tokens: options?.maxTokens || 4096,
91
+ temperature: options?.temperature,
92
+ });
93
+
94
+ const result = await this.pollPrediction(prediction.id);
95
+
96
+ if (result.status === 'failed') {
97
+ throw new Error(`Replicate prediction failed: ${result.error || 'unknown error'}`);
98
+ }
99
+
100
+ const output = Array.isArray(result.output)
101
+ ? result.output.join('')
102
+ : result.output || '';
103
+
104
+ return {
105
+ content: output,
106
+ usage: {
107
+ inputTokens: 0,
108
+ outputTokens: 0,
109
+ },
110
+ model,
111
+ finishReason: 'stop',
112
+ };
113
+ }
114
+
115
+ async *stream(
116
+ messages: ChatMessage[],
117
+ options?: CompletionOptions,
118
+ ): AsyncGenerator<StreamChunk, void, unknown> {
119
+ try {
120
+ const model = options?.model || this.defaultModel;
121
+ const prompt = this.formatPrompt(messages, options);
122
+
123
+ const response = await fetch(`${BASE_URL}/models/${model}/predictions`, {
124
+ method: 'POST',
125
+ headers: {
126
+ 'Content-Type': 'application/json',
127
+ Authorization: `Bearer ${this.apiToken}`,
128
+ Prefer: 'respond-async',
129
+ },
130
+ body: JSON.stringify({
131
+ input: {
132
+ prompt,
133
+ max_tokens: options?.maxTokens || 4096,
134
+ temperature: options?.temperature,
135
+ },
136
+ stream: true,
137
+ }),
138
+ });
139
+
140
+ if (!response.ok) {
141
+ throw new Error(`Replicate API error: ${response.status} ${response.statusText}`);
142
+ }
143
+
144
+ const prediction = (await response.json()) as ReplicatePrediction;
145
+ const streamUrl = `${BASE_URL}/predictions/${prediction.id}/stream`;
146
+
147
+ const streamResponse = await fetch(streamUrl, {
148
+ headers: { Authorization: `Bearer ${this.apiToken}`, Accept: 'text/event-stream' },
149
+ });
150
+
151
+ if (!streamResponse.ok || !streamResponse.body) {
152
+ const result = await this.pollPrediction(prediction.id);
153
+ const output = Array.isArray(result.output)
154
+ ? result.output.join('')
155
+ : result.output || '';
156
+ yield { type: 'text', content: output };
157
+ yield { type: 'done', usage: { inputTokens: 0, outputTokens: 0 } };
158
+ return;
159
+ }
160
+
161
+ const reader = streamResponse.body.getReader();
162
+ const decoder = new TextDecoder();
163
+ let buffer = '';
164
+
165
+ while (true) {
166
+ const { done, value } = await reader.read();
167
+ if (done) break;
168
+
169
+ buffer += decoder.decode(value, { stream: true });
170
+ const lines = buffer.split('\n');
171
+ buffer = lines.pop() || '';
172
+
173
+ for (const line of lines) {
174
+ const trimmed = line.trim();
175
+ if (!trimmed || !trimmed.startsWith('data: ')) continue;
176
+ const data = trimmed.slice(6);
177
+ if (data === '[DONE]') continue;
178
+ yield { type: 'text', content: data };
179
+ }
180
+ }
181
+
182
+ yield { type: 'done', usage: { inputTokens: 0, outputTokens: 0 } };
183
+ } catch (error) {
184
+ yield {
185
+ type: 'error',
186
+ error: error instanceof Error ? error.message : 'Unknown error',
187
+ };
188
+ }
189
+ }
190
+
191
+ private async createPrediction(
192
+ model: string,
193
+ input: Record<string, unknown>,
194
+ ): Promise<ReplicatePrediction> {
195
+ const response = await fetch(`${BASE_URL}/models/${model}/predictions`, {
196
+ method: 'POST',
197
+ headers: {
198
+ 'Content-Type': 'application/json',
199
+ Authorization: `Bearer ${this.apiToken}`,
200
+ Prefer: 'respond-async',
201
+ },
202
+ body: JSON.stringify({ input }),
203
+ });
204
+
205
+ if (!response.ok) {
206
+ throw new Error(`Replicate API error: ${response.status} ${response.statusText}`);
207
+ }
208
+
209
+ return (await response.json()) as ReplicatePrediction;
210
+ }
211
+
212
+ private async pollPrediction(id: string): Promise<ReplicatePrediction> {
213
+ while (true) {
214
+ const response = await fetch(`${BASE_URL}/predictions/${id}`, {
215
+ headers: { Authorization: `Bearer ${this.apiToken}` },
216
+ });
217
+
218
+ if (!response.ok) {
219
+ throw new Error(`Replicate API error: ${response.status} ${response.statusText}`);
220
+ }
221
+
222
+ const prediction = (await response.json()) as ReplicatePrediction;
223
+
224
+ if (prediction.status === 'succeeded' || prediction.status === 'failed' || prediction.status === 'canceled') {
225
+ return prediction;
226
+ }
227
+
228
+ await new Promise((resolve) => setTimeout(resolve, this.pollInterval));
229
+ }
230
+ }
231
+
232
+ private formatPrompt(messages: ChatMessage[], options?: CompletionOptions): string {
233
+ const parts: string[] = [];
234
+
235
+ if (options?.systemPrompt) {
236
+ parts.push(`<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n${options.systemPrompt}<|eot_id|>`);
237
+ }
238
+
239
+ for (const msg of messages) {
240
+ parts.push(`<|start_header_id|>${msg.role}<|end_header_id|>\n\n${msg.content}<|eot_id|>`);
241
+ }
242
+
243
+ parts.push('<|start_header_id|>assistant<|end_header_id|>\n\n');
244
+
245
+ return parts.join('');
246
+ }
247
+ }