indoxrouter 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indoxrouter-0.1.3.dist-info/METADATA +188 -0
- indoxrouter-0.1.3.dist-info/RECORD +4 -0
- indoxrouter-0.1.3.dist-info/top_level.txt +1 -0
- indoxRouter/__init__.py +0 -83
- indoxRouter/client.py +0 -632
- indoxRouter/client_resourses/__init__.py +0 -20
- indoxRouter/client_resourses/base.py +0 -67
- indoxRouter/client_resourses/chat.py +0 -144
- indoxRouter/client_resourses/completion.py +0 -138
- indoxRouter/client_resourses/embedding.py +0 -83
- indoxRouter/client_resourses/image.py +0 -116
- indoxRouter/client_resourses/models.py +0 -114
- indoxRouter/config.py +0 -151
- indoxRouter/constants/__init__.py +0 -81
- indoxRouter/exceptions/__init__.py +0 -70
- indoxRouter/models/__init__.py +0 -111
- indoxRouter/providers/__init__.py +0 -108
- indoxRouter/providers/ai21labs.json +0 -128
- indoxRouter/providers/base_provider.py +0 -101
- indoxRouter/providers/claude.json +0 -164
- indoxRouter/providers/cohere.json +0 -116
- indoxRouter/providers/databricks.json +0 -110
- indoxRouter/providers/deepseek.json +0 -110
- indoxRouter/providers/google.json +0 -128
- indoxRouter/providers/meta.json +0 -128
- indoxRouter/providers/mistral.json +0 -146
- indoxRouter/providers/nvidia.json +0 -110
- indoxRouter/providers/openai.json +0 -308
- indoxRouter/providers/openai.py +0 -521
- indoxRouter/providers/qwen.json +0 -110
- indoxRouter/utils/__init__.py +0 -240
- indoxrouter-0.1.2.dist-info/LICENSE +0 -21
- indoxrouter-0.1.2.dist-info/METADATA +0 -259
- indoxrouter-0.1.2.dist-info/RECORD +0 -33
- indoxrouter-0.1.2.dist-info/top_level.txt +0 -1
- {indoxrouter-0.1.2.dist-info → indoxrouter-0.1.3.dist-info}/WHEEL +0 -0
indoxRouter/providers/openai.py
DELETED
@@ -1,521 +0,0 @@
"""
OpenAI provider for indoxRouter.
"""

import os
from typing import Dict, List, Any, Optional, Union

import openai
from openai import OpenAI
from datetime import datetime
from .base_provider import BaseProvider
from ..exceptions import AuthenticationError, RequestError, RateLimitError
from ..utils import calculate_cost, get_model_info
from ..models import ChatMessage


class Provider(BaseProvider):
    """OpenAI provider implementation."""

    def __init__(self, api_key: str, model_name: str):
        """
        Initialize the OpenAI provider.

        Args:
            api_key: The API key for OpenAI.
            model_name: The name of the model to use.
        """
        super().__init__(api_key, model_name)
        self.client = OpenAI(api_key=api_key)
        self.model_info = get_model_info("openai", model_name)

    def chat(self, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
        """
        Send a chat request to OpenAI.

        Args:
            messages: A list of message dictionaries with 'role' and 'content' keys.
            **kwargs: Additional parameters to pass to the OpenAI API.

        Returns:
            A dictionary containing the response from OpenAI.
            If stream=True and return_generator=True, returns a generator that yields chunks of the response.

        Raises:
            AuthenticationError: If the API key is invalid.
            RequestError: If the request fails.
            RateLimitError: If the rate limit is exceeded.
        """
        try:
            # Check if streaming is requested
            stream = kwargs.pop("stream", False)
            # Check if we should return a generator
            return_generator = kwargs.pop("return_generator", False)

            # If streaming is requested, we need to handle it differently
            if stream:
                # Remove stream from kwargs to avoid passing it twice
                openai_messages = []
                for msg in messages:
                    if isinstance(msg, ChatMessage):
                        openai_messages.append(
                            {"role": msg.role, "content": msg.content}
                        )
                    else:
                        openai_messages.append(msg)

                # Create the streaming response
                stream_response = self.client.chat.completions.create(
                    model=self.model_name,
                    messages=openai_messages,
                    stream=True,
                    **kwargs,
                )

                # If return_generator is True, return a generator that yields chunks
                if return_generator:
                    # Create a streaming generator with usage tracking
                    return StreamingGenerator(
                        stream_response=stream_response,
                        model_name=self.model_name,
                        messages=messages,
                    )

                # Otherwise, collect the full response content from the stream
                content = ""
                for chunk in stream_response:
                    if hasattr(chunk, "choices") and len(chunk.choices) > 0:
                        delta = chunk.choices[0].delta
                        if hasattr(delta, "content") and delta.content is not None:
                            content += delta.content

                # For streaming responses, we don't have usage information directly
                # We'll provide a minimal response with the content
                return {
                    "data": content,
                    "model": self.model_name,
                    "provider": "openai",
                    "success": True,
                    "message": "Successfully completed streaming chat request",
                    "cost": 0.0,  # We don't have cost information for streaming responses
                    "timestamp": datetime.now().isoformat(),
                    "usage": {
                        "tokens_prompt": 0,  # We don't have token information for streaming responses
                        "tokens_completion": 0,
                        "tokens_total": 0,
                    },
                    "finish_reason": "stop",  # Default finish reason
                    "raw_response": None,  # We don't have the raw response for streaming
                }

            # Handle non-streaming responses as before
            openai_messages = []
            for msg in messages:
                if isinstance(msg, ChatMessage):
                    openai_messages.append({"role": msg.role, "content": msg.content})
                else:
                    openai_messages.append(msg)
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=openai_messages,
                **kwargs,
            )
            # Extract the response content
            content = response.choices[0].message.content

            # Extract usage information from the response
            prompt_tokens = (
                response.usage.prompt_tokens
                if hasattr(response.usage, "prompt_tokens")
                else 0
            )
            completion_tokens = (
                response.usage.completion_tokens
                if hasattr(response.usage, "completion_tokens")
                else 0
            )
            total_tokens = (
                response.usage.total_tokens
                if hasattr(response.usage, "total_tokens")
                else 0
            )

            cost = calculate_cost(
                f"openai/{self.model_name}",
                input_tokens=prompt_tokens,
                output_tokens=completion_tokens,
            )

            # Create a response dictionary with the extracted information
            return {
                "data": content,
                "model": self.model_name,
                "provider": "openai",
                "success": True,
                "message": "Successfully completed chat request",
                "cost": cost,
                "timestamp": datetime.now().isoformat(),
                # Add usage as dict with consistent field names
                "usage": {
                    "tokens_prompt": prompt_tokens,
                    "tokens_completion": completion_tokens,
                    "tokens_total": total_tokens,
                },
                # Optional fields
                "finish_reason": response.choices[0].finish_reason,
                "raw_response": response.model_dump(),
            }

        except openai.AuthenticationError:
            raise AuthenticationError("Invalid OpenAI API key.")
        except openai.RateLimitError:
            raise RateLimitError("OpenAI rate limit exceeded.")
        except Exception as e:
            raise RequestError(f"OpenAI request failed: {str(e)}")

    def complete(self, prompt: str, **kwargs) -> Dict[str, Any]:
        """
        Send a completion request to OpenAI.

        Args:
            prompt: The prompt to complete.
            **kwargs: Additional parameters to pass to the OpenAI API.

        Returns:
            A dictionary containing the response from OpenAI.
            If stream=True and return_generator=True, returns a generator that yields chunks of the response.

        Raises:
            AuthenticationError: If the API key is invalid.
            RequestError: If the request fails.
            RateLimitError: If the rate limit is exceeded.
        """
        # Check if streaming is requested
        stream = kwargs.pop("stream", False)
        return_generator = kwargs.pop("return_generator", False)

        # For OpenAI, we'll use the chat API for completions as well
        messages = [{"role": "user", "content": prompt}]

        # If streaming is requested, handle it through the chat method
        if stream:
            return self.chat(
                messages, stream=True, return_generator=return_generator, **kwargs
            )

        # Otherwise, use the regular chat method
        return self.chat(messages, **kwargs)

    def embed(self, text: Union[str, List[str]], **kwargs) -> Dict[str, Any]:
        """
        Send an embedding request to OpenAI.

        Args:
            text: The text to embed. Can be a single string or a list of strings.
            **kwargs: Additional parameters to pass to the OpenAI API.

        Returns:
            A dictionary containing the embeddings from OpenAI.

        Raises:
            AuthenticationError: If the API key is invalid.
            RequestError: If the request fails.
            RateLimitError: If the rate limit is exceeded.
        """
        try:
            # Ensure text is a list
            if isinstance(text, str):
                text = [text]

            # Use the embedding model
            response = self.client.embeddings.create(
                model=self.model_name, input=text, **kwargs
            )

            # Extract embeddings
            embeddings = [item.embedding for item in response.data]

            # Create a list of embedding objects with the expected structure
            embedding_objects = []
            for i, embedding in enumerate(embeddings):
                embedding_objects.append(
                    {
                        "embedding": embedding,
                        "index": i,
                        "text": text[i] if i < len(text) else "",
                    }
                )

            # Extract usage information from the response
            prompt_tokens = (
                response.usage.prompt_tokens
                if hasattr(response.usage, "prompt_tokens")
                else 0
            )
            total_tokens = (
                response.usage.total_tokens
                if hasattr(response.usage, "total_tokens")
                else 0
            )

            embedding_price_per_1k = get_model_info("openai", self.model_name).get(
                "inputPricePer1KTokens"
            )

            # Calculate the cost
            cost = (prompt_tokens / 1000) * embedding_price_per_1k

            # Create usage information
            usage = {
                "tokens_prompt": prompt_tokens,
                "tokens_completion": 0,
                "tokens_total": total_tokens,
                "cost": cost,
                "latency": 0.0,  # We don't have latency information from the API
                "timestamp": datetime.now().isoformat(),
            }

            return {
                "data": embedding_objects,
                "model": self.model_name,
                "provider": "openai",
                "success": True,
                "message": "Successfully generated embeddings",
                "usage": usage,
                "raw_response": response.model_dump(),
            }
        except openai.AuthenticationError:
            raise AuthenticationError("Invalid OpenAI API key.")
        except openai.RateLimitError:
            raise RateLimitError("OpenAI rate limit exceeded.")
        except Exception as e:
            raise RequestError(f"OpenAI embedding request failed: {str(e)}")

    def generate_image(self, prompt: str, **kwargs) -> Dict[str, Any]:
        """
        Generate an image from a prompt using OpenAI.

        Args:
            prompt: The prompt to generate an image from.
            **kwargs: Additional parameters to pass to the OpenAI API.

        Returns:
            A dictionary containing the image URL or data.

        Raises:
            AuthenticationError: If the API key is invalid.
            RequestError: If the request fails.
            RateLimitError: If the rate limit is exceeded.
        """
        try:
            # Use DALL-E model
            model = kwargs.get("model", "dall-e-3")
            size = kwargs.get("size", "1024x1024")
            quality = kwargs.get("quality", "standard")
            n = kwargs.get("n", 1)

            response = self.client.images.generate(
                model=model, prompt=prompt, size=size, quality=quality, n=n
            )

            # Extract image URLs
            images = [item.url for item in response.data]

            # For image generation, we don't have token usage, so we'll estimate cost
            # based on the model and parameters
            cost = calculate_cost(
                f"openai/{model}",  # e.g., "openai/dall-e-3"
                input_tokens=n,  # Number of images
                output_tokens=0,
            )

            # Create usage information
            usage = {
                "tokens_prompt": 0,  # We don't have token information for images
                "tokens_completion": 0,
                "tokens_total": 0,
                "cost": cost,
                "latency": 0.0,
                "timestamp": datetime.now().isoformat(),
            }

            return {
                "data": images,
                "model": model,
                "provider": "openai",
                "success": True,
                "message": "Successfully generated images",
                "usage": usage,
                "sizes": [size] * n,
                "formats": ["url"] * n,
                "raw_response": response.model_dump(),
            }

        except openai.AuthenticationError:
            raise AuthenticationError("Invalid OpenAI API key.")
        except openai.RateLimitError:
            raise RateLimitError("OpenAI rate limit exceeded.")
        except Exception as e:
            raise RequestError(f"OpenAI image generation request failed: {str(e)}")

    def get_token_count(self, text: str) -> int:
        """
        Get the number of tokens in a text using OpenAI's tokenizer.

        Args:
            text: The text to count tokens for.

        Returns:
            The number of tokens in the text.
        """
        try:
            # Use tiktoken for token counting
            import tiktoken

            encoding = tiktoken.encoding_for_model(self.model_name)
            return len(encoding.encode(text))
        except ImportError:
            # Fallback to a simple approximation if tiktoken is not available
            return len(text.split()) * 1.3  # Rough approximation

    def get_model_info(self) -> Dict[str, Any]:
        """
        Get information about the model.

        Returns:
            A dictionary containing information about the model.
        """
        return self.model_info


class StreamingGenerator:
    """
    A generator class that yields chunks of text from a streaming response
    and provides methods to get usage information at any point.
    """

    def __init__(self, stream_response, model_name, messages):
        """
        Initialize the streaming generator.

        Args:
            stream_response: The streaming response from the provider.
            model_name: The name of the model being used.
            messages: The messages sent to the provider.
        """
        self.stream_response = stream_response
        self.model_name = model_name
        self.messages = messages
        self.full_content = ""
        self.finish_reason = None
        self.is_finished = False

        # Try to initialize tiktoken for token counting
        try:
            import tiktoken

            self.encoding = tiktoken.encoding_for_model(model_name)
            self.has_tiktoken = True
        except (ImportError, Exception):
            self.has_tiktoken = False

        # Estimate prompt tokens
        self.prompt_tokens = self._count_prompt_tokens()

    def _count_prompt_tokens(self):
        """Count tokens in the prompt messages."""
        if self.has_tiktoken:
            # Use tiktoken for accurate token counting
            prompt_text = " ".join(
                [
                    msg.get("content", "") if isinstance(msg, dict) else msg.content
                    for msg in self.messages
                ]
            )
            return len(self.encoding.encode(prompt_text))
        else:
            # Fallback to character-based estimation
            prompt_text = " ".join(
                [
                    msg.get("content", "") if isinstance(msg, dict) else msg.content
                    for msg in self.messages
                ]
            )
            return len(prompt_text) // 4  # Rough estimate: 4 chars per token

    def _count_completion_tokens(self):
        """Count tokens in the completion text."""
        if self.has_tiktoken:
            # Use tiktoken for accurate token counting
            return len(self.encoding.encode(self.full_content))
        else:
            # Fallback to character-based estimation
            return len(self.full_content) // 4  # Rough estimate: 4 chars per token

    def get_usage_info(self):
        """
        Get usage information based on the current state.

        Returns:
            A dictionary with usage information.
        """
        completion_tokens = self._count_completion_tokens()
        total_tokens = self.prompt_tokens + completion_tokens

        # Calculate cost
        cost = calculate_cost(
            f"openai/{self.model_name}",
            input_tokens=self.prompt_tokens,
            output_tokens=completion_tokens,
        )

        return {
            "usage": {
                "tokens_prompt": self.prompt_tokens,
                "tokens_completion": completion_tokens,
                "tokens_total": total_tokens,
            },
            "cost": cost,
            "model": self.model_name,
            "provider": "openai",
            "finish_reason": self.finish_reason,
            "is_finished": self.is_finished,
        }

    def __iter__(self):
        return self

    def __next__(self):
        """Get the next chunk from the stream."""
        if self.is_finished:
            raise StopIteration

        try:
            chunk = next(self.stream_response)

            if hasattr(chunk, "choices") and len(chunk.choices) > 0:
                # Check for finish reason
                if (
                    hasattr(chunk.choices[0], "finish_reason")
                    and chunk.choices[0].finish_reason
                ):
                    self.finish_reason = chunk.choices[0].finish_reason

                # Get content delta
                delta = chunk.choices[0].delta
                if hasattr(delta, "content") and delta.content is not None:
                    content_chunk = delta.content
                    self.full_content += content_chunk
                    return content_chunk

            # If we got a chunk with no content but with finish_reason, we're done
            if self.finish_reason:
                self.is_finished = True
                raise StopIteration

            # If we got here, try the next chunk
            return next(self)

        except StopIteration:
            self.is_finished = True
            raise
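For orientation, here is a minimal sketch of how this deleted Provider class was driven. The class name, constructor signature, and response keys come from the source above; the API key and model name are placeholders, and the direct module import is an assumption about how the package was laid out.

    from indoxRouter.providers.openai import Provider

    # Placeholder credentials and model; real values depend on your OpenAI account.
    provider = Provider(api_key="sk-...", model_name="gpt-4o-mini")

    # Non-streaming chat: returns the normalized response dict built above.
    response = provider.chat(
        [{"role": "user", "content": "Hello!"}],
        temperature=0.7,
    )
    print(response["data"], response["usage"]["tokens_total"], response["cost"])

    # Streaming with incremental usage tracking via StreamingGenerator.
    stream = provider.chat(
        [{"role": "user", "content": "Hello!"}],
        stream=True,
        return_generator=True,
    )
    for chunk in stream:
        print(chunk, end="")
    print(stream.get_usage_info())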
indoxRouter/providers/qwen.json
DELETED
@@ -1,110 +0,0 @@
[
  {
    "number": "1",
    "modelName": "qwen-72b-chat",
    "name": "Qwen 72B Chat",
    "type": "Text Generation",
    "inputPricePer1KTokens": 0.0006,
    "outputPricePer1KTokens": 0.0018,
    "description": "Alibaba's flagship large language model with 72B parameters, optimized for conversational applications.",
    "contextWindows": "32k Tokens",
    "recommended": true,
    "commercial": true,
    "pricey": false,
    "output": "4096 Tokens",
    "comments": "Strong multilingual capabilities, especially for Chinese.",
    "companyModelName": "Alibaba : Qwen 72B Chat",
    "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>",
    "systemPrompt": ""
  },
  {
    "number": "2",
    "modelName": "qwen-7b-chat",
    "name": "Qwen 7B Chat",
    "type": "Text Generation",
    "inputPricePer1KTokens": 0.0002,
    "outputPricePer1KTokens": 0.0006,
    "description": "Compact version of Qwen with 7B parameters, offering good performance for general applications at lower cost.",
    "contextWindows": "32k Tokens",
    "recommended": true,
    "commercial": true,
    "pricey": false,
    "output": "4096 Tokens",
    "comments": "Cost-effective model with good multilingual support.",
    "companyModelName": "Alibaba : Qwen 7B Chat",
    "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>",
    "systemPrompt": ""
  },
  {
    "number": "3",
    "modelName": "qwen-max",
    "name": "Qwen Max",
    "type": "Text Generation",
    "inputPricePer1KTokens": 0.0015,
    "outputPricePer1KTokens": 0.0045,
    "description": "Highest capability model from Qwen with advanced reasoning and superior instruction following across domains.",
    "contextWindows": "64k Tokens",
    "recommended": true,
    "commercial": true,
    "pricey": true,
    "output": "8192 Tokens",
    "comments": "Top-tier performance for enterprise applications.",
    "companyModelName": "Alibaba : Qwen Max",
    "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>",
    "systemPrompt": ""
  },
  {
    "number": "4",
    "modelName": "qwen-14b-chat",
    "name": "Qwen 14B Chat",
    "type": "Text Generation",
    "inputPricePer1KTokens": 0.0004,
    "outputPricePer1KTokens": 0.0012,
    "description": "Mid-sized Qwen model with 14B parameters, offering a good balance between performance and efficiency.",
    "contextWindows": "32k Tokens",
    "recommended": true,
    "commercial": true,
    "pricey": false,
    "output": "4096 Tokens",
    "comments": "Well-balanced model for most applications.",
    "companyModelName": "Alibaba : Qwen 14B Chat",
    "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>",
    "systemPrompt": ""
  },
  {
    "number": "5",
    "modelName": "qwen2-72b-instruct",
    "name": "Qwen2 72B Instruct",
    "type": "Text Generation",
    "inputPricePer1KTokens": 0.0008,
    "outputPricePer1KTokens": 0.0024,
    "description": "Second-generation Qwen model with 72B parameters, featuring improved reasoning and instruction following.",
    "contextWindows": "128k Tokens",
    "recommended": true,
    "commercial": true,
    "pricey": false,
    "output": "8192 Tokens",
    "comments": "Latest generation with enhanced capabilities.",
    "companyModelName": "Alibaba : Qwen2 72B Instruct",
    "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>",
    "systemPrompt": ""
  },
  {
    "number": "6",
    "modelName": "qwen-vl-plus",
    "name": "Qwen VL Plus",
    "type": "Text and Vision",
    "inputPricePer1KTokens": 0.001,
    "outputPricePer1KTokens": 0.003,
    "description": "Multimodal model capable of understanding both images and text, with strong visual reasoning capabilities.",
    "contextWindows": "32k Tokens",
    "recommended": true,
    "commercial": true,
    "pricey": false,
    "output": "4096 Tokens",
    "comments": "Advanced vision-language capabilities with multilingual support.",
    "companyModelName": "Alibaba : Qwen VL Plus",
    "promptTemplate": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n%2<|im_end|>",
    "systemPrompt": ""
  }
]
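A brief, hypothetical sketch of how entries in a provider pricing file like this one could be consumed. The file path and the estimate_cost helper below are illustrative, not part of the package; prices in the file are per 1K tokens.

    import json

    # Load the per-model pricing/metadata entries shown above
    # (path is illustrative; the file ships inside the installed package).
    with open("indoxRouter/providers/qwen.json") as f:
        models = json.load(f)

    def estimate_cost(model_name: str, input_tokens: int, output_tokens: int) -> float:
        # Illustrative helper: look up the matching entry and apply per-1K prices.
        entry = next(m for m in models if m["modelName"] == model_name)
        return (
            (input_tokens / 1000) * entry["inputPricePer1KTokens"]
            + (output_tokens / 1000) * entry["outputPricePer1KTokens"]
        )

    # qwen-72b-chat, 1200 input + 400 output tokens:
    # 1.2 * 0.0006 + 0.4 * 0.0018 = 0.00144
    print(estimate_cost("qwen-72b-chat", 1200, 400))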