indoxrouter 0.1.0__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- indoxrouter-0.1.3.dist-info/METADATA +188 -0
- indoxrouter-0.1.3.dist-info/RECORD +4 -0
- indoxrouter-0.1.3.dist-info/top_level.txt +1 -0
- indoxRouter/__init__.py +0 -0
- indoxRouter/api_endpoints.py +0 -336
- indoxRouter/client.py +0 -286
- indoxRouter/client_package.py +0 -138
- indoxRouter/init_db.py +0 -71
- indoxRouter/main.py +0 -711
- indoxRouter/migrations/__init__.py +0 -1
- indoxRouter/migrations/env.py +0 -98
- indoxRouter/migrations/versions/__init__.py +0 -1
- indoxRouter/migrations/versions/initial_schema.py +0 -84
- indoxRouter/providers/__init__.py +0 -108
- indoxRouter/providers/ai21.py +0 -268
- indoxRouter/providers/base_provider.py +0 -69
- indoxRouter/providers/claude.py +0 -177
- indoxRouter/providers/cohere.py +0 -171
- indoxRouter/providers/databricks.py +0 -166
- indoxRouter/providers/deepseek.py +0 -166
- indoxRouter/providers/google.py +0 -216
- indoxRouter/providers/llama.py +0 -164
- indoxRouter/providers/meta.py +0 -227
- indoxRouter/providers/mistral.py +0 -182
- indoxRouter/providers/nvidia.py +0 -164
- indoxRouter/providers/openai.py +0 -122
- indoxrouter-0.1.0.dist-info/METADATA +0 -179
- indoxrouter-0.1.0.dist-info/RECORD +0 -27
- indoxrouter-0.1.0.dist-info/top_level.txt +0 -1
- {indoxrouter-0.1.0.dist-info → indoxrouter-0.1.3.dist-info}/WHEEL +0 -0
indoxRouter/providers/claude.py
DELETED
@@ -1,177 +0,0 @@
-from typing import Dict, Any, Optional, List
-import json
-import os
-from pathlib import Path
-
-try:
-    import anthropic
-    from anthropic import Anthropic
-except ImportError:
-    raise ImportError(
-        "Anthropic package not installed. Install it with 'pip install anthropic'"
-    )
-
-from ..utils.exceptions import RateLimitError, ModelNotFoundError
-from .base_provider import BaseProvider
-
-
-class Provider(BaseProvider):
-    """
-    Anthropic (Claude) provider implementation
-    """
-
-    def __init__(self, api_key: str, model_name: str):
-        """
-        Initialize the Anthropic provider
-
-        Args:
-            api_key: Anthropic API key
-            model_name: Model name to use (e.g., 'claude-3-opus-20240229')
-        """
-        super().__init__(api_key, model_name)
-
-        # Initialize Anthropic client
-        self.client = Anthropic(api_key=api_key)
-
-        # Load model configuration
-        self.model_config = self._load_model_config(model_name)
-
-    def _load_model_config(self, model_name: str) -> Dict[str, Any]:
-        """
-        Load model configuration from JSON file
-
-        Args:
-            model_name: Model name
-
-        Returns:
-            Model configuration dictionary
-        """
-        # Get the path to the model configuration file
-        config_path = Path(__file__).parent / "claude.json"
-
-        # Load the configuration
-        with open(config_path, "r") as f:
-            models = json.load(f)
-
-        # Find the model configuration
-        for model in models:
-            if model.get("modelName") == model_name:
-                return model
-
-        # If model not found, raise an error
-        raise ModelNotFoundError(
-            f"Model {model_name} not found in Anthropic configuration"
-        )
-
-    def estimate_cost(self, prompt: str, max_tokens: int) -> float:
-        """
-        Estimate the cost of generating a completion
-
-        Args:
-            prompt: The prompt to generate a completion for
-            max_tokens: Maximum number of tokens to generate
-
-        Returns:
-            Estimated cost in credits
-        """
-        # Estimate token count (rough approximation)
-        prompt_tokens = self.count_tokens(prompt)
-
-        # Get pricing for the model
-        input_price = self.model_config.get("inputPricePer1KTokens", 0)
-        output_price = self.model_config.get("outputPricePer1KTokens", 0)
-
-        # Calculate cost
-        prompt_cost = (prompt_tokens / 1000) * input_price
-        completion_cost = (max_tokens / 1000) * output_price
-
-        return prompt_cost + completion_cost
-
-    def count_tokens(self, text: str) -> int:
-        """
-        Count the number of tokens in a text
-
-        Args:
-            text: Text to count tokens for
-
-        Returns:
-            Number of tokens
-        """
-        try:
-            # Use Anthropic's token counter if available
-            return anthropic.count_tokens(text)
-        except:
-            # Fallback to simple approximation
-            return len(text.split()) * 1.3
-
-    def generate(self, prompt: str, **kwargs) -> Dict[str, Any]:
-        """
-        Generate a completion using Anthropic
-
-        Args:
-            prompt: The prompt to generate a completion for
-            **kwargs: Additional parameters for the generation
-                - max_tokens: Maximum number of tokens to generate
-                - temperature: Sampling temperature (0.0 to 1.0)
-                - top_p: Nucleus sampling parameter (0.0 to 1.0)
-
-        Returns:
-            Dictionary containing the response text, cost, and other metadata
-        """
-        try:
-            # Extract parameters
-            max_tokens = kwargs.get("max_tokens", 1024)
-            temperature = kwargs.get("temperature", 0.7)
-            top_p = kwargs.get("top_p", 1.0)
-
-            # Format prompt using the template from model config
-            prompt_template = self.model_config.get(
-                "promptTemplate", "Human: %1\n\nAssistant: %2"
-            )
-            formatted_prompt = prompt_template.replace("%1", prompt).replace("%2", "")
-
-            # Get system prompt if available
-            system_prompt = self.model_config.get("systemPrompt", "")
-
-            # Make API call
-            response = self.client.messages.create(
-                model=self.model_name,
-                messages=[{"role": "user", "content": prompt}],
-                system=system_prompt if system_prompt else None,
-                max_tokens=max_tokens,
-                temperature=temperature,
-                top_p=top_p,
-            )
-
-            # Extract response text
-            text = response.content[0].text
-
-            # Calculate actual cost
-            input_price = self.model_config.get("inputPricePer1KTokens", 0)
-            output_price = self.model_config.get("outputPricePer1KTokens", 0)
-
-            prompt_tokens = response.usage.input_tokens
-            completion_tokens = response.usage.output_tokens
-
-            prompt_cost = (prompt_tokens / 1000) * input_price
-            completion_cost = (completion_tokens / 1000) * output_price
-            total_cost = prompt_cost + completion_cost
-
-            # Return standardized response
-            return {
-                "text": text,
-                "cost": total_cost,
-                "usage": {
-                    "prompt_tokens": prompt_tokens,
-                    "completion_tokens": completion_tokens,
-                    "total_tokens": prompt_tokens + completion_tokens,
-                },
-                "model": self.model_name,
-            }
-
-        except anthropic.RateLimitError as e:
-            raise RateLimitError(f"Anthropic rate limit exceeded: {str(e)}")
-        except anthropic.APIError as e:
-            raise Exception(f"Anthropic API error: {str(e)}")
-        except Exception as e:
-            raise Exception(f"Error generating completion: {str(e)}")
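
Note: the cost arithmetic in this hunk recurs in every provider removed by this release: token counts are scaled against per-1K-token prices read from the model's JSON config. A minimal standalone sketch of that formula, with illustrative placeholder prices rather than real Anthropic rates:

def estimate_cost(prompt_tokens: int, max_tokens: int,
                  input_price_per_1k: float, output_price_per_1k: float) -> float:
    # Same per-1K interpolation as the deleted estimate_cost methods.
    return ((prompt_tokens / 1000) * input_price_per_1k
            + (max_tokens / 1000) * output_price_per_1k)

# 1,500 prompt tokens at 0.003/1K plus a 1,024-token output budget at 0.015/1K:
# 1.5 * 0.003 + 1.024 * 0.015 = 0.0045 + 0.01536 ≈ 0.0199 credits
print(estimate_cost(1500, 1024, 0.003, 0.015))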
indoxRouter/providers/cohere.py
DELETED
@@ -1,171 +0,0 @@
-from typing import Dict, Any, Optional, List
-import json
-import os
-from pathlib import Path
-
-try:
-    import cohere
-    from cohere import ClientV2 as CohereClient
-except ImportError:
-    raise ImportError(
-        "Cohere package not installed. Install it with 'pip install cohere'"
-    )
-
-from ..utils.exceptions import RateLimitError, ModelNotFoundError
-from .base_provider import BaseProvider
-
-
-class Provider(BaseProvider):
-    """
-    Cohere provider implementation
-    """
-
-    def __init__(self, api_key: str, model_name: str):
-        """
-        Initialize the Cohere provider
-
-        Args:
-            api_key: Cohere API key
-            model_name: Model name to use (e.g., 'command-r-plus')
-        """
-        super().__init__(api_key, model_name)
-
-        # Initialize Cohere client
-        self.client = CohereClient(api_key=api_key)
-
-        # Load model configuration
-        self.model_config = self._load_model_config(model_name)
-
-    def _load_model_config(self, model_name: str) -> Dict[str, Any]:
-        """
-        Load model configuration from JSON file
-
-        Args:
-            model_name: Model name
-
-        Returns:
-            Model configuration dictionary
-        """
-        # Get the path to the model configuration file
-        config_path = Path(__file__).parent / "cohere.json"
-
-        # Load the configuration
-        with open(config_path, "r") as f:
-            models = json.load(f)
-
-        # Find the model configuration
-        for model in models:
-            if model.get("modelName") == model_name:
-                return model
-
-        # If model not found, raise an error
-        raise ModelNotFoundError(
-            f"Model {model_name} not found in Cohere configuration"
-        )
-
-    def estimate_cost(self, prompt: str, max_tokens: int) -> float:
-        """
-        Estimate the cost of generating a completion
-
-        Args:
-            prompt: The prompt to generate a completion for
-            max_tokens: Maximum number of tokens to generate
-
-        Returns:
-            Estimated cost in credits
-        """
-        # Estimate token count (rough approximation)
-        prompt_tokens = self.count_tokens(prompt)
-
-        # Get pricing for the model
-        input_price = self.model_config.get("inputPricePer1KTokens", 0)
-        output_price = self.model_config.get("outputPricePer1KTokens", 0)
-
-        # Calculate cost
-        prompt_cost = (prompt_tokens / 1000) * input_price
-        completion_cost = (max_tokens / 1000) * output_price
-
-        return prompt_cost + completion_cost
-
-    def count_tokens(self, text: str) -> int:
-        """
-        Count the number of tokens in a text
-
-        Args:
-            text: Text to count tokens for
-
-        Returns:
-            Number of tokens
-        """
-        # Simple approximation - in production, use a proper tokenizer
-        return len(text.split()) * 1.3
-
-    def generate(self, prompt: str, **kwargs) -> Dict[str, Any]:
-        """
-        Generate a completion using Cohere
-
-        Args:
-            prompt: The prompt to generate a completion for
-            **kwargs: Additional parameters for the generation
-                - max_tokens: Maximum number of tokens to generate
-                - temperature: Sampling temperature (0.0 to 2.0)
-                - top_p: Nucleus sampling parameter (0.0 to 1.0)
-                - system_prompt: System prompt to use
-
-        Returns:
-            Dictionary containing the response text, cost, and other metadata
-        """
-        try:
-            # Extract parameters
-            max_tokens = kwargs.get("max_tokens", 1024)
-            temperature = kwargs.get("temperature", 0.7)
-            top_p = kwargs.get("top_p", 1.0)
-            system_prompt = kwargs.get(
-                "system_prompt", self.model_config.get("systemPrompt", "")
-            )
-
-            # Prepare messages
-            messages = [{"role": "user", "content": prompt}]
-
-            # Make API call using the new format
-            response = self.client.chat(
-                model=self.model_name,
-                messages=messages,
-                max_tokens=max_tokens,
-                temperature=temperature,
-                p=top_p,
-                system=system_prompt if system_prompt else None,
-            )
-
-            # Extract response text
-            text = response.text
-
-            # Calculate actual cost
-            input_price = self.model_config.get("inputPricePer1KTokens", 0)
-            output_price = self.model_config.get("outputPricePer1KTokens", 0)
-
-            prompt_tokens = response.meta.billed_units.input_tokens
-            completion_tokens = response.meta.billed_units.output_tokens
-
-            prompt_cost = (prompt_tokens / 1000) * input_price
-            completion_cost = (completion_tokens / 1000) * output_price
-            total_cost = prompt_cost + completion_cost
-
-            # Return standardized response
-            return {
-                "text": text,
-                "cost": total_cost,
-                "usage": {
-                    "prompt_tokens": prompt_tokens,
-                    "completion_tokens": completion_tokens,
-                    "total_tokens": prompt_tokens + completion_tokens,
-                },
-                "model": self.model_name,
-            }
-
-        except cohere.error.CohereAPIError as e:
-            if "rate limit" in str(e).lower():
-                raise RateLimitError(f"Cohere rate limit exceeded: {str(e)}")
-            raise Exception(f"Cohere API error: {str(e)}")
-        except Exception as e:
-            raise Exception(f"Error generating completion: {str(e)}")
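
One detail worth flagging in the token counters above: len(text.split()) * 1.3 evaluates to a float, so count_tokens never actually satisfies its int annotation. A corrected version of the same heuristic (a sketch, not code from the package) would truncate explicitly:

def count_tokens(text: str) -> int:
    # Word-count heuristic from the deleted providers, wrapped in int()
    # so the return value matches the declared type.
    return int(len(text.split()) * 1.3)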
indoxRouter/providers/databricks.py
DELETED
@@ -1,166 +0,0 @@
-import os
-import json
-import requests
-from typing import Dict, Any, Optional, List
-from .base_provider import BaseProvider
-
-
-class Provider(BaseProvider):
-    def __init__(self, api_key: str, model_name: str):
-        """
-        Initialize the Databricks provider with API key and model name.
-
-        Args:
-            api_key (str): The API key for authentication
-            model_name (str): The name of the model to use
-        """
-        super().__init__(api_key, model_name)
-        self.base_url = os.environ.get(
-            "DATABRICKS_API_BASE", "https://api.databricks.com/v1"
-        )
-        self.headers = {
-            "Authorization": f"Bearer {api_key}",
-            "Content-Type": "application/json",
-        }
-        self.model_config = self._load_model_config()
-
-    def _load_model_config(self) -> Dict[str, Any]:
-        """
-        Load the model configuration from the JSON file.
-
-        Returns:
-            Dict[str, Any]: The model configuration
-        """
-        current_dir = os.path.dirname(os.path.abspath(__file__))
-        config_path = os.path.join(current_dir, "databricks.json")
-
-        with open(config_path, "r") as f:
-            models = json.load(f)
-
-        for model in models:
-            if model["modelName"] == self.model_name:
-                return model
-
-        raise ValueError(f"Model {self.model_name} not found in configuration")
-
-    def estimate_cost(self, prompt: str, max_tokens: int = 100) -> float:
-        """
-        Estimate the cost of generating a completion.
-
-        Args:
-            prompt (str): The prompt to generate a completion for
-            max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 100.
-
-        Returns:
-            float: The estimated cost in USD
-        """
-        input_tokens = self.count_tokens(prompt)
-        input_cost = (input_tokens / 1000) * self.model_config["inputPricePer1KTokens"]
-        output_cost = (max_tokens / 1000) * self.model_config["outputPricePer1KTokens"]
-        return input_cost + output_cost
-
-    def count_tokens(self, text: str) -> int:
-        """
-        Count the number of tokens in a text.
-        This is a simple approximation. For more accurate counts, consider using a tokenizer.
-
-        Args:
-            text (str): The text to count tokens for
-
-        Returns:
-            int: The number of tokens
-        """
-        # Simple approximation: 1 token ≈ 4 characters
-        return len(text) // 4
-
-    def generate(
-        self,
-        prompt: str,
-        max_tokens: int = 100,
-        temperature: float = 0.7,
-        top_p: float = 1.0,
-        frequency_penalty: float = 0.0,
-        presence_penalty: float = 0.0,
-        stop: Optional[List[str]] = None,
-    ) -> Dict[str, Any]:
-        """
-        Generate a completion for the given prompt.
-
-        Args:
-            prompt (str): The prompt to generate a completion for
-            max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 100.
-            temperature (float, optional): The temperature for sampling. Defaults to 0.7.
-            top_p (float, optional): The top-p value for nucleus sampling. Defaults to 1.0.
-            frequency_penalty (float, optional): The frequency penalty. Defaults to 0.0.
-            presence_penalty (float, optional): The presence penalty. Defaults to 0.0.
-            stop (Optional[List[str]], optional): A list of stop sequences. Defaults to None.
-
-        Returns:
-            Dict[str, Any]: The generated completion
-        """
-        # Format the prompt according to the model's template
-        prompt_template = self.model_config.get("promptTemplate", "%1%2")
-        formatted_prompt = prompt_template.replace("%1", prompt).replace("%2", "")
-
-        # Prepare the request payload
-        payload = {
-            "model": self.model_config.get("companyModelName", self.model_name),
-            "prompt": formatted_prompt,
-            "max_tokens": max_tokens,
-            "temperature": temperature,
-            "top_p": top_p,
-            "frequency_penalty": frequency_penalty,
-            "presence_penalty": presence_penalty,
-        }
-
-        if stop:
-            payload["stop"] = stop
-
-        # Make the API request
-        try:
-            response = requests.post(
-                f"{self.base_url}/completions", headers=self.headers, json=payload
-            )
-            response.raise_for_status()
-            result = response.json()
-
-            # Calculate the cost
-            input_tokens = result.get("usage", {}).get(
-                "prompt_tokens", self.count_tokens(prompt)
-            )
-            output_tokens = result.get("usage", {}).get("completion_tokens", 0)
-            input_cost = (input_tokens / 1000) * self.model_config[
-                "inputPricePer1KTokens"
-            ]
-            output_cost = (output_tokens / 1000) * self.model_config[
-                "outputPricePer1KTokens"
-            ]
-            total_cost = input_cost + output_cost
-
-            # Format the response
-            return self.validate_response(
-                {
-                    "text": result.get("choices", [{}])[0].get("text", ""),
-                    "cost": total_cost,
-                    "usage": {
-                        "input_tokens": input_tokens,
-                        "output_tokens": output_tokens,
-                        "input_cost": input_cost,
-                        "output_cost": output_cost,
-                    },
-                    "raw_response": result,
-                }
-            )
-
-        except requests.exceptions.RequestException as e:
-            return {
-                "text": f"Error: {str(e)}",
-                "cost": 0,
-                "usage": {
-                    "input_tokens": 0,
-                    "output_tokens": 0,
-                    "input_cost": 0,
-                    "output_cost": 0,
-                },
-                "error": str(e),
-            }
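
The promptTemplate mechanism used here (and in claude.py above) is plain string substitution: "%1" marks the user-prompt slot and "%2" the empty assistant-continuation slot. A self-contained sketch using the default template from the deleted claude.py:

# "%1"/"%2" substitution as performed by the deleted generate() methods.
template = "Human: %1\n\nAssistant: %2"
formatted = template.replace("%1", "What is 2 + 2?").replace("%2", "")
print(formatted)
# Human: What is 2 + 2?
#
# Assistant: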
indoxRouter/providers/deepseek.py
DELETED
@@ -1,166 +0,0 @@
-import os
-import json
-import requests
-from typing import Dict, Any, Optional, List
-from .base_provider import BaseProvider
-
-
-class Provider(BaseProvider):
-    def __init__(self, api_key: str, model_name: str):
-        """
-        Initialize the Deepseek provider with API key and model name.
-
-        Args:
-            api_key (str): The API key for authentication
-            model_name (str): The name of the model to use
-        """
-        super().__init__(api_key, model_name)
-        self.base_url = os.environ.get(
-            "DEEPSEEK_API_BASE", "https://api.deepseek.com/v1"
-        )
-        self.headers = {
-            "Authorization": f"Bearer {api_key}",
-            "Content-Type": "application/json",
-        }
-        self.model_config = self._load_model_config()
-
-    def _load_model_config(self) -> Dict[str, Any]:
-        """
-        Load the model configuration from the JSON file.
-
-        Returns:
-            Dict[str, Any]: The model configuration
-        """
-        current_dir = os.path.dirname(os.path.abspath(__file__))
-        config_path = os.path.join(current_dir, "deepseek.json")
-
-        with open(config_path, "r") as f:
-            models = json.load(f)
-
-        for model in models:
-            if model["modelName"] == self.model_name:
-                return model
-
-        raise ValueError(f"Model {self.model_name} not found in configuration")
-
-    def estimate_cost(self, prompt: str, max_tokens: int = 100) -> float:
-        """
-        Estimate the cost of generating a completion.
-
-        Args:
-            prompt (str): The prompt to generate a completion for
-            max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 100.
-
-        Returns:
-            float: The estimated cost in USD
-        """
-        input_tokens = self.count_tokens(prompt)
-        input_cost = (input_tokens / 1000) * self.model_config["inputPricePer1KTokens"]
-        output_cost = (max_tokens / 1000) * self.model_config["outputPricePer1KTokens"]
-        return input_cost + output_cost
-
-    def count_tokens(self, text: str) -> int:
-        """
-        Count the number of tokens in a text.
-        This is a simple approximation. For more accurate counts, consider using a tokenizer.
-
-        Args:
-            text (str): The text to count tokens for
-
-        Returns:
-            int: The number of tokens
-        """
-        # Simple approximation: 1 token ≈ 4 characters
-        return len(text) // 4
-
-    def generate(
-        self,
-        prompt: str,
-        max_tokens: int = 100,
-        temperature: float = 0.7,
-        top_p: float = 1.0,
-        frequency_penalty: float = 0.0,
-        presence_penalty: float = 0.0,
-        stop: Optional[List[str]] = None,
-    ) -> Dict[str, Any]:
-        """
-        Generate a completion for the given prompt.
-
-        Args:
-            prompt (str): The prompt to generate a completion for
-            max_tokens (int, optional): The maximum number of tokens to generate. Defaults to 100.
-            temperature (float, optional): The temperature for sampling. Defaults to 0.7.
-            top_p (float, optional): The top-p value for nucleus sampling. Defaults to 1.0.
-            frequency_penalty (float, optional): The frequency penalty. Defaults to 0.0.
-            presence_penalty (float, optional): The presence penalty. Defaults to 0.0.
-            stop (Optional[List[str]], optional): A list of stop sequences. Defaults to None.
-
-        Returns:
-            Dict[str, Any]: The generated completion
-        """
-        # Format the prompt according to the model's template
-        prompt_template = self.model_config.get("promptTemplate", "%1%2")
-        formatted_prompt = prompt_template.replace("%1", prompt).replace("%2", "")
-
-        # Prepare the request payload
-        payload = {
-            "model": self.model_config.get("companyModelName", self.model_name),
-            "prompt": formatted_prompt,
-            "max_tokens": max_tokens,
-            "temperature": temperature,
-            "top_p": top_p,
-            "frequency_penalty": frequency_penalty,
-            "presence_penalty": presence_penalty,
-        }
-
-        if stop:
-            payload["stop"] = stop
-
-        # Make the API request
-        try:
-            response = requests.post(
-                f"{self.base_url}/completions", headers=self.headers, json=payload
-            )
-            response.raise_for_status()
-            result = response.json()
-
-            # Calculate the cost
-            input_tokens = result.get("usage", {}).get(
-                "prompt_tokens", self.count_tokens(prompt)
-            )
-            output_tokens = result.get("usage", {}).get("completion_tokens", 0)
-            input_cost = (input_tokens / 1000) * self.model_config[
-                "inputPricePer1KTokens"
-            ]
-            output_cost = (output_tokens / 1000) * self.model_config[
-                "outputPricePer1KTokens"
-            ]
-            total_cost = input_cost + output_cost
-
-            # Format the response
-            return self.validate_response(
-                {
-                    "text": result.get("choices", [{}])[0].get("text", ""),
-                    "cost": total_cost,
-                    "usage": {
-                        "input_tokens": input_tokens,
-                        "output_tokens": output_tokens,
-                        "input_cost": input_cost,
-                        "output_cost": output_cost,
-                    },
-                    "raw_response": result,
-                }
-            )
-
-        except requests.exceptions.RequestException as e:
-            return {
-                "text": f"Error: {str(e)}",
-                "cost": 0,
-                "usage": {
-                    "input_tokens": 0,
-                    "output_tokens": 0,
-                    "input_cost": 0,
-                    "output_cost": 0,
-                },
-                "error": str(e),
-            }
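
As the two hunks above show, the deleted databricks.py and deepseek.py were identical except for the environment variable, default base URL, and config file name. A hedged sketch of how that duplication could have been factored out (class names here are hypothetical, not part of the package):

import os

class RESTProviderBase:  # illustrative name, not from indoxRouter
    ENV_VAR = ""
    DEFAULT_BASE_URL = ""
    CONFIG_FILE = ""

    def __init__(self, api_key: str, model_name: str):
        # Shared setup copied from the two deleted __init__ methods.
        self.model_name = model_name
        self.base_url = os.environ.get(self.ENV_VAR, self.DEFAULT_BASE_URL)
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

class DatabricksProvider(RESTProviderBase):
    ENV_VAR = "DATABRICKS_API_BASE"
    DEFAULT_BASE_URL = "https://api.databricks.com/v1"
    CONFIG_FILE = "databricks.json"

class DeepseekProvider(RESTProviderBase):
    ENV_VAR = "DEEPSEEK_API_BASE"
    DEFAULT_BASE_URL = "https://api.deepseek.com/v1"
    CONFIG_FILE = "deepseek.json"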