polyrouter 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- polyrouter/Exceptions.py +49 -0
- polyrouter/LLMClients.py +565 -0
- polyrouter/LLMOrchestrator.py +83 -0
- polyrouter/__init__.py +41 -0
- polyrouter-1.0.0.dist-info/METADATA +304 -0
- polyrouter-1.0.0.dist-info/RECORD +8 -0
- polyrouter-1.0.0.dist-info/WHEEL +5 -0
- polyrouter-1.0.0.dist-info/top_level.txt +1 -0
polyrouter/Exceptions.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# This file defines all of the package's user-defined exceptions.
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
logger = logging.getLogger(__name__)


class LLMError(Exception):
    """Root of the polyrouter exception hierarchy.

    Building an instance has a side effect: ``message`` is written to this
    module's logger at ERROR level before the exception itself is
    initialised.  Every subclass below inherits that constructor unchanged.
    """

    def __init__(self, message):
        logger.error(message)
        super().__init__(message)


class AllModelsFailedError(LLMError):
    """Every configured client failed to produce an answer."""


class ModelRateLimit(LLMError):
    """A model hit its rate limit (presumably; not raised in the visible code)."""


class AllClientsExhaustedError(LLMError):
    """No usable LLM client is available."""


class InvalidAPIKey(LLMError):
    """An API key was rejected (presumably; not raised in the visible code)."""


class InvalidJSONResponseError(LLMError):
    """A JSON reply could not be used (presumably; not raised in the visible code)."""


class NoAPIKeysError(LLMError):
    """No API key was supplied, or a supplied key is missing or malformed."""


class NoModelMentioned(LLMError):
    """A provider was configured without any model name."""


class UnknownError(LLMError):
    """Catch-all for failures that fit no other category."""
|
polyrouter/LLMClients.py
ADDED
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
# This file acts as an interface between the user-facing universal call functions and the LLM-client-specific invoke functions.
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# ALL the clients to which i can make calls
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
import os
|
|
8
|
+
import json
|
|
9
|
+
import threading
|
|
10
|
+
import logging
|
|
11
|
+
from groq import Groq
|
|
12
|
+
from google import genai
|
|
13
|
+
from google.genai import types
|
|
14
|
+
from .Exceptions import (
|
|
15
|
+
NoAPIKeysError, NoModelMentioned, UnknownError
|
|
16
|
+
)
|
|
17
|
+
from cerebras.cloud.sdk import Cerebras
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
class LLM(ABC):
    """Interface shared by all provider clients.

    A concrete client owns its own API keys and model names.  Its ``call()``
    hands back ``None`` once every key has been tried against every model;
    InitLLM reads that value as the cue to move on to the next client.
    """

    @abstractmethod
    def call(self, user_input, json_mode=False):
        """Answer ``user_input``; return ``None`` when the client is exhausted."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# If a model call crashes with one API key but works with another, don't crash the whole system; just switch API keys.
|
|
33
|
+
# always take the first model whose TPM is available (otherwise client building fails -> assume wrong client and crash)
|
|
34
|
+
# or hardcode model checking in try block e.g. model=GROQ_MODEL[0] -> model="openai/gpt-oss-120b"
|
|
35
|
+
|
|
36
|
+
# ------------------------------------------- GROQ -------------------------------------------
|
|
37
|
+
|
|
38
|
+
class GroqLLM(LLM):
    """Groq client that rotates over a pool of API keys and a list of models.

    Each key in ``GROQ_KEY`` is probed once at construction time with the
    first model; keys whose probe fails are logged and dropped.  ``call()``
    tries the current key on the current model, moves to the next key on
    failure, moves to the next model once all keys failed, and returns
    ``None`` when every key has failed on every model.
    """

    def __init__(
        self,
        GROQ_MODEL,
        GROQ_KEY,
        prompt="You are a helpful assistant",
        temperature=0.5,
        max_output_tokens=500,
        DEBUG=0,
        IN_DEPTH_DEBUG=0,
        test_mode=0,
    ):
        """Validate the configuration and build one SDK client per working key.

        Args:
            GROQ_MODEL: Sequence of model names; the first one is used for probing.
            GROQ_KEY: Sequence of API keys; each must be a string starting with "gsk".
            prompt: System prompt sent with every request.
            temperature: Sampling temperature sent with every request.
            max_output_tokens: Output token limit sent with every request.
            DEBUG: Truthy to emit INFO-level progress logs.
            IN_DEPTH_DEBUG: Truthy to emit DEBUG-level logs, including model output.
            test_mode: Truthy to additionally probe every model with every key.

        Raises:
            NoModelMentioned: ``GROQ_MODEL`` is empty.
            NoAPIKeysError: ``GROQ_KEY`` is empty, a key is missing or malformed,
                or no key passed its probe.
        """
        if not GROQ_MODEL:
            raise NoModelMentioned("GROQ : No models listed in GROQ_MODELS.")
        if not GROQ_KEY:
            raise NoAPIKeysError("GROQ : No api key listed in GROQ_KEYS.")

        self.DEBUG = DEBUG
        self.IN_DEPTH_DEBUG = IN_DEPTH_DEBUG
        self.prompt = prompt
        self.temperature = temperature
        self.max_output_tokens = max_output_tokens
        self._lock = threading.Lock()
        self.clients = []

        for api_key in GROQ_KEY:
            # Only a short suffix of the key is ever logged or put in messages.
            key_tail = self._key_tail(api_key)
            if not (isinstance(api_key, str) and api_key.startswith("gsk")):
                raise NoAPIKeysError(f"GROQ : Key ....{key_tail} is missing or has an invalid format.")

            # Probe the key with a tiny request so a broken key is dropped now
            # rather than failing in the middle of a pipeline.
            try:
                client = Groq(api_key=api_key)
                response = client.chat.completions.create(
                    model=GROQ_MODEL[0],
                    messages=[{"role": "user", "content": "Reply: ok"}],
                    max_completion_tokens=100,
                )
                self.clients.append(client)
                if self.IN_DEPTH_DEBUG:
                    logger.debug("GROQ : Working Key ....%s", key_tail)
                    logger.debug("Output: %s", response.choices[0].message.content)

                if test_mode:
                    # Best effort by design: a model that fails for this key is
                    # only reported, the key itself stays in the pool.
                    for model in GROQ_MODEL:
                        try:
                            response = client.chat.completions.create(
                                model=model,
                                messages=[{"role": "user", "content": "Reply: ok"}],
                                max_completion_tokens=100,
                            )
                            if self.IN_DEPTH_DEBUG:
                                logger.debug("GROQ : Working Model %s", model)
                                logger.debug("Output: %s", response.choices[0].message.content)
                        except Exception:
                            logger.error("GROQ : Model usage failed ....%s : %s", key_tail, model)

            except Exception:
                # Best effort by design: a key that fails its probe is skipped.
                logger.error("GROQ : Model usage failed ....%s", key_tail)
                if self.IN_DEPTH_DEBUG:
                    logger.debug("Exception details", exc_info=True)

        if not self.clients:
            raise NoAPIKeysError("GROQ : No valid API keys found.")

        self.models = list(GROQ_MODEL)
        self.current_model_idx = 0
        self.current_client_idx = 0

    @staticmethod
    def _key_tail(api_key):
        """Return a short, log-safe suffix of ``api_key`` (never the full secret)."""
        if not isinstance(api_key, str) or not api_key:
            return "<missing>"
        if len(api_key) < 12:
            # Too short to reveal any part of it safely.
            return "<redacted>"
        return api_key[-4:]

    def _get_current(self):
        """Return the ``(client, model)`` pair currently selected."""
        with self._lock:
            if self.DEBUG:
                logger.info("Using Model: %s", self.models[self.current_model_idx])
            return (
                self.clients[self.current_client_idx],
                self.models[self.current_model_idx],
            )

    def _rotate_client(self):
        """Select the next key, wrapping around to the first one. Returns nothing."""
        with self._lock:
            self.current_client_idx += 1
            if self.current_client_idx == len(self.clients):
                self.current_client_idx = 0

    def _rotate_model(self):
        """Select the next model (wrapping around) and reset to the first key."""
        with self._lock:
            self.current_model_idx += 1
            self.current_client_idx = 0
            if self.current_model_idx == len(self.models):
                self.current_model_idx = 0

    def _get_remaining_tokens(self, response) -> int:
        """Read the remaining-token budget from the raw response headers.

        Returns 9999 when the header is absent or cannot be parsed, so that
        missing rate-limit information never triggers a proactive rotation.
        """
        tokens_str = response.headers.get('x-ratelimit-remaining-tokens')
        if self.IN_DEPTH_DEBUG:
            logger.debug("GROQ Tokens Remaining: %s", tokens_str)
        if tokens_str is None:
            return 9999
        try:
            return int(tokens_str)
        except (TypeError, ValueError):
            # A malformed header must not discard an otherwise good reply.
            return 9999

    def call(self, user_input, json_mode=False):
        """Send ``user_input`` to Groq, rotating keys and models on failure.

        Rotation strategy:
            1. Try current key on current model.
            2. On failure -> rotate key.
            3. All keys exhausted on current model -> rotate model, reset keys.
            4. All models exhausted -> return None (signals InitLLM to switch client).

        Args:
            user_input: User message text.
            json_mode: When true, request a JSON object and return it parsed.

        Returns:
            The reply text, the parsed JSON value when ``json_mode`` is set, or
            ``None`` only when every key failed on every model.
        """
        models_tried = 0

        while models_tried < len(self.models):
            keys_tried = 0

            while keys_tried < len(self.clients):
                client, model = self._get_current()
                try:
                    kwargs = {
                        "model": model,
                        "temperature": self.temperature,
                        "max_tokens": self.max_output_tokens,
                        "messages": [
                            {"role": "system", "content": self.prompt},
                            {"role": "user", "content": user_input},
                        ],
                    }
                    if json_mode:
                        kwargs["response_format"] = {"type": "json_object"}

                    response = client.chat.completions.with_raw_response.create(**kwargs)
                    output_text = response.parse().choices[0].message.content

                    # None is reserved as the "exhausted" signal, so an empty
                    # reply is handled like any other failure of this key.
                    if output_text is None:
                        raise ValueError("GROQ returned a reply without content")
                    result = json.loads(output_text) if json_mode else output_text
                    if result is None:
                        raise ValueError("GROQ returned JSON null")
                except Exception:
                    if self.IN_DEPTH_DEBUG:
                        logger.debug("GROQ Unknown error on key %s", self.current_client_idx, exc_info=True)
                    keys_tried += 1
                    self._rotate_client()
                    continue

                if self.IN_DEPTH_DEBUG:
                    logger.debug("%s : %s", user_input, output_text)

                # Rotate only after the reply is safely in hand: if fewer than
                # 500 tokens remain, the next request may not fit on this key.
                if self._get_remaining_tokens(response) < 500:
                    if self.DEBUG:
                        logger.info("GROQ Low tokens on key %s, rotating proactively.", self.current_client_idx)
                    self._rotate_client()

                return result

            if self.IN_DEPTH_DEBUG:
                logger.debug("GROQ All keys exhausted for model '%s'. Rotating model.", self.models[self.current_model_idx])

            models_tried += 1
            self._rotate_model()

        if self.DEBUG:
            logger.info("GROQ All keys x all models exhausted.")
        return None
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# ------------------------------------------- GEMINI -------------------------------------------
|
|
215
|
+
# cannot move proactively as gemini doesn't provide the TPM left
|
|
216
|
+
# check if it is giving response, if it stucks then rotate model
|
|
217
|
+
class GeminiLLM(LLM):
    """Gemini client that rotates over a pool of API keys and a list of models.

    Each key in ``GEMINI_KEY`` is probed once at construction time with the
    first model; keys whose probe fails are logged and dropped.  ``call()``
    tries the current key on the current model, moves to the next key on
    failure, moves to the next model once all keys failed, and returns
    ``None`` when every key has failed on every model.
    """

    def __init__(
        self,
        GEMINI_MODEL,
        GEMINI_KEY,
        prompt="You are a helpful assistant",
        temperature=0.5,
        max_output_tokens=500,
        DEBUG=0,
        IN_DEPTH_DEBUG=0,
        test_mode=0,
    ):
        """Validate the configuration and build one SDK client per working key.

        Args:
            GEMINI_MODEL: Sequence of model names; the first one is used for probing.
            GEMINI_KEY: Sequence of API keys; each must be a string starting with "AIz".
            prompt: System instruction sent with every request.
            temperature: Sampling temperature sent with every request.
            max_output_tokens: Output token limit sent with every request.
            DEBUG: Truthy to emit INFO-level progress logs.
            IN_DEPTH_DEBUG: Truthy to emit DEBUG-level logs, including model output.
            test_mode: Truthy to additionally probe every model with every key.

        Raises:
            NoModelMentioned: ``GEMINI_MODEL`` is empty.
            NoAPIKeysError: ``GEMINI_KEY`` is empty, a key is missing or malformed,
                or no key passed its probe.
        """
        if not GEMINI_MODEL:
            raise NoModelMentioned("GEMINI : No models listed in GEMINI_MODEL.")
        if not GEMINI_KEY:
            raise NoAPIKeysError("GEMINI : No api key listed in GEMINI_KEYS.")

        self.DEBUG = DEBUG
        self.IN_DEPTH_DEBUG = IN_DEPTH_DEBUG
        self.prompt = prompt
        self.temperature = temperature
        self.max_output_tokens = max_output_tokens
        self._lock = threading.Lock()
        self.clients = []

        for api_key in GEMINI_KEY:
            # Only a short suffix of the key is ever logged or put in messages.
            key_tail = self._key_tail(api_key)
            if not (isinstance(api_key, str) and api_key.startswith("AIz")):
                raise NoAPIKeysError(f"GEMINI : Key ....{key_tail} is missing or has an invalid format.")

            # Probe the key with a tiny request so a broken key is dropped now
            # rather than failing in the middle of a pipeline.
            try:
                client = genai.Client(api_key=api_key)
                contents = [
                    types.Content(
                        role="user",
                        parts=[types.Part(text="Reply: ok")],
                    )
                ]
                response = client.models.generate_content(
                    model=GEMINI_MODEL[0],
                    contents=contents,
                    config=types.GenerateContentConfig(
                        temperature=0.1,
                        max_output_tokens=100,
                    ),
                )
                self.clients.append(client)

                if self.IN_DEPTH_DEBUG:
                    logger.debug("GEMINI : Working Key ....%s", key_tail)
                    logger.debug("Output: %s", response.text)

                if test_mode:
                    # Best effort by design: a model that fails for this key is
                    # only reported, the key itself stays in the pool.
                    for model in GEMINI_MODEL:
                        try:
                            response = client.models.generate_content(
                                model=model,
                                contents=contents,
                                config=types.GenerateContentConfig(
                                    temperature=0.1,
                                    max_output_tokens=100,
                                ),
                            )
                            if self.IN_DEPTH_DEBUG:
                                logger.debug("GEMINI : Working Model %s", model)
                                logger.debug("Output: %s", response.text)
                        except Exception:
                            logger.error("GEMINI : Model usage failed ....%s : %s", key_tail, model)

            except Exception:
                # Best effort by design: a key that fails its probe is skipped.
                logger.error("GEMINI : API usage failed ....%s", key_tail)
                if self.IN_DEPTH_DEBUG:
                    logger.debug("Exception details", exc_info=True)

        if not self.clients:
            raise NoAPIKeysError("GEMINI : No valid API keys found.")

        self.models = list(GEMINI_MODEL)
        self.current_model_idx = 0
        self.current_client_idx = 0

    @staticmethod
    def _key_tail(api_key):
        """Return a short, log-safe suffix of ``api_key`` (never the full secret)."""
        if not isinstance(api_key, str) or not api_key:
            return "<missing>"
        if len(api_key) < 12:
            # Too short to reveal any part of it safely.
            return "<redacted>"
        return api_key[-4:]

    def _get_current(self):
        """Return the ``(client, model)`` pair currently selected."""
        with self._lock:
            if self.DEBUG:
                logger.info("Using Model: %s", self.models[self.current_model_idx])
            return (
                self.clients[self.current_client_idx],
                self.models[self.current_model_idx],
            )

    def _rotate_client(self):
        """Select the next key, wrapping around to the first one. Returns nothing."""
        with self._lock:
            self.current_client_idx += 1
            if self.current_client_idx == len(self.clients):
                self.current_client_idx = 0

    def _rotate_model(self):
        """Select the next model (wrapping around) and reset to the first key."""
        with self._lock:
            self.current_model_idx += 1
            self.current_client_idx = 0
            if self.current_model_idx == len(self.models):
                self.current_model_idx = 0

    def call(self, user_input, json_mode=False):
        """Send ``user_input`` to Gemini, rotating keys and models on failure.

        Rotation strategy:
            1. Try current key on current model.
            2. On failure -> rotate key.
            3. All keys exhausted on current model -> rotate model, reset keys.
            4. All models exhausted -> return None (signals InitLLM to switch client).

        Args:
            user_input: User message text.
            json_mode: When true, request JSON output and return it parsed.

        Returns:
            The reply text, the parsed JSON value when ``json_mode`` is set, or
            ``None`` only when every key failed on every model.
        """
        models_tried = 0

        while models_tried < len(self.models):
            keys_tried = 0

            while keys_tried < len(self.clients):
                client, model = self._get_current()
                try:
                    contents = [types.Content(role="user", parts=[types.Part(text=user_input)])]
                    config = types.GenerateContentConfig(
                        system_instruction=self.prompt,
                        temperature=self.temperature,
                        max_output_tokens=self.max_output_tokens,
                        response_mime_type="application/json" if json_mode else None,
                    )
                    response = client.models.generate_content(model=model, contents=contents, config=config)
                    output_text = response.text

                    # None is reserved as the "exhausted" signal, so a reply
                    # without text is handled like any other failure of this key.
                    if output_text is None:
                        raise ValueError("GEMINI returned a reply without text")
                    result = json.loads(output_text) if json_mode else output_text
                    if result is None:
                        raise ValueError("GEMINI returned JSON null")
                except Exception:
                    if self.IN_DEPTH_DEBUG:
                        logger.debug("GEMINI Unknown error on key %s", self.current_client_idx, exc_info=True)
                    keys_tried += 1
                    self._rotate_client()
                    continue

                if self.IN_DEPTH_DEBUG:
                    logger.debug("%s : %s", user_input, output_text)
                return result

            if self.IN_DEPTH_DEBUG:
                logger.debug("GEMINI All keys exhausted for model '%s'. Rotating model.", self.models[self.current_model_idx])

            models_tried += 1
            self._rotate_model()

        if self.DEBUG:
            logger.info("GEMINI All keys x all models exhausted.")
        return None
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
# ------------------------------------------- CEREBRAS -------------------------------------------
|
|
387
|
+
# no proactive rotation here: unlike the Groq client, this client does not read a remaining-TPM value from the response
|
|
388
|
+
# check if it is giving response, if it stucks then rotate model
|
|
389
|
+
class CereBrasLLM(LLM):
    """Cerebras client that rotates over a pool of API keys and a list of models.

    Each key in ``CEREBRAS_KEY`` is probed once at construction time with the
    first model; keys whose probe fails are logged and dropped.  ``call()``
    tries the current key on the current model, moves to the next key on
    failure, moves to the next model once all keys failed, and returns
    ``None`` when every key has failed on every model.
    """

    def __init__(
        self,
        CEREBRAS_MODEL,
        CEREBRAS_KEY,
        prompt="You are a helpful assistant",
        temperature=0.5,
        max_output_tokens=500,
        DEBUG=0,
        IN_DEPTH_DEBUG=0,
        test_mode=0,
    ):
        """Validate the configuration and build one SDK client per working key.

        Args:
            CEREBRAS_MODEL: Sequence of model names; the first one is used for probing.
            CEREBRAS_KEY: Sequence of API keys; each must be a string starting with "csk".
            prompt: System prompt sent with every request.
            temperature: Sampling temperature sent with every request.
            max_output_tokens: Output token limit sent with every request.
            DEBUG: Truthy to emit INFO-level progress logs.
            IN_DEPTH_DEBUG: Truthy to emit DEBUG-level logs, including model output.
            test_mode: Truthy to additionally probe every model with every key.

        Raises:
            NoModelMentioned: ``CEREBRAS_MODEL`` is empty.
            NoAPIKeysError: ``CEREBRAS_KEY`` is empty, a key is missing or
                malformed, or no key passed its probe.
        """
        if not CEREBRAS_MODEL:
            raise NoModelMentioned("CEREBRAS : No models listed in CEREBRAS_MODEL.")
        if not CEREBRAS_KEY:
            raise NoAPIKeysError("CEREBRAS : No api key listed in CEREBRAS_KEYS.")

        self.DEBUG = DEBUG
        self.IN_DEPTH_DEBUG = IN_DEPTH_DEBUG
        self.prompt = prompt
        self.temperature = temperature
        self.max_output_tokens = max_output_tokens
        self._lock = threading.Lock()
        self.clients = []

        # Identical probe conversation for the key check and the model checks.
        probe_messages = [
            {
                "role": "system",
                "content": "Output exactly the requested text. No extra words.",
            },
            {
                "role": "user",
                "content": "Return exactly: ok",
            },
        ]

        for api_key in CEREBRAS_KEY:
            # Only a short suffix of the key is ever logged or put in messages.
            key_tail = self._key_tail(api_key)
            if not (isinstance(api_key, str) and api_key.startswith("csk")):
                raise NoAPIKeysError(f"CEREBRAS : Key ....{key_tail} is missing or has an invalid format.")

            # Probe the key with a tiny request so a broken key is dropped now
            # rather than failing in the middle of a pipeline.
            try:
                client = Cerebras(api_key=api_key)
                response = client.chat.completions.create(
                    model=CEREBRAS_MODEL[0],
                    max_completion_tokens=200,
                    temperature=0.2,
                    top_p=1,
                    messages=probe_messages,
                )
                self.clients.append(client)
                if self.IN_DEPTH_DEBUG:
                    logger.debug("CEREBRAS : Working Key ....%s", key_tail)
                    logger.debug("Output: %s", response.choices[0].message.content)

                if test_mode:
                    # Best effort by design: a model that fails for this key is
                    # only reported, the key itself stays in the pool.
                    for model in CEREBRAS_MODEL:
                        try:
                            response = client.chat.completions.create(
                                model=model,
                                max_completion_tokens=200,
                                temperature=0.2,
                                top_p=1,
                                messages=probe_messages,
                            )
                            if self.IN_DEPTH_DEBUG:
                                logger.debug("CEREBRAS : Working Model %s", model)
                                logger.debug("Output: %s", response.choices[0].message.content)
                        except Exception:
                            logger.error("CEREBRAS : Model usage failed ....%s : %s", key_tail, model)

            except Exception:
                # Best effort by design: a key that fails its probe is skipped.
                logger.error("CEREBRAS : Model usage failed ....%s", key_tail)
                if self.IN_DEPTH_DEBUG:
                    logger.debug("Exception details", exc_info=True)

        if not self.clients:
            raise NoAPIKeysError("CEREBRAS : No valid API keys found.")

        self.models = list(CEREBRAS_MODEL)
        self.current_model_idx = 0
        self.current_client_idx = 0

    @staticmethod
    def _key_tail(api_key):
        """Return a short, log-safe suffix of ``api_key`` (never the full secret)."""
        if not isinstance(api_key, str) or not api_key:
            return "<missing>"
        if len(api_key) < 12:
            # Too short to reveal any part of it safely.
            return "<redacted>"
        return api_key[-4:]

    def _get_current(self):
        """Return the ``(client, model)`` pair currently selected."""
        with self._lock:
            if self.DEBUG:
                logger.info("Using Model: %s", self.models[self.current_model_idx])
            return (
                self.clients[self.current_client_idx],
                self.models[self.current_model_idx],
            )

    def _rotate_client(self):
        """Select the next key, wrapping around to the first one. Returns nothing."""
        with self._lock:
            self.current_client_idx += 1
            if self.current_client_idx == len(self.clients):
                self.current_client_idx = 0

    def _rotate_model(self):
        """Select the next model (wrapping around) and reset to the first key."""
        with self._lock:
            self.current_model_idx += 1
            self.current_client_idx = 0
            if self.current_model_idx == len(self.models):
                self.current_model_idx = 0

    def call(self, user_input, json_mode=False):
        """Send ``user_input`` to Cerebras, rotating keys and models on failure.

        Rotation strategy:
            1. Try current key on current model.
            2. On failure -> rotate key.
            3. All keys exhausted on current model -> rotate model, reset keys.
            4. All models exhausted -> return None (signals InitLLM to switch client).

        Args:
            user_input: User message text.
            json_mode: When true, request a JSON object and return it parsed.

        Returns:
            The reply text, the parsed JSON value when ``json_mode`` is set, or
            ``None`` only when every key failed on every model.
        """
        models_tried = 0

        while models_tried < len(self.models):
            keys_tried = 0

            while keys_tried < len(self.clients):
                client, model = self._get_current()
                try:
                    kwargs = {
                        "model": model,
                        "temperature": self.temperature,
                        "max_tokens": self.max_output_tokens,
                        "messages": [
                            {"role": "system", "content": self.prompt},
                            {"role": "user", "content": user_input},
                        ],
                    }
                    if json_mode:
                        kwargs["response_format"] = {"type": "json_object"}

                    response = client.chat.completions.with_raw_response.create(**kwargs)
                    output_text = response.parse().choices[0].message.content

                    # None is reserved as the "exhausted" signal, so an empty
                    # reply is handled like any other failure of this key.
                    if output_text is None:
                        raise ValueError("CEREBRAS returned a reply without content")
                    result = json.loads(output_text) if json_mode else output_text
                    if result is None:
                        raise ValueError("CEREBRAS returned JSON null")
                except Exception:
                    if self.IN_DEPTH_DEBUG:
                        logger.debug("CEREBRAS Unknown error on key %s", self.current_client_idx, exc_info=True)
                    keys_tried += 1
                    self._rotate_client()
                    continue

                if self.IN_DEPTH_DEBUG:
                    logger.debug("%s : %s", user_input, output_text)
                return result

            if self.IN_DEPTH_DEBUG:
                logger.debug("CEREBRAS All keys exhausted for model '%s'. Rotating model.", self.models[self.current_model_idx])

            models_tried += 1
            self._rotate_model()

        if self.DEBUG:
            logger.info("CEREBRAS All keys x all models exhausted.")
        return None
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# This file is responsible for managing the LLM clients and for dynamic model and client routing.
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
from .LLMClients import LLM, GroqLLM, GeminiLLM, CereBrasLLM
|
|
5
|
+
from .Exceptions import AllClientsExhaustedError, AllModelsFailedError
|
|
6
|
+
import threading
|
|
7
|
+
|
|
8
|
+
CURR_DIR = "LLMOrchestrator"


class LLMOrchestrator:
    """Route a request across the configured provider clients.

    - Holds all LLM client objects, in the order groq, gemini, cerebras.
    - Stays on the current client until it returns None (fully exhausted).
    - Moves to the next client and stays there until it too is exhausted.
    - Raises AllModelsFailedError only when every client is exhausted; the
      selection is then reset so that a later call starts again from the
      first client.
    """

    def __init__(
        self,
        groq=None,
        gemini=None,
        cerebras=None,
        debug=0,
        verbose=0,
        prompt="You are a helpful assistant",
        temperature=0.5,
        max_output_tokens=1000,
        test_mode=0,
    ):
        """Build one client per provider that has both models and keys configured.

        Args:
            groq: Mapping with "groq_models" and "groq_keys", or None.
            gemini: Mapping with "gemini_models" and "gemini_keys", or None.
            cerebras: Mapping with "cerebras_models" and "cerebras_keys", or None.
            debug: Forwarded to each client as ``DEBUG``; also enables the
                "exhausted" notice printed by ``call()``.
            verbose: Forwarded to each client as ``IN_DEPTH_DEBUG``.
            prompt: System prompt forwarded to each client.
            temperature: Sampling temperature forwarded to each client.
            max_output_tokens: Output token limit forwarded to each client.
            test_mode: Forwarded to each client.

        Raises:
            AllClientsExhaustedError: No provider had both models and keys.
        """
        self.clients: list[LLM] = []
        # Settings every provider client receives unchanged.
        shared = dict(
            prompt=prompt,
            temperature=temperature,
            max_output_tokens=max_output_tokens,
            DEBUG=debug,
            IN_DEPTH_DEBUG=verbose,
            test_mode=test_mode,
        )

        if groq and groq.get("groq_models") and groq.get("groq_keys"):
            self.clients.append(GroqLLM(groq["groq_models"], groq["groq_keys"], **shared))

        if gemini and gemini.get("gemini_models") and gemini.get("gemini_keys"):
            self.clients.append(GeminiLLM(gemini["gemini_models"], gemini["gemini_keys"], **shared))

        if cerebras and cerebras.get("cerebras_models") and cerebras.get("cerebras_keys"):
            self.clients.append(CereBrasLLM(cerebras["cerebras_models"], cerebras["cerebras_keys"], **shared))

        if not self.clients:
            raise AllClientsExhaustedError("InitLLM : No LLM clients configured. Either models or keys are empty")

        self.current_idx = 0
        self.DEBUG = debug
        self.IN_DEPTH_DEBUG = verbose
        self._lock = threading.Lock()

    def _rotate_client(self, failed_idx=None) -> bool:
        """Move to the next client. Returns False if all are exhausted.

        Args:
            failed_idx: Index of the client the caller just saw fail.  When
                given, the selection only advances if it still points at that
                client, so two threads reporting the same failure advance once.
                ``None`` always advances.
        """
        with self._lock:
            if failed_idx is None or self.current_idx == failed_idx:
                self.current_idx += 1
            return self.current_idx < len(self.clients)

    def call(self, user_input, json_mode=False):
        """Return the first non-None answer from the current or a later client.

        Try current client. If it returns None (exhausted),
        move to next and stay there. Repeat until all exhausted.

        Raises:
            AllModelsFailedError: Every remaining client returned None.
        """
        # At most one attempt per configured client in a single call.
        for _ in range(len(self.clients)):
            with self._lock:
                if self.current_idx >= len(self.clients):
                    break
                idx = self.current_idx
                current_client = self.clients[idx]

            result = current_client.call(user_input, json_mode=json_mode)

            if result is not None:
                return result

            # Current client fully exhausted -- move to next
            if self.DEBUG:
                print(f"[{CURR_DIR}] Client {idx} ({type(current_client).__name__}) exhausted. Switching.")

            if not self._rotate_client(idx):
                break

        # A failure may be temporary (Groq's per-minute token limit resets
        # after a minute), so start over from the first client on the next
        # call instead of leaving the index past the end of the list.
        with self._lock:
            self.current_idx = 0

        raise AllModelsFailedError(
            f"[{CURR_DIR}] All clients and models failed -> {CURR_DIR}"
        )
|
polyrouter/__init__.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# goal -> keep it simple and light weight
|
|
2
|
+
# initialise the public scope classes which users can access
|
|
3
|
+
# metadata for the library
|
|
4
|
+
|
|
5
|
+
from .LLMOrchestrator import LLMOrchestrator
|
|
6
|
+
import logging
|
|
7
|
+
|
|
8
|
+
# Attach a NullHandler to the package logger so that importing the library
# produces no log output unless the application configures logging itself.
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
|
10
|
+
|
|
11
|
+
from .Exceptions import (
|
|
12
|
+
AllModelsFailedError,
|
|
13
|
+
AllClientsExhaustedError,
|
|
14
|
+
ModelRateLimit,
|
|
15
|
+
InvalidAPIKey,
|
|
16
|
+
UnknownError,
|
|
17
|
+
NoModelMentioned,
|
|
18
|
+
NoAPIKeysError,
|
|
19
|
+
InvalidJSONResponseError
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Package metadata.  __version__ must match the version of the published
# distribution (1.0.0); it previously still said "0.1.0".
__version__ = "1.0.0"
__author__ = "Pratham Tomar"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Public API of the package: any name not listed in __all__ is considered
# private.  Re-exporting here lets users write
# `from polyrouter import LLMOrchestrator` instead of
# `from polyrouter.LLMOrchestrator import LLMOrchestrator`.
__all__ = [
    "LLMOrchestrator",
    "AllModelsFailedError",
    "AllClientsExhaustedError",
    "ModelRateLimit",
    "InvalidAPIKey",
    "UnknownError",
    "NoModelMentioned",
    "NoAPIKeysError",
    "InvalidJSONResponseError"
]
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: polyrouter
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A lightweight library that routes LLM requests across multiple providers with API-key and model failover
|
|
5
|
+
Author-email: Pratham Tomar <prathamtomar1733@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.11
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: cerebras_cloud_sdk>=1.67.0
|
|
14
|
+
Requires-Dist: google-genai>=2.4.0
|
|
15
|
+
Requires-Dist: groq>=1.2.0
|
|
16
|
+
|
|
17
|
+
# PolyRouter
|
|
18
|
+
|
|
19
|
+

|
|
20
|
+

|
|
21
|
+

|
|
22
|
+
|
|
23
|
+
PolyRouter is a lightweight Python library that routes requests across multiple LLM providers. It helps applications achieve deterministic failover by rotating API keys, client providers, and model candidates when requests fail.
|
|
24
|
+
|
|
25
|
+
## Overview
|
|
26
|
+
|
|
27
|
+
This repository provides an orchestration layer that can be embedded in your application to manage provider rotation, API-key pools, and model fallbacks. Keys are loaded from the environment (see `.env`), and the orchestrator tries configured provider/model/key combinations until a request succeeds or all combinations are exhausted.
|
|
28
|
+
|
|
29
|
+
Example provider adapters included in this snapshot:
|
|
30
|
+
|
|
31
|
+
- Groq
|
|
32
|
+
- Google Gemini
|
|
33
|
+
- Cerebras
|
|
34
|
+
|
|
35
|
+
## Badges
|
|
36
|
+
|
|
37
|
+
| Badge | Meaning |
|
|
38
|
+
| ----------------------------------------------------------------------------------------------- | -------------------------------------- |
|
|
39
|
+
|  | Python implementation |
|
|
40
|
+
|  | Multi-provider failover design |
|
|
41
|
+
|  | Update once a formal license is chosen |
|
|
42
|
+
|
|
43
|
+
## Key Features
|
|
44
|
+
|
|
45
|
+
| Capability | Technical Detail |
|
|
46
|
+
| ------------------------- | -------------------------------------------------------------------------------------------------------------------------------- |
|
|
47
|
+
| Provider rotation | Requests can move across Groq, Gemini, and Cerebras client pools without changing application code. |
|
|
48
|
+
| Key pool management | Each provider can be backed by multiple API keys, allowing the runtime to continue when a single key expires or is rate-limited. |
|
|
49
|
+
| Model pool fallback | Ordered model lists in `config.py` act as a preference chain, so the router can try alternate models before surfacing a failure. |
|
|
50
|
+
| Debug visibility | `DEBUG` and `IN_DEPTH_DEBUG` control log verbosity so you can switch between concise operational logs and deep request tracing. |
|
|
51
|
+
| Centralized configuration | Provider counts, model lists, and debug mode live in one place instead of being duplicated across call sites. |
|
|
52
|
+
| Failure isolation | Provider-specific errors do not have to terminate the entire workflow if another valid key/model combination is still available. |
|
|
53
|
+
|
|
54
|
+
## How It Works
|
|
55
|
+
|
|
56
|
+
```mermaid
|
|
57
|
+
flowchart TD
|
|
58
|
+
A[Application request] --> B[Load config.py]
|
|
59
|
+
B --> C[Load .env API keys]
|
|
60
|
+
C --> D[Try primary provider]
|
|
61
|
+
D --> E{Request succeeds?}
|
|
62
|
+
E -->|Yes| F[Return response]
|
|
63
|
+
E -->|No| G[Rotate key / model / client]
|
|
64
|
+
G --> H{Any combinations left?}
|
|
65
|
+
H -->|Yes| D
|
|
66
|
+
H -->|No| I[Raise exhaustion error]
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
The intended behavior is simple:
|
|
70
|
+
|
|
71
|
+
1. Read provider preferences, model lists, and key counts from `config.py`.
|
|
72
|
+
2. Load API credentials from the environment.
|
|
73
|
+
3. Attempt a request with the active provider/model combination.
|
|
74
|
+
4. On provider failure, rotate through the next key or model.
|
|
75
|
+
5. When a provider pool is exhausted, move to the next client family.
|
|
76
|
+
6. Stop only when every configured combination has been tried.
|
|
77
|
+
|
|
78
|
+
> The repo is built for operational resilience, not for single-provider purity.
|
|
79
|
+
|
|
80
|
+
## Project Structure
|
|
81
|
+
|
|
82
|
+
```text
|
|
83
|
+
PolyRouter/
|
|
84
|
+
├── examples/ # Example usage scenarios
|
|
85
|
+
│ └── basic_usage.py
|
|
86
|
+
├── polyrouter/ # Library source
|
|
87
|
+
│ ├── Exceptions.py
|
|
88
|
+
│ ├── LLMClients.py
|
|
89
|
+
│ ├── LLMOrchestrator.py
|
|
90
|
+
│ └── __init__.py
|
|
91
|
+
├── .env # (example present in repo snapshot)
|
|
92
|
+
├── requirements-dev.txt # Development / runtime deps
|
|
93
|
+
└── README.md
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Installation
|
|
97
|
+
|
|
98
|
+
Local setup (recommended):
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
git clone <repository-url>
|
|
102
|
+
cd PolyRouter
|
|
103
|
+
python3 -m venv .venv
|
|
104
|
+
source .venv/bin/activate
|
|
105
|
+
pip install --upgrade pip
|
|
106
|
+
pip install -r requirements-dev.txt
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Environment variables
|
|
110
|
+
|
|
111
|
+
This repo includes a `.env` file in the snapshot. In normal usage, copy it and populate your local copy with your own API keys (do not commit secrets):
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
cp .env .env.local
|
|
115
|
+
# edit .env.local and export provider API keys, e.g.:
|
|
116
|
+
# GROQ_API_KEY0=...
|
|
117
|
+
# GEMINI_API_KEY0=...
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
The examples use environment variables named like `GROQ_API_KEY0`, `GEMINI_API_KEY0`, etc.
|
|
121
|
+
|
|
122
|
+
### Dependencies
|
|
123
|
+
|
|
124
|
+
Dependencies used by examples and adapters may include provider SDKs and `python-dotenv`. Install via `requirements-dev.txt`.
|
|
125
|
+
|
|
126
|
+
<details>
|
|
127
|
+
<summary>Build / verification steps</summary>
|
|
128
|
+
|
|
129
|
+
This repository is a Python library-style project rather than a packaged service, so the essential validation step is import verification plus a smoke test in your host application.
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
python -m compileall .
|
|
133
|
+
python - <<'PY'
|
|
134
|
+
from config import DEBUG, IN_DEPTH_DEBUG, GROQ_MODEL
|
|
135
|
+
print("config loaded:", DEBUG, IN_DEPTH_DEBUG, GROQ_MODEL)
|
|
136
|
+
PY
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
</details>
|
|
140
|
+
|
|
141
|
+
## Configuration
|
|
142
|
+
|
|
143
|
+
`config.py` is the primary customization point.
|
|
144
|
+
|
|
145
|
+
| Setting | Role |
|
|
146
|
+
| ---------------- | ------------------------------------------------------------ |
|
|
147
|
+
| `DEBUG` | Enables the main debug statement stream. |
|
|
148
|
+
| `IN_DEPTH_DEBUG` | Enables detailed trace output for low-level troubleshooting. |
|
|
149
|
+
| `GROQ_MODEL` | Ordered Groq model preference list. |
|
|
150
|
+
| `GEMINI_MODEL` | Ordered Gemini model preference list. |
|
|
151
|
+
| `CEREBRAS_MODEL` | Ordered Cerebras model preference list. |
|
|
152
|
+
| `GROQ_KEY` | Number of Groq API keys to scan. |
|
|
153
|
+
| `GEMINI_KEY` | Number of Gemini API keys to scan. |
|
|
154
|
+
| `CEREBRAS_KEY` | Number of Cerebras API keys to scan. |
|
|
155
|
+
|
|
156
|
+
Recommended operating model:
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
DEBUG = 1
|
|
160
|
+
IN_DEPTH_DEBUG = 0
|
|
161
|
+
|
|
162
|
+
GROQ_MODEL = ["openai/gpt-oss-120b", "llama-3.3-70b-versatile"]
|
|
163
|
+
GEMINI_MODEL = ["gemini-2.5-flash"]
|
|
164
|
+
CEREBRAS_MODEL = ["gpt-oss-120b"]
|
|
165
|
+
|
|
166
|
+
GROQ_KEY = 2
|
|
167
|
+
GEMINI_KEY = 1
|
|
168
|
+
CEREBRAS_KEY = 1
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Usage
|
|
172
|
+
|
|
173
|
+
See `examples/basic_usage.py` for a minimal example. The orchestrator can be constructed directly in your application; you do not need a `config.py` file if you prefer to pass provider settings programmatically.
|
|
174
|
+
|
|
175
|
+
Minimal usage:
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
from dotenv import load_dotenv
|
|
179
|
+
from polyrouter import LLMOrchestrator
|
|
180
|
+
import os
|
|
181
|
+
|
|
182
|
+
load_dotenv()
|
|
183
|
+
|
|
184
|
+
llm = LLMOrchestrator(
|
|
185
|
+
groq={
|
|
186
|
+
"groq_models": ["openai/gpt-oss-120b"],
|
|
187
|
+
"groq_keys": [os.getenv("GROQ_API_KEY0")],
|
|
188
|
+
},
|
|
189
|
+
debug=True,
|
|
190
|
+
verbose=True,
|
|
191
|
+
prompt="You are a helpful assistant",
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
response = llm.request()
|
|
195
|
+
print(response)
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
When `debug`/`verbose` are enabled the orchestrator prints provider, model and key selection and rotation decisions.
|
|
199
|
+
|
|
200
|
+
## API / CLI
|
|
201
|
+
|
|
202
|
+
No standalone CLI or HTTP API is exposed in this repository snapshot.
|
|
203
|
+
|
|
204
|
+
The public surface is intentionally library-oriented:
|
|
205
|
+
|
|
206
|
+
- `config.py` controls behavior
|
|
207
|
+
- `LLMClients.py` defines the client abstraction
|
|
208
|
+
- `LLMOrchestrator.py` is the orchestration boundary
|
|
209
|
+
|
|
210
|
+
If you add a CLI later, document it here with exact command syntax and exit codes.
|
|
211
|
+
|
|
212
|
+
## Deployment
|
|
213
|
+
|
|
214
|
+
Because this project is a routing library, deployment usually means shipping it as part of a larger Python service or worker.
|
|
215
|
+
|
|
216
|
+
### Recommended deployment checklist
|
|
217
|
+
|
|
218
|
+
1. Pin dependencies with `requirements.txt`.
|
|
219
|
+
2. Inject secrets through the runtime environment, not source control.
|
|
220
|
+
3. Set `DEBUG = 0` and `IN_DEPTH_DEBUG = 0` for production unless you are actively diagnosing issues.
|
|
221
|
+
4. Validate all required API keys are present before starting the process.
|
|
222
|
+
5. Run the host service behind your preferred process manager, container runtime, or platform scheduler.
|
|
223
|
+
|
|
224
|
+
### Containerized deployment
|
|
225
|
+
|
|
226
|
+
If you package the project into a container, copy only the source files, install requirements, and mount secrets through environment variables or secret storage.
|
|
227
|
+
|
|
228
|
+
```dockerfile
|
|
229
|
+
FROM python:3.11-slim
|
|
230
|
+
|
|
231
|
+
WORKDIR /app
|
|
232
|
+
COPY requirements.txt .
|
|
233
|
+
RUN pip install --no-cache-dir -r requirements.txt
|
|
234
|
+
|
|
235
|
+
COPY . .
|
|
236
|
+
CMD ["python", "-c", "import config; print('LLM-Gateway-Service ready')"]
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
## Screenshots
|
|
240
|
+
|
|
241
|
+
> Screenshot placeholder: add an architecture diagram or runtime trace capture here once the project has a visual demo surface.
|
|
242
|
+
|
|
243
|
+
Suggested assets for a production repository:
|
|
244
|
+
|
|
245
|
+
- request-routing diagram
|
|
246
|
+
- provider rotation log snippet
|
|
247
|
+
- environment setup screenshot
|
|
248
|
+
|
|
249
|
+
## Troubleshooting
|
|
250
|
+
|
|
251
|
+
| Symptom | Likely Cause | Resolution |
|
|
252
|
+
| -------------------------------------- | ------------------------------------------------------------ | -------------------------------------------------------------------------- |
|
|
253
|
+
| Import error for a provider SDK | Dependencies are missing from the active virtual environment | Re-run `pip install -r requirements-dev.txt` inside the activated environment. |
|
|
254
|
+
| Requests stop after one provider fails | No fallback keys or models are configured | Add more keys to `.env` and expand the model pool in `config.py`. |
|
|
255
|
+
| All requests fail immediately | Environment variables are missing or misnamed | Verify `.env` matches `.env.template` exactly. |
|
|
256
|
+
| Debug logs are too noisy | Verbosity flags are enabled | Set `DEBUG = 0` and `IN_DEPTH_DEBUG = 0` for normal operation. |
|
|
257
|
+
| A specific model keeps failing | The model is unsupported, rate-limited, or exhausted | Remove it from the preference list or move it later in the rotation order. |
|
|
258
|
+
|
|
259
|
+
## Contributing
|
|
260
|
+
|
|
261
|
+
Contributions are welcome if they improve correctness, observability, or provider coverage.
|
|
262
|
+
|
|
263
|
+
Please keep pull requests focused and include:
|
|
264
|
+
|
|
265
|
+
- a concise description of the routing behavior being changed
|
|
266
|
+
- reproduction steps for any failure-handling update
|
|
267
|
+
- updates to `.env.template` and `config.py` when configuration contracts change
|
|
268
|
+
- tests or a clear validation checklist when the orchestration flow changes
|
|
269
|
+
|
|
270
|
+
Guidelines:
|
|
271
|
+
|
|
272
|
+
1. Do not hard-code secrets.
|
|
273
|
+
2. Preserve backward-compatible configuration names whenever possible.
|
|
274
|
+
3. Keep provider rotation behavior deterministic and well logged.
|
|
275
|
+
4. Prefer small, isolated changes to client adapters and error handling.
|
|
276
|
+
|
|
277
|
+
## Roadmap
|
|
278
|
+
|
|
279
|
+
Planned improvements that would strengthen the project further:
|
|
280
|
+
|
|
281
|
+
- formal public orchestration API with documented inputs and return types
|
|
282
|
+
- structured logging with request IDs and provider attempt history
|
|
283
|
+
- health checks for provider pools and exhausted key detection
|
|
284
|
+
- test coverage for failover, invalid-key handling, and model rotation
|
|
285
|
+
- optional CLI for smoke testing provider credentials
|
|
286
|
+
- metrics hooks for success rate, fallback rate, and exhaustion rate
|
|
287
|
+
|
|
288
|
+
## Acknowledgements
|
|
289
|
+
|
|
290
|
+
PolyRouter builds on the ecosystem provided by:
|
|
291
|
+
|
|
292
|
+
- Groq
|
|
293
|
+
- Google Gemini / Google Gen AI SDK
|
|
294
|
+
- Cerebras Cloud SDK
|
|
295
|
+
- python-dotenv
|
|
296
|
+
- tenacity
|
|
297
|
+
|
|
298
|
+
It also follows a common open-source reliability pattern: fail over without forcing callers to understand vendor-specific error recovery.
|
|
299
|
+
|
|
300
|
+
## License
|
|
301
|
+
|
|
302
|
+
License: MIT (as declared in the package metadata).
|
|
303
|
+
|
|
304
|
+
Keep the repository's license file and the license badge in sync with this section, and update them together if the chosen license changes.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
polyrouter/Exceptions.py,sha256=4JmiViSyMJWBZ84zfBLeMGFtPTJI66vBBHCiDqO_KIc,1089
|
|
2
|
+
polyrouter/LLMClients.py,sha256=KA9zXQcchds73Sd3LCNEtJEIwDuduDKVau2YorwVPUk,24048
|
|
3
|
+
polyrouter/LLMOrchestrator.py,sha256=_Q_SRd5k0ygTFOUhiSF1GEzYeCRuBr61bRoysIem00U,3750
|
|
4
|
+
polyrouter/__init__.py,sha256=vtX_PhGGQ1DUgwtzCsIG7C9TyH5sz-SZMNDhnma1tRg,1031
|
|
5
|
+
polyrouter-1.0.0.dist-info/METADATA,sha256=7cs2hNvmz5FTS57DdNZXtQJVprktkSdpOxR7579EBpA,12341
|
|
6
|
+
polyrouter-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
7
|
+
polyrouter-1.0.0.dist-info/top_level.txt,sha256=xNqKkGjeByTilrvhWqnm-htkJmity5rtql6EtlznTqM,11
|
|
8
|
+
polyrouter-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
polyrouter
|