fj-llm 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fj_llm/__init__.py +9 -0
- fj_llm/client.py +405 -0
- fj_llm/ping.py +112 -0
- fj_llm-0.2.0.dist-info/METADATA +97 -0
- fj_llm-0.2.0.dist-info/RECORD +8 -0
- fj_llm-0.2.0.dist-info/WHEEL +5 -0
- fj_llm-0.2.0.dist-info/entry_points.txt +3 -0
- fj_llm-0.2.0.dist-info/top_level.txt +1 -0
fj_llm/__init__.py
ADDED
fj_llm/client.py
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Unified LLM Client Interface
|
|
4
|
+
Supports multiple LLM providers with a consistent API
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import time
|
|
11
|
+
import yaml
|
|
12
|
+
import requests
|
|
13
|
+
from typing import Dict, Any, Optional
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
_logger = logging.getLogger("fj_llm")
|
|
19
|
+
|
|
20
|
+
_DEFAULT_COST_LOG = Path("~/.local/share/fj_llm/costs.jsonl").expanduser()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class QuotaExceededError(Exception):
    """Signals that a provider reported an exhausted API quota.

    The error is treated as non-retryable: instead of retrying the same
    model, the client switches to the configured fallback model.
    """
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class LLMResponse:
    """Provider-independent result of a single LLM call."""

    # Text returned by the model; empty string on failure responses.
    content: str
    # Provider model name on success; failure responses built by the
    # client carry the requested alias here instead.
    model: str
    # Token accounting as reported by the provider (None when not set).
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
    # Cost computed from the token counts and the model's pricing table.
    cost: Optional[float] = None
    # False when the call failed; ``error`` then holds the reason.
    success: bool = True
    error: Optional[str] = None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LLMClient:
|
|
42
|
+
def __init__(self, config_path: str = "~/.config/fj_llm/config.yaml", config_dict: Optional[Dict] = None):
    """
    Initialize LLM Client with flexible config loading.

    Priority: config_dict → FJ_LLM_CONFIG env var → config file

    Args:
        config_path: Location of the YAML config file (``~`` is expanded).
            Only consulted when ``config_dict`` is empty or omitted.
        config_dict: Ready-made configuration. When non-empty it is used
            as-is and neither the env var nor the file is read.

    The cost log path is resolved in this order: ``FJ_LLM_COST_LOG`` env
    var → ``cost_log`` config key → the module default.
    """
    self.session = requests.Session()

    if config_dict:
        self.config = config_dict
    else:
        self.config_path = Path(config_path).expanduser()
        self.config = self._load_config()

    cost_log_env = os.getenv("FJ_LLM_COST_LOG")
    cost_log_cfg = self.config.get("cost_log")
    if cost_log_env:
        # Expand "~" exactly like the config-file value below; otherwise
        # FJ_LLM_COST_LOG="~/costs.jsonl" would create a literal "~" directory.
        self._cost_log = Path(cost_log_env).expanduser()
    elif cost_log_cfg:
        self._cost_log = Path(cost_log_cfg).expanduser()
    else:
        self._cost_log = _DEFAULT_COST_LOG
|
|
63
|
+
|
|
64
|
+
def _load_config(self) -> Dict[str, Any]:
|
|
65
|
+
env_config = os.getenv("FJ_LLM_CONFIG")
|
|
66
|
+
if env_config:
|
|
67
|
+
try:
|
|
68
|
+
return json.loads(env_config)
|
|
69
|
+
except json.JSONDecodeError as e:
|
|
70
|
+
_logger.warning("Failed to parse FJ_LLM_CONFIG: %s", e)
|
|
71
|
+
|
|
72
|
+
if not self.config_path.exists():
|
|
73
|
+
self._create_default_config()
|
|
74
|
+
|
|
75
|
+
with open(self.config_path, "r") as f:
|
|
76
|
+
return yaml.safe_load(f)
|
|
77
|
+
|
|
78
|
+
def _create_default_config(self):
    """Write a starter YAML config to ``self.config_path`` and announce it on stdout.

    Missing parent directories are created first. The file defines three
    model aliases (two DeepSeek, one Anthropic) plus the shared retry and
    timeout defaults.
    """
    self.config_path.parent.mkdir(parents=True, exist_ok=True)

    deepseek_url = "https://api.deepseek.com/v1"
    deepseek_key_env = "DEEPSEEK_API_KEY"

    model_table = {
        "deepseek-light": {
            "provider": "deepseek",
            "model_name": "deepseek-chat",
            "api_key_env": deepseek_key_env,
            "base_url": deepseek_url,
            "max_tokens": 4000,
            "temperature": 0.1,
            "pricing": {
                "input_per_1m_tokens": 0.14,
                "output_per_1m_tokens": 0.28,
            },
        },
        "deepseek-reasoner": {
            "provider": "deepseek",
            "model_name": "deepseek-reasoner",
            "api_key_env": deepseek_key_env,
            "base_url": deepseek_url,
            "max_tokens": 8000,
            "temperature": 0.0,
            # NOTE(review): 55.0 per 1M tokens is far above every other
            # entry here — confirm against the provider's price list.
            "pricing": {
                "input_per_1m_tokens": 55.0,
                "output_per_1m_tokens": 55.0,
            },
        },
        "claude-sonnet": {
            "provider": "anthropic",
            "model_name": "claude-sonnet-4-6",
            "api_key_env": "ANTHROPIC_API_KEY",
            "base_url": "https://api.anthropic.com/v1",
            "max_tokens": 4000,
            "temperature": 0.1,
            "pricing": {
                "input_per_1m_tokens": 3.0,
                "output_per_1m_tokens": 15.0,
            },
        },
    }
    shared_defaults = {
        "retry_attempts": 3,
        "retry_delay": 1.0,
        "timeout": 30,
    }
    default_config = {"models": model_table, "defaults": shared_defaults}

    with open(self.config_path, "w") as f:
        yaml.dump(default_config, f, default_flow_style=False)

    print(f"Created default config at {self.config_path}")
    print("Please update with your API keys!")
|
|
132
|
+
|
|
133
|
+
def query(self, model_alias: str, prompt: str, context: Optional[str] = None) -> LLMResponse:
    """Unified LLM query with automatic timing, logging, and cost tracking.

    Args:
        model_alias: Key of the model entry in the ``models`` config section.
        prompt: The task text to send.
        context: Optional background text that is combined with the prompt.

    Returns:
        The response from the underlying query. Failures are reported via
        ``success=False`` and ``error`` on the returned object.
    """
    # perf_counter is monotonic, so the measured latency cannot go negative
    # or jump when the system clock is adjusted mid-request.
    t0 = time.perf_counter()
    response = self._do_query(model_alias, prompt, context)
    latency_ms = int((time.perf_counter() - t0) * 1000)

    if response.success:
        _logger.info(
            "llm/%s ok — %d in + %d out tokens, $%.6f, %dms",
            model_alias, response.input_tokens or 0, response.output_tokens or 0,
            response.cost or 0.0, latency_ms,
        )
        # Only successful calls are written to the cost log.
        self._append_cost_log(model_alias, response, latency_ms)
    else:
        _logger.error("llm/%s failed — %s", model_alias, response.error)

    return response
|
|
150
|
+
|
|
151
|
+
def _append_cost_log(self, alias: str, response: "LLMResponse", latency_ms: int) -> None:
|
|
152
|
+
try:
|
|
153
|
+
self._cost_log.parent.mkdir(parents=True, exist_ok=True)
|
|
154
|
+
record = {
|
|
155
|
+
"ts": datetime.now(timezone.utc).isoformat(),
|
|
156
|
+
"alias": alias,
|
|
157
|
+
"model": response.model,
|
|
158
|
+
"input_tokens": response.input_tokens,
|
|
159
|
+
"output_tokens": response.output_tokens,
|
|
160
|
+
"cost_usd": response.cost,
|
|
161
|
+
"latency_ms": latency_ms,
|
|
162
|
+
}
|
|
163
|
+
with open(self._cost_log, "a") as f:
|
|
164
|
+
f.write(json.dumps(record) + "\n")
|
|
165
|
+
except Exception:
|
|
166
|
+
pass # never crash the caller over logging
|
|
167
|
+
|
|
168
|
+
def _do_query(self, model_alias: str, prompt: str, context: Optional[str] = None, _is_fallback: bool = False) -> LLMResponse:
    """Resolve the alias, call the matching provider, and retry on errors.

    Args:
        model_alias: Key of the model entry in the ``models`` config section.
        prompt: The task text.
        context: Optional background text; when given, the prompt is sent
            as a "Context/Task" pair.
        _is_fallback: Internal flag set on the recursive fallback call so a
            fallback model never triggers a further fallback.

    Returns:
        The provider response on success, otherwise a response with
        ``success=False`` and an ``error`` message.
    """
    if model_alias not in self.config["models"]:
        return LLMResponse(content="", model=model_alias, success=False, error=f"Unknown model alias: {model_alias}")

    model_config = self.config["models"][model_alias]

    # A missing API key is a configuration error that retrying cannot fix;
    # report it immediately instead of sleeping through the backoff schedule.
    api_key_env = model_config.get("api_key_env")
    if api_key_env and not os.getenv(api_key_env):
        return LLMResponse(content="", model=model_alias, success=False, error=f"API key not found: {api_key_env}")

    full_prompt = prompt
    if context:
        full_prompt = f"Context:\n{context}\n\nTask:\n{prompt}"

    handlers = {
        "deepseek": self._query_deepseek,
        "anthropic": self._query_anthropic,
        "openai": self._query_openai,
        "google": self._query_google,
        "gemini": self._query_google,
    }
    defaults = self.config["defaults"]
    retry_attempts = defaults["retry_attempts"]

    for attempt in range(retry_attempts):
        try:
            provider = model_config["provider"]
            handler = handlers.get(provider)
            if handler is None:
                return LLMResponse(content="", model=model_alias, success=False, error=f"Unsupported provider: {provider}")

            response = handler(model_config, full_prompt)
            if response.success:
                return response

        except QuotaExceededError as e:
            # Quota exhaustion is not retryable: switch models once, or give up.
            fallback = model_config.get("fallback")
            if fallback and not _is_fallback:
                _logger.warning("llm/%s quota exceeded, falling back to %s", model_alias, fallback)
                return self._do_query(fallback, prompt, context, _is_fallback=True)
            return LLMResponse(content="", model=model_alias, success=False, error=f"Quota exceeded, no fallback: {e}")

        except Exception as e:
            if attempt == retry_attempts - 1:
                return LLMResponse(content="", model=model_alias, success=False, error=f"Failed after {attempt + 1} attempts: {e}")
            # Exponential backoff: retry_delay, 2x, 4x, ...
            time.sleep(defaults["retry_delay"] * (2 ** attempt))

    return LLMResponse(content="", model=model_alias, success=False, error="Max retries exceeded")
|
|
208
|
+
|
|
209
|
+
def _query_deepseek(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send ``prompt`` to a DeepSeek chat-completions endpoint.

    Args:
        model_config: One entry of the ``models`` config section.
        prompt: Text sent as a single user message.

    Returns:
        A successful ``LLMResponse`` with content, token counts and cost.

    Raises:
        ValueError: If the env var named by ``api_key_env`` is unset or empty.
        Exception: If the API answers with a status other than 200.
    """
    key_env = model_config["api_key_env"]
    api_key = os.getenv(key_env)
    if not api_key:
        raise ValueError(f"API key not found: {key_env}")

    request_body = {
        "model": model_config["model_name"],
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": model_config["max_tokens"],
        "temperature": model_config["temperature"],
    }
    endpoint = f"{model_config['base_url']}/chat/completions"
    request_headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}

    resp = self.session.post(
        endpoint,
        json=request_body,
        headers=request_headers,
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    body = resp.json()
    token_usage = body.get("usage", {})
    prompt_count = token_usage.get("prompt_tokens", 0)
    completion_count = token_usage.get("completion_tokens", 0)
    reply_text = body["choices"][0]["message"]["content"]

    return LLMResponse(
        content=reply_text,
        model=model_config["model_name"],
        input_tokens=prompt_count,
        output_tokens=completion_count,
        # Fall back to the sum when the API omits the total.
        total_tokens=token_usage.get("total_tokens", prompt_count + completion_count),
        cost=self._calculate_cost(prompt_count, completion_count, model_config["pricing"]),
        success=True,
    )
|
|
242
|
+
|
|
243
|
+
def _query_openai(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send ``prompt`` to an OpenAI chat-completions endpoint.

    Args:
        model_config: One entry of the ``models`` config section. ``base_url``
            is optional and defaults to ``https://api.openai.com/v1``.
        prompt: Text sent as a single user message.

    Returns:
        A successful ``LLMResponse`` with content, token counts and cost.

    Raises:
        ValueError: If the env var named by ``api_key_env`` is unset or empty.
        QuotaExceededError: On HTTP 429 with error code ``insufficient_quota``.
        Exception: On any other non-200 status.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    model_name = model_config["model_name"]
    payload: Dict[str, Any] = {
        "model": model_name,
        "messages": [{"role": "user", "content": prompt}],
        "max_completion_tokens": model_config["max_tokens"],
    }
    # o-series reasoning models don't accept temperature.
    # Slicing (not indexing) keeps an empty model name from raising IndexError.
    is_o_series = model_name[:1] == "o" and model_name[1:2].isdigit()
    if not is_o_series:
        payload["temperature"] = model_config["temperature"]

    resp = self.session.post(
        f"{model_config.get('base_url', 'https://api.openai.com/v1')}/chat/completions",
        json=payload,
        headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        if resp.status_code == 429:
            try:
                err = resp.json().get("error") or {}
                quota_hit = err.get("code") == "insufficient_quota"
            except (ValueError, AttributeError):
                # Body is not JSON or not shaped like an error object; fall
                # through to the generic error so the raw text is reported.
                quota_hit = False
            if quota_hit:
                raise QuotaExceededError(f"OpenAI quota exceeded ({model_name})")
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    usage = data.get("usage", {})
    input_tokens = usage.get("prompt_tokens", 0)
    output_tokens = usage.get("completion_tokens", 0)
    return LLMResponse(
        content=data["choices"][0]["message"]["content"],
        model=model_name,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=usage.get("total_tokens", input_tokens + output_tokens),
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
|
|
286
|
+
|
|
287
|
+
def _query_anthropic(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send ``prompt`` to an Anthropic messages endpoint.

    Args:
        model_config: One entry of the ``models`` config section.
        prompt: Text sent as a single user message.

    Returns:
        A successful ``LLMResponse`` built from the first content block and
        the reported token usage.

    Raises:
        ValueError: If the env var named by ``api_key_env`` is unset or empty.
        QuotaExceededError: On HTTP 400/429 whose error message mentions
            "credit balance" or whose error type is ``insufficient_quota``.
        Exception: On any other non-200 status.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    resp = self.session.post(
        f"{model_config['base_url']}/messages",
        json={
            "model": model_config["model_name"],
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": model_config["max_tokens"],
            "temperature": model_config["temperature"],
        },
        headers={
            "x-api-key": api_key,
            "Content-Type": "application/json",
            "anthropic-version": "2023-06-01",
        },
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        if resp.status_code in (400, 429):
            try:
                err = resp.json().get("error") or {}
                quota_hit = (
                    "credit balance" in (err.get("message") or "")
                    or err.get("type") == "insufficient_quota"
                )
            except (ValueError, AttributeError, TypeError):
                # Body is not JSON or not shaped like an error object; fall
                # through to the generic error so the raw text is reported.
                quota_hit = False
            if quota_hit:
                raise QuotaExceededError(f"Anthropic quota exhausted ({model_config['model_name']})")
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    usage = data.get("usage", {})
    input_tokens = usage.get("input_tokens", 0)
    output_tokens = usage.get("output_tokens", 0)
    return LLMResponse(
        content=data["content"][0]["text"],
        model=model_config["model_name"],
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=input_tokens + output_tokens,
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
|
|
330
|
+
|
|
331
|
+
def _query_google(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send ``prompt`` to a Gemini ``generateContent`` endpoint.

    Args:
        model_config: One entry of the ``models`` config section.
        prompt: Text sent as a single content part.

    Returns:
        A successful ``LLMResponse``. The content is the text of the first
        part of the first candidate, or ``""`` when that candidate carries
        no text part.

    Raises:
        ValueError: If the env var named by ``api_key_env`` is unset or empty.
        Exception: On a non-200 status or when the response has no candidates.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    # The key travels in a header rather than the query string: request
    # URLs are echoed in connection/timeout exception messages, which end
    # up in LLMResponse.error and the logs.
    url = f"{model_config['base_url']}/models/{model_config['model_name']}:generateContent"
    resp = self.session.post(
        url,
        json={
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {
                "maxOutputTokens": model_config["max_tokens"],
                "temperature": model_config["temperature"],
            },
        },
        headers={"Content-Type": "application/json", "x-goog-api-key": api_key},
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    candidates = data.get("candidates", [])
    if not candidates:
        raise Exception(f"No candidates in response: {data}")
    # `or [{}]` also covers an explicitly empty parts list, which would
    # otherwise raise IndexError.
    parts = candidates[0].get("content", {}).get("parts") or [{}]
    content = parts[0].get("text", "")

    usage = data.get("usageMetadata", {})
    input_tokens = usage.get("promptTokenCount", 0)
    output_tokens = usage.get("candidatesTokenCount", 0)
    return LLMResponse(
        content=content,
        model=model_config["model_name"],
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=usage.get("totalTokenCount", input_tokens + output_tokens),
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
|
|
370
|
+
|
|
371
|
+
def _calculate_cost(self, input_tokens: int, output_tokens: int, pricing: Dict) -> float:
|
|
372
|
+
return (input_tokens / 1_000_000) * pricing["input_per_1m_tokens"] + \
|
|
373
|
+
(output_tokens / 1_000_000) * pricing["output_per_1m_tokens"]
|
|
374
|
+
|
|
375
|
+
def list_models(self) -> list:
    """Return the configured model aliases in config order."""
    configured = self.config["models"]
    return list(configured)
|
|
377
|
+
|
|
378
|
+
def get_model_info(self, model_alias: str) -> Optional[Dict]:
    """Return the config entry for ``model_alias``, or ``None`` if it is not configured."""
    configured = self.config["models"]
    return configured.get(model_alias)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def main():
    """CLI entry point: ``llm-query <model_alias> <prompt>``.

    All arguments after the alias are joined with spaces to form the prompt.
    Exits with status 1 when arguments are missing or when the query fails.
    """
    import sys

    client = LLMClient()
    if len(sys.argv) < 3:
        print("Usage: llm-query <model_alias> <prompt>")
        print("Available models:", ", ".join(client.list_models()))
        sys.exit(1)

    model_alias = sys.argv[1]
    prompt = " ".join(sys.argv[2:])
    print(f"Querying {model_alias}...")
    response = client.query(model_alias, prompt)

    if not response.success:
        print(f"Error: {response.error}")
        # Non-zero status so shells and scripts can detect the failure.
        sys.exit(1)

    print(f"\nResponse ({response.input_tokens} in + {response.output_tokens} out = {response.total_tokens} tokens, ${response.cost:.6f}):")
    print("-" * 50)
    print(response.content)


if __name__ == "__main__":
    main()
|
fj_llm/ping.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Quick connectivity check for each supported provider.
|
|
3
|
+
Uses hardcoded minimal configs so it works independently of the user's config file.
|
|
4
|
+
Requires only the relevant API key env var to be set.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
from typing import Optional
|
|
10
|
+
from fj_llm.client import LLMClient, LLMResponse
|
|
11
|
+
|
|
12
|
+
# Prompt sent to every provider by the connectivity check.
PING_PROMPT = "Reply with exactly one word: pong"


def _ping_config(provider, model_name, api_key_env, base_url, max_tokens, input_price, output_price):
    """Build the single-alias ("ping") client config for one provider."""
    return {
        "models": {
            "ping": {
                "provider": provider,
                "model_name": model_name,
                "api_key_env": api_key_env,
                "base_url": base_url,
                "max_tokens": max_tokens,
                "temperature": 0.0,
                "pricing": {"input_per_1m_tokens": input_price, "output_per_1m_tokens": output_price},
            }
        },
        # One attempt, no backoff: a ping should answer or fail quickly.
        "defaults": {"retry_attempts": 1, "retry_delay": 0.0, "timeout": 15},
    }


# Minimal per-provider configs, keyed by provider name.
_PING_CONFIGS = {
    "openai": _ping_config(
        "openai", "gpt-4o-mini", "OPENAI_API_KEY",
        "https://api.openai.com/v1", 10, 0.15, 0.60,
    ),
    "anthropic": _ping_config(
        "anthropic", "claude-haiku-4-5-20251001", "ANTHROPIC_API_KEY",
        "https://api.anthropic.com/v1", 10, 0.80, 4.00,
    ),
    "deepseek": _ping_config(
        "deepseek", "deepseek-chat", "DEEPSEEK_API_KEY",
        "https://api.deepseek.com/v1", 10, 0.14, 0.28,
    ),
    "google": _ping_config(
        "google", "gemini-2.5-flash", "GEMINI_API_KEY",
        "https://generativelanguage.googleapis.com/v1beta", 100, 0.075, 0.30,
    ),
}

SUPPORTED_PROVIDERS = list(_PING_CONFIGS)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def ping(provider: str) -> LLMResponse:
    """Send the ping prompt to ``provider`` using its built-in minimal config.

    Raises:
        ValueError: If ``provider`` is not one of the supported providers.
    """
    config = _PING_CONFIGS.get(provider)
    if config is None:
        raise ValueError(f"Unknown provider '{provider}'. Choose from: {', '.join(SUPPORTED_PROVIDERS)}")
    return LLMClient(config_dict=config).query("ping", PING_PROMPT)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def main():
    """CLI entry point: llm-ping [provider ...]

    Pings every named provider (all supported ones when none are given),
    skipping those whose API key env var is unset. Exits with status 1 on
    an unknown provider name or when any ping fails.
    """
    requested = sys.argv[1:] or SUPPORTED_PROVIDERS

    unrecognised = [name for name in requested if name not in _PING_CONFIGS]
    if unrecognised:
        print(f"Unknown provider(s): {', '.join(unrecognised)}")
        print(f"Supported: {', '.join(SUPPORTED_PROVIDERS)}")
        sys.exit(1)

    # provider -> response, or None when the provider was skipped
    outcomes = {}
    for provider in requested:
        key_env = _PING_CONFIGS[provider]["models"]["ping"]["api_key_env"]
        if not os.getenv(key_env):
            print(f" {provider:<12} SKIP ({key_env} not set)")
            outcomes[provider] = None
            continue

        resp = ping(provider)
        if not resp.success:
            print(f" {provider:<12} FAIL {resp.error}")
        else:
            # One-line preview of the reply, capped at 40 characters.
            snippet = resp.content.strip().replace("\n", " ")[:40]
            print(f" {provider:<12} OK '{snippet}' ({resp.input_tokens}in/{resp.output_tokens}out ${resp.cost:.6f})")
        outcomes[provider] = resp

    if any(r is not None and not r.success for r in outcomes.values()):
        sys.exit(1)
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fj-llm
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Lightweight, config-driven client for multiple LLM providers
|
|
5
|
+
Author-email: Simon Bloch <simon.j.bloch@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: requests>=2.25.0
|
|
10
|
+
Requires-Dist: pyyaml>=5.4.0
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
13
|
+
|
|
14
|
+
# fj-llm
|
|
15
|
+
|
|
16
|
+
Lightweight, config-driven Python client for multiple LLM providers. One interface, any provider — no provider SDK required.
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
pip install fj-llm
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Providers
|
|
25
|
+
|
|
26
|
+
| Provider | Config key | Env var |
|
|
27
|
+
|-----------|--------------|----------------------|
|
|
28
|
+
| OpenAI | `openai` | `OPENAI_API_KEY` |
|
|
29
|
+
| Anthropic | `anthropic` | `ANTHROPIC_API_KEY` |
|
|
30
|
+
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
|
|
31
|
+
| Google | `google` | `GOOGLE_API_KEY` |
|
|
32
|
+
|
|
33
|
+
## Configuration
|
|
34
|
+
|
|
35
|
+
On first use, a default config is created at `~/.config/fj_llm/config.yaml`. Edit it to define model aliases; API keys are not stored in the file but are read from the environment variable named by each alias's `api_key_env`:
|
|
36
|
+
|
|
37
|
+
```yaml
|
|
38
|
+
models:
|
|
39
|
+
gpt-best:
|
|
40
|
+
provider: openai
|
|
41
|
+
model_name: gpt-4o
|
|
42
|
+
api_key_env: OPENAI_API_KEY
|
|
43
|
+
base_url: https://api.openai.com/v1
|
|
44
|
+
max_tokens: 4000
|
|
45
|
+
temperature: 0.1
|
|
46
|
+
pricing:
|
|
47
|
+
input_per_1m_tokens: 2.50
|
|
48
|
+
output_per_1m_tokens: 10.00
|
|
49
|
+
fallback: gpt-light # optional: alias to use on quota exhaustion
|
|
50
|
+
|
|
51
|
+
gpt-light:
|
|
52
|
+
provider: openai
|
|
53
|
+
model_name: gpt-4o-mini
|
|
54
|
+
api_key_env: OPENAI_API_KEY
|
|
55
|
+
base_url: https://api.openai.com/v1
|
|
56
|
+
max_tokens: 4000
|
|
57
|
+
temperature: 0.1
|
|
58
|
+
pricing:
|
|
59
|
+
input_per_1m_tokens: 0.15
|
|
60
|
+
output_per_1m_tokens: 0.60
|
|
61
|
+
|
|
62
|
+
defaults:
|
|
63
|
+
retry_attempts: 3
|
|
64
|
+
retry_delay: 1.0
|
|
65
|
+
timeout: 30
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
For Cloud Functions or other environments without filesystem access, set the `FJ_LLM_CONFIG` environment variable to a JSON string of the same structure.
|
|
69
|
+
|
|
70
|
+
## Usage
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from fj_llm import LLMClient
|
|
74
|
+
|
|
75
|
+
client = LLMClient()
|
|
76
|
+
response = client.query("gpt-best", "Summarise this in one sentence.", context=long_text)
|
|
77
|
+
|
|
78
|
+
if response.success:
|
|
79
|
+
print(response.content)
|
|
80
|
+
print(f"Cost: ${response.cost:.6f}")
|
|
81
|
+
else:
|
|
82
|
+
print(f"Error: {response.error}")
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Cost logging
|
|
86
|
+
|
|
87
|
+
Every successful call is appended as a JSONL record to `~/.local/share/fj_llm/costs.jsonl`. Override the path via the `FJ_LLM_COST_LOG` env var, or set `cost_log` in the config file.
|
|
88
|
+
|
|
89
|
+
### CLI
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
llm-query gpt-best "What is the capital of France?"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## License
|
|
96
|
+
|
|
97
|
+
MIT
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
fj_llm/__init__.py,sha256=Pt_qkB6mvzMS5WFYcYnH_0XtuA1pftkIJahoD3oBqKE,181
|
|
2
|
+
fj_llm/client.py,sha256=jsIQxfKlGd3MBQo4rfvTd6hcj4RL_NSe6NzWw5zajHE,16401
|
|
3
|
+
fj_llm/ping.py,sha256=CMIBfIHqiCgjvRO0Tg21Re4oY4KgBQ2JycHLBvBY324,4040
|
|
4
|
+
fj_llm-0.2.0.dist-info/METADATA,sha256=OOG0veGcMMsKSua_S1rOvv-69Fg33AqJEd8dU6uIy9o,2438
|
|
5
|
+
fj_llm-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
6
|
+
fj_llm-0.2.0.dist-info/entry_points.txt,sha256=vfmtI70Sf76A3ImHQrhNlVxpCU7ObhgxA2K9nn8WxhA,77
|
|
7
|
+
fj_llm-0.2.0.dist-info/top_level.txt,sha256=4-qWPKJSQX3YMzv9DvsEflTbrjvTOtdlw2Gr5rAdBSc,7
|
|
8
|
+
fj_llm-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
fj_llm
|