fj-llm 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fj_llm-0.2.0/PKG-INFO ADDED
@@ -0,0 +1,97 @@
1
+ Metadata-Version: 2.4
2
+ Name: fj-llm
3
+ Version: 0.2.0
4
+ Summary: Lightweight, config-driven client for multiple LLM providers
5
+ Author-email: Simon Bloch <simon.j.bloch@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: requests>=2.25.0
10
+ Requires-Dist: pyyaml>=5.4.0
11
+ Provides-Extra: dev
12
+ Requires-Dist: pytest>=7.0; extra == "dev"
13
+
14
+ # fj-llm
15
+
16
+ Lightweight, config-driven Python client for multiple LLM providers. One interface, any provider — no provider SDK required.
17
+
18
+ ## Install
19
+
20
+ ```
21
+ pip install fj-llm
22
+ ```
23
+
24
+ ## Providers
25
+
26
+ | Provider | Config key | Env var |
27
+ |-----------|--------------|----------------------|
28
+ | OpenAI | `openai` | `OPENAI_API_KEY` |
29
+ | Anthropic | `anthropic` | `ANTHROPIC_API_KEY` |
30
+ | DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
31
+ | Google | `google` | `GOOGLE_API_KEY` |
32
+
33
+ ## Configuration
34
+
35
+ On first use, a default config is created at `~/.config/fj_llm/config.yaml`. Edit it to define model aliases; API keys are not stored in this file but are read from the environment variable named by each model's `api_key_env`:
36
+
37
+ ```yaml
38
+ models:
39
+ gpt-best:
40
+ provider: openai
41
+ model_name: gpt-4o
42
+ api_key_env: OPENAI_API_KEY
43
+ base_url: https://api.openai.com/v1
44
+ max_tokens: 4000
45
+ temperature: 0.1
46
+ pricing:
47
+ input_per_1m_tokens: 2.50
48
+ output_per_1m_tokens: 10.00
49
+ fallback: gpt-light # optional: alias to use on quota exhaustion
50
+
51
+ gpt-light:
52
+ provider: openai
53
+ model_name: gpt-4o-mini
54
+ api_key_env: OPENAI_API_KEY
55
+ base_url: https://api.openai.com/v1
56
+ max_tokens: 4000
57
+ temperature: 0.1
58
+ pricing:
59
+ input_per_1m_tokens: 0.15
60
+ output_per_1m_tokens: 0.60
61
+
62
+ defaults:
63
+ retry_attempts: 3
64
+ retry_delay: 1.0
65
+ timeout: 30
66
+ ```
67
+
68
+ For Cloud Functions or other environments without filesystem access, set the `FJ_LLM_CONFIG` environment variable to a JSON string of the same structure.
69
+
70
+ ## Usage
71
+
72
+ ```python
73
+ from fj_llm import LLMClient
74
+
75
+ client = LLMClient()
76
+ response = client.query("gpt-best", "Summarise this in one sentence.", context=long_text)
77
+
78
+ if response.success:
79
+ print(response.content)
80
+ print(f"Cost: ${response.cost:.6f}")
81
+ else:
82
+ print(f"Error: {response.error}")
83
+ ```
84
+
85
+ ### Cost logging
86
+
87
+ Every successful call is appended as a JSONL record to `~/.local/share/fj_llm/costs.jsonl`. Override the path via the `FJ_LLM_COST_LOG` env var, or set `cost_log` in the config file.
88
+
89
+ ### CLI
90
+
91
+ ```
92
+ llm-query gpt-best "What is the capital of France?"
93
+ ```
94
+
95
+ ## License
96
+
97
+ MIT
fj_llm-0.2.0/README.md ADDED
@@ -0,0 +1,84 @@
1
+ # fj-llm
2
+
3
+ Lightweight, config-driven Python client for multiple LLM providers. One interface, any provider — no provider SDK required.
4
+
5
+ ## Install
6
+
7
+ ```
8
+ pip install fj-llm
9
+ ```
10
+
11
+ ## Providers
12
+
13
+ | Provider | Config key | Env var |
14
+ |-----------|--------------|----------------------|
15
+ | OpenAI | `openai` | `OPENAI_API_KEY` |
16
+ | Anthropic | `anthropic` | `ANTHROPIC_API_KEY` |
17
+ | DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
18
+ | Google | `google` | `GOOGLE_API_KEY` |
19
+
20
+ ## Configuration
21
+
22
+ On first use, a default config is created at `~/.config/fj_llm/config.yaml`. Edit it to define model aliases; API keys are not stored in this file but are read from the environment variable named by each model's `api_key_env`:
23
+
24
+ ```yaml
25
+ models:
26
+ gpt-best:
27
+ provider: openai
28
+ model_name: gpt-4o
29
+ api_key_env: OPENAI_API_KEY
30
+ base_url: https://api.openai.com/v1
31
+ max_tokens: 4000
32
+ temperature: 0.1
33
+ pricing:
34
+ input_per_1m_tokens: 2.50
35
+ output_per_1m_tokens: 10.00
36
+ fallback: gpt-light # optional: alias to use on quota exhaustion
37
+
38
+ gpt-light:
39
+ provider: openai
40
+ model_name: gpt-4o-mini
41
+ api_key_env: OPENAI_API_KEY
42
+ base_url: https://api.openai.com/v1
43
+ max_tokens: 4000
44
+ temperature: 0.1
45
+ pricing:
46
+ input_per_1m_tokens: 0.15
47
+ output_per_1m_tokens: 0.60
48
+
49
+ defaults:
50
+ retry_attempts: 3
51
+ retry_delay: 1.0
52
+ timeout: 30
53
+ ```
54
+
55
+ For Cloud Functions or other environments without filesystem access, set the `FJ_LLM_CONFIG` environment variable to a JSON string of the same structure.
56
+
57
+ ## Usage
58
+
59
+ ```python
60
+ from fj_llm import LLMClient
61
+
62
+ client = LLMClient()
63
+ response = client.query("gpt-best", "Summarise this in one sentence.", context=long_text)
64
+
65
+ if response.success:
66
+ print(response.content)
67
+ print(f"Cost: ${response.cost:.6f}")
68
+ else:
69
+ print(f"Error: {response.error}")
70
+ ```
71
+
72
+ ### Cost logging
73
+
74
+ Every successful call is appended as a JSONL record to `~/.local/share/fj_llm/costs.jsonl`. Override the path via the `FJ_LLM_COST_LOG` env var, or set `cost_log` in the config file.
75
+
76
+ ### CLI
77
+
78
+ ```
79
+ llm-query gpt-best "What is the capital of France?"
80
+ ```
81
+
82
+ ## License
83
+
84
+ MIT
@@ -0,0 +1,9 @@
1
+ """
2
+ FJ_LLM (LLM Tools) - Unified interface for multiple LLM providers
3
+ """
4
+
5
+ from .client import LLMClient, LLMResponse
6
+
7
+ __version__ = "0.2.0"
8
+ __all__ = ["LLMClient", "LLMResponse"]
9
+
@@ -0,0 +1,405 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified LLM Client Interface
4
+ Supports multiple LLM providers with a consistent API
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ import os
10
+ import time
11
+ import yaml
12
+ import requests
13
+ from typing import Dict, Any, Optional
14
+ from dataclasses import dataclass
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+
18
+ _logger = logging.getLogger("fj_llm")
19
+
20
+ _DEFAULT_COST_LOG = Path("~/.local/share/fj_llm/costs.jsonl").expanduser()
21
+
22
+
23
class QuotaExceededError(Exception):
    """Signal that a provider rejected the call because quota/credit is used up.

    The retry loop in ``LLMClient._do_query`` does not retry this error; it
    switches to the model's configured ``fallback`` alias when one exists.
    """
26
+
27
+
28
@dataclass
class LLMResponse:
    """Provider-independent result of one LLM call (successful or failed)."""

    # Text produced by the model; the client uses "" for failed calls.
    content: str
    # Provider model name on success; the requested alias on client-side failures.
    model: str
    # Token accounting as reported by the provider, when available.
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
    # Cost of the call computed from the model's configured pricing.
    cost: Optional[float] = None
    # False when the call failed; ``error`` then carries the reason.
    success: bool = True
    error: Optional[str] = None
39
+
40
+
41
+ class LLMClient:
42
def __init__(self, config_path: str = "~/.config/fj_llm/config.yaml", config_dict: Optional[Dict] = None):
    """
    Initialize LLM Client with flexible config loading.
    Priority: config_dict → FJ_LLM_CONFIG env var → config file

    Args:
        config_path: Location of the YAML config file; ``~`` is expanded.
            Only read when neither ``config_dict`` nor a parsable
            ``FJ_LLM_CONFIG`` is available.
        config_dict: Ready-made configuration. A falsy value (``None`` or an
            empty dict) is treated as "not provided".

    The cost-log path is resolved as: ``FJ_LLM_COST_LOG`` env var, then the
    ``cost_log`` config key, then the module default.
    """
    self.session = requests.Session()

    # Set the attribute unconditionally so it exists on every instance,
    # including those built from config_dict.
    self.config_path = Path(config_path).expanduser()

    if config_dict:
        self.config = config_dict
    else:
        self.config = self._load_config()

    # ``self.config or {}`` guards against a loader that produced None
    # (e.g. an empty YAML file) so construction does not fail on ``.get``.
    cost_log = os.getenv("FJ_LLM_COST_LOG") or (self.config or {}).get("cost_log")
    if cost_log:
        # Expand "~" for both sources; environment values are frequently set
        # without a shell that would expand it.
        self._cost_log = Path(cost_log).expanduser()
    else:
        self._cost_log = _DEFAULT_COST_LOG
63
+
64
+ def _load_config(self) -> Dict[str, Any]:
65
+ env_config = os.getenv("FJ_LLM_CONFIG")
66
+ if env_config:
67
+ try:
68
+ return json.loads(env_config)
69
+ except json.JSONDecodeError as e:
70
+ _logger.warning("Failed to parse FJ_LLM_CONFIG: %s", e)
71
+
72
+ if not self.config_path.exists():
73
+ self._create_default_config()
74
+
75
+ with open(self.config_path, "r") as f:
76
+ return yaml.safe_load(f)
77
+
78
def _create_default_config(self):
    """Write a starter YAML config to ``self.config_path`` and announce it.

    Parent directories are created as needed. The file defines three model
    aliases plus the retry/timeout defaults, and two notices are printed to
    stdout.
    """
    self.config_path.parent.mkdir(parents=True, exist_ok=True)

    def model_entry(provider, model_name, api_key_env, base_url, max_tokens, temperature, price_in, price_out):
        # One entry of the "models" mapping.
        return {
            "provider": provider,
            "model_name": model_name,
            "api_key_env": api_key_env,
            "base_url": base_url,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "pricing": {
                "input_per_1m_tokens": price_in,
                "output_per_1m_tokens": price_out,
            },
        }

    deepseek_url = "https://api.deepseek.com/v1"
    starter_models = {
        "deepseek-light": model_entry(
            "deepseek", "deepseek-chat", "DEEPSEEK_API_KEY", deepseek_url, 4000, 0.1, 0.14, 0.28
        ),
        # NOTE(review): 55.0 per 1M tokens is far above the other entries —
        # confirm against the provider's price list.
        "deepseek-reasoner": model_entry(
            "deepseek", "deepseek-reasoner", "DEEPSEEK_API_KEY", deepseek_url, 8000, 0.0, 55.0, 55.0
        ),
        "claude-sonnet": model_entry(
            "anthropic", "claude-sonnet-4-6", "ANTHROPIC_API_KEY", "https://api.anthropic.com/v1",
            4000, 0.1, 3.0, 15.0
        ),
    }
    starter = {
        "models": starter_models,
        "defaults": {
            "retry_attempts": 3,
            "retry_delay": 1.0,
            "timeout": 30,
        },
    }

    with open(self.config_path, "w") as f:
        yaml.dump(starter, f, default_flow_style=False)

    print(f"Created default config at {self.config_path}")
    print("Please update with your API keys!")
132
+
133
def query(self, model_alias: str, prompt: str, context: Optional[str] = None) -> LLMResponse:
    """Unified LLM query with automatic timing, logging, and cost tracking.

    Args:
        model_alias: Key of the model in ``config["models"]``.
        prompt: The task/question to send.
        context: Optional background text, combined with the prompt by
            ``_do_query``.

    Returns:
        The ``LLMResponse`` produced by ``_do_query``. Failures are reported
        through ``success=False`` / ``error`` rather than raised.
    """
    # perf_counter is monotonic, so the measured latency cannot go negative
    # or jump when the system clock is adjusted (unlike time.time()).
    started = time.perf_counter()
    response = self._do_query(model_alias, prompt, context)
    latency_ms = int((time.perf_counter() - started) * 1000)

    if response.success:
        _logger.info(
            "llm/%s ok — %d in + %d out tokens, $%.6f, %dms",
            model_alias, response.input_tokens or 0, response.output_tokens or 0,
            response.cost or 0.0, latency_ms,
        )
        # Only successful calls are recorded in the cost log.
        self._append_cost_log(model_alias, response, latency_ms)
    else:
        _logger.error("llm/%s failed — %s", model_alias, response.error)

    return response
150
+
151
+ def _append_cost_log(self, alias: str, response: "LLMResponse", latency_ms: int) -> None:
152
+ try:
153
+ self._cost_log.parent.mkdir(parents=True, exist_ok=True)
154
+ record = {
155
+ "ts": datetime.now(timezone.utc).isoformat(),
156
+ "alias": alias,
157
+ "model": response.model,
158
+ "input_tokens": response.input_tokens,
159
+ "output_tokens": response.output_tokens,
160
+ "cost_usd": response.cost,
161
+ "latency_ms": latency_ms,
162
+ }
163
+ with open(self._cost_log, "a") as f:
164
+ f.write(json.dumps(record) + "\n")
165
+ except Exception:
166
+ pass # never crash the caller over logging
167
+
168
def _do_query(self, model_alias: str, prompt: str, context: Optional[str] = None, _is_fallback: bool = False) -> LLMResponse:
    """Resolve ``model_alias``, call its provider, and retry on failure.

    The prompt is prefixed with ``context`` when one is given. Each attempt
    routes to the provider-specific ``_query_*`` method. A
    ``QuotaExceededError`` is not retried: the model's ``fallback`` alias is
    tried once (never chained), otherwise a failed response is returned. Any
    other exception is retried with exponential backoff until
    ``defaults.retry_attempts`` is exhausted.

    Returns:
        A successful provider response, or a failed ``LLMResponse`` whose
        ``error`` explains why (unknown alias, unsupported provider, quota,
        retries exhausted).
    """
    known_models = self.config["models"]
    if model_alias not in known_models:
        return LLMResponse(content="", model=model_alias, success=False, error=f"Unknown model alias: {model_alias}")

    model_config = known_models[model_alias]
    full_prompt = f"Context:\n{context}\n\nTask:\n{prompt}" if context else prompt

    # Provider names -> handler. Matched by equality, in this order.
    routes = (
        (("deepseek",), self._query_deepseek),
        (("anthropic",), self._query_anthropic),
        (("openai",), self._query_openai),
        (("google", "gemini"), self._query_google),
    )

    for attempt in range(self.config["defaults"]["retry_attempts"]):
        try:
            provider = model_config["provider"]
            handler = next((call for names, call in routes if provider in names), None)
            if handler is None:
                return LLMResponse(content="", model=model_alias, success=False, error=f"Unsupported provider: {provider}")

            response = handler(model_config, full_prompt)
            if response.success:
                return response

        except QuotaExceededError as e:
            fallback = model_config.get("fallback")
            if not fallback or _is_fallback:
                return LLMResponse(content="", model=model_alias, success=False, error=f"Quota exceeded, no fallback: {e}")
            _logger.warning("llm/%s quota exceeded, falling back to %s", model_alias, fallback)
            return self._do_query(fallback, prompt, context, _is_fallback=True)

        except Exception as e:
            is_last_attempt = attempt == self.config["defaults"]["retry_attempts"] - 1
            if is_last_attempt:
                return LLMResponse(content="", model=model_alias, success=False, error=f"Failed after {attempt + 1} attempts: {e}")
            # Exponential backoff: retry_delay, 2x, 4x, ...
            time.sleep(self.config["defaults"]["retry_delay"] * (2 ** attempt))

    return LLMResponse(content="", model=model_alias, success=False, error="Max retries exceeded")
208
+
209
def _query_deepseek(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send ``prompt`` to the DeepSeek chat-completions endpoint.

    Args:
        model_config: One entry of ``config["models"]``; reads ``api_key_env``,
            ``model_name``, ``base_url``, ``max_tokens``, ``temperature`` and
            ``pricing``.
        prompt: Full user message to send.

    Returns:
        A successful ``LLMResponse`` with content, token counts and cost.

    Raises:
        ValueError: The API-key environment variable is unset or empty.
        QuotaExceededError: The API answered HTTP 402, so the caller can
            switch to the configured fallback model.
        Exception: Any other non-200 response.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    model_name = model_config["model_name"]
    payload = {
        "model": model_name,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": model_config["max_tokens"],
        "temperature": model_config["temperature"],
    }
    resp = self.session.post(
        f"{model_config['base_url']}/chat/completions",
        json=payload,
        headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code == 402:
        # NOTE(review): DeepSeek documents 402 as "Insufficient Balance" —
        # confirm against the current API error-code table. Treated as quota
        # exhaustion so the fallback works here as it does for the OpenAI
        # and Anthropic providers.
        raise QuotaExceededError(f"DeepSeek balance exhausted ({model_name})")
    if resp.status_code != 200:
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    # ``or {}`` also covers an explicit ``"usage": null`` in the payload.
    usage = data.get("usage") or {}
    input_tokens = usage.get("prompt_tokens", 0)
    output_tokens = usage.get("completion_tokens", 0)
    return LLMResponse(
        content=data["choices"][0]["message"]["content"],
        model=model_name,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=usage.get("total_tokens", input_tokens + output_tokens),
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
242
+
243
def _query_openai(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send ``prompt`` to an OpenAI-style chat-completions endpoint.

    Args:
        model_config: One entry of ``config["models"]``; reads ``api_key_env``,
            ``model_name``, ``max_tokens``, ``temperature``, ``pricing`` and
            optionally ``base_url`` (defaults to the public OpenAI API).
        prompt: Full user message to send.

    Returns:
        A successful ``LLMResponse`` with content, token counts and cost.

    Raises:
        ValueError: The API-key environment variable is unset or empty.
        QuotaExceededError: HTTP 429 whose error code is
            ``insufficient_quota``.
        Exception: Any other non-200 response.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    model_name = model_config["model_name"]
    payload: Dict[str, Any] = {
        "model": model_name,
        "messages": [{"role": "user", "content": prompt}],
        "max_completion_tokens": model_config["max_tokens"],
    }
    # o-series reasoning models don't accept temperature.
    # Slices (not indexing) so an empty model name cannot raise IndexError.
    is_o_series = model_name[:1] == "o" and model_name[1:2].isdigit()
    if not is_o_series:
        payload["temperature"] = model_config["temperature"]

    resp = self.session.post(
        f"{model_config.get('base_url', 'https://api.openai.com/v1')}/chat/completions",
        json=payload,
        headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        if resp.status_code == 429:
            try:
                error_code = (resp.json().get("error") or {}).get("code")
            except (ValueError, AttributeError):
                # Body is not JSON or not shaped like an error object;
                # fall through to the generic API error below instead of
                # masking it with a parsing exception.
                error_code = None
            if error_code == "insufficient_quota":
                raise QuotaExceededError(f"OpenAI quota exceeded ({model_name})")
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    # ``or {}`` also covers an explicit ``"usage": null`` in the payload.
    usage = data.get("usage") or {}
    input_tokens = usage.get("prompt_tokens", 0)
    output_tokens = usage.get("completion_tokens", 0)
    return LLMResponse(
        content=data["choices"][0]["message"]["content"],
        model=model_name,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=usage.get("total_tokens", input_tokens + output_tokens),
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
286
+
287
def _query_anthropic(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send ``prompt`` to the Anthropic messages endpoint.

    Args:
        model_config: One entry of ``config["models"]``; reads ``api_key_env``,
            ``model_name``, ``base_url``, ``max_tokens``, ``temperature`` and
            ``pricing``.
        prompt: Full user message to send.

    Returns:
        A successful ``LLMResponse`` built from the first content block, the
        reported token usage and the computed cost.

    Raises:
        ValueError: The API-key environment variable is unset or empty.
        QuotaExceededError: HTTP 400/429 whose error message mentions
            "credit balance" or whose type is ``insufficient_quota``.
        Exception: Any other non-200 response.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    model_name = model_config["model_name"]
    resp = self.session.post(
        f"{model_config['base_url']}/messages",
        json={
            "model": model_name,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": model_config["max_tokens"],
            "temperature": model_config["temperature"],
        },
        headers={
            "x-api-key": api_key,
            "Content-Type": "application/json",
            "anthropic-version": "2023-06-01",
        },
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        if resp.status_code in (400, 429):
            try:
                err = resp.json().get("error") or {}
                message = err.get("message") or ""
                quota_hit = "credit balance" in message or err.get("type") == "insufficient_quota"
            except (ValueError, AttributeError, TypeError):
                # Body is not JSON or not shaped like an error object;
                # fall through to the generic API error below instead of
                # masking it with a parsing exception.
                quota_hit = False
            if quota_hit:
                raise QuotaExceededError(f"Anthropic quota exhausted ({model_name})")
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    # ``or {}`` also covers an explicit ``"usage": null`` in the payload.
    usage = data.get("usage") or {}
    input_tokens = usage.get("input_tokens", 0)
    output_tokens = usage.get("output_tokens", 0)
    return LLMResponse(
        content=data["content"][0]["text"],
        model=model_name,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=input_tokens + output_tokens,
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
330
+
331
def _query_google(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send ``prompt`` to the Google ``generateContent`` endpoint.

    Args:
        model_config: One entry of ``config["models"]``; reads ``api_key_env``,
            ``model_name``, ``base_url``, ``max_tokens``, ``temperature`` and
            ``pricing``.
        prompt: Full user message to send.

    Returns:
        A successful ``LLMResponse``. ``content`` is the concatenated text of
        the first candidate's parts (``""`` when it has none).

    Raises:
        ValueError: The API-key environment variable is unset or empty.
        Exception: Non-200 response, or a response without candidates.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    # The key travels in a header rather than the query string: request
    # exceptions embed the URL in their message, and that message ends up in
    # LLMResponse.error and the error log.
    url = f"{model_config['base_url']}/models/{model_config['model_name']}:generateContent"
    resp = self.session.post(
        url,
        json={
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {
                "maxOutputTokens": model_config["max_tokens"],
                "temperature": model_config["temperature"],
            },
        },
        headers={"Content-Type": "application/json", "x-goog-api-key": api_key},
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    candidates = data.get("candidates", [])
    if not candidates:
        raise Exception(f"No candidates in response: {data}")

    # A candidate may carry no content/parts, or several parts; join every
    # text part instead of indexing the first one.
    parts = (candidates[0].get("content") or {}).get("parts") or []
    content = "".join(part.get("text", "") for part in parts)

    # ``or {}`` also covers an explicit ``"usageMetadata": null``.
    usage = data.get("usageMetadata") or {}
    input_tokens = usage.get("promptTokenCount", 0)
    output_tokens = usage.get("candidatesTokenCount", 0)
    return LLMResponse(
        content=content,
        model=model_config["model_name"],
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=usage.get("totalTokenCount", input_tokens + output_tokens),
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
370
+
371
+ def _calculate_cost(self, input_tokens: int, output_tokens: int, pricing: Dict) -> float:
372
+ return (input_tokens / 1_000_000) * pricing["input_per_1m_tokens"] + \
373
+ (output_tokens / 1_000_000) * pricing["output_per_1m_tokens"]
374
+
375
+ def list_models(self) -> list:
376
+ return list(self.config["models"].keys())
377
+
378
+ def get_model_info(self, model_alias: str) -> Optional[Dict]:
379
+ return self.config["models"].get(model_alias)
380
+
381
+
382
def main():
    """CLI entry point: ``llm-query <model_alias> <prompt...>``.

    All arguments after the alias are joined with spaces to form the prompt.
    Prints the response with its token/cost summary. Exits with status 1 on
    bad usage or when the query fails, so shell scripts can detect errors.
    """
    import sys

    # Built before argument validation because the usage text lists the
    # configured aliases.
    client = LLMClient()
    if len(sys.argv) < 3:
        print("Usage: llm-query <model_alias> <prompt>")
        print("Available models:", ", ".join(client.list_models()))
        sys.exit(1)

    model_alias = sys.argv[1]
    prompt = " ".join(sys.argv[2:])
    print(f"Querying {model_alias}...")
    response = client.query(model_alias, prompt)

    if not response.success:
        print(f"Error: {response.error}")
        # Non-zero exit on failure, consistent with the llm-ping command.
        sys.exit(1)

    print(f"\nResponse ({response.input_tokens} in + {response.output_tokens} out = {response.total_tokens} tokens, ${response.cost:.6f}):")
    print("-" * 50)
    print(response.content)
402
+
403
+
404
+ if __name__ == "__main__":
405
+ main()
@@ -0,0 +1,112 @@
1
+ """
2
+ Quick connectivity check for each supported provider.
3
+ Uses hardcoded minimal configs so it works independently of the user's config file.
4
+ Requires only the relevant API key env var to be set.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ from typing import Optional
10
+ from fj_llm.client import LLMClient, LLMResponse
11
+
12
# Prompt sent by every connectivity check.
PING_PROMPT = "Reply with exactly one word: pong"

# One self-contained client config per provider, each exposing a single
# "ping" alias with one attempt and a short timeout.
# NOTE(review): the google entry reads GEMINI_API_KEY while the README's
# provider table lists GOOGLE_API_KEY — confirm which is intended.
_PING_CONFIGS = {
    provider: {
        "models": {
            "ping": {
                "provider": provider,
                "model_name": model_name,
                "api_key_env": api_key_env,
                "base_url": base_url,
                "max_tokens": max_tokens,
                "temperature": 0.0,
                "pricing": {"input_per_1m_tokens": price_in, "output_per_1m_tokens": price_out},
            }
        },
        "defaults": {"retry_attempts": 1, "retry_delay": 0.0, "timeout": 15},
    }
    # provider, model_name, api_key_env, base_url, max_tokens, input price, output price
    for provider, model_name, api_key_env, base_url, max_tokens, price_in, price_out in (
        ("openai", "gpt-4o-mini", "OPENAI_API_KEY", "https://api.openai.com/v1", 10, 0.15, 0.60),
        ("anthropic", "claude-haiku-4-5-20251001", "ANTHROPIC_API_KEY", "https://api.anthropic.com/v1", 10, 0.80, 4.00),
        ("deepseek", "deepseek-chat", "DEEPSEEK_API_KEY", "https://api.deepseek.com/v1", 10, 0.14, 0.28),
        ("google", "gemini-2.5-flash", "GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta", 100, 0.075, 0.30),
    )
}

# Provider names accepted by ping() and the CLI, in definition order.
SUPPORTED_PROVIDERS = [*_PING_CONFIGS.keys()]
74
+
75
+
76
def ping(provider: str) -> LLMResponse:
    """Run the one-word ping prompt against ``provider`` using its built-in config.

    Raises:
        ValueError: ``provider`` is not one of ``SUPPORTED_PROVIDERS``.
    """
    ping_config = _PING_CONFIGS.get(provider)
    if ping_config is None:
        raise ValueError(f"Unknown provider '{provider}'. Choose from: {', '.join(SUPPORTED_PROVIDERS)}")
    return LLMClient(config_dict=ping_config).query("ping", PING_PROMPT)
82
+
83
+
84
def main():
    """CLI entry point: llm-ping [provider ...]

    With no arguments every supported provider is checked. Providers whose
    API-key variable is unset are reported as skipped. Exits with status 1
    when an unknown provider is named or any attempted ping fails.
    """
    targets = sys.argv[1:] or SUPPORTED_PROVIDERS

    unknown = [name for name in targets if name not in _PING_CONFIGS]
    if unknown:
        print(f"Unknown provider(s): {', '.join(unknown)}")
        print(f"Supported: {', '.join(SUPPORTED_PROVIDERS)}")
        sys.exit(1)

    # provider -> response, or None when the provider was skipped
    results = {}
    for provider in targets:
        key_env = _PING_CONFIGS[provider]["models"]["ping"]["api_key_env"]
        if not os.getenv(key_env):
            print(f" {provider:<12} SKIP ({key_env} not set)")
            results[provider] = None
            continue

        resp = ping(provider)
        results[provider] = resp
        if not resp.success:
            print(f" {provider:<12} FAIL {resp.error}")
            continue
        snippet = resp.content.strip().replace("\n", " ")[:40]
        print(f" {provider:<12} OK '{snippet}' ({resp.input_tokens}in/{resp.output_tokens}out ${resp.cost:.6f})")

    any_failed = any(r is not None and not r.success for r in results.values())
    if any_failed:
        sys.exit(1)
@@ -0,0 +1,97 @@
1
+ Metadata-Version: 2.4
2
+ Name: fj-llm
3
+ Version: 0.2.0
4
+ Summary: Lightweight, config-driven client for multiple LLM providers
5
+ Author-email: Simon Bloch <simon.j.bloch@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: requests>=2.25.0
10
+ Requires-Dist: pyyaml>=5.4.0
11
+ Provides-Extra: dev
12
+ Requires-Dist: pytest>=7.0; extra == "dev"
13
+
14
+ # fj-llm
15
+
16
+ Lightweight, config-driven Python client for multiple LLM providers. One interface, any provider — no provider SDK required.
17
+
18
+ ## Install
19
+
20
+ ```
21
+ pip install fj-llm
22
+ ```
23
+
24
+ ## Providers
25
+
26
+ | Provider | Config key | Env var |
27
+ |-----------|--------------|----------------------|
28
+ | OpenAI | `openai` | `OPENAI_API_KEY` |
29
+ | Anthropic | `anthropic` | `ANTHROPIC_API_KEY` |
30
+ | DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
31
+ | Google | `google` | `GOOGLE_API_KEY` |
32
+
33
+ ## Configuration
34
+
35
+ On first use, a default config is created at `~/.config/fj_llm/config.yaml`. Edit it to define model aliases; API keys are not stored in this file but are read from the environment variable named by each model's `api_key_env`:
36
+
37
+ ```yaml
38
+ models:
39
+ gpt-best:
40
+ provider: openai
41
+ model_name: gpt-4o
42
+ api_key_env: OPENAI_API_KEY
43
+ base_url: https://api.openai.com/v1
44
+ max_tokens: 4000
45
+ temperature: 0.1
46
+ pricing:
47
+ input_per_1m_tokens: 2.50
48
+ output_per_1m_tokens: 10.00
49
+ fallback: gpt-light # optional: alias to use on quota exhaustion
50
+
51
+ gpt-light:
52
+ provider: openai
53
+ model_name: gpt-4o-mini
54
+ api_key_env: OPENAI_API_KEY
55
+ base_url: https://api.openai.com/v1
56
+ max_tokens: 4000
57
+ temperature: 0.1
58
+ pricing:
59
+ input_per_1m_tokens: 0.15
60
+ output_per_1m_tokens: 0.60
61
+
62
+ defaults:
63
+ retry_attempts: 3
64
+ retry_delay: 1.0
65
+ timeout: 30
66
+ ```
67
+
68
+ For Cloud Functions or other environments without filesystem access, set the `FJ_LLM_CONFIG` environment variable to a JSON string of the same structure.
69
+
70
+ ## Usage
71
+
72
+ ```python
73
+ from fj_llm import LLMClient
74
+
75
+ client = LLMClient()
76
+ response = client.query("gpt-best", "Summarise this in one sentence.", context=long_text)
77
+
78
+ if response.success:
79
+ print(response.content)
80
+ print(f"Cost: ${response.cost:.6f}")
81
+ else:
82
+ print(f"Error: {response.error}")
83
+ ```
84
+
85
+ ### Cost logging
86
+
87
+ Every successful call is appended as a JSONL record to `~/.local/share/fj_llm/costs.jsonl`. Override the path via the `FJ_LLM_COST_LOG` env var, or set `cost_log` in the config file.
88
+
89
+ ### CLI
90
+
91
+ ```
92
+ llm-query gpt-best "What is the capital of France?"
93
+ ```
94
+
95
+ ## License
96
+
97
+ MIT
@@ -0,0 +1,13 @@
1
+ README.md
2
+ pyproject.toml
3
+ fj_llm/__init__.py
4
+ fj_llm/client.py
5
+ fj_llm/ping.py
6
+ fj_llm.egg-info/PKG-INFO
7
+ fj_llm.egg-info/SOURCES.txt
8
+ fj_llm.egg-info/dependency_links.txt
9
+ fj_llm.egg-info/entry_points.txt
10
+ fj_llm.egg-info/requires.txt
11
+ fj_llm.egg-info/top_level.txt
12
+ tests/test_client.py
13
+ tests/test_integration.py
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ llm-ping = fj_llm.ping:main
3
+ llm-query = fj_llm.client:main
@@ -0,0 +1,5 @@
1
+ requests>=2.25.0
2
+ pyyaml>=5.4.0
3
+
4
+ [dev]
5
+ pytest>=7.0
@@ -0,0 +1 @@
1
+ fj_llm
@@ -0,0 +1,30 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "fj-llm"
7
+ version = "0.2.0"
8
+ description = "Lightweight, config-driven client for multiple LLM providers"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ authors = [{ name = "Simon Bloch", email = "simon.j.bloch@gmail.com" }]
12
+ requires-python = ">=3.9"
13
+ dependencies = [
14
+ "requests>=2.25.0",
15
+ "pyyaml>=5.4.0",
16
+ ]
17
+
18
+ [project.scripts]
19
+ llm-query = "fj_llm.client:main"
20
+ llm-ping = "fj_llm.ping:main"
21
+
22
+ [project.optional-dependencies]
23
+ dev = ["pytest>=7.0"]
24
+
25
+ [tool.setuptools.packages.find]
26
+ where = ["."]
27
+ include = ["fj_llm*"]
28
+
29
+ [tool.pytest.ini_options]
30
+ testpaths = ["tests"]
fj_llm-0.2.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,262 @@
1
+ """Unit tests for fj_llm.client — all HTTP calls are mocked."""
2
+
3
+ import json
4
+ import pytest
5
+ from pathlib import Path
6
+ from unittest.mock import MagicMock, patch
7
+
8
+ from fj_llm import LLMClient, LLMResponse
9
+ from fj_llm.client import QuotaExceededError
10
+
11
def _model_entry(provider, model_name, api_key_env, base_url, input_price, output_price, fallback=None):
    """Build one model entry for the test config.

    Every entry uses max_tokens=100 and temperature=0.1; only the provider
    details, the pricing and the optional fallback alias differ.
    """
    entry = {
        "provider": provider,
        "model_name": model_name,
        "api_key_env": api_key_env,
        "base_url": base_url,
        "max_tokens": 100,
        "temperature": 0.1,
        "pricing": {
            "input_per_1m_tokens": input_price,
            "output_per_1m_tokens": output_price,
        },
    }
    # The "fallback" key is only present on entries that declare one.
    if fallback is not None:
        entry["fallback"] = fallback
    return entry


# Config shared by every unit test: one alias per provider, plus an OpenAI
# alias that declares "test-openai" as its fallback.
MINIMAL_CONFIG = {
    "models": {
        "test-openai": _model_entry(
            "openai", "gpt-4o-mini", "OPENAI_API_KEY",
            "https://api.openai.com/v1", 0.15, 0.60,
        ),
        "test-anthropic": _model_entry(
            "anthropic", "claude-haiku-4-5-20251001", "ANTHROPIC_API_KEY",
            "https://api.anthropic.com/v1", 0.80, 4.00,
        ),
        "test-deepseek": _model_entry(
            "deepseek", "deepseek-chat", "DEEPSEEK_API_KEY",
            "https://api.deepseek.com/v1", 0.14, 0.28,
        ),
        "test-google": _model_entry(
            "google", "gemini-1.5-flash", "GOOGLE_API_KEY",
            "https://generativelanguage.googleapis.com/v1beta", 0.075, 0.30,
        ),
        "test-with-fallback": _model_entry(
            "openai", "gpt-4o", "OPENAI_API_KEY",
            "https://api.openai.com/v1", 2.50, 10.00,
            fallback="test-openai",
        ),
    },
    "defaults": {"retry_attempts": 2, "retry_delay": 0.0, "timeout": 10},
}
62
+
63
+
64
def make_client(**kwargs):
    """Return an LLMClient built from MINIMAL_CONFIG, forwarding extra keyword arguments."""
    client = LLMClient(config_dict=MINIMAL_CONFIG, **kwargs)
    return client
66
+
67
+
68
def openai_response(content="hello", in_tok=10, out_tok=5):
    """Return a mock HTTP 200 response whose JSON body has the OpenAI chat-completion shape.

    Args:
        content: Text placed in the first choice's message.
        in_tok: Value reported as ``prompt_tokens``.
        out_tok: Value reported as ``completion_tokens``.
    """
    usage = {
        "prompt_tokens": in_tok,
        "completion_tokens": out_tok,
        "total_tokens": in_tok + out_tok,
    }
    payload = {
        "choices": [{"message": {"content": content}}],
        "usage": usage,
    }
    response = MagicMock(status_code=200)
    response.json.return_value = payload
    return response
76
+
77
+
78
def anthropic_response(content="hello", in_tok=10, out_tok=5):
    """Return a mock HTTP 200 response whose JSON body has the Anthropic messages shape.

    Args:
        content: Text placed in the first content item.
        in_tok: Value reported as ``input_tokens``.
        out_tok: Value reported as ``output_tokens``.
    """
    payload = {
        "content": [{"text": content}],
        "usage": {"input_tokens": in_tok, "output_tokens": out_tok},
    }
    response = MagicMock(status_code=200)
    response.json.return_value = payload
    return response
86
+
87
+
88
def deepseek_response(content="hello", in_tok=10, out_tok=5):
    """Return a mock DeepSeek response; it reuses the payload built by ``openai_response``."""
    return openai_response(content=content, in_tok=in_tok, out_tok=out_tok)
90
+
91
+
92
def google_response(content="hello", in_tok=10, out_tok=5):
    """Return a mock HTTP 200 response whose JSON body has the Google generateContent shape.

    Args:
        content: Text placed in the first part of the first candidate.
        in_tok: Value reported as ``promptTokenCount``.
        out_tok: Value reported as ``candidatesTokenCount``.
    """
    usage_metadata = {
        "promptTokenCount": in_tok,
        "candidatesTokenCount": out_tok,
        "totalTokenCount": in_tok + out_tok,
    }
    payload = {
        "candidates": [{"content": {"parts": [{"text": content}]}}],
        "usageMetadata": usage_metadata,
    }
    response = MagicMock(status_code=200)
    response.json.return_value = payload
    return response
100
+
101
+
102
class TestConfigLoading:
    """Checks on how LLMClient obtains its config and reacts to unknown aliases."""

    def test_config_dict_used_directly(self):
        # A config passed as a dict should be visible on client.config.
        models = make_client().config["models"]
        assert "test-openai" in models

    def test_env_var_config(self, monkeypatch):
        # FJ_LLM_CONFIG holds the config as JSON; config_path deliberately
        # points at a file that does not exist.
        serialized = json.dumps(MINIMAL_CONFIG)
        monkeypatch.setenv("FJ_LLM_CONFIG", serialized)
        loaded = LLMClient(config_path="/nonexistent/path.yaml").config
        assert "test-openai" in loaded["models"]

    def test_unknown_model_returns_error(self):
        # An undefined alias is reported through the response, not raised.
        result = make_client().query("nonexistent", "hello")
        assert not result.success
        assert "Unknown model alias" in result.error
117
+
118
+
119
class TestCostCalculation:
    """Checks on LLMClient._calculate_cost with per-million-token pricing."""

    def test_cost_calculation(self):
        rates = {"input_per_1m_tokens": 2.0, "output_per_1m_tokens": 10.0}
        # 1M input tokens at 2.0 plus 0.5M output tokens at 10.0 should total 7.0.
        computed = make_client()._calculate_cost(1_000_000, 500_000, rates)
        assert abs(computed - 7.0) < 1e-9

    def test_zero_tokens(self):
        rates = {"input_per_1m_tokens": 2.0, "output_per_1m_tokens": 10.0}
        computed = make_client()._calculate_cost(0, 0, rates)
        assert computed == 0.0
130
+
131
+
132
class TestOpenAIProvider:
    """OpenAI-path tests: success, quota exhaustion, fallback, and a missing API key."""

    @staticmethod
    def _insufficient_quota_response():
        """Return a mock HTTP 429 response whose error code is ``insufficient_quota``."""
        reply = MagicMock()
        reply.status_code = 429
        reply.json.return_value = {"error": {"code": "insufficient_quota"}}
        reply.text = "quota"
        return reply

    def test_successful_query(self, monkeypatch):
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        client = make_client()
        canned = openai_response("world", 20, 10)
        with patch.object(client.session, "post", return_value=canned):
            result = client.query("test-openai", "hello")
        assert result.success
        assert result.content == "world"
        assert result.input_tokens == 20
        assert result.output_tokens == 10
        assert result.cost > 0

    def test_quota_exceeded_raises(self, monkeypatch):
        # "test-openai" declares no fallback, so the quota error is expected
        # to surface in the response.
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        client = make_client()
        quota_reply = self._insufficient_quota_response()
        with patch.object(client.session, "post", return_value=quota_reply):
            result = client.query("test-openai", "hello")
        assert not result.success
        assert "Quota exceeded" in result.error

    def test_fallback_on_quota(self, monkeypatch):
        # First POST reports exhausted quota; the second one succeeds.
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        client = make_client()
        replies = [self._insufficient_quota_response(), openai_response("fallback!")]
        with patch.object(client.session, "post", side_effect=replies):
            result = client.query("test-with-fallback", "hello")
        assert result.success
        assert result.content == "fallback!"

    def test_missing_api_key(self, monkeypatch):
        # raising=False: the variable may already be absent from the environment.
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        result = make_client().query("test-openai", "hello")
        assert not result.success
173
+
174
+
175
class TestAnthropicProvider:
    """Anthropic-path tests: a successful reply and a low-credit error."""

    def test_successful_query(self, monkeypatch):
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
        client = make_client()
        canned = anthropic_response("claude says hi", 15, 8)
        with patch.object(client.session, "post", return_value=canned):
            result = client.query("test-anthropic", "hello")
        assert result.success
        assert result.content == "claude says hi"
        # 15 input tokens + 8 output tokens
        assert result.total_tokens == 23

    def test_quota_exceeded(self, monkeypatch):
        # HTTP 400 with a "credit balance too low" message is expected to be
        # reported as a quota error.
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
        client = make_client()
        low_credit = MagicMock(status_code=400, text="credits")
        low_credit.json.return_value = {
            "error": {"message": "credit balance too low", "type": "invalid_request_error"}
        }
        with patch.object(client.session, "post", return_value=low_credit):
            result = client.query("test-anthropic", "hello")
        assert not result.success
        assert "Quota exceeded" in result.error
196
+
197
+
198
class TestDeepSeekProvider:
    """DeepSeek-path test: a successful reply is surfaced as response content."""

    def test_successful_query(self, monkeypatch):
        monkeypatch.setenv("DEEPSEEK_API_KEY", "test-key")
        client = make_client()
        canned = deepseek_response("deepseek reply", 12, 6)
        with patch.object(client.session, "post", return_value=canned):
            result = client.query("test-deepseek", "hello")
        assert result.success
        assert result.content == "deepseek reply"
206
+
207
+
208
class TestGoogleProvider:
    """Google-path tests: a successful reply and a reply with no candidates."""

    def test_successful_query(self, monkeypatch):
        monkeypatch.setenv("GOOGLE_API_KEY", "test-key")
        client = make_client()
        canned = google_response("gemini reply", 8, 4)
        with patch.object(client.session, "post", return_value=canned):
            result = client.query("test-google", "hello")
        assert result.success
        assert result.content == "gemini reply"
        assert result.input_tokens == 8

    def test_empty_candidates_raises(self, monkeypatch):
        # HTTP 200 with an empty "candidates" list must come back as a failed response.
        monkeypatch.setenv("GOOGLE_API_KEY", "test-key")
        client = make_client()
        empty_reply = MagicMock(status_code=200)
        empty_reply.json.return_value = {"candidates": [], "usageMetadata": {}}
        with patch.object(client.session, "post", return_value=empty_reply):
            result = client.query("test-google", "hello")
        assert not result.success
227
+
228
+
229
class TestRetryBehavior:
    """Retry tests; MINIMAL_CONFIG sets retry_attempts=2 and retry_delay=0.0."""

    def test_retries_on_transient_error(self, monkeypatch):
        """A POST that raises once and then succeeds yields a successful response."""
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        client = make_client()
        # The transient failure is simulated by an exception raised from
        # session.post. The unused HTTP 500 mock was removed: it was never
        # passed to the patch, so it only suggested coverage that did not exist.
        with patch.object(client.session, "post", side_effect=[
            Exception("transient"), openai_response("ok")
        ]):
            resp = client.query("test-openai", "hello")
        assert resp.success

    def test_exhausted_retries_returns_error(self, monkeypatch):
        """When every POST raises, the failure is reported in the response error."""
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        client = make_client()
        with patch.object(client.session, "post", side_effect=Exception("always fails")):
            resp = client.query("test-openai", "hello")
        assert not resp.success
        assert "Failed after" in resp.error
249
+
250
+
251
+ class TestCostLog:
252
+ def test_cost_log_written(self, monkeypatch, tmp_path):
253
+ monkeypatch.setenv("OPENAI_API_KEY", "test-key")
254
+ log_path = tmp_path / "costs.jsonl"
255
+ monkeypatch.setenv("FJ_LLM_COST_LOG", str(log_path))
256
+ client = LLMClient(config_dict=MINIMAL_CONFIG)
257
+ with patch.object(client.session, "post", return_value=openai_response()):
258
+ client.query("test-openai", "hello")
259
+ assert log_path.exists()
260
+ record = json.loads(log_path.read_text().strip())
261
+ assert record["alias"] == "test-openai"
262
+ assert record["cost_usd"] > 0
@@ -0,0 +1,60 @@
1
+ """
2
+ Integration tests — hit real APIs. Skipped automatically if the API key env var is not set.
3
+ Run explicitly: pytest tests/test_integration.py -v
4
+ """
5
+
6
+ import os
7
+ import pytest
8
+ from fj_llm.ping import ping
9
+
10
+
11
+ def _key(provider: str) -> str:
12
+ keys = {
13
+ "openai": "OPENAI_API_KEY",
14
+ "anthropic": "ANTHROPIC_API_KEY",
15
+ "deepseek": "DEEPSEEK_API_KEY",
16
+ "google": "GEMINI_API_KEY",
17
+ }
18
+ return keys[provider]
19
+
20
+
21
def integration(provider: str):
    """Return a skip marker for a test that needs *provider*'s API key.

    The decorated test is skipped when the environment variable named by
    ``_key(provider)`` is unset or empty.
    """
    env_var = _key(provider)
    key_missing = not os.getenv(env_var)
    return pytest.mark.skipif(key_missing, reason=f"{env_var} not set")
27
+
28
+
29
@integration("openai")
def test_ping_openai():
    """Ping OpenAI and expect non-blank content, a positive input-token count and a positive cost."""
    result = ping("openai")
    assert result.success, result.error
    assert result.content.strip()
    assert result.input_tokens and result.input_tokens > 0
    assert result.cost and result.cost > 0
36
+
37
+
38
@integration("anthropic")
def test_ping_anthropic():
    """Ping Anthropic and expect non-blank content, a positive input-token count and a positive cost."""
    result = ping("anthropic")
    assert result.success, result.error
    assert result.content.strip()
    assert result.input_tokens and result.input_tokens > 0
    assert result.cost and result.cost > 0
45
+
46
+
47
@integration("deepseek")
def test_ping_deepseek():
    """Ping DeepSeek and expect non-blank content and a positive input-token count."""
    result = ping("deepseek")
    assert result.success, result.error
    assert result.content.strip()
    assert result.input_tokens and result.input_tokens > 0
53
+
54
+
55
@integration("google")
def test_ping_google():
    """Ping Google and expect non-blank content and a positive input-token count."""
    result = ping("google")
    assert result.success, result.error
    assert result.content.strip()
    assert result.input_tokens and result.input_tokens > 0