fj-llm 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fj_llm/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """
2
+ FJ_LLM (LLM Tools) - Unified interface for multiple LLM providers
3
+ """
4
+
5
+ from .client import LLMClient, LLMResponse
6
+
7
+ __version__ = "0.2.0"
8
+ __all__ = ["LLMClient", "LLMResponse"]
9
+
fj_llm/client.py ADDED
@@ -0,0 +1,405 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified LLM Client Interface
4
+ Supports multiple LLM providers with a consistent API
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ import os
10
+ import time
11
+ import yaml
12
+ import requests
13
+ from typing import Dict, Any, Optional
14
+ from dataclasses import dataclass
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+
18
+ _logger = logging.getLogger("fj_llm")
19
+
20
+ _DEFAULT_COST_LOG = Path("~/.local/share/fj_llm/costs.jsonl").expanduser()
21
+
22
+
23
class QuotaExceededError(Exception):
    """Signal that a provider reported an exhausted API quota.

    The provider query methods raise this for quota/credit errors. It is not
    retried: `LLMClient._do_query` catches it and switches to the model's
    configured `fallback` alias when one exists.
    """
26
+
27
+
28
@dataclass
class LLMResponse:
    """Provider-independent result of one LLM call."""

    # Text produced by the model; the client sets "" when the call failed.
    content: str
    # Provider model name on success; the requested alias on client-side failures.
    model: str
    # Token accounting as reported by the provider (None when not available).
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
    # Cost computed from the model's configured per-1M-token pricing.
    cost: Optional[float] = None
    # False when the call failed; `error` then carries the reason.
    success: bool = True
    error: Optional[str] = None
39
+
40
+
41
+ class LLMClient:
42
def __init__(self, config_path: str = "~/.config/fj_llm/config.yaml", config_dict: Optional[Dict] = None):
    """
    Initialize LLM Client with flexible config loading.
    Priority: config_dict → FJ_LLM_CONFIG env var → config file

    Args:
        config_path: Location of the YAML config file (``~`` is expanded).
            Only consulted when ``config_dict`` is empty or None.
        config_dict: Ready-made configuration. A non-empty dict is used
            as-is and no file or environment config is read.
    """
    self.session = requests.Session()

    # An empty dict is falsy, so it falls through to env/file loading.
    if config_dict:
        self.config = config_dict
    else:
        self.config_path = Path(config_path).expanduser()
        self.config = self._load_config()

    # Cost-log location priority: FJ_LLM_COST_LOG env var → "cost_log"
    # config key → built-in default. Both user-supplied forms get "~"
    # expanded so the env var behaves the same as the config key.
    cost_log = os.getenv("FJ_LLM_COST_LOG") or self.config.get("cost_log")
    self._cost_log = Path(cost_log).expanduser() if cost_log else _DEFAULT_COST_LOG
63
+
64
def _load_config(self) -> Dict[str, Any]:
    """Load the configuration from FJ_LLM_CONFIG or the config file.

    The FJ_LLM_CONFIG environment variable (a JSON object) wins when it is
    set and valid. Otherwise the YAML file at ``self.config_path`` is read,
    after creating a default one if it does not exist yet.

    Returns:
        The configuration mapping.

    Raises:
        ValueError: If the config file does not contain a YAML mapping
            (for example, it is empty).
    """
    env_config = os.getenv("FJ_LLM_CONFIG")
    if env_config:
        try:
            parsed = json.loads(env_config)
        except json.JSONDecodeError as e:
            _logger.warning("Failed to parse FJ_LLM_CONFIG: %s", e)
        else:
            if isinstance(parsed, dict):
                return parsed
            # Valid JSON but not an object: treat like a parse failure and
            # fall back to the config file instead of crashing later.
            _logger.warning("Failed to parse FJ_LLM_CONFIG: %s", "top-level value is not a JSON object")

    if not self.config_path.exists():
        self._create_default_config()

    with open(self.config_path, "r", encoding="utf-8") as f:
        loaded = yaml.safe_load(f)

    # safe_load returns None for an empty file and scalars/lists for other
    # malformed content; fail here with a clear message rather than with an
    # AttributeError further down the line.
    if not isinstance(loaded, dict):
        raise ValueError(f"Config file {self.config_path} must contain a YAML mapping")
    return loaded
77
+
78
def _create_default_config(self):
    """Write a starter YAML config to ``self.config_path`` and announce it.

    Creates the parent directory if needed. The file defines three model
    aliases (two DeepSeek, one Anthropic) and the retry/timeout defaults.
    """
    target = self.config_path
    target.parent.mkdir(parents=True, exist_ok=True)

    def entry(provider, model_name, key_env, base_url, max_tokens, temperature, in_price, out_price):
        # A fresh dict per alias so the YAML dump never emits shared anchors.
        return {
            "provider": provider,
            "model_name": model_name,
            "api_key_env": key_env,
            "base_url": base_url,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "pricing": {
                "input_per_1m_tokens": in_price,
                "output_per_1m_tokens": out_price,
            },
        }

    deepseek_url = "https://api.deepseek.com/v1"
    starter = {
        "models": {
            "deepseek-light": entry(
                "deepseek", "deepseek-chat", "DEEPSEEK_API_KEY", deepseek_url, 4000, 0.1, 0.14, 0.28
            ),
            "deepseek-reasoner": entry(
                "deepseek", "deepseek-reasoner", "DEEPSEEK_API_KEY", deepseek_url, 8000, 0.0, 55.0, 55.0
            ),
            "claude-sonnet": entry(
                "anthropic", "claude-sonnet-4-6", "ANTHROPIC_API_KEY", "https://api.anthropic.com/v1",
                4000, 0.1, 3.0, 15.0
            ),
        },
        "defaults": {
            "retry_attempts": 3,
            "retry_delay": 1.0,
            "timeout": 30,
        },
    }

    with open(target, "w") as f:
        yaml.dump(starter, f, default_flow_style=False)

    print(f"Created default config at {self.config_path}")
    print("Please update with your API keys!")
132
+
133
def query(self, model_alias: str, prompt: str, context: Optional[str] = None) -> LLMResponse:
    """Run one query and record its latency, token usage and cost.

    Delegates the actual call to `_do_query`. A successful response is
    logged at INFO level and appended to the cost log; a failed one is
    logged at ERROR level. The response is returned either way.
    """
    started = time.time()
    result = self._do_query(model_alias, prompt, context)
    elapsed_ms = int((time.time() - started) * 1000)

    if not result.success:
        _logger.error("llm/%s failed — %s", model_alias, result.error)
        return result

    # Missing usage/cost figures are reported as zero in the log line.
    tokens_in = result.input_tokens or 0
    tokens_out = result.output_tokens or 0
    dollars = result.cost or 0.0
    _logger.info(
        "llm/%s ok — %d in + %d out tokens, $%.6f, %dms",
        model_alias, tokens_in, tokens_out, dollars, elapsed_ms,
    )
    self._append_cost_log(model_alias, result, elapsed_ms)
    return result
150
+
151
+ def _append_cost_log(self, alias: str, response: "LLMResponse", latency_ms: int) -> None:
152
+ try:
153
+ self._cost_log.parent.mkdir(parents=True, exist_ok=True)
154
+ record = {
155
+ "ts": datetime.now(timezone.utc).isoformat(),
156
+ "alias": alias,
157
+ "model": response.model,
158
+ "input_tokens": response.input_tokens,
159
+ "output_tokens": response.output_tokens,
160
+ "cost_usd": response.cost,
161
+ "latency_ms": latency_ms,
162
+ }
163
+ with open(self._cost_log, "a") as f:
164
+ f.write(json.dumps(record) + "\n")
165
+ except Exception:
166
+ pass # never crash the caller over logging
167
+
168
def _do_query(self, model_alias: str, prompt: str, context: Optional[str] = None, _is_fallback: bool = False) -> LLMResponse:
    """Resolve an alias, call its provider with retries, and handle fallback.

    Args:
        model_alias: Key into ``self.config["models"]``.
        prompt: The task text.
        context: Optional text prepended to the prompt as a "Context:" block.
        _is_fallback: Internal flag set on the recursive fallback call so
            that a fallback model cannot trigger another fallback.

    Returns:
        The provider's response, or a failed ``LLMResponse`` (``success`` is
        False, ``error`` set) for an unknown alias, an unsupported provider,
        a missing API key, quota exhaustion without fallback, or exhausted
        retries. Provider exceptions are not propagated.
    """
    def failure(message: str) -> LLMResponse:
        return LLMResponse(content="", model=model_alias, success=False, error=message)

    if model_alias not in self.config["models"]:
        return failure(f"Unknown model alias: {model_alias}")
    model_config = self.config["models"][model_alias]

    # Resolve the provider once, up front: it cannot change between attempts.
    handlers = {
        "deepseek": self._query_deepseek,
        "anthropic": self._query_anthropic,
        "openai": self._query_openai,
        "google": self._query_google,
        "gemini": self._query_google,
    }
    provider = model_config.get("provider")
    handler = handlers.get(provider)
    if handler is None:
        return failure(f"Unsupported provider: {provider}")

    # A missing API key cannot be fixed by retrying, so fail immediately
    # instead of sleeping through the whole backoff schedule.
    key_env = model_config.get("api_key_env")
    if key_env and not os.getenv(key_env):
        return failure(f"API key not found: {key_env}")

    full_prompt = f"Context:\n{context}\n\nTask:\n{prompt}" if context else prompt

    # Fall back to the values of the generated default config when the
    # "defaults" section (or one of its keys) is absent.
    defaults = self.config.get("defaults") or {}
    attempts = defaults.get("retry_attempts", 3)
    delay = defaults.get("retry_delay", 1.0)

    for attempt in range(attempts):
        try:
            response = handler(model_config, full_prompt)
            if response.success:
                return response

        except QuotaExceededError as e:
            # Quota exhaustion is not retried; switch models at most once.
            fallback = model_config.get("fallback")
            if fallback and not _is_fallback:
                _logger.warning("llm/%s quota exceeded, falling back to %s", model_alias, fallback)
                return self._do_query(fallback, prompt, context, _is_fallback=True)
            return failure(f"Quota exceeded, no fallback: {e}")

        except Exception as e:
            if attempt == attempts - 1:
                return failure(f"Failed after {attempt + 1} attempts: {e}")
            # Exponential backoff: delay, 2*delay, 4*delay, ...
            time.sleep(delay * (2 ** attempt))

    return failure("Max retries exceeded")
208
+
209
def _query_deepseek(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send a single-turn chat completion request to DeepSeek.

    Args:
        model_config: Model entry from the config (``api_key_env``,
            ``model_name``, ``base_url``, ``max_tokens``, ``temperature``,
            ``pricing``).
        prompt: Text sent as the only user message.

    Returns:
        A successful ``LLMResponse`` with content, token usage and cost.

    Raises:
        ValueError: If the API key environment variable is unset or empty.
        QuotaExceededError: On HTTP 402 (account balance exhausted).
        Exception: On any other non-200 response.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    payload = {
        "model": model_config["model_name"],
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": model_config["max_tokens"],
        "temperature": model_config["temperature"],
    }
    resp = self.session.post(
        f"{model_config['base_url']}/chat/completions",
        json=payload,
        headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        # DeepSeek reports an exhausted balance as 402; surface it as a
        # quota error so the configured fallback model can take over, as
        # the OpenAI and Anthropic paths already do.
        if resp.status_code == 402:
            raise QuotaExceededError(f"DeepSeek balance exhausted ({model_config['model_name']})")
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    # Tolerate a missing or null usage block and null counters.
    usage = data.get("usage") or {}
    input_tokens = usage.get("prompt_tokens") or 0
    output_tokens = usage.get("completion_tokens") or 0
    return LLMResponse(
        content=data["choices"][0]["message"]["content"],
        model=model_config["model_name"],
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=usage.get("total_tokens") or (input_tokens + output_tokens),
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
242
+
243
def _query_openai(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send a single-turn chat completion request to OpenAI.

    Args:
        model_config: Model entry from the config. ``base_url`` is optional
            and defaults to ``https://api.openai.com/v1``.
        prompt: Text sent as the only user message.

    Returns:
        A successful ``LLMResponse`` with content, token usage and cost.

    Raises:
        ValueError: If the API key environment variable is unset or empty.
        QuotaExceededError: On HTTP 429 with error code ``insufficient_quota``.
        Exception: On any other non-200 response.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    model_name = model_config["model_name"]
    payload: Dict[str, Any] = {
        "model": model_name,
        "messages": [{"role": "user", "content": prompt}],
        "max_completion_tokens": model_config["max_tokens"],
    }
    # o-series reasoning models don't accept temperature.
    # startswith() keeps an empty model name from raising IndexError.
    is_o_series = model_name.startswith("o") and model_name[1:2].isdigit()
    if not is_o_series:
        payload["temperature"] = model_config["temperature"]

    resp = self.session.post(
        f"{model_config.get('base_url', 'https://api.openai.com/v1')}/chat/completions",
        json=payload,
        headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        if resp.status_code == 429:
            # Probe the error body defensively: a non-JSON or unexpectedly
            # shaped body must not hide the real API error raised below.
            try:
                error = resp.json().get("error")
            except (ValueError, AttributeError):
                error = None
            if isinstance(error, dict) and error.get("code") == "insufficient_quota":
                raise QuotaExceededError(f"OpenAI quota exceeded ({model_name})")
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    usage = data.get("usage", {})
    input_tokens = usage.get("prompt_tokens", 0)
    output_tokens = usage.get("completion_tokens", 0)
    return LLMResponse(
        content=data["choices"][0]["message"]["content"],
        model=model_name,
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=usage.get("total_tokens", input_tokens + output_tokens),
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
286
+
287
def _query_anthropic(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send a single-turn request to the Anthropic Messages endpoint.

    Args:
        model_config: Model entry from the config (``api_key_env``,
            ``model_name``, ``base_url``, ``max_tokens``, ``temperature``,
            ``pricing``).
        prompt: Text sent as the only user message.

    Returns:
        A successful ``LLMResponse`` whose content is the concatenation of
        all text blocks in the reply.

    Raises:
        ValueError: If the API key environment variable is unset or empty.
        QuotaExceededError: On a 400/429 whose error mentions the credit
            balance or has type ``insufficient_quota``.
        Exception: On any other non-200 response, or a reply without text.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    resp = self.session.post(
        f"{model_config['base_url']}/messages",
        json={
            "model": model_config["model_name"],
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": model_config["max_tokens"],
            "temperature": model_config["temperature"],
        },
        headers={
            "x-api-key": api_key,
            "Content-Type": "application/json",
            "anthropic-version": "2023-06-01",
        },
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        if resp.status_code in (400, 429):
            # Probe the error body defensively: a non-JSON or unexpectedly
            # shaped body must not hide the real API error raised below.
            try:
                err = resp.json().get("error")
            except (ValueError, AttributeError):
                err = None
            if isinstance(err, dict):
                message = err.get("message") or ""
                if "credit balance" in message or err.get("type") == "insufficient_quota":
                    raise QuotaExceededError(f"Anthropic quota exhausted ({model_config['model_name']})")
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    # The reply is a list of content blocks; collect every text block
    # rather than assuming the first block is the (only) text.
    texts = [
        block.get("text", "")
        for block in data.get("content") or []
        if isinstance(block, dict) and block.get("type") == "text"
    ]
    if not texts:
        raise Exception(f"No text content in response: {data}")

    usage = data.get("usage", {})
    input_tokens = usage.get("input_tokens", 0)
    output_tokens = usage.get("output_tokens", 0)
    return LLMResponse(
        content="".join(texts),
        model=model_config["model_name"],
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=input_tokens + output_tokens,
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
330
+
331
def _query_google(self, model_config: Dict, prompt: str) -> LLMResponse:
    """Send a single-turn ``generateContent`` request to the Gemini API.

    Args:
        model_config: Model entry from the config (``api_key_env``,
            ``model_name``, ``base_url``, ``max_tokens``, ``temperature``,
            ``pricing``).
        prompt: Text sent as the only content part.

    Returns:
        A successful ``LLMResponse`` whose content is the concatenated text
        of the first candidate's parts ("" if it has none).

    Raises:
        ValueError: If the API key environment variable is unset or empty.
        Exception: On a non-200 response or a reply without candidates.
    """
    api_key = os.getenv(model_config["api_key_env"])
    if not api_key:
        raise ValueError(f"API key not found: {model_config['api_key_env']}")

    # The key travels in a header, not the query string: request URLs end
    # up in connection/timeout exception messages, which the retry loop
    # copies into error strings and logs.
    url = f"{model_config['base_url']}/models/{model_config['model_name']}:generateContent"
    resp = self.session.post(
        url,
        json={
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {
                "maxOutputTokens": model_config["max_tokens"],
                "temperature": model_config["temperature"],
            },
        },
        headers={"Content-Type": "application/json", "x-goog-api-key": api_key},
        timeout=self.config["defaults"]["timeout"],
    )
    if resp.status_code != 200:
        raise Exception(f"API error: {resp.status_code} - {resp.text}")

    data = resp.json()
    candidates = data.get("candidates", [])
    if not candidates:
        raise Exception(f"No candidates in response: {data}")
    # "parts" may be missing or empty (e.g. output cut off before any text);
    # treat both as empty content instead of raising IndexError.
    parts = (candidates[0].get("content") or {}).get("parts") or []
    content = "".join(part.get("text", "") for part in parts)

    usage = data.get("usageMetadata", {})
    input_tokens = usage.get("promptTokenCount", 0)
    output_tokens = usage.get("candidatesTokenCount", 0)
    return LLMResponse(
        content=content,
        model=model_config["model_name"],
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=usage.get("totalTokenCount", input_tokens + output_tokens),
        cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
        success=True,
    )
370
+
371
+ def _calculate_cost(self, input_tokens: int, output_tokens: int, pricing: Dict) -> float:
372
+ return (input_tokens / 1_000_000) * pricing["input_per_1m_tokens"] + \
373
+ (output_tokens / 1_000_000) * pricing["output_per_1m_tokens"]
374
+
375
def list_models(self) -> list:
    """Return the configured model aliases, in config order."""
    models = self.config["models"]
    return list(models)
377
+
378
def get_model_info(self, model_alias: str) -> Optional[Dict]:
    """Return the config entry for ``model_alias``, or None if it is not defined."""
    models = self.config["models"]
    return models.get(model_alias)
380
+
381
+
382
def main():
    """CLI entry point: llm-query <model_alias> <prompt>

    Prints the model's answer with token and cost figures. Exits with
    status 1 on a usage error or when the query fails, so shell scripts can
    detect failures.
    """
    import sys

    # The client is built before argument checking because the usage
    # message lists the configured aliases.
    client = LLMClient()
    if len(sys.argv) < 3:
        print("Usage: llm-query <model_alias> <prompt>")
        print("Available models:", ", ".join(client.list_models()))
        sys.exit(1)

    model_alias = sys.argv[1]
    prompt = " ".join(sys.argv[2:])
    print(f"Querying {model_alias}...")
    response = client.query(model_alias, prompt)

    if not response.success:
        print(f"Error: {response.error}")
        # Non-zero status on failure, matching llm-ping.
        sys.exit(1)

    # cost is Optional on LLMResponse; format a missing value as zero
    # instead of raising TypeError.
    cost = response.cost or 0.0
    print(f"\nResponse ({response.input_tokens} in + {response.output_tokens} out = {response.total_tokens} tokens, ${cost:.6f}):")
    print("-" * 50)
    print(response.content)


if __name__ == "__main__":
    main()
fj_llm/ping.py ADDED
@@ -0,0 +1,112 @@
1
+ """
2
+ Quick connectivity check for each supported provider.
3
+ Uses hardcoded minimal configs so it works independently of the user's config file.
4
+ Requires only the relevant API key env var to be set.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ from typing import Optional
10
+ from fj_llm.client import LLMClient, LLMResponse
11
+
12
+ PING_PROMPT = "Reply with exactly one word: pong"
13
+
14
def _ping_config(provider, model_name, key_env, base_url, max_tokens, in_price, out_price):
    """Build a self-contained client config with a single "ping" model alias."""
    return {
        "models": {
            "ping": {
                "provider": provider,
                "model_name": model_name,
                "api_key_env": key_env,
                "base_url": base_url,
                "max_tokens": max_tokens,
                "temperature": 0.0,
                "pricing": {"input_per_1m_tokens": in_price, "output_per_1m_tokens": out_price},
            }
        },
        # Single attempt, no backoff, short timeout: a ping should fail fast.
        "defaults": {"retry_attempts": 1, "retry_delay": 0.0, "timeout": 15},
    }


# One minimal config per provider, keyed by provider name.
_PING_CONFIGS = {
    "openai": _ping_config(
        "openai", "gpt-4o-mini", "OPENAI_API_KEY",
        "https://api.openai.com/v1", 10, 0.15, 0.60,
    ),
    "anthropic": _ping_config(
        "anthropic", "claude-haiku-4-5-20251001", "ANTHROPIC_API_KEY",
        "https://api.anthropic.com/v1", 10, 0.80, 4.00,
    ),
    "deepseek": _ping_config(
        "deepseek", "deepseek-chat", "DEEPSEEK_API_KEY",
        "https://api.deepseek.com/v1", 10, 0.14, 0.28,
    ),
    "google": _ping_config(
        "google", "gemini-2.5-flash", "GEMINI_API_KEY",
        "https://generativelanguage.googleapis.com/v1beta", 100, 0.075, 0.30,
    ),
}

SUPPORTED_PROVIDERS = list(_PING_CONFIGS)
74
+
75
+
76
def ping(provider: str) -> LLMResponse:
    """Query ``provider`` once with the fixed ping prompt and return the response.

    Uses the built-in minimal config for that provider, not the user's
    config file. Raises ValueError when the provider has no ping config.
    """
    config = _PING_CONFIGS.get(provider)
    if config is None:
        raise ValueError(f"Unknown provider '{provider}'. Choose from: {', '.join(SUPPORTED_PROVIDERS)}")
    return LLMClient(config_dict=config).query("ping", PING_PROMPT)
82
+
83
+
84
def main():
    """CLI entry point: llm-ping [provider ...]

    Pings the named providers, or every supported one when none are given.
    Providers whose API key variable is unset are skipped. Exits with
    status 1 on an unknown provider name or when any ping fails.
    """
    targets = sys.argv[1:] or SUPPORTED_PROVIDERS

    unknown = [name for name in targets if name not in _PING_CONFIGS]
    if unknown:
        print(f"Unknown provider(s): {', '.join(unknown)}")
        print(f"Supported: {', '.join(SUPPORTED_PROVIDERS)}")
        sys.exit(1)

    # provider -> response, or None for a skipped provider
    outcomes = {}
    for provider in targets:
        key_env = _PING_CONFIGS[provider]["models"]["ping"]["api_key_env"]
        if not os.getenv(key_env):
            print(f" {provider:<12} SKIP ({key_env} not set)")
            outcomes[provider] = None
            continue

        resp = ping(provider)
        if not resp.success:
            print(f" {provider:<12} FAIL {resp.error}")
        else:
            # Flatten and truncate the reply so each provider stays on one line.
            snippet = resp.content.strip().replace("\n", " ")[:40]
            print(f" {provider:<12} OK '{snippet}' ({resp.input_tokens}in/{resp.output_tokens}out ${resp.cost:.6f})")
        outcomes[provider] = resp

    if any(r is not None and not r.success for r in outcomes.values()):
        sys.exit(1)
@@ -0,0 +1,97 @@
1
+ Metadata-Version: 2.4
2
+ Name: fj-llm
3
+ Version: 0.2.0
4
+ Summary: Lightweight, config-driven client for multiple LLM providers
5
+ Author-email: Simon Bloch <simon.j.bloch@gmail.com>
6
+ License: MIT
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: requests>=2.25.0
10
+ Requires-Dist: pyyaml>=5.4.0
11
+ Provides-Extra: dev
12
+ Requires-Dist: pytest>=7.0; extra == "dev"
13
+
14
+ # fj-llm
15
+
16
+ Lightweight, config-driven Python client for multiple LLM providers. One interface, any provider — no provider SDK required.
17
+
18
+ ## Install
19
+
20
+ ```
21
+ pip install fj-llm
22
+ ```
23
+
24
+ ## Providers
25
+
26
+ | Provider | Config key | Env var |
27
+ |-----------|--------------|----------------------|
28
+ | OpenAI | `openai` | `OPENAI_API_KEY` |
29
+ | Anthropic | `anthropic` | `ANTHROPIC_API_KEY` |
30
+ | DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
31
+ | Google | `google` | `GEMINI_API_KEY` |
32
+
33
+ ## Configuration
34
+
35
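+ On first use, a default config is created at `~/.config/fj_llm/config.yaml`. Edit it to define model aliases. API keys are not stored in this file: each model's `api_key_env` names the environment variable the key is read from, so export your keys in the environment: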
36
+
37
+ ```yaml
38
+ models:
39
+ gpt-best:
40
+ provider: openai
41
+ model_name: gpt-4o
42
+ api_key_env: OPENAI_API_KEY
43
+ base_url: https://api.openai.com/v1
44
+ max_tokens: 4000
45
+ temperature: 0.1
46
+ pricing:
47
+ input_per_1m_tokens: 2.50
48
+ output_per_1m_tokens: 10.00
49
+ fallback: gpt-light # optional: alias to use on quota exhaustion
50
+
51
+ gpt-light:
52
+ provider: openai
53
+ model_name: gpt-4o-mini
54
+ api_key_env: OPENAI_API_KEY
55
+ base_url: https://api.openai.com/v1
56
+ max_tokens: 4000
57
+ temperature: 0.1
58
+ pricing:
59
+ input_per_1m_tokens: 0.15
60
+ output_per_1m_tokens: 0.60
61
+
62
+ defaults:
63
+ retry_attempts: 3
64
+ retry_delay: 1.0
65
+ timeout: 30
66
+ ```
67
+
68
+ For Cloud Functions or other environments without filesystem access, set the `FJ_LLM_CONFIG` environment variable to a JSON string of the same structure.
69
+
70
+ ## Usage
71
+
72
+ ```python
73
+ from fj_llm import LLMClient
74
+
75
+ client = LLMClient()
76
+ response = client.query("gpt-best", "Summarise this in one sentence.", context=long_text)
77
+
78
+ if response.success:
79
+ print(response.content)
80
+ print(f"Cost: ${response.cost:.6f}")
81
+ else:
82
+ print(f"Error: {response.error}")
83
+ ```
84
+
85
+ ### Cost logging
86
+
87
+ Every successful call is appended as a JSONL record to `~/.local/share/fj_llm/costs.jsonl`. Override the path via the `FJ_LLM_COST_LOG` env var, or set `cost_log` in the config file.
88
+
89
+ ### CLI
90
+
91
+ ```
92
+ llm-query gpt-best "What is the capital of France?"
93
+ ```
94
+
95
+ ## License
96
+
97
+ MIT
@@ -0,0 +1,8 @@
1
+ fj_llm/__init__.py,sha256=Pt_qkB6mvzMS5WFYcYnH_0XtuA1pftkIJahoD3oBqKE,181
2
+ fj_llm/client.py,sha256=jsIQxfKlGd3MBQo4rfvTd6hcj4RL_NSe6NzWw5zajHE,16401
3
+ fj_llm/ping.py,sha256=CMIBfIHqiCgjvRO0Tg21Re4oY4KgBQ2JycHLBvBY324,4040
4
+ fj_llm-0.2.0.dist-info/METADATA,sha256=OOG0veGcMMsKSua_S1rOvv-69Fg33AqJEd8dU6uIy9o,2438
5
+ fj_llm-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ fj_llm-0.2.0.dist-info/entry_points.txt,sha256=vfmtI70Sf76A3ImHQrhNlVxpCU7ObhgxA2K9nn8WxhA,77
7
+ fj_llm-0.2.0.dist-info/top_level.txt,sha256=4-qWPKJSQX3YMzv9DvsEflTbrjvTOtdlw2Gr5rAdBSc,7
8
+ fj_llm-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ llm-ping = fj_llm.ping:main
3
+ llm-query = fj_llm.client:main
@@ -0,0 +1 @@
1
+ fj_llm