fj-llm 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fj_llm-0.2.0/PKG-INFO +97 -0
- fj_llm-0.2.0/README.md +84 -0
- fj_llm-0.2.0/fj_llm/__init__.py +9 -0
- fj_llm-0.2.0/fj_llm/client.py +405 -0
- fj_llm-0.2.0/fj_llm/ping.py +112 -0
- fj_llm-0.2.0/fj_llm.egg-info/PKG-INFO +97 -0
- fj_llm-0.2.0/fj_llm.egg-info/SOURCES.txt +13 -0
- fj_llm-0.2.0/fj_llm.egg-info/dependency_links.txt +1 -0
- fj_llm-0.2.0/fj_llm.egg-info/entry_points.txt +3 -0
- fj_llm-0.2.0/fj_llm.egg-info/requires.txt +5 -0
- fj_llm-0.2.0/fj_llm.egg-info/top_level.txt +1 -0
- fj_llm-0.2.0/pyproject.toml +30 -0
- fj_llm-0.2.0/setup.cfg +4 -0
- fj_llm-0.2.0/tests/test_client.py +262 -0
- fj_llm-0.2.0/tests/test_integration.py +60 -0
fj_llm-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fj-llm
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Lightweight, config-driven client for multiple LLM providers
|
|
5
|
+
Author-email: Simon Bloch <simon.j.bloch@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: requests>=2.25.0
|
|
10
|
+
Requires-Dist: pyyaml>=5.4.0
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
13
|
+
|
|
14
|
+
# fj-llm
|
|
15
|
+
|
|
16
|
+
Lightweight, config-driven Python client for multiple LLM providers. One interface, any provider — no provider SDK required.
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
pip install fj-llm
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Providers
|
|
25
|
+
|
|
26
|
+
| Provider | Config key | Env var |
|
|
27
|
+
|-----------|--------------|----------------------|
|
|
28
|
+
| OpenAI | `openai` | `OPENAI_API_KEY` |
|
|
29
|
+
| Anthropic | `anthropic` | `ANTHROPIC_API_KEY` |
|
|
30
|
+
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
|
|
31
|
+
| Google | `google` | `GOOGLE_API_KEY` |
|
|
32
|
+
|
|
33
|
+
## Configuration
|
|
34
|
+
|
|
35
|
+
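On first use, a default config is created at `~/.config/fj_llm/config.yaml`. Edit it to define model aliases. API keys are not stored in this file: each model's `api_key_env` names the environment variable the key is read from at query time: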
|
|
36
|
+
|
|
37
|
+
```yaml
|
|
38
|
+
models:
|
|
39
|
+
gpt-best:
|
|
40
|
+
provider: openai
|
|
41
|
+
model_name: gpt-4o
|
|
42
|
+
api_key_env: OPENAI_API_KEY
|
|
43
|
+
base_url: https://api.openai.com/v1
|
|
44
|
+
max_tokens: 4000
|
|
45
|
+
temperature: 0.1
|
|
46
|
+
pricing:
|
|
47
|
+
input_per_1m_tokens: 2.50
|
|
48
|
+
output_per_1m_tokens: 10.00
|
|
49
|
+
fallback: gpt-light # optional: alias to use on quota exhaustion
|
|
50
|
+
|
|
51
|
+
gpt-light:
|
|
52
|
+
provider: openai
|
|
53
|
+
model_name: gpt-4o-mini
|
|
54
|
+
api_key_env: OPENAI_API_KEY
|
|
55
|
+
base_url: https://api.openai.com/v1
|
|
56
|
+
max_tokens: 4000
|
|
57
|
+
temperature: 0.1
|
|
58
|
+
pricing:
|
|
59
|
+
input_per_1m_tokens: 0.15
|
|
60
|
+
output_per_1m_tokens: 0.60
|
|
61
|
+
|
|
62
|
+
defaults:
|
|
63
|
+
retry_attempts: 3
|
|
64
|
+
retry_delay: 1.0
|
|
65
|
+
timeout: 30
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
For Cloud Functions or other environments without filesystem access, set the `FJ_LLM_CONFIG` environment variable to a JSON string of the same structure.
|
|
69
|
+
|
|
70
|
+
## Usage
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from fj_llm import LLMClient
|
|
74
|
+
|
|
75
|
+
client = LLMClient()
|
|
76
|
+
response = client.query("gpt-best", "Summarise this in one sentence.", context=long_text)
|
|
77
|
+
|
|
78
|
+
if response.success:
|
|
79
|
+
print(response.content)
|
|
80
|
+
print(f"Cost: ${response.cost:.6f}")
|
|
81
|
+
else:
|
|
82
|
+
print(f"Error: {response.error}")
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Cost logging
|
|
86
|
+
|
|
87
|
+
Every successful call is appended as a JSONL record to `~/.local/share/fj_llm/costs.jsonl`. Override the path via the `FJ_LLM_COST_LOG` env var, or set `cost_log` in the config file.
|
|
88
|
+
|
|
89
|
+
### CLI
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
llm-query gpt-best "What is the capital of France?"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## License
|
|
96
|
+
|
|
97
|
+
MIT
|
fj_llm-0.2.0/README.md
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# fj-llm
|
|
2
|
+
|
|
3
|
+
Lightweight, config-driven Python client for multiple LLM providers. One interface, any provider — no provider SDK required.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
pip install fj-llm
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Providers
|
|
12
|
+
|
|
13
|
+
| Provider | Config key | Env var |
|
|
14
|
+
|-----------|--------------|----------------------|
|
|
15
|
+
| OpenAI | `openai` | `OPENAI_API_KEY` |
|
|
16
|
+
| Anthropic | `anthropic` | `ANTHROPIC_API_KEY` |
|
|
17
|
+
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
|
|
18
|
+
| Google | `google` | `GOOGLE_API_KEY` |
|
|
19
|
+
|
|
20
|
+
## Configuration
|
|
21
|
+
|
|
22
|
+
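On first use, a default config is created at `~/.config/fj_llm/config.yaml`. Edit it to define model aliases. API keys are not stored in this file: each model's `api_key_env` names the environment variable the key is read from at query time: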
|
|
23
|
+
|
|
24
|
+
```yaml
|
|
25
|
+
models:
|
|
26
|
+
gpt-best:
|
|
27
|
+
provider: openai
|
|
28
|
+
model_name: gpt-4o
|
|
29
|
+
api_key_env: OPENAI_API_KEY
|
|
30
|
+
base_url: https://api.openai.com/v1
|
|
31
|
+
max_tokens: 4000
|
|
32
|
+
temperature: 0.1
|
|
33
|
+
pricing:
|
|
34
|
+
input_per_1m_tokens: 2.50
|
|
35
|
+
output_per_1m_tokens: 10.00
|
|
36
|
+
fallback: gpt-light # optional: alias to use on quota exhaustion
|
|
37
|
+
|
|
38
|
+
gpt-light:
|
|
39
|
+
provider: openai
|
|
40
|
+
model_name: gpt-4o-mini
|
|
41
|
+
api_key_env: OPENAI_API_KEY
|
|
42
|
+
base_url: https://api.openai.com/v1
|
|
43
|
+
max_tokens: 4000
|
|
44
|
+
temperature: 0.1
|
|
45
|
+
pricing:
|
|
46
|
+
input_per_1m_tokens: 0.15
|
|
47
|
+
output_per_1m_tokens: 0.60
|
|
48
|
+
|
|
49
|
+
defaults:
|
|
50
|
+
retry_attempts: 3
|
|
51
|
+
retry_delay: 1.0
|
|
52
|
+
timeout: 30
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
For Cloud Functions or other environments without filesystem access, set the `FJ_LLM_CONFIG` environment variable to a JSON string of the same structure.
|
|
56
|
+
|
|
57
|
+
## Usage
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from fj_llm import LLMClient
|
|
61
|
+
|
|
62
|
+
client = LLMClient()
|
|
63
|
+
response = client.query("gpt-best", "Summarise this in one sentence.", context=long_text)
|
|
64
|
+
|
|
65
|
+
if response.success:
|
|
66
|
+
print(response.content)
|
|
67
|
+
print(f"Cost: ${response.cost:.6f}")
|
|
68
|
+
else:
|
|
69
|
+
print(f"Error: {response.error}")
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Cost logging
|
|
73
|
+
|
|
74
|
+
Every successful call is appended as a JSONL record to `~/.local/share/fj_llm/costs.jsonl`. Override the path via the `FJ_LLM_COST_LOG` env var, or set `cost_log` in the config file.
|
|
75
|
+
|
|
76
|
+
### CLI
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
llm-query gpt-best "What is the capital of France?"
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## License
|
|
83
|
+
|
|
84
|
+
MIT
|
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Unified LLM Client Interface
|
|
4
|
+
Supports multiple LLM providers with a consistent API
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import time
|
|
11
|
+
import yaml
|
|
12
|
+
import requests
|
|
13
|
+
from typing import Dict, Any, Optional
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
_logger = logging.getLogger("fj_llm")
|
|
19
|
+
|
|
20
|
+
_DEFAULT_COST_LOG = Path("~/.local/share/fj_llm/costs.jsonl").expanduser()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class QuotaExceededError(Exception):
    """Raised by a provider handler when the account's quota or credit is used up.

    ``LLMClient._do_query`` does not retry this error; it switches to the
    model's configured ``fallback`` alias instead, when there is one.
    """
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class LLMResponse:
    """Provider-independent result of a single LLM query.

    Failed queries are reported with ``success=False``, an empty ``content``
    and a human-readable ``error`` rather than by raising.
    """

    # Text returned by the model ("" on failure).
    content: str
    # Provider model name on success; the requested alias on failure.
    model: str
    # Token usage as reported by the provider, when available.
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    total_tokens: Optional[int] = None
    # Cost computed from the token counts and the model's configured pricing.
    cost: Optional[float] = None
    # Outcome flag and, for failures, the reason.
    success: bool = True
    error: Optional[str] = None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class LLMClient:
    """Config-driven client that sends prompts to several LLM HTTP APIs.

    The configuration is a mapping with a ``models`` section (alias -> model
    settings) and a ``defaults`` section (``retry_attempts``, ``retry_delay``,
    ``timeout``). Every query returns an ``LLMResponse``; provider failures
    are reported through ``success``/``error`` instead of being raised.
    """

    # Value of a model's ``provider`` key -> name of the handler method.
    # Handlers are looked up by name at call time so that instance-level
    # overrides of a handler keep working.
    _PROVIDER_HANDLERS = {
        "deepseek": "_query_deepseek",
        "anthropic": "_query_anthropic",
        "openai": "_query_openai",
        "google": "_query_google",
        "gemini": "_query_google",
    }

    def __init__(self, config_path: str = "~/.config/fj_llm/config.yaml", config_dict: Optional[Dict] = None):
        """
        Initialize LLM Client with flexible config loading.
        Priority: config_dict → FJ_LLM_CONFIG env var → config file

        Args:
            config_path: Location of the YAML config file (``~`` is expanded).
            config_dict: Ready-made configuration. An empty or ``None`` value
                falls through to the environment variable / config file.

        The cost-log path is taken from ``FJ_LLM_COST_LOG``, then from the
        config's ``cost_log`` key, then from the built-in default.
        """
        self.session = requests.Session()
        # Always set, so the attribute exists even when config_dict is used.
        self.config_path = Path(config_path).expanduser()

        if config_dict:
            self.config = config_dict
        else:
            self.config = self._load_config()

        cost_log_env = os.getenv("FJ_LLM_COST_LOG")
        cost_log_cfg = self.config.get("cost_log")
        if cost_log_env:
            # Expand "~" here too: values set outside a shell are not expanded.
            self._cost_log = Path(cost_log_env).expanduser()
        elif cost_log_cfg:
            self._cost_log = Path(cost_log_cfg).expanduser()
        else:
            self._cost_log = _DEFAULT_COST_LOG

    def _load_config(self) -> Dict[str, Any]:
        """Load the configuration from ``FJ_LLM_CONFIG`` or the config file.

        ``FJ_LLM_CONFIG`` is used when it holds a JSON object. If it cannot
        be parsed, or parses to something other than an object, a warning is
        logged and the config file is used instead. A missing config file is
        created with default content first.

        Raises:
            ValueError: If the config file does not contain a YAML mapping
                (for example when it is empty).
        """
        env_config = os.getenv("FJ_LLM_CONFIG")
        if env_config:
            try:
                parsed = json.loads(env_config)
            except json.JSONDecodeError as e:
                _logger.warning("Failed to parse FJ_LLM_CONFIG: %s", e)
            else:
                if isinstance(parsed, dict):
                    return parsed
                _logger.warning(
                    "Failed to parse FJ_LLM_CONFIG: %s",
                    f"expected a JSON object, got {type(parsed).__name__}",
                )

        if not self.config_path.exists():
            self._create_default_config()

        with open(self.config_path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)
        if not isinstance(loaded, dict):
            # yaml.safe_load returns None for an empty file; fail with a clear
            # message instead of an AttributeError further down the line.
            raise ValueError(f"Config file {self.config_path} must contain a YAML mapping")
        return loaded

    def _create_default_config(self):
        """Write a starter config to ``self.config_path`` and tell the user."""
        self.config_path.parent.mkdir(parents=True, exist_ok=True)

        default_config = {
            "models": {
                "deepseek-light": {
                    "provider": "deepseek",
                    "model_name": "deepseek-chat",
                    "api_key_env": "DEEPSEEK_API_KEY",
                    "base_url": "https://api.deepseek.com/v1",
                    "max_tokens": 4000,
                    "temperature": 0.1,
                    "pricing": {
                        "input_per_1m_tokens": 0.14,
                        "output_per_1m_tokens": 0.28,
                    },
                },
                "deepseek-reasoner": {
                    "provider": "deepseek",
                    "model_name": "deepseek-reasoner",
                    "api_key_env": "DEEPSEEK_API_KEY",
                    "base_url": "https://api.deepseek.com/v1",
                    "max_tokens": 8000,
                    "temperature": 0.0,
                    # NOTE(review): 55.0 per 1M tokens is far above the other
                    # entries here — confirm against current DeepSeek pricing.
                    "pricing": {
                        "input_per_1m_tokens": 55.0,
                        "output_per_1m_tokens": 55.0,
                    },
                },
                "claude-sonnet": {
                    "provider": "anthropic",
                    "model_name": "claude-sonnet-4-6",
                    "api_key_env": "ANTHROPIC_API_KEY",
                    "base_url": "https://api.anthropic.com/v1",
                    "max_tokens": 4000,
                    "temperature": 0.1,
                    "pricing": {
                        "input_per_1m_tokens": 3.0,
                        "output_per_1m_tokens": 15.0,
                    },
                },
            },
            "defaults": {
                "retry_attempts": 3,
                "retry_delay": 1.0,
                "timeout": 30,
            },
        }

        with open(self.config_path, "w", encoding="utf-8") as f:
            yaml.dump(default_config, f, default_flow_style=False)

        print(f"Created default config at {self.config_path}")
        print("Please update with your API keys!")

    def query(self, model_alias: str, prompt: str, context: Optional[str] = None) -> "LLMResponse":
        """Unified LLM query with automatic timing, logging, and cost tracking.

        Args:
            model_alias: Key of the model in the config's ``models`` section.
            prompt: The task or question to send.
            context: Optional background text placed before the prompt.

        Returns:
            The ``LLMResponse`` from the provider (or from the fallback
            model). Successful calls are also appended to the cost log.
        """
        # perf_counter is monotonic, so the latency cannot go negative or jump
        # when the system clock is adjusted mid-request.
        started = time.perf_counter()
        response = self._do_query(model_alias, prompt, context)
        latency_ms = int((time.perf_counter() - started) * 1000)

        if response.success:
            _logger.info(
                "llm/%s ok — %d in + %d out tokens, $%.6f, %dms",
                model_alias, response.input_tokens or 0, response.output_tokens or 0,
                response.cost or 0.0, latency_ms,
            )
            self._append_cost_log(model_alias, response, latency_ms)
        else:
            _logger.error("llm/%s failed — %s", model_alias, response.error)

        return response

    def _append_cost_log(self, alias: str, response: "LLMResponse", latency_ms: int) -> None:
        """Append one JSON line describing a successful call to the cost log.

        Best effort: any failure is logged at debug level and otherwise
        ignored, so cost logging can never break the caller's query.
        """
        try:
            self._cost_log.parent.mkdir(parents=True, exist_ok=True)
            record = {
                "ts": datetime.now(timezone.utc).isoformat(),
                "alias": alias,
                "model": response.model,
                "input_tokens": response.input_tokens,
                "output_tokens": response.output_tokens,
                "cost_usd": response.cost,
                "latency_ms": latency_ms,
            }
            with open(self._cost_log, "a", encoding="utf-8") as f:
                f.write(json.dumps(record) + "\n")
        except Exception:
            # never crash the caller over logging, but leave a trace for debugging
            _logger.debug("could not write cost log %s", self._cost_log, exc_info=True)

    def _do_query(self, model_alias: str, prompt: str, context: Optional[str] = None, _is_fallback: bool = False) -> "LLMResponse":
        """Resolve the alias, call its provider with retries, and handle fallback.

        Generic errors are retried up to ``defaults.retry_attempts`` times
        with exponential back-off (``retry_delay * 2**attempt``). A
        ``QuotaExceededError`` is never retried: the model's ``fallback``
        alias is tried once instead (``_is_fallback`` stops a second hop).
        """
        models = self.config["models"]
        if model_alias not in models:
            return LLMResponse(content="", model=model_alias, success=False, error=f"Unknown model alias: {model_alias}")

        model_config = models[model_alias]

        full_prompt = prompt
        if context:
            full_prompt = f"Context:\n{context}\n\nTask:\n{prompt}"

        max_attempts = self.config["defaults"]["retry_attempts"]
        for attempt in range(max_attempts):
            try:
                provider = model_config["provider"]
                handler_name = self._PROVIDER_HANDLERS.get(provider) if isinstance(provider, str) else None
                if handler_name is None:
                    return LLMResponse(content="", model=model_alias, success=False, error=f"Unsupported provider: {provider}")

                response = getattr(self, handler_name)(model_config, full_prompt)
                if response.success:
                    return response

            except QuotaExceededError as e:
                fallback = model_config.get("fallback")
                if fallback and not _is_fallback:
                    _logger.warning("llm/%s quota exceeded, falling back to %s", model_alias, fallback)
                    return self._do_query(fallback, prompt, context, _is_fallback=True)
                return LLMResponse(content="", model=model_alias, success=False, error=f"Quota exceeded, no fallback: {e}")

            except Exception as e:
                if attempt == max_attempts - 1:
                    return LLMResponse(content="", model=model_alias, success=False, error=f"Failed after {attempt + 1} attempts: {e}")
                time.sleep(self.config["defaults"]["retry_delay"] * (2 ** attempt))

        return LLMResponse(content="", model=model_alias, success=False, error="Max retries exceeded")

    @staticmethod
    def _require_api_key(model_config: Dict) -> str:
        """Return the API key from the env var named by ``api_key_env``.

        Raises:
            ValueError: If that environment variable is unset or empty.
        """
        env_name = model_config["api_key_env"]
        api_key = os.getenv(env_name)
        if not api_key:
            raise ValueError(f"API key not found: {env_name}")
        return api_key

    @staticmethod
    def _error_details(resp) -> Dict[str, Any]:
        """Return the ``error`` object of an error response, or ``{}``.

        Tolerates bodies that are not JSON, not an object, or whose ``error``
        member is not an object, so quota detection can never mask the real
        API error with an ``AttributeError``.
        """
        try:
            body = resp.json()
        except ValueError:
            return {}
        err = body.get("error") if isinstance(body, dict) else None
        return err if isinstance(err, dict) else {}

    def _query_deepseek(self, model_config: Dict, prompt: str) -> "LLMResponse":
        """Send ``prompt`` to DeepSeek's chat-completions endpoint.

        Raises:
            ValueError: If the API key environment variable is not set.
            QuotaExceededError: On HTTP 402 (account balance exhausted).
            Exception: On any other non-200 status.
        """
        api_key = self._require_api_key(model_config)

        payload = {
            "model": model_config["model_name"],
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": model_config["max_tokens"],
            "temperature": model_config["temperature"],
        }
        resp = self.session.post(
            f"{model_config['base_url']}/chat/completions",
            json=payload,
            headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
            timeout=self.config["defaults"]["timeout"],
        )
        if resp.status_code == 402:
            # 402 = insufficient balance: retrying cannot help, use the fallback.
            raise QuotaExceededError(f"DeepSeek balance exhausted ({model_config['model_name']})")
        if resp.status_code != 200:
            raise Exception(f"API error: {resp.status_code} - {resp.text}")

        data = resp.json()
        usage = data.get("usage") or {}  # tolerate "usage": null
        input_tokens = usage.get("prompt_tokens", 0)
        output_tokens = usage.get("completion_tokens", 0)
        return LLMResponse(
            content=data["choices"][0]["message"]["content"],
            model=model_config["model_name"],
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=usage.get("total_tokens", input_tokens + output_tokens),
            cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
            success=True,
        )

    def _query_openai(self, model_config: Dict, prompt: str) -> "LLMResponse":
        """Send ``prompt`` to OpenAI's chat-completions endpoint.

        Raises:
            ValueError: If the API key environment variable is not set.
            QuotaExceededError: On HTTP 429 with error code ``insufficient_quota``.
            Exception: On any other non-200 status.
        """
        api_key = self._require_api_key(model_config)

        model_name = model_config["model_name"]
        payload: Dict[str, Any] = {
            "model": model_name,
            "messages": [{"role": "user", "content": prompt}],
            "max_completion_tokens": model_config["max_tokens"],
        }
        # o-series reasoning models don't accept temperature
        # (slices instead of [0] so an empty model name cannot raise IndexError)
        is_o_series = model_name[:1] == "o" and model_name[1:2].isdigit()
        if not is_o_series:
            payload["temperature"] = model_config["temperature"]

        resp = self.session.post(
            f"{model_config.get('base_url', 'https://api.openai.com/v1')}/chat/completions",
            json=payload,
            headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
            timeout=self.config["defaults"]["timeout"],
        )
        if resp.status_code != 200:
            if resp.status_code == 429 and self._error_details(resp).get("code") == "insufficient_quota":
                raise QuotaExceededError(f"OpenAI quota exceeded ({model_name})")
            raise Exception(f"API error: {resp.status_code} - {resp.text}")

        data = resp.json()
        usage = data.get("usage") or {}  # tolerate "usage": null
        input_tokens = usage.get("prompt_tokens", 0)
        output_tokens = usage.get("completion_tokens", 0)
        return LLMResponse(
            content=data["choices"][0]["message"]["content"],
            model=model_name,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=usage.get("total_tokens", input_tokens + output_tokens),
            cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
            success=True,
        )

    def _query_anthropic(self, model_config: Dict, prompt: str) -> "LLMResponse":
        """Send ``prompt`` to Anthropic's messages endpoint.

        Raises:
            ValueError: If the API key environment variable is not set.
            QuotaExceededError: On HTTP 400/429 whose error message mentions
                "credit balance" or whose error type is ``insufficient_quota``.
            Exception: On any other non-200 status.
        """
        api_key = self._require_api_key(model_config)

        resp = self.session.post(
            f"{model_config['base_url']}/messages",
            json={
                "model": model_config["model_name"],
                "messages": [{"role": "user", "content": prompt}],
                "max_tokens": model_config["max_tokens"],
                "temperature": model_config["temperature"],
            },
            headers={
                "x-api-key": api_key,
                "Content-Type": "application/json",
                "anthropic-version": "2023-06-01",
            },
            timeout=self.config["defaults"]["timeout"],
        )
        if resp.status_code != 200:
            if resp.status_code in (400, 429):
                err = self._error_details(resp)
                message = str(err.get("message") or "")
                if "credit balance" in message or err.get("type") == "insufficient_quota":
                    raise QuotaExceededError(f"Anthropic quota exhausted ({model_config['model_name']})")
            raise Exception(f"API error: {resp.status_code} - {resp.text}")

        data = resp.json()
        usage = data.get("usage") or {}  # tolerate "usage": null
        input_tokens = usage.get("input_tokens", 0)
        output_tokens = usage.get("output_tokens", 0)
        return LLMResponse(
            content=data["content"][0]["text"],
            model=model_config["model_name"],
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=input_tokens + output_tokens,
            cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
            success=True,
        )

    def _query_google(self, model_config: Dict, prompt: str) -> "LLMResponse":
        """Send ``prompt`` to the Gemini ``generateContent`` endpoint.

        The API key travels in the ``x-goog-api-key`` header rather than in
        the URL, so it cannot end up in exception messages, which are logged
        and returned in ``LLMResponse.error``.

        Raises:
            ValueError: If the API key environment variable is not set.
            Exception: On a non-200 status or a response without candidates.
        """
        api_key = self._require_api_key(model_config)

        url = f"{model_config['base_url']}/models/{model_config['model_name']}:generateContent"
        resp = self.session.post(
            url,
            json={
                "contents": [{"parts": [{"text": prompt}]}],
                "generationConfig": {
                    "maxOutputTokens": model_config["max_tokens"],
                    "temperature": model_config["temperature"],
                },
            },
            headers={"Content-Type": "application/json", "x-goog-api-key": api_key},
            timeout=self.config["defaults"]["timeout"],
        )
        if resp.status_code != 200:
            raise Exception(f"API error: {resp.status_code} - {resp.text}")

        data = resp.json()
        candidates = data.get("candidates", [])
        if not candidates:
            raise Exception(f"No candidates in response: {data}")
        # A candidate may carry no content or an empty parts list; treat both
        # as empty text rather than raising IndexError.
        parts = (candidates[0].get("content") or {}).get("parts") or [{}]
        content = parts[0].get("text", "")

        usage = data.get("usageMetadata") or {}  # tolerate null
        input_tokens = usage.get("promptTokenCount", 0)
        output_tokens = usage.get("candidatesTokenCount", 0)
        return LLMResponse(
            content=content,
            model=model_config["model_name"],
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=usage.get("totalTokenCount", input_tokens + output_tokens),
            cost=self._calculate_cost(input_tokens, output_tokens, model_config["pricing"]),
            success=True,
        )

    def _calculate_cost(self, input_tokens: int, output_tokens: int, pricing: Dict) -> float:
        """Return the cost of a call from its token counts and per-1M-token prices."""
        return (input_tokens / 1_000_000) * pricing["input_per_1m_tokens"] + \
               (output_tokens / 1_000_000) * pricing["output_per_1m_tokens"]

    def list_models(self) -> list:
        """Return the configured model aliases."""
        return list(self.config["models"].keys())

    def get_model_info(self, model_alias: str) -> Optional[Dict]:
        """Return the config entry for ``model_alias``, or ``None`` if unknown."""
        return self.config["models"].get(model_alias)
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def main():
    """CLI entry point: ``llm-query <model_alias> <prompt ...>``.

    All arguments after the alias are joined with spaces into one prompt.
    Prints the response with its token usage and cost. Exits with status 1
    on a usage error or when the query fails, so scripts can detect failure
    (the same convention ``llm-ping`` uses).
    """
    import sys

    client = LLMClient()
    if len(sys.argv) < 3:
        print("Usage: llm-query <model_alias> <prompt>")
        # str() keeps the usage message working for non-string YAML keys.
        print("Available models:", ", ".join(str(alias) for alias in client.list_models()))
        sys.exit(1)

    model_alias = sys.argv[1]
    prompt = " ".join(sys.argv[2:])
    print(f"Querying {model_alias}...")
    response = client.query(model_alias, prompt)

    if not response.success:
        print(f"Error: {response.error}")
        sys.exit(1)

    # cost is Optional on LLMResponse; formatting None with :.6f would raise.
    cost = response.cost if response.cost is not None else 0.0
    print(f"\nResponse ({response.input_tokens} in + {response.output_tokens} out = {response.total_tokens} tokens, ${cost:.6f}):")
    print("-" * 50)
    print(response.content)
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
if __name__ == "__main__":
|
|
405
|
+
main()
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Quick connectivity check for each supported provider.
|
|
3
|
+
Uses hardcoded minimal configs so it works independently of the user's config file.
|
|
4
|
+
Requires only the relevant API key env var to be set.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
from typing import Optional
|
|
10
|
+
from fj_llm.client import LLMClient, LLMResponse
|
|
11
|
+
|
|
12
|
+
PING_PROMPT = "Reply with exactly one word: pong"


def _ping_config(provider, model_name, api_key_env, base_url, input_price, output_price, max_tokens=10):
    """Build a self-contained client config with a single ``ping`` model alias.

    Each call returns fresh dictionaries, so the provider entries never
    share mutable state. Retries are disabled (one attempt, no delay) and
    the request timeout is 15 seconds.
    """
    return {
        "models": {
            "ping": {
                "provider": provider,
                "model_name": model_name,
                "api_key_env": api_key_env,
                "base_url": base_url,
                "max_tokens": max_tokens,
                "temperature": 0.0,
                "pricing": {"input_per_1m_tokens": input_price, "output_per_1m_tokens": output_price},
            }
        },
        "defaults": {"retry_attempts": 1, "retry_delay": 0.0, "timeout": 15},
    }


# One minimal, hardcoded config per provider, independent of the user's config file.
_PING_CONFIGS = {
    "openai": _ping_config(
        "openai", "gpt-4o-mini", "OPENAI_API_KEY",
        "https://api.openai.com/v1", 0.15, 0.60,
    ),
    "anthropic": _ping_config(
        "anthropic", "claude-haiku-4-5-20251001", "ANTHROPIC_API_KEY",
        "https://api.anthropic.com/v1", 0.80, 4.00,
    ),
    "deepseek": _ping_config(
        "deepseek", "deepseek-chat", "DEEPSEEK_API_KEY",
        "https://api.deepseek.com/v1", 0.14, 0.28,
    ),
    "google": _ping_config(
        "google", "gemini-2.5-flash", "GEMINI_API_KEY",
        "https://generativelanguage.googleapis.com/v1beta", 0.075, 0.30,
        max_tokens=100,
    ),
}

# Provider names accepted by ping() and the llm-ping CLI, in definition order.
SUPPORTED_PROVIDERS = list(_PING_CONFIGS)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def ping(provider: str) -> LLMResponse:
    """Query ``provider`` once with ``PING_PROMPT`` using its built-in ping config.

    Raises:
        ValueError: If ``provider`` has no entry in the ping configs.
    """
    config = _PING_CONFIGS.get(provider)
    if config is None:
        supported = ", ".join(SUPPORTED_PROVIDERS)
        raise ValueError(f"Unknown provider '{provider}'. Choose from: {supported}")
    return LLMClient(config_dict=config).query("ping", PING_PROMPT)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def main():
    """CLI entry point: llm-ping [provider ...]

    Pings every named provider (all supported ones when none are given) and
    prints one status line each. Providers whose API key env var is unset
    are reported as SKIP. Exits with status 1 for an unknown provider name
    or when at least one attempted ping failed.
    """
    targets = sys.argv[1:] or SUPPORTED_PROVIDERS

    unknown = [name for name in targets if name not in _PING_CONFIGS]
    if unknown:
        print(f"Unknown provider(s): {', '.join(unknown)}")
        print(f"Supported: {', '.join(SUPPORTED_PROVIDERS)}")
        sys.exit(1)

    # provider -> response, or None when skipped; a repeated provider keeps
    # only its last outcome.
    results = {}
    for provider in targets:
        key_env = _PING_CONFIGS[provider]["models"]["ping"]["api_key_env"]
        if not os.getenv(key_env):
            print(f"  {provider:<12} SKIP  ({key_env} not set)")
            results[provider] = None
            continue

        resp = ping(provider)
        results[provider] = resp
        if not resp.success:
            print(f"  {provider:<12} FAIL  {resp.error}")
            continue
        # Collapse the reply to one short line for the status output.
        snippet = resp.content.strip().replace("\n", " ")[:40]
        print(f"  {provider:<12} OK    '{snippet}'  ({resp.input_tokens}in/{resp.output_tokens}out  ${resp.cost:.6f})")

    if any(r is not None and not r.success for r in results.values()):
        sys.exit(1)
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fj-llm
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Lightweight, config-driven client for multiple LLM providers
|
|
5
|
+
Author-email: Simon Bloch <simon.j.bloch@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
Requires-Dist: requests>=2.25.0
|
|
10
|
+
Requires-Dist: pyyaml>=5.4.0
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
13
|
+
|
|
14
|
+
# fj-llm
|
|
15
|
+
|
|
16
|
+
Lightweight, config-driven Python client for multiple LLM providers. One interface, any provider — no provider SDK required.
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```
|
|
21
|
+
pip install fj-llm
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Providers
|
|
25
|
+
|
|
26
|
+
| Provider | Config key | Env var |
|
|
27
|
+
|-----------|--------------|----------------------|
|
|
28
|
+
| OpenAI | `openai` | `OPENAI_API_KEY` |
|
|
29
|
+
| Anthropic | `anthropic` | `ANTHROPIC_API_KEY` |
|
|
30
|
+
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
|
|
31
|
+
| Google | `google` | `GOOGLE_API_KEY` |
|
|
32
|
+
|
|
33
|
+
## Configuration
|
|
34
|
+
|
|
35
|
+
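On first use, a default config is created at `~/.config/fj_llm/config.yaml`. Edit it to define model aliases. API keys are not stored in this file: each model's `api_key_env` names the environment variable the key is read from at query time: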
|
|
36
|
+
|
|
37
|
+
```yaml
|
|
38
|
+
models:
|
|
39
|
+
gpt-best:
|
|
40
|
+
provider: openai
|
|
41
|
+
model_name: gpt-4o
|
|
42
|
+
api_key_env: OPENAI_API_KEY
|
|
43
|
+
base_url: https://api.openai.com/v1
|
|
44
|
+
max_tokens: 4000
|
|
45
|
+
temperature: 0.1
|
|
46
|
+
pricing:
|
|
47
|
+
input_per_1m_tokens: 2.50
|
|
48
|
+
output_per_1m_tokens: 10.00
|
|
49
|
+
fallback: gpt-light # optional: alias to use on quota exhaustion
|
|
50
|
+
|
|
51
|
+
gpt-light:
|
|
52
|
+
provider: openai
|
|
53
|
+
model_name: gpt-4o-mini
|
|
54
|
+
api_key_env: OPENAI_API_KEY
|
|
55
|
+
base_url: https://api.openai.com/v1
|
|
56
|
+
max_tokens: 4000
|
|
57
|
+
temperature: 0.1
|
|
58
|
+
pricing:
|
|
59
|
+
input_per_1m_tokens: 0.15
|
|
60
|
+
output_per_1m_tokens: 0.60
|
|
61
|
+
|
|
62
|
+
defaults:
|
|
63
|
+
retry_attempts: 3
|
|
64
|
+
retry_delay: 1.0
|
|
65
|
+
timeout: 30
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
For Cloud Functions or other environments without filesystem access, set the `FJ_LLM_CONFIG` environment variable to a JSON string of the same structure.
|
|
69
|
+
|
|
70
|
+
## Usage
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from fj_llm import LLMClient
|
|
74
|
+
|
|
75
|
+
client = LLMClient()
|
|
76
|
+
response = client.query("gpt-best", "Summarise this in one sentence.", context=long_text)
|
|
77
|
+
|
|
78
|
+
if response.success:
|
|
79
|
+
print(response.content)
|
|
80
|
+
print(f"Cost: ${response.cost:.6f}")
|
|
81
|
+
else:
|
|
82
|
+
print(f"Error: {response.error}")
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Cost logging
|
|
86
|
+
|
|
87
|
+
Every successful call is appended as a JSONL record to `~/.local/share/fj_llm/costs.jsonl`. Override the path via the `FJ_LLM_COST_LOG` env var, or set `cost_log` in the config file.
|
|
88
|
+
|
|
89
|
+
### CLI
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
llm-query gpt-best "What is the capital of France?"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## License
|
|
96
|
+
|
|
97
|
+
MIT
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
fj_llm/__init__.py
|
|
4
|
+
fj_llm/client.py
|
|
5
|
+
fj_llm/ping.py
|
|
6
|
+
fj_llm.egg-info/PKG-INFO
|
|
7
|
+
fj_llm.egg-info/SOURCES.txt
|
|
8
|
+
fj_llm.egg-info/dependency_links.txt
|
|
9
|
+
fj_llm.egg-info/entry_points.txt
|
|
10
|
+
fj_llm.egg-info/requires.txt
|
|
11
|
+
fj_llm.egg-info/top_level.txt
|
|
12
|
+
tests/test_client.py
|
|
13
|
+
tests/test_integration.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
fj_llm
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "fj-llm"
|
|
7
|
+
version = "0.2.0"
|
|
8
|
+
description = "Lightweight, config-driven client for multiple LLM providers"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
authors = [{ name = "Simon Bloch", email = "simon.j.bloch@gmail.com" }]
|
|
12
|
+
requires-python = ">=3.9"
|
|
13
|
+
dependencies = [
|
|
14
|
+
"requests>=2.25.0",
|
|
15
|
+
"pyyaml>=5.4.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.scripts]
|
|
19
|
+
llm-query = "fj_llm.client:main"
|
|
20
|
+
llm-ping = "fj_llm.ping:main"
|
|
21
|
+
|
|
22
|
+
[project.optional-dependencies]
|
|
23
|
+
dev = ["pytest>=7.0"]
|
|
24
|
+
|
|
25
|
+
[tool.setuptools.packages.find]
|
|
26
|
+
where = ["."]
|
|
27
|
+
include = ["fj_llm*"]
|
|
28
|
+
|
|
29
|
+
[tool.pytest.ini_options]
|
|
30
|
+
testpaths = ["tests"]
|
fj_llm-0.2.0/setup.cfg
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""Unit tests for fj_llm.client — all HTTP calls are mocked."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import pytest
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from unittest.mock import MagicMock, patch
|
|
7
|
+
|
|
8
|
+
from fj_llm import LLMClient, LLMResponse
|
|
9
|
+
from fj_llm.client import QuotaExceededError
|
|
10
|
+
|
|
11
|
+
def _model_entry(provider, model_name, api_key_env, base_url,
                 input_price, output_price, **extra):
    """Build one model-alias entry for the test configuration.

    Every alias in this module shares ``max_tokens=100`` and
    ``temperature=0.1``; only the provider details and pricing differ.
    Additional keys (e.g. ``fallback``) are appended after ``pricing``.
    """
    entry = {
        "provider": provider,
        "model_name": model_name,
        "api_key_env": api_key_env,
        "base_url": base_url,
        "max_tokens": 100,
        "temperature": 0.1,
        "pricing": {
            "input_per_1m_tokens": input_price,
            "output_per_1m_tokens": output_price,
        },
    }
    entry.update(extra)
    return entry


# Configuration handed to LLMClient by the unit tests: one alias per provider,
# plus an OpenAI alias whose ``fallback`` points at "test-openai".
MINIMAL_CONFIG = {
    "models": {
        "test-openai": _model_entry(
            "openai", "gpt-4o-mini", "OPENAI_API_KEY",
            "https://api.openai.com/v1", 0.15, 0.60,
        ),
        "test-anthropic": _model_entry(
            "anthropic", "claude-haiku-4-5-20251001", "ANTHROPIC_API_KEY",
            "https://api.anthropic.com/v1", 0.80, 4.00,
        ),
        "test-deepseek": _model_entry(
            "deepseek", "deepseek-chat", "DEEPSEEK_API_KEY",
            "https://api.deepseek.com/v1", 0.14, 0.28,
        ),
        "test-google": _model_entry(
            "google", "gemini-1.5-flash", "GOOGLE_API_KEY",
            "https://generativelanguage.googleapis.com/v1beta", 0.075, 0.30,
        ),
        "test-with-fallback": _model_entry(
            "openai", "gpt-4o", "OPENAI_API_KEY",
            "https://api.openai.com/v1", 2.50, 10.00,
            fallback="test-openai",
        ),
    },
    # Two attempts with a zero retry delay (presumably so retry tests stay fast).
    "defaults": {"retry_attempts": 2, "retry_delay": 0.0, "timeout": 10},
}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def make_client(**kwargs):
    """Construct an ``LLMClient`` from ``MINIMAL_CONFIG``.

    Any keyword arguments are forwarded unchanged to the ``LLMClient``
    constructor alongside ``config_dict``.
    """
    client = LLMClient(config_dict=MINIMAL_CONFIG, **kwargs)
    return client
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def openai_response(content="hello", in_tok=10, out_tok=5):
    """Return a mock HTTP 200 response carrying an OpenAI-style chat payload.

    ``content`` becomes the single choice's message text; ``in_tok`` and
    ``out_tok`` populate the ``usage`` section, with ``total_tokens`` set to
    their sum.
    """
    usage = {
        "prompt_tokens": in_tok,
        "completion_tokens": out_tok,
        "total_tokens": in_tok + out_tok,
    }
    payload = {
        "choices": [{"message": {"content": content}}],
        "usage": usage,
    }
    fake_http = MagicMock(status_code=200)
    fake_http.json.return_value = payload
    return fake_http
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def anthropic_response(content="hello", in_tok=10, out_tok=5):
    """Return a mock HTTP 200 response carrying an Anthropic-style payload.

    ``content`` is placed in a single ``content`` item as ``text``;
    ``in_tok``/``out_tok`` become ``usage.input_tokens``/``usage.output_tokens``.
    """
    payload = {
        "content": [{"text": content}],
        "usage": {"input_tokens": in_tok, "output_tokens": out_tok},
    }
    fake_http = MagicMock(status_code=200)
    fake_http.json.return_value = payload
    return fake_http
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def deepseek_response(content="hello", in_tok=10, out_tok=5):
    """Return a mock DeepSeek response.

    Delegates to ``openai_response``, so the payload has exactly the same
    shape as the OpenAI mock.
    """
    return openai_response(content=content, in_tok=in_tok, out_tok=out_tok)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def google_response(content="hello", in_tok=10, out_tok=5):
    """Return a mock HTTP 200 response carrying a Gemini-style payload.

    ``content`` is the text of the first candidate's only part; the token
    counts populate ``usageMetadata``, with ``totalTokenCount`` set to the
    sum of the two.
    """
    candidate = {"content": {"parts": [{"text": content}]}}
    usage_metadata = {
        "promptTokenCount": in_tok,
        "candidatesTokenCount": out_tok,
        "totalTokenCount": in_tok + out_tok,
    }
    fake_http = MagicMock(status_code=200)
    fake_http.json.return_value = {
        "candidates": [candidate],
        "usageMetadata": usage_metadata,
    }
    return fake_http
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class TestConfigLoading:
    """How ``LLMClient`` obtains its configuration."""

    def test_config_dict_used_directly(self):
        """A ``config_dict`` passed to the constructor is exposed as ``client.config``."""
        models = make_client().config["models"]
        assert "test-openai" in models

    def test_env_var_config(self, monkeypatch):
        """JSON in ``FJ_LLM_CONFIG`` is loaded even though ``config_path`` does not exist."""
        serialized = json.dumps(MINIMAL_CONFIG)
        monkeypatch.setenv("FJ_LLM_CONFIG", serialized)
        client = LLMClient(config_path="/nonexistent/path.yaml")
        assert "test-openai" in client.config["models"]

    def test_unknown_model_returns_error(self):
        """Querying an alias missing from the config yields a failed response, not an exception."""
        result = make_client().query("nonexistent", "hello")
        assert not result.success
        assert "Unknown model alias" in result.error
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class TestCostCalculation:
    """Checks for ``LLMClient._calculate_cost``."""

    def test_cost_calculation(self):
        """1M input tokens at 2.0 plus 500k output tokens at 10.0 (per 1M) costs 7.0."""
        rates = {"input_per_1m_tokens": 2.0, "output_per_1m_tokens": 10.0}
        total = make_client()._calculate_cost(1_000_000, 500_000, rates)
        difference = abs(total - 7.0)
        assert difference < 1e-9

    def test_zero_tokens(self):
        """No tokens in either direction costs exactly 0.0."""
        rates = {"input_per_1m_tokens": 2.0, "output_per_1m_tokens": 10.0}
        total = make_client()._calculate_cost(0, 0, rates)
        assert total == 0.0
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class TestOpenAIProvider:
    """Queries against the ``openai`` provider with ``session.post`` mocked."""

    def test_successful_query(self, monkeypatch):
        """A 200 response is parsed into content, token counts and a positive cost."""
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        client = make_client()
        canned = openai_response("world", 20, 10)
        with patch.object(client.session, "post", return_value=canned):
            result = client.query("test-openai", "hello")
        assert result.success
        assert result.content == "world"
        assert result.input_tokens == 20
        assert result.output_tokens == 10
        assert result.cost > 0

    def test_quota_exceeded_raises(self, monkeypatch):
        """A 429 ``insufficient_quota`` reply surfaces as a failed response mentioning the quota."""
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        client = make_client()
        rejected = MagicMock()
        rejected.status_code = 429
        rejected.json.return_value = {"error": {"code": "insufficient_quota"}}
        rejected.text = "quota"
        with patch.object(client.session, "post", return_value=rejected):
            result = client.query("test-openai", "hello")
        assert not result.success
        assert "Quota exceeded" in result.error

    def test_fallback_on_quota(self, monkeypatch):
        """When the first call hits the quota, the alias's ``fallback`` answers instead."""
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        client = make_client()
        rejected = MagicMock()
        rejected.status_code = 429
        rejected.json.return_value = {"error": {"code": "insufficient_quota"}}
        rejected.text = "quota"
        # First POST is rejected for quota; the second one succeeds.
        replies = [rejected, openai_response("fallback!")]
        with patch.object(client.session, "post", side_effect=replies):
            result = client.query("test-with-fallback", "hello")
        assert result.success
        assert result.content == "fallback!"

    def test_missing_api_key(self, monkeypatch):
        """With ``OPENAI_API_KEY`` unset the query reports failure."""
        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        result = make_client().query("test-openai", "hello")
        assert not result.success
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class TestAnthropicProvider:
    """Queries against the ``anthropic`` provider with ``session.post`` mocked."""

    def test_successful_query(self, monkeypatch):
        """A 200 response yields the text and a total of input + output tokens."""
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
        client = make_client()
        canned = anthropic_response("claude says hi", 15, 8)
        with patch.object(client.session, "post", return_value=canned):
            result = client.query("test-anthropic", "hello")
        assert result.success
        assert result.content == "claude says hi"
        assert result.total_tokens == 23

    def test_quota_exceeded(self, monkeypatch):
        """A 400 reply about a low credit balance is reported as a quota failure."""
        monkeypatch.setenv("ANTHROPIC_API_KEY", "test-key")
        client = make_client()
        rejected = MagicMock()
        rejected.status_code = 400
        rejected.json.return_value = {
            "error": {"message": "credit balance too low", "type": "invalid_request_error"}
        }
        rejected.text = "credits"
        with patch.object(client.session, "post", return_value=rejected):
            result = client.query("test-anthropic", "hello")
        assert not result.success
        assert "Quota exceeded" in result.error
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
class TestDeepSeekProvider:
    """Queries against the ``deepseek`` provider with ``session.post`` mocked."""

    def test_successful_query(self, monkeypatch):
        """A 200 response in the OpenAI-style shape is parsed into the reply text."""
        monkeypatch.setenv("DEEPSEEK_API_KEY", "test-key")
        client = make_client()
        canned = deepseek_response("deepseek reply", 12, 6)
        with patch.object(client.session, "post", return_value=canned):
            result = client.query("test-deepseek", "hello")
        assert result.success
        assert result.content == "deepseek reply"
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class TestGoogleProvider:
    """Queries against the ``google`` provider with ``session.post`` mocked."""

    def test_successful_query(self, monkeypatch):
        """A 200 response yields the candidate text and the prompt token count."""
        monkeypatch.setenv("GOOGLE_API_KEY", "test-key")
        client = make_client()
        canned = google_response("gemini reply", 8, 4)
        with patch.object(client.session, "post", return_value=canned):
            result = client.query("test-google", "hello")
        assert result.success
        assert result.content == "gemini reply"
        assert result.input_tokens == 8

    def test_empty_candidates_raises(self, monkeypatch):
        """A 200 response with an empty ``candidates`` list is reported as a failure."""
        monkeypatch.setenv("GOOGLE_API_KEY", "test-key")
        client = make_client()
        hollow = MagicMock()
        hollow.status_code = 200
        hollow.json.return_value = {"candidates": [], "usageMetadata": {}}
        with patch.object(client.session, "post", return_value=hollow):
            result = client.query("test-google", "hello")
        assert not result.success
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class TestRetryBehavior:
    """Retry handling when ``session.post`` raises.

    ``MINIMAL_CONFIG`` sets ``retry_attempts`` to 2 and ``retry_delay`` to 0.0.
    """

    def test_retries_on_transient_error(self, monkeypatch):
        """An exception on the first POST is retried and the second reply is returned."""
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        client = make_client()
        # side_effect list: the first call raises, the second returns a good reply.
        outcomes = [Exception("transient"), openai_response("ok")]
        with patch.object(client.session, "post", side_effect=outcomes):
            resp = client.query("test-openai", "hello")
        assert resp.success
        # The content must come from the retried call, not from a stale default.
        assert resp.content == "ok"

    def test_exhausted_retries_returns_error(self, monkeypatch):
        """If every POST raises, the query reports failure instead of propagating."""
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        client = make_client()
        with patch.object(client.session, "post", side_effect=Exception("always fails")):
            resp = client.query("test-openai", "hello")
        assert not resp.success
        assert "Failed after" in resp.error
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class TestCostLog:
    """Cost-log output produced by a successful query."""

    def test_cost_log_written(self, monkeypatch, tmp_path):
        """A successful query writes a JSON record to the path in ``FJ_LLM_COST_LOG``."""
        destination = tmp_path / "costs.jsonl"
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
        # Set the override before the client is constructed.
        monkeypatch.setenv("FJ_LLM_COST_LOG", str(destination))
        client = LLMClient(config_dict=MINIMAL_CONFIG)
        with patch.object(client.session, "post", return_value=openai_response()):
            client.query("test-openai", "hello")
        assert destination.exists()
        raw = destination.read_text().strip()
        entry = json.loads(raw)
        assert entry["alias"] == "test-openai"
        assert entry["cost_usd"] > 0
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Integration tests — hit real APIs. Skipped automatically if the API key env var is not set.
|
|
3
|
+
Run explicitly: pytest tests/test_integration.py -v
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import pytest
|
|
8
|
+
from fj_llm.ping import ping
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _key(provider: str) -> str:
|
|
12
|
+
keys = {
|
|
13
|
+
"openai": "OPENAI_API_KEY",
|
|
14
|
+
"anthropic": "ANTHROPIC_API_KEY",
|
|
15
|
+
"deepseek": "DEEPSEEK_API_KEY",
|
|
16
|
+
"google": "GEMINI_API_KEY",
|
|
17
|
+
}
|
|
18
|
+
return keys[provider]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def integration(provider: str):
    """Decorator: skip the test if the provider's API key is not set."""
    env_var = _key(provider)
    key_missing = not os.getenv(env_var)
    return pytest.mark.skipif(key_missing, reason=f"{env_var} not set")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@integration("openai")
def test_ping_openai():
    """Ping OpenAI and expect text, a positive input-token count and a positive cost."""
    reply = ping("openai")
    assert reply.success, reply.error
    assert reply.content.strip()
    # Split form of "truthy and > 0": a missing count fails on the first assert.
    assert reply.input_tokens
    assert reply.input_tokens > 0
    assert reply.cost
    assert reply.cost > 0
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@integration("anthropic")
def test_ping_anthropic():
    """Ping Anthropic and expect text, a positive input-token count and a positive cost."""
    reply = ping("anthropic")
    assert reply.success, reply.error
    assert reply.content.strip()
    # Split form of "truthy and > 0": a missing count fails on the first assert.
    assert reply.input_tokens
    assert reply.input_tokens > 0
    assert reply.cost
    assert reply.cost > 0
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@integration("deepseek")
def test_ping_deepseek():
    """Ping DeepSeek and expect text plus a positive input-token count."""
    reply = ping("deepseek")
    assert reply.success, reply.error
    assert reply.content.strip()
    # Split form of "truthy and > 0": a missing count fails on the first assert.
    assert reply.input_tokens
    assert reply.input_tokens > 0
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@integration("google")
def test_ping_google():
    """Ping Google and expect text plus a positive input-token count."""
    reply = ping("google")
    assert reply.success, reply.error
    assert reply.content.strip()
    # Split form of "truthy and > 0": a missing count fails on the first assert.
    assert reply.input_tokens
    assert reply.input_tokens > 0
|