nthlayer-common 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nthlayer_common/llm.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unified LLM interface for NthLayer agentic components.
|
|
3
|
+
|
|
4
|
+
Two API formats cover the entire market:
|
|
5
|
+
- Anthropic Messages API (Anthropic only)
|
|
6
|
+
- OpenAI Chat Completions API (everyone else)
|
|
7
|
+
|
|
8
|
+
No third-party LLM libraries. No LiteLLM.
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
from nthlayer_common.llm import llm_call
|
|
12
|
+
|
|
13
|
+
response = llm_call(
|
|
14
|
+
system="You are a triage agent...",
|
|
15
|
+
user="Evaluate this incident...",
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
Configuration via environment:
|
|
19
|
+
NTHLAYER_MODEL - provider/model (default: anthropic/claude-sonnet-4-20250514)
|
|
20
|
+
NTHLAYER_LLM_TIMEOUT - seconds (default: 60)
|
|
21
|
+
ANTHROPIC_API_KEY - for anthropic/* models
|
|
22
|
+
OPENAI_API_KEY - for openai/*, together/*, groq/*, mistral/*, azure/* models
|
|
23
|
+
OPENAI_API_BASE - override endpoint URL for any OpenAI-compatible provider (not used for anthropic/*)
|
|
24
|
+
AZURE_OPENAI_ENDPOINT - Azure OpenAI resource URL
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import os
|
|
28
|
+
import json
|
|
29
|
+
import httpx
|
|
30
|
+
from dataclasses import dataclass
|
|
31
|
+
|
|
32
|
+
# Model used when llm_call() is not given one. An empty NTHLAYER_MODEL is
# treated as unset so a blank variable cannot produce an empty model name.
DEFAULT_MODEL = os.environ.get("NTHLAYER_MODEL") or "anthropic/claude-sonnet-4-20250514"

# Request timeout in seconds. Unparsable values fall back to 60; zero or
# negative values get the same fallback, since they cannot be a usable timeout.
try:
    TIMEOUT = int(os.environ.get("NTHLAYER_LLM_TIMEOUT") or "60")
except (ValueError, TypeError):
    TIMEOUT = 60
if TIMEOUT <= 0:
    TIMEOUT = 60
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
|
|
40
|
+
class LLMResponse:
|
|
41
|
+
"""Response from an LLM call."""
|
|
42
|
+
text: str # The response content
|
|
43
|
+
model: str # Model that was used
|
|
44
|
+
provider: str # Provider that was used
|
|
45
|
+
input_tokens: int | None = None # Token count for input (if available)
|
|
46
|
+
output_tokens: int | None = None # Token count for output (if available)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class LLMError(Exception):
|
|
50
|
+
"""Raised when an LLM call fails."""
|
|
51
|
+
def __init__(self, message: str, provider: str, model: str, cause: Exception | None = None):
|
|
52
|
+
self.provider = provider
|
|
53
|
+
self.model = model
|
|
54
|
+
self.cause = cause
|
|
55
|
+
super().__init__(f"[{provider}/{model}] {message}")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def llm_call(
    system: str,
    user: str,
    model: str | None = None,
    max_tokens: int = 2000,
    timeout: int | None = None,
) -> LLMResponse:
    """
    Send one system + user prompt to an LLM and return its reply.

    The model is written as "provider/model-name", for example:
        - anthropic/claude-sonnet-4-20250514
        - openai/gpt-4o
        - ollama/llama3.1
        - azure/my-deployment
        - together/meta-llama/Llama-3-70b
        - groq/llama-3.1-70b-versatile
        - mistral/mistral-large-latest
        - vllm/my-model
        - lmstudio/my-model
        - custom/my-model (with OPENAI_API_BASE set)

    Only the text before the first "/" is the provider; the rest is the model
    name. A name without "/" has its provider guessed by _guess_provider().

    Routing:
        - provider "anthropic" -> Anthropic Messages API
        - any other provider   -> OpenAI-compatible Chat Completions API

    Args:
        system: System prompt.
        user: User message.
        model: Model string; DEFAULT_MODEL is used when None or empty.
        max_tokens: Maximum number of tokens requested from the model.
        timeout: Request timeout in seconds; TIMEOUT is used when None.

    Returns:
        LLMResponse with the text, model name, provider and token counts.

    Raises:
        LLMError: on any failure, carrying the provider and model name.

    Note: callers that wrap llm_call() in asyncio.wait_for(timeout=T) should
    use the same timeout value. httpx fires the network timeout first; the
    asyncio.wait_for is a safety net for thread scheduling delays.
    """
    requested = model or DEFAULT_MODEL
    effective_timeout = TIMEOUT if timeout is None else timeout

    # Split on the first "/" only, so model names may themselves contain "/".
    provider, slash, model_name = requested.partition("/")
    if not slash:
        # Bare model name: infer the provider from well-known prefixes.
        provider = _guess_provider(requested)
        model_name = requested

    try:
        if provider == "anthropic":
            result = _call_anthropic(system, user, model_name, max_tokens, effective_timeout)
        else:
            result = _call_openai_compat(
                system, user, model_name, provider, max_tokens, effective_timeout
            )
        text, in_tok, out_tok = result
        return LLMResponse(
            text=text,
            model=model_name,
            provider=provider,
            input_tokens=in_tok,
            output_tokens=out_tok,
        )
    except LLMError:
        # Already carries provider/model context; pass through untouched.
        raise
    except httpx.HTTPStatusError as exc:
        detail = f"HTTP {exc.response.status_code}: {exc.response.text[:200]}"
        raise LLMError(detail, provider, model_name, exc) from exc
    except httpx.TimeoutException as exc:
        raise LLMError(f"Timeout after {effective_timeout}s", provider, model_name, exc) from exc
    except Exception as exc:
        raise LLMError(str(exc), provider, model_name, exc) from exc
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _call_anthropic(system: str, user: str, model: str, max_tokens: int, timeout: int) -> tuple[str, int | None, int | None]:
    """Call the Anthropic Messages API with one system prompt and one user message.

    Args:
        system: System prompt, sent as the top-level "system" field.
        user: User message, sent as the only entry of "messages".
        model: Anthropic model name (without the "anthropic/" prefix).
        max_tokens: Value of the "max_tokens" request field.
        timeout: Timeout in seconds passed to httpx.

    Returns:
        (text, input_tokens, output_tokens). Token counts are None when the
        response has no usage data.

    Raises:
        LLMError: if ANTHROPIC_API_KEY is not set or the response has no content.
        httpx.HTTPStatusError: on a non-success HTTP status (raise_for_status).
    """
    api_key = os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        raise LLMError("ANTHROPIC_API_KEY not set", "anthropic", model)

    response = httpx.post(
        "https://api.anthropic.com/v1/messages",
        headers={
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
            "content-type": "application/json",
        },
        json={
            "model": model,
            "max_tokens": max_tokens,
            "system": system,
            "messages": [{"role": "user", "content": user}],
        },
        timeout=timeout,
    )
    response.raise_for_status()
    data = response.json()

    # "or" also covers an explicit null, which .get(key, default) lets through.
    content = data.get("content") or []
    if not content:
        raise LLMError("Model returned empty content", "anthropic", model)

    # The content list may hold several blocks and the first is not guaranteed
    # to be text, so join every text block instead of reading content[0] only.
    text = "".join(
        part.get("text") or ""
        for part in content
        if isinstance(part, dict) and part.get("type", "text") == "text"
    )

    usage = data.get("usage") or {}
    return text, usage.get("input_tokens"), usage.get("output_tokens")
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _call_openai_compat(
    system: str, user: str, model: str, provider: str, max_tokens: int, timeout: int
) -> tuple[str, int | None, int | None]:
    """
    Call an OpenAI-compatible Chat Completions API.

    Works with: OpenAI, Azure OpenAI, Ollama, vLLM, Together AI,
    Groq, Mistral, LM Studio, any OpenAI-compatible server.

    Args:
        system: System prompt, sent as the "system" message.
        user: User message, sent as the "user" message.
        model: Model name (for Azure it is also placed in the URL path).
        provider: Provider name; selects the default base URL and auth header.
        max_tokens: Value of the "max_tokens" request field.
        timeout: Timeout in seconds passed to httpx.

    Returns:
        (text, prompt_tokens, completion_tokens). text is "" when the message
        has no content; token counts are None when usage data is missing.

    Raises:
        LLMError: if the Azure endpoint is not configured or no choices are returned.
        httpx.HTTPStatusError: on a non-success HTTP status (raise_for_status).
    """
    # OPENAI_API_BASE wins over the per-provider default. Trailing slashes are
    # trimmed so the joined URL never contains "//chat/completions".
    base_url = (os.environ.get("OPENAI_API_BASE") or _default_base_url(provider)).rstrip("/")
    if not base_url and provider == "azure":
        raise LLMError("AZURE_OPENAI_ENDPOINT not set", "azure", model)
    api_key = os.environ.get("OPENAI_API_KEY", "not-needed")  # Ollama/vLLM don't require keys

    # Azure uses api-key header; everything else uses Bearer token
    if provider == "azure":
        headers = {
            "api-key": api_key,
            "content-type": "application/json",
        }
        # NOTE(review): Azure's documented path is
        # {resource}/openai/deployments/{deployment}/chat/completions; this URL
        # only matches it if the configured endpoint already ends in
        # /openai/deployments. Confirm how AZURE_OPENAI_ENDPOINT is meant to be set.
        url = f"{base_url}/{model}/chat/completions?api-version=2024-02-01"
    else:
        headers = {
            "Authorization": f"Bearer {api_key}",
            "content-type": "application/json",
        }
        url = f"{base_url}/chat/completions"

    response = httpx.post(
        url,
        headers=headers,
        json={
            "model": model,
            "max_tokens": max_tokens,
            "messages": [
                {"role": "system", "content": system},
                {"role": "user", "content": user},
            ],
        },
        timeout=timeout,
    )
    response.raise_for_status()
    data = response.json()

    # "or" also covers explicit nulls, which .get(key, default) lets through.
    choices = data.get("choices") or []
    if not choices:
        raise LLMError("Model returned empty choices", provider, model)

    message = choices[0].get("message") or {}
    # content may be null; keep the declared str return type.
    text = message.get("content") or ""

    usage = data.get("usage") or {}
    return text, usage.get("prompt_tokens"), usage.get("completion_tokens")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _default_base_url(provider: str) -> str:
    """Return the default API base URL for *provider*.

    For "azure" the URL is read from AZURE_OPENAI_ENDPOINT on every call and
    is "" when that variable is unset. Providers not listed here get the
    OpenAI base URL.
    """
    if provider == "azure":
        return os.environ.get("AZURE_OPENAI_ENDPOINT", "")

    known_endpoints = {
        "openai": "https://api.openai.com/v1",
        "ollama": "http://localhost:11434/v1",
        "vllm": "http://localhost:8000/v1",
        "lmstudio": "http://localhost:1234/v1",
        "together": "https://api.together.xyz/v1",
        "groq": "https://api.groq.com/openai/v1",
        "mistral": "https://api.mistral.ai/v1",
    }
    if provider in known_endpoints:
        return known_endpoints[provider]
    return "https://api.openai.com/v1"
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def _guess_provider(model: str) -> str:
    """Infer the provider for a model name that has no "provider/" prefix.

    The name is matched case-sensitively against known prefixes, in order.
    A name that matches nothing is assumed to be OpenAI-compatible.
    """
    prefix_table = (
        (("claude",), "anthropic"),
        (("gpt", "o1", "o3"), "openai"),
        (("llama", "mistral", "gemma"), "ollama"),
    )
    for prefixes, provider in prefix_table:
        if model.startswith(prefixes):
            return provider
    return "openai"  # Default: assume OpenAI-compatible
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Shared parsing utilities for LLM responses.
|
|
2
|
+
|
|
3
|
+
Used by every component that calls llm_call() and parses the response.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def strip_markdown_fences(text: str) -> str:
    """Strip a surrounding markdown code fence from model response text.

    Handles an opening fence with or without a language tag (```json or ```),
    a closing fence on its own line, a closing fence attached to the end of
    the last content line, and a payload wrapped in fences on a single line.
    Text that does not start with a fence is returned stripped of surrounding
    whitespace and otherwise unchanged.
    """
    fence = "```"
    text = text.strip()
    if not text.startswith(fence):
        return text

    opening, newline, body = text.partition("\n")
    if not newline:
        # Everything is on one line: dropping the "opening fence line" would
        # discard the payload, so remove only the fence markers themselves.
        inner = opening[len(fence):]
        if inner.endswith(fence):
            inner = inner[: -len(fence)]
        return inner.strip()

    # Multi-line: the whole first line (fence plus optional language tag) goes.
    lines = body.split("\n")
    last = lines[-1]
    if last.strip().startswith(fence):
        # Closing fence on its own line.
        lines.pop()
    elif last.endswith(fence):
        # Closing fence attached to the final content line.
        lines[-1] = last[: -len(fence)].rstrip()
    return "\n".join(lines)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def clamp(value: float, low: float = 0.0, high: float = 1.0) -> float:
    """Limit *value* to the range [low, high] (defaults to [0.0, 1.0])."""
    # Apply the upper bound first, then the lower bound.
    capped = value if value < high else high
    return capped if capped > low else low
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nthlayer-common
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared utilities for the NthLayer ecosystem
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: httpx>=0.27
|
|
8
|
+
Provides-Extra: dev
|
|
9
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
10
|
+
Requires-Dist: ruff>=0.8; extra == "dev"
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
nthlayer_common/__init__.py,sha256=65P-cbfiQUFnjUKdhQ5IP7YL6mti6RLBmUZVnMhf2Qs,214
|
|
2
|
+
nthlayer_common/llm.py,sha256=HC4GccvpVmZIovUmN4KacBNxX8YzWxnTQlN6PQKuvPM,8101
|
|
3
|
+
nthlayer_common/parsing.py,sha256=kKS2na_oiGh8mpVZXVwwfU7mWF85YZ-JYXvxhZ1Hgac,797
|
|
4
|
+
nthlayer_common-0.1.0.dist-info/METADATA,sha256=fU4n4aJ3GnWUkS5N2KiN1aIvLZC9Mwe-8WOasVYoH8Y,287
|
|
5
|
+
nthlayer_common-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
6
|
+
nthlayer_common-0.1.0.dist-info/top_level.txt,sha256=cbYVPV48OGCotmiiFFn1AOVXwj7nzwClfw89cprZJ0w,16
|
|
7
|
+
nthlayer_common-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
nthlayer_common
|