skill-cert 0.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adapters/__init__.py +0 -0
- adapters/anthropic_compat.py +129 -0
- adapters/base.py +106 -0
- adapters/factory.py +65 -0
- adapters/openai_compat.py +200 -0
- adapters/pricing.py +73 -0
- engine/__init__.py +4 -0
- engine/adversarial.py +262 -0
- engine/analyzer.py +622 -0
- engine/calibration.py +222 -0
- engine/config.py +265 -0
- engine/constants.py +129 -0
- engine/deadline.py +155 -0
- engine/dialogue_evaluator.py +518 -0
- engine/dialogue_runner.py +194 -0
- engine/drift.py +325 -0
- engine/envelope.py +56 -0
- engine/gotchas_flywheel.py +131 -0
- engine/grader.py +639 -0
- engine/integrations.py +110 -0
- engine/maintainability.py +581 -0
- engine/metrics.py +718 -0
- engine/multi_skill.py +356 -0
- engine/observability.py +437 -0
- engine/progressive_disclosure.py +351 -0
- engine/reliability.py +87 -0
- engine/replay.py +113 -0
- engine/report_models.py +211 -0
- engine/reporter.py +10 -0
- engine/reporters/__init__.py +11 -0
- engine/reporters/builders.py +570 -0
- engine/reporters/formatters.py +42 -0
- engine/reporters/generator.py +665 -0
- engine/runner.py +388 -0
- engine/security_probes.py +600 -0
- engine/simulator.py +132 -0
- engine/skills_bench.py +192 -0
- engine/stability.py +261 -0
- engine/stress_test.py +333 -0
- engine/testgen.py +978 -0
- engine/token_ledger.py +221 -0
- engine/trace_models.py +247 -0
- engine/trajectory_evaluator.py +217 -0
- engine/trigger_accuracy_eval.py +175 -0
- skill_cert/__init__.py +1 -0
- skill_cert/cli/__init__.py +92 -0
- skill_cert/cli/dialogue.py +79 -0
- skill_cert/cli/evals.py +645 -0
- skill_cert/cli/helpers.py +57 -0
- skill_cert/cli/main.py +273 -0
- skill_cert/cli/multi_skill.py +67 -0
- skill_cert/cli/replay.py +52 -0
- skill_cert/cli/setup.py +353 -0
- skill_cert/cli/single.py +225 -0
- skill_cert/cli/stress.py +81 -0
- skill_cert-0.5.4.dist-info/METADATA +633 -0
- skill_cert-0.5.4.dist-info/RECORD +61 -0
- skill_cert-0.5.4.dist-info/WHEEL +5 -0
- skill_cert-0.5.4.dist-info/entry_points.txt +2 -0
- skill_cert-0.5.4.dist-info/licenses/LICENSE +191 -0
- skill_cert-0.5.4.dist-info/top_level.txt +3 -0
adapters/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import time
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
|
|
7
|
+
from .base import ModelAdapter
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AnthropicCompatAdapter(ModelAdapter):
    """Adapter for endpoints that expose an Anthropic-style ``/messages`` API.

    Requests are sent through one ``requests.Session`` carrying the
    ``x-api-key`` and ``anthropic-version`` headers.
    """

    # Model identifiers listed for this adapter. No method in this class
    # consults the list.
    SUPPORTED_MODELS = [
        "qwen3.6-plus",
        "qwen3.5-plus",
        "qwen3-max-2026-01-23",
        "qwen3-coder-next",
        "qwen3-coder-plus",
        "glm-5",
        "glm-4.7",
        "kimi-k2.5",
        "MiniMax-M2.5",
    ]

    def __init__(
        self,
        base_url: str,
        api_key: str,
        model: str,
        fallback_model: str | None = None,
        rpm_limit: int = 60,
    ):
        """Store connection settings and prepare the HTTP session.

        Args:
            base_url: API root; a trailing slash is stripped.
            api_key: Sent as the ``x-api-key`` header.
            model: Model identifier placed in every request payload.
            fallback_model: Stored on the instance; not used by any method
                of this class.
            rpm_limit: Accepted for signature parity with other adapters;
                it is neither stored nor enforced here.
        """
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.model = model
        self.fallback_model = fallback_model
        self.session = requests.Session()
        self.session.headers.update(
            {
                "x-api-key": api_key,
                "Content-Type": "application/json",
                "anthropic-version": "2023-06-01",
            }
        )

    def _build_payload(
        self,
        messages: list[dict[str, str]],
        system: str | None,
    ) -> dict[str, Any]:
        """Build the request body shared by chat() and chat_with_usage().

        Only ``user`` and ``assistant`` messages are forwarded; any other
        role is dropped. The system prompt goes in the top-level ``system``
        field when it is non-empty.
        """
        formatted = [
            {"role": m["role"], "content": m["content"]}
            for m in messages
            if m["role"] in ("user", "assistant")
        ]
        payload: dict[str, Any] = {
            "model": self.model,
            "max_tokens": 8192,
            "messages": formatted,
        }
        if system:
            payload["system"] = system
        return payload

    def chat(
        self,
        messages: list[dict[str, str]],
        system: str | None = None,
        timeout: int = 120,
    ) -> str:
        """Send one conversation and return the reply text.

        Args:
            messages: Dicts with ``role`` and ``content`` keys.
            system: Optional system prompt.
            timeout: Per-request timeout in seconds.

        Returns:
            The reply text (see _request_with_usage for how it is chosen).
        """
        return self._request(self._build_payload(messages, system), timeout=timeout)

    def chat_with_usage(
        self,
        messages: list[dict[str, str]],
        system: str | None = None,
        timeout: int = 120,
    ) -> tuple[str, dict[str, int]]:
        """Send one conversation and return ``(reply_text, token_usage)``.

        The usage dict has ``prompt_tokens``, ``completion_tokens`` and
        ``total_tokens`` keys.
        """
        return self._request_with_usage(
            self._build_payload(messages, system), timeout=timeout
        )

    def batch_chat(
        self,
        requests: list[dict[str, Any]],
        max_concurrency: int = 5,
    ) -> list[str]:
        """Run several chat requests one after another.

        Requests are processed sequentially; ``max_concurrency`` is accepted
        for interface compatibility and is not used. A failing request does
        not abort the batch: its slot holds ``"ERROR: <exception>"``.

        Args:
            requests: Dicts with optional ``messages``, ``system`` and
                ``timeout`` keys. (The parameter name shadows the
                ``requests`` module inside this method only.)
            max_concurrency: Unused.

        Returns:
            One string per request, in request order.
        """
        results = []
        for req in requests:
            try:
                results.append(
                    self.chat(
                        req.get("messages", []),
                        req.get("system"),
                        req.get("timeout", 120),
                    )
                )
            except Exception as e:
                results.append(f"ERROR: {e}")
        return results

    def _request(
        self,
        payload: dict[str, Any],
        max_retries: int = 3,
        timeout: int = 120,
    ) -> str:
        """POST ``payload`` and return only the reply text."""
        content, _ = self._request_with_usage(payload, max_retries, timeout)
        return content

    def _request_with_usage(
        self,
        payload: dict[str, Any],
        max_retries: int = 3,
        timeout: int = 120,
    ) -> tuple[str, dict[str, int]]:
        """POST ``payload`` to ``{base_url}/messages`` with retries.

        Any exception (transport error, HTTP error status, malformed body)
        triggers a retry after ``2**attempt`` seconds. The exception from
        the last attempt is re-raised unchanged.

        Args:
            payload: JSON body to send.
            max_retries: Total number of attempts.
            timeout: Per-attempt timeout in seconds.

        Returns:
            ``(text, usage)``. ``text`` is the first ``text`` content block;
            if there is none, or it is empty, the whole response is returned
            as a JSON string. ``usage`` maps the response's ``input_tokens``
            and ``output_tokens`` onto ``prompt_tokens`` and
            ``completion_tokens``, plus their sum as ``total_tokens``.

        Raises:
            RuntimeError: Only when ``max_retries`` is 0, so that no attempt
                is made at all.
        """
        for attempt in range(max_retries):
            try:
                resp = self.session.post(
                    f"{self.base_url}/messages",
                    json=payload,
                    timeout=timeout,
                )
                resp.raise_for_status()
                data = resp.json()
                content = next(
                    (
                        block["text"]
                        for block in data.get("content", [])
                        if block.get("type") == "text"
                    ),
                    "",
                )
                if not content:
                    content = json.dumps(data)
                # "usage" may be absent or null; treat both as no counts.
                usage = data.get("usage") or {}
                prompt_tokens = usage.get("input_tokens", 0)
                completion_tokens = usage.get("output_tokens", 0)
                token_data = {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "total_tokens": prompt_tokens + completion_tokens,
                }
                return content, token_data
            except Exception:
                if attempt == max_retries - 1:
                    raise
                time.sleep(2**attempt)
        raise RuntimeError("Request failed after retries")
|
adapters/base.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class TokenUsage:
    """Token counts for a single model call."""

    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0

    def to_dict(self) -> dict[str, int]:
        """Return the three counters as a plain dict keyed by field name."""
        return {
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": self.total_tokens,
        }

    @classmethod
    def from_dict(cls, data: dict[str, int]) -> "TokenUsage":
        """Build a TokenUsage from a usage mapping.

        Accepts this class's own keys (``input_tokens`` / ``output_tokens``)
        and the ``prompt_tokens`` / ``completion_tokens`` keys that the
        adapters' ``chat_with_usage`` methods return. When both spellings
        are present the ``input_tokens`` / ``output_tokens`` value wins.
        Missing counters default to 0.
        """
        return cls(
            input_tokens=data.get("input_tokens", data.get("prompt_tokens", 0)),
            output_tokens=data.get("output_tokens", data.get("completion_tokens", 0)),
            total_tokens=data.get("total_tokens", 0),
        )
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class LLMResponse:
    """A model reply together with optional token usage and latency."""

    text: str
    token_usage: TokenUsage | None = None
    latency_ms: float = 0.0

    def to_dict(self) -> dict[str, Any]:
        """Return the response as a plain dict.

        ``token_usage`` is converted through its own ``to_dict()`` when
        set, and is ``None`` otherwise.
        """
        usage_dict = None
        if self.token_usage:
            usage_dict = self.token_usage.to_dict()
        return {
            "text": self.text,
            "token_usage": usage_dict,
            "latency_ms": self.latency_ms,
        }
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ModelAdapter(ABC):
|
|
43
|
+
"""
|
|
44
|
+
Abstract base class for model adapters.
|
|
45
|
+
Defines the interface for interacting with different LLM providers.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
@abstractmethod
|
|
49
|
+
def chat(
|
|
50
|
+
self,
|
|
51
|
+
messages: list[dict[str, str]],
|
|
52
|
+
system: str | None = None,
|
|
53
|
+
timeout: int = 120,
|
|
54
|
+
) -> str:
|
|
55
|
+
"""
|
|
56
|
+
Send a chat request to the model.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
messages: List of message dictionaries with 'role' and 'content'
|
|
60
|
+
system: Optional system message
|
|
61
|
+
timeout: Request timeout in seconds
|
|
62
|
+
|
|
63
|
+
Returns:
|
|
64
|
+
Model response as a string
|
|
65
|
+
"""
|
|
66
|
+
pass
|
|
67
|
+
|
|
68
|
+
def chat_with_usage(
|
|
69
|
+
self,
|
|
70
|
+
messages: list[dict[str, str]],
|
|
71
|
+
system: str | None = None,
|
|
72
|
+
timeout: int = 120,
|
|
73
|
+
) -> tuple[str, dict[str, int]]:
|
|
74
|
+
"""
|
|
75
|
+
Send a chat request and return both content and token usage.
|
|
76
|
+
Default fallback: uses chat() and estimates usage.
|
|
77
|
+
Override in subclasses for real token counts.
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
Tuple of (response_text, {
|
|
81
|
+
"prompt_tokens": N,
|
|
82
|
+
"completion_tokens": N,
|
|
83
|
+
"total_tokens": N,
|
|
84
|
+
})
|
|
85
|
+
"""
|
|
86
|
+
content = self.chat(messages, system, timeout)
|
|
87
|
+
estimated = len(content.split()) if content else 0
|
|
88
|
+
return content, {
|
|
89
|
+
"prompt_tokens": 0,
|
|
90
|
+
"completion_tokens": estimated,
|
|
91
|
+
"total_tokens": estimated,
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
@abstractmethod
|
|
95
|
+
def batch_chat(self, requests: list[dict[str, Any]], max_concurrency: int = 5) -> list[str]:
|
|
96
|
+
"""
|
|
97
|
+
Send multiple chat requests concurrently.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
requests: List of request dictionaries containing messages, system, timeout
|
|
101
|
+
max_concurrency: Maximum number of concurrent requests
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
List of model responses in the same order as requests
|
|
105
|
+
"""
|
|
106
|
+
pass
|
adapters/factory.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Factory for creating model adapters by auto-detecting the correct type from model name."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from engine.config import ModelConfig
|
|
6
|
+
|
|
7
|
+
from .anthropic_compat import AnthropicCompatAdapter
|
|
8
|
+
from .base import ModelAdapter
|
|
9
|
+
from .openai_compat import OpenAICompatAdapter
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _is_known_provider(model_name: str) -> bool:
|
|
15
|
+
"""Check if a model name matches a known provider pattern (case-insensitive)."""
|
|
16
|
+
return any(kw in model_name for kw in ("claude", "qwen", "deepseek"))
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def create_adapter(model_config: ModelConfig, rpm_limit: int = 60) -> ModelAdapter:
    """Build the adapter that matches the configured model name.

    Detection is a case-insensitive substring test on ``model_name``:
        - contains "claude"   → AnthropicCompatAdapter
        - contains "qwen"     → OpenAICompatAdapter (Qwen uses OpenAI-compat API)
        - contains "deepseek" → OpenAICompatAdapter
        - anything else       → OpenAICompatAdapter, with a warning logged

    Args:
        model_config: ModelConfig with base_url, api_key, model_name, and optional fallback fields.
        rpm_limit: Rate limit in requests per minute (default: 60).

    Returns:
        An initialized ModelAdapter instance.
    """
    lowered = model_config.model_name.lower()

    if "claude" in lowered:
        logger.info("Detected Anthropic-compatible model: %s", model_config.model_name)
        return AnthropicCompatAdapter(
            base_url=model_config.base_url,
            api_key=model_config.api_key,
            model=model_config.model_name,
            fallback_model=model_config.fallback_model,
            rpm_limit=rpm_limit,
        )

    # Every remaining name gets the OpenAI-compatible adapter; only the log
    # line differs between the recognised families and the default case.
    openai_families = (
        ("qwen", "Detected Qwen model (OpenAI-compatible): %s"),
        ("deepseek", "Detected DeepSeek model (OpenAI-compatible): %s"),
    )
    for keyword, message in openai_families:
        if keyword in lowered:
            logger.info(message, model_config.model_name)
            break
    else:
        logger.warning(
            "Unknown model name '%s', falling back to OpenAICompatAdapter",
            model_config.model_name,
        )

    adapter_kwargs = {
        "base_url": model_config.base_url,
        "api_key": model_config.api_key,
        # NOTE(review): the primary model prefers fallback_model over
        # model_name when provider_model is unset — confirm this precedence
        # is intended.
        "model": model_config.provider_model
        or model_config.fallback_model
        or model_config.model_name,
        "fallback_model": model_config.fallback_model,
        "fallback_base_url": model_config.fallback_base_url,
        "fallback_api_key": model_config.fallback_api_key,
        "rpm_limit": rpm_limit,
    }
    return OpenAICompatAdapter(**adapter_kwargs)
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
import httpx
|
|
5
|
+
|
|
6
|
+
from .base import ModelAdapter
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class OpenAICompatAdapter(ModelAdapter):
    """Adapter for OpenAI-compatible ``/chat/completions`` APIs.

    All calls go through one synchronous ``httpx.Client``, which is also
    shared by the worker threads of ``batch_chat``. When a complete fallback
    (model, base URL and API key) is configured, a connection-level failure
    on the primary endpoint is retried once against the fallback endpoint.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str,
        model: str,
        fallback_model: str | None = None,
        fallback_base_url: str | None = None,
        fallback_api_key: str | None = None,
        rpm_limit: int = 60,
    ):
        """Store endpoint settings and create the HTTP client.

        Args:
            base_url: Primary API root; a trailing slash is stripped.
            api_key: Bearer token for the primary endpoint.
            model: Model identifier sent to the primary endpoint.
            fallback_model: Model to use on the fallback endpoint.
            fallback_base_url: Fallback API root; a trailing slash is stripped.
            fallback_api_key: Bearer token for the fallback endpoint.
            rpm_limit: Accepted for signature parity with other adapters;
                it is neither stored nor enforced here.
        """
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.model = model
        self.fallback_model = fallback_model
        self.fallback_base_url = fallback_base_url.rstrip("/") if fallback_base_url else None
        self.fallback_api_key = fallback_api_key
        # The fallback is used only when all three of its settings are present.
        self._has_fallback = bool(
            self.fallback_model and self.fallback_base_url and self.fallback_api_key
        )
        # Force HTTP/1.1 to avoid HTTP/2 negotiation issues with some proxies
        self.client = httpx.Client(
            timeout=httpx.Timeout(120.0),
            http1=True,
            http2=False,
        )

    @staticmethod
    def _extract_error_detail(response: httpx.Response) -> str:
        """Extract error detail from API response body, if available.

        Looks for ``error.message`` and then ``message`` in the JSON body.
        If the body is not JSON or has another shape, the first 200
        characters of the raw text are returned instead.
        """
        try:
            body = response.json()
            msg = body.get("error", {}).get("message", "") or body.get("message", "")
            if msg:
                return msg[:200]
        except Exception:
            # Best effort only: fall through to the raw body text.
            pass
        return response.text[:200] if response.text else "No detail provided"

    def _parse_completion(self, response: Any, model: str) -> tuple[str, dict[str, int]]:
        """Turn a chat-completions HTTP response into ``(content, usage)``.

        Used for both the httpx response and the ``requests`` response, so
        both transports report errors the same way.

        Raises:
            RuntimeError: For status 400, 401, 404 and 429.
            Exception: Whatever ``response.raise_for_status()`` raises for
                any other error status.
        """
        status = response.status_code
        if status == 401:
            raise RuntimeError("Invalid API key")
        if status == 400:
            detail = self._extract_error_detail(response)
            raise RuntimeError(
                f"API returned 400 for model '{model}'. "
                f"Verify the model name matches the API endpoint's expected format. "
                f"Detail: {detail}"
            )
        if status == 404:
            raise RuntimeError("Model not found")
        if status == 429:
            raise RuntimeError("Insufficient quota")
        response.raise_for_status()

        result = response.json()
        content = result["choices"][0]["message"]["content"]
        # "usage" may be absent or null; treat both as no counts.
        usage = result.get("usage") or {}
        token_data = {
            "prompt_tokens": usage.get("prompt_tokens", 0),
            "completion_tokens": usage.get("completion_tokens", 0),
            "total_tokens": usage.get("total_tokens", 0),
        }
        return content, token_data

    def _call_with_usage(
        self,
        messages: list[dict[str, str]],
        model: str,
        timeout: int,
        base_url: str | None = None,
        api_key: str | None = None,
        use_requests_fallback: bool = False,
    ) -> tuple[str, dict[str, int]]:
        """POST one chat-completions request and parse the reply.

        Args:
            messages: Messages to send, already including any system message.
            model: Model identifier for the payload.
            timeout: Per-request timeout in seconds.
            base_url: Overrides ``self.base_url`` when given.
            api_key: Overrides ``self.api_key`` when given.
            use_requests_fallback: When False, an ``httpx.ConnectError`` is
                retried once through the ``requests`` library. When True,
                the ``ConnectError`` is re-raised to the caller.
                NOTE(review): the name suggests the opposite meaning, and
                every caller in this class passes True, so the ``requests``
                path never runs from chat() — confirm the intended polarity.

        Returns:
            ``(content, usage)`` where usage has ``prompt_tokens``,
            ``completion_tokens`` and ``total_tokens``.
        """
        use_base = base_url or self.base_url
        use_key = api_key or self.api_key

        url = f"{use_base}/chat/completions"
        headers = {"Authorization": f"Bearer {use_key}", "Content-Type": "application/json"}
        payload = {"model": model, "messages": messages, "temperature": 0.0}

        try:
            response = self.client.post(url, headers=headers, json=payload, timeout=timeout)
        except httpx.ConnectError as e:
            if use_requests_fallback:
                raise
            # Fallback to requests when httpx SSL fails (corporate proxy compatibility)
            logger.warning("httpx SSL failed, falling back to requests: %s", e)
            import requests as _requests

            response = _requests.post(  # type: ignore[assignment]
                url, headers=headers, json=payload, timeout=timeout
            )

        return self._parse_completion(response, model)

    def _call_with_usage_sync(
        self, messages: list[dict[str, str]], system: str | None = None, timeout: int = 120
    ) -> tuple[str, dict[str, int]]:
        """Prepend the optional system message, then call the primary model."""
        prepared_messages = []
        if system:
            prepared_messages.append({"role": "system", "content": system})
        prepared_messages.extend(messages)

        return self._call_with_usage_with_fallback(prepared_messages, self.model, timeout)

    def _call_with_usage_with_fallback(
        self, messages: list[dict[str, str]], model: str, timeout: int
    ) -> tuple[str, dict[str, int]]:
        """Call the primary endpoint; on a connection failure try the fallback.

        Only ``httpx.ConnectError``, ``httpx.ConnectTimeout`` and ``OSError``
        trigger the fallback. HTTP error statuses do not. Without a complete
        fallback configuration the original exception is re-raised.
        """
        try:
            return self._call_with_usage(messages, model, timeout, use_requests_fallback=True)
        except (httpx.ConnectError, httpx.ConnectTimeout, OSError) as e:
            if not self._has_fallback:
                raise
            logger.warning(
                "Primary endpoint unreachable (%s, %s), falling back to %s/%s: %s",
                self.base_url,
                self.model,
                self.fallback_base_url,
                self.fallback_model,
                e,
            )
            return self._call_with_usage(
                messages,
                self.fallback_model or model,
                timeout,
                base_url=self.fallback_base_url,
                api_key=self.fallback_api_key,
                use_requests_fallback=True,
            )

    def chat(
        self,
        messages: list[dict[str, str]],
        system: str | None = None,
        timeout: int = 120,
    ) -> str:
        """Send one conversation and return only the reply text."""
        content, _ = self._call_with_usage_sync(messages, system, timeout)
        return content

    def chat_with_usage(
        self,
        messages: list[dict[str, str]],
        system: str | None = None,
        timeout: int = 120,
    ) -> tuple[str, dict[str, int]]:
        """Send one conversation and return ``(reply_text, token_usage)``."""
        return self._call_with_usage_sync(messages, system, timeout)

    def batch_chat(self, requests: list[dict[str, Any]], max_concurrency: int = 5) -> list[str]:
        """Run several chat requests on a thread pool.

        Args:
            requests: Dicts with optional ``messages``, ``system`` and
                ``timeout`` keys (timeout defaults to 120).
            max_concurrency: Size of the thread pool.

        Returns:
            Replies in the same order as ``requests``.

        Raises:
            Exception: The exception of the first failed request to
                complete; no partial results are returned.
        """
        from concurrent.futures import ThreadPoolExecutor, as_completed

        with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
            futures = {
                executor.submit(
                    self.chat,
                    req.get("messages", []),
                    req.get("system"),
                    req.get("timeout", 120),
                ): i
                for i, req in enumerate(requests)
            }
            # Futures finish in arbitrary order; key results by request index.
            result_map: dict[int, str] = {}
            for future in as_completed(futures):
                result_map[futures[future]] = future.result()
        return [result_map[i] for i in range(len(requests))]

    def __del__(self):
        """Close the HTTP client on garbage collection (best effort)."""
        try:
            self.client.close()
        except Exception:
            # The client may be missing or already closed; nothing to do.
            pass
|
adapters/pricing.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Model pricing table — converts token usage to $ cost."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
_MODEL_PRICING = {
|
|
8
|
+
# Anthropic Claude family (per 1M tokens)
|
|
9
|
+
"claude-sonnet-4-5-20250514": {"input_per_m": 3.0, "output_per_m": 15.0},
|
|
10
|
+
"claude-sonnet-4-20250514": {"input_per_m": 3.0, "output_per_m": 15.0},
|
|
11
|
+
"claude-opus-4-20250514": {"input_per_m": 15.0, "output_per_m": 75.0},
|
|
12
|
+
"claude-opus-4-1-20250805": {"input_per_m": 15.0, "output_per_m": 75.0},
|
|
13
|
+
"claude-haiku-4-20250514": {"input_per_m": 0.8, "output_per_m": 4.0},
|
|
14
|
+
# OpenAI GPT family
|
|
15
|
+
"gpt-5": {"input_per_m": 1.25, "output_per_m": 10.0},
|
|
16
|
+
"gpt-5-mini": {"input_per_m": 0.25, "output_per_m": 2.0},
|
|
17
|
+
"gpt-4o": {"input_per_m": 2.5, "output_per_m": 10.0},
|
|
18
|
+
"gpt-4o-mini": {"input_per_m": 0.15, "output_per_m": 0.6},
|
|
19
|
+
# Qwen family
|
|
20
|
+
"qwen3.6-plus": {"input_per_m": 0.3, "output_per_m": 0.9},
|
|
21
|
+
"qwen3.5-plus": {"input_per_m": 0.3, "output_per_m": 0.9},
|
|
22
|
+
"qwen3-coder-plus": {"input_per_m": 0.3, "output_per_m": 0.9},
|
|
23
|
+
"qwen3-coder-next": {"input_per_m": 0.4, "output_per_m": 1.2},
|
|
24
|
+
# DeepSeek
|
|
25
|
+
"deepseek-chat": {"input_per_m": 0.14, "output_per_m": 0.56},
|
|
26
|
+
"deepseek-reasoner": {"input_per_m": 0.55, "output_per_m": 2.19},
|
|
27
|
+
# Google Gemini
|
|
28
|
+
"gemini-2.5-pro": {"input_per_m": 1.25, "output_per_m": 10.0},
|
|
29
|
+
"gemini-2.5-flash": {"input_per_m": 0.15, "output_per_m": 0.6},
|
|
30
|
+
# Whalecloud LOCAL (free — local deployment)
|
|
31
|
+
"LOCAL/Qwen3.5-122B-A10B": {"input_per_m": 0.0, "output_per_m": 0.0},
|
|
32
|
+
"LOCAL/MiniMax-M2.7": {"input_per_m": 0.0, "output_per_m": 0.0},
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ModelPricing:
|
|
37
|
+
def __init__(self):
|
|
38
|
+
self.models: dict[str, dict[str, float]] = dict(_MODEL_PRICING)
|
|
39
|
+
|
|
40
|
+
def get_model_price(self, model_name: str) -> dict[str, float] | None:
|
|
41
|
+
price = self.models.get(model_name)
|
|
42
|
+
if price is not None:
|
|
43
|
+
return price
|
|
44
|
+
for known_name, known_price in self.models.items():
|
|
45
|
+
if model_name.startswith(known_name):
|
|
46
|
+
logger.warning(
|
|
47
|
+
"Model '%s' not in pricing table, falling back to prefix match '%s'",
|
|
48
|
+
model_name,
|
|
49
|
+
known_name,
|
|
50
|
+
)
|
|
51
|
+
return known_price
|
|
52
|
+
return None
|
|
53
|
+
|
|
54
|
+
def add_model(self, model_name: str, input_per_m: float, output_per_m: float):
|
|
55
|
+
self.models[model_name] = {"input_per_m": input_per_m, "output_per_m": output_per_m}
|
|
56
|
+
|
|
57
|
+
def calculate_cost(self, prompt_tokens: int, completion_tokens: int, model_name: str) -> float:
|
|
58
|
+
price = self.get_model_price(model_name)
|
|
59
|
+
if price is None:
|
|
60
|
+
return 0.0
|
|
61
|
+
return (prompt_tokens / 1_000_000) * price["input_per_m"] + (
|
|
62
|
+
completion_tokens / 1_000_000
|
|
63
|
+
) * price["output_per_m"]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Module-wide ModelPricing instance; stays None until get_pricing() is called.
_pricing_instance: ModelPricing | None = None


def get_pricing() -> ModelPricing:
    """Return the shared ModelPricing, creating it on the first call."""
    global _pricing_instance
    if _pricing_instance is not None:
        return _pricing_instance
    _pricing_instance = ModelPricing()
    return _pricing_instance
|
engine/__init__.py
ADDED