npm - @qa-gentic/stlc-agents - Versions diffs - 1.0.25 → 1.0.26 - Mend

@qa-gentic/stlc-agents 1.0.25 → 1.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

package/src/stlc_agents/shared/pricing.py CHANGED Viewed

@@ -2,13 +2,28 @@
 pricing.py  —  Model pricing registry for stlc-agents cost tracking.
 Prices: USD per million tokens (MTok).
-Source: Anthropic official docs, April 2026.
-Models this repo actually calls:
-  - claude-sonnet-4-20250514  (LocatorHealer AI Vision, default)
-  - gpt-4o                    (LocatorHealer AI Vision, copilot provider)
-  + whatever coding agent the user runs (Claude / Copilot / Cursor / Windsurf)
-    — the user declares this via STLC_CODING_AGENT_MODEL env var.
+Sources (verified April 2026):
+  Anthropic  — https://docs.anthropic.com/en/docs/about-claude/models/overview
+               cache_write = 5-min TTL rate; cache_read = 0.1× input rate
+  OpenAI     — https://openai.com/api/pricing  (standard tier)
+               columns: [model, input, cached_input, output]
+               no cache_write fee (caching is automatic)
+  xAI        — https://docs.x.ai/docs/models
+  DeepSeek   — https://api-docs.deepseek.com/quick_start/pricing
+               cache_read = cache-hit input rate (10× cheaper than cache miss)
+Column layout for ModelPricing:
+  input_per_mtok      — uncached input tokens (full rate)
+  output_per_mtok     — output / completion tokens
+  cache_write_per_mtok — Anthropic only: tokens written into the prompt cache
+  cache_read_per_mtok  — tokens served from cache (cheaper rate)
+For OpenAI the API returns prompt_tokens (total) and
+prompt_tokens_details.cached_tokens (the subset from cache).
+Split them before calling cost():
+  uncached = prompt_tokens - cached_tokens   → input_tokens arg
+  cached   = cached_tokens                   → cache_read_tokens arg
 """
 from __future__ import annotations
@@ -21,9 +36,9 @@ class ModelPricing:
     model_id: str
     display_name: str
     provider: str
-    input_per_mtok: float        # USD / 1M input tokens
+    input_per_mtok: float        # USD / 1M input tokens  (uncached)
     output_per_mtok: float       # USD / 1M output tokens
-    cache_write_per_mtok: float  # USD / 1M cache-write tokens
+    cache_write_per_mtok: float  # USD / 1M cache-write tokens  (Anthropic only)
     cache_read_per_mtok: float   # USD / 1M cache-read tokens
     def cost(
@@ -34,39 +49,115 @@ class ModelPricing:
         cache_read_tokens: int = 0,
     ) -> float:
         return (
-            (input_tokens        / 1_000_000) * self.input_per_mtok
-            + (output_tokens     / 1_000_000) * self.output_per_mtok
-            + (cache_write_tokens/ 1_000_000) * self.cache_write_per_mtok
-            + (cache_read_tokens / 1_000_000) * self.cache_read_per_mtok
+            (input_tokens         / 1_000_000) * self.input_per_mtok
+            + (output_tokens      / 1_000_000) * self.output_per_mtok
+            + (cache_write_tokens / 1_000_000) * self.cache_write_per_mtok
+            + (cache_read_tokens  / 1_000_000) * self.cache_read_per_mtok
         )
 _REGISTRY: list[ModelPricing] = [  # rows are positional: model_id, display_name, provider, then the four USD/MTok rates
-    # ── Anthropic ──────────────────────────────────────────────────────────
-    ModelPricing("claude-sonnet-4-20250514", "Claude Sonnet 4",    "anthropic",  3.00, 15.00,  3.75, 0.30),
-    ModelPricing("claude-sonnet-4-6",        "Claude Sonnet 4.6",  "anthropic",  3.00, 15.00,  3.75, 0.30),
-    ModelPricing("claude-haiku-4-5-20251001","Claude Haiku 4.5",   "anthropic",  1.00,  5.00,  1.25, 0.10),
-    ModelPricing("claude-opus-4-6",          "Claude Opus 4.6",    "anthropic",  5.00, 25.00,  6.25, 0.50),
-    ModelPricing("claude-opus-4-7",          "Claude Opus 4.7",    "anthropic",  5.00, 25.00,  6.25, 0.50),
-    # ── OpenAI / Copilot ──────────────────────────────────────────────────
-    ModelPricing("gpt-4o",                   "GPT-4o",             "openai",     2.50, 10.00,  0.00, 0.00),
-    ModelPricing("gpt-4o-mini",              "GPT-4o Mini",        "openai",     0.15,  0.60,  0.00, 0.00),
+    # ── Anthropic ─────────────────────────────────────────────────────────────
+    # Source: https://docs.anthropic.com/en/docs/about-claude/models/overview
+    # Columns (USD per MTok): input / output / cache_write (5-min TTL rate) / cache_read
+    ModelPricing("claude-sonnet-4-20250514", "Claude Sonnet 4",   "anthropic",  3.00, 15.00, 3.75, 0.30),
+    ModelPricing("claude-sonnet-4-6",        "Claude Sonnet 4.6", "anthropic",  3.00, 15.00, 3.75, 0.30),
+    ModelPricing("claude-haiku-4-5-20251001","Claude Haiku 4.5",  "anthropic",  1.00,  5.00, 1.25, 0.10),
+    ModelPricing("claude-opus-4-6",          "Claude Opus 4.6",   "anthropic",  5.00, 25.00, 6.25, 0.50),
+    ModelPricing("claude-opus-4-7",          "Claude Opus 4.7",   "anthropic",  5.00, 25.00, 6.25, 0.50),
+    # ── OpenAI / Copilot ──────────────────────────────────────────────────────
+    # Source: https://openai.com/api/pricing  (standard tier, April 2026)
+    # Columns (USD per MTok): input / output / cache_write (always 0.00 — no write fee) / cache_read (cached input)
+    # GPT-5.x family (flagship, mini and nano tiers)
+    ModelPricing("gpt-5.4",      "GPT-5.4",       "openai",  2.50, 15.00, 0.00, 0.250),
+    ModelPricing("gpt-5.4-mini", "GPT-5.4 Mini",  "openai",  0.75,  4.50, 0.00, 0.075),
+    ModelPricing("gpt-5.4-nano", "GPT-5.4 Nano",  "openai",  0.20,  1.25, 0.00, 0.020),
+    ModelPricing("gpt-5.2",      "GPT-5.2",       "openai",  1.75, 14.00, 0.00, 0.175),
+    ModelPricing("gpt-5.1",      "GPT-5.1",       "openai",  1.25, 10.00, 0.00, 0.125),
+    ModelPricing("gpt-5",        "GPT-5",         "openai",  1.25, 10.00, 0.00, 0.125),
+    ModelPricing("gpt-5-mini",   "GPT-5 Mini",    "openai",  0.25,  2.00, 0.00, 0.025),
+    ModelPricing("gpt-5-nano",   "GPT-5 Nano",    "openai",  0.05,  0.40, 0.00, 0.005),
+    # GPT-4.1 family
+    ModelPricing("gpt-4.1",      "GPT-4.1",       "openai",  2.00,  8.00, 0.00, 0.500),
+    ModelPricing("gpt-4.1-mini", "GPT-4.1 Mini",  "openai",  0.40,  1.60, 0.00, 0.100),
+    ModelPricing("gpt-4.1-nano", "GPT-4.1 Nano",  "openai",  0.10,  0.40, 0.00, 0.025),
+    # GPT-4o family
+    ModelPricing("gpt-4o",       "GPT-4o",        "openai",  2.50, 10.00, 0.00, 1.250),
+    ModelPricing("gpt-4o-mini",  "GPT-4o Mini",   "openai",  0.15,  0.60, 0.00, 0.075),
+    # o-series reasoning models
+    ModelPricing("o1",           "o1",            "openai", 15.00, 60.00, 0.00, 7.500),
+    ModelPricing("o1-mini",      "o1 mini",       "openai",  1.10,  4.40, 0.00, 0.550),
+    ModelPricing("o3",           "o3",            "openai",  2.00,  8.00, 0.00, 0.500),
+    ModelPricing("o3-mini",      "o3 mini",       "openai",  1.10,  4.40, 0.00, 0.550),
+    ModelPricing("o4-mini",      "o4 mini",       "openai",  1.10,  4.40, 0.00, 0.275),
+    # ── xAI ───────────────────────────────────────────────────────────────────
+    # Source: https://docs.x.ai/docs/models — NOTE(review): every Grok row has cache_read = 0.00; confirm cached prompt tokens are really unbilled
+    ModelPricing("grok-4",       "Grok 4",        "xai",     2.00,  6.00, 0.00, 0.00),
+    ModelPricing("grok-4-fast",  "Grok 4 Fast",   "xai",     0.20,  0.50, 0.00, 0.00),
+    ModelPricing("grok-3",       "Grok 3",        "xai",     3.00, 15.00, 0.00, 0.00),
+    ModelPricing("grok-3-mini",  "Grok 3 Mini",   "xai",     0.30,  0.50, 0.00, 0.00),
+    # ── DeepSeek ──────────────────────────────────────────────────────────────
+    # Source: https://api-docs.deepseek.com/quick_start/pricing
+    #         cache_read = cache-hit input rate ($0.028), cache_write not charged separately
+    ModelPricing("deepseek-chat",      "DeepSeek V3",       "deepseek", 0.28, 0.42, 0.00, 0.028),
+    ModelPricing("deepseek-reasoner",  "DeepSeek R1",       "deepseek", 0.28, 0.42, 0.00, 0.028),
+    ModelPricing("deepseek-r1",        "DeepSeek R1 (alt)", "deepseek", 0.28, 0.42, 0.00, 0.028),
+    # ── Local / Ollama / LM Studio ────────────────────────────────────────────
+    ModelPricing("llama3.2",     "Llama 3.2",     "ollama",    0.00,  0.00, 0.00, 0.00),
+    ModelPricing("llama3.1",     "Llama 3.1",     "ollama",    0.00,  0.00, 0.00, 0.00),
+    ModelPricing("local-model",  "Local Model",   "lm-studio", 0.00,  0.00, 0.00, 0.00),
 ]
 _by_id: dict[str, ModelPricing] = {p.model_id: p for p in _REGISTRY}  # exact-id index over the registry rows
 def get_pricing(model_id: str) -> Optional[ModelPricing]:
     """
     Resolve a model name to its ModelPricing entry.
     Lookup order on the lower-cased, stripped name (first hit wins):
       1. exact registry id
       2. longest registry id contained in the name, so dated or suffixed
          variants resolve to the most specific entry
          ("gpt-4o-mini-2024-07-18" → gpt-4o-mini, not gpt-4o)
       3. first registry id, in registry order, that contains the name
          (abbreviations such as "sonnet-4")
       4. family fallback — a same-family estimate for unknown/future
          versions (e.g. gpt-6, claude-sonnet-5), so cost does not show as
          $0.000000 just because the name is not in the registry yet
     Returns None for an empty name or a name that matches no known family.
     """
     key = model_id.lower().strip()
     # "" is a substring of every id; without this guard an empty name would
     # silently be priced as the first registry row.
     if not key:
         return None
     if key in _by_id:
         return _by_id[key]
     # Forward substring: "gpt-5" ⊆ "gpt-5.4-preview". Pick the longest id so
     # "…-mini"/"…-nano" variants are not priced as their larger base model;
     # max() keeps the earliest row when two ids have the same length.
     contained = [p for p in _REGISTRY if p.model_id in key]
     if contained:
         return max(contained, key=lambda p: len(p.model_id))
     # Reverse substring: "claude-sonnet-4-20250514" ⊇ "sonnet-4"
     for p in _REGISTRY:
         if key in p.model_id:
             return p
     # Family fallback — same-family estimate for unknown/future model versions.
     # Rules are checked top to bottom, so specific families precede generic ones.
     def _o_series(tag: str) -> bool:
         # Bare ("o4-large") or provider-prefixed ("openai/o4-large") o-series names.
         return key.startswith(tag) or f"/{tag}" in key
     local_families = ("llama", "mistral", "qwen", "gemma", "phi")
     family_rules = (
         ("claude" in key, "claude-sonnet-4-6"),
         ("gpt-5" in key, "gpt-5"),
         ("gpt-4.1" in key, "gpt-4.1"),
         ("gpt" in key, "gpt-4o"),
         (_o_series("o1"), "o1"),
         (_o_series("o3"), "o3"),
         (_o_series("o4"), "o4-mini"),
         ("grok-4" in key, "grok-4"),
         ("grok" in key, "grok-3"),
         ("deepseek" in key, "deepseek-chat"),
         (any(name in key for name in local_families), "local-model"),
     )
     for matched, fallback_id in family_rules:
         if matched:
             return _by_id.get(fallback_id)
     return None
 def list_models() -> list[ModelPricing]:
     """Return every registered ModelPricing as a new list, in registry order."""
     return [*_REGISTRY]

package/src/stlc_agents/webhook_orchestrator/__init__.py ADDED Viewed

File without changes