nirixa 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nirixa-0.1.0/PKG-INFO +142 -0
- nirixa-0.1.0/README.md +93 -0
- nirixa-0.1.0/nirixa/__init__.py +40 -0
- nirixa-0.1.0/nirixa/client.py +326 -0
- nirixa-0.1.0/nirixa/middleware.py +180 -0
- nirixa-0.1.0/nirixa/scorer.py +119 -0
- nirixa-0.1.0/nirixa.egg-info/PKG-INFO +142 -0
- nirixa-0.1.0/nirixa.egg-info/SOURCES.txt +11 -0
- nirixa-0.1.0/nirixa.egg-info/dependency_links.txt +1 -0
- nirixa-0.1.0/nirixa.egg-info/requires.txt +31 -0
- nirixa-0.1.0/nirixa.egg-info/top_level.txt +1 -0
- nirixa-0.1.0/setup.cfg +4 -0
- nirixa-0.1.0/setup.py +42 -0
nirixa-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nirixa
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI Observability & Cost Intelligence — track token costs, latency, and hallucination risk
|
|
5
|
+
Home-page: https://nirixa.in
|
|
6
|
+
Author: Nirixa
|
|
7
|
+
Author-email: nirixaai@gmail.com
|
|
8
|
+
Keywords: llm observability openai anthropic groq gemini mistral ollama cost monitoring hallucination
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: requests>=2.28.0
|
|
16
|
+
Provides-Extra: openai
|
|
17
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
18
|
+
Provides-Extra: anthropic
|
|
19
|
+
Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
|
|
20
|
+
Provides-Extra: groq
|
|
21
|
+
Requires-Dist: groq>=0.4.0; extra == "groq"
|
|
22
|
+
Provides-Extra: gemini
|
|
23
|
+
Requires-Dist: google-generativeai>=0.5.0; extra == "gemini"
|
|
24
|
+
Provides-Extra: mistral
|
|
25
|
+
Requires-Dist: mistralai>=1.0.0; extra == "mistral"
|
|
26
|
+
Provides-Extra: together
|
|
27
|
+
Requires-Dist: together>=1.0.0; extra == "together"
|
|
28
|
+
Provides-Extra: ollama
|
|
29
|
+
Requires-Dist: ollama>=0.1.0; extra == "ollama"
|
|
30
|
+
Provides-Extra: all
|
|
31
|
+
Requires-Dist: openai>=1.0.0; extra == "all"
|
|
32
|
+
Requires-Dist: anthropic>=0.20.0; extra == "all"
|
|
33
|
+
Requires-Dist: groq>=0.4.0; extra == "all"
|
|
34
|
+
Requires-Dist: google-generativeai>=0.5.0; extra == "all"
|
|
35
|
+
Requires-Dist: mistralai>=1.0.0; extra == "all"
|
|
36
|
+
Requires-Dist: together>=1.0.0; extra == "all"
|
|
37
|
+
Requires-Dist: ollama>=0.1.0; extra == "all"
|
|
38
|
+
Dynamic: author
|
|
39
|
+
Dynamic: author-email
|
|
40
|
+
Dynamic: classifier
|
|
41
|
+
Dynamic: description
|
|
42
|
+
Dynamic: description-content-type
|
|
43
|
+
Dynamic: home-page
|
|
44
|
+
Dynamic: keywords
|
|
45
|
+
Dynamic: provides-extra
|
|
46
|
+
Dynamic: requires-dist
|
|
47
|
+
Dynamic: requires-python
|
|
48
|
+
Dynamic: summary
|
|
49
|
+
|
|
50
|
+
# nirixa
|
|
51
|
+
|
|
52
|
+
**AI Observability & Cost Intelligence** — track token costs, latency, and hallucination risk for every LLM call.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install nirixa
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from nirixa import NirixaClient
|
|
62
|
+
import openai
|
|
63
|
+
|
|
64
|
+
client = NirixaClient(api_key="nirixa-your-key")
|
|
65
|
+
|
|
66
|
+
# Wrap any LLM call — zero changes to your existing code
|
|
67
|
+
response = client.track(
|
|
68
|
+
feature="/api/chat",
|
|
69
|
+
fn=lambda: openai.chat.completions.create(
|
|
70
|
+
model="gpt-4o",
|
|
71
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
72
|
+
)
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
# response is the original OpenAI response — unchanged
|
|
76
|
+
print(response.choices[0].message.content)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Module-level API
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import nirixa
|
|
83
|
+
import openai
|
|
84
|
+
|
|
85
|
+
nirixa.init(api_key="nirixa-your-key")
|
|
86
|
+
|
|
87
|
+
response = nirixa.track(
|
|
88
|
+
feature="/api/summarize",
|
|
89
|
+
fn=lambda: openai.chat.completions.create(...)
|
|
90
|
+
)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Auto-patch (track everything automatically)
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from nirixa import NirixaClient
|
|
97
|
+
from nirixa.middleware import patch_openai
|
|
98
|
+
|
|
99
|
+
client = NirixaClient(api_key="nirixa-your-key")
|
|
100
|
+
patch_openai(client, feature="/api/chat")
|
|
101
|
+
|
|
102
|
+
# All openai calls now tracked automatically — no changes needed
|
|
103
|
+
import openai
|
|
104
|
+
openai.chat.completions.create(...)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Anthropic Support
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
from nirixa import NirixaClient
|
|
111
|
+
import anthropic
|
|
112
|
+
|
|
113
|
+
client = NirixaClient(api_key="nirixa-your-key")
|
|
114
|
+
claude = anthropic.Anthropic()
|
|
115
|
+
|
|
116
|
+
response = client.track(
|
|
117
|
+
feature="/api/analyze",
|
|
118
|
+
model="claude-3-5-sonnet-20241022",
|
|
119
|
+
fn=lambda: claude.messages.create(
|
|
120
|
+
model="claude-3-5-sonnet-20241022",
|
|
121
|
+
max_tokens=1024,
|
|
122
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
123
|
+
)
|
|
124
|
+
)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## What gets tracked
|
|
128
|
+
|
|
129
|
+
| Metric | Description |
|
|
130
|
+
|--------|-------------|
|
|
131
|
+
| Token cost | Per-call USD cost by feature and model |
|
|
132
|
+
| Latency | p50/p95/p99 response times |
|
|
133
|
+
| Hallucination risk | LOW / MEDIUM / HIGH scoring |
|
|
134
|
+
| Prompt drift | Output variance over time |
|
|
135
|
+
| Error rate | Failed calls by endpoint |
|
|
136
|
+
|
|
137
|
+
## Dashboard
|
|
138
|
+
|
|
139
|
+
View all your data at [nirixa.in](https://nirixa.in)
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
निरीक्षा — Observe everything.
|
nirixa-0.1.0/README.md
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# nirixa
|
|
2
|
+
|
|
3
|
+
**AI Observability & Cost Intelligence** — track token costs, latency, and hallucination risk for every LLM call.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install nirixa
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## Quick Start
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from nirixa import NirixaClient
|
|
13
|
+
import openai
|
|
14
|
+
|
|
15
|
+
client = NirixaClient(api_key="nirixa-your-key")
|
|
16
|
+
|
|
17
|
+
# Wrap any LLM call — zero changes to your existing code
|
|
18
|
+
response = client.track(
|
|
19
|
+
feature="/api/chat",
|
|
20
|
+
fn=lambda: openai.chat.completions.create(
|
|
21
|
+
model="gpt-4o",
|
|
22
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
23
|
+
)
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
# response is the original OpenAI response — unchanged
|
|
27
|
+
print(response.choices[0].message.content)
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Module-level API
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import nirixa
|
|
34
|
+
import openai
|
|
35
|
+
|
|
36
|
+
nirixa.init(api_key="nirixa-your-key")
|
|
37
|
+
|
|
38
|
+
response = nirixa.track(
|
|
39
|
+
feature="/api/summarize",
|
|
40
|
+
fn=lambda: openai.chat.completions.create(...)
|
|
41
|
+
)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Auto-patch (track everything automatically)
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from nirixa import NirixaClient
|
|
48
|
+
from nirixa.middleware import patch_openai
|
|
49
|
+
|
|
50
|
+
client = NirixaClient(api_key="nirixa-your-key")
|
|
51
|
+
patch_openai(client, feature="/api/chat")
|
|
52
|
+
|
|
53
|
+
# All openai calls now tracked automatically — no changes needed
|
|
54
|
+
import openai
|
|
55
|
+
openai.chat.completions.create(...)
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Anthropic Support
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from nirixa import NirixaClient
|
|
62
|
+
import anthropic
|
|
63
|
+
|
|
64
|
+
client = NirixaClient(api_key="nirixa-your-key")
|
|
65
|
+
claude = anthropic.Anthropic()
|
|
66
|
+
|
|
67
|
+
response = client.track(
|
|
68
|
+
feature="/api/analyze",
|
|
69
|
+
model="claude-3-5-sonnet-20241022",
|
|
70
|
+
fn=lambda: claude.messages.create(
|
|
71
|
+
model="claude-3-5-sonnet-20241022",
|
|
72
|
+
max_tokens=1024,
|
|
73
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
74
|
+
)
|
|
75
|
+
)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## What gets tracked
|
|
79
|
+
|
|
80
|
+
| Metric | Description |
|
|
81
|
+
|--------|-------------|
|
|
82
|
+
| Token cost | Per-call USD cost by feature and model |
|
|
83
|
+
| Latency | p50/p95/p99 response times |
|
|
84
|
+
| Hallucination risk | LOW / MEDIUM / HIGH scoring |
|
|
85
|
+
| Prompt drift | Output variance over time |
|
|
86
|
+
| Error rate | Failed calls by endpoint |
|
|
87
|
+
|
|
88
|
+
## Dashboard
|
|
89
|
+
|
|
90
|
+
View all your data at [nirixa.in](https://nirixa.in)
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
निरीक्षा — Observe everything.
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# nirixa/__init__.py
|
|
2
|
+
|
|
3
|
+
from .client import NirixaClient
|
|
4
|
+
|
|
5
|
+
__version__ = "0.1.0"
|
|
6
|
+
__all__ = ["NirixaClient", "track"]
|
|
7
|
+
|
|
8
|
+
# Module-level singleton — optional convenience API
|
|
9
|
+
_client: NirixaClient | None = None
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def init(api_key: str, host: str = "https://api.nirixa.in", **kwargs):
|
|
13
|
+
"""
|
|
14
|
+
Initialize module-level client for simple usage.
|
|
15
|
+
|
|
16
|
+
Usage:
|
|
17
|
+
import nirixa
|
|
18
|
+
nirixa.init(api_key="nirixa-xxx")
|
|
19
|
+
nirixa.track(feature="/api/chat", fn=lambda: openai.chat.completions.create(...))
|
|
20
|
+
"""
|
|
21
|
+
global _client
|
|
22
|
+
_client = NirixaClient(api_key=api_key, host=host, **kwargs)
|
|
23
|
+
return _client
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def track(feature: str, fn, **kwargs):
|
|
27
|
+
"""
|
|
28
|
+
Track an LLM call using the module-level client.
|
|
29
|
+
Call nirixa.init() first.
|
|
30
|
+
"""
|
|
31
|
+
if _client is None:
|
|
32
|
+
raise RuntimeError("Call nirixa.init(api_key='...') before using nirixa.track()")
|
|
33
|
+
return _client.track(feature=feature, fn=fn, **kwargs)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_client() -> NirixaClient:
|
|
37
|
+
"""Get the module-level client."""
|
|
38
|
+
if _client is None:
|
|
39
|
+
raise RuntimeError("Call nirixa.init(api_key='...') first")
|
|
40
|
+
return _client
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
# nirixa/client.py
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
import uuid
|
|
5
|
+
import threading
|
|
6
|
+
import requests
|
|
7
|
+
from typing import Callable, Optional, Any
|
|
8
|
+
from . import scorer as halluc_scorer
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class NirixaClient:
|
|
12
|
+
"""
|
|
13
|
+
Main Nirixa client. Track any LLM call with one line.
|
|
14
|
+
|
|
15
|
+
Supported providers:
|
|
16
|
+
- OpenAI (gpt-4o, gpt-4o-mini, gpt-3.5-turbo, ...)
|
|
17
|
+
- Anthropic (claude-3-5-sonnet, claude-3-opus, ...)
|
|
18
|
+
- Google (gemini-1.5-pro, gemini-2.0-flash, ...)
|
|
19
|
+
- Groq (llama-3.1, mixtral, gemma, ...)
|
|
20
|
+
- Mistral (mistral-large, mistral-small, ...)
|
|
21
|
+
- Together AI (llama, qwen, deepseek, ...)
|
|
22
|
+
- Ollama (llama3, mistral, phi3, local models)
|
|
23
|
+
- AWS Bedrock (claude, llama, titan, ...)
|
|
24
|
+
|
|
25
|
+
Usage:
|
|
26
|
+
client = NirixaClient(api_key="nirixa-xxx")
|
|
27
|
+
result = client.track(
|
|
28
|
+
feature="/api/chat",
|
|
29
|
+
fn=lambda: openai.chat.completions.create(...)
|
|
30
|
+
)
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
def __init__(
|
|
34
|
+
self,
|
|
35
|
+
api_key: str,
|
|
36
|
+
host: str = "https://api.nirixa.in",
|
|
37
|
+
score_hallucinations: bool = True,
|
|
38
|
+
async_ingest: bool = True,
|
|
39
|
+
debug: bool = False,
|
|
40
|
+
):
|
|
41
|
+
self.api_key = api_key
|
|
42
|
+
self.host = host.rstrip("/")
|
|
43
|
+
self.score_hallucinations = score_hallucinations
|
|
44
|
+
self.async_ingest = async_ingest
|
|
45
|
+
self.debug = debug
|
|
46
|
+
self._session = requests.Session()
|
|
47
|
+
self._session.headers.update({
|
|
48
|
+
"Authorization": f"Bearer {api_key}",
|
|
49
|
+
"Content-Type": "application/json",
|
|
50
|
+
"User-Agent": "nirixa-python/0.1.0",
|
|
51
|
+
})
|
|
52
|
+
self._threads = []
|
|
53
|
+
|
|
54
|
+
# ── Public API ────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
def track(
|
|
57
|
+
self,
|
|
58
|
+
feature: str,
|
|
59
|
+
fn: Callable,
|
|
60
|
+
model: Optional[str] = None,
|
|
61
|
+
provider: Optional[str] = None,
|
|
62
|
+
prompt: Optional[str] = None,
|
|
63
|
+
) -> Any:
|
|
64
|
+
"""
|
|
65
|
+
Wrap any LLM call and track cost, latency, and hallucination risk.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
feature: Your feature/endpoint name e.g. "/api/summarize"
|
|
69
|
+
fn: Lambda or callable that makes the LLM call
|
|
70
|
+
model: Model name override (auto-detected for most providers)
|
|
71
|
+
provider: Provider override ("openai", "anthropic", "google", etc.)
|
|
72
|
+
prompt: Optional prompt text for better hallucination scoring
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
The original LLM response unchanged
|
|
76
|
+
"""
|
|
77
|
+
start = time.time()
|
|
78
|
+
error_msg = None
|
|
79
|
+
response = None
|
|
80
|
+
|
|
81
|
+
try:
|
|
82
|
+
response = fn()
|
|
83
|
+
except Exception as e:
|
|
84
|
+
error_msg = str(e)
|
|
85
|
+
raise
|
|
86
|
+
finally:
|
|
87
|
+
latency_ms = int((time.time() - start) * 1000)
|
|
88
|
+
meta = self._extract_meta(response, model, provider)
|
|
89
|
+
|
|
90
|
+
halluc = {"score": None, "risk": None}
|
|
91
|
+
if self.score_hallucinations and response is not None:
|
|
92
|
+
output_text = self._extract_text(response)
|
|
93
|
+
if output_text:
|
|
94
|
+
halluc = halluc_scorer.score(
|
|
95
|
+
output=output_text,
|
|
96
|
+
prompt=prompt,
|
|
97
|
+
model=meta["model"],
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
payload = {
|
|
101
|
+
"call_id": str(uuid.uuid4()),
|
|
102
|
+
"provider": meta["provider"],
|
|
103
|
+
"model": meta["model"] or "unknown",
|
|
104
|
+
"feature": feature,
|
|
105
|
+
"prompt_tokens": meta["prompt_tokens"],
|
|
106
|
+
"completion_tokens": meta["completion_tokens"],
|
|
107
|
+
"total_tokens": meta["total_tokens"],
|
|
108
|
+
"cost_usd": meta["cost_usd"],
|
|
109
|
+
"latency_ms": latency_ms,
|
|
110
|
+
"halluc_score": halluc["score"],
|
|
111
|
+
"halluc_risk": halluc["risk"],
|
|
112
|
+
"error": error_msg,
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
if self.async_ingest:
|
|
116
|
+
t = threading.Thread(target=self._ingest, args=(payload,), daemon=True)
|
|
117
|
+
t.start()
|
|
118
|
+
self._threads.append(t)
|
|
119
|
+
else:
|
|
120
|
+
self._ingest(payload)
|
|
121
|
+
|
|
122
|
+
return response
|
|
123
|
+
|
|
124
|
+
def flush(self, timeout: float = 5.0):
|
|
125
|
+
"""Wait for all pending ingest threads. Call before script exits."""
|
|
126
|
+
for t in self._threads:
|
|
127
|
+
t.join(timeout=timeout)
|
|
128
|
+
self._threads.clear()
|
|
129
|
+
|
|
130
|
+
def health(self) -> bool:
|
|
131
|
+
"""Check if Nirixa backend is reachable."""
|
|
132
|
+
try:
|
|
133
|
+
r = self._session.get(f"{self.host}/health", timeout=3)
|
|
134
|
+
return r.status_code == 200
|
|
135
|
+
except Exception:
|
|
136
|
+
return False
|
|
137
|
+
|
|
138
|
+
# ── Ingest ────────────────────────────────────────────
|
|
139
|
+
|
|
140
|
+
def _ingest(self, payload: dict):
|
|
141
|
+
try:
|
|
142
|
+
r = self._session.post(f"{self.host}/ingest", json=payload, timeout=5)
|
|
143
|
+
if self.debug:
|
|
144
|
+
print(f"[nirixa] {payload['feature']} → {r.status_code} ({payload['total_tokens']} tokens, ${payload['cost_usd']})")
|
|
145
|
+
except Exception as e:
|
|
146
|
+
if self.debug:
|
|
147
|
+
print(f"[nirixa] ingest failed: {e}")
|
|
148
|
+
|
|
149
|
+
# ── Meta Extraction (per provider) ───────────────────
|
|
150
|
+
|
|
151
|
+
def _extract_meta(self, response, model_override, provider_override) -> dict:
|
|
152
|
+
meta = {
|
|
153
|
+
"model": model_override,
|
|
154
|
+
"provider": provider_override,
|
|
155
|
+
"prompt_tokens": 0,
|
|
156
|
+
"completion_tokens": 0,
|
|
157
|
+
"total_tokens": 0,
|
|
158
|
+
"cost_usd": 0.0,
|
|
159
|
+
}
|
|
160
|
+
if response is None:
|
|
161
|
+
return meta
|
|
162
|
+
|
|
163
|
+
# ── OpenAI & Groq & Together AI & Mistral ──────────
|
|
164
|
+
# All use OpenAI-compatible response format
|
|
165
|
+
if hasattr(response, "usage") and hasattr(response, "choices"):
|
|
166
|
+
usage = response.usage
|
|
167
|
+
raw_model = getattr(response, "model", "") or ""
|
|
168
|
+
meta["model"] = model_override or raw_model
|
|
169
|
+
meta["provider"] = provider_override or self._detect_provider(raw_model)
|
|
170
|
+
|
|
171
|
+
if usage:
|
|
172
|
+
meta["prompt_tokens"] = getattr(usage, "prompt_tokens", 0) or 0
|
|
173
|
+
meta["completion_tokens"] = getattr(usage, "completion_tokens", 0) or 0
|
|
174
|
+
meta["total_tokens"] = getattr(usage, "total_tokens", 0) or (meta["prompt_tokens"] + meta["completion_tokens"])
|
|
175
|
+
meta["cost_usd"] = self._calc_cost(meta["model"], meta["prompt_tokens"], meta["completion_tokens"])
|
|
176
|
+
return meta
|
|
177
|
+
|
|
178
|
+
# ── Anthropic ──────────────────────────────────────
|
|
179
|
+
if hasattr(response, "usage") and hasattr(response, "content") and not hasattr(response, "choices"):
|
|
180
|
+
usage = response.usage
|
|
181
|
+
raw_model = getattr(response, "model", "") or ""
|
|
182
|
+
meta["model"] = model_override or raw_model
|
|
183
|
+
meta["provider"] = provider_override or "anthropic"
|
|
184
|
+
|
|
185
|
+
if usage:
|
|
186
|
+
meta["prompt_tokens"] = getattr(usage, "input_tokens", 0) or 0
|
|
187
|
+
meta["completion_tokens"] = getattr(usage, "output_tokens", 0) or 0
|
|
188
|
+
meta["total_tokens"] = meta["prompt_tokens"] + meta["completion_tokens"]
|
|
189
|
+
meta["cost_usd"] = self._calc_cost(meta["model"], meta["prompt_tokens"], meta["completion_tokens"])
|
|
190
|
+
return meta
|
|
191
|
+
|
|
192
|
+
# ── Google Gemini (native SDK) ─────────────────────
|
|
193
|
+
# response.usage_metadata.prompt_token_count
|
|
194
|
+
if hasattr(response, "usage_metadata"):
|
|
195
|
+
um = response.usage_metadata
|
|
196
|
+
meta["model"] = model_override or getattr(response, "model", "gemini")
|
|
197
|
+
meta["provider"] = provider_override or "google"
|
|
198
|
+
meta["prompt_tokens"] = getattr(um, "prompt_token_count", 0) or 0
|
|
199
|
+
meta["completion_tokens"] = getattr(um, "candidates_token_count", 0) or 0
|
|
200
|
+
meta["total_tokens"] = getattr(um, "total_token_count", 0) or (meta["prompt_tokens"] + meta["completion_tokens"])
|
|
201
|
+
meta["cost_usd"] = self._calc_cost(meta["model"], meta["prompt_tokens"], meta["completion_tokens"])
|
|
202
|
+
return meta
|
|
203
|
+
|
|
204
|
+
# ── Ollama ─────────────────────────────────────────
|
|
205
|
+
# response is a dict: {"model": ..., "prompt_eval_count": ..., "eval_count": ...}
|
|
206
|
+
if isinstance(response, dict) and "prompt_eval_count" in response:
|
|
207
|
+
meta["model"] = model_override or response.get("model", "ollama")
|
|
208
|
+
meta["provider"] = provider_override or "ollama"
|
|
209
|
+
meta["prompt_tokens"] = response.get("prompt_eval_count", 0) or 0
|
|
210
|
+
meta["completion_tokens"] = response.get("eval_count", 0) or 0
|
|
211
|
+
meta["total_tokens"] = meta["prompt_tokens"] + meta["completion_tokens"]
|
|
212
|
+
meta["cost_usd"] = 0.0 # local models = free
|
|
213
|
+
return meta
|
|
214
|
+
|
|
215
|
+
# ── AWS Bedrock ────────────────────────────────────
|
|
216
|
+
# response["ResponseMetadata"] or response.get("usage")
|
|
217
|
+
if isinstance(response, dict) and "ResponseMetadata" in response:
|
|
218
|
+
meta["provider"] = provider_override or "bedrock"
|
|
219
|
+
body = response.get("body", {})
|
|
220
|
+
if isinstance(body, dict):
|
|
221
|
+
usage = body.get("usage", {})
|
|
222
|
+
meta["model"] = model_override or response.get("model", "bedrock")
|
|
223
|
+
meta["prompt_tokens"] = usage.get("input_tokens", 0) or 0
|
|
224
|
+
meta["completion_tokens"] = usage.get("output_tokens", 0) or 0
|
|
225
|
+
meta["total_tokens"] = meta["prompt_tokens"] + meta["completion_tokens"]
|
|
226
|
+
meta["cost_usd"] = self._calc_cost(meta["model"], meta["prompt_tokens"], meta["completion_tokens"])
|
|
227
|
+
return meta
|
|
228
|
+
|
|
229
|
+
return meta
|
|
230
|
+
|
|
231
|
+
def _detect_provider(self, model: str) -> str:
|
|
232
|
+
"""Auto-detect provider from model name."""
|
|
233
|
+
m = model.lower()
|
|
234
|
+
if any(x in m for x in ["gpt", "o1", "o3", "text-davinci"]):
|
|
235
|
+
return "openai"
|
|
236
|
+
if any(x in m for x in ["claude"]):
|
|
237
|
+
return "anthropic"
|
|
238
|
+
if any(x in m for x in ["gemini"]):
|
|
239
|
+
return "google"
|
|
240
|
+
if any(x in m for x in ["llama", "mixtral", "gemma", "whisper"]):
|
|
241
|
+
# Could be Groq, Together, or Ollama — default groq for API calls
|
|
242
|
+
return "groq"
|
|
243
|
+
if any(x in m for x in ["mistral", "codestral"]):
|
|
244
|
+
return "mistral"
|
|
245
|
+
if any(x in m for x in ["qwen", "deepseek", "falcon"]):
|
|
246
|
+
return "together"
|
|
247
|
+
return "unknown"
|
|
248
|
+
|
|
249
|
+
def _extract_text(self, response) -> Optional[str]:
|
|
250
|
+
"""Extract text output for hallucination scoring."""
|
|
251
|
+
try:
|
|
252
|
+
# OpenAI / Groq / Together / Mistral
|
|
253
|
+
if hasattr(response, "choices"):
|
|
254
|
+
return response.choices[0].message.content or ""
|
|
255
|
+
# Anthropic
|
|
256
|
+
if hasattr(response, "content") and isinstance(response.content, list):
|
|
257
|
+
return getattr(response.content[0], "text", "") or ""
|
|
258
|
+
# Gemini
|
|
259
|
+
if hasattr(response, "candidates"):
|
|
260
|
+
return response.candidates[0].content.parts[0].text or ""
|
|
261
|
+
# Ollama
|
|
262
|
+
if isinstance(response, dict) and "response" in response:
|
|
263
|
+
return response["response"] or ""
|
|
264
|
+
# Bedrock (Claude via Bedrock)
|
|
265
|
+
if isinstance(response, dict) and "content" in response:
|
|
266
|
+
content = response["content"]
|
|
267
|
+
if isinstance(content, list) and content:
|
|
268
|
+
return content[0].get("text", "") or ""
|
|
269
|
+
except Exception:
|
|
270
|
+
pass
|
|
271
|
+
return None
|
|
272
|
+
|
|
273
|
+
def _calc_cost(self, model: str, prompt_tokens: int, completion_tokens: int) -> float:
|
|
274
|
+
"""Estimate USD cost. Prices per 1M tokens [input, output]."""
|
|
275
|
+
if not model:
|
|
276
|
+
return 0.0
|
|
277
|
+
|
|
278
|
+
m = model.lower()
|
|
279
|
+
|
|
280
|
+
PRICES = {
|
|
281
|
+
# OpenAI
|
|
282
|
+
"gpt-4o": (2.50, 10.00),
|
|
283
|
+
"gpt-4o-mini": (0.15, 0.60),
|
|
284
|
+
"gpt-4-turbo": (10.00, 30.00),
|
|
285
|
+
"gpt-4": (30.00, 60.00),
|
|
286
|
+
"gpt-3.5-turbo": (0.50, 1.50),
|
|
287
|
+
"o1": (15.00, 60.00),
|
|
288
|
+
"o3-mini": (1.10, 4.40),
|
|
289
|
+
# Anthropic
|
|
290
|
+
"claude-3-5-sonnet": (3.00, 15.00),
|
|
291
|
+
"claude-3-5-haiku": (0.80, 4.00),
|
|
292
|
+
"claude-3-opus": (15.00, 75.00),
|
|
293
|
+
"claude-3-sonnet": (3.00, 15.00),
|
|
294
|
+
"claude-3-haiku": (0.25, 1.25),
|
|
295
|
+
# Google
|
|
296
|
+
"gemini-2.0-flash": (0.10, 0.40),
|
|
297
|
+
"gemini-1.5-pro": (3.50, 10.50),
|
|
298
|
+
"gemini-1.5-flash": (0.35, 1.05),
|
|
299
|
+
"gemini-1.0-pro": (0.50, 1.50),
|
|
300
|
+
# Groq (very cheap)
|
|
301
|
+
"llama-3.1-70b": (0.59, 0.79),
|
|
302
|
+
"llama-3.1-8b": (0.05, 0.08),
|
|
303
|
+
"llama-3.3-70b": (0.59, 0.79),
|
|
304
|
+
"mixtral-8x7b": (0.24, 0.24),
|
|
305
|
+
"gemma2-9b": (0.20, 0.20),
|
|
306
|
+
# Mistral
|
|
307
|
+
"mistral-large": (2.00, 6.00),
|
|
308
|
+
"mistral-small": (0.20, 0.60),
|
|
309
|
+
"codestral": (0.20, 0.60),
|
|
310
|
+
"mistral-7b": (0.25, 0.25),
|
|
311
|
+
# Together AI
|
|
312
|
+
"llama-3-70b": (0.90, 0.90),
|
|
313
|
+
"llama-3-8b": (0.20, 0.20),
|
|
314
|
+
"deepseek-r1": (0.55, 2.19),
|
|
315
|
+
"qwen2.5-72b": (1.20, 1.20),
|
|
316
|
+
# Ollama — free (local)
|
|
317
|
+
"ollama": (0.00, 0.00),
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
for key, (input_price, output_price) in PRICES.items():
|
|
321
|
+
if key in m:
|
|
322
|
+
cost = (prompt_tokens * input_price + completion_tokens * output_price) / 1_000_000
|
|
323
|
+
return round(cost, 8)
|
|
324
|
+
|
|
325
|
+
# Fallback
|
|
326
|
+
return round((prompt_tokens * 0.50 + completion_tokens * 1.50) / 1_000_000, 8)
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# nirixa/middleware.py
|
|
2
|
+
# Auto-patchers for all major LLM providers.
|
|
3
|
+
# One call tracks everything without changing your existing code.
|
|
4
|
+
|
|
5
|
+
from .client import NirixaClient
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def patch_openai(client: NirixaClient, feature: str = "/api/openai"):
|
|
9
|
+
"""Auto-track all OpenAI chat completions."""
|
|
10
|
+
try:
|
|
11
|
+
import openai as _openai
|
|
12
|
+
original = _openai.chat.completions.create
|
|
13
|
+
|
|
14
|
+
def tracked(*args, **kwargs):
|
|
15
|
+
return client.track(
|
|
16
|
+
feature=feature,
|
|
17
|
+
model=kwargs.get("model", "gpt-4o"),
|
|
18
|
+
provider="openai",
|
|
19
|
+
prompt=str(kwargs.get("messages", "")),
|
|
20
|
+
fn=lambda: original(*args, **kwargs),
|
|
21
|
+
)
|
|
22
|
+
_openai.chat.completions.create = tracked
|
|
23
|
+
print(f"[nirixa] ✓ OpenAI patched → tracking '{feature}'")
|
|
24
|
+
return True
|
|
25
|
+
except ImportError:
|
|
26
|
+
return False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def patch_anthropic(client: NirixaClient, feature: str = "/api/anthropic"):
|
|
30
|
+
"""Auto-track all Anthropic messages."""
|
|
31
|
+
try:
|
|
32
|
+
import anthropic as _anthropic
|
|
33
|
+
original = _anthropic.resources.messages.Messages.create
|
|
34
|
+
|
|
35
|
+
def tracked(self_ref, *args, **kwargs):
|
|
36
|
+
return client.track(
|
|
37
|
+
feature=feature,
|
|
38
|
+
model=kwargs.get("model", "claude-3-5-sonnet-20241022"),
|
|
39
|
+
provider="anthropic",
|
|
40
|
+
prompt=str(kwargs.get("messages", "")),
|
|
41
|
+
fn=lambda: original(self_ref, *args, **kwargs),
|
|
42
|
+
)
|
|
43
|
+
_anthropic.resources.messages.Messages.create = tracked
|
|
44
|
+
print(f"[nirixa] ✓ Anthropic patched → tracking '{feature}'")
|
|
45
|
+
return True
|
|
46
|
+
except ImportError:
|
|
47
|
+
return False
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def patch_groq(client: NirixaClient, feature: str = "/api/groq"):
|
|
51
|
+
"""Auto-track all Groq completions (Llama, Mixtral, Gemma)."""
|
|
52
|
+
try:
|
|
53
|
+
import groq as _groq
|
|
54
|
+
original = _groq.resources.chat.completions.Completions.create
|
|
55
|
+
|
|
56
|
+
def tracked(self_ref, *args, **kwargs):
|
|
57
|
+
return client.track(
|
|
58
|
+
feature=feature,
|
|
59
|
+
model=kwargs.get("model", "llama-3.1-70b-versatile"),
|
|
60
|
+
provider="groq",
|
|
61
|
+
prompt=str(kwargs.get("messages", "")),
|
|
62
|
+
fn=lambda: original(self_ref, *args, **kwargs),
|
|
63
|
+
)
|
|
64
|
+
_groq.resources.chat.completions.Completions.create = tracked
|
|
65
|
+
print(f"[nirixa] ✓ Groq patched → tracking '{feature}'")
|
|
66
|
+
return True
|
|
67
|
+
except ImportError:
|
|
68
|
+
return False
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def patch_gemini(client: NirixaClient, feature: str = "/api/gemini"):
|
|
72
|
+
"""Auto-track all Google Gemini generate_content calls."""
|
|
73
|
+
try:
|
|
74
|
+
import google.generativeai as genai
|
|
75
|
+
original = genai.GenerativeModel.generate_content
|
|
76
|
+
|
|
77
|
+
def tracked(self_ref, *args, **kwargs):
|
|
78
|
+
return client.track(
|
|
79
|
+
feature=feature,
|
|
80
|
+
model=getattr(self_ref, "model_name", "gemini-1.5-pro"),
|
|
81
|
+
provider="google",
|
|
82
|
+
prompt=str(args[0] if args else kwargs.get("contents", "")),
|
|
83
|
+
fn=lambda: original(self_ref, *args, **kwargs),
|
|
84
|
+
)
|
|
85
|
+
genai.GenerativeModel.generate_content = tracked
|
|
86
|
+
print(f"[nirixa] ✓ Gemini patched → tracking '{feature}'")
|
|
87
|
+
return True
|
|
88
|
+
except ImportError:
|
|
89
|
+
return False
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def patch_mistral(client: NirixaClient, feature: str = "/api/mistral"):
|
|
93
|
+
"""Auto-track all Mistral AI completions."""
|
|
94
|
+
try:
|
|
95
|
+
from mistralai import Mistral as _Mistral
|
|
96
|
+
original = _Mistral.chat.complete
|
|
97
|
+
|
|
98
|
+
def tracked(self_ref, *args, **kwargs):
|
|
99
|
+
return client.track(
|
|
100
|
+
feature=feature,
|
|
101
|
+
model=kwargs.get("model", "mistral-large-latest"),
|
|
102
|
+
provider="mistral",
|
|
103
|
+
prompt=str(kwargs.get("messages", "")),
|
|
104
|
+
fn=lambda: original(self_ref, *args, **kwargs),
|
|
105
|
+
)
|
|
106
|
+
_Mistral.chat.complete = tracked
|
|
107
|
+
print(f"[nirixa] ✓ Mistral patched → tracking '{feature}'")
|
|
108
|
+
return True
|
|
109
|
+
except ImportError:
|
|
110
|
+
return False
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def patch_together(client: NirixaClient, feature: str = "/api/together"):
|
|
114
|
+
"""Auto-track all Together AI completions."""
|
|
115
|
+
try:
|
|
116
|
+
import together as _together
|
|
117
|
+
original = _together.Complete.create
|
|
118
|
+
|
|
119
|
+
def tracked(*args, **kwargs):
|
|
120
|
+
return client.track(
|
|
121
|
+
feature=feature,
|
|
122
|
+
model=kwargs.get("model", "meta-llama/Llama-3-70b-chat-hf"),
|
|
123
|
+
provider="together",
|
|
124
|
+
prompt=str(kwargs.get("prompt", "")),
|
|
125
|
+
fn=lambda: original(*args, **kwargs),
|
|
126
|
+
)
|
|
127
|
+
_together.Complete.create = tracked
|
|
128
|
+
print(f"[nirixa] ✓ Together AI patched → tracking '{feature}'")
|
|
129
|
+
return True
|
|
130
|
+
except ImportError:
|
|
131
|
+
return False
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def patch_ollama(client: NirixaClient, feature: str = "/api/ollama"):
|
|
135
|
+
"""Auto-track all Ollama chat calls (local models)."""
|
|
136
|
+
try:
|
|
137
|
+
import ollama as _ollama
|
|
138
|
+
original = _ollama.chat
|
|
139
|
+
|
|
140
|
+
def tracked(*args, **kwargs):
|
|
141
|
+
return client.track(
|
|
142
|
+
feature=feature,
|
|
143
|
+
model=kwargs.get("model", args[0] if args else "llama3"),
|
|
144
|
+
provider="ollama",
|
|
145
|
+
prompt=str(kwargs.get("messages", "")),
|
|
146
|
+
fn=lambda: original(*args, **kwargs),
|
|
147
|
+
)
|
|
148
|
+
_ollama.chat = tracked
|
|
149
|
+
print(f"[nirixa] ✓ Ollama patched → tracking '{feature}'")
|
|
150
|
+
return True
|
|
151
|
+
except ImportError:
|
|
152
|
+
return False
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def patch_all(client: NirixaClient, feature: str = "/api/llm"):
|
|
156
|
+
"""
|
|
157
|
+
Patch all installed LLM providers at once.
|
|
158
|
+
|
|
159
|
+
Usage:
|
|
160
|
+
from nirixa import NirixaClient
|
|
161
|
+
from nirixa.middleware import patch_all
|
|
162
|
+
|
|
163
|
+
nirixa = NirixaClient(api_key="nirixa-xxx")
|
|
164
|
+
patch_all(nirixa) # patches everything installed
|
|
165
|
+
"""
|
|
166
|
+
patched = []
|
|
167
|
+
for name, fn in [
|
|
168
|
+
("OpenAI", lambda: patch_openai(client, feature)),
|
|
169
|
+
("Anthropic", lambda: patch_anthropic(client, feature)),
|
|
170
|
+
("Groq", lambda: patch_groq(client, feature)),
|
|
171
|
+
("Gemini", lambda: patch_gemini(client, feature)),
|
|
172
|
+
("Mistral", lambda: patch_mistral(client, feature)),
|
|
173
|
+
("Together", lambda: patch_together(client, feature)),
|
|
174
|
+
("Ollama", lambda: patch_ollama(client, feature)),
|
|
175
|
+
]:
|
|
176
|
+
if fn():
|
|
177
|
+
patched.append(name)
|
|
178
|
+
|
|
179
|
+
print(f"[nirixa] Patched {len(patched)} providers: {', '.join(patched)}")
|
|
180
|
+
return patched
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# nirixa/scorer.py
|
|
2
|
+
# Lightweight hallucination risk scoring — no external deps needed.
|
|
3
|
+
# Uses heuristic patterns to score LLM output confidence.
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Phrases that suggest the model is uncertain / hallucinating
UNCERTAINTY_PHRASES = [
    "i'm not sure", "i am not sure", "i think", "i believe", "i'm not certain",
    "it's possible", "it is possible", "might be", "could be", "may be",
    "not entirely sure", "to my knowledge", "as far as i know",
    "i don't have information", "i cannot confirm", "i cannot verify",
    "i'm unable to verify", "approximately", "roughly", "around",
    "it seems", "it appears", "seemingly", "supposedly", "allegedly",
    "some sources say", "according to some", "i recall", "if i remember",
]

# Phrases that suggest confident, grounded responses
CONFIDENCE_PHRASES = [
    "according to", "based on the provided", "the document states",
    "as stated", "as mentioned", "the data shows", "research shows",
    "studies show", "it is known that", "it is established",
]

# Structural risk patterns
HIGH_RISK_PATTERNS = [
    r'\b\d{4}\b',                    # years — often hallucinated
    r'\b\d+\.\d+\b',                 # version numbers
    r'\b[A-Z][a-z]+ [A-Z][a-z]+\b',  # proper names
    r'https?://\S+',                 # URLs — often fabricated
    r'\$[\d,]+',                     # dollar amounts
    r'\b\d+%\b',                     # percentages
]


def score(
    output: str,
    prompt: Optional[str] = None,
    model: Optional[str] = None,
) -> dict:
    """
    Heuristically score an LLM output for hallucination risk.

    Returns:
        {
            "score": float (0.0 = safe, 1.0 = high risk),
            "risk": "LOW" | "MEDIUM" | "HIGH",
            "flags": list of detected risk signals
        }
    """
    # Empty or whitespace-only output carries no risk signal at all.
    if not output or not output.strip():
        return {"score": 0.0, "risk": "LOW", "flags": []}

    lowered = output.lower()
    flags = []
    total = 0.0

    # Hedging / uncertainty language raises risk (capped at 3 phrases).
    n_uncertain = sum(1 for phrase in UNCERTAINTY_PHRASES if phrase in lowered)
    if n_uncertain:
        total += 0.15 * min(n_uncertain, 3)
        flags.append(f"uncertainty language ({n_uncertain} phrases)")

    # Grounded-sounding language lowers risk (capped at 2 phrases, no flag).
    n_confident = sum(1 for phrase in CONFIDENCE_PHRASES if phrase in lowered)
    if n_confident:
        total -= 0.1 * min(n_confident, 2)

    # Concrete factual claims (years, URLs, names, ...) — up to two samples
    # per pattern, scored against the original (case-sensitive) text.
    claim_hits = []
    for pattern in HIGH_RISK_PATTERNS:
        claim_hits.extend(re.findall(pattern, output)[:2])
    if claim_hits:
        total += 0.05 * min(len(claim_hits), 4)
        flags.append(f"specific claims detected ({len(claim_hits)} instances)")

    # Longer answers expose more surface area for fabrication.
    words = len(output.split())
    if words > 500:
        total += 0.1
        flags.append("long output (>500 words)")
    elif words > 200:
        total += 0.05

    # Model-family prior: frontier models get a small discount,
    # small/cheap tiers a small penalty.
    if model:
        family = model.lower()
        if any(tag in family for tag in ("gpt-4", "claude-3-5", "gemini-1.5-pro")):
            total -= 0.05
        elif any(tag in family for tag in ("gpt-3.5", "mini", "haiku")):
            total += 0.05

    # An answer far longer than its prompt tends to drift off-grounding.
    if prompt and output:
        if len(output) / max(len(prompt), 1) > 5:
            total += 0.05
            flags.append("output much longer than prompt")

    # Clamp into [0, 1].
    total = max(0.0, min(1.0, total))

    # Map the numeric score onto a coarse risk tier.
    if total >= 0.6:
        tier = "HIGH"
    elif total >= 0.3:
        tier = "MEDIUM"
    else:
        tier = "LOW"

    return {
        "score": round(total, 4),
        "risk": tier,
        "flags": flags,
    }
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nirixa
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI Observability & Cost Intelligence — track token costs, latency, and hallucination risk
|
|
5
|
+
Home-page: https://nirixa.in
|
|
6
|
+
Author: Nirixa
|
|
7
|
+
Author-email: nirixaai@gmail.com
|
|
8
|
+
Keywords: llm observability openai anthropic groq gemini mistral ollama cost monitoring hallucination
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Requires-Python: >=3.8
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
Requires-Dist: requests>=2.28.0
|
|
16
|
+
Provides-Extra: openai
|
|
17
|
+
Requires-Dist: openai>=1.0.0; extra == "openai"
|
|
18
|
+
Provides-Extra: anthropic
|
|
19
|
+
Requires-Dist: anthropic>=0.20.0; extra == "anthropic"
|
|
20
|
+
Provides-Extra: groq
|
|
21
|
+
Requires-Dist: groq>=0.4.0; extra == "groq"
|
|
22
|
+
Provides-Extra: gemini
|
|
23
|
+
Requires-Dist: google-generativeai>=0.5.0; extra == "gemini"
|
|
24
|
+
Provides-Extra: mistral
|
|
25
|
+
Requires-Dist: mistralai>=1.0.0; extra == "mistral"
|
|
26
|
+
Provides-Extra: together
|
|
27
|
+
Requires-Dist: together>=1.0.0; extra == "together"
|
|
28
|
+
Provides-Extra: ollama
|
|
29
|
+
Requires-Dist: ollama>=0.1.0; extra == "ollama"
|
|
30
|
+
Provides-Extra: all
|
|
31
|
+
Requires-Dist: openai>=1.0.0; extra == "all"
|
|
32
|
+
Requires-Dist: anthropic>=0.20.0; extra == "all"
|
|
33
|
+
Requires-Dist: groq>=0.4.0; extra == "all"
|
|
34
|
+
Requires-Dist: google-generativeai>=0.5.0; extra == "all"
|
|
35
|
+
Requires-Dist: mistralai>=1.0.0; extra == "all"
|
|
36
|
+
Requires-Dist: together>=1.0.0; extra == "all"
|
|
37
|
+
Requires-Dist: ollama>=0.1.0; extra == "all"
|
|
38
|
+
Dynamic: author
|
|
39
|
+
Dynamic: author-email
|
|
40
|
+
Dynamic: classifier
|
|
41
|
+
Dynamic: description
|
|
42
|
+
Dynamic: description-content-type
|
|
43
|
+
Dynamic: home-page
|
|
44
|
+
Dynamic: keywords
|
|
45
|
+
Dynamic: provides-extra
|
|
46
|
+
Dynamic: requires-dist
|
|
47
|
+
Dynamic: requires-python
|
|
48
|
+
Dynamic: summary
|
|
49
|
+
|
|
50
|
+
# nirixa
|
|
51
|
+
|
|
52
|
+
**AI Observability & Cost Intelligence** — track token costs, latency, and hallucination risk for every LLM call.
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install nirixa
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Quick Start
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
from nirixa import NirixaClient
|
|
62
|
+
import openai
|
|
63
|
+
|
|
64
|
+
client = NirixaClient(api_key="nirixa-your-key")
|
|
65
|
+
|
|
66
|
+
# Wrap any LLM call — zero changes to your existing code
|
|
67
|
+
response = client.track(
|
|
68
|
+
feature="/api/chat",
|
|
69
|
+
fn=lambda: openai.chat.completions.create(
|
|
70
|
+
model="gpt-4o",
|
|
71
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
72
|
+
)
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
# response is the original OpenAI response — unchanged
|
|
76
|
+
print(response.choices[0].message.content)
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Module-level API
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
import nirixa
|
|
83
|
+
import openai
|
|
84
|
+
|
|
85
|
+
nirixa.init(api_key="nirixa-your-key")
|
|
86
|
+
|
|
87
|
+
response = nirixa.track(
|
|
88
|
+
feature="/api/summarize",
|
|
89
|
+
fn=lambda: openai.chat.completions.create(...)
|
|
90
|
+
)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Auto-patch (track everything automatically)
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from nirixa import NirixaClient
|
|
97
|
+
from nirixa.middleware import patch_openai
|
|
98
|
+
|
|
99
|
+
client = NirixaClient(api_key="nirixa-your-key")
|
|
100
|
+
patch_openai(client, feature="/api/chat")
|
|
101
|
+
|
|
102
|
+
# All openai calls now tracked automatically — no changes needed
|
|
103
|
+
import openai
|
|
104
|
+
openai.chat.completions.create(...)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Anthropic Support
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
from nirixa import NirixaClient
|
|
111
|
+
import anthropic
|
|
112
|
+
|
|
113
|
+
client = NirixaClient(api_key="nirixa-your-key")
|
|
114
|
+
claude = anthropic.Anthropic()
|
|
115
|
+
|
|
116
|
+
response = client.track(
|
|
117
|
+
feature="/api/analyze",
|
|
118
|
+
model="claude-3-5-sonnet-20241022",
|
|
119
|
+
fn=lambda: claude.messages.create(
|
|
120
|
+
model="claude-3-5-sonnet-20241022",
|
|
121
|
+
max_tokens=1024,
|
|
122
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
123
|
+
)
|
|
124
|
+
)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## What gets tracked
|
|
128
|
+
|
|
129
|
+
| Metric | Description |
|
|
130
|
+
|--------|-------------|
|
|
131
|
+
| Token cost | Per-call USD cost by feature and model |
|
|
132
|
+
| Latency | p50/p95/p99 response times |
|
|
133
|
+
| Hallucination risk | LOW / MEDIUM / HIGH scoring |
|
|
134
|
+
| Prompt drift | Output variance over time |
|
|
135
|
+
| Error rate | Failed calls by endpoint |
|
|
136
|
+
|
|
137
|
+
## Dashboard
|
|
138
|
+
|
|
139
|
+
View all your data at [nirixa.in](https://nirixa.in)
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
निरीक्षा — Observe everything.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
nirixa/__init__.py
|
|
4
|
+
nirixa/client.py
|
|
5
|
+
nirixa/middleware.py
|
|
6
|
+
nirixa/scorer.py
|
|
7
|
+
nirixa.egg-info/PKG-INFO
|
|
8
|
+
nirixa.egg-info/SOURCES.txt
|
|
9
|
+
nirixa.egg-info/dependency_links.txt
|
|
10
|
+
nirixa.egg-info/requires.txt
|
|
11
|
+
nirixa.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
requests>=2.28.0
|
|
2
|
+
|
|
3
|
+
[all]
|
|
4
|
+
openai>=1.0.0
|
|
5
|
+
anthropic>=0.20.0
|
|
6
|
+
groq>=0.4.0
|
|
7
|
+
google-generativeai>=0.5.0
|
|
8
|
+
mistralai>=1.0.0
|
|
9
|
+
together>=1.0.0
|
|
10
|
+
ollama>=0.1.0
|
|
11
|
+
|
|
12
|
+
[anthropic]
|
|
13
|
+
anthropic>=0.20.0
|
|
14
|
+
|
|
15
|
+
[gemini]
|
|
16
|
+
google-generativeai>=0.5.0
|
|
17
|
+
|
|
18
|
+
[groq]
|
|
19
|
+
groq>=0.4.0
|
|
20
|
+
|
|
21
|
+
[mistral]
|
|
22
|
+
mistralai>=1.0.0
|
|
23
|
+
|
|
24
|
+
[ollama]
|
|
25
|
+
ollama>=0.1.0
|
|
26
|
+
|
|
27
|
+
[openai]
|
|
28
|
+
openai>=1.0.0
|
|
29
|
+
|
|
30
|
+
[together]
|
|
31
|
+
together>=1.0.0
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
nirixa
|
nirixa-0.1.0/setup.cfg
ADDED
nirixa-0.1.0/setup.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from setuptools import setup, find_packages

# Read the long description with an explicit encoding and a context manager:
# the original `open("README.md").read()` leaked the file handle and relied on
# the platform's default encoding, which breaks builds on non-UTF-8 locales.
with open("README.md", encoding="utf-8") as readme:
    long_description = readme.read()

setup(
    name="nirixa",
    version="0.1.0",
    description="AI Observability & Cost Intelligence — track token costs, latency, and hallucination risk",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="Nirixa",
    author_email="nirixaai@gmail.com",
    url="https://nirixa.in",
    packages=find_packages(),
    python_requires=">=3.8",
    install_requires=[
        "requests>=2.28.0",
    ],
    # Optional provider SDKs — install with e.g. `pip install nirixa[openai]`.
    extras_require={
        "openai": ["openai>=1.0.0"],
        "anthropic": ["anthropic>=0.20.0"],
        "groq": ["groq>=0.4.0"],
        "gemini": ["google-generativeai>=0.5.0"],
        "mistral": ["mistralai>=1.0.0"],
        "together": ["together>=1.0.0"],
        "ollama": ["ollama>=0.1.0"],
        "all": [
            "openai>=1.0.0",
            "anthropic>=0.20.0",
            "groq>=0.4.0",
            "google-generativeai>=0.5.0",
            "mistralai>=1.0.0",
            "together>=1.0.0",
            "ollama>=0.1.0",
        ],
    },
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
    ],
    keywords="llm observability openai anthropic groq gemini mistral ollama cost monitoring hallucination",
)
|