ak-primus 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ak_primus/__init__.py +8 -0
- ak_primus/audit.py +201 -0
- ak_primus/classifier.py +1428 -0
- ak_primus/layers/__init__.py +102 -0
- ak_primus/layers/cache.py +268 -0
- ak_primus/layers/compression.py +758 -0
- ak_primus/layers/memory.py +537 -0
- ak_primus/layers/metrics.py +316 -0
- ak_primus/layers/native_compress.py +783 -0
- ak_primus/layers/prompt_opt.py +1010 -0
- ak_primus/layers/quality.py +411 -0
- ak_primus/layers/search.py +532 -0
- ak_primus/ml/__init__.py +4 -0
- ak_primus/ml/classifier_ml.py +667 -0
- ak_primus/proxy.py +895 -0
- ak_primus/router.py +447 -0
- ak_primus/server.py +908 -0
- ak_primus/storage/__init__.py +19 -0
- ak_primus/storage/session_store.py +526 -0
- ak_primus/storage/vector_store.py +375 -0
- ak_primus-0.2.0.dist-info/METADATA +287 -0
- ak_primus-0.2.0.dist-info/RECORD +26 -0
- ak_primus-0.2.0.dist-info/WHEEL +5 -0
- ak_primus-0.2.0.dist-info/entry_points.txt +3 -0
- ak_primus-0.2.0.dist-info/licenses/LICENSE +21 -0
- ak_primus-0.2.0.dist-info/top_level.txt +1 -0
ak_primus/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AK-Primus — Intelligent token compression & routing MCP server.
|
|
3
|
+
"""
|
|
4
|
+
from .classifier import RequestClassifier, RequestType
|
|
5
|
+
from .router import StackRouter, CompressionStack
|
|
6
|
+
|
|
7
|
+
__version__ = "0.2.0"  # must match the released distribution version (wheel/dist-info are 0.2.0)
|
|
8
|
+
# Public API of the package: the names re-exported above from the
# ``.classifier`` and ``.router`` submodules.
__all__ = ["RequestClassifier", "RequestType", "StackRouter", "CompressionStack"]
|
ak_primus/audit.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AK-Primus Enterprise Audit Logger
|
|
3
|
+
==================================
|
|
4
|
+
|
|
5
|
+
Append-only, structured audit log for every request that passes through
|
|
6
|
+
the AK-Primus proxy or MCP server.
|
|
7
|
+
|
|
8
|
+
Design goals
|
|
9
|
+
------------
|
|
10
|
+
- Append-only JSONL (one JSON object per line, newline delimited)
|
|
11
|
+
- Thread-safe and async-safe (asyncio.Lock for async callers, a threading lock for sync callers)
|
|
12
|
+
- Never crashes the proxy — write errors are logged and the affected record is dropped
|
|
13
|
+
- Human-readable timestamps (ISO 8601, UTC)
|
|
14
|
+
- Machine-readable for log aggregation (Splunk, Datadog, ELK, CloudWatch)
|
|
15
|
+
- Exportable to CSV / JSON for finance/compliance reporting
|
|
16
|
+
- Automatic daily rotation with configurable retention (not implemented in this module yet)
|
|
17
|
+
|
|
18
|
+
Each audit record contains
|
|
19
|
+
--------------------------
|
|
20
|
+
timestamp ISO 8601 UTC
|
|
21
|
+
session_id "ses_a1b2c3"
|
|
22
|
+
request_id UUID
|
|
23
|
+
request_type "agent_session" / "rag_doc" / ...
|
|
24
|
+
compression_method "selective_context" / "llmlingua2" / "passthrough" / ...
|
|
25
|
+
tokens_in int real tiktoken count before compression
|
|
26
|
+
tokens_out int real tiktoken count after compression
|
|
27
|
+
tokens_saved int
|
|
28
|
+
savings_pct float 0–100
|
|
29
|
+
compression_ms float latency added by AK-Primus (not upstream LLM latency)
|
|
30
|
+
model str model name from request (e.g. "gpt-4o")
|
|
31
|
+
upstream str upstream URL
|
|
32
|
+
error str populated only on error, null otherwise
|
|
33
|
+
passthrough bool true if compression was skipped
|
|
34
|
+
"""
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
import asyncio
|
|
38
|
+
import csv
|
|
39
|
+
import io
|
|
40
|
+
import json
|
|
41
|
+
import logging
|
|
42
|
+
import os
|
|
43
|
+
import time
|
|
44
|
+
import uuid
|
|
45
|
+
from datetime import datetime, timezone
|
|
46
|
+
from pathlib import Path
|
|
47
|
+
from typing import Any
|
|
48
|
+
|
|
49
|
+
log = logging.getLogger(__name__)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class AuditLogger:
    """
    Append-only JSONL audit logger usable from both async and blocking code.

    Usage:
        audit = AuditLogger("/var/log/akprimus/audit.jsonl")
        await audit.record(stats)      # inside a coroutine
        audit.record_sync(stats)       # from CLI / benchmark code

    Every record is serialized to exactly one JSON line and appended with a
    single ``write`` + ``flush`` + ``fsync`` while holding this instance's
    thread lock, so lines produced through ``record()`` and ``record_sync()``
    of the same instance cannot interleave.

    Neither ``record()`` nor ``record_sync()`` raises: any failure while
    building, serializing or writing a record is logged and the record is
    dropped.
    """

    # Columns written by export_csv(), in output order.
    _CSV_FIELDS = (
        "timestamp", "session_id", "request_type", "compression_method",
        "tokens_in", "tokens_out", "tokens_saved", "savings_pct",
        "compression_ms", "model", "compressed", "error",
    )

    def __init__(self, path: str = "./ak_primus_audit.jsonl") -> None:
        """Create a logger appending to *path*; the parent directory is created."""
        # Local import: the module's top-level imports do not include threading.
        import threading

        self._path = Path(path)
        # Serializes async callers so at most one executor job is queued at a time.
        self._lock = asyncio.Lock()
        # Guards the actual file append for *all* callers (async and sync).
        # Re-entrant so code that already holds it can still call _write_sync().
        self._sync_lock = threading.RLock()
        self._ensure_dir()

    def _ensure_dir(self) -> None:
        """Create the log file's parent directory if it does not exist."""
        self._path.parent.mkdir(parents=True, exist_ok=True)

    async def record(self, stats: dict[str, Any]) -> None:
        """Write one audit record async. Never raises — logs errors instead."""
        try:
            # Serialization happens inside the try block so that a malformed
            # or non-JSON-serializable ``stats`` cannot break the caller.
            line = self._serialize(stats)
            async with self._lock:
                # The blocking append + fsync runs in the default executor so
                # the event loop is not stalled by disk latency.
                await asyncio.get_running_loop().run_in_executor(
                    None, self._write_sync, line
                )
        except Exception as exc:
            log.error("Audit log write failed: %s", exc)

    def record_sync(self, stats: dict[str, Any]) -> None:
        """Synchronous variant for non-async callers (CLI, benchmark). Never raises."""
        try:
            self._write_sync(self._serialize(stats))
        except Exception as exc:
            log.error("Audit log write failed: %s", exc)

    def _serialize(self, stats: dict[str, Any]) -> str:
        """Return *stats* as one newline-terminated JSON line."""
        return json.dumps(self._build_entry(stats), ensure_ascii=False) + "\n"

    def _write_sync(self, line: str) -> None:
        """Append *line* to the log file and force it to disk."""
        with self._sync_lock:
            with open(self._path, "a", encoding="utf-8") as f:
                f.write(line)
                f.flush()
                os.fsync(f.fileno())

    @staticmethod
    def _build_entry(stats: dict[str, Any]) -> dict[str, Any]:
        """
        Build the audit record for *stats*.

        A UTC ISO 8601 timestamp and a fresh UUID4 ``request_id`` are generated
        on every call. Three input keys are renamed in the output:
        ``method`` -> ``compression_method``, ``savings_tokens`` ->
        ``tokens_saved`` and ``latency_ms`` -> ``compression_ms``.
        ``passthrough`` is the negation of the ``compressed`` flag.
        """
        return {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "request_id": str(uuid.uuid4()),
            "session_id": stats.get("session_id", ""),
            "request_type": stats.get("request_type", "unknown"),
            "compression_method": stats.get("method", "passthrough"),
            "tokens_in": stats.get("tokens_in", 0),
            "tokens_out": stats.get("tokens_out", 0),
            "tokens_saved": stats.get("savings_tokens", 0),
            "savings_pct": stats.get("savings_pct", 0.0),
            "confidence": stats.get("confidence", None),
            "compression_ms": stats.get("latency_ms", None),
            "model": stats.get("model", ""),
            "upstream": stats.get("upstream", ""),
            "compressed": stats.get("compressed", False),
            "error": stats.get("error", None),
            "passthrough": not stats.get("compressed", False),
        }

    # ── Export utilities ──────────────────────────────────────────────────────

    def export_csv(self, output_path: str | None = None) -> str:
        """
        Export audit log to CSV. Returns CSV string or writes to output_path.
        Suitable for finance/compliance reporting.

        Returns an empty string (and writes nothing) when the log has no
        records. Only the columns in ``_CSV_FIELDS`` are exported.
        """
        records = self._load_all()
        if not records:
            return ""

        buf = io.StringIO()
        writer = csv.DictWriter(
            buf, fieldnames=list(self._CSV_FIELDS), extrasaction="ignore"
        )
        writer.writeheader()
        writer.writerows(records)
        csv_str = buf.getvalue()

        if output_path:
            # newline="" keeps the csv module's own "\r\n" row terminators
            # intact; default text mode would turn them into "\r\r\n" on Windows.
            with open(output_path, "w", encoding="utf-8", newline="") as f:
                f.write(csv_str)
            log.info("Audit CSV exported to %s (%d records)", output_path, len(records))

        return csv_str

    def export_json(self, output_path: str | None = None) -> list[dict[str, Any]]:
        """Export audit log as a list of dicts. Optionally writes to output_path."""
        records = self._load_all()
        if output_path:
            Path(output_path).write_text(
                json.dumps(records, indent=2, ensure_ascii=False), encoding="utf-8"
            )
            log.info("Audit JSON exported to %s (%d records)", output_path, len(records))
        return records

    def summary(self) -> dict[str, Any]:
        """
        Aggregate summary over the full audit log.

        Returns ``{"total_requests": 0}`` for an empty log. Otherwise returns
        request counts, token totals, the mean ``savings_pct`` over compressed
        requests (rounded to one decimal) and a per-``request_type`` count.
        Missing, null or non-numeric numeric fields count as 0.
        """
        records = self._load_all()
        if not records:
            return {"total_requests": 0}

        def number(value: Any) -> float:
            # A record may carry JSON null (or junk) in a numeric field.
            return value if isinstance(value, (int, float)) else 0

        total = len(records)
        compressed = [r for r in records if r.get("compressed")]
        tokens_in = sum(number(r.get("tokens_in")) for r in records)
        tokens_saved = sum(number(r.get("tokens_saved")) for r in records)
        if compressed:
            avg_savings = round(
                sum(number(r.get("savings_pct")) for r in compressed) / len(compressed),
                1,
            )
        else:
            avg_savings = 0.0

        by_type: dict[str, int] = {}
        for r in records:
            t = r.get("request_type", "unknown")
            by_type[t] = by_type.get(t, 0) + 1

        return {
            "total_requests": total,
            "compressed_requests": len(compressed),
            "passthrough_requests": total - len(compressed),
            "total_tokens_in": tokens_in,
            "total_tokens_saved": tokens_saved,
            "avg_savings_pct": avg_savings,
            "by_request_type": by_type,
        }

    def _load_all(self) -> list[dict[str, Any]]:
        """
        Read every record from the log file.

        Returns an empty list when the file does not exist. Blank lines are
        ignored. Lines that are not valid JSON, or that decode to something
        other than a JSON object, are skipped and reported in one warning.
        """
        records: list[dict[str, Any]] = []
        skipped = 0
        try:
            with open(self._path, encoding="utf-8") as f:
                for raw in f:
                    text = raw.strip()
                    if not text:
                        continue
                    try:
                        parsed = json.loads(text)
                    except json.JSONDecodeError:
                        skipped += 1
                        continue
                    # Callers use dict methods on every record, so a bare
                    # list/number/string line must not be returned.
                    if isinstance(parsed, dict):
                        records.append(parsed)
                    else:
                        skipped += 1
        except FileNotFoundError:
            return []
        if skipped:
            log.warning(
                "Skipped %d malformed audit line(s) in %s", skipped, self._path
            )
        return records
|