kompact 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kompact/__init__.py +3 -0
- kompact/__main__.py +73 -0
- kompact/cache/__init__.py +0 -0
- kompact/cache/store.py +224 -0
- kompact/config.py +110 -0
- kompact/metrics/__init__.py +0 -0
- kompact/metrics/tracker.py +138 -0
- kompact/parser/__init__.py +0 -0
- kompact/parser/messages.py +311 -0
- kompact/proxy/__init__.py +0 -0
- kompact/proxy/server.py +290 -0
- kompact/transforms/__init__.py +0 -0
- kompact/transforms/cache_aligner.py +125 -0
- kompact/transforms/code_compressor.py +204 -0
- kompact/transforms/content_compressor.py +255 -0
- kompact/transforms/json_crusher.py +200 -0
- kompact/transforms/log_compressor.py +155 -0
- kompact/transforms/observation_masker.py +100 -0
- kompact/transforms/pipeline.py +139 -0
- kompact/transforms/schema_optimizer.py +200 -0
- kompact/transforms/toon.py +482 -0
- kompact/types.py +119 -0
- kompact-0.1.0.dist-info/METADATA +170 -0
- kompact-0.1.0.dist-info/RECORD +27 -0
- kompact-0.1.0.dist-info/WHEEL +4 -0
- kompact-0.1.0.dist-info/entry_points.txt +2 -0
- kompact-0.1.0.dist-info/licenses/LICENSE +21 -0
kompact/__init__.py
ADDED
kompact/__main__.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""CLI entry point for Kompact."""
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Root command group for the `kompact` CLI; subcommands (such as `proxy`)
# attach themselves with @cli.command().
@click.group()
def cli():
    """Kompact — Multi-layer context optimization proxy for LLM agents."""
    pass
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@cli.command()
@click.option("--port", default=7878, help="Port to listen on")
@click.option("--host", default="0.0.0.0", help="Host to bind to")
@click.option("--verbose", is_flag=True, help="Enable verbose logging")
@click.option(
    "--disable",
    multiple=True,
    help="Disable a transform (e.g. --disable toon --disable log_compressor)",
)
@click.option(
    "--anthropic-base-url",
    default="https://api.anthropic.com",
    help="Upstream Anthropic API URL",
)
@click.option(
    "--openai-base-url",
    default="https://api.openai.com",
    help="Upstream OpenAI API URL",
)
def proxy(
    port: int,
    host: str,
    verbose: bool,
    disable: tuple[str, ...],
    anthropic_base_url: str,
    openai_base_url: str,
):
    """Start the Kompact optimization proxy."""
    # Server-stack imports are deferred to command invocation so that
    # `kompact --help` does not pay for them.
    import uvicorn

    from kompact.config import KompactConfig
    from kompact.proxy.server import create_app

    config = KompactConfig(
        host=host,
        port=port,
        verbose=verbose,
        anthropic_base_url=anthropic_base_url,
        openai_base_url=openai_base_url,
    )

    # Flip off every transform the user asked to disable; anything that is
    # not a transform config (no `enabled` attribute) gets a warning.
    for transform_name in disable:
        section = getattr(config, transform_name, None)
        if section and hasattr(section, "enabled"):
            section.enabled = False
        else:
            click.echo(f"Warning: Unknown transform '{transform_name}'", err=True)

    app = create_app(config)

    # Startup banner describing the effective configuration.
    for line in (
        f"Kompact proxy starting on {host}:{port}",
        f" Anthropic upstream: {anthropic_base_url}",
        f" OpenAI upstream: {openai_base_url}",
        f" Disabled transforms: {', '.join(disable) or 'none'}",
        f" Dashboard: http://{host}:{port}/dashboard",
    ):
        click.echo(line)

    log_level = "info" if verbose else "warning"
    uvicorn.run(app, host=host, port=port, log_level=log_level)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Support direct execution via `python -m kompact`.
if __name__ == "__main__":
    cli()
|
|
File without changes
|
kompact/cache/store.py
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""Compression store for CCR-style content retrieval.
|
|
2
|
+
|
|
3
|
+
Stores original content replaced by compression markers, allowing the LLM
|
|
4
|
+
to request full content back when needed.
|
|
5
|
+
|
|
6
|
+
Features:
|
|
7
|
+
- Statistical summary in markers (not just token count)
|
|
8
|
+
- Adaptive TTL based on access frequency
|
|
9
|
+
- In-memory with bounded size (LRU eviction)
|
|
10
|
+
- Artifact index tracking what was compressed (kind, key, summary, turn)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import hashlib
|
|
16
|
+
import time
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class ArtifactEntry:
    """A tracked artifact that was compressed/masked."""

    kind: str  # "tool_result", "file", "search", "log", "code"
    key: str  # identifier (tool name, file path, etc.)
    summary: str  # brief description of what was stored
    turn_id: int  # conversation turn where this appeared
    store_key: str = ""  # key in the compression store for retrieval


@dataclass
class ArtifactIndex:
    """Persistent index of compressed artifacts for re-fetching."""

    entries: list[ArtifactEntry] = field(default_factory=list)

    def add(
        self,
        kind: str,
        key: str,
        summary: str,
        turn_id: int,
        store_key: str = "",
    ) -> None:
        """Record one compressed artifact in the index."""
        self.entries.append(ArtifactEntry(
            kind=kind, key=key, summary=summary, turn_id=turn_id, store_key=store_key,
        ))

    def get_by_kind(self, kind: str) -> list[ArtifactEntry]:
        """Return every recorded artifact of the given kind, in insertion order."""
        return [e for e in self.entries if e.kind == kind]

    def to_text(self) -> str:
        """Render as a compact text block for inclusion in context.

        Returns "" when the index is empty so callers can skip the block
        entirely. Entries are grouped by kind.
        """
        if not self.entries:
            return ""
        lines = ["[Artifact Index]"]
        by_kind: dict[str, list[ArtifactEntry]] = {}
        for e in self.entries:
            by_kind.setdefault(e.kind, []).append(e)
        for kind, items in by_kind.items():
            lines.append(f" {kind}:")
            for item in items:
                lines.append(f" - {item.key}: {item.summary} (turn {item.turn_id})")
        return "\n".join(lines)


@dataclass
class StoreEntry:
    """One stored piece of original (pre-compression) content."""

    content: str
    metadata: dict[str, Any]
    created_at: float  # epoch seconds at insertion
    ttl_seconds: float  # lifetime; may be extended by adaptive TTL on access
    access_count: int = 0
    last_accessed: float = 0.0  # epoch seconds; drives LRU eviction

    @property
    def is_expired(self) -> bool:
        """True once the entry has outlived created_at + ttl_seconds."""
        return time.time() > self.created_at + self.ttl_seconds


@dataclass
class CompressionStore:
    """In-memory store for compressed content retrieval.

    Content replaced by compression markers is parked here so the LLM can
    request it back. Entries expire by TTL (lazily, on access) and are
    LRU-evicted once the store reaches ``max_entries``.
    """

    max_entries: int = 10000
    default_ttl_seconds: float = 3600
    adaptive_ttl: bool = True  # extend TTL of entries that keep being read
    _entries: dict[str, StoreEntry] = field(default_factory=dict)
    _stats: dict[str, int] = field(default_factory=lambda: {
        "puts": 0,
        "gets": 0,
        "hits": 0,
        "misses": 0,
        "evictions": 0,
    })
    artifact_index: ArtifactIndex = field(default_factory=ArtifactIndex)

    def put(
        self,
        key: str,
        content: str,
        metadata: dict[str, Any] | None = None,
        ttl_seconds: float | None = None,
    ) -> str:
        """Store content and return a retrieval key.

        ``ttl_seconds=None`` means "use default_ttl_seconds"; an explicit
        0 (or negative) TTL yields an immediately-expired entry.
        """
        # Stable key derived from caller key + content hash, so identical
        # content stored twice maps to the same entry.
        store_key = self._make_key(key, content)

        # Evict only when inserting a genuinely new key at capacity;
        # overwriting an existing key does not grow the store.
        # (BUGFIX: previously an unrelated entry could be evicted on overwrite.)
        if store_key not in self._entries and len(self._entries) >= self.max_entries:
            self._evict()

        now = time.time()
        self._entries[store_key] = StoreEntry(
            content=content,
            metadata=metadata or {},
            created_at=now,
            # BUGFIX: `ttl_seconds or default` treated an explicit 0 as
            # "unset"; only None selects the default now.
            ttl_seconds=self.default_ttl_seconds if ttl_seconds is None else ttl_seconds,
            last_accessed=now,
        )
        self._stats["puts"] += 1
        return store_key

    def track(
        self,
        kind: str,
        key: str,
        content: str,
        turn_id: int = 0,
        metadata: dict[str, Any] | None = None,
        ttl_seconds: float | None = None,
    ) -> str:
        """Store content and record it in the artifact index.

        ``ttl_seconds`` is new and optional (defaults to the store default),
        forwarded to :meth:`put`.
        """
        # First line of the content, capped at 80 chars, is the index summary.
        summary = content[:80].split("\n")[0] if content else ""
        store_key = self.put(key, content, metadata, ttl_seconds=ttl_seconds)
        self.artifact_index.add(
            kind=kind, key=key, summary=summary,
            turn_id=turn_id, store_key=store_key,
        )
        return store_key

    def get(self, key: str) -> str | None:
        """Retrieve stored content by key; None on miss or expiry."""
        self._stats["gets"] += 1

        entry = self._entries.get(key)
        if entry is None:
            self._stats["misses"] += 1
            return None

        if entry.is_expired:
            # Expired entries are dropped lazily, on first access past TTL.
            del self._entries[key]
            self._stats["misses"] += 1
            return None

        entry.access_count += 1
        entry.last_accessed = time.time()

        # Frequently re-read entries live longer, capped at 4x the default.
        if self.adaptive_ttl:
            entry.ttl_seconds = min(
                entry.ttl_seconds * 1.5,
                self.default_ttl_seconds * 4,
            )

        self._stats["hits"] += 1
        return entry.content

    def get_metadata(self, key: str) -> dict[str, Any] | None:
        """Get metadata for a stored entry (None on miss or expiry)."""
        entry = self._entries.get(key)
        if entry is None or entry.is_expired:
            return None
        return entry.metadata

    def summary(self, key: str, max_length: int = 200) -> str | None:
        """Get a brief summary of stored content.

        Content at or under ``max_length`` is returned verbatim; longer
        content is rendered as head...[N chars total]...tail. Counts as an
        access for stats/TTL purposes (delegates to :meth:`get`).
        """
        content = self.get(key)
        if content is None:
            return None

        if len(content) <= max_length:
            return content

        half = max_length // 2
        return f"{content[:half]}...[{len(content)} chars total]...{content[-half:]}"

    @property
    def stats(self) -> dict[str, Any]:
        """Raw counters plus current entry count and hit rate."""
        return {
            **self._stats,
            "entries": len(self._entries),
            "hit_rate": (
                self._stats["hits"] / self._stats["gets"]
                if self._stats["gets"] > 0
                else 0.0
            ),
        }

    def clear(self) -> None:
        """Clear all entries (counters and the artifact index are kept)."""
        self._entries.clear()

    def _make_key(self, key: str, content: str) -> str:
        """Generate a store key of the form '<key>:<12-hex content hash>'."""
        content_hash = hashlib.sha256(content.encode()).hexdigest()[:12]
        return f"{key}:{content_hash}"

    def _evict(self) -> None:
        """Evict expired entries first, then LRU entries until under capacity."""
        expired = [k for k, v in self._entries.items() if v.is_expired]
        for k in expired:
            del self._entries[k]
            self._stats["evictions"] += 1

        # BUGFIX: guard the loop so min() is never called on an empty dict
        # (possible when max_entries <= 0).
        while self._entries and len(self._entries) >= self.max_entries:
            lru_key = min(
                self._entries,
                key=lambda k: self._entries[k].last_accessed,
            )
            del self._entries[lru_key]
            self._stats["evictions"] += 1
|
kompact/config.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Configuration for Kompact proxy and transforms."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass
class ToonConfig:
    # TOON tabular-array encoding of JSON payloads.
    enabled: bool = True
    min_array_length: int = 2
    separator: str = " | "


@dataclass
class ObservationMaskerConfig:
    # Masking of older tool observations, keeping the most recent ones.
    enabled: bool = True
    keep_last_n: int = 3
    include_summary: bool = True


@dataclass
class CacheAlignerConfig:
    # Normalization of volatile tokens so prompt-cache prefixes stay stable.
    enabled: bool = True
    normalize_uuids: bool = True
    normalize_timestamps: bool = True
    normalize_paths: bool = True


@dataclass
class JsonCrusherConfig:
    # Structural compression of repetitive JSON arrays.
    enabled: bool = True
    min_array_length: int = 3
    constant_threshold: float = 1.0  # fraction of items that must match
    low_cardinality_threshold: int = 5


@dataclass
class SchemaOptimizerConfig:
    # Relevance-based tool-schema pruning.
    enabled: bool = False  # Requires embedding model, off by default
    max_tools: int = 10
    min_relevance_score: float = 0.3


@dataclass
class CodeCompressorConfig:
    # Reduction of code blocks to their structural skeleton.
    enabled: bool = True
    keep_signatures: bool = True
    keep_imports: bool = True
    keep_docstrings: bool = True
    keep_type_annotations: bool = True
    max_body_lines: int = 0  # 0 = remove all bodies


@dataclass
class LogCompressorConfig:
    # Deduplication of repetitive log output.
    enabled: bool = True
    dedup_threshold: int = 3  # Min consecutive similar lines to compress
    keep_first_last: bool = True


@dataclass
class ContentCompressorConfig:
    # Extractive compression of long free-text blocks.
    enabled: bool = True
    target_ratio: float = 0.5  # Keep 50% of tokens
    min_tokens_to_compress: int = 200  # Only compress blocks > this
    entity_boost: float = 1.5
    position_boost: float = 1.2
    protect_recent_user_messages: int = 1
    protect_code_blocks: bool = True


@dataclass
class StoreConfig:
    # Compression store sizing and TTL behavior.
    max_entries: int = 10000
    default_ttl_seconds: int = 3600
    adaptive_ttl: bool = True


@dataclass
class KompactConfig:
    """Top-level configuration."""

    # Proxy networking and upstream endpoints.
    host: str = "0.0.0.0"
    port: int = 7878
    anthropic_base_url: str = "https://api.anthropic.com"
    openai_base_url: str = "https://api.openai.com"
    verbose: bool = False

    # One config section per transform, plus the compression store.
    toon: ToonConfig = field(default_factory=ToonConfig)
    observation_masker: ObservationMaskerConfig = field(default_factory=ObservationMaskerConfig)
    cache_aligner: CacheAlignerConfig = field(default_factory=CacheAlignerConfig)
    json_crusher: JsonCrusherConfig = field(default_factory=JsonCrusherConfig)
    schema_optimizer: SchemaOptimizerConfig = field(default_factory=SchemaOptimizerConfig)
    code_compressor: CodeCompressorConfig = field(default_factory=CodeCompressorConfig)
    log_compressor: LogCompressorConfig = field(default_factory=LogCompressorConfig)
    content_compressor: ContentCompressorConfig = field(default_factory=ContentCompressorConfig)
    store: StoreConfig = field(default_factory=StoreConfig)

    @property
    def disabled_transforms(self) -> set[str]:
        """Names of the transform sections whose ``enabled`` flag is False."""
        transform_names = (
            "toon", "observation_masker", "cache_aligner",
            "json_crusher", "schema_optimizer", "code_compressor",
            "log_compressor", "content_compressor",
        )
        return {name for name in transform_names if not getattr(self, name).enabled}
|
|
File without changes
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
"""Metrics tracking for Kompact proxy.
|
|
2
|
+
|
|
3
|
+
Tracks per-request and cumulative metrics:
|
|
4
|
+
- Tokens before/after per transform
|
|
5
|
+
- Compression ratio
|
|
6
|
+
- Transform latencies
|
|
7
|
+
- Request counts
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import time
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from kompact.types import PipelineResult
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class RequestMetrics:
    """Metrics for a single request."""

    timestamp: float  # epoch seconds when recorded
    tokens_before: int
    tokens_after: int  # never negative (clamped at 0)
    tokens_saved: int
    compression_ratio: float  # tokens_after / tokens_before; 1.0 = no savings
    transform_details: list[dict[str, Any]]
    latency_ms: float
    model: str = ""
    provider: str = ""


@dataclass
class MetricsTracker:
    """Tracks compression metrics across requests."""

    _requests: list[RequestMetrics] = field(default_factory=list)
    _max_history: int = 10000  # memory bound: keep only the newest N requests

    def record(
        self,
        pipeline_result: PipelineResult,
        tokens_before: int,
        latency_ms: float,
    ) -> RequestMetrics:
        """Record metrics from a pipeline run and return the new entry."""
        # BUGFIX: clamp before computing the ratio so tokens_after and
        # compression_ratio agree when savings exceed tokens_before
        # (previously the ratio could go negative while tokens_after was 0).
        tokens_after = max(0, tokens_before - pipeline_result.total_tokens_saved)

        metrics = RequestMetrics(
            timestamp=time.time(),
            tokens_before=tokens_before,
            tokens_after=tokens_after,
            tokens_saved=pipeline_result.total_tokens_saved,
            compression_ratio=(
                tokens_after / tokens_before if tokens_before > 0 else 1.0
            ),
            # One dict per transform: name + tokens_saved, merged with the
            # transform's own detail fields.
            transform_details=[
                {
                    "name": r.transform_name,
                    "tokens_saved": r.tokens_saved,
                    **r.details,
                }
                for r in pipeline_result.transform_results
            ],
            latency_ms=latency_ms,
            model=pipeline_result.request.model,
            provider=pipeline_result.request.provider.value,
        )

        self._requests.append(metrics)
        # Drop the oldest entries once past the history bound.
        if len(self._requests) > self._max_history:
            self._requests = self._requests[-self._max_history:]

        return metrics

    @property
    def summary(self) -> dict[str, Any]:
        """Get cumulative summary metrics."""
        if not self._requests:
            # BUGFIX: include "total_tokens_output" so the empty-state dict
            # has the same keys as the populated one.
            return {
                "total_requests": 0,
                "total_tokens_saved": 0,
                "total_tokens_processed": 0,
                "total_tokens_output": 0,
                "average_compression_ratio": 1.0,
                "average_latency_ms": 0.0,
                "transforms": {},
            }

        total_saved = sum(r.tokens_saved for r in self._requests)
        total_before = sum(r.tokens_before for r in self._requests)
        total_after = sum(r.tokens_after for r in self._requests)

        # Per-transform breakdown: cumulative tokens saved and call count.
        transform_stats: dict[str, dict[str, Any]] = {}
        for req in self._requests:
            for td in req.transform_details:
                name = td["name"]
                if name not in transform_stats:
                    transform_stats[name] = {"tokens_saved": 0, "invocations": 0}
                transform_stats[name]["tokens_saved"] += td.get("tokens_saved", 0)
                transform_stats[name]["invocations"] += 1

        return {
            "total_requests": len(self._requests),
            "total_tokens_saved": total_saved,
            "total_tokens_processed": total_before,
            "total_tokens_output": total_after,
            "average_compression_ratio": (
                total_after / total_before if total_before > 0 else 1.0
            ),
            "average_latency_ms": (
                sum(r.latency_ms for r in self._requests) / len(self._requests)
            ),
            "transforms": transform_stats,
        }

    @property
    def recent(self) -> list[dict[str, Any]]:
        """Get the last 20 request metrics as plain dicts (newest last)."""
        return [
            {
                "timestamp": r.timestamp,
                "tokens_before": r.tokens_before,
                "tokens_after": r.tokens_after,
                "tokens_saved": r.tokens_saved,
                "compression_ratio": round(r.compression_ratio, 3),
                "latency_ms": round(r.latency_ms, 1),
                "model": r.model,
                "provider": r.provider,
                "transforms": r.transform_details,
            }
            for r in self._requests[-20:]
        ]

    def reset(self) -> None:
        """Clear all recorded metrics."""
        self._requests.clear()
|
|
File without changes
|