brevitas-systems 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brevitas_systems-0.2.0/PKG-INFO +65 -0
- brevitas_systems-0.2.0/README.md +38 -0
- brevitas_systems-0.2.0/brevitas/__init__.py +71 -0
- brevitas_systems-0.2.0/brevitas/__main__.py +4 -0
- brevitas_systems-0.2.0/brevitas/_compress.py +174 -0
- brevitas_systems-0.2.0/brevitas/cli.py +107 -0
- brevitas_systems-0.2.0/brevitas/config.py +24 -0
- brevitas_systems-0.2.0/brevitas/labels.py +86 -0
- brevitas_systems-0.2.0/brevitas/proxy.py +233 -0
- brevitas_systems-0.2.0/brevitas/session.py +35 -0
- brevitas_systems-0.2.0/brevitas/wrappers/__init__.py +0 -0
- brevitas_systems-0.2.0/brevitas/wrappers/anthropic.py +86 -0
- brevitas_systems-0.2.0/brevitas/wrappers/openai.py +83 -0
- brevitas_systems-0.2.0/brevitas_systems.egg-info/PKG-INFO +65 -0
- brevitas_systems-0.2.0/brevitas_systems.egg-info/SOURCES.txt +35 -0
- brevitas_systems-0.2.0/brevitas_systems.egg-info/dependency_links.txt +1 -0
- brevitas_systems-0.2.0/brevitas_systems.egg-info/entry_points.txt +2 -0
- brevitas_systems-0.2.0/brevitas_systems.egg-info/requires.txt +23 -0
- brevitas_systems-0.2.0/brevitas_systems.egg-info/top_level.txt +2 -0
- brevitas_systems-0.2.0/pyproject.toml +38 -0
- brevitas_systems-0.2.0/setup.cfg +4 -0
- brevitas_systems-0.2.0/tests/test_phase_a_labels.py +379 -0
- brevitas_systems-0.2.0/tests/test_phase_b_aggregation.py +284 -0
- brevitas_systems-0.2.0/tests/test_phase_d_marketing_agency.py +300 -0
- brevitas_systems-0.2.0/tests/test_phase_f_supabase_mirror.py +315 -0
- brevitas_systems-0.2.0/token_efficiency_model/__init__.py +6 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/__init__.py +21 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/api_adapter.py +75 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/content_store.py +191 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/delta.py +287 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/dropin.py +255 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/engine.py +87 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/provider_cache.py +193 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/retrieval.py +321 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/rlm.py +242 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/router.py +119 -0
- brevitas_systems-0.2.0/token_efficiency_model/lossless/session_cache.py +347 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: brevitas-systems
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Lossless token savings + per-pipeline/agent tracking between your agents and the model.
|
|
5
|
+
License: Proprietary
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: httpx>=0.27.0
|
|
9
|
+
Requires-Dist: requests>=2.31.0
|
|
10
|
+
Requires-Dist: tiktoken>=0.7.0
|
|
11
|
+
Requires-Dist: numpy>=1.24.0
|
|
12
|
+
Requires-Dist: fastapi>=0.111.0
|
|
13
|
+
Requires-Dist: uvicorn[standard]>=0.29.0
|
|
14
|
+
Requires-Dist: pydantic>=2.0.0
|
|
15
|
+
Requires-Dist: click>=8.1.0
|
|
16
|
+
Requires-Dist: rich>=13.0.0
|
|
17
|
+
Provides-Extra: retrieval
|
|
18
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "retrieval"
|
|
19
|
+
Provides-Extra: anthropic
|
|
20
|
+
Requires-Dist: anthropic>=0.28.0; extra == "anthropic"
|
|
21
|
+
Provides-Extra: openai
|
|
22
|
+
Requires-Dist: openai>=1.30.0; extra == "openai"
|
|
23
|
+
Provides-Extra: all
|
|
24
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "all"
|
|
25
|
+
Requires-Dist: anthropic>=0.28.0; extra == "all"
|
|
26
|
+
Requires-Dist: openai>=1.30.0; extra == "all"
|
|
27
|
+
|
|
28
|
+
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
|
|
29
|
+
|
|
30
|
+
site: https://brevitassystems.com
|
|
31
|
+
|
|
32
|
+
## Getting Started
|
|
33
|
+
|
|
34
|
+
First, run the development server:
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
npm run dev
|
|
38
|
+
# or
|
|
39
|
+
yarn dev
|
|
40
|
+
# or
|
|
41
|
+
pnpm dev
|
|
42
|
+
# or
|
|
43
|
+
bun dev
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
|
47
|
+
|
|
48
|
+
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
|
|
49
|
+
|
|
50
|
+
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
|
|
51
|
+
|
|
52
|
+
## Learn More
|
|
53
|
+
|
|
54
|
+
To learn more about Next.js, take a look at the following resources:
|
|
55
|
+
|
|
56
|
+
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
|
|
57
|
+
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
|
|
58
|
+
|
|
59
|
+
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
|
|
60
|
+
|
|
61
|
+
## Deploy on Vercel
|
|
62
|
+
|
|
63
|
+
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
|
|
64
|
+
|
|
65
|
+
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
|
|
2
|
+
|
|
3
|
+
site: https://brevitassystems.com
|
|
4
|
+
|
|
5
|
+
## Getting Started
|
|
6
|
+
|
|
7
|
+
First, run the development server:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm run dev
|
|
11
|
+
# or
|
|
12
|
+
yarn dev
|
|
13
|
+
# or
|
|
14
|
+
pnpm dev
|
|
15
|
+
# or
|
|
16
|
+
bun dev
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
|
20
|
+
|
|
21
|
+
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
|
|
22
|
+
|
|
23
|
+
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
|
|
24
|
+
|
|
25
|
+
## Learn More
|
|
26
|
+
|
|
27
|
+
To learn more about Next.js, take a look at the following resources:
|
|
28
|
+
|
|
29
|
+
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
|
|
30
|
+
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
|
|
31
|
+
|
|
32
|
+
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
|
|
33
|
+
|
|
34
|
+
## Deploy on Vercel
|
|
35
|
+
|
|
36
|
+
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
|
|
37
|
+
|
|
38
|
+
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Brevitas — drop lossless token savings between your agents and the model.
|
|
3
|
+
|
|
4
|
+
Quick start (importable service — recommended):
|
|
5
|
+
from brevitas import BrevitasClient
|
|
6
|
+
|
|
7
|
+
client = BrevitasClient(provider="openai", api_key="sk-...")
|
|
8
|
+
response, savings = client.chat(
|
|
9
|
+
messages=[{"role": "system", "content": BRAND_PROMPT},
|
|
10
|
+
{"role": "user", "content": "Write a tweet for our oak table."}],
|
|
11
|
+
model="gpt-4o", session_id="marketing-agent",
|
|
12
|
+
)
|
|
13
|
+
print(savings.savings_pct, savings.cache_placement["strategy"])
|
|
14
|
+
|
|
15
|
+
The client auto-routes every call (cache vs retrieve), keeps the prefix byte-identical so
|
|
16
|
+
provider caching fires, learns each provider's real cache-hit rate, and reports honest
|
|
17
|
+
savings. Lossless — never drops load-bearing content; fails safe to full context.
|
|
18
|
+
|
|
19
|
+
Quick start (SDK wrapper around an existing client):
|
|
20
|
+
import anthropic, brevitas
|
|
21
|
+
client = brevitas.wrap(anthropic.Anthropic(api_key="sk-ant-..."))
|
|
22
|
+
|
|
23
|
+
# All calls are now automatically compressed
|
|
24
|
+
response = client.messages.create(
|
|
25
|
+
model="claude-sonnet-4-6",
|
|
26
|
+
max_tokens=1024,
|
|
27
|
+
messages=[{"role": "user", "content": "..."}],
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
Quick start (zero-code proxy):
|
|
31
|
+
$ brevitas start --api-key bvt_... --port 4242
|
|
32
|
+
$ export ANTHROPIC_BASE_URL=http://localhost:4242
|
|
33
|
+
# Your existing code works unchanged.
|
|
34
|
+
"""
|
|
35
|
+
from .config import configure, get as get_config
|
|
36
|
+
from .session import BrevitasSession
|
|
37
|
+
from .labels import start_run, agent, get_pipeline, get_agent, get_run_id, resolve_labels
|
|
38
|
+
from token_efficiency_model.lossless import BrevitasClient, SavingsReport, BrevitasRouter
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def wrap(client, session: BrevitasSession | None = None):
    """
    Return a Brevitas drop-in wrapper around a provider SDK client.

    The provider is recognised by duck typing rather than by class:
    an object exposing ``messages.create`` is treated as an Anthropic client,
    and an object exposing ``chat.completions`` is treated as an OpenAI client.
    The Anthropic probe runs first.

    Args:
        client: An anthropic.Anthropic or openai.OpenAI instance.
        session: Optional existing BrevitasSession, forwarded to the wrapper
            unchanged (``None`` is forwarded as-is).

    Returns:
        A BrevitasAnthropicClient or BrevitasOpenAIClient wrapping ``client``.

    Raises:
        TypeError: If ``client`` matches neither provider shape.
    """
    absent = object()

    # Anthropic shape: client.messages.create
    messages_api = getattr(client, "messages", absent)
    if messages_api is not absent and hasattr(messages_api, "create"):
        # Imported lazily so the wrapper module is only loaded when needed.
        from .wrappers.anthropic import BrevitasAnthropicClient

        return BrevitasAnthropicClient(client, session=session)

    # OpenAI shape: client.chat.completions
    chat_api = getattr(client, "chat", absent)
    if chat_api is not absent and hasattr(chat_api, "completions"):
        from .wrappers.openai import BrevitasOpenAIClient

        return BrevitasOpenAIClient(client, session=session)

    client_type = type(client).__name__
    raise TypeError(
        f"brevitas.wrap() does not recognise client type {client_type!r}. "
        "Pass an anthropic.Anthropic or openai.OpenAI instance."
    )
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Names re-exported as the package's public API (one per line for clean diffs).
__all__ = [
    "BrevitasClient",
    "SavingsReport",
    "BrevitasRouter",
    "configure",
    "get_config",
    "wrap",
    "BrevitasSession",
    "start_run",
    "agent",
    "get_pipeline",
    "get_agent",
    "get_run_id",
    "resolve_labels",
]

# Package version string.
__version__ = "0.2.0"
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Shared compression + usage-reporting logic used by both the SDK wrappers and the proxy.
|
|
3
|
+
All compression is done by calling the Brevitas REST API so every call is tracked.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
from .config import get as _cfg
|
|
10
|
+
from .session import BrevitasSession
|
|
11
|
+
|
|
12
|
+
# Load the tiktoken ``cl100k_base`` encoding once at import time. Any failure
# (missing package, encoding unavailable) selects the heuristic fallback.
try:
    import tiktoken

    _enc = tiktoken.get_encoding("cl100k_base")
except Exception:
    _enc = None


def _count_tokens(text: str) -> int:
    """Count the tokens in ``text``.

    Uses the tiktoken encoding when it loaded. Otherwise estimates the count
    as 1.3 tokens per whitespace-separated word, with a minimum of 1.
    """
    if _enc is None:
        word_total = len(text.split())
        return max(1, int(word_total * 1.3))
    # disallowed_special=() lets special-token text be encoded as ordinary text
    # instead of raising.
    return len(_enc.encode(text, disallowed_special=()))
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def count_messages_tokens(messages: list[dict]) -> int:
    """Sum the token counts of all text carried by a chat ``messages`` list.

    A message's ``content`` contributes when it is a string, or when it is a
    list, in which case only dict blocks with ``type == "text"`` are counted.
    Any other content shape contributes nothing.
    """

    def _text_payloads():
        # Yield every countable string in message order.
        for message in messages:
            body = message.get("content", "")
            if isinstance(body, str):
                yield body
            elif isinstance(body, list):
                for part in body:
                    if isinstance(part, dict) and part.get("type") == "text":
                        yield part.get("text", "")

    return sum(_count_tokens(payload) for payload in _text_payloads())
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _as_token_count(value, fallback: int) -> int:
    """Coerce a server-supplied token count to a non-negative int, else ``fallback``."""
    try:
        count = int(value)
    except (TypeError, ValueError):
        return fallback
    return count if count >= 0 else fallback


def _swap_text_blocks(blocks: list, new_text: str) -> list:
    """Return a copy of ``blocks`` with its text blocks replaced by ``new_text``.

    The first ``type == "text"`` block keeps its position (and any extra keys)
    and receives ``new_text``. Later text blocks are dropped because their text
    was already folded into the string that was compressed. Every non-text
    block (image, tool result, ...) is carried over untouched.
    """
    rebuilt = []
    placed = False
    for block in blocks:
        if isinstance(block, dict) and block.get("type") == "text":
            if not placed:
                rebuilt.append({**block, "text": new_text})
                placed = True
            continue
        rebuilt.append(block)
    return rebuilt


def compress_messages(
    messages: list[dict],
    session: BrevitasSession,
    task: str = "",
    complexity: float = 0.5,
    compression_level: int = 2,
    prune_budget: int = 5,
    pipeline: str = "",
    agent: str = "",
    run_id: str = "",
) -> tuple[list[dict], int, int]:
    """
    Compress a messages list via the Brevitas API, preserving prefix stability.

    Only the text of the LAST user message (the volatile tail) is sent to
    ``{base_url}/v1/compress``. Every other message is passed through as the
    same dict object, so the prefix stays identical. Non-text content blocks
    of the last user message are preserved as well.

    Args:
        messages: Chat messages (dicts with ``role`` and ``content``).
        session: Supplies ``prior_context()`` for the request and receives the
            response's ``quality_proxy`` in ``last_quality``.
        task: Task description sent to the API; defaults to the first 200
            characters of the text being compressed.
        complexity, compression_level, prune_budget: Forwarded to the API
            unchanged.
        pipeline, agent, run_id: Tracking labels forwarded to the API.

    Returns:
        ``(messages_out, baseline_tokens, compressed_tokens)``. When
        compression is disabled, there is nothing to compress, or the API call
        fails, the original ``messages`` object is returned with both counts
        equal to the client-side baseline.
    """
    cfg = _cfg()
    baseline_tokens = count_messages_tokens(messages)
    unchanged = (messages, baseline_tokens, baseline_tokens)

    if not cfg.get("enabled") or not cfg.get("api_key"):
        return unchanged

    # Find the index of the last user-role message (volatile tail).
    last_user_idx = next(
        (i for i in reversed(range(len(messages))) if messages[i].get("role") == "user"),
        -1,
    )
    if last_user_idx < 0:
        return unchanged

    content = messages[last_user_idx].get("content", "")
    if isinstance(content, str):
        last_text = content
    elif isinstance(content, list):
        last_text = " ".join(
            b.get("text") or ""
            for b in content
            if isinstance(b, dict) and b.get("type") == "text"
        )
    else:
        return unchanged

    # A message with no text (e.g. only tool results or images) has nothing to
    # compress; rewriting it could only lose content.
    if not last_text.strip():
        return unchanged

    prior = session.prior_context()

    try:
        resp = httpx.post(
            f"{cfg['base_url']}/v1/compress",
            headers={"X-API-Key": cfg["api_key"]},
            json={
                "task": task or last_text[:200],
                "messages": [last_text],  # Only compress the last user message
                "prior_context": prior,
                "complexity": complexity,
                "compression_level": compression_level,
                "prune_budget": prune_budget,
                "pipeline": pipeline,
                "agent": agent,
                "run_id": run_id,
                # report_usage() records the billing row via /v1/usage;
                # don't also record here or every call double-counts.
                "meter": False,
            },
            timeout=cfg.get("timeout", 30),
        )
        resp.raise_for_status()
        data = resp.json()
    except Exception:
        # Fail safe: any transport/HTTP/JSON problem means "send full context".
        return unchanged

    if not isinstance(data, dict):
        return unchanged

    # Accept the compressed text only if it is a non-empty string; otherwise
    # the last user message is left exactly as the caller supplied it.
    candidates = data.get("compressed_messages")
    compressed_text = None
    if isinstance(candidates, list) and candidates and isinstance(candidates[0], str):
        if candidates[0].strip():
            compressed_text = candidates[0]

    # Rebuild messages: prefix stays IDENTICAL, only last user message may change.
    out_messages: list[dict] = list(messages)
    if compressed_text is not None:
        replaced = dict(messages[last_user_idx])
        if isinstance(content, list):
            replaced["content"] = _swap_text_blocks(content, compressed_text)
        else:
            replaced["content"] = compressed_text
        out_messages[last_user_idx] = replaced

    # Remember the pipeline's quality estimate so report_usage() can forward it
    # to the billing quality gate.
    session.last_quality = data.get("quality_proxy")

    # Prefer the server's counts for BOTH baseline and optimized so that
    # report_usage compares like-for-like; fall back to client-side counts
    # when a value is missing or not a usable number.
    server_baseline = data.get("baseline_tokens")
    if server_baseline:
        baseline_tokens = _as_token_count(server_baseline, baseline_tokens)
    compressed_tokens = _as_token_count(
        data.get("optimized_tokens"), count_messages_tokens(out_messages)
    )
    return out_messages, baseline_tokens, compressed_tokens
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def report_usage(
    provider: str,
    model: str,
    baseline_tokens: int,
    compressed_tokens: int,
    session: BrevitasSession,
    pipeline: str = "",
    agent: str = "",
    run_id: str = "",
) -> None:
    """Report usage to Brevitas for billing. Best-effort; never raises.

    Posts one usage row to ``{base_url}/v1/usage`` with a 5 second timeout.
    Nothing is sent when no API key is configured, when the call produced no
    savings (``baseline_tokens <= compressed_tokens``), or when either count
    is not comparable (for example ``None``).

    Args:
        provider: Provider label sent with the row.
        model: Model name sent with the row.
        baseline_tokens: Token count before compression.
        compressed_tokens: Token count after compression.
        session: Source of ``session_id`` and ``last_quality`` for the row.
        pipeline, agent, run_id: Tracking labels sent with the row.
    """
    cfg = _cfg()
    if not cfg.get("api_key"):
        return

    # The comparison itself can raise (e.g. a None count); treat that as
    # "nothing to report" so the user's pipeline is never interrupted here.
    try:
        has_savings = baseline_tokens > compressed_tokens
    except TypeError:
        return
    if not has_savings:
        return

    try:
        httpx.post(
            f"{cfg['base_url']}/v1/usage",
            headers={"X-API-Key": cfg["api_key"]},
            json={
                "provider": provider,
                "model": model,
                "baseline_tokens": baseline_tokens,
                "compressed_tokens": compressed_tokens,
                "session_id": session.session_id,
                "pipeline": pipeline,
                "agent": agent,
                "run_id": run_id,
                "quality_score": session.last_quality,
            },
            timeout=5,
        )
    except Exception:
        pass  # billing reporting is best-effort; never break the user's pipeline
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""
|
|
2
|
+
brevitas CLI
|
|
3
|
+
"""
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
from rich.console import Console
|
|
13
|
+
from rich.table import Table
|
|
14
|
+
_console = Console()
|
|
15
|
+
except ImportError:
|
|
16
|
+
_console = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _print(msg: str) -> None:
    """Print ``msg`` to the terminal.

    With a rich console available the message is rendered by rich. Without
    one, rich-style markup tags such as ``[red]`` / ``[/red]`` are stripped so
    the plain ``print`` fallback does not show raw markup to the user.
    """
    if _console:
        _console.print(msg)
        return

    import re

    # Match only tag-shaped spans ("[/]" or "[" + optional "/" + a letter, "#"
    # or "@" ...), so bracketed data like "['api-key', 'base-url']" survives.
    print(re.sub(r"\[(?:/|/?[a-z#@][^\[\]]*)\]", "", msg))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@click.group()
def main() -> None:
    """Brevitas — drop compression between your agents."""
    # Root command group: it only anchors the subcommands registered with
    # @main.command(), so there is nothing to execute here.
    return None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@main.command()
@click.option("--port", default=4242, show_default=True, help="Proxy listen port")
@click.option("--api-key", default="", envvar="BREVITAS_API_KEY", help="Your Brevitas API key")
@click.option("--base-url", default="http://localhost:8000", envvar="BREVITAS_BASE_URL", show_default=True, help="Brevitas API base URL")
@click.option("--host", default="127.0.0.1", show_default=True, help="Bind host")
def start(port: int, api_key: str, base_url: str, host: str) -> None:
    """Start the local Brevitas proxy server."""
    # Export the settings so code that reads the environment sees them too.
    if api_key:
        os.environ["BREVITAS_API_KEY"] = api_key
    if base_url:
        os.environ["BREVITAS_BASE_URL"] = base_url

    from . import configure
    configure(api_key=api_key or os.getenv("BREVITAS_API_KEY", ""), base_url=base_url)

    _print(f"\n[bold green]Brevitas proxy starting on {host}:{port}[/bold green]")
    _print(f"  Compression API → [cyan]{base_url}[/cyan]")
    _print("\n[dim]Set your SDK base URL:[/dim]")
    _print(f"  [yellow]ANTHROPIC_BASE_URL=http://{host}:{port}[/yellow]")
    _print(f"  [yellow]OPENAI_BASE_URL=http://{host}:{port}/openai[/yellow]\n")

    # Each import gets its own guard so the error names the real culprit, and
    # ImportErrors raised while the server is running are not swallowed.
    try:
        import uvicorn
    except ImportError:
        _print("[red]uvicorn not installed. Run: pip install brevitas-systems[/red]")
        sys.exit(1)

    try:
        from .proxy import proxy_app
    except ImportError as exc:
        missing = exc.name or str(exc)
        _print(f"[red]Could not load the Brevitas proxy (missing: {missing}). Run: pip install brevitas-systems[/red]")
        sys.exit(1)

    uvicorn.run(proxy_app, host=host, port=port, log_level="warning")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@main.command()
@click.argument("key")
@click.argument("value")
def config(key: str, value: str) -> None:
    """Set a config value (api-key, base-url)."""
    import shlex

    cfg_map = {"api-key": "BREVITAS_API_KEY", "base-url": "BREVITAS_BASE_URL"}
    env_key = cfg_map.get(key.lower())
    if not env_key:
        _print(f"[red]Unknown config key '{key}'. Valid: {list(cfg_map)}[/red]")
        sys.exit(1)
    _print(f"[green]Set {env_key}={value}[/green]")
    # Nothing is persisted by this command; the user is shown the line to add
    # to their shell profile. Quote the value so the suggested line stays
    # valid shell even when it contains spaces or metacharacters (simple
    # values are printed unchanged).
    _print(f"[dim]Add to your shell profile: export {env_key}={shlex.quote(value)}[/dim]")
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@main.command()
@click.option("--api-key", default="", envvar="BREVITAS_API_KEY")
@click.option("--base-url", default="http://localhost:8000", envvar="BREVITAS_BASE_URL")
def status(api_key: str, base_url: str) -> None:
    """Check connectivity to the Brevitas API."""
    import httpx

    # Normalise like configure() does, so a trailing slash in the option or
    # environment value does not produce "//v1/..." request paths.
    base_url = base_url.rstrip("/")
    health_url = f"{base_url}/v1/health"

    _print(f"\nChecking [cyan]{health_url}[/cyan] …")
    try:
        r = httpx.get(health_url, timeout=5)
    except Exception as e:
        _print(f"[red]✗ Could not reach API: {e}[/red]")
        return

    if r.status_code == 200:
        _print("[green]✓ Brevitas API reachable[/green]")
    else:
        _print(f"[yellow]API returned {r.status_code}[/yellow]")

    if not api_key:
        _print("[dim]No API key set — set BREVITAS_API_KEY to check usage[/dim]")
        return

    try:
        r = httpx.get(f"{base_url}/v1/stats", headers={"X-API-Key": api_key}, timeout=5)
        if r.status_code == 200:
            data = r.json()
            _print("[green]✓ API key valid[/green]")
            # "or 0" keeps the numeric format specs working when the API
            # returns null for a counter.
            _print(f"  Total calls:        {data.get('total_calls', 0)}")
            _print(f"  Total tokens saved: {data.get('total_tokens_saved') or 0:,}")
            _print(f"  Total cost saved:   ${data.get('total_cost_saved_usd') or 0:.4f}")
            _print(f"  Brevitas fee owed:  ${data.get('total_brevitas_fee_usd') or 0:.4f}")
        else:
            _print(f"[red]✗ API key invalid (status {r.status_code})[/red]")
    except Exception as e:
        _print(f"[red]✗ Stats check failed: {e}[/red]")
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
def _env_flag(name: str, default: bool = True) -> bool:
    """Read a boolean environment variable.

    Returns ``default`` when the variable is unset. The values ``false``,
    ``0``, ``no`` and ``off`` (case-insensitive) mean False; anything else
    means True.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() not in {"false", "0", "no", "off"}


# Process-wide settings, seeded from the environment at import time.
_cfg: dict = {
    "api_key": os.getenv("BREVITAS_API_KEY", ""),
    # Stored without a trailing slash because callers append "/v1/...".
    "base_url": os.getenv("BREVITAS_BASE_URL", "http://localhost:8000").rstrip("/"),
    "enabled": _env_flag("BREVITAS_ENABLED"),
    "timeout": 30,
}


def configure(
    api_key: str = "",
    base_url: str = "",
    enabled: bool | None = None,
    timeout: int | None = None,
) -> None:
    """Update the process-wide Brevitas settings.

    Only the arguments that are actually supplied are changed, so for example
    ``configure(api_key=...)`` no longer re-enables compression that was
    switched off via ``BREVITAS_ENABLED`` or resets a custom timeout.

    Args:
        api_key: New API key; an empty string leaves the current key.
        base_url: New API base URL (trailing slashes are removed); an empty
            string leaves the current URL.
        enabled: Turn compression on or off; ``None`` leaves it unchanged.
        timeout: Request timeout stored under ``"timeout"``; ``None`` leaves
            it unchanged.
    """
    if api_key:
        _cfg["api_key"] = api_key
    if base_url:
        _cfg["base_url"] = base_url.rstrip("/")
    if enabled is not None:
        _cfg["enabled"] = enabled
    if timeout is not None:
        _cfg["timeout"] = timeout


def get() -> dict:
    """Return a shallow copy of the current settings.

    Mutating the returned dict does not affect the stored configuration.
    """
    return dict(_cfg)
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Contextvar-based label propagation for tracking pipeline/agent/run_id.
|
|
3
|
+
|
|
4
|
+
Resolution order (highest to lowest priority):
|
|
5
|
+
1. Per-call _brevitas_meta override
|
|
6
|
+
2. Contextvar value (set by start_run() or the agent() context manager)
|
|
7
|
+
3. Default (empty string)
|
|
8
|
+
"""
|
|
9
|
+
from contextvars import ContextVar
|
|
10
|
+
from contextlib import contextmanager
|
|
11
|
+
import secrets
|
|
12
|
+
from typing import Optional, Dict, Any
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Context variables that carry the tracking labels. Each one defaults to the
# empty string, meaning "no label set".
_pipeline_var: ContextVar[str] = ContextVar(
    "brevitas_pipeline",
    default="",
)
_agent_var: ContextVar[str] = ContextVar(
    "brevitas_agent",
    default="",
)
_run_id_var: ContextVar[str] = ContextVar(
    "brevitas_run_id",
    default="",
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def start_run(pipeline: str = "", run_id: str = "") -> str:
    """
    Set the pipeline and run_id labels for the current context.

    When ``run_id`` is empty, a fresh id of the form ``run_<random token>`` is
    generated. The values are set without keeping reset tokens, so they stay
    in effect until they are set again.

    Returns the run_id that is now in effect.
    """
    effective_run_id = run_id if run_id else "run_" + secrets.token_urlsafe(12)

    _pipeline_var.set(pipeline)
    _run_id_var.set(effective_run_id)
    return effective_run_id
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_pipeline() -> str:
    """Return the pipeline label of the current context ("" when unset)."""
    current_pipeline = _pipeline_var.get("")
    return current_pipeline
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_agent() -> str:
    """Return the agent label of the current context ("" when unset)."""
    current_agent = _agent_var.get("")
    return current_agent
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_run_id() -> str:
    """Return the run_id label of the current context ("" when unset)."""
    current_run_id = _run_id_var.get("")
    return current_run_id
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@contextmanager
def agent(agent_name: str):
    """
    Context manager that sets the agent label for the duration of the block.

    The previous agent label is restored on exit, including when the block
    raises.

    Example:
        with agent("copywriter"):
            client.messages.create(...)  # auto-tagged with agent="copywriter"
    """
    restore_token = _agent_var.set(agent_name)
    try:
        yield
    finally:
        # Put back whatever label was active before entering the block.
        _agent_var.reset(restore_token)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def resolve_labels(
    _brevitas_meta: Optional[Dict[str, Any]] = None,
) -> Dict[str, str]:
    """
    Work out the labels that apply to one call.

    For each of 'pipeline', 'agent' and 'run_id' the first truthy value wins:
      1. The per-call ``_brevitas_meta`` entry
      2. The context variable (set by start_run / agent)
      3. The empty-string default

    An empty or missing override therefore falls through to the context value.

    Args:
        _brevitas_meta: Optional per-call override dict with 'pipeline', 'agent', 'run_id'

    Returns:
        Dict with 'pipeline', 'agent', 'run_id' keys
    """
    overrides = _brevitas_meta if _brevitas_meta else {}

    resolved = {}
    resolved["pipeline"] = overrides.get("pipeline") or get_pipeline()
    resolved["agent"] = overrides.get("agent") or get_agent()
    resolved["run_id"] = overrides.get("run_id") or get_run_id()
    return resolved
|